mirror of https://github.com/Cisco-Talos/clamav
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
416 lines
12 KiB
416 lines
12 KiB
/*
|
|
* HTML Entity & Encoding normalization.
|
|
*
|
|
* Copyright (C) 2006-2007 Török Edvin <edwin@clamav.net>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
* MA 02110-1301, USA.
|
|
*
|
|
*/
|
|
#include <clamav-config.h>
|
|
#ifdef CL_EXPERIMENTAL
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include "cltypes.h"
|
|
#include "clamav.h"
|
|
#include "others.h"
|
|
#include "hashtab.h"
|
|
|
|
|
|
static const size_t prime_list[] =
|
|
{
|
|
53ul, 97ul, 193ul, 389ul, 769ul,
|
|
1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
|
|
49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
|
|
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
|
|
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
|
|
1610612741ul, 3221225473ul
|
|
};
|
|
|
|
|
|
static const size_t prime_n = sizeof(prime_list)/sizeof(prime_list[0]);
|
|
|
|
static unsigned char DELETED_KEY[] = "";
|
|
|
|
static size_t get_nearest_capacity(const size_t capacity)
|
|
{
|
|
size_t i;
|
|
for(i=0 ;i < prime_n; i++) {
|
|
if (prime_list[i] > capacity)
|
|
return prime_list[i];
|
|
}
|
|
cli_errmsg("Requested hashtable size is too big!");
|
|
return prime_list[prime_n-1];
|
|
}
|
|
|
|
#ifdef PROFILE_HASHTABLE
|
|
/* I know, this is ugly, most of these functions get a const s, that gets its const-ness discarded,
|
|
* and then these functions modify something the compiler assumes is readonly.
|
|
* Please, never use PROFILE_HASHTABLE in production code, and in releases. Use it for development only!*/
|
|
|
|
static inline void PROFILE_INIT(struct hashtable *s)
|
|
{
|
|
memset(&s->PROFILE_STRUCT,0,sizeof(s->PROFILE_STRUCT));
|
|
}
|
|
|
|
static inline void PROFILE_CALC_HASH(struct hashtable *s)
|
|
{
|
|
s->PROFILE_STRUCT.calc_hash++;
|
|
}
|
|
|
|
static inline void PROFILE_FIND_ELEMENT(struct hashtable *s)
|
|
{
|
|
s->PROFILE_STRUCT.find_req++;
|
|
}
|
|
|
|
static inline void PROFILE_FIND_NOTFOUND(struct hashtable *s, size_t tries)
|
|
{
|
|
s->PROFILE_STRUCT.not_found++;
|
|
s->PROFILE_STRUCT.not_found_tries += tries;
|
|
}
|
|
|
|
static inline void PROFILE_FIND_FOUND(struct hashtable *s, size_t tries)
|
|
{
|
|
s->PROFILE_STRUCT.found++;
|
|
s->PROFILE_STRUCT.found_tries += tries;
|
|
}
|
|
|
|
static inline void PROFILE_HASH_EXHAUSTED(struct hashtable *s)
|
|
{
|
|
s->PROFILE_STRUCT.hash_exhausted++;
|
|
}
|
|
|
|
static inline void PROFILE_GROW_START(struct hashtable *s)
|
|
{
|
|
s->PROFILE_STRUCT.grow++;
|
|
}
|
|
|
|
static inline void PROFILE_GROW_FOUND(struct hashtable *s, size_t tries)
|
|
{
|
|
s->PROFILE_STRUCT.grow_found++;
|
|
s->PROFILE_STRUCT.grow_found_tries += tries;
|
|
}
|
|
|
|
static inline void PROFILE_GROW_DONE(struct hashtable *s)
|
|
{
|
|
}
|
|
|
|
static inline void PROFILE_DELETED_REUSE(struct hashtable *s, size_t tries)
|
|
{
|
|
s->PROFILE_STRUCT.deleted_reuse++;
|
|
s->PROFILE_STRUCT.deleted_tries += tries;
|
|
}
|
|
|
|
static inline void PROFILE_INSERT(struct hashtable *s, size_t tries)
|
|
{
|
|
s->PROFILE_STRUCT.inserts++;
|
|
s->PROFILE_STRUCT.insert_tries += tries;
|
|
}
|
|
|
|
static inline void PROFILE_DATA_UPDATE(struct hashtable *s, size_t tries)
|
|
{
|
|
s->PROFILE_STRUCT.update++;
|
|
s->PROFILE_STRUCT.update_tries += tries;
|
|
}
|
|
|
|
static inline void PROFILE_HASH_DELETE(struct hashtable *s)
|
|
{
|
|
s->PROFILE_STRUCT.deletes++;
|
|
}
|
|
|
|
static inline void PROFILE_HASH_CLEAR(struct hashtable *s)
|
|
{
|
|
s->PROFILE_STRUCT.clear++;
|
|
}
|
|
|
|
static inline void PROFILE_REPORT(const struct hashtable *s)
|
|
{
|
|
size_t lookups, queries, insert_tries, inserts;
|
|
cli_dbgmsg("--------Hashtable usage report for %p--------------\n",(const void*)s);
|
|
cli_dbgmsg("hash function calculations:%ld\n",s->PROFILE_STRUCT.calc_hash);
|
|
cli_dbgmsg("successfull finds/total searches: %ld/%ld; lookups: %ld\n", s->PROFILE_STRUCT.found, s->PROFILE_STRUCT.find_req, s->PROFILE_STRUCT.found_tries);
|
|
cli_dbgmsg("unsuccessfull finds/total searches: %ld/%ld; lookups: %ld\n", s->PROFILE_STRUCT.not_found, s->PROFILE_STRUCT.find_req , s->PROFILE_STRUCT.not_found_tries);
|
|
cli_dbgmsg("successfull finds during grow:%ld; lookups: %ld\n",s->PROFILE_STRUCT.grow_found, s->PROFILE_STRUCT.grow_found_tries);
|
|
lookups = s->PROFILE_STRUCT.found_tries + s->PROFILE_STRUCT.not_found_tries + s->PROFILE_STRUCT.grow_found_tries;
|
|
queries = s->PROFILE_STRUCT.find_req + s->PROFILE_STRUCT.grow_found;
|
|
cli_dbgmsg("Find Lookups/total queries: %ld/%ld = %3f\n", lookups, queries, lookups*1.0/queries);
|
|
insert_tries = s->PROFILE_STRUCT.insert_tries + s->PROFILE_STRUCT.update_tries + s->PROFILE_STRUCT.deleted_tries;
|
|
|
|
cli_dbgmsg("new item insert tries/new items: %ld/%ld\n", s->PROFILE_STRUCT.insert_tries, s->PROFILE_STRUCT.inserts);
|
|
cli_dbgmsg("update tries/updates: %ld/%ld\n", s->PROFILE_STRUCT.update_tries, s->PROFILE_STRUCT.update);
|
|
cli_dbgmsg("deleted item reuse tries/deleted&reused items: %ld/%ld\n", s->PROFILE_STRUCT.deleted_tries, s->PROFILE_STRUCT.deleted_reuse);
|
|
inserts = s->PROFILE_STRUCT.inserts + s->PROFILE_STRUCT.update + s->PROFILE_STRUCT.deleted_reuse;
|
|
cli_dbgmsg("Insert tries/total inserts: %ld/%ld = %3f\n", insert_tries, inserts, insert_tries*1.0/inserts);
|
|
|
|
cli_dbgmsg("Grows: %ld, Deletes : %ld, hashtable clears: %ld\n",s->PROFILE_STRUCT.grow,s->PROFILE_STRUCT.deletes, s->PROFILE_STRUCT.clear);
|
|
cli_dbgmsg("--------Report end-------------\n");
|
|
}
|
|
|
|
#else
|
|
#define PROFILE_INIT(s)
|
|
#define PROFILE_CALC_HASH(s)
|
|
#define PROFILE_FIND_ELEMENT(s)
|
|
#define PROFILE_FIND_NOTFOUND(s, tries)
|
|
#define PROFILE_FIND_FOUND(s, tries)
|
|
#define PROFILE_HASH_EXHAUSTED(s)
|
|
#define PROFILE_GROW_START(s)
|
|
#define PROFILE_GROW_FOUND(s, tries)
|
|
#define PROFILE_GROW_DONE(s)
|
|
#define PROFILE_DELETED_REUSE(s, tries)
|
|
#define PROFILE_INSERT(s, tries)
|
|
#define PROFILE_DATA_UPDATE(s, tries)
|
|
#define PROFILE_HASH_DELETE(s)
|
|
#define PROFILE_HASH_CLEAR(s)
|
|
#define PROFILE_REPORT(s)
|
|
#endif
|
|
|
|
int hashtab_init(struct hashtable *s,size_t capacity)
|
|
{
|
|
if(!s)
|
|
return CL_ENULLARG;
|
|
|
|
PROFILE_INIT(s);
|
|
|
|
capacity = get_nearest_capacity(capacity);
|
|
s->htable = cli_calloc(capacity,sizeof(*s->htable));
|
|
if(!s->htable)
|
|
return CL_EMEM;
|
|
s->capacity = capacity;
|
|
s->used = 0;
|
|
s->maxfill = 8*capacity/10;
|
|
return 0;
|
|
}
|
|
|
|
static size_t hash(const unsigned char* k,const size_t len,const size_t SIZE)
|
|
{
|
|
size_t Hash = 0;
|
|
size_t i;
|
|
for(i=len;i>0;i--)
|
|
Hash = ((Hash << 8) + k[i-1]) % SIZE;
|
|
return Hash;
|
|
}
|
|
|
|
/* if returned element has key==NULL, then key was not found in table */
|
|
struct element* hashtab_find(const struct hashtable *s,const unsigned char* key,const size_t len)
|
|
{
|
|
struct element* element;
|
|
size_t tries = 1;
|
|
size_t idx;
|
|
|
|
if(!s)
|
|
return NULL;
|
|
PROFILE_CALC_HASH(s);
|
|
PROFILE_FIND_ELEMENT(s);
|
|
idx = hash(key, len, s->capacity);
|
|
element = &s->htable[idx];
|
|
do {
|
|
if(!element->key) {
|
|
PROFILE_FIND_NOTFOUND(s, tries);
|
|
return NULL; /* element not found, place is empty*/
|
|
}
|
|
else if(element->key != DELETED_KEY && strncmp((const char*)key,(const char*)element->key,len)==0) {
|
|
PROFILE_FIND_FOUND(s, tries);
|
|
return element;/* found */
|
|
}
|
|
else {
|
|
idx = (idx + tries++) % s->capacity;
|
|
element = &s->htable[idx];
|
|
}
|
|
} while (tries <= s->capacity);
|
|
PROFILE_HASH_EXHAUSTED(s);
|
|
return NULL; /* not found */
|
|
}
|
|
|
|
static int hashtab_grow(struct hashtable *s)
|
|
{
|
|
const size_t new_capacity = get_nearest_capacity(s->capacity);
|
|
struct element* htable = cli_calloc(new_capacity, sizeof(*s->htable));
|
|
size_t i,idx, used = 0;
|
|
if(new_capacity == s->capacity || !htable)
|
|
return CL_EMEM;
|
|
|
|
PROFILE_GROW_START(s);
|
|
cli_dbgmsg("hashtab.c: Warning: growing open-addressing hashtables is slow. Either allocate more storage when initializing, or use other hashtable types!\n");
|
|
for(i=0; i < s->capacity;i++) {
|
|
if(s->htable[i].key && s->htable[i].key != DELETED_KEY) {
|
|
struct element* element;
|
|
size_t tries = 1;
|
|
|
|
PROFILE_CALC_HASH(s);
|
|
idx = hash(s->htable[i].key, strlen((const char*)s->htable[i].key), new_capacity);
|
|
element = &htable[idx];
|
|
|
|
while(element->key && tries <= new_capacity) {
|
|
idx = (idx + tries++) % new_capacity;
|
|
element = &htable[idx];
|
|
}
|
|
if(!element->key) {
|
|
/* copy element from old hashtable to new */
|
|
PROFILE_GROW_FOUND(s, tries);
|
|
*element = s->htable[i];
|
|
used++;
|
|
}
|
|
else {
|
|
cli_errmsg("hashtab.c: Impossible - unable to rehash table");
|
|
return CL_EMEM;/* this means we didn't find enough room for all elements in the new table, should never happen */
|
|
}
|
|
}
|
|
}
|
|
free(s->htable);
|
|
s->htable = htable;
|
|
s->used = used;
|
|
s->capacity = new_capacity;
|
|
s->maxfill = new_capacity*8/10;
|
|
cli_dbgmsg("Table %p size after grow:%ld\n",(void*)s,s->capacity);
|
|
PROFILE_GROW_DONE(s);
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
|
|
int hashtab_insert(struct hashtable *s,const unsigned char* key,const size_t len,const element_data data)
|
|
{
|
|
struct element* element;
|
|
struct element* deleted_element = NULL;
|
|
size_t tries = 1;
|
|
size_t idx;
|
|
if(!s)
|
|
return CL_ENULLARG;
|
|
do {
|
|
PROFILE_CALC_HASH(s);
|
|
idx = hash(key, len, s->capacity);
|
|
element = &s->htable[idx];
|
|
|
|
do {
|
|
if(!element->key) {
|
|
unsigned char* thekey;
|
|
/* element not found, place is empty, insert*/
|
|
if(deleted_element) {
|
|
/* reuse deleted elements*/
|
|
element = deleted_element;
|
|
PROFILE_DELETED_REUSE(s, tries);
|
|
}
|
|
else {
|
|
PROFILE_INSERT(s, tries);
|
|
}
|
|
thekey = cli_malloc(len+1);
|
|
if(!thekey)
|
|
return CL_EMEM;
|
|
strncpy((char*)thekey,(const char*)key,len+1);
|
|
element->key = thekey;
|
|
element->data = data;
|
|
s->used++;
|
|
if(s->used > s->maxfill) {
|
|
cli_dbgmsg("hashtab.c:Growing hashtable %p, because it has exceeded maxfill, old size:%ld\n",(void*)s,s->capacity);
|
|
hashtab_grow(s);
|
|
}
|
|
return 0;
|
|
}
|
|
else if(element->key == DELETED_KEY) {
|
|
deleted_element = element;
|
|
}
|
|
else if(strncmp((const char*)key,(const char*)element->key,len)==0) {
|
|
PROFILE_DATA_UPDATE(s, tries);
|
|
element->data = data;/* key found, update */
|
|
return 0;
|
|
}
|
|
else {
|
|
idx = (idx + tries++) % s->capacity;
|
|
element = &s->htable[idx];
|
|
}
|
|
} while (tries <= s->capacity);
|
|
/* no free place found*/
|
|
PROFILE_HASH_EXHAUSTED(s);
|
|
cli_dbgmsg("hashtab.c: Growing hashtable %p, because its full, old size:%ld.\n",(void*)s,s->capacity);
|
|
} while( hashtab_grow(s) >= 0 );
|
|
cli_warnmsg("hashtab.c: Unable to grow hashtable\n");
|
|
return CL_EMEM;
|
|
}
|
|
|
|
void hashtab_delete(struct hashtable *s,const unsigned char* key,const size_t len)
|
|
{
|
|
struct element* e = hashtab_find(s,key,len);
|
|
if(e && e->key) {
|
|
PROFILE_HASH_DELETE(s);
|
|
free(e->key);/*FIXME: any way to shut up warnings here? if I make key char*, I get tons of warnings in entitylist.h */
|
|
e->key = DELETED_KEY;
|
|
s->used--;
|
|
}
|
|
}
|
|
|
|
void hashtab_clear(struct hashtable *s)
|
|
{
|
|
size_t i;
|
|
PROFILE_HASH_CLEAR(s);
|
|
for(i=0;i < s->capacity;i++) {
|
|
if(s->htable[i].key && s->htable[i].key != DELETED_KEY)
|
|
free(s->htable[i].key);/*FIXME: shut up warnings */
|
|
}
|
|
memset(s->htable, 0, s->capacity);
|
|
s->used = 0;
|
|
}
|
|
|
|
|
|
int hashtab_store(const struct hashtable *s,FILE* out)
|
|
{
|
|
size_t i;
|
|
for(i=0; i < s->capacity; i++) {
|
|
const struct element* e = &s->htable[i];
|
|
if(e->key && e->key != DELETED_KEY) {
|
|
fprintf(out,"%ld %s\n",e->data,e->key);
|
|
}
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
int hashtab_generate_c(const struct hashtable *s,const char* name)
|
|
{
|
|
size_t i;
|
|
printf("/* TODO: include GPL headers */\n");
|
|
printf("#include <hashtab.h>\n");
|
|
printf("static struct element %s_elements[] = {\n",name);
|
|
for(i=0; i < s->capacity; i++) {
|
|
const struct element* e = &s->htable[i];
|
|
if(!e->key)
|
|
printf("\t{NULL, 0},\n");
|
|
else if(e->key == DELETED_KEY)
|
|
printf("\t{DELETED_KEY,0},\n");
|
|
else
|
|
printf("\t{(const unsigned char*)\"%s\", %ld},\n", e->key, e->data);
|
|
}
|
|
printf("};\n");
|
|
printf("const struct hashtable %s = {\n",name);
|
|
printf("\t%s_elements, %ld, %ld, %ld", name, s->capacity, s->used, s->maxfill);
|
|
printf("\n};\n");
|
|
|
|
PROFILE_REPORT(s);
|
|
return 0;
|
|
}
|
|
|
|
int hashtab_load(FILE* in, struct hashtable *s)
|
|
{
|
|
char line[1024];
|
|
while (fgets(line, sizeof(line), in)) {
|
|
unsigned char l[1024];
|
|
int val;
|
|
sscanf(line,"%d %1023s",&val,l);
|
|
hashtab_insert(s,l,strlen((const char*)l),val);
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
#endif
|
|
|