optimize u16_normalize, encode as hex entities (&#x0200;)

update dependencies to rebuild on hashtab.c change
support keys with common prefix by checking match length
update due to hashtab change


git-svn: trunk@3536
remotes/push_mirror/metadata
Török Edvin 18 years ago
parent c1856fcbce
commit b8a505eeda
  1. 8
      ChangeLog
  2. 4
      contrib/entitynorm/Makefile
  3. 106
      libclamav/encoding_aliases.h
  4. 38
      libclamav/entconv.c
  5. 3086
      libclamav/entitylist.h
  6. 17
      libclamav/hashtab.c
  7. 1
      libclamav/hashtab.h

@ -1,3 +1,11 @@
Wed Jan 23 21:52:06 EET 2008 (edwin)
------------------------------------
* libclamav/entconv.c: optimize u16_normalize, encode as hex entities
(&#x0200;)
* contrib/entitynorm: update dependencies to rebuild on hashtab.c change
* libclamav/hashtab.[ch]: support keys with common prefix by checking match length
* libclamav/entitylist.h, encoding_aliases.h: update due to hashtab change
Wed Jan 23 17:53:10 CET 2008 (acab)
-----------------------------------
* libclamunrar_iface: realign structs (related to bb#474)

@ -6,10 +6,10 @@ all: entitylist.h encoding_aliases.h gentbl encname_chars.h
entities_parsed: entities entities/* entity_decl_parse.pl
$(PERL) entity_decl_parse.pl $</* | sort -u >$@
generate_entitylist: generate_entitylist.c ../../libclamav/hashtab.h
generate_entitylist: generate_entitylist.c ../../libclamav/hashtab.h ../../libclamav/hashtab.c ../../libclamav/others.c
$(CC) -I. -DHAVE_CONFIG_H -DCLI_MEMFUNSONLY $< ../../libclamav/hashtab.c ../../libclamav/others.c -o $@
generate_encoding_aliases: generate_encoding_aliases.c ../../libclamav/htmlnorm.h ../../libclamav/entconv.h ../../libclamav/cltypes.h ../../libclamav/hashtab.h ../../libclamav/hashtab.h
generate_encoding_aliases: generate_encoding_aliases.c ../../libclamav/hashtab.c ../../libclamav/others.c ../../libclamav/htmlnorm.h ../../libclamav/entconv.h ../../libclamav/cltypes.h ../../libclamav/hashtab.h ../../libclamav/hashtab.h
$(CC) -I. -DHAVE_CONFIG_H -DCLI_MEMFUNSONLY $< ../../libclamav/hashtab.c ../../libclamav/others.c -o $@
entitylist.h: generate_entitylist entities_parsed

@ -21,59 +21,59 @@
#include <hashtab.h>
static struct element aliases_htable_elements[] = {
{NULL, 0},
{NULL, 0},
{NULL, 0},
{"UTF8", 8},
{"ISO-10646/UTF-8", 8},
{NULL, 0},
{"UTF-16", 1},
{"UTF16LE", 7},
{NULL, 0},
{"UTF-32", 0},
{"10646-1:1993/UCS4", 0},
{NULL, 0},
{"UTF-16LE", 7},
{NULL, 0},
{NULL, 0},
{NULL, 0},
{"UCS-4LE", 2},
{"UCS-4", 0},
{"UCS2", 1},
{"UTF-16BE", 6},
{NULL, 0},
{"UTF-32LE", 2},
{NULL, 0},
{"UTF16BE", 6},
{"UTF32", 0},
{"UTF-32BE", 3},
{"UTF32LE", 2},
{NULL, 0},
{NULL, 0},
{NULL, 0},
{NULL, 0},
{NULL, 0},
{NULL, 0},
{"UCS-4BE", 3},
{"ISO-10646/UCS2", 1},
{NULL, 0},
{"10646-1:1993", 0},
{"ISO-10646/UCS4", 0},
{"ISO-10646", 0},
{"UTF-8", 8},
{"UTF32BE", 3},
{"ISO-10646/UTF8", 8},
{NULL, 0},
{NULL, 0},
{"UCS4", 0},
{NULL, 0},
{NULL, 0},
{NULL, 0},
{NULL, 0},
{NULL, 0},
{NULL, 0},
{NULL, 0},
{NULL, 0},
{NULL,0,0},
{NULL,0,0},
{NULL,0,0},
{"UTF8", 8, 4},
{"ISO-10646/UTF-8", 8, 15},
{NULL,0,0},
{"UTF-16", 1, 6},
{"UTF16LE", 7, 7},
{NULL,0,0},
{"UTF-32", 0, 6},
{"10646-1:1993/UCS4", 0, 17},
{NULL,0,0},
{"UTF-16LE", 7, 8},
{NULL,0,0},
{NULL,0,0},
{NULL,0,0},
{"UCS-4LE", 2, 7},
{"UCS-4", 0, 5},
{"UCS2", 1, 4},
{"UTF-16BE", 6, 8},
{NULL,0,0},
{"UTF-32LE", 2, 8},
{NULL,0,0},
{"UTF16BE", 6, 7},
{"UTF32", 0, 5},
{"UTF-32BE", 3, 8},
{"UTF32LE", 2, 7},
{NULL,0,0},
{NULL,0,0},
{NULL,0,0},
{NULL,0,0},
{NULL,0,0},
{NULL,0,0},
{"UCS-4BE", 3, 7},
{"ISO-10646/UCS2", 1, 14},
{NULL,0,0},
{"10646-1:1993", 0, 12},
{"ISO-10646/UCS4", 0, 14},
{"ISO-10646", 0, 9},
{"UTF-8", 8, 5},
{"UTF32BE", 3, 7},
{"ISO-10646/UTF8", 8, 14},
{NULL,0,0},
{NULL,0,0},
{"UCS4", 0, 4},
{NULL,0,0},
{NULL,0,0},
{NULL,0,0},
{NULL,0,0},
{NULL,0,0},
{NULL,0,0},
{NULL,0,0},
{NULL,0,0},
};
const struct hashtable aliases_htable = {
aliases_htable_elements, 53, 25, 42

@ -67,38 +67,38 @@ typedef struct {
} * iconv_t;
#endif
static unsigned char tohex[] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
/* TODO: gcc refuses to inline because it consider call unlikely and code size grows */
static inline unsigned char* u16_normalize(uint16_t u16, unsigned char* out, const ssize_t limit)
{
assert(limit > 0 && "u16_normalize must be called with positive limit");
/* \0 is just ignored */
if(u16 > 0 && u16 < 0xff) {
if(!u16) {
return out;
}
if(u16 < 0xff) {
assert((uint8_t)u16 != 0);
*out++ = (uint8_t)u16;
}
else if (u16) {
} else {
size_t i;
/* normalize only >255 to speed up */
char buf[6];
/* &#65535; 8 bytes, buffer: 65535\0 6 bytes => max_num = sizeof(buf) + 2 */
const ssize_t max_num_length = sizeof(buf) + 2;
size_t i = sizeof(buf)-1;
if(limit <= max_num_length) {
if(limit <= 8) {
/* not enough space available */
return NULL;
}
/* inline version of
* out += snprintf(out, max_num_length, "&#%d;", u16) */
buf[i] = '\0';
while(u16 && i > 0 ) {
buf[--i] = '0' + (u16 % 10);
u16 /= 10;
* out += snprintf(out, max_num_length, "&#x%x;", u16) */
out[0] = '&';
out[1] = '#';
out[2] = 'x';
out[7] = ';';
for(i=6; i >= 3; --i) {
out[i] = tohex[u16 & 0xf];
u16 >>= 4;
}
*out++ = '&';
*out++ = '#';
while(buf[i]) *out++ = buf[i++];
*out++ = ';';
out += 8;
}
return out;
}

File diff suppressed because it is too large Load Diff

@ -221,7 +221,7 @@ struct element* hashtab_find(const struct hashtable *s,const char* key,const siz
PROFILE_FIND_NOTFOUND(s, tries);
return NULL; /* element not found, place is empty*/
}
else if(element->key != DELETED_KEY && strncmp(key, element->key,len)==0) {
else if(element->key != DELETED_KEY && len == element->len && strncmp(key, element->key,len)==0) {
PROFILE_FIND_FOUND(s, tries);
return element;/* found */
}
@ -250,7 +250,7 @@ static int hashtab_grow(struct hashtable *s)
size_t tries = 1;
PROFILE_CALC_HASH(s);
idx = hash((const unsigned char*)s->htable[i].key, strlen(s->htable[i].key), new_capacity);
idx = hash((const unsigned char*)s->htable[i].key, s->htable[i].len, new_capacity);
element = &htable[idx];
while(element->key && tries <= new_capacity) {
@ -307,9 +307,10 @@ int hashtab_insert(struct hashtable *s, const char* key, const size_t len, const
thekey = cli_malloc(len+1);
if(!thekey)
return CL_EMEM;
strncpy(thekey,(const char*)key,len+1);
strncpy(thekey, key, len+1);
element->key = thekey;
element->data = data;
element->len = len;
s->used++;
if(s->used > s->maxfill) {
cli_dbgmsg("hashtab.c:Growing hashtable %p, because it has exceeded maxfill, old size:%ld\n",(void*)s,s->capacity);
@ -320,10 +321,10 @@ int hashtab_insert(struct hashtable *s, const char* key, const size_t len, const
else if(element->key == DELETED_KEY) {
deleted_element = element;
}
else if(strncmp((const char*)key,(const char*)element->key,len)==0) {
else if(len == element->len && strncmp(key, element->key, len)==0) {
PROFILE_DATA_UPDATE(s, tries);
element->data = data;/* key found, update */
return 0;
return 0;
}
else {
idx = (idx + tries++) % s->capacity;
@ -383,11 +384,11 @@ int hashtab_generate_c(const struct hashtable *s,const char* name)
for(i=0; i < s->capacity; i++) {
const struct element* e = &s->htable[i];
if(!e->key)
printf("\t{NULL, 0},\n");
printf("\t{NULL,0,0},\n");
else if(e->key == DELETED_KEY)
printf("\t{DELETED_KEY,0},\n");
printf("\t{DELETED_KEY,0,0},\n");
else
printf("\t{\"%s\", %ld},\n", e->key, e->data);
printf("\t{\"%s\", %ld, %ld},\n", e->key, e->data, e->len);
}
printf("};\n");
printf("const struct hashtable %s = {\n",name);

@ -60,6 +60,7 @@ typedef struct {
/* One slot of a hashtable: a key string together with the data stored for it.
 * Generated tables initialize the fields positionally as {key, data, len},
 * so the field order must stay in sync with hashtab_generate_c(). */
struct element
{
/* Key string. NULL marks an empty slot and DELETED_KEY marks a removed one;
 * hashtab_insert() stores its own allocated copy of the caller's key here. */
const char* key;
/* Key length as passed to hashtab_insert(). Lookups compare this before
 * calling strncmp(), so keys that share a common prefix do not match. */
size_t len;
/* Value associated with the key; overwritten when the same key is inserted again. */
element_data data;
};

Loading…
Cancel
Save