code cleanup

git-svn: trunk@2379
remotes/push_mirror/metadata
Tomasz Kojm 19 years ago
parent 5b68299b4c
commit 15b08fbbe2
  1. 6
      clamav-devel/ChangeLog
  2. 66
      clamav-devel/libclamav/phishcheck.c
  3. 8
      clamav-devel/libclamav/phishcheck.h
  4. 6
      clamav-devel/libclamav/readdb.c
  5. 34
      clamav-devel/libclamav/regex_list.c

@ -1,10 +1,14 @@
Sun Oct 15 01:49:55 CEST 2006 (tk)
----------------------------------
* libclamav: anti-phish code cleanup (Edwin)
Sat Oct 14 23:09:12 CEST 2006 (tk)
----------------------------------
* libclamav/dsig.c: new function cli_versigpss(): digital signature
verification based on RSASSA-PSS with 2048 bit RSA
key and SHA256 hash function
* libclamav/sha256.[ch]: new files (SHA256 implementation from mhash)
* sigtool/sigtool.c: genetate compressed and signed .cdiff files
* sigtool/sigtool.c: generate compressed and signed .cdiff files
* shared/cdiff.c: handle new .cdiff files
Fri Oct 13 15:42:43 BST 2006 (njh)

@ -19,6 +19,9 @@
* MA 02110-1301, USA.
*
* $Log: phishcheck.c,v $
* Revision 1.14 2006/10/14 23:52:01 tkojm
* code cleanup
*
* Revision 1.13 2006/10/10 23:51:49 tkojm
* apply patches for the anti-phish code from Edwin
*
@ -378,7 +381,8 @@ void string_free(struct string* str)
if(str->ref)/* don't free, this is a portion of another string */
str=str->ref;/* try to free that one*/
else {
free(str->data);
if(str->data)
free(str->data);
break;
}
}
@ -416,9 +420,11 @@ static inline void string_init_c(struct string* dest,char* data)
}
/* make a copy of the string between start -> end*/
void string_assign_dup(struct string* dest,const char* start,const char* end)
int string_assign_dup(struct string* dest,const char* start,const char* end)
{
char* ret = cli_malloc(end-start+1);
if(!ret)
return CL_EMEM;
strncpy(ret,start,end-start);
ret[end-start]='\0';
@ -426,6 +432,7 @@ void string_assign_dup(struct string* dest,const char* start,const char* end)
dest->data=ret;
dest->refcount=1;
dest->ref=NULL;
return CL_SUCCESS;
}
static inline void string_assign_null(struct string* dest)
@ -470,21 +477,21 @@ static int build_regex(regex_t* preg,const char* regex,int nosub)
cli_errmsg("Error in compiling regex:%s\nDisabling phishing checks\n",errbuf);
free(errbuf);
} else
cli_errmsg("Error in compiling regex, disabling phishing checks\n");
cli_errmsg("Error in compiling regex, disabling phishing checks. Additionaly an Out-of-memory error was encountered while generating a detailed error message\n");
#endif
return 1;
}
return 0;
return CL_SUCCESS;
}
/*static regex_t* host_preg = NULL;
static const char* host_regex="cid:.+|mailto:(.+)|([[:alpha:]]+://)?(([^:/?]+@)+([^:/?]+)([:/?].+)?|([^@:/?]+)([:/?].+)?)"; <- this is slower than the function below
*/
/* allocates memory */
void get_host(const struct phishcheck* s,struct string* dest,const char* URL,int isReal,int* phishy)
int get_host(const struct phishcheck* s,struct string* dest,const char* URL,int isReal,int* phishy)
{
const char mailto[] = "mailto:";
int ismailto = 0;
int rc,ismailto = 0;
const char* start;
const char* end=NULL;
if(!URL) {
@ -536,7 +543,10 @@ void get_host(const struct phishcheck* s,struct string* dest,const char* URL,int
if(realhost) {
const char* tld = strrchr(realhost,'.');
if(tld && isTLD(s,tld,tld-realhost-1))
rc = tld ? isTLD(s,tld,tld-realhost-1) : 0;
if(rc < 0)
return rc;
if(rc)
*phishy |= PHISHY_USERNAME_IN_URL;/* if the url contains a username that is there just to fool people,
like http://www.ebay.com@somevilplace.someevildomain.com/ */
start=realhost+1;/*skip the username*/
@ -553,7 +563,8 @@ void get_host(const struct phishcheck* s,struct string* dest,const char* URL,int
end = start + strlen(start);
}
string_assign_dup(dest,start,end);
if(rc = string_assign_dup(dest,start,end))
return rc;
}
int isCountryCode(const struct phishcheck* s,const char* str)
@ -568,11 +579,14 @@ int isTLD(const struct phishcheck* pchk,const char* str,int len)
else {
char* s = cli_malloc(len+1);
int rc;
if(!s)
return CL_EMEM;
strncpy(s,str,len);
s[len]='\0';
rc = !regexec(&pchk->preg_tld,s,0,NULL,0);
free(s);
return rc;
return rc ? 1 : 0;
}
}
@ -689,7 +703,8 @@ str_hex_to_char(char **begin, const char **end)
char *sbegin = *begin;
const char *str_end = *end;
massert(str_end>sbegin);
if(str_end <= sbegin)
return;
if(strlen(sbegin) <= 2)
return;
@ -725,11 +740,9 @@ str_strip(char **begin, const char **end, const char *what, size_t what_len)
const char *str_end_what;
size_t cmp_len = what_len;
if(begin == NULL)
if(begin == NULL || str_end <= sbegin)
return;
massert(str_end > sbegin);
/*if(str_end < (sbegin + what_len))
return;*/
if(strlen(sbegin) < what_len)
@ -831,7 +844,7 @@ str_fixup_spaces(char **begin, const char **end)
}
/* allocates memory */
void
int
cleanupURL(struct string *URL, int isReal)
{
char *begin = URL->data;
@ -866,6 +879,8 @@ cleanupURL(struct string *URL, int isReal)
else {
size_t host_len;
char* host_begin;
int rc;
str_replace(begin,end,'\\','/');
str_strip(&begin,&end,"\"",1);
str_strip(&begin,&end,lt,lt_len);
@ -880,7 +895,8 @@ cleanupURL(struct string *URL, int isReal)
/* convert %xx to real value */
str_hex_to_char(&begin,&end);
str_fixup_spaces(&begin,&end);
string_assign_dup(URL,begin,end+1);
if (rc = string_assign_dup(URL,begin,end+1))
return rc;
/*cli_dbgmsg("%p::%s\n",URL->data,URL->data);*/
}
}
@ -926,7 +942,11 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
blobClose(hrefs->contents[i]);
}*/
string_init_c(&urls.displayLink,(char*)blobGetData(hrefs->contents[i]));
massert(!urls.displayLink.data[blobGetDataSize(hrefs->contents[i])-1]);
if (urls.displayLink.data[blobGetDataSize(hrefs->contents[i])-1]) {
cli_warnmsg("urls.displayLink.data[...]");
return CL_CLEAN;
}
/* massert(strlen(urls.displayLink.data) < blobGetDataSize(hrefs->contents[i]));*/
urls.realLink.refcount=-1;
urls.displayLink.refcount=-1;/*don't free these, caller will free*/
@ -995,7 +1015,9 @@ static char* str_compose(const char* a,const char* b,const char* c)
const size_t b_len = strlen(b);
const size_t c_len = strlen(c);
const size_t r_len = a_len+b_len+c_len+1;
char* concated = malloc(r_len);
char* concated = cli_malloc(r_len);
if(!concated)
return NULL;
strncpy(concated,a,a_len);
strncpy(concated+a_len,b,b_len);
strncpy(concated+a_len+b_len,c,c_len);
@ -1026,6 +1048,8 @@ int phishing_init(struct cl_engine* engine)
}
else {
pchk = engine->phishcheck;
if(!pchk)
return CL_ENULLARG;
if(!pchk->is_disabled) {
/* already initialized */
return CL_SUCCESS;
@ -1123,7 +1147,7 @@ enum phish_status cleanupURLs(struct url_check* urls)
return CL_PHISH_NODECISION;
}
enum phish_status url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy)
int url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy)
{
struct string* host = isReal ? &host_url->realLink : &host_url->displayLink;
get_host(pchk, host, isReal ? url->realLink.data : url->displayLink.data, isReal, phishy);
@ -1205,7 +1229,8 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check*
return CL_PHISH_CLEAN;/* displayed and real URL are identical -> clean */
if((rc = cleanupURLs(urls))) {
massert(!isPhishing(rc));/* not allowed to decide this is phishing */
if(isPhishing(rc))/* not allowed to decide this is phishing */
return CL_PHISH_CLEAN;
return rc;/* URLs identical after cleanup */
}
@ -1223,7 +1248,8 @@ enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check*
if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_DISPLAY,&phishy))) {
free_if_needed(&host_url);
massert(!isPhishing(rc));
if(isPhishing(rc))
return CL_PHISH_CLEAN;
return rc;
}

@ -80,10 +80,10 @@ void url_check_init(struct url_check* urls);
void string_free(struct string* str);
void string_assign(struct string* dest,struct string* src);
void string_assign_c(struct string* dest,char* data);
void string_assign_dup(struct string* dest,const char* start,const char* end);
int string_assign_dup(struct string* dest,const char* start,const char* end);
void string_assign_ref(struct string* dest,struct string* ref,char* data);
void free_if_needed(struct url_check* url);
void get_host(const struct phishcheck* pchk,struct string* dest,const char* URL,int isReal,int* phishy);
int get_host(const struct phishcheck* pchk,struct string* dest,const char* URL,int isReal,int* phishy);
int isCountryCode(const struct phishcheck* s,const char* str);
int isTLD(const struct phishcheck* s,const char* str,int len);
void get_domain(const struct phishcheck* pchk,struct string* dest,struct string* host);
@ -91,12 +91,12 @@ int ip_reverse(struct url_check* urls,int isReal);
void reverse_lookup(struct url_check* url,int isReal);
int isNumeric(const char* host);
int isSSL(const char* URL);
void cleanupURL(struct string* URL,int isReal);
int cleanupURL(struct string* URL,int isReal);
void get_redirected_URL(struct string* URL);
int isURL(const struct phishcheck* pchk,const char* URL);
enum phish_status cleanupURLs(struct url_check* urls);
int isNumericURL(const struct phishcheck* pchk, const char* URL);
enum phish_status url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy);
int url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy);
void url_get_domain(const struct phishcheck* pchk, struct url_check* url,struct url_check* domains);
enum phish_status phishy_map(int phishy,enum phish_status fallback);
int isEncoded(const char* url);

@ -499,7 +499,7 @@ static int cli_initengine(struct cl_engine **engine, unsigned int options)
}
#ifdef CL_EXPERIMENTAL
if(rc = phishing_init(*engine))
if((rc = phishing_init(*engine)))
return rc;
#endif
@ -619,7 +619,7 @@ static int cli_loadwdb(FILE *fd, struct cl_engine **engine, unsigned int options
}
if(!(*engine)->whitelist_matcher) {
if(ret = init_whitelist(*engine)) {
if((ret = init_whitelist(*engine))) {
phishing_done(*engine);
cl_free(*engine);
return ret;
@ -646,7 +646,7 @@ static int cli_loadpdb(FILE *fd, struct cl_engine **engine, unsigned int options
}
if(!(*engine)->domainlist_matcher) {
if(ret = init_domainlist(*engine)) {
if((ret = init_domainlist(*engine))) {
phishing_done(*engine);
cl_free(*engine);
return ret;

@ -19,6 +19,9 @@
* MA 02110-1301, USA.
*
* $Log: regex_list.c,v $
* Revision 1.10 2006/10/14 23:52:02 tkojm
* code cleanup
*
* Revision 1.9 2006/10/10 23:51:49 tkojm
* apply patches for the anti-phish code from Edwin
*
@ -570,7 +573,7 @@ int load_regex_matcher(struct regex_matcher* matcher,FILE* fd,unsigned int optio
pattern[0]='\0';
flags = buffer+1;
pattern++;
if((buffer[0] == 'R' && !is_whitelist) || (buffer[0] == 'X' && !is_whitelist)) {/*regex*/
if((buffer[0] == 'R' && !is_whitelist) || (buffer[0] == 'X' && is_whitelist)) {/*regex*/
if(( rc = add_pattern(matcher,(const unsigned char*)pattern,flags) ))
return rc==CL_EMEM ? CL_EMEM : CL_EMALFDB;
}
@ -740,13 +743,17 @@ void regex_list_done(struct regex_matcher* matcher)
regex_list_cleanup(matcher);
if(matcher->list_loaded) {
cli_ac_free(matcher->root_hosts);
free(matcher->root_hosts);
matcher->root_hosts=NULL;
if(matcher->root_hosts) {
cli_ac_free(matcher->root_hosts);
free(matcher->root_hosts);
matcher->root_hosts=NULL;
}
cli_ac_free(matcher->root_urls);
free(matcher->root_urls);
matcher->root_urls=NULL;
if(matcher->root_urls) {
cli_ac_free(matcher->root_urls);
free(matcher->root_urls);
matcher->root_urls=NULL;
}
matcher->list_built=0;
destroy_tree(matcher);
@ -951,7 +958,7 @@ static const unsigned char* find_regex_start(const unsigned char* pat)
if(token.type!=TOKEN_REGEX) {
last = tmp;
lasttype = token.type;
if(token.type==TOKEN_BRACKET)
if(token.type==TOKEN_BRACKET && token.u.bitmap)
free(token.u.bitmap);
if(token.type==TOKEN_ALT || token.type==TOKEN_PAR_OPEN) {
/* save this position on stack, successfully parsed till here*/
@ -1233,6 +1240,8 @@ static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,co
if(charclass == std_class_cnt) {/*not a std char class*/
new->op = OP_CUSTOMCLASS;
new->u.children = cli_malloc(sizeof(new->u.children[0])*2);
if(!new->u.children)
return CL_EMEM;
new->u.bitmap[0] = token.u.bitmap;
new->u.bitmap[1] = NULL;
tree_node_insert_nonbin(node,new);
@ -1259,17 +1268,23 @@ static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,co
case TOKEN_REGEX:
case TOKEN_DONE: {
struct leaf_info* leaf=cli_malloc(sizeof(*leaf));
if(!leaf)
return CL_EMEM;
leaf->info=strdup(info);
if(token.type==TOKEN_REGEX) {
int rc;
struct tree_node* new;
regex_t* preg;
preg=cli_malloc(sizeof(*preg));
if(!preg)
return CL_EMEM;
rc = regcomp(preg,(const char*)token.u.start,REG_EXTENDED|(bol?0:REG_NOTBOL));
leaf->preg=preg;
if(rc)
return rc;
new=cli_malloc(sizeof(*new));
if(!new)
return CL_EMEM;
new->op=OP_LEAF;
new->next=node;
new->alternatives=0;
@ -1511,7 +1526,8 @@ static void destroy_tree(struct regex_matcher* matcher)
destroy_tree_internal(matcher,matcher->root_regex);
while (matcher->node_stack.cnt) {
struct tree_node* node = stack_pop(&matcher->node_stack);
free(node);
if(node)
free(node);
}
}
#ifndef NDEBUG

Loading…
Cancel
Save