add comments, no functionality change.

git-svn: trunk@4954
0.95
Török Edvin 16 years ago
parent 6f60f808d9
commit b02aff65f5
  1. 4
      ChangeLog
  2. 20
      libclamav/phishcheck.c

@ -1,3 +1,7 @@
Mon Mar 16 15:53:52 EET 2009 (edwin)
------------------------------------
* libclamav/phishcheck.c: add comments, no functionality change.
Mon Mar 16 15:41:17 EET 2009 (edwin)
------------------------------------
* docs/phishsigs_howto.tex, libclamav/phishcheck.c: document URL

@ -1235,18 +1235,22 @@ int cli_url_canon(const char *inurl, size_t len, char *urlbuff, size_t dest_len,
urlbuff[dest_len] = urlbuff[dest_len+1] = urlbuff[dest_len+2] = '\0';
url = urlbuff;
/* canonicalize only real URLs, with a protocol */
host_begin = strchr(url, ':');
if(!host_begin)
return CL_PHISH_CLEAN;
++host_begin;
/* ignore username in URL */
p = strchr(host_begin, '@');
if (p)
host_begin = p+1;
url = host_begin;
/* repeatedly % unescape characters */
str_hex_to_char(&url, &urlend);
host_begin = url;
len = urlend - url;
/* skip to beginning of hostname */
while((host_begin < urlend) && *host_begin == '/') ++host_begin;
while(*host_begin == '.' && host_begin < urlend) ++host_begin;
@ -1255,11 +1259,13 @@ int cli_url_canon(const char *inurl, size_t len, char *urlbuff, size_t dest_len,
while (p < urlend) {
if (p+2 < urlend && *p == '/' && p[1] == '.' ) {
if (p[2] == '/') {
/* remove /./ */
if (p + 3 < urlend)
memmove(p+1, p+3, urlend - p - 3);
urlend -= 2;
}
else if (p[2] == '.' && (p[3] == '/' || p[3] == '\0') && last) {
/* remove /component/../ */
if (p+4 < urlend)
memmove(last+1, p+4, urlend - p - 4);
urlend -= 3 + (p - last);
@ -1276,6 +1282,7 @@ int cli_url_canon(const char *inurl, size_t len, char *urlbuff, size_t dest_len,
while (p < urlend && p+2 < url + dest_len) {
unsigned char c = *p;
if (c <= 32 || c >= 127 || c == '%' || c == '#') {
/* convert control characters, space, DEL, non-ASCII bytes, '%' and '#' back to %-escaped form */
const char hexchars[] = "0123456789ABCDEF";
memmove(p+3, p+1, urlend - p - 1);
*p++ = '%';
@ -1288,9 +1295,11 @@ int cli_url_canon(const char *inurl, size_t len, char *urlbuff, size_t dest_len,
*p = '\0';
urlend = p;
len = urlend - url;
/* determine end of hostname */
host_len = strcspn(host_begin, ":/?");
path_begin = host_begin + host_len;
if(host_len < len) {
/* url without path, use a single / */
memmove(path_begin + 2, path_begin + 1, len - host_len);
*path_begin++ = '/';
*path_begin++ = '\0';
@ -1299,6 +1308,7 @@ int cli_url_canon(const char *inurl, size_t len, char *urlbuff, size_t dest_len,
path_len = url + len - path_begin + 1;
p = strchr(path_begin, '#');
if (p) {
/* ignore anchor */
*p = '\0';
path_len = p - path_begin;
}
@ -1307,6 +1317,7 @@ int cli_url_canon(const char *inurl, size_t len, char *urlbuff, size_t dest_len,
path_len = 0;
*path = "";
}
/* lowercase the hostname (only the host_len bytes starting at host_begin) */
str_make_lowercase(host_begin, host_len);
*host = host_begin;
*hostlen = host_len;
@ -1330,6 +1341,8 @@ static int url_hash_match(const struct regex_matcher *rlist, const char *inurl,
unsigned count;
if(!rlist || !rlist->sha256_hashes.bm_patterns) {
/* no hashes loaded -> don't waste time canonicalizing and
* looking up */
return CL_SUCCESS;
}
if(!inurl)
@ -1338,6 +1351,8 @@ static int url_hash_match(const struct regex_matcher *rlist, const char *inurl,
rc = cli_url_canon(inurl, len, urlbuff, sizeof(urlbuff), &host_begin, &host_len, &path_begin, &path_len);
if (rc == CL_PHISH_CLEAN)
return rc;
/* get last 5 components of hostname */
j=COMPONENTS;
component = strrchr(host_begin, '.');
while(component && j > 0) {
@ -1351,6 +1366,7 @@ static int url_hash_match(const struct regex_matcher *rlist, const char *inurl,
}
lp[j] = host_begin;
/* get first 5 components of path */
pp[0] = path_len;
if(path_len) {
pp[1] = strcspn(path_begin, "?");
@ -1376,6 +1392,7 @@ static int url_hash_match(const struct regex_matcher *rlist, const char *inurl,
rlist->hostkey_prefix.bm_patterns;
--ji;
assert(pp[ki] <= path_len);
/* look up the hash of this host suffix (from lp[ji]) combined with this path prefix (first pp[ki] bytes) */
rc = hash_match(rlist, lp[ji], host_begin + host_len - lp[ji] + 1, path_begin, pp[ki],
need_prefixmatch ? &prefix_matched : NULL);
if(rc) {
@ -1383,6 +1400,9 @@ static int url_hash_match(const struct regex_matcher *rlist, const char *inurl,
}
count++;
if (count == 2 && !prefix_matched && rlist->hostkey_prefix.bm_patterns) {
/* if hostkey is not matched, don't bother calculating
* hashes for other parts of the URL, they are not in the DB
*/
cli_dbgmsg("hostkey prefix not matched, short-circuiting lookups\n");
return CL_SUCCESS;
}

Loading…
Cancel
Save