Added a call to match R-type PDB signatures in phishcheck.c:phishingCheck(). This makes R-type PDB signatures functional, so that they operate as described in the documentation.

pull/111/head
Micah Snyder (micasnyd) 7 years ago committed by Micah Snyder
parent 0109718800
commit 3390c26c99
  1. 66
      libclamav/phishcheck.c
  2. 16
      libclamav/regex_list.c

@ -665,8 +665,8 @@ cleanupURL(struct string* URL, struct string* pre_URL, int isReal)
str_replace(begin, end, '\\', '/');
/* find beginning of hostname, because:
* - we want to keep only protocol, host, and
* strip path & query parameter(s)
* - we want to keep only protocol, host, and
* strip path & query parameter(s)
* - we want to make hostname lowercase*/
host_begin = strchr(begin, ':');
while (host_begin && (host_begin < end) && (host_begin[1] == '/')) host_begin++;
@ -677,7 +677,7 @@ cleanupURL(struct string* URL, struct string* pre_URL, int isReal)
host_len = strcspn(host_begin, ":/?");
if (host_begin + host_len > end + 1) {
/* prevent hostname extending beyond end, it can happen
* if we have spaces at the end, we don't want those part of
* if we have spaces at the end, we don't want those part of
* the hostname */
host_len = end - host_begin + 1;
} else {
@ -1473,16 +1473,59 @@ static enum phish_status phishingCheck(const struct cl_engine* engine, struct ur
return CL_PHISH_CLEAN;
}
/*
* Whitelist X-type WDB signatures: X:RealURL:DisplayedURL
* Eg:
* X:.+\.benign\.com([/?].*)?:.+\.benign\.de
*/
if (whitelist_check(engine, urls, 0))
return CL_PHISH_CLEAN; /* if url is whitelisted don't perform further checks */
url_check_init(&host_url);
/*
* Match R-type PDB signatures: R:RealURL:DisplayedURL
* Eg:
* R:.+\.malicious\.net([/?].*)?:.+\.benign\.com
*/
if (domainlist_match(engine, urls->realLink.data, urls->displayLink.data, &urls->pre_fixup, 0)) {
phishy |= DOMAIN_LISTED;
}
/*
* Get copy of URLs stripped down to just the FQDN.
*/
url_check_init(&host_url);
if ((rc = url_get_host(urls, &host_url, DOMAIN_DISPLAY, &phishy))) {
free_if_needed(&host_url);
return rc < 0 ? rc : CL_PHISH_CLEAN;
}
if ((rc = url_get_host(urls, &host_url, DOMAIN_REAL, &phishy))) {
free_if_needed(&host_url);
return rc < 0 ? rc : CL_PHISH_CLEAN;
}
/*
* Exit early if the realLink and displayLink are the same.
*/
if (!strcmp(urls->realLink.data, urls->displayLink.data)) {
free_if_needed(&host_url);
return CL_PHISH_CLEAN;
}
/*
* Whitelist M-type WDB signatures: M:RealHostname:DisplayedHostname
* Eg:
* M:email.isbenign.com:benign.com
*/
if (whitelist_check(engine, &host_url, 1)) {
free_if_needed(&host_url);
return CL_PHISH_CLEAN;
}
/*
* Match H-type PDB signatures: H:DisplayedHostname
* Eg:
* H:malicious.com
*/
if (domainlist_match(engine, host_url.displayLink.data, host_url.realLink.data, &urls->pre_fixup, 1)) {
phishy |= DOMAIN_LISTED;
} else {
@ -1516,21 +1559,6 @@ static enum phish_status phishingCheck(const struct cl_engine* engine, struct ur
return CL_PHISH_CLEAN;
}
if ((rc = url_get_host(urls, &host_url, DOMAIN_REAL, &phishy))) {
free_if_needed(&host_url);
return rc < 0 ? rc : CL_PHISH_CLEAN;
}
if (whitelist_check(engine, &host_url, 1)) {
free_if_needed(&host_url);
return CL_PHISH_CLEAN;
}
if (!strcmp(urls->realLink.data, urls->displayLink.data)) {
free_if_needed(&host_url);
return CL_PHISH_CLEAN;
}
{
struct url_check domain_url;
url_check_init(&domain_url);

@ -119,7 +119,7 @@ static int validate_subdomain(const struct regex_list *regex, const struct pre_f
if (real_url[pos] != '.') {
/* we need to shift left, and insert a '.'
* we have an extra '.' at the beginning inserted by get_host to have room,
* orig_real_url has to be used here,
* orig_real_url has to be used here,
* because we want to overwrite that extra '.' */
size_t orig_real_len = strlen(orig_real_url);
cli_dbgmsg("No dot here:%s\n", real_url + pos);
@ -185,7 +185,11 @@ int regex_list_match(struct regex_matcher *matcher, char *real_url, const char *
strncpy(buffer, real_url, real_len);
buffer[real_len] = (!is_whitelist && hostOnly) ? '/' : ':';
/* For H-type PDB signatures, real_url is actually the DisplayedHostname.
RealHostname is not used. */
if (!hostOnly || is_whitelist) {
/* For all other PDB and WDB signatures concatenate Real:Displayed. */
strncpy(buffer + real_len + 1, display_url, display_len);
}
buffer[buffer_len - 1] = '/';
@ -413,20 +417,20 @@ int load_regex_matcher(struct cl_engine *engine, struct regex_matcher *matcher,
* Multiple lines of form, (empty lines are skipped):
* Flags RealURL DisplayedURL
* Where:
* Flags:
* Flags:
*
* .pdb files:
* R - regex, H - host-only, followed by (optional) 3-digit hexnumber representing
* R - regex, H - host-only, followed by (optional) 3-digit hexnumber representing
* flags that should be filtered.
* [i.e. phishcheck urls.flags that we don't want to be done for this particular host]
*
*
* .wdb files:
* X - full URL regex
* X - full URL regex
* Y - host-only regex
* M - host simple pattern
*
* If a line in the file doesn't conform to this format, loading fails
*
*
*/
while (cli_dbgets(buffer, FILEBUFF, fd, dbio)) {
char *pattern;

Loading…
Cancel
Save