merged from not_for_0.92_tempbranch.

remove "all domain scan" feature from phishcheck
	(--no-phishing-restrictedscan). Nobody is using it.
	Don't care why a URL is clean; just state that it is clean.
	Various cleanups resulting from this.
	Prepare to introduce selective turn on of sub-features.


git-svn: trunk@3417
remotes/push_mirror/metadata
Török Edvin 18 years ago
commit 2fe76d5051
  1. 10
      ChangeLog
  2. 13
      clamd/server-th.c
  3. 4
      clamscan/manager.c
  4. 5
      docs/man/clamd.conf.5.in
  5. 6
      etc/clamd.conf
  6. 3
      libclamav/clamav.h
  7. 268
      libclamav/phishcheck.c
  8. 29
      libclamav/phishcheck.h

@ -1,3 +1,13 @@
Fri Dec 14 13:02:38 EET 2007 (edwin)
------------------------------------
* libclamav/phishcheck.[ch]:
merged from not_for_0.92_tempbranch.
remove "all domain scan" feature from phishcheck
(--no-phishing-restrictedscan). Nobody is using it.
Don't care why an url is clean, just state it is clean.
Various cleanups resulting from this.
Prepare to introduce selective turn on of sub-features.
Thu Dec 13 23:34:22 CET 2007 (tk)
---------------------------------
* libclamav: rewritten decompressor for mscompress - faster and more secure

@ -434,19 +434,6 @@ int acceptloop_th(int *socketds, int nsockets, struct cl_engine *engine, unsigne
if(cfgopt(copt,"PhishingScanURLs")->enabled) {
if(cfgopt(copt,"PhishingRestrictedScan")->enabled) {
/* we don't scan urls from all domains, just those listed in
* .pdb file. This is the safe default
*/
options |= CL_SCAN_PHISHING_DOMAINLIST;
} else {
/* This is a false positive prone option, since newsletters, etc.
* often contain links that will be classified as phishing attempts,
* even though the site they link to isn't a phish site.
*/
logg("Phishing: Checking all URLs, regardless of domain (FP prone).\n");
}
if(cfgopt(copt,"PhishingAlwaysBlockCloak")->enabled) {
options |= CL_SCAN_PHISHING_BLOCKCLOAK;
logg("Phishing: Always checking for cloaked urls\n");

@ -174,10 +174,6 @@ int scanmanager(const struct optstruct *opt)
if(!opt_check(opt,"no-phishing-scan-urls"))
dboptions |= CL_DB_PHISHING_URLS;
if(!opt_check(opt,"no-phishing-restrictedscan")) {
/* not scanning all domains, check only URLs with domains from .pdb */
options |= CL_SCAN_PHISHING_DOMAINLIST;
}
if(opt_check(opt,"phishing-ssl")) {
options |= CL_SCAN_PHISHING_BLOCKSSL;
}

@ -255,11 +255,6 @@ Scan URLs found in mails for phishing attempts using heuristics. This will class
.br
Default: yes
.TP
\fBPhishingRestrictedScan BOOL\fR
Use phishing detection only for domains listed in the .pdb database. It is not recommended to have this option turned off, because scanning of all domains may lead to many false positives!
.br
Default: yes
.TP
\fBPhishingAlwaysBlockSSLMismatch BOOL\fR
Always block SSL mismatches in URLs, even if the URL isn't in the database. This can lead to false positives.
.br

@ -237,12 +237,6 @@ LocalSocket /tmp/clamd.socket
# Default: yes
#PhishingScanURLs yes
# Use phishing detection only for domains listed in the .pdb database. It is
# not recommended to have this option turned off, because scanning of all
# domains may lead to many false positives!
# Default: yes
#PhishingRestrictedScan yes
# Always block SSL mismatches in URLs, even if the URL isn't in the database.
# This can lead to false positives.
#

@ -85,14 +85,13 @@ extern "C"
#define CL_SCAN_MAILURL 0x80
#define CL_SCAN_BLOCKMAX 0x100
#define CL_SCAN_ALGORITHMIC 0x200
#define CL_SCAN_PHISHING_DOMAINLIST 0x400
#define CL_SCAN_PHISHING_BLOCKSSL 0x800 /* ssl mismatches, not ssl by itself*/
#define CL_SCAN_PHISHING_BLOCKCLOAK 0x1000
#define CL_SCAN_ELF 0x2000
#define CL_SCAN_PDF 0x4000
/* recommended scan settings */
#define CL_SCAN_STDOPT (CL_SCAN_ARCHIVE | CL_SCAN_MAIL | CL_SCAN_OLE2 | CL_SCAN_HTML | CL_SCAN_PE | CL_SCAN_ALGORITHMIC | CL_SCAN_ELF | CL_SCAN_PHISHING_DOMAINLIST)
#define CL_SCAN_STDOPT (CL_SCAN_ARCHIVE | CL_SCAN_MAIL | CL_SCAN_OLE2 | CL_SCAN_HTML | CL_SCAN_PE | CL_SCAN_ALGORITHMIC | CL_SCAN_ELF)
/* aliases for backward compatibility */
#define CL_RAW CL_SCAN_RAW

@ -64,34 +64,30 @@
* Phishing design documentation,
(initially written at http://wiki.clamav.net/index.php/phishing_design as discussed with aCaB)
TODO:update this doc
*Warning*: if flag *--phish-scan-alldomains* (or equivalent clamd/clamav-milter config option) isn't given, then phishing scanning is done only for domains listed in daily.pdb.
If your daily.pdb is empty, then by default NO PHISHING is DONE, UNLESS you give the *--phish-scan-alldomains*
This is just a side effect: daily.pdb is empty because it isn't yet officially in daily.cvd.
TODO: update this doc whenever behaviour changes
phishingCheck() determines if @displayedLink is a legit representation of @realLink.
Steps:
1. if _realLink_ *==* _displayLink_ => *CLEAN*
1. if _realLink_ == _displayLink_ => CLEAN
2. url cleanup (normalization)
- whitespace elimination
strip all spaces, and leading and trailing garbage.
When matching, we have to take into account whether we stripped any spaces or not.
See str_fixup_spaces.
- html entity conversion
- handle hex-encoded characters
- convert hostname to lowercase
- normalize \ to /
If there is a dot after the last space, then all spaces are replaced with dots,
otherwise spaces are stripped.
So all of 'Go to yahoo.com', 'Go to e b a y . c o m', and 'Go to ebay. com' will work.
3. Match the URLs against a _whitelist_:
a _realLink_, _displayedLink_ pair is matched against the _whitelist_.
the _whitelist_ is a list of pairs of realLink, displayedLink. Any of the elements of those pairs can be a _regex_.
if url *is found* in _whitelist_ --> *CLEAN*
4. URL is looked up in the _domainlist_, unless disabled via flags (_--phish-scan-alldomains_).
4. URL is looked up in the _domainlist_
The _domainlist_ is a list of pairs of realLink, displayedLink (any of which can be regex).
This is the list of domains we do phishing detection for (such as ebay,paypal,chase,....)
We can't decide to stop processing here or not, so we just set a flag.
@ -120,7 +116,6 @@ Checks if realLink is http, but displayedLink is https or viceversa.
12. Numeric IP detection.
If url is a numeric IP, then -> phish.
Maybe we should do DNS lookup?
Maybe we should disable numericIP checks for --phish-scan-alldomains?
13. isURL(displayedLink).
Checks if displayedLink is really a url.
@ -227,21 +222,21 @@ static const char numeric_url_regex[] = "^ *"URI_numeric_fragmentaddress" *$";
/* generated by contrib/phishing/generate_tables.c */
static const short int hextable[256] = {
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
};
@ -352,13 +347,13 @@ static int build_regex(regex_t* preg,const char* regex,int nosub)
cli_dbgmsg("Phishcheck: Compiling regex: %s\n",regex);
rc = cli_regcomp(preg,regex,REG_EXTENDED|REG_ICASE|(nosub ? REG_NOSUB :0));
if(rc) {
#ifdef C_WINDOWS
cli_errmsg("Phishcheck: Error in compiling regex, disabling phishing checks\n");
#else
size_t buflen = cli_regerror(rc,preg,NULL,0);
char *errbuf = cli_malloc(buflen);
if(errbuf) {
cli_regerror(rc,preg,errbuf,buflen);
cli_errmsg("Phishcheck: Error in compiling regex:%s\nDisabling phishing checks\n",errbuf);
@ -682,7 +677,7 @@ cleanupURL(struct string *URL,struct string *pre_URL, int isReal)
char *begin = URL->data;
const char *end;
size_t len;
clear_msb(begin);
/*if(begin == NULL)
return;*/
@ -782,16 +777,13 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
if(hrefs->contents[i]) {
struct url_check urls;
enum phish_status rc;
urls.always_check_flags = DOMAINLIST_REQUIRED;/* required to work correctly */
urls.flags = strncmp((char*)hrefs->tag[i],href_text,href_text_len)? (CL_PHISH_ALL_CHECKS&~CHECK_SSL): CL_PHISH_ALL_CHECKS;
urls.link_type = 0;
if(!strncmp((char*)hrefs->tag[i],src_text,src_text_len)) {
if (!(urls.flags&CHECK_IMG_URL))
continue;
urls.link_type |= LINKTYPE_IMAGE;
urls.link_type |= LINKTYPE_IMAGE;
}
if (ctx->options&CL_SCAN_PHISHING_DOMAINLIST)
urls.flags |= DOMAINLIST_REQUIRED;
if (ctx->options & CL_SCAN_PHISHING_BLOCKSSL) {
urls.always_check_flags |= CHECK_SSL;
}
@ -823,20 +815,6 @@ int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
switch(rc)/*TODO: support flags from ctx->options,*/
{
case CL_PHISH_CLEAN:
case CL_PHISH_CLEANUP_OK:
case CL_PHISH_HOST_OK:
case CL_PHISH_DOMAIN_OK:
case CL_PHISH_REDIR_OK:
case CL_PHISH_HOST_REDIR_OK:
case CL_PHISH_DOMAIN_REDIR_OK:
case CL_PHISH_HOST_REVERSE_OK:
case CL_PHISH_DOMAIN_REVERSE_OK:
case CL_PHISH_WHITELISTED:
case CL_PHISH_HOST_WHITELISTED:
case CL_PHISH_MAILTO_OK:
case CL_PHISH_TEXTURL:
case CL_PHISH_HOST_NOT_LISTED:
case CL_PHISH_CLEAN_CID:
continue;
/* break;*/
case CL_PHISH_HEX_URL:
@ -935,7 +913,7 @@ int phishing_init(struct cl_engine* engine)
free_regex(&pchk->preg_cctld);
free(pchk);
engine->phishcheck = NULL;
return CL_EFORMAT;
return CL_EFORMAT;
}
url_regex = str_compose("^ *(("URI_CHECK_PROTOCOLS")|("URI_fragmentaddress1,URI_fragmentaddress2,URI_fragmentaddress3")) *$");
if(build_regex(&pchk->preg,url_regex,1)) {
@ -992,7 +970,7 @@ void phishing_done(struct cl_engine* engine)
cli_dbgmsg("Freeing phishcheck struct\n");
free(pchk);
engine->phishcheck = NULL;
}
}
cli_dbgmsg("Phishcheck cleaned up\n");
}
@ -1029,7 +1007,7 @@ static enum phish_status cleanupURLs(struct url_check* urls)
if(!urls->displayLink.data || !urls->realLink.data)
return CL_PHISH_NODECISION;
if(!strcmp(urls->realLink.data,urls->displayLink.data))
return CL_PHISH_CLEANUP_OK;
return CL_PHISH_CLEAN;
}
return CL_PHISH_NODECISION;
}
@ -1046,33 +1024,31 @@ static int url_get_host(const struct phishcheck* pchk, struct url_check* url,str
if(!start || !end) {
string_assign_null(host);
}
else {
if(( rc = string_assign_dup(host,start,end) ))
return rc;
else if(( rc = string_assign_dup(host,start,end) )) {
return rc;
}
cli_dbgmsg("Phishcheck:host:%s\n", host->data);
if(!isReal) {
url->pre_fixup.host_start = start - URL;
url->pre_fixup.host_end = end - URL;
}
if(!host->data)
return CL_PHISH_CLEANUP_OK;
if(*phishy&REAL_IS_MAILTO)
return CL_PHISH_MAILTO_OK;
if(strchr(host->data,' ')) {
string_free(host);
return CL_PHISH_TEXTURL;
if(!host->data || (isReal && host->data[0]=='\0') || *phishy&REAL_IS_MAILTO || strchr(host->data,' ')) {
/* no host,
* link without domain, such as: href="/isapi.dll?...
* mailto:
* spaces in hostname
*/
return CL_PHISH_CLEAN;
}
if(url->flags&CHECK_CLOAKING && !cli_regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) {
/* uses a regex here, so that we don't accidentally block 0xacab.net style hosts */
string_free(host);
return CL_PHISH_HEX_URL;
}
if(isReal && host->data[0]=='\0')
return CL_PHISH_CLEAN;/* link without domain, such as: href="/isapi.dll?... */
if(isNumeric(host->data)) {
*phishy |= PHISHY_NUMERIC_IP;
}
if(!isReal) {
url->pre_fixup.host_start = start - URL;
url->pre_fixup.host_end = end - URL;
}
return CL_PHISH_NODECISION;
}
@ -1111,45 +1087,15 @@ static int whitelist_check(const struct cl_engine* engine,struct url_check* urls
return whitelist_match(engine,urls->realLink.data,urls->displayLink.data,hostOnly);
}
/*
 * Map a phish_status value to a yes/no phishing verdict.
 *
 * Returns 0 for every status in the first case group below and 1 for the
 * statuses in the second group. Any status that is not listed falls into
 * the default branch and is also reported as phishing (returns 1).
 */
static int isPhishing(enum phish_status rc)
{
switch(rc) {
/* first group: statuses reported as "not phishing" */
case CL_PHISH_CLEAN:
case CL_PHISH_CLEANUP_OK:
case CL_PHISH_WHITELISTED:
case CL_PHISH_HOST_WHITELISTED:
case CL_PHISH_HOST_OK:
case CL_PHISH_DOMAIN_OK:
case CL_PHISH_REDIR_OK:
case CL_PHISH_HOST_REDIR_OK:
case CL_PHISH_DOMAIN_REDIR_OK:
case CL_PHISH_HOST_REVERSE_OK:
case CL_PHISH_DOMAIN_REVERSE_OK:
case CL_PHISH_MAILTO_OK:
case CL_PHISH_TEXTURL:
case CL_PHISH_HOST_NOT_LISTED:
case CL_PHISH_CLEAN_CID:
return 0;
/* second group: statuses reported as phishing */
case CL_PHISH_HEX_URL:
case CL_PHISH_CLOAKED_NULL:
case CL_PHISH_SSL_SPOOF:
case CL_PHISH_CLOAKED_UIU:
case CL_PHISH_NUMERIC_IP:
case CL_PHISH_NOMATCH:
return 1;
/* any other value (including CL_PHISH_NODECISION, which is not listed above) is reported as phishing */
default:
return 1;
}
}
/* urls can't contain null pointer, caller must ensure this */
static enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls)
{
struct url_check host_url;
enum phish_status rc=CL_PHISH_NODECISION;
int rc = CL_PHISH_NODECISION;
int phishy=0;
const struct phishcheck* pchk = (const struct phishcheck*) engine->phishcheck;
if(!urls->realLink.data)
if(!urls->realLink.data || urls->displayLink.data[0]=='\0')
return CL_PHISH_CLEAN;
cli_dbgmsg("Phishcheck:Checking url %s->%s\n", urls->realLink.data,
@ -1159,59 +1105,43 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url
return CL_PHISH_CLEAN;/* displayed and real URL are identical -> clean */
if((rc = cleanupURLs(urls))) {
if(isPhishing(rc))/* not allowed to decide this is phishing */
return CL_PHISH_CLEAN;
return rc;/* URLs identical after cleanup */
/* it can only return an error, or say its clean;
* it is not allowed to decide it is phishing */
return rc < 0 ? rc : CL_PHISH_CLEAN;
}
if(whitelist_check(engine,urls,0))
return CL_PHISH_WHITELISTED;/* if url is whitelist don't perform further checks */
if(whitelist_check(engine, urls, 0))
return CL_PHISH_CLEAN;/* if url is whitelisted don't perform further checks */
if((!isURL(pchk, urls->displayLink.data) || !isRealURL(pchk, urls->realLink.data) )&&
if((!isURL(pchk, urls->displayLink.data) || !isRealURL(pchk, urls->realLink.data) ) &&
( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) ||
!(phishy&PHISHY_NUMERIC_IP))) {
cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data);
return CL_PHISH_TEXTURL;
return CL_PHISH_CLEAN;
}
if(urls->flags&DOMAINLIST_REQUIRED && domainlist_match(engine,urls->realLink.data,urls->displayLink.data,NULL,0,&urls->flags))
if(domainlist_match(engine, urls->realLink.data, urls->displayLink.data, NULL, 0, &urls->flags)) {
phishy |= DOMAIN_LISTED;
else {
} else {
/* although entire url is not listed, the host might be,
* so defer phishing decisions till we know if host is listed*/
}
url_check_init(&host_url);
if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_DISPLAY,&phishy))) {
if((rc = url_get_host(pchk, urls, &host_url, DOMAIN_DISPLAY, &phishy))) {
free_if_needed(&host_url);
if(isPhishing(rc))
return CL_PHISH_CLEAN;
return rc;
return rc < 0 ? rc : CL_PHISH_CLEAN;
}
if(urls->flags&DOMAINLIST_REQUIRED) {
if(!(phishy&DOMAIN_LISTED)) {
if(domainlist_match(engine,host_url.displayLink.data,host_url.realLink.data,&urls->pre_fixup,1,&urls->flags))
phishy |= DOMAIN_LISTED;
else {
}
}
if(!(phishy&DOMAIN_LISTED) &&
!domainlist_match(engine,host_url.displayLink.data,host_url.realLink.data,&urls->pre_fixup,1,&urls->flags)) {
return CL_PHISH_CLEAN; /* domain not listed */
}
/* link type filtering must occur after last domainlist_match */
if(urls->link_type & LINKTYPE_IMAGE && !(urls->flags&CHECK_IMG_URL))
return CL_PHISH_HOST_NOT_LISTED;/* its listed, but this link type is filtered */
if(urls->flags & DOMAINLIST_REQUIRED && !(phishy & DOMAIN_LISTED) ) {
urls->flags &= urls->always_check_flags;
if(!urls->flags) {
free_if_needed(&host_url);
return CL_PHISH_HOST_NOT_LISTED;
}
}
return CL_PHISH_CLEAN;/* its listed, but this link type is filtered */
if(urls->flags&CHECK_CLOAKING) {
/*Checks if URL is cloaked.
@ -1227,63 +1157,41 @@ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url
}
}
if(urls->displayLink.data[0]=='\0') {
free_if_needed(&host_url);
return CL_PHISH_CLEAN;
}
if(urls->flags&CHECK_SSL && isSSL(urls->displayLink.data) && !isSSL(urls->realLink.data)) {
free_if_needed(&host_url);
return CL_PHISH_SSL_SPOOF;
}
if(!urls->flags&CHECK_CLOAKING && urls->flags & DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED) ) {
free_if_needed(&host_url);
return CL_PHISH_HOST_NOT_LISTED;
}
if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_REAL,&phishy)))
{
free_if_needed(&host_url);
return rc;
return rc < 0 ? rc : CL_PHISH_CLEAN;
}
if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED)) {
if(whitelist_check(engine,&host_url,1)) {
free_if_needed(&host_url);
return CL_PHISH_HOST_NOT_LISTED;
return CL_PHISH_CLEAN;
}
if(whitelist_check(engine,&host_url,1)) {
if(!strcmp(urls->realLink.data,urls->displayLink.data)) {
free_if_needed(&host_url);
return CL_PHISH_HOST_WHITELISTED;
return CL_PHISH_CLEAN;
}
if(urls->flags&HOST_SUFFICIENT) {
if(!strcmp(urls->realLink.data,urls->displayLink.data)) {
{
struct url_check domain_url;
url_check_init(&domain_url);
url_get_domain(pchk, &host_url,&domain_url);
if(!strcmp(domain_url.realLink.data,domain_url.displayLink.data)) {
free_if_needed(&host_url);
return CL_PHISH_HOST_OK;
}
if(urls->flags&DOMAIN_SUFFICIENT) {
struct url_check domain_url;
url_check_init(&domain_url);
url_get_domain(pchk, &host_url,&domain_url);
if(!strcmp(domain_url.realLink.data,domain_url.displayLink.data)) {
free_if_needed(&host_url);
free_if_needed(&domain_url);
return CL_PHISH_DOMAIN_OK;
}
free_if_needed(&domain_url);
return CL_PHISH_CLEAN;
}
free_if_needed(&domain_url);
}
free_if_needed(&host_url);
}/*HOST_SUFFICIENT*/
free_if_needed(&host_url);
/*we failed to find a reason why the 2 URLs are different, this is definitely phishing*/
if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED))
return CL_PHISH_HOST_NOT_LISTED;
return phishy_map(phishy,CL_PHISH_NOMATCH);
}
@ -1292,28 +1200,6 @@ static const char* phishing_ret_toString(enum phish_status rc)
switch(rc) {
case CL_PHISH_CLEAN:
return "Clean";
case CL_PHISH_CLEANUP_OK:
return "URLs match after cleanup";
case CL_PHISH_WHITELISTED:
return "URL is whitelisted";
case CL_PHISH_HOST_WHITELISTED:
return "host part of URL is whitelist";
case CL_PHISH_HOST_OK:
return "Hosts match";
case CL_PHISH_DOMAIN_OK:
return "Domains match";
case CL_PHISH_REDIR_OK:
return "After redirecting realURL, they match";
case CL_PHISH_HOST_REDIR_OK:
return "After redirecting realURL, hosts match";
case CL_PHISH_DOMAIN_REDIR_OK:
return "After redirecting the domains match";
case CL_PHISH_MAILTO_OK:
return "URL is mailto";
case CL_PHISH_NUMERIC_IP:
return "IP address encountered in hostname";
case CL_PHISH_TEXTURL:
return "Displayed link is not an URL, can't check if phishing or not";
case CL_PHISH_CLOAKED_NULL:
return "Link URL is cloaked (null byte %00)";
case CL_PHISH_CLOAKED_UIU:
@ -1323,10 +1209,6 @@ static const char* phishing_ret_toString(enum phish_status rc)
return "Visible links is SSL, real link is not";
case CL_PHISH_NOMATCH:
return "URLs are way too different";
case CL_PHISH_HOST_NOT_LISTED:
return "Host not listed in .pdb -> not checked";
case CL_PHISH_CLEAN_CID:
return "Embedded image in mail -> clean";
case CL_PHISH_HEX_URL:
return "Embedded hex urls";
default:

@ -23,30 +23,17 @@
#include "regex/regex.h"
#define CL_PHISH_BASE 100
enum phish_status {CL_PHISH_NODECISION=0,CL_PHISH_CLEAN=CL_PHISH_BASE, CL_PHISH_CLEANUP_OK,CL_PHISH_HOST_OK, CL_PHISH_DOMAIN_OK,
CL_PHISH_HOST_NOT_LISTED,
CL_PHISH_REDIR_OK, CL_PHISH_HOST_REDIR_OK, CL_PHISH_DOMAIN_REDIR_OK,
CL_PHISH_HOST_REVERSE_OK,CL_PHISH_DOMAIN_REVERSE_OK,
CL_PHISH_WHITELISTED,CL_PHISH_HOST_WHITELISTED,
CL_PHISH_CLEAN_CID,
CL_PHISH_TEXTURL, CL_PHISH_MAILTO_OK,
CL_PHISH_CLOAKED_UIU, CL_PHISH_NUMERIC_IP,CL_PHISH_HEX_URL,CL_PHISH_CLOAKED_NULL,CL_PHISH_SSL_SPOOF, CL_PHISH_NOMATCH};
#define HOST_SUFFICIENT 1
#define DOMAIN_SUFFICIENT (HOST_SUFFICIENT | 2)
#define DO_REVERSE_LOOKUP 4
#define CHECK_REDIR 8
#define CHECK_SSL 16
#define CHECK_CLOAKING 32
#define CLEANUP_URL 64
#define CHECK_DOMAIN_REVERSE 128
#define CHECK_IMG_URL 256
#define DOMAINLIST_REQUIRED 512
/* img checking disabled by default */
enum phish_status {CL_PHISH_NODECISION=0, CL_PHISH_CLEAN=CL_PHISH_BASE,
CL_PHISH_CLOAKED_UIU, CL_PHISH_NUMERIC_IP, CL_PHISH_HEX_URL, CL_PHISH_CLOAKED_NULL, CL_PHISH_SSL_SPOOF, CL_PHISH_NOMATCH};
#define CHECK_SSL 1
#define CHECK_CLOAKING 2
#define CLEANUP_URL 4
#define CHECK_IMG_URL 8
#define LINKTYPE_IMAGE 1
#define CL_PHISH_ALL_CHECKS (CLEANUP_URL|DOMAIN_SUFFICIENT|CHECK_SSL|CHECK_CLOAKING|CHECK_IMG_URL)
#define CL_PHISH_ALL_CHECKS (CLEANUP_URL|CHECK_SSL|CHECK_CLOAKING|CHECK_IMG_URL)
struct string {
int refcount;

Loading…
Cancel
Save