mirror of https://github.com/Cisco-Talos/clamav
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
449 lines
13 KiB
449 lines
13 KiB
/*
|
|
* Simple library to detect and validate SSN and Credit Card numbers.
|
|
*
|
|
* Copyright (C) 2007-2008 Sourcefire, Inc.
|
|
*
|
|
* Authors: Martin Roesch <roesch@sourcefire.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
* MA 02110-1301, USA.
|
|
*/
|
|
|
|
#if HAVE_CONFIG_H
|
|
#include "clamav-config.h"
|
|
#endif
|
|
|
|
#include <stdio.h>
|
|
#include <ctype.h>
|
|
#include <string.h>
|
|
#include <stdarg.h>
|
|
#include <stdlib.h>
|
|
#include "dlp.h"
|
|
#include "others.h"
|
|
#include "str.h"
|
|
|
|
/* detection mode macros for the contains_* functions */
|
|
#define DETECT_MODE_DETECT 0
|
|
#define DETECT_MODE_COUNT 1
|
|
|
|
/* group number mapping is here */
|
|
/* http://www.socialsecurity.gov/employer/highgroup.txt */
|
|
/* here's a perl script to convert the raw data from the highgroup.txt
|
|
* file to the data set in ssn_max_group[]:
|
|
--
|
|
local $/;
|
|
my $i = <>;
|
|
my $count = 0;
|
|
while ($i =~ s/(\d{3}) (\d{2})//) {
|
|
print int($2) .", ";
|
|
if ($count == 18)
|
|
{
|
|
print "\n";
|
|
$count = 0;
|
|
}
|
|
else
|
|
{
|
|
$count++;
|
|
}
|
|
}
|
|
--
|
|
*
|
|
* run 'perl convert.pl < highgroup.txt' to generate the data
|
|
*
|
|
*/
|
|
|
|
/* MAX_AREA is the maximum assigned area number. This can be derived from
|
|
* the data in the highgroup.txt file by looking at the last area->group
|
|
* mapping from that file.
|
|
*/
|
|
#define MAX_AREA 772
|
|
|
|
/* array of max group numbers for a given area number */
|
|
static int ssn_max_group[MAX_AREA+1] = { 0,
|
|
6, 6, 4, 8, 8, 8, 6, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90,
|
|
90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 88, 88, 88, 88, 72, 72, 72, 72,
|
|
70, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 96, 96, 96, 96, 96, 96, 96, 96,
|
|
96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
|
|
96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
|
|
96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
|
|
96, 96, 96, 96, 96, 96, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,
|
|
94, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 17, 17, 17, 17, 17, 17,
|
|
17, 17, 17, 17, 17, 17, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
|
|
84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 82, 82, 82, 82, 82, 82, 82, 82,
|
|
82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82,
|
|
82, 82, 79, 79, 79, 79, 79, 79, 79, 79, 77, 6, 4, 99, 99, 99, 99, 99, 99,
|
|
99, 99, 99, 53, 53, 53, 53, 53, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
|
|
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
|
|
99, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
|
13, 13, 13, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 33, 33,
|
|
31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 6, 6, 6, 6, 6, 6,
|
|
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
|
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
35, 35, 35, 35, 35, 35, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
|
|
33, 33, 33, 33, 33, 33, 29, 29, 29, 29, 29, 29, 29, 29, 27, 27, 27, 27, 27,
|
|
67, 67, 67, 67, 67, 67, 67, 67, 99, 99, 99, 99, 99, 99, 99, 99, 63, 61, 61,
|
|
61, 61, 61, 61, 61, 61, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
|
|
99, 99, 23, 23, 23, 23, 23, 23, 23, 21, 21, 99, 99, 99, 99, 99, 99, 99, 99,
|
|
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 51, 51, 51, 51, 49, 49, 49, 49,
|
|
49, 49, 37, 37, 37, 37, 37, 37, 37, 37, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
|
25, 25, 25, 23, 23, 23, 33, 33, 41, 39, 53, 51, 51, 51, 27, 27, 27, 27, 27,
|
|
27, 27, 45, 43, 79, 77, 55, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 63, 63,
|
|
63, 61, 61, 61, 61, 61, 61, 75, 73, 73, 73, 73, 99, 99, 99, 99, 99, 99, 99,
|
|
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
|
|
99, 99, 99, 51, 99, 99, 45, 45, 43, 37, 99, 99, 99, 99, 99, 61, 99, 3, 99,
|
|
99, 99, 99, 99, 99, 99, 84, 84, 84, 84, 99, 99, 67, 67, 65, 65, 65, 65, 65,
|
|
65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 11,
|
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 96,
|
|
96, 44, 44, 46, 46, 46, 44, 28, 26, 26, 26, 26, 16, 16, 16, 14, 14, 14, 14,
|
|
36, 34, 34, 34, 34, 34, 34, 34, 34, 14, 14, 12, 12, 90, 14, 14, 14, 14, 12,
|
|
12, 12, 12, 12, 12, 9, 9, 7, 7, 7, 7, 7, 7, 7, 18, 18, 18, 18, 18,
|
|
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
|
28, 18, 18, 10, 14, 10, 10, 10, 10, 10, 9, 9, 3, 1, 5, 5, 5, 5, 5,
|
|
5, 3, 3, 82, 82, 66, 66, 64, 64, 64, 64, 64
|
|
};
|
|
|
|
|
|
|
|
int dlp_is_valid_cc(const unsigned char *buffer, int length)
|
|
{
|
|
int even = 1;
|
|
int sum = 0;
|
|
int i = 0;
|
|
int val = 0;
|
|
int digits = 0;
|
|
char cc_digits[20];
|
|
|
|
if(buffer == NULL || length < 13)
|
|
return 0;
|
|
/* if the first digit is greater than 6 it isn't one of the major
|
|
* credit cards
|
|
* reference => http://www.beachnet.com/~hstiles/cardtype.html
|
|
*/
|
|
if(!isdigit(buffer[0]) || buffer[0] > '6')
|
|
return 0;
|
|
|
|
if(length > 19)
|
|
length = 19;
|
|
|
|
for(i = 0; i < length; i++)
|
|
{
|
|
if(isdigit(buffer[i]) == 0)
|
|
{
|
|
if(isspace(buffer[i]))
|
|
continue;
|
|
else
|
|
break;
|
|
}
|
|
|
|
cc_digits[digits] = buffer[i];
|
|
digits++;
|
|
val = buffer[i] - '0';
|
|
|
|
if(even)
|
|
{
|
|
if((val *= 2) > 9) val = (val - 10) + 1;
|
|
}
|
|
|
|
even = !even;
|
|
sum += val;
|
|
}
|
|
cc_digits[digits] = 0;
|
|
if(i < length && isdigit(buffer[i]))
|
|
return 0;
|
|
|
|
if((sum % 10 != 0) || (digits < 13))
|
|
return 0;
|
|
|
|
if(digits == 13) /* VISA */
|
|
{
|
|
if(cc_digits[0] == '4') {
|
|
cli_dbgmsg("dlp_is_valid_cc: VISA [1] (%s)\n", cc_digits);
|
|
return 1;
|
|
}
|
|
}
|
|
else if(digits == 14) /* Diners Club */
|
|
{
|
|
if(cc_digits[0] == '3' && (cc_digits[1] == '6' || cc_digits[1] == '8'))
|
|
{
|
|
cli_dbgmsg("dlp_is_valid_cc: Diners Club [1] (%s)\n", cc_digits);
|
|
return 1;
|
|
}
|
|
else if(cc_digits[0] == '3' && cc_digits[1] == '0')
|
|
{
|
|
val = cc_digits[2] - '0';
|
|
if(val >= 0 && val <= 5) {
|
|
cli_dbgmsg("dlp_is_valid_cc: Diners Club [2] (%s)\n", cc_digits);
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
else if(digits == 15)
|
|
{
|
|
if(cc_digits[0] == '3' && (cc_digits[1] == '4' || cc_digits[1] == '7')) /*AMEX*/
|
|
{
|
|
cli_dbgmsg("dlp_is_valid_cc: AMEX (%s)\n", cc_digits);
|
|
return 1;
|
|
}
|
|
else if(!strncmp(cc_digits, "2131", 4) || !strncmp(cc_digits, "1800", 4))
|
|
{ /* JCB */
|
|
cli_dbgmsg("dlp_is_valid_cc: JCB [1] (%s)\n", cc_digits);
|
|
return 1;
|
|
}
|
|
}
|
|
else if(digits == 16)
|
|
{
|
|
if(cc_digits[0] == '3') /* JCB */
|
|
{
|
|
cli_dbgmsg("dlp_is_valid_cc: JCB [2] (%s)\n", cc_digits);
|
|
return 1;
|
|
}
|
|
else if(cc_digits[0] == '4') /* VISA */
|
|
{
|
|
cli_dbgmsg("dlp_is_valid_cc: VISA [2] (%s)\n", cc_digits);
|
|
return 1;
|
|
}
|
|
else if(cc_digits[0] == '5') /* MASTERCARD */
|
|
{
|
|
val = cc_digits[1] - '0';
|
|
if(val >= 1 && val <= 5) {
|
|
cli_dbgmsg("dlp_is_valid_cc: MASTERCARD (%s)\n", cc_digits);
|
|
return 1;
|
|
}
|
|
}
|
|
else if(!strncmp(cc_digits, "6011", 4)) /* Discover */
|
|
{
|
|
cli_dbgmsg("dlp_is_valid_cc: Discover (%s)\n", cc_digits);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int contains_cc(const unsigned char *buffer, int length, int detmode)
|
|
{
|
|
const unsigned char *idx;
|
|
const unsigned char *end;
|
|
int count = 0;
|
|
|
|
if(buffer == NULL || length < 13)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
end = buffer + length;
|
|
idx = buffer;
|
|
while(idx < end)
|
|
{
|
|
if(isdigit(*idx))
|
|
{
|
|
if((idx == buffer || !isdigit(idx[-1])) && dlp_is_valid_cc(idx, length - (idx - buffer)) == 1)
|
|
{
|
|
if(detmode == DETECT_MODE_DETECT)
|
|
return 1;
|
|
else
|
|
{
|
|
count++;
|
|
/* if we got a valid match we should increment the idx ptr
|
|
* to gain a little performance
|
|
*/
|
|
idx += (length > 15?15:(length-1));
|
|
}
|
|
}
|
|
}
|
|
idx++;
|
|
}
|
|
|
|
return count;
|
|
}
|
|
|
|
int dlp_get_cc_count(const unsigned char *buffer, int length)
|
|
{
|
|
return contains_cc(buffer, length, DETECT_MODE_COUNT);
|
|
}
|
|
|
|
int dlp_has_cc(const unsigned char *buffer, int length)
|
|
{
|
|
return contains_cc(buffer, length, DETECT_MODE_DETECT);
|
|
}
|
|
|
|
int dlp_is_valid_ssn(const unsigned char *buffer, int length, int format)
|
|
{
|
|
int area_number;
|
|
int group_number;
|
|
int serial_number;
|
|
int minlength;
|
|
int retval = 1;
|
|
char numbuf[12];
|
|
|
|
if(buffer == NULL)
|
|
return 0;
|
|
|
|
minlength = (format==SSN_FORMAT_HYPHENS?11:9);
|
|
|
|
if(length < minlength)
|
|
return 0;
|
|
|
|
if((length > minlength) && isdigit(buffer[minlength]))
|
|
return 0;
|
|
|
|
strncpy(numbuf, (const char*)buffer, minlength);
|
|
numbuf[minlength] = 0;
|
|
|
|
/* sscanf parses and (basically) validates the string for us */
|
|
switch(format)
|
|
{
|
|
case SSN_FORMAT_HYPHENS:
|
|
if(numbuf[3] != '-' || numbuf[6] != '-')
|
|
return 0;
|
|
|
|
if(sscanf((const char *) numbuf,
|
|
"%3d-%2d-%4d",
|
|
&area_number,
|
|
&group_number,
|
|
&serial_number) != 3)
|
|
{
|
|
return 0;
|
|
}
|
|
break;
|
|
case SSN_FORMAT_STRIPPED:
|
|
if(!cli_isnumber(numbuf))
|
|
return 0;
|
|
|
|
if(sscanf((const char *) numbuf,
|
|
"%3d%2d%4d",
|
|
&area_number,
|
|
&group_number,
|
|
&serial_number) != 3)
|
|
{
|
|
return 0;
|
|
}
|
|
break;
|
|
}
|
|
|
|
/* start validating */
|
|
/* validation data taken from
|
|
* http://en.wikipedia.org/wiki/Social_Security_number_%28United_States%29
|
|
*/
|
|
if(area_number > MAX_AREA ||
|
|
area_number == 666 ||
|
|
area_number <= 0 ||
|
|
group_number <= 0 ||
|
|
group_number > 99 ||
|
|
serial_number <= 0 ||
|
|
serial_number > 9999)
|
|
retval = 0;
|
|
|
|
if(area_number == 987 && group_number == 65)
|
|
{
|
|
if(serial_number >= 4320 && serial_number <= 4329)
|
|
retval = 0;
|
|
}
|
|
|
|
if(group_number > ssn_max_group[area_number])
|
|
retval = 0;
|
|
|
|
if(retval)
|
|
cli_dbgmsg("dlp_is_valid_ssn: SSN_%s: %s\n", format == SSN_FORMAT_HYPHENS ? "HYPHENS" : "STRIPPED", numbuf);
|
|
|
|
return retval;
|
|
}
|
|
|
|
static int contains_ssn(const unsigned char *buffer, int length, int format, int detmode)
|
|
{
|
|
const unsigned char *idx;
|
|
const unsigned char *end;
|
|
int count = 0;
|
|
|
|
if(buffer == NULL || length < 11)
|
|
return 0;
|
|
|
|
end = buffer + length;
|
|
idx = buffer;
|
|
while(idx < end)
|
|
{
|
|
if(isdigit(*idx))
|
|
{
|
|
/* check for area number and the first hyphen */
|
|
if((idx == buffer || !isdigit(idx[-1])) && dlp_is_valid_ssn(idx, length - (idx - buffer), format) == 1)
|
|
{
|
|
if(detmode == DETECT_MODE_COUNT)
|
|
{
|
|
count++;
|
|
/* hop over the matched bytes if we found an SSN */
|
|
idx += ((format == SSN_FORMAT_HYPHENS)?11:9);
|
|
}
|
|
else
|
|
{
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
idx++;
|
|
}
|
|
|
|
return count;
|
|
}
|
|
|
|
int dlp_get_stripped_ssn_count(const unsigned char *buffer, int length)
|
|
{
|
|
return contains_ssn(buffer,
|
|
length,
|
|
SSN_FORMAT_STRIPPED,
|
|
DETECT_MODE_COUNT);
|
|
}
|
|
|
|
int dlp_get_normal_ssn_count(const unsigned char *buffer, int length)
|
|
{
|
|
return contains_ssn(buffer,
|
|
length,
|
|
SSN_FORMAT_HYPHENS,
|
|
DETECT_MODE_COUNT);
|
|
}
|
|
|
|
int dlp_get_ssn_count(const unsigned char *buffer, int length)
|
|
{
|
|
/* this will suck for performance but will find SSNs in either
|
|
* format
|
|
*/
|
|
return (dlp_get_stripped_ssn_count(buffer, length) + dlp_get_normal_ssn_count(buffer, length));
|
|
}
|
|
|
|
int dlp_has_ssn(const unsigned char *buffer, int length)
|
|
{
|
|
return (contains_ssn(buffer,
|
|
length,
|
|
SSN_FORMAT_HYPHENS,
|
|
DETECT_MODE_DETECT)
|
|
| contains_ssn(buffer,
|
|
length,
|
|
SSN_FORMAT_STRIPPED,
|
|
DETECT_MODE_DETECT));
|
|
}
|
|
|
|
int dlp_has_stripped_ssn(const unsigned char *buffer, int length)
|
|
{
|
|
return contains_ssn(buffer,
|
|
length,
|
|
SSN_FORMAT_STRIPPED,
|
|
DETECT_MODE_DETECT);
|
|
}
|
|
|
|
int dlp_has_normal_ssn(const unsigned char *buffer, int length)
|
|
{
|
|
return contains_ssn(buffer,
|
|
length,
|
|
SSN_FORMAT_HYPHENS,
|
|
DETECT_MODE_DETECT);
|
|
}
|
|
|