ClamAV is an open source (GPLv2) anti-virus toolkit.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
clamav/libclamav/readdb.h

216 lines
7.8 KiB

/*
* Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
* Copyright (C) 2007-2013 Sourcefire, Inc.
* Copyright (C) 2002-2007 Tomasz Kojm <tkojm@clamav.net>
*
* Authors: Tomasz Kojm
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef __READDB_H
#define __READDB_H
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include "clamav.h"
#include "str.h"
#include "cvd.h"
#include "matcher.h"
/* Presumably the upper bound on the number of subsignatures a single logical
 * (.ldb) signature may contain -- TODO confirm against the parser that uses it. */
#define MAX_LDB_SUBSIGS 64
/* Forward declaration: the prototypes in this header only take
 * struct cli_matcher by pointer. */
struct cli_matcher;
/* NOTE: We don't include .info in CLI_DBEXT because they are only used for
 * one specific purpose - verifying the contents of database container files.
 * This list is geared towards file extensions of files that users can provide
 * to ClamAV directly.
 *
 * CLI_DBEXT(ext) is non-zero when any of the cli_strbcasestr() checks below
 * succeeds for one of the listed database extensions, and zero otherwise.
 * cli_strbcasestr() is not defined in this header (presumably a
 * case-insensitive "ends with" test provided by str.h -- confirm there).
 *
 * `ext` is expanded once per listed extension, so do not pass an expression
 * that has side effects.
 *
 * The extension list is written exactly once. The Yara rule extensions
 * (.yar, .yara) are the only build-dependent part; they are supplied by
 * CLI_DBEXT_YARA(), which is the constant 0 when HAVE_YARA is not defined.
 * Keeping a single list prevents the Yara and non-Yara variants from
 * drifting apart when an extension is added. */
#ifdef HAVE_YARA
#define CLI_DBEXT_YARA(ext)          \
    (cli_strbcasestr(ext, ".yar") || \
     cli_strbcasestr(ext, ".yara"))
#else
#define CLI_DBEXT_YARA(ext) (0)
#endif

#define CLI_DBEXT(ext)                    \
    (                                     \
        cli_strbcasestr(ext, ".db") ||    \
        cli_strbcasestr(ext, ".hdb") ||   \
        cli_strbcasestr(ext, ".hdu") ||   \
        cli_strbcasestr(ext, ".fp") ||    \
        cli_strbcasestr(ext, ".mdb") ||   \
        cli_strbcasestr(ext, ".mdu") ||   \
        cli_strbcasestr(ext, ".hsb") ||   \
        cli_strbcasestr(ext, ".hsu") ||   \
        cli_strbcasestr(ext, ".sfp") ||   \
        cli_strbcasestr(ext, ".msb") ||   \
        cli_strbcasestr(ext, ".msu") ||   \
        cli_strbcasestr(ext, ".ndb") ||   \
        cli_strbcasestr(ext, ".ndu") ||   \
        cli_strbcasestr(ext, ".ldb") ||   \
        cli_strbcasestr(ext, ".ldu") ||   \
        cli_strbcasestr(ext, ".sdb") ||   \
        cli_strbcasestr(ext, ".zmd") ||   \
        cli_strbcasestr(ext, ".rmd") ||   \
        cli_strbcasestr(ext, ".pdb") ||   \
        cli_strbcasestr(ext, ".gdb") ||   \
        cli_strbcasestr(ext, ".wdb") ||   \
        cli_strbcasestr(ext, ".cbc") ||   \
        cli_strbcasestr(ext, ".ftm") ||   \
        cli_strbcasestr(ext, ".cfg") ||   \
        cli_strbcasestr(ext, ".cvd") ||   \
        cli_strbcasestr(ext, ".cld") ||   \
        cli_strbcasestr(ext, ".cud") ||   \
        cli_strbcasestr(ext, ".cdb") ||   \
        cli_strbcasestr(ext, ".cat") ||   \
        cli_strbcasestr(ext, ".crb") ||   \
        cli_strbcasestr(ext, ".idb") ||   \
        cli_strbcasestr(ext, ".ioc") ||   \
        cli_strbcasestr(ext, ".pwdb") ||  \
        cli_strbcasestr(ext, ".ign") ||   \
        cli_strbcasestr(ext, ".ign2") ||  \
        cli_strbcasestr(ext, ".imp") ||   \
        CLI_DBEXT_YARA(ext))
/**
 * @brief Produce the virus name string to use for a signature.
 *
 * NOTE(review): the implementation is not visible from this header. Presumably
 * the result is a newly allocated string owned by the caller, and NULL on
 * failure -- confirm before relying on either.
 *
 * @param virname   Name the result is derived from.
 * @param official  Presumably non-zero when the signature comes from an official database -- TODO confirm.
 * @return char*    The resulting name string.
 */
char *cli_virname(const char *virname, unsigned int official);
/**
 * @brief Parse & load a body-based pattern for a logical signature (LDB or Yara)
 * that may have subsignature modifiers at the end.
 *
 * This function creates a new pattern with the required modification before calling
 * cli_add_content_match_pattern() to load the content patterns to match with the AC matcher or BM matcher.
 *
 * For more info about subsignature modifiers: https://docs.clamav.net/manual/Signatures/LogicalSignatures.html#subsignature-modifiers
 *
 * NOTE(review): the parameter notes below are inferred from the parameter names
 * and types only; the implementation is not visible from this header. Confirm
 * them against the definition.
 *
 * @param root     Matcher the pattern is loaded into.
 * @param virname  Name of the signature the pattern belongs to.
 * @param hexsig   The body-based pattern to parse.
 * @param sigopts  The subsignature modifiers to apply (presumably a bit mask).
 * @param rtype    TODO document.
 * @param type     TODO document.
 * @param offset   Presumably the offset specification for the pattern, as a string.
 * @param target   Presumably the target type the signature applies to.
 * @param lsigid   Presumably the same lsig_id/subsig_id pair that readdb_parse_ldb_subsignature() takes.
 * @param options  Presumably the database load options.
 * @return cl_error_t Status code for the parse/load.
 */
cl_error_t cli_sigopts_handler(struct cli_matcher *root, const char *virname, const char *hexsig,
uint8_t sigopts, uint16_t rtype, uint16_t type,
const char *offset, uint8_t target, const uint32_t *lsigid, unsigned int options);
/**
 * @brief Parse body-based patterns that DO NOT have subsignature modifiers.
 *
 * This function will split up body-based signature patterns that have {n-m} and * wildcards
 * into multiple subsignatures, adding each.
 *
 * NOTE(review): the parameter notes below are inferred from the parameter names
 * and types only; the implementation is not visible from this header. Confirm
 * them against the definition.
 *
 * @param root     Matcher the resulting patterns are added to.
 * @param virname  Name of the signature the pattern belongs to.
 * @param hexsig   The body-based pattern to parse.
 * @param sigopts  Presumably modifier flags already resolved by the caller -- TODO confirm.
 * @param rtype    TODO document.
 * @param type     TODO document.
 * @param offset   Presumably the offset specification for the pattern, as a string.
 * @param target   Presumably the target type the signature applies to.
 * @param lsigid   Presumably the same lsig_id/subsig_id pair that readdb_parse_ldb_subsignature() takes.
 * @param options  Presumably the database load options.
 * @return cl_error_t Status code for the parse/load.
 */
cl_error_t cli_add_content_match_pattern(struct cli_matcher *root, const char *virname, const char *hexsig,
uint8_t sigopts, uint16_t rtype, uint16_t type,
const char *offset, uint8_t target, const uint32_t *lsigid, unsigned int options);
/**
 * @brief Parse a subsignature from a logical signature.
 *
 * Called once for each subsignature in a logical signature.
 * Not for use in other signature types (ndb, yara, etc).
 *
 * This function determines what type of subsignature it is, whether that's:
 * - a macro subsignature
 * - a pcre subsignature
 * - a byte compare subsignature
 * - a fuzzy hash subsignature
 * - or else treated as a file content matching subsignature.
 *
 * NOTE(review): apart from lsigid, the parameter notes below are inferred from
 * the parameter names and types only; confirm them against the definition.
 *
 * @param root                  Matcher the subsignature is added to.
 * @param virname               Name of the logical signature.
 * @param hexsig                The subsignature text to parse. Not const, so the callee may modify it -- TODO confirm.
 * @param offset                Presumably the offset specification for the subsignature, as a string.
 * @param target                Presumably the target type the signature applies to.
 * @param lsigid An array of 2 uint32_t numbers: lsig_id and subsig_id. May be NULL for testing.
 * @param options               Presumably the database load options.
 * @param current_subsig_index  Presumably the index of this subsignature within the logical signature.
 * @param num_subsigs           Presumably the total number of subsignatures in the logical signature.
 * @param tdb                   Presumably the logical signature's target description block.
 * @return cl_error_t Status code for the parse/load.
 */
cl_error_t readdb_parse_ldb_subsignature(struct cli_matcher *root, const char *virname, char *hexsig,
const char *offset, uint8_t target, const uint32_t *lsigid, unsigned int options,
int current_subsig_index, int num_subsigs, struct cli_lsig_tdb *tdb);
/**
 * @brief Load a signature database file into an engine.
 *
 * NOTE(review): the implementation is not visible from this header; the
 * parameter notes are inferred from names and types -- confirm them.
 *
 * @param filename  Path of the database file to load.
 * @param engine    Engine that receives the loaded signatures.
 * @param signo     Presumably a running count of loaded signatures, updated by the call.
 * @param options   Presumably the database load options.
 * @param dbio      Presumably an optional I/O context used when reading from inside a container -- TODO confirm whether NULL is allowed.
 * @return cl_error_t Status code for the load.
 */
cl_error_t cli_load(const char *filename, struct cl_engine *engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio);
/**
 * @brief Read the next line of database input into a caller-supplied buffer.
 *
 * NOTE(review): inferred from the fgets-like signature; the implementation is
 * not visible from this header. Presumably reads from `fs` when it is given
 * and from `dbio` otherwise, and returns NULL at end of input -- confirm.
 *
 * @param buff  Destination buffer.
 * @param size  Size of `buff` in bytes.
 * @param fs    Stream to read from.
 * @param dbio  Database I/O context.
 * @return char* Presumably `buff` on success.
 */
char *cli_dbgets(char *buff, unsigned int size, FILE *fs, struct cli_dbio *dbio);
/**
 * @brief Initialize the engine's matcher roots.
 *
 * NOTE(review): inferred from the name; the implementation is not visible from
 * this header -- confirm what is allocated and whether repeated calls are safe.
 *
 * @param engine   Engine whose roots are initialized.
 * @param options  Presumably the database load options.
 * @return cl_error_t Status code for the initialization.
 */
cl_error_t cli_initroots(struct cl_engine *engine, unsigned int options);
/* Yara support hooks; only declared when the build defines HAVE_YARA. */
#ifdef HAVE_YARA
/* Presumably sets up the Yara-related state of `engine` -- confirm in the implementation. */
cl_error_t cli_yara_init(struct cl_engine *engine);
/* Presumably releases whatever cli_yara_init() set up on `engine` -- confirm in the implementation. */
void cli_yara_free(struct cl_engine *engine);
#endif
#endif