clamav/libclamav/regex_suffix.h

/*
 *  Parse a regular expression, and extract a static suffix.
 *
 *  Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *  Copyright (C) 2007-2013 Sourcefire, Inc.
 *
 *  Authors: Török Edvin
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 */
#ifndef REGEX_SUFFIX_H
#define REGEX_SUFFIX_H
#include "regex/regex.h"

struct regex_list {
    char *pattern;
    regex_t *preg;
    struct regex_list *nxt;
};

typedef cl_error_t (*suffix_callback)(void *cbdata, const char *suffix, size_t len, const struct regex_list *regex);

/**
 * @brief Build a suffix tree given a regex pattern
 *
 * @param pattern The regex pattern
 * @param pregs
 * @param cb
 * @param cbdata
 * @return cl_error_t May return a clam error type or may return a regex error integer.
 *         TODO: separate regex error code from clam error code into out param.
 */
cl_error_t cli_regex2suffix(const char *pattern, regex_t *preg, suffix_callback cb, void *cbdata);

#endif
split up regex_list. begin testing for regex_suffix git-svn: trunk@3985 17 years ago			`/*`
			`* Parse a regular expression, and extract a static suffix.`
			`*`
Bump copyright for 2022 Includes minor format corrections. 3 years ago			`* Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.`
Updating and cleaning up copyright notices. 6 years ago			`* Copyright (C) 2007-2013 Sourcefire, Inc.`
split up regex_list. begin testing for regex_suffix git-svn: trunk@3985 17 years ago			`*`
			`* Authors: Török Edvin`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License version 2 as`
			`* published by the Free Software Foundation.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
			`* along with this program; if not, write to the Free Software`
			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,`
			`* MA 02110-1301, USA.`
			`*/`
			`#ifndef REGEX_SUFFIX_H`
			`#define REGEX_SUFFIX_H`
			`#include "regex/regex.h"`

			`struct regex_list {`
clang-format'd using new .clang-format rules. 7 years ago			`char *pattern;`
			`regex_t *preg;`
			`struct regex_list *nxt;`
split up regex_list. begin testing for regex_suffix git-svn: trunk@3985 17 years ago			`};`
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety. 5 years ago
Return code checking corrections to regex suffix code. 6 years ago			`typedef cl_error_t (suffix_callback)(void cbdata, const char suffix, size_t len, const struct regex_list regex);`
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety. 5 years ago
			`/**`
			`* @brief Build a suffix tree given a regex pattern`
			`*`
			`* @param pattern The regex pattern`
			`* @param pregs`
			`* @param cb`
			`* @param cbdata`
			`* @return cl_error_t May return a clam error type or may return a regex error integer.`
			`* TODO: separate regex error code from clam error code into out param.`
			`*/`
Correcting types from int to cl_error_t where appropriate. Eliminating unused variables and referencing unused parameters to remove warnings. 6 years ago			`cl_error_t cli_regex2suffix(const char pattern, regex_t preg, suffix_callback cb, void *cbdata);`
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety. 5 years ago
split up regex_list. begin testing for regex_suffix git-svn: trunk@3985 17 years ago			`#endif`