Allmatch-mode overhaul, part 1: append_virus

Rework the append_virus mechanism to store evidence (strong indicators,
pua indicators, and eventually weak indicators) in vectors. When
appending a "virus", we will return CLEAN when in allmatch-mode, and
simply add the indicator to the appropriate vector.
Later we can check whether there were any alerts, and therefore which verdict
to return, by summing the lengths of the strong and pua indicator vectors.

This does away with storing the latest "virname" in the scan context.
Instead, we can query for the last indicator in the evidence, giving
priority to strong indicators.

When heuristic-precedence is enabled, add PUA as Strong instead of
as PotentiallyUnwanted. This way, they will be treated equally and
reported in order in allmatch mode.

Also document reason for disabling cache with metadata JSON enabled
pull/727/head
Micah Snyder 3 years ago committed by Micah Snyder
parent d09a7ed6c7
commit 621381e0cd
  1. 3
      clambc/bcrun.c
  2. 22
      libclamav/bytecode.c
  3. 158
      libclamav/cache.c
  4. 48
      libclamav/cache.h
  5. 2
      libclamav/fmap.c
  6. 16
      libclamav/fmap.h
  7. 22
      libclamav/gif.c
  8. 36
      libclamav/jpeg.c
  9. 7
      libclamav/matcher.c
  10. 7
      libclamav/mbox.c
  11. 135
      libclamav/others.c
  12. 16
      libclamav/others.h
  13. 2
      libclamav/pdf.c
  14. 15
      libclamav/pe.c
  15. 19
      libclamav/phishcheck.c
  16. 10
      libclamav/png.c
  17. 4
      libclamav/readdb.c
  18. 95
      libclamav/scanners.c
  19. 2
      libclamav/scanners.h
  20. 14
      libclamav/tiff.c
  21. 11
      libclamav_rust/cbindgen.toml
  22. 238
      libclamav_rust/src/evidence.rs
  23. 4
      libclamav_rust/src/fuzzy_hash.rs
  24. 1
      libclamav_rust/src/lib.rs
  25. 10
      libclamav_rust/src/sys.rs
  26. 22
      sigtool/sigtool.c
  27. 7
      sigtool/vba.c
  28. 5
      unit_tests/check_bytecode.c
  29. 57
      unit_tests/check_matchers.c
  30. 82
      unit_tests/check_regex.c

@ -34,6 +34,7 @@
#include "others.h"
#include "bytecode.h"
#include "bytecode_priv.h"
#include "clamav_rust.h"
// common
#include "optparser.h"
@ -395,6 +396,7 @@ int main(int argc, char *argv[])
}
ctx->ctx = &cctx;
cctx.engine = engine;
cctx.evidence = evidence_new();
cctx.recursion_stack_size = cctx.engine->max_recursion_level;
cctx.recursion_stack = cli_calloc(sizeof(recursion_level_t), cctx.recursion_stack_size);
@ -478,6 +480,7 @@ int main(int argc, char *argv[])
funmap(map);
cl_engine_free(engine);
free(cctx.recursion_stack);
evidence_free(cctx.evidence);
}
cli_bytecode_destroy(bc);
cli_bytecode_done(&bcs);

@ -2787,7 +2787,7 @@ int cli_bytecode_runlsig(cli_ctx *cctx, struct cli_target_info *tinfo,
struct cli_bc_ctx ctx;
const struct cli_bc *bc = &bcs->all_bcs[bc_idx - 1];
struct cli_pe_hook_data pehookdata;
const char* bc_name = NULL;
const char *bc_name = NULL;
if (bc_idx == 0)
return CL_ENULLARG;
@ -2835,19 +2835,17 @@ int cli_bytecode_runlsig(cli_ctx *cctx, struct cli_target_info *tinfo,
return CL_SUCCESS;
}
if (ctx.virname) {
if (cctx->num_viruses == 0) {
int rc;
cli_dbgmsg("Bytecode found virus: %s\n", ctx.virname);
if (!strncmp(ctx.virname, "BC.Heuristics", 13))
rc = cli_append_possibly_unwanted(cctx, ctx.virname);
else
rc = cli_append_virus(cctx, ctx.virname);
cli_bytecode_context_clear(&ctx);
return rc;
int rc;
cli_dbgmsg("Bytecode found virus: %s\n", ctx.virname);
if (!strncmp(ctx.virname, "BC.Heuristics", 13)) {
rc = cli_append_potentially_unwanted(cctx, ctx.virname);
} else {
cli_bytecode_context_clear(&ctx);
return CL_VIRUS;
rc = cli_append_virus(cctx, ctx.virname);
}
cli_bytecode_context_clear(&ctx);
return rc;
}
ret = cli_bytecode_context_getresult_int(&ctx);
cli_dbgmsg("Bytecode '%s' (id: %u) returned code: %u\n", bc_name, bc->id, ret);

@ -33,6 +33,8 @@
#include "cache.h"
#include "fmap.h"
#include "clamav_rust.h"
/* The number of root trees and the chooser function
Each tree is protected by a mutex against concurrent access */
/* #define TREES 1 */
@ -73,6 +75,13 @@ struct cache_set { /* a tree */
struct node *last;
};
/* One cache tree together with the mutex that protects it against
   concurrent access (the mutex exists only in CL_THREAD_SAFE builds). */
struct CACHE {
struct cache_set cacheset;
#ifdef CL_THREAD_SAFE
pthread_mutex_t mutex;
#endif
};
/* Allocates all the nodes and sets up the replacement chain */
static int cacheset_init(struct cache_set *cs, mpool_t *mempool)
{
@ -519,40 +528,62 @@ static inline void cacheset_remove(struct cache_set *cs, unsigned char *md5, siz
printchain("remove (after)", cs);
}
/* COMMON STUFF --------------------------------------------------------------------- */
/* Looks up a hash in the proper tree */
static int cache_lookup_hash(unsigned char *md5, size_t len, struct CACHE *cache, uint32_t recursion_level)
{
unsigned int key = 0;
int ret = CL_VIRUS;
struct CACHE *c;
if (!md5) {
cli_dbgmsg("cache_lookup: No hash available. Nothing to look up.\n");
return ret;
}
key = getkey(md5);
c = &cache[key];
struct CACHE {
struct cache_set cacheset;
#ifdef CL_THREAD_SAFE
pthread_mutex_t mutex;
if (pthread_mutex_lock(&c->mutex)) {
cli_errmsg("cache_lookup_hash: cache_lookup_hash: mutex lock fail\n");
return ret;
}
#endif
};
/* Allocates the trees for the engine cache */
int cli_cache_init(struct cl_engine *engine)
ret = (cacheset_lookup(&c->cacheset, md5, len, recursion_level)) ? CL_CLEAN : CL_VIRUS;
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&c->mutex);
#endif
return ret;
}
int clean_cache_init(struct cl_engine *engine)
{
struct CACHE *cache;
unsigned int i, j;
if (!engine) {
cli_errmsg("cli_cache_init: mpool malloc fail\n");
cli_errmsg("clean_cache_init: mpool malloc fail\n");
return 1;
}
if (engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
cli_dbgmsg("cli_cache_init: Caching disabled.\n");
cli_dbgmsg("clean_cache_init: Caching disabled.\n");
return 0;
}
if (!(cache = MPOOL_MALLOC(engine->mempool, sizeof(struct CACHE) * TREES))) {
cli_errmsg("cli_cache_init: mpool malloc fail\n");
cli_errmsg("clean_cache_init: mpool malloc fail\n");
return 1;
}
for (i = 0; i < TREES; i++) {
#ifdef CL_THREAD_SAFE
if (pthread_mutex_init(&cache[i].mutex, NULL)) {
cli_errmsg("cli_cache_init: mutex init fail\n");
cli_errmsg("clean_cache_init: mutex init fail\n");
for (j = 0; j < i; j++) cacheset_destroy(&cache[j].cacheset, engine->mempool);
for (j = 0; j < i; j++) pthread_mutex_destroy(&cache[j].mutex);
MPOOL_FREE(engine->mempool, cache);
@ -572,8 +603,7 @@ int cli_cache_init(struct cl_engine *engine)
return 0;
}
/* Frees the engine cache */
void cli_cache_destroy(struct cl_engine *engine)
void clean_cache_destroy(struct cl_engine *engine)
{
struct CACHE *cache;
unsigned int i;
@ -594,40 +624,7 @@ void cli_cache_destroy(struct cl_engine *engine)
MPOOL_FREE(engine->mempool, cache);
}
/* Looks up an hash in the proper tree */
static int cache_lookup_hash(unsigned char *md5, size_t len, struct CACHE *cache, uint32_t recursion_level)
{
unsigned int key = 0;
int ret = CL_VIRUS;
struct CACHE *c;
if (!md5) {
cli_dbgmsg("cache_lookup: No hash available. Nothing to look up.\n");
return ret;
}
key = getkey(md5);
c = &cache[key];
#ifdef CL_THREAD_SAFE
if (pthread_mutex_lock(&c->mutex)) {
cli_errmsg("cache_lookup_hash: cache_lookup_hash: mutex lock fail\n");
return ret;
}
#endif
/* cli_warnmsg("cache_lookup_hash: key is %u\n", key); */
ret = (cacheset_lookup(&c->cacheset, md5, len, recursion_level)) ? CL_CLEAN : CL_VIRUS;
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&c->mutex);
// if(ret == CL_CLEAN) cli_warnmsg("cached\n");
#endif
return ret;
}
/* Adds a hash to the cache */
void cache_add(unsigned char *md5, size_t size, cli_ctx *ctx)
void clean_cache_add(unsigned char *md5, size_t size, cli_ctx *ctx)
{
const char *errmsg = NULL;
@ -639,24 +636,40 @@ void cache_add(unsigned char *md5, size_t size, cli_ctx *ctx)
return;
if (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
cli_dbgmsg("cache_add: Caching disabled. Not adding sample to cache.\n");
cli_dbgmsg("clean_cache_add: Caching disabled. Not adding sample to cache.\n");
return;
}
if (!md5) {
cli_dbgmsg("cache_add: No hash available. Nothing to add to cache.\n");
cli_dbgmsg("clean_cache_add: No hash available. Nothing to add to cache.\n");
return;
}
key = getkey(md5);
level = (ctx->fmap && ctx->fmap->dont_cache_flag) ? ctx->recursion_level : 0;
if (ctx->found_possibly_unwanted && (level || 0 == ctx->recursion_level))
if (SCAN_COLLECT_METADATA) {
// Don't cache when using the "collect metadata" feature.
// We don't cache the JSON, so we can't reproduce it when the cache is positive.
cli_dbgmsg("clean_cache_add: collect metadata feature enabled, skipping cache\n");
return;
if (SCAN_ALLMATCHES && (ctx->num_viruses > 0)) {
cli_dbgmsg("cache_add: alert found within same topfile, skipping cache\n");
}
if (ctx->fmap && ctx->fmap->dont_cache_flag == true) {
cli_dbgmsg("clean_cache_add: caching disabled for this layer, skipping cache\n");
return;
}
c = &ctx->engine->cache[key];
if (0 < evidence_num_alerts(ctx->evidence)) {
// TODO: The dont cache flag should take care of preventing caching of files with embedded files that alert.
// Consider removing this check to allow caching of other actually clean files found within archives.
// It would be a (very) minor optimization.
cli_dbgmsg("clean_cache_add: alert found within same topfile, skipping cache\n");
return;
}
level = (ctx->fmap && ctx->fmap->dont_cache_flag) ? ctx->recursion_level : 0;
key = getkey(md5);
c = &ctx->engine->cache[key];
#ifdef CL_THREAD_SAFE
if (pthread_mutex_lock(&c->mutex)) {
cli_errmsg("cli_add: mutex lock fail\n");
@ -664,8 +677,6 @@ void cache_add(unsigned char *md5, size_t size, cli_ctx *ctx)
}
#endif
/* cli_warnmsg("cache_add: key is %u\n", key); */
errmsg = cacheset_add(&c->cacheset, md5, size, level);
#ifdef CL_THREAD_SAFE
@ -674,12 +685,13 @@ void cache_add(unsigned char *md5, size_t size, cli_ctx *ctx)
if (errmsg != NULL) {
cli_errmsg("%s\n", errmsg);
}
cli_dbgmsg("cache_add: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x (level %u)\n", md5[0], md5[1], md5[2], md5[3], md5[4], md5[5], md5[6], md5[7], md5[8], md5[9], md5[10], md5[11], md5[12], md5[13], md5[14], md5[15], level);
cli_dbgmsg("clean_cache_add: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x (level %u)\n", md5[0], md5[1], md5[2], md5[3], md5[4], md5[5], md5[6], md5[7], md5[8], md5[9], md5[10], md5[11], md5[12], md5[13], md5[14], md5[15], level);
return;
}
/* Removes a hash from the cache */
void cache_remove(unsigned char *md5, size_t size, const struct cl_engine *engine)
void clean_cache_remove(unsigned char *md5, size_t size, const struct cl_engine *engine)
{
unsigned int key = 0;
struct CACHE *c;
@ -688,12 +700,12 @@ void cache_remove(unsigned char *md5, size_t size, const struct cl_engine *engin
return;
if (engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
cli_dbgmsg("cache_remove: Caching disabled.\n");
cli_dbgmsg("clean_cache_remove: Caching disabled.\n");
return;
}
if (!md5) {
cli_dbgmsg("cache_remove: No hash available. Nothing to remove from cache.\n");
cli_dbgmsg("clean_cache_remove: No hash available. Nothing to remove from cache.\n");
return;
}
@ -712,28 +724,30 @@ void cache_remove(unsigned char *md5, size_t size, const struct cl_engine *engin
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&c->mutex);
#endif
cli_dbgmsg("cache_remove: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", md5[0], md5[1], md5[2], md5[3], md5[4], md5[5], md5[6], md5[7], md5[8], md5[9], md5[10], md5[11], md5[12], md5[13], md5[14], md5[15]);
cli_dbgmsg("clean_cache_remove: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", md5[0], md5[1], md5[2], md5[3], md5[4], md5[5], md5[6], md5[7], md5[8], md5[9], md5[10], md5[11], md5[12], md5[13], md5[14], md5[15]);
return;
}
/* Looks up the provided hash in the clean cache.
Returns CL_CLEAN if the hash is found (i.e. the file is cached as clean).
Returns CL_VIRUS if it is not found, or if the cache is unavailable, disabled, or skipped. */
cl_error_t cache_check(unsigned char *hash, cli_ctx *ctx)
cl_error_t clean_cache_check(unsigned char *md5, size_t size, cli_ctx *ctx)
{
fmap_t *map;
int ret;
if (!ctx || !ctx->engine || !ctx->engine->cache)
return CL_VIRUS;
if (SCAN_COLLECT_METADATA) {
// Don't cache when using the "collect metadata" feature.
// We don't cache the JSON, so we can't reproduce it when the cache is positive.
cli_dbgmsg("clean_cache_check: collect metadata feature enabled, skipping cache\n");
return CL_VIRUS;
}
if (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) {
cli_dbgmsg("cache_check: Caching disabled. Returning CL_VIRUS.\n");
cli_dbgmsg("clean_cache_check: Caching disabled. Returning CL_VIRUS.\n");
return CL_VIRUS;
}
map = ctx->fmap;
ret = cache_lookup_hash(hash, map->len, ctx->engine->cache, ctx->recursion_level);
cli_dbgmsg("cache_check: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x is %s\n", hash[0], hash[1], hash[2], hash[3], hash[4], hash[5], hash[6], hash[7], hash[8], hash[9], hash[10], hash[11], hash[12], hash[13], hash[14], hash[15], (ret == CL_VIRUS) ? "negative" : "positive");
ret = cache_lookup_hash(md5, size, ctx->engine->cache, ctx->recursion_level);
cli_dbgmsg("clean_cache_check: %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x is %s\n", md5[0], md5[1], md5[2], md5[3], md5[4], md5[5], md5[6], md5[7], md5[8], md5[9], md5[10], md5[11], md5[12], md5[13], md5[14], md5[15], (ret == CL_VIRUS) ? "negative" : "positive");
return ret;
}

@ -25,11 +25,47 @@
#include "clamav.h"
#include "others.h"
void cache_add(unsigned char *md5, size_t size, cli_ctx *ctx);
/* Removes a hash from the cache */
void cache_remove(unsigned char *md5, size_t size, const struct cl_engine *engine);
cl_error_t cache_check(unsigned char *hash, cli_ctx *ctx);
int cli_cache_init(struct cl_engine *engine);
void cli_cache_destroy(struct cl_engine *engine);
/**
* @brief Add a hash to the cache of clean files.
*
* Nothing is added when caching is disabled, when no hash is provided,
* when the "collect metadata" feature is enabled, when caching is disabled
* for the current layer, or when the scan evidence already holds an alert.
*
* @param md5 The hash of the file to add.
* @param size The size of the file.
* @param ctx The scanning context.
*/
void clean_cache_add(unsigned char *md5, size_t size, cli_ctx *ctx);
/**
* @brief Removes a hash from the clean cache
*
* @param md5 The hash of the file to remove.
* @param size The size of the file.
* @param engine The engine whose clean cache is updated.
*/
void clean_cache_remove(unsigned char *md5, size_t size, const struct cl_engine *engine);
/**
* @brief Looks up a file hash in the clean cache.
*
* @param md5 The hash of the file to look up.
* @param size The size of the file.
* @param ctx The scanning context.
* @return CL_CLEAN if the hash is found in the clean cache.
* @return CL_VIRUS if it is not found, or if the cache is unavailable, disabled, or skipped.
*/
cl_error_t clean_cache_check(unsigned char *md5, size_t size, cli_ctx *ctx);
/**
* @brief Allocates the trees for the clean cache.
*
* @param engine The engine that will own the clean cache.
* @return int 0 on success or when caching is disabled; 1 on error
*             (e.g. NULL engine or allocation failure).
*/
int clean_cache_init(struct cl_engine *engine);
/**
* @brief Frees the clean cache
*
* @param engine The engine whose clean cache is freed.
*/
void clean_cache_destroy(struct cl_engine *engine);
#endif

@ -422,7 +422,7 @@ extern cl_fmap_t *cl_fmap_open_handle(void *handle, size_t offset, size_t len,
m->pages = pages;
m->pgsz = pgsz;
m->paged = 0;
m->dont_cache_flag = 0;
m->dont_cache_flag = false;
m->unmap = unmap_handle;
m->need = handle_need;
m->need_offstr = handle_need_offstr;

@ -54,14 +54,14 @@ struct cl_fmap {
uint64_t pgsz;
uint64_t paged;
uint16_t aging;
uint16_t dont_cache_flag; /** indicates if we should not cache scan results for this fmap. Used if limits exceeded */
uint16_t handle_is_fd; /** non-zero if map->handle is an fd. */
size_t offset; /** file offset representing start of original fmap, if the fmap created reading from a file starting at offset other than 0 */
size_t nested_offset; /** offset from start of original fmap (data) for nested scan. 0 for orig fmap. */
size_t real_len; /** len from start of original fmap (data) to end of current (possibly nested) map. */
/* real_len == nested_offset + len.
real_len is needed for nested maps because we only reference the original mapping data.
We convert caller's fmap offsets & lengths to real data offsets using nested_offset & real_len. */
bool dont_cache_flag; /** indicates if we should not cache scan results for this fmap. Used if limits exceeded */
uint16_t handle_is_fd; /** non-zero if map->handle is an fd. */
size_t offset; /** file offset representing start of original fmap, if the fmap created reading from a file starting at offset other than 0 */
size_t nested_offset; /** offset from start of original fmap (data) for nested scan. 0 for orig fmap. */
size_t real_len; /** len from start of original fmap (data) to end of current (possibly nested) map. */
/* real_len == nested_offset + len.
real_len is needed for nested maps because we only reference the original mapping data.
We convert caller's fmap offsets & lengths to real data offsets using nested_offset & real_len. */
/* external */
size_t len; /** length of data from nested_offset, accessible via current fmap */

@ -210,7 +210,7 @@ cl_error_t cli_parsegif(cli_ctx *ctx)
*/
if (fmap_readn(map, &screen_desc, offset, sizeof(screen_desc)) != sizeof(screen_desc)) {
cli_errmsg("GIF: Can't read logical screen description, file truncated?\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedScreenDescriptor");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedScreenDescriptor");
status = CL_EPARSE;
goto scan_overlay;
}
@ -226,7 +226,7 @@ cl_error_t cli_parsegif(cli_ctx *ctx)
if (offset + (size_t)global_color_table_size > map->len) {
cli_errmsg("GIF: EOF in the middle of the global color table, file truncated?\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedGlobalColorTable");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedGlobalColorTable");
status = CL_EPARSE;
goto scan_overlay;
}
@ -248,7 +248,7 @@ cl_error_t cli_parsegif(cli_ctx *ctx)
cli_dbgmsg("GIF: Missing GIF trailer, slightly (but acceptably) malformed.\n");
} else {
cli_errmsg("GIF: Can't read block label, EOF before image data. File truncated?\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.GIF.MissingImageData");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.GIF.MissingImageData");
}
status = CL_EPARSE;
goto scan_overlay;
@ -270,7 +270,7 @@ cl_error_t cli_parsegif(cli_ctx *ctx)
if (fmap_readn(map, &extension_label, offset, sizeof(extension_label)) != sizeof(extension_label)) {
cli_errmsg("GIF: Failed to read the extension block label, file truncated?\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedExtension");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedExtension");
status = CL_EPARSE;
goto scan_overlay;
}
@ -304,7 +304,7 @@ cl_error_t cli_parsegif(cli_ctx *ctx)
uint8_t extension_block_size = 0;
if (fmap_readn(map, &extension_block_size, offset, sizeof(extension_block_size)) != sizeof(extension_block_size)) {
cli_errmsg("GIF: EOF while attempting to read the block size for an extension, file truncated?\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedExtension");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedExtension");
status = CL_EPARSE;
goto scan_overlay;
} else {
@ -319,7 +319,7 @@ cl_error_t cli_parsegif(cli_ctx *ctx)
if (offset + (size_t)extension_block_size > map->len) {
cli_errmsg("GIF: EOF in the middle of a graphic control extension sub-block, file truncated?\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedExtensionSubBlock");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedExtensionSubBlock");
status = CL_EPARSE;
goto scan_overlay;
}
@ -335,7 +335,7 @@ cl_error_t cli_parsegif(cli_ctx *ctx)
cli_dbgmsg("GIF: Found an image descriptor.\n");
if (fmap_readn(map, &image_desc, offset, sizeof(image_desc)) != sizeof(image_desc)) {
cli_errmsg("GIF: Can't read image descriptor, file truncated?\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedImageDescriptor");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedImageDescriptor");
status = CL_EPARSE;
goto scan_overlay;
} else {
@ -368,7 +368,7 @@ cl_error_t cli_parsegif(cli_ctx *ctx)
uint8_t image_data_block_size = 0;
if (fmap_readn(map, &image_data_block_size, offset, sizeof(image_data_block_size)) != sizeof(image_data_block_size)) {
cli_errmsg("GIF: EOF while attempting to read the block size for an image data block, file truncated?\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedImageDataBlock");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedImageDataBlock");
status = CL_EPARSE;
goto scan_overlay;
} else {
@ -383,7 +383,7 @@ cl_error_t cli_parsegif(cli_ctx *ctx)
if (offset + (size_t)image_data_block_size > map->len) {
cli_errmsg("GIF: EOF in the middle of an image data sub-block, file truncated?\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedImageDataBlock");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.GIF.TruncatedImageDataBlock");
status = CL_EPARSE;
goto scan_overlay;
}
@ -395,7 +395,7 @@ cl_error_t cli_parsegif(cli_ctx *ctx)
default: {
// An unknown code: break.
cli_errmsg("GIF: Found an unfamiliar block label: 0x%x\n", block_label);
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.GIF.UnknownBlockLabel");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.GIF.UnknownBlockLabel");
status = CL_EPARSE;
goto scan_overlay;
}
@ -404,7 +404,7 @@ cl_error_t cli_parsegif(cli_ctx *ctx)
scan_overlay:
if (status == CL_EPARSE) {
/* We added with cli_append_possibly_unwanted so it will alert at the end if nothing else matches. */
/* We added with cli_append_potentially_unwanted so it will alert at the end if nothing else matches. */
status = CL_CLEAN;
// Some recovery (I saw some "GIF89a;" or things like this)

@ -354,7 +354,7 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
} else {
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_errmsg("JPEG: Failed to read marker, file corrupted?\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.CantReadMarker");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.CantReadMarker");
status = CL_EPARSE;
} else {
cli_dbgmsg("Failed to read marker, file corrupted?\n");
@ -370,7 +370,7 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
if (i == 16) {
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_warnmsg("JPEG: Spurious bytes before segment %u\n", segment);
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SpuriousBytesBeforeSegment");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SpuriousBytesBeforeSegment");
status = CL_EPARSE;
} else {
cli_dbgmsg("Spurious bytes before segment %u\n", segment);
@ -398,7 +398,7 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
if (fmap_readn(map, &len_u16, offset, sizeof(len_u16)) != sizeof(len_u16)) {
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_errmsg("JPEG: Failed to read the segment size, file corrupted?\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.CantReadSegmentSize");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.CantReadSegmentSize");
status = CL_EPARSE;
} else {
cli_dbgmsg("Failed to read the segment size, file corrupted?\n");
@ -411,7 +411,7 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
if (len < 2) {
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_warnmsg("JPEG: Invalid segment size\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.InvalidSegmentSize");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.InvalidSegmentSize");
status = CL_EPARSE;
} else {
cli_dbgmsg("Invalid segment size\n");
@ -421,7 +421,7 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
if (len >= map->len - offset + sizeof(len_u16)) {
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_warnmsg("JPEG: Segment data out of file\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SegmentDataOutOfFile");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SegmentDataOutOfFile");
status = CL_EPARSE;
} else {
cli_dbgmsg("Segment data out of file\n");
@ -444,7 +444,7 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
if (found_app && num_JFIF > 0) {
cli_warnmsg("JPEG: Duplicate Application Marker found (JFIF)\n");
cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF);
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFdupAppMarker");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFdupAppMarker");
status = CL_EPARSE;
goto done;
}
@ -457,13 +457,13 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
* If segment 1 wasn't a comment or Exif, then the file structure is unusual. */
cli_warnmsg("JPEG: JFIF marker at wrong position, found in segment # %d\n", segment);
cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF);
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFmarkerBadPosition");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFmarkerBadPosition");
status = CL_EPARSE;
goto done;
}
if (len < 16) {
cli_warnmsg("JPEG: JFIF header too short\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFheaderTooShort");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFheaderTooShort");
status = CL_EPARSE;
goto done;
}
@ -490,20 +490,20 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
if (found_app && (num_Exif > 0 || num_SPIFF > 0)) {
cli_warnmsg("JPEG: Duplicate Application Marker found (Exif)\n");
cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF);
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifDupAppMarker");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifDupAppMarker");
status = CL_EPARSE;
goto done;
}
if (segment > 3 && !found_comment && num_JFIF > 0) {
/* If Exif was found after segment 3 and previous segments weren't a comment or JFIF, something is unusual. */
cli_warnmsg("JPEG: Exif marker at wrong position\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifHeaderBadPosition");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifHeaderBadPosition");
status = CL_EPARSE;
goto done;
}
if (len < 16) {
cli_warnmsg("JPEG: Exif header too short\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifHeaderTooShort");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifHeaderTooShort");
status = CL_EPARSE;
goto done;
}
@ -546,19 +546,19 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
if (found_app) {
cli_warnmsg("JPEG: Duplicate Application Marker found (SPIFF)\n");
cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF);
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SPIFFdupAppMarker");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SPIFFdupAppMarker");
status = CL_EPARSE;
goto done;
}
if (segment != 1 && (segment != 2 || !found_comment)) {
cli_warnmsg("JPEG: SPIFF marker at wrong position\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SPIFFmarkerBadPosition");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SPIFFmarkerBadPosition");
status = CL_EPARSE;
goto done;
}
if (len < 16) {
cli_warnmsg("JPEG: SPIFF header too short\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SPIFFheaderTooShort");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SPIFFheaderTooShort");
status = CL_EPARSE;
goto done;
}
@ -657,7 +657,7 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
if (found_app) {
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_warnmsg("JPEG: Application Marker before JPG7\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.AppMarkerBeforeJPG7");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.AppMarkerBeforeJPG7");
status = CL_EPARSE;
goto done;
}
@ -681,7 +681,7 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
*/
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_warnmsg("JPEG: No image in jpeg\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.NoImages");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.NoImages");
status = CL_EPARSE;
}
goto done;
@ -700,7 +700,7 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
if (prev_segment != JPEG_MARKER_SEGMENT_DTI) {
cli_warnmsg("JPEG: No DTI segment before DTT\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.DTTMissingDTISegment");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.DTTMissingDTISegment");
status = CL_EPARSE;
goto done;
}
@ -717,7 +717,7 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
done:
if (status == CL_EPARSE) {
/* We added with cli_append_possibly_unwanted so it will alert at the end if nothing else matches. */
/* We added with cli_append_potentially_unwanted so it will alert at the end if nothing else matches. */
status = CL_CLEAN;
}

@ -744,8 +744,7 @@ int32_t cli_bcapi_matchicon(struct cli_bc_ctx *ctx, const uint8_t *grp1, int32_t
if ((size_t)grp1len > sizeof(group1) - 1 ||
(size_t)grp2len > sizeof(group2) - 1)
return -1;
oldvirname = ((cli_ctx *)ctx->ctx)->virname;
((cli_ctx *)ctx->ctx)->virname = NULL;
memcpy(group1, grp1, grp1len);
memcpy(group2, grp2, grp2len);
group1[grp1len] = 0;
@ -763,9 +762,9 @@ int32_t cli_bcapi_matchicon(struct cli_bc_ctx *ctx, const uint8_t *grp1, int32_t
info.nsections = ctx->hooks.pedata->nsections;
info.hdr_size = ctx->hooks.pedata->hdr_size;
cli_dbgmsg("bytecode matchicon %s %s\n", group1, group2);
ret = matchicon(ctx->ctx, &info, group1[0] ? group1 : NULL,
ret = matchicon(ctx->ctx, &info, group1[0] ? group1 : NULL,
group2[0] ? group2 : NULL);
((cli_ctx *)ctx->ctx)->virname = oldvirname;
return (int32_t)ret;
}

@ -593,13 +593,6 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
messageDestroy(body);
}
if ((retcode == CL_CLEAN) && ctx->found_possibly_unwanted &&
(*ctx->virname == NULL || SCAN_ALLMATCHES)) {
retcode = cli_append_virus(ctx, "Heuristics.Phishing.Email");
ctx->found_possibly_unwanted = 0;
}
cli_dbgmsg("cli_mbox returning %d\n", retcode);
return retcode;

@ -727,7 +727,7 @@ cl_error_t cl_engine_set_num(struct cl_engine *engine, enum cl_engine_field fiel
} else {
engine->engine_options &= ~(ENGINE_OPTIONS_DISABLE_CACHE);
if (!(engine->cache))
cli_cache_init(engine);
clean_cache_init(engine);
}
break;
case CL_ENGINE_DISABLE_PE_STATS:
@ -1096,7 +1096,7 @@ void cli_append_virus_if_heur_exceedsmax(cli_ctx *ctx, char *vname)
// TODO: consider changing this from a bool to a threshold so we could at least see more than 1 limits exceeded
if (SCAN_HEURISTIC_EXCEEDS_MAX) {
cli_append_possibly_unwanted(ctx, vname);
cli_append_potentially_unwanted(ctx, vname);
cli_dbgmsg("%s: scanning may be incomplete and additional analysis needed for this file.\n", vname);
}
@ -1304,45 +1304,63 @@ int cli_unlink(const char *pathname)
return 0;
}
void cli_virus_found_cb(cli_ctx *ctx)
void cli_virus_found_cb(cli_ctx *ctx, const char *virname)
{
if (ctx->engine->cb_virus_found)
ctx->engine->cb_virus_found(fmap_fd(ctx->fmap), (const char *)*ctx->virname, ctx->cb_ctx);
if (ctx->engine->cb_virus_found) {
ctx->engine->cb_virus_found(
fmap_fd(ctx->fmap),
virname,
ctx->cb_ctx);
}
}
cl_error_t cli_append_possibly_unwanted(cli_ctx *ctx, const char *virname)
/**
* @brief Add an indicator to the scan evidence.
*
* @param ctx
* @param virname Name of the indicator
* @param type Type of the indicator
* @return Returns CL_SUCCESS if added and IS in ALLMATCH mode, or if was PUA and not in HEURISTIC-PRECEDENCE-mode.
* @return Returns CL_VIRUS if added and NOT in ALLMATCH mode, or if was PUA and not in ALLMATCH but IS in HEURISTIC-PRECEDENCE-mode.
* @return Returns some other error code like CL_ERROR or CL_EMEM if something went wrong.
*/
static cl_error_t append_virus(cli_ctx *ctx, const char *virname, IndicatorType type)
{
if (SCAN_ALLMATCHES) {
return cli_append_virus(ctx, virname);
} else if (SCAN_HEURISTIC_PRECEDENCE) {
return cli_append_virus(ctx, virname);
} else if (ctx->num_viruses == 0 && ctx->virname != NULL && *ctx->virname == NULL) {
ctx->found_possibly_unwanted = 1;
ctx->num_viruses++;
*ctx->virname = virname;
}
return CL_CLEAN;
}
cl_error_t status = CL_ERROR;
FFIError *add_indicator_error = NULL;
bool add_successful;
cl_error_t cli_append_virus(cli_ctx *ctx, const char *virname)
{
if (ctx->virname == NULL) {
return CL_CLEAN;
char *location = NULL;
if (ctx->evidence == NULL) {
// evidence storage not initialized, cannot continue.
status = CL_SUCCESS;
goto done;
}
if ((ctx->fmap != NULL) &&
(ctx->recursion_stack != NULL) &&
(CL_VIRUS != cli_check_fp(ctx, virname))) {
return CL_CLEAN;
// FP signature found for one of the layers. Ignore indicator.
status = CL_SUCCESS;
goto done;
}
if (!SCAN_ALLMATCHES && ctx->num_viruses != 0) {
if (SCAN_HEURISTIC_PRECEDENCE) {
return CL_CLEAN;
}
add_successful = evidence_add_indicator(
ctx->evidence,
virname,
type,
&add_indicator_error);
if (!add_successful) {
cli_errmsg("Failed to add indicator to scan evidence: %s\n", ffierror_fmt(add_indicator_error));
status = CL_ERROR;
goto done;
}
ctx->num_viruses++;
*ctx->virname = virname;
cli_virus_found_cb(ctx);
if (type == IndicatorType_Strong) {
// Run that virus callback which in clamscan says "<signature name> FOUND"
cli_virus_found_cb(ctx, virname);
}
#if HAVE_JSON
if (SCAN_COLLECT_METADATA && ctx->wrkproperty) {
@ -1351,33 +1369,80 @@ cl_error_t cli_append_virus(cli_ctx *ctx, const char *virname)
arrobj = json_object_new_array();
if (NULL == arrobj) {
cli_errmsg("cli_append_virus: no memory for json virus array\n");
return CL_EMEM;
status = CL_EMEM;
goto done;
}
json_object_object_add(ctx->wrkproperty, "Viruses", arrobj);
}
virobj = json_object_new_string(virname);
if (NULL == virobj) {
cli_errmsg("cli_append_virus: no memory for json virus name object\n");
return CL_EMEM;
status = CL_EMEM;
goto done;
}
json_object_array_add(arrobj, virobj);
}
#endif
return CL_VIRUS;
if (SCAN_ALLMATCHES) {
// Never break.
status = CL_SUCCESS;
} else {
// Usually break.
switch (type) {
case IndicatorType_Strong: {
status = CL_VIRUS;
break;
}
case IndicatorType_PotentiallyUnwanted: {
status = CL_SUCCESS;
break;
}
default: {
status = CL_SUCCESS;
}
}
}
done:
if (NULL != location) {
free(location);
}
return status;
}
/**
 * @brief Record a potentially unwanted (PUA / heuristic) indicator.
 *
 * When heuristic precedence is enabled the indicator is recorded as a strong
 * indicator, otherwise it is recorded as potentially unwanted.
 *
 * @param ctx     The scan context.
 * @param virname The alert name.
 * @return cl_error_t Whatever append_virus() returns for the selected indicator type.
 */
cl_error_t cli_append_potentially_unwanted(cli_ctx *ctx, const char *virname)
{
    IndicatorType type = IndicatorType_PotentiallyUnwanted;

    if (SCAN_HEURISTIC_PRECEDENCE) {
        type = IndicatorType_Strong;
    }

    return append_virus(ctx, virname, type);
}
/* Record `virname` as a strong indicator: thin wrapper that forwards to
 * append_virus() with IndicatorType_Strong and returns its result. */
cl_error_t cli_append_virus(cli_ctx *ctx, const char *virname)
{
return append_virus(ctx, virname, IndicatorType_Strong);
}
const char *cli_get_last_virus(const cli_ctx *ctx)
{
if (!ctx || !ctx->virname || !(*ctx->virname))
if (!ctx || !ctx->evidence) {
return NULL;
return *ctx->virname;
}
return evidence_get_last_alert(ctx->evidence);
}
const char *cli_get_last_virus_str(const cli_ctx *ctx)
{
const char *ret;
if ((ret = cli_get_last_virus(ctx)))
if (NULL != (ret = cli_get_last_virus(ctx))) {
return ret;
}
return "";
}

@ -190,20 +190,20 @@ typedef struct recursion_level_tag {
bool calculated_image_fuzzy_hash; /* Used for image/graphics files to store a fuzzy hash. */
} recursion_level_t;
typedef void *evidence_t;
/* internal clamav context */
typedef struct cli_ctx_tag {
char *target_filepath; /**< (optional) The filepath of the original scan target. */
const char *sub_filepath; /**< (optional) The filepath of the current file being parsed. May be a temp file. */
char *sub_tmpdir; /**< The directory to store tmp files at this recursion depth. */
const char **virname;
unsigned int num_viruses;
char *target_filepath; /* (optional) The filepath of the original scan target. */
const char *sub_filepath; /* (optional) The filepath of the current file being parsed. May be a temp file. */
char *sub_tmpdir; /* The directory to store tmp files at this recursion depth. */
evidence_t evidence; /* Stores the evidence for this scan to alert (alerting indicators). */
unsigned long int *scanned;
const struct cli_matcher *root;
const struct cl_engine *engine;
uint64_t scansize;
struct cl_scan_options *options;
unsigned int scannedfiles;
unsigned int found_possibly_unwanted;
unsigned int corrupted_input;
recursion_level_t *recursion_stack; /* Array of recursion levels used as a stack. */
uint32_t recursion_stack_size; /* stack size must == engine->max_recursion_level */
@ -730,11 +730,11 @@ cl_error_t cli_append_virus(cli_ctx *ctx, const char *virname);
* @param virname The alert name.
* @return cl_error_t CL_VIRUS if scan should be halted due to an alert, CL_CLEAN if scan should continue.
*/
cl_error_t cli_append_possibly_unwanted(cli_ctx *ctx, const char *virname);
cl_error_t cli_append_potentially_unwanted(cli_ctx *ctx, const char *virname);
const char *cli_get_last_virus(const cli_ctx *ctx);
const char *cli_get_last_virus_str(const cli_ctx *ctx);
void cli_virus_found_cb(cli_ctx *ctx);
void cli_virus_found_cb(cli_ctx *ctx, const char *virname);
/**
* @brief Push a new fmap onto our scan recursion stack.

@ -3695,7 +3695,7 @@ cl_error_t cli_pdf(const char *dir, cli_ctx *ctx, off_t offset)
if (!rc && SCAN_HEURISTICS && (ctx->dconf->other & OTHER_CONF_PDFNAMEOBJ)) {
if (pdf.flags & (1 << ESCAPED_COMMON_PDFNAME)) {
/* for example /Fl#61te#44#65#63#6f#64#65 instead of /FlateDecode */
cli_append_possibly_unwanted(ctx, "Heuristics.PDF.ObfuscatedNameObject");
cli_append_potentially_unwanted(ctx, "Heuristics.PDF.ObfuscatedNameObject");
}
}
#if 0

@ -88,6 +88,8 @@
#include "json_api.h"
#include "clamav_rust.h"
#define DCONF ctx->dconf->pe
#define PE_IMAGE_DOS_SIGNATURE 0x5a4d /* MZ */
@ -4206,20 +4208,15 @@ int cli_scanpe(cli_ctx *ctx)
#endif
do {
unsigned int yc_unp_num_viruses = ctx->num_viruses;
const char *yc_unp_virname = NULL;
if (ctx->virname)
yc_unp_virname = ctx->virname[0];
size_t num_alerts = evidence_num_alerts(ctx->evidence);
cli_dbgmsg("%d,%d,%d,%d\n", peinfo->nsections - 1, peinfo->e_lfanew, ecx, offset);
CLI_UNPTEMP("cli_scanpe: yC", (spinned, 0));
CLI_UNPRESULTS("cli_scanpe: yC", (yc_decrypt(ctx, spinned, fsize, peinfo->sections, peinfo->nsections - 1, peinfo->e_lfanew, ndesc, ecx, offset)), 0, (spinned, 0));
if (SCAN_ALLMATCHES && yc_unp_num_viruses != ctx->num_viruses) {
cli_exe_info_destroy(peinfo);
return CL_VIRUS;
} else if (ctx->virname && yc_unp_virname != ctx->virname[0]) {
if (SCAN_ALLMATCHES && num_alerts != evidence_num_alerts(ctx->evidence)) {
// In ALLMATCH-mode, CLI_UNPRESULTS() will not return CL_VIRUS when something is found.
// We apparently want to return CL_VIRUS here if CLI_UNPRESULTS() found something (preserving previous behavior).
cli_exe_info_destroy(peinfo);
return CL_VIRUS;
}

@ -732,9 +732,6 @@ cl_error_t phishingScan(cli_ctx* ctx, tag_arguments_t* hrefs)
goto done;
}
if (!ctx->found_possibly_unwanted && !SCAN_ALLMATCHES)
*ctx->virname = NULL;
for (i = 0; i < hrefs->count; i++) {
struct url_check urls;
enum phish_status phishing_verdict;
@ -776,29 +773,29 @@ cl_error_t phishingScan(cli_ctx* ctx, tag_arguments_t* hrefs)
case CL_PHISH_CLEAN:
continue;
case CL_PHISH_NUMERIC_IP:
status = cli_append_possibly_unwanted(ctx, "Heuristics.Phishing.Email.Cloaked.NumericIP");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Phishing.Email.Cloaked.NumericIP");
break;
case CL_PHISH_CLOAKED_NULL:
status = cli_append_possibly_unwanted(ctx, "Heuristics.Phishing.Email.Cloaked.Null"); /*fakesite%01%00@fake.example.com*/
status = cli_append_potentially_unwanted(ctx, "Heuristics.Phishing.Email.Cloaked.Null"); /*fakesite%01%00@fake.example.com*/
break;
case CL_PHISH_SSL_SPOOF:
status = cli_append_possibly_unwanted(ctx, "Heuristics.Phishing.Email.SSL-Spoof");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Phishing.Email.SSL-Spoof");
break;
case CL_PHISH_CLOAKED_UIU:
status = cli_append_possibly_unwanted(ctx, "Heuristics.Phishing.Email.Cloaked.Username"); /*http://banksite@fake.example.com*/
status = cli_append_potentially_unwanted(ctx, "Heuristics.Phishing.Email.Cloaked.Username"); /*http://banksite@fake.example.com*/
break;
case CL_PHISH_HASH0:
status = cli_append_possibly_unwanted(ctx, "Heuristics.Safebrowsing.Suspected-malware_safebrowsing.clamav.net");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Safebrowsing.Suspected-malware_safebrowsing.clamav.net");
break;
case CL_PHISH_HASH1:
status = cli_append_possibly_unwanted(ctx, "Heuristics.Phishing.URL.Blocked");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Phishing.URL.Blocked");
break;
case CL_PHISH_HASH2:
status = cli_append_possibly_unwanted(ctx, "Heuristics.Safebrowsing.Suspected-phishing_safebrowsing.clamav.net");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Safebrowsing.Suspected-phishing_safebrowsing.clamav.net");
break;
case CL_PHISH_NOMATCH:
default:
status = cli_append_possibly_unwanted(ctx, "Heuristics.Phishing.Email.SpoofedDomain");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Phishing.Email.SpoofedDomain");
break;
}
if (CL_CLEAN != status && !SCAN_ALLMATCHES) {

@ -114,7 +114,7 @@ cl_error_t cli_parsepng(cli_ctx *ctx)
if (chunk_data_length > (uint64_t)0x7fffffff) {
cli_dbgmsg("PNG: invalid chunk length (too large): 0x" STDx64 "\n", chunk_data_length);
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.PNG.InvalidChunkLength");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.PNG.InvalidChunkLength");
status = CL_EPARSE;
}
goto scan_overlay;
@ -123,7 +123,7 @@ cl_error_t cli_parsepng(cli_ctx *ctx)
if (fmap_readn(map, chunk_type, offset, PNG_CHUNK_TYPE_SIZE) != PNG_CHUNK_TYPE_SIZE) {
cli_dbgmsg("PNG: EOF while reading chunk type\n");
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.PNG.EOFReadingChunkType");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.PNG.EOFReadingChunkType");
status = CL_EPARSE;
}
goto scan_overlay;
@ -141,7 +141,7 @@ cl_error_t cli_parsepng(cli_ctx *ctx)
if (NULL == ptr) {
cli_warnmsg("PNG: Unexpected early end-of-file.\n");
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.PNG.EOFReadingChunk");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.PNG.EOFReadingChunk");
status = CL_EPARSE;
}
goto scan_overlay;
@ -272,7 +272,7 @@ cl_error_t cli_parsepng(cli_ctx *ctx)
if (fmap_readn(map, &chunk_crc, offset, PNG_CHUNK_CRC_SIZE) != PNG_CHUNK_CRC_SIZE) {
cli_dbgmsg("PNG: EOF while reading chunk crc\n");
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.PNG.EOFReadingChunkCRC");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.PNG.EOFReadingChunkCRC");
status = CL_EPARSE;
}
goto scan_overlay;
@ -295,7 +295,7 @@ cl_error_t cli_parsepng(cli_ctx *ctx)
scan_overlay:
if (status == CL_EPARSE) {
/* We added with cli_append_possibly_unwanted so it will alert at the end if nothing else matches. */
/* We added with cli_append_potentially_unwanted so it will alert at the end if nothing else matches. */
status = CL_CLEAN;
}

@ -5304,7 +5304,7 @@ cl_error_t cl_load(const char *path, struct cl_engine *engine, unsigned int *sig
cli_dbgmsg("Bytecode engine disabled\n");
}
if (!engine->cache && cli_cache_init(engine))
if (!engine->cache && clean_cache_init(engine))
return CL_EMEM;
engine->dboptions |= dboptions;
@ -5842,7 +5842,7 @@ cl_error_t cl_engine_free(struct cl_engine *engine)
TASK_COMPLETE();
if (engine->cache) {
cli_cache_destroy(engine);
clean_cache_destroy(engine);
}
TASK_COMPLETE();

@ -54,7 +54,6 @@
#include <zlib.h>
#include "clamav_rust.h"
#include "clamav.h"
#include "others.h"
#include "dconf.h"
@ -4030,7 +4029,7 @@ void emax_reached(cli_ctx *ctx)
fmap_t *map = ctx->recursion_stack[stack_index].fmap;
if (NULL != map) {
map->dont_cache_flag = 1;
map->dont_cache_flag = true;
}
stack_index -= 1;
@ -4428,10 +4427,7 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
*/
perf_start(ctx, PERFT_CACHE);
if (!(SCAN_COLLECT_METADATA))
res = cache_check(hash, ctx);
else
res = CL_VIRUS;
res = clean_cache_check(hash, hashed_size, ctx);
#if HAVE_JSON
if (SCAN_COLLECT_METADATA /* ctx.options->general & CL_SCAN_GENERAL_COLLECT_METADATA && ctx->wrkproperty != NULL */) {
@ -5055,31 +5051,33 @@ done:
ctx->wrkproperty = (struct json_object *)(parent_property);
#endif
if (ret == CL_CLEAN && ctx->found_possibly_unwanted) {
if ((ret == CL_SUCCESS) &&
(evidence_num_alerts(ctx->evidence) > 0)) {
cb_retcode = CL_VIRUS;
} else {
if (ret == CL_CLEAN && ctx->num_viruses != 0)
cb_retcode = CL_VIRUS;
else
cb_retcode = ret;
cb_retcode = ret;
}
cli_dbgmsg("cli_magic_scan_desc: returning %d %s\n", ret, __AT__);
if (ctx->engine->cb_post_scan) {
cl_error_t callbacK_ret;
const char *virusname = NULL;
perf_start(ctx, PERFT_POSTCB);
if (cb_retcode == CL_VIRUS)
virusname = cli_get_last_virus(ctx);
switch (ctx->engine->cb_post_scan(fmap_fd(ctx->fmap), cb_retcode, virusname, ctx->cb_ctx)) {
perf_start(ctx, PERFT_POSTCB);
callbacK_ret = ctx->engine->cb_post_scan(fmap_fd(ctx->fmap), cb_retcode, virusname, ctx->cb_ctx);
perf_stop(ctx, PERFT_POSTCB);
switch (callbacK_ret) {
case CL_BREAK:
cli_dbgmsg("cli_magic_scan_desc: file allowed by post_scan callback\n");
perf_stop(ctx, PERFT_POSTCB);
ret = CL_CLEAN;
break;
case CL_VIRUS:
cli_dbgmsg("cli_magic_scan_desc: file blocked by post_scan callback\n");
cli_append_virus(ctx, "Detected.By.Callback");
perf_stop(ctx, PERFT_POSTCB);
if (ret != CL_VIRUS) {
ret = cli_check_fp(ctx, NULL);
}
@ -5089,12 +5087,11 @@ done:
default:
cli_warnmsg("cli_magic_scan_desc: ignoring bad return code from post_scan callback\n");
}
perf_stop(ctx, PERFT_POSTCB);
}
if (cb_retcode == CL_CLEAN && cache_clean && !ctx->fmap->dont_cache_flag && !SCAN_COLLECT_METADATA) {
if (cb_retcode == CL_CLEAN && cache_clean) {
perf_start(ctx, PERFT_CACHE);
cache_add(hash, hashed_size, ctx);
clean_cache_add(hash, hashed_size, ctx);
perf_stop(ctx, PERFT_CACHE);
}
@ -5378,6 +5375,8 @@ cl_error_t cli_magic_scan_buff(const void *buffer, size_t length, cli_ctx *ctx,
static cl_error_t scan_common(cl_fmap_t *map, const char *filepath, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions, void *context)
{
cl_error_t status;
cl_error_t verdict = CL_CLEAN;
cli_ctx ctx = {0};
char *target_basename = NULL;
@ -5392,12 +5391,14 @@ static cl_error_t scan_common(cl_fmap_t *map, const char *filepath, const char *
return CL_ENULLARG;
}
*virname = NULL;
ctx.engine = engine;
ctx.virname = virname;
ctx.scanned = scanned;
ctx.options = malloc(sizeof(struct cl_scan_options));
memcpy(ctx.options, scanoptions, sizeof(struct cl_scan_options));
ctx.found_possibly_unwanted = 0;
ctx.evidence = evidence_new();
ctx.dconf = (struct cli_dconf *)engine->dconf;
ctx.cb_ctx = context;
@ -5523,8 +5524,21 @@ static cl_error_t scan_common(cl_fmap_t *map, const char *filepath, const char *
status = cli_magic_scan(&ctx, CL_TYPE_ANY);
if (status == CL_CLEAN && ctx.found_possibly_unwanted) {
cli_virus_found_cb(&ctx);
// Set the output pointer to the "latest" alert signature name.
*virname = cli_get_last_virus_str(&ctx);
if (0 < evidence_num_alerts(ctx.evidence)) {
verdict = CL_VIRUS;
}
if (!(ctx.options->general & CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE) &&
(0 == evidence_num_indicators_type(ctx.evidence, IndicatorType_Strong)) &&
(0 != evidence_num_indicators_type(ctx.evidence, IndicatorType_PotentiallyUnwanted))) {
// "Heuristic precedence" mode not enabled, and the only alerts so far have been PUA.
// But Heuristic-signatures / PUA sigs were recorded but never reported...
// ... Now is the time to report them!
// TODO: Report more than one if in ALLMATCH mode. For now, just reporting the "latest".
cli_virus_found_cb(&ctx, cli_get_last_virus(&ctx));
}
#if HAVE_JSON
@ -5612,13 +5626,10 @@ static cl_error_t scan_common(cl_fmap_t *map, const char *filepath, const char *
}
#endif // HAVE_JSON
if (status == CL_CLEAN) {
if ((ctx.found_possibly_unwanted) ||
((ctx.num_viruses != 0) &&
((ctx.options->general & CL_SCAN_GENERAL_ALLMATCHES) ||
(ctx.options->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX)))) {
status = CL_VIRUS;
}
if (verdict != CL_CLEAN) {
// Reporting "VIRUS" is more important than reporting an error,
// because... unfortunately we can only do one with the current API.
status = verdict;
}
cli_logg_unsetup();
@ -5655,6 +5666,10 @@ done:
free(ctx.options);
}
if (NULL != ctx.evidence) {
evidence_free(ctx.evidence);
}
return status;
}
@ -5730,28 +5745,6 @@ cl_error_t cl_scanmap_callback(cl_fmap_t *map, const char *filename, const char
return scan_common(map, filename, virname, scanned, engine, scanoptions, context);
}
cl_error_t cli_found_possibly_unwanted(cli_ctx *ctx)
{
if (cli_get_last_virus(ctx)) {
cli_dbgmsg("found Possibly Unwanted: %s\n", cli_get_last_virus(ctx));
if (SCAN_HEURISTIC_PRECEDENCE) {
/* we found a heuristic match, don't scan further,
* but consider it a virus. */
cli_dbgmsg("cli_found_possibly_unwanted: CL_VIRUS\n");
return CL_VIRUS;
}
/* heuristic scan isn't taking precedence, keep scanning.
* If this is part of an archive, and
* we find a real malware we report that instead of the
* heuristic match */
ctx->found_possibly_unwanted = 1;
} else {
cli_warnmsg("cli_found_possibly_unwanted called, but virname is not set\n");
}
emax_reached(ctx);
return CL_CLEAN;
}
cl_error_t cli_magic_scan_file(const char *filename, cli_ctx *ctx, const char *original_name, uint32_t attributes)
{
int fd = -1;

@ -102,8 +102,6 @@ cl_error_t cli_magic_scan_nested_fmap_type(cl_fmap_t *map, size_t offset, size_t
cl_error_t cli_magic_scan_buff(const void *buffer, size_t length, cli_ctx *ctx,
const char *name, uint32_t attributes);
cl_error_t cli_found_possibly_unwanted(cli_ctx *ctx);
/**
* @brief Internal-use version of cl_scanfile.
*

@ -78,7 +78,7 @@ cl_error_t cli_parsetiff(cli_ctx *ctx)
/* acquire offset of first IFD */
if (fmap_readn(map, &offset, offset, 4) != 4) {
cli_dbgmsg("cli_parsetiff: Failed to acquire offset of first IFD, file appears to be truncated.\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.TIFF.EOFReadingFirstIFDOffset");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.TIFF.EOFReadingFirstIFDOffset");
status = CL_EPARSE;
goto done;
}
@ -89,7 +89,7 @@ cl_error_t cli_parsetiff(cli_ctx *ctx)
if (!offset) {
cli_errmsg("cli_parsetiff: Invalid offset for first IFD\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.TIFF.InvalidIFDOffset");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.TIFF.InvalidIFDOffset");
status = CL_EPARSE;
goto done;
}
@ -99,7 +99,7 @@ cl_error_t cli_parsetiff(cli_ctx *ctx)
/* acquire number of directory entries in current IFD */
if (fmap_readn(map, &num_entries, offset, 2) != 2) {
cli_dbgmsg("cli_parsetiff: Failed to acquire number of directory entries in current IFD, file appears to be truncated.\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.TIFF.EOFReadingNumIFDDirectoryEntries");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.TIFF.EOFReadingNumIFDDirectoryEntries");
status = CL_EPARSE;
goto done;
}
@ -112,7 +112,7 @@ cl_error_t cli_parsetiff(cli_ctx *ctx)
for (i = 0; i < num_entries; i++) {
if (fmap_readn(map, &entry, offset, sizeof(entry)) != sizeof(entry)) {
cli_dbgmsg("cli_parsetiff: Failed to read next IFD entry, file appears to be truncated.\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.TIFF.EOFReadingIFDEntry");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.TIFF.EOFReadingIFDEntry");
status = CL_EPARSE;
goto done;
}
@ -188,7 +188,7 @@ cl_error_t cli_parsetiff(cli_ctx *ctx)
/* acquire next IFD location, gets 0 if last IFD */
if (fmap_readn(map, &offset, offset, sizeof(offset)) != sizeof(offset)) {
cli_dbgmsg("cli_parsetiff: Failed to aquire next IFD location, file appears to be truncated.\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.TIFF.EOFReadingChunkCRC");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.TIFF.EOFReadingChunkCRC");
status = CL_EPARSE;
goto done;
}
@ -198,7 +198,7 @@ cl_error_t cli_parsetiff(cli_ctx *ctx)
/*If the offsets are not in order, that is suspicious.*/
if (last_offset >= offset) {
cli_dbgmsg("cli_parsetiff: Next offset is before current offset, file appears to be malformed.\n");
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.TIFF.OutOfOrderIFDOffset");
cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.TIFF.OutOfOrderIFDOffset");
status = CL_EPARSE;
goto done;
}
@ -211,7 +211,7 @@ cl_error_t cli_parsetiff(cli_ctx *ctx)
done:
if (status == CL_EPARSE) {
/* We added with cli_append_possibly_unwanted so it will alert at the end if nothing else matches. */
/* We added with cli_append_potentially_unwanted so it will alert at the end if nothing else matches. */
status = CL_CLEAN;
}

@ -28,6 +28,14 @@ include = [
"frs_error::ffierror_fmt",
"frs_error::ffierror_free",
"logging::clrs_eprint",
"evidence::evidence_new",
"evidence::evidence_free",
"evidence::evidence_render_verdict",
"evidence::evidence_get_last_alert",
"evidence::evidence_num_alerts",
"evidence::evidence_num_indicators_type",
"evidence::evidence_add_indicator",
"evidence::IndicatorType",
]
# prefix = "CAPI_"
@ -48,3 +56,6 @@ all_features = false
crates = []
default_features = true
features = []
[enum]
prefix_with_name = true

@ -0,0 +1,238 @@
/*
* Functions and structures for recording, reporting evidence towards a scan verdict.
*
* Copyright (C) 2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
*
* Authors: Micah Snyder
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
use std::{collections::HashMap, ffi::CStr, mem::ManuallyDrop, os::raw::c_char};
use log::{debug, error, warn};
use thiserror::Error;
use crate::{ffi_util::FFIError, rrf_call, sys, validate_str_param};
/// CdiffError enumerates all possible errors returned by this library.
#[derive(Error, Debug)]
pub enum EvidenceError {
#[error("Invalid format")]
Format,
#[error("Invalid parameter: {0}")]
InvalidParameter(String),
#[error("{0} parmeter is NULL")]
NullParam(&'static str),
}
/// Classification of an indicator recorded in the scan evidence.
///
/// `#[repr(C)]` gives the enum a C-compatible layout; it is passed by value
/// across the FFI boundary by `evidence_add_indicator()` and
/// `evidence_num_indicators_type()`.
#[repr(C)]
pub enum IndicatorType {
/// For hash-based indicators.
// NOTE(review): the C callers also use this type for every cli_append_virus()
// alert and for PUA alerts when heuristic precedence is enabled, so
// "hash-based" may be too narrow a description -- confirm intended wording.
Strong,
/// For potentially unwanted applications/programs that are not malicious but may be used maliciously.
PotentiallyUnwanted,
#[cfg(feature = "not_ready")]
/// Weak indicators that together with other indicators can be used to form a stronger indicator.
/// This type of indicator should NEVER alert the user on its own.
Weak,
}
/// Evidence gathered during a scan: every indicator that was recorded, grouped by type.
///
/// Each map is keyed by indicator name; the value holds one `IndicatorMeta`
/// for every time an indicator with that name was added.
#[derive(Debug, Default, Clone)]
pub struct Evidence {
    /// Strong indicators, keyed by name.
    strong: HashMap<String, Vec<IndicatorMeta>>,
    /// Potentially unwanted indicators, keyed by name.
    pua: HashMap<String, Vec<IndicatorMeta>>,
    /// Weak indicators, keyed by name.
    #[cfg(feature = "not_ready")]
    weak: HashMap<String, Vec<IndicatorMeta>>,
    /// The most recently added strong indicator, if any.
    ///
    /// Tracked explicitly because `HashMap` iteration order is unspecified,
    /// so the maps above cannot answer "which indicator was added last?".
    last_strong: Option<IndicatorMeta>,
    /// The most recently added potentially unwanted indicator, if any.
    last_pua: Option<IndicatorMeta>,
}

/// Per-occurrence data stored for an indicator.
#[derive(Debug, Clone)]
pub struct IndicatorMeta {
    /// The original string pointer for the "virname", to pass back.
    static_virname: *const c_char,
}

/// Allocate a new, empty evidence store and hand ownership to the caller.
///
/// The returned handle must be released with `evidence_free()`.
#[no_mangle]
pub extern "C" fn evidence_new() -> sys::evidence_t {
    Box::into_raw(Box::new(Evidence::default())) as sys::evidence_t
}

/// Free the evidence
///
/// A NULL handle is not freed; a warning is logged instead.
#[no_mangle]
pub extern "C" fn evidence_free(evidence: sys::evidence_t) {
    if evidence.is_null() {
        warn!("Attempted to free a NULL evidence pointer. Please report this at: https://github.com/Cisco-Talos/clamav/issues");
    } else {
        // Re-box the pointer so that dropping the Box releases the allocation.
        let _ = unsafe { Box::from_raw(evidence as *mut Evidence) };
    }
}

/// C interface for Evidence::render_verdict().
/// Handles all the unsafe ffi stuff.
///
/// Render a verdict based on the evidence, depending on the severity of the
/// indicators found and the scan configuration.
///
/// The individual alerting-indicators would have already been printed at this point.
///
/// Returns `false` if `evidence` is NULL.
///
/// # Safety
///
/// `evidence` must be NULL or a handle returned by `evidence_new()` that has
/// not yet been passed to `evidence_free()`.
#[export_name = "evidence_render_verdict"]
pub unsafe extern "C" fn _evidence_render_verdict(evidence: sys::evidence_t) -> bool {
    if evidence.is_null() {
        return false;
    }

    // ManuallyDrop: borrow the caller's allocation without freeing it on return.
    let evidence = ManuallyDrop::new(Box::from_raw(evidence as *mut Evidence));

    evidence.render_verdict()
}

/// C interface to get a string name for one of the alerts.
/// Returns the most recently added strong indicator if there is one,
/// otherwise the most recently added pua indicator.
///
/// Returns NULL if there are no alerts or if `evidence` is NULL.
///
/// # Safety
///
/// Returns a string that is either static, or allocated when reading the database.
/// So the lifetime of the string is good at least until you reload or unload the databases.
///
/// `evidence` must be NULL or a handle returned by `evidence_new()` that has
/// not yet been passed to `evidence_free()`.
#[export_name = "evidence_get_last_alert"]
pub unsafe extern "C" fn _evidence_get_last_alert(evidence: sys::evidence_t) -> *const c_char {
    if evidence.is_null() {
        return std::ptr::null();
    }

    let evidence = ManuallyDrop::new(Box::from_raw(evidence as *mut Evidence));

    evidence.last_alert()
}

/// C interface to check number of alerting indicators in evidence.
///
/// The count is the number of distinct strong names plus the number of
/// distinct pua names. Returns 0 if `evidence` is NULL.
///
/// # Safety
///
/// `evidence` must be NULL or a handle returned by `evidence_new()` that has
/// not yet been passed to `evidence_free()`.
#[export_name = "evidence_num_alerts"]
pub unsafe extern "C" fn _evidence_num_alerts(evidence: sys::evidence_t) -> usize {
    if evidence.is_null() {
        return 0;
    }

    let evidence = ManuallyDrop::new(Box::from_raw(evidence as *mut Evidence));

    evidence.strong.len() + evidence.pua.len()
}

/// C interface to check number of indicators in evidence.
/// Handles all the unsafe ffi stuff.
///
/// The count is the number of distinct names recorded for the given type.
/// Returns 0 if `evidence` is NULL.
///
/// # Safety
///
/// `evidence` must be NULL or a handle returned by `evidence_new()` that has
/// not yet been passed to `evidence_free()`.
#[export_name = "evidence_num_indicators_type"]
pub unsafe extern "C" fn _evidence_num_indicators_type(
    evidence: sys::evidence_t,
    indicator_type: IndicatorType,
) -> usize {
    if evidence.is_null() {
        return 0;
    }

    let evidence = ManuallyDrop::new(Box::from_raw(evidence as *mut Evidence));

    match indicator_type {
        IndicatorType::Strong => evidence.strong.len(),
        IndicatorType::PotentiallyUnwanted => evidence.pua.len(),
        #[cfg(feature = "not_ready")]
        // TODO: Implement a way to record, report number of indicators in the tree (you know, after making this a tree).
        IndicatorType::Weak => evidence.weak.len(),
    }
}

/// C interface for Evidence::add_indicator().
/// Handles all the unsafe ffi stuff.
///
/// Add an indicator to the evidence.
///
/// The `name` pointer itself is stored and later handed back by
/// `evidence_get_last_alert()`, so the string must outlive the evidence.
///
/// # Safety
///
/// `evidence` must be a handle returned by `evidence_new()` that has not yet
/// been passed to `evidence_free()`.
/// `name` and `err` must not be NULL.
#[export_name = "evidence_add_indicator"]
pub unsafe extern "C" fn _evidence_add_indicator(
    evidence: sys::evidence_t,
    name: *const c_char,
    indicator_type: IndicatorType,
    err: *mut *mut FFIError,
) -> bool {
    let name_str = validate_str_param!(name);

    let mut evidence = ManuallyDrop::new(Box::from_raw(evidence as *mut Evidence));

    rrf_call!(
        err = err,
        evidence.add_indicator(name_str, name, indicator_type)
    )
}

impl Evidence {
    /// Check if we have any indicators that should alert the user.
    pub fn render_verdict(&self) -> bool {
        debug!("Checking verdict...");

        let num_alerting_indicators = self.strong.len() + self.pua.len();

        if num_alerting_indicators > 0 {
            debug!("Found {} alerting indicators", num_alerting_indicators);
            return true;
        }
        false
    }

    /// Get the name pointer of the most recently added alerting indicator.
    ///
    /// Strong indicators take priority over potentially unwanted ones.
    /// Returns a null pointer if nothing alerting has been recorded.
    pub fn last_alert(&self) -> *const c_char {
        if let Some(meta) = &self.last_strong {
            meta.static_virname
        } else if let Some(meta) = &self.last_pua {
            meta.static_virname
        } else {
            // no alerts, return NULL
            std::ptr::null()
        }
    }

    /// Add an indicator to the evidence.
    ///
    /// `name` is the map key; `static_virname` is the caller's original
    /// pointer for the same name, stored so it can be passed back later.
    pub fn add_indicator(
        &mut self,
        name: &str,
        static_virname: *const c_char,
        indicator_type: IndicatorType,
    ) -> Result<(), EvidenceError> {
        let meta: IndicatorMeta = IndicatorMeta { static_virname };

        match indicator_type {
            IndicatorType::Strong => {
                // Remember insertion order explicitly; the map cannot provide it.
                self.last_strong = Some(meta.clone());
                self.strong
                    .entry(name.to_string())
                    .or_insert_with(Vec::new)
                    .push(meta);
            }

            IndicatorType::PotentiallyUnwanted => {
                self.last_pua = Some(meta.clone());
                self.pua
                    .entry(name.to_string())
                    .or_insert_with(Vec::new)
                    .push(meta);
            }

            #[cfg(feature = "not_ready")]
            // TODO: Implement a tree structure for recording weak indicators, to
            // match the archive/extraction level at which each was found.
            // This will be required for alerting signatures to depend on weak-indicators for embedded content.
            IndicatorType::Weak => {
                self.weak
                    .entry(name.to_string())
                    .or_insert_with(Vec::new)
                    .push(meta);
            }
        }

        Ok(())
    }
}

@ -132,7 +132,7 @@ pub extern "C" fn fuzzy_hash_free_hashmap(fuzzy_hashmap: sys::fuzzyhashmap_t) {
}
}
/// C interface for fuzzy_hash_check().
/// C interface for FuzzyHashMap::check().
/// Handles all the unsafe ffi stuff.
///
/// # Safety
@ -162,7 +162,7 @@ pub unsafe extern "C" fn _fuzzy_hash_check(
true
}
/// C interface for fuzzy_hash_load_subsignature().
/// C interface for FuzzyHashMap::load_subsignature().
/// Handles all the unsafe ffi stuff.
///
/// # Safety

@ -24,6 +24,7 @@
pub mod sys;
pub mod cdiff;
pub mod evidence;
pub mod ffi_util;
pub mod fuzzy_hash;
pub mod logging;

@ -435,7 +435,7 @@ pub struct cl_fmap {
pub pgsz: u64,
pub paged: u64,
pub aging: u16,
pub dont_cache_flag: u16,
pub dont_cache_flag: bool,
#[doc = " indicates if we should not cache scan results for this fmap. Used if limits exceeded"]
pub handle_is_fd: u16,
#[doc = " non-zero if map->handle is an fd."]
@ -782,24 +782,20 @@ pub struct recursion_level_tag {
pub calculated_image_fuzzy_hash: bool,
}
pub type recursion_level_t = recursion_level_tag;
pub type evidence_t = *mut ::std::os::raw::c_void;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cli_ctx_tag {
#[doc = "< (optional) The filepath of the original scan target."]
pub target_filepath: *mut ::std::os::raw::c_char,
#[doc = "< (optional) The filepath of the current file being parsed. May be a temp file."]
pub sub_filepath: *const ::std::os::raw::c_char,
#[doc = "< The directory to store tmp files at this recursion depth."]
pub sub_tmpdir: *mut ::std::os::raw::c_char,
pub virname: *mut *const ::std::os::raw::c_char,
pub num_viruses: ::std::os::raw::c_uint,
pub evidence: evidence_t,
pub scanned: *mut ::std::os::raw::c_ulong,
pub root: *const cli_matcher,
pub engine: *const cl_engine,
pub scansize: u64,
pub options: *mut cl_scan_options,
pub scannedfiles: ::std::os::raw::c_uint,
pub found_possibly_unwanted: ::std::os::raw::c_uint,
pub corrupted_input: ::std::os::raw::c_uint,
pub recursion_stack: *mut recursion_level_t,
pub recursion_stack_size: u32,

@ -54,12 +54,12 @@
#include "others.h"
#include "pe.h"
#include "entconv.h"
#include "clamav_rust.h"
// common
#include "output.h"
#include "optparser.h"
#include "misc.h"
#include "clamav_rust.h"
#include "tar.h"
#include "vba.h"
@ -207,6 +207,9 @@ static int hashpe(const char *filename, unsigned int class, int type)
/* prepare context */
ctx.engine = engine;
ctx.evidence = evidence_new();
ctx.options = &options;
ctx.options->parse = ~0;
ctx.dconf = (struct cli_dconf *)engine->dconf;
@ -273,6 +276,9 @@ done:
if (NULL != ctx.recursion_stack) {
free(ctx.recursion_stack);
}
if (NULL != ctx.evidence) {
evidence_free(ctx.evidence);
}
if (NULL != engine) {
cl_engine_free(engine);
}
@ -358,7 +364,7 @@ static int fuzzy_img_file(char *filename)
bytes_read = read(target_fd, mem, (size_t)st.st_size);
if (bytes_read == -1) {
char err[128];
mprintf(LOGG_ERROR, "%s: Failed to read file.\n", basename(filename), cli_strerror(errno, err, sizeof(err)));
mprintf(LOGG_ERROR, "%s: Failed to read file: %s\n", basename(filename), cli_strerror(errno, err, sizeof(err)));
goto done;
}
if (bytes_read < (ssize_t)st.st_size) {
@ -2101,6 +2107,9 @@ static void matchsig(char *sig, const char *offset, int fd)
}
ctx.engine = engine;
ctx.evidence = evidence_new();
ctx.options = &options;
ctx.options->parse = ~0;
ctx.dconf = (struct cli_dconf *)engine->dconf;
@ -2152,6 +2161,9 @@ done:
if (NULL != ctx.recursion_stack) {
free(ctx.recursion_stack);
}
if (NULL != ctx.evidence) {
evidence_free(ctx.evidence);
}
if (NULL != engine) {
cl_engine_free(engine);
}
@ -3316,6 +3328,9 @@ static int dumpcerts(const struct optstruct *opts)
/* prepare context */
ctx.engine = engine;
ctx.evidence = evidence_new();
ctx.options = &options;
ctx.options->parse = ~0;
ctx.dconf = (struct cli_dconf *)engine->dconf;
@ -3365,6 +3380,9 @@ done:
if (NULL != ctx.recursion_stack) {
free(ctx.recursion_stack);
}
if (NULL != ctx.evidence) {
evidence_free(ctx.evidence);
}
if (NULL != engine) {
cl_engine_free(engine);
}

@ -38,6 +38,7 @@
#include "vba_extract.h"
#include "ole2_extract.h"
#include "readdb.h"
#include "clamav_rust.h"
// common
#include "output.h"
@ -102,6 +103,8 @@ cli_ctx *convenience_ctx(int fd)
ctx->engine = (const struct cl_engine *)engine;
ctx->evidence = evidence_new();
ctx->dconf = (struct cli_dconf *)engine->dconf;
ctx->recursion_stack_size = ctx->engine->max_recursion_level;
@ -182,6 +185,10 @@ void destroy_ctx(cli_ctx *ctx)
ctx->options = NULL;
}
if (NULL != ctx->evidence) {
evidence_free(ctx->evidence);
}
free(ctx);
}
}

@ -40,6 +40,7 @@
#include "dconf.h"
#include "bytecode_priv.h"
#include "pe.h"
#include "clamav_rust.h"
#include "checks.h"
@ -72,12 +73,13 @@ static void runtest(const char *file, uint64_t expected, int fail, int nojit,
cctx.options = &options;
cctx.options->general |= CL_SCAN_GENERAL_ALLMATCHES;
cctx.virname = &virname;
cctx.engine = engine = cl_engine_new();
ck_assert_msg(!!cctx.engine, "cannot create engine");
rc = cl_engine_compile(engine);
ck_assert_msg(!rc, "cannot compile engine");
cctx.evidence = evidence_new();
cctx.dconf = cctx.engine->dconf;
cctx.recursion_stack_size = cctx.engine->max_recursion_level;
@ -160,6 +162,7 @@ static void runtest(const char *file, uint64_t expected, int fail, int nojit,
cli_bytecode_done(&bcs);
free(cctx.recursion_stack);
cl_engine_free(engine);
evidence_free(cctx.evidence);
if (fdin >= 0)
close(fdin);
}

@ -35,6 +35,7 @@
#include "matcher-pcre.h"
#include "others.h"
#include "default.h"
#include "clamav_rust.h"
#include "checks.h"
@ -169,8 +170,9 @@ static void setup(void)
memset(&options, 0, sizeof(struct cl_scan_options));
ctx.options = &options;
ctx.virname = &virname;
ctx.engine = cl_engine_new();
ctx.evidence = evidence_new();
ctx.engine = cl_engine_new();
ck_assert_msg(!!ctx.engine, "cl_engine_new() failed");
ctx.dconf = ctx.engine->dconf;
@ -197,6 +199,7 @@ static void teardown(void)
{
cl_engine_free((struct cl_engine *)ctx.engine);
free(ctx.recursion_stack);
evidence_free(ctx.evidence);
}
START_TEST(test_ac_scanbuff)
@ -277,10 +280,17 @@ START_TEST(test_ac_scanbuff_allscan)
ck_assert_msg(!strncmp(virname, ac_testdata[i].virname, strlen(ac_testdata[i].virname)), "Dataset %u matched with %s", i, virname);
ret = cli_scan_buff((const unsigned char *)ac_testdata[i].data, strlen(ac_testdata[i].data), 0, &ctx, 0, NULL);
ck_assert_msg(ret == CL_VIRUS, "cli_scan_buff() failed for %s", ac_testdata[i].virname);
ck_assert_msg(ret == CL_SUCCESS, "cli_scan_buff() failed for %s", ac_testdata[i].virname);
// cli_scan_buff() doesn't check the number of alerts. When using CL_SCAN_GENERAL_ALLMATCHES
// or if using `CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE` and `cli_append_potentially_unwanted()`
// we need to count the number of alerts manually to determine the verdict.
ck_assert_msg(0 < evidence_num_alerts(ctx.evidence), "cli_scan_buff() failed for %s", ac_testdata[i].virname);
ck_assert_msg(!strncmp(virname, ac_testdata[i].virname, strlen(ac_testdata[i].virname)), "Dataset %u matched with %s", i, virname);
if (ctx.num_viruses)
ctx.num_viruses = 0;
if (evidence_num_alerts(ctx.evidence) > 0) {
evidence_free(ctx.evidence);
ctx.evidence = evidence_new();
}
}
cli_ac_freedata(&mdata);
@ -360,15 +370,28 @@ START_TEST(test_ac_scanbuff_allscan_ex)
ctx.options->general |= CL_SCAN_GENERAL_ALLMATCHES; /* enable all-match */
for (i = 0; ac_sigopts_testdata[i].data; i++) {
cl_error_t verdict = CL_CLEAN;
ret = cli_ac_scanbuff((const unsigned char *)ac_sigopts_testdata[i].data, ac_sigopts_testdata[i].dlength, &virname, NULL, NULL, root, &mdata, 0, 0, NULL, AC_SCAN_VIR, NULL);
ck_assert_msg(ret == ac_sigopts_testdata[i].expected_result, "[ac_ex] cli_ac_scanbuff() failed for %s (%d != %d)", ac_sigopts_testdata[i].virname, ret, ac_sigopts_testdata[i].expected_result);
if (ac_sigopts_testdata[i].expected_result == CL_VIRUS)
ck_assert_msg(!strncmp(virname, ac_sigopts_testdata[i].virname, strlen(ac_sigopts_testdata[i].virname)), "[ac_ex] Dataset %u matched with %s", i, virname);
ret = cli_scan_buff((const unsigned char *)ac_sigopts_testdata[i].data, ac_sigopts_testdata[i].dlength, 0, &ctx, 0, NULL);
ck_assert_msg(ret == ac_sigopts_testdata[i].expected_result, "[ac_ex] cli_ac_scanbuff() failed for %s (%d != %d)", ac_sigopts_testdata[i].virname, ret, ac_sigopts_testdata[i].expected_result);
if (ctx.num_viruses)
ctx.num_viruses = 0;
ck_assert_msg(ret == CL_SUCCESS, "[ac_ex] cli_ac_scanbuff() failed for %s (%d != %d)", ac_sigopts_testdata[i].virname, ret, ac_sigopts_testdata[i].expected_result);
// cli_scan_buff() doesn't check the number of alerts. When using CL_SCAN_GENERAL_ALLMATCHES
// or if using `CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE` and `cli_append_potentially_unwanted()`
// we need to count the number of alerts manually to determine the verdict.
if (0 < evidence_num_alerts(ctx.evidence)) {
verdict = CL_VIRUS;
}
ck_assert_msg(verdict == ac_sigopts_testdata[i].expected_result, "[ac_ex] cli_ac_scanbuff() failed for %s (%d != %d)", ac_sigopts_testdata[i].virname, verdict, ac_sigopts_testdata[i].expected_result);
if (evidence_num_alerts(ctx.evidence) > 0) {
evidence_free(ctx.evidence);
ctx.evidence = evidence_new();
}
}
cli_ac_freedata(&mdata);
@ -530,16 +553,28 @@ START_TEST(test_pcre_scanbuff_allscan)
ctx.options->general |= CL_SCAN_GENERAL_ALLMATCHES; /* enable all-match */
for (i = 0; pcre_testdata[i].data; i++) {
cl_error_t verdict = CL_CLEAN;
ret = cli_pcre_scanbuf((const unsigned char *)pcre_testdata[i].data, strlen(pcre_testdata[i].data), &virname, NULL, root, NULL, NULL, NULL);
ck_assert_msg(ret == pcre_testdata[i].expected_result, "[pcre] cli_pcre_scanbuff() failed for %s (%d != %d)", pcre_testdata[i].virname, ret, pcre_testdata[i].expected_result);
// we cannot check if the virname matches because we didn't load a whole logical signature, and virnames are stored in the lsig structure, now.
ret = cli_scan_buff((const unsigned char *)pcre_testdata[i].data, strlen(pcre_testdata[i].data), 0, &ctx, 0, NULL);
ck_assert_msg(ret == pcre_testdata[i].expected_result, "[pcre] cli_scan_buff() failed for %s", pcre_testdata[i].virname);
// cli_scan_buff() doesn't check the number of alerts. When using CL_SCAN_GENERAL_ALLMATCHES
// or if using `CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE` and `cli_append_potentially_unwanted()`
// we need to count the number of alerts manually to determine the verdict.
if (0 < evidence_num_alerts(ctx.evidence)) {
verdict = CL_VIRUS;
}
ck_assert_msg(verdict == pcre_testdata[i].expected_result, "[pcre] cli_scan_buff() failed for %s", pcre_testdata[i].virname);
/* num_virus field add to test case struct */
if (ctx.num_viruses)
ctx.num_viruses = 0;
if (evidence_num_alerts(ctx.evidence) > 0) {
evidence_free(ctx.evidence);
ctx.evidence = evidence_new();
}
}
cli_ac_freedata(&mdata);

@ -43,6 +43,8 @@
#include "phish_domaincheck_db.h"
#include "phish_allow_list.h"
#include "clamav_rust.h"
#include "checks.h"
static size_t cb_called = 0;
@ -405,8 +407,8 @@ static void do_phishing_test(const struct rtest *rtest)
hrefs.tag[0] = (unsigned char *)cli_strdup("href");
hrefs.contents[0] = (unsigned char *)cli_strdup(rtest->displayurl);
ctx.engine = engine;
ctx.virname = &virname;
ctx.engine = engine;
ctx.evidence = evidence_new();
rc = phishingScan(&ctx, &hrefs);
@ -414,30 +416,34 @@ static void do_phishing_test(const struct rtest *rtest)
ck_assert_msg(rc == CL_CLEAN, "phishingScan");
switch (rtest->result) {
case RTR_PHISH:
ck_assert_msg(ctx.found_possibly_unwanted,
ck_assert_msg(evidence_num_indicators_type(ctx.evidence, IndicatorType_PotentiallyUnwanted),
"this should be phishing, realURL: %s, displayURL: %s",
rtest->realurl, rtest->displayurl);
break;
case RTR_ALLOWED:
ck_assert_msg(!ctx.found_possibly_unwanted,
ck_assert_msg(!evidence_num_indicators_type(ctx.evidence, IndicatorType_PotentiallyUnwanted),
"this should be allowed, realURL: %s, displayURL: %s",
rtest->realurl, rtest->displayurl);
break;
case RTR_CLEAN:
ck_assert_msg(!ctx.found_possibly_unwanted,
ck_assert_msg(!evidence_num_indicators_type(ctx.evidence, IndicatorType_PotentiallyUnwanted),
"this should be clean, realURL: %s, displayURL: %s",
rtest->realurl, rtest->displayurl);
break;
case RTR_BLOCKED:
if (!loaded_2)
ck_assert_msg(!ctx.found_possibly_unwanted,
ck_assert_msg(!evidence_num_indicators_type(ctx.evidence, IndicatorType_PotentiallyUnwanted),
"this should be clean, realURL: %s, displayURL: %s",
rtest->realurl, rtest->displayurl);
else {
ck_assert_msg(ctx.found_possibly_unwanted,
const char *viruname = NULL;
ck_assert_msg(evidence_num_indicators_type(ctx.evidence, IndicatorType_PotentiallyUnwanted),
"this should be blocked, realURL: %s, displayURL: %s",
rtest->realurl, rtest->displayurl);
if (*ctx.virname) {
virname = cli_get_last_virus_str(&ctx);
if (NULL != virname) {
char *phishingFound = NULL;
char *detectionName = NULL;
if (strstr(rtest->realurl, "malware-test")) {
@ -447,8 +453,8 @@ static void do_phishing_test(const struct rtest *rtest)
detectionName = "Heuristics.Safebrowsing.Suspected-phishing_safebrowsing.clamav.net";
}
ck_assert_msg(detectionName != NULL, "\n\t Block list test case error - malware-test or phishing-test not found in: %s\n", rtest->realurl);
phishingFound = strstr((const char *)*ctx.virname, detectionName);
ck_assert_msg(phishingFound != NULL, "\n\t should be: %s,\n\t but is: %s\n", detectionName, *ctx.virname);
phishingFound = strstr((const char *)virname, detectionName);
ck_assert_msg(phishingFound != NULL, "\n\t should be: %s,\n\t but is: %s\n", detectionName, virname);
}
}
break;
@ -456,6 +462,8 @@ static void do_phishing_test(const struct rtest *rtest)
/* don't worry about it, this was tested in regex_list_match_test() */
break;
}
evidence_free(ctx.evidence);
}
static void do_phishing_test_allscan(const struct rtest *rtest)
@ -465,6 +473,7 @@ static void do_phishing_test_allscan(const struct rtest *rtest)
const char *virname = NULL;
tag_arguments_t hrefs;
cl_error_t rc;
cl_error_t verdict = CL_CLEAN;
struct cl_scan_options options;
memset(&ctx, 0, sizeof(ctx));
@ -485,50 +494,63 @@ static void do_phishing_test_allscan(const struct rtest *rtest)
hrefs.tag[0] = (unsigned char *)cli_strdup("href");
hrefs.contents[0] = (unsigned char *)cli_strdup(rtest->displayurl);
ctx.engine = engine;
ctx.virname = &virname;
ctx.options->general |= CL_SCAN_GENERAL_ALLMATCHES;
ctx.engine = engine;
ctx.evidence = evidence_new();
rc = phishingScan(&ctx, &hrefs);
ck_assert_msg(rc == CL_SUCCESS || rc == CL_VIRUS, "phishingScan failed with error code: %s (%u)",
cl_strerror(rc),
rc);
// phishingScan() doesn't check the number of alerts. When using CL_SCAN_GENERAL_ALLMATCHES
// or if using `CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE` and `cli_append_potentially_unwanted()`
// we need to count the number of alerts manually to determine the verdict.
if (0 < evidence_num_alerts(ctx.evidence)) {
verdict = CL_VIRUS;
}
html_tag_arg_free(&hrefs);
if (rtest->result == RTR_PHISH || (loaded_2 != 0 && rtest->result == RTR_BLOCKED)) {
ck_assert_msg(rc == CL_VIRUS, "phishingScan returned \"%s\", expected \"%s\". \n\trealURL: %s \n\tdisplayURL: %s",
cl_strerror(rc),
ck_assert_msg(verdict == CL_VIRUS, "phishingScan returned \"%s\", expected \"%s\". \n\trealURL: %s \n\tdisplayURL: %s",
cl_strerror(verdict),
cl_strerror(CL_VIRUS),
rtest->realurl, rtest->displayurl);
} else {
ck_assert_msg(rc == CL_CLEAN, "phishingScan returned \"%s\", expected \"%s\". \n\trealURL: %s \n\tdisplayURL: %s",
cl_strerror(rc),
ck_assert_msg(verdict == CL_CLEAN, "phishingScan returned \"%s\", expected \"%s\". \n\trealURL: %s \n\tdisplayURL: %s",
cl_strerror(verdict),
cl_strerror(CL_CLEAN),
rtest->realurl, rtest->displayurl);
}
switch (rtest->result) {
case RTR_PHISH:
ck_assert_msg(ctx.num_viruses,
ck_assert_msg(evidence_num_alerts(ctx.evidence),
"this should be phishing, realURL: %s, displayURL: %s",
rtest->realurl, rtest->displayurl);
break;
case RTR_ALLOWED:
ck_assert_msg(!ctx.num_viruses,
ck_assert_msg(!evidence_num_alerts(ctx.evidence),
"this should be allowed, realURL: %s, displayURL: %s",
rtest->realurl, rtest->displayurl);
break;
case RTR_CLEAN:
ck_assert_msg(!ctx.num_viruses,
ck_assert_msg(!evidence_num_alerts(ctx.evidence),
"this should be clean, realURL: %s, displayURL: %s",
rtest->realurl, rtest->displayurl);
break;
case RTR_BLOCKED:
if (!loaded_2)
ck_assert_msg(!ctx.num_viruses,
if (!loaded_2) {
ck_assert_msg(!evidence_num_alerts(ctx.evidence),
"this should be clean, realURL: %s, displayURL: %s",
rtest->realurl, rtest->displayurl);
else {
ck_assert_msg(ctx.num_viruses,
} else {
const char *viruname = NULL;
ck_assert_msg(evidence_num_alerts(ctx.evidence),
"this should be blocked, realURL: %s, displayURL: %s",
rtest->realurl, rtest->displayurl);
if (*ctx.virname) {
virname = cli_get_last_virus_str(&ctx);
if (NULL != virname) {
char *phishingFound = NULL;
char *detectionName = NULL;
if (strstr(rtest->realurl, "malware-test")) {
@ -538,8 +560,8 @@ static void do_phishing_test_allscan(const struct rtest *rtest)
detectionName = "Heuristics.Safebrowsing.Suspected-phishing_safebrowsing.clamav.net";
}
ck_assert_msg(detectionName != NULL, "\n\t Block list test case error - malware-test or phishing-test not found in: %s\n", rtest->realurl);
phishingFound = strstr((const char *)*ctx.virname, detectionName);
ck_assert_msg(phishingFound != NULL, "\n\t should be: %s,\n\t but is: %s\n", detectionName, *ctx.virname);
phishingFound = strstr(virname, detectionName);
ck_assert_msg(phishingFound != NULL, "\n\t should be: %s,\n\t but is: %s\n", detectionName, virname);
}
}
break;
@ -547,6 +569,10 @@ static void do_phishing_test_allscan(const struct rtest *rtest)
/* don't worry about it, this was tested in regex_list_match_test() */
break;
}
html_tag_arg_free(&hrefs);
evidence_free(ctx.evidence);
}
START_TEST(phishingScan_test)

Loading…
Cancel
Save