Add CL_TYPE_AI_MODEL and associated file type magic signatures

This adds preliminary support for identifying an assortment of
different AI model file formats.

So far, this detects the following types (the byte checks these
signatures key on are sketched after the list):
- GGML GGUF (.gguf)
- ONNX AI (.onnx)
- TensorFlow Lite (.tflite)
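For reference, the byte checks these signatures encode can be sketched as standalone C. This is illustrative only: the helper names below are hypothetical and nothing here is part of libclamav. It assumes the GGUF magic "GGUF" followed by a 4-byte little-endian version, the protobuf tag bytes 0x08 (ir_version) and 0x12 (producer_name) at the start of an ONNX ModelProto, and the FlatBuffer identifier "TFL3" at offset 4 for TensorFlow Lite.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical helpers for illustration; not part of libclamav. */

/* GGUF: "GGUF" magic followed by a 4-byte little-endian version (1..3). */
static int looks_like_gguf(const uint8_t *buf, size_t len)
{
    uint32_t version;

    if (len < 8 || memcmp(buf, "GGUF", 4) != 0)
        return 0;

    version = (uint32_t)buf[4] | ((uint32_t)buf[5] << 8) |
              ((uint32_t)buf[6] << 16) | ((uint32_t)buf[7] << 24);
    return version >= 1 && version <= 3;
}

/* ONNX: a protobuf ModelProto begins with tag 0x08 (ir_version, varint),
 * followed shortly by tag 0x12 (producer_name, length-delimited). */
static int looks_like_onnx(const uint8_t *buf, size_t len)
{
    return len >= 4 && buf[0] == 0x08 && buf[2] == 0x12;
}

/* TensorFlow Lite: FlatBuffer files carry "TFL3" at offset 4. */
static int looks_like_tflite(const uint8_t *buf, size_t len)
{
    return len >= 8 && memcmp(buf + 4, "TFL3", 4) == 0;
}

The actual ONNX signatures below are stricter: they also require one of several known producer-name strings (onnx-tool/onnx_tool, tf2onnx, pytorch, caffe, OnnxMLTools, CNTK, onnx-caffe2) right after the length byte, so generic protobuf data is not misclassified.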

Additional types to consider:
- SafeTensors (.safetensors)
- TensorFlow (.pb, .ckpt, .tfrecords)
- Keras (.keras)
- pickle (.pkl)
- numpy (.npy, .npz)
- coreml (.coreml)
- PyTorch (.pt, .pth, .bin, .mar, .pte, .pt2, .ptl)

Other than making it possible to differentiate these files by type,
the scanner treats CL_TYPE_AI_MODEL the same as CL_TYPE_BINARY_DATA.
We're not adding parsers to further process these files for now.
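Because the content is still scanned as raw binary data, ordinary content signatures keep working against these files. As a purely hypothetical example (the signature name is made up), an extended-format .ndb entry using ClamAV's MalwareName:TargetType:Offset:HexSignature layout could match a GGUF v3 header under target type 0 (any file):

Example.AI.Model.GGUFv3.Test:0:0:4747554603000000

Anything smarter than byte matching (for instance, parsing GGUF metadata or ONNX graph contents) would need dedicated parsers, which this change deliberately leaves out.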
Branch: pull/1476/head
Author: Val Snyder (2 months ago)
Parent: 640413d9c5
Commit: 8a77214c82
1. libclamav/filetypes.c (+1)
2. libclamav/filetypes.h (+1)
3. libclamav/filetypes_int.h (+22)
4. libclamav/scanners.c (+1)

libclamav/filetypes.c
@@ -142,6 +142,7 @@ static const struct ftmap_s {
{ "CL_TYPE_ONENOTE", CL_TYPE_ONENOTE },
{ "CL_TYPE_PYTHON_COMPILED", CL_TYPE_PYTHON_COMPILED },
{ "CL_TYPE_LHA_LZH", CL_TYPE_LHA_LZH },
{ "CL_TYPE_AI_MODEL", CL_TYPE_AI_MODEL },
{ NULL, CL_TYPE_IGNORED }
};
// clang-format on

libclamav/filetypes.h
@@ -95,6 +95,7 @@ typedef enum cli_file {
CL_TYPE_ONENOTE,
CL_TYPE_PYTHON_COMPILED,
CL_TYPE_LHA_LZH,
CL_TYPE_AI_MODEL,
/* Section for partition types */
CL_TYPE_PART_ANY, /* unknown partition type */

libclamav/filetypes_int.h
@@ -302,5 +302,27 @@ static const char *ftypes_int[] = {
"1:2:2d6c7a(73|34|35)2d:LHA archive using .LZS extension:CL_TYPE_ANY:CL_TYPE_LHA_LZH:210",
"1:2:2d706d302d:LHA archive using PMarc (.PMA) extension:CL_TYPE_ANY:CL_TYPE_LHA_LZH:210",
"0:0:414c5a01:ALZ:CL_TYPE_ANY:CL_TYPE_ALZ:210",
// GGML GGUF models
"0:0:4747554601000000:GGUF AI Model File:CL_TYPE_ANY:CL_TYPE_AI_MODEL:220",
"0:0:4747554602000000:GGUF AI Model File:CL_TYPE_ANY:CL_TYPE_AI_MODEL:220",
"0:0:4747554603000000:GGUF AI Model File:CL_TYPE_ANY:CL_TYPE_AI_MODEL:220",
// ONNX AI model detection, looking for: onnx_tool or onnx-tool
"1:0:08??12??6f6e6e78(2d|5f)746f6f6c:ONNX AI Model File:CL_TYPE_ANY:CL_TYPE_AI_MODEL:220",
// ONNX AI model detection, looking for: tf2onnx
"1:0:08??12??7466326f6e6e78:ONNX AI Model File:CL_TYPE_ANY:CL_TYPE_AI_MODEL:220",
// ONNX AI model detection, looking for: pytorch
"1:0:08??12??7079746f726368:ONNX AI Model File:CL_TYPE_ANY:CL_TYPE_AI_MODEL:220",
// ONNX AI model detection, looking for: caffe:
"1:0:08??12??63616666653a:ONNX AI Model File:CL_TYPE_ANY:CL_TYPE_AI_MODEL:220",
// ONNX AI model detection, looking for: OnnxMLTools:
"1:0:08??12??4f6e6e784d4c546f6f6c73:ONNX AI Model File:CL_TYPE_ANY:CL_TYPE_AI_MODEL:220",
// ONNX AI model detection, looking for: CNTK
"1:0:08??12??434e544b:ONNX AI Model File:CL_TYPE_ANY:CL_TYPE_AI_MODEL:220",
// ONNX AI model detection, looking for: onnx-caffe2:
"1:0:08??12??6f6e6e782d636166666532:ONNX AI Model File:CL_TYPE_ANY:CL_TYPE_AI_MODEL:220",
// ONNX AI model detection, looking for: onnx-caffe2:
"1:0:08??12??6f6e6e782d636166666532:ONNX AI Model File:CL_TYPE_ANY:CL_TYPE_AI_MODEL:220",
// tflite model detection
"0:4:54464c33:TensorFlow Lite Model File:CL_TYPE_ANY:CL_TYPE_AI_MODEL:220",
NULL};
#endif
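If entries for the formats listed above under "additional types to consider" are added later, they would follow the same entry layout as these lines (magic type, offset, hex bytes, description, expected container type, detected type, minimum functionality level). As a purely hypothetical sketch that is not part of this commit, a NumPy .npy entry keying on its "\x93NUMPY" magic might look like:

"0:0:934e554d5059:NumPy array data file:CL_TYPE_ANY:CL_TYPE_AI_MODEL:220",

Container-based formats such as .npz, .keras, and current PyTorch .pt archives are ZIP files, so a simple leading-magic entry would not distinguish them and they would likely need different handling.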

libclamav/scanners.c
@@ -5448,6 +5448,7 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
perf_nested_stop(ctx, PERFT_MACHO, PERFT_SCAN);
break;
case CL_TYPE_AI_MODEL:
case CL_TYPE_PYTHON_COMPILED:
case CL_TYPE_BINARY_DATA:
ret = cli_scan_fmap(ctx, CL_TYPE_OTHER, false, NULL, AC_SCAN_VIR, NULL, NULL);
