/* * Copyright (C) 2013-2025 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2007-2013 Sourcefire, Inc. * * Authors: Trog * * Summary: Extract component parts of OLE2 files (e.g. MS Office Documents). * * Acknowledgements: Some ideas and algorithms were based upon OpenOffice and libgsf. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #if HAVE_CONFIG_H #include "clamav-config.h" #endif #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include "clamav.h" #include "others.h" #include "hwp.h" #include "ole2_extract.h" #include "xlm_extract.h" #include "scanners.h" #include "fmap.h" #include "json_api.h" #include "msdoc.h" #include "rijndael.h" #include "ole2_encryption.h" #ifdef DEBUG_OLE2_LIST #define ole2_listmsg(...) cli_dbgmsg(__VA_ARGS__) #else #define ole2_listmsg(...) ; #endif #define ole2_endian_convert_16(v) le16_to_host((uint16_t)(v)) #define ole2_endian_convert_32(v) le32_to_host((uint32_t)(v)) #define ole2_endian_convert_64(v) le64_to_host((uint64_t)(v)) #ifndef HAVE_ATTRIB_PACKED #define __attribute__(x) #endif #ifdef HAVE_PRAGMA_PACK #pragma pack(1) #endif #ifdef HAVE_PRAGMA_PACK_HPPA #pragma pack 1 #endif // https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/05060311-bfce-4b12-874d-71fd4ce63aea typedef struct ole2_header_tag { unsigned char magic[8]; /* should be: 0xd0cf11e0a1b11ae1 */ unsigned char clsid[16]; uint16_t minor_version __attribute__((packed)); uint16_t dll_version __attribute__((packed)); int16_t byte_order __attribute__((packed)); /* -2=intel */ uint16_t log2_big_block_size __attribute__((packed)); /* usually 9 (2^9 = 512) */ uint32_t log2_small_block_size __attribute__((packed)); /* usually 6 (2^6 = 64) */ int32_t reserved[2] __attribute__((packed)); int32_t bat_count __attribute__((packed)); int32_t prop_start __attribute__((packed)); uint32_t signature __attribute__((packed)); uint32_t sbat_cutoff __attribute__((packed)); /* cutoff for files held * in small blocks * (4096) */ int32_t sbat_start __attribute__((packed)); int32_t sbat_block_count __attribute__((packed)); int32_t xbat_start __attribute__((packed)); int32_t xbat_count __attribute__((packed)); int32_t bat_array[109] __attribute__((packed)); /* * The following is not part of the ole2 header, but stuff we need in * order to decode. * * IMPORTANT: These must take account of the size of variables below here * when calculating hdr_size to read the header. * * See the top of cli_ole2_extract(). */ int32_t sbat_root_start __attribute__((packed)); uint32_t max_block_no; size_t m_length; bitset_t *bitset; struct uniq *U; fmap_t *map; bool has_vba; bool has_xlm; bool has_image; hwp5_header_t *is_hwp; // This value MUST be last in this structure, // otherwise you will get short file reads. } ole2_header_t; /* * DirectoryEntry * * https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/60fe8611-66c3-496b-b70d-a504c94c9ace */ typedef struct property_tag { char name[64]; /* in unicode */ uint16_t name_size __attribute__((packed)); unsigned char type; /* 1=dir 2=file 5=root */ unsigned char color; /* black or red */ uint32_t prev __attribute__((packed)); uint32_t next __attribute__((packed)); uint32_t child __attribute__((packed)); unsigned char clsid[16]; uint32_t user_flags __attribute__((packed)); uint32_t create_lowdate __attribute__((packed)); uint32_t create_highdate __attribute__((packed)); uint32_t mod_lowdate __attribute__((packed)); uint32_t mod_highdate __attribute__((packed)); uint32_t start_block __attribute__((packed)); uint32_t size __attribute__((packed)); unsigned char reserved[4]; } property_t; /* * File Information Block Base. * Naming is consistent with * https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-doc/26fb6c06-4e5c-4778-ab4e-edbf26a545bb * */ typedef struct __attribute__((packed)) fib_base_type { uint16_t wIdent; uint16_t nFib; uint16_t unused; uint16_t lid; uint16_t pnNext; uint16_t ABCDEFGHIJKLM; uint16_t nFibBack; uint32_t lKey; uint8_t envr; uint8_t NOPQRS; uint16_t reserved3; uint16_t reserved4; uint32_t reserved5; uint32_t reserved6; } fib_base_t; struct ole2_list_node; typedef struct ole2_list_node { uint32_t Val; struct ole2_list_node *Next; } ole2_list_node_t; typedef struct ole2_list { uint32_t Size; ole2_list_node_t *Head; } ole2_list_t; int ole2_list_init(ole2_list_t *list); int ole2_list_is_empty(ole2_list_t *list); uint32_t ole2_list_size(ole2_list_t *list); int ole2_list_push(ole2_list_t *list, uint32_t val); uint32_t ole2_list_pop(ole2_list_t *list); int ole2_list_delete(ole2_list_t *list); int ole2_list_init(ole2_list_t *list) { list->Head = NULL; list->Size = 0; return CL_SUCCESS; } int ole2_list_is_empty(ole2_list_t *list) { return (list->Head == NULL); } uint32_t ole2_list_size(ole2_list_t *list) { return (list->Size); } int ole2_list_push(ole2_list_t *list, uint32_t val) { ole2_list_node_t *new_node = NULL; int status = CL_EMEM; CLI_MALLOC_OR_GOTO_DONE(new_node, sizeof(ole2_list_node_t), cli_dbgmsg("OLE2: could not allocate new node for worklist!\n")); new_node->Val = val; new_node->Next = list->Head; list->Head = new_node; (list->Size)++; status = CL_SUCCESS; done: return status; } uint32_t ole2_list_pop(ole2_list_t *list) { uint32_t val; ole2_list_node_t *next; if (ole2_list_is_empty(list)) { cli_dbgmsg("OLE2: work list is empty and ole2_list_pop() called!\n"); return -1; } val = list->Head->Val; next = list->Head->Next; free(list->Head); list->Head = next; (list->Size)--; return val; } int ole2_list_delete(ole2_list_t *list) { while (!ole2_list_is_empty(list)) ole2_list_pop(list); return CL_SUCCESS; } #ifdef HAVE_PRAGMA_PACK #pragma pack() #endif #ifdef HAVE_PRAGMA_PACK_HPPA #pragma pack #endif static unsigned char magic_id[] = {0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1}; /** * @brief Get the property name, converting to lower case and replacing non-printable characters. * * @param name The property name * @param size The size of the property name * @return char* The new property name */ char *cli_ole2_get_property_name2(const char *name, int size) { int i, j; char *newname = NULL; if ((name[0] == 0 && name[1] == 0) || size <= 0 || size > 128) { return NULL; } // We may need to replace every character with '_XY_' or '_XYZ_' to form a printable name. // This is because the name may contain non-printable characters. // Allocate 5 times the size of the name to be safe, plus 1 for the NULL terminator. CLI_MAX_MALLOC_OR_GOTO_DONE(newname, size * 5 + 1, cli_errmsg("OLE2 [cli_ole2_get_property_name2]: Unable to allocate memory for newname: %u\n", size * 5)); j = 0; /* size-2 to ignore trailing NULL */ for (i = 0; i < size - 2; i += 2) { if ((!(name[i] & 0x80)) && (isprint(name[i])) && (name[i + 1] == 0)) { // Printable ASCII newname[j++] = tolower(name[i]); } else { // Non-printable. Expand to something unique and printable. if (name[i] < 10 && name[i] >= 0 && name[i + 1] == 0) { // Single digit (next byte is NULL) newname[j++] = '_'; newname[j++] = name[i] + '0'; } else { // Two digits (next byte is not NULL) const uint16_t x = (((uint16_t)name[i]) << 8) | name[i + 1]; newname[j++] = '_'; newname[j++] = 'a' + ((x & 0xF)); newname[j++] = 'a' + ((x >> 4) & 0xF); newname[j++] = 'a' + ((x >> 8) & 0xF); } newname[j++] = '_'; } } newname[j] = '\0'; if (strlen(newname) == 0) { free(newname); newname = NULL; } done: return newname; } static char * get_property_name(char *name, int size) { const char *carray = "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz._"; int csize = size >> 1; char *newname = NULL; char *cname = NULL; char *oname = name; if (csize <= 0) { return NULL; } CLI_MAX_MALLOC_OR_GOTO_DONE(newname, size, cli_errmsg("OLE2 [get_property_name]: Unable to allocate memory for newname %u\n", size)); cname = newname; while (--csize) { uint16_t lo, hi, u = cli_readint16(oname) - 0x3800; oname += 2; if (u > 0x1040) { CLI_FREE_AND_SET_NULL(newname); return cli_ole2_get_property_name2(name, size); } lo = u % 64; u >>= 6; hi = u % 64; *cname++ = carray[lo]; if (csize != 1 || u != 64) { *cname++ = carray[hi]; } } *cname = '\0'; done: return newname; } static void print_ole2_property(property_t *property) { char spam[128], *buf; if (property->name_size > 64) { cli_dbgmsg("[err name len: %d]\n", property->name_size); return; } buf = get_property_name(property->name, property->name_size); snprintf(spam, sizeof(spam), "OLE2: %s ", buf ? buf : ""); spam[sizeof(spam) - 1] = '\0'; if (buf) free(buf); switch (property->type) { case 2: strncat(spam, " [file] ", sizeof(spam) - 1 - strlen(spam)); break; case 1: strncat(spam, " [dir ] ", sizeof(spam) - 1 - strlen(spam)); break; case 5: strncat(spam, " [root] ", sizeof(spam) - 1 - strlen(spam)); break; default: strncat(spam, " [unkn] ", sizeof(spam) - 1 - strlen(spam)); } spam[sizeof(spam) - 1] = '\0'; switch (property->color) { case 0: strncat(spam, " r ", sizeof(spam) - 1 - strlen(spam)); break; case 1: strncat(spam, " b ", sizeof(spam) - 1 - strlen(spam)); break; default: strncat(spam, " u ", sizeof(spam) - 1 - strlen(spam)); } spam[sizeof(spam) - 1] = '\0'; cli_dbgmsg("%s size:0x%.8x flags:0x%.8x\n", spam, property->size, property->user_flags); } static void print_ole2_header(ole2_header_t *hdr) { if (!hdr || !cli_debug_flag) { return; } cli_dbgmsg("\n"); cli_dbgmsg("Magic:\t\t\t0x%x%x%x%x%x%x%x%x\n", hdr->magic[0], hdr->magic[1], hdr->magic[2], hdr->magic[3], hdr->magic[4], hdr->magic[5], hdr->magic[6], hdr->magic[7]); cli_dbgmsg("CLSID:\t\t\t{%x%x%x%x-%x%x-%x%x-%x%x-%x%x%x%x%x%x}\n", hdr->clsid[0], hdr->clsid[1], hdr->clsid[2], hdr->clsid[3], hdr->clsid[4], hdr->clsid[5], hdr->clsid[6], hdr->clsid[7], hdr->clsid[8], hdr->clsid[9], hdr->clsid[10], hdr->clsid[11], hdr->clsid[12], hdr->clsid[13], hdr->clsid[14], hdr->clsid[15]); cli_dbgmsg("Minor version:\t\t0x%x\n", hdr->minor_version); cli_dbgmsg("DLL version:\t\t0x%x\n", hdr->dll_version); cli_dbgmsg("Byte Order:\t\t%d\n", hdr->byte_order); cli_dbgmsg("Big Block Size:\t%i\n", hdr->log2_big_block_size); cli_dbgmsg("Small Block Size:\t%i\n", hdr->log2_small_block_size); cli_dbgmsg("BAT count:\t\t%d\n", hdr->bat_count); cli_dbgmsg("Prop start:\t\t%d\n", hdr->prop_start); cli_dbgmsg("SBAT cutoff:\t\t%d\n", hdr->sbat_cutoff); cli_dbgmsg("SBat start:\t\t%d\n", hdr->sbat_start); cli_dbgmsg("SBat block count:\t%d\n", hdr->sbat_block_count); cli_dbgmsg("XBat start:\t\t%d\n", hdr->xbat_start); cli_dbgmsg("XBat block count:\t%d\n", hdr->xbat_count); cli_dbgmsg("\n"); return; } static bool ole2_read_block(ole2_header_t *hdr, void *buff, size_t size, int32_t blockno) { size_t offset, offend; const void *pblock; if (blockno < 0) { return false; } /* other methods: (blockno+1) * 512 or (blockno * block_size) + 512; */ if (((uint64_t)blockno << hdr->log2_big_block_size) < (INT32_MAX - MAX(512, (uint64_t)1 << hdr->log2_big_block_size))) { /* 512 is header size */ offset = (blockno << hdr->log2_big_block_size) + MAX(512, 1 << hdr->log2_big_block_size); offend = offset + size; } else { offset = INT32_MAX - size; offend = INT32_MAX; } if ((offend == 0) || (offset >= hdr->m_length)) { return false; } else if (offend > hdr->m_length) { /* bb#11369 - ole2 files may not be a block multiple in size */ memset(buff, 0, size); size = hdr->m_length - offset; } if (!(pblock = fmap_need_off_once(hdr->map, offset, size))) { return false; } memcpy(buff, pblock, size); return true; } static int32_t ole2_get_next_bat_block(ole2_header_t *hdr, int32_t current_block) { int32_t bat_array_index; uint32_t bat[128]; if (current_block < 0) { return -1; } bat_array_index = current_block / 128; if (bat_array_index > hdr->bat_count) { cli_dbgmsg("bat_array index error\n"); return -10; } if (!ole2_read_block(hdr, &bat, 512, ole2_endian_convert_32(hdr->bat_array[bat_array_index]))) { return -1; } return ole2_endian_convert_32(bat[current_block - (bat_array_index * 128)]); } static int32_t ole2_get_next_xbat_block(ole2_header_t *hdr, int32_t current_block) { int32_t xbat_index, xbat_block_index, bat_index, bat_blockno; uint32_t xbat[128], bat[128]; if (current_block < 0) { return -1; } xbat_index = current_block / 128; /* * NB: The last entry in each XBAT points to the next XBAT block. * This reduces the number of entries in each block by 1. */ xbat_block_index = (xbat_index - 109) / 127; bat_blockno = (xbat_index - 109) % 127; bat_index = current_block % 128; if (!ole2_read_block(hdr, &xbat, 512, hdr->xbat_start)) { return -1; } /* Follow the chain of XBAT blocks */ while (xbat_block_index > 0) { if (!ole2_read_block(hdr, &xbat, 512, ole2_endian_convert_32(xbat[127]))) { return -1; } xbat_block_index--; } if (!ole2_read_block(hdr, &bat, 512, ole2_endian_convert_32(xbat[bat_blockno]))) { return -1; } return ole2_endian_convert_32(bat[bat_index]); } static int32_t ole2_get_next_block_number(ole2_header_t *hdr, int32_t current_block) { if (current_block < 0) { return -1; } if ((current_block / 128) > 108) { return ole2_get_next_xbat_block(hdr, current_block); } else { return ole2_get_next_bat_block(hdr, current_block); } } static int32_t ole2_get_next_sbat_block(ole2_header_t *hdr, int32_t current_block) { int32_t iter, current_bat_block; uint32_t sbat[128]; if (current_block < 0) { return -1; } current_bat_block = hdr->sbat_start; iter = current_block / 128; while (iter > 0) { current_bat_block = ole2_get_next_block_number(hdr, current_bat_block); iter--; } if (!ole2_read_block(hdr, &sbat, 512, current_bat_block)) { return -1; } return ole2_endian_convert_32(sbat[current_block % 128]); } /* Retrieve the block containing the data for the given sbat index */ static bool ole2_get_sbat_data_block(ole2_header_t *hdr, void *buff, int32_t sbat_index) { int32_t block_count, current_block; if (sbat_index < 0) { return false; } if (hdr->sbat_root_start < 0) { cli_dbgmsg("No root start block\n"); return false; } block_count = sbat_index / (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size)); current_block = hdr->sbat_root_start; while (block_count > 0) { current_block = ole2_get_next_block_number(hdr, current_block); block_count--; } /* * current_block now contains the block number of the sbat array * containing the entry for the required small block */ return (ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block)); } /** * @brief File handler for use when walking ole2 property trees. * * @param hdr The ole2 header metadata * @param prop The property * @param dir (optional) directory to write temp files to. * @param ctx The scan context * @param ole2_data (optional) Context needed by the handler * @return cl_error_t */ typedef cl_error_t ole2_walk_property_tree_file_handler(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx); static cl_error_t handler_writefile(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx); static cl_error_t handler_enum(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx); static cl_error_t handler_otf_encrypted(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx); static cl_error_t handler_otf(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx); /* * Compare strings ignoring case. * This is a somewhat special case, since name is actually a utf-16 encoded string, stored * in a char * with a known size of 64 bytes, so we can avoid a 'alloc since the size is * so small. See https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/60fe8611-66c3-496b-b70d-a504c94c9ace * * @param name: 'name' from property_t struct * @param name_size: 'name_size' from property_t struct * @param keyword: Known value we are looking for * * @return int: Return '0' if the values are equivalent, something else otherwise. */ static int ole2_cmp_name(const char *const name, uint32_t name_size, const char *const keyword) { char decoded[64]; uint32_t i = 0, j = 0; if (64 < name_size || name_size % 2) { return -1; } memset(decoded, 0, sizeof(decoded)); for (i = 0, j = 0; i < name_size; i += 2, j++) { decoded[j] = ((unsigned char)name[i + 1]) << 4; decoded[j] += name[i]; } return strcasecmp(decoded, keyword); } static void copy_fib_base(fib_base_t *pFib, const uint8_t *const ptr) { memcpy(pFib, ptr, sizeof(fib_base_t)); pFib->wIdent = ole2_endian_convert_16(pFib->wIdent); pFib->nFib = ole2_endian_convert_16(pFib->nFib); pFib->unused = ole2_endian_convert_16(pFib->unused); pFib->lid = ole2_endian_convert_16(pFib->lid); pFib->pnNext = ole2_endian_convert_16(pFib->pnNext); /*Don't know whether to do this or not.*/ pFib->ABCDEFGHIJKLM = ole2_endian_convert_16(pFib->ABCDEFGHIJKLM); pFib->nFibBack = ole2_endian_convert_16(pFib->nFibBack); pFib->nFibBack = ole2_endian_convert_32(pFib->lKey); pFib->reserved3 = ole2_endian_convert_16(pFib->reserved3); pFib->reserved4 = ole2_endian_convert_16(pFib->reserved4); pFib->reserved5 = ole2_endian_convert_32(pFib->reserved5); pFib->reserved6 = ole2_endian_convert_32(pFib->reserved6); } static inline bool is_encrypted(const fib_base_t *const pFib) { return pFib->ABCDEFGHIJKLM & (1 << 8); } #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-function" static void dump_fib_base(fib_base_t *pFib) { fprintf(stderr, "%s::%d::%x\n", __FUNCTION__, __LINE__, pFib->wIdent); } /* * This is currently unused, but I am leaving it in in case it can be useful in the future. See * https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-doc/79dea1e9-4dce-4fa0-8c6b-56ba37b68351 * * I have not looked into it in detail, but if it is a 1-byte xor, it could be possible to brute-force it in some cases. * */ static inline bool is_obfuscated(const fib_base_t *const pFib) { return pFib->ABCDEFGHIJKLM & (1 << 15); } #pragma GCC diagnostic pop typedef struct { bool velvet_sweatshop; bool encrypted; const char *encryption_type; } encryption_status_t; const char *const ENCRYPTED_JSON_KEY = "Encrypted"; const char *const RC4_ENCRYPTION = "RC4"; const char *const XOR_OBFUSCATION = "XORObfuscation"; const char *const AES128_ENCRYPTION = "AES128"; const char *const AES192_ENCRYPTION = "AES192"; const char *const AES256_ENCRYPTION = "AES256"; const char *const VELVET_SWEATSHOP_ENCRYPTION = "VelvetSweatshop"; const char *const GENERIC_ENCRYPTED = "ENCRYPTION_TYPE_UNKNOWN"; const char *const OLE2_HEURISTIC_ENCRYPTED_WARNING = "Heuristics.Encrypted.OLE2"; const uint16_t XLS_XOR_OBFUSCATION = 0; const uint16_t XLS_RC4_ENCRYPTION = 1; const uint32_t MINISTREAM_CUTOFF_SIZE = 0x1000; static size_t get_stream_data_offset(ole2_header_t *hdr, const property_t *word_block, uint16_t sector) { size_t offset = (1 << hdr->log2_big_block_size); size_t sector_size = offset; size_t fib_offset = 0; if (word_block->size < MINISTREAM_CUTOFF_SIZE) { fib_offset = offset + sector_size * hdr->sbat_root_start; fib_offset += (word_block->start_block * (1 << hdr->log2_small_block_size)); } else { fib_offset = offset + sector_size * sector; } return fib_offset; } /* See information about the File Information Block here * https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-doc/26fb6c06-4e5c-4778-ab4e-edbf26a545bb * for more information. */ static void test_for_encryption(const property_t *word_block, ole2_header_t *hdr, encryption_status_t *pEncryptionStatus) { const uint8_t *ptr = NULL; fib_base_t fib = {0}; uint32_t fib_offset = get_stream_data_offset(hdr, word_block, word_block->start_block); if ((size_t)(hdr->m_length) < (size_t)(fib_offset + sizeof(fib_base_t))) { cli_dbgmsg("ERROR: Invalid offset for File Information Block %d (0x%x)\n", fib_offset, fib_offset); return; } ptr = fmap_need_off_once(hdr->map, fib_offset, sizeof(fib_base_t)); if (NULL == ptr) { cli_dbgmsg("ERROR: Invalid offset for File Information Block %d (0x%x)\n", fib_offset, fib_offset); return; } copy_fib_base(&fib, ptr); #define FIB_BASE_IDENTIFIER 0xa5ec if (FIB_BASE_IDENTIFIER != fib.wIdent) { cli_dbgmsg("ERROR: Invalid identifier for File Information Block %d (0x%x)\n", fib.wIdent, fib.wIdent); return; } /*TODO: Look into whether or not it's possible to determine the xor key when * a document is obfuscated with xor * (is_obfuscated function) */ pEncryptionStatus->encrypted = is_encrypted(&fib); if (is_obfuscated(&fib)) { pEncryptionStatus->encryption_type = XOR_OBFUSCATION; } } static size_t read_uint16(const uint8_t *const ptr, uint32_t ptr_size, uint32_t *idx, uint16_t *dst) { if (*idx + sizeof(uint16_t) >= ptr_size) { return 0; } memcpy(dst, &(ptr[*idx]), 2); *dst = ole2_endian_convert_16(*dst); *idx += sizeof(uint16_t); return sizeof(uint16_t); } /* Search for the FILE_PASS number. If I don't find it, the next two bytes are * a length. Consume that length of data, and try again. Go until you either find * the number or run out of data. */ static bool find_file_pass(const uint8_t *const ptr, uint32_t ptr_size, uint32_t *idx) { uint16_t val, size; const uint32_t FILE_PASS_NUM = 47; while (true) { if (sizeof(uint16_t) != read_uint16(ptr, ptr_size, idx, &val)) { return false; } if (sizeof(uint16_t) != read_uint16(ptr, ptr_size, idx, &size)) { return false; } if (FILE_PASS_NUM == val) { return true; } *idx += size; } /*Should never get here.*/ return false; } /* * Search for the FilePass structure. * https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/cf9ae8d5-4e8c-40a2-95f1-3b31f16b5529 */ static void test_for_xls_encryption(const property_t *word_block, ole2_header_t *hdr, encryption_status_t *pEncryptionStatus) { uint16_t tmp16; uint32_t idx; uint32_t stream_data_offset = get_stream_data_offset(hdr, word_block, word_block->start_block); uint32_t block_size = (1 << hdr->log2_big_block_size); const uint8_t *const ptr = fmap_need_off_once(hdr->map, stream_data_offset, block_size); if (NULL == ptr) { cli_dbgmsg("ERROR: Invalid offset for File Information Block %d (0x%x)\n", stream_data_offset, stream_data_offset); return; } /*Validate keyword*/ idx = 0; if (sizeof(uint16_t) != read_uint16(ptr, block_size, &idx, &tmp16)) { return; } /*Invalid keyword*/ if (2057 != tmp16) { return; } /*Skip past this size.*/ if (sizeof(uint16_t) != read_uint16(ptr, block_size, &idx, &tmp16)) { return; } idx += tmp16; if (!find_file_pass(ptr, block_size, &idx)) { return; } if (sizeof(uint16_t) != read_uint16(ptr, block_size, &idx, &tmp16)) { return; } if (XLS_RC4_ENCRYPTION == tmp16) { pEncryptionStatus->encryption_type = RC4_ENCRYPTION; pEncryptionStatus->encrypted = true; } else if (XLS_XOR_OBFUSCATION == tmp16) { pEncryptionStatus->encryption_type = XOR_OBFUSCATION; pEncryptionStatus->encrypted = true; } } /** * @brief Walk an ole2 property tree, calling the handler for each file found * * @param hdr The ole2 header metadata (an ole2-specific context struct) * @param dir (optional) directory to write temp files to, passed to the handler. * @param prop_index Index of the property being walked, to be recorded with a pointer to the root node in an ole2 node list. * @param handler The file handler to call when a file is found. * @param rec_level The recursion level. Max is 100. * @param[in,out] file_count A running count of the total # of files. Max is 100000. * @param ctx The scan context * @param[in,out] scansize A running sum of the file sizes processed. * @return int */ static int ole2_walk_property_tree(ole2_header_t *hdr, const char *dir, int32_t prop_index, ole2_walk_property_tree_file_handler handler, unsigned int rec_level, unsigned int *file_count, cli_ctx *ctx, unsigned long *scansize, void *handler_ctx, encryption_status_t *pEncryptionStatus) { property_t prop_block[4]; int32_t idx, current_block, i, curindex; char *dirname; ole2_list_t node_list; cl_error_t ret; char *name; int toval = 0; ole2_listmsg("ole2_walk_property_tree() called\n"); ole2_list_init(&node_list); ole2_listmsg("rec_level: %d\n", rec_level); ole2_listmsg("file_count: %d\n", *file_count); if ((rec_level > 100) || (*file_count > 100000)) { return CL_SUCCESS; } if (ctx && ctx->engine->max_recursion_level && (rec_level > ctx->engine->max_recursion_level)) { // Note: engine->max_recursion_level is re-purposed here out of convenience. // ole2 recursion does not leverage the ctx->recursion_stack stack. cli_dbgmsg("OLE2: Recursion limit reached (max: %d)\n", ctx->engine->max_recursion_level); cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxRecursion"); return CL_EMAXREC; } // push the 'root' node for the level onto the local list if ((ret = ole2_list_push(&node_list, prop_index)) != CL_SUCCESS) { ole2_list_delete(&node_list); return ret; } while (!ole2_list_is_empty(&node_list)) { ole2_listmsg("within working loop, worklist size: %d\n", ole2_list_size(&node_list)); if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) { ole2_list_delete(&node_list); return CL_ETIMEOUT; } current_block = hdr->prop_start; // pop off a node to work on curindex = ole2_list_pop(&node_list); ole2_listmsg("current index: %d\n", curindex); if ((curindex < 0) || (curindex > (int32_t)hdr->max_block_no)) { continue; } // read in the sector referenced by the current index idx = curindex / 4; for (i = 0; i < idx; i++) { current_block = ole2_get_next_block_number(hdr, current_block); if (current_block < 0) { continue; } } idx = curindex % 4; if (!ole2_read_block(hdr, prop_block, 512, current_block)) { continue; } if (prop_block[idx].type <= 0) { continue; } ole2_listmsg("reading prop block\n"); prop_block[idx].name_size = ole2_endian_convert_16(prop_block[idx].name_size); prop_block[idx].prev = ole2_endian_convert_32(prop_block[idx].prev); prop_block[idx].next = ole2_endian_convert_32(prop_block[idx].next); prop_block[idx].child = ole2_endian_convert_32(prop_block[idx].child); prop_block[idx].user_flags = ole2_endian_convert_32(prop_block[idx].user_flags); prop_block[idx].create_lowdate = ole2_endian_convert_32(prop_block[idx].create_lowdate); prop_block[idx].create_highdate = ole2_endian_convert_32(prop_block[idx].create_highdate); prop_block[idx].mod_lowdate = ole2_endian_convert_32(prop_block[idx].mod_lowdate); prop_block[idx].mod_highdate = ole2_endian_convert_32(prop_block[idx].mod_highdate); prop_block[idx].start_block = ole2_endian_convert_32(prop_block[idx].start_block); prop_block[idx].size = ole2_endian_convert_32(prop_block[idx].size); if ((64 < prop_block[idx].name_size) || (prop_block[idx].name_size % 2)) { cli_dbgmsg("ERROR: Invalid name_size %d\n", prop_block[idx].name_size); continue; } if (0 == ole2_cmp_name(prop_block[idx].name, prop_block[idx].name_size, "WORDDocument")) { test_for_encryption(&(prop_block[idx]), hdr, pEncryptionStatus); } else if (0 == ole2_cmp_name(prop_block[idx].name, prop_block[idx].name_size, "WorkBook")) { test_for_xls_encryption(&(prop_block[idx]), hdr, pEncryptionStatus); } else if (0 == ole2_cmp_name(prop_block[idx].name, prop_block[idx].name_size, "PowerPoint Document")) { test_for_encryption(&(prop_block[idx]), hdr, pEncryptionStatus); } else if (0 == ole2_cmp_name(prop_block[idx].name, prop_block[idx].name_size, "EncryptionInfo")) { pEncryptionStatus->encrypted = true; } else if (0 == ole2_cmp_name(prop_block[idx].name, prop_block[idx].name_size, "EncryptedPackage")) { pEncryptionStatus->encrypted = true; } ole2_listmsg("printing ole2 property\n"); if (dir) print_ole2_property(&prop_block[idx]); ole2_listmsg("checking bitset\n"); /* Check we aren't in a loop */ if (cli_bitset_test(hdr->bitset, (unsigned long)curindex)) { /* Loop in property tree detected */ cli_dbgmsg("OLE2: Property tree loop detected at index %d\n", curindex); ole2_list_delete(&node_list); return CL_BREAK; } ole2_listmsg("setting bitset\n"); if (!cli_bitset_set(hdr->bitset, (unsigned long)curindex)) { continue; } ole2_listmsg("prev: %d next %d child %d\n", prop_block[idx].prev, prop_block[idx].next, prop_block[idx].child); ole2_listmsg("node type: %d\n", prop_block[idx].type); switch (prop_block[idx].type) { case 5: /* Root Entry */ ole2_listmsg("root node\n"); if ((curindex != 0) || (rec_level != 0) || (*file_count != 0)) { /* Can only have RootEntry as the top */ cli_dbgmsg("ERROR: illegal Root Entry\n"); continue; } hdr->sbat_root_start = prop_block[idx].start_block; if ((int)(prop_block[idx].child) != -1) { ret = ole2_walk_property_tree(hdr, dir, prop_block[idx].child, handler, rec_level + 1, file_count, ctx, scansize, handler_ctx, pEncryptionStatus); if (ret != CL_SUCCESS) { ole2_list_delete(&node_list); return ret; } } if ((int)(prop_block[idx].prev) != -1) { if ((ret = ole2_list_push(&node_list, prop_block[idx].prev)) != CL_SUCCESS) { ole2_list_delete(&node_list); return ret; } } if ((int)(prop_block[idx].next) != -1) { if ((ret = ole2_list_push(&node_list, prop_block[idx].next)) != CL_SUCCESS) { ole2_list_delete(&node_list); return ret; } } break; case 2: /* File */ ole2_listmsg("file node\n"); if (ctx && ctx->engine->maxfiles && ((*file_count > ctx->engine->maxfiles) || (ctx->scannedfiles > ctx->engine->maxfiles - *file_count))) { cli_dbgmsg("OLE2: files limit reached (max: %u)\n", ctx->engine->maxfiles); cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles"); ole2_list_delete(&node_list); return CL_EMAXFILES; } if (!ctx || !(ctx->engine->maxfilesize) || prop_block[idx].size <= ctx->engine->maxfilesize || prop_block[idx].size <= *scansize) { (*file_count)++; *scansize -= prop_block[idx].size; ole2_listmsg("running file handler\n"); ret = handler(hdr, &prop_block[idx], dir, ctx, handler_ctx); if (ret != CL_SUCCESS) { ole2_listmsg("file handler returned %d\n", ret); ole2_list_delete(&node_list); return ret; } } else { cli_dbgmsg("OLE2: filesize exceeded\n"); } if ((int)(prop_block[idx].child) != -1) { ret = ole2_walk_property_tree(hdr, dir, prop_block[idx].child, handler, rec_level, file_count, ctx, scansize, handler_ctx, pEncryptionStatus); if (ret != CL_SUCCESS) { ole2_list_delete(&node_list); return ret; } } if ((int)(prop_block[idx].prev) != -1) { if ((ret = ole2_list_push(&node_list, prop_block[idx].prev)) != CL_SUCCESS) { ole2_list_delete(&node_list); return ret; } } if ((int)(prop_block[idx].next) != -1) { if ((ret = ole2_list_push(&node_list, prop_block[idx].next)) != CL_SUCCESS) { ole2_list_delete(&node_list); return ret; } } break; case 1: /* Directory */ ole2_listmsg("directory node\n"); if (dir) { if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) { if (!json_object_object_get_ex(ctx->wrkproperty, "DigitalSignatures", NULL)) { name = cli_ole2_get_property_name2(prop_block[idx].name, prop_block[idx].name_size); if (name) { if (!strcmp(name, "_xmlsignatures") || !strcmp(name, "_signatures")) { cli_jsonbool(ctx->wrkproperty, "HasDigitalSignatures", 1); } free(name); } } } dirname = (char *)cli_max_malloc(strlen(dir) + 8); if (!dirname) { ole2_listmsg("OLE2: malloc failed for dirname\n"); ole2_list_delete(&node_list); return CL_EMEM; } snprintf(dirname, strlen(dir) + 8, "%s" PATHSEP "%.6d", dir, curindex); if (mkdir(dirname, 0700) != 0) { ole2_listmsg("OLE2: mkdir failed for directory %s\n", dirname); free(dirname); ole2_list_delete(&node_list); return CL_BREAK; } cli_dbgmsg("OLE2 dir entry: %s\n", dirname); } else dirname = NULL; if ((int)(prop_block[idx].child) != -1) { ret = ole2_walk_property_tree(hdr, dirname, prop_block[idx].child, handler, rec_level + 1, file_count, ctx, scansize, handler_ctx, pEncryptionStatus); if (ret != CL_SUCCESS) { ole2_list_delete(&node_list); if (dirname) { free(dirname); } return ret; } } if (dirname) { free(dirname); dirname = NULL; } if ((int)(prop_block[idx].prev) != -1) { if ((ret = ole2_list_push(&node_list, prop_block[idx].prev)) != CL_SUCCESS) { ole2_list_delete(&node_list); return ret; } } if ((int)(prop_block[idx].next) != -1) { if ((ret = ole2_list_push(&node_list, prop_block[idx].next)) != CL_SUCCESS) { ole2_list_delete(&node_list); return ret; } } break; default: cli_dbgmsg("ERROR: unknown OLE2 entry type: %d\n", prop_block[idx].type); break; } ole2_listmsg("loop ended: %d %d\n", ole2_list_size(&node_list), ole2_list_is_empty(&node_list)); } ole2_list_delete(&node_list); return CL_SUCCESS; } /* Write file Handler - write the contents of the entry to a file */ static cl_error_t handler_writefile(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx) { cl_error_t ret = CL_BREAK; char newname[1024]; char *name = NULL; unsigned char *buff = NULL; int32_t current_block = 0; size_t len = 0, offset = 0; int ofd = -1; char *hash = NULL; bitset_t *blk_bitset = NULL; uint32_t cnt = 0; UNUSEDPARAM(ctx); UNUSEDPARAM(handler_ctx); if (prop->type != 2) { /* Not a file */ ret = CL_SUCCESS; goto done; } if (prop->name_size > 64) { cli_dbgmsg("OLE2 [handler_writefile]: property name too long: %d\n", prop->name_size); ret = CL_SUCCESS; goto done; } name = cli_ole2_get_property_name2(prop->name, prop->name_size); if (name) { cli_dbgmsg("Storing %s in uniq\n", name); if (CL_SUCCESS != uniq_add(hdr->U, name, strlen(name), &hash, &cnt)) { cli_dbgmsg("OLE2 [handler_writefile]: too many property names added to uniq store.\n"); goto done; } } else { if (CL_SUCCESS != uniq_add(hdr->U, NULL, 0, &hash, &cnt)) { cli_dbgmsg("OLE2 [handler_writefile]: too many property names added to uniq store.\n"); goto done; } } snprintf(newname, sizeof(newname), "%s" PATHSEP "%s_%u", dir, hash, cnt); newname[sizeof(newname) - 1] = '\0'; cli_dbgmsg("OLE2 [handler_writefile]: Dumping '%s' to '%s'\n", name ? name : "", newname); ofd = open(newname, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR); if (ofd < 0) { cli_errmsg("OLE2 [handler_writefile]: failed to create file: %s\n", newname); ret = CL_SUCCESS; goto done; } current_block = prop->start_block; len = prop->size; CLI_MAX_MALLOC_OR_GOTO_DONE(buff, 1 << hdr->log2_big_block_size, cli_errmsg("OLE2 [handler_writefile]: Unable to allocate memory for buff: %u\n", 1 << hdr->log2_big_block_size); ret = CL_EMEM); blk_bitset = cli_bitset_init(); if (!blk_bitset) { cli_errmsg("OLE2 [handler_writefile]: init bitset failed\n"); goto done; } while ((current_block >= 0) && (len > 0)) { if (current_block > (int32_t)hdr->max_block_no) { cli_dbgmsg("OLE2 [handler_writefile]: Max block number for file size exceeded: %d\n", current_block); break; } /* Check we aren't in a loop */ if (cli_bitset_test(blk_bitset, (unsigned long)current_block)) { /* Loop in block list */ cli_dbgmsg("OLE2 [handler_writefile]: Block list loop detected\n"); break; } if (!cli_bitset_set(blk_bitset, (unsigned long)current_block)) { break; } if (prop->size < (int64_t)hdr->sbat_cutoff) { /* Small block file */ if (!ole2_get_sbat_data_block(hdr, buff, current_block)) { cli_dbgmsg("OLE2 [handler_writefile]: ole2_get_sbat_data_block failed\n"); break; } /* buff now contains the block with N small blocks in it */ offset = (((size_t)1) << hdr->log2_small_block_size) * (((size_t)current_block) % (((size_t)1) << (hdr->log2_big_block_size - hdr->log2_small_block_size))); if (cli_writen(ofd, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size)) != MIN(len, 1 << hdr->log2_small_block_size)) { goto done; } len -= MIN(len, 1 << hdr->log2_small_block_size); current_block = ole2_get_next_sbat_block(hdr, current_block); } else { /* Big block file */ if (!ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block)) { break; } if (cli_writen(ofd, buff, MIN(len, (1 << hdr->log2_big_block_size))) != MIN(len, (1 << hdr->log2_big_block_size))) { ret = CL_EWRITE; goto done; } current_block = ole2_get_next_block_number(hdr, current_block); len -= MIN(len, (1 << hdr->log2_big_block_size)); } } /* * Unlike w/ handler_otf(), the ole2 summary JSON will be recorded * when we re-ingest the files we wrote above when we scan the directory. * See cli_ole2_tempdir_scan_vba() */ ret = CL_SUCCESS; done: CLI_FREE_AND_SET_NULL(name); if (-1 != ofd) { close(ofd); } CLI_FREE_AND_SET_NULL(buff); if (NULL != blk_bitset) { cli_bitset_free(blk_bitset); } return ret; } enum biff_parser_states { BIFF_PARSER_INITIAL, BIFF_PARSER_EXPECTING_2ND_TAG_BYTE, BIFF_PARSER_EXPECTING_1ST_LENGTH_BYTE, BIFF_PARSER_EXPECTING_2ND_LENGTH_BYTE, BIFF_PARSER_NAME_RECORD, BIFF_PARSER_BOUNDSHEET_RECORD, BIFF_PARSER_MSODRAWINGGROUP_RECORD, BIFF_PARSER_DATA, }; struct biff_parser_state { enum biff_parser_states state; uint16_t opcode; uint16_t length; uint16_t data_offset; uint8_t tmp; }; /** * Scan through a buffer of BIFF records and find PARSERNAME, BOUNDSHEET records (Which indicate XLM macros). * BIFF streams follow the format OOLLDDDDDDDDD..., where OO is the opcode (little endian 16 bit value), * LL is the data length (little endian 16 bit value), followed by LL bytes of data. Records are defined in * the MICROSOFT OFFICE EXCEL 97-2007 BINARY FILE FORMAT SPECIFICATION. * * \param state The parser state. * \param buff The buffer. * \param len The buffer's size in bytes. * \param ctx The ClamAV context for emitting JSON about the document. * \returns true if a macro has been found, false otherwise. */ static cl_error_t scan_biff_for_xlm_macros_and_images( struct biff_parser_state *state, unsigned char *buff, size_t len, cli_ctx *ctx, bool *found_macro, bool *found_image) { cl_error_t status = CL_EFORMAT; size_t i; for (i = 0; i < len; ++i) { switch (state->state) { case BIFF_PARSER_INITIAL: state->opcode = buff[i]; state->state = BIFF_PARSER_EXPECTING_2ND_TAG_BYTE; break; case BIFF_PARSER_EXPECTING_2ND_TAG_BYTE: state->opcode |= buff[i] << 8; state->state = BIFF_PARSER_EXPECTING_1ST_LENGTH_BYTE; break; case BIFF_PARSER_EXPECTING_1ST_LENGTH_BYTE: state->length = buff[i]; state->state = BIFF_PARSER_EXPECTING_2ND_LENGTH_BYTE; break; case BIFF_PARSER_EXPECTING_2ND_LENGTH_BYTE: state->length |= buff[i] << 8; state->data_offset = 0; switch (state->opcode) { case OPC_BOUNDSHEET: state->state = BIFF_PARSER_BOUNDSHEET_RECORD; break; case OPC_NAME: state->state = BIFF_PARSER_NAME_RECORD; break; case OPC_MSODRAWINGGROUP: state->state = BIFF_PARSER_MSODRAWINGGROUP_RECORD; break; default: state->state = BIFF_PARSER_DATA; break; } if (state->length == 0) { state->state = BIFF_PARSER_INITIAL; } break; default: switch (state->state) { case BIFF_PARSER_NAME_RECORD: if (state->data_offset == 0) { state->tmp = buff[i] & 0x20; } else if ((state->data_offset == 14 || state->data_offset == 15) && state->tmp) { if (buff[i] == 1 || buff[i] == 2) { if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) { json_object *indicators = cli_jsonarray(ctx->wrkproperty, "MacroIndicators"); if (indicators) { cli_jsonstr(indicators, NULL, "autorun"); } else { cli_dbgmsg("[scan_biff_for_xlm_macros_and_images] Failed to add \"autorun\" entry to MacroIndicators JSON array\n"); } } } if (buff[i] != 0) { state->tmp = 0; } } break; case BIFF_PARSER_BOUNDSHEET_RECORD: if (state->data_offset == 4) { state->tmp = buff[i]; } else if (state->data_offset == 5 && buff[i] == 1) { // Excel 4.0 macro sheet cli_dbgmsg("[scan_biff_for_xlm_macros_and_images] Found XLM macro sheet\n"); if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) { cli_jsonbool(ctx->wrkproperty, "HasMacros", 1); json_object *macro_languages = cli_jsonarray(ctx->wrkproperty, "MacroLanguages"); if (macro_languages) { cli_jsonstr(macro_languages, NULL, "XLM"); } else { cli_dbgmsg("[scan_biff_for_xlm_macros_and_images] Failed to add \"XLM\" entry to MacroLanguages JSON array\n"); } if (state->tmp == 1 || state->tmp == 2) { json_object *indicators = cli_jsonarray(ctx->wrkproperty, "MacroIndicators"); if (indicators) { cli_jsonstr(indicators, NULL, "hidden"); } else { cli_dbgmsg("[scan_biff_for_xlm_macros_and_images] Failed to add \"hidden\" entry to MacroIndicators JSON array\n"); } } } *found_macro = true; } break; case BIFF_PARSER_DATA: break; case BIFF_PARSER_MSODRAWINGGROUP_RECORD: // Embedded image found if (true != *found_image) { *found_image = true; cli_dbgmsg("[scan_biff_for_xlm_macros_and_images] Found image in sheet\n"); } break; default: // Should never arrive here cli_dbgmsg("[scan_biff_for_xlm_macros_and_images] Unexpected state value %d\n", (int)state->state); break; } state->data_offset += 1; if (state->data_offset >= state->length) { state->state = BIFF_PARSER_INITIAL; } } } status = CL_SUCCESS; return status; } /** * @brief Scan for XLM (Excel 4.0) macro sheets and images in an OLE2 Workbook stream. * * The stream should be encoded with <= BIFF8. * The found_macro and found_image out-params should be checked even if an error occurred. * * @param hdr * @param prop * @param ctx * @param found_macro [out] If any macros were found * @param found_image [out] If any images were found * @return cl_error_t CL_EPARSE if an error was encountered * @return cl_error_t CL_EMEM if a memory issue was encountered. * @return cl_error_t CL_SUCCESS if no errors were encountered. */ static cl_error_t scan_for_xlm_macros_and_images(ole2_header_t *hdr, property_t *prop, cli_ctx *ctx, bool *found_macro, bool *found_image) { cl_error_t status = CL_EPARSE; unsigned char *buff = NULL; int32_t current_block = 0; size_t len = 0, offset = 0; bitset_t *blk_bitset = NULL; struct biff_parser_state state = {0}; if (prop->type != 2) { /* Not a file */ goto done; } memset(&state, 0, sizeof(state)); state.state = BIFF_PARSER_INITIAL; current_block = prop->start_block; len = prop->size; CLI_MAX_MALLOC_OR_GOTO_DONE(buff, 1 << hdr->log2_big_block_size, cli_errmsg("OLE2 [scan_for_xlm_macros_and_images]: Unable to allocate memory for buff: %u\n", 1 << hdr->log2_big_block_size); status = CL_EMEM); blk_bitset = cli_bitset_init(); if (!blk_bitset) { cli_errmsg("OLE2 [scan_for_xlm_macros_and_images]: init bitset failed\n"); goto done; } while ((current_block >= 0) && (len > 0)) { if (current_block > (int32_t)hdr->max_block_no) { cli_dbgmsg("OLE2 [scan_for_xlm_macros_and_images]: Max block number for file size exceeded: %d\n", current_block); goto done; } /* Check we aren't in a loop */ if (cli_bitset_test(blk_bitset, (unsigned long)current_block)) { /* Loop in block list */ cli_dbgmsg("OLE2 [scan_for_xlm_macros_and_images]: Block list loop detected\n"); goto done; } if (!cli_bitset_set(blk_bitset, (unsigned long)current_block)) { goto done; } if (prop->size < (int64_t)hdr->sbat_cutoff) { /* Small block file */ if (!ole2_get_sbat_data_block(hdr, buff, current_block)) { cli_dbgmsg("OLE2 [scan_for_xlm_macros_and_images]: ole2_get_sbat_data_block failed\n"); goto done; } /* buff now contains the block with N small blocks in it */ offset = (1 << hdr->log2_small_block_size) * (current_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size))); (void)scan_biff_for_xlm_macros_and_images(&state, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size), ctx, found_macro, found_image); len -= MIN(len, 1 << hdr->log2_small_block_size); current_block = ole2_get_next_sbat_block(hdr, current_block); } else { /* Big block file */ if (!ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block)) { goto done; } (void)scan_biff_for_xlm_macros_and_images(&state, buff, MIN(len, (1 << hdr->log2_big_block_size)), ctx, found_macro, found_image); current_block = ole2_get_next_block_number(hdr, current_block); len -= MIN(len, (1 << hdr->log2_big_block_size)); } } status = CL_SUCCESS; done: CLI_FREE_AND_SET_NULL(buff); if (blk_bitset) { cli_bitset_free(blk_bitset); } return status; } /** * @brief enum file Handler - checks for VBA presence * * @param hdr * @param prop * @param dir * @param ctx the scan context * @return cl_error_t */ static cl_error_t handler_enum(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx) { cl_error_t status = CL_EREAD; char *name = NULL; unsigned char *hwp_check = NULL; int32_t offset = 0; json_object *arrobj = NULL; json_object *strmobj = NULL; UNUSEDPARAM(handler_ctx); UNUSEDPARAM(dir); name = cli_ole2_get_property_name2(prop->name, prop->name_size); if (name) { if (SCAN_COLLECT_METADATA && ctx->wrkproperty != NULL) { arrobj = cli_jsonarray(ctx->wrkproperty, "Streams"); if (NULL == arrobj) { cli_warnmsg("ole2: no memory for streams list or streams is not an array\n"); } else { strmobj = json_object_new_string(name); json_object_array_add(arrobj, strmobj); } if (!strcmp(name, "powerpoint document")) { cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_MSPPT"); } if (!strcmp(name, "worddocument")) { cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_MSWORD"); } if (!strcmp(name, "workbook")) { cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_MSXL"); } } } if (!hdr->has_vba) { if (!name) name = cli_ole2_get_property_name2(prop->name, prop->name_size); if (name) { if (!strcmp(name, "_vba_project") || !strcmp(name, "powerpoint document") || !strcmp(name, "worddocument") || !strcmp(name, "_1_ole10native")) hdr->has_vba = 1; } } /* * if we can find a root entry fileheader, it may be a HWP file * identify the HWP signature "HWP Document File" at offset 0 stream */ if (!hdr->is_hwp) { if (!name) { name = cli_ole2_get_property_name2(prop->name, prop->name_size); } if (name) { if (!strcmp(name, "fileheader")) { CLI_MAX_CALLOC_OR_GOTO_DONE(hwp_check, 1, 1 << hdr->log2_big_block_size, status = CL_EMEM); /* reading safety checks; do-while used for breaks */ do { if (prop->size == 0) break; if (prop->start_block > hdr->max_block_no) break; /* read the header block (~256 bytes) */ offset = 0; if (prop->size < (int64_t)hdr->sbat_cutoff) { if (!ole2_get_sbat_data_block(hdr, hwp_check, prop->start_block)) { break; } offset = (1 << hdr->log2_small_block_size) * (prop->start_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size))); /* reading safety */ if (offset + 40 >= 1 << hdr->log2_big_block_size) break; } else { if (!ole2_read_block(hdr, hwp_check, 1 << hdr->log2_big_block_size, prop->start_block)) { break; } } /* compare against HWP signature; we could add the 15 padding NULLs too */ if (!memcmp(hwp_check + offset, "HWP Document File", 17)) { hwp5_header_t *hwp_new; cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_HWP5"); CLI_CALLOC_OR_GOTO_DONE(hwp_new, 1, sizeof(hwp5_header_t), status = CL_EMEM); /* * Copy the header information into our header struct. */ memcpy(hwp_new, hwp_check + offset, sizeof(hwp5_header_t)); hwp_new->version = ole2_endian_convert_32(hwp_new->version); hwp_new->flags = ole2_endian_convert_32(hwp_new->flags); hdr->is_hwp = hwp_new; } } while (0); } } } /* If we've already found a macro and an image, we can skip this initial check. This scan step is to save a little time so we don't have to fully parse it later if never find anything.. */ if (!hdr->has_xlm || !hdr->has_image) { if (!name) { name = cli_ole2_get_property_name2(prop->name, prop->name_size); } if (name && (strcmp(name, "workbook") == 0 || strcmp(name, "book") == 0)) { (void)scan_for_xlm_macros_and_images(hdr, prop, ctx, &hdr->has_xlm, &hdr->has_image); } } status = CL_SUCCESS; done: CLI_FREE_AND_SET_NULL(name); CLI_FREE_AND_SET_NULL(hwp_check); return status; } static int likely_mso_stream(int fd) { off_t fsize; unsigned char check[2]; fsize = lseek(fd, 0, SEEK_END); if (fsize == -1) { cli_dbgmsg("likely_mso_stream: call to lseek() failed\n"); return 0; } else if (fsize < 6) { return 0; } if (lseek(fd, 4, SEEK_SET) == -1) { cli_dbgmsg("likely_mso_stream: call to lseek() failed\n"); return 0; } if (cli_readn(fd, check, 2) != 2) { cli_dbgmsg("likely_mso_stream: reading from fd failed\n"); return 0; } if (check[0] == 0x78 && check[1] == 0x9C) return 1; return 0; } static cl_error_t scan_mso_stream(int fd, cli_ctx *ctx) { int zret, ofd; cl_error_t ret = CL_SUCCESS; fmap_t *input; off_t off_in = 0; size_t count, outsize = 0; z_stream zstrm; char *tmpname; uint32_t prefix; unsigned char inbuf[FILEBUFF], outbuf[FILEBUFF]; /* fmap the input file for easier manipulation */ if (fd < 0) { cli_dbgmsg("scan_mso_stream: Invalid file descriptor argument\n"); return CL_ENULLARG; } else { STATBUF statbuf; if (FSTAT(fd, &statbuf) == -1) { cli_dbgmsg("scan_mso_stream: Can't stat file descriptor\n"); return CL_ESTAT; } input = fmap(fd, 0, statbuf.st_size, NULL); if (!input) { cli_dbgmsg("scan_mso_stream: Failed to get fmap for input stream\n"); return CL_EMAP; } } /* reserve tempfile for output and scanning */ if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &ofd)) != CL_SUCCESS) { cli_errmsg("scan_mso_stream: Can't generate temporary file\n"); funmap(input); return ret; } /* initialize zlib inflation stream */ memset(&zstrm, 0, sizeof(zstrm)); zstrm.zalloc = Z_NULL; zstrm.zfree = Z_NULL; zstrm.opaque = Z_NULL; zstrm.next_in = inbuf; zstrm.next_out = outbuf; zstrm.avail_in = 0; zstrm.avail_out = FILEBUFF; zret = inflateInit(&zstrm); if (zret != Z_OK) { cli_dbgmsg("scan_mso_stream: Can't initialize zlib inflation stream\n"); ret = CL_EUNPACK; goto mso_end; } /* extract 32-bit prefix */ if (fmap_readn(input, &prefix, off_in, sizeof(prefix)) != sizeof(prefix)) { cli_dbgmsg("scan_mso_stream: Can't extract 4-byte prefix\n"); ret = CL_EREAD; goto mso_end; } /* RFC1952 says numbers are stored with least significant byte first */ prefix = le32_to_host(prefix); off_in += sizeof(uint32_t); cli_dbgmsg("scan_mso_stream: stream prefix = %08x(%d)\n", prefix, prefix); /* inflation loop */ do { if (zstrm.avail_in == 0) { size_t bytes_read; zstrm.next_in = inbuf; bytes_read = fmap_readn(input, inbuf, off_in, FILEBUFF); if (bytes_read == (size_t)-1) { cli_errmsg("scan_mso_stream: Error reading MSO file\n"); ret = CL_EUNPACK; goto mso_end; } if (bytes_read == 0) break; zstrm.avail_in = bytes_read; off_in += bytes_read; } zret = inflate(&zstrm, Z_SYNC_FLUSH); count = FILEBUFF - zstrm.avail_out; if (count) { if (cli_checklimits("MSO", ctx, outsize + count, 0, 0) != CL_SUCCESS) break; if (cli_writen(ofd, outbuf, count) != count) { cli_errmsg("scan_mso_stream: Can't write to file %s\n", tmpname); ret = CL_EWRITE; goto mso_end; } outsize += count; } zstrm.next_out = outbuf; zstrm.avail_out = FILEBUFF; } while (zret == Z_OK); /* post inflation checks */ if (zret != Z_STREAM_END && zret != Z_OK) { if (outsize == 0) { cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. No data decompressed.\n"); ret = CL_EUNPACK; goto mso_end; } cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. Scanning what was decompressed.\n"); } cli_dbgmsg("scan_mso_stream: Decompressed %llu bytes to %s\n", (long long unsigned)outsize, tmpname); if (outsize != prefix) { cli_warnmsg("scan_mso_stream: declared prefix != inflated stream size, %llu != %llu\n", (long long unsigned)prefix, (long long unsigned)outsize); } else { cli_dbgmsg("scan_mso_stream: declared prefix == inflated stream size, %llu == %llu\n", (long long unsigned)prefix, (long long unsigned)outsize); } /* scanning inflated stream */ ret = cli_magic_scan_desc(ofd, tmpname, ctx, NULL, LAYER_ATTRIBUTES_NONE); /* clean-up */ mso_end: zret = inflateEnd(&zstrm); if (zret != Z_OK) ret = CL_EUNPACK; close(ofd); if (!ctx->engine->keeptmp) if (cli_unlink(tmpname)) ret = CL_EUNLINK; free(tmpname); funmap(input); return ret; } static cl_error_t handler_otf(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx) { cl_error_t ret = CL_BREAK; char *tempfile = NULL; char *name = NULL; unsigned char *buff = NULL; int32_t current_block = 0; size_t len = 0, offset = 0; int ofd = -1; int is_mso = 0; bitset_t *blk_bitset = NULL; UNUSEDPARAM(dir); UNUSEDPARAM(handler_ctx); if (prop->type != 2) { /* Not a file */ ret = CL_SUCCESS; goto done; } print_ole2_property(prop); if (!(tempfile = cli_gentemp(ctx->sub_tmpdir))) { ret = CL_EMEM; goto done; } if ((ofd = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) { cli_dbgmsg("OLE2 [handler_otf]: Can't create file %s\n", tempfile); ret = CL_ECREAT; goto done; } current_block = prop->start_block; len = prop->size; if (cli_debug_flag) { if (!name) { name = cli_ole2_get_property_name2(prop->name, prop->name_size); } cli_dbgmsg("OLE2 [handler_otf]: Dumping '%s' to '%s'\n", name, tempfile); } CLI_MAX_MALLOC_OR_GOTO_DONE(buff, 1 << hdr->log2_big_block_size, ret = CL_EMEM); blk_bitset = cli_bitset_init(); if (!blk_bitset) { cli_errmsg("OLE2 [handler_otf]: init bitset failed\n"); goto done; } while ((current_block >= 0) && (len > 0)) { if (current_block > (int32_t)hdr->max_block_no) { cli_dbgmsg("OLE2 [handler_otf]: Max block number for file size exceeded: %d\n", current_block); break; } /* Check we aren't in a loop */ if (cli_bitset_test(blk_bitset, (unsigned long)current_block)) { /* Loop in block list */ cli_dbgmsg("OLE2 [handler_otf]: Block list loop detected\n"); break; } if (!cli_bitset_set(blk_bitset, (unsigned long)current_block)) { break; } if (prop->size < (int64_t)hdr->sbat_cutoff) { /* Small block file */ if (!ole2_get_sbat_data_block(hdr, buff, current_block)) { cli_dbgmsg("OLE2 [handler_otf]: ole2_get_sbat_data_block failed\n"); break; } /* buff now contains the block with N small blocks in it */ offset = (1 << hdr->log2_small_block_size) * (current_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size))); if (cli_writen(ofd, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size)) != MIN(len, 1 << hdr->log2_small_block_size)) { goto done; } len -= MIN(len, 1 << hdr->log2_small_block_size); current_block = ole2_get_next_sbat_block(hdr, current_block); } else { /* Big block file */ if (!ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block)) { break; } if (cli_writen(ofd, buff, MIN(len, (1 << hdr->log2_big_block_size))) != MIN(len, (1 << hdr->log2_big_block_size))) { ret = CL_EWRITE; goto done; } current_block = ole2_get_next_block_number(hdr, current_block); len -= MIN(len, (1 << hdr->log2_big_block_size)); } } /* defragmenting of ole2 stream complete */ is_mso = likely_mso_stream(ofd); if (lseek(ofd, 0, SEEK_SET) == -1) { ret = CL_ESEEK; goto done; } /* JSON Output Summary Information */ if (SCAN_COLLECT_METADATA && (ctx->properties != NULL)) { if (!name) { name = cli_ole2_get_property_name2(prop->name, prop->name_size); } if (name) { if (!strncmp(name, "_5_summaryinformation", 21)) { cli_dbgmsg("OLE2: detected a '_5_summaryinformation' stream\n"); /* JSONOLE2 - what to do if something breaks? */ if (cli_ole2_summary_json(ctx, ofd, 0) == CL_ETIMEOUT) { ret = CL_ETIMEOUT; goto done; } } if (!strncmp(name, "_5_documentsummaryinformation", 29)) { cli_dbgmsg("OLE2: detected a '_5_documentsummaryinformation' stream\n"); /* JSONOLE2 - what to do if something breaks? */ if (cli_ole2_summary_json(ctx, ofd, 1) == CL_ETIMEOUT) { ret = CL_ETIMEOUT; goto done; } } } } if (hdr->is_hwp) { if (!name) { name = cli_ole2_get_property_name2(prop->name, prop->name_size); } ret = cli_scanhwp5_stream(ctx, hdr->is_hwp, name, ofd, tempfile); } else if (is_mso < 0) { ret = CL_ESEEK; } else if (is_mso) { /* MSO Stream Scan */ ret = scan_mso_stream(ofd, ctx); } else { /* Normal File Scan */ ret = cli_magic_scan_desc(ofd, tempfile, ctx, NULL, LAYER_ATTRIBUTES_NONE); } ret = ret == CL_VIRUS ? CL_VIRUS : CL_SUCCESS; done: CLI_FREE_AND_SET_NULL(name); if (-1 != ofd) { close(ofd); } CLI_FREE_AND_SET_NULL(buff); if (NULL != blk_bitset) { cli_bitset_free(blk_bitset); } if (NULL != tempfile) { if (!ctx->engine->keeptmp) { if (cli_unlink(tempfile)) { ret = CL_EUNLINK; } } free(tempfile); tempfile = NULL; } return ret; } /* * @brief Extracts encrypted files. * @param hdr ole2_header_t structure * @param prop property_t structure (DirectoryEntry) * @param dir dir pointer. Unused by this function * @param ctx cli_ctx * @param handler_ctx handler context. For this function, it is the encryption key * initialized by 'initialize_encryption_key' * @return Success or failure depending on whether validation was successful. * * For more information, see below * https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/e5ad39b8-9bc1-4a19-bad3-44e6246d21e6 */ static cl_error_t handler_otf_encrypted(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx) { cl_error_t ret = CL_BREAK; char *tempfile = NULL; char *name = NULL; uint8_t *buff = NULL; int32_t current_block = 0; size_t len = 0; size_t offset = 0; int ofd = -1; int is_mso = 0; bitset_t *blk_bitset = NULL; int nrounds = 0; uint8_t *decryptDst = NULL; encryption_key_t *key = (encryption_key_t *)handler_ctx; uint32_t *rk = NULL; uint32_t bytesRead = 0; uint64_t actualFileLength; uint64_t bytesWritten = 0; uint32_t leftover = 0; uint32_t readIdx = 0; UNUSEDPARAM(dir); if (NULL == key) { cli_errmsg("%s::%d::key NULL\n", __FUNCTION__, __LINE__); goto done; } if (prop->type != 2) { /* Not a file */ ret = CL_SUCCESS; goto done; } CLI_MAX_MALLOC_OR_GOTO_DONE(rk, RKLENGTH(key->key_length_bits) * sizeof(uint32_t), ret = CL_EMEM); print_ole2_property(prop); nrounds = rijndaelSetupDecrypt(rk, key->key, key->key_length_bits); if (!(tempfile = cli_gentemp(ctx->sub_tmpdir))) { ret = CL_EMEM; goto done; } if ((ofd = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) { cli_dbgmsg("OLE2 [handler_otf]: Can't create file %s\n", tempfile); ret = CL_ECREAT; goto done; } current_block = prop->start_block; len = prop->size; if (cli_debug_flag) { if (!name) { name = cli_ole2_get_property_name2(prop->name, prop->name_size); } cli_dbgmsg("OLE2 [handler_otf]: Dumping '%s' to '%s'\n", name, tempfile); } uint32_t blockSize = 1 << hdr->log2_big_block_size; CLI_MAX_MALLOC_OR_GOTO_DONE(buff, blockSize + sizeof(uint64_t), ret = CL_EMEM); CLI_MAX_MALLOC_OR_GOTO_DONE(decryptDst, blockSize, ret = CL_EMEM); blk_bitset = cli_bitset_init(); if (!blk_bitset) { cli_errmsg("OLE2 [handler_otf]: init bitset failed\n"); goto done; } while (bytesRead < len) { if (current_block > (int32_t)hdr->max_block_no) { cli_dbgmsg("OLE2 [handler_otf]: Max block number for file size exceeded: %d\n", current_block); break; } /* Check we aren't in a loop */ if (cli_bitset_test(blk_bitset, (uint64_t)current_block)) { /* Loop in block list */ cli_dbgmsg("OLE2 [handler_otf]: Block list loop detected\n"); break; } if (!cli_bitset_set(blk_bitset, (uint64_t)current_block)) { break; } if (prop->size < (int64_t)hdr->sbat_cutoff) { /* Small block file */ if (!ole2_get_sbat_data_block(hdr, buff, current_block)) { cli_dbgmsg("OLE2 [handler_otf]: ole2_get_sbat_data_block failed\n"); break; } /* buff now contains the block with N small blocks in it */ offset = (((size_t)1) << hdr->log2_small_block_size) * (((size_t)current_block) % (((size_t)1) << (hdr->log2_big_block_size - hdr->log2_small_block_size))); if (cli_writen(ofd, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size)) != MIN(len, 1 << hdr->log2_small_block_size)) { goto done; } len -= MIN(len, 1 << hdr->log2_small_block_size); current_block = ole2_get_next_sbat_block(hdr, current_block); // These small block files don't seem to be encrypted. } else { uint32_t bytesToWrite = MIN(len - bytesRead, blockSize); uint32_t writeIdx = 0; uint32_t decryptDstIdx = 0; if (!ole2_read_block(hdr, &(buff[readIdx]), blockSize, current_block)) { break; } if (0 == bytesRead) { // first block. account for size of file. writeIdx += sizeof(uint64_t); memcpy(&actualFileLength, buff, sizeof(actualFileLength)); actualFileLength = ole2_endian_convert_64(actualFileLength); } bytesRead += blockSize; for (; writeIdx + 16 <= leftover + bytesToWrite; writeIdx += 16, decryptDstIdx += 16) { rijndaelDecrypt(rk, nrounds, &(buff[writeIdx]), &(decryptDst[decryptDstIdx])); } /*Since our buffer size is a power of 2, leftover should always be * either 0 or 8, but we have to decrypt in multiples of 16.*/ if (((leftover + bytesToWrite) - writeIdx) > 8) { goto done; } /*Make sure we don't write more data than the file is actually supposed to be.*/ if ((decryptDstIdx + bytesWritten) > actualFileLength) { decryptDstIdx = actualFileLength - bytesWritten; } if (cli_writen(ofd, decryptDst, decryptDstIdx) != decryptDstIdx) { cli_errmsg("ole2: Error writing to file '%s'\n", tempfile); goto done; } bytesWritten += decryptDstIdx; leftover = (leftover + bytesToWrite) - writeIdx; if (leftover) { memmove(buff, &(buff[writeIdx]), leftover); } readIdx = leftover; current_block = ole2_get_next_block_number(hdr, current_block); } } /* defragmenting of ole2 stream complete */ is_mso = likely_mso_stream(ofd); if (lseek(ofd, 0, SEEK_SET) == -1) { ret = CL_ESEEK; goto done; } /* JSON Output Summary Information */ if (SCAN_COLLECT_METADATA && (ctx->properties != NULL)) { if (!name) { name = cli_ole2_get_property_name2(prop->name, prop->name_size); } if (name) { if (!strncmp(name, "_5_summaryinformation", 21)) { cli_dbgmsg("OLE2: detected a '_5_summaryinformation' stream\n"); /* JSONOLE2 - what to do if something breaks? */ if (cli_ole2_summary_json(ctx, ofd, 0) == CL_ETIMEOUT) { ret = CL_ETIMEOUT; goto done; } } if (!strncmp(name, "_5_documentsummaryinformation", 29)) { cli_dbgmsg("OLE2: detected a '_5_documentsummaryinformation' stream\n"); /* JSONOLE2 - what to do if something breaks? */ if (cli_ole2_summary_json(ctx, ofd, 1) == CL_ETIMEOUT) { ret = CL_ETIMEOUT; goto done; } } } } if (hdr->is_hwp) { if (!name) { name = cli_ole2_get_property_name2(prop->name, prop->name_size); } ret = cli_scanhwp5_stream(ctx, hdr->is_hwp, name, ofd, tempfile); } else if (is_mso < 0) { ret = CL_ESEEK; } else if (is_mso) { /* MSO Stream Scan */ ret = scan_mso_stream(ofd, ctx); } else { /* Normal File Scan */ ret = cli_magic_scan_desc(ofd, tempfile, ctx, NULL, LAYER_ATTRIBUTES_NONE); } ret = ret == CL_VIRUS ? CL_VIRUS : CL_SUCCESS; done: CLI_FREE_AND_SET_NULL(name); if (-1 != ofd) { close(ofd); } CLI_FREE_AND_SET_NULL(buff); if (NULL != blk_bitset) { cli_bitset_free(blk_bitset); } if (NULL != tempfile) { if (!ctx->engine->keeptmp) { if (cli_unlink(tempfile)) { ret = CL_EUNLINK; } } free(tempfile); tempfile = NULL; } CLI_FREE_AND_SET_NULL(decryptDst); CLI_FREE_AND_SET_NULL(rk); return ret; } #if !defined(HAVE_ATTRIB_PACKED) && !defined(HAVE_PRAGMA_PACK) && !defined(HAVE_PRAGMA_PACK_HPPA) static bool ole2_read_header(int fd, ole2_header_t *hdr) { int i; if (cli_readn(fd, &hdr->magic, 8) != 8) { return false; } if (cli_readn(fd, &hdr->clsid, 16) != 16) { return false; } if (cli_readn(fd, &hdr->minor_version, 2) != 2) { return false; } if (cli_readn(fd, &hdr->dll_version, 2) != 2) { return false; } if (cli_readn(fd, &hdr->byte_order, 2) != 2) { return false; } if (cli_readn(fd, &hdr->log2_big_block_size, 2) != 2) { return false; } if (cli_readn(fd, &hdr->log2_small_block_size, 4) != 4) { return false; } if (cli_readn(fd, &hdr->reserved, 8) != 8) { return false; } if (cli_readn(fd, &hdr->bat_count, 4) != 4) { return false; } if (cli_readn(fd, &hdr->prop_start, 4) != 4) { return false; } if (cli_readn(fd, &hdr->signature, 4) != 4) { return false; } if (cli_readn(fd, &hdr->sbat_cutoff, 4) != 4) { return false; } if (cli_readn(fd, &hdr->sbat_start, 4) != 4) { return false; } if (cli_readn(fd, &hdr->sbat_block_count, 4) != 4) { return false; } if (cli_readn(fd, &hdr->xbat_start, 4) != 4) { return false; } if (cli_readn(fd, &hdr->xbat_count, 4) != 4) { return false; } for (i = 0; i < 109; i++) { if (cli_readn(fd, &hdr->bat_array[i], 4) != 4) { return false; } } return true; } #endif void copy_encryption_info_stream_standard(encryption_info_stream_standard_t *dst, const uint8_t *src) { memcpy(dst, src, sizeof(encryption_info_stream_standard_t)); dst->version_major = ole2_endian_convert_16(dst->version_major); dst->version_minor = ole2_endian_convert_16(dst->version_minor); dst->flags = ole2_endian_convert_32(dst->flags); dst->size = ole2_endian_convert_32(dst->size); dst->encryptionInfo.flags = ole2_endian_convert_32(dst->encryptionInfo.flags); dst->encryptionInfo.sizeExtra = ole2_endian_convert_32(dst->encryptionInfo.sizeExtra); dst->encryptionInfo.algorithmID = ole2_endian_convert_32(dst->encryptionInfo.algorithmID); dst->encryptionInfo.algorithmIDHash = ole2_endian_convert_32(dst->encryptionInfo.algorithmIDHash); dst->encryptionInfo.keySize = ole2_endian_convert_32(dst->encryptionInfo.keySize); dst->encryptionInfo.providerType = ole2_endian_convert_32(dst->encryptionInfo.providerType); dst->encryptionInfo.reserved1 = ole2_endian_convert_32(dst->encryptionInfo.reserved1); dst->encryptionInfo.reserved2 = ole2_endian_convert_32(dst->encryptionInfo.reserved2); } void copy_encryption_verifier(encryption_verifier_t *dst, const uint8_t *src) { memcpy(dst, src, sizeof(encryption_verifier_t)); dst->salt_size = ole2_endian_convert_32(dst->salt_size); dst->verifier_hash_size = ole2_endian_convert_32(dst->verifier_hash_size); } static inline bool key_length_valid_aes_bits(const uint32_t keyLength) { switch (keyLength) { case SE_HEADER_EI_AES128_KEYSIZE: /* fall-through */ case SE_HEADER_EI_AES192_KEYSIZE: /* fall-through */ case SE_HEADER_EI_AES256_KEYSIZE: return true; } return false; } /*Definitions for generate_key_aes*/ #define GENERATE_KEY_AES_ITERATIONS 50000 /* * @brief Generate the key for aes encryption based on the password * @param password Password to generate the key from * @param key [out] location to store the key * @param verifier encryption_verifier_t from the header. Contains information necessary to generate the key * * @return Error code based on whether or not the key was generated. This function * does NOT validate the key, you must call 'verify_key' for that. */ static cl_error_t generate_key_aes(const char *const password, encryption_key_t *key, encryption_verifier_t *verifier) { uint8_t *buffer = NULL; size_t bufLen = 0; cl_error_t ret = CL_ERROR; uint32_t i = 0; uint8_t sha1[sizeof(uint32_t) + SHA1_HASH_SIZE + sizeof(uint32_t)] = {0}; uint8_t *sha1Dst = &(sha1[sizeof(uint32_t)]); uint8_t buf1[64]; uint8_t buf2[64]; uint8_t doubleSha[SHA1_HASH_SIZE * 2]; uint32_t tmp = 0; if (!key_length_valid_aes_bits(key->key_length_bits)) { cli_errmsg("ole2: Invalid key length '0x%x'\n", key->key_length_bits / 8); goto done; } memset(key->key, 0, key->key_length_bits / 8); bufLen = verifier->salt_size + (strlen(password) * 2); buffer = calloc(bufLen, 1); if (NULL == buffer) { cli_errmsg("ole2: calloc failed\n"); ret = CL_EMEM; goto done; } tmp = verifier->salt_size; if (verifier->salt_size > sizeof(verifier->salt)) { cli_dbgmsg("ole2: Invalid salt length '0x%x'\n", verifier->salt_size); tmp = sizeof(verifier->salt); } memcpy(buffer, verifier->salt, tmp); /*Convert to UTF16-LE*/ for (i = 0; i < (uint32_t)strlen(password); i++) { buffer[verifier->salt_size + (i * 2)] = password[i]; } (void)cl_sha1(buffer, bufLen, sha1Dst, NULL); for (i = 0; i < GENERATE_KEY_AES_ITERATIONS; i++) { uint32_t eye = ole2_endian_convert_32(i); memcpy(sha1, &eye, sizeof(eye)); (void)cl_sha1(sha1, SHA1_HASH_SIZE + sizeof(uint32_t), sha1Dst, NULL); } memset(&(sha1Dst[SHA1_HASH_SIZE]), 0, sizeof(uint32_t)); (void)cl_sha1(sha1Dst, SHA1_HASH_SIZE + sizeof(uint32_t), sha1Dst, NULL); memset(buf1, 0x36, sizeof(buf1)); for (i = 0; i < SHA1_HASH_SIZE; i++) { buf1[i] = buf1[i] ^ sha1Dst[i]; } // now sha1 buf1 (void)cl_sha1(buf1, sizeof(buf1), doubleSha, NULL); memset(buf2, 0x5c, sizeof(buf2)); for (i = 0; i < SHA1_HASH_SIZE; i++) { buf2[i] = buf2[i] ^ sha1Dst[i]; } (void)cl_sha1(buf2, sizeof(buf2), &(doubleSha[SHA1_HASH_SIZE]), NULL); tmp = key->key_length_bits / 8; if (tmp > sizeof(key->key)) { cli_warnmsg("ole2: Invalid key length 0x%x\n", key->key_length_bits / 8); tmp = sizeof(key->key); } memcpy(key->key, doubleSha, tmp); ret = CL_SUCCESS; done: CLI_FREE_AND_SET_NULL(buffer); return ret; } static bool aes_128ecb_decrypt(const unsigned char *in, size_t length, unsigned char *out, const encryption_key_t *const key) { uint32_t rk[RKLENGTH(128)]; int nrounds; size_t i; bool bRet = false; if (SE_HEADER_EI_AES128_KEYSIZE != key->key_length_bits) { cli_dbgmsg("ole2: Unsupported AES key length in aes_128ecb_decrypt\n"); goto done; } nrounds = rijndaelSetupDecrypt(rk, (const unsigned char *)key->key, key->key_length_bits); if (!nrounds) { cli_errmsg("ole2: Unable to initialize decryption.\n"); goto done; } else { for (i = 0; i < length; i += 16) { rijndaelDecrypt(rk, nrounds, &(in[i]), &(out[i])); } } bRet = true; done: return bRet; } /*Definitions for verify_key_aes*/ #define AES_VERIFIER_HASH_LEN 32 /* * @brief Returns true if it is actually encrypted with the key. * @param key encryption_key_t to attempt validation * @param verifier encryption_verifier_t to attempt validation. * @return Success or failure depending on whether validation was successful. * * For more information, see below * https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/e5ad39b8-9bc1-4a19-bad3-44e6246d21e6 */ static bool verify_key_aes(const encryption_key_t *const key, encryption_verifier_t *verifier) { bool bRet = false; uint8_t sha[SHA1_HASH_SIZE]; uint8_t decrypted[AES_VERIFIER_HASH_LEN] = {0}; uint32_t actual_hash_size = 0; // The hash size should be 20 bytes, even though the buffer is 32 bytes. // If it claims to be LARGER than 32 bytes, we have a problem - because the buffer isn't that big. actual_hash_size = verifier->verifier_hash_size; if (actual_hash_size > sizeof(verifier->encrypted_verifier_hash)) { cli_dbgmsg("ole2: Invalid encrypted verifier hash length 0x%x\n", verifier->verifier_hash_size); actual_hash_size = sizeof(verifier->encrypted_verifier_hash); } switch (key->key_length_bits) { case SE_HEADER_EI_AES128_KEYSIZE: // Decrypt the verifier, which is a randomly generated Verifier value encrypted using // the algorithm chosen by the implementation. if (!aes_128ecb_decrypt(verifier->encrypted_verifier, sizeof(verifier->encrypted_verifier), decrypted, key)) { goto done; } // Get hash of decrypted verifier. // The hash type is from the encryption header, but in this case should always be SHA1. (void)cl_sha1(decrypted, sizeof(verifier->encrypted_verifier), sha, NULL); // Decrypt the verifier hash, which, for contains the encrypted form of the hash of the randomly generated Verifier value if (!aes_128ecb_decrypt(verifier->encrypted_verifier_hash, actual_hash_size, decrypted, key)) { goto done; } break; case SE_HEADER_EI_AES192_KEYSIZE: // not implemented goto done; case SE_HEADER_EI_AES256_KEYSIZE: // not implemented goto done; default: // unsupported/invalid key size goto done; } // Compare our (20-byte) SHA1 with the decrypted hash, which should be the same. // Note: the hash size is different then ... what are we gonna do? We only support SHA1 hashes for this algorithm. // So we'll just assume they're the same for this comparison. bRet = (0 == memcmp(sha, decrypted, SHA1_HASH_SIZE)); done: return bRet; } /*Definitions for initialize_encryption_key*/ #define SE_HEADER_FCRYPTOAPI (1 << 2) #define SE_HEADER_FEXTERNAL (1 << 4) #define SE_HEADER_FDOCPROPS (1 << 3) #define SE_HEADER_FAES (1 << 5) #define SE_HEADER_EI_AES128 0x0000660e #define SE_HEADER_EI_AES192 0x0000660f #define SE_HEADER_EI_AES256 0x00006610 #define SE_HEADER_EI_RC4 0x00006801 #define SE_HEADER_EI_SHA1 0x00008004 #define SE_HEADER_EI_AES_PROVIDERTYPE 0x00000018 /** * @brief Initialize encryption key, if the encryption validation passes. * * @param encryptionInfo Pointer to the encryption header. * @param encryptionKey [out] Pointer to encryption_key_t structure to be initialized by this function. * @return Success or failure depending on whether or not the * encryption verifier was successful with the * standard password (VelvetSweatshop). * * Information about the encryption keys is here * https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/dca653b5-b93b-48df-8e1e-0fb9e1c83b0f * https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/2895eba1-acb1-4624-9bde-2cdad3fea015 * */ static bool initialize_encryption_key( const uint8_t *encryptionInfoStreamPtr, size_t remainingBytes, encryption_key_t *encryptionKey, encryption_status_t *pEncryptionStatus) { bool bRet = false; size_t idx = 0; encryption_key_t key; bool bAES = false; encryption_info_stream_standard_t encryptionInfo = {0}; uint16_t *encryptionInfo_CSPName = NULL; size_t CSPName_length = 0; const uint8_t *encryptionVerifierPtr = NULL; encryption_verifier_t encryptionVerifier = {0}; // Populate the encryption_info_stream_standard_t structure copy_encryption_info_stream_standard(&encryptionInfo, encryptionInfoStreamPtr); memset(encryptionKey, 0, sizeof(encryption_key_t)); memset(&key, 0, sizeof(encryption_key_t)); cli_dbgmsg("Major Version = 0x%x\n", encryptionInfo.version_major); cli_dbgmsg("Minor Version = 0x%x\n", encryptionInfo.version_minor); cli_dbgmsg("Flags = 0x%x\n", encryptionInfo.flags); /*Bit 0 and 1 must be 0*/ if (1 & encryptionInfo.flags) { cli_dbgmsg("ole2: Invalid first bit, must be 0\n"); goto done; } if ((1 << 1) & encryptionInfo.flags) { cli_dbgmsg("ole2: Invalid second bit, must be 0\n"); goto done; } // https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/200a3d61-1ab4-4402-ae11-0290b28ab9cb if ((SE_HEADER_FDOCPROPS & encryptionInfo.flags)) { cli_dbgmsg("ole2: Unsupported document properties encrypted\n"); goto done; } if ((SE_HEADER_FEXTERNAL & encryptionInfo.flags) && (SE_HEADER_FEXTERNAL != encryptionInfo.flags)) { cli_dbgmsg("ole2: Invalid fExternal flags. If fExternal bit is set, nothing else can be\n"); goto done; } if (SE_HEADER_FAES & encryptionInfo.flags) { if (!(SE_HEADER_FCRYPTOAPI & encryptionInfo.flags)) { cli_dbgmsg("ole2: Invalid combo of fAES and fCryptoApi flags\n"); goto done; } cli_dbgmsg("Flags = AES\n"); } cli_dbgmsg("Size = 0x%x\n", encryptionInfo.size); if (encryptionInfo.flags != encryptionInfo.encryptionInfo.flags) { cli_dbgmsg("ole2: Flags must match\n"); goto done; } if (0 != encryptionInfo.encryptionInfo.sizeExtra) { cli_dbgmsg("ole2: Size Extra must be 0\n"); goto done; } switch (encryptionInfo.encryptionInfo.algorithmID) { case SE_HEADER_EI_AES128: if (SE_HEADER_EI_AES128_KEYSIZE != encryptionInfo.encryptionInfo.keySize) { cli_dbgmsg("ole2: Key length does not match algorithm id\n"); goto done; } bAES = true; pEncryptionStatus->encryption_type = AES128_ENCRYPTION; break; case SE_HEADER_EI_AES192: // not implemented if (SE_HEADER_EI_AES192_KEYSIZE != encryptionInfo.encryptionInfo.keySize) { cli_dbgmsg("ole2: Key length does not match algorithm id\n"); goto done; } bAES = true; pEncryptionStatus->encryption_type = AES192_ENCRYPTION; goto done; case SE_HEADER_EI_AES256: // not implemented if (SE_HEADER_EI_AES256_KEYSIZE != encryptionInfo.encryptionInfo.keySize) { cli_dbgmsg("ole2: Key length does not match algorithm id\n"); goto done; } bAES = true; pEncryptionStatus->encryption_type = AES256_ENCRYPTION; goto done; case SE_HEADER_EI_RC4: // not implemented pEncryptionStatus->encryption_type = RC4_ENCRYPTION; goto done; default: cli_dbgmsg("ole2: Invalid Algorithm ID: 0x%x\n", encryptionInfo.encryptionInfo.algorithmID); goto done; } if (SE_HEADER_EI_SHA1 != encryptionInfo.encryptionInfo.algorithmIDHash) { cli_dbgmsg("ole2: Invalid Algorithm ID Hash: 0x%x\n", encryptionInfo.encryptionInfo.algorithmIDHash); goto done; } if (!key_length_valid_aes_bits(encryptionInfo.encryptionInfo.keySize)) { cli_dbgmsg("ole2: Invalid key size: 0x%x\n", encryptionInfo.encryptionInfo.keySize); goto done; } cli_dbgmsg("KeySize = 0x%x\n", encryptionInfo.encryptionInfo.keySize); if (SE_HEADER_EI_AES_PROVIDERTYPE != encryptionInfo.encryptionInfo.providerType) { cli_dbgmsg("ole2: WARNING: Provider Type should be '0x%x', is '0x%x'\n", SE_HEADER_EI_AES_PROVIDERTYPE, encryptionInfo.encryptionInfo.providerType); goto done; } cli_dbgmsg("Reserved1 = 0x%x\n", encryptionInfo.encryptionInfo.reserved1); if (0 != encryptionInfo.encryptionInfo.reserved2) { cli_dbgmsg("ole2: Reserved 2 must be zero, is 0x%x\n", encryptionInfo.encryptionInfo.reserved2); goto done; } /* The encryption info is at the end of the CPSName string. * Find the end, and we'll have the index of the EncryptionVerifier. * The CPSName string *should* always be either * 'Microsoft Enhanced RSA and AES Cryptographic Provider' * or * 'Microsoft Enhanced RSA and AES Cryptographic Provider (Prototype)' */ encryptionInfo_CSPName = (uint16_t *)(encryptionInfoStreamPtr + sizeof(encryption_info_stream_standard_t)); remainingBytes -= sizeof(encryption_info_stream_standard_t); if (0 == remainingBytes) { cli_dbgmsg("ole2: No CSPName or encryption_verifier_t\n"); goto done; } while (true) { // Check if we've gone past the end of the buffer without finding the end of the CSPName string. if ((idx + 1) * sizeof(uint16_t) > remainingBytes) { cli_dbgmsg("ole2: CSPName is missing null terminator before end of buffer.\n"); goto done; } // Check if we've found the end of the CSPName string. if (encryptionInfo_CSPName[idx] == 0) { break; } // Found another character in the CSPName string, keep going. idx++; } CSPName_length = (idx + 1) * sizeof(uint16_t); encryptionVerifierPtr = (uint8_t *)encryptionInfo_CSPName + CSPName_length; remainingBytes -= CSPName_length; if (remainingBytes < sizeof(encryption_verifier_t)) { cli_dbgmsg("ole2: No encryption_verifier_t\n"); goto done; } copy_encryption_verifier(&encryptionVerifier, encryptionVerifierPtr); key.key_length_bits = encryptionInfo.encryptionInfo.keySize; if (!bAES) { cli_dbgmsg("ole2: Unsupported encryption algorithm\n"); goto done; } if (CL_SUCCESS != generate_key_aes("VelvetSweatshop", &key, &encryptionVerifier)) { /*Error message printed by generate_key_aes*/ goto done; } if (!verify_key_aes(&key, &encryptionVerifier)) { cli_dbgmsg("ole2: Key verification for '%s' failed, unable to decrypt.\n", "VelvetSweatshop"); goto done; } memcpy(encryptionKey, &key, sizeof(encryption_key_t)); bRet = true; pEncryptionStatus->encryption_type = VELVET_SWEATSHOP_ENCRYPTION; done: if (pEncryptionStatus->encryption_type) { pEncryptionStatus->encrypted = true; } pEncryptionStatus->velvet_sweatshop = bRet; return bRet; } /** * @brief Extract macros and images from an ole2 file * * @param dirname A temp directory where we should store extracted content * @param ctx The scan context * @param files [out] A store of file names of extracted things to be processed later. * @param has_vba [out] If the ole2 contained 1 or more VBA macros * @param has_xlm [out] If the ole2 contained 1 or more XLM macros * @param has_image [out] If the ole2 contained 1 or more images * @return cl_error_t */ cl_error_t cli_ole2_extract(const char *dirname, cli_ctx *ctx, struct uniq **files, int *has_vba, int *has_xlm, int *has_image) { ole2_header_t hdr; cl_error_t ret = CL_CLEAN; size_t hdr_size; unsigned int file_count = 0; unsigned long scansize, scansize2; const void *phdr; encryption_key_t key; bool bEncrypted = false; size_t encryption_offset = 0; encryption_status_t encryption_status = {0}; cli_dbgmsg("in cli_ole2_extract()\n"); if (!ctx) { return CL_ENULLARG; } hdr.is_hwp = NULL; hdr.bitset = NULL; if (ctx->engine->maxscansize) { if (ctx->engine->maxscansize > ctx->scansize) { scansize = ctx->engine->maxscansize - ctx->scansize; } else { return CL_EMAXSIZE; } } else { scansize = -1; } scansize2 = scansize; /* size of header - size of other values in struct */ hdr_size = sizeof(struct ole2_header_tag) - sizeof(int32_t) - // sbat_root_start sizeof(uint32_t) - // max_block_no sizeof(off_t) - // m_length sizeof(bitset_t *) - // bitset sizeof(struct uniq *) - // U sizeof(fmap_t *) - // map sizeof(bool) - // has_vba sizeof(bool) - // has_xlm sizeof(bool) - // has_image sizeof(hwp5_header_t *); // is_hwp if ((size_t)(ctx->fmap->len) < (size_t)(hdr_size)) { return CL_CLEAN; } hdr.map = ctx->fmap; hdr.m_length = hdr.map->len; phdr = fmap_need_off_once(hdr.map, 0, hdr_size); if (phdr) { memcpy(&hdr, phdr, hdr_size); } else { cli_dbgmsg("cli_ole2_extract: failed to read header\n"); goto done; } hdr.minor_version = ole2_endian_convert_16(hdr.minor_version); hdr.dll_version = ole2_endian_convert_16(hdr.dll_version); hdr.byte_order = ole2_endian_convert_16(hdr.byte_order); hdr.log2_big_block_size = ole2_endian_convert_16(hdr.log2_big_block_size); hdr.log2_small_block_size = ole2_endian_convert_32(hdr.log2_small_block_size); hdr.bat_count = ole2_endian_convert_32(hdr.bat_count); hdr.prop_start = ole2_endian_convert_32(hdr.prop_start); hdr.sbat_cutoff = ole2_endian_convert_32(hdr.sbat_cutoff); hdr.sbat_start = ole2_endian_convert_32(hdr.sbat_start); hdr.sbat_block_count = ole2_endian_convert_32(hdr.sbat_block_count); hdr.xbat_start = ole2_endian_convert_32(hdr.xbat_start); hdr.xbat_count = ole2_endian_convert_32(hdr.xbat_count); hdr.sbat_root_start = -1; hdr.bitset = cli_bitset_init(); if (!hdr.bitset) { ret = CL_EMEM; goto done; } if (memcmp(hdr.magic, magic_id, 8) != 0) { cli_dbgmsg("OLE2 magic failed!\n"); ret = CL_EFORMAT; goto done; } if (hdr.log2_big_block_size < 6 || hdr.log2_big_block_size > 28) { // The big block size (aka Sector Shift) is expected to be: // - 9 for Major Version 3 // - 12 for Major Version 4 // - TBD for Major Version 5? // To allow for future changes, and prevent overflowing an int32_t, we're limiting to 28. cli_dbgmsg("CAN'T PARSE: Invalid big block size (2^%u)\n", hdr.log2_big_block_size); goto done; } if (!hdr.log2_small_block_size || hdr.log2_small_block_size > hdr.log2_big_block_size) { cli_dbgmsg("CAN'T PARSE: Invalid small block size (2^%u)\n", hdr.log2_small_block_size); goto done; } if (hdr.sbat_cutoff != 4096) { cli_dbgmsg("WARNING: Untested sbat cutoff (%u); data may not extract correctly\n", hdr.sbat_cutoff); } if (hdr.map->len > INT32_MAX) { cli_dbgmsg("OLE2 extract: Overflow detected\n"); ret = CL_EFORMAT; goto done; } /* determine if encrypted with VelvetSweatshop password */ encryption_offset = 4 * (1 << hdr.log2_big_block_size); if ((encryption_offset + sizeof(encryption_info_stream_standard_t)) <= hdr.m_length) { bEncrypted = initialize_encryption_key( &(((const uint8_t *)phdr)[encryption_offset]), hdr.m_length - encryption_offset, &key, &encryption_status); } /* 8 SBAT blocks per file block */ hdr.max_block_no = (hdr.map->len - MAX(512, 1 << hdr.log2_big_block_size)) / (1 << hdr.log2_small_block_size); print_ole2_header(&hdr); cli_dbgmsg("Max block number: %lu\n", (unsigned long int)hdr.max_block_no); /* PASS 1 : Count files and check for VBA */ hdr.has_vba = false; hdr.has_xlm = false; hdr.has_image = false; ret = ole2_walk_property_tree(&hdr, NULL, 0, handler_enum, 0, &file_count, ctx, &scansize, NULL, &encryption_status); cli_bitset_free(hdr.bitset); hdr.bitset = NULL; if (!file_count || !(hdr.bitset = cli_bitset_init())) { goto done; } if (hdr.is_hwp) { cli_dbgmsg("OLE2: identified HWP document\n"); cli_dbgmsg("OLE2: HWP version: 0x%08x\n", hdr.is_hwp->version); cli_dbgmsg("OLE2: HWP flags: 0x%08x\n", hdr.is_hwp->flags); ret = cli_hwp5header(ctx, hdr.is_hwp); if (ret != CL_SUCCESS) { goto done; } } /* If there's no VBA we scan OTF */ if (hdr.has_vba || hdr.has_xlm || hdr.has_image) { /* PASS 2/A : VBA scan */ cli_dbgmsg("OLE2: VBA project found\n"); if (!(hdr.U = uniq_init(file_count))) { cli_dbgmsg("OLE2: uniq_init() failed\n"); ret = CL_EMEM; goto done; } file_count = 0; ole2_walk_property_tree(&hdr, dirname, 0, handler_writefile, 0, &file_count, ctx, &scansize2, NULL, &encryption_status); ret = CL_CLEAN; *files = hdr.U; if (has_vba) { *has_vba = hdr.has_vba; } if (has_xlm) { *has_xlm = hdr.has_xlm; } if (has_image) { *has_image = hdr.has_image; } } else { cli_dbgmsg("OLE2: no VBA projects found\n"); /* PASS 2/B : OTF scan */ file_count = 0; if (bEncrypted) { ret = ole2_walk_property_tree(&hdr, NULL, 0, handler_otf_encrypted, 0, &file_count, ctx, &scansize2, &key, &encryption_status); } else { ret = ole2_walk_property_tree(&hdr, NULL, 0, handler_otf, 0, &file_count, ctx, &scansize2, NULL, &encryption_status); } } if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) { if (encryption_status.encrypted) { if (encryption_status.encryption_type) { cli_jsonstr(ctx->wrkproperty, ENCRYPTED_JSON_KEY, encryption_status.encryption_type); } else { cli_jsonstr(ctx->wrkproperty, ENCRYPTED_JSON_KEY, GENERIC_ENCRYPTED); } } } if (SCAN_HEURISTIC_ENCRYPTED_DOC && encryption_status.encrypted && (!encryption_status.velvet_sweatshop)) { cl_error_t status = cli_append_potentially_unwanted(ctx, OLE2_HEURISTIC_ENCRYPTED_WARNING); if (CL_SUCCESS != status) { cli_errmsg("OLE2 : Unable to warn potentially unwanted signature '%s'\n", "Heuristics.Encrypted.OLE2"); ret = status; } } done: if (hdr.bitset) { cli_bitset_free(hdr.bitset); } if (hdr.is_hwp) { free(hdr.is_hwp); } return ret == CL_BREAK ? CL_CLEAN : ret; }