mirror of https://github.com/Cisco-Talos/clamav
Store URLs found in HTML `<a>` and `<form>` tags while scanning HTML files when recording scan metadata. HTML URL recording is ON by default, but it is part of the generate-metadata-json feature, and the generate-metadata-json feature is OFF by default. This introduces a new general scan option: libclamav: `CL_SCAN_GENERAL_STORE_HTML_URLS`; ClamD: `JsonStoreHTMLUrls`; ClamScan: `--json-store-html-urls`. Thank you, Matt Jolly, for the helpful comment on the pull request. pull/1281/head
parent
8ae19eca40
commit
666e047f2b
@ -0,0 +1,62 @@ |
||||
# Copyright (C) 2020-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved. |
||||
|
||||
""" |
||||
Run clamscan tests. |
||||
""" |
||||
|
||||
import sys |
||||
import os |
||||
import re |
||||
import shutil |
||||
|
||||
sys.path.append('../unit_tests') |
||||
import testcase |
||||
|
||||
|
||||
class TC(testcase.TestCase):
    """Scan an HTML fixture with clamscan and check the URLs stored in the metadata JSON."""

    @classmethod
    def setUpClass(cls):
        """Delegate class-level setup to the base test case."""
        super(TC, cls).setUpClass()

    @classmethod
    def tearDownClass(cls):
        """Delegate class-level teardown to the base test case."""
        super(TC, cls).tearDownClass()

    def setUp(self):
        """Delegate per-test setup to the base test case."""
        super(TC, self).setUp()

    def tearDown(self):
        """Run the base teardown, drop the scan temp directory, then check the valgrind log."""
        super(TC, self).tearDown()

        # Remove scan temps directory between tests
        scan_temps = self.path_tmp / "TD"
        if scan_temps.exists():
            shutil.rmtree(scan_temps)

        self.verify_valgrind_log()

    def test_save_links(self):
        """Scan index.html with --gen-json and expect both fixture URLs in the metadata JSON.

        The scan is expected to exit with code 0, and the metadata left in the
        temp directory is expected to contain the 'HTMLUrls' key plus the two
        quoted URLs listed in ``expected_strings``.
        """
        self.step_name('Extract Links')

        # --leave-temps keeps the scan output in this directory so it can be
        # inspected after clamscan exits; tearDown() removes it again.
        tempdir = self.path_tmp / "TD"
        # exist_ok avoids the separate isdir() check (and the window between
        # checking and creating); it still raises if the path is not a directory.
        os.makedirs(tempdir, exist_ok=True)

        testfile = TC.path_source / 'unit_tests' / 'input' / 'other_scanfiles' / 'html' / 'index.html'
        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} --gen-json --leave-temps --tempdir={tempdir} {testfile}'.format(
            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
            path_db=TC.path_source / 'unit_tests' / 'input' / 'other_sigs' / 'Clamav-Unit-Test-Signature.ndb',
            tempdir=tempdir,
            testfile=testfile,
        )
        output = self.execute_command(command)

        assert output.ec == 0  # clean

        expected_strings = [
            'HTMLUrls',
            '"https://www.clamav.net/reports/malware"',
            '"http://www.google.com"',
        ]
        self.verify_metadata_json(tempdir, expected_strings)
||||
|
||||
|
||||
|
@ -0,0 +1,16 @@ |
||||
<!DOCTYPE html> |
||||
<html> |
||||
<body> |
||||
|
||||
<h1>Save Links Unittest</h1> |
||||
<p>Paragraph</p> |
||||
<a href="https://www.clamav.net/reports/malware">Report Malware</a> |
||||
|
||||
<form action="http://www.google.com"> |
||||
<input type="submit"> |
||||
</form> |
||||
|
||||
|
||||
</body> |
||||
</html> |
||||
|
Loading…
Reference in new issue