Files
mvt/tests/common/test_indicators.py
besendorf 6c537c624e Fix STIX2 hash key parsing to accept spec-compliant algorithm names (#767)
* Fix betterproto2 migration: update generated proto code and callers

The dependency switch from betterproto to betterproto2 was incomplete.
This updates all affected files to use the betterproto2 API:

- tombstone.py: rewrite generated code to use betterproto2.field() with
  explicit TYPE_* constants, repeated/optional/group flags, and map_meta()
  for map fields
- tombstone_crashes.py: update import and fix to_dict() call to use
  keyword-only casing= argument required by betterproto2
- pyproject.toml: replace betterproto[compiler] dev dep with betterproto2-compiler
- Makefile: update protoc plugin flag to --python_betterproto2_out

* Fix STIX2 hash key parsing to accept spec-compliant algorithm names

The STIX2 specification requires single quotes around hash algorithm
names that contain hyphens (e.g. file:hashes.'SHA-256'). MVT only
accepted a non-standard lowercase form (file:hashes.sha256), silently
dropping any indicators using the spec-correct spelling.

Normalize hash algorithm keys in _process_indicator by stripping quotes
and hyphens from the algorithm portion before matching, so all of the
following are accepted for SHA-256, SHA-1 and MD5:

  file:hashes.'SHA-256'   (STIX2 spec)
  file:hashes.SHA-256
  file:hashes.SHA256
  file:hashes.sha256      (previously the only accepted form)

The same normalization is applied to app:cert.* keys.

Update generate_stix.py to use the spec-compliant quoted forms, and add
test_parse_stix2_hash_key_variants to cover all spelling variants.
2026-04-07 20:41:40 +02:00

182 lines
7.6 KiB
Python

# Mobile Verification Toolkit (MVT)
# Copyright (c) 2021-2023 The MVT Authors.
# Use of this software is governed by the MVT License 1.1 that can be found at
# https://license.mvt.re/1.1/
import logging
import os
from mvt.common.config import settings
from mvt.common.indicators import Indicators
from ..utils import get_artifact_folder
class TestIndicators:
    """Tests for STIX2 indicator parsing and the ``Indicators.check_*`` lookups."""

    @staticmethod
    def _load_indicators(files):
        """Build an ``Indicators`` object and load ``files`` into it.

        The bundled default indicators are always skipped
        (``load_default=False``) so the counts asserted by the tests depend
        only on the files under test.

        :param files: list of STIX2 file paths to load (may be empty).
        :returns: the populated ``Indicators`` instance.
        """
        ind = Indicators(log=logging)
        ind.load_indicators_files(files, load_default=False)
        return ind

    def test_parse_stix2(self, indicator_file):
        """The generated fixture file yields one collection with 9 indicators."""
        ind = self._load_indicators([indicator_file])
        assert len(ind.ioc_collections) == 1

        collection = ind.ioc_collections[0]
        assert collection["count"] == 9
        assert len(collection["domains"]) == 2
        assert len(collection["emails"]) == 1
        assert len(collection["file_names"]) == 1
        assert len(collection["processes"]) == 1
        assert len(collection["android_property_names"]) == 1
        assert len(collection["files_sha256"]) == 1
        assert len(collection["files_sha1"]) == 1
        assert len(collection["urls"]) == 1

    def test_parse_stix2_amnesty(self):
        """
        STIX2 file from
        https://github.com/AmnestyTech/investigations/blob/master/2021-12-16_cytrox/cytrox.stix2
        """
        file = os.path.join(get_artifact_folder(), "stix2", "cytrox.stix2")
        ind = self._load_indicators([file])
        assert len(ind.ioc_collections) == 1

        collection = ind.ioc_collections[0]
        assert collection["count"] == 343
        assert len(collection["domains"]) == 336
        assert len(collection["emails"]) == 0
        assert len(collection["file_names"]) == 0
        assert len(collection["file_paths"]) == 6
        assert len(collection["ios_profile_ids"]) == 1
        assert len(collection["processes"]) == 0
        assert len(collection["android_property_names"]) == 0
        assert len(collection["urls"]) == 0

    def test_parse_stix2_otx(self):
        """
        STIX2 file from OTX Pulse
        https://otx.alienvault.com/pulse/638cd3ee5e5f019f84f9e0ea
        """
        file = os.path.join(
            get_artifact_folder(), "stix2", "638cd3ee5e5f019f84f9e0ea.json"
        )
        ind = self._load_indicators([file])
        assert len(ind.ioc_collections) == 1

        collection = ind.ioc_collections[0]
        assert collection["count"] == 69
        assert len(collection["domains"]) == 15
        assert len(collection["emails"]) == 0
        assert len(collection["file_names"]) == 0
        assert len(collection["processes"]) == 0
        assert len(collection["android_property_names"]) == 0
        assert len(collection["urls"]) == 54

    def test_check_url(self, indicator_file):
        """``check_url`` matches known-bad URLs and returns None otherwise."""
        ind = self._load_indicators([indicator_file])
        # A non-string argument must be rejected rather than raise.
        assert ind.check_url(42) is None
        assert ind.check_url("http://example.com/thisisbad")
        assert ind.check_url("http://example.com/thisisgood") is None
        assert ind.check_url("https://www.example.org/foobar")
        assert ind.check_url("http://example.org:8080/toto")
        assert ind.check_url("https://github.com") is None
        assert ind.check_url("https://example.com/") is None
        # Test detecting IP address indicators from STIX.
        assert ind.check_url("https://198.51.100.1:8080/")
        assert ind.check_url("https://1.1.1.1/") is None

    def test_check_file_hash(self, indicator_file):
        """``check_file_hash`` matches the fixture's SHA-256 and SHA-1 hashes."""
        ind = self._load_indicators([indicator_file])
        assert (
            ind.check_file_hash(
                "003764fd74bf13cff9bf1ddd870cbf593b23e2b584ba4465114023870ea6fbef"
            )
            is None
        )
        assert ind.check_file_hash(
            "570cd76bf49cf52e0cb347a68bdcf0590b2eaece134e1b1eba7e8d66261bdbe6"
        )
        assert ind.check_file_hash("da0611a300a9ce9aa7a09d1212f203fca5856794")

    def test_parse_stix2_hash_key_variants(self, tmp_path):
        """STIX2 spec requires single-quoted algorithm names that contain hyphens,
        e.g. file:hashes.'SHA-256'. Verify MVT accepts both spec-compliant and
        non-standard lowercase spellings for MD5, SHA-1 and SHA-256."""
        import json

        sha256_hash = "570cd76bf49cf52e0cb347a68bdcf0590b2eaece134e1b1eba7e8d66261bdbe6"
        sha1_hash = "da0611a300a9ce9aa7a09d1212f203fca5856794"
        md5_hash = "d41d8cd98f00b204e9800998ecf8427e"

        variants = [
            # (pattern_key, expected_bucket)
            ("file:hashes.'SHA-256'", "files_sha256"),
            ("file:hashes.SHA-256", "files_sha256"),
            ("file:hashes.SHA256", "files_sha256"),
            ("file:hashes.sha256", "files_sha256"),
            ("file:hashes.'SHA-1'", "files_sha1"),
            ("file:hashes.SHA-1", "files_sha1"),
            ("file:hashes.SHA1", "files_sha1"),
            ("file:hashes.sha1", "files_sha1"),
            ("file:hashes.MD5", "files_md5"),
            ("file:hashes.'MD5'", "files_md5"),
            ("file:hashes.md5", "files_md5"),
        ]

        hash_for = {
            "files_sha256": sha256_hash,
            "files_sha1": sha1_hash,
            "files_md5": md5_hash,
        }

        # The same path is rewritten for every variant; each iteration loads
        # it into a fresh Indicators object, so iterations do not interact.
        stix_file = tmp_path / "test.stix2"

        for pattern_key, bucket in variants:
            h = hash_for[bucket]
            stix = {
                "type": "bundle",
                "id": "bundle--test",
                "objects": [
                    {
                        "type": "malware",
                        "id": "malware--test",
                        "name": "TestMalware",
                        "is_family": False,
                    },
                    {
                        "type": "indicator",
                        "id": "indicator--test",
                        "indicator_types": ["malicious-activity"],
                        "pattern": f"[{pattern_key}='{h}']",
                        "pattern_type": "stix",
                        "valid_from": "2024-01-01T00:00:00Z",
                    },
                    {
                        "type": "relationship",
                        "id": "relationship--test",
                        "relationship_type": "indicates",
                        "source_ref": "indicator--test",
                        "target_ref": "malware--test",
                    },
                ],
            }
            stix_file.write_text(json.dumps(stix))

            ind = self._load_indicators([str(stix_file)])

            # Fail with a message naming the variant instead of a bare
            # IndexError when nothing was loaded at all.
            assert ind.ioc_collections, (
                f"No IOC collection was loaded for pattern key '{pattern_key}'"
            )
            assert len(ind.ioc_collections[0][bucket]) == 1, (
                f"Pattern key '{pattern_key}' was not parsed into '{bucket}'"
            )
            assert ind.check_file_hash(h) is not None, (
                f"check_file_hash failed for pattern key '{pattern_key}'"
            )

    def test_check_android_property(self, indicator_file):
        """``check_android_property_name`` matches only the listed property."""
        ind = self._load_indicators([indicator_file])
        assert ind.check_android_property_name("sys.foobar")
        assert ind.check_android_property_name("sys.soundsokay") is None

    def test_env_stix(self, indicator_file):
        """Indicators named by the ``MVT_STIX2`` environment variable are loaded.

        The environment variable and the global ``settings`` object are
        restored afterwards so this test cannot influence tests that run
        later in the same process.
        """
        previous = os.environ.get("MVT_STIX2")
        os.environ["MVT_STIX2"] = indicator_file
        try:
            settings.__init__()  # Reset settings
            ind = self._load_indicators([])
            assert ind.total_ioc_count == 9
        finally:
            # Undo the environment change, then re-initialise settings again
            # so the restored environment is what later tests observe.
            if previous is None:
                os.environ.pop("MVT_STIX2", None)
            else:
                os.environ["MVT_STIX2"] = previous
            settings.__init__()