Compare commits

...

3 Commits

Author SHA1 Message Date
besendorf
6ba6d2ebb8 Merge branch 'main' into fix/stix2-hash-key-normalization 2026-04-07 20:40:01 +02:00
Janik Besendorf
e80a05992e Fix STIX2 hash key parsing to accept spec-compliant algorithm names
The STIX2 specification requires single quotes around hash algorithm
names that contain hyphens (e.g. file:hashes.'SHA-256'). MVT only
accepted a non-standard lowercase form (file:hashes.sha256), silently
dropping any indicators using the spec-correct spelling.

Normalize hash algorithm keys in _process_indicator by stripping quotes
and hyphens from the algorithm portion before matching, so all of the
following are accepted for SHA-256, SHA-1 and MD5:

  file:hashes.'SHA-256'   (STIX2 spec)
  file:hashes.SHA-256
  file:hashes.SHA256
  file:hashes.sha256      (previously the only accepted form)

The same normalization is applied to app:cert.* keys.

Update generate_stix.py to use the spec-compliant quoted forms, and add
test_parse_stix2_hash_key_variants to cover all spelling variants.
2026-04-07 20:38:37 +02:00
Janik Besendorf
47330e4e45 Fix betterproto2 migration: update generated proto code and callers
The dependency switch from betterproto to betterproto2 was incomplete.
This updates all affected files to use the betterproto2 API:

- tombstone.py: rewrite generated code to use betterproto2.field() with
  explicit TYPE_* constants, repeated/optional/group flags, and map_meta()
  for map fields
- tombstone_crashes.py: update import and fix to_dict() call to use
  keyword-only casing= argument required by betterproto2
- pyproject.toml: replace betterproto[compiler] dev dep with betterproto2-compiler
- Makefile: update protoc plugin flag to --python_betterproto2_out
2026-04-07 14:07:19 +02:00
3 changed files with 85 additions and 2 deletions

View File

@@ -100,6 +100,17 @@ class Indicators:
key, value = indicator.get("pattern", "").strip("[]").split("=")
key = key.strip()
# Normalize hash algorithm keys so that both the STIX2-spec-compliant
# form (e.g. file:hashes.'SHA-256', which requires quotes around
# algorithm names that contain hyphens) and the non-standard lowercase
# form (e.g. file:hashes.sha256) are accepted. Strip single quotes and
# hyphens from the algorithm name only, then lowercase it.
for sep in ("hashes.", "cert."):
if sep in key:
prefix, _, algo = key.partition(sep)
key = prefix + sep + algo.replace("'", "").replace("-", "").lower()
break
if key == "domain-name:value":
# We force domain names to lower case.
self._add_indicator(

View File

@@ -82,7 +82,7 @@ def generate_test_stix_file(file_path):
for h in sha256:
i = Indicator(
indicator_types=["malicious-activity"],
pattern="[file:hashes.sha256='{}']".format(h),
pattern="[file:hashes.'SHA-256'='{}']".format(h),
pattern_type="stix",
)
res.append(i)
@@ -91,7 +91,7 @@ def generate_test_stix_file(file_path):
for h in sha1:
i = Indicator(
indicator_types=["malicious-activity"],
pattern="[file:hashes.sha1='{}']".format(h),
pattern="[file:hashes.'SHA-1'='{}']".format(h),
pattern_type="stix",
)
res.append(i)

View File

@@ -94,6 +94,78 @@ class TestIndicators:
)
assert ind.check_file_hash("da0611a300a9ce9aa7a09d1212f203fca5856794")
def test_parse_stix2_hash_key_variants(self, tmp_path):
"""STIX2 spec requires single-quoted algorithm names that contain hyphens,
e.g. file:hashes.'SHA-256'. Verify MVT accepts both spec-compliant and
non-standard lowercase spellings for MD5, SHA-1 and SHA-256."""
import json
sha256_hash = "570cd76bf49cf52e0cb347a68bdcf0590b2eaece134e1b1eba7e8d66261bdbe6"
sha1_hash = "da0611a300a9ce9aa7a09d1212f203fca5856794"
md5_hash = "d41d8cd98f00b204e9800998ecf8427e"
variants = [
# (pattern_key, expected_bucket)
("file:hashes.'SHA-256'", "files_sha256"),
("file:hashes.SHA-256", "files_sha256"),
("file:hashes.SHA256", "files_sha256"),
("file:hashes.sha256", "files_sha256"),
("file:hashes.'SHA-1'", "files_sha1"),
("file:hashes.SHA-1", "files_sha1"),
("file:hashes.SHA1", "files_sha1"),
("file:hashes.sha1", "files_sha1"),
("file:hashes.MD5", "files_md5"),
("file:hashes.'MD5'", "files_md5"),
("file:hashes.md5", "files_md5"),
]
hash_for = {
"files_sha256": sha256_hash,
"files_sha1": sha1_hash,
"files_md5": md5_hash,
}
for pattern_key, bucket in variants:
h = hash_for[bucket]
stix = {
"type": "bundle",
"id": "bundle--test",
"objects": [
{
"type": "malware",
"id": "malware--test",
"name": "TestMalware",
"is_family": False,
},
{
"type": "indicator",
"id": "indicator--test",
"indicator_types": ["malicious-activity"],
"pattern": f"[{pattern_key}='{h}']",
"pattern_type": "stix",
"valid_from": "2024-01-01T00:00:00Z",
},
{
"type": "relationship",
"id": "relationship--test",
"relationship_type": "indicates",
"source_ref": "indicator--test",
"target_ref": "malware--test",
},
],
}
stix_file = tmp_path / "test.stix2"
stix_file.write_text(json.dumps(stix))
ind = Indicators(log=logging)
ind.load_indicators_files([str(stix_file)], load_default=False)
assert len(ind.ioc_collections[0][bucket]) == 1, (
f"Pattern key '{pattern_key}' was not parsed into '{bucket}'"
)
assert ind.check_file_hash(h) is not None, (
f"check_file_hash failed for pattern key '{pattern_key}'"
)
def test_check_android_property(self, indicator_file):
ind = Indicators(log=logging)
ind.load_indicators_files([indicator_file], load_default=False)