From 7d79844749764609681ee05e44bfe94975d6c8b3 Mon Sep 17 00:00:00 2001 From: Tek Date: Tue, 21 Feb 2023 20:16:32 +0100 Subject: [PATCH] Improves generation of hashes (#327) * Improves generation of hashes * Adds generation of reference info.json hash --- mvt/android/cli.py | 14 +++--- mvt/android/cmd_check_androidqf.py | 4 +- mvt/android/cmd_check_backup.py | 4 +- mvt/android/cmd_check_bugreport.py | 4 +- mvt/common/command.py | 61 ++++++++++---------------- mvt/common/help.py | 1 + mvt/common/utils.py | 63 +++++++++++++++++++-------- mvt/ios/cli.py | 29 +++++++++--- mvt/ios/cmd_check_backup.py | 4 +- mvt/ios/cmd_check_fs.py | 4 +- mvt/ios/versions.py | 3 +- tests/common/test_date_conversions.py | 31 ------------- tests/common/test_utils.py | 55 +++++++++++++++++++++++ 13 files changed, 175 insertions(+), 102 deletions(-) delete mode 100644 tests/common/test_date_conversions.py create mode 100644 tests/common/test_utils.py diff --git a/mvt/android/cli.py b/mvt/android/cli.py index 5d76d61..00057a8 100644 --- a/mvt/android/cli.py +++ b/mvt/android/cli.py @@ -11,7 +11,8 @@ from rich.logging import RichHandler from mvt.common.cmd_check_iocs import CmdCheckIOCS from mvt.common.help import (HELP_MSG_FAST, HELP_MSG_IOC, HELP_MSG_LIST_MODULES, HELP_MSG_MODULE, - HELP_MSG_OUTPUT, HELP_MSG_SERIAL) + HELP_MSG_OUTPUT, HELP_MSG_SERIAL, + HELP_MSG_HASHES) from mvt.common.logo import logo from mvt.common.updates import IndicatorsUpdates @@ -144,9 +145,10 @@ def check_adb(ctx, serial, iocs, output, fast, list_modules, module): @click.argument("BUGREPORT_PATH", type=click.Path(exists=True)) @click.pass_context def check_bugreport(ctx, iocs, output, list_modules, module, bugreport_path): + # Always generate hashes as bug reports are small. cmd = CmdAndroidCheckBugreport(target_path=bugreport_path, results_path=output, ioc_files=iocs, - module_name=module) + module_name=module, hashes=True) if list_modules: cmd.list_modules() @@ -174,8 +176,9 @@ def check_bugreport(ctx, iocs, output, list_modules, module, bugreport_path): @click.argument("BACKUP_PATH", type=click.Path(exists=True)) @click.pass_context def check_backup(ctx, iocs, output, list_modules, backup_path): + # Always generate hashes as backups are generally small. cmd = CmdAndroidCheckBackup(target_path=backup_path, results_path=output, - ioc_files=iocs) + ioc_files=iocs, hashes=True) if list_modules: cmd.list_modules() @@ -201,12 +204,13 @@ def check_backup(ctx, iocs, output, list_modules, backup_path): help=HELP_MSG_OUTPUT) @click.option("--list-modules", "-l", is_flag=True, help=HELP_MSG_LIST_MODULES) @click.option("--module", "-m", help=HELP_MSG_MODULE) +@click.option("--hashes", "-H", is_flag=True, help=HELP_MSG_HASHES) @click.argument("ANDROIDQF_PATH", type=click.Path(exists=True)) @click.pass_context -def check_androidqf(ctx, iocs, output, list_modules, module, androidqf_path): +def check_androidqf(ctx, iocs, output, list_modules, module, hashes, androidqf_path): cmd = CmdAndroidCheckAndroidQF(target_path=androidqf_path, results_path=output, ioc_files=iocs, - module_name=module) + module_name=module, hashes=hashes) if list_modules: cmd.list_modules() diff --git a/mvt/android/cmd_check_androidqf.py b/mvt/android/cmd_check_androidqf.py index 6e6bc0e..d0d81dd 100644 --- a/mvt/android/cmd_check_androidqf.py +++ b/mvt/android/cmd_check_androidqf.py @@ -23,10 +23,12 @@ class CmdAndroidCheckAndroidQF(Command): module_name: Optional[str] = None, serial: Optional[str] = None, fast_mode: Optional[bool] = False, + hashes: Optional[bool] = False, ) -> None: super().__init__(target_path=target_path, results_path=results_path, ioc_files=ioc_files, module_name=module_name, - serial=serial, fast_mode=fast_mode, log=log) + serial=serial, fast_mode=fast_mode, hashes=hashes, + log=log) self.name = "check-androidqf" self.modules = ANDROIDQF_MODULES diff --git a/mvt/android/cmd_check_backup.py b/mvt/android/cmd_check_backup.py index 86a90d1..cf27c46 100644 --- a/mvt/android/cmd_check_backup.py +++ b/mvt/android/cmd_check_backup.py @@ -33,10 +33,12 @@ class CmdAndroidCheckBackup(Command): module_name: Optional[str] = None, serial: Optional[str] = None, fast_mode: Optional[bool] = False, + hashes: Optional[bool] = False, ) -> None: super().__init__(target_path=target_path, results_path=results_path, ioc_files=ioc_files, module_name=module_name, - serial=serial, fast_mode=fast_mode, log=log) + serial=serial, fast_mode=fast_mode, hashes=hashes, + log=log) self.name = "check-backup" self.modules = BACKUP_MODULES diff --git a/mvt/android/cmd_check_bugreport.py b/mvt/android/cmd_check_bugreport.py index 38a9d99..a86b5ea 100644 --- a/mvt/android/cmd_check_bugreport.py +++ b/mvt/android/cmd_check_bugreport.py @@ -26,10 +26,12 @@ class CmdAndroidCheckBugreport(Command): module_name: Optional[str] = None, serial: Optional[str] = None, fast_mode: Optional[bool] = False, + hashes: Optional[bool] = False, ) -> None: super().__init__(target_path=target_path, results_path=results_path, ioc_files=ioc_files, module_name=module_name, - serial=serial, fast_mode=fast_mode, log=log) + serial=serial, fast_mode=fast_mode, hashes=hashes, + log=log) self.name = "check-bugreport" self.modules = BUGREPORT_MODULES diff --git a/mvt/common/command.py b/mvt/common/command.py index 966ff97..1ed0d52 100644 --- a/mvt/common/command.py +++ b/mvt/common/command.py @@ -13,7 +13,7 @@ from typing import Callable, Optional from mvt.common.indicators import Indicators from mvt.common.module import run_module, save_timeline -from mvt.common.utils import convert_datetime_to_iso +from mvt.common.utils import convert_datetime_to_iso, generate_hashes_from_path, get_sha256_from_file_path from mvt.common.version import MVT_VERSION @@ -27,6 +27,7 @@ class Command: module_name: Optional[str] = None, serial: Optional[str] = None, fast_mode: Optional[bool] = False, + hashes: Optional[bool] = False, log: logging.Logger = logging.getLogger(__name__), ) -> None: self.name = "" @@ -49,6 +50,8 @@ class Command: self.detected_count = 0 + self.hashes = hashes + self.hash_values = [] self.timeline = [] self.timeline_detected = [] @@ -107,45 +110,29 @@ class Command: if ioc_file_path and ioc_file_path not in info["ioc_files"]: info["ioc_files"].append(ioc_file_path) - # TODO: Revisit if setting this from environment variable is good - # enough. - if self.target_path and os.environ.get("MVT_HASH_FILES"): - if os.path.isfile(self.target_path): - sha256 = hashlib.sha256() - with open(self.target_path, "rb") as handle: - sha256.update(handle.read()) + if self.target_path and (os.environ.get("MVT_HASH_FILES") or self.hashes): + self.generate_hashes() - info["hashes"].append({ - "file_path": self.target_path, - "sha256": sha256.hexdigest(), - }) - elif os.path.isdir(self.target_path): - for (root, _, files) in os.walk(self.target_path): - for file in files: - file_path = os.path.join(root, file) - sha256 = hashlib.sha256() - - try: - with open(file_path, "rb") as handle: - sha256.update(handle.read()) - except FileNotFoundError: - self.log.error("Failed to hash the file %s: might be a symlink", - file_path) - continue - except PermissionError: - self.log.error("Failed to hash the file %s: permission denied", - file_path) - continue - - info["hashes"].append({ - "file_path": file_path, - "sha256": sha256.hexdigest(), - }) + info["hashes"] = self.hash_values info_path = os.path.join(self.results_path, "info.json") with open(info_path, "w+", encoding="utf-8") as handle: json.dump(info, handle, indent=4) + if self.target_path and (os.environ.get("MVT_HASH_FILES") or self.hashes): + info_hash = get_sha256_from_file_path(info_path) + self.log.warning("Reference hash of the info.json file : %s", info_hash) + + def generate_hashes(self) -> None: + """ + Compute hashes for files in the target_path + """ + if not self.target_path: + return + + for file in generate_hashes_from_path(self.target_path, self.log): + self.hash_values.append(file) + def list_modules(self) -> None: self.log.info("Following is the list of available %s modules:", self.name) @@ -203,10 +190,10 @@ class Command: self.timeline.extend(m.timeline) self.timeline_detected.extend(m.timeline_detected) - self._store_timeline() - self._store_info() - try: self.finish() except NotImplementedError: pass + + self._store_timeline() + self._store_info() diff --git a/mvt/common/help.py b/mvt/common/help.py index 1945fa3..f68832a 100644 --- a/mvt/common/help.py +++ b/mvt/common/help.py @@ -9,6 +9,7 @@ HELP_MSG_IOC = "Path to indicators file (can be invoked multiple time)" HELP_MSG_FAST = "Avoid running time/resource consuming features" HELP_MSG_LIST_MODULES = "Print list of available modules and exit" HELP_MSG_MODULE = "Name of a single module you would like to run instead of all" +HELP_MSG_HASHES = "Generate hashes of all the files analyzed" # Android-specific. HELP_MSG_SERIAL = "Specify a device serial number or HOST:PORT connection string" diff --git a/mvt/common/utils.py b/mvt/common/utils.py index 8fdeb76..3658589 100644 --- a/mvt/common/utils.py +++ b/mvt/common/utils.py @@ -3,13 +3,14 @@ # Use of this software is governed by the MVT License 1.1 that can be found at # https://license.mvt.re/1.1/ +import os import datetime import hashlib import re -from typing import Union +from typing import Union, Iterator -def convert_chrometime_to_datetime(timestamp: int) -> int: +def convert_chrometime_to_datetime(timestamp: int) -> datetime.datetime: """Converts Chrome timestamp to a datetime. :param timestamp: Chrome timestamp as int. @@ -122,21 +123,6 @@ def check_for_links(text: str) -> list: return re.findall(r"(?Phttps?://[^\s]+)", text, re.IGNORECASE) -def get_sha256_from_file_path(file_path: str) -> str: - """Calculate the SHA256 hash of a file from a file path. - - :param file_path: Path to the file to hash - :returns: The SHA256 hash string - - """ - sha256_hash = hashlib.sha256() - with open(file_path, "rb") as handle: - for byte_block in iter(lambda: handle.read(4096), b""): - sha256_hash.update(byte_block) - - return sha256_hash.hexdigest() - - # Note: taken from here: # https://stackoverflow.com/questions/57014259/json-dumps-on-dictionary-with-bytes-for-keys def keys_bytes_to_string(obj) -> str: @@ -165,3 +151,46 @@ def keys_bytes_to_string(obj) -> str: new_obj[key] = value return new_obj + + +def get_sha256_from_file_path(file_path: str) -> str: + """Calculate the SHA256 hash of a file from a file path. + + :param file_path: Path to the file to hash + :returns: The SHA256 hash string + + """ + sha256_hash = hashlib.sha256() + with open(file_path, "rb") as handle: + for byte_block in iter(lambda: handle.read(4096), b""): + sha256_hash.update(byte_block) + + return sha256_hash.hexdigest() + + +def generate_hashes_from_path(path: str, log) -> Iterator[dict]: + """ + Generates hashes of all files at the given path. + + :params path: Path of the given folder or file + :returns: generator of dict {"file_path", "hash"} + """ + if os.path.isfile(path): + hash_value = get_sha256_from_file_path(path) + yield {"file_path": path, "sha256": hash_value} + elif os.path.isdir(path): + for (root, _, files) in os.walk(path): + for file in files: + file_path = os.path.join(root, file) + try: + sha256 = get_sha256_from_file_path(file_path) + except FileNotFoundError: + log.error("Failed to hash the file %s: might be a symlink", + file_path) + continue + except PermissionError: + log.error("Failed to hash the file %s: permission denied", + file_path) + continue + + yield {"file_path": file_path, "sha256": sha256} diff --git a/mvt/ios/cli.py b/mvt/ios/cli.py index 1efb7a7..90e6386 100644 --- a/mvt/ios/cli.py +++ b/mvt/ios/cli.py @@ -5,6 +5,7 @@ import logging import os +import json import click from rich.logging import RichHandler @@ -13,10 +14,11 @@ from rich.prompt import Prompt from mvt.common.cmd_check_iocs import CmdCheckIOCS from mvt.common.help import (HELP_MSG_FAST, HELP_MSG_IOC, HELP_MSG_LIST_MODULES, HELP_MSG_MODULE, - HELP_MSG_OUTPUT) + HELP_MSG_OUTPUT, HELP_MSG_HASHES) from mvt.common.logo import logo from mvt.common.options import MutuallyExclusiveOption from mvt.common.updates import IndicatorsUpdates +from mvt.common.utils import generate_hashes_from_path from .cmd_check_backup import CmdIOSCheckBackup from .cmd_check_fs import CmdIOSCheckFS @@ -68,9 +70,10 @@ def version(): help="File containing raw encryption key to use to decrypt " "the backup", mutually_exclusive=["password"]) +@click.option("--hashes", "-H", is_flag=True, help=HELP_MSG_HASHES) @click.argument("BACKUP_PATH", type=click.Path(exists=True)) @click.pass_context -def decrypt_backup(ctx, destination, password, key_file, backup_path): +def decrypt_backup(ctx, destination, password, key_file, hashes, backup_path): backup = DecryptBackup(backup_path, destination) if key_file: @@ -101,6 +104,16 @@ def decrypt_backup(ctx, destination, password, key_file, backup_path): backup.process_backup() + if hashes: + info = {"encrypted": [], "decrypted": []} + for file in generate_hashes_from_path(backup_path, log): + info["encrypted"].append(file) + for file in generate_hashes_from_path(destination, log): + info["decrypted"].append(file) + info_path = os.path.join(destination, "info.json") + with open(info_path, "w+", encoding="utf-8") as handle: + json.dump(info, handle, indent=4) + #============================================================================== # Command: extract-key @@ -152,11 +165,13 @@ def extract_key(password, key_file, backup_path): @click.option("--fast", "-f", is_flag=True, help=HELP_MSG_FAST) @click.option("--list-modules", "-l", is_flag=True, help=HELP_MSG_LIST_MODULES) @click.option("--module", "-m", help=HELP_MSG_MODULE) +@click.option("--hashes", "-H", is_flag=True, help=HELP_MSG_HASHES) @click.argument("BACKUP_PATH", type=click.Path(exists=True)) @click.pass_context -def check_backup(ctx, iocs, output, fast, list_modules, module, backup_path): +def check_backup(ctx, iocs, output, fast, list_modules, module, hashes, backup_path): cmd = CmdIOSCheckBackup(target_path=backup_path, results_path=output, - ioc_files=iocs, module_name=module, fast_mode=fast) + ioc_files=iocs, module_name=module, fast_mode=fast, + hashes=hashes) if list_modules: cmd.list_modules() @@ -183,11 +198,13 @@ def check_backup(ctx, iocs, output, fast, list_modules, module, backup_path): @click.option("--fast", "-f", is_flag=True, help=HELP_MSG_FAST) @click.option("--list-modules", "-l", is_flag=True, help=HELP_MSG_LIST_MODULES) @click.option("--module", "-m", help=HELP_MSG_MODULE) +@click.option("--hashes", "-H", is_flag=True, help=HELP_MSG_HASHES) @click.argument("DUMP_PATH", type=click.Path(exists=True)) @click.pass_context -def check_fs(ctx, iocs, output, fast, list_modules, module, dump_path): +def check_fs(ctx, iocs, output, fast, list_modules, module, hashes, dump_path): cmd = CmdIOSCheckFS(target_path=dump_path, results_path=output, - ioc_files=iocs, module_name=module, fast_mode=fast) + ioc_files=iocs, module_name=module, fast_mode=fast, + hashes=hashes) if list_modules: cmd.list_modules() diff --git a/mvt/ios/cmd_check_backup.py b/mvt/ios/cmd_check_backup.py index 4627f3d..53ed6bd 100644 --- a/mvt/ios/cmd_check_backup.py +++ b/mvt/ios/cmd_check_backup.py @@ -24,10 +24,12 @@ class CmdIOSCheckBackup(Command): module_name: Optional[str] = None, serial: Optional[str] = None, fast_mode: Optional[bool] = False, + hashes: Optional[bool] = False, ) -> None: super().__init__(target_path=target_path, results_path=results_path, ioc_files=ioc_files, module_name=module_name, - serial=serial, fast_mode=fast_mode, log=log) + serial=serial, fast_mode=fast_mode, hashes=hashes, + log=log) self.name = "check-backup" self.modules = BACKUP_MODULES + MIXED_MODULES diff --git a/mvt/ios/cmd_check_fs.py b/mvt/ios/cmd_check_fs.py index 2204287..cb9175b 100644 --- a/mvt/ios/cmd_check_fs.py +++ b/mvt/ios/cmd_check_fs.py @@ -24,10 +24,12 @@ class CmdIOSCheckFS(Command): module_name: Optional[str] = None, serial: Optional[str] = None, fast_mode: Optional[bool] = False, + hashes: Optional[bool] = False, ) -> None: super().__init__(target_path=target_path, results_path=results_path, ioc_files=ioc_files, module_name=module_name, - serial=serial, fast_mode=fast_mode, log=log) + serial=serial, fast_mode=fast_mode, hashes=hashes, + log=log) self.name = "check-fs" self.modules = FS_MODULES + MIXED_MODULES diff --git a/mvt/ios/versions.py b/mvt/ios/versions.py index 164df77..05c8a6d 100644 --- a/mvt/ios/versions.py +++ b/mvt/ios/versions.py @@ -2,6 +2,7 @@ # Copyright (c) 2021-2023 Claudio Guarnieri. # Use of this software is governed by the MVT License 1.1 that can be found at # https://license.mvt.re/1.1/ +from typing import Dict from logging import Logger from typing import Optional @@ -290,7 +291,7 @@ def find_version_by_build(build: str) -> str: return "" -def latest_ios_version() -> str: +def latest_ios_version() -> Dict[str, str]: return IPHONE_IOS_VERSIONS[-1] diff --git a/tests/common/test_date_conversions.py b/tests/common/test_date_conversions.py deleted file mode 100644 index 2e67f73..0000000 --- a/tests/common/test_date_conversions.py +++ /dev/null @@ -1,31 +0,0 @@ -# Mobile Verification Toolkit (MVT) -# Copyright (c) 2021-2023 Claudio Guarnieri. -# Use of this software is governed by the MVT License 1.1 that can be found at -# https://license.mvt.re/1.1/ - -from mvt.common.utils import (convert_datetime_to_iso, convert_mactime_to_iso, - convert_unix_to_iso, - convert_unix_to_utc_datetime) - -TEST_DATE_EPOCH = 1626566400 -TEST_DATE_ISO = "2021-07-18 00:00:00.000000" -TEST_DATE_MAC = TEST_DATE_EPOCH - 978307200 - - -class TestDateConversions: - - def test_convert_unix_to_iso(self): - assert convert_unix_to_iso(TEST_DATE_EPOCH) == TEST_DATE_ISO - - def test_convert_mactime_to_iso(self): - assert convert_mactime_to_iso(TEST_DATE_MAC) == TEST_DATE_ISO - - def test_convert_unix_to_utc_datetime(self): - converted = convert_unix_to_utc_datetime(TEST_DATE_EPOCH) - assert converted.year == 2021 - assert converted.month == 7 - assert converted.day == 18 - - def test_convert_datetime_to_iso(self): - converted = convert_unix_to_utc_datetime(TEST_DATE_EPOCH) - assert convert_datetime_to_iso(converted) == TEST_DATE_ISO diff --git a/tests/common/test_utils.py b/tests/common/test_utils.py new file mode 100644 index 0000000..2d4af40 --- /dev/null +++ b/tests/common/test_utils.py @@ -0,0 +1,55 @@ +# Mobile Verification Toolkit (MVT) +# Copyright (c) 2021-2022 Claudio Guarnieri. +# Use of this software is governed by the MVT License 1.1 that can be found at +# https://license.mvt.re/1.1/ + +import os +import logging +from ..utils import get_artifact_folder +from mvt.common.utils import (convert_datetime_to_iso, convert_mactime_to_iso, + convert_unix_to_iso, + convert_unix_to_utc_datetime, + generate_hashes_from_path, + get_sha256_from_file_path) + +TEST_DATE_EPOCH = 1626566400 +TEST_DATE_ISO = "2021-07-18 00:00:00.000000" +TEST_DATE_MAC = TEST_DATE_EPOCH - 978307200 + + +class TestDateConversions: + + def test_convert_unix_to_iso(self): + assert convert_unix_to_iso(TEST_DATE_EPOCH) == TEST_DATE_ISO + + def test_convert_mactime_to_iso(self): + assert convert_mactime_to_iso(TEST_DATE_MAC) == TEST_DATE_ISO + + def test_convert_unix_to_utc_datetime(self): + converted = convert_unix_to_utc_datetime(TEST_DATE_EPOCH) + assert converted.year == 2021 + assert converted.month == 7 + assert converted.day == 18 + + def test_convert_datetime_to_iso(self): + converted = convert_unix_to_utc_datetime(TEST_DATE_EPOCH) + assert convert_datetime_to_iso(converted) == TEST_DATE_ISO + + +class TestHashes: + + def test_hash_from_file(self): + path = os.path.join(get_artifact_folder(), "androidqf", "backup.ab") + sha256 = get_sha256_from_file_path(path) + assert sha256 == "f0e32fe8a7fd5ac0e2de19636d123c0072e979396986139ba2bc49ec385dc325" + + def test_hash_from_folder(self): + path = os.path.join(get_artifact_folder(), "androidqf") + hashes = list(generate_hashes_from_path(path, logging)) + assert len(hashes) == 5 + # Sort the files to have reliable order for tests. + hashes = sorted(hashes, key=lambda x: x["file_path"]) + assert hashes[0]["file_path"] == os.path.join(path, "backup.ab") + assert hashes[0]["sha256"] == "f0e32fe8a7fd5ac0e2de19636d123c0072e979396986139ba2bc49ec385dc325" + assert hashes[1]["file_path"] == os.path.join(path, "dumpsys.txt") + assert hashes[1]["sha256"] == "bac858001784657a43c7cfa771fd1fc4a49428eb6b7c458a1ebf2fdeef78dd86"