From 524bfcf6496f0fa7a602c87074a3ae4005416fbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donncha=20=C3=93=20Cearbhaill?= Date: Mon, 30 Sep 2024 18:39:11 +0200 Subject: [PATCH] WIP: Better dumpstate parser --- mvt/android/artifacts/dumpstate_artifact.py | 150 ++++++++++++++++++ tests/android/test_artifact_dumpstate.py | 42 +++++ .../android_data/bugreport/dumpstate.txt | 38 ++++- 3 files changed, 227 insertions(+), 3 deletions(-) create mode 100644 mvt/android/artifacts/dumpstate_artifact.py create mode 100644 tests/android/test_artifact_dumpstate.py diff --git a/mvt/android/artifacts/dumpstate_artifact.py b/mvt/android/artifacts/dumpstate_artifact.py new file mode 100644 index 0000000..f6e702b --- /dev/null +++ b/mvt/android/artifacts/dumpstate_artifact.py @@ -0,0 +1,150 @@ +# Mobile Verification Toolkit (MVT) +# Copyright (c) 2021-2023 The MVT Authors. +# Use of this software is governed by the MVT License 1.1 that can be found at +# https://license.mvt.re/1.1/ +import re + +from .artifact import AndroidArtifact + + +# The AOSP dumpstate code is available at https://cs.android.com/android/platform/superproject/+/master:frameworks/native/cmds/dumpstate/ +# The dumpstate code is used to generate bugreports on Android devices. It looks like there are +# bugs in the code that leave some sections without ending lines. We need to handle these cases. +# +# The approach here is to flag probably broken sections, and to search for plausible new section headers +# to close the previous section. This is a heuristic approach, and may not work in all cases. We can't do +# this for all sections as we will detect subsections as new sections. 
# Names of sections that dumpstate is known to leave without a closing
# "was the duration of" line. Only these sections may be closed implicitly
# by the next line that looks like a section header.
SECTION_BROKEN_TERMINATORS = [
    b"VM TRACES AT LAST ANR",
]


class DumpStateArtifact(AndroidArtifact):
    """Split a raw dumpstate (bugreport) file into its named sections.

    All parsing is done on ``bytes``. After ``parse_dumpstate`` has run:

    * ``dumpstate_header`` maps header keys to values (both ``bytes``).
    * ``dumpstate_sections`` is a list of dicts, one per section, in the
      order the section headers were encountered.
    * ``unparsed_lines`` holds every line seen while no section was open.
    """

    def __init__(self, *args, **kwargs):
        # Section dicts, as built by _get_section_header.
        self.dumpstate_sections = []
        # Key/value fields from the banner at the top of the file.
        self.dumpstate_header = {}
        # Lines that did not belong to any section.
        self.unparsed_lines = []
        super().__init__(*args, **kwargs)

    def _parse_dumpstate_header(self, header_text):
        """
        Parse dumpstate header metadata.

        Lines starting with ``=`` are skipped, as are blank lines that come
        before the first field. Every following line containing ``:`` is
        stored as a key/value pair, and parsing stops at the first blank line
        once at least one field has been collected.

        :param header_text: dumpstate content (bytes); only the leading
                            header block is consumed
        :return: dict mapping header keys to values (bytes), also stored in
                 ``self.dumpstate_header``
        """
        fields = {}
        for line in header_text.splitlines():
            if line.startswith(b"="):
                continue

            if not line:
                if fields:
                    # A blank line after the fields ends the header block.
                    break
                # Still in the blank lines preceding the first field.
                continue

            key, separator, value = line.partition(b":")
            if separator:
                # Strip instead of dropping exactly one character, so that
                # "key:value" and "key: value" are both parsed correctly.
                fields[key.strip()] = value.strip()

        self.dumpstate_header = fields
        return fields

    def _get_section_header(self, header_match):
        """
        Create the internal dictionary tracking one dumpstate section and
        register it in ``self.dumpstate_sections``.

        :param header_match: match object produced by the section header
                             regex in ``parse_dumpstate`` (group 1 is the
                             section name, group 2 the optional command
                             including its parentheses)
        :return: the newly created section dict
        """
        section_full = header_match.group(0).strip(b"-").strip()
        section_name = header_match.group(1).rstrip()

        command_group = header_match.group(2)
        if command_group:
            # The regex guarantees one enclosing pair of parentheses; slice
            # it off rather than strip() so a command that itself ends in
            # ")" is left intact.
            section_command = command_group[1:-1]
        else:
            # Some headers have no command. Keep the type consistent with
            # every other field (bytes).
            section_command = b""

        section = {
            "section_name": section_name,
            "section_command": section_command,
            "section_full": section_full,
            "missing_terminator": section_name in SECTION_BROKEN_TERMINATORS,
            "lines": [],
            "error": False,
        }
        self.dumpstate_sections.append(section)
        return section

    def parse_dumpstate(self, text: bytes) -> list:
        """
        Extract all sections from a full dumpstate file.

        Parser state (``dumpstate_sections``, ``unparsed_lines`` and
        ``dumpstate_header``) is reset on every call, so parsing a second
        file does not mix its sections with those of the first.

        :param text: content of the full dumpstate file (bytes); a ``str``
                     is accepted for convenience and encoded as UTF-8
        :return: list of section dicts (same object as
                 ``self.dumpstate_sections``)
        """
        if isinstance(text, str):
            text = text.encode("utf-8", errors="replace")

        self.dumpstate_sections = []
        self.unparsed_lines = []

        # Parse the header
        self._parse_dumpstate_header(text)

        header_prefix = b"------ "

        # Regexes to parse headers and error lines. Any line beginning with
        # "*** " is treated as an error line; this also covers the
        # "*** <path>: No such file or directory" form.
        section_name_re = re.compile(rb"------ ([\w\d\s\-\/\&]+)(\(.*\))? ------")
        error_re = re.compile(rb"\*\*\* (.*)")

        section = None

        for line in text.splitlines():
            if section is None:
                header_match = section_name_re.match(line)
                if header_match:
                    section = self._get_section_header(header_match)
                else:
                    # Not inside a section and not a header: keep for later.
                    self.unparsed_lines.append(line)
                continue

            if line.lstrip().startswith(header_prefix):
                # This may be a nested header, or the terminator of the
                # current section. A terminator looks like:
                # ------ 0.557s was the duration of 'DUMPSYS CRITICAL' ------
                quoted_name = b"'" + section["section_name"] + b"'"
                # section_full is needed for terminators that repeat the
                # command, e.g. 'KERNEL LOG (dmesg)'.
                if quoted_name in line or section["section_full"] in line:
                    section["lines"].append(line)
                    section = None
                    continue

                # Only sections explicitly flagged as badly terminated may be
                # closed by a new header; otherwise nested headers such as
                #   ------ DUMP BLOCK STAT ------
                #   ------ BLOCK STAT (/sys/block/dm-20) ------
                # would be split apart.
                if section["missing_terminator"]:
                    header_match = section_name_re.match(line)
                    if header_match:
                        section = self._get_section_header(header_match)
                        # The header line opens the new section; it is not
                        # part of its content (same as the normal path).
                        continue

                # Otherwise: probably the terminator of a subsection, treat
                # it as a regular line below.

            section["lines"].append(line)
            if error_re.match(line):
                # A failed read is dumped without a closing line, so the
                # error line itself ends the section.
                section["error"] = True
                section = None

        return self.dumpstate_sections
== 10 \ No newline at end of file diff --git a/tests/artifacts/android_data/bugreport/dumpstate.txt b/tests/artifacts/android_data/bugreport/dumpstate.txt index b30d30b..61e638b 100644 --- a/tests/artifacts/android_data/bugreport/dumpstate.txt +++ b/tests/artifacts/android_data/bugreport/dumpstate.txt @@ -1,3 +1,25 @@ +======================================================== +== dumpstate: 2024-04-21 10:00:11 +======================================================== + +Build: TP1A.220624.014 +Uptime: up 0 weeks, 0 days, 0 hours, 20 minutes, load average: 20.00, 19.92, 15.46 +Bugreport format version: 2.0 +Dumpstate info: id=1 pid=21015 dry_run=0 parallel_run=1 args=/system/bin/dumpstate -S bugreport_mode= + +------ DUMPSYS CRITICAL (/system/bin/dumpsys) ------ +------------------------------------------------------------------------------- +DUMP OF SERVICE CRITICAL SurfaceFlinger: +now = 1202781815070 +Build configuration: [sf PRESENT_TIME_OFFSET=0 FORCE_HWC_FOR_RBG_TO_YUV=1 MAX_VIRT_DISPLAY_DIM=0 RUNNING_WITHOUT_SYNC_FRAMEWORK=0 NUM_FRAMEBUFFER_SURFACE_BUFFERS=3] + +Display identification data: +Display 0 (HWC display 0): no identification data + +Wide-Color information: +Device has wide color built-in display: 0 +Device uses color management: 1 + Currently running services: AAS AODManagerService @@ -246,6 +268,16 @@ Packages: com.instagram.direct.share.handler.DirectMultipleExternalMediaShareActivity com.instagram.share.handleractivity.ClipsShareHandlerActivity com.instagram.direct.share.handler.DirectMultipleExternalMediaShareActivityInterop - - - +------ 0.557s was the duration of 'DUMPSYS CRITICAL' ------ +------ 0.023s was the duration of 'DUMPSYS CRITICAL PROTO' ------ +------ SERIALIZE PERFETTO TRACE (perfetto --save-for-bugreport) ------ +test +------ 0.036s was the duration of 'SERIALIZE PERFETTO TRACE' ------ +------ MODEM CRASH HISTORY (/data/tombstones//modem/mcrash_history) ------ +*** /data/tombstones//modem/mcrash_history: No such file or directory 
+------ SYSTEM LOG (logcat -v threadtime -v printable -v uid -d *:v) ------ +--------- beginning of system +05-28 09:44:19.845 root 578 578 I vold : Vold 3.0 (the awakening) firing up +05-28 09:44:19.845 root 578 578 D vold : Detected support for: exfat ext4 f2fs ntfs vfat +05-28 09:44:19.849 root 578 578 W vold : [libfs_mgr]Warning: unknown flag: resize +------ 0.417s was the duration of 'SYSTEM LOG' ------ \ No newline at end of file