#!/usr/bin/env python3
"""Find Mach-O binaries in a directory with an option to filter for ARM64.

Files are recognised by reading their header directly: the four-byte magic,
then the CPU type and file type fields.  Both single-architecture ("thin")
Mach-O files and FAT (multi-architecture) containers are handled.  Every
match is printed as ``TYPE:absolute_path``.
"""
# Post-patch state of "I. Mach-O/python/MachOFileFinder.py" as of commit
# e5aaf7bacd20 ("Further optimization, aka Three Times A Charm.",
# Karmaz95, 2024-11-06), with robustness fixes applied on top.
import os
import stat
import sys
import argparse
import struct

# Mach-O and FAT magic numbers, as they appear when the first four bytes of
# the file are decoded big-endian.
MACHO_MAGIC = 0xFEEDFACE
MACHO_MAGIC_64 = 0xFEEDFACF
MACHO_CIGAM = 0xCEFAEDFE
MACHO_CIGAM_64 = 0xCFFAEDFE
FAT_MAGIC = 0xCAFEBABE
FAT_CIGAM = 0xBEBAFECA

# Supported Mach-O file types, keyed by the header's file type field.
FILE_TYPE_MAP = {
    0x1: "OBJECT",
    0x2: "EXECUTE",
    0x3: "FVMLIB",
    0x4: "CORE",
    0x5: "PRELOAD",
    0x6: "DYLIB",
    0x7: "DYLINKER",
    0x8: "BUNDLE",
    0x9: "DYLIB_STUB",
    0xA: "DSYM",
    0xB: "KEXT_BUNDLE",
}

# CPU type constant for ARM64
CPU_TYPE_ARM64 = 0x0100000C

# Byte order of the running interpreter ("little" or "big").  Not consulted
# by the parser below: every magic is decoded explicitly as big-endian, so
# the result does not depend on the host.
system_endianness = sys.byteorder

# Private lookup sets and header sizes shared by the helpers below.
_THIN_MAGICS = frozenset(
    (MACHO_MAGIC, MACHO_MAGIC_64, MACHO_CIGAM, MACHO_CIGAM_64)
)
_FAT_MAGICS = frozenset((FAT_MAGIC, FAT_CIGAM))
_SWAPPED_MAGICS = frozenset((MACHO_CIGAM, MACHO_CIGAM_64, FAT_CIGAM))
_MAGIC_SIZE = 4          # magic number
_MACHO_FIELDS_SIZE = 12  # CPU type, CPU subtype, file type
_FAT_HEADER_SIZE = 8     # magic + number of architectures
_FAT_ARCH_SIZE = 20      # CPU type, subtype, offset, size, align


def determine_file_endianness(magic):
    """Return the ``struct`` byte-order prefix for a file's header fields.

    ``magic`` must be the first four bytes of the header decoded as a
    big-endian unsigned integer.  A byte-swapped ("CIGAM") value means the
    remaining fields are little-endian (``'<'``); any other value yields
    ``'>'``.
    """
    return '<' if magic in _SWAPPED_MAGICS else '>'


def _read_macho_fields(f, endian):
    """Read the fields after a Mach-O magic; return (cpu_type, file_type).

    Returns None when fewer than twelve bytes are available.
    """
    header_data = f.read(_MACHO_FIELDS_SIZE)
    if len(header_data) < _MACHO_FIELDS_SIZE:
        return None
    cpu_type, _cpu_subtype, file_type = struct.unpack(
        endian + "Iii", header_data
    )
    return cpu_type, file_type


def _fat_file_type(f, endian, file_size, only_arm64):
    """Return the type label of the first usable slice of a FAT binary.

    ``f`` is positioned just after the FAT magic.  Returns None when no
    architecture entry leads to a readable Mach-O header (or, with
    ``only_arm64``, when no such entry is ARM64).
    """
    count_data = f.read(4)
    if len(count_data) < 4:
        return None  # Truncated FAT header: nothing to inspect.
    num_archs = struct.unpack(endian + "I", count_data)[0]

    for index in range(num_archs):
        # Entries are addressed by index because the file position is moved
        # to each slice while it is being inspected.
        f.seek(_FAT_HEADER_SIZE + index * _FAT_ARCH_SIZE)
        arch_info = f.read(_FAT_ARCH_SIZE)
        if len(arch_info) < _FAT_ARCH_SIZE:
            # The table ran past end of file, so every later entry is
            # missing as well.  Stopping here keeps a bogus architecture
            # count (crafted files, or Java class files, which share the
            # 0xCAFEBABE magic) from spinning through billions of empty
            # reads.
            break

        cpu_type, _, offset, _, _ = struct.unpack(endian + "IIIII", arch_info)

        # Filter on the CPU type recorded in the table before touching the
        # slice itself; this saves a seek and a read per skipped slice.
        if only_arm64 and cpu_type != CPU_TYPE_ARM64:
            continue
        if offset >= file_size:
            continue  # Slice offset lies beyond the end of the file.

        f.seek(offset)
        slice_magic_data = f.read(_MAGIC_SIZE)
        if len(slice_magic_data) < _MAGIC_SIZE:
            continue
        slice_magic = struct.unpack(">I", slice_magic_data)[0]
        if slice_magic not in _THIN_MAGICS:
            continue  # Not a Mach-O slice.

        fields = _read_macho_fields(f, determine_file_endianness(slice_magic))
        if fields is None:
            continue
        return FILE_TYPE_MAP.get(fields[1], "UNKNOWN")

    return None


def _classify_stream(f, file_size, only_arm64):
    """Classify an already opened binary stream positioned at offset 0."""
    magic_data = f.read(_MAGIC_SIZE)
    if len(magic_data) < _MAGIC_SIZE:
        return None

    magic = struct.unpack(">I", magic_data)[0]
    endian = determine_file_endianness(magic)

    if magic in _THIN_MAGICS:
        fields = _read_macho_fields(f, endian)
        if fields is None:
            # Mach-O magic with a truncated header is still reported.
            return "UNKNOWN"
        cpu_type, file_type = fields
        if only_arm64 and cpu_type != CPU_TYPE_ARM64:
            return None
        return FILE_TYPE_MAP.get(file_type, "UNKNOWN")

    if magic in _FAT_MAGICS:
        return _fat_file_type(f, endian, file_size, only_arm64)

    return None


def get_macho_info(file_path, only_arm64):
    """Check if a file is a Mach-O or FAT binary and return its type label.

    Args:
        file_path: Path of the file to inspect.
        only_arm64: When true, thin binaries whose CPU type is not ARM64 are
            rejected, and FAT binaries are judged by their ARM64 slice only.

    Returns:
        A label from ``FILE_TYPE_MAP``, ``"UNKNOWN"`` for a Mach-O header
        whose file type is unmapped or truncated, or ``None`` when the path
        is not a matching Mach-O/FAT binary, is not a regular file, or
        cannot be read.
    """
    try:
        file_stat = os.stat(file_path)
        # Only regular files are opened: opening a FIFO would block, and
        # device nodes are never Mach-O binaries.
        if not stat.S_ISREG(file_stat.st_mode):
            return None
        with open(file_path, 'rb') as f:
            return _classify_stream(f, file_stat.st_size, only_arm64)
    except OSError:
        # Broken symlinks, permission errors, files that vanish mid-scan:
        # one unreadable entry must not abort the whole directory walk.
        return None


def process_directory(root, files, recursive, only_arm64):
    """Print ``TYPE:absolute_path`` for every Mach-O/FAT binary in ``files``.

    Args:
        root: Directory that contains ``files``.
        files: File names (not paths) located in ``root``.
        recursive: Unused here; kept so existing callers keep working.
        only_arm64: Forwarded to ``get_macho_info``.
    """
    for file_name in files:
        file_path = os.path.abspath(os.path.join(root, file_name))
        file_type = get_macho_info(file_path, only_arm64)
        if file_type:
            print(f"{file_type}:{file_path}")


def process_files(directory_path, recursive, only_arm64):
    """Walk ``directory_path`` and report the Mach-O/FAT binaries found.

    When ``recursive`` is false only the top-level directory is examined.
    """
    for root, dirs, files in os.walk(directory_path):
        process_directory(root, files, recursive, only_arm64)
        if not recursive:
            break  # The first item yielded by os.walk is the top directory.


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Find Mach-O binaries in a directory with an option to filter for ARM64.')
    # NOTE(review): the argument definitions, the args/directory_path
    # assignments and the isdir() guard sit between the two hunks of the
    # patch and are not visible in it.  They are reconstructed from the names
    # the visible lines use (args.recursive, args.only_arm64, directory_path)
    # -- confirm against the full file.
    parser.add_argument('path', metavar='PATH', type=str, help='the directory path to search for Mach-O binaries')
    parser.add_argument('-r', '--recursive', action='store_true', help='search recursively (default: false)')
    parser.add_argument('--only_arm64', action='store_true', help='only match ARM64 architecture binaries')

    args = parser.parse_args()
    directory_path = args.path

    if not os.path.isdir(directory_path):
        print(f"Error: {directory_path} is not a valid directory.")
        sys.exit(1)

    process_files(directory_path, recursive=args.recursive, only_arm64=args.only_arm64)