Files
SnakeAppleSecurityFiles/I. Mach-O/python/MachOFileFinder.py
Karol Mazurek cac76ae2aa Add Lock to synchronize print statements and prevent stdout corruption when multiple threads write simultaneously. Example corruption before:
```
DYLIB:/Applications/NordVPN.app/Contents/Frameworks/norddropFFI.framework/Versions/A/norddropFFI

/* No comment provided by engineer. */
"Update Error!" = "Virhe p�ivEXECUTE:/Applications/KnockKnock.app/Contents/MacOS/KnockKnock
```
2025-12-06 21:38:19 +01:00

208 lines
9.1 KiB
Python
Executable File

#!/usr/bin/env python3
import os
import sys
import argparse
import struct
from concurrent.futures import ThreadPoolExecutor
from threading import Lock
import stat
class MachOFileFinder:
    """Find Mach-O binaries (thin or FAT) in a directory and print their type.

    Every match is printed to stdout as ``<FILE_TYPE>:<absolute path>``.
    Directories are scanned by a thread pool; a lock serializes the prints.
    """

    # Mach-O and FAT magic numbers
    MACHO_MAGIC = 0xFEEDFACE
    MACHO_MAGIC_64 = 0xFEEDFACF
    MACHO_CIGAM = 0xCEFAEDFE
    MACHO_CIGAM_64 = 0xCFFAEDFE
    FAT_MAGIC = 0xCAFEBABE
    FAT_CIGAM = 0xBEBAFECA

    # Supported Mach-O file types
    FILE_TYPE_MAP = {
        0x1: "OBJECT",
        0x2: "EXECUTE",
        0x3: "FVMLIB",
        0x4: "CORE",
        0x5: "PRELOAD",
        0x6: "DYLIB",
        0x7: "DYLINKER",
        0x8: "BUNDLE",
        0x9: "DYLIB_STUB",
        0xA: "DSYM",
        0xB: "KEXT_BUNDLE",
    }

    # CPU type constant for ARM64
    CPU_TYPE_ARM64 = 0x0100000C

    # Internal groupings and layout sizes used while parsing headers
    _THIN_MAGICS = (MACHO_MAGIC, MACHO_MAGIC_64, MACHO_CIGAM, MACHO_CIGAM_64)
    _FAT_MAGICS = (FAT_MAGIC, FAT_CIGAM)
    _FAT_HEADER_SIZE = 8   # magic (4 bytes) + architecture count (4 bytes)
    _FAT_ARCH_SIZE = 20    # cpu type, cpu subtype, offset, size, align

    def __init__(self, directory_path, recursive=False, only_arm64=False):
        """Store the search settings.

        Args:
            directory_path: Directory to scan.
            recursive: When true, descend into subdirectories.
            only_arm64: When true, report only binaries with an ARM64 CPU type.
        """
        self.directory_path = directory_path
        self.recursive = recursive
        self.only_arm64 = only_arm64
        # Serializes print() calls made from worker threads.
        self.print_lock = Lock()

    def isRegularFile(self, file_path):
        """Return True if ``file_path`` is a regular file, False on any OS error."""
        try:
            return stat.S_ISREG(os.stat(file_path).st_mode)
        except (OSError, IOError):
            return False

    def determineFileEndianness(self, magic):
        """Return the struct byte-order prefix for a magic read as big-endian.

        A byte-swapped ("CIGAM") magic means the file is little-endian ('<');
        anything else is treated as big-endian ('>').
        """
        if magic in (self.MACHO_CIGAM, self.MACHO_CIGAM_64, self.FAT_CIGAM):
            return '<'  # Little-endian file
        return '>'  # Big-endian file

    def _iterFatArchs(self, f, endian, file_size):
        """Yield ``(cpu_type, offset)`` for each usable entry of a FAT arch table.

        The file position is reset explicitly before every entry, so callers
        may seek elsewhere (e.g. into a slice) between iterations. Iteration
        stops at the first truncated entry; entries whose offset lies at or
        beyond ``file_size`` are skipped.
        """
        f.seek(4)  # Architecture count sits right after the FAT magic
        count_data = f.read(4)
        if len(count_data) < 4:
            return  # Truncated FAT header
        num_archs = struct.unpack(endian + "I", count_data)[0]
        # num_archs comes straight from the file and may be garbage, so the
        # loop must terminate as soon as the table runs out of data.
        for index in range(num_archs):
            f.seek(self._FAT_HEADER_SIZE + index * self._FAT_ARCH_SIZE)
            arch_info = f.read(self._FAT_ARCH_SIZE)
            if len(arch_info) < self._FAT_ARCH_SIZE:
                return
            cpu_type, _, offset, _, _ = struct.unpack(endian + "IIIII", arch_info)
            if offset >= file_size:
                continue  # Slice would start past the end of the file
            yield cpu_type, offset

    def _readSliceFileType(self, f, offset):
        """Return the file-type name of the Mach-O header at ``offset``, or None.

        None means the data at ``offset`` is truncated or lacks a Mach-O magic.
        """
        f.seek(offset)
        magic_data = f.read(4)
        if len(magic_data) < 4:
            return None
        magic = struct.unpack(">I", magic_data)[0]
        if magic not in self._THIN_MAGICS:
            return None
        header_data = f.read(12)  # CPU type, subtype, and file type fields
        if len(header_data) < 12:
            return None
        slice_endian = self.determineFileEndianness(magic)
        _, _, file_type = struct.unpack(slice_endian + "Iii", header_data)
        return self.FILE_TYPE_MAP.get(file_type, "UNKNOWN")

    def getMachoInfo(self, file_path):
        """Classify ``file_path`` as a Mach-O or FAT binary.

        Returns:
            The file-type name from ``FILE_TYPE_MAP`` ("UNKNOWN" for an
            unmapped type or a thin binary with a truncated header), or None
            when the file is not a Mach-O/FAT binary, is filtered out by
            ``only_arm64``, or cannot be read.
        """
        try:
            with open(file_path, 'rb') as f:
                file_size = os.path.getsize(file_path)

                # Read the first 4 bytes to check the magic number
                magic_data = f.read(4)
                if len(magic_data) < 4:
                    return None
                magic = struct.unpack(">I", magic_data)[0]
                endian = self.determineFileEndianness(magic)

                # Single-architecture Mach-O binary
                if magic in self._THIN_MAGICS:
                    header_data = f.read(12)  # CPU type, subtype, file type
                    if len(header_data) < 12:
                        return "UNKNOWN"
                    cpu_type, _, file_type = struct.unpack(endian + "Iii", header_data)
                    if self.only_arm64 and cpu_type != self.CPU_TYPE_ARM64:
                        return None
                    return self.FILE_TYPE_MAP.get(file_type, "UNKNOWN")

                if magic not in self._FAT_MAGICS:
                    return None

                # FAT binary
                archs = self._iterFatArchs(f, endian, file_size)
                if self.only_arm64:
                    # Inspect only the first ARM64 slice with a valid offset.
                    for cpu_type, offset in archs:
                        if cpu_type == self.CPU_TYPE_ARM64:
                            return self._readSliceFileType(f, offset)
                    return None

                # Otherwise report the first slice that holds a valid header.
                for _, offset in archs:
                    file_type_name = self._readSliceFileType(f, offset)
                    if file_type_name is not None:
                        return file_type_name
                return None
        except (IOError, OSError):
            return None

    def processDirectory(self, root, files):
        """Print ``<type>:<path>`` for every Mach-O file among ``files`` in ``root``."""
        for file_name in files:
            file_path = os.path.abspath(os.path.join(root, file_name))
            # Check if the file is a regular file before processing
            if not self.isRegularFile(file_path):
                continue
            # Check if the file is a Mach-O binary or FAT binary
            file_type = self.getMachoInfo(file_path)
            if file_type:
                # Hold the lock so output lines from threads do not interleave.
                with self.print_lock:
                    print(f"{file_type}:{file_path}")

    def processFiles(self):
        """Walk the directory, handing each visited directory to a thread pool."""
        with ThreadPoolExecutor() as executor:
            for root, dirs, files in os.walk(self.directory_path):
                executor.submit(self.processDirectory, root, files)
                if not self.recursive:
                    break  # Only the top-level directory is scanned
if __name__ == "__main__":
    # Command-line entry point: parse options, validate the target, run the scan.
    cli = argparse.ArgumentParser(
        description='Find Mach-O binaries in a directory with an option to filter for ARM64.',
    )
    cli.add_argument(
        'path',
        metavar='PATH',
        type=str,
        help='the directory path to search for Mach-O binaries',
    )
    cli.add_argument(
        '-r',
        '--recursive',
        action='store_true',
        help='search recursively (default: false)',
    )
    cli.add_argument(
        '--only_arm64',
        action='store_true',
        help='only match ARM64 architecture binaries',
    )
    options = cli.parse_args()

    target_dir = options.path
    if os.path.isdir(target_dir):
        scanner = MachOFileFinder(
            target_dir,
            recursive=options.recursive,
            only_arm64=options.only_arm64,
        )
        scanner.processFiles()
    else:
        # Refuse anything that is not an existing directory.
        print(f"Error: {target_dir} is not a valid directory.")
        sys.exit(1)