mirror of
https://github.com/Karmaz95/Snake_Apple.git
synced 2026-03-30 14:00:16 +02:00
Final version of MachOFileFinder aka Tornado.
This commit is contained in:
@@ -3,139 +3,176 @@ import os
|
||||
import sys
|
||||
import argparse
|
||||
import struct
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
# Mach-O and FAT magic numbers
|
||||
MACHO_MAGIC = 0xFEEDFACE
|
||||
MACHO_MAGIC_64 = 0xFEEDFACF
|
||||
MACHO_CIGAM = 0xCEFAEDFE
|
||||
MACHO_CIGAM_64 = 0xCFFAEDFE
|
||||
FAT_MAGIC = 0xCAFEBABE
|
||||
FAT_CIGAM = 0xBEBAFECA
|
||||
class MachOFileFinder:
|
||||
# Mach-O and FAT magic numbers
|
||||
MACHO_MAGIC = 0xFEEDFACE
|
||||
MACHO_MAGIC_64 = 0xFEEDFACF
|
||||
MACHO_CIGAM = 0xCEFAEDFE
|
||||
MACHO_CIGAM_64 = 0xCFFAEDFE
|
||||
FAT_MAGIC = 0xCAFEBABE
|
||||
FAT_CIGAM = 0xBEBAFECA
|
||||
|
||||
# Supported Mach-O file types
|
||||
FILE_TYPE_MAP = {
|
||||
0x1: "OBJECT",
|
||||
0x2: "EXECUTE",
|
||||
0x3: "FVMLIB",
|
||||
0x4: "CORE",
|
||||
0x5: "PRELOAD",
|
||||
0x6: "DYLIB",
|
||||
0x7: "DYLINKER",
|
||||
0x8: "BUNDLE",
|
||||
0x9: "DYLIB_STUB",
|
||||
0xA: "DSYM",
|
||||
0xB: "KEXT_BUNDLE",
|
||||
}
|
||||
# Supported Mach-O file types
|
||||
FILE_TYPE_MAP = {
|
||||
0x1: "OBJECT",
|
||||
0x2: "EXECUTE",
|
||||
0x3: "FVMLIB",
|
||||
0x4: "CORE",
|
||||
0x5: "PRELOAD",
|
||||
0x6: "DYLIB",
|
||||
0x7: "DYLINKER",
|
||||
0x8: "BUNDLE",
|
||||
0x9: "DYLIB_STUB",
|
||||
0xA: "DSYM",
|
||||
0xB: "KEXT_BUNDLE",
|
||||
}
|
||||
|
||||
# CPU type constant for ARM64
|
||||
CPU_TYPE_ARM64 = 0x0100000C
|
||||
# CPU type constant for ARM64
|
||||
CPU_TYPE_ARM64 = 0x0100000C
|
||||
|
||||
# Determine system endianness
|
||||
system_endianness = sys.byteorder # "little" or "big"
|
||||
def __init__(self, directory_path, recursive=False, only_arm64=False):
|
||||
self.directory_path = directory_path
|
||||
self.recursive = recursive
|
||||
self.only_arm64 = only_arm64
|
||||
|
||||
def determine_file_endianness(magic):
|
||||
"""Determine the endianness of the file based on the magic number and system endianness."""
|
||||
if magic in (MACHO_CIGAM, MACHO_CIGAM_64, FAT_CIGAM):
|
||||
return '<' # Little-endian file
|
||||
else:
|
||||
return '>' # Big-endian file
|
||||
def determineFileEndianness(self, magic):
|
||||
"""Determine the endianness of the file based on the magic number."""
|
||||
if magic in (self.MACHO_CIGAM, self.MACHO_CIGAM_64, self.FAT_CIGAM):
|
||||
return '<' # Little-endian file
|
||||
else:
|
||||
return '>' # Big-endian file
|
||||
|
||||
def getMachoInfo(self, file_path):
|
||||
"""Check if a file is a Mach-O binary or FAT binary and optionally filter for ARM64."""
|
||||
try:
|
||||
with open(file_path, 'rb') as f:
|
||||
file_size = os.path.getsize(file_path)
|
||||
# Read the first 4 bytes to check the magic number
|
||||
magic_data = f.read(4)
|
||||
if len(magic_data) < 4:
|
||||
return None
|
||||
|
||||
magic = struct.unpack(">I", magic_data)[0]
|
||||
|
||||
# Determine file endianness
|
||||
endian = self.determineFileEndianness(magic)
|
||||
|
||||
# Check if the file is a single-architecture Mach-O binary
|
||||
if magic in (self.MACHO_MAGIC, self.MACHO_MAGIC_64, self.MACHO_CIGAM, self.MACHO_CIGAM_64):
|
||||
header_data = f.read(12) # Read CPU type, subtype, and file type fields
|
||||
|
||||
if len(header_data) < 12:
|
||||
return "UNKNOWN"
|
||||
|
||||
cpu_type, cpu_subtype, file_type = struct.unpack(endian + "Iii", header_data)
|
||||
|
||||
if self.only_arm64 and cpu_type != self.CPU_TYPE_ARM64:
|
||||
return None
|
||||
|
||||
return self.FILE_TYPE_MAP.get(file_type, "UNKNOWN")
|
||||
|
||||
# Check if the file is a FAT binary
|
||||
elif magic in (self.FAT_MAGIC, self.FAT_CIGAM):
|
||||
num_archs = struct.unpack(endian + "I", f.read(4))[0]
|
||||
arm64_offset = None
|
||||
|
||||
# First pass: Find ARM64 architecture if present
|
||||
for _ in range(num_archs):
|
||||
arch_info = f.read(20) # Read architecture info (CPU type, subtype, offset, size, align)
|
||||
if len(arch_info) < 20:
|
||||
continue
|
||||
|
||||
cpu_type, _, offset, _, _ = struct.unpack(endian + "IIIII", arch_info)
|
||||
|
||||
# Validate offset before any further processing to avoid unnecessary reads
|
||||
if offset < 0 or offset >= file_size:
|
||||
continue # Skip this architecture if offset is invalid
|
||||
|
||||
if self.only_arm64 and cpu_type == self.CPU_TYPE_ARM64:
|
||||
arm64_offset = offset
|
||||
break # Stop once we find ARM64
|
||||
|
||||
# If only_arm64 is specified and no ARM64 architecture was found, skip this file
|
||||
if self.only_arm64 and arm64_offset is None:
|
||||
return None
|
||||
|
||||
# If ARM64 was found, process only that architecture
|
||||
if arm64_offset is not None:
|
||||
f.seek(arm64_offset)
|
||||
macho_magic_data = f.read(4)
|
||||
if len(macho_magic_data) < 4:
|
||||
return None
|
||||
|
||||
macho_magic = struct.unpack(">I", macho_magic_data)[0]
|
||||
arch_endian = self.determineFileEndianness(macho_magic)
|
||||
|
||||
if macho_magic in (self.MACHO_MAGIC, self.MACHO_MAGIC_64, self.MACHO_CIGAM, self.MACHO_CIGAM_64):
|
||||
arch_header_data = f.read(12)
|
||||
if len(arch_header_data) < 12:
|
||||
return None
|
||||
_, _, file_type = struct.unpack(arch_endian + "Iii", arch_header_data)
|
||||
return self.FILE_TYPE_MAP.get(file_type, "UNKNOWN")
|
||||
|
||||
# If not only_arm64, process all architectures in FAT binary
|
||||
if not self.only_arm64:
|
||||
f.seek(8) # Seek back to after the FAT magic and num_archs
|
||||
for _ in range(num_archs):
|
||||
arch_info = f.read(20) # Read architecture info (CPU type, subtype, offset, size, align)
|
||||
if len(arch_info) < 20:
|
||||
continue
|
||||
|
||||
cpu_type, _, offset, _, _ = struct.unpack(endian + "IIIII", arch_info)
|
||||
|
||||
# Validate offset before any further processing to avoid unnecessary reads
|
||||
if offset < 0 or offset >= file_size:
|
||||
continue # Skip this architecture if offset is invalid
|
||||
|
||||
# Move to offset to read Mach-O header for this architecture
|
||||
f.seek(offset)
|
||||
|
||||
# Read Mach-O magic and check for valid Mach-O binary
|
||||
macho_magic_data = f.read(4)
|
||||
if len(macho_magic_data) < 4:
|
||||
continue
|
||||
|
||||
macho_magic = struct.unpack(">I", macho_magic_data)[0]
|
||||
|
||||
# Determine endianness for this architecture
|
||||
arch_endian = self.determineFileEndianness(macho_magic)
|
||||
|
||||
if macho_magic in (self.MACHO_MAGIC, self.MACHO_MAGIC_64, self.MACHO_CIGAM, self.MACHO_CIGAM_64):
|
||||
arch_header_data = f.read(12)
|
||||
|
||||
if len(arch_header_data) < 12:
|
||||
continue
|
||||
|
||||
_, _, file_type = struct.unpack(arch_endian + "Iii", arch_header_data)
|
||||
file_type_name = self.FILE_TYPE_MAP.get(file_type, "UNKNOWN")
|
||||
return file_type_name
|
||||
|
||||
def get_macho_info(file_path, only_arm64):
|
||||
"""Check if a file is a Mach-O binary or FAT binary and optionally filter for ARM64."""
|
||||
with open(file_path, 'rb') as f:
|
||||
file_size = os.path.getsize(file_path)
|
||||
# Read the first 4 bytes to check the magic number
|
||||
magic_data = f.read(4)
|
||||
if len(magic_data) < 4:
|
||||
return None
|
||||
|
||||
magic = struct.unpack(">I", magic_data)[0]
|
||||
|
||||
# Determine file endianness
|
||||
endian = determine_file_endianness(magic)
|
||||
except (IOError, OSError) as e:
|
||||
return None
|
||||
|
||||
# Check if the file is a single-architecture Mach-O binary
|
||||
if magic in (MACHO_MAGIC, MACHO_MAGIC_64, MACHO_CIGAM, MACHO_CIGAM_64):
|
||||
header_data = f.read(12) # Read CPU type, subtype, and file type fields
|
||||
|
||||
if len(header_data) < 12:
|
||||
return "UNKNOWN"
|
||||
|
||||
cpu_type, cpu_subtype, file_type = struct.unpack(endian + "Iii", header_data)
|
||||
def processDirectory(self, root, files):
|
||||
"""Process all files in the specified directory."""
|
||||
for file_name in files:
|
||||
file_path = os.path.abspath(os.path.join(root, file_name))
|
||||
|
||||
if only_arm64 and cpu_type != CPU_TYPE_ARM64:
|
||||
return None
|
||||
|
||||
return FILE_TYPE_MAP.get(file_type, "UNKNOWN")
|
||||
# Check if the file is a Mach-O binary or FAT binary
|
||||
file_type = self.getMachoInfo(file_path)
|
||||
if file_type:
|
||||
print(f"{file_type}:{file_path}")
|
||||
|
||||
# Check if the file is a FAT binary
|
||||
elif magic in (FAT_MAGIC, FAT_CIGAM):
|
||||
num_archs = struct.unpack(endian + "I", f.read(4))[0]
|
||||
|
||||
# Process each architecture entry in FAT binary
|
||||
for _ in range(num_archs):
|
||||
arch_info = f.read(20) # Read architecture info (CPU type, subtype, offset, size, align)
|
||||
if len(arch_info) < 20:
|
||||
continue
|
||||
|
||||
cpu_type, _, offset, _, _ = struct.unpack(endian + "IIIII", arch_info)
|
||||
|
||||
# Ensure offset is within file bounds
|
||||
if offset >= file_size:
|
||||
continue # Skip this architecture if offset is beyond file size
|
||||
|
||||
# Move to offset to read Mach-O header for this architecture
|
||||
current_pos = f.tell()
|
||||
f.seek(offset)
|
||||
|
||||
# Read Mach-O magic and check for valid Mach-O binary
|
||||
macho_magic_data = f.read(4)
|
||||
if len(macho_magic_data) < 4:
|
||||
f.seek(current_pos)
|
||||
continue
|
||||
|
||||
macho_magic = struct.unpack(">I", macho_magic_data)[0]
|
||||
|
||||
# Determine endianness for this architecture
|
||||
arch_endian = determine_file_endianness(macho_magic)
|
||||
|
||||
if macho_magic in (MACHO_MAGIC, MACHO_MAGIC_64, MACHO_CIGAM, MACHO_CIGAM_64):
|
||||
arch_header_data = f.read(12)
|
||||
|
||||
if len(arch_header_data) < 12:
|
||||
f.seek(current_pos)
|
||||
continue
|
||||
|
||||
_, _, file_type = struct.unpack(arch_endian + "Iii", arch_header_data)
|
||||
|
||||
if only_arm64 and cpu_type != CPU_TYPE_ARM64:
|
||||
f.seek(current_pos)
|
||||
continue
|
||||
|
||||
file_type_name = FILE_TYPE_MAP.get(file_type, "UNKNOWN")
|
||||
return file_type_name
|
||||
|
||||
# Reset to the position in the FAT header
|
||||
f.seek(current_pos)
|
||||
|
||||
return None
|
||||
|
||||
def process_directory(root, files, recursive, only_arm64):
|
||||
"""Process all files in the specified directory."""
|
||||
for file_name in files:
|
||||
file_path = os.path.abspath(os.path.join(root, file_name))
|
||||
|
||||
# Check if the file is a Mach-O binary or FAT binary
|
||||
file_type = get_macho_info(file_path, only_arm64)
|
||||
if file_type:
|
||||
print(f"{file_type}:{file_path}")
|
||||
|
||||
def process_files(directory_path, recursive, only_arm64):
|
||||
"""Walk through the directory and process files."""
|
||||
for root, dirs, files in os.walk(directory_path):
|
||||
process_directory(root, files, recursive, only_arm64)
|
||||
if not recursive:
|
||||
break # Stop recursion if not recursive
|
||||
def processFiles(self):
|
||||
"""Walk through the directory and process files using threading for faster execution."""
|
||||
with ThreadPoolExecutor() as executor:
|
||||
for root, dirs, files in os.walk(self.directory_path):
|
||||
executor.submit(self.processDirectory, root, files)
|
||||
if not self.recursive:
|
||||
break # Stop recursion if not recursive
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Find Mach-O binaries in a directory with an option to filter for ARM64.')
|
||||
@@ -150,4 +187,5 @@ if __name__ == "__main__":
|
||||
print(f"Error: {directory_path} is not a valid directory.")
|
||||
sys.exit(1)
|
||||
|
||||
process_files(directory_path, recursive=args.recursive, only_arm64=args.only_arm64)
|
||||
finder = MachOFileFinder(directory_path, recursive=args.recursive, only_arm64=args.only_arm64)
|
||||
finder.processFiles()
|
||||
|
||||
Reference in New Issue
Block a user