diff --git a/I. Mach-O/custom/MyBundle.c b/I. Mach-O/custom/MyBundle.c new file mode 100644 index 0000000..bd8d819 --- /dev/null +++ b/I. Mach-O/custom/MyBundle.c @@ -0,0 +1,7 @@ +// MyBundle.c +#include <stdio.h> + +void sayHello() { + printf("Hello from MyBundle!\n"); +} + diff --git a/I. Mach-O/custom/hello.c b/I. Mach-O/custom/hello.c new file mode 100644 index 0000000..f26b97c --- /dev/null +++ b/I. Mach-O/custom/hello.c @@ -0,0 +1,6 @@ +#include <stdio.h> + +int main() { + printf("Hello, World!\n"); + return 0; +} diff --git a/I. Mach-O/custom/mylib.c b/I. Mach-O/custom/mylib.c new file mode 100644 index 0000000..1292f06 --- /dev/null +++ b/I. Mach-O/custom/mylib.c @@ -0,0 +1,5 @@ +#include <stdio.h> + +void my_function() { + printf("Hello from my_function!\n"); +} diff --git a/I. Mach-O/mac/byte_order.h b/I. Mach-O/mac/byte_order.h new file mode 100644 index 0000000..c698e71 --- /dev/null +++ b/I. Mach-O/mac/byte_order.h @@ -0,0 +1,374 @@ +// Extracted from Xcode 15 Beta 7 +// /Library/Developer/CommandLineTools/SDKs/MacOSX14.0.sdk/System/Library/Frameworks/Kernel.framework/Versions/A/Headers/architecture/byte_order.h */ + +/* + * Copyright (c) 1999-2002 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights + * Reserved. This file contains Original Code and/or Modifications of + * Original Code as defined in and that are subject to the Apple Public + * Source License Version 1.0 (the 'License'). You may not use this file + * except in compliance with the License. Please obtain a copy of the + * License at http://www.apple.com/publicsource and read it before using + * this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License." + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1992 NeXT Computer, Inc. + * + * Byte ordering conversion. + * + */ + +#ifndef _ARCHITECTURE_BYTE_ORDER_H_ +#define _ARCHITECTURE_BYTE_ORDER_H_ + +#include <libkern/OSByteOrder.h> + +typedef unsigned long NXSwappedFloat; +typedef unsigned long long NXSwappedDouble; + +static __inline__ +unsigned short +NXSwapShort( + unsigned short inv +) +{ + return (unsigned short)OSSwapInt16((uint16_t)inv); +} + +static __inline__ +unsigned int +NXSwapInt( + unsigned int inv +) +{ + return (unsigned int)OSSwapInt32((uint32_t)inv); +} + +static __inline__ +unsigned long +NXSwapLong( + unsigned long inv +) +{ + return (unsigned long)OSSwapInt32((uint32_t)inv); +} + +static __inline__ +unsigned long long +NXSwapLongLong( + unsigned long long inv +) +{ + return (unsigned long long)OSSwapInt64((uint64_t)inv); +} + +static __inline__ NXSwappedFloat +NXConvertHostFloatToSwapped(float x) +{ + union fconv { + float number; + NXSwappedFloat sf; + } u; + u.number = x; + return u.sf; +} + +static __inline__ float +NXConvertSwappedFloatToHost(NXSwappedFloat x) +{ + union fconv { + float number; + NXSwappedFloat sf; + } u; + u.sf = x; + return u.number; +} + +static __inline__ NXSwappedDouble +NXConvertHostDoubleToSwapped(double x) +{ + union dconv { + double number; + NXSwappedDouble sd; + } u; + u.number = x; + return u.sd; +} + +static __inline__ double +NXConvertSwappedDoubleToHost(NXSwappedDouble x) +{ + union dconv { + double number; + NXSwappedDouble sd; + } u; + u.sd = x; + return u.number; +} + 
+static __inline__ NXSwappedFloat +NXSwapFloat(NXSwappedFloat x) +{ + return (NXSwappedFloat)OSSwapInt32((uint32_t)x); +} + +static __inline__ NXSwappedDouble +NXSwapDouble(NXSwappedDouble x) +{ + return (NXSwappedDouble)OSSwapInt64((uint64_t)x); +} + +/* + * Identify the byte order + * of the current host. + */ + +enum NXByteOrder { + NX_UnknownByteOrder, + NX_LittleEndian, + NX_BigEndian +}; + +static __inline__ +enum NXByteOrder +NXHostByteOrder(void) +{ +#if defined(__LITTLE_ENDIAN__) + return NX_LittleEndian; +#elif defined(__BIG_ENDIAN__) + return NX_BigEndian; +#else + return NX_UnknownByteOrder; +#endif +} + +static __inline__ +unsigned short +NXSwapBigShortToHost( + unsigned short x +) +{ + return (unsigned short)OSSwapBigToHostInt16((uint16_t)x); +} + +static __inline__ +unsigned int +NXSwapBigIntToHost( + unsigned int x +) +{ + return (unsigned int)OSSwapBigToHostInt32((uint32_t)x); +} + +static __inline__ +unsigned long +NXSwapBigLongToHost( + unsigned long x +) +{ + return (unsigned long)OSSwapBigToHostInt32((uint32_t)x); +} + +static __inline__ +unsigned long long +NXSwapBigLongLongToHost( + unsigned long long x +) +{ + return (unsigned long long)OSSwapBigToHostInt64((uint64_t)x); +} + +static __inline__ +double +NXSwapBigDoubleToHost( + NXSwappedDouble x +) +{ + return NXConvertSwappedDoubleToHost((NXSwappedDouble)OSSwapBigToHostInt64((uint64_t)x)); +} + +static __inline__ +float +NXSwapBigFloatToHost( + NXSwappedFloat x +) +{ + return NXConvertSwappedFloatToHost((NXSwappedFloat)OSSwapBigToHostInt32((uint32_t)x)); +} + +static __inline__ +unsigned short +NXSwapHostShortToBig( + unsigned short x +) +{ + return (unsigned short)OSSwapHostToBigInt16((uint16_t)x); +} + +static __inline__ +unsigned int +NXSwapHostIntToBig( + unsigned int x +) +{ + return (unsigned int)OSSwapHostToBigInt32((uint32_t)x); +} + +static __inline__ +unsigned long +NXSwapHostLongToBig( + unsigned long x +) +{ + return (unsigned long)OSSwapHostToBigInt32((uint32_t)x); +} + +static 
__inline__ +unsigned long long +NXSwapHostLongLongToBig( + unsigned long long x +) +{ + return (unsigned long long)OSSwapHostToBigInt64((uint64_t)x); +} + +static __inline__ +NXSwappedDouble +NXSwapHostDoubleToBig( + double x +) +{ + return (NXSwappedDouble)OSSwapHostToBigInt64((uint64_t)NXConvertHostDoubleToSwapped(x)); +} + +static __inline__ +NXSwappedFloat +NXSwapHostFloatToBig( + float x +) +{ + return (NXSwappedFloat)OSSwapHostToBigInt32((uint32_t)NXConvertHostFloatToSwapped(x)); +} + +static __inline__ +unsigned short +NXSwapLittleShortToHost( + unsigned short x +) +{ + return (unsigned short)OSSwapLittleToHostInt16((uint16_t)x); +} + +static __inline__ +unsigned int +NXSwapLittleIntToHost( + unsigned int x +) +{ + return (unsigned int)OSSwapLittleToHostInt32((uint32_t)x); +} + +static __inline__ +unsigned long +NXSwapLittleLongToHost( + unsigned long x +) +{ + return (unsigned long)OSSwapLittleToHostInt32((uint32_t)x); +} + +static __inline__ +unsigned long long +NXSwapLittleLongLongToHost( + unsigned long long x +) +{ + return (unsigned long long)OSSwapLittleToHostInt64((uint64_t)x); +} + +static __inline__ +double +NXSwapLittleDoubleToHost( + NXSwappedDouble x +) +{ + return NXConvertSwappedDoubleToHost((NXSwappedDouble)OSSwapLittleToHostInt64((uint64_t)x)); +} + +static __inline__ +float +NXSwapLittleFloatToHost( + NXSwappedFloat x +) +{ + return NXConvertSwappedFloatToHost((NXSwappedFloat)OSSwapLittleToHostInt32((uint32_t)x)); +} + +static __inline__ +unsigned short +NXSwapHostShortToLittle( + unsigned short x +) +{ + return (unsigned short)OSSwapHostToLittleInt16((uint16_t)x); +} + +static __inline__ +unsigned int +NXSwapHostIntToLittle( + unsigned int x +) +{ + return (unsigned int)OSSwapHostToLittleInt32((uint32_t)x); +} + +static __inline__ +unsigned long +NXSwapHostLongToLittle( + unsigned long x +) +{ + return (unsigned long)OSSwapHostToLittleInt32((uint32_t)x); +} + +static __inline__ +unsigned long long +NXSwapHostLongLongToLittle( + unsigned 
long long x +) +{ + return (unsigned long long)OSSwapHostToLittleInt64((uint64_t)x); +} + +static __inline__ +NXSwappedDouble +NXSwapHostDoubleToLittle( + double x +) +{ + return (NXSwappedDouble)OSSwapHostToLittleInt64((uint64_t)NXConvertHostDoubleToSwapped(x)); +} + +static __inline__ +NXSwappedFloat +NXSwapHostFloatToLittle( + float x +) +{ + return (NXSwappedFloat)OSSwapHostToLittleInt32((uint32_t)NXConvertHostFloatToSwapped(x)); +} + +#endif /* _ARCHITECTURE_BYTE_ORDER_H_ */ \ No newline at end of file diff --git a/I. Mach-O/mac/fat.h b/I. Mach-O/mac/fat.h new file mode 100644 index 0000000..03e7359 --- /dev/null +++ b/I. Mach-O/mac/fat.h @@ -0,0 +1,67 @@ +// Extracted from Xcode 15 Beta 7 +// /Library/Developer/CommandLineTools/SDKs/MacOSX14.0.sdk/System/Library/Frameworks/Kernel.framework/Versions/A/Headers/mach-o/fat.h */ + +/* + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _MACH_O_FAT_H_ +#define _MACH_O_FAT_H_ +/* + * This header file describes the structures of the file format for "fat" + * architecture specific file (wrapper design). At the begining of the file + * there is one fat_header structure followed by a number of fat_arch + * structures. For each architecture in the file, specified by a pair of + * cputype and cpusubtype, the fat_header describes the file offset, file + * size and alignment in the file of the architecture specific member. + * The padded bytes in the file to place each member on it's specific alignment + * are defined to be read as zeros and can be left as "holes" if the file system + * can support them as long as they read as zeros. + * + * All structures defined here are always written and read to/from disk + * in big-endian order. + */ + +/* + * <mach/machine.h> is needed here for the cpu_type_t and cpu_subtype_t types + * and contains the constants for the possible values of these types. + */ +#include <stdint.h> +#include <mach/machine.h> +#include <architecture/byte_order.h> + +#define FAT_MAGIC 0xcafebabe +#define FAT_CIGAM 0xbebafeca /* NXSwapLong(FAT_MAGIC) */ + +struct fat_header { + uint32_t magic; /* FAT_MAGIC */ + uint32_t nfat_arch; /* number of structs that follow */ +}; + +struct fat_arch { + cpu_type_t cputype; /* cpu specifier (int) */ + cpu_subtype_t cpusubtype; /* machine specifier (int) */ + uint32_t offset; /* file offset to this object file */ + uint32_t size; /* size of this object file */ + uint32_t align; /* alignment as a power of 2 */ +}; + +#endif /* _MACH_O_FAT_H_ */ \ No newline at end of file diff --git a/I. Mach-O/mac/fixup-chains.h b/I. Mach-O/mac/fixup-chains.h new file mode 100644 index 0000000..f56dcfd --- /dev/null +++ b/I. 
Mach-O/mac/fixup-chains.h @@ -0,0 +1,259 @@ +// Extracted from Xcode 15 Beta 7 +// /Library/Developer/CommandLineTools/SDKs/MacOSX14.0.sdk/System/Library/Frameworks/Kernel.framework/Versions/A/Headers/mach-o/fixup-chains.h */ +/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- + * + * Copyright (c) 2018 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef __MACH_O_FIXUP_CHAINS__ +#define __MACH_O_FIXUP_CHAINS__ 5 + + +#include <stdint.h> + + +//#define LC_DYLD_EXPORTS_TRIE 0x80000033 // used with linkedit_data_command +//#define LC_DYLD_CHAINED_FIXUPS 0x80000034 // used with linkedit_data_command, payload is dyld_chained_fixups_header + + +// header of the LC_DYLD_CHAINED_FIXUPS payload +struct dyld_chained_fixups_header +{ + uint32_t fixups_version; // 0 + uint32_t starts_offset; // offset of dyld_chained_starts_in_image in chain_data + uint32_t imports_offset; // offset of imports table in chain_data + uint32_t symbols_offset; // offset of symbol strings in chain_data + uint32_t imports_count; // number of imported symbol names + uint32_t imports_format; // DYLD_CHAINED_IMPORT* + uint32_t symbols_format; // 0 => uncompressed, 1 => zlib compressed +}; + +// This struct is embedded in LC_DYLD_CHAINED_FIXUPS payload +struct dyld_chained_starts_in_image +{ + uint32_t seg_count; + uint32_t seg_info_offset[1]; // each entry is offset into this struct for that segment + // followed by pool of dyld_chain_starts_in_segment data +}; + +// This struct is embedded in dyld_chain_starts_in_image +// and passed down to the kernel for page-in linking +struct dyld_chained_starts_in_segment +{ + uint32_t size; // size of this (amount kernel needs to copy) + uint16_t page_size; // 0x1000 or 0x4000 + uint16_t pointer_format; // DYLD_CHAINED_PTR_* + uint64_t segment_offset; // offset in memory to start of segment + uint32_t max_valid_pointer; // for 32-bit OS, any value beyond this is not a pointer + uint16_t page_count; // how many pages are in array + uint16_t page_start[1]; // each entry is offset in each page of first element in chain + // or DYLD_CHAINED_PTR_START_NONE if no fixups on page + // uint16_t chain_starts[1]; // some 32-bit formats may require multiple starts per page. 
+ // for those, if high bit is set in page_starts[], then it + // is index into chain_starts[] which is a list of starts + // the last of which has the high bit set +}; + +enum { + DYLD_CHAINED_PTR_START_NONE = 0xFFFF, // used in page_start[] to denote a page with no fixups + DYLD_CHAINED_PTR_START_MULTI = 0x8000, // used in page_start[] to denote a page which has multiple starts + DYLD_CHAINED_PTR_START_LAST = 0x8000, // used in chain_starts[] to denote last start in list for page +}; + +// This struct is embedded in __TEXT,__chain_starts section in firmware +struct dyld_chained_starts_offsets +{ + uint32_t pointer_format; // DYLD_CHAINED_PTR_32_FIRMWARE + uint32_t starts_count; // number of starts in array + uint32_t chain_starts[1]; // array chain start offsets +}; + + +// values for dyld_chained_starts_in_segment.pointer_format +enum { + DYLD_CHAINED_PTR_ARM64E = 1, // stride 8, unauth target is vmaddr + DYLD_CHAINED_PTR_64 = 2, // target is vmaddr + DYLD_CHAINED_PTR_32 = 3, + DYLD_CHAINED_PTR_32_CACHE = 4, + DYLD_CHAINED_PTR_32_FIRMWARE = 5, + DYLD_CHAINED_PTR_64_OFFSET = 6, // target is vm offset + DYLD_CHAINED_PTR_ARM64E_OFFSET = 7, // old name + DYLD_CHAINED_PTR_ARM64E_KERNEL = 7, // stride 4, unauth target is vm offset + DYLD_CHAINED_PTR_64_KERNEL_CACHE = 8, + DYLD_CHAINED_PTR_ARM64E_USERLAND = 9, // stride 8, unauth target is vm offset + DYLD_CHAINED_PTR_ARM64E_FIRMWARE = 10, // stride 4, unauth target is vmaddr + DYLD_CHAINED_PTR_X86_64_KERNEL_CACHE = 11, // stride 1, x86_64 kernel caches + DYLD_CHAINED_PTR_ARM64E_USERLAND24 = 12, // stride 8, unauth target is vm offset, 24-bit bind +}; + + +// DYLD_CHAINED_PTR_ARM64E +struct dyld_chained_ptr_arm64e_rebase +{ + uint64_t target : 43, + high8 : 8, + next : 11, // 4 or 8-byte stide + bind : 1, // == 0 + auth : 1; // == 0 +}; + +// DYLD_CHAINED_PTR_ARM64E +struct dyld_chained_ptr_arm64e_bind +{ + uint64_t ordinal : 16, + zero : 16, + addend : 19, // +/-256K + next : 11, // 4 or 8-byte stide + bind : 1, // == 
1 + auth : 1; // == 0 +}; + +// DYLD_CHAINED_PTR_ARM64E +struct dyld_chained_ptr_arm64e_auth_rebase +{ + uint64_t target : 32, // runtimeOffset + diversity : 16, + addrDiv : 1, + key : 2, + next : 11, // 4 or 8-byte stide + bind : 1, // == 0 + auth : 1; // == 1 +}; + +// DYLD_CHAINED_PTR_ARM64E +struct dyld_chained_ptr_arm64e_auth_bind +{ + uint64_t ordinal : 16, + zero : 16, + diversity : 16, + addrDiv : 1, + key : 2, + next : 11, // 4 or 8-byte stide + bind : 1, // == 1 + auth : 1; // == 1 +}; + +// DYLD_CHAINED_PTR_64/DYLD_CHAINED_PTR_64_OFFSET +struct dyld_chained_ptr_64_rebase +{ + uint64_t target : 36, // 64GB max image size (DYLD_CHAINED_PTR_64 => vmAddr, DYLD_CHAINED_PTR_64_OFFSET => runtimeOffset) + high8 : 8, // top 8 bits set to this (DYLD_CHAINED_PTR_64 => after slide added, DYLD_CHAINED_PTR_64_OFFSET => before slide added) + reserved : 7, // all zeros + next : 12, // 4-byte stride + bind : 1; // == 0 +}; + +// DYLD_CHAINED_PTR_64 +struct dyld_chained_ptr_64_bind +{ + uint64_t ordinal : 24, + addend : 8, // 0 thru 255 + reserved : 19, // all zeros + next : 12, // 4-byte stride + bind : 1; // == 1 +}; + +// DYLD_CHAINED_PTR_64_KERNEL_CACHE, DYLD_CHAINED_PTR_X86_64_KERNEL_CACHE +struct dyld_chained_ptr_64_kernel_cache_rebase +{ + uint64_t target : 30, // basePointers[cacheLevel] + target + cacheLevel : 2, // what level of cache to bind to (indexes a mach_header array) + diversity : 16, + addrDiv : 1, + key : 2, + next : 12, // 1 or 4-byte stide + isAuth : 1; // 0 -> not authenticated. 1 -> authenticated +}; + +// DYLD_CHAINED_PTR_32 +// Note: for DYLD_CHAINED_PTR_32 some non-pointer values are co-opted into the chain +// as out of range rebases. If an entry in the chain is > max_valid_pointer, then it +// is not a pointer. To restore the value, subtract off the bias, which is +// (64MB+max_valid_pointer)/2. 
+struct dyld_chained_ptr_32_rebase +{ + uint32_t target : 26, // vmaddr, 64MB max image size + next : 5, // 4-byte stride + bind : 1; // == 0 +}; + +// DYLD_CHAINED_PTR_32 +struct dyld_chained_ptr_32_bind +{ + uint32_t ordinal : 20, + addend : 6, // 0 thru 63 + next : 5, // 4-byte stride + bind : 1; // == 1 +}; + +// DYLD_CHAINED_PTR_32_CACHE +struct dyld_chained_ptr_32_cache_rebase +{ + uint32_t target : 30, // 1GB max dyld cache TEXT and DATA + next : 2; // 4-byte stride +}; + + +// DYLD_CHAINED_PTR_32_FIRMWARE +struct dyld_chained_ptr_32_firmware_rebase +{ + uint32_t target : 26, // 64MB max firmware TEXT and DATA + next : 6; // 4-byte stride +}; + + + +// values for dyld_chained_fixups_header.imports_format +enum { + DYLD_CHAINED_IMPORT = 1, + DYLD_CHAINED_IMPORT_ADDEND = 2, + DYLD_CHAINED_IMPORT_ADDEND64 = 3, +}; + +// DYLD_CHAINED_IMPORT +struct dyld_chained_import +{ + uint32_t lib_ordinal : 8, + weak_import : 1, + name_offset : 23; +}; + +// DYLD_CHAINED_IMPORT_ADDEND +struct dyld_chained_import_addend +{ + uint32_t lib_ordinal : 8, + weak_import : 1, + name_offset : 23; + int32_t addend; +}; + +// DYLD_CHAINED_IMPORT_ADDEND64 +struct dyld_chained_import_addend64 +{ + uint64_t lib_ordinal : 16, + weak_import : 1, + reserved : 15, + name_offset : 32; + uint64_t addend; +}; + +#endif // __MACH_O_FIXUP_CHAINS__ diff --git a/I. Mach-O/mac/loader.h b/I. Mach-O/mac/loader.h new file mode 100644 index 0000000..ac7ff18 --- /dev/null +++ b/I. Mach-O/mac/loader.h @@ -0,0 +1,1593 @@ +// Extracted from Xcode 15 Beta 7 +/* /Library/Developer/CommandLineTools/SDKs/MacOSX14.0.sdk/System/Library/Frameworks/Kernel.framework/Versions/A/Headers/mach-o/loader.h */ + +/* + * Copyright (c) 1999-2019 Apple Inc. All Rights Reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). 
You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _MACHO_LOADER_H_ +#define _MACHO_LOADER_H_ + +/* + * This file describes the format of mach object files. + */ +#include <stdint.h> + +/* + * <mach/machine.h> is needed here for the cpu_type_t and cpu_subtype_t types + * and contains the constants for the possible values of these types. + */ +#include <mach/machine.h> + +/* + * <mach/vm_prot.h> is needed here for the vm_prot_t type and contains the + * constants that are or'ed together for the possible values of this type. + */ +#include <mach/vm_prot.h> + +/* + * <machine/thread_status.h> is expected to define the flavors of the thread + * states and the structures of those flavors for each machine. + */ +#include <mach/machine/thread_status.h> +#include <architecture/byte_order.h> + +/* + * The 32-bit mach header appears at the very beginning of the object file for + * 32-bit architectures. 
+ */ +struct mach_header { + uint32_t magic; /* mach magic number identifier */ + cpu_type_t cputype; /* cpu specifier */ + cpu_subtype_t cpusubtype; /* machine specifier */ + uint32_t filetype; /* type of file */ + uint32_t ncmds; /* number of load commands */ + uint32_t sizeofcmds; /* the size of all the load commands */ + uint32_t flags; /* flags */ +}; + +/* Constant for the magic field of the mach_header (32-bit architectures) */ +#define MH_MAGIC 0xfeedface /* the mach magic number */ +#define MH_CIGAM 0xcefaedfe /* NXSwapInt(MH_MAGIC) */ + +/* + * The 64-bit mach header appears at the very beginning of object files for + * 64-bit architectures. + */ +struct mach_header_64 { + uint32_t magic; /* mach magic number identifier */ + cpu_type_t cputype; /* cpu specifier */ + cpu_subtype_t cpusubtype; /* machine specifier */ + uint32_t filetype; /* type of file */ + uint32_t ncmds; /* number of load commands */ + uint32_t sizeofcmds; /* the size of all the load commands */ + uint32_t flags; /* flags */ + uint32_t reserved; /* reserved */ +}; + +/* Constant for the magic field of the mach_header_64 (64-bit architectures) */ +#define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */ +#define MH_CIGAM_64 0xcffaedfe /* NXSwapInt(MH_MAGIC_64) */ + +/* + * The layout of the file depends on the filetype. For all but the MH_OBJECT + * file type the segments are padded out and aligned on a segment alignment + * boundary for efficient demand pageing. The MH_EXECUTE, MH_FVMLIB, MH_DYLIB, + * MH_DYLINKER and MH_BUNDLE file types also have the headers included as part + * of their first segment. + * + * The file type MH_OBJECT is a compact format intended as output of the + * assembler and input (and possibly output) of the link editor (the .o + * format). All sections are in one unnamed segment with no segment padding. + * This format is used as an executable format when the file is so small the + * segment padding greatly increases its size. 
+ * + * The file type MH_PRELOAD is an executable format intended for things that + * are not executed under the kernel (proms, stand alones, kernels, etc). The + * format can be executed under the kernel but may demand paged it and not + * preload it before execution. + * + * A core file is in MH_CORE format and can be any in an arbritray legal + * Mach-O file. + * + * Constants for the filetype field of the mach_header + */ +#define MH_OBJECT 0x1 /* relocatable object file */ +#define MH_EXECUTE 0x2 /* demand paged executable file */ +#define MH_FVMLIB 0x3 /* fixed VM shared library file */ +#define MH_CORE 0x4 /* core file */ +#define MH_PRELOAD 0x5 /* preloaded executable file */ +#define MH_DYLIB 0x6 /* dynamically bound shared library */ +#define MH_DYLINKER 0x7 /* dynamic link editor */ +#define MH_BUNDLE 0x8 /* dynamically bound bundle file */ +#define MH_DYLIB_STUB 0x9 /* shared library stub for static */ + /* linking only, no section contents */ +#define MH_DSYM 0xa /* companion file with only debug */ + /* sections */ +#define MH_KEXT_BUNDLE 0xb /* x86_64 kexts */ +#define MH_FILESET 0xc /* set of mach-o's */ + +/* Constants for the flags field of the mach_header */ +#define MH_NOUNDEFS 0x1 /* the object file has no undefined + references */ +#define MH_INCRLINK 0x2 /* the object file is the output of an + incremental link against a base file + and can't be link edited again */ +#define MH_DYLDLINK 0x4 /* the object file is input for the + dynamic linker and can't be staticly + link edited again */ +#define MH_BINDATLOAD 0x8 /* the object file's undefined + references are bound by the dynamic + linker when loaded. */ +#define MH_PREBOUND 0x10 /* the file has its dynamic undefined + references prebound. 
*/ +#define MH_SPLIT_SEGS 0x20 /* the file has its read-only and + read-write segments split */ +#define MH_LAZY_INIT 0x40 /* the shared library init routine is + to be run lazily via catching memory + faults to its writeable segments + (obsolete) */ +#define MH_TWOLEVEL 0x80 /* the image is using two-level name + space bindings */ +#define MH_FORCE_FLAT 0x100 /* the executable is forcing all images + to use flat name space bindings */ +#define MH_NOMULTIDEFS 0x200 /* this umbrella guarantees no multiple + defintions of symbols in its + sub-images so the two-level namespace + hints can always be used. */ +#define MH_NOFIXPREBINDING 0x400 /* do not have dyld notify the + prebinding agent about this + executable */ +#define MH_PREBINDABLE 0x800 /* the binary is not prebound but can + have its prebinding redone. only used + when MH_PREBOUND is not set. */ +#define MH_ALLMODSBOUND 0x1000 /* indicates that this binary binds to + all two-level namespace modules of + its dependent libraries. only used + when MH_PREBINDABLE and MH_TWOLEVEL + are both set. */ +#define MH_SUBSECTIONS_VIA_SYMBOLS 0x2000/* safe to divide up the sections into + sub-sections via symbols for dead + code stripping */ +#define MH_CANONICAL 0x4000 /* the binary has been canonicalized + via the unprebind operation */ +#define MH_WEAK_DEFINES 0x8000 /* the final linked image contains + external weak symbols */ +#define MH_BINDS_TO_WEAK 0x10000 /* the final linked image uses + weak symbols */ + +#define MH_ALLOW_STACK_EXECUTION 0x20000/* When this bit is set, all stacks + in the task will be given stack + execution privilege. Only used in + MH_EXECUTE filetypes. 
*/ +#define MH_ROOT_SAFE 0x40000 /* When this bit is set, the binary + declares it is safe for use in + processes with uid zero */ + +#define MH_SETUID_SAFE 0x80000 /* When this bit is set, the binary + declares it is safe for use in + processes when issetugid() is true */ + +#define MH_NO_REEXPORTED_DYLIBS 0x100000 /* When this bit is set on a dylib, + the static linker does not need to + examine dependent dylibs to see + if any are re-exported */ +#define MH_PIE 0x200000 /* When this bit is set, the OS will + load the main executable at a + random address. Only used in + MH_EXECUTE filetypes. */ +#define MH_DEAD_STRIPPABLE_DYLIB 0x400000 /* Only for use on dylibs. When + linking against a dylib that + has this bit set, the static linker + will automatically not create a + LC_LOAD_DYLIB load command to the + dylib if no symbols are being + referenced from the dylib. */ +#define MH_HAS_TLV_DESCRIPTORS 0x800000 /* Contains a section of type + S_THREAD_LOCAL_VARIABLES */ + +#define MH_NO_HEAP_EXECUTION 0x1000000 /* When this bit is set, the OS will + run the main executable with + a non-executable heap even on + platforms (e.g. i386) that don't + require it. Only used in MH_EXECUTE + filetypes. */ + +#define MH_APP_EXTENSION_SAFE 0x02000000 /* The code was linked for use in an + application extension. */ + +#define MH_NLIST_OUTOFSYNC_WITH_DYLDINFO 0x04000000 /* The external symbols + listed in the nlist symbol table do + not include all the symbols listed in + the dyld info. */ + +#define MH_SIM_SUPPORT 0x08000000 /* Allow LC_MIN_VERSION_MACOS and + LC_BUILD_VERSION load commands with + the platforms macOS, iOSMac, + iOSSimulator, tvOSSimulator and + watchOSSimulator. */ + +#define MH_DYLIB_IN_CACHE 0x80000000 /* Only for use on dylibs. When this bit + is set, the dylib is part of the dyld + shared cache, rather than loose in + the filesystem. */ + +/* + * The load commands directly follow the mach_header. 
The total size of all + * of the commands is given by the sizeofcmds field in the mach_header. All + * load commands must have as their first two fields cmd and cmdsize. The cmd + * field is filled in with a constant for that command type. Each command type + * has a structure specifically for it. The cmdsize field is the size in bytes + * of the particular load command structure plus anything that follows it that + * is a part of the load command (i.e. section structures, strings, etc.). To + * advance to the next load command the cmdsize can be added to the offset or + * pointer of the current load command. The cmdsize for 32-bit architectures + * MUST be a multiple of 4 bytes and for 64-bit architectures MUST be a multiple + * of 8 bytes (these are forever the maximum alignment of any load commands). + * The padded bytes must be zero. All tables in the object file must also + * follow these rules so the file can be memory mapped. Otherwise the pointers + * to these tables will not work well or at all on some machines. With all + * padding zeroed like objects will compare byte for byte. + */ +struct load_command { + uint32_t cmd; /* type of load command */ + uint32_t cmdsize; /* total size of command in bytes */ +}; + +/* + * After MacOS X 10.1 when a new load command is added that is required to be + * understood by the dynamic linker for the image to execute properly the + * LC_REQ_DYLD bit will be or'ed into the load command constant. If the dynamic + * linker sees such a load command it it does not understand will issue a + * "unknown load command required for execution" error and refuse to use the + * image. Other load commands without this bit that are not understood will + * simply be ignored. 
+ */ +#define LC_REQ_DYLD 0x80000000 + +/* Constants for the cmd field of all load commands, the type */ +#define LC_SEGMENT 0x1 /* segment of this file to be mapped */ +#define LC_SYMTAB 0x2 /* link-edit stab symbol table info */ +#define LC_SYMSEG 0x3 /* link-edit gdb symbol table info (obsolete) */ +#define LC_THREAD 0x4 /* thread */ +#define LC_UNIXTHREAD 0x5 /* unix thread (includes a stack) */ +#define LC_LOADFVMLIB 0x6 /* load a specified fixed VM shared library */ +#define LC_IDFVMLIB 0x7 /* fixed VM shared library identification */ +#define LC_IDENT 0x8 /* object identification info (obsolete) */ +#define LC_FVMFILE 0x9 /* fixed VM file inclusion (internal use) */ +#define LC_PREPAGE 0xa /* prepage command (internal use) */ +#define LC_DYSYMTAB 0xb /* dynamic link-edit symbol table info */ +#define LC_LOAD_DYLIB 0xc /* load a dynamically linked shared library */ +#define LC_ID_DYLIB 0xd /* dynamically linked shared lib ident */ +#define LC_LOAD_DYLINKER 0xe /* load a dynamic linker */ +#define LC_ID_DYLINKER 0xf /* dynamic linker identification */ +#define LC_PREBOUND_DYLIB 0x10 /* modules prebound for a dynamically */ + /* linked shared library */ +#define LC_ROUTINES 0x11 /* image routines */ +#define LC_SUB_FRAMEWORK 0x12 /* sub framework */ +#define LC_SUB_UMBRELLA 0x13 /* sub umbrella */ +#define LC_SUB_CLIENT 0x14 /* sub client */ +#define LC_SUB_LIBRARY 0x15 /* sub library */ +#define LC_TWOLEVEL_HINTS 0x16 /* two-level namespace lookup hints */ +#define LC_PREBIND_CKSUM 0x17 /* prebind checksum */ + +/* + * load a dynamically linked shared library that is allowed to be missing + * (all symbols are weak imported). 
 */
#define LC_LOAD_WEAK_DYLIB (0x18 | LC_REQ_DYLD)

#define LC_SEGMENT_64   0x19    /* 64-bit segment of this file to be
                                   mapped */
#define LC_ROUTINES_64  0x1a    /* 64-bit image routines */
#define LC_UUID         0x1b    /* the uuid */
#define LC_RPATH       (0x1c | LC_REQ_DYLD)    /* runpath additions */
#define LC_CODE_SIGNATURE 0x1d  /* location of code signature */
#define LC_SEGMENT_SPLIT_INFO 0x1e /* location of info to split segments */
#define LC_REEXPORT_DYLIB (0x1f | LC_REQ_DYLD) /* load and re-export dylib */
#define LC_LAZY_LOAD_DYLIB 0x20 /* delay load of dylib until first use */
#define LC_ENCRYPTION_INFO 0x21 /* encrypted segment information */
#define LC_DYLD_INFO    0x22    /* compressed dyld information */
#define LC_DYLD_INFO_ONLY (0x22|LC_REQ_DYLD)   /* compressed dyld information only */
#define LC_LOAD_UPWARD_DYLIB (0x23 | LC_REQ_DYLD) /* load upward dylib */
#define LC_VERSION_MIN_MACOSX 0x24   /* build for MacOSX min OS version */
#define LC_VERSION_MIN_IPHONEOS 0x25 /* build for iPhoneOS min OS version */
#define LC_FUNCTION_STARTS 0x26 /* compressed table of function start addresses */
#define LC_DYLD_ENVIRONMENT 0x27 /* string for dyld to treat
                                    like environment variable */
#define LC_MAIN (0x28|LC_REQ_DYLD) /* replacement for LC_UNIXTHREAD */
#define LC_DATA_IN_CODE 0x29 /* table of non-instructions in __text */
#define LC_SOURCE_VERSION 0x2A /* source version used to build binary */
#define LC_DYLIB_CODE_SIGN_DRS 0x2B /* Code signing DRs copied from linked dylibs */
#define LC_ENCRYPTION_INFO_64 0x2C /* 64-bit encrypted segment information */
#define LC_LINKER_OPTION 0x2D /* linker options in MH_OBJECT files */
#define LC_LINKER_OPTIMIZATION_HINT 0x2E /* optimization hints in MH_OBJECT files */
#define LC_VERSION_MIN_TVOS 0x2F /* build for AppleTV min OS version */
#define LC_VERSION_MIN_WATCHOS 0x30 /* build for Watch min OS version */
#define LC_NOTE 0x31 /* arbitrary data included within a Mach-O file */
#define LC_BUILD_VERSION 0x32 /* build for platform min OS version */
#define LC_DYLD_EXPORTS_TRIE (0x33 | LC_REQ_DYLD) /* used with linkedit_data_command, payload is trie */
#define LC_DYLD_CHAINED_FIXUPS (0x34 | LC_REQ_DYLD) /* used with linkedit_data_command */
#define LC_FILESET_ENTRY      (0x35 | LC_REQ_DYLD) /* used with fileset_entry_command */

/*
 * A variable length string in a load command is represented by an lc_str
 * union.  The strings are stored just after the load command structure and
 * the offset is from the start of the load command structure.  The size
 * of the string is reflected in the cmdsize field of the load command.
 * Once again any padded bytes to bring the cmdsize field to a multiple
 * of 4 bytes must be zero.
 */
union lc_str {
    uint32_t offset;    /* offset to the string */
#ifndef __LP64__
    char     *ptr;      /* pointer to the string */
#endif
};

/*
 * The segment load command indicates that a part of this file is to be
 * mapped into the task's address space.  The size of this segment in memory,
 * vmsize, may be equal to or larger than the amount to map from this file,
 * filesize.  The file is mapped starting at fileoff to the beginning of
 * the segment in memory, vmaddr.  The rest of the memory of the segment,
 * if any, is allocated zero fill on demand.  The segment's maximum virtual
 * memory protection and initial virtual memory protection are specified
 * by the maxprot and initprot fields.  If the segment has sections then the
 * section structures directly follow the segment command and their size is
 * reflected in cmdsize.
 */
struct segment_command { /* for 32-bit architectures */
    uint32_t  cmd;          /* LC_SEGMENT */
    uint32_t  cmdsize;      /* includes sizeof section structs */
    char      segname[16];  /* segment name */
    uint32_t  vmaddr;       /* memory address of this segment */
    uint32_t  vmsize;       /* memory size of this segment */
    uint32_t  fileoff;      /* file offset of this segment */
    uint32_t  filesize;     /* amount to map from the file */
    vm_prot_t maxprot;      /* maximum VM protection */
    vm_prot_t initprot;     /* initial VM protection */
    uint32_t  nsects;       /* number of sections in segment */
    uint32_t  flags;        /* flags */
};

/*
 * The 64-bit segment load command indicates that a part of this file is to be
 * mapped into a 64-bit task's address space.  If the 64-bit segment has
 * sections then section_64 structures directly follow the 64-bit segment
 * command and their size is reflected in cmdsize.
 */
struct segment_command_64 { /* for 64-bit architectures */
    uint32_t  cmd;          /* LC_SEGMENT_64 */
    uint32_t  cmdsize;      /* includes sizeof section_64 structs */
    char      segname[16];  /* segment name */
    uint64_t  vmaddr;       /* memory address of this segment */
    uint64_t  vmsize;       /* memory size of this segment */
    uint64_t  fileoff;      /* file offset of this segment */
    uint64_t  filesize;     /* amount to map from the file */
    vm_prot_t maxprot;      /* maximum VM protection */
    vm_prot_t initprot;     /* initial VM protection */
    uint32_t  nsects;       /* number of sections in segment */
    uint32_t  flags;        /* flags */
};

/* Constants for the flags field of the segment_command */
#define SG_HIGHVM  0x1  /* the file contents for this segment is for
                           the high part of the VM space, the low part
                           is zero filled (for stacks in core files) */
#define SG_FVMLIB  0x2  /* this segment is the VM that is allocated by
                           a fixed VM library, for overlap checking in
                           the link editor */
#define SG_NORELOC 0x4  /* this segment has nothing that was relocated
                           in it and nothing relocated to it, that is
                           it may be safely replaced without relocation */
#define SG_PROTECTED_VERSION_1 0x8 /* This segment is protected.  If the
                                      segment starts at file offset 0, the
                                      first page of the segment is not
                                      protected.  All other pages of the
                                      segment are protected. */
#define SG_READ_ONLY    0x10 /* This segment is made read-only after fixups */



/*
 * A segment is made up of zero or more sections.  Non-MH_OBJECT files have
 * all of their segments with the proper sections in each, and padded to the
 * specified segment alignment when produced by the link editor.  The first
 * segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header
 * and load commands of the object file before its first section.  The zero
 * fill sections are always last in their segment (in all formats).  This
 * allows the zeroed segment padding to be mapped into memory where zero fill
 * sections might be.  The gigabyte zero fill sections, those with the section
 * type S_GB_ZEROFILL, can only be in a segment with sections of this type.
 * These segments are then placed after all other segments.
 *
 * The MH_OBJECT format has all of its sections in one segment for
 * compactness.  There is no padding to a specified segment boundary and the
 * mach_header and load commands are not part of the segment.
 *
 * Sections with the same section name, sectname, going into the same segment,
 * segname, are combined by the link editor.  The resulting section is aligned
 * to the maximum alignment of the combined sections and is the new section's
 * alignment.  The combined sections are aligned to their original alignment in
 * the combined section.  Any padded bytes to get the specified alignment are
 * zeroed.
 *
 * The format of the relocation entries referenced by the reloff and nreloc
 * fields of the section structure for mach object files is described in the
 * header file <reloc.h>.
 */
struct section { /* for 32-bit architectures */
    char     sectname[16];  /* name of this section */
    char     segname[16];   /* segment this section goes in */
    uint32_t addr;          /* memory address of this section */
    uint32_t size;          /* size in bytes of this section */
    uint32_t offset;        /* file offset of this section */
    uint32_t align;         /* section alignment (power of 2) */
    uint32_t reloff;        /* file offset of relocation entries */
    uint32_t nreloc;        /* number of relocation entries */
    uint32_t flags;         /* flags (section type and attributes)*/
    uint32_t reserved1;     /* reserved (for offset or index) */
    uint32_t reserved2;     /* reserved (for count or sizeof) */
};

struct section_64 { /* for 64-bit architectures */
    char     sectname[16];  /* name of this section */
    char     segname[16];   /* segment this section goes in */
    uint64_t addr;          /* memory address of this section */
    uint64_t size;          /* size in bytes of this section */
    uint32_t offset;        /* file offset of this section */
    uint32_t align;         /* section alignment (power of 2) */
    uint32_t reloff;        /* file offset of relocation entries */
    uint32_t nreloc;        /* number of relocation entries */
    uint32_t flags;         /* flags (section type and attributes)*/
    uint32_t reserved1;     /* reserved (for offset or index) */
    uint32_t reserved2;     /* reserved (for count or sizeof) */
    uint32_t reserved3;     /* reserved */
};

/*
 * The flags field of a section structure is separated into two parts a section
 * type and section attributes.  The section types are mutually exclusive (it
 * can only have one type) but the section attributes are not (it may have more
 * than one attribute).
 */
#define SECTION_TYPE        0x000000ff  /* 256 section types */
#define SECTION_ATTRIBUTES  0xffffff00  /*  24 section attributes */

/* Constants for the type of a section */
#define S_REGULAR           0x0  /* regular section */
#define S_ZEROFILL          0x1  /* zero fill on demand section */
#define S_CSTRING_LITERALS  0x2  /* section with only literal C strings*/
#define S_4BYTE_LITERALS    0x3  /* section with only 4 byte literals */
#define S_8BYTE_LITERALS    0x4  /* section with only 8 byte literals */
#define S_LITERAL_POINTERS  0x5  /* section with only pointers to */
                                 /*  literals */
/*
 * For the two types of symbol pointers sections and the symbol stubs section
 * they have indirect symbol table entries.  For each of the entries in the
 * section the indirect symbol table entries, in corresponding order in the
 * indirect symbol table, start at the index stored in the reserved1 field
 * of the section structure.  Since the indirect symbol table entries
 * correspond to the entries in the section the number of indirect symbol table
 * entries is inferred from the size of the section divided by the size of the
 * entries in the section.  For symbol pointers sections the size of the entries
 * in the section is 4 bytes and for symbol stubs sections the byte size of the
 * stubs is stored in the reserved2 field of the section structure.
 */
#define S_NON_LAZY_SYMBOL_POINTERS  0x6   /* section with only non-lazy
                                             symbol pointers */
#define S_LAZY_SYMBOL_POINTERS      0x7   /* section with only lazy symbol
                                             pointers */
#define S_SYMBOL_STUBS              0x8   /* section with only symbol
                                             stubs, byte size of stub in
                                             the reserved2 field */
#define S_MOD_INIT_FUNC_POINTERS    0x9   /* section with only function
                                             pointers for initialization*/
#define S_MOD_TERM_FUNC_POINTERS    0xa   /* section with only function
                                             pointers for termination */
#define S_COALESCED                 0xb   /* section contains symbols that
                                             are to be coalesced */
#define S_GB_ZEROFILL               0xc   /* zero fill on demand section
                                             (that can be larger than 4
                                             gigabytes) */
#define S_INTERPOSING               0xd   /* section with only pairs of
                                             function pointers for
                                             interposing */
#define S_16BYTE_LITERALS           0xe   /* section with only 16 byte
                                             literals */
#define S_DTRACE_DOF                0xf   /* section contains
                                             DTrace Object Format */
#define S_LAZY_DYLIB_SYMBOL_POINTERS 0x10 /* section with only lazy
                                             symbol pointers to lazy
                                             loaded dylibs */
/*
 * Section types to support thread local variables
 */
#define S_THREAD_LOCAL_REGULAR                   0x11  /* template of initial
                                                          values for TLVs */
#define S_THREAD_LOCAL_ZEROFILL                  0x12  /* template of initial
                                                          values for TLVs */
#define S_THREAD_LOCAL_VARIABLES                 0x13  /* TLV descriptors */
#define S_THREAD_LOCAL_VARIABLE_POINTERS         0x14  /* pointers to TLV
                                                          descriptors */
#define S_THREAD_LOCAL_INIT_FUNCTION_POINTERS    0x15  /* functions to call
                                                          to initialize TLV
                                                          values */
#define S_INIT_FUNC_OFFSETS                      0x16  /* 32-bit offsets to
                                                          initializers */

/*
 * Constants for the section attributes part of the flags field of a section
 * structure.
 */
#define SECTION_ATTRIBUTES_USR   0xff000000  /* User settable attributes */
#define S_ATTR_PURE_INSTRUCTIONS 0x80000000  /* section contains only true
                                                machine instructions */
#define S_ATTR_NO_TOC            0x40000000  /* section contains coalesced
                                                symbols that are not to be
                                                in a ranlib table of
                                                contents */
#define S_ATTR_STRIP_STATIC_SYMS 0x20000000  /* ok to strip static symbols
                                                in this section in files
                                                with the MH_DYLDLINK flag */
#define S_ATTR_NO_DEAD_STRIP     0x10000000  /* no dead stripping */
#define S_ATTR_LIVE_SUPPORT      0x08000000  /* blocks are live if they
                                                reference live blocks */
#define S_ATTR_SELF_MODIFYING_CODE 0x04000000 /* Used with i386 code stubs
                                                 written on by dyld */
/*
 * If a segment contains any sections marked with S_ATTR_DEBUG then all
 * sections in that segment must have this attribute.  No section other than
 * a section marked with this attribute may reference the contents of this
 * section.  A section with this attribute may contain no symbols and must have
 * a section type S_REGULAR.  The static linker will not copy section contents
 * from sections with this attribute into its output file.  These sections
 * generally contain DWARF debugging info.
 */
#define S_ATTR_DEBUG             0x02000000  /* a debug section */
#define SECTION_ATTRIBUTES_SYS   0x00ffff00  /* system settable attributes */
#define S_ATTR_SOME_INSTRUCTIONS 0x00000400  /* section contains some
                                                machine instructions */
#define S_ATTR_EXT_RELOC         0x00000200  /* section has external
                                                relocation entries */
#define S_ATTR_LOC_RELOC         0x00000100  /* section has local
                                                relocation entries */


/*
 * The names of segments and sections in them are mostly meaningless to the
 * link-editor.  But there are few things to support traditional UNIX
 * executables that require the link-editor and assembler to use some names
 * agreed upon by convention.
 *
 * The initial protection of the "__TEXT" segment has write protection turned
 * off (not writeable).
 *
 * The link-editor will allocate common symbols at the end of the "__common"
 * section in the "__DATA" segment.  It will create the section and segment
 * if needed.
 */

/* The currently known segment names and the section names in those segments */

#define SEG_PAGEZERO    "__PAGEZERO"    /* the pagezero segment which has no */
                                        /* protections and catches NULL */
                                        /* references for MH_EXECUTE files */


#define SEG_TEXT        "__TEXT"        /* the traditional UNIX text segment */
#define SECT_TEXT       "__text"        /* the real text part of the text */
                                        /* section no headers, and no padding */
#define SECT_FVMLIB_INIT0 "__fvmlib_init0"  /* the fvmlib initialization */
                                            /*  section */
#define SECT_FVMLIB_INIT1 "__fvmlib_init1"  /* the section following the */
                                            /*  fvmlib initialization */
                                            /*  section */

#define SEG_DATA        "__DATA"        /* the traditional UNIX data segment */
#define SECT_DATA       "__data"        /* the real initialized data section */
                                        /* no padding, no bss overlap */
#define SECT_BSS        "__bss"         /* the real uninitialized data section*/
                                        /* no padding */
#define SECT_COMMON     "__common"      /* the section common symbols are */
                                        /* allocated in by the link editor */

#define SEG_OBJC        "__OBJC"        /* objective-C runtime segment */
#define SECT_OBJC_SYMBOLS "__symbol_table"   /* symbol table */
#define SECT_OBJC_MODULES "__module_info"    /* module information */
#define SECT_OBJC_STRINGS "__selector_strs"  /* string table */
#define SECT_OBJC_REFS "__selector_refs"     /* string table */

#define SEG_ICON         "__ICON"       /* the icon segment */
#define SECT_ICON_HEADER "__header"     /* the icon headers */
#define SECT_ICON_TIFF   "__tiff"       /* the icons in tiff format */

#define SEG_LINKEDIT    "__LINKEDIT"    /* the segment containing all structs */
                                        /* created and maintained by the link */
                                        /* editor.  Created with -seglinkedit */
                                        /* option to ld(1) for MH_EXECUTE and */
                                        /* FVMLIB file types only */

#define SEG_LINKINFO    "__LINKINFO"    /* the segment overlapping with linkedit */
                                        /* containing linking information */

#define SEG_UNIXSTACK   "__UNIXSTACK"   /* the unix stack segment */

#define SEG_IMPORT      "__IMPORT"      /* the segment for the self (dyld) */
                                        /* modifying code stubs that has read, */
                                        /* write and execute permissions */

/*
 * Fixed virtual memory shared libraries are identified by two things.  The
 * target pathname (the name of the library as found for execution), and the
 * minor version number.  The address of where the headers are loaded is in
 * header_addr.  (THIS IS OBSOLETE and no longer supported).
 */
struct fvmlib {
    union lc_str name;          /* library's target pathname */
    uint32_t     minor_version; /* library's minor version number */
    uint32_t     header_addr;   /* library's header address */
};

/*
 * A fixed virtual shared library (filetype == MH_FVMLIB in the mach header)
 * contains a fvmlib_command (cmd == LC_IDFVMLIB) to identify the library.
 * An object that uses a fixed virtual shared library also contains a
 * fvmlib_command (cmd == LC_LOADFVMLIB) for each library it uses.
 * (THIS IS OBSOLETE and no longer supported).
 */
struct fvmlib_command {
    uint32_t      cmd;      /* LC_IDFVMLIB or LC_LOADFVMLIB */
    uint32_t      cmdsize;  /* includes pathname string */
    struct fvmlib fvmlib;   /* the library identification */
};

/*
 * Dynamically linked shared libraries are identified by two things.  The
 * pathname (the name of the library as found for execution), and the
 * compatibility version number.  The pathname must match and the compatibility
 * number in the user of the library must be greater than or equal to the
 * library being used.
The time stamp is used to record the time a library was
 * built and copied into the user so it can be used to determine if the
 * library used at runtime is exactly the same as the one used to build the
 * program.
 */
struct dylib {
    union lc_str name;                  /* library's path name */
    uint32_t timestamp;                 /* library's build time stamp */
    uint32_t current_version;           /* library's current version number */
    uint32_t compatibility_version;     /* library's compatibility vers number*/
};

/*
 * A dynamically linked shared library (filetype == MH_DYLIB in the mach header)
 * contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library.
 * An object that uses a dynamically linked shared library also contains a
 * dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or
 * LC_REEXPORT_DYLIB) for each library it uses.
 */
struct dylib_command {
    uint32_t     cmd;       /* LC_ID_DYLIB, LC_LOAD_{,WEAK_}DYLIB,
                               LC_REEXPORT_DYLIB */
    uint32_t     cmdsize;   /* includes pathname string */
    struct dylib dylib;     /* the library identification */
};

/*
 * A dynamically linked shared library may be a subframework of an umbrella
 * framework.  If so it will be linked with "-umbrella umbrella_name" where
 * "umbrella_name" is the name of the umbrella framework.  A subframework
 * can only be linked against by its umbrella framework or other subframeworks
 * that are part of the same umbrella framework.  Otherwise the static link
 * editor produces an error and states to link against the umbrella framework.
 * The name of the umbrella framework for subframeworks is recorded in the
 * following structure.
 */
struct sub_framework_command {
    uint32_t     cmd;       /* LC_SUB_FRAMEWORK */
    uint32_t     cmdsize;   /* includes umbrella string */
    union lc_str umbrella;  /* the umbrella framework name */
};

/*
 * For dynamically linked shared libraries that are subframework of an umbrella
 * framework they can allow clients other than the umbrella framework or other
 * subframeworks in the same umbrella framework.  To do this the subframework
 * is built with "-allowable_client client_name" and an LC_SUB_CLIENT load
 * command is created for each -allowable_client flag.  The client_name is
 * usually a framework name.  It can also be a name used for bundles clients
 * where the bundle is built with "-client_name client_name".
 */
struct sub_client_command {
    uint32_t     cmd;       /* LC_SUB_CLIENT */
    uint32_t     cmdsize;   /* includes client string */
    union lc_str client;    /* the client name */
};

/*
 * A dynamically linked shared library may be a sub_umbrella of an umbrella
 * framework.  If so it will be linked with "-sub_umbrella umbrella_name" where
 * "umbrella_name" is the name of the sub_umbrella framework.  When
 * statically linking when -twolevel_namespace is in effect a twolevel namespace
 * umbrella framework will only cause its subframeworks and those frameworks
 * listed as sub_umbrella frameworks to be implicitly linked in.  Any other
 * dependent dynamic libraries will not be linked in when -twolevel_namespace
 * is in effect.  The primary library recorded by the static linker when
 * resolving a symbol in these libraries will be the umbrella framework.
 * Zero or more sub_umbrella frameworks may be used by an umbrella framework.
 * The name of a sub_umbrella framework is recorded in the following structure.
 */
struct sub_umbrella_command {
    uint32_t     cmd;           /* LC_SUB_UMBRELLA */
    uint32_t     cmdsize;       /* includes sub_umbrella string */
    union lc_str sub_umbrella;  /* the sub_umbrella framework name */
};

/*
 * A dynamically linked shared library may be a sub_library of another shared
 * library.  If so it will be linked with "-sub_library library_name" where
 * "library_name" is the name of the sub_library shared library.  When
 * statically linking when -twolevel_namespace is in effect a twolevel namespace
 * shared library will only cause its subframeworks and those frameworks
 * listed as sub_umbrella frameworks and libraries listed as sub_libraries to
 * be implicitly linked in.  Any other dependent dynamic libraries will not be
 * linked in when -twolevel_namespace is in effect.  The primary library
 * recorded by the static linker when resolving a symbol in these libraries
 * will be the umbrella framework (or dynamic library).  Zero or more
 * sub_library shared libraries may be used by an umbrella framework (or
 * dynamic library).  The name of a sub_library framework is recorded in the
 * following structure.  For example /usr/lib/libobjc_profile.A.dylib would be
 * recorded as "libobjc".
 */
struct sub_library_command {
    uint32_t     cmd;           /* LC_SUB_LIBRARY */
    uint32_t     cmdsize;       /* includes sub_library string */
    union lc_str sub_library;   /* the sub_library name */
};

/*
 * A program (filetype == MH_EXECUTE) that is
 * prebound to its dynamic libraries has one of these for each library that
 * the static linker used in prebinding.  It contains a bit vector for the
 * modules in the library.  The bits indicate which modules are bound (1) and
 * which are not (0) from the library.  The bit for module 0 is the low bit
 * of the first byte.
So the bit for the Nth module is:
 * (linked_modules[N/8] >> N%8) & 1
 */
struct prebound_dylib_command {
    uint32_t     cmd;            /* LC_PREBOUND_DYLIB */
    uint32_t     cmdsize;        /* includes strings */
    union lc_str name;           /* library's path name */
    uint32_t     nmodules;       /* number of modules in library */
    union lc_str linked_modules; /* bit vector of linked modules */
};

/*
 * A program that uses a dynamic linker contains a dylinker_command to identify
 * the name of the dynamic linker (LC_LOAD_DYLINKER).  And a dynamic linker
 * contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER).
 * A file can have at most one of these.
 * This struct is also used for the LC_DYLD_ENVIRONMENT load command and
 * contains string for dyld to treat like environment variable.
 */
struct dylinker_command {
    uint32_t     cmd;       /* LC_ID_DYLINKER, LC_LOAD_DYLINKER or
                               LC_DYLD_ENVIRONMENT */
    uint32_t     cmdsize;   /* includes pathname string */
    union lc_str name;      /* dynamic linker's path name */
};

/*
 * Thread commands contain machine-specific data structures suitable for
 * use in the thread state primitives.  The machine specific data structures
 * follow the struct thread_command as follows.
 * Each flavor of machine specific data structure is preceded by an uint32_t
 * constant for the flavor of that data structure, an uint32_t that is the
 * count of uint32_t's of the size of the state data structure and then
 * the state data structure follows.  This triple may be repeated for many
 * flavors.  The constants for the flavors, counts and state data structure
 * definitions are expected to be in the header file <machine/thread_status.h>.
 * These machine specific data structures sizes must be multiples of
 * 4 bytes.  The cmdsize reflects the total size of the thread_command
 * and all of the sizes of the constants for the flavors, counts and state
 * data structures.
 *
 * For executable objects that are unix processes there will be one
 * thread_command (cmd == LC_UNIXTHREAD) created for it by the link-editor.
 * This is the same as a LC_THREAD, except that a stack is automatically
 * created (based on the shell's limit for the stack size).  Command arguments
 * and environment variables are copied onto that stack.
 */
struct thread_command {
    uint32_t cmd;       /* LC_THREAD or LC_UNIXTHREAD */
    uint32_t cmdsize;   /* total size of this command */
    /* uint32_t flavor                 flavor of thread state */
    /* uint32_t count                  count of uint32_t's in thread state */
    /* struct XXX_thread_state state   thread state for this flavor */
    /* ... */
};

/*
 * The routines command contains the address of the dynamic shared library
 * initialization routine and an index into the module table for the module
 * that defines the routine.  Before any modules are used from the library the
 * dynamic linker fully binds the module that defines the initialization routine
 * and then calls it.  This gets called before any module initialization
 * routines (used for C++ static constructors) in the library.
 */
struct routines_command { /* for 32-bit architectures */
    uint32_t cmd;           /* LC_ROUTINES */
    uint32_t cmdsize;       /* total size of this command */
    uint32_t init_address;  /* address of initialization routine */
    uint32_t init_module;   /* index into the module table that */
                            /*  the init routine is defined in */
    uint32_t reserved1;
    uint32_t reserved2;
    uint32_t reserved3;
    uint32_t reserved4;
    uint32_t reserved5;
    uint32_t reserved6;
};

/*
 * The 64-bit routines command.  Same use as above.
 */
struct routines_command_64 { /* for 64-bit architectures */
    uint32_t cmd;           /* LC_ROUTINES_64 */
    uint32_t cmdsize;       /* total size of this command */
    uint64_t init_address;  /* address of initialization routine */
    uint64_t init_module;   /* index into the module table that */
                            /*  the init routine is defined in */
    uint64_t reserved1;
    uint64_t reserved2;
    uint64_t reserved3;
    uint64_t reserved4;
    uint64_t reserved5;
    uint64_t reserved6;
};

/*
 * The symtab_command contains the offsets and sizes of the link-edit 4.3BSD
 * "stab" style symbol table information as described in the header files
 * <nlist.h> and <stab.h>.
 */
struct symtab_command {
    uint32_t cmd;       /* LC_SYMTAB */
    uint32_t cmdsize;   /* sizeof(struct symtab_command) */
    uint32_t symoff;    /* symbol table offset */
    uint32_t nsyms;     /* number of symbol table entries */
    uint32_t stroff;    /* string table offset */
    uint32_t strsize;   /* string table size in bytes */
};

/*
 * This is the second set of the symbolic information which is used to support
 * the data structures for the dynamically link editor.
 *
 * The original set of symbolic information in the symtab_command which contains
 * the symbol and string tables must also be present when this load command is
 * present.  When this load command is present the symbol table is organized
 * into three groups of symbols:
 *	local symbols (static and debugging symbols) - grouped by module
 *	defined external symbols - grouped by module (sorted by name if not lib)
 *	undefined external symbols (sorted by name if MH_BINDATLOAD is not set,
 *				    and in order they were seen by the static
 *				    linker if MH_BINDATLOAD is set)
 * In this load command there are offsets and counts to each of the three groups
 * of symbols.
 *
 * This load command contains the offsets and sizes of the following new
 * symbolic information tables:
 *	table of contents
 *	module table
 *	reference symbol table
 *	indirect symbol table
 * The first three tables above (the table of contents, module table and
 * reference symbol table) are only present if the file is a dynamically linked
 * shared library.  For executable and object modules, which are files
 * containing only one module, the information that would be in these three
 * tables is determined as follows:
 *	table of contents - the defined external symbols are sorted by name
 *	module table - the file contains only one module so everything in the
 *		       file is part of the module.
 *	reference symbol table - is the defined and undefined external symbols
 *
 * For dynamically linked shared library files this load command also contains
 * offsets and sizes to the pool of relocation entries for all sections
 * separated into two groups:
 *	external relocation entries
 *	local relocation entries
 * For executable and object modules the relocation entries continue to hang
 * off the section structures.
 */
struct dysymtab_command {
    uint32_t cmd;       /* LC_DYSYMTAB */
    uint32_t cmdsize;   /* sizeof(struct dysymtab_command) */

    /*
     * The symbols indicated by symoff and nsyms of the LC_SYMTAB load command
     * are grouped into the following three groups:
     *    local symbols (further grouped by the module they are from)
     *    defined external symbols (further grouped by the module they are from)
     *    undefined symbols
     *
     * The local symbols are used only for debugging.  The dynamic binding
     * process may have to use them to indicate to the debugger the local
     * symbols for a module that is being bound.
     *
     * The last two groups are used by the dynamic binding process to do the
     * binding (indirectly through the module table and the reference symbol
     * table when this is a dynamically linked shared library file).
     */
    uint32_t ilocalsym; /* index to local symbols */
    uint32_t nlocalsym; /* number of local symbols */

    uint32_t iextdefsym;/* index to externally defined symbols */
    uint32_t nextdefsym;/* number of externally defined symbols */

    uint32_t iundefsym; /* index to undefined symbols */
    uint32_t nundefsym; /* number of undefined symbols */

    /*
     * For the dynamic binding process to find which module a symbol
     * is defined in the table of contents is used (analogous to the ranlib
     * structure in an archive) which maps defined external symbols to modules
     * they are defined in.  This exists only in a dynamically linked shared
     * library file.  For executable and object modules the defined external
     * symbols are sorted by name and is used as the table of contents.
     */
    uint32_t tocoff;    /* file offset to table of contents */
    uint32_t ntoc;      /* number of entries in table of contents */

    /*
     * To support dynamic binding of "modules" (whole object files) the symbol
     * table must reflect the modules that the file was created from.  This is
     * done by having a module table that has indexes and counts into the merged
     * tables for each module.  The module structure that these two entries
     * refer to is described below.  This exists only in a dynamically linked
     * shared library file.  For executable and object modules the file only
     * contains one module so everything in the file belongs to the module.
     */
    uint32_t modtaboff; /* file offset to module table */
    uint32_t nmodtab;   /* number of module table entries */

    /*
     * To support dynamic module binding the module structure for each module
     * indicates the external references (defined and undefined) each module
     * makes.  For each module there is an offset and a count into the
     * reference symbol table for the symbols that the module references.
     * This exists only in a dynamically linked shared library file.  For
     * executable and object modules the defined external symbols and the
     * undefined external symbols indicates the external references.
     */
    uint32_t extrefsymoff;  /* offset to referenced symbol table */
    uint32_t nextrefsyms;   /* number of referenced symbol table entries */

    /*
     * The sections that contain "symbol pointers" and "routine stubs" have
     * indexes and (implied counts based on the size of the section and fixed
     * size of the entry) into the "indirect symbol" table for each pointer
     * and stub.  For every section of these two types the index into the
     * indirect symbol table is stored in the section header in the field
     * reserved1.  An indirect symbol table entry is simply a 32bit index into
     * the symbol table to the symbol that the pointer or stub is referring to.
     * The indirect symbol table is ordered to match the entries in the section.
     */
    uint32_t indirectsymoff; /* file offset to the indirect symbol table */
    uint32_t nindirectsyms;  /* number of indirect symbol table entries */

    /*
     * To support relocating an individual module in a library file quickly the
     * external relocation entries for each module in the library need to be
     * accessed efficiently.  Since the relocation entries can't be accessed
     * through the section headers for a library file they are separated into
     * groups of local and external entries further grouped by module.  In this
     * case the presence of this load command whose extreloff, nextrel,
     * locreloff and nlocrel fields are non-zero indicates that the relocation
     * entries of non-merged sections are not referenced through the section
     * structures (and the reloff and nreloc fields in the section headers are
     * set to zero).
     *
     * Since the relocation entries are not accessed through the section headers
     * this requires the r_address field to be something other than a section
     * offset to identify the item to be relocated.  In this case r_address is
     * set to the offset from the vmaddr of the first LC_SEGMENT command.
     * For MH_SPLIT_SEGS images r_address is set to the offset from the
     * vmaddr of the first read-write LC_SEGMENT command.
     *
     * The relocation entries are grouped by module and the module table
     * entries have indexes and counts into them for the group of external
     * relocation entries for that module.
     *
     * For sections that are merged across modules there must not be any
     * remaining external relocation entries for them (for merged sections
     * remaining relocation entries must be local).
     */
    uint32_t extreloff; /* offset to external relocation entries */
    uint32_t nextrel;   /* number of external relocation entries */

    /*
     * All the local relocation entries are grouped together (they are not
     * grouped by their module since they are only used if the object is moved
     * from its statically link edited address).
     */
    uint32_t locreloff; /* offset to local relocation entries */
    uint32_t nlocrel;   /* number of local relocation entries */

};

/*
 * An indirect symbol table entry is simply a 32bit index into the symbol table
 * to the symbol that the pointer or stub is referring to.  Unless it is for a
 * non-lazy symbol pointer section for a defined symbol which strip(1) has
 * removed.  In which case it has the value INDIRECT_SYMBOL_LOCAL.  If the
 * symbol was also absolute INDIRECT_SYMBOL_ABS is or'ed with that.
+ */ +#define INDIRECT_SYMBOL_LOCAL 0x80000000 +#define INDIRECT_SYMBOL_ABS 0x40000000 + + +/* a table of contents entry */ +struct dylib_table_of_contents { + uint32_t symbol_index; /* the defined external symbol + (index into the symbol table) */ + uint32_t module_index; /* index into the module table this symbol + is defined in */ +}; + +/* a module table entry */ +struct dylib_module { + uint32_t module_name; /* the module name (index into string table) */ + + uint32_t iextdefsym; /* index into externally defined symbols */ + uint32_t nextdefsym; /* number of externally defined symbols */ + uint32_t irefsym; /* index into reference symbol table */ + uint32_t nrefsym; /* number of reference symbol table entries */ + uint32_t ilocalsym; /* index into symbols for local symbols */ + uint32_t nlocalsym; /* number of local symbols */ + + uint32_t iextrel; /* index into external relocation entries */ + uint32_t nextrel; /* number of external relocation entries */ + + uint32_t iinit_iterm; /* low 16 bits are the index into the init + section, high 16 bits are the index into + the term section */ + uint32_t ninit_nterm; /* low 16 bits are the number of init section + entries, high 16 bits are the number of + term section entries */ + + uint32_t /* for this module address of the start of */ + objc_module_info_addr; /* the (__OBJC,__module_info) section */ + uint32_t /* for this module size of */ + objc_module_info_size; /* the (__OBJC,__module_info) section */ +}; + +/* a 64-bit module table entry */ +struct dylib_module_64 { + uint32_t module_name; /* the module name (index into string table) */ + + uint32_t iextdefsym; /* index into externally defined symbols */ + uint32_t nextdefsym; /* number of externally defined symbols */ + uint32_t irefsym; /* index into reference symbol table */ + uint32_t nrefsym; /* number of reference symbol table entries */ + uint32_t ilocalsym; /* index into symbols for local symbols */ + uint32_t nlocalsym; /* number of local symbols */ + 
+ uint32_t iextrel; /* index into external relocation entries */ + uint32_t nextrel; /* number of external relocation entries */ + + uint32_t iinit_iterm; /* low 16 bits are the index into the init + section, high 16 bits are the index into + the term section */ + uint32_t ninit_nterm; /* low 16 bits are the number of init section + entries, high 16 bits are the number of + term section entries */ + + uint32_t /* for this module size of */ + objc_module_info_size; /* the (__OBJC,__module_info) section */ + uint64_t /* for this module address of the start of */ + objc_module_info_addr; /* the (__OBJC,__module_info) section */ +}; + +/* + * The entries in the reference symbol table are used when loading the module + * (both by the static and dynamic link editors) and if the module is unloaded + * or replaced. Therefore all external symbols (defined and undefined) are + * listed in the module's reference table. The flags describe the type of + * reference that is being made. The constants for the flags are defined in + * as they are also used for symbol table entries. + */ +struct dylib_reference { + uint32_t isym:24, /* index into the symbol table */ + flags:8; /* flags to indicate the type of reference */ +}; + +/* + * The twolevel_hints_command contains the offset and number of hints in the + * two-level namespace lookup hints table. + */ +struct twolevel_hints_command { + uint32_t cmd; /* LC_TWOLEVEL_HINTS */ + uint32_t cmdsize; /* sizeof(struct twolevel_hints_command) */ + uint32_t offset; /* offset to the hint table */ + uint32_t nhints; /* number of hints in the hint table */ +}; + +/* + * The entries in the two-level namespace lookup hints table are twolevel_hint + * structs. These provide hints to the dynamic link editor where to start + * looking for an undefined symbol in a two-level namespace image. 
The + * isub_image field is an index into the sub-images (sub-frameworks and + * sub-umbrellas list) that made up the two-level image that the undefined + * symbol was found in when it was built by the static link editor. If + * isub-image is 0 the the symbol is expected to be defined in library and not + * in the sub-images. If isub-image is non-zero it is an index into the array + * of sub-images for the umbrella with the first index in the sub-images being + * 1. The array of sub-images is the ordered list of sub-images of the umbrella + * that would be searched for a symbol that has the umbrella recorded as its + * primary library. The table of contents index is an index into the + * library's table of contents. This is used as the starting point of the + * binary search or a directed linear search. + */ +struct twolevel_hint { + uint32_t + isub_image:8, /* index into the sub images */ + itoc:24; /* index into the table of contents */ +}; + +/* + * The prebind_cksum_command contains the value of the original check sum for + * prebound files or zero. When a prebound file is first created or modified + * for other than updating its prebinding information the value of the check sum + * is set to zero. When the file has it prebinding re-done and if the value of + * the check sum is zero the original check sum is calculated and stored in + * cksum field of this load command in the output file. If when the prebinding + * is re-done and the cksum field is non-zero it is left unchanged from the + * input file. + */ +struct prebind_cksum_command { + uint32_t cmd; /* LC_PREBIND_CKSUM */ + uint32_t cmdsize; /* sizeof(struct prebind_cksum_command) */ + uint32_t cksum; /* the check sum or zero */ +}; + +/* + * The uuid load command contains a single 128-bit unique random number that + * identifies an object produced by the static link editor. 
+ */ +struct uuid_command { + uint32_t cmd; /* LC_UUID */ + uint32_t cmdsize; /* sizeof(struct uuid_command) */ + uint8_t uuid[16]; /* the 128-bit uuid */ +}; + +/* + * The rpath_command contains a path which at runtime should be added to + * the current run path used to find @rpath prefixed dylibs. + */ +struct rpath_command { + uint32_t cmd; /* LC_RPATH */ + uint32_t cmdsize; /* includes string */ + union lc_str path; /* path to add to run path */ +}; + +/* + * The linkedit_data_command contains the offsets and sizes of a blob + * of data in the __LINKEDIT segment. + */ +struct linkedit_data_command { + uint32_t cmd; /* LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, + LC_FUNCTION_STARTS, LC_DATA_IN_CODE, + LC_DYLIB_CODE_SIGN_DRS, + LC_LINKER_OPTIMIZATION_HINT, + LC_DYLD_EXPORTS_TRIE, or + LC_DYLD_CHAINED_FIXUPS. */ + uint32_t cmdsize; /* sizeof(struct linkedit_data_command) */ + uint32_t dataoff; /* file offset of data in __LINKEDIT segment */ + uint32_t datasize; /* file size of data in __LINKEDIT segment */ +}; + +struct fileset_entry_command { + uint32_t cmd; /* LC_FILESET_ENTRY */ + uint32_t cmdsize; /* includes id string */ + uint64_t vmaddr; /* memory address of the dylib */ + uint64_t fileoff; /* file offset of the dylib */ + union lc_str entry_id; /* contained entry id */ + uint32_t reserved; /* entry_id is 32-bits long, so this is the reserved padding */ +}; + +/* + * The encryption_info_command contains the file offset and size of an + * of an encrypted segment. + */ +struct encryption_info_command { + uint32_t cmd; /* LC_ENCRYPTION_INFO */ + uint32_t cmdsize; /* sizeof(struct encryption_info_command) */ + uint32_t cryptoff; /* file offset of encrypted range */ + uint32_t cryptsize; /* file size of encrypted range */ + uint32_t cryptid; /* which enryption system, + 0 means not-encrypted yet */ +}; + +/* + * The encryption_info_command_64 contains the file offset and size of an + * of an encrypted segment (for use in x86_64 targets). 
+ */ +struct encryption_info_command_64 { + uint32_t cmd; /* LC_ENCRYPTION_INFO_64 */ + uint32_t cmdsize; /* sizeof(struct encryption_info_command_64) */ + uint32_t cryptoff; /* file offset of encrypted range */ + uint32_t cryptsize; /* file size of encrypted range */ + uint32_t cryptid; /* which enryption system, + 0 means not-encrypted yet */ + uint32_t pad; /* padding to make this struct's size a multiple + of 8 bytes */ +}; + +/* + * The version_min_command contains the min OS version on which this + * binary was built to run. + */ +struct version_min_command { + uint32_t cmd; /* LC_VERSION_MIN_MACOSX or + LC_VERSION_MIN_IPHONEOS or + LC_VERSION_MIN_WATCHOS or + LC_VERSION_MIN_TVOS */ + uint32_t cmdsize; /* sizeof(struct min_version_command) */ + uint32_t version; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */ + uint32_t sdk; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */ +}; + +/* + * The build_version_command contains the min OS version on which this + * binary was built to run for its platform. The list of known platforms and + * tool values following it. + */ +struct build_version_command { + uint32_t cmd; /* LC_BUILD_VERSION */ + uint32_t cmdsize; /* sizeof(struct build_version_command) plus */ + /* ntools * sizeof(struct build_tool_version) */ + uint32_t platform; /* platform */ + uint32_t minos; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */ + uint32_t sdk; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */ + uint32_t ntools; /* number of tool entries following this */ +}; + +struct build_tool_version { + uint32_t tool; /* enum for the tool */ + uint32_t version; /* version number of the tool */ +}; + +/* Known values for the platform field above. 
*/ +#define PLATFORM_MACOS 1 +#define PLATFORM_IOS 2 +#define PLATFORM_TVOS 3 +#define PLATFORM_WATCHOS 4 +#define PLATFORM_BRIDGEOS 5 +#define PLATFORM_MACCATALYST 6 +#define PLATFORM_IOSSIMULATOR 7 +#define PLATFORM_TVOSSIMULATOR 8 +#define PLATFORM_WATCHOSSIMULATOR 9 +#define PLATFORM_DRIVERKIT 10 +#define PLATFORM_MAX PLATFORM_DRIVERKIT +/* Addition of simulated platfrom also needs to update proc_is_simulated() */ + +/* Known values for the tool field above. */ +#define TOOL_CLANG 1 +#define TOOL_SWIFT 2 +#define TOOL_LD 3 + +/* + * The dyld_info_command contains the file offsets and sizes of + * the new compressed form of the information dyld needs to + * load the image. This information is used by dyld on Mac OS X + * 10.6 and later. All information pointed to by this command + * is encoded using byte streams, so no endian swapping is needed + * to interpret it. + */ +struct dyld_info_command { + uint32_t cmd; /* LC_DYLD_INFO or LC_DYLD_INFO_ONLY */ + uint32_t cmdsize; /* sizeof(struct dyld_info_command) */ + + /* + * Dyld rebases an image whenever dyld loads it at an address different + * from its preferred address. The rebase information is a stream + * of byte sized opcodes whose symbolic names start with REBASE_OPCODE_. + * Conceptually the rebase information is a table of tuples: + * + * The opcodes are a compressed way to encode the table by only + * encoding when a column changes. In addition simple patterns + * like "every n'th offset for m times" can be encoded in a few + * bytes. + */ + uint32_t rebase_off; /* file offset to rebase info */ + uint32_t rebase_size; /* size of rebase info */ + + /* + * Dyld binds an image during the loading process, if the image + * requires any pointers to be initialized to symbols in other images. + * The bind information is a stream of byte sized + * opcodes whose symbolic names start with BIND_OPCODE_. 
+ * Conceptually the bind information is a table of tuples: + * + * The opcodes are a compressed way to encode the table by only + * encoding when a column changes. In addition simple patterns + * like for runs of pointers initialzed to the same value can be + * encoded in a few bytes. + */ + uint32_t bind_off; /* file offset to binding info */ + uint32_t bind_size; /* size of binding info */ + + /* + * Some C++ programs require dyld to unique symbols so that all + * images in the process use the same copy of some code/data. + * This step is done after binding. The content of the weak_bind + * info is an opcode stream like the bind_info. But it is sorted + * alphabetically by symbol name. This enable dyld to walk + * all images with weak binding information in order and look + * for collisions. If there are no collisions, dyld does + * no updating. That means that some fixups are also encoded + * in the bind_info. For instance, all calls to "operator new" + * are first bound to libstdc++.dylib using the information + * in bind_info. Then if some image overrides operator new + * that is detected when the weak_bind information is processed + * and the call to operator new is then rebound. + */ + uint32_t weak_bind_off; /* file offset to weak binding info */ + uint32_t weak_bind_size; /* size of weak binding info */ + + /* + * Some uses of external symbols do not need to be bound immediately. + * Instead they can be lazily bound on first use. The lazy_bind + * are contains a stream of BIND opcodes to bind all lazy symbols. + * Normal use is that dyld ignores the lazy_bind section when + * loading an image. Instead the static linker arranged for the + * lazy pointer to initially point to a helper function which + * pushes the offset into the lazy_bind area for the symbol + * needing to be bound, then jumps to dyld which simply adds + * the offset to lazy_bind_off to get the information on what + * to bind. 
+ */ + uint32_t lazy_bind_off; /* file offset to lazy binding info */ + uint32_t lazy_bind_size; /* size of lazy binding infs */ + + /* + * The symbols exported by a dylib are encoded in a trie. This + * is a compact representation that factors out common prefixes. + * It also reduces LINKEDIT pages in RAM because it encodes all + * information (name, address, flags) in one small, contiguous range. + * The export area is a stream of nodes. The first node sequentially + * is the start node for the trie. + * + * Nodes for a symbol start with a uleb128 that is the length of + * the exported symbol information for the string so far. + * If there is no exported symbol, the node starts with a zero byte. + * If there is exported info, it follows the length. + * + * First is a uleb128 containing flags. Normally, it is followed by + * a uleb128 encoded offset which is location of the content named + * by the symbol from the mach_header for the image. If the flags + * is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is + * a uleb128 encoded library ordinal, then a zero terminated + * UTF8 string. If the string is zero length, then the symbol + * is re-export from the specified dylib with the same name. + * If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following + * the flags is two uleb128s: the stub offset and the resolver offset. + * The stub is used by non-lazy pointers. The resolver is used + * by lazy pointers and must be called to get the actual address to use. + * + * After the optional exported symbol information is a byte of + * how many edges (0-255) that this node has leaving it, + * followed by each edge. + * Each edge is a zero terminated UTF8 of the addition chars + * in the symbol, followed by a uleb128 offset for the node that + * edge points to. 
+ * + */ + uint32_t export_off; /* file offset to lazy binding info */ + uint32_t export_size; /* size of lazy binding infs */ +}; + +/* + * The following are used to encode rebasing information + */ +#define REBASE_TYPE_POINTER 1 +#define REBASE_TYPE_TEXT_ABSOLUTE32 2 +#define REBASE_TYPE_TEXT_PCREL32 3 + +#define REBASE_OPCODE_MASK 0xF0 +#define REBASE_IMMEDIATE_MASK 0x0F +#define REBASE_OPCODE_DONE 0x00 +#define REBASE_OPCODE_SET_TYPE_IMM 0x10 +#define REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 0x20 +#define REBASE_OPCODE_ADD_ADDR_ULEB 0x30 +#define REBASE_OPCODE_ADD_ADDR_IMM_SCALED 0x40 +#define REBASE_OPCODE_DO_REBASE_IMM_TIMES 0x50 +#define REBASE_OPCODE_DO_REBASE_ULEB_TIMES 0x60 +#define REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB 0x70 +#define REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB 0x80 + + +/* + * The following are used to encode binding information + */ +#define BIND_TYPE_POINTER 1 +#define BIND_TYPE_TEXT_ABSOLUTE32 2 +#define BIND_TYPE_TEXT_PCREL32 3 + +#define BIND_SPECIAL_DYLIB_SELF 0 +#define BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE -1 +#define BIND_SPECIAL_DYLIB_FLAT_LOOKUP -2 +#define BIND_SPECIAL_DYLIB_WEAK_LOOKUP -3 + +#define BIND_SYMBOL_FLAGS_WEAK_IMPORT 0x1 +#define BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION 0x8 + +#define BIND_OPCODE_MASK 0xF0 +#define BIND_IMMEDIATE_MASK 0x0F +#define BIND_OPCODE_DONE 0x00 +#define BIND_OPCODE_SET_DYLIB_ORDINAL_IMM 0x10 +#define BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB 0x20 +#define BIND_OPCODE_SET_DYLIB_SPECIAL_IMM 0x30 +#define BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM 0x40 +#define BIND_OPCODE_SET_TYPE_IMM 0x50 +#define BIND_OPCODE_SET_ADDEND_SLEB 0x60 +#define BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 0x70 +#define BIND_OPCODE_ADD_ADDR_ULEB 0x80 +#define BIND_OPCODE_DO_BIND 0x90 +#define BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB 0xA0 +#define BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED 0xB0 +#define BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB 0xC0 +#define BIND_OPCODE_THREADED 0xD0 +#define 
BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB 0x00 +#define BIND_SUBOPCODE_THREADED_APPLY 0x01 + + +/* + * The following are used on the flags byte of a terminal node + * in the export information. + */ +#define EXPORT_SYMBOL_FLAGS_KIND_MASK 0x03 +#define EXPORT_SYMBOL_FLAGS_KIND_REGULAR 0x00 +#define EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL 0x01 +#define EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE 0x02 +#define EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION 0x04 +#define EXPORT_SYMBOL_FLAGS_REEXPORT 0x08 +#define EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER 0x10 + +/* + * The linker_option_command contains linker options embedded in object files. + */ +struct linker_option_command { + uint32_t cmd; /* LC_LINKER_OPTION only used in MH_OBJECT filetypes */ + uint32_t cmdsize; + uint32_t count; /* number of strings */ + /* concatenation of zero terminated UTF8 strings. + Zero filled at end to align */ +}; + +/* + * The symseg_command contains the offset and size of the GNU style + * symbol table information as described in the header file . + * The symbol roots of the symbol segments must also be aligned properly + * in the file. So the requirement of keeping the offsets aligned to a + * multiple of a 4 bytes translates to the length field of the symbol + * roots also being a multiple of a long. Also the padding must again be + * zeroed. (THIS IS OBSOLETE and no longer supported). + */ +struct symseg_command { + uint32_t cmd; /* LC_SYMSEG */ + uint32_t cmdsize; /* sizeof(struct symseg_command) */ + uint32_t offset; /* symbol segment offset */ + uint32_t size; /* symbol segment size in bytes */ +}; + +/* + * The ident_command contains a free format string table following the + * ident_command structure. The strings are null terminated and the size of + * the command is padded out with zero bytes to a multiple of 4 bytes/ + * (THIS IS OBSOLETE and no longer supported). 
+ */ +struct ident_command { + uint32_t cmd; /* LC_IDENT */ + uint32_t cmdsize; /* strings that follow this command */ +}; + +/* + * The fvmfile_command contains a reference to a file to be loaded at the + * specified virtual address. (Presently, this command is reserved for + * internal use. The kernel ignores this command when loading a program into + * memory). + */ +struct fvmfile_command { + uint32_t cmd; /* LC_FVMFILE */ + uint32_t cmdsize; /* includes pathname string */ + union lc_str name; /* files pathname */ + uint32_t header_addr; /* files virtual address */ +}; + + +/* + * The entry_point_command is a replacement for thread_command. + * It is used for main executables to specify the location (file offset) + * of main(). If -stack_size was used at link time, the stacksize + * field will contain the stack size need for the main thread. + */ +struct entry_point_command { + uint32_t cmd; /* LC_MAIN only used in MH_EXECUTE filetypes */ + uint32_t cmdsize; /* 24 */ + uint64_t entryoff; /* file (__TEXT) offset of main() */ + uint64_t stacksize;/* if not zero, initial stack size */ +}; + + +/* + * The source_version_command is an optional load command containing + * the version of the sources used to build the binary. + */ +struct source_version_command { + uint32_t cmd; /* LC_SOURCE_VERSION */ + uint32_t cmdsize; /* 16 */ + uint64_t version; /* A.B.C.D.E packed as a24.b10.c10.d10.e10 */ +}; + + +/* + * The LC_DATA_IN_CODE load commands uses a linkedit_data_command + * to point to an array of data_in_code_entry entries. Each entry + * describes a range of data in a code section. 
+ */ +struct data_in_code_entry { + uint32_t offset; /* from mach_header to start of data range*/ + uint16_t length; /* number of bytes in data range */ + uint16_t kind; /* a DICE_KIND_* value */ +}; +#define DICE_KIND_DATA 0x0001 +#define DICE_KIND_JUMP_TABLE8 0x0002 +#define DICE_KIND_JUMP_TABLE16 0x0003 +#define DICE_KIND_JUMP_TABLE32 0x0004 +#define DICE_KIND_ABS_JUMP_TABLE32 0x0005 + + + +/* + * Sections of type S_THREAD_LOCAL_VARIABLES contain an array + * of tlv_descriptor structures. + */ +struct tlv_descriptor +{ + void* (*thunk)(struct tlv_descriptor*); + unsigned long key; + unsigned long offset; +}; + +/* + * LC_NOTE commands describe a region of arbitrary data included in a Mach-O + * file. Its initial use is to record extra data in MH_CORE files. + */ +struct note_command { + uint32_t cmd; /* LC_NOTE */ + uint32_t cmdsize; /* sizeof(struct note_command) */ + char data_owner[16]; /* owner name for this LC_NOTE */ + uint64_t offset; /* file offset of this data */ + uint64_t size; /* length of data region */ +}; + +#endif /* _MACHO_LOADER_H_ */ diff --git a/I. Mach-O/mac/mach_loader.c b/I. Mach-O/mac/mach_loader.c new file mode 100644 index 0000000..98a3ca2 --- /dev/null +++ b/I. Mach-O/mac/mach_loader.c @@ -0,0 +1,3912 @@ +// SOURCE: https://raw.githubusercontent.com/apple-oss-distributions/xnu/xnu-10002.61.3/bsd/kern/mach_loader.c + +/* + * Copyright (c) 2000-2020 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * Copyright (C) 1988, 1989, NeXT, Inc. + * + * File: kern/mach_loader.c + * Author: Avadis Tevanian, Jr. + * + * Mach object file loader (kernel version, for now). + * + * 21-Jul-88 Avadis Tevanian, Jr. (avie) at NeXT + * Started. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include /* vm_allocate() */ +#include /* mach_vm_allocate() */ +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include /* for kIOReturnNotPrivileged */ +#include /* for IOVnodeHasEntitlement */ + +#include +#include + +#include "kern_exec_internal.h" + +/* XXX should have prototypes in a shared header file */ +extern int get_map_nentries(vm_map_t); + +extern kern_return_t memory_object_signed(memory_object_control_t control, + boolean_t is_signed); + + +/* An empty load_result_t */ +static const load_result_t load_result_null = { + .mach_header = MACH_VM_MIN_ADDRESS, + .entry_point = MACH_VM_MIN_ADDRESS, + .user_stack = MACH_VM_MIN_ADDRESS, + .user_stack_size = 0, + .user_stack_alloc = MACH_VM_MIN_ADDRESS, + .user_stack_alloc_size = 0, + .all_image_info_addr = MACH_VM_MIN_ADDRESS, + .all_image_info_size = 0, + .thread_count = 0, + .unixproc = 0, + .dynlinker = 0, + .needs_dynlinker = 0, + .validentry = 0, + .using_lcmain = 0, + .is_64bit_addr = 0, + .is_64bit_data = 0, + .custom_stack = 0, + .csflags = 0, + .has_pagezero = 0, + .uuid = { 0 }, + .min_vm_addr = MACH_VM_MAX_ADDRESS, + .max_vm_addr = MACH_VM_MIN_ADDRESS, + .ro_vm_start = MACH_VM_MIN_ADDRESS, + .ro_vm_end = MACH_VM_MIN_ADDRESS, + .cs_end_offset = 0, + .threadstate = NULL, + .threadstate_sz = 0, + .is_rosetta = 0, + .dynlinker_ro_vm_start = 0, + .dynlinker_ro_vm_end = 0, + .dynlinker_mach_header = MACH_VM_MIN_ADDRESS, + .dynlinker_fd = -1, +}; + +/* + * Prototypes of static functions. 
+ */ +static load_return_t +parse_machfile( + struct vnode *vp, + vm_map_t map, + thread_t thread, + struct mach_header *header, + off_t file_offset, + off_t macho_size, + int depth, + int64_t slide, + int64_t dyld_slide, + load_result_t *result, + load_result_t *binresult, + struct image_params *imgp + ); + +static load_return_t +load_segment( + struct load_command *lcp, + uint32_t filetype, + void *control, + off_t pager_offset, + off_t macho_size, + struct vnode *vp, + vm_map_t map, + int64_t slide, + load_result_t *result, + struct image_params *imgp + ); + +static load_return_t +load_uuid( + struct uuid_command *uulp, + char *command_end, + load_result_t *result + ); + +static load_return_t +load_version( + struct version_min_command *vmc, + boolean_t *found_version_cmd, + struct image_params *imgp, + load_result_t *result + ); + +static load_return_t +load_code_signature( + struct linkedit_data_command *lcp, + struct vnode *vp, + off_t macho_offset, + off_t macho_size, + cpu_type_t cputype, + cpu_subtype_t cpusubtype, + load_result_t *result, + struct image_params *imgp); + +#if CONFIG_CODE_DECRYPTION +static load_return_t +set_code_unprotect( + struct encryption_info_command *lcp, + caddr_t addr, + vm_map_t map, + int64_t slide, + struct vnode *vp, + off_t macho_offset, + cpu_type_t cputype, + cpu_subtype_t cpusubtype); +#endif + +static +load_return_t +load_main( + struct entry_point_command *epc, + thread_t thread, + int64_t slide, + load_result_t *result + ); + +static +load_return_t +setup_driver_main( + thread_t thread, + int64_t slide, + load_result_t *result + ); + +static load_return_t +load_unixthread( + struct thread_command *tcp, + thread_t thread, + int64_t slide, + boolean_t is_x86_64_compat_binary, + load_result_t *result + ); + +static load_return_t +load_threadstate( + thread_t thread, + uint32_t *ts, + uint32_t total_size, + load_result_t * + ); + +static load_return_t +load_threadstack( + thread_t thread, + uint32_t *ts, + uint32_t 
total_size, + mach_vm_offset_t *user_stack, + int *customstack, + boolean_t is_x86_64_compat_binary, + load_result_t *result + ); + +static load_return_t +load_threadentry( + thread_t thread, + uint32_t *ts, + uint32_t total_size, + mach_vm_offset_t *entry_point + ); + +static load_return_t +load_dylinker( + struct dylinker_command *lcp, + integer_t archbits, + vm_map_t map, + thread_t thread, + int depth, + int64_t slide, + load_result_t *result, + struct image_params *imgp + ); + + +#if CONFIG_ROSETTA +static load_return_t +load_rosetta( + vm_map_t map, + thread_t thread, + load_result_t *result, + struct image_params *imgp + ); +#endif + +#if __x86_64__ +extern int bootarg_no32exec; +static boolean_t +check_if_simulator_binary( + struct image_params *imgp, + off_t file_offset, + off_t macho_size); +#endif + +struct macho_data; + +static load_return_t +get_macho_vnode( + const char *path, + integer_t archbits, + struct mach_header *mach_header, + off_t *file_offset, + off_t *macho_size, + struct macho_data *macho_data, + struct vnode **vpp, + struct image_params *imgp + ); + +static inline void +widen_segment_command(const struct segment_command *scp32, + struct segment_command_64 *scp) +{ + scp->cmd = scp32->cmd; + scp->cmdsize = scp32->cmdsize; + bcopy(scp32->segname, scp->segname, sizeof(scp->segname)); + scp->vmaddr = scp32->vmaddr; + scp->vmsize = scp32->vmsize; + scp->fileoff = scp32->fileoff; + scp->filesize = scp32->filesize; + scp->maxprot = scp32->maxprot; + scp->initprot = scp32->initprot; + scp->nsects = scp32->nsects; + scp->flags = scp32->flags; +} + +static void +note_all_image_info_section(const struct segment_command_64 *scp, + boolean_t is64, size_t section_size, const void *sections, + int64_t slide, load_result_t *result) +{ + const union { + struct section s32; + struct section_64 s64; + } *sectionp; + unsigned int i; + + + if (strncmp(scp->segname, "__DATA_DIRTY", sizeof(scp->segname)) != 0 && + strncmp(scp->segname, "__DATA", 
sizeof(scp->segname)) != 0) { + return; + } + for (i = 0; i < scp->nsects; ++i) { + sectionp = (const void *) + ((const char *)sections + section_size * i); + if (0 == strncmp(sectionp->s64.sectname, "__all_image_info", + sizeof(sectionp->s64.sectname))) { + result->all_image_info_addr = + is64 ? sectionp->s64.addr : sectionp->s32.addr; + result->all_image_info_addr += slide; + result->all_image_info_size = + is64 ? sectionp->s64.size : sectionp->s32.size; + return; + } + } +} + +#if __arm64__ +/* + * Allow bypassing some security rules (hard pagezero, no write+execute) + * in exchange for better binary compatibility for legacy apps built + * before 16KB-alignment was enforced. + */ +const int fourk_binary_compatibility_unsafe = TRUE; +const int fourk_binary_compatibility_allow_wx = FALSE; +#endif /* __arm64__ */ + +#if __has_feature(ptrauth_calls) && XNU_TARGET_OS_OSX +/** + * Determines whether this is an arm64e process which may host in-process + * plugins. + */ +static inline bool +arm64e_plugin_host(struct image_params *imgp, load_result_t *result) +{ + if (imgp->ip_flags & IMGPF_NOJOP) { + return false; + } + + if (!result->platform_binary) { + return false; + } + + struct cs_blob *csblob = csvnode_get_blob(imgp->ip_vp, imgp->ip_arch_offset); + const char *identity = csblob_get_identity(csblob); + if (!identity) { + return false; + } + + /* Check if override host plugin entitlement is present and posix spawn attribute to disable A keys is passed */ + if (IOVnodeHasEntitlement(imgp->ip_vp, (int64_t)imgp->ip_arch_offset, OVERRIDE_PLUGIN_HOST_ENTITLEMENT)) { + bool ret = imgp->ip_flags & IMGPF_PLUGIN_HOST_DISABLE_A_KEYS; + if (ret) { + proc_t p = vfs_context_proc(imgp->ip_vfs_context); + set_proc_name(imgp, p); + os_log(OS_LOG_DEFAULT, "%s: running binary \"%s\" in keys-off mode due to posix_spawnattr_disable_ptr_auth_a_keys_np", __func__, p->p_name); + } + return ret; + } + + /* Disabling library validation is a good signal that this process plans to host 
plugins */ + const char *const disable_lv_entitlements[] = { + "com.apple.security.cs.disable-library-validation", + "com.apple.private.cs.automator-plugins", + CLEAR_LV_ENTITLEMENT, + }; + for (size_t i = 0; i < ARRAY_COUNT(disable_lv_entitlements); i++) { + const char *entitlement = disable_lv_entitlements[i]; + if (IOVnodeHasEntitlement(imgp->ip_vp, (int64_t)imgp->ip_arch_offset, entitlement)) { + proc_t p = vfs_context_proc(imgp->ip_vfs_context); + set_proc_name(imgp, p); + os_log(OS_LOG_DEFAULT, "%s: running binary \"%s\" in keys-off mode due to entitlement: %s", __func__, p->p_name, entitlement); + return true; + } + } + + /* From /System/Library/Security/HardeningExceptions.plist */ + const char *const hardening_exceptions[] = { + "com.apple.perl5", /* Scripting engines may load third party code and jit*/ + "com.apple.perl", /* Scripting engines may load third party code and jit*/ + "org.python.python", /* Scripting engines may load third party code and jit*/ + "com.apple.expect", /* Scripting engines may load third party code and jit*/ + "com.tcltk.wish", /* Scripting engines may load third party code and jit*/ + "com.tcltk.tclsh", /* Scripting engines may load third party code and jit*/ + "com.apple.ruby", /* Scripting engines may load third party code and jit*/ + "com.apple.bash", /* Required for the 'enable' command */ + "com.apple.zsh", /* Required for the 'zmodload' command */ + "com.apple.ksh", /* Required for 'builtin' command */ + }; + for (size_t i = 0; i < ARRAY_COUNT(hardening_exceptions); i++) { + if (strncmp(hardening_exceptions[i], identity, strlen(hardening_exceptions[i])) == 0) { + proc_t p = vfs_context_proc(imgp->ip_vfs_context); + set_proc_name(imgp, p); + os_log(OS_LOG_DEFAULT, "%s: running binary \"%s\" in keys-off mode due to identity: %s", __func__, p->p_name, identity); + return true; + } + } + + return false; +} +#endif /* __has_feature(ptrauth_calls) && XNU_TARGET_OS_OSX */ + +load_return_t +load_machfile( + struct image_params 
*imgp, + struct mach_header *header, + thread_t thread, + vm_map_t *mapp, + load_result_t *result + ) +{ + struct vnode *vp = imgp->ip_vp; + off_t file_offset = imgp->ip_arch_offset; + off_t macho_size = imgp->ip_arch_size; + off_t total_size = 0; + off_t file_size = imgp->ip_vattr->va_data_size; + pmap_t pmap = 0; /* protected by create_map */ + vm_map_t map; + load_result_t myresult; + load_return_t lret; + boolean_t enforce_hard_pagezero = TRUE; + int in_exec = (imgp->ip_flags & IMGPF_EXEC); + task_t task = current_task(); + int64_t aslr_page_offset = 0; + int64_t dyld_aslr_page_offset = 0; + int64_t aslr_section_size = 0; + int64_t aslr_section_offset = 0; + kern_return_t kret; + unsigned int pmap_flags = 0; + + if (os_add_overflow(file_offset, macho_size, &total_size) || + total_size > file_size) { + return LOAD_BADMACHO; + } + + result->is_64bit_addr = ((imgp->ip_flags & IMGPF_IS_64BIT_ADDR) == IMGPF_IS_64BIT_ADDR); + result->is_64bit_data = ((imgp->ip_flags & IMGPF_IS_64BIT_DATA) == IMGPF_IS_64BIT_DATA); +#if defined(HAS_APPLE_PAC) + pmap_flags |= (imgp->ip_flags & IMGPF_NOJOP) ? PMAP_CREATE_DISABLE_JOP : 0; +#endif /* defined(HAS_APPLE_PAC) */ +#if CONFIG_ROSETTA + pmap_flags |= (imgp->ip_flags & IMGPF_ROSETTA) ? PMAP_CREATE_ROSETTA : 0; +#endif + pmap_flags |= result->is_64bit_addr ? 
PMAP_CREATE_64BIT : 0; + + task_t ledger_task; + if (imgp->ip_new_thread) { + ledger_task = get_threadtask(imgp->ip_new_thread); + } else { + ledger_task = task; + } + +#if XNU_TARGET_OS_OSX && _POSIX_SPAWN_FORCE_4K_PAGES && PMAP_CREATE_FORCE_4K_PAGES + if (imgp->ip_px_sa != NULL) { + struct _posix_spawnattr* psa = (struct _posix_spawnattr *) imgp->ip_px_sa; + if (psa->psa_flags & _POSIX_SPAWN_FORCE_4K_PAGES) { + pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES; + } + } +#endif /* XNU_TARGET_OS_OSX && _POSIX_SPAWN_FORCE_4K_PAGES && PMAP_CREATE_FORCE_4K_PAGE */ + + pmap = pmap_create_options(get_task_ledger(ledger_task), + (vm_map_size_t) 0, + pmap_flags); + if (pmap == NULL) { + return LOAD_RESOURCE; + } + map = vm_map_create_options(pmap, 0, + vm_compute_max_offset(result->is_64bit_addr), + VM_MAP_CREATE_PAGEABLE); + +#if defined(__arm64__) + if (result->is_64bit_addr) { + /* enforce 16KB alignment of VM map entries */ + vm_map_set_page_shift(map, SIXTEENK_PAGE_SHIFT); + } else { + vm_map_set_page_shift(map, page_shift_user32); + } +#endif /* __arm64__ */ + +#if PMAP_CREATE_FORCE_4K_PAGES + if (pmap_flags & PMAP_CREATE_FORCE_4K_PAGES) { + DEBUG4K_LIFE("***** launching '%s' as 4k *****\n", vp->v_name); + vm_map_set_page_shift(map, FOURK_PAGE_SHIFT); + } +#endif /* PMAP_CREATE_FORCE_4K_PAGES */ + +#ifndef CONFIG_ENFORCE_SIGNED_CODE + /* This turns off faulting for executable pages, which allows + * to circumvent Code Signing Enforcement. The per process + * flag (CS_ENFORCEMENT) is not set yet, but we can use the + * global flag. + */ + if (!cs_process_global_enforcement() && (header->flags & MH_ALLOW_STACK_EXECUTION)) { + vm_map_disable_NX(map); + // TODO: Message Trace or log that this is happening + } +#endif + + /* Forcibly disallow execution from data pages on even if the arch + * normally permits it. 
*/ + if ((header->flags & MH_NO_HEAP_EXECUTION) && !(imgp->ip_flags & IMGPF_ALLOW_DATA_EXEC)) { + vm_map_disallow_data_exec(map); + } + + /* + * Compute a random offset for ASLR, and an independent random offset for dyld. + */ + if (!(imgp->ip_flags & IMGPF_DISABLE_ASLR)) { + vm_map_get_max_aslr_slide_section(map, &aslr_section_offset, &aslr_section_size); + aslr_section_offset = (random() % aslr_section_offset) * aslr_section_size; + + aslr_page_offset = random(); + aslr_page_offset = (aslr_page_offset % (vm_map_get_max_aslr_slide_pages(map) - 1)) + 1; + aslr_page_offset <<= vm_map_page_shift(map); + + dyld_aslr_page_offset = random(); + dyld_aslr_page_offset = (dyld_aslr_page_offset % + (vm_map_get_max_loader_aslr_slide_pages(map) - 1)) + 1; + dyld_aslr_page_offset <<= vm_map_page_shift(map); + + aslr_page_offset += aslr_section_offset; + } + if (vm_map_page_shift(map) < (int)PAGE_SHIFT) { + DEBUG4K_LOAD("slide=0x%llx dyld_slide=0x%llx\n", aslr_page_offset, dyld_aslr_page_offset); + } + + if (!result) { + result = &myresult; + } + + *result = load_result_null; + + /* + * re-set the bitness on the load result since we cleared the load result above. + */ + result->is_64bit_addr = ((imgp->ip_flags & IMGPF_IS_64BIT_ADDR) == IMGPF_IS_64BIT_ADDR); + result->is_64bit_data = ((imgp->ip_flags & IMGPF_IS_64BIT_DATA) == IMGPF_IS_64BIT_DATA); + + lret = parse_machfile(vp, map, thread, header, file_offset, macho_size, + 0, aslr_page_offset, dyld_aslr_page_offset, result, + NULL, imgp); + + if (lret != LOAD_SUCCESS) { + vm_map_deallocate(map); /* will lose pmap reference too */ + return lret; + } + +#if __x86_64__ + /* + * On x86, for compatibility, don't enforce the hard page-zero restriction for 32-bit binaries. + */ + if (!result->is_64bit_addr) { + enforce_hard_pagezero = FALSE; + } + + /* + * For processes with IMGPF_HIGH_BITS_ASLR, add a few random high bits + * to the start address for "anywhere" memory allocations. 
+ */ +#define VM_MAP_HIGH_START_BITS_COUNT 8 +#define VM_MAP_HIGH_START_BITS_SHIFT 27 + if (result->is_64bit_addr && + (imgp->ip_flags & IMGPF_HIGH_BITS_ASLR)) { + int random_bits; + vm_map_offset_t high_start; + + random_bits = random(); + random_bits &= (1 << VM_MAP_HIGH_START_BITS_COUNT) - 1; + high_start = (((vm_map_offset_t)random_bits) + << VM_MAP_HIGH_START_BITS_SHIFT); + vm_map_set_high_start(map, high_start); + } +#endif /* __x86_64__ */ + + /* + * Check to see if the page zero is enforced by the map->min_offset. + */ + if (enforce_hard_pagezero && + (vm_map_has_hard_pagezero(map, 0x1000) == FALSE)) { +#if __arm64__ + if ( + !result->is_64bit_addr && /* not 64-bit address space */ + !(header->flags & MH_PIE) && /* not PIE */ + (vm_map_page_shift(map) != FOURK_PAGE_SHIFT || + PAGE_SHIFT != FOURK_PAGE_SHIFT) && /* page size != 4KB */ + result->has_pagezero && /* has a "soft" page zero */ + fourk_binary_compatibility_unsafe) { + /* + * For backwards compatibility of "4K" apps on + * a 16K system, do not enforce a hard page zero... + */ + } else +#endif /* __arm64__ */ + { + vm_map_deallocate(map); /* will lose pmap reference too */ + return LOAD_BADMACHO; + } + } + +#if __arm64__ + if (enforce_hard_pagezero && result->is_64bit_addr && (header->cputype == CPU_TYPE_ARM64)) { + /* 64 bit ARM binary must have "hard page zero" of 4GB to cover the lower 32 bit address space */ + if (vm_map_has_hard_pagezero(map, 0x100000000) == FALSE) { + vm_map_deallocate(map); /* will lose pmap reference too */ + return LOAD_BADMACHO; + } + } +#endif + + vm_commit_pagezero_status(map); + + /* + * If this is an exec, then we are going to destroy the old + * task, and it's correct to halt it; if it's spawn, the + * task is not yet running, and it makes no sense. + */ + if (in_exec) { + proc_t p = current_proc(); + /* + * Mark the task as halting and start the other + * threads towards terminating themselves. 
Then + * make sure any threads waiting for a process + * transition get informed that we are committed to + * this transition, and then finally complete the + * task halting (wait for threads and then cleanup + * task resources). + * + * NOTE: task_start_halt() makes sure that no new + * threads are created in the task during the transition. + * We need to mark the workqueue as exiting before we + * wait for threads to terminate (at the end of which + * we no longer have a prohibition on thread creation). + * + * Finally, clean up any lingering workqueue data structures + * that may have been left behind by the workqueue threads + * as they exited (and then clean up the work queue itself). + */ + kret = task_start_halt(task); + if (kret != KERN_SUCCESS) { + vm_map_deallocate(map); /* will lose pmap reference too */ + return LOAD_FAILURE; + } + proc_transcommit(p, 0); + workq_mark_exiting(p); + task_complete_halt(task); + workq_exit(p); + + /* + * Roll up accounting info to new task. The roll up is done after + * task_complete_halt to make sure the thread accounting info is + * rolled up to current_task. + */ + task_rollup_accounting_info(get_threadtask(thread), task); + } + *mapp = map; + +#if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) + /* + * arm64e plugin hosts currently run with JOP keys disabled, since they + * may need to run arm64 plugins. 
+ */ + if (arm64e_plugin_host(imgp, result)) { + imgp->ip_flags |= IMGPF_NOJOP; + pmap_disable_user_jop(pmap); + } + +#if CONFIG_ROSETTA + /* Disable JOP keys if the Rosetta runtime being used isn't arm64e */ + if (result->is_rosetta && (imgp->ip_flags & IMGPF_NOJOP)) { + pmap_disable_user_jop(pmap); + } +#endif /* CONFIG_ROSETTA */ +#endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */ + + + return LOAD_SUCCESS; +} + +int macho_printf = 0; +#define MACHO_PRINTF(args) \ + do { \ + if (macho_printf) { \ + printf args; \ + } \ + } while (0) + + +static boolean_t +pie_required( + cpu_type_t exectype, + cpu_subtype_t execsubtype) +{ + switch (exectype) { + case CPU_TYPE_X86_64: + return FALSE; + case CPU_TYPE_ARM64: + return TRUE; + case CPU_TYPE_ARM: + switch (execsubtype) { + case CPU_SUBTYPE_ARM_V7K: + return TRUE; + } + break; + } + return FALSE; +} + +/* + * The file size of a mach-o file is limited to 32 bits; this is because + * this is the limit on the kalloc() of enough bytes for a mach_header and + * the contents of its sizeofcmds, which is currently constrained to 32 + * bits in the file format itself. We read into the kernel buffer the + * commands section, and then parse it in order to parse the mach-o file + * format load_command segment(s). We are only interested in a subset of + * the total set of possible commands. If "map"==VM_MAP_NULL or + * "thread"==THREAD_NULL, do not make permament VM modifications, + * just preflight the parse. 
+ */ +static +load_return_t +parse_machfile( + struct vnode *vp, + vm_map_t map, + thread_t thread, + struct mach_header *header, + off_t file_offset, + off_t macho_size, + int depth, + int64_t aslr_offset, + int64_t dyld_aslr_offset, + load_result_t *result, + load_result_t *binresult, + struct image_params *imgp + ) +{ + uint32_t ncmds; + struct load_command *lcp; + struct dylinker_command *dlp = 0; + void * control; + load_return_t ret = LOAD_SUCCESS; + void * addr; + vm_size_t alloc_size, cmds_size; + size_t offset; + size_t oldoffset; /* for overflow check */ + int pass; + proc_t p = vfs_context_proc(imgp->ip_vfs_context); + int error; + int resid = 0; + int spawn = (imgp->ip_flags & IMGPF_SPAWN); + size_t mach_header_sz = sizeof(struct mach_header); + boolean_t abi64; + boolean_t got_code_signatures = FALSE; + boolean_t found_header_segment = FALSE; + boolean_t found_xhdr = FALSE; + boolean_t found_version_cmd = FALSE; + int64_t slide = 0; + boolean_t dyld_no_load_addr = FALSE; + boolean_t is_dyld = FALSE; + vm_map_offset_t effective_page_mask = PAGE_MASK; +#if __arm64__ + uint64_t pagezero_end = 0; + uint64_t executable_end = 0; + uint64_t writable_start = 0; + vm_map_size_t effective_page_size; + + effective_page_mask = vm_map_page_mask(map); + effective_page_size = vm_map_page_size(map); +#endif /* __arm64__ */ + + if (header->magic == MH_MAGIC_64 || + header->magic == MH_CIGAM_64) { + mach_header_sz = sizeof(struct mach_header_64); + } + + /* + * Break infinite recursion + */ + if (depth > 2) { + return LOAD_FAILURE; + } + + depth++; + + /* + * Set CS_NO_UNTRUSTED_HELPERS by default; load_dylinker and load_rosetta + * will unset it if necessary. + */ + if (depth == 1) { + result->csflags |= CS_NO_UNTRUSTED_HELPERS; + } + + /* + * Check to see if right machine type. 
+ */ + if (((cpu_type_t)(header->cputype & ~CPU_ARCH_MASK) != (cpu_type() & ~CPU_ARCH_MASK)) + ) { + return LOAD_BADARCH; + } + + if (!grade_binary(header->cputype, + header->cpusubtype & ~CPU_SUBTYPE_MASK, + header->cpusubtype & CPU_SUBTYPE_MASK, TRUE)) { + return LOAD_BADARCH; + } + + abi64 = ((header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64); + + switch (header->filetype) { + case MH_EXECUTE: + if (depth != 1 && depth != 3) { + return LOAD_FAILURE; + } + if (header->flags & MH_DYLDLINK) { + /* Check properties of dynamic executables */ + if (!(header->flags & MH_PIE) && pie_required(header->cputype, header->cpusubtype & ~CPU_SUBTYPE_MASK)) { + return LOAD_FAILURE; + } + result->needs_dynlinker = TRUE; + } else if (header->cputype == CPU_TYPE_X86_64) { + /* x86_64 static binaries allowed */ +#if CONFIG_ROSETTA + } else if (imgp->ip_flags & IMGPF_ROSETTA) { + /* Rosetta runtime allowed */ +#endif /* CONFIG_X86_64_COMPAT */ + } else { + /* Check properties of static executables (disallowed except for development) */ +#if !(DEVELOPMENT || DEBUG) + return LOAD_FAILURE; +#endif + } + break; + case MH_DYLINKER: + if (depth != 2) { + return LOAD_FAILURE; + } + is_dyld = TRUE; + break; + + default: + return LOAD_FAILURE; + } + + /* + * For PIE and dyld, slide everything by the ASLR offset. + */ + if ((header->flags & MH_PIE) || is_dyld) { + slide = aslr_offset; + } + + /* + * Get the pager for the file. + */ + control = ubc_getobject(vp, UBC_FLAGS_NONE); + + /* ensure header + sizeofcmds falls within the file */ + if (os_add_overflow(mach_header_sz, header->sizeofcmds, &cmds_size) || + (off_t)cmds_size > macho_size || + round_page_overflow(cmds_size, &alloc_size) || + alloc_size > INT_MAX) { + return LOAD_BADMACHO; + } + + /* + * Map the load commands into kernel memory. 
+ */ + addr = kalloc_data(alloc_size, Z_WAITOK); + if (addr == NULL) { + return LOAD_NOSPACE; + } + + error = vn_rdwr(UIO_READ, vp, addr, (int)alloc_size, file_offset, + UIO_SYSSPACE, 0, vfs_context_ucred(imgp->ip_vfs_context), &resid, p); + if (error) { + kfree_data(addr, alloc_size); + return LOAD_IOERROR; + } + + if (resid) { + { + /* We must be able to read in as much as the mach_header indicated */ + kfree_data(addr, alloc_size); + return LOAD_BADMACHO; + } + } + + /* + * Scan through the commands, processing each one as necessary. + * We parse in three passes through the headers: + * 0: determine if TEXT and DATA boundary can be page-aligned, load platform version + * 1: thread state, uuid, code signature + * 2: segments + * 3: dyld, encryption, check entry point + */ + + boolean_t slide_realign = FALSE; +#if __arm64__ + if (!abi64) { + slide_realign = TRUE; + } +#endif + + for (pass = 0; pass <= 3; pass++) { + if (pass == 1) { +#if __arm64__ + boolean_t is_pie; + int64_t adjust; + + is_pie = ((header->flags & MH_PIE) != 0); + if (pagezero_end != 0 && + pagezero_end < effective_page_size) { + /* need at least 1 page for PAGEZERO */ + adjust = effective_page_size; + MACHO_PRINTF(("pagezero boundary at " + "0x%llx; adjust slide from " + "0x%llx to 0x%llx%s\n", + (uint64_t) pagezero_end, + slide, + slide + adjust, + (is_pie + ? "" + : " BUT NO PIE ****** :-("))); + if (is_pie) { + slide += adjust; + pagezero_end += adjust; + executable_end += adjust; + writable_start += adjust; + } + } + if (pagezero_end != 0) { + result->has_pagezero = TRUE; + } + if (executable_end == writable_start && + (executable_end & effective_page_mask) != 0 && + (executable_end & FOURK_PAGE_MASK) == 0) { + /* + * The TEXT/DATA boundary is 4K-aligned but + * not page-aligned. Adjust the slide to make + * it page-aligned and avoid having a page + * with both write and execute permissions. 
+ */ + adjust = + (effective_page_size - + (executable_end & effective_page_mask)); + MACHO_PRINTF(("page-unaligned X-W boundary at " + "0x%llx; adjust slide from " + "0x%llx to 0x%llx%s\n", + (uint64_t) executable_end, + slide, + slide + adjust, + (is_pie + ? "" + : " BUT NO PIE ****** :-("))); + if (is_pie) { + slide += adjust; + } + } +#endif /* __arm64__ */ + + if (dyld_no_load_addr && binresult) { + /* + * The dyld Mach-O does not specify a load address. Try to locate + * it right after the main binary. If binresult == NULL, load + * directly to the given slide. + */ + mach_vm_address_t max_vm_addr = binresult->max_vm_addr; + slide = vm_map_round_page(slide + max_vm_addr, effective_page_mask); + } + } + + /* + * Check that the entry point is contained in an executable segment + */ + if ((pass == 3) && (thread != THREAD_NULL)) { + if (depth == 1 && imgp && (imgp->ip_flags & IMGPF_DRIVER)) { + /* Driver binaries must have driverkit platform */ + if (result->ip_platform == PLATFORM_DRIVERKIT) { + /* Driver binaries have no entry point */ + ret = setup_driver_main(thread, slide, result); + } else { + ret = LOAD_FAILURE; + } + } else if (!result->using_lcmain && result->validentry == 0) { + ret = LOAD_FAILURE; + } + if (ret != KERN_SUCCESS) { + thread_state_initialize(thread); + break; + } + } + + /* + * Check that some segment maps the start of the mach-o file, which is + * needed by the dynamic loader to read the mach headers, etc. + */ + if ((pass == 3) && (found_header_segment == FALSE)) { + ret = LOAD_BADMACHO; + break; + } + + /* + * Loop through each of the load_commands indicated by the + * Mach-O header; if an absurd value is provided, we just + * run off the end of the reserved section by incrementing + * the offset too far, so we are implicitly fail-safe. 
+ */ + offset = mach_header_sz; + ncmds = header->ncmds; + + while (ncmds--) { + /* ensure enough space for a minimal load command */ + if (offset + sizeof(struct load_command) > cmds_size) { + ret = LOAD_BADMACHO; + break; + } + + /* + * Get a pointer to the command. + */ + lcp = (struct load_command *)((uintptr_t)addr + offset); + oldoffset = offset; + + /* + * Perform prevalidation of the struct load_command + * before we attempt to use its contents. Invalid + * values are ones which result in an overflow, or + * which can not possibly be valid commands, or which + * straddle or exist past the reserved section at the + * start of the image. + */ + if (os_add_overflow(offset, lcp->cmdsize, &offset) || + lcp->cmdsize < sizeof(struct load_command) || + offset > cmds_size) { + ret = LOAD_BADMACHO; + break; + } + + /* + * Act on struct load_command's for which kernel + * intervention is required. + * Note that each load command implementation is expected to validate + * that lcp->cmdsize is large enough to fit its specific struct type + * before dereferencing fields not covered by struct load_command. 
+ */ + switch (lcp->cmd) { + case LC_SEGMENT: { + struct segment_command *scp = (struct segment_command *) lcp; + if (scp->cmdsize < sizeof(*scp)) { + ret = LOAD_BADMACHO; + break; + } + if (pass == 0) { + if (is_dyld && scp->vmaddr == 0 && scp->fileoff == 0) { + dyld_no_load_addr = TRUE; + if (!slide_realign) { + /* got what we need, bail early on pass 0 */ + continue; + } + } + +#if __arm64__ + assert(!abi64); + + if (scp->initprot == 0 && scp->maxprot == 0 && scp->vmaddr == 0) { + /* PAGEZERO */ + if (os_add3_overflow(scp->vmaddr, scp->vmsize, slide, &pagezero_end) || pagezero_end > UINT32_MAX) { + ret = LOAD_BADMACHO; + break; + } + } + if (scp->initprot & VM_PROT_EXECUTE) { + /* TEXT */ + if (os_add3_overflow(scp->vmaddr, scp->vmsize, slide, &executable_end) || executable_end > UINT32_MAX) { + ret = LOAD_BADMACHO; + break; + } + } + if (scp->initprot & VM_PROT_WRITE) { + /* DATA */ + if (os_add_overflow(scp->vmaddr, slide, &writable_start) || writable_start > UINT32_MAX) { + ret = LOAD_BADMACHO; + break; + } + } +#endif /* __arm64__ */ + break; + } + + if (pass == 1 && !strncmp(scp->segname, "__XHDR", sizeof(scp->segname))) { + found_xhdr = TRUE; + } + + if (pass != 2) { + break; + } + + if (abi64) { + /* + * Having an LC_SEGMENT command for the + * wrong ABI is invalid + */ + ret = LOAD_BADMACHO; + break; + } + + ret = load_segment(lcp, + header->filetype, + control, + file_offset, + macho_size, + vp, + map, + slide, + result, + imgp); + if (ret == LOAD_SUCCESS && scp->fileoff == 0 && scp->filesize > 0) { + /* Enforce a single segment mapping offset zero, with R+X + * protection. 
*/ + if (found_header_segment || + ((scp->initprot & (VM_PROT_READ | VM_PROT_EXECUTE)) != (VM_PROT_READ | VM_PROT_EXECUTE))) { + ret = LOAD_BADMACHO; + break; + } + found_header_segment = TRUE; + } + + break; + } + case LC_SEGMENT_64: { + struct segment_command_64 *scp64 = (struct segment_command_64 *) lcp; + if (scp64->cmdsize < sizeof(*scp64)) { + ret = LOAD_BADMACHO; + break; + } + if (pass == 0) { + if (is_dyld && scp64->vmaddr == 0 && scp64->fileoff == 0) { + dyld_no_load_addr = TRUE; + } + /* got what we need, bail early on pass 0 */ + continue; + } + + if (pass == 1 && !strncmp(scp64->segname, "__XHDR", sizeof(scp64->segname))) { + found_xhdr = TRUE; + } + + if (pass != 2) { + break; + } + + if (!abi64) { + /* + * Having an LC_SEGMENT_64 command for the + * wrong ABI is invalid + */ + ret = LOAD_BADMACHO; + break; + } + + ret = load_segment(lcp, + header->filetype, + control, + file_offset, + macho_size, + vp, + map, + slide, + result, + imgp); + + if (ret == LOAD_SUCCESS && scp64->fileoff == 0 && scp64->filesize > 0) { + /* Enforce a single segment mapping offset zero, with R+X + * protection. */ + if (found_header_segment || + ((scp64->initprot & (VM_PROT_READ | VM_PROT_EXECUTE)) != (VM_PROT_READ | VM_PROT_EXECUTE))) { + ret = LOAD_BADMACHO; + break; + } + found_header_segment = TRUE; + } + + break; + } + case LC_UNIXTHREAD: { + boolean_t is_x86_64_compat_binary = FALSE; + if (pass != 1) { + break; + } +#if CONFIG_ROSETTA + if (depth == 2 && (imgp->ip_flags & IMGPF_ROSETTA)) { + // Ignore dyld, Rosetta will parse it's load commands to get the + // entry point. 
+ result->validentry = 1; + break; + } +#endif + ret = load_unixthread( + (struct thread_command *) lcp, + thread, + slide, + is_x86_64_compat_binary, + result); + break; + } + case LC_MAIN: + if (pass != 1) { + break; + } + if (depth != 1) { + break; + } + ret = load_main( + (struct entry_point_command *) lcp, + thread, + slide, + result); + break; + case LC_LOAD_DYLINKER: + if (pass != 3) { + break; + } + if ((depth == 1) && (dlp == 0)) { + dlp = (struct dylinker_command *)lcp; + } else { + ret = LOAD_FAILURE; + } + break; + case LC_UUID: + if (pass == 1 && depth == 1) { + ret = load_uuid((struct uuid_command *) lcp, + (char *)addr + cmds_size, + result); + } + break; + case LC_CODE_SIGNATURE: + /* CODE SIGNING */ + if (pass != 1) { + break; + } + + /* pager -> uip -> + * load signatures & store in uip + * set VM object "signed_pages" + */ + ret = load_code_signature( + (struct linkedit_data_command *) lcp, + vp, + file_offset, + macho_size, + header->cputype, + header->cpusubtype, + result, + imgp); + if (ret != LOAD_SUCCESS) { + printf("proc %d: load code signature error %d " + "for file \"%s\"\n", + proc_getpid(p), ret, vp->v_name); + /* + * Allow injections to be ignored on devices w/o enforcement enabled + */ + if (!cs_process_global_enforcement()) { + ret = LOAD_SUCCESS; /* ignore error */ + } + } else { + got_code_signatures = TRUE; + } + + if (got_code_signatures) { + unsigned tainted = CS_VALIDATE_TAINTED; + boolean_t valid = FALSE; + vm_size_t off = 0; + + + if (cs_debug > 10) { + printf("validating initial pages of %s\n", vp->v_name); + } + + while (off < alloc_size && ret == LOAD_SUCCESS) { + tainted = CS_VALIDATE_TAINTED; + + valid = cs_validate_range(vp, + NULL, + file_offset + off, + (const void *)((uintptr_t)addr + off), + MIN(PAGE_SIZE, cmds_size), + &tainted); + if (!valid || (tainted & CS_VALIDATE_TAINTED)) { + if (cs_debug) { + printf("CODE SIGNING: %s[%d]: invalid initial page at offset %lld validated:%d tainted:%d csflags:0x%x\n", + 
vp->v_name, proc_getpid(p), (long long)(file_offset + off), valid, tainted, result->csflags); + } + if (cs_process_global_enforcement() || + (result->csflags & (CS_HARD | CS_KILL | CS_ENFORCEMENT))) { + ret = LOAD_FAILURE; + } + result->csflags &= ~CS_VALID; + } + off += PAGE_SIZE; + } + } + + break; +#if CONFIG_CODE_DECRYPTION + case LC_ENCRYPTION_INFO: + case LC_ENCRYPTION_INFO_64: + if (pass != 3) { + break; + } + ret = set_code_unprotect( + (struct encryption_info_command *) lcp, + addr, map, slide, vp, file_offset, + header->cputype, header->cpusubtype); + if (ret != LOAD_SUCCESS) { + os_reason_t load_failure_reason = OS_REASON_NULL; + printf("proc %d: set_code_unprotect() error %d " + "for file \"%s\"\n", + proc_getpid(p), ret, vp->v_name); + /* + * Don't let the app run if it's + * encrypted but we failed to set up the + * decrypter. If the keys are missing it will + * return LOAD_DECRYPTFAIL. + */ + if (ret == LOAD_DECRYPTFAIL) { + /* failed to load due to missing FP keys */ + proc_lock(p); + p->p_lflag |= P_LTERM_DECRYPTFAIL; + proc_unlock(p); + + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, + proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_FAIRPLAY_DECRYPT, 0, 0); + load_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_FAIRPLAY_DECRYPT); + } else { + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, + proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_DECRYPT, 0, 0); + load_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_DECRYPT); + } + + /* + * Don't signal the process if it was forked and in a partially constructed + * state as part of a spawn -- it will just be torn down when the exec fails. 
+ */ + if (!spawn) { + assert(load_failure_reason != OS_REASON_NULL); + { + psignal_with_reason(current_proc(), SIGKILL, load_failure_reason); + load_failure_reason = OS_REASON_NULL; + } + } else { + os_reason_free(load_failure_reason); + load_failure_reason = OS_REASON_NULL; + } + } + break; +#endif + case LC_VERSION_MIN_IPHONEOS: + case LC_VERSION_MIN_MACOSX: + case LC_VERSION_MIN_WATCHOS: + case LC_VERSION_MIN_TVOS: { + struct version_min_command *vmc; + + if (depth != 1 || pass != 0) { + break; + } + vmc = (struct version_min_command *) lcp; + ret = load_version(vmc, &found_version_cmd, imgp, result); +#if XNU_TARGET_OS_OSX + if (ret == LOAD_SUCCESS) { + if (result->ip_platform == PLATFORM_IOS) { + vm_map_mark_alien(map); + } else { + assert(!vm_map_is_alien(map)); + } + } +#endif /* XNU_TARGET_OS_OSX */ + break; + } + case LC_BUILD_VERSION: { + if (depth != 1 || pass != 0) { + break; + } + struct build_version_command* bvc = (struct build_version_command*)lcp; + if (bvc->cmdsize < sizeof(*bvc)) { + ret = LOAD_BADMACHO; + break; + } + if (found_version_cmd == TRUE) { + ret = LOAD_BADMACHO; + break; + } + result->ip_platform = bvc->platform; + result->lr_sdk = bvc->sdk; + result->lr_min_sdk = bvc->minos; + found_version_cmd = TRUE; +#if XNU_TARGET_OS_OSX + if (result->ip_platform == PLATFORM_IOS) { + vm_map_mark_alien(map); + } else { + assert(!vm_map_is_alien(map)); + } +#endif /* XNU_TARGET_OS_OSX */ + break; + } + default: + /* Other commands are ignored by the kernel */ + ret = LOAD_SUCCESS; + break; + } + if (ret != LOAD_SUCCESS) { + break; + } + } + if (ret != LOAD_SUCCESS) { + break; + } + } + + if (ret == LOAD_SUCCESS) { + if (!got_code_signatures && cs_process_global_enforcement()) { + ret = LOAD_FAILURE; + } + + /* Make sure if we need dyld, we got it */ + if (result->needs_dynlinker && !dlp) { + ret = LOAD_FAILURE; + } + + if ((ret == LOAD_SUCCESS) && (dlp != 0)) { + /* + * load the dylinker, and slide it by the independent DYLD ASLR + * offset 
regardless of the PIE-ness of the main binary. + */ + ret = load_dylinker(dlp, header->cputype, map, thread, depth, + dyld_aslr_offset, result, imgp); + } + +#if CONFIG_ROSETTA + if ((ret == LOAD_SUCCESS) && (depth == 1) && (imgp->ip_flags & IMGPF_ROSETTA)) { + ret = load_rosetta(map, thread, result, imgp); + if (ret == LOAD_SUCCESS) { + if (result->user_stack_alloc_size != 0) { + // If a stack allocation is required then add a 4gb gap after the main + // binary/dyld for the worst case static translation size. + mach_vm_size_t reserved_aot_size = 0x100000000; + vm_map_offset_t mask = vm_map_page_mask(map); + + mach_vm_address_t vm_end; + if (dlp != 0) { + vm_end = vm_map_round_page(result->dynlinker_max_vm_addr, mask); + } else { + vm_end = vm_map_round_page(result->max_vm_addr, mask); + } + + mach_vm_size_t user_stack_size = vm_map_round_page(result->user_stack_alloc_size, mask); + result->user_stack = vm_map_round_page(vm_end + user_stack_size + reserved_aot_size + slide, mask); + } + } + } +#endif + + if ((ret == LOAD_SUCCESS) && (depth == 1)) { + if (result->thread_count == 0) { + ret = LOAD_FAILURE; + } +#if CONFIG_ENFORCE_SIGNED_CODE + if (!(result->csflags & CS_NO_UNTRUSTED_HELPERS)) { + ret = LOAD_FAILURE; + } +#endif + } + } + + if (ret == LOAD_BADMACHO && found_xhdr) { + ret = LOAD_BADMACHO_UPX; + } + + kfree_data(addr, alloc_size); + + return ret; +} + +load_return_t +validate_potential_simulator_binary( + cpu_type_t exectype __unused, + struct image_params *imgp __unused, + off_t file_offset __unused, + off_t macho_size __unused) +{ +#if __x86_64__ + /* Allow 32 bit exec only for simulator binaries */ + if (bootarg_no32exec && imgp != NULL && exectype == CPU_TYPE_X86) { + if (imgp->ip_simulator_binary == IMGPF_SB_DEFAULT) { + boolean_t simulator_binary = check_if_simulator_binary(imgp, file_offset, macho_size); + imgp->ip_simulator_binary = simulator_binary ? 
IMGPF_SB_TRUE : IMGPF_SB_FALSE; + } + + if (imgp->ip_simulator_binary != IMGPF_SB_TRUE) { + return LOAD_BADARCH; + } + } +#endif + return LOAD_SUCCESS; +} + +#if __x86_64__ +static boolean_t +check_if_simulator_binary( + struct image_params *imgp, + off_t file_offset, + off_t macho_size) +{ + struct mach_header *header; + char *ip_vdata = NULL; + kauth_cred_t cred = NULL; + uint32_t ncmds; + struct load_command *lcp; + boolean_t simulator_binary = FALSE; + void * addr = NULL; + vm_size_t alloc_size, cmds_size; + size_t offset; + proc_t p = current_proc(); /* XXXX */ + int error; + int resid = 0; + size_t mach_header_sz = sizeof(struct mach_header); + + + cred = kauth_cred_proc_ref(p); + + /* Allocate page to copyin mach header */ + ip_vdata = kalloc_data(PAGE_SIZE, Z_WAITOK | Z_ZERO); + if (ip_vdata == NULL) { + goto bad; + } + + /* Read the Mach-O header */ + error = vn_rdwr(UIO_READ, imgp->ip_vp, ip_vdata, + PAGE_SIZE, file_offset, + UIO_SYSSPACE, (IO_UNIT | IO_NODELOCKED), + cred, &resid, p); + if (error) { + goto bad; + } + + header = (struct mach_header *)ip_vdata; + + if (header->magic == MH_MAGIC_64 || + header->magic == MH_CIGAM_64) { + mach_header_sz = sizeof(struct mach_header_64); + } + + /* ensure header + sizeofcmds falls within the file */ + if (os_add_overflow(mach_header_sz, header->sizeofcmds, &cmds_size) || + (off_t)cmds_size > macho_size || + round_page_overflow(cmds_size, &alloc_size) || + alloc_size > INT_MAX) { + goto bad; + } + + /* + * Map the load commands into kernel memory. 
+ */ + addr = kalloc_data(alloc_size, Z_WAITOK); + if (addr == NULL) { + goto bad; + } + + error = vn_rdwr(UIO_READ, imgp->ip_vp, addr, (int)alloc_size, file_offset, + UIO_SYSSPACE, IO_NODELOCKED, cred, &resid, p); + if (error) { + goto bad; + } + + if (resid) { + /* We must be able to read in as much as the mach_header indicated */ + goto bad; + } + + /* + * Loop through each of the load_commands indicated by the + * Mach-O header; if an absurd value is provided, we just + * run off the end of the reserved section by incrementing + * the offset too far, so we are implicitly fail-safe. + */ + offset = mach_header_sz; + ncmds = header->ncmds; + + while (ncmds--) { + /* ensure enough space for a minimal load command */ + if (offset + sizeof(struct load_command) > cmds_size) { + break; + } + + /* + * Get a pointer to the command. + */ + lcp = (struct load_command *)((uintptr_t)addr + offset); + + /* + * Perform prevalidation of the struct load_command + * before we attempt to use its contents. Invalid + * values are ones which result in an overflow, or + * which can not possibly be valid commands, or which + * straddle or exist past the reserved section at the + * start of the image. + */ + if (os_add_overflow(offset, lcp->cmdsize, &offset) || + lcp->cmdsize < sizeof(struct load_command) || + offset > cmds_size) { + break; + } + + /* Check if its a simulator binary. 
 */
	switch (lcp->cmd) {
	case LC_VERSION_MIN_WATCHOS:
		simulator_binary = TRUE;
		break;

	case LC_BUILD_VERSION: {
		struct build_version_command *bvc;

		bvc = (struct build_version_command *) lcp;
		if (bvc->cmdsize < sizeof(*bvc)) {
			/* unsafe to use this command struct if cmdsize
			 * validated above is too small for it to fit */
			break;
		}
		if (bvc->platform == PLATFORM_IOSSIMULATOR ||
		    bvc->platform == PLATFORM_WATCHOSSIMULATOR) {
			simulator_binary = TRUE;
		}

		break;
	}

	case LC_VERSION_MIN_IPHONEOS: {
		simulator_binary = TRUE;
		break;
	}

	default:
		/* ignore other load commands */
		break;
	}

	/* first positive identification wins; stop scanning load commands */
	if (simulator_binary == TRUE) {
		break;
	}
}

/* common exit: release the header page, the cred ref, and the load-command buffer */
bad:
if (ip_vdata) {
	kfree_data(ip_vdata, PAGE_SIZE);
}

if (cred) {
	kauth_cred_unref(&cred);
}

if (addr) {
	kfree_data(addr, alloc_size);
}

return simulator_binary;
}
#endif /* __x86_64__ */

#if CONFIG_CODE_DECRYPTION

/* First 3 pages of a protected (DSMOS) slice are left in the clear. */
#define APPLE_UNPROTECTED_HEADER_SIZE   (3 * 4096)

/*
 * unprotect_dsmos_segment:
 *	Arrange for the "Don't Steal Mac OS" transform to be applied to the
 *	protected portion of a segment mapping.  The first
 *	APPLE_UNPROTECTED_HEADER_SIZE bytes of the slice (offsets are taken
 *	relative to macho_offset, i.e. relative to this slice of a Universal
 *	binary) stay untransformed; the remainder of [map_addr, map_addr+map_size)
 *	is handed to vm_map_apple_protected() with a dsmos_page_transform
 *	decrypter.  Only Apple platform-signed binaries may use the DSMOS
 *	pager (checked via the vnode's cs_blob).
 */
static load_return_t
unprotect_dsmos_segment(
	uint64_t        file_off,
	uint64_t        file_size,
	struct vnode    *vp,
	off_t           macho_offset,
	vm_map_t        map,
	vm_map_offset_t map_addr,
	vm_map_size_t   map_size)
{
	kern_return_t   kr;
	uint64_t        slice_off;

	/*
	 * The first APPLE_UNPROTECTED_HEADER_SIZE bytes (from offset 0 of
	 * this part of a Universal binary) are not protected...
	 * The rest needs to be "transformed".
	 */
	slice_off = file_off - macho_offset;
	if (slice_off <= APPLE_UNPROTECTED_HEADER_SIZE &&
	    slice_off + file_size <= APPLE_UNPROTECTED_HEADER_SIZE) {
		/* it's all unprotected, nothing to do... */
		kr = KERN_SUCCESS;
	} else {
		if (slice_off <= APPLE_UNPROTECTED_HEADER_SIZE) {
			/*
			 * We start mapping in the unprotected area.
			 * Skip the unprotected part...
taking care of mis-alignment (wrt the system + * page size) issues. + * + * The mapping might result in 1, 2 or 3 map entries: + * 1. for the first page, which could be overlap with the previous + * mapping, + * 2. for the center (if applicable), + * 3. for the last page, which could overlap with the next mapping. + * + * For each of those map entries, we might have to interpose a + * "fourk_pager" to deal with mis-alignment wrt the system page size, + * either in the mapping address and/or size or the file offset and/or + * size. + * The "fourk_pager" itself would be mapped with proper alignment + * wrt the system page size and would then be populated with the + * information about the intended mapping, with a "4KB" granularity. + */ +static kern_return_t +map_segment( + vm_map_t map, + vm_map_offset_t vm_start, + vm_map_offset_t vm_end, + memory_object_control_t control, + vm_map_offset_t file_start, + vm_map_offset_t file_end, + vm_prot_t initprot, + vm_prot_t maxprot, + load_result_t *result) +{ + vm_map_offset_t cur_offset, cur_start, cur_end; + kern_return_t ret; + vm_map_offset_t effective_page_mask; + vm_map_kernel_flags_t vmk_flags, cur_vmk_flags; + + if (vm_end < vm_start || + file_end < file_start) { + return LOAD_BADMACHO; + } + if (vm_end == vm_start || + file_end == file_start) { + /* nothing to map... 
*/ + return LOAD_SUCCESS; + } + + effective_page_mask = vm_map_page_mask(map); + + vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(); + if (vm_map_page_aligned(vm_start, effective_page_mask) && + vm_map_page_aligned(vm_end, effective_page_mask) && + vm_map_page_aligned(file_start, effective_page_mask) && + vm_map_page_aligned(file_end, effective_page_mask)) { + /* all page-aligned and map-aligned: proceed */ + } else { +#if __arm64__ + /* use an intermediate "4K" pager */ + vmk_flags.vmkf_fourk = TRUE; +#else /* __arm64__ */ + panic("map_segment: unexpected mis-alignment " + "vm[0x%llx:0x%llx] file[0x%llx:0x%llx]\n", + (uint64_t) vm_start, + (uint64_t) vm_end, + (uint64_t) file_start, + (uint64_t) file_end); +#endif /* __arm64__ */ + } + + cur_offset = 0; + cur_start = vm_start; + cur_end = vm_start; +#if __arm64__ + if (!vm_map_page_aligned(vm_start, effective_page_mask)) { + /* one 4K pager for the 1st page */ + cur_end = vm_map_round_page(cur_start, effective_page_mask); + if (cur_end > vm_end) { + cur_end = vm_start + (file_end - file_start); + } + if (control != MEMORY_OBJECT_CONTROL_NULL) { + /* no copy-on-read for mapped binaries */ + vmk_flags.vmkf_no_copy_on_read = 1; + ret = vm_map_enter_mem_object_control( + map, + &cur_start, + cur_end - cur_start, + (mach_vm_offset_t)0, + vmk_flags, + control, + file_start + cur_offset, + TRUE, /* copy */ + initprot, maxprot, + VM_INHERIT_DEFAULT); + } else { + ret = vm_map_enter_mem_object( + map, + &cur_start, + cur_end - cur_start, + (mach_vm_offset_t)0, + vmk_flags, + IPC_PORT_NULL, + 0, /* offset */ + TRUE, /* copy */ + initprot, maxprot, + VM_INHERIT_DEFAULT); + } + if (ret != KERN_SUCCESS) { + return LOAD_NOSPACE; + } + cur_offset += cur_end - cur_start; + } +#endif /* __arm64__ */ + if (cur_end >= vm_start + (file_end - file_start)) { + /* all mapped: done */ + goto done; + } + if (vm_map_round_page(cur_end, effective_page_mask) >= + vm_map_trunc_page(vm_start + (file_end - file_start), + effective_page_mask)) { + /* no 
middle */ + } else { + cur_start = cur_end; + if ((vm_start & effective_page_mask) != + (file_start & effective_page_mask)) { + /* one 4K pager for the middle */ + cur_vmk_flags = vmk_flags; + } else { + /* regular mapping for the middle */ + cur_vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(); + } + +#if !defined(XNU_TARGET_OS_OSX) + (void) result; +#else /* !defined(XNU_TARGET_OS_OSX) */ + /* + * This process doesn't have its new csflags (from + * the image being loaded) yet, so tell VM to override the + * current process's CS_ENFORCEMENT for this mapping. + */ + if (result->csflags & CS_ENFORCEMENT) { + cur_vmk_flags.vmkf_cs_enforcement = TRUE; + } else { + cur_vmk_flags.vmkf_cs_enforcement = FALSE; + } + cur_vmk_flags.vmkf_cs_enforcement_override = TRUE; +#endif /* !defined(XNU_TARGET_OS_OSX) */ + + if (result->is_rosetta && (initprot & VM_PROT_EXECUTE) == VM_PROT_EXECUTE) { + cur_vmk_flags.vmkf_translated_allow_execute = TRUE; + } + + cur_end = vm_map_trunc_page(vm_start + (file_end - + file_start), + effective_page_mask); + if (control != MEMORY_OBJECT_CONTROL_NULL) { + /* no copy-on-read for mapped binaries */ + cur_vmk_flags.vmkf_no_copy_on_read = 1; + ret = vm_map_enter_mem_object_control( + map, + &cur_start, + cur_end - cur_start, + (mach_vm_offset_t)0, + cur_vmk_flags, + control, + file_start + cur_offset, + TRUE, /* copy */ + initprot, maxprot, + VM_INHERIT_DEFAULT); + } else { + ret = vm_map_enter_mem_object( + map, + &cur_start, + cur_end - cur_start, + (mach_vm_offset_t)0, + cur_vmk_flags, + IPC_PORT_NULL, + 0, /* offset */ + TRUE, /* copy */ + initprot, maxprot, + VM_INHERIT_DEFAULT); + } + if (ret != KERN_SUCCESS) { + return LOAD_NOSPACE; + } + cur_offset += cur_end - cur_start; + } + if (cur_end >= vm_start + (file_end - file_start)) { + /* all mapped: done */ + goto done; + } + cur_start = cur_end; +#if __arm64__ + if (!vm_map_page_aligned(vm_start + (file_end - file_start), + effective_page_mask)) { + /* one 4K pager for the last page */ + cur_end = 
vm_start + (file_end - file_start); + if (control != MEMORY_OBJECT_CONTROL_NULL) { + /* no copy-on-read for mapped binaries */ + vmk_flags.vmkf_no_copy_on_read = 1; + ret = vm_map_enter_mem_object_control( + map, + &cur_start, + cur_end - cur_start, + (mach_vm_offset_t)0, + vmk_flags, + control, + file_start + cur_offset, + TRUE, /* copy */ + initprot, maxprot, + VM_INHERIT_DEFAULT); + } else { + ret = vm_map_enter_mem_object( + map, + &cur_start, + cur_end - cur_start, + (mach_vm_offset_t)0, + vmk_flags, + IPC_PORT_NULL, + 0, /* offset */ + TRUE, /* copy */ + initprot, maxprot, + VM_INHERIT_DEFAULT); + } + if (ret != KERN_SUCCESS) { + return LOAD_NOSPACE; + } + cur_offset += cur_end - cur_start; + } +#endif /* __arm64__ */ +done: + assert(cur_end >= vm_start + (file_end - file_start)); + return LOAD_SUCCESS; +} + +static +load_return_t +load_segment( + struct load_command *lcp, + uint32_t filetype, + void * control, + off_t pager_offset, + off_t macho_size, + struct vnode *vp, + vm_map_t map, + int64_t slide, + load_result_t *result, + struct image_params *imgp) +{ + struct segment_command_64 segment_command, *scp; + kern_return_t ret; + vm_map_size_t delta_size; + vm_prot_t initprot; + vm_prot_t maxprot; + size_t segment_command_size, total_section_size, + single_section_size; + uint64_t file_offset, file_size; + vm_map_offset_t vm_offset; + size_t vm_size; + vm_map_offset_t vm_start, vm_end, vm_end_aligned; + vm_map_offset_t file_start, file_end; + kern_return_t kr; + boolean_t verbose; + vm_map_size_t effective_page_size; + vm_map_offset_t effective_page_mask; +#if __arm64__ + boolean_t fourk_align; +#endif /* __arm64__ */ + + (void)imgp; + + effective_page_size = vm_map_page_size(map); + effective_page_mask = vm_map_page_mask(map); + + verbose = FALSE; + if (LC_SEGMENT_64 == lcp->cmd) { + segment_command_size = sizeof(struct segment_command_64); + single_section_size = sizeof(struct section_64); +#if __arm64__ + /* 64-bit binary: should already be 16K-aligned 
*/ + fourk_align = FALSE; + + if (vm_map_page_shift(map) == FOURK_PAGE_SHIFT && + PAGE_SHIFT != FOURK_PAGE_SHIFT) { + fourk_align = TRUE; + verbose = TRUE; + } +#endif /* __arm64__ */ + } else { + segment_command_size = sizeof(struct segment_command); + single_section_size = sizeof(struct section); +#if __arm64__ + /* 32-bit binary: might need 4K-alignment */ + if (effective_page_size != FOURK_PAGE_SIZE) { + /* not using 4K page size: need fourk_pager */ + fourk_align = TRUE; + verbose = TRUE; + } else { + /* using 4K page size: no need for re-alignment */ + fourk_align = FALSE; + } +#endif /* __arm64__ */ + } + if (lcp->cmdsize < segment_command_size) { + DEBUG4K_ERROR("LOAD_BADMACHO cmdsize %d < %zu\n", lcp->cmdsize, segment_command_size); + return LOAD_BADMACHO; + } + total_section_size = lcp->cmdsize - segment_command_size; + + if (LC_SEGMENT_64 == lcp->cmd) { + scp = (struct segment_command_64 *)lcp; + } else { + scp = &segment_command; + widen_segment_command((struct segment_command *)lcp, scp); + } + + if (verbose) { + MACHO_PRINTF(("+++ load_segment %s " + "vm[0x%llx:0x%llx] file[0x%llx:0x%llx] " + "prot %d/%d flags 0x%x\n", + scp->segname, + (uint64_t)(slide + scp->vmaddr), + (uint64_t)(slide + scp->vmaddr + scp->vmsize), + pager_offset + scp->fileoff, + pager_offset + scp->fileoff + scp->filesize, + scp->initprot, + scp->maxprot, + scp->flags)); + } + + /* + * Make sure what we get from the file is really ours (as specified + * by macho_size). + */ + if (scp->fileoff + scp->filesize < scp->fileoff || + scp->fileoff + scp->filesize > (uint64_t)macho_size) { + DEBUG4K_ERROR("LOAD_BADMACHO fileoff 0x%llx filesize 0x%llx macho_size 0x%llx\n", scp->fileoff, scp->filesize, (uint64_t)macho_size); + return LOAD_BADMACHO; + } + /* + * Ensure that the number of sections specified would fit + * within the load command size. 
+ */ + if (total_section_size / single_section_size < scp->nsects) { + DEBUG4K_ERROR("LOAD_BADMACHO 0x%zx 0x%zx %d\n", total_section_size, single_section_size, scp->nsects); + return LOAD_BADMACHO; + } + /* + * Make sure the segment is page-aligned in the file. + */ + if (os_add_overflow(pager_offset, scp->fileoff, &file_offset)) { + DEBUG4K_ERROR("LOAD_BADMACHO file_offset: 0x%llx + 0x%llx\n", pager_offset, scp->fileoff); + return LOAD_BADMACHO; + } + file_size = scp->filesize; +#if __arm64__ + if (fourk_align) { + if ((file_offset & FOURK_PAGE_MASK) != 0) { + /* + * we can't mmap() it if it's not at least 4KB-aligned + * in the file + */ + DEBUG4K_ERROR("LOAD_BADMACHO file_offset 0x%llx\n", file_offset); + return LOAD_BADMACHO; + } + } else +#endif /* __arm64__ */ + if ((file_offset & PAGE_MASK_64) != 0 || + /* we can't mmap() it if it's not page-aligned in the file */ + (file_offset & vm_map_page_mask(map)) != 0) { + /* + * The 1st test would have failed if the system's page size + * was what this process believe is the page size, so let's + * fail here too for the sake of consistency. + */ + DEBUG4K_ERROR("LOAD_BADMACHO file_offset 0x%llx\n", file_offset); + return LOAD_BADMACHO; + } + + /* + * If we have a code signature attached for this slice + * require that the segments are within the signed part + * of the file. 
+ */ + if (result->cs_end_offset && + result->cs_end_offset < (off_t)scp->fileoff && + result->cs_end_offset - scp->fileoff < scp->filesize) { + if (cs_debug) { + printf("section outside code signature\n"); + } + DEBUG4K_ERROR("LOAD_BADMACHO end_offset 0x%llx fileoff 0x%llx filesize 0x%llx\n", result->cs_end_offset, scp->fileoff, scp->filesize); + return LOAD_BADMACHO; + } + + if (os_add_overflow(scp->vmaddr, slide, &vm_offset)) { + if (cs_debug) { + printf("vmaddr too large\n"); + } + DEBUG4K_ERROR("LOAD_BADMACHO vmaddr 0x%llx slide 0x%llx vm_offset 0x%llx\n", scp->vmaddr, slide, (uint64_t)vm_offset); + return LOAD_BADMACHO; + } + + if (scp->vmsize > SIZE_MAX) { + DEBUG4K_ERROR("LOAD_BADMACHO vmsize 0x%llx\n", scp->vmsize); + return LOAD_BADMACHO; + } + + vm_size = (size_t)scp->vmsize; + + if (vm_size == 0) { + return LOAD_SUCCESS; + } + if (scp->vmaddr == 0 && + file_size == 0 && + vm_size != 0 && + (scp->initprot & VM_PROT_ALL) == VM_PROT_NONE && + (scp->maxprot & VM_PROT_ALL) == VM_PROT_NONE) { + if (map == VM_MAP_NULL) { + return LOAD_SUCCESS; + } + + /* + * For PIE, extend page zero rather than moving it. Extending + * page zero keeps early allocations from falling predictably + * between the end of page zero and the beginning of the first + * slid segment. + */ + /* + * This is a "page zero" segment: it starts at address 0, + * is not mapped from the binary file and is not accessible. + * User-space should never be able to access that memory, so + * make it completely off limits by raising the VM map's + * minimum offset. 
+ */ + vm_end = (vm_map_offset_t)(vm_offset + vm_size); + if (vm_end < vm_offset) { + DEBUG4K_ERROR("LOAD_BADMACHO vm_end 0x%llx vm_offset 0x%llx vm_size 0x%llx\n", (uint64_t)vm_end, (uint64_t)vm_offset, (uint64_t)vm_size); + return LOAD_BADMACHO; + } + + if (verbose) { + MACHO_PRINTF(("++++++ load_segment: " + "page_zero up to 0x%llx\n", + (uint64_t) vm_end)); + } +#if __arm64__ + if (fourk_align) { + /* raise min_offset as much as page-alignment allows */ + vm_end_aligned = vm_map_trunc_page(vm_end, + effective_page_mask); + } else +#endif /* __arm64__ */ + { + vm_end = vm_map_round_page(vm_end, + PAGE_MASK_64); + vm_end_aligned = vm_end; + } + ret = vm_map_raise_min_offset(map, + vm_end_aligned); +#if __arm64__ + if (ret == 0 && + vm_end > vm_end_aligned) { + /* use fourk_pager to map the rest of pagezero */ + assert(fourk_align); + ret = vm_map_enter_mem_object( + map, + &vm_end_aligned, + vm_end - vm_end_aligned, + (mach_vm_offset_t) 0, /* mask */ + VM_MAP_KERNEL_FLAGS_FIXED(.vmkf_fourk = true), + IPC_PORT_NULL, + 0, + FALSE, /* copy */ + (scp->initprot & VM_PROT_ALL), + (scp->maxprot & VM_PROT_ALL), + VM_INHERIT_DEFAULT); + } +#endif /* __arm64__ */ + + if (ret != KERN_SUCCESS) { + DEBUG4K_ERROR("LOAD_FAILURE ret 0x%x\n", ret); + return LOAD_FAILURE; + } + return LOAD_SUCCESS; + } else { +#if !defined(XNU_TARGET_OS_OSX) + /* not PAGEZERO: should not be mapped at address 0 */ + if (filetype != MH_DYLINKER && (imgp->ip_flags & IMGPF_ROSETTA) == 0 && scp->vmaddr == 0) { + DEBUG4K_ERROR("LOAD_BADMACHO filetype %d vmaddr 0x%llx\n", filetype, scp->vmaddr); + return LOAD_BADMACHO; + } +#endif /* !defined(XNU_TARGET_OS_OSX) */ + } + +#if __arm64__ + if (fourk_align) { + /* 4K-align */ + file_start = vm_map_trunc_page(file_offset, + FOURK_PAGE_MASK); + file_end = vm_map_round_page(file_offset + file_size, + FOURK_PAGE_MASK); + vm_start = vm_map_trunc_page(vm_offset, + FOURK_PAGE_MASK); + vm_end = vm_map_round_page(vm_offset + vm_size, + FOURK_PAGE_MASK); + + if 
(file_offset - file_start > FOURK_PAGE_MASK || + file_end - file_offset - file_size > FOURK_PAGE_MASK) { + DEBUG4K_ERROR("LOAD_BADMACHO file_start / file_size wrap " + "[0x%llx:0x%llx] -> [0x%llx:0x%llx]\n", + file_offset, + file_offset + file_size, + (uint64_t) file_start, + (uint64_t) file_end); + return LOAD_BADMACHO; + } + + if (!strncmp(scp->segname, "__LINKEDIT", 11) && + page_aligned(file_start) && + vm_map_page_aligned(file_start, vm_map_page_mask(map)) && + page_aligned(vm_start) && + vm_map_page_aligned(vm_start, vm_map_page_mask(map))) { + /* XXX last segment: ignore mis-aligned tail */ + file_end = vm_map_round_page(file_end, + effective_page_mask); + vm_end = vm_map_round_page(vm_end, + effective_page_mask); + } + } else +#endif /* __arm64__ */ + { + file_start = vm_map_trunc_page(file_offset, + effective_page_mask); + file_end = vm_map_round_page(file_offset + file_size, + effective_page_mask); + vm_start = vm_map_trunc_page(vm_offset, + effective_page_mask); + vm_end = vm_map_round_page(vm_offset + vm_size, + effective_page_mask); + + if (file_offset - file_start > effective_page_mask || + file_end - file_offset - file_size > effective_page_mask) { + DEBUG4K_ERROR("LOAD_BADMACHO file_start / file_size wrap " + "[0x%llx:0x%llx] -> [0x%llx:0x%llx]\n", + file_offset, + file_offset + file_size, + (uint64_t) file_start, + (uint64_t) file_end); + return LOAD_BADMACHO; + } + } + + if (vm_start < result->min_vm_addr) { + result->min_vm_addr = vm_start; + } + if (vm_end > result->max_vm_addr) { + result->max_vm_addr = vm_end; + } + + if (map == VM_MAP_NULL) { + return LOAD_SUCCESS; + } + + if (scp->flags & SG_READ_ONLY) { + /* + * Record the VM start/end of a segment which should + * be RO after fixups. Only __DATA_CONST should + * have this flag. 
+ */ + if (result->ro_vm_start != MACH_VM_MIN_ADDRESS || + result->ro_vm_end != MACH_VM_MIN_ADDRESS) { + DEBUG4K_ERROR("LOAD_BADMACHO segment flags [%x] " + "multiple segments with SG_READ_ONLY flag\n", + scp->flags); + return LOAD_BADMACHO; + } + + result->ro_vm_start = vm_start; + result->ro_vm_end = vm_end; + } + + if (vm_size > 0) { + initprot = (scp->initprot) & VM_PROT_ALL; + maxprot = (scp->maxprot) & VM_PROT_ALL; + /* + * Map a copy of the file into the address space. + */ + if (verbose) { + MACHO_PRINTF(("++++++ load_segment: " + "mapping at vm [0x%llx:0x%llx] of " + "file [0x%llx:0x%llx]\n", + (uint64_t) vm_start, + (uint64_t) vm_end, + (uint64_t) file_start, + (uint64_t) file_end)); + } + ret = map_segment(map, + vm_start, + vm_end, + control, + file_start, + file_end, + initprot, + maxprot, + result); + if (ret) { + DEBUG4K_ERROR("LOAD_NOSPACE start 0x%llx end 0x%llx ret 0x%x\n", (uint64_t)vm_start, (uint64_t)vm_end, ret); + return LOAD_NOSPACE; + } + +#if FIXME + /* + * If the file didn't end on a page boundary, + * we need to zero the leftover. + */ + delta_size = map_size - scp->filesize; + if (delta_size > 0) { + void *tmp = kalloc_data(delta_size, Z_WAITOK | Z_ZERO); + int rc; + + if (tmp == NULL) { + DEBUG4K_ERROR("LOAD_RESOURCE delta_size 0x%llx ret 0x%x\n", delta_size, ret); + return LOAD_RESOURCE; + } + + rc = copyout(tmp, map_addr + scp->filesize, delta_size); + kfree_data(tmp, delta_size); + + if (rc) { + DEBUG4K_ERROR("LOAD_FAILURE copyout 0x%llx 0x%llx\n", map_addr + scp->filesize, delta_size); + return LOAD_FAILURE; + } + } +#endif /* FIXME */ + } + + /* + * If the virtual size of the segment is greater + * than the size from the file, we need to allocate + * zero fill memory for the rest. 
+ */ + if ((vm_end - vm_start) > (file_end - file_start)) { + delta_size = (vm_end - vm_start) - (file_end - file_start); + } else { + delta_size = 0; + } + if (delta_size > 0) { + vm_map_offset_t tmp_start; + vm_map_offset_t tmp_end; + + if (os_add_overflow(vm_start, file_end - file_start, &tmp_start)) { + DEBUG4K_ERROR("LOAD_NOSPACE tmp_start: 0x%llx + 0x%llx\n", (uint64_t)vm_start, (uint64_t)(file_end - file_start)); + return LOAD_NOSPACE; + } + + if (os_add_overflow(tmp_start, delta_size, &tmp_end)) { + DEBUG4K_ERROR("LOAD_NOSPACE tmp_end: 0x%llx + 0x%llx\n", (uint64_t)tmp_start, (uint64_t)delta_size); + return LOAD_NOSPACE; + } + + if (verbose) { + MACHO_PRINTF(("++++++ load_segment: " + "delta mapping vm [0x%llx:0x%llx]\n", + (uint64_t) tmp_start, + (uint64_t) tmp_end)); + } + kr = map_segment(map, + tmp_start, + tmp_end, + MEMORY_OBJECT_CONTROL_NULL, + 0, + delta_size, + scp->initprot, + scp->maxprot, + result); + if (kr != KERN_SUCCESS) { + DEBUG4K_ERROR("LOAD_NOSPACE 0x%llx 0x%llx kr 0x%x\n", (unsigned long long)tmp_start, (uint64_t)delta_size, kr); + return LOAD_NOSPACE; + } + } + + if ((scp->fileoff == 0) && (scp->filesize != 0)) { + result->mach_header = vm_offset; + } + + if (scp->flags & SG_PROTECTED_VERSION_1) { + ret = unprotect_dsmos_segment(file_start, + file_end - file_start, + vp, + pager_offset, + map, + vm_start, + vm_end - vm_start); + if (ret != LOAD_SUCCESS) { + DEBUG4K_ERROR("unprotect 0x%llx 0x%llx ret %d \n", (uint64_t)vm_start, (uint64_t)vm_end, ret); + return ret; + } + } else { + ret = LOAD_SUCCESS; + } + + if (LOAD_SUCCESS == ret && + filetype == MH_DYLINKER && + result->all_image_info_addr == MACH_VM_MIN_ADDRESS) { + note_all_image_info_section(scp, + LC_SEGMENT_64 == lcp->cmd, + single_section_size, + ((const char *)lcp + + segment_command_size), + slide, + result); + } + + if (result->entry_point != MACH_VM_MIN_ADDRESS) { + if ((result->entry_point >= vm_offset) && (result->entry_point < (vm_offset + vm_size))) { + if 
((scp->initprot & (VM_PROT_READ | VM_PROT_EXECUTE)) == (VM_PROT_READ | VM_PROT_EXECUTE)) {
				/* entry point lies in this segment and it is r-x: accept it */
				result->validentry = 1;
			} else {
				/* right range but wrong protections, unset if previously validated */
				result->validentry = 0;
			}
		}
	}

	if (ret != LOAD_SUCCESS && verbose) {
		DEBUG4K_ERROR("ret %d\n", ret);
	}
	return ret;
}

/*
 * load_uuid:
 *	Record the LC_UUID payload into result->uuid, after checking that the
 *	whole uuid_command lies inside the in-kernel copy of the load
 *	commands (bounded by command_end).
 */
static
load_return_t
load_uuid(
	struct uuid_command     *uulp,
	char                    *command_end,
	load_result_t           *result
	)
{
	/*
	 * We need to check the following for this command:
	 * - The command size should be atleast the size of struct uuid_command
	 * - The UUID part of the command should be completely within the mach-o header
	 */

	if ((uulp->cmdsize < sizeof(struct uuid_command)) ||
	    (((char *)uulp + sizeof(struct uuid_command)) > command_end)) {
		return LOAD_BADMACHO;
	}

	memcpy(&result->uuid[0], &uulp->uuid[0], sizeof(result->uuid));
	return LOAD_SUCCESS;
}

/*
 * load_version:
 *	Handle the legacy LC_VERSION_MIN_* load commands: derive the platform
 *	from which command was used, and record platform/sdk/min-sdk into
 *	result.  Rejects a truncated command and a second version command
 *	(*found_version_cmd guards against duplicates, shared with
 *	LC_BUILD_VERSION handling in parse_machfile).
 */
static
load_return_t
load_version(
	struct version_min_command     *vmc,
	boolean_t               *found_version_cmd,
	struct image_params     *imgp __unused,
	load_result_t           *result
	)
{
	uint32_t platform = 0;
	uint32_t sdk;
	uint32_t min_sdk;

	if (vmc->cmdsize < sizeof(*vmc)) {
		return LOAD_BADMACHO;
	}
	if (*found_version_cmd == TRUE) {
		return LOAD_BADMACHO;
	}
	*found_version_cmd = TRUE;
	sdk = vmc->sdk;
	/* for LC_VERSION_MIN_* the "version" field is the minimum OS/SDK */
	min_sdk = vmc->version;
	switch (vmc->cmd) {
	case LC_VERSION_MIN_MACOSX:
		platform = PLATFORM_MACOS;
		break;
#if __x86_64__ /* __x86_64__ */
	/* on x86_64, the iOS/watchOS/tvOS min-version commands imply simulator builds */
	case LC_VERSION_MIN_IPHONEOS:
		platform = PLATFORM_IOSSIMULATOR;
		break;
	case LC_VERSION_MIN_WATCHOS:
		platform = PLATFORM_WATCHOSSIMULATOR;
		break;
	case LC_VERSION_MIN_TVOS:
		platform = PLATFORM_TVOSSIMULATOR;
		break;
#else
	case LC_VERSION_MIN_IPHONEOS: {
#if __arm64__
		if (vmc->sdk < (12 << 16)) {
			/* app built with a pre-iOS12 SDK: apply legacy footprint mitigation */
			result->legacy_footprint = TRUE;
		}
#endif /* __arm64__ */
		platform = PLATFORM_IOS;
		break;
	}
	case
 LC_VERSION_MIN_WATCHOS:
		platform = PLATFORM_WATCHOS;
		break;
	case LC_VERSION_MIN_TVOS:
		platform = PLATFORM_TVOS;
		break;
#endif /* __x86_64__ */
	/* All LC_VERSION_MIN_* load commands are legacy and we will not be adding any more */
	default:
		/*
		 * Unreachable: the dispatch in parse_machfile only calls
		 * load_version() for the four LC_VERSION_MIN_* commands
		 * handled above.
		 */
		sdk = (uint32_t)-1;
		min_sdk = (uint32_t)-1;
		__builtin_unreachable();
	}
	result->ip_platform = platform;
	result->lr_min_sdk = min_sdk;
	result->lr_sdk = sdk;
	return LOAD_SUCCESS;
}

/*
 * load_main:
 *	Handle LC_MAIN: set up the default user stack (LC_MAIN carries a
 *	stack size but no location) and mark the image as requiring dyld.
 *	The kernel does not consume entryoff here; dyld does.  Fails if a
 *	thread command was already processed or an entry point was already
 *	established.
 */
static
load_return_t
load_main(
	struct entry_point_command      *epc,
	thread_t                thread,
	int64_t                         slide,
	load_result_t           *result
	)
{
	mach_vm_offset_t addr;
	kern_return_t ret;

	if (epc->cmdsize < sizeof(*epc)) {
		return LOAD_BADMACHO;
	}
	if (result->thread_count != 0) {
		return LOAD_FAILURE;
	}

	if (thread == THREAD_NULL) {
		return LOAD_SUCCESS;
	}

	/*
	 * LC_MAIN specifies stack size but not location.
	 * Add guard page to allocation size (MAXSSIZ includes guard page).
	 */
	if (epc->stacksize) {
		if (os_add_overflow(epc->stacksize, 4 * PAGE_SIZE, &result->user_stack_size)) {
			/*
			 * We are going to immediately throw away this result, but we want
			 * to make sure we aren't loading a dangerously close to
			 * overflowing value, since this will have a guard page added to it
			 * and be rounded to page boundaries
			 */
			return LOAD_BADMACHO;
		}
		/* headroom check passed: keep the requested size itself */
		result->user_stack_size = epc->stacksize;
		if (os_add_overflow(epc->stacksize, PAGE_SIZE, &result->user_stack_alloc_size)) {
			return LOAD_BADMACHO;
		}
		result->custom_stack = TRUE;
	} else {
		result->user_stack_alloc_size = MAXSSIZ;
	}

	/* use default location for stack */
	ret = thread_userstackdefault(&addr, result->is_64bit_addr);
	if (ret != KERN_SUCCESS) {
		return LOAD_FAILURE;
	}

	/* The stack slides down from the default location */
	result->user_stack = (user_addr_t)mach_vm_trunc_page((user_addr_t)addr - slide);

	if (result->using_lcmain || result->entry_point != MACH_VM_MIN_ADDRESS) {
		/* Already processed LC_MAIN or
 LC_UNIXTHREAD */
		return LOAD_FAILURE;
	}

	/* kernel does *not* use entryoff from LC_MAIN.	 Dyld uses it. */
	result->needs_dynlinker = TRUE;
	result->using_lcmain = TRUE;

	ret = thread_state_initialize( thread );
	if (ret != KERN_SUCCESS) {
		return LOAD_FAILURE;
	}

	result->unixproc = TRUE;
	result->thread_count++;

	return LOAD_SUCCESS;
}

/*
 * setup_driver_main:
 *	Establish LC_MAIN-equivalent defaults for driver binaries, which
 *	carry no LC_MAIN load command: default-size stack at the default
 *	location, dyld required, thread state initialized.
 *	NOTE(review): unlike load_main(), the slid stack address is not
 *	passed through mach_vm_trunc_page() here — presumably because
 *	slide is page-aligned for drivers; confirm against callers.
 */
static
load_return_t
setup_driver_main(
	thread_t                thread,
	int64_t                         slide,
	load_result_t           *result
	)
{
	mach_vm_offset_t addr;
	kern_return_t ret;

	/* Driver binaries have no LC_MAIN, use defaults */

	if (thread == THREAD_NULL) {
		return LOAD_SUCCESS;
	}

	result->user_stack_alloc_size = MAXSSIZ;

	/* use default location for stack */
	ret = thread_userstackdefault(&addr, result->is_64bit_addr);
	if (ret != KERN_SUCCESS) {
		return LOAD_FAILURE;
	}

	/* The stack slides down from the default location */
	result->user_stack = (user_addr_t)addr;
	result->user_stack -= slide;

	if (result->using_lcmain || result->entry_point != MACH_VM_MIN_ADDRESS) {
		/* Already processed LC_MAIN or LC_UNIXTHREAD */
		return LOAD_FAILURE;
	}

	result->needs_dynlinker = TRUE;

	ret = thread_state_initialize( thread );
	if (ret != KERN_SUCCESS) {
		return LOAD_FAILURE;
	}

	result->unixproc = TRUE;
	result->thread_count++;

	return LOAD_SUCCESS;
}

/*
 * load_unixthread:
 *	Handle LC_UNIXTHREAD: parse the embedded thread-state flavors to
 *	obtain the stack (optional custom size/location), the entry point,
 *	and the validated register state.  Mutually exclusive with LC_MAIN
 *	and with a second thread command.
 */
static
load_return_t
load_unixthread(
	struct thread_command   *tcp,
	thread_t                thread,
	int64_t                         slide,
	boolean_t                       is_x86_64_compat_binary,
	load_result_t           *result
	)
{
	load_return_t   ret;
	int customstack = 0;
	mach_vm_offset_t addr;
	if (tcp->cmdsize < sizeof(*tcp)) {
		return LOAD_BADMACHO;
	}
	if (result->thread_count != 0) {
		return LOAD_FAILURE;
	}

	if (thread == THREAD_NULL) {
		return LOAD_SUCCESS;
	}

	/* stack first: the flavor array starts right after the thread_command */
	ret = load_threadstack(thread,
	    (uint32_t *)(((vm_offset_t)tcp) +
	    sizeof(struct thread_command)),
	    tcp->cmdsize - sizeof(struct thread_command),
	    &addr, &customstack, is_x86_64_compat_binary, result);
	if
(ret != LOAD_SUCCESS) { + return ret; + } + + /* LC_UNIXTHREAD optionally specifies stack size and location */ + + if (customstack) { + result->custom_stack = TRUE; + } else { + result->user_stack_alloc_size = MAXSSIZ; + } + + /* The stack slides down from the default location */ + result->user_stack = (user_addr_t)mach_vm_trunc_page((user_addr_t)addr - slide); + + { + ret = load_threadentry(thread, + (uint32_t *)(((vm_offset_t)tcp) + + sizeof(struct thread_command)), + tcp->cmdsize - sizeof(struct thread_command), + &addr); + if (ret != LOAD_SUCCESS) { + return ret; + } + + if (result->using_lcmain || result->entry_point != MACH_VM_MIN_ADDRESS) { + /* Already processed LC_MAIN or LC_UNIXTHREAD */ + return LOAD_FAILURE; + } + + result->entry_point = (user_addr_t)addr; + result->entry_point += slide; + + ret = load_threadstate(thread, + (uint32_t *)(((vm_offset_t)tcp) + sizeof(struct thread_command)), + tcp->cmdsize - sizeof(struct thread_command), + result); + if (ret != LOAD_SUCCESS) { + return ret; + } + } + + result->unixproc = TRUE; + result->thread_count++; + + return LOAD_SUCCESS; +} + +static +load_return_t +load_threadstate( + thread_t thread, + uint32_t *ts, + uint32_t total_size, + load_result_t *result + ) +{ + uint32_t size; + int flavor; + uint32_t thread_size; + uint32_t *local_ts = NULL; + uint32_t local_ts_size = 0; + int ret; + + (void)thread; + + if (total_size > 0) { + local_ts_size = total_size; + local_ts = (uint32_t *)kalloc_data(local_ts_size, Z_WAITOK); + if (local_ts == NULL) { + return LOAD_FAILURE; + } + memcpy(local_ts, ts, local_ts_size); + ts = local_ts; + } + + /* + * Validate the new thread state; iterate through the state flavors in + * the Mach-O file. + * XXX: we should validate the machine state here, to avoid failing at + * activation time where we can't bail out cleanly. 
+ */ + while (total_size > 0) { + if (total_size < 2 * sizeof(uint32_t)) { + return LOAD_BADMACHO; + } + + flavor = *ts++; + size = *ts++; + + if (os_add_and_mul_overflow(size, 2, sizeof(uint32_t), &thread_size) || + os_sub_overflow(total_size, thread_size, &total_size)) { + ret = LOAD_BADMACHO; + goto bad; + } + + ts += size; /* ts is a (uint32_t *) */ + } + + result->threadstate = local_ts; + result->threadstate_sz = local_ts_size; + return LOAD_SUCCESS; + +bad: + if (local_ts) { + kfree_data(local_ts, local_ts_size); + } + return ret; +} + + +static +load_return_t +load_threadstack( + thread_t thread, + uint32_t *ts, + uint32_t total_size, + mach_vm_offset_t *user_stack, + int *customstack, + __unused boolean_t is_x86_64_compat_binary, + load_result_t *result + ) +{ + kern_return_t ret; + uint32_t size; + int flavor; + uint32_t stack_size; + + if (total_size == 0) { + return LOAD_BADMACHO; + } + + while (total_size > 0) { + if (total_size < 2 * sizeof(uint32_t)) { + return LOAD_BADMACHO; + } + + flavor = *ts++; + size = *ts++; + if (UINT32_MAX - 2 < size || + UINT32_MAX / sizeof(uint32_t) < size + 2) { + return LOAD_BADMACHO; + } + stack_size = (size + 2) * sizeof(uint32_t); + if (stack_size > total_size) { + return LOAD_BADMACHO; + } + total_size -= stack_size; + + /* + * Third argument is a kernel space pointer; it gets cast + * to the appropriate type in thread_userstack() based on + * the value of flavor. + */ + { + ret = thread_userstack(thread, flavor, (thread_state_t)ts, size, user_stack, customstack, result->is_64bit_data); + if (ret != KERN_SUCCESS) { + return LOAD_FAILURE; + } + } + + ts += size; /* ts is a (uint32_t *) */ + } + return LOAD_SUCCESS; +} + +static +load_return_t +load_threadentry( + thread_t thread, + uint32_t *ts, + uint32_t total_size, + mach_vm_offset_t *entry_point + ) +{ + kern_return_t ret; + uint32_t size; + int flavor; + uint32_t entry_size; + + /* + * Set the thread state. 
+ */ + *entry_point = MACH_VM_MIN_ADDRESS; + while (total_size > 0) { + if (total_size < 2 * sizeof(uint32_t)) { + return LOAD_BADMACHO; + } + + flavor = *ts++; + size = *ts++; + if (UINT32_MAX - 2 < size || + UINT32_MAX / sizeof(uint32_t) < size + 2) { + return LOAD_BADMACHO; + } + entry_size = (size + 2) * sizeof(uint32_t); + if (entry_size > total_size) { + return LOAD_BADMACHO; + } + total_size -= entry_size; + /* + * Third argument is a kernel space pointer; it gets cast + * to the appropriate type in thread_entrypoint() based on + * the value of flavor. + */ + ret = thread_entrypoint(thread, flavor, (thread_state_t)ts, size, entry_point); + if (ret != KERN_SUCCESS) { + return LOAD_FAILURE; + } + ts += size; /* ts is a (uint32_t *) */ + } + return LOAD_SUCCESS; +} + +struct macho_data { + struct nameidata __nid; + union macho_vnode_header { + struct mach_header mach_header; + struct fat_header fat_header; + char __pad[512]; + } __header; +}; + +#define DEFAULT_DYLD_PATH "/usr/lib/dyld" + +#if (DEVELOPMENT || DEBUG) +extern char dyld_alt_path[]; +extern int use_alt_dyld; + +extern char dyld_suffix[]; +extern int use_dyld_suffix; + +typedef struct _dyld_suffix_map_entry { + const char *suffix; + const char *path; +} dyld_suffix_map_entry_t; + +static const dyld_suffix_map_entry_t _dyld_suffix_map[] = { + [0] = { + .suffix = "", + .path = DEFAULT_DYLD_PATH, + }, { + .suffix = "release", + .path = DEFAULT_DYLD_PATH, + }, { + .suffix = "bringup", + .path = "/usr/appleinternal/lib/dyld.bringup", + }, +}; +#endif + +static load_return_t +load_dylinker( + struct dylinker_command *lcp, + cpu_type_t cputype, + vm_map_t map, + thread_t thread, + int depth, + int64_t slide, + load_result_t *result, + struct image_params *imgp + ) +{ + const char *name; + struct vnode *vp = NULLVP; /* set by get_macho_vnode() */ + struct mach_header *header; + off_t file_offset = 0; /* set by get_macho_vnode() */ + off_t macho_size = 0; /* set by get_macho_vnode() */ + load_result_t 
*myresult; + kern_return_t ret; + struct macho_data *macho_data; + struct { + struct mach_header __header; + load_result_t __myresult; + struct macho_data __macho_data; + } *dyld_data; + + if (lcp->cmdsize < sizeof(*lcp) || lcp->name.offset >= lcp->cmdsize) { + return LOAD_BADMACHO; + } + + name = (const char *)lcp + lcp->name.offset; + + /* Check for a proper null terminated string. */ + size_t maxsz = lcp->cmdsize - lcp->name.offset; + size_t namelen = strnlen(name, maxsz); + if (namelen >= maxsz) { + return LOAD_BADMACHO; + } + +#if (DEVELOPMENT || DEBUG) + + /* + * rdar://23680808 + * If an alternate dyld has been specified via boot args, check + * to see if PROC_UUID_ALT_DYLD_POLICY has been set on this + * executable and redirect the kernel to load that linker. + */ + + if (use_alt_dyld) { + int policy_error; + uint32_t policy_flags = 0; + int32_t policy_gencount = 0; + + policy_error = proc_uuid_policy_lookup(result->uuid, &policy_flags, &policy_gencount); + if (policy_error == 0) { + if (policy_flags & PROC_UUID_ALT_DYLD_POLICY) { + name = dyld_alt_path; + } + } + } else if (use_dyld_suffix) { + size_t i = 0; + +#define countof(x) (sizeof(x) / sizeof(x[0])) + for (i = 0; i < countof(_dyld_suffix_map); i++) { + const dyld_suffix_map_entry_t *entry = &_dyld_suffix_map[i]; + + if (strcmp(entry->suffix, dyld_suffix) == 0) { + name = entry->path; + break; + } + } + } +#endif + +#if !(DEVELOPMENT || DEBUG) + if (0 != strcmp(name, DEFAULT_DYLD_PATH)) { + return LOAD_BADMACHO; + } +#endif + + /* Allocate wad-of-data from heap to reduce excessively deep stacks */ + + dyld_data = kalloc_type(typeof(*dyld_data), Z_WAITOK); + header = &dyld_data->__header; + myresult = &dyld_data->__myresult; + macho_data = &dyld_data->__macho_data; + + { + cputype = (cputype & CPU_ARCH_MASK) | (cpu_type() & ~CPU_ARCH_MASK); + } + + ret = get_macho_vnode(name, cputype, header, + &file_offset, &macho_size, macho_data, &vp, imgp); + if (ret) { + goto novp_out; + } + + *myresult = 
load_result_null; + myresult->is_64bit_addr = result->is_64bit_addr; + myresult->is_64bit_data = result->is_64bit_data; + + ret = parse_machfile(vp, map, thread, header, file_offset, + macho_size, depth, slide, 0, myresult, result, imgp); + + if (ret == LOAD_SUCCESS) { + if (result->threadstate) { + /* don't use the app's threadstate if we have a dyld */ + kfree_data(result->threadstate, result->threadstate_sz); + } + result->threadstate = myresult->threadstate; + result->threadstate_sz = myresult->threadstate_sz; + + result->dynlinker = TRUE; + result->entry_point = myresult->entry_point; + result->validentry = myresult->validentry; + result->all_image_info_addr = myresult->all_image_info_addr; + result->all_image_info_size = myresult->all_image_info_size; + if (!myresult->platform_binary) { + result->csflags &= ~CS_NO_UNTRUSTED_HELPERS; + } + +#if CONFIG_ROSETTA + if (imgp->ip_flags & IMGPF_ROSETTA) { + extern const struct fileops vnops; + // Save the file descriptor and mach header address for dyld. These will + // be passed on the stack for the Rosetta runtime's use. 
+ struct fileproc *fp; + int dyld_fd; + proc_t p = vfs_context_proc(imgp->ip_vfs_context); + int error = falloc(p, &fp, &dyld_fd, imgp->ip_vfs_context); + if (error == 0) { + error = VNOP_OPEN(vp, FREAD, imgp->ip_vfs_context); + if (error == 0) { + fp->fp_glob->fg_flag = FREAD; + fp->fp_glob->fg_ops = &vnops; + fp_set_data(fp, vp); + + proc_fdlock(p); + procfdtbl_releasefd(p, dyld_fd, NULL); + fp_drop(p, dyld_fd, fp, 1); + proc_fdunlock(p); + + vnode_ref(vp); + + result->dynlinker_fd = dyld_fd; + result->dynlinker_fp = fp; + result->dynlinker_mach_header = myresult->mach_header; + result->dynlinker_max_vm_addr = myresult->max_vm_addr; + result->dynlinker_ro_vm_start = myresult->ro_vm_start; + result->dynlinker_ro_vm_end = myresult->ro_vm_end; + } else { + fp_free(p, dyld_fd, fp); + ret = LOAD_IOERROR; + } + } else { + ret = LOAD_IOERROR; + } + } +#endif + } + + struct vnode_attr *va; + va = kalloc_type(struct vnode_attr, Z_WAITOK | Z_ZERO); + VATTR_INIT(va); + VATTR_WANTED(va, va_fsid64); + VATTR_WANTED(va, va_fsid); + VATTR_WANTED(va, va_fileid); + int error = vnode_getattr(vp, va, imgp->ip_vfs_context); + if (error == 0) { + imgp->ip_dyld_fsid = vnode_get_va_fsid(va); + imgp->ip_dyld_fsobjid = va->va_fileid; + } + + vnode_put(vp); + kfree_type(struct vnode_attr, va); +novp_out: + kfree_type(typeof(*dyld_data), dyld_data); + return ret; +} + +#if CONFIG_ROSETTA +static const char* rosetta_runtime_path = "/usr/libexec/rosetta/runtime"; + +#if (DEVELOPMENT || DEBUG) +static const char* rosetta_runtime_path_alt_x86 = "/usr/local/libexec/rosetta/runtime_internal"; +static const char* rosetta_runtime_path_alt_arm = "/usr/local/libexec/rosetta/runtime_arm_internal"; +#endif + +static load_return_t +load_rosetta( + vm_map_t map, + thread_t thread, + load_result_t *result, + struct image_params *imgp) +{ + struct vnode *vp = NULLVP; /* set by get_macho_vnode() */ + struct mach_header *header; + off_t file_offset = 0; /* set by get_macho_vnode() */ + off_t macho_size = 0; 
/* set by get_macho_vnode() */ + load_result_t *myresult; + kern_return_t ret; + struct macho_data *macho_data; + const char *rosetta_file_path; + struct { + struct mach_header __header; + load_result_t __myresult; + struct macho_data __macho_data; + } *rosetta_data; + mach_vm_address_t rosetta_load_addr; + mach_vm_size_t rosetta_size; + mach_vm_address_t shared_cache_base = SHARED_REGION_BASE_ARM64; + int64_t slide = 0; + + /* Allocate wad-of-data from heap to reduce excessively deep stacks */ + rosetta_data = kalloc_type(typeof(*rosetta_data), Z_WAITOK | Z_NOFAIL); + header = &rosetta_data->__header; + myresult = &rosetta_data->__myresult; + macho_data = &rosetta_data->__macho_data; + + rosetta_file_path = rosetta_runtime_path; + +#if (DEVELOPMENT || DEBUG) + bool use_alt_rosetta = false; + if (imgp->ip_flags & IMGPF_ALT_ROSETTA) { + use_alt_rosetta = true; + } else { + int policy_error; + uint32_t policy_flags = 0; + int32_t policy_gencount = 0; + policy_error = proc_uuid_policy_lookup(result->uuid, &policy_flags, &policy_gencount); + if (policy_error == 0 && (policy_flags & PROC_UUID_ALT_ROSETTA_POLICY) != 0) { + use_alt_rosetta = true; + } + } + + if (use_alt_rosetta) { + if (imgp->ip_origcputype == CPU_TYPE_X86_64) { + rosetta_file_path = rosetta_runtime_path_alt_x86; + } else if (imgp->ip_origcputype == CPU_TYPE_ARM64) { + rosetta_file_path = rosetta_runtime_path_alt_arm; + } else { + ret = LOAD_BADARCH; + goto novp_out; + } + } +#endif + + ret = get_macho_vnode(rosetta_file_path, CPU_TYPE_ARM64, header, + &file_offset, &macho_size, macho_data, &vp, imgp); + if (ret) { + goto novp_out; + } + + *myresult = load_result_null; + myresult->is_64bit_addr = TRUE; + myresult->is_64bit_data = TRUE; + + ret = parse_machfile(vp, NULL, NULL, header, file_offset, macho_size, + 2, 0, 0, myresult, NULL, imgp); + if (ret != LOAD_SUCCESS) { + goto out; + } + + if (!(imgp->ip_flags & IMGPF_DISABLE_ASLR)) { + slide = random(); + slide = (slide % 
(vm_map_get_max_loader_aslr_slide_pages(map) - 1)) + 1; + slide <<= vm_map_page_shift(map); + } + + if (imgp->ip_origcputype == CPU_TYPE_X86_64) { + shared_cache_base = SHARED_REGION_BASE_X86_64; + } + + rosetta_size = round_page(myresult->max_vm_addr - myresult->min_vm_addr); + rosetta_load_addr = shared_cache_base - rosetta_size - slide; + + *myresult = load_result_null; + myresult->is_64bit_addr = TRUE; + myresult->is_64bit_data = TRUE; + myresult->is_rosetta = TRUE; + + ret = parse_machfile(vp, map, thread, header, file_offset, macho_size, + 2, rosetta_load_addr, 0, myresult, result, imgp); + if (ret == LOAD_SUCCESS) { + if (result) { + if (result->threadstate) { + /* don't use the app's/dyld's threadstate */ + kfree_data(result->threadstate, result->threadstate_sz); + } + assert(myresult->threadstate != NULL); + + result->is_rosetta = TRUE; + + result->threadstate = myresult->threadstate; + result->threadstate_sz = myresult->threadstate_sz; + + result->entry_point = myresult->entry_point; + result->validentry = myresult->validentry; + if (!myresult->platform_binary) { + result->csflags &= ~CS_NO_UNTRUSTED_HELPERS; + } + + if ((header->cpusubtype & ~CPU_SUBTYPE_MASK) != CPU_SUBTYPE_ARM64E) { + imgp->ip_flags |= IMGPF_NOJOP; + } + } + } + +out: + vnode_put(vp); +novp_out: + kfree_type(typeof(*rosetta_data), rosetta_data); + return ret; +} +#endif + +static void +set_signature_error( + struct vnode* vp, + struct image_params * imgp, + const char* fatal_failure_desc, + const size_t fatal_failure_desc_len) +{ + char *vn_path = NULL; + vm_size_t vn_pathlen = MAXPATHLEN; + char const *path = NULL; + + vn_path = zalloc(ZV_NAMEI); + if (vn_getpath(vp, vn_path, (int*)&vn_pathlen) == 0) { + path = vn_path; + } else { + path = "(get vnode path failed)"; + } + os_reason_t reason = os_reason_create(OS_REASON_CODESIGNING, + CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG); + + if (reason == OS_REASON_NULL) { + printf("load_code_signature: %s: failure to allocate exit reason 
for validation failure: %s\n", + path, fatal_failure_desc); + goto out; + } + + imgp->ip_cs_error = reason; + reason->osr_flags = (OS_REASON_FLAG_GENERATE_CRASH_REPORT | + OS_REASON_FLAG_CONSISTENT_FAILURE); + + mach_vm_address_t data_addr = 0; + + int reason_error = 0; + int kcdata_error = 0; + + if ((reason_error = os_reason_alloc_buffer_noblock(reason, kcdata_estimate_required_buffer_size + (1, (uint32_t)fatal_failure_desc_len))) == 0 && + (kcdata_error = kcdata_get_memory_addr(&reason->osr_kcd_descriptor, + EXIT_REASON_USER_DESC, (uint32_t)fatal_failure_desc_len, + &data_addr)) == KERN_SUCCESS) { + kern_return_t mc_error = kcdata_memcpy(&reason->osr_kcd_descriptor, (mach_vm_address_t)data_addr, + fatal_failure_desc, (uint32_t)fatal_failure_desc_len); + + if (mc_error != KERN_SUCCESS) { + printf("load_code_signature: %s: failed to copy reason string " + "(kcdata_memcpy error: %d, length: %ld)\n", + path, mc_error, fatal_failure_desc_len); + } + } else { + printf("load_code_signature: %s: failed to allocate space for reason string " + "(os_reason_alloc_buffer error: %d, kcdata error: %d, length: %ld)\n", + path, reason_error, kcdata_error, fatal_failure_desc_len); + } +out: + if (vn_path) { + zfree(ZV_NAMEI, vn_path); + } +} + +static load_return_t +load_code_signature( + struct linkedit_data_command *lcp, + struct vnode *vp, + off_t macho_offset, + off_t macho_size, + cpu_type_t cputype, + cpu_subtype_t cpusubtype, + load_result_t *result, + struct image_params *imgp) +{ + int ret; + kern_return_t kr; + vm_offset_t addr; + int resid; + struct cs_blob *blob; + int error; + vm_size_t blob_size; + uint32_t sum; + boolean_t anyCPU; + + addr = 0; + blob = NULL; + + cpusubtype &= ~CPU_SUBTYPE_MASK; + + blob = ubc_cs_blob_get(vp, cputype, cpusubtype, macho_offset); + + if (blob != NULL) { + /* we already have a blob for this vnode and cpu(sub)type */ + anyCPU = blob->csb_cpu_type == -1; + if ((blob->csb_cpu_type != cputype && + blob->csb_cpu_subtype != cpusubtype && 
!anyCPU) || + (blob->csb_base_offset != macho_offset) || + ((blob->csb_flags & CS_VALID) == 0)) { + /* the blob has changed for this vnode: fail ! */ + ret = LOAD_BADMACHO; + const char* fatal_failure_desc = "embedded signature doesn't match attached signature"; + const size_t fatal_failure_desc_len = strlen(fatal_failure_desc) + 1; + + printf("load_code_signature: %s\n", fatal_failure_desc); + set_signature_error(vp, imgp, fatal_failure_desc, fatal_failure_desc_len); + goto out; + } + + /* It matches the blob we want here, let's verify the version */ + if (!anyCPU && ubc_cs_generation_check(vp) == 0) { + /* No need to revalidate, we're good! */ + ret = LOAD_SUCCESS; + goto out; + } + + /* That blob may be stale, let's revalidate. */ + error = ubc_cs_blob_revalidate(vp, blob, imgp, 0, result->ip_platform); + if (error == 0) { + /* Revalidation succeeded, we're good! */ + /* If we were revaliding a CS blob with any CPU arch we adjust it */ + if (anyCPU) { + vnode_lock_spin(vp); + struct cs_cpu_info cpu_info = { + .csb_cpu_type = cputype, + .csb_cpu_subtype = cpusubtype + }; + zalloc_ro_update_field(ZONE_ID_CS_BLOB, blob, csb_cpu_info, &cpu_info); + vnode_unlock(vp); + } + ret = LOAD_SUCCESS; + goto out; + } + + if (error != EAGAIN) { + printf("load_code_signature: revalidation failed: %d\n", error); + ret = LOAD_FAILURE; + goto out; + } + + assert(error == EAGAIN); + + /* + * Revalidation was not possible for this blob. We just continue as if there was no blob, + * rereading the signature, and ubc_cs_blob_add will do the right thing. 
+ */ + blob = NULL; + } + + if (lcp->cmdsize != sizeof(struct linkedit_data_command)) { + ret = LOAD_BADMACHO; + goto out; + } + + sum = 0; + if (os_add_overflow(lcp->dataoff, lcp->datasize, &sum) || sum > macho_size) { + ret = LOAD_BADMACHO; + goto out; + } + + blob_size = lcp->datasize; + kr = ubc_cs_blob_allocate(&addr, &blob_size); + if (kr != KERN_SUCCESS) { + ret = LOAD_NOSPACE; + goto out; + } + + resid = 0; + error = vn_rdwr(UIO_READ, + vp, + (caddr_t) addr, + lcp->datasize, + macho_offset + lcp->dataoff, + UIO_SYSSPACE, + 0, + kauth_cred_get(), + &resid, + current_proc()); + if (error || resid != 0) { + ret = LOAD_IOERROR; + goto out; + } + + if (ubc_cs_blob_add(vp, + result->ip_platform, + cputype, + cpusubtype, + macho_offset, + &addr, + lcp->datasize, + imgp, + 0, + &blob)) { + if (addr) { + ubc_cs_blob_deallocate(addr, blob_size); + addr = 0; + } + ret = LOAD_FAILURE; + goto out; + } else { + /* ubc_cs_blob_add() has consumed "addr" */ + addr = 0; + } + +#if CHECK_CS_VALIDATION_BITMAP + ubc_cs_validation_bitmap_allocate( vp ); +#endif + + ret = LOAD_SUCCESS; +out: + if (ret == LOAD_SUCCESS) { + if (blob == NULL) { + panic("success, but no blob!"); + } + + result->csflags |= blob->csb_flags; + result->platform_binary = blob->csb_platform_binary; + result->cs_end_offset = blob->csb_end_offset; + } + if (addr != 0) { + ubc_cs_blob_deallocate(addr, blob_size); + addr = 0; + } + + return ret; +} + + +#if CONFIG_CODE_DECRYPTION + +static load_return_t +set_code_unprotect( + struct encryption_info_command *eip, + caddr_t addr, + vm_map_t map, + int64_t slide, + struct vnode *vp, + off_t macho_offset, + cpu_type_t cputype, + cpu_subtype_t cpusubtype) +{ + int error, len; + pager_crypt_info_t crypt_info; + const char * cryptname = 0; + char *vpath; + + size_t offset; + struct segment_command_64 *seg64; + struct segment_command *seg32; + vm_map_offset_t map_offset, map_size; + vm_object_offset_t crypto_backing_offset; + kern_return_t kr; + + if (eip->cmdsize < 
sizeof(*eip)) { + return LOAD_BADMACHO; + } + + switch (eip->cryptid) { + case 0: + /* not encrypted, just an empty load command */ + return LOAD_SUCCESS; + case 1: + cryptname = "com.apple.unfree"; + break; + case 0x10: + /* some random cryptid that you could manually put into + * your binary if you want NULL */ + cryptname = "com.apple.null"; + break; + default: + return LOAD_BADMACHO; + } + + if (map == VM_MAP_NULL) { + return LOAD_SUCCESS; + } + if (NULL == text_crypter_create) { + return LOAD_FAILURE; + } + + vpath = zalloc(ZV_NAMEI); + + len = MAXPATHLEN; + error = vn_getpath(vp, vpath, &len); + if (error) { + zfree(ZV_NAMEI, vpath); + return LOAD_FAILURE; + } + + if (eip->cryptsize == 0) { + printf("%s:%d '%s': cryptoff 0x%llx cryptsize 0x%llx cryptid 0x%x ignored\n", __FUNCTION__, __LINE__, vpath, (uint64_t)eip->cryptoff, (uint64_t)eip->cryptsize, eip->cryptid); + zfree(ZV_NAMEI, vpath); + return LOAD_SUCCESS; + } + + /* set up decrypter first */ + crypt_file_data_t crypt_data = { + .filename = vpath, + .cputype = cputype, + .cpusubtype = cpusubtype, + .origin = CRYPT_ORIGIN_APP_LAUNCH, + }; + kr = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data); +#if VM_MAP_DEBUG_APPLE_PROTECT + if (vm_map_debug_apple_protect) { + struct proc *p; + p = current_proc(); + printf("APPLE_PROTECT: %d[%s] map %p %s(%s) -> 0x%x\n", + proc_getpid(p), p->p_comm, map, __FUNCTION__, vpath, kr); + } +#endif /* VM_MAP_DEBUG_APPLE_PROTECT */ + zfree(ZV_NAMEI, vpath); + + if (kr) { + printf("set_code_unprotect: unable to create decrypter %s, kr=%d\n", + cryptname, kr); + if (kr == kIOReturnNotPrivileged) { + /* text encryption returned decryption failure */ + return LOAD_DECRYPTFAIL; + } else { + return LOAD_RESOURCE; + } + } + + /* this is terrible, but we have to rescan the load commands to find the + * virtual address of this encrypted stuff. This code is gonna look like + * the dyld source one day... 
*/ + struct mach_header *header = (struct mach_header *)addr; + size_t mach_header_sz = sizeof(struct mach_header); + if (header->magic == MH_MAGIC_64 || + header->magic == MH_CIGAM_64) { + mach_header_sz = sizeof(struct mach_header_64); + } + offset = mach_header_sz; + uint32_t ncmds = header->ncmds; + while (ncmds--) { + /* + * Get a pointer to the command. + */ + struct load_command *lcp = (struct load_command *)(addr + offset); + offset += lcp->cmdsize; + + switch (lcp->cmd) { + case LC_SEGMENT_64: + seg64 = (struct segment_command_64 *)lcp; + if ((seg64->fileoff <= eip->cryptoff) && + (seg64->fileoff + seg64->filesize >= + eip->cryptoff + eip->cryptsize)) { + map_offset = (vm_map_offset_t)(seg64->vmaddr + eip->cryptoff - seg64->fileoff + slide); + map_size = eip->cryptsize; + crypto_backing_offset = macho_offset + eip->cryptoff; + goto remap_now; + } + break; + case LC_SEGMENT: + seg32 = (struct segment_command *)lcp; + if ((seg32->fileoff <= eip->cryptoff) && + (seg32->fileoff + seg32->filesize >= + eip->cryptoff + eip->cryptsize)) { + map_offset = (vm_map_offset_t)(seg32->vmaddr + eip->cryptoff - seg32->fileoff + slide); + map_size = eip->cryptsize; + crypto_backing_offset = macho_offset + eip->cryptoff; + goto remap_now; + } + break; + } + } + + /* if we get here, did not find anything */ + return LOAD_BADMACHO; + +remap_now: + /* now remap using the decrypter */ + MACHO_PRINTF(("+++ set_code_unprotect: vm[0x%llx:0x%llx]\n", + (uint64_t) map_offset, + (uint64_t) (map_offset + map_size))); + kr = vm_map_apple_protected(map, + map_offset, + map_offset + map_size, + crypto_backing_offset, + &crypt_info, + CRYPTID_APP_ENCRYPTION); + if (kr) { + printf("set_code_unprotect(): mapping failed with %x\n", kr); + return LOAD_PROTECT; + } + + return LOAD_SUCCESS; +} + +#endif + +/* + * This routine exists to support the load_dylinker(). + * + * This routine has its own, separate, understanding of the FAT file format, + * which is terrifically unfortunate. 
+ */ +static +load_return_t +get_macho_vnode( + const char *path, + cpu_type_t cputype, + struct mach_header *mach_header, + off_t *file_offset, + off_t *macho_size, + struct macho_data *data, + struct vnode **vpp, + struct image_params *imgp + ) +{ + struct vnode *vp; + vfs_context_t ctx = vfs_context_current(); + proc_t p = vfs_context_proc(ctx); + kauth_cred_t kerncred; + struct nameidata *ndp = &data->__nid; + boolean_t is_fat; + struct fat_arch fat_arch; + int error; + int resid; + union macho_vnode_header *header = &data->__header; + off_t fsize = (off_t)0; + + /* + * Capture the kernel credential for use in the actual read of the + * file, since the user doing the execution may have execute rights + * but not read rights, but to exec something, we have to either map + * or read it into the new process address space, which requires + * read rights. This is to deal with lack of common credential + * serialization code which would treat NOCRED as "serialize 'root'". + */ + kerncred = vfs_context_ucred(vfs_context_kernel()); + + /* init the namei data to point the file user's program name */ + NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); + + if ((error = namei(ndp)) != 0) { + if (error == ENOENT) { + error = LOAD_ENOENT; + } else { + error = LOAD_FAILURE; + } + return error; + } + nameidone(ndp); + vp = ndp->ni_vp; + + /* check for regular file */ + if (vp->v_type != VREG) { + error = LOAD_PROTECT; + goto bad1; + } + + /* get size */ + if ((error = vnode_size(vp, &fsize, ctx)) != 0) { + error = LOAD_FAILURE; + goto bad1; + } + + /* Check mount point */ + if (vp->v_mount->mnt_flag & MNT_NOEXEC) { + error = LOAD_PROTECT; + goto bad1; + } + + /* check access */ + if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_EXECUTE | KAUTH_VNODE_READ_DATA, ctx)) != 0) { + error = LOAD_PROTECT; + goto bad1; + } + + /* try to open it */ + if ((error = VNOP_OPEN(vp, FREAD, ctx)) != 0) { + error = LOAD_PROTECT; + goto bad1; + } + + 
if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)header, sizeof(*header), 0, + UIO_SYSSPACE, IO_NODELOCKED, kerncred, &resid, p)) != 0) { + error = LOAD_IOERROR; + goto bad2; + } + + if (resid) { + error = LOAD_BADMACHO; + goto bad2; + } + + if (header->mach_header.magic == MH_MAGIC || + header->mach_header.magic == MH_MAGIC_64) { + is_fat = FALSE; + } else if (OSSwapBigToHostInt32(header->fat_header.magic) == FAT_MAGIC) { + is_fat = TRUE; + } else { + error = LOAD_BADMACHO; + goto bad2; + } + + if (is_fat) { + error = fatfile_validate_fatarches((vm_offset_t)(&header->fat_header), + sizeof(*header), fsize); + if (error != LOAD_SUCCESS) { + goto bad2; + } + + /* Look up our architecture in the fat file. */ + error = fatfile_getbestarch_for_cputype(cputype, CPU_SUBTYPE_ANY, + (vm_offset_t)(&header->fat_header), sizeof(*header), imgp, &fat_arch); + if (error != LOAD_SUCCESS) { + goto bad2; + } + + /* Read the Mach-O header out of it */ + error = vn_rdwr(UIO_READ, vp, (caddr_t)&header->mach_header, + sizeof(header->mach_header), fat_arch.offset, + UIO_SYSSPACE, IO_NODELOCKED, kerncred, &resid, p); + if (error) { + error = LOAD_IOERROR; + goto bad2; + } + + if (resid) { + error = LOAD_BADMACHO; + goto bad2; + } + + /* Is this really a Mach-O? */ + if (header->mach_header.magic != MH_MAGIC && + header->mach_header.magic != MH_MAGIC_64) { + error = LOAD_BADMACHO; + goto bad2; + } + + *file_offset = fat_arch.offset; + *macho_size = fat_arch.size; + } else { + /* + * Force get_macho_vnode() to fail if the architecture bits + * do not match the expected architecture bits. This in + * turn causes load_dylinker() to fail for the same reason, + * so it ensures the dynamic linker and the binary are in + * lock-step. This is potentially bad, if we ever add to + * the CPU_ARCH_* bits any bits that are desirable but not + * required, since the dynamic linker might work, but we will + * refuse to load it because of this check. 
+ */ + if ((cpu_type_t)header->mach_header.cputype != cputype) { + error = LOAD_BADARCH; + goto bad2; + } + + *file_offset = 0; + *macho_size = fsize; + } + + *mach_header = header->mach_header; + *vpp = vp; + + ubc_setsize(vp, fsize); + return error; + +bad2: + (void) VNOP_CLOSE(vp, FREAD, ctx); +bad1: + vnode_put(vp); + return error; +} \ No newline at end of file diff --git a/I. Mach-O/mac/machine.h b/I. Mach-O/mac/machine.h new file mode 100644 index 0000000..3f49d43 --- /dev/null +++ b/I. Mach-O/mac/machine.h @@ -0,0 +1,416 @@ +// Extracted from Xcode 15 Beta 7 +/* /Library/Developer/CommandLineTools/SDKs/MacOSX14.0.sdk/System/Library/Frameworks/Kernel.framework/Versions/A/Headers/mach/machine.h */ +/* + * Copyright (c) 2007-2016 Apple, Inc. All rights reserved. + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* File: machine.h + * Author: Avadis Tevanian, Jr. + * Date: 1986 + * + * Machine independent machine abstraction. + */ + +#ifndef _MACH_MACHINE_H_ +#define _MACH_MACHINE_H_ + +#ifndef __ASSEMBLER__ + +#include +#include +#include + +typedef integer_t cpu_type_t; +typedef integer_t cpu_subtype_t; +typedef integer_t cpu_threadtype_t; + +#define CPU_STATE_MAX 4 + +#define CPU_STATE_USER 0 +#define CPU_STATE_SYSTEM 1 +#define CPU_STATE_IDLE 2 +#define CPU_STATE_NICE 3 + + + +/* + * Capability bits used in the definition of cpu_type. 
+ */ +#define CPU_ARCH_MASK 0xff000000 /* mask for architecture bits */ +#define CPU_ARCH_ABI64 0x01000000 /* 64 bit ABI */ +#define CPU_ARCH_ABI64_32 0x02000000 /* ABI for 64-bit hardware with 32-bit types; LP32 */ + +/* + * Machine types known by all. + */ + +#define CPU_TYPE_ANY ((cpu_type_t) -1) + +#define CPU_TYPE_VAX ((cpu_type_t) 1) +/* skip ((cpu_type_t) 2) */ +/* skip ((cpu_type_t) 3) */ +/* skip ((cpu_type_t) 4) */ +/* skip ((cpu_type_t) 5) */ +#define CPU_TYPE_MC680x0 ((cpu_type_t) 6) +#define CPU_TYPE_X86 ((cpu_type_t) 7) +#define CPU_TYPE_I386 CPU_TYPE_X86 /* compatibility */ +#define CPU_TYPE_X86_64 (CPU_TYPE_X86 | CPU_ARCH_ABI64) + +/* skip CPU_TYPE_MIPS ((cpu_type_t) 8) */ +/* skip ((cpu_type_t) 9) */ +#define CPU_TYPE_MC98000 ((cpu_type_t) 10) +#define CPU_TYPE_HPPA ((cpu_type_t) 11) +#define CPU_TYPE_ARM ((cpu_type_t) 12) +#define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64) +#define CPU_TYPE_ARM64_32 (CPU_TYPE_ARM | CPU_ARCH_ABI64_32) +#define CPU_TYPE_MC88000 ((cpu_type_t) 13) +#define CPU_TYPE_SPARC ((cpu_type_t) 14) +#define CPU_TYPE_I860 ((cpu_type_t) 15) +/* skip CPU_TYPE_ALPHA ((cpu_type_t) 16) */ +/* skip ((cpu_type_t) 17) */ +#define CPU_TYPE_POWERPC ((cpu_type_t) 18) +#define CPU_TYPE_POWERPC64 (CPU_TYPE_POWERPC | CPU_ARCH_ABI64) +/* skip ((cpu_type_t) 19) */ +/* skip ((cpu_type_t) 20 */ +/* skip ((cpu_type_t) 21 */ +/* skip ((cpu_type_t) 22 */ +/* skip ((cpu_type_t) 23 */ + +/* + * Machine subtypes (these are defined here, instead of in a machine + * dependent directory, so that any program can get all definitions + * regardless of where is it compiled). + */ + +/* + * Capability bits used in the definition of cpu_subtype. 
+ */ +#define CPU_SUBTYPE_MASK 0xff000000 /* mask for feature flags */ +#define CPU_SUBTYPE_LIB64 0x80000000 /* 64 bit libraries */ +#define CPU_SUBTYPE_PTRAUTH_ABI 0x80000000 /* pointer authentication with versioned ABI */ + +/* + * When selecting a slice, ANY will pick the slice with the best + * grading for the selected cpu_type_t, unlike the "ALL" subtypes, + * which are the slices that can run on any hardware for that cpu type. + */ +#define CPU_SUBTYPE_ANY ((cpu_subtype_t) -1) + +/* + * Object files that are hand-crafted to run on any + * implementation of an architecture are tagged with + * CPU_SUBTYPE_MULTIPLE. This functions essentially the same as + * the "ALL" subtype of an architecture except that it allows us + * to easily find object files that may need to be modified + * whenever a new implementation of an architecture comes out. + * + * It is the responsibility of the implementor to make sure the + * software handles unsupported implementations elegantly. + */ +#define CPU_SUBTYPE_MULTIPLE ((cpu_subtype_t) -1) +#define CPU_SUBTYPE_LITTLE_ENDIAN ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_BIG_ENDIAN ((cpu_subtype_t) 1) + +/* + * Machine threadtypes. + * This is none - not defined - for most machine types/subtypes. + */ +#define CPU_THREADTYPE_NONE ((cpu_threadtype_t) 0) + +/* + * VAX subtypes (these do *not* necessary conform to the actual cpu + * ID assigned by DEC available via the SID register). 
+ */ + +#define CPU_SUBTYPE_VAX_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_VAX780 ((cpu_subtype_t) 1) +#define CPU_SUBTYPE_VAX785 ((cpu_subtype_t) 2) +#define CPU_SUBTYPE_VAX750 ((cpu_subtype_t) 3) +#define CPU_SUBTYPE_VAX730 ((cpu_subtype_t) 4) +#define CPU_SUBTYPE_UVAXI ((cpu_subtype_t) 5) +#define CPU_SUBTYPE_UVAXII ((cpu_subtype_t) 6) +#define CPU_SUBTYPE_VAX8200 ((cpu_subtype_t) 7) +#define CPU_SUBTYPE_VAX8500 ((cpu_subtype_t) 8) +#define CPU_SUBTYPE_VAX8600 ((cpu_subtype_t) 9) +#define CPU_SUBTYPE_VAX8650 ((cpu_subtype_t) 10) +#define CPU_SUBTYPE_VAX8800 ((cpu_subtype_t) 11) +#define CPU_SUBTYPE_UVAXIII ((cpu_subtype_t) 12) + +/* + * 680x0 subtypes + * + * The subtype definitions here are unusual for historical reasons. + * NeXT used to consider 68030 code as generic 68000 code. For + * backwards compatability: + * + * CPU_SUBTYPE_MC68030 symbol has been preserved for source code + * compatability. + * + * CPU_SUBTYPE_MC680x0_ALL has been defined to be the same + * subtype as CPU_SUBTYPE_MC68030 for binary comatability. + * + * CPU_SUBTYPE_MC68030_ONLY has been added to allow new object + * files to be tagged as containing 68030-specific instructions. 
+ */ + +#define CPU_SUBTYPE_MC680x0_ALL ((cpu_subtype_t) 1) +#define CPU_SUBTYPE_MC68030 ((cpu_subtype_t) 1) /* compat */ +#define CPU_SUBTYPE_MC68040 ((cpu_subtype_t) 2) +#define CPU_SUBTYPE_MC68030_ONLY ((cpu_subtype_t) 3) + +/* + * I386 subtypes + */ + +#define CPU_SUBTYPE_INTEL(f, m) ((cpu_subtype_t) (f) + ((m) << 4)) + +#define CPU_SUBTYPE_I386_ALL CPU_SUBTYPE_INTEL(3, 0) +#define CPU_SUBTYPE_386 CPU_SUBTYPE_INTEL(3, 0) +#define CPU_SUBTYPE_486 CPU_SUBTYPE_INTEL(4, 0) +#define CPU_SUBTYPE_486SX CPU_SUBTYPE_INTEL(4, 8) // 8 << 4 = 128 +#define CPU_SUBTYPE_586 CPU_SUBTYPE_INTEL(5, 0) +#define CPU_SUBTYPE_PENT CPU_SUBTYPE_INTEL(5, 0) +#define CPU_SUBTYPE_PENTPRO CPU_SUBTYPE_INTEL(6, 1) +#define CPU_SUBTYPE_PENTII_M3 CPU_SUBTYPE_INTEL(6, 3) +#define CPU_SUBTYPE_PENTII_M5 CPU_SUBTYPE_INTEL(6, 5) +#define CPU_SUBTYPE_CELERON CPU_SUBTYPE_INTEL(7, 6) +#define CPU_SUBTYPE_CELERON_MOBILE CPU_SUBTYPE_INTEL(7, 7) +#define CPU_SUBTYPE_PENTIUM_3 CPU_SUBTYPE_INTEL(8, 0) +#define CPU_SUBTYPE_PENTIUM_3_M CPU_SUBTYPE_INTEL(8, 1) +#define CPU_SUBTYPE_PENTIUM_3_XEON CPU_SUBTYPE_INTEL(8, 2) +#define CPU_SUBTYPE_PENTIUM_M CPU_SUBTYPE_INTEL(9, 0) +#define CPU_SUBTYPE_PENTIUM_4 CPU_SUBTYPE_INTEL(10, 0) +#define CPU_SUBTYPE_PENTIUM_4_M CPU_SUBTYPE_INTEL(10, 1) +#define CPU_SUBTYPE_ITANIUM CPU_SUBTYPE_INTEL(11, 0) +#define CPU_SUBTYPE_ITANIUM_2 CPU_SUBTYPE_INTEL(11, 1) +#define CPU_SUBTYPE_XEON CPU_SUBTYPE_INTEL(12, 0) +#define CPU_SUBTYPE_XEON_MP CPU_SUBTYPE_INTEL(12, 1) + +#define CPU_SUBTYPE_INTEL_FAMILY(x) ((x) & 15) +#define CPU_SUBTYPE_INTEL_FAMILY_MAX 15 + +#define CPU_SUBTYPE_INTEL_MODEL(x) ((x) >> 4) +#define CPU_SUBTYPE_INTEL_MODEL_ALL 0 + +/* + * X86 subtypes. + */ + +#define CPU_SUBTYPE_X86_ALL ((cpu_subtype_t)3) +#define CPU_SUBTYPE_X86_64_ALL ((cpu_subtype_t)3) +#define CPU_SUBTYPE_X86_ARCH1 ((cpu_subtype_t)4) +#define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell feature subset */ + + +#define CPU_THREADTYPE_INTEL_HTT ((cpu_threadtype_t) 1) + +/* + * Mips subtypes. 
+ */ + +#define CPU_SUBTYPE_MIPS_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_MIPS_R2300 ((cpu_subtype_t) 1) +#define CPU_SUBTYPE_MIPS_R2600 ((cpu_subtype_t) 2) +#define CPU_SUBTYPE_MIPS_R2800 ((cpu_subtype_t) 3) +#define CPU_SUBTYPE_MIPS_R2000a ((cpu_subtype_t) 4) /* pmax */ +#define CPU_SUBTYPE_MIPS_R2000 ((cpu_subtype_t) 5) +#define CPU_SUBTYPE_MIPS_R3000a ((cpu_subtype_t) 6) /* 3max */ +#define CPU_SUBTYPE_MIPS_R3000 ((cpu_subtype_t) 7) + +/* + * MC98000 (PowerPC) subtypes + */ +#define CPU_SUBTYPE_MC98000_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_MC98601 ((cpu_subtype_t) 1) + +/* + * HPPA subtypes for Hewlett-Packard HP-PA family of + * risc processors. Port by NeXT to 700 series. + */ + +#define CPU_SUBTYPE_HPPA_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_HPPA_7100 ((cpu_subtype_t) 0) /* compat */ +#define CPU_SUBTYPE_HPPA_7100LC ((cpu_subtype_t) 1) + +/* + * MC88000 subtypes. + */ +#define CPU_SUBTYPE_MC88000_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_MC88100 ((cpu_subtype_t) 1) +#define CPU_SUBTYPE_MC88110 ((cpu_subtype_t) 2) + +/* + * SPARC subtypes + */ +#define CPU_SUBTYPE_SPARC_ALL ((cpu_subtype_t) 0) + +/* + * I860 subtypes + */ +#define CPU_SUBTYPE_I860_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_I860_860 ((cpu_subtype_t) 1) + +/* + * PowerPC subtypes + */ +#define CPU_SUBTYPE_POWERPC_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_POWERPC_601 ((cpu_subtype_t) 1) +#define CPU_SUBTYPE_POWERPC_602 ((cpu_subtype_t) 2) +#define CPU_SUBTYPE_POWERPC_603 ((cpu_subtype_t) 3) +#define CPU_SUBTYPE_POWERPC_603e ((cpu_subtype_t) 4) +#define CPU_SUBTYPE_POWERPC_603ev ((cpu_subtype_t) 5) +#define CPU_SUBTYPE_POWERPC_604 ((cpu_subtype_t) 6) +#define CPU_SUBTYPE_POWERPC_604e ((cpu_subtype_t) 7) +#define CPU_SUBTYPE_POWERPC_620 ((cpu_subtype_t) 8) +#define CPU_SUBTYPE_POWERPC_750 ((cpu_subtype_t) 9) +#define CPU_SUBTYPE_POWERPC_7400 ((cpu_subtype_t) 10) +#define CPU_SUBTYPE_POWERPC_7450 ((cpu_subtype_t) 11) +#define CPU_SUBTYPE_POWERPC_970 ((cpu_subtype_t) 100) + 
+/* + * ARM subtypes + */ +#define CPU_SUBTYPE_ARM_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_ARM_V4T ((cpu_subtype_t) 5) +#define CPU_SUBTYPE_ARM_V6 ((cpu_subtype_t) 6) +#define CPU_SUBTYPE_ARM_V5TEJ ((cpu_subtype_t) 7) +#define CPU_SUBTYPE_ARM_XSCALE ((cpu_subtype_t) 8) +#define CPU_SUBTYPE_ARM_V7 ((cpu_subtype_t) 9) /* ARMv7-A and ARMv7-R */ +#define CPU_SUBTYPE_ARM_V7F ((cpu_subtype_t) 10) /* Cortex A9 */ +#define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t) 11) /* Swift */ +#define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t) 12) +#define CPU_SUBTYPE_ARM_V8 ((cpu_subtype_t) 13) +#define CPU_SUBTYPE_ARM_V6M ((cpu_subtype_t) 14) /* Not meant to be run under xnu */ +#define CPU_SUBTYPE_ARM_V7M ((cpu_subtype_t) 15) /* Not meant to be run under xnu */ +#define CPU_SUBTYPE_ARM_V7EM ((cpu_subtype_t) 16) /* Not meant to be run under xnu */ +#define CPU_SUBTYPE_ARM_V8M ((cpu_subtype_t) 17) /* Not meant to be run under xnu */ + +/* + * ARM64 subtypes + */ +#define CPU_SUBTYPE_ARM64_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_ARM64_V8 ((cpu_subtype_t) 1) +#define CPU_SUBTYPE_ARM64E ((cpu_subtype_t) 2) + +/* CPU subtype feature flags for ptrauth on arm64e platforms */ +#define CPU_SUBTYPE_ARM64_PTR_AUTH_MASK 0x0f000000 +#define CPU_SUBTYPE_ARM64_PTR_AUTH_VERSION(x) (((x) & CPU_SUBTYPE_ARM64_PTR_AUTH_MASK) >> 24) + +/* + * ARM64_32 subtypes + */ +#define CPU_SUBTYPE_ARM64_32_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_ARM64_32_V8 ((cpu_subtype_t) 1) + +#endif /* !__ASSEMBLER__ */ + +/* + * CPU families (sysctl hw.cpufamily) + * + * These are meant to identify the CPU's marketing name - an + * application can map these to (possibly) localized strings. + * NB: the encodings of the CPU families are intentionally arbitrary. + * There is no ordering, and you should never try to deduce whether + * or not some feature is available based on the family. + * Use feature flags (eg, hw.optional.altivec) to test for optional + * functionality. 
+ */ +#define CPUFAMILY_UNKNOWN 0 +#define CPUFAMILY_POWERPC_G3 0xcee41549 +#define CPUFAMILY_POWERPC_G4 0x77c184ae +#define CPUFAMILY_POWERPC_G5 0xed76d8aa +#define CPUFAMILY_INTEL_6_13 0xaa33392b +#define CPUFAMILY_INTEL_PENRYN 0x78ea4fbc +#define CPUFAMILY_INTEL_NEHALEM 0x6b5a4cd2 +#define CPUFAMILY_INTEL_WESTMERE 0x573b5eec +#define CPUFAMILY_INTEL_SANDYBRIDGE 0x5490b78c +#define CPUFAMILY_INTEL_IVYBRIDGE 0x1f65e835 +#define CPUFAMILY_INTEL_HASWELL 0x10b282dc +#define CPUFAMILY_INTEL_BROADWELL 0x582ed09c +#define CPUFAMILY_INTEL_SKYLAKE 0x37fc219f +#define CPUFAMILY_INTEL_KABYLAKE 0x0f817246 +#define CPUFAMILY_INTEL_ICELAKE 0x38435547 +#define CPUFAMILY_INTEL_COMETLAKE 0x1cf8a03e +#define CPUFAMILY_ARM_9 0xe73283ae +#define CPUFAMILY_ARM_11 0x8ff620d8 +#define CPUFAMILY_ARM_XSCALE 0x53b005f5 +#define CPUFAMILY_ARM_12 0xbd1b0ae9 +#define CPUFAMILY_ARM_13 0x0cc90e64 +#define CPUFAMILY_ARM_14 0x96077ef1 +#define CPUFAMILY_ARM_15 0xa8511bca +#define CPUFAMILY_ARM_SWIFT 0x1e2d6381 +#define CPUFAMILY_ARM_CYCLONE 0x37a09642 +#define CPUFAMILY_ARM_TYPHOON 0x2c91a47e +#define CPUFAMILY_ARM_TWISTER 0x92fb37c8 +#define CPUFAMILY_ARM_HURRICANE 0x67ceee93 +#define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6 +#define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f +#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2 +#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3 +#define CPUFAMILY_ARM_BLIZZARD_AVALANCHE 0xda33d83d +#define CPUFAMILY_ARM_EVEREST_SAWTOOTH 0x8765edea + +/* Described in rdar://64125549 */ +#define CPUSUBFAMILY_UNKNOWN 0 +#define CPUSUBFAMILY_ARM_HP 1 +#define CPUSUBFAMILY_ARM_HG 2 +#define CPUSUBFAMILY_ARM_M 3 +#define CPUSUBFAMILY_ARM_HS 4 +#define CPUSUBFAMILY_ARM_HC_HD 5 +#define CPUSUBFAMILY_ARM_HA 6 + +/* The following synonyms are deprecated: */ +#define CPUFAMILY_INTEL_6_23 CPUFAMILY_INTEL_PENRYN +#define CPUFAMILY_INTEL_6_26 CPUFAMILY_INTEL_NEHALEM + + +#endif /* _MACH_MACHINE_H_ */ \ No newline at end of file diff --git a/I. Mach-O/mac/nlist.h b/I. 
Mach-O/mac/nlist.h new file mode 100644 index 0000000..9033bee --- /dev/null +++ b/I. Mach-O/mac/nlist.h @@ -0,0 +1,320 @@ +// Extracted from Xcode 15 Beta 7 +// /Library/Developer/CommandLineTools/SDKs/MacOSX14.0.sdk/System/Library/Frameworks/Kernel.framework/Versions/A/Headers/mach-o/nlist.h */ +/* + * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _MACHO_NLIST_H_ +#define _MACHO_NLIST_H_ +/* $NetBSD: nlist.h,v 1.5 1994/10/26 00:56:11 cgd Exp $ */ + +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nlist.h 8.2 (Berkeley) 1/21/94 + */ +#include + +/* + * Format of a symbol table entry of a Mach-O file for 32-bit architectures. + * Modified from the BSD format. The modifications from the original format + * were changing n_other (an unused field) to n_sect and the addition of the + * N_SECT type. 
These modifications are required to support symbols in a larger + * number of sections not just the three sections (text, data and bss) in a BSD + * file. + */ +struct nlist { + union { +#ifndef __LP64__ + char *n_name; /* for use when in-core */ +#endif + uint32_t n_strx; /* index into the string table */ + } n_un; + uint8_t n_type; /* type flag, see below */ + uint8_t n_sect; /* section number or NO_SECT */ + int16_t n_desc; /* see */ + uint32_t n_value; /* value of this symbol (or stab offset) */ +}; + +/* + * This is the symbol table entry structure for 64-bit architectures. + */ +struct nlist_64 { + union { + uint32_t n_strx; /* index into the string table */ + } n_un; + uint8_t n_type; /* type flag, see below */ + uint8_t n_sect; /* section number or NO_SECT */ + uint16_t n_desc; /* see */ + uint64_t n_value; /* value of this symbol (or stab offset) */ +}; + +/* + * Symbols with a index into the string table of zero (n_un.n_strx == 0) are + * defined to have a null, "", name. Therefore all string indexes to non null + * names must not have a zero string index. This is bit historical information + * that has never been well documented. + */ + +/* + * The n_type field really contains four fields: + * unsigned char N_STAB:3, + * N_PEXT:1, + * N_TYPE:3, + * N_EXT:1; + * which are used via the following masks. + */ +#define N_STAB 0xe0 /* if any of these bits set, a symbolic debugging entry */ +#define N_PEXT 0x10 /* private external symbol bit */ +#define N_TYPE 0x0e /* mask for the type bits */ +#define N_EXT 0x01 /* external symbol bit, set for external symbols */ + +/* + * Only symbolic debugging entries have some of the N_STAB bits set and if any + * of these bits are set then it is a symbolic debugging entry (a stab). In + * which case then the values of the n_type field (the entire field) are given + * in + */ + +/* + * Values for N_TYPE bits of the n_type field. 
+ */ +#define N_UNDF 0x0 /* undefined, n_sect == NO_SECT */ +#define N_ABS 0x2 /* absolute, n_sect == NO_SECT */ +#define N_SECT 0xe /* defined in section number n_sect */ +#define N_PBUD 0xc /* prebound undefined (defined in a dylib) */ +#define N_INDR 0xa /* indirect */ + +/* + * If the type is N_INDR then the symbol is defined to be the same as another + * symbol. In this case the n_value field is an index into the string table + * of the other symbol's name. When the other symbol is defined then they both + * take on the defined type and value. + */ + +/* + * If the type is N_SECT then the n_sect field contains an ordinal of the + * section the symbol is defined in. The sections are numbered from 1 and + * refer to sections in order they appear in the load commands for the file + * they are in. This means the same ordinal may very well refer to different + * sections in different files. + * + * The n_value field for all symbol table entries (including N_STAB's) gets + * updated by the link editor based on the value of it's n_sect field and where + * the section n_sect references gets relocated. If the value of the n_sect + * field is NO_SECT then it's n_value field is not changed by the link editor. + */ +#define NO_SECT 0 /* symbol is not in any section */ +#define MAX_SECT 255 /* 1 thru 255 inclusive */ + +/* + * Common symbols are represented by undefined (N_UNDF) external (N_EXT) types + * who's values (n_value) are non-zero. In which case the value of the n_value + * field is the size (in bytes) of the common symbol. The n_sect field is set + * to NO_SECT. The alignment of a common symbol may be set as a power of 2 + * between 2^1 and 2^15 as part of the n_desc field using the macros below. If + * the alignment is not set (a value of zero) then natural alignment based on + * the size is used. 
+ */ +#define GET_COMM_ALIGN(n_desc) (((n_desc) >> 8) & 0x0f) +#define SET_COMM_ALIGN(n_desc,align) \ + (n_desc) = (((n_desc) & 0xf0ff) | (((align) & 0x0f) << 8)) + +/* + * To support the lazy binding of undefined symbols in the dynamic link-editor, + * the undefined symbols in the symbol table (the nlist structures) are marked + * with the indication if the undefined reference is a lazy reference or + * non-lazy reference. If both a non-lazy reference and a lazy reference is + * made to the same symbol the non-lazy reference takes precedence. A reference + * is lazy only when all references to that symbol are made through a symbol + * pointer in a lazy symbol pointer section. + * + * The implementation of marking nlist structures in the symbol table for + * undefined symbols will be to use some of the bits of the n_desc field as a + * reference type. The mask REFERENCE_TYPE will be applied to the n_desc field + * of an nlist structure for an undefined symbol to determine the type of + * undefined reference (lazy or non-lazy). + * + * The constants for the REFERENCE FLAGS are propagated to the reference table + * in a shared library file. In that case the constant for a defined symbol, + * REFERENCE_FLAG_DEFINED, is also used. + */ +/* Reference type bits of the n_desc field of undefined symbols */ +#define REFERENCE_TYPE 0x7 +/* types of references */ +#define REFERENCE_FLAG_UNDEFINED_NON_LAZY 0 +#define REFERENCE_FLAG_UNDEFINED_LAZY 1 +#define REFERENCE_FLAG_DEFINED 2 +#define REFERENCE_FLAG_PRIVATE_DEFINED 3 +#define REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY 4 +#define REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY 5 + +/* + * To simplify stripping of objects that use are used with the dynamic link + * editor, the static link editor marks the symbols defined an object that are + * referenced by a dynamicly bound object (dynamic shared libraries, bundles). + * With this marking strip knows not to strip these symbols. 
+ */ +#define REFERENCED_DYNAMICALLY 0x0010 + +/* + * For images created by the static link editor with the -twolevel_namespace + * option in effect the flags field of the mach header is marked with + * MH_TWOLEVEL. And the binding of the undefined references of the image are + * determined by the static link editor. Which library an undefined symbol is + * bound to is recorded by the static linker in the high 8 bits of the n_desc + * field using the SET_LIBRARY_ORDINAL macro below. The ordinal recorded + * references the libraries listed in the Mach-O's LC_LOAD_DYLIB, + * LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB, and + * LC_LAZY_LOAD_DYLIB, etc. load commands in the order they appear in the + * headers. The library ordinals start from 1. + * For a dynamic library that is built as a two-level namespace image the + * undefined references from module defined in another use the same nlist struct + * an in that case SELF_LIBRARY_ORDINAL is used as the library ordinal. For + * defined symbols in all images they also must have the library ordinal set to + * SELF_LIBRARY_ORDINAL. The EXECUTABLE_ORDINAL refers to the executable + * image for references from plugins that refer to the executable that loads + * them. + * + * The DYNAMIC_LOOKUP_ORDINAL is for undefined symbols in a two-level namespace + * image that are looked up by the dynamic linker with flat namespace semantics. + * This ordinal was added as a feature in Mac OS X 10.3 by reducing the + * value of MAX_LIBRARY_ORDINAL by one. So it is legal for existing binaries + * or binaries built with older tools to have 0xfe (254) dynamic libraries. In + * this case the ordinal value 0xfe (254) must be treated as a library ordinal + * for compatibility. 
+ */ +#define GET_LIBRARY_ORDINAL(n_desc) (((n_desc) >> 8) & 0xff) +#define SET_LIBRARY_ORDINAL(n_desc,ordinal) \ + (n_desc) = (((n_desc) & 0x00ff) | (((ordinal) & 0xff) << 8)) +#define SELF_LIBRARY_ORDINAL 0x0 +#define MAX_LIBRARY_ORDINAL 0xfd +#define DYNAMIC_LOOKUP_ORDINAL 0xfe +#define EXECUTABLE_ORDINAL 0xff + +/* + * The bit 0x0020 of the n_desc field is used for two non-overlapping purposes + * and has two different symbolic names, N_NO_DEAD_STRIP and N_DESC_DISCARDED. + */ + +/* + * The N_NO_DEAD_STRIP bit of the n_desc field only ever appears in a + * relocatable .o file (MH_OBJECT filetype). And is used to indicate to the + * static link editor it is never to dead strip the symbol. + */ +#define N_NO_DEAD_STRIP 0x0020 /* symbol is not to be dead stripped */ + +/* + * The N_DESC_DISCARDED bit of the n_desc field never appears in linked image. + * But is used in very rare cases by the dynamic link editor to mark an in + * memory symbol as discared and longer used for linking. + */ +#define N_DESC_DISCARDED 0x0020 /* symbol is discarded */ + +/* + * The N_WEAK_REF bit of the n_desc field indicates to the dynamic linker that + * the undefined symbol is allowed to be missing and is to have the address of + * zero when missing. + */ +#define N_WEAK_REF 0x0040 /* symbol is weak referenced */ + +/* + * The N_WEAK_DEF bit of the n_desc field indicates to the static and dynamic + * linkers that the symbol definition is weak, allowing a non-weak symbol to + * also be used which causes the weak definition to be discared. Currently this + * is only supported for symbols in coalesed sections. + */ +#define N_WEAK_DEF 0x0080 /* coalesed symbol is a weak definition */ + +/* + * The N_REF_TO_WEAK bit of the n_desc field indicates to the dynamic linker + * that the undefined symbol should be resolved using flat namespace searching. 
+ */ +#define N_REF_TO_WEAK 0x0080 /* reference to a weak symbol */ + +/* + * The N_ARM_THUMB_DEF bit of the n_desc field indicates that the symbol is + * a defintion of a Thumb function. + */ +#define N_ARM_THUMB_DEF 0x0008 /* symbol is a Thumb function (ARM) */ + +/* + * The N_SYMBOL_RESOLVER bit of the n_desc field indicates that the + * that the function is actually a resolver function and should + * be called to get the address of the real function to use. + * This bit is only available in .o files (MH_OBJECT filetype) + */ +#define N_SYMBOL_RESOLVER 0x0100 + +/* + * The N_ALT_ENTRY bit of the n_desc field indicates that the + * symbol is pinned to the previous content. + */ +#define N_ALT_ENTRY 0x0200 + +#ifndef __STRICT_BSD__ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +/* + * The function nlist(3) from the C library. + */ +extern int nlist (const char *filename, struct nlist *list); +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* __STRICT_BSD__ */ + +#endif /* _MACHO_LIST_H_ */ \ No newline at end of file diff --git a/I. Mach-O/mac/vm_param.h b/I. Mach-O/mac/vm_param.h new file mode 100644 index 0000000..7d130cf --- /dev/null +++ b/I. Mach-O/mac/vm_param.h @@ -0,0 +1,207 @@ +// Extracted from Xcode 15 Beta 7 +/* /Library/Developer/CommandLineTools/SDKs/MacOSX14.0.sdk/System/Library/Frameworks/Kernel.framework/Versions/A/Headers/mach/arm/vm_param.h */ +/* + * Copyright (c) 2007 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * FILE_ID: vm_param.h + */ + +/* + * ARM machine dependent virtual memory parameters. + */ + +#ifndef _MACH_ARM_VM_PARAM_H_ +#define _MACH_ARM_VM_PARAM_H_ + +#if defined (__arm__) || defined (__arm64__) + + + + +#define BYTE_SIZE 8 /* byte size in bits */ + + +#ifndef __ASSEMBLER__ + +#ifdef __arm__ +#define PAGE_SHIFT_CONST 12 +#elif defined(__arm64__) +extern int PAGE_SHIFT_CONST; +#else +#error Unsupported arch +#endif + +#define PAGE_SHIFT PAGE_SHIFT_CONST +#define PAGE_SIZE (1 << PAGE_SHIFT) +#define PAGE_MASK (PAGE_SIZE-1) + +#define VM_PAGE_SIZE PAGE_SIZE + +#define machine_ptob(x) ((x) << PAGE_SHIFT) + +/* + * Defined for the purpose of testing the pmap advertised page + * size; this does not necessarily match the hardware page size. 
+ */ +#define TEST_PAGE_SIZE_16K ((PAGE_SHIFT_CONST == 14)) +#define TEST_PAGE_SIZE_4K ((PAGE_SHIFT_CONST == 12)) + +#endif /* !__ASSEMBLER__ */ + + +#define PAGE_MAX_SHIFT 14 +#define PAGE_MAX_SIZE (1 << PAGE_MAX_SHIFT) +#define PAGE_MAX_MASK (PAGE_MAX_SIZE-1) + +#define PAGE_MIN_SHIFT 12 +#define PAGE_MIN_SIZE (1 << PAGE_MIN_SHIFT) +#define PAGE_MIN_MASK (PAGE_MIN_SIZE-1) + +#define VM_MAX_PAGE_ADDRESS MACH_VM_MAX_ADDRESS + +#ifndef __ASSEMBLER__ + + +#if defined (__arm__) + +#define VM_MIN_ADDRESS ((vm_address_t) 0x00000000) +#define VM_MAX_ADDRESS ((vm_address_t) 0x80000000) + +/* system-wide values */ +#define MACH_VM_MIN_ADDRESS ((mach_vm_offset_t) 0) +#define MACH_VM_MAX_ADDRESS ((mach_vm_offset_t) VM_MAX_ADDRESS) + +#elif defined (__arm64__) + +#define VM_MIN_ADDRESS ((vm_address_t) 0x0000000000000000ULL) +#define VM_MAX_ADDRESS ((vm_address_t) 0x00000000F0000000ULL) + +/* system-wide values */ +#define MACH_VM_MIN_ADDRESS_RAW 0x0ULL +#define MACH_VM_MAX_ADDRESS_RAW 0x00007FFFFE000000ULL + +#define MACH_VM_MIN_ADDRESS ((mach_vm_offset_t) MACH_VM_MIN_ADDRESS_RAW) +#define MACH_VM_MAX_ADDRESS ((mach_vm_offset_t) MACH_VM_MAX_ADDRESS_RAW) + +#define MACH_VM_MIN_GPU_CARVEOUT_ADDRESS_RAW 0x0000001000000000ULL +#define MACH_VM_MAX_GPU_CARVEOUT_ADDRESS_RAW 0x0000007000000000ULL +#define MACH_VM_MIN_GPU_CARVEOUT_ADDRESS ((mach_vm_offset_t) MACH_VM_MIN_GPU_CARVEOUT_ADDRESS_RAW) +#define MACH_VM_MAX_GPU_CARVEOUT_ADDRESS ((mach_vm_offset_t) MACH_VM_MAX_GPU_CARVEOUT_ADDRESS_RAW) + +#else /* defined(__arm64__) */ +#error architecture not supported +#endif + +#define VM_MAP_MIN_ADDRESS VM_MIN_ADDRESS +#define VM_MAP_MAX_ADDRESS VM_MAX_ADDRESS + + +#if defined (__arm__) +#define VM_KERNEL_POINTER_SIGNIFICANT_BITS 31 +#define VM_MIN_KERNEL_ADDRESS ((vm_address_t) 0x80000000) +#define VM_MAX_KERNEL_ADDRESS ((vm_address_t) 0xFFFEFFFF) +#define VM_HIGH_KERNEL_WINDOW ((vm_address_t) 0xFFFE0000) + +#elif defined (__arm64__) +/* + * kalloc() parameters: + * + * Historically 
kalloc's underlying zones were power-of-2 sizes, with a + * KALLOC_MINSIZE of 16 bytes. Thus the allocator ensured that + * (sizeof == alignof) >= 16 for all kalloc allocations. + * + * Today kalloc may use zones with intermediate (small) sizes, constrained by + * KALLOC_MINSIZE and a minimum alignment, expressed by KALLOC_LOG2_MINALIGN. + * + * Note that most dynamically allocated data structures contain more than + * one int/long/pointer member, so KALLOC_MINSIZE should probably start at 8. + */ +#define TiB(x) ((0ULL + (x)) << 40) +#define GiB(x) ((0ULL + (x)) << 30) +#define KALLOC_MINSIZE 16 /* minimum allocation size */ +#define KALLOC_LOG2_MINALIGN 4 /* log2 minimum alignment */ + +/* + * The minimum and maximum kernel address; some configurations may + * constrain the address space further. + */ + +// Inform kexts about largest possible kernel address space +#define VM_KERNEL_POINTER_SIGNIFICANT_BITS 41 +#define VM_MIN_KERNEL_ADDRESS ((vm_address_t) (0ULL - TiB(2))) +#define VM_MAX_KERNEL_ADDRESS ((vm_address_t) 0xfffffffbffffffffULL) +#else +#error architecture not supported +#endif + +#define VM_MIN_KERNEL_AND_KEXT_ADDRESS VM_MIN_KERNEL_ADDRESS + +#if defined (__arm64__) +/* Top-Byte-Ignore */ +#define ARM_TBI_USER_MASK (0xFF00000000000000ULL) +#define VM_USER_STRIP_TBI(_v) ((typeof (_v))(((uintptr_t)(_v)) &~ (ARM_TBI_USER_MASK))) +#else /* __arm64__ */ +#define VM_USER_STRIP_TBI(_v) (_v) +#endif /* __arm64__ */ + +#if CONFIG_KERNEL_TAGGING +#include +/* + * 'strip' in PAC sense, therefore replacing the stripped bits sign extending + * the sign bit. In kernel space the sign bit is 1, so 0xFF is a valid mask + * here. 
+ */ +#define VM_KERNEL_STRIP_TAG(_v) (vm_memtag_canonicalize_address((vm_offset_t)_v)) +#else /* CONFIG_KERNEL_TAGGING */ +#define VM_KERNEL_STRIP_TAG(_v) (_v) +#endif /* CONFIG_KERNEL_TAGGING */ + +#if __has_feature(ptrauth_calls) +#include +#define VM_KERNEL_STRIP_PAC(_v) (ptrauth_strip((void *)(uintptr_t)(_v), ptrauth_key_asia)) +#else /* !ptrauth_calls */ +#define VM_KERNEL_STRIP_PAC(_v) (_v) +#endif /* ptrauth_calls */ + +#define VM_KERNEL_STRIP_PTR(_va) ((VM_KERNEL_STRIP_TAG(VM_KERNEL_STRIP_PAC((_va))))) +#define VM_KERNEL_STRIP_UPTR(_va) ((vm_address_t)VM_KERNEL_STRIP_PTR((uintptr_t)(_va))) +#define VM_KERNEL_ADDRESS(_va) \ + ((VM_KERNEL_STRIP_UPTR(_va) >= VM_MIN_KERNEL_ADDRESS) && \ + (VM_KERNEL_STRIP_UPTR(_va) <= VM_MAX_KERNEL_ADDRESS)) + +#define VM_USER_STRIP_PTR(_v) (VM_USER_STRIP_TBI(_v)) + + + +#endif /* !__ASSEMBLER__ */ + +#define SWI_SYSCALL 0x80 + +#endif /* defined (__arm__) || defined (__arm64__) */ + +#endif /* _MACH_ARM_VM_PARAM_H_ */ \ No newline at end of file diff --git a/I. Mach-O/mac/vm_prot.h b/I. Mach-O/mac/vm_prot.h new file mode 100644 index 0000000..2b70c5f --- /dev/null +++ b/I. Mach-O/mac/vm_prot.h @@ -0,0 +1,191 @@ +// Extracted from Xcode 15 Beta 7 +// /Library/Developer/CommandLineTools/SDKs/MacOSX14.0.sdk/System/Library/Frameworks/Kernel.framework/Versions/A/Headers/mach/vm_prot.h */ + +/* + * Copyright (c) 2000-2021 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + */ +/* + * File: mach/vm_prot.h + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * + * Virtual memory protection definitions. + * + */ + +#ifndef _MACH_VM_PROT_H_ +#define _MACH_VM_PROT_H_ + +/* + * Types defined: + * + * vm_prot_t VM protection values. + */ + +typedef int vm_prot_t; + +/* + * Protection values, defined as bits within the vm_prot_t type + */ + +#define VM_PROT_NONE ((vm_prot_t) 0x00) + +#define VM_PROT_READ ((vm_prot_t) 0x01) /* read permission */ +#define VM_PROT_WRITE ((vm_prot_t) 0x02) /* write permission */ +#define VM_PROT_EXECUTE ((vm_prot_t) 0x04) /* execute permission */ + +/* + * The default protection for newly-created virtual memory + */ + +#define VM_PROT_DEFAULT (VM_PROT_READ|VM_PROT_WRITE) + +/* + * The maximum privileges possible, for parameter checking. + */ + +#define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) + +/* + * This is an alias to VM_PROT_EXECUTE to identify callers that + * want to allocate an hardware assisted Read-only/read-write + * trusted path in userland. + */ +#define VM_PROT_RORW_TP (VM_PROT_EXECUTE) + +/* + * An invalid protection value. + * Used only by memory_object_lock_request to indicate no change + * to page locks. Using -1 here is a bad idea because it + * looks like VM_PROT_ALL and then some. + */ + +#define VM_PROT_NO_CHANGE_LEGACY ((vm_prot_t) 0x08) +#define VM_PROT_NO_CHANGE ((vm_prot_t) 0x01000000) + +/* + * When a caller finds that he cannot obtain write permission on a + * mapped entry, the following flag can be used. 
The entry will + * be made "needs copy" effectively copying the object (using COW), + * and write permission will be added to the maximum protections + * for the associated entry. + */ + +#define VM_PROT_COPY ((vm_prot_t) 0x10) + + +/* + * Another invalid protection value. + * Used only by memory_object_data_request upon an object + * which has specified a copy_call copy strategy. It is used + * when the kernel wants a page belonging to a copy of the + * object, and is only asking the object as a result of + * following a shadow chain. This solves the race between pages + * being pushed up by the memory manager and the kernel + * walking down the shadow chain. + */ + +#define VM_PROT_WANTS_COPY ((vm_prot_t) 0x10) + + +/* + * Another invalid protection value. + * Indicates that the other protection bits are to be applied as a mask + * against the actual protection bits of the map entry. + */ +#define VM_PROT_IS_MASK ((vm_prot_t) 0x40) + +/* + * Another invalid protection value to support execute-only protection. + * VM_PROT_STRIP_READ is a special marker that tells mprotect to not + * set VM_PROT_READ. We have to do it this way because existing code + * expects the system to set VM_PROT_READ if VM_PROT_EXECUTE is set. + * VM_PROT_EXECUTE_ONLY is just a convenience value to indicate that + * the memory should be executable and explicitly not readable. It will + * be ignored on platforms that do not support this type of protection. + */ +#define VM_PROT_STRIP_READ ((vm_prot_t) 0x80) +#define VM_PROT_EXECUTE_ONLY (VM_PROT_EXECUTE|VM_PROT_STRIP_READ) + + +/* + * Another invalid protection value to support pager TPRO protection. + * VM_PROT_TPRO is a special marker that tells the pager to + * set TPRO flags on a given entry. We do it this way to prevent + * bloating the pager structures and it allows dyld to pass through + * this flag in lieu of specifying explicit VM flags, allowing us to handle + * the final permissions internally. 
+ */ +#define VM_PROT_TPRO ((vm_prot_t) 0x200) + +#if defined(__x86_64__) +/* + * Another invalid protection value to support specifying different + * execute permissions for user- and supervisor- modes. When + * MBE is enabled in a VM, VM_PROT_EXECUTE is used to indicate + * supervisor-mode execute permission, and VM_PROT_UEXEC specifies + * user-mode execute permission. Currently only used by the + * x86 Hypervisor kext. + */ +#define VM_PROT_UEXEC ((vm_prot_t) 0x8) /* User-mode Execute Permission */ + +#define VM_PROT_ALLEXEC (VM_PROT_EXECUTE | VM_PROT_UEXEC) +#else +#define VM_PROT_ALLEXEC (VM_PROT_EXECUTE) +#endif /* defined(__x86_64__) */ + + +#endif /* _MACH_VM_PROT_H_ */ \ No newline at end of file diff --git a/I. Mach-O/mac/vmparam.h b/I. Mach-O/mac/vmparam.h new file mode 100644 index 0000000..9ab244c --- /dev/null +++ b/I. Mach-O/mac/vmparam.h @@ -0,0 +1,57 @@ +// Source: https://github.com/apple-oss-distributions/xnu/blob/xnu-10002.61.3/bsd/arm/vmparam.h + +/* + * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + */ + +#ifndef _BSD_ARM_VMPARAM_H_ +#define _BSD_ARM_VMPARAM_H_ 1 + +#if defined (__arm__) || defined (__arm64__) + +#include + +#ifndef KERNEL +#include +#endif + +#define USRSTACK (0x27E00000) /* ASLR slides stack down by up to 1MB */ +#define USRSTACK64 (0x000000016FE00000ULL) + +/* + * Virtual memory related constants, all in bytes + */ +#ifndef DFLDSIZ +#define DFLDSIZ (RLIM_INFINITY) /* initial data size limit */ +#endif +#ifndef MAXDSIZ +#define MAXDSIZ (RLIM_INFINITY) /* max data size */ +#endif +#ifndef DFLSSIZ +/* XXX stack size default is a platform property: use getrlimit(2) */ +#if (defined(TARGET_OS_OSX) && (TARGET_OS_OSX != 0)) || \ + (defined(KERNEL) && XNU_TARGET_OS_OSX) +#define DFLSSIZ (8*1024*1024 - 16*1024) +#else +#define DFLSSIZ (1024*1024 - 16*1024) /* initial stack size limit */ +#endif /* TARGET_OS_OSX .. || XNU_KERNEL_PRIVATE .. 
*/ +#endif /* DFLSSIZ */ +#ifndef MAXSSIZ +/* XXX stack size limit is a platform property: use getrlimit(2) */ +#if (defined(TARGET_OS_OSX) && (TARGET_OS_OSX != 0)) || \ + (defined(KERNEL) && XNU_TARGET_OS_OSX) +#define MAXSSIZ (64*1024*1024) /* max stack size */ +#else +#define MAXSSIZ (1024*1024) /* max stack size */ +#endif /* TARGET_OS_OSX .. || XNU_KERNEL_PRIVATE .. */ +#endif /* MAXSSIZ */ +#ifndef DFLCSIZ +#define DFLCSIZ (0) /* initial core size limit */ +#endif +#ifndef MAXCSIZ +#define MAXCSIZ (RLIM_INFINITY) /* max core size */ +#endif /* MAXCSIZ */ + +#endif /* defined (__arm__) || defined (__arm64__) */ + +#endif /* _BSD_ARM_VMPARAM_H_ */ \ No newline at end of file diff --git a/I. Mach-O/python/CrimsonUroboros.py b/I. Mach-O/python/CrimsonUroboros.py new file mode 100755 index 0000000..16fe4fd --- /dev/null +++ b/I. Mach-O/python/CrimsonUroboros.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python3 +import lief +import uuid +import argparse +import subprocess +from asn1crypto.cms import ContentInfo +import os +import sys + +### --- I. MACH-O --- ### +class SnakeI: + def __init__(self, binaries): + '''When initiated, the program parses a Universal binary (binaries parameter) and extracts the ARM64 Mach-O. 
If the file is not in a universal format but is a valid ARM64 Mach-O, it is taken as a binary parameter during initialization.''' + self.binary = self.parseFatBinary(binaries) + self.fat_offset = self.binary.fat_offset # For various calculations, if ARM64 Mach-O extracted from Universal Binary + self.prot_map = { + 0: '---', + 1: 'r--', + 2: '-w-', + 3: 'rw-', + 4: '--x', + 5: 'r-x', + 6: '-wx', + 7: 'rwx' + } + self.segment_flags_map = { + 0x1: 'SG_HIGHVM', + 0x2: 'SG_FVMLIB', + 0x4: 'SG_NORELOC', + 0x8: 'SG_PROTECTED_VERSION_1', + 0x10: 'SG_READ_ONLY', + } + + def mapProtection(self, numeric_protection): + '''Maps numeric protection to its string representation.''' + return self.prot_map.get(numeric_protection, 'Unknown') + + def getSegmentFlags(self, flags): + '''Maps numeric segment flags to its string representation.''' + return self.segment_flags_map.get(flags, '') + #return " ".join(activated_flags) + + def parseFatBinary(self, binaries): + '''Parse Mach-O file, whether compiled for multiple architectures or just for a single one. It returns the ARM64 binary if it exists. 
If not, it exits the program.''' + for binary in binaries: + if binary.header.cpu_type == lief.MachO.CPU_TYPES.ARM64: + arm64_bin = binary + if arm64_bin == None: + print('The specified Mach-O file is not in ARM64 architecture.') + exit() + return arm64_bin + + def getFileType(self): + """Extract and return the file type from a binary object's header.""" + return self.binary.header.file_type.name + + def getHeaderFlags(self): + '''Return binary header flags.''' + return self.binary.header.flags_list + + def getEndianess(self): + '''Check the endianness of a binary based on the system and binary's magic number.''' + magic = self.binary.header.magic.name + endianness = sys.byteorder + if endianness == 'little' and (magic == 'MAGIC_64' or magic == 'MAGIC' or magic == 'FAT_MAGIC'): + return 'little' + else: + return 'big' + + def getBinaryHeader(self): + '''https://lief-project.github.io/doc/stable/api/python/macho.html#header''' + return self.binary.header + + def getLoadCommands(self): + '''https://lief-project.github.io/doc/stable/api/python/macho.html#loadcommand''' + return self.binary.commands + + def getSegments(self): + '''Extract segmenents from binary and return a human readable string: https://lief-project.github.io/doc/stable/api/python/macho.html#lief.MachO.SegmentCommand''' + segment_info = [] + for segment in self.binary.segments: + name = segment.name + va_start = '0x' + format(segment.virtual_address, '016x') + va_end = '0x' + format(int(va_start, 16) + segment.virtual_size, '016x') + file_start = hex(segment.file_size + self.fat_offset) + file_end = hex(int(file_start, 16) + segment.file_size) + init_prot = self.mapProtection(segment.init_protection) + max_prot = self.mapProtection(segment.max_protection) + flags = self.getSegmentFlags(segment.flags) + if flags != '': + segment_info.append(f'{name.ljust(16)}{init_prot}/{max_prot.ljust(8)} VM: {va_start}-{va_end.ljust(24)} FILE: {file_start}-{file_end} ({flags})') + else: + 
segment_info.append(f'{name.ljust(16)}{init_prot}/{max_prot.ljust(8)} VM: {va_start}-{va_end.ljust(24)} FILE: {file_start}-{file_end}') + return segment_info + + def getSections(self): + '''Extract sections from binary and return in human readable format: https://lief-project.github.io/doc/stable/api/python/macho.html#lief.MachO.Section''' + sections_info = [] + sections_info.append("SEGMENT".ljust(14) + "SECTION".ljust(20) + "TYPE".ljust(28) + "VIRTUAL MEMORY".ljust(32) + "FILE".ljust(26) + "FLAGS".ljust(40)) + sections_info.append(len(sections_info[0])*"=") + for section in self.binary.sections: + segment_name = section.segment_name + section_name = section.fullname + section_type = section.type.name + section_va_start = hex(section.virtual_address) + section_va_end = hex(section.virtual_address + section.offset) + section_size_start = hex(section.offset + self.fat_offset) + section_size_end = hex(section.size + section.offset + self.fat_offset) + section_flags_list = section.flags_list + flags_strings = [flag.name for flag in section_flags_list] + flags = " ".join(flags_strings) + sections_info.append((f'{segment_name.ljust(14)}{section_name.ljust(20)}{section_type.ljust(28)}{section_va_start}-{section_va_end.ljust(20)}{section_size_start}-{section_size_end}\t\t({flags})')) + return sections_info + + def getSymbols(self): + '''Get all symbols from the binary (LC_SYMTAB, Chained Fixups, Exports Trie): https://lief-project.github.io/doc/stable/api/python/macho.html#symbol''' + return self.binary.symbols + + def getChainedFixups(self): + '''Return Chained Fixups information: https://lief-project.github.io/doc/latest/api/python/macho.html#chained-binding-info''' + return self.binary.dyld_chained_fixups + + def getExportTrie(self): + '''Return Export Trie information: https://lief-project.github.io/doc/latest/api/python/macho.html#dyldexportstrie-command''' + try: + return self.binary.dyld_exports_trie.show_export_trie() + except: + return "NO EXPORT TRIE" + + def 
getUUID(self): + '''Return UUID as string and in UUID format: https://lief-project.github.io/doc/stable/api/python/macho.html#uuidcommand''' + for cmd in self.binary.commands: + if isinstance(cmd, lief.MachO.UUIDCommand): + uuid_bytes = cmd.uuid + break + uuid_string = str(uuid.UUID(bytes=bytes(uuid_bytes))) + return uuid_string + + def getMain(self): + '''Determine the entry point of an executable.''' + return self.binary.main_command + + def getStringSection(self): + '''Return strings from the __cstring (string table).''' + extracted_strings = set() + for section in self.binary.sections: + if section.type == lief.MachO.SECTION_TYPES.CSTRING_LITERALS: + extracted_strings.update(section.content.tobytes().split(b'\x00')) + return extracted_strings + + def findAllStringsInBinary(self): + '''Check every binary section to find strings.''' + extracted_strings = "" + byte_set = set() + for section in self.binary.sections: + byte_set.update(section.content.tobytes().split(b'\x00')) + for byte_item in byte_set: + try: + decoded_string = byte_item.decode('utf-8') + extracted_strings += decoded_string + "\n" + except UnicodeDecodeError: + pass + return extracted_strings +### --- --- --- ### + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Mach-O files parser for binary analysis.") + ### --- I. 
MACH-O --- ### + parser.add_argument('-p', '--path', required=True, help="Path to the Mach-O file.") + parser.add_argument('--file_type', action='store_true', help="Print binary file type.") + parser.add_argument('--header_flags', action='store_true', help="Print binary header flags.") + parser.add_argument('--endian', action='store_true', help="Print binary endianess.") + parser.add_argument('--header', action='store_true', help="Print binary header.") + parser.add_argument('--load_commands', action='store_true', help="Print binary load commands names.") + parser.add_argument('--segments', action='store_true', help="Print binary segments in human friendly form.") + parser.add_argument('--sections', action='store_true', help="Print binary sections in human friendly form.") + parser.add_argument('--symbols', action='store_true', help="Print all binary symbols.") + parser.add_argument('--chained_fixups', action='store_true', help="Print Chained Fixups information.") + parser.add_argument('--exports_trie', action='store_true', help="Print Export Trie information.") + parser.add_argument('--uuid', action='store_true', help="Print UUID.") + parser.add_argument('--main', action='store_true', help="Print entry point and stack size.") + parser.add_argument('--strings_section', action='store_true', help="Print strings from __cstring section.") + parser.add_argument('--all_strings', action='store_true', help="Print strings from all sections.") + parser.add_argument('--save_strings', help="Parse all sections, detect strings and save them to a file.") + parser.add_argument('--info', action='store_true', default=False , help="Print header, load commands, segments, sections, symbols and strings.") + + args = parser.parse_args() + file_path = os.path.abspath(args.path) + + ### --- I. 
MACH-O --- ### + try: # Check if the file is in a valid Mach-O format + if os.path.exists(file_path): + binaries = lief.MachO.parse(file_path) + snake_instance = SnakeI(binaries) + else: + print(f'The file {file_path} does not exist.') + exit() + except Exception as e: # Exit if not + print(f"An error occurred: {e}") + exit() + + if args.file_type: # Print binary file type + print(f'File type: {snake_instance.getFileType()}') + + if args.header_flags: # Print binary header flags + header_flag_list = snake_instance.getHeaderFlags() + print("Header flags:", " ".join(header_flag.name for header_flag in header_flag_list)) + + if args.endian: # Print binary endianess + print(f'Endianess: {snake_instance.getEndianess()}') + + if args.header: # Print binary header + print(snake_instance.getBinaryHeader()) + + if args.load_commands: # Print binary load commands + load_commands_list = snake_instance.getLoadCommands() + print("Load Commands:", " ".join(load_command.command.name for load_command in load_commands_list)) + + if args.segments: # Print binary segments in human friendly form + for segment in snake_instance.getSegments(): + print(segment) + + if args.sections: # Print binary sections in human friendly form + for section in snake_instance.getSections(): + print(section) + + if args.symbols: # Print symbols + for symbol in snake_instance.getSymbols(): + print(symbol.name) + + if args.chained_fixups: # Print Chained Fixups information + print(snake_instance.getChainedFixups()) + + if args.exports_trie: # Print Exports Trie information + print(snake_instance.getExportTrie()) + + if args.uuid: # Print UUID + print(f'UUID: {snake_instance.getUUID()}') + + if args.main: # Print entry point and stack size + print(f'Entry point: {hex(snake_instance.getMain().entrypoint)}') + print(f'Stack size: {hex(snake_instance.getMain().stack_size)}') + + if args.strings_section: # Print strings from __cstring section + print('Strings from __cstring section:') + 
print('-------------------------------') + for string in (snake_instance.getStringSection()): + print(string) + + if args.all_strings: # Print strings from all sections. + print(snake_instance.findAllStringsInBinary()) + + if args.save_strings: # Parse all sections, detect strings and save them to a file + extracted_strings = snake_instance.findAllStringsInBinary() + with open(args.save_strings, 'a') as f: + for s in extracted_strings: + f.write(s) + + if args.info: # Print all info about the binary + print('\n<=== HEADER ===>') + print(snake_instance.getBinaryHeader()) + print('\n<=== LOAD COMMANDS ===>') + for lcd in snake_instance.getLoadCommands(): + print(lcd) + print("="*50) + print('\n<=== SEGMENTS ===>') + for segment in snake_instance.getSegments(): + print(segment) + print('\n<=== SECTIONS ===>') + for section in snake_instance.getSections(): + print(section) + print('\n<=== SYMBOLS ===>') + for symbol in snake_instance.getSymbols(): + print(symbol.name) + print('\n<=== STRINGS ===>') + print('Strings from __cstring section:') + print('-------------------------------') + for string in (snake_instance.getStringSection()): + print(string) + print('\n<=== UUID ===>') + print(f'{snake_instance.getUUID()}') + print('\n<=== ENDIANESS ===>') + print(snake_instance.getEndianess()) + print('\n<=== ENTRYPOINT ===>') + print(f'{hex(snake_instance.getMain().entrypoint)}') \ No newline at end of file diff --git a/I. Mach-O/python/MachOFileFinder.py b/I. Mach-O/python/MachOFileFinder.py new file mode 100755 index 0000000..3382a5d --- /dev/null +++ b/I. 
Mach-O/python/MachOFileFinder.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +import os +import lief +import sys +import argparse + +class MachOFileFinder: + '''Class for finding ARM64 Mach-O binaries in a given directory.''' + + def __init__(self, directory_path, recursive=False): + '''Constructor to initialize the directory path and recursive flag.''' + self.directory_path = directory_path + self.recursive = recursive + + def parse_fat_binary(self, binaries): + '''Function to parse Mach-O file, whether compiled for multiple architectures or just for a single one. + It returns the ARM64 binary if it exists. If not, it exits the program.''' + arm64_bin = None + for binary in binaries: + if binary.header.cpu_type == lief.MachO.CPU_TYPES.ARM64: + arm64_bin = binary + return arm64_bin + + def process_directory(self, root, files): + '''Method to process all files in the specified directory.''' + for file_name in files: + file_path = os.path.abspath(os.path.join(root, file_name)) + try: + binaries = lief.MachO.parse(file_path) + binary = self.parse_fat_binary(binaries) + if binary is not None: + print(f"{binary.header.file_type.name}:{file_path}") + except: + pass # Ignore parsing errors or non-Mach-O files + + def process_files(self): + '''Method to process files based on the specified search type.''' + for root, dirs, files in os.walk(self.directory_path): + self.process_directory(root, files) + + if not self.recursive: + break # Break the loop if not searching recursively + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Find ARM64 Mach-O binaries in a directory.') + parser.add_argument('path', metavar='PATH', type=str, help='the directory path to search for Mach-O binaries') + parser.add_argument('-r', '--recursive', action='store_true', help='search recursively (default: false)') + + args = parser.parse_args() + directory_path = args.path + + if not os.path.isdir(directory_path): + print(f"Error: {directory_path} is not a valid directory.") 
+ sys.exit(1) + + macho_finder = MachOFileFinder(directory_path, recursive=args.recursive) + macho_finder.process_files() diff --git a/LICENSE b/LICENSE index 261eeb9..e72bfdd 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,674 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. - 1. Definitions. + Preamble - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. + The GNU General Public License is a free, copyleft license for +software and other kinds of works. - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. 
+ When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
+ For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. + The precise terms and conditions for copying, distribution and +modification follow. - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. + TERMS AND CONDITIONS - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. + 0. Definitions. - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: + "This License" refers to version 3 of the GNU General Public License. - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. 
- (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. + A "covered work" means either the unmodified Program or a work based +on the Program. - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. 
+ To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. + 1. Source Code. - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. 
However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. - END OF TERMS AND CONDITIONS + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. - APPENDIX: How to apply the Apache License to your work. + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. - Copyright [yyyy] [name of copyright owner] + The Corresponding Source for a work in source code form is that +same work. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at + 2. Basic Permissions. - http://www.apache.org/licenses/LICENSE-2.0 + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. 
- Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. 
Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>. 
\ No newline at end of file diff --git a/README.md b/README.md index a84589b..7c19f94 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,60 @@ # Snake_Apple The code repository for the Snake&Apple article series. + +## ARTICLES +![alt](img/Snake_Apple.jpg) +* ☑ [I. Mach-O](https://medium.com/p/a8eda4b87263) +* ☐ [II. Code Signing]() +* ☐ [III. Checksec]() +* ☐ [IV. Dylibs]() + +## TOOLS +![alt](img/CrimsonUroboros.jpg) +[CrimsonUroboros](I.%20Mach-O/python/CrimsonUroboros.py) - core program resulting from the Snake&Apple article series for binary analysis. You may find older versions of this script in each article directory in this repository. +* Usage +```console +usage: CrimsonUroboros.py [-h] -p PATH [--file_type] [--header_flags] [--endian] [--header] [--load_commands] [--segments] [--sections] [--symbols] [--chained_fixups] [--exports_trie] + [--uuid] [--main] [--strings_section] [--all_strings] [--save_strings SAVE_STRINGS] [--info] + +Mach-O files parser for binary analysis. + +options: + -h, --help show this help message and exit + -p PATH, --path PATH Path to the Mach-O file. + --file_type Print binary file type. + --header_flags Print binary header flags. + --endian Print binary endianess. + --header Print binary header. + --load_commands Print binary load commands names. + --segments Print binary segments in human friendly form. + --sections Print binary sections in human friendly form. + --symbols Print all binary symbols. + --chained_fixups Print Chained Fixups information. + --exports_trie Print Export Trie information. + --uuid Print UUID. + --main Print entry point and stack size. + --strings_section Print strings from __cstring section. + --all_strings Print strings from all sections. + --save_strings SAVE_STRINGS + Parse all sections, detect strings and save them to a file. + --info Print header, load commands, segments, sections, symbols and strings. 
+``` +* Example: +```bash +CrimsonUroboros.py -p PATH --info +``` +[MachOFileFinder](I.%20Mach-O/python/MachOFileFinder.py) - designed to find ARM64 Mach-O binaries within a specified directory and print their file type. +* Usage: +```bash +python MachOFileFinder.py PATH +``` +* Example: +```bash +python MachOFileFinder.py . -r 2>/dev/null +EXECUTE:/Users/karmaz95/t/pingsender +DYLIB:/Users/karmaz95/t/dylibs/use_dylib_app/customs/custom.dylib +BUNDLE:/Users/karmaz95/t/bundles/MyBundle +``` + +## WHY UROBOROS? +I will write the code for each article as a class SnakeX, where X will be the article number. To make it easier for the audience to follow. Each Snake class will be a child of the previous one and infinitely "eat itself" (inherit methods of the previous class), like Uroboros. diff --git a/img/CrimsonUroboros.jpg b/img/CrimsonUroboros.jpg new file mode 100644 index 0000000..cf4ac18 Binary files /dev/null and b/img/CrimsonUroboros.jpg differ diff --git a/img/Snake_Apple.jpg b/img/Snake_Apple.jpg new file mode 100644 index 0000000..0808e1f Binary files /dev/null and b/img/Snake_Apple.jpg differ