diff --git a/browser/safari/extract_storage.go b/browser/safari/extract_storage.go new file mode 100644 index 0000000..cd88316 --- /dev/null +++ b/browser/safari/extract_storage.go @@ -0,0 +1,344 @@ +package safari + +import ( + "database/sql" + "encoding/binary" + "fmt" + "os" + "path/filepath" + "unicode/utf16" + + _ "modernc.org/sqlite" + + "github.com/moond4rk/hackbrowserdata/log" + "github.com/moond4rk/hackbrowserdata/types" +) + +// Modern WebKit (Safari 17+) stores localStorage under a nested, partitioned layout rooted at +// either WebsiteDataStore/<uuid>/Origins (per named profile) or WebsiteData/Default +// (the pre-profile default store). Within that root: +// +// <root>/<top-hash>/<frame-hash>/origin — binary; encodes top+frame origins +// <root>/<top-hash>/<frame-hash>/LocalStorage/localstorage.sqlite3 +// +// top-hash == frame-hash ⇒ first-party; they differ for third-party partitioned storage. +// We report the frame origin because that's what window.localStorage exposes to JS. +// ItemTable: (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB NOT NULL ON CONFLICT FAIL); +// value BLOBs are UTF-16 LE strings. +// +// The flat "LocalStorage/<scheme>_<host>_<port>.localstorage" directory that older builds used +// is empty on current Safari and is no longer a supported source. + +const ( + webkitOriginFile = "origin" + webkitLocalStorageSubdir = "LocalStorage" + webkitLocalStorageDB = "localstorage.sqlite3" + webkitOriginSaltName = "salt" // HMAC salt sibling of the <top-hash> dirs; not a data dir + + maxLocalStorageValueLength = 2048 +) + +// origin file encoding-byte constants (WebCore SecurityOrigin serialization). +const ( + originEncASCII = 0x01 // Latin-1 / ASCII + originEncUTF16 = 0x00 // UTF-16 LE +) + +// Port marker values after the (scheme, host) pair in an origin block. +// 0x00 → port is the scheme default (stored as 0). +// 0x01 → next two bytes are a uint16_le port.
+const ( + originPortDefaultMarker = 0x00 + originPortExplicitFlag = 0x01 +) + +func extractLocalStorage(root string) ([]types.StorageEntry, error) { + dirs, err := findOriginDataDirs(root) + if err != nil { + return nil, err + } + + var entries []types.StorageEntry + for _, od := range dirs { + origin, err := readOriginFile(filepath.Join(od, webkitOriginFile)) + if err != nil { + log.Debugf("safari localstorage: origin %s: %v", od, err) + continue + } + dbPath := filepath.Join(od, webkitLocalStorageSubdir, webkitLocalStorageDB) + items, err := readLocalStorageFile(dbPath) + if err != nil { + log.Debugf("safari localstorage: db %s: %v", dbPath, err) + continue + } + for _, it := range items { + entries = append(entries, types.StorageEntry{ + URL: origin, + Key: it.key, + Value: it.value, + }) + } + } + return entries, nil +} + +// countLocalStorage sums ItemTable row counts across every origin DB under root without +// parsing origin files or decoding values — CountEntries callers only need the total, not the +// URLs or plaintext. COUNT(key) naturally excludes NULL keys, matching the same skip rule +// applied by readLocalStorageFile, so count and extract stay in sync. +func countLocalStorage(root string) (int, error) { + dirs, err := findOriginDataDirs(root) + if err != nil { + return 0, err + } + total := 0 + for _, od := range dirs { + dbPath := filepath.Join(od, webkitLocalStorageSubdir, webkitLocalStorageDB) + n, err := countLocalStorageFile(dbPath) + if err != nil { + log.Debugf("safari localstorage: count %s: %v", dbPath, err) + continue + } + total += n + } + return total, nil +} + +func countLocalStorageFile(path string) (int, error) { + // mode=ro (no immutable) so SQLite replays the copied -wal sidecar — this surfaces entries + // Safari has committed to WAL but not yet checkpointed to the main DB. Writes SQLite might + // make to the temp-copy's -shm during replay are harmless; the Session cleanup removes + // everything. 
Live-file reads (profiles.go) still use immutable=1 to stay off the real WAL. + dsn := "file:" + path + "?mode=ro" + db, err := sql.Open("sqlite", dsn) + if err != nil { + return 0, fmt.Errorf("open %s: %w", path, err) + } + defer db.Close() + if err := db.Ping(); err != nil { + return 0, fmt.Errorf("ping %s: %w", path, err) + } + var count int + if err := db.QueryRow(`SELECT COUNT(key) FROM ItemTable`).Scan(&count); err != nil { + return 0, fmt.Errorf("count ItemTable: %w", err) + } + return count, nil +} + +// findOriginDataDirs returns /

/

/ paths that contain both an "origin" file and +// a "LocalStorage/localstorage.sqlite3" database. Non-directory entries, the "salt" sibling, +// and partition dirs without localStorage data are silently skipped. +func findOriginDataDirs(root string) ([]string, error) { + topEntries, err := os.ReadDir(root) + if err != nil { + return nil, fmt.Errorf("read origins root %s: %w", root, err) + } + var out []string + for _, top := range topEntries { + if !top.IsDir() || top.Name() == webkitOriginSaltName { + continue + } + topPath := filepath.Join(root, top.Name()) + frameEntries, err := os.ReadDir(topPath) + if err != nil { + continue + } + for _, frame := range frameEntries { + if !frame.IsDir() { + continue + } + framePath := filepath.Join(topPath, frame.Name()) + if _, err := os.Stat(filepath.Join(framePath, webkitOriginFile)); err != nil { + continue + } + dbPath := filepath.Join(framePath, webkitLocalStorageSubdir, webkitLocalStorageDB) + if _, err := os.Stat(dbPath); err != nil { + continue + } + out = append(out, framePath) + } + } + return out, nil +} + +// originEndpoint is one half of an origin file (top-frame or frame). Port 0 means the scheme +// default (443 for https, 80 for http) and is omitted from the URL rendering. +type originEndpoint struct { + scheme string + host string + port uint16 +} + +// readOriginFile parses WebKit's SecurityOrigin binary serialization and returns the frame +// origin URL (scheme://host[:port]). The file holds two origin blocks back-to-back: top-frame +// then frame. When the frame block is missing/unreadable we fall back to the top-frame so we +// can still attribute the data to *something* meaningful. 
+func readOriginFile(path string) (string, error) { + data, err := os.ReadFile(path) + if err != nil { + return "", fmt.Errorf("read origin file %s: %w", path, err) + } + top, pos, terr := readOriginBlock(data, 0) + if terr != nil { + return "", fmt.Errorf("parse top-frame origin: %w", terr) + } + frame, _, ferr := readOriginBlock(data, pos) + if ferr != nil { + // Partitioned info unavailable — attribute to the top-frame origin. + frame = top + } + if frame.scheme == "" || frame.host == "" { + return "", fmt.Errorf("origin file missing scheme/host") + } + return formatOriginURL(frame), nil +} + +// readOriginBlock reads one origin block: scheme record, host record, port marker. +// Returns the parsed endpoint and the byte offset immediately after the block. +func readOriginBlock(data []byte, pos int) (originEndpoint, int, error) { + var ep originEndpoint + var err error + ep.scheme, pos, err = readOriginString(data, pos) + if err != nil { + return ep, pos, err + } + ep.host, pos, err = readOriginString(data, pos) + if err != nil { + return ep, pos, err + } + if pos >= len(data) { + return ep, pos, fmt.Errorf("unexpected EOF before port marker") + } + marker := data[pos] + pos++ + switch marker { + case originPortDefaultMarker: + ep.port = 0 + case originPortExplicitFlag: + if pos+2 > len(data) { + return ep, pos, fmt.Errorf("truncated port value at offset %d", pos) + } + ep.port = binary.LittleEndian.Uint16(data[pos : pos+2]) + pos += 2 + default: + return ep, pos, fmt.Errorf("unexpected port marker 0x%02x at offset %d", marker, pos-1) + } + return ep, pos, nil +} + +// readOriginString consumes one length-prefixed record (uint32_le length + encoding byte + data). 
+func readOriginString(data []byte, pos int) (string, int, error) { + if pos+5 > len(data) { + return "", pos, fmt.Errorf("truncated string record at offset %d", pos) + } + length := int(binary.LittleEndian.Uint32(data[pos : pos+4])) + enc := data[pos+4] + pos += 5 + if length < 0 || pos+length > len(data) { + return "", pos, fmt.Errorf("string record overruns buffer: length %d at offset %d", length, pos-5) + } + chunk := data[pos : pos+length] + pos += length + switch enc { + case originEncASCII: + return decodeLatin1(chunk), pos, nil + case originEncUTF16: + return decodeUTF16LE(chunk), pos, nil + default: + return decodeLatin1(chunk), pos, nil + } +} + +// decodeLatin1 converts ISO-8859-1 bytes to a valid UTF-8 Go string. Latin-1 byte values map +// 1:1 to Unicode code points U+0000–U+00FF. Mirrors the helper in chromium/extract_storage.go. +func decodeLatin1(b []byte) string { + runes := make([]rune, len(b)) + for i, c := range b { + runes[i] = rune(c) + } + return string(runes) +} + +func formatOriginURL(ep originEndpoint) string { + url := ep.scheme + "://" + ep.host + if ep.port != 0 { + url += fmt.Sprintf(":%d", ep.port) + } + return url +} + +type localStorageItem struct { + key string + value string +} + +func readLocalStorageFile(path string) ([]localStorageItem, error) { + // mode=ro (no immutable) — see countLocalStorageFile for the WAL-replay rationale; the same + // live-vs-temp split applies here. ORDER BY key, rowid makes exports byte-for-byte stable + // across runs and SQLite versions. 
+ dsn := "file:" + path + "?mode=ro" + db, err := sql.Open("sqlite", dsn) + if err != nil { + return nil, fmt.Errorf("open %s: %w", path, err) + } + defer db.Close() + if err := db.Ping(); err != nil { + return nil, fmt.Errorf("ping %s: %w", path, err) + } + + rows, err := db.Query(`SELECT key, value FROM ItemTable ORDER BY key, rowid`) + if err != nil { + return nil, fmt.Errorf("query ItemTable: %w", err) + } + defer rows.Close() + + var items []localStorageItem + for rows.Next() { + var key sql.NullString + var value []byte + if err := rows.Scan(&key, &value); err != nil { + log.Debugf("safari localstorage: scan row in %s: %v", path, err) + continue + } + if !key.Valid { + // NULL keys would collide with legitimate empty-string keys in the output and are + // not meaningful localStorage entries. The UNIQUE constraint in ItemTable still + // permits multiple NULL rows in SQLite, so we filter them here. + log.Debugf("safari localstorage: skip row with NULL key in %s", path) + continue + } + items = append(items, localStorageItem{ + key: key.String, + value: decodeLocalStorageValue(value), + }) + } + return items, rows.Err() +} + +// decodeLocalStorageValue treats the BLOB as UTF-16 LE. Values at or above the cap are replaced +// with a size marker to keep JSON/CSV output bounded, matching chromium/extract_storage.go. +func decodeLocalStorageValue(b []byte) string { + if len(b) >= maxLocalStorageValueLength { + return fmt.Sprintf( + "value is too long, length is %d, supported max length is %d", + len(b), maxLocalStorageValueLength, + ) + } + return decodeUTF16LE(b) +} + +// decodeUTF16LE returns the input as a Go string on odd-length (malformed) inputs; WebKit values +// are always even-length in practice but we don't want a stray byte to drop a whole row. 
+func decodeUTF16LE(b []byte) string { + if len(b) == 0 { + return "" + } + if len(b)%2 != 0 { + return string(b) + } + u16 := make([]uint16, len(b)/2) + for i := range u16 { + u16[i] = binary.LittleEndian.Uint16(b[i*2:]) + } + return string(utf16.Decode(u16)) +} diff --git a/browser/safari/extract_storage_test.go b/browser/safari/extract_storage_test.go new file mode 100644 index 0000000..9c979e6 --- /dev/null +++ b/browser/safari/extract_storage_test.go @@ -0,0 +1,335 @@ +package safari + +import ( + "database/sql" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + _ "modernc.org/sqlite" +) + +// --------------------------------------------------------------------------- +// readOriginBlock / readOriginFile +// --------------------------------------------------------------------------- + +func TestReadOriginBlock_FirstParty(t *testing.T) { + data := encodeOriginFile("https://example.com", "https://example.com") + top, pos, err := readOriginBlock(data, 0) + require.NoError(t, err) + assert.Equal(t, "https", top.scheme) + assert.Equal(t, "example.com", top.host) + assert.Equal(t, uint16(0), top.port, "port 0 ⇒ scheme default") + + frame, _, err := readOriginBlock(data, pos) + require.NoError(t, err) + assert.Equal(t, "https://example.com", formatOriginURL(frame)) +} + +func TestReadOriginBlock_NonDefaultPort(t *testing.T) { + data := encodeOriginFile("https://example.com:8443", "https://example.com:8443") + top, _, err := readOriginBlock(data, 0) + require.NoError(t, err) + assert.Equal(t, uint16(8443), top.port) + assert.Equal(t, "https://example.com:8443", formatOriginURL(top)) +} + +func TestReadOriginBlock_Latin1HighByte(t *testing.T) { + // WebKit stores scheme/host records with encoding byte 0x01 = Latin-1. Verify high-byte + // bytes decode as Latin-1 (é = 0xE9) rather than being passed through as invalid UTF-8. 
+ data := []byte{ + 0x04, 0x00, 0x00, 0x00, 0x01, 'h', 't', 't', 'p', // scheme "http" + 0x04, 0x00, 0x00, 0x00, 0x01, 'c', 'a', 'f', 0xe9, // host "café" (Latin-1) + 0x00, // port default + } + ep, _, err := readOriginBlock(data, 0) + require.NoError(t, err) + assert.Equal(t, "http", ep.scheme) + assert.Equal(t, "café", ep.host) +} + +func TestDecodeLatin1(t *testing.T) { + assert.Equal(t, "café", decodeLatin1([]byte{'c', 'a', 'f', 0xe9})) + assert.Equal(t, "hello", decodeLatin1([]byte("hello"))) + assert.Empty(t, decodeLatin1(nil)) +} + +func TestReadOriginFile_FramePreferred(t *testing.T) { + dir := t.TempDir() + originPath := filepath.Join(dir, "origin") + require.NoError(t, os.WriteFile(originPath, + encodeOriginFile("https://top.example.com", "https://iframe.example.com"), 0o644)) + + got, err := readOriginFile(originPath) + require.NoError(t, err) + assert.Equal(t, "https://iframe.example.com", got) +} + +func TestReadOriginFile_FallbackToTop(t *testing.T) { + // Write only the top-frame block — no frame follows. Extractor should still succeed by + // falling back to the top-frame origin. 
+ var buf []byte + buf = appendOriginBlock(buf, "https://example.com") + originPath := filepath.Join(t.TempDir(), "origin") + require.NoError(t, os.WriteFile(originPath, buf, 0o644)) + + got, err := readOriginFile(originPath) + require.NoError(t, err) + assert.Equal(t, "https://example.com", got) +} + +func TestReadOriginFile_Malformed(t *testing.T) { + originPath := filepath.Join(t.TempDir(), "origin") + require.NoError(t, os.WriteFile(originPath, []byte{0x01, 0x02}, 0o644)) + + _, err := readOriginFile(originPath) + require.Error(t, err) +} + +// --------------------------------------------------------------------------- +// decodeUTF16LE / decodeLocalStorageValue +// --------------------------------------------------------------------------- + +func TestDecodeUTF16LE(t *testing.T) { + t.Run("ascii", func(t *testing.T) { + assert.Equal(t, "hello", decodeUTF16LE(encodeUTF16LE("hello"))) + }) + t.Run("cjk", func(t *testing.T) { + assert.Equal(t, "你好世界", decodeUTF16LE(encodeUTF16LE("你好世界"))) + }) + t.Run("mixed", func(t *testing.T) { + assert.Equal(t, "hello 世界 🌍", decodeUTF16LE(encodeUTF16LE("hello 世界 🌍"))) + }) + t.Run("empty", func(t *testing.T) { + assert.Empty(t, decodeUTF16LE(nil)) + assert.Empty(t, decodeUTF16LE([]byte{})) + }) + t.Run("odd length falls back to raw string", func(t *testing.T) { + assert.Equal(t, "abc", decodeUTF16LE([]byte("abc"))) + }) +} + +func TestDecodeLocalStorageValue_Truncates(t *testing.T) { + // 1100 chars × 2 bytes = 2200 bytes, over the 2048 cap. 
+ oversized := encodeUTF16LE(strings.Repeat("x", 1100)) + got := decodeLocalStorageValue(oversized) + assert.Contains(t, got, "too long") + assert.Contains(t, got, "2048") +} + +// --------------------------------------------------------------------------- +// extractLocalStorage — end-to-end over real nested layout fixtures +// --------------------------------------------------------------------------- + +func TestExtractLocalStorage_SingleOrigin(t *testing.T) { + root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{ + "https://example.com": {{Key: "auth_token", Value: "abc123"}}, + }) + entries, err := extractLocalStorage(root) + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, "https://example.com", entries[0].URL) + assert.Equal(t, "auth_token", entries[0].Key) + assert.Equal(t, "abc123", entries[0].Value) + assert.False(t, entries[0].IsMeta) +} + +func TestExtractLocalStorage_MultiOrigin(t *testing.T) { + root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{ + "https://github.com": { + {Key: "theme", Value: "dark"}, + {Key: "lang", Value: "en"}, + }, + "https://example.com:8443": { + {Key: "session", Value: "xyz"}, + }, + }) + entries, err := extractLocalStorage(root) + require.NoError(t, err) + require.Len(t, entries, 3) + + byURL := make(map[string][]string) + for _, e := range entries { + byURL[e.URL] = append(byURL[e.URL], e.Key+"="+e.Value) + } + assert.ElementsMatch(t, []string{"theme=dark", "lang=en"}, byURL["https://github.com"]) + assert.ElementsMatch(t, []string{"session=xyz"}, byURL["https://example.com:8443"]) +} + +func TestExtractLocalStorage_CJKAndEmoji(t *testing.T) { + root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{ + "https://example.com": { + {Key: "名字", Value: "张三"}, + {Key: "status", Value: "hello 世界 🌍"}, + }, + }) + entries, err := extractLocalStorage(root) + require.NoError(t, err) + require.Len(t, entries, 2) + + values := make(map[string]string) + for _, e 
:= range entries { + values[e.Key] = e.Value + } + assert.Equal(t, "张三", values["名字"]) + assert.Equal(t, "hello 世界 🌍", values["status"]) +} + +func TestExtractLocalStorage_EmptyItemTable(t *testing.T) { + root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{ + "https://example.com": nil, + }) + entries, err := extractLocalStorage(root) + require.NoError(t, err) + assert.Empty(t, entries) +} + +func TestExtractLocalStorage_TruncatesOversizedValue(t *testing.T) { + root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{ + "https://example.com": {{Key: "big", Value: strings.Repeat("x", 1100)}}, + }) + entries, err := extractLocalStorage(root) + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Contains(t, entries[0].Value, "too long") +} + +func TestExtractLocalStorage_Partitioned(t *testing.T) { + // Manually construct a partitioned third-party entry: YouTube iframe inside Google top-frame. + root := filepath.Join(t.TempDir(), "Origins") + require.NoError(t, os.MkdirAll(root, 0o755)) + writeTestOriginStore(t, root, "topHash", "frameHash", + "https://accounts.google.com", "https://accounts.youtube.com", + []testLocalStorageItem{{Key: "yt-session", Value: "embedded"}}, + ) + + entries, err := extractLocalStorage(root) + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, "https://accounts.youtube.com", entries[0].URL, "frame origin preferred over top-frame") +} + +func TestExtractLocalStorage_SkipsSaltAndStrayFiles(t *testing.T) { + root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{ + "https://example.com": {{Key: "a", Value: "1"}}, + }) + // Drop a "salt" sibling that must not be traversed, plus a stray file at root. 
+ require.NoError(t, os.WriteFile(filepath.Join(root, "salt"), []byte("pretend salt"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(root, "README"), []byte("noise"), 0o644)) + + entries, err := extractLocalStorage(root) + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, "https://example.com", entries[0].URL) +} + +func TestExtractLocalStorage_SkipsFrameDirsWithoutDB(t *testing.T) { + // Partition dirs that only have "origin" but no LocalStorage/ subdir must not error out — + // real Safari has plenty of these (cookies-only partitions). + root := filepath.Join(t.TempDir(), "Origins") + frameDir := filepath.Join(root, "topHash", "frameHash") + require.NoError(t, os.MkdirAll(frameDir, 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(frameDir, "origin"), + encodeOriginFile("https://example.com", "https://example.com"), 0o644)) + + entries, err := extractLocalStorage(root) + require.NoError(t, err) + assert.Empty(t, entries) +} + +func TestExtractLocalStorage_DirMissing(t *testing.T) { + _, err := extractLocalStorage(filepath.Join(t.TempDir(), "does-not-exist")) + require.Error(t, err) +} + +func TestExtractLocalStorage_EmptyRoot(t *testing.T) { + entries, err := extractLocalStorage(t.TempDir()) + require.NoError(t, err) + assert.Empty(t, entries) +} + +// --------------------------------------------------------------------------- +// countLocalStorage +// --------------------------------------------------------------------------- + +func TestCountLocalStorage(t *testing.T) { + root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{ + "https://a.com": {{Key: "k1", Value: "v1"}, {Key: "k2", Value: "v2"}}, + "https://b.com": {{Key: "k3", Value: "v3"}}, + "https://c.com:8443": {{Key: "k4", Value: "v4"}}, + }) + count, err := countLocalStorage(root) + require.NoError(t, err) + assert.Equal(t, 4, count) +} + +func TestCountLocalStorage_DirMissing(t *testing.T) { + count, err := 
countLocalStorage(filepath.Join(t.TempDir(), "nope")) + require.Error(t, err) + assert.Equal(t, 0, count) +} + +// --------------------------------------------------------------------------- +// NULL-key handling — readLocalStorageFile / countLocalStorageFile both skip NULL keys, +// keeping count and extract in sync. +// --------------------------------------------------------------------------- + +func TestReadLocalStorageFile_SkipsNullKey(t *testing.T) { + dbPath := filepath.Join(t.TempDir(), "ls.sqlite3") + writeLocalStorageDB(t, dbPath, []testLocalStorageItem{ + {Key: "real", Value: "keeper"}, + }, true /*addNullKey*/) + + items, err := readLocalStorageFile(dbPath) + require.NoError(t, err) + require.Len(t, items, 1) + assert.Equal(t, "real", items[0].key) + assert.Equal(t, "keeper", items[0].value) +} + +func TestCountLocalStorageFile_SkipsNullKey(t *testing.T) { + dbPath := filepath.Join(t.TempDir(), "ls.sqlite3") + writeLocalStorageDB(t, dbPath, []testLocalStorageItem{ + {Key: "k1", Value: "v1"}, + {Key: "k2", Value: "v2"}, + }, true /*addNullKey*/) + + count, err := countLocalStorageFile(dbPath) + require.NoError(t, err) + assert.Equal(t, 2, count, "NULL keys are excluded from count to match extract's skip rule") +} + +func TestReadLocalStorageFile_ReturnsRowsInKeyOrder(t *testing.T) { + // Rows are inserted in reverse alphabetical order; ORDER BY key, rowid in the extractor + // query must surface them ascending so exports are deterministic across runs. 
+ dbPath := filepath.Join(t.TempDir(), "ls.sqlite3") + writeLocalStorageDB(t, dbPath, []testLocalStorageItem{ + {Key: "zebra", Value: "z"}, + {Key: "mango", Value: "m"}, + {Key: "apple", Value: "a"}, + }, false /*addNullKey*/) + + items, err := readLocalStorageFile(dbPath) + require.NoError(t, err) + require.Len(t, items, 3) + assert.Equal(t, "apple", items[0].key) + assert.Equal(t, "mango", items[1].key) + assert.Equal(t, "zebra", items[2].key) +} + +func TestCountLocalStorageFile_MissingTable(t *testing.T) { + // Real Safari has origin dirs with LocalStorage/localstorage.sqlite3 but no ItemTable yet + // (seen during live verification). countLocalStorageFile must surface the error so the + // caller can log-and-skip rather than counting 0 silently. + dbPath := filepath.Join(t.TempDir(), "empty.sqlite3") + db, err := sql.Open("sqlite", dbPath) + require.NoError(t, err) + require.NoError(t, db.Close()) + + _, err = countLocalStorageFile(dbPath) + require.Error(t, err) +} diff --git a/browser/safari/safari.go b/browser/safari/safari.go index bf9e9e6..5904c13 100644 --- a/browser/safari/safari.go +++ b/browser/safari/safari.go @@ -136,6 +136,8 @@ func (b *Browser) extractCategory(data *types.BrowserData, cat types.Category, p data.Bookmarks, err = extractBookmarks(path) case types.Download: data.Downloads, err = extractDownloads(path, b.profile.downloadOwnerUUID()) + case types.LocalStorage: + data.LocalStorage, err = extractLocalStorage(path) default: return } @@ -158,6 +160,8 @@ func (b *Browser) countCategory(cat types.Category, path string) int { count, err = countBookmarks(path) case types.Download: count, err = countDownloads(path, b.profile.downloadOwnerUUID()) + case types.LocalStorage: + count, err = countLocalStorage(path) default: // Unsupported categories silently return 0. 
} diff --git a/browser/safari/safari_test.go b/browser/safari/safari_test.go index 1212018..6cbb5cd 100644 --- a/browser/safari/safari_test.go +++ b/browser/safari/safari_test.go @@ -3,6 +3,7 @@ package safari import ( "os" "path/filepath" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -78,6 +79,7 @@ func TestNewBrowsers(t *testing.T) { func TestNewBrowsers_MultiProfile(t *testing.T) { const uuid = "5604E6F5-02ED-4E40-8249-63DE7BC986C8" + uuidLower := strings.ToLower(uuid) // Build a pretend ~/Library that mirrors a macOS 14+ layout. library := t.TempDir() @@ -91,6 +93,11 @@ func TestNewBrowsers_MultiProfile(t *testing.T) { // Named profile data under the container. mkFile(t, container, "Safari", "Profiles", uuid, "History.db") + // Named profile's Origins directory (Safari 17+ nested localStorage root) — must exist + // for resolveSourcePaths to register it. + namedOriginsDir := filepath.Join(container, "WebKit", "WebsiteDataStore", uuidLower, "Origins") + require.NoError(t, os.MkdirAll(namedOriginsDir, 0o755)) + // SafariTabs.db registering the named profile with a human-readable title. writeSafariTabsDB(t, filepath.Join(container, safariTabsDBRelPath), []tabRow{ {uuid: "DefaultProfile", title: ""}, @@ -112,12 +119,18 @@ func TestNewBrowsers_MultiProfile(t *testing.T) { assert.Equal(t, legacyHome, b.ProfileDir()) assert.Contains(t, b.sourcePaths, types.History) assert.Equal(t, filepath.Join(legacyHome, "History.db"), b.sourcePaths[types.History].absPath) + // Default profile's LocalStorage root (WebsiteData/Default) isn't created in this fixture, + // so it won't resolve — which is the point: resolveSourcePaths only registers paths that exist. 
+ assert.NotContains(t, b.sourcePaths, types.LocalStorage) case "work": assert.Equal(t, filepath.Join(container, "Safari", "Profiles", uuid), b.ProfileDir()) assert.Contains(t, b.sourcePaths, types.History) assert.Equal(t, filepath.Join(container, "Safari", "Profiles", uuid, "History.db"), b.sourcePaths[types.History].absPath) + require.Contains(t, b.sourcePaths, types.LocalStorage) + assert.Equal(t, namedOriginsDir, b.sourcePaths[types.LocalStorage].absPath) + assert.True(t, b.sourcePaths[types.LocalStorage].isDir) } } } @@ -216,6 +229,15 @@ func TestCountCategory(t *testing.T) { assert.Equal(t, 1, b.countCategory(types.Download, path)) }) + t.Run("LocalStorage", func(t *testing.T) { + dir := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{ + "https://example.com": {{Key: "k1", Value: "v1"}, {Key: "k2", Value: "v2"}}, + "https://go.dev": {{Key: "theme", Value: "dark"}}, + }) + b := &Browser{} + assert.Equal(t, 3, b.countCategory(types.LocalStorage, dir)) + }) + t.Run("UnsupportedCategory", func(t *testing.T) { b := &Browser{} assert.Equal(t, 0, b.countCategory(types.CreditCard, "unused")) @@ -291,6 +313,20 @@ func TestExtractCategory(t *testing.T) { assert.Equal(t, int64(1024), data.Downloads[0].TotalBytes) }) + t.Run("LocalStorage", func(t *testing.T) { + dir := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{ + "https://github.com": {{Key: "theme", Value: "dark"}}, + }) + b := &Browser{} + data := &types.BrowserData{} + b.extractCategory(data, types.LocalStorage, dir) + + require.Len(t, data.LocalStorage, 1) + assert.Equal(t, "https://github.com", data.LocalStorage[0].URL) + assert.Equal(t, "theme", data.LocalStorage[0].Key) + assert.Equal(t, "dark", data.LocalStorage[0].Value) + }) + t.Run("UnsupportedCategory", func(t *testing.T) { b := &Browser{} data := &types.BrowserData{} diff --git a/browser/safari/source.go b/browser/safari/source.go index e33b2f7..d23cc5b 100644 --- a/browser/safari/source.go +++ b/browser/safari/source.go @@ 
-12,6 +12,7 @@ type sourcePath struct { } func file(abs string) sourcePath { return sourcePath{abs: abs} } +func dir(abs string) sourcePath { return sourcePath{abs: abs, isDir: true} } // buildSources dispatches between the default and named-profile path layouts. // @@ -28,33 +29,37 @@ func buildSources(p profileContext) map[types.Category][]sourcePath { } // defaultSources: cookies try macOS 14+ container first, then the ≤13 legacy path. +// LocalStorage for the default profile lives under WebsiteData/Default — the pre-profile-era +// WebKit store that stays readable even after profiles are introduced. func defaultSources(p profileContext) map[types.Category][]sourcePath { home := p.legacyHome containerCookies := filepath.Join(p.container, "Cookies", "Cookies.binarycookies") legacyCookies := filepath.Join(filepath.Dir(home), "Cookies", "Cookies.binarycookies") + defaultLocalStorage := filepath.Join(p.container, "WebKit", "WebsiteData", "Default") return map[types.Category][]sourcePath{ - types.History: {file(filepath.Join(home, "History.db"))}, - types.Cookie: {file(containerCookies), file(legacyCookies)}, - types.Bookmark: {file(filepath.Join(home, "Bookmarks.plist"))}, - types.Download: {file(filepath.Join(home, "Downloads.plist"))}, + types.History: {file(filepath.Join(home, "History.db"))}, + types.Cookie: {file(containerCookies), file(legacyCookies)}, + types.Bookmark: {file(filepath.Join(home, "Bookmarks.plist"))}, + types.Download: {file(filepath.Join(home, "Downloads.plist"))}, + types.LocalStorage: {dir(defaultLocalStorage)}, } } // namedSources omits Bookmark (shared plist with no per-entry profile tag, so attributed to default). // Download is included because Downloads.plist carries DownloadEntryProfileUUIDStringKey per entry; // extractDownloads filters by owner UUID so default and named profiles each see their own downloads. 
-// -// LocalStorage slot for a follow-up PR: -// -// file(filepath.Join(p.container, "WebKit/WebsiteDataStore", p.uuidLower, "LocalStorage")) +// LocalStorage lives under WebKit/WebsiteDataStore//Origins — Safari 17+ uses a nested +// //LocalStorage/localstorage.sqlite3 layout; the flat +// WebsiteDataStore//LocalStorage directory from older builds is empty on modern Safari. func namedSources(p profileContext) map[types.Category][]sourcePath { profileDir := filepath.Join(p.container, "Safari", "Profiles", p.uuidUpper) webkitStore := filepath.Join(p.container, "WebKit", "WebsiteDataStore", p.uuidLower) return map[types.Category][]sourcePath{ - types.History: {file(filepath.Join(profileDir, "History.db"))}, - types.Cookie: {file(filepath.Join(webkitStore, "Cookies", "Cookies.binarycookies"))}, - types.Download: {file(filepath.Join(p.legacyHome, "Downloads.plist"))}, + types.History: {file(filepath.Join(profileDir, "History.db"))}, + types.Cookie: {file(filepath.Join(webkitStore, "Cookies", "Cookies.binarycookies"))}, + types.Download: {file(filepath.Join(p.legacyHome, "Downloads.plist"))}, + types.LocalStorage: {dir(filepath.Join(webkitStore, "Origins"))}, } } diff --git a/browser/safari/testutil_test.go b/browser/safari/testutil_test.go index 79fea7a..dbd0d9a 100644 --- a/browser/safari/testutil_test.go +++ b/browser/safari/testutil_test.go @@ -2,10 +2,14 @@ package safari import ( "database/sql" + "encoding/binary" "fmt" "os" "path/filepath" + "strconv" + "strings" "testing" + "unicode/utf16" "github.com/stretchr/testify/require" _ "modernc.org/sqlite" @@ -119,3 +123,148 @@ func writeSafariTabsDB(t *testing.T, path string, rows []tabRow) { require.NoError(t, err) } } + +// --------------------------------------------------------------------------- +// LocalStorage fixtures — modern WebKit nested Origins layout +// --------------------------------------------------------------------------- + +// testLocalStorageItem is one key/value pair written to an ItemTable 
row. +// Value is encoded as UTF-16 LE, matching WebKit's on-disk format. +type testLocalStorageItem struct { + Key, Value string +} + +// buildTestLocalStorageDir creates a root dir that mirrors Safari 17+'s nested +// localStorage layout (/

/

/origin + LocalStorage/localstorage.sqlite3) +// for each origin URL passed in. Origins are written as first-party (top == frame); +// for partitioned-origin coverage, use buildTestPartitionedLocalStorage. +func buildTestLocalStorageDir(t *testing.T, origins map[string][]testLocalStorageItem) string { + t.Helper() + root := filepath.Join(t.TempDir(), "Origins") + require.NoError(t, os.MkdirAll(root, 0o755)) + + i := 0 + for origin, items := range origins { + hash := fmt.Sprintf("h%02d", i) + i++ + writeTestOriginStore(t, root, hash, hash, origin, origin, items) + } + return root +} + +// writeTestOriginStore writes one /// tree with the given +// origins encoded into the binary origin file and items inserted into localstorage.sqlite3. +func writeTestOriginStore(t *testing.T, root, topHash, frameHash, topOrigin, frameOrigin string, items []testLocalStorageItem) { + t.Helper() + frameDir := filepath.Join(root, topHash, frameHash) + require.NoError(t, os.MkdirAll(filepath.Join(frameDir, webkitLocalStorageSubdir), 0o755)) + + require.NoError(t, os.WriteFile( + filepath.Join(frameDir, webkitOriginFile), + encodeOriginFile(topOrigin, frameOrigin), + 0o644, + )) + + dbPath := filepath.Join(frameDir, webkitLocalStorageSubdir, webkitLocalStorageDB) + db, err := sql.Open("sqlite", dbPath) + require.NoError(t, err) + _, err = db.Exec(`CREATE TABLE ItemTable (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB NOT NULL ON CONFLICT FAIL)`) + require.NoError(t, err) + for _, item := range items { + _, err = db.Exec( + `INSERT INTO ItemTable (key, value) VALUES (?, ?)`, + item.Key, encodeUTF16LE(item.Value), + ) + require.NoError(t, err) + } + require.NoError(t, db.Close()) +} + +// encodeOriginFile mirrors WebKit's SecurityOrigin binary serialization. Layout per origin +// block: length-prefixed scheme record, length-prefixed host record, then a port marker +// (0x00 for the scheme default, or 0x01 + uint16_le port). Two blocks back-to-back: top-frame +// then frame. 
+func encodeOriginFile(topOrigin, frameOrigin string) []byte { + var buf []byte + buf = appendOriginBlock(buf, topOrigin) + buf = appendOriginBlock(buf, frameOrigin) + return buf +} + +func appendOriginBlock(buf []byte, originURL string) []byte { + scheme, host, port := splitTestOriginURL(originURL) + buf = appendOriginRecord(buf, scheme) + buf = appendOriginRecord(buf, host) + if port == 0 { + buf = append(buf, originPortDefaultMarker) + return buf + } + buf = append(buf, originPortExplicitFlag) + portBytes := make([]byte, 2) + binary.LittleEndian.PutUint16(portBytes, port) + return append(buf, portBytes...) +} + +func appendOriginRecord(buf []byte, s string) []byte { + lenBytes := make([]byte, 4) + binary.LittleEndian.PutUint32(lenBytes, uint32(len(s))) + buf = append(buf, lenBytes...) + buf = append(buf, originEncASCII) + return append(buf, []byte(s)...) +} + +// splitTestOriginURL parses "https://example.com[:port]" into (scheme, host, port). +// Port 0 means the URL had no explicit port (use scheme default). +func splitTestOriginURL(u string) (scheme, host string, port uint16) { + idx := strings.Index(u, "://") + if idx < 0 { + return "", u, 0 + } + scheme = u[:idx] + rest := u[idx+3:] + if colon := strings.LastIndex(rest, ":"); colon >= 0 { + if p, err := strconv.ParseUint(rest[colon+1:], 10, 16); err == nil { + return scheme, rest[:colon], uint16(p) + } + } + return scheme, rest, 0 +} + +// writeLocalStorageDB creates a minimal localstorage.sqlite3 at path with ItemTable populated +// from items. When addNullKey is true, a NULL-key row is inserted first to exercise the +// skip-NULL-key logic in readLocalStorageFile / countLocalStorageFile. This is a direct-DB +// variant of buildTestLocalStorageDir — use it when the test targets one DB, not the full +// Origins nesting. 
+func writeLocalStorageDB(t *testing.T, path string, items []testLocalStorageItem, addNullKey bool) { + t.Helper() + db, err := sql.Open("sqlite", path) + require.NoError(t, err) + _, err = db.Exec(`CREATE TABLE ItemTable (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB NOT NULL ON CONFLICT FAIL)`) + require.NoError(t, err) + if addNullKey { + _, err = db.Exec( + `INSERT INTO ItemTable (key, value) VALUES (NULL, ?)`, + encodeUTF16LE("null-key-sentinel"), + ) + require.NoError(t, err) + } + for _, item := range items { + _, err = db.Exec( + `INSERT INTO ItemTable (key, value) VALUES (?, ?)`, + item.Key, encodeUTF16LE(item.Value), + ) + require.NoError(t, err) + } + require.NoError(t, db.Close()) +} + +// encodeUTF16LE is the inverse of extract_storage.go's decodeUTF16LE — it mirrors +// the WebKit encoding so test fixtures round-trip through the extractor. +func encodeUTF16LE(s string) []byte { + u16 := utf16.Encode([]rune(s)) + buf := make([]byte, 2*len(u16)) + for i, r := range u16 { + binary.LittleEndian.PutUint16(buf[i*2:], r) + } + return buf +} diff --git a/rfcs/011-safari-data-storage.md b/rfcs/011-safari-data-storage.md new file mode 100644 index 0000000..f002abe --- /dev/null +++ b/rfcs/011-safari-data-storage.md @@ -0,0 +1,273 @@ +# RFC-011: Safari Data Storage + +**Author**: moonD4rk +**Status**: Living Document +**Created**: 2026-04-21 + +## 1. Overview + +Safari is **macOS-only** and sandboxed under App Sandbox. Most of Safari's user data lives inside `~/Library/Containers/com.apple.Safari/Data/Library/` (the container root) and requires **Full Disk Access (TCC)** for third-party processes to read. A few legacy files still reside at `~/Library/Safari/` for backwards compatibility. + +Unlike Chromium and Firefox, Safari does **not** encrypt bookmarks, history, cookies, downloads, or localStorage — all are stored in plaintext on disk. 
Passwords are the only encrypted category and are delegated to the macOS login Keychain (see [RFC-006](006-key-retrieval-mechanisms.md) §7). + +Safari 17 (September 2023) introduced **multi-profile support**. Profile discovery therefore has two layers: a synthetic "default" profile mapped to the pre-profile legacy paths, plus one or more named profiles enumerated from `SafariTabs.db`. + +## 2. Profile Structure + +Each `profileContext` (in `browser/safari/profiles.go`) tracks five fields: + +| Field | Meaning | +|-------|---------| +| `name` | Human-readable profile name, disambiguated for duplicates | +| `uuidUpper` | UUID in uppercase (used by `Safari/Profiles/<UUID>/` directories) | +| `uuidLower` | UUID in lowercase (used by `WebKit/WebsiteDataStore/<uuid>/` directories) | +| `legacyHome` | `~/Library/Safari` | +| `container` | `~/Library/Containers/com.apple.Safari/Data/Library` | + +Empty `uuidUpper` marks the synthetic default profile. + +### 2.1 Profile Discovery + +The default profile is always emitted first. Named profiles come from `SafariTabs.db`: + +```sql +SELECT external_uuid, title FROM bookmarks +WHERE subtype = 2 AND external_uuid != 'DefaultProfile' +``` + +`DefaultProfile` is Safari's sentinel string for the pre-profile era; it is filtered out because it is already represented by the synthetic default. + +If the DB cannot be opened (missing, permission-denied), the extractor falls back to scanning `Safari/Profiles/` for any directory whose name is a canonical 8-4-4-4-12 UUID and synthesizing the name as `profile-`. This makes profile discovery robust even when TCC blocks the SQL read. + +Duplicate display names are disambiguated with `-2`, `-3`, … suffixes, deterministically by discovery order. 
+ +### 2.2 UUID Case Asymmetry + +Safari uses two different casings for the same profile UUID across the container: + +| Path prefix | Casing | Example | +|-------------|:------:|---------| +| `Safari/Profiles/<UUID>/` | Uppercase | `5604E6F5-02ED-4E40-8249-63DE7BC986C8` | +| `WebKit/WebsiteDataStore/<uuid>/` | Lowercase | `5604e6f5-02ed-4e40-8249-63de7bc986c8` | + +`profileContext` stores both to avoid case-folding at every call site. + +## 3. Data File Locations + +### 3.1 Default Profile + +| Category | Path | Format | +|----------|------|--------| +| History | `~/Library/Safari/History.db` | SQLite | +| Cookie | `Container/Cookies/Cookies.binarycookies`, then `~/Library/Cookies/Cookies.binarycookies` | BinaryCookies | +| Bookmark | `~/Library/Safari/Bookmarks.plist` | plist | +| Download | `~/Library/Safari/Downloads.plist` | plist | +| LocalStorage | `Container/WebKit/WebsiteData/Default/` | WebKit Origins dir | +| Password | macOS Keychain | — | + +The Cookie path is resolved in priority order — the first candidate that exists wins. Modern (macOS 14+) installs keep cookies in the sandboxed container; the legacy path is kept as a fallback for upgraded systems. + +### 3.2 Named Profiles + +| Category | Path | Format | +|----------|------|--------| +| History | `Container/Safari/Profiles/<UUID>/History.db` | SQLite | +| Cookie | `Container/WebKit/WebsiteDataStore/<uuid>/Cookies/Cookies.binarycookies` | BinaryCookies | +| Download | `~/Library/Safari/Downloads.plist` (filtered by UUID) | plist | +| LocalStorage | `Container/WebKit/WebsiteDataStore/<uuid>/Origins/` | WebKit Origins dir | + +Bookmark is intentionally **omitted** from named profiles: `Bookmarks.plist` is a shared plist with no per-entry profile tag, so it is attributed to the default profile only. Duplicate bookmarks would otherwise be emitted per profile. 
+ +Downloads is shared across all profiles but each entry carries a `DownloadEntryProfileUUIDStringKey`; the extractor filters at read time so each profile sees only its own downloads. + +Passwords live in the user-scope Keychain, not on a per-profile basis — only the default profile emits passwords to avoid duplicates across the output. + +## 4. Data Storage Formats + +### 4.1 History (History.db — SQLite) + +```sql +SELECT url, title, visit_count, visit_time +FROM history_items +LEFT JOIN history_visits ON history_items.id = history_visits.history_item +``` + +Schema notes: +- `visit_time` is a `REAL` column using the **Core Data epoch** (see Section 5) +- One item → many visits; the extractor takes the most recent visit per item +- Results are sorted by `visit_count` descending + +### 4.2 Cookies (Cookies.binarycookies — binary) + +Apple's proprietary BinaryCookies format — not SQLite, not a documented format. Parsed by the [go-binarycookies](https://github.com/moond4rk/go-binarycookies) library. + +High-level layout: + +``` +| "cook" magic | page_count | page_sizes[] | pages[] | +|--------------|------------|------------------|--------------------------| +| 4B | 4B (BE) | page_count × 4B | variable | +``` + +Each page is an index-of-cookies table followed by per-cookie records. A cookie record carries flags (`isSecure`, `isHTTPOnly`), URL/name/path/value offsets into the record, and creation / expiry timestamps in Core Data epoch. + +Cookie values are **plaintext** — no per-cookie encryption. This is a fundamental divergence from Chromium, which encrypts `encrypted_value` with the OS master key. 
+ +### 4.3 Bookmarks (Bookmarks.plist — property list) + +A nested dictionary tree with a `WebBookmarkType` discriminator at each node: + +| Type | Meaning | Additional keys | +|------|---------|-----------------| +| `WebBookmarkTypeList` | Folder | `Children` (array) | +| `WebBookmarkTypeLeaf` | URL entry | `URLString`, `URIDictionary.title` | + +The extractor walks the tree recursively, collecting leaf nodes into a flat list. Folder names are not preserved (only URL + title pairs are exported). + +### 4.4 Downloads (Downloads.plist — property list) + +A flat structure with a `DownloadHistory` array. Relevant keys per entry: + +| Key | Meaning | +|-----|---------| +| `DownloadEntryURL` | Source URL | +| `DownloadEntryPath` | Local filesystem path | +| `DownloadEntryBytesReceivedSoFar` | Bytes downloaded | +| `DownloadEntryProfileUUIDStringKey` | Owning profile's uppercase UUID, or `"DefaultProfile"` | + +The extractor filters by the caller-provided owner UUID so each profile reports its own downloads. MIME type and start/end times are not stored by Safari — `MimeType` is always empty in the output. + +### 4.5 Passwords (macOS Keychain) + +Safari does **not** persist passwords to a file in its container. All credentials live in `login.keychain-db`, accessible via `InternetPassword` records. The extractor reads them directly through [keychainbreaker](https://github.com/moond4rk/keychainbreaker) and reconstructs the URL from `(protocol, server, port, path)`. + +Default port handling: + +| Protocol | Default port | URL rendering | +|----------|-------------:|---------------| +| `https` | 443 | `https://host/path` (port omitted) | +| `http` | 80 | `http://host/path` (port omitted) | +| `ftp` | 21 | `ftp://host/path` (port omitted) | +| Other | — | `scheme://host:port/path` | + +The `htps` FourCC protocol code emitted by some Keychain entries is normalized to `https`. 
+ +Partial-extraction mode: if the Keychain cannot be unlocked (no `--keychain-pw` supplied, or the password is wrong), metadata-only records are still emitted — URL, username, timestamps — with `PlainPassword` left blank. See [RFC-006](006-key-retrieval-mechanisms.md) §7 for the full credential-extraction architecture. + +### 4.6 LocalStorage (WebKit Origins — nested SQLite) + +Safari 17+ stores localStorage under a **partition-aware nested tree**, rooted at: + +| Profile | Root path | +|---------|-----------| +| Default | `Container/WebKit/WebsiteData/Default/` | +| Named | `Container/WebKit/WebsiteDataStore/<uuid>/Origins/` | + +Under the root, two levels of hashed directories lead to the actual data: + +``` +<root>/<top-frame-hash>/<frame-hash>/ +├── origin ← binary-serialized origins (top + frame) +└── LocalStorage/ + ├── localstorage.sqlite3 ← ItemTable(key TEXT UNIQUE, value BLOB NOT NULL) + ├── localstorage.sqlite3-shm + └── localstorage.sqlite3-wal +``` + +`top-frame-hash == frame-hash` for **first-party** storage. They differ for **partitioned third-party** storage (an iframe with a different origin than the top document). The named profile root additionally carries a `salt` sibling file used by WebKit's origin-hashing — skipped at traversal time. + +The flat `WebsiteDataStore/<uuid>/LocalStorage/<scheme>_<host>_<port>.localstorage` layout used by older WebKit is **empty on modern Safari** and is not supported. + +#### Origin file format + +Two `origin` blocks back-to-back — top-frame then frame. 
Each block: + +``` +| scheme record | host record | port section | +|--------------------------|--------------------------|-----------------| +| uint32_le len | enc byte | uint32_le len | enc byte | 0x00 | +| <scheme bytes> | <host bytes> | | + or + | 0x01 | uint16_le port | +``` + +- `enc byte`: `0x01` = Latin-1/ASCII (common), `0x00` = UTF-16 LE +- Port section: `0x00` marker means "use scheme default" (stored as port 0 in the parsed struct); `0x01` marker is followed by a 2-byte little-endian port + +The extractor reads both blocks and reports the **frame origin URL** — that is what JavaScript's `window.localStorage` actually exposes in the partitioned case. If only the top-frame block is parseable, the extractor falls back to it. + +#### ItemTable + +```sql +SELECT key, value FROM ItemTable +``` + +Schema: `(key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB NOT NULL ON CONFLICT FAIL)`. + +Values are **UTF-16 LE** encoded JS strings. Oversized values (≥ 2048 bytes) are replaced with a size marker in the output — this matches the cap used by the Chromium extractor ([RFC-002](002-chromium-data-storage.md) §4.8) and keeps JSON/CSV exports bounded. + +## 5. Time Formats + +Safari uses the **Core Data epoch** — 2001-01-01 00:00:00 UTC, which is **978,307,200 seconds** after the Unix epoch. To convert a Core Data timestamp to Unix time, add `978307200` seconds. + +| Data Type | Field | Storage | +|-----------|-------|---------| +| History | `visit_time` | REAL seconds, Core Data epoch | +| Cookies | `creation`, `expiry` | REAL seconds, Core Data epoch | +| Downloads | — | No timestamp stored | +| Passwords | Keychain `Created` | Already Unix time (via keychainbreaker) | +| LocalStorage | — | No timestamp stored | + +Bookmarks carry no timestamp in Safari's plist representation. + +## 6. 
Encryption + +Safari's encryption story is deliberately thin: + +| Category | Encryption | +|----------|------------| +| History | None (plaintext SQLite) | +| Cookies | None (plaintext binary format) | +| Bookmarks | None (plaintext plist) | +| Downloads | None (plaintext plist) | +| LocalStorage | None (plaintext SQLite; UTF-16 LE is an encoding, not encryption) | +| Passwords | macOS Keychain — see [RFC-006](006-key-retrieval-mechanisms.md) §7 | + +The only encrypted category is passwords. Because they are not stored in Safari's own files at all, there is no Safari-specific cipher, key derivation, or master-key retrieval to document. See RFC-006 for the `InternetPassword` extraction path. + +## 7. Platform Specifics + +- **macOS-only**. There is no Safari on Windows or Linux. +- **Full Disk Access (TCC)** is required to read the sandboxed container. Without it, cookies / history / downloads / localStorage reads fail silently with permission errors at stat or open time. Legacy paths under `~/Library/Safari/` sometimes remain readable without FDA, but are mostly empty on modern systems. +- **Live-file safety** follows a live-vs-temp split: + - **Live reads** (`SafariTabs.db` during profile discovery in `profiles.go`) use `?mode=ro&immutable=1`, which disables WAL replay and locking so the extractor cannot disturb a running Safari — it sees a consistent snapshot of the main DB as of read time, at the cost of missing any pending WAL content. + - **Temp-copy reads** (`History.db`, `localstorage.sqlite3`, etc. via `filemanager.Session.Acquire`) use `?mode=ro` only. `Session.Acquire` copies the `-wal` / `-shm` sidecars alongside the main DB, so SQLite can replay committed-but-not-yet-checkpointed transactions on the copy — surfacing entries Safari has committed to WAL but not yet checkpointed. Any `-shm` writes SQLite performs during replay land on the ephemeral copy and are deleted with the session. +- **Multi-profile availability**: requires Safari 17 (macOS 14 Sonoma) or newer. 
Older Safari versions have only the default profile; discovery degrades cleanly via the ReadDir fallback described in §2.1. +- **File acquisition**: all per-profile files are copied into a `filemanager.Session` temp directory before extraction, except the discovery-time `SafariTabs.db` read which opens the live file directly. See [RFC-008](008-file-acquisition-and-platform-quirks.md) for the general pattern. + +## 8. Key Differences from Chromium and Firefox + +| Aspect | Chromium | Firefox | Safari | +|--------|----------|---------|--------| +| Platform | Cross-platform | Cross-platform | **macOS-only** | +| Profile discovery | `Preferences` sentinel file | Any data file present | `SafariTabs.db` SQL + dir fallback | +| Profile naming | `Default`, `Profile 1`, … | `.default-release` | Human-readable title from SafariTabs.db | +| Password storage | Encrypted SQLite (`Login Data`) | Encrypted JSON (`logins.json`) | **macOS Keychain** (no file) | +| Cookie encryption | Encrypted with OS master key | Plaintext | **Plaintext** | +| Cookie format | SQLite | SQLite | Proprietary BinaryCookies binary | +| History | SQLite | SQLite (`places.sqlite`) | SQLite (Core Data epoch) | +| Bookmark | JSON | SQLite (`places.sqlite`) | **plist** | +| Download | SQLite (`History`, shared) | SQLite (`places.sqlite`, shared) | **plist** (filtered by UUID) | +| LocalStorage | LevelDB | SQLite (`webappsstore.sqlite`) | Nested **WebKit Origins** SQLite | +| LocalStorage partitioning | No | No | **Yes** (top-frame + frame hashes) | +| CreditCard / SessionStorage | Supported | Not supported | Not supported | +| Encryption scope | Passwords, cookies, credit cards | Passwords only | Passwords only | +| Time format | WebKit microseconds since 1601 | Mixed (μs for most, ms for passwords) | Core Data seconds since 2001 | + +## Related RFCs + +| RFC | Topic | +|-----|-------| +| [RFC-001](001-project-architecture.md) | Project architecture and directory layout | +| 
[RFC-006](006-key-retrieval-mechanisms.md) | §7 covers Safari Keychain credential extraction | +| [RFC-008](008-file-acquisition-and-platform-quirks.md) | File acquisition via `filemanager.Session` |