mirror of
https://github.com/moonD4rk/HackBrowserData.git
synced 2026-05-19 18:58:03 +02:00
7a5db25b4f
* feat(safari): localstorage extraction Extracts Safari 17+ localStorage from WebKit's nested layout — WebsiteDataStore/<uuid>/Origins/<top-hash>/<frame-hash>/LocalStorage/ localstorage.sqlite3 for named profiles, WebsiteData/Default for the default profile. Parses the binary SecurityOrigin serialization (length-prefixed scheme+host plus 0x00 default-port or 0x01 <uint16_le> explicit-port section) and decodes UTF-16 LE ItemTable value BLOBs, capping oversized values at 2048 bytes to match the Chromium extractor. Reports the frame origin URL so partitioned third-party storage is attributed to the iframe origin JavaScript actually sees. Closes the remaining LocalStorage checkbox in #565. * docs(safari): add RFC-011 data storage Documents Safari's profile structure, per-category file layouts, and storage formats including the Safari 17+ nested WebKit Origins localStorage layout and binary SecurityOrigin serialization. Defers Keychain credential extraction to RFC-006 §7 and notes the cross-browser differences (plaintext cookies, plist bookmarks/downloads, Core Data epoch timestamps, partitioned storage). * fix(safari): latin-1 origin decoding, NULL key skip, count fast-path - Decode originEncASCII via decodeLatin1 so high-byte records preserve their ISO-8859-1 meaning instead of being interpreted as UTF-8. Matches the pattern in chromium/extract_storage.go. - Skip ItemTable rows where key is NULL — SQLite's UNIQUE constraint permits multiple NULLs, and silently lowering them to empty strings would collide with legitimate empty-string keys. - countLocalStorage now walks origin dirs and runs SELECT COUNT(key) per localstorage.sqlite3 instead of fully decoding every value. COUNT(key) naturally excludes NULLs, keeping count and extract symmetric. Addresses Copilot review feedback on #582. * fix(safari): round-2 review — WAL replay, stable ordering, error context - Drop immutable=1 on temp-copy SQLite opens in readLocalStorageFile / countLocalStorageFile. Session.Acquire copies the -wal / -shm sidecars, so mode=ro alone lets SQLite replay WAL on the ephemeral copy and surface entries Safari committed to WAL but hasn't checkpointed yet. Live-file reads in profiles.go keep immutable=1 as before. - Order ItemTable query by (key, rowid) for deterministic exports across runs and SQLite versions. - Wrap os.ReadFile / os.ReadDir errors with the offending path so multi-origin debug logs stay scannable. - RFC-011 §7 rewritten to explain the live-vs-temp split. - New regression test asserts ORDER BY surfaces rows in key order. Addresses round-2 Copilot review on #582.
336 lines
12 KiB
Go
336 lines
12 KiB
Go
package safari
|
||
|
||
import (
|
||
"database/sql"
|
||
"os"
|
||
"path/filepath"
|
||
"strings"
|
||
"testing"
|
||
|
||
"github.com/stretchr/testify/assert"
|
||
"github.com/stretchr/testify/require"
|
||
_ "modernc.org/sqlite"
|
||
)
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// readOriginBlock / readOriginFile
|
||
// ---------------------------------------------------------------------------
|
||
|
||
func TestReadOriginBlock_FirstParty(t *testing.T) {
|
||
data := encodeOriginFile("https://example.com", "https://example.com")
|
||
top, pos, err := readOriginBlock(data, 0)
|
||
require.NoError(t, err)
|
||
assert.Equal(t, "https", top.scheme)
|
||
assert.Equal(t, "example.com", top.host)
|
||
assert.Equal(t, uint16(0), top.port, "port 0 ⇒ scheme default")
|
||
|
||
frame, _, err := readOriginBlock(data, pos)
|
||
require.NoError(t, err)
|
||
assert.Equal(t, "https://example.com", formatOriginURL(frame))
|
||
}
|
||
|
||
func TestReadOriginBlock_NonDefaultPort(t *testing.T) {
|
||
data := encodeOriginFile("https://example.com:8443", "https://example.com:8443")
|
||
top, _, err := readOriginBlock(data, 0)
|
||
require.NoError(t, err)
|
||
assert.Equal(t, uint16(8443), top.port)
|
||
assert.Equal(t, "https://example.com:8443", formatOriginURL(top))
|
||
}
|
||
|
||
func TestReadOriginBlock_Latin1HighByte(t *testing.T) {
|
||
// WebKit stores scheme/host records with encoding byte 0x01 = Latin-1. Verify high-byte
|
||
// bytes decode as Latin-1 (é = 0xE9) rather than being passed through as invalid UTF-8.
|
||
data := []byte{
|
||
0x04, 0x00, 0x00, 0x00, 0x01, 'h', 't', 't', 'p', // scheme "http"
|
||
0x04, 0x00, 0x00, 0x00, 0x01, 'c', 'a', 'f', 0xe9, // host "café" (Latin-1)
|
||
0x00, // port default
|
||
}
|
||
ep, _, err := readOriginBlock(data, 0)
|
||
require.NoError(t, err)
|
||
assert.Equal(t, "http", ep.scheme)
|
||
assert.Equal(t, "café", ep.host)
|
||
}
|
||
|
||
func TestDecodeLatin1(t *testing.T) {
|
||
assert.Equal(t, "café", decodeLatin1([]byte{'c', 'a', 'f', 0xe9}))
|
||
assert.Equal(t, "hello", decodeLatin1([]byte("hello")))
|
||
assert.Empty(t, decodeLatin1(nil))
|
||
}
|
||
|
||
func TestReadOriginFile_FramePreferred(t *testing.T) {
|
||
dir := t.TempDir()
|
||
originPath := filepath.Join(dir, "origin")
|
||
require.NoError(t, os.WriteFile(originPath,
|
||
encodeOriginFile("https://top.example.com", "https://iframe.example.com"), 0o644))
|
||
|
||
got, err := readOriginFile(originPath)
|
||
require.NoError(t, err)
|
||
assert.Equal(t, "https://iframe.example.com", got)
|
||
}
|
||
|
||
func TestReadOriginFile_FallbackToTop(t *testing.T) {
|
||
// Write only the top-frame block — no frame follows. Extractor should still succeed by
|
||
// falling back to the top-frame origin.
|
||
var buf []byte
|
||
buf = appendOriginBlock(buf, "https://example.com")
|
||
originPath := filepath.Join(t.TempDir(), "origin")
|
||
require.NoError(t, os.WriteFile(originPath, buf, 0o644))
|
||
|
||
got, err := readOriginFile(originPath)
|
||
require.NoError(t, err)
|
||
assert.Equal(t, "https://example.com", got)
|
||
}
|
||
|
||
func TestReadOriginFile_Malformed(t *testing.T) {
|
||
originPath := filepath.Join(t.TempDir(), "origin")
|
||
require.NoError(t, os.WriteFile(originPath, []byte{0x01, 0x02}, 0o644))
|
||
|
||
_, err := readOriginFile(originPath)
|
||
require.Error(t, err)
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// decodeUTF16LE / decodeLocalStorageValue
|
||
// ---------------------------------------------------------------------------
|
||
|
||
func TestDecodeUTF16LE(t *testing.T) {
|
||
t.Run("ascii", func(t *testing.T) {
|
||
assert.Equal(t, "hello", decodeUTF16LE(encodeUTF16LE("hello")))
|
||
})
|
||
t.Run("cjk", func(t *testing.T) {
|
||
assert.Equal(t, "你好世界", decodeUTF16LE(encodeUTF16LE("你好世界")))
|
||
})
|
||
t.Run("mixed", func(t *testing.T) {
|
||
assert.Equal(t, "hello 世界 🌍", decodeUTF16LE(encodeUTF16LE("hello 世界 🌍")))
|
||
})
|
||
t.Run("empty", func(t *testing.T) {
|
||
assert.Empty(t, decodeUTF16LE(nil))
|
||
assert.Empty(t, decodeUTF16LE([]byte{}))
|
||
})
|
||
t.Run("odd length falls back to raw string", func(t *testing.T) {
|
||
assert.Equal(t, "abc", decodeUTF16LE([]byte("abc")))
|
||
})
|
||
}
|
||
|
||
func TestDecodeLocalStorageValue_Truncates(t *testing.T) {
|
||
// 1100 chars × 2 bytes = 2200 bytes, over the 2048 cap.
|
||
oversized := encodeUTF16LE(strings.Repeat("x", 1100))
|
||
got := decodeLocalStorageValue(oversized)
|
||
assert.Contains(t, got, "too long")
|
||
assert.Contains(t, got, "2048")
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// extractLocalStorage — end-to-end over real nested layout fixtures
|
||
// ---------------------------------------------------------------------------
|
||
|
||
func TestExtractLocalStorage_SingleOrigin(t *testing.T) {
|
||
root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{
|
||
"https://example.com": {{Key: "auth_token", Value: "abc123"}},
|
||
})
|
||
entries, err := extractLocalStorage(root)
|
||
require.NoError(t, err)
|
||
require.Len(t, entries, 1)
|
||
assert.Equal(t, "https://example.com", entries[0].URL)
|
||
assert.Equal(t, "auth_token", entries[0].Key)
|
||
assert.Equal(t, "abc123", entries[0].Value)
|
||
assert.False(t, entries[0].IsMeta)
|
||
}
|
||
|
||
func TestExtractLocalStorage_MultiOrigin(t *testing.T) {
|
||
root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{
|
||
"https://github.com": {
|
||
{Key: "theme", Value: "dark"},
|
||
{Key: "lang", Value: "en"},
|
||
},
|
||
"https://example.com:8443": {
|
||
{Key: "session", Value: "xyz"},
|
||
},
|
||
})
|
||
entries, err := extractLocalStorage(root)
|
||
require.NoError(t, err)
|
||
require.Len(t, entries, 3)
|
||
|
||
byURL := make(map[string][]string)
|
||
for _, e := range entries {
|
||
byURL[e.URL] = append(byURL[e.URL], e.Key+"="+e.Value)
|
||
}
|
||
assert.ElementsMatch(t, []string{"theme=dark", "lang=en"}, byURL["https://github.com"])
|
||
assert.ElementsMatch(t, []string{"session=xyz"}, byURL["https://example.com:8443"])
|
||
}
|
||
|
||
func TestExtractLocalStorage_CJKAndEmoji(t *testing.T) {
|
||
root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{
|
||
"https://example.com": {
|
||
{Key: "名字", Value: "张三"},
|
||
{Key: "status", Value: "hello 世界 🌍"},
|
||
},
|
||
})
|
||
entries, err := extractLocalStorage(root)
|
||
require.NoError(t, err)
|
||
require.Len(t, entries, 2)
|
||
|
||
values := make(map[string]string)
|
||
for _, e := range entries {
|
||
values[e.Key] = e.Value
|
||
}
|
||
assert.Equal(t, "张三", values["名字"])
|
||
assert.Equal(t, "hello 世界 🌍", values["status"])
|
||
}
|
||
|
||
func TestExtractLocalStorage_EmptyItemTable(t *testing.T) {
|
||
root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{
|
||
"https://example.com": nil,
|
||
})
|
||
entries, err := extractLocalStorage(root)
|
||
require.NoError(t, err)
|
||
assert.Empty(t, entries)
|
||
}
|
||
|
||
func TestExtractLocalStorage_TruncatesOversizedValue(t *testing.T) {
|
||
root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{
|
||
"https://example.com": {{Key: "big", Value: strings.Repeat("x", 1100)}},
|
||
})
|
||
entries, err := extractLocalStorage(root)
|
||
require.NoError(t, err)
|
||
require.Len(t, entries, 1)
|
||
assert.Contains(t, entries[0].Value, "too long")
|
||
}
|
||
|
||
func TestExtractLocalStorage_Partitioned(t *testing.T) {
|
||
// Manually construct a partitioned third-party entry: YouTube iframe inside Google top-frame.
|
||
root := filepath.Join(t.TempDir(), "Origins")
|
||
require.NoError(t, os.MkdirAll(root, 0o755))
|
||
writeTestOriginStore(t, root, "topHash", "frameHash",
|
||
"https://accounts.google.com", "https://accounts.youtube.com",
|
||
[]testLocalStorageItem{{Key: "yt-session", Value: "embedded"}},
|
||
)
|
||
|
||
entries, err := extractLocalStorage(root)
|
||
require.NoError(t, err)
|
||
require.Len(t, entries, 1)
|
||
assert.Equal(t, "https://accounts.youtube.com", entries[0].URL, "frame origin preferred over top-frame")
|
||
}
|
||
|
||
func TestExtractLocalStorage_SkipsSaltAndStrayFiles(t *testing.T) {
|
||
root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{
|
||
"https://example.com": {{Key: "a", Value: "1"}},
|
||
})
|
||
// Drop a "salt" sibling that must not be traversed, plus a stray file at root.
|
||
require.NoError(t, os.WriteFile(filepath.Join(root, "salt"), []byte("pretend salt"), 0o644))
|
||
require.NoError(t, os.WriteFile(filepath.Join(root, "README"), []byte("noise"), 0o644))
|
||
|
||
entries, err := extractLocalStorage(root)
|
||
require.NoError(t, err)
|
||
require.Len(t, entries, 1)
|
||
assert.Equal(t, "https://example.com", entries[0].URL)
|
||
}
|
||
|
||
func TestExtractLocalStorage_SkipsFrameDirsWithoutDB(t *testing.T) {
|
||
// Partition dirs that only have "origin" but no LocalStorage/ subdir must not error out —
|
||
// real Safari has plenty of these (cookies-only partitions).
|
||
root := filepath.Join(t.TempDir(), "Origins")
|
||
frameDir := filepath.Join(root, "topHash", "frameHash")
|
||
require.NoError(t, os.MkdirAll(frameDir, 0o755))
|
||
require.NoError(t, os.WriteFile(
|
||
filepath.Join(frameDir, "origin"),
|
||
encodeOriginFile("https://example.com", "https://example.com"), 0o644))
|
||
|
||
entries, err := extractLocalStorage(root)
|
||
require.NoError(t, err)
|
||
assert.Empty(t, entries)
|
||
}
|
||
|
||
func TestExtractLocalStorage_DirMissing(t *testing.T) {
|
||
_, err := extractLocalStorage(filepath.Join(t.TempDir(), "does-not-exist"))
|
||
require.Error(t, err)
|
||
}
|
||
|
||
func TestExtractLocalStorage_EmptyRoot(t *testing.T) {
|
||
entries, err := extractLocalStorage(t.TempDir())
|
||
require.NoError(t, err)
|
||
assert.Empty(t, entries)
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// countLocalStorage
|
||
// ---------------------------------------------------------------------------
|
||
|
||
func TestCountLocalStorage(t *testing.T) {
|
||
root := buildTestLocalStorageDir(t, map[string][]testLocalStorageItem{
|
||
"https://a.com": {{Key: "k1", Value: "v1"}, {Key: "k2", Value: "v2"}},
|
||
"https://b.com": {{Key: "k3", Value: "v3"}},
|
||
"https://c.com:8443": {{Key: "k4", Value: "v4"}},
|
||
})
|
||
count, err := countLocalStorage(root)
|
||
require.NoError(t, err)
|
||
assert.Equal(t, 4, count)
|
||
}
|
||
|
||
func TestCountLocalStorage_DirMissing(t *testing.T) {
|
||
count, err := countLocalStorage(filepath.Join(t.TempDir(), "nope"))
|
||
require.Error(t, err)
|
||
assert.Equal(t, 0, count)
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// NULL-key handling — readLocalStorageFile / countLocalStorageFile both skip NULL keys,
|
||
// keeping count and extract in sync.
|
||
// ---------------------------------------------------------------------------
|
||
|
||
func TestReadLocalStorageFile_SkipsNullKey(t *testing.T) {
|
||
dbPath := filepath.Join(t.TempDir(), "ls.sqlite3")
|
||
writeLocalStorageDB(t, dbPath, []testLocalStorageItem{
|
||
{Key: "real", Value: "keeper"},
|
||
}, true /*addNullKey*/)
|
||
|
||
items, err := readLocalStorageFile(dbPath)
|
||
require.NoError(t, err)
|
||
require.Len(t, items, 1)
|
||
assert.Equal(t, "real", items[0].key)
|
||
assert.Equal(t, "keeper", items[0].value)
|
||
}
|
||
|
||
func TestCountLocalStorageFile_SkipsNullKey(t *testing.T) {
|
||
dbPath := filepath.Join(t.TempDir(), "ls.sqlite3")
|
||
writeLocalStorageDB(t, dbPath, []testLocalStorageItem{
|
||
{Key: "k1", Value: "v1"},
|
||
{Key: "k2", Value: "v2"},
|
||
}, true /*addNullKey*/)
|
||
|
||
count, err := countLocalStorageFile(dbPath)
|
||
require.NoError(t, err)
|
||
assert.Equal(t, 2, count, "NULL keys are excluded from count to match extract's skip rule")
|
||
}
|
||
|
||
func TestReadLocalStorageFile_ReturnsRowsInKeyOrder(t *testing.T) {
|
||
// Rows are inserted in reverse alphabetical order; ORDER BY key, rowid in the extractor
|
||
// query must surface them ascending so exports are deterministic across runs.
|
||
dbPath := filepath.Join(t.TempDir(), "ls.sqlite3")
|
||
writeLocalStorageDB(t, dbPath, []testLocalStorageItem{
|
||
{Key: "zebra", Value: "z"},
|
||
{Key: "mango", Value: "m"},
|
||
{Key: "apple", Value: "a"},
|
||
}, false /*addNullKey*/)
|
||
|
||
items, err := readLocalStorageFile(dbPath)
|
||
require.NoError(t, err)
|
||
require.Len(t, items, 3)
|
||
assert.Equal(t, "apple", items[0].key)
|
||
assert.Equal(t, "mango", items[1].key)
|
||
assert.Equal(t, "zebra", items[2].key)
|
||
}
|
||
|
||
func TestCountLocalStorageFile_MissingTable(t *testing.T) {
|
||
// Real Safari has origin dirs with LocalStorage/localstorage.sqlite3 but no ItemTable yet
|
||
// (seen during live verification). countLocalStorageFile must surface the error so the
|
||
// caller can log-and-skip rather than counting 0 silently.
|
||
dbPath := filepath.Join(t.TempDir(), "empty.sqlite3")
|
||
db, err := sql.Open("sqlite", dbPath)
|
||
require.NoError(t, err)
|
||
require.NoError(t, db.Close())
|
||
|
||
_, err = countLocalStorageFile(dbPath)
|
||
require.Error(t, err)
|
||
}
|