diff --git a/browser/browser.go b/browser/browser.go index 7c5e0b6..f817a89 100644 --- a/browser/browser.go +++ b/browser/browser.go @@ -8,6 +8,7 @@ import ( "github.com/moond4rk/hackbrowserdata/browser/chromium" "github.com/moond4rk/hackbrowserdata/browser/firefox" + "github.com/moond4rk/hackbrowserdata/browser/safari" "github.com/moond4rk/hackbrowserdata/crypto/keyretriever" "github.com/moond4rk/hackbrowserdata/log" "github.com/moond4rk/hackbrowserdata/types" @@ -150,6 +151,17 @@ func newBrowsers(cfg types.BrowserConfig) ([]Browser, error) { } return result, nil + case types.Safari: + found, err := safari.NewBrowsers(cfg) + if err != nil { + return nil, err + } + result := make([]Browser, len(found)) + for i, b := range found { + result[i] = b + } + return result, nil + default: return nil, fmt.Errorf("unknown browser kind: %d", cfg.Kind) } diff --git a/browser/browser_darwin.go b/browser/browser_darwin.go index f884fc4..4196c37 100644 --- a/browser/browser_darwin.go +++ b/browser/browser_darwin.go @@ -91,5 +91,11 @@ func platformBrowsers() []types.BrowserConfig { Kind: types.Firefox, UserDataDir: homeDir + "/Library/Application Support/Firefox/Profiles", }, + { + Key: "safari", + Name: safariName, + Kind: types.Safari, + UserDataDir: homeDir + "/Library/Safari", + }, } } diff --git a/browser/browser_test.go b/browser/browser_test.go index f5e92c1..9098ac0 100644 --- a/browser/browser_test.go +++ b/browser/browser_test.go @@ -349,6 +349,9 @@ func TestNewBrowsersDispatch(t *testing.T) { firefoxDir := t.TempDir() mkFile(t, firefoxDir, "abc.default", "places.sqlite") + safariDir := t.TempDir() + mkFile(t, safariDir, "History.db") + emptyDir := t.TempDir() tests := []struct { @@ -373,6 +376,13 @@ func TestNewBrowsersDispatch(t *testing.T) { wantName: "Firefox", wantProfile: "abc.default", }, + { + name: "safari dispatch", + cfg: types.BrowserConfig{Key: "safari", Name: "Safari", Kind: types.Safari, UserDataDir: safariDir}, + wantLen: 1, + wantName: "Safari", + 
wantProfile: "default", + }, { name: "unknown kind returns error", cfg: types.BrowserConfig{Key: "unknown", Name: "Unknown", Kind: types.BrowserKind(99)}, diff --git a/browser/consts.go b/browser/consts.go index 98ff89a..2041705 100644 --- a/browser/consts.go +++ b/browser/consts.go @@ -26,4 +26,5 @@ const ( sogouName = "Sogou" arcName = "Arc" duckduckgoName = "DuckDuckGo" + safariName = "Safari" ) diff --git a/browser/safari/extract_history.go b/browser/safari/extract_history.go new file mode 100644 index 0000000..28cb242 --- /dev/null +++ b/browser/safari/extract_history.go @@ -0,0 +1,56 @@ +package safari + +import ( + "database/sql" + "sort" + + "github.com/moond4rk/hackbrowserdata/types" + "github.com/moond4rk/hackbrowserdata/utils/sqliteutil" +) + +const ( + // safariHistoryQuery joins each history item to its latest visit so + // title and visit_time come from the same history_visits row. + safariHistoryQuery = `SELECT hi.url, COALESCE(hv.title, ''), hi.visit_count, + COALESCE(hv.visit_time, 0) + FROM history_items hi + LEFT JOIN history_visits hv ON hv.id = ( + SELECT hv2.id FROM history_visits hv2 + WHERE hv2.history_item = hi.id + ORDER BY hv2.visit_time DESC LIMIT 1 + )` + + safariCountHistoryQuery = `SELECT COUNT(*) FROM history_items` +) + +func extractHistories(path string) ([]types.HistoryEntry, error) { + histories, err := sqliteutil.QueryRows(path, true, safariHistoryQuery, + func(rows *sql.Rows) (types.HistoryEntry, error) { + var ( + url, title string + visitCount int + visitTime float64 + ) + if err := rows.Scan(&url, &title, &visitCount, &visitTime); err != nil { + return types.HistoryEntry{}, err + } + return types.HistoryEntry{ + URL: url, + Title: title, + VisitCount: visitCount, + LastVisit: coredataTimestamp(visitTime), + }, nil + }) + if err != nil { + return nil, err + } + + sort.Slice(histories, func(i, j int) bool { + return histories[i].VisitCount > histories[j].VisitCount + }) + return histories, nil +} + +func countHistories(path 
string) (int, error) { + return sqliteutil.CountRows(path, true, safariCountHistoryQuery) +} diff --git a/browser/safari/extract_history_test.go b/browser/safari/extract_history_test.go new file mode 100644 index 0000000..fd618db --- /dev/null +++ b/browser/safari/extract_history_test.go @@ -0,0 +1,105 @@ +package safari + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func setupSafariHistoryDB(t *testing.T) string { + t.Helper() + return createTestDB(t, "History.db", + []string{safariHistoryItemsSchema, safariHistoryVisitsSchema}, + insertHistoryItem(1, "https://github.com", "github.com", 100), + insertHistoryItem(2, "https://go.dev", "go.dev", 50), + insertHistoryItem(3, "https://example.com", "example.com", 200), + // Item 1 has two visits — extractHistories must deduplicate. + insertHistoryVisit(1, 1, 704067600.0, "GitHub"), + insertHistoryVisit(2, 1, 705067600.0, "GitHub - Latest"), + insertHistoryVisit(3, 2, 703067600.0, "The Go Programming Language"), + insertHistoryVisit(4, 3, 700067600.0, "Example Domain"), + ) +} + +func TestExtractHistories(t *testing.T) { + path := setupSafariHistoryDB(t) + + got, err := extractHistories(path) + require.NoError(t, err) + require.Len(t, got, 3) + + // Sorted by visit count descending (most visited first). + assert.Equal(t, 200, got[0].VisitCount) + assert.Equal(t, 100, got[1].VisitCount) + assert.Equal(t, 50, got[2].VisitCount) + + // Verify field mapping. + assert.Equal(t, "https://example.com", got[0].URL) + assert.Equal(t, "https://github.com", got[1].URL) + assert.Equal(t, "https://go.dev", got[2].URL) + assert.False(t, got[0].LastVisit.IsZero()) +} + +func TestExtractHistories_Dedup(t *testing.T) { + path := setupSafariHistoryDB(t) + + got, err := extractHistories(path) + require.NoError(t, err) + // 3 history_items, not 4 visits. + require.Len(t, got, 3) + + // GitHub (item 1) should have the later visit_time and its title. 
+ for _, h := range got { + if h.URL == "https://github.com" { + // 705067600 + 978307200 = 1683374800 (unix) + assert.Equal(t, int64(1683374800), h.LastVisit.Unix()) + // Title must come from the latest visit row, not an arbitrary one. + assert.Equal(t, "GitHub - Latest", h.Title) + return + } + } + t.Fatal("expected https://github.com in results") +} + +func TestCountHistories(t *testing.T) { + path := setupSafariHistoryDB(t) + + count, err := countHistories(path) + require.NoError(t, err) + assert.Equal(t, 3, count) +} + +func TestCountHistories_Empty(t *testing.T) { + path := createTestDB(t, "History.db", + []string{safariHistoryItemsSchema, safariHistoryVisitsSchema}) + + count, err := countHistories(path) + require.NoError(t, err) + assert.Equal(t, 0, count) +} + +func TestExtractHistories_NullTitle(t *testing.T) { + path := createTestDB(t, "History.db", + []string{safariHistoryItemsSchema, safariHistoryVisitsSchema}, + insertHistoryItem(1, "https://null.test", "null.test", 1), + // Visit with NULL title — COALESCE should return empty string. 
// Browser represents Safari browser data ready for extraction.
// Safari has a single flat data directory (no profile subdirectories)
// and stores most data unencrypted (passwords live in macOS Keychain).
//
// Values are created by NewBrowsers, which fills every field; the zero value
// has no resolved sources.
type Browser struct {
	cfg         types.BrowserConfig             // configuration this Browser was created from
	dataDir     string                          // Safari data directory, taken from cfg.UserDataDir (~/Library/Safari on macOS)
	sources     map[types.Category][]sourcePath // Category → candidate paths
	sourcePaths map[types.Category]resolvedPath // Category → discovered absolute path
}
+func NewBrowsers(cfg types.BrowserConfig) ([]*Browser, error) { + sourcePaths := resolveSourcePaths(safariSources, cfg.UserDataDir) + if len(sourcePaths) == 0 { + return nil, nil + } + return []*Browser{{ + cfg: cfg, + dataDir: cfg.UserDataDir, + sources: safariSources, + sourcePaths: sourcePaths, + }}, nil +} + +func (b *Browser) BrowserName() string { return b.cfg.Name } +func (b *Browser) ProfileDir() string { return b.dataDir } +func (b *Browser) ProfileName() string { return "default" } + +// Extract copies browser files to a temp directory and extracts data +// for the requested categories. +func (b *Browser) Extract(categories []types.Category) (*types.BrowserData, error) { + session, err := filemanager.NewSession() + if err != nil { + return nil, err + } + defer session.Cleanup() + + tempPaths := b.acquireFiles(session, categories) + + data := &types.BrowserData{} + for _, cat := range categories { + path, ok := tempPaths[cat] + if !ok { + continue + } + b.extractCategory(data, cat, path) + } + return data, nil +} + +// CountEntries copies browser files to a temp directory and counts entries +// per category without full extraction. +func (b *Browser) CountEntries(categories []types.Category) (map[types.Category]int, error) { + session, err := filemanager.NewSession() + if err != nil { + return nil, err + } + defer session.Cleanup() + + tempPaths := b.acquireFiles(session, categories) + + counts := make(map[types.Category]int) + for _, cat := range categories { + path, ok := tempPaths[cat] + if !ok { + continue + } + counts[cat] = b.countCategory(cat, path) + } + return counts, nil +} + +// acquireFiles copies source files to the session temp directory. 
+func (b *Browser) acquireFiles(session *filemanager.Session, categories []types.Category) map[types.Category]string { + tempPaths := make(map[types.Category]string) + for _, cat := range categories { + rp, ok := b.sourcePaths[cat] + if !ok { + continue + } + dst := filepath.Join(session.TempDir(), cat.String()) + if err := session.Acquire(rp.absPath, dst, rp.isDir); err != nil { + log.Debugf("acquire %s: %v", cat, err) + continue + } + tempPaths[cat] = dst + } + return tempPaths +} + +// extractCategory calls the appropriate extract function for a category. +func (b *Browser) extractCategory(data *types.BrowserData, cat types.Category, path string) { + var err error + switch cat { + case types.History: + data.Histories, err = extractHistories(path) + default: + return + } + if err != nil { + log.Debugf("extract %s for %s: %v", cat, b.BrowserName()+"/"+b.ProfileName(), err) + } +} + +// countCategory calls the appropriate count function for a category. +func (b *Browser) countCategory(cat types.Category, path string) int { + var count int + var err error + switch cat { + case types.History: + count, err = countHistories(path) + default: + // Unsupported categories silently return 0. + } + if err != nil { + log.Debugf("count %s for %s: %v", cat, b.BrowserName()+"/"+b.ProfileName(), err) + } + return count +} + +// resolvedPath holds the absolute path and type for a discovered source. +type resolvedPath struct { + absPath string + isDir bool +} + +// resolveSourcePaths checks which sources actually exist in dataDir. +// Candidates are tried in priority order; the first existing path wins. 
// coreDataEpochOffset is the number of seconds between the Unix epoch
// (1970-01-01) and the Core Data epoch (2001-01-01).
const coreDataEpochOffset = 978307200

// coredataTimestamp converts a Core Data timestamp (seconds since 2001-01-01,
// possibly fractional) to a time.Time. Safari's History.db uses this epoch
// for visit_time.
//
// The fractional part of seconds is kept as nanoseconds instead of being
// truncated, so sub-second visit times survive the conversion and values
// before the epoch are not rounded toward it.
func coredataTimestamp(seconds float64) time.Time {
	whole := int64(seconds)          // integral seconds, truncated toward zero
	frac := seconds - float64(whole) // remainder, same sign as seconds
	// time.Unix accepts nsec outside [0, 999999999] (including negative
	// values) and normalizes it into the seconds field.
	return time.Unix(whole+coreDataEpochOffset, int64(frac*float64(time.Second)))
}
+ require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, os.WriteFile(path, []byte("test"), 0o644)) +} + +// --------------------------------------------------------------------------- +// NewBrowsers +// --------------------------------------------------------------------------- + +func TestNewBrowsers(t *testing.T) { + tests := []struct { + name string + setup func(t *testing.T) string + wantLen int + }{ + { + name: "dir with History.db", + setup: func(t *testing.T) string { + dir := t.TempDir() + mkFile(t, dir, "History.db") + return dir + }, + wantLen: 1, + }, + { + name: "empty dir", + setup: func(t *testing.T) string { + return t.TempDir() + }, + wantLen: 0, + }, + { + name: "nonexistent dir", + setup: func(t *testing.T) string { + return "/nonexistent/path" + }, + wantLen: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + dir := tt.setup(t) + cfg := types.BrowserConfig{Name: "Safari", Kind: types.Safari, UserDataDir: dir} + browsers, err := NewBrowsers(cfg) + require.NoError(t, err) + + if tt.wantLen == 0 { + assert.Empty(t, browsers) + return + } + require.Len(t, browsers, tt.wantLen) + assert.Equal(t, "Safari", browsers[0].BrowserName()) + assert.Equal(t, "default", browsers[0].ProfileName()) + assert.Equal(t, dir, browsers[0].ProfileDir()) + }) + } +} + +// --------------------------------------------------------------------------- +// resolveSourcePaths +// --------------------------------------------------------------------------- + +func TestResolveSourcePaths(t *testing.T) { + dir := t.TempDir() + mkFile(t, dir, "History.db") + + resolved := resolveSourcePaths(safariSources, dir) + assert.Contains(t, resolved, types.History) + assert.Equal(t, filepath.Join(dir, "History.db"), resolved[types.History].absPath) + assert.False(t, resolved[types.History].isDir) +} + +func TestResolveSourcePaths_Empty(t *testing.T) { + resolved := resolveSourcePaths(safariSources, t.TempDir()) + assert.Empty(t, 
resolved) +} + +// --------------------------------------------------------------------------- +// CountEntries +// --------------------------------------------------------------------------- + +func TestCountEntries(t *testing.T) { + dir := t.TempDir() + dbPath := createTestDB(t, "History.db", + []string{safariHistoryItemsSchema, safariHistoryVisitsSchema}, + insertHistoryItem(1, "https://example.com", "example.com", 5), + insertHistoryItem(2, "https://go.dev", "go.dev", 10), + insertHistoryVisit(1, 1, 700000000.0, "Example"), + insertHistoryVisit(2, 2, 700000000.0, "Go"), + ) + data, err := os.ReadFile(dbPath) + require.NoError(t, err) + require.NoError(t, os.WriteFile(filepath.Join(dir, "History.db"), data, 0o644)) + + browsers, err := NewBrowsers(types.BrowserConfig{ + Name: "Safari", Kind: types.Safari, UserDataDir: dir, + }) + require.NoError(t, err) + require.Len(t, browsers, 1) + + counts, err := browsers[0].CountEntries([]types.Category{types.History}) + require.NoError(t, err) + assert.Equal(t, 2, counts[types.History]) +} + +// --------------------------------------------------------------------------- +// countCategory / extractCategory +// --------------------------------------------------------------------------- + +func TestCountCategory(t *testing.T) { + t.Run("History", func(t *testing.T) { + path := createTestDB(t, "History.db", + []string{safariHistoryItemsSchema, safariHistoryVisitsSchema}, + insertHistoryItem(1, "https://example.com", "example.com", 1), + ) + b := &Browser{} + assert.Equal(t, 1, b.countCategory(types.History, path)) + }) + + t.Run("UnsupportedCategory", func(t *testing.T) { + b := &Browser{} + assert.Equal(t, 0, b.countCategory(types.Cookie, "unused")) + assert.Equal(t, 0, b.countCategory(types.CreditCard, "unused")) + assert.Equal(t, 0, b.countCategory(types.SessionStorage, "unused")) + }) +} + +func TestExtractCategory(t *testing.T) { + t.Run("History", func(t *testing.T) { + path := createTestDB(t, "History.db", + 
// sourcePath is one candidate location for a category's data, expressed
// relative to the Safari data directory.
type sourcePath struct {
	rel   string // path relative to the data directory, in OS-native form
	isDir bool   // whether the target is expected to be a directory
}

// file builds a sourcePath for a regular file. rel is written with forward
// slashes and converted to the OS-native separator.
func file(rel string) sourcePath {
	native := filepath.FromSlash(rel)
	return sourcePath{rel: native}
}
// safariSources defines the Safari file layout.
// Each category maps to one or more candidate paths tried in priority order;
// the first existing path wins. Only History is mapped here, to the single
// file History.db in the data directory.
var safariSources = map[types.Category][]sourcePath{
	types.History: {file("History.db")},
}

// ---------------------------------------------------------------------------
// Real Safari table schemas — extracted via `sqlite3 History.db ".schema"`.
// ---------------------------------------------------------------------------

// safariHistoryItemsSchema creates history_items: one row per URL (url is
// UNIQUE), carrying the aggregate visit_count read by the history extractor.
const safariHistoryItemsSchema = `CREATE TABLE history_items (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	url TEXT NOT NULL UNIQUE,
	domain_expansion TEXT NULL,
	visit_count INTEGER NOT NULL DEFAULT 0,
	daily_visit_counts BLOB NOT NULL DEFAULT x'',
	weekly_visit_counts BLOB NULL,
	autocomplete_triggers BLOB NULL,
	should_recompute_derived_visit_counts INTEGER NOT NULL DEFAULT 1,
	visit_count_score INTEGER NOT NULL DEFAULT 0
)`

// safariHistoryVisitsSchema creates history_visits: one row per visit,
// referencing history_items(id). visit_time is REAL and title is nullable.
const safariHistoryVisitsSchema = `CREATE TABLE history_visits (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	history_item INTEGER NOT NULL REFERENCES history_items(id),
	visit_time REAL NOT NULL,
	title TEXT NULL,
	load_successful BOOLEAN NOT NULL DEFAULT 1,
	http_non_get INTEGER NOT NULL DEFAULT 0,
	synthesized INTEGER NOT NULL DEFAULT 0,
	redirect_source INTEGER NULL,
	redirect_destination INTEGER NULL,
	origin INTEGER NOT NULL DEFAULT 0,
	generation INTEGER NOT NULL DEFAULT 0,
	attributes INTEGER NOT NULL DEFAULT 0,
	score INTEGER NOT NULL DEFAULT 0
)`
INTO history_items (id, url, domain_expansion, visit_count) + VALUES (%d, '%s', '%s', %d)`, + id, url, domainExpansion, visitCount, + ) +} + +func insertHistoryVisit(id, historyItem int, visitTime float64, title string) string { + return fmt.Sprintf( + `INSERT INTO history_visits (id, history_item, visit_time, title) + VALUES (%d, %d, %f, '%s')`, + id, historyItem, visitTime, title, + ) +} + +// --------------------------------------------------------------------------- +// Test fixture builders +// --------------------------------------------------------------------------- + +func createTestDB(t *testing.T, name string, schemas []string, inserts ...string) string { //nolint:unparam // name will vary when future data types are added + t.Helper() + path := filepath.Join(t.TempDir(), name) + db, err := sql.Open("sqlite", path) + require.NoError(t, err) + defer db.Close() + + for _, schema := range schemas { + _, err = db.Exec(schema) + require.NoError(t, err) + } + for _, stmt := range inserts { + _, err = db.Exec(stmt) + require.NoError(t, err) + } + return path +} diff --git a/types/category.go b/types/category.go index 0d74ced..2fb73bb 100644 --- a/types/category.go +++ b/types/category.go @@ -78,6 +78,7 @@ const ( ChromiumYandex // Chromium variant with different file names and extract logic ChromiumOpera // Opera: extensions in "opsettings" key, data in Roaming Firefox + Safari ) // BrowserConfig holds the declarative configuration for a browser installation.