feat: add Safari browser support with history extraction (#564)

* feat: add Safari browser support with history extraction
* fix: use correlated subquery to ensure title matches latest visit
This commit is contained in:
Roger
2026-04-11 23:47:53 +08:00
committed by GitHub
parent 26817b44d5
commit 509cdc2468
11 changed files with 639 additions and 0 deletions
+12
View File
@@ -8,6 +8,7 @@ import (
"github.com/moond4rk/hackbrowserdata/browser/chromium"
"github.com/moond4rk/hackbrowserdata/browser/firefox"
"github.com/moond4rk/hackbrowserdata/browser/safari"
"github.com/moond4rk/hackbrowserdata/crypto/keyretriever"
"github.com/moond4rk/hackbrowserdata/log"
"github.com/moond4rk/hackbrowserdata/types"
@@ -150,6 +151,17 @@ func newBrowsers(cfg types.BrowserConfig) ([]Browser, error) {
}
return result, nil
case types.Safari:
found, err := safari.NewBrowsers(cfg)
if err != nil {
return nil, err
}
result := make([]Browser, len(found))
for i, b := range found {
result[i] = b
}
return result, nil
default:
return nil, fmt.Errorf("unknown browser kind: %d", cfg.Kind)
}
+6
View File
@@ -91,5 +91,11 @@ func platformBrowsers() []types.BrowserConfig {
Kind: types.Firefox,
UserDataDir: homeDir + "/Library/Application Support/Firefox/Profiles",
},
{
Key: "safari",
Name: safariName,
Kind: types.Safari,
UserDataDir: homeDir + "/Library/Safari",
},
}
}
+10
View File
@@ -349,6 +349,9 @@ func TestNewBrowsersDispatch(t *testing.T) {
firefoxDir := t.TempDir()
mkFile(t, firefoxDir, "abc.default", "places.sqlite")
safariDir := t.TempDir()
mkFile(t, safariDir, "History.db")
emptyDir := t.TempDir()
tests := []struct {
@@ -373,6 +376,13 @@ func TestNewBrowsersDispatch(t *testing.T) {
wantName: "Firefox",
wantProfile: "abc.default",
},
{
name: "safari dispatch",
cfg: types.BrowserConfig{Key: "safari", Name: "Safari", Kind: types.Safari, UserDataDir: safariDir},
wantLen: 1,
wantName: "Safari",
wantProfile: "default",
},
{
name: "unknown kind returns error",
cfg: types.BrowserConfig{Key: "unknown", Name: "Unknown", Kind: types.BrowserKind(99)},
+1
View File
@@ -26,4 +26,5 @@ const (
sogouName = "Sogou"
arcName = "Arc"
duckduckgoName = "DuckDuckGo"
safariName = "Safari"
)
+56
View File
@@ -0,0 +1,56 @@
package safari
import (
"database/sql"
"sort"
"github.com/moond4rk/hackbrowserdata/types"
"github.com/moond4rk/hackbrowserdata/utils/sqliteutil"
)
const (
// safariHistoryQuery returns one row per history_items entry with four
// columns: url, title, visit_count and visit_time. The correlated subquery
// selects the single history_visits row with the greatest visit_time for
// each item, so title and visit_time come from the same history_visits row.
// The LEFT JOIN keeps items that have no visits; COALESCE then yields ''
// for a missing/NULL title and 0 for a missing visit_time.
safariHistoryQuery = `SELECT hi.url, COALESCE(hv.title, ''), hi.visit_count,
COALESCE(hv.visit_time, 0)
FROM history_items hi
LEFT JOIN history_visits hv ON hv.id = (
SELECT hv2.id FROM history_visits hv2
WHERE hv2.history_item = hi.id
ORDER BY hv2.visit_time DESC LIMIT 1
)`
// safariCountHistoryQuery counts history_items rows (one per item, not
// one per visit), matching the row count safariHistoryQuery produces.
safariCountHistoryQuery = `SELECT COUNT(*) FROM history_items`
)
// extractHistories runs safariHistoryQuery against the database at path and
// returns one entry per history item, ordered from the highest visit count
// to the lowest. visit_time is converted with coredataTimestamp.
func extractHistories(path string) ([]types.HistoryEntry, error) {
	// scanEntry maps one result row onto a HistoryEntry.
	scanEntry := func(rows *sql.Rows) (types.HistoryEntry, error) {
		var (
			entry    types.HistoryEntry
			lastSeen float64
		)
		if err := rows.Scan(&entry.URL, &entry.Title, &entry.VisitCount, &lastSeen); err != nil {
			return types.HistoryEntry{}, err
		}
		entry.LastVisit = coredataTimestamp(lastSeen)
		return entry, nil
	}

	entries, err := sqliteutil.QueryRows(path, true, safariHistoryQuery, scanEntry)
	if err != nil {
		return nil, err
	}

	// Most visited first.
	sort.Slice(entries, func(a, b int) bool {
		return entries[b].VisitCount < entries[a].VisitCount
	})
	return entries, nil
}
// countHistories runs safariCountHistoryQuery against the database at path
// and returns the resulting count.
func countHistories(path string) (int, error) {
	total, err := sqliteutil.CountRows(path, true, safariCountHistoryQuery)
	return total, err
}
+105
View File
@@ -0,0 +1,105 @@
package safari
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func setupSafariHistoryDB(t *testing.T) string {
t.Helper()
return createTestDB(t, "History.db",
[]string{safariHistoryItemsSchema, safariHistoryVisitsSchema},
insertHistoryItem(1, "https://github.com", "github.com", 100),
insertHistoryItem(2, "https://go.dev", "go.dev", 50),
insertHistoryItem(3, "https://example.com", "example.com", 200),
// Item 1 has two visits — extractHistories must deduplicate.
insertHistoryVisit(1, 1, 704067600.0, "GitHub"),
insertHistoryVisit(2, 1, 705067600.0, "GitHub - Latest"),
insertHistoryVisit(3, 2, 703067600.0, "The Go Programming Language"),
insertHistoryVisit(4, 3, 700067600.0, "Example Domain"),
)
}
func TestExtractHistories(t *testing.T) {
path := setupSafariHistoryDB(t)
got, err := extractHistories(path)
require.NoError(t, err)
require.Len(t, got, 3)
// Sorted by visit count descending (most visited first).
assert.Equal(t, 200, got[0].VisitCount)
assert.Equal(t, 100, got[1].VisitCount)
assert.Equal(t, 50, got[2].VisitCount)
// Verify field mapping.
assert.Equal(t, "https://example.com", got[0].URL)
assert.Equal(t, "https://github.com", got[1].URL)
assert.Equal(t, "https://go.dev", got[2].URL)
assert.False(t, got[0].LastVisit.IsZero())
}
func TestExtractHistories_Dedup(t *testing.T) {
path := setupSafariHistoryDB(t)
got, err := extractHistories(path)
require.NoError(t, err)
// 3 history_items, not 4 visits.
require.Len(t, got, 3)
// GitHub (item 1) should have the later visit_time and its title.
for _, h := range got {
if h.URL == "https://github.com" {
// 705067600 + 978307200 = 1683374800 (unix)
assert.Equal(t, int64(1683374800), h.LastVisit.Unix())
// Title must come from the latest visit row, not an arbitrary one.
assert.Equal(t, "GitHub - Latest", h.Title)
return
}
}
t.Fatal("expected https://github.com in results")
}
func TestCountHistories(t *testing.T) {
path := setupSafariHistoryDB(t)
count, err := countHistories(path)
require.NoError(t, err)
assert.Equal(t, 3, count)
}
func TestCountHistories_Empty(t *testing.T) {
path := createTestDB(t, "History.db",
[]string{safariHistoryItemsSchema, safariHistoryVisitsSchema})
count, err := countHistories(path)
require.NoError(t, err)
assert.Equal(t, 0, count)
}
func TestExtractHistories_NullTitle(t *testing.T) {
path := createTestDB(t, "History.db",
[]string{safariHistoryItemsSchema, safariHistoryVisitsSchema},
insertHistoryItem(1, "https://null.test", "null.test", 1),
// Visit with NULL title — COALESCE should return empty string.
`INSERT INTO history_visits (id, history_item, visit_time) VALUES (1, 1, 700000000.0)`,
)
got, err := extractHistories(path)
require.NoError(t, err)
require.Len(t, got, 1)
assert.Equal(t, "https://null.test", got[0].URL)
assert.Empty(t, got[0].Title)
}
func TestCoredataTimestamp(t *testing.T) {
// 0 Core Data epoch = 2001-01-01 00:00:00 UTC = Unix 978307200
ts := coredataTimestamp(0)
assert.Equal(t, int64(978307200), ts.Unix())
// Known value: 700000000 Core Data = 1678307200 Unix
ts2 := coredataTimestamp(700000000)
assert.Equal(t, int64(1678307200), ts2.Unix())
}
+169
View File
@@ -0,0 +1,169 @@
package safari
import (
"os"
"path/filepath"
"time"
"github.com/moond4rk/hackbrowserdata/filemanager"
"github.com/moond4rk/hackbrowserdata/log"
"github.com/moond4rk/hackbrowserdata/types"
)
// Browser represents Safari browser data ready for extraction.
// Safari has a single flat data directory (no profile subdirectories)
// and stores most data unencrypted (passwords live in macOS Keychain).
//
// Values are built by NewBrowsers, which fills sourcePaths with the sources
// that exist on disk at construction time.
type Browser struct {
// cfg is the browser configuration this instance was created from;
// BrowserName reports cfg.Name.
cfg types.BrowserConfig
dataDir string // Safari data directory, copied from cfg.UserDataDir (~/Library/Safari in the macOS platform config)
sources map[types.Category][]sourcePath // Category → candidate paths, relative to dataDir
sourcePaths map[types.Category]resolvedPath // Category → discovered absolute path (only categories found on disk)
}
// NewBrowsers looks for known Safari source files under cfg.UserDataDir.
// It returns a one-element slice when at least one source exists and
// (nil, nil) when none do. Safari has no profile directories, so the data
// directory itself is treated as the only profile.
func NewBrowsers(cfg types.BrowserConfig) ([]*Browser, error) {
	discovered := resolveSourcePaths(safariSources, cfg.UserDataDir)
	if len(discovered) == 0 {
		// Nothing to extract: not an error, just no browser to report.
		return nil, nil
	}

	browser := &Browser{
		cfg:         cfg,
		dataDir:     cfg.UserDataDir,
		sources:     safariSources,
		sourcePaths: discovered,
	}
	return []*Browser{browser}, nil
}
// BrowserName returns the display name taken from the browser configuration.
func (b *Browser) BrowserName() string {
	return b.cfg.Name
}

// ProfileDir returns the Safari data directory this Browser reads from.
func (b *Browser) ProfileDir() string {
	return b.dataDir
}

// ProfileName always returns "default": the data directory is used directly
// rather than being split into named profiles.
func (b *Browser) ProfileName() string {
	return "default"
}
// Extract copies the source files for the requested categories into a
// temporary session directory and extracts their data. Categories whose file
// could not be acquired are skipped. The only error returned is a failure to
// create the session.
func (b *Browser) Extract(categories []types.Category) (*types.BrowserData, error) {
	session, err := filemanager.NewSession()
	if err != nil {
		return nil, err
	}
	defer session.Cleanup()

	acquired := b.acquireFiles(session, categories)
	result := new(types.BrowserData)
	for _, category := range categories {
		if tempPath, found := acquired[category]; found {
			b.extractCategory(result, category, tempPath)
		}
	}
	return result, nil
}
// CountEntries copies the source files for the requested categories into a
// temporary session directory and counts entries per category without a full
// extraction. Categories whose file could not be acquired are absent from
// the returned map. The only error returned is a failure to create the
// session.
func (b *Browser) CountEntries(categories []types.Category) (map[types.Category]int, error) {
	session, err := filemanager.NewSession()
	if err != nil {
		return nil, err
	}
	defer session.Cleanup()

	acquired := b.acquireFiles(session, categories)
	totals := map[types.Category]int{}
	for _, category := range categories {
		if tempPath, found := acquired[category]; found {
			totals[category] = b.countCategory(category, tempPath)
		}
	}
	return totals, nil
}
// acquireFiles asks the session to acquire the resolved source of each
// requested category into the session temp directory, using the category
// name as the destination file name. It returns category → destination path
// for the acquisitions that succeeded; failures are logged at debug level
// and skipped.
func (b *Browser) acquireFiles(session *filemanager.Session, categories []types.Category) map[types.Category]string {
	acquired := map[types.Category]string{}
	for _, category := range categories {
		source, known := b.sourcePaths[category]
		if !known {
			continue
		}

		target := filepath.Join(session.TempDir(), category.String())
		err := session.Acquire(source.absPath, target, source.isDir)
		if err != nil {
			log.Debugf("acquire %s: %v", category, err)
			continue
		}
		acquired[category] = target
	}
	return acquired
}
// extractCategory runs the extractor matching cat on the file at path and
// stores the result in data. Categories without an extractor leave data
// untouched. Extraction errors are logged at debug level, not returned.
func (b *Browser) extractCategory(data *types.BrowserData, cat types.Category, path string) {
	var failure error
	switch cat {
	case types.History:
		histories, err := extractHistories(path)
		data.Histories, failure = histories, err
	default:
		return
	}

	if failure == nil {
		return
	}
	profile := b.BrowserName() + "/" + b.ProfileName()
	log.Debugf("extract %s for %s: %v", cat, profile, failure)
}
// countCategory runs the counter matching cat on the file at path and returns
// the count. Categories without a counter yield 0. Counting errors are logged
// at debug level and the count returned by the counter is passed through.
func (b *Browser) countCategory(cat types.Category, path string) int {
	switch cat {
	case types.History:
		total, err := countHistories(path)
		if err != nil {
			log.Debugf("count %s for %s: %v", cat, b.BrowserName()+"/"+b.ProfileName(), err)
		}
		return total
	default:
		// Unsupported categories silently return 0.
		return 0
	}
}
// resolvedPath holds the absolute path and type for a discovered source.
// Values are produced by resolveSourcePaths for candidates that exist on disk.
type resolvedPath struct {
// absPath is the candidate's relative path joined onto the data directory.
absPath string
// isDir is true when the source is a directory rather than a regular file.
isDir bool
}
// resolveSourcePaths reports which sources actually exist under dataDir.
// For each category the candidates are tried in order and the first one that
// exists with the expected kind (file vs. directory) wins. Categories with
// no usable candidate are left out of the result.
func resolveSourcePaths(sources map[types.Category][]sourcePath, dataDir string) map[types.Category]resolvedPath {
	found := make(map[types.Category]resolvedPath)
	for category, candidates := range sources {
		for _, candidate := range candidates {
			full := filepath.Join(dataDir, candidate.rel)
			info, err := os.Stat(full)
			// Skip paths that are missing or are the wrong kind of entry.
			if err != nil || info.IsDir() != candidate.isDir {
				continue
			}
			found[category] = resolvedPath{absPath: full, isDir: candidate.isDir}
			break
		}
	}
	return found
}
// coreDataEpochOffset is the number of seconds between the Unix epoch
// (1970-01-01) and the Core Data epoch (2001-01-01).
const coreDataEpochOffset = 978307200

// coredataTimestamp converts a Core Data timestamp (seconds since 2001-01-01)
// to a time.Time. Safari's History.db uses this epoch for visit_time.
//
// visit_time is a REAL column, so the fractional part of seconds is kept as
// nanoseconds rather than truncated. For whole-second inputs the result is
// unchanged from a plain integer conversion.
func coredataTimestamp(seconds float64) time.Time {
	whole := int64(seconds)
	// The remainder lies in (-1, 1). time.Unix accepts nanoseconds outside
	// [0, 999999999] and normalizes them, so a negative remainder correctly
	// moves the result to the previous second.
	nanos := int64((seconds - float64(whole)) * float64(time.Second))
	return time.Unix(whole+coreDataEpochOffset, nanos)
}
+171
View File
@@ -0,0 +1,171 @@
package safari
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/moond4rk/hackbrowserdata/types"
)
// mkFile joins parts into a path, creates any missing parent directories and
// writes a small placeholder file there, failing the test on any error.
func mkFile(t *testing.T, parts ...string) {
	t.Helper()

	target := filepath.Join(parts...)
	parent := filepath.Dir(target)
	require.NoError(t, os.MkdirAll(parent, 0o755))
	require.NoError(t, os.WriteFile(target, []byte("test"), 0o644))
}
// ---------------------------------------------------------------------------
// NewBrowsers
// ---------------------------------------------------------------------------
// TestNewBrowsers checks that a Browser is returned only when the data
// directory contains a known source file, and that its accessors report the
// configured name, the "default" profile and the data directory.
func TestNewBrowsers(t *testing.T) {
	withHistory := func(t *testing.T) string {
		dir := t.TempDir()
		mkFile(t, dir, "History.db")
		return dir
	}
	emptyDir := func(t *testing.T) string { return t.TempDir() }
	missingDir := func(t *testing.T) string { return "/nonexistent/path" }

	cases := []struct {
		name    string
		setup   func(t *testing.T) string
		wantLen int
	}{
		{name: "dir with History.db", setup: withHistory, wantLen: 1},
		{name: "empty dir", setup: emptyDir, wantLen: 0},
		{name: "nonexistent dir", setup: missingDir, wantLen: 0},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			dataDir := tc.setup(t)
			got, err := NewBrowsers(types.BrowserConfig{
				Name:        "Safari",
				Kind:        types.Safari,
				UserDataDir: dataDir,
			})
			require.NoError(t, err)

			if tc.wantLen == 0 {
				assert.Empty(t, got)
				return
			}
			require.Len(t, got, tc.wantLen)

			first := got[0]
			assert.Equal(t, "Safari", first.BrowserName())
			assert.Equal(t, "default", first.ProfileName())
			assert.Equal(t, dataDir, first.ProfileDir())
		})
	}
}
// ---------------------------------------------------------------------------
// resolveSourcePaths
// ---------------------------------------------------------------------------
// TestResolveSourcePaths checks that an existing History.db is resolved to
// its absolute path and reported as a regular file.
func TestResolveSourcePaths(t *testing.T) {
	root := t.TempDir()
	mkFile(t, root, "History.db")

	got := resolveSourcePaths(safariSources, root)
	assert.Contains(t, got, types.History)

	history := got[types.History]
	assert.Equal(t, filepath.Join(root, "History.db"), history.absPath)
	assert.False(t, history.isDir)
}
// TestResolveSourcePaths_Empty checks that an empty directory resolves to no
// sources at all.
func TestResolveSourcePaths_Empty(t *testing.T) {
	emptyRoot := t.TempDir()
	got := resolveSourcePaths(safariSources, emptyRoot)
	assert.Empty(t, got)
}
// ---------------------------------------------------------------------------
// CountEntries
// ---------------------------------------------------------------------------
func TestCountEntries(t *testing.T) {
dir := t.TempDir()
dbPath := createTestDB(t, "History.db",
[]string{safariHistoryItemsSchema, safariHistoryVisitsSchema},
insertHistoryItem(1, "https://example.com", "example.com", 5),
insertHistoryItem(2, "https://go.dev", "go.dev", 10),
insertHistoryVisit(1, 1, 700000000.0, "Example"),
insertHistoryVisit(2, 2, 700000000.0, "Go"),
)
data, err := os.ReadFile(dbPath)
require.NoError(t, err)
require.NoError(t, os.WriteFile(filepath.Join(dir, "History.db"), data, 0o644))
browsers, err := NewBrowsers(types.BrowserConfig{
Name: "Safari", Kind: types.Safari, UserDataDir: dir,
})
require.NoError(t, err)
require.Len(t, browsers, 1)
counts, err := browsers[0].CountEntries([]types.Category{types.History})
require.NoError(t, err)
assert.Equal(t, 2, counts[types.History])
}
// ---------------------------------------------------------------------------
// countCategory / extractCategory
// ---------------------------------------------------------------------------
func TestCountCategory(t *testing.T) {
t.Run("History", func(t *testing.T) {
path := createTestDB(t, "History.db",
[]string{safariHistoryItemsSchema, safariHistoryVisitsSchema},
insertHistoryItem(1, "https://example.com", "example.com", 1),
)
b := &Browser{}
assert.Equal(t, 1, b.countCategory(types.History, path))
})
t.Run("UnsupportedCategory", func(t *testing.T) {
b := &Browser{}
assert.Equal(t, 0, b.countCategory(types.Cookie, "unused"))
assert.Equal(t, 0, b.countCategory(types.CreditCard, "unused"))
assert.Equal(t, 0, b.countCategory(types.SessionStorage, "unused"))
})
}
func TestExtractCategory(t *testing.T) {
t.Run("History", func(t *testing.T) {
path := createTestDB(t, "History.db",
[]string{safariHistoryItemsSchema, safariHistoryVisitsSchema},
insertHistoryItem(1, "https://example.com", "example.com", 3),
insertHistoryItem(2, "https://go.dev", "go.dev", 1),
insertHistoryVisit(1, 1, 700000000.0, "Example"),
insertHistoryVisit(2, 2, 700000000.0, "Go"),
)
b := &Browser{}
data := &types.BrowserData{}
b.extractCategory(data, types.History, path)
require.Len(t, data.Histories, 2)
// Sorted by visit count descending
assert.Equal(t, 3, data.Histories[0].VisitCount)
assert.Equal(t, 1, data.Histories[1].VisitCount)
})
t.Run("UnsupportedCategory", func(t *testing.T) {
b := &Browser{}
data := &types.BrowserData{}
b.extractCategory(data, types.Cookie, "unused")
b.extractCategory(data, types.CreditCard, "unused")
assert.Empty(t, data.Cookies)
assert.Empty(t, data.CreditCards)
})
}
+23
View File
@@ -0,0 +1,23 @@
package safari
import (
"path/filepath"
"github.com/moond4rk/hackbrowserdata/types"
)
// sourcePath describes a single candidate location for browser data,
// relative to the Safari data directory. Candidates are listed per category
// in safariSources.
type sourcePath struct {
rel string // relative path from dataDir
isDir bool // true for directory targets
}
// file builds a sourcePath for a regular file (not a directory). rel is
// written with forward slashes and converted to the OS path separator.
func file(rel string) sourcePath {
	return sourcePath{
		rel:   filepath.FromSlash(rel),
		isDir: false,
	}
}
// safariSources defines the Safari file layout.
// Each category maps to one or more candidate paths tried in priority order;
// the first existing path wins.
// Only History is listed here, backed by History.db in the data directory.
var safariSources = map[types.Category][]sourcePath{
types.History: {file("History.db")},
}
+85
View File
@@ -0,0 +1,85 @@
package safari
import (
	"database/sql"
	"fmt"
	"path/filepath"
	"strings"
	"testing"

	"github.com/stretchr/testify/require"
	_ "modernc.org/sqlite"
)
// ---------------------------------------------------------------------------
// Real Safari table schemas — extracted via `sqlite3 History.db ".schema"`.
// ---------------------------------------------------------------------------
// safariHistoryItemsSchema creates the history_items table: one row per URL
// (url is UNIQUE) together with its visit_count. Every NOT NULL column other
// than url has a default, so fixtures only need to supply the columns they
// care about.
const safariHistoryItemsSchema = `CREATE TABLE history_items (
id INTEGER PRIMARY KEY AUTOINCREMENT,
url TEXT NOT NULL UNIQUE,
domain_expansion TEXT NULL,
visit_count INTEGER NOT NULL DEFAULT 0,
daily_visit_counts BLOB NOT NULL DEFAULT x'',
weekly_visit_counts BLOB NULL,
autocomplete_triggers BLOB NULL,
should_recompute_derived_visit_counts INTEGER NOT NULL DEFAULT 1,
visit_count_score INTEGER NOT NULL DEFAULT 0
)`
// safariHistoryVisitsSchema creates the history_visits table: one row per
// visit, linked to history_items through history_item. visit_time is a
// required REAL and title is nullable.
const safariHistoryVisitsSchema = `CREATE TABLE history_visits (
id INTEGER PRIMARY KEY AUTOINCREMENT,
history_item INTEGER NOT NULL REFERENCES history_items(id),
visit_time REAL NOT NULL,
title TEXT NULL,
load_successful BOOLEAN NOT NULL DEFAULT 1,
http_non_get INTEGER NOT NULL DEFAULT 0,
synthesized INTEGER NOT NULL DEFAULT 0,
redirect_source INTEGER NULL,
redirect_destination INTEGER NULL,
origin INTEGER NOT NULL DEFAULT 0,
generation INTEGER NOT NULL DEFAULT 0,
attributes INTEGER NOT NULL DEFAULT 0,
score INTEGER NOT NULL DEFAULT 0
)`
// ---------------------------------------------------------------------------
// INSERT helpers
// ---------------------------------------------------------------------------
// insertHistoryItem returns an INSERT statement for one history_items row.
// Text values are embedded as SQL string literals, so any single quote in url
// or domainExpansion is doubled to keep the statement well-formed.
func insertHistoryItem(id int, url, domainExpansion string, visitCount int) string {
	// quote escapes s for use inside a single-quoted SQL string literal.
	quote := func(s string) string { return strings.ReplaceAll(s, "'", "''") }
	return fmt.Sprintf(
		`INSERT INTO history_items (id, url, domain_expansion, visit_count)
VALUES (%d, '%s', '%s', %d)`,
		id, quote(url), quote(domainExpansion), visitCount,
	)
}
// insertHistoryVisit returns an INSERT statement for one history_visits row.
// visitTime is formatted with %f (six decimal places). title is embedded as a
// SQL string literal, so any single quote in it is doubled to keep the
// statement well-formed.
func insertHistoryVisit(id, historyItem int, visitTime float64, title string) string {
	escapedTitle := strings.ReplaceAll(title, "'", "''")
	return fmt.Sprintf(
		`INSERT INTO history_visits (id, history_item, visit_time, title)
VALUES (%d, %d, %f, '%s')`,
		id, historyItem, visitTime, escapedTitle,
	)
}
// ---------------------------------------------------------------------------
// Test fixture builders
// ---------------------------------------------------------------------------
// createTestDB creates a SQLite database called name inside a fresh temporary
// directory, executes every schema statement followed by every insert
// statement, and returns the database path. Any failure aborts the test.
func createTestDB(t *testing.T, name string, schemas []string, inserts ...string) string { //nolint:unparam // name will vary when future data types are added
	t.Helper()

	dbPath := filepath.Join(t.TempDir(), name)
	conn, err := sql.Open("sqlite", dbPath)
	require.NoError(t, err)
	defer conn.Close()

	// runAll executes statements in order, stopping the test at the first error.
	runAll := func(statements []string) {
		for _, statement := range statements {
			_, execErr := conn.Exec(statement)
			require.NoError(t, execErr)
		}
	}
	runAll(schemas)
	runAll(inserts)
	return dbPath
}
+1
View File
@@ -78,6 +78,7 @@ const (
ChromiumYandex // Chromium variant with different file names and extract logic
ChromiumOpera // Opera: extensions in "opsettings" key, data in Roaming
Firefox
Safari
)
// BrowserConfig holds the declarative configuration for a browser installation.