diff --git a/browser/archive.go b/browser/archive.go new file mode 100644 index 0000000..28e3d34 --- /dev/null +++ b/browser/archive.go @@ -0,0 +1,68 @@ +package browser + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/moond4rk/hackbrowserdata/browser/chromium" + "github.com/moond4rk/hackbrowserdata/filemanager" + "github.com/moond4rk/hackbrowserdata/log" + "github.com/moond4rk/hackbrowserdata/types" + "github.com/moond4rk/hackbrowserdata/utils/fileutil" +) + +// Archivable is implemented by installations that can enumerate their decryption-relevant files for +// cross-host transport (Chromium only). +type Archivable interface { + BrowserKey() string + ArchiveSources(categories []types.Category) []chromium.ArchiveSource +} + +// WriteArchive packs each browser's decryption-relevant files into a zip whose internal layout is +// <browser-key>/<User Data-relative path>, so a restore can re-expand it and decrypt with a keys.json. Files +// are staged through a locked-file session first because Windows holds exclusive SQLite locks. Returns +// the number of source entries staged (a directory source counts once). 
+func WriteArchive(browsers []Browser, categories []types.Category, outPath string) (int, error) { + session, err := filemanager.NewSession() + if err != nil { + return 0, err + } + defer session.Cleanup() + + staging := session.TempDir() + seen := make(map[string]bool) + count := 0 + for _, b := range browsers { + archivable, ok := b.(Archivable) + if !ok { + continue + } + key := archivable.BrowserKey() + for _, src := range archivable.ArchiveSources(categories) { + entry := key + "/" + src.LayoutRel + if seen[entry] { + continue + } + seen[entry] = true + + dst := filepath.Join(staging, key, filepath.FromSlash(src.LayoutRel)) + if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil { + log.Warnf("archive: %s: %v", entry, err) + continue + } + if err := session.Acquire(src.AbsPath, dst, src.IsDir); err != nil { + log.Warnf("archive: acquire %s: %v", entry, err) + continue + } + count++ + } + } + if count == 0 { + return 0, fmt.Errorf("no decryption-relevant files found to archive") + } + if err := fileutil.ZipDir(outPath, staging); err != nil { + return 0, fmt.Errorf("write archive %s: %w", outPath, err) + } + return count, nil +} diff --git a/browser/archive_test.go b/browser/archive_test.go new file mode 100644 index 0000000..17af7f6 --- /dev/null +++ b/browser/archive_test.go @@ -0,0 +1,58 @@ +package browser + +import ( + "os" + "path/filepath" + "testing" + + "github.com/moond4rk/hackbrowserdata/browser/chromium" + "github.com/moond4rk/hackbrowserdata/types" + "github.com/moond4rk/hackbrowserdata/utils/fileutil" +) + +// TestWriteArchive_RoundTrip exercises the archive path: ArchiveSources -> WriteArchive (stage+zip) +// -> Unzip, asserting the archive's internal layout is <browser-key>/<User Data-relative path>. 
+func TestWriteArchive_RoundTrip(t *testing.T) { + origin := t.TempDir() + def := filepath.Join(origin, "Default") + if err := os.MkdirAll(def, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(def, "Preferences"), []byte("{}"), 0o600); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(def, "History"), []byte("hist"), 0o600); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(origin, "Local State"), []byte("{}"), 0o600); err != nil { + t.Fatal(err) + } + + b, err := chromium.NewBrowser(types.BrowserConfig{Key: "chrome", Name: "chrome", Kind: types.Chromium, UserDataDir: origin}) + if err != nil || b == nil { + t.Fatalf("NewBrowser: b=%v err=%v", b, err) + } + + zipPath := filepath.Join(t.TempDir(), "data.zip") + n, err := WriteArchive([]Browser{b}, []types.Category{types.History}, zipPath) + if err != nil { + t.Fatalf("WriteArchive: %v", err) + } + if n == 0 { + t.Fatal("WriteArchive captured 0 entries") + } + + extracted := t.TempDir() + if err := fileutil.Unzip(zipPath, extracted); err != nil { + t.Fatalf("Unzip: %v", err) + } + for _, rel := range []string{ + filepath.Join("chrome", "Default", "History"), + filepath.Join("chrome", "Default", "Preferences"), + filepath.Join("chrome", "Local State"), + } { + if _, err := os.Stat(filepath.Join(extracted, rel)); err != nil { + t.Errorf("expected %s in archive layout: %v", rel, err) + } + } +} diff --git a/browser/chromium/archive.go b/browser/chromium/archive.go new file mode 100644 index 0000000..ae1f93c --- /dev/null +++ b/browser/chromium/archive.go @@ -0,0 +1,64 @@ +package chromium + +import ( + "path" + "path/filepath" + + "github.com/moond4rk/hackbrowserdata/types" + "github.com/moond4rk/hackbrowserdata/utils/fileutil" +) + +// ArchiveSource is one decryption-relevant file or directory plus its path inside the browser's +// User Data tree (forward-slash), so an archive can be re-expanded into a working profile layout. 
+type ArchiveSource struct { + AbsPath string + LayoutRel string + IsDir bool +} + +// installationFiles live at the User Data root (shared across profiles); archived for fidelity even +// though keys.json-based restore does not read them. +var installationFiles = []string{"Local State"} + +// ArchiveSources lists the files an archive must capture for the given categories: the User Data root +// files (Local State), every resolved category source per profile, plus each profile's Preferences +// marker so a restore can rediscover the profile. LayoutRel is forward-slash, relative to the root. +func (b *Browser) ArchiveSources(categories []types.Category) []ArchiveSource { + var out []ArchiveSource + for _, name := range installationFiles { + abs := filepath.Join(b.cfg.UserDataDir, name) + if fileutil.FileExists(abs) { + out = append(out, ArchiveSource{AbsPath: abs, LayoutRel: name, IsDir: false}) + } + } + for _, p := range b.profiles { + // Flat-layout installs hold data directly under UserDataDir (profileDir == root); skip the + // basename so the archive matches the real layout instead of inserting a phantom level. 
+ profileRel := "" + if p.profileDir != b.cfg.UserDataDir { + profileRel = filepath.Base(p.profileDir) + } + for _, marker := range profileMarkers { + abs := filepath.Join(p.profileDir, marker) + if fileutil.FileExists(abs) { + out = append(out, ArchiveSource{ + AbsPath: abs, + LayoutRel: path.Join(profileRel, marker), + IsDir: false, + }) + } + } + for _, cat := range categories { + rp, ok := p.sourcePaths[cat] + if !ok { + continue + } + out = append(out, ArchiveSource{ + AbsPath: rp.absPath, + LayoutRel: path.Join(profileRel, rp.rel), + IsDir: rp.isDir, + }) + } + } + return out +} diff --git a/browser/chromium/archive_test.go b/browser/chromium/archive_test.go new file mode 100644 index 0000000..ecc13c7 --- /dev/null +++ b/browser/chromium/archive_test.go @@ -0,0 +1,88 @@ +package chromium + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/moond4rk/hackbrowserdata/types" +) + +func TestArchiveSources_ForwardSlashLayout(t *testing.T) { + udd := t.TempDir() + networkDir := filepath.Join(udd, "Default", "Network") + if err := os.MkdirAll(networkDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(udd, "Default", "Preferences"), []byte("{}"), 0o600); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(networkDir, "Cookies"), []byte("x"), 0o600); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(udd, "Local State"), []byte("{}"), 0o600); err != nil { + t.Fatal(err) + } + + b, err := NewBrowser(types.BrowserConfig{Key: "chrome", Name: "chrome", Kind: types.Chromium, UserDataDir: udd}) + if err != nil || b == nil { + t.Fatalf("NewBrowser: b=%v err=%v", b, err) + } + + srcs := b.ArchiveSources([]types.Category{types.Cookie}) + + var gotCookie, gotMarker, gotLocalState bool + for _, s := range srcs { + if strings.Contains(s.LayoutRel, `\`) { + t.Errorf("LayoutRel must be forward-slash, got %q", s.LayoutRel) + } + switch s.LayoutRel { + case "Default/Network/Cookies": + 
gotCookie = true + case "Default/Preferences": + gotMarker = true + case "Local State": + gotLocalState = true + } + } + if !gotCookie { + t.Errorf("missing Cookies entry with layout path, got %+v", srcs) + } + if !gotMarker { + t.Errorf("missing Preferences marker entry (needed for restore profile discovery), got %+v", srcs) + } + if !gotLocalState { + t.Errorf("missing Local State entry (User Data root file), got %+v", srcs) + } +} + +func TestArchiveSources_FlatLayoutNoExtraLevel(t *testing.T) { + // Flat-layout install: data lives directly under UserDataDir with no Default/ subdir, so + // discoverProfiles falls back to UserDataDir itself as the profile (profileDir == root). + udd := t.TempDir() + if err := os.WriteFile(filepath.Join(udd, "History"), []byte("x"), 0o600); err != nil { + t.Fatal(err) + } + + b, err := NewBrowser(types.BrowserConfig{Key: "opera", Name: "opera", Kind: types.Chromium, UserDataDir: udd}) + if err != nil || b == nil { + t.Fatalf("NewBrowser: b=%v err=%v", b, err) + } + + srcs := b.ArchiveSources([]types.Category{types.History}) + + phantom := filepath.Base(udd) + "/" + var gotHistory bool + for _, s := range srcs { + if strings.HasPrefix(s.LayoutRel, phantom) { + t.Errorf("flat layout must not insert a %q level, got %q", phantom, s.LayoutRel) + } + if s.LayoutRel == "History" { + gotHistory = true + } + } + if !gotHistory { + t.Errorf("expected History at archive root, got %+v", srcs) + } +} diff --git a/browser/chromium/chromium.go b/browser/chromium/chromium.go index 2848915..7527fee 100644 --- a/browser/chromium/chromium.go +++ b/browser/chromium/chromium.go @@ -55,6 +55,7 @@ func NewBrowser(cfg types.BrowserConfig) (*Browser, error) { func (b *Browser) SetRetrievers(r masterkey.Retrievers) { b.retrievers = r } func (b *Browser) BrowserName() string { return b.cfg.Name } +func (b *Browser) BrowserKey() string { return b.cfg.Key } func (b *Browser) UserDataDir() string { return b.cfg.UserDataDir } // Profiles returns the identity of 
every profile in this installation. @@ -204,9 +205,11 @@ func hasAnySource(sources map[types.Category][]sourcePath, dir string) bool { return false } -// resolvedPath holds the absolute path and type for a discovered source. +// resolvedPath holds the absolute path, the slash-relative source path, and the type of a discovered +// source. rel is retained (not just absPath) so archive can reproduce the User Data layout. type resolvedPath struct { absPath string + rel string isDir bool } @@ -222,7 +225,7 @@ func resolveSourcePaths(sources map[types.Category][]sourcePath, profileDir stri continue } if sp.isDir == info.IsDir() { - resolved[cat] = resolvedPath{abs, sp.isDir} + resolved[cat] = resolvedPath{absPath: abs, rel: sp.rel, isDir: sp.isDir} break } } diff --git a/browser/chromium/source.go b/browser/chromium/source.go index 5c5baff..dada72c 100644 --- a/browser/chromium/source.go +++ b/browser/chromium/source.go @@ -1,8 +1,6 @@ package chromium import ( - "path/filepath" - "github.com/moond4rk/hackbrowserdata/masterkey" "github.com/moond4rk/hackbrowserdata/types" ) @@ -14,8 +12,10 @@ type sourcePath struct { isDir bool // true for directory targets (LevelDB, Session Storage) } -func file(rel string) sourcePath { return sourcePath{rel: filepath.FromSlash(rel), isDir: false} } -func dir(rel string) sourcePath { return sourcePath{rel: filepath.FromSlash(rel), isDir: true} } +// rel stays slash-canonical (e.g. "Network/Cookies"); filepath.Join converts at resolve time, and +// archive reuses it verbatim as a forward-slash zip entry name. +func file(rel string) sourcePath { return sourcePath{rel: rel, isDir: false} } +func dir(rel string) sourcePath { return sourcePath{rel: rel, isDir: true} } // chromiumSources defines the standard Chromium file layout. 
// Each category maps to one or more candidate paths tried in priority order; diff --git a/cmd/hack-browser-data/archive.go b/cmd/hack-browser-data/archive.go new file mode 100644 index 0000000..8ca7012 --- /dev/null +++ b/cmd/hack-browser-data/archive.go @@ -0,0 +1,49 @@ +package main + +import ( + "github.com/spf13/cobra" + + "github.com/moond4rk/hackbrowserdata/browser" + "github.com/moond4rk/hackbrowserdata/log" +) + +func archiveCmd() *cobra.Command { + var ( + browserName string + category string + outputPath string + ) + + cmd := &cobra.Command{ + Use: "archive", + Short: "Pack decryption-relevant profile files into a zip for cross-host restore", + Example: ` hack-browser-data archive + hack-browser-data archive -b chrome -c cookie -o chrome-cookies.zip`, + RunE: func(cmd *cobra.Command, args []string) error { + browsers, err := browser.DiscoverBrowsers(browser.DiscoverOptions{Name: browserName}) + if err != nil { + return err + } + if len(browsers) == 0 { + log.Warnf("no browsers found") + return nil + } + categories, err := parseCategories(category) + if err != nil { + return err + } + n, err := browser.WriteArchive(browsers, categories, outputPath) + if err != nil { + return err + } + log.Infof("Archived %d entries to %s", n, outputPath) + return nil + }, + } + + cmd.Flags().StringVarP(&browserName, "browser", "b", "all", "target browser: all|"+browser.Names()) + cmd.Flags().StringVarP(&category, "category", "c", "all", "data categories (comma-separated): all|"+categoryNames()) + cmd.Flags().StringVarP(&outputPath, "output", "o", "browser-data.zip", "output archive of decryption-relevant browser files") + + return cmd +} diff --git a/cmd/hack-browser-data/main.go b/cmd/hack-browser-data/main.go index d8c9c5e..a632f1f 100644 --- a/cmd/hack-browser-data/main.go +++ b/cmd/hack-browser-data/main.go @@ -31,7 +31,7 @@ GitHub: https://github.com/moonD4rk/HackBrowserData`, root.PersistentFlags().BoolVarP(&verbose, "verbose", "v", false, "enable debug logging") 
dump := dumpCmd() - root.AddCommand(dump, dumpKeysCmd(), restoreCmd(), listCmd(), versionCmd()) + root.AddCommand(dump, dumpKeysCmd(), archiveCmd(), restoreCmd(), listCmd(), versionCmd()) // Default to dump when no subcommand is given. // Copy dump flags to root so that `hack-browser-data -b chrome` diff --git a/rfcs/013-cli-redesign-cross-host.md b/rfcs/013-cli-redesign-cross-host.md index d8c7000..da54fb6 100644 --- a/rfcs/013-cli-redesign-cross-host.md +++ b/rfcs/013-cli-redesign-cross-host.md @@ -1,8 +1,9 @@ # RFC-013: CLI Redesign — Flat-Verb Surface & Cross-Host Restore **Author**: moonD4rk -**Status**: Accepted — implementation pending +**Status**: Accepted — `archive` (#607) implemented; cross-platform `restore` (#606) pending **Created**: 2026-06-03 +**Revised**: 2026-06-06 (subdir-convention archive, dual-mode restore, Local State, delivery order) ## 1. Summary @@ -41,8 +42,8 @@ Workflows: ``` local : hbd dump -b chrome -c cookie,password cross-host: origin> hbd dumpkeys -o keys.json - origin> hbd archive -b chrome -o data.zip - analyst> hbd restore --keys keys.json --data-zip data.zip -c cookie + origin> hbd archive -b chrome -o browser-data.zip + analyst> hbd restore --keys keys.json --data-zip browser-data.zip -c cookie ``` The `keys` parent command is removed: `keys export` becomes `dumpkeys`, `keys import` becomes `restore`, and a new `archive` fills the missing data-transport step. `dump` / `list` / `version` keep their current behavior; `dump` stays the default when no subcommand is given (which also covers the Windows double-click case). @@ -63,8 +64,8 @@ The resolution to §2.3 is a single rule: **the set of browsers a command may ac The cross-host producer emits two independent, composable artifacts; the consumer takes both. - `dumpkeys` writes `keys.json` — the portable master keys (stdout by default for `ssh origin hbd dumpkeys | …` pipelines; `-o` for a 0600 file). 
-- `archive` writes `data.zip` — only the decryption-relevant files for the requested `-c` categories (`Login Data`, `Cookies`, `Web Data`, `History`, … plus `Local State` and `Preferences`), read through the existing locked-file bypass, preserving the relative `User Data` layout so the zip's internal root *is* the `User Data` dir. -- `restore` takes `--keys keys.json` and the data via two explicit flags, `--data-dir <dir>` or `--data-zip <zip>` (mutually exclusive, exactly one required). A zip is extracted to a temporary directory; a directory is used as-is. Because the archive preserves layout, `unzip data.zip -d X && restore --data-dir X` equals `restore --data-zip data.zip`. +- `archive` writes `browser-data.zip` — the decryption-relevant files for the requested `-c` categories (`Login Data`, `Cookies`, `Web Data`, `History`, …), read through the existing locked-file bypass. To carry more than one browser and to keep restore unambiguous, the zip is laid out as `<browser-key>/<User Data-relative path>` (e.g. `chrome/Default/Network/Cookies`) — one subdir per installation, each subdir being that browser's `User Data` root. Two things are always included regardless of `-c`: each profile's `Preferences`/`Preferences_02` (so restore can rediscover the profile — the marker is not itself an extraction source) and the installation's `Local State` (carried for fidelity only; restore decrypts with the keys in `keys.json` and never reads it). Zip entry names are always forward-slash, so a Windows-produced archive restores on macOS/Linux. +- `restore` takes `--keys keys.json` and the data via two explicit flags, `--data-dir <dir>` or `--data-zip <zip>` (mutually exclusive, exactly one required). A zip is extracted to a temporary directory; a directory is used as-is, so `unzip browser-data.zip -d X && restore --data-dir X` equals `restore --data-zip browser-data.zip`. 
The data resolves two ways: when it holds `<browser-key>/` subdirs (the `archive` layout) each vault is rooted at its own subdir and several browsers restore at once; otherwise `--data-dir` is a single browser's hand-copied `User Data` root, which is unambiguous only for one vault — so `-b` must select it. This preserves the pre-redesign "point at a copied profile folder" workflow. `restore` is a **separate verb**, not a `dump --keys` mode. Folding it into `dump` would force one command to carry two mutually-exclusive input modes (`-b` for local discovery xor `--keys/--data` for transported artifacts) and dead flags (a `--keychain-pw` that silently does nothing once keys are supplied — a friction the earlier `dump --keys` design already hit). One verb, one job keeps each command's flags and help self-contained. `restore -b` is an **optional filter** over the dump's vaults, not a required selector, because the dump self-describes what each vault is (§4, §6). @@ -90,9 +91,9 @@ This crystallizes the principle that lets cross-platform decryption and the curr Working backwards from the chosen surface: -- **keydump struct** (`masterkey/dump.go`): the vault carries the engine kind so restore can construct without the local table. The `Browser` field becomes the canonical key (it was the display name), a `Kind` string field is added (values `chromium` / `chromium-yandex` / `chromium-opera`), and `DumpVersion` goes "1"→"2". `UserDataDir` and `Profiles` remain as informational fields. The keys stay `V10` / `V11` / `V20` (Chromium-only; Firefox keys are out of scope, §9). -- **`browser/keydump.go`**: `BuildDump` records the kind; the overlay `ApplyDump` (which mutates locally-discovered browsers) is replaced by a construct-from-dump path that synthesizes a `BrowserConfig` from each vault and builds the engine directly — no `platformBrowsers()` dependency. This is the mechanical form of §4. 
-- **`archive`** reuses the per-category source-path resolution already used by extraction, plus the existing locked-file session and the zip helper. +- **keydump struct** (`masterkey/dump.go`): the vault carries the engine kind so restore can construct without the local table. The `Browser` field becomes the canonical key (it was the display name), a `Kind` string field is added (values `chromium` / `chromium-yandex` / `chromium-opera`, mapped to/from the internal enum by an explicit bijection so a reordered enum can't silently corrupt), and `DumpVersion` is bumped to "2". The format is designed fresh — `ReadJSON` rejects other versions and there are no backward-compat shims for pre-redesign dumps. `UserDataDir` and `Profiles` remain informational. The keys stay `V10` / `V11` / `V20` (Chromium-only; Firefox keys are out of scope, §9). +- **`browser/keydump.go`**: `BuildDump` records the key and kind; the overlay `ApplyDump` (which mutates locally-discovered browsers) is replaced by `BuildFromDump`, which synthesizes a `BrowserConfig` per vault and builds the engine directly — no `platformBrowsers()` dependency. It resolves the data via the subdir convention or, for a hand-copied folder, the supplied dir as a single browser's root (§5). This is the mechanical form of §4. +- **`archive`** reuses the engine's per-category source resolution through a new `ArchiveSources` accessor — each source path is kept slash-canonical so the forward-slash zip entry name falls out directly — plus the existing locked-file session. The flattening `CompressDir` helper is unfit (it drops the layout and deletes the source), so `archive` uses a new layout-preserving `ZipDir`, and `restore --data-zip` a Zip-Slip-safe `Unzip`. - **cmd layer**: drop the `keys` parent; add `dumpkeys`, `archive`, `restore` as siblings of `dump` / `list` / `version`. 
- **Cross-cutting (orthogonal to the taxonomy)**: a Chromium-import password CSV format (`name,url,username,password,note`, #602) and category-aware credential prompting so a no-decryption request never asks for a password (#570). @@ -103,6 +104,13 @@ Working backwards from the chosen surface: 3. keydump vault identity: **option 1A** — `Browser` becomes the canonical key and a `Kind` field is added (§7). 4. Verb names are final: `archive` and `restore`. +### Refinements (2026-06-06) + +5. Archive layout is the subdir convention `<browser-key>/<User Data-relative path>` (multi-browser); `restore` is dual-mode — that layout, or a single hand-copied `User Data` root selected by `-b` (§5). +6. `archive` always includes each profile's `Preferences` marker (required for restore's profile discovery) and the installation's `Local State` (fidelity only — restore decrypts from `keys.json` and never reads it; §5). +7. No backward compatibility: the dump format is designed fresh, with no shims for pre-redesign artifacts. +8. Delivery order: `archive` (#607) lands first as an independent PR (it stands alone — its output also feeds the current overlay `restore` for same-OS browsers); the self-describing cross-platform `restore` (#606) follows. + ## 9. Non-goals / deferred - Firefox / Safari key export (Firefox keys are per-profile NSS; Safari has no portable key). diff --git a/utils/fileutil/fileutil.go b/utils/fileutil/fileutil.go index f21ab5a..46b5ce0 100644 --- a/utils/fileutil/fileutil.go +++ b/utils/fileutil/fileutil.go @@ -3,9 +3,12 @@ package fileutil import ( "archive/zip" "bytes" + "errors" "fmt" + "io" "os" "path/filepath" + "strings" ) // FileExists checks if the file exists in the provided path. @@ -88,3 +91,102 @@ func writeFile(buffer *bytes.Buffer, filename string) error { return nil } + +// ZipDir writes every file under srcDir into a new zip at zipPath, preserving the relative directory +// layout with forward-slash entry names. 
// Unlike CompressDir it neither flattens names nor deletes the source — it is the producer side of
// cross-host archive transport.
//
// Directories themselves get no entry; only the files beneath them are written. If zipPath lies inside
// srcDir the output file is skipped rather than archived into itself. On any failure — including a
// failed close of the output file — the partially written zip is removed, so a truncated archive is
// never left behind to be mistaken for a complete one.
func ZipDir(zipPath, srcDir string) (err error) {
	out, err := os.Create(zipPath)
	if err != nil {
		return fmt.Errorf("create %s: %w", zipPath, err)
	}
	defer func() {
		// A failed close can mean the tail of the archive never reached disk, so surface it
		// unless an earlier error is already being returned.
		if cerr := out.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("close %s: %w", zipPath, cerr)
		}
		if err != nil {
			_ = os.Remove(zipPath) // best-effort cleanup; the original error is what matters
		}
	}()

	// Identity of the output file, used below to avoid zipping the archive into itself.
	outInfo, err := out.Stat()
	if err != nil {
		return fmt.Errorf("stat %s: %w", zipPath, err)
	}

	zw := zip.NewWriter(out)
	walkErr := filepath.WalkDir(srcDir, func(p string, d os.DirEntry, entryErr error) error {
		if entryErr != nil {
			return entryErr
		}
		if d.IsDir() {
			return nil
		}
		if info, statErr := d.Info(); statErr == nil && os.SameFile(outInfo, info) {
			return nil // the archive being written lives under srcDir; do not read it back
		}
		rel, relErr := filepath.Rel(srcDir, p)
		if relErr != nil {
			return relErr
		}
		// Zip entry names are forward-slash regardless of the host separator.
		w, createErr := zw.Create(filepath.ToSlash(rel))
		if createErr != nil {
			return createErr
		}
		src, openErr := os.Open(p) //nolint:gosec // G122: staging tree is created and populated by us
		if openErr != nil {
			return openErr
		}
		// The defer belongs to this callback, so each file is closed before the walk moves on.
		defer func() { _ = src.Close() }()
		_, copyErr := io.Copy(w, src)
		return copyErr
	})
	if walkErr != nil {
		_ = zw.Close()
		return fmt.Errorf("zip %s: %w", srcDir, walkErr)
	}
	if err := zw.Close(); err != nil {
		return fmt.Errorf("close zip %s: %w", zipPath, err)
	}
	return nil
}

// Unzip extracts zipPath into destDir, rejecting any entry whose path would escape destDir (Zip-Slip)
// since a transported archive is not fully trusted.
//
// Extraction stops at the first error, so destDir may be left partially populated. Directories are
// created with mode 0o755 and files with mode 0o600; modes stored in the archive are not applied.
func Unzip(zipPath, destDir string) error {
	r, err := zip.OpenReader(zipPath)
	if err != nil {
		return fmt.Errorf("open zip %s: %w", zipPath, err)
	}
	defer func() { _ = r.Close() }()

	root := filepath.Clean(destDir)
	// Every extracted path must start with this prefix. A cleaned filesystem root already ends in
	// a separator; appending a second one would make the prefix match nothing.
	prefix := root
	if !strings.HasSuffix(prefix, string(os.PathSeparator)) {
		prefix += string(os.PathSeparator)
	}

	for _, f := range r.File {
		// Join cleans the result, so any ".." in the entry name is resolved before the check.
		target := filepath.Join(root, filepath.FromSlash(f.Name))
		if target != root && !strings.HasPrefix(target, prefix) {
			return fmt.Errorf("zip entry %q escapes destination", f.Name)
		}
		if f.FileInfo().IsDir() {
			if err := os.MkdirAll(target, 0o755); err != nil {
				return err
			}
			continue
		}
		// Archives need not carry explicit directory entries, so create the parent on demand.
		if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
			return err
		}
		if err := writeZipEntry(f, target); err != nil {
			return err
		}
	}
	return nil
}

// writeZipEntry streams one archive entry to target, creating or truncating it with mode 0o600. The
// error from closing the written file is reported when the copy itself succeeded, because a failed
// close can mean the data never reached disk.
func writeZipEntry(f *zip.File, target string) (err error) {
	rc, err := f.Open()
	if err != nil {
		return err
	}
	defer func() { _ = rc.Close() }()

	out, err := os.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o600)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := out.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	// Copy in 1 MiB chunks until the entry is exhausted; CopyN reports io.EOF once fewer bytes than
	// requested remain. NOTE(review): the chunking does not cap the total extracted size.
	for {
		_, copyErr := io.CopyN(out, rc, 1<<20)
		if errors.Is(copyErr, io.EOF) {
			return nil
		}
		if copyErr != nil {
			return copyErr
		}
	}
}

// ---- utils/fileutil/fileutil_zip_test.go (new file, package fileutil) ----
// Imports used by the tests below: archive/zip, bytes, os, path/filepath, strings, testing.

// TestZipDirUnzip_RoundTrip zips a small tree (an empty file, nested paths and a multi-chunk file),
// checks that every entry name is forward-slash, then unzips it and compares contents byte for byte.
func TestZipDirUnzip_RoundTrip(t *testing.T) {
	src := t.TempDir()
	files := map[string][]byte{
		"empty.txt":               {},
		"small.txt":               []byte("hello"),
		"Default/Network/Cookies": []byte("cookie-bytes"),
		"sub/big.bin":             bytes.Repeat([]byte("A"), 3<<20), // 3 MiB: exercises the chunked copy loop
	}
	for rel, data := range files {
		p := filepath.Join(src, filepath.FromSlash(rel))
		if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil {
			t.Fatal(err)
		}
		if err := os.WriteFile(p, data, 0o600); err != nil {
			t.Fatal(err)
		}
	}

	zipPath := filepath.Join(t.TempDir(), "out.zip")
	if err := ZipDir(zipPath, src); err != nil {
		t.Fatalf("ZipDir: %v", err)
	}

	zr, err := zip.OpenReader(zipPath)
	if err != nil {
		t.Fatalf("open zip: %v", err)
	}
	for _, f := range zr.File {
		if strings.Contains(f.Name, `\`) {
			t.Errorf("zip entry name must be forward-slash, got %q", f.Name)
		}
	}
	if err := zr.Close(); err != nil {
		t.Fatal(err)
	}

	dst := t.TempDir()
	if err := Unzip(zipPath, dst); err != nil {
		t.Fatalf("Unzip: %v", err)
	}
	for rel, want := range files {
		got, err := os.ReadFile(filepath.Join(dst, filepath.FromSlash(rel)))
		if err != nil {
			t.Errorf("missing %s after Unzip: %v", rel, err)
			continue
		}
		if !bytes.Equal(got, want) {
			t.Errorf("%s: content mismatch (got %d bytes, want %d)", rel, len(got), len(want))
		}
	}
}

// TestUnzip_RejectsZipSlip builds an archive whose only entry is "../escape.txt" and requires Unzip
// to fail instead of writing outside the destination.
func TestUnzip_RejectsZipSlip(t *testing.T) {
	zipPath := filepath.Join(t.TempDir(), "evil.zip")
	f, err := os.Create(zipPath)
	if err != nil {
		t.Fatal(err)
	}
	zw := zip.NewWriter(f)
	w, err := zw.Create("../escape.txt")
	if err != nil {
		t.Fatal(err)
	}
	if _, err := w.Write([]byte("pwned")); err != nil {
		t.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		t.Fatal(err)
	}
	if err := f.Close(); err != nil {
		t.Fatal(err)
	}

	if err := Unzip(zipPath, t.TempDir()); err == nil {
		t.Fatal("Unzip must reject an entry that escapes the destination")
	}
}