fix: correctly parse Chromium localStorage LevelDB entries (#539)

This commit is contained in:
chleynx
2026-04-04 01:01:55 +08:00
committed by GitHub
parent d0971ca098
commit c493804ede
2 changed files with 317 additions and 64 deletions
+109 -42
View File
@@ -14,7 +14,6 @@ import (
"github.com/moond4rk/hackbrowserdata/extractor"
"github.com/moond4rk/hackbrowserdata/log"
"github.com/moond4rk/hackbrowserdata/types"
"github.com/moond4rk/hackbrowserdata/utils/byteutil"
"github.com/moond4rk/hackbrowserdata/utils/typeutil"
)
@@ -38,34 +37,23 @@ type storage struct {
// maxLocalStorageValueLength is the cut-off, in bytes, for stored values:
// a value of this length or more is replaced by a "too long" notice instead
// of being decoded.
const maxLocalStorageValueLength = 1024 * 2
// Key markers and string-format tags used when parsing Chromium localStorage
// LevelDB records.
const (
// Exact key of the record that the parser skips.
chromiumLocalStorageVersionKey = "VERSION"
// Key prefix of metadata records; the rest of the key is kept as the URL.
chromiumLocalStorageMetaPrefix = "META:"
// Key prefix of a second kind of metadata record, handled the same way.
chromiumLocalStorageMetaAccessKey = "METAACCESS:"
// First byte of a data record key; it is followed by the URL, a zero byte
// and the encoded script key.
chromiumLocalStorageDataPrefix = '_'
// Leading format byte marking a little-endian UTF-16 string payload.
chromiumStringUTF16Format = 0
// Leading format byte marking a Latin-1 string payload.
chromiumStringLatin1Format = 1
)
// Extract fills c with the entries that extractChromiumLocalStorage reads from
// the database at types.ChromiumLocalStorage.TempFilename(). The byte-slice
// argument is ignored. An error from reading the database is returned as is.
func (c *ChromiumLocalStorage) Extract(_ []byte) error {
db, err := leveldb.OpenFile(types.ChromiumLocalStorage.TempFilename(), nil)
entries, err := extractChromiumLocalStorage(types.ChromiumLocalStorage.TempFilename())
if err != nil {
return err
}
// Delete the database files at that path when Extract returns.
defer os.RemoveAll(types.ChromiumLocalStorage.TempFilename())
defer db.Close()
iter := db.NewIterator(nil, nil)
for iter.Next() {
key := iter.Key()
value := iter.Value()
s := new(storage)
s.fillKey(key)
// values of 2KB or more are not stored; a notice with their length is stored instead
if len(value) < maxLocalStorageValueLength {
s.fillValue(value)
} else {
s.Value = fmt.Sprintf("value is too long, length is %d, supported max length is %d", len(value), maxLocalStorageValueLength)
}
if s.IsMeta {
s.Value = fmt.Sprintf("meta data, value bytes is %v", value)
}
*c = append(*c, *s)
}
iter.Release()
err = iter.Error()
return err
*c = append(*c, entries...)
return nil
}
func (c *ChromiumLocalStorage) Name() string {
@@ -76,24 +64,69 @@ func (c *ChromiumLocalStorage) Len() int {
return len(*c)
}
func (s *storage) fillKey(b []byte) {
keys := bytes.Split(b, []byte("\x00"))
if len(keys) == 1 && bytes.HasPrefix(keys[0], []byte("META:")) {
s.IsMeta = true
s.fillMetaHeader(keys[0])
// extractChromiumLocalStorage opens the LevelDB database at path, walks every
// record and collects the ones parseChromiumLocalStorageEntry accepts.
// It returns the collected entries together with any error reported by the
// iterator, or nil and the error when the database cannot be opened.
func extractChromiumLocalStorage(path string) (ChromiumLocalStorage, error) {
db, err := leveldb.OpenFile(path, nil)
if err != nil {
return nil, err
}
if len(keys) == 2 && bytes.HasPrefix(keys[0], []byte("_")) {
s.fillHeader(keys[0], keys[1])
defer db.Close()
var entries ChromiumLocalStorage
iter := db.NewIterator(nil, nil)
defer iter.Release()
for iter.Next() {
entry, ok := parseChromiumLocalStorageEntry(iter.Key(), iter.Value())
// Records the parser does not report (ok == false) are left out of the result.
if !ok {
continue
}
entries = append(entries, entry)
}
return entries, iter.Error()
}
// parseChromiumLocalStorageEntry classifies a single LevelDB record by its key.
//
// Metadata records (keys starting with chromiumLocalStorageMetaAccessKey or
// chromiumLocalStorageMetaPrefix) and data records (keys starting with
// chromiumLocalStorageDataPrefix) produce an entry and true. The record whose
// key equals chromiumLocalStorageVersionKey, and any key matching none of
// those forms, produce a zero storage and false.
func parseChromiumLocalStorageEntry(key, value []byte) (storage, bool) {
	// The version record is never reported.
	if bytes.Equal(key, []byte(chromiumLocalStorageVersionKey)) {
		return storage{}, false
	}

	// Metadata records: the key remainder after the prefix becomes the URL
	// and the raw value bytes are rendered as text.
	metaPrefixes := [...]string{chromiumLocalStorageMetaAccessKey, chromiumLocalStorageMetaPrefix}
	for _, candidate := range metaPrefixes {
		prefix := []byte(candidate)
		if !bytes.HasPrefix(key, prefix) {
			continue
		}
		meta := storage{IsMeta: true}
		meta.URL = string(bytes.TrimPrefix(key, prefix))
		meta.Value = fmt.Sprintf("meta data, value bytes is %v", value)
		return meta, true
	}

	// Data records start with a one-byte marker that is stripped before the
	// rest of the key is parsed.
	if len(key) == 0 || key[0] != chromiumLocalStorageDataPrefix {
		return storage{}, false
	}
	return parseChromiumLocalStorageDataEntry(key[1:], value), true
}
// fillMetaHeader sets s.URL from a metadata key by removing its leading
// "META:" marker.
func (s *storage) fillMetaHeader(b []byte) {
	// bytes.Trim would treat "META:" as a set of characters and strip any of
	// them from both ends of the key, which can also remove characters that
	// belong to the URL (for example a trailing 'A' or ':'). Only the literal
	// prefix must go.
	s.URL = string(bytes.TrimPrefix(b, []byte("META:")))
}
// parseChromiumLocalStorageDataEntry builds a storage entry from a data record.
// key is the record key with its leading marker byte already removed; it is
// split at the first zero byte into the URL and the encoded script key.
// Problems with the key are reported as text in the Key field rather than as
// an error, and Value is always filled by decodeChromiumLocalStorageValue.
func parseChromiumLocalStorageDataEntry(key, value []byte) storage {
entry := storage{
Value: decodeChromiumLocalStorageValue(value),
}
func (s *storage) fillHeader(url, key []byte) {
s.URL = string(bytes.Trim(url, "_"))
s.Key = string(bytes.Trim(key, "\x01"))
// The URL and the script key are separated by a single zero byte.
separator := bytes.IndexByte(key, 0)
if separator < 0 {
entry.Key = "unsupported chromium localStorage key encoding: missing origin separator"
return entry
}
entry.URL = string(key[:separator])
// Everything after the separator is an encoded string (format byte + payload).
scriptKey, err := decodeChromiumString(key[separator+1:])
if err != nil {
entry.Key = fmt.Sprintf("unsupported chromium localStorage key encoding: %v", err)
return entry
}
entry.Key = scriptKey
return entry
}
func convertUTF16toUTF8(source []byte, endian unicode.Endianness) ([]byte, error) {
@@ -101,11 +134,45 @@ func convertUTF16toUTF8(source []byte, endian unicode.Endianness) ([]byte, error
return r, err
}
// fillValue fills the value of the storage entry
// TODO: support Unicode characters
func (s *storage) fillValue(b []byte) {
value := bytes.Map(byteutil.OnSplitUTF8Func, b)
s.Value = string(value)
// decodeChromiumString decodes an encoded Chromium string into UTF-8 text.
//
// The first byte of b selects how the remaining bytes are interpreted:
// chromiumStringLatin1Format means one Latin-1 character per byte, and
// chromiumStringUTF16Format means little-endian UTF-16 code units.
//
// It returns an error when b is empty, when the format byte is unknown, or
// when a UTF-16 payload has an odd number of bytes or fails conversion.
func decodeChromiumString(b []byte) (string, error) {
	if len(b) == 0 {
		return "", fmt.Errorf("empty chromium string")
	}
	payload := b[1:]
	switch b[0] {
	case chromiumStringLatin1Format:
		// Every Latin-1 byte is the Unicode code point of the same value, so
		// widen byte by byte. A plain string(payload) would leave bytes
		// 0x80-0xFF as invalid UTF-8 instead of the characters they encode.
		runes := make([]rune, len(payload))
		for i, c := range payload {
			runes[i] = rune(c)
		}
		return string(runes), nil
	case chromiumStringUTF16Format:
		// A bare format byte is a valid empty string.
		if len(payload) == 0 {
			return "", nil
		}
		// UTF-16 code units are two bytes each; an odd length is corrupt.
		if len(payload)%2 != 0 {
			return "", fmt.Errorf("invalid UTF-16 byte length %d", len(payload))
		}
		value, err := convertUTF16toUTF8(payload, unicode.LittleEndian)
		if err != nil {
			return "", err
		}
		return string(value), nil
	default:
		return "", fmt.Errorf("unknown chromium string format 0x%02x", b[0])
	}
}
// decodeChromiumLocalStorageValue renders a stored value as display text.
// A value whose length reaches maxLocalStorageValueLength is not decoded and
// a size notice is returned instead; a decoding failure likewise yields a
// descriptive message rather than an error.
func decodeChromiumLocalStorageValue(value []byte) string {
	const (
		tooLongFormat     = "value is too long, length is %d, supported max length is %d"
		unsupportedFormat = "unsupported chromium localStorage value encoding: %v"
	)

	if size := len(value); size >= maxLocalStorageValueLength {
		return fmt.Sprintf(tooLongFormat, size, maxLocalStorageValueLength)
	}

	text, decodeErr := decodeChromiumString(value)
	if decodeErr == nil {
		return text
	}
	return fmt.Sprintf(unsupportedFormat, decodeErr)
}
// FirefoxLocalStorage is a list of storage entries.
// NOTE(review): presumably the Firefox counterpart of ChromiumLocalStorage; its methods are outside this view — confirm.
type FirefoxLocalStorage []storage
+208 -22
View File
@@ -1,33 +1,219 @@
package localstorage
import (
"encoding/binary"
"testing"
"unicode/utf16"
"github.com/stretchr/testify/assert"
"golang.org/x/text/encoding/unicode"
"github.com/stretchr/testify/require"
"github.com/syndtr/goleveldb/leveldb"
)
// testCases is the fixture for the UTF-16 to UTF-8 conversion test.
var testCases = []struct {
// in is the raw byte sequence passed to convertUTF16toUTF8.
in []byte
// wanted is the UTF-8 text the conversion is meant to produce.
wanted []byte
// actual is the byte sequence the test compares the conversion result against.
actual []byte
}{
{
in: []byte{0x0, 0x7b, 0x0, 0x22, 0x0, 0x72, 0x0, 0x65, 0x0, 0x66, 0x0, 0x65, 0x0, 0x72, 0x0, 0x5f, 0x0, 0x6b, 0x0, 0x65, 0x0, 0x79, 0x0, 0x22, 0x0, 0x3a, 0x0, 0x22, 0x0, 0x68, 0x0, 0x74, 0x0, 0x74, 0x0, 0x70, 0x0, 0x73, 0x0, 0x3a, 0x0, 0x2f, 0x0, 0x2f, 0x0, 0x77, 0x0, 0x77, 0x0, 0x77, 0x0, 0x2e, 0x0, 0x76, 0x0, 0x6f, 0x0, 0x6c, 0x0, 0x63, 0x0, 0x65, 0x0, 0x6e, 0x0, 0x67, 0x0, 0x69, 0x0, 0x6e, 0x0, 0x65, 0x0, 0x2e, 0x0, 0x63, 0x0, 0x6f, 0x0, 0x6d, 0x0, 0x2f, 0x0, 0x70, 0x0, 0x72, 0x0, 0x6f, 0x0, 0x64, 0x0, 0x75, 0x0, 0x63, 0x0, 0x74, 0x0, 0x73, 0x0, 0x2f, 0x0, 0x66, 0x0, 0x65, 0x0, 0x69, 0x0, 0x6c, 0x0, 0x69, 0x0, 0x61, 0x0, 0x6e, 0x0, 0x22, 0x0, 0x2c, 0x0, 0x22, 0x0, 0x72, 0x0, 0x65, 0x0, 0x66, 0x0, 0x65, 0x0, 0x72, 0x0, 0x5f, 0x0, 0x74, 0x0, 0x69, 0x0, 0x74, 0x0, 0x6c, 0x0, 0x65, 0x0, 0x22, 0x0, 0x3a, 0x0, 0x22, 0x0, 0xde, 0x98, 0xde, 0x8f, 0x2d, 0x0, 0x6b, 0x70, 0x71, 0x5c, 0x15, 0x5f, 0xce, 0x64, 0x22, 0x0, 0x2c, 0x0, 0x22, 0x0, 0x72, 0x0, 0x65, 0x0, 0x66, 0x0, 0x65, 0x0, 0x72, 0x0, 0x5f, 0x0, 0x6d, 0x0, 0x61, 0x0, 0x6e, 0x0, 0x75, 0x0, 0x61, 0x0, 0x6c, 0x0, 0x5f, 0x0, 0x6b, 0x0, 0x65, 0x0, 0x79, 0x0, 0x22, 0x0, 0x3a, 0x0, 0x22, 0x0, 0x22, 0x0, 0x7d, 0x0},
wanted: []byte(`{"refer_key":"https://www.volcengine.com/product/feilian","refer_title":"飞连_SSO单点登录_VPN_终端安全合规_便捷Wifi认证-火山引擎","refer_manual_key":""}`),
actual: []byte{0x7b, 0x22, 0x72, 0x65, 0x66, 0x65, 0x72, 0x5f, 0x6b, 0x65, 0x79, 0x22, 0x3a, 0x22, 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x76, 0x6f, 0x6c, 0x63, 0x65, 0x6e, 0x67, 0x69, 0x6e, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x70, 0x72, 0x6f, 0x64, 0x75, 0x63, 0x74, 0x73, 0x2f, 0x66, 0x65, 0x69, 0x6c, 0x69, 0x61, 0x6e, 0x22, 0x2c, 0x22, 0x72, 0x65, 0x66, 0x65, 0x72, 0x5f, 0x74, 0x69, 0x74, 0x6c, 0x65, 0x22, 0x3a, 0x22, 0xc3, 0x9e, 0xe9, 0xa3, 0x9e, 0xe8, 0xbc, 0xad, 0x6b, 0xe7, 0x81, 0xb1, 0xe5, 0xb0, 0x95, 0xe5, 0xbf, 0x8e, 0xe6, 0x90, 0xa2, 0x2c, 0x22, 0x72, 0x65, 0x66, 0x65, 0x72, 0x5f, 0x6d, 0x61, 0x6e, 0x75, 0x61, 0x6c, 0x5f, 0x6b, 0x65, 0x79, 0x22, 0x3a, 0x22, 0x22, 0x7d, 0xef, 0xbf, 0xbd},
},
}
func TestLocalStorageKeyToUTF8(t *testing.T) {
// TestDecodeChromiumString runs decodeChromiumString over a table of encoded
// inputs and checks either the decoded text or a fragment of the error message.
func TestDecodeChromiumString(t *testing.T) {
t.Parallel()
for _, tc := range testCases {
actual, err := convertUTF16toUTF8(tc.in, unicode.BigEndian)
if err != nil {
t.Error(err)
}
// TODO: fix this. Values from local storage that contain Chinese characters need to be
// converted from UTF-16 to UTF-8, but that conversion does not work yet, so it is skipped for now.
assert.Equal(t, tc.actual, actual, "chinese characters can't actual convert")
tests := []struct {
name string
input []byte
want string
// wantErr, when non-empty, is a substring the returned error must contain.
wantErr string
}{
{
name: "latin1",
input: encodeChromiumLatin1("abc123"),
want: "abc123",
},
{
name: "utf16le",
input: encodeChromiumUTF16("飞连"),
want: "飞连",
},
{
name: "unknown format",
input: []byte{2, 'x'},
wantErr: "unknown chromium string format",
},
{
name: "invalid utf16 byte length",
input: []byte{chromiumStringUTF16Format, 0x61},
wantErr: "invalid UTF-16 byte length",
},
}
for _, tc := range tests {
// Per-iteration copy so the parallel subtest closure sees its own case.
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
got, err := decodeChromiumString(tc.input)
if tc.wantErr != "" {
require.Error(t, err)
assert.Contains(t, err.Error(), tc.wantErr)
return
}
require.NoError(t, err)
assert.Equal(t, tc.want, got)
})
}
}
// TestParseChromiumLocalStorageEntry feeds hand-built LevelDB records to
// parseChromiumLocalStorageEntry and checks, for each record, whether it is
// reported and which IsMeta, URL, Key and Value it carries.
func TestParseChromiumLocalStorageEntry(t *testing.T) {
	t.Parallel()

	const origin = "https://example.com"

	// dataKey assembles a well-formed data record key: marker byte, URL,
	// zero separator, then the already-encoded script key.
	dataKey := func(encodedKey []byte) []byte {
		return append([]byte("_"+origin+"\x00"), encodedKey...)
	}

	type expectation struct {
		parsed      bool
		meta        bool
		url         string
		key         string
		keyContains string // when set, Key only has to contain this text
		value       string
	}

	cases := []struct {
		name  string
		key   []byte
		value []byte
		want  expectation
	}{
		{
			name: "skip version key",
			key:  []byte(chromiumLocalStorageVersionKey),
		},
		{
			name:  "meta key",
			key:   []byte(chromiumLocalStorageMetaPrefix + origin),
			value: []byte{0x08, 0x96, 0x01},
			want: expectation{
				parsed: true,
				meta:   true,
				url:    origin,
				value:  "meta data, value bytes is [8 150 1]",
			},
		},
		{
			name:  "meta access key",
			key:   []byte(chromiumLocalStorageMetaAccessKey + origin),
			value: []byte{0x10, 0x20},
			want: expectation{
				parsed: true,
				meta:   true,
				url:    origin,
				value:  "meta data, value bytes is [16 32]",
			},
		},
		{
			name:  "latin1 business key",
			key:   dataKey(encodeChromiumLatin1("token")),
			value: encodeChromiumLatin1("abc123"),
			want: expectation{
				parsed: true,
				url:    origin,
				key:    "token",
				value:  "abc123",
			},
		},
		{
			name:  "utf16 business key",
			key:   dataKey(encodeChromiumUTF16("飞连")),
			value: encodeChromiumUTF16("终端安全"),
			want: expectation{
				parsed: true,
				url:    origin,
				key:    "飞连",
				value:  "终端安全",
			},
		},
		{
			name:  "unsupported business key format",
			key:   dataKey([]byte{2, 'x'}),
			value: encodeChromiumLatin1("abc123"),
			want: expectation{
				parsed:      true,
				url:         origin,
				keyContains: "unsupported chromium localStorage key encoding",
				value:       "abc123",
			},
		},
		{
			name: "missing origin separator",
			// Built by hand: this key deliberately lacks the zero separator.
			key:   append([]byte("_"+origin), encodeChromiumLatin1("token")...),
			value: encodeChromiumLatin1("abc123"),
			want: expectation{
				parsed:      true,
				keyContains: "missing origin separator",
				value:       "abc123",
			},
		},
		{
			name:  "unsupported value format",
			key:   dataKey(encodeChromiumLatin1("token")),
			value: []byte{2, 'x'},
			want: expectation{
				parsed: true,
				url:    origin,
				key:    "token",
				value:  "unsupported chromium localStorage value encoding: unknown chromium string format 0x02",
			},
		},
	}

	for _, tt := range cases {
		tt := tt // per-iteration copy for the parallel subtest closure
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			entry, ok := parseChromiumLocalStorageEntry(tt.key, tt.value)

			assert.Equal(t, tt.want.parsed, ok)
			assert.Equal(t, tt.want.meta, entry.IsMeta)
			assert.Equal(t, tt.want.url, entry.URL)
			assert.Equal(t, tt.want.value, entry.Value)
			if tt.want.keyContains == "" {
				assert.Equal(t, tt.want.key, entry.Key)
			} else {
				assert.Contains(t, entry.Key, tt.want.keyContains)
			}
		})
	}
}
// TestExtractChromiumLocalStorage writes a version record, two metadata
// records and two data records into a fresh LevelDB database, then checks that
// extractChromiumLocalStorage returns the four non-version entries with the
// expected URL, keys and values.
func TestExtractChromiumLocalStorage(t *testing.T) {
	const origin = "https://example.com"
	dbPath := t.TempDir()

	db, err := leveldb.OpenFile(dbPath, nil)
	require.NoError(t, err)

	dataPrefix := "_" + origin + "\x00"
	records := []struct {
		key   string
		value []byte
	}{
		{chromiumLocalStorageVersionKey, []byte("1")},
		{chromiumLocalStorageMetaPrefix + origin, []byte{0x08, 0x96, 0x01}},
		{chromiumLocalStorageMetaAccessKey + origin, []byte{0x10, 0x20}},
		{dataPrefix + string(encodeChromiumLatin1("token")), encodeChromiumLatin1("abc123")},
		{dataPrefix + string(encodeChromiumUTF16("飞连")), encodeChromiumUTF16("终端安全")},
	}
	for _, rec := range records {
		require.NoError(t, db.Put([]byte(rec.key), rec.value, nil))
	}
	// Close before re-opening the same directory through the function under test.
	require.NoError(t, db.Close())

	entries, err := extractChromiumLocalStorage(dbPath)
	require.NoError(t, err)
	require.Len(t, entries, 4)

	var metaEntries int
	decoded := make(map[string]string)
	for _, e := range entries {
		if !e.IsMeta {
			decoded[e.Key] = e.Value
			assert.Equal(t, origin, e.URL)
			continue
		}
		metaEntries++
		assert.Equal(t, origin, e.URL)
		assert.Contains(t, e.Value, "meta data, value bytes is")
	}

	assert.Equal(t, 2, metaEntries)
	assert.Equal(t, "abc123", decoded["token"])
	assert.Equal(t, "终端安全", decoded["飞连"])
}
// encodeChromiumLatin1 returns the bytes of s preceded by the
// chromiumStringLatin1Format marker byte.
func encodeChromiumLatin1(s string) []byte {
	encoded := make([]byte, 0, 1+len(s))
	encoded = append(encoded, chromiumStringLatin1Format)
	encoded = append(encoded, s...)
	return encoded
}
// encodeChromiumUTF16 returns the chromiumStringUTF16Format marker byte
// followed by the UTF-16 code units of s, each written in little-endian order.
func encodeChromiumUTF16(s string) []byte {
	units := utf16.Encode([]rune(s))

	// One marker byte plus two bytes per code unit.
	out := make([]byte, 1+2*len(units))
	out[0] = chromiumStringUTF16Format
	for i, unit := range units {
		binary.LittleEndian.PutUint16(out[1+2*i:], unit)
	}
	return out
}