feat: synchronizer

This commit is contained in:
zhom
2026-03-15 18:00:04 +04:00
parent e72874142b
commit 5bea6a32e0
38 changed files with 3943 additions and 957 deletions
+224 -8
View File
@@ -9,7 +9,7 @@ use crate::settings_manager::SettingsManager;
use chrono::{DateTime, Utc};
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::time::Instant;
@@ -49,6 +49,70 @@ fn is_critical_file(path: &str) -> bool {
.any(|pattern| path.contains(pattern))
}
/// Checkpoint all SQLite WAL files in a profile directory.
///
/// When a browser crashes or is killed, SQLite WAL files may contain
/// uncommitted data (e.g. cookies, login data). Since WAL files are
/// excluded from sync, we must checkpoint them into the main database
/// files before generating the manifest to avoid data loss.
fn checkpoint_sqlite_wal_files(profile_dir: &Path) {
fn find_wal_files(dir: &Path, wal_files: &mut Vec<PathBuf>) {
let Ok(entries) = fs::read_dir(dir) else {
return;
};
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
find_wal_files(&path, wal_files);
} else if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if name.ends_with("-wal") {
wal_files.push(path);
}
}
}
}
let mut wal_files = Vec::new();
find_wal_files(profile_dir, &mut wal_files);
for wal_path in &wal_files {
// Only checkpoint non-empty WAL files
let is_non_empty = fs::metadata(wal_path).map(|m| m.len() > 0).unwrap_or(false);
if !is_non_empty {
continue;
}
// Derive the main database path by stripping the "-wal" suffix
let db_path_str = wal_path.to_string_lossy();
let db_path = PathBuf::from(db_path_str.strip_suffix("-wal").unwrap());
if !db_path.exists() {
continue;
}
match rusqlite::Connection::open(&db_path) {
Ok(conn) => match conn.pragma_update(None, "wal_checkpoint", "TRUNCATE") {
Ok(_) => {
log::info!(
"Checkpointed WAL for: {}",
db_path.file_name().unwrap_or_default().to_string_lossy()
);
}
Err(e) => {
log::warn!("Failed to checkpoint WAL for {}: {}", db_path.display(), e);
}
},
Err(e) => {
log::warn!(
"Failed to open DB for WAL checkpoint {}: {}",
db_path.display(),
e
);
}
}
}
}
/// Resume state persisted to disk so interrupted syncs can continue
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
struct SyncResumeState {
@@ -362,6 +426,10 @@ impl SyncEngine {
))
})?;
// Checkpoint any SQLite WAL files to ensure all data is in the main DB
// before we generate the manifest (WAL files are excluded from sync)
checkpoint_sqlite_wal_files(&profile_dir);
// Load or create hash cache
let cache_path = get_cache_path(&profile_dir);
let mut hash_cache = HashCache::load(&cache_path);
@@ -488,9 +556,22 @@ impl SyncEngine {
.upload_profile_metadata(&profile_id, profile, &key_prefix)
.await?;
// If we recovered from an empty local state (downloaded everything from remote),
// regenerate the manifest from the actual files now on disk so we don't
// overwrite the remote manifest with an empty one.
let final_manifest = if local_manifest.files.is_empty() && !diff.files_to_download.is_empty() {
let mut new_cache = HashCache::load(&cache_path);
let mut regenerated = generate_manifest(&profile_id, &profile_dir, &mut new_cache)?;
new_cache.save(&cache_path)?;
regenerated.encrypted = encryption_key.is_some();
regenerated
} else {
let mut m = local_manifest;
m.encrypted = encryption_key.is_some();
m
};
// Upload manifest.json last for atomicity
let mut final_manifest = local_manifest;
final_manifest.encrypted = encryption_key.is_some();
self
.upload_manifest(&profile_id, &final_manifest, &key_prefix)
.await?;
@@ -2165,14 +2246,14 @@ impl SyncEngine {
) -> SyncResult<Vec<String>> {
log::info!("Checking for missing synced profiles...");
// List personal profiles from S3
let list_response = self.client.list("profiles/").await?;
// List all personal profiles from S3 (paginated)
let all_objects = self.client.list_all("profiles/").await?;
let mut downloaded: Vec<String> = Vec::new();
// Extract unique profile IDs with their key prefix
let mut profiles_to_check: HashMap<String, String> = HashMap::new();
for obj in list_response.objects {
for obj in all_objects {
if obj.key.starts_with("profiles/") && obj.key.ends_with("/manifest.json") {
if let Some(profile_id) = obj
.key
@@ -2189,8 +2270,8 @@ impl SyncEngine {
if let Some(team_id) = &auth.user.team_id {
let team_prefix = format!("teams/{}/", team_id);
let team_list_key = format!("{}profiles/", team_prefix);
if let Ok(team_list) = self.client.list(&team_list_key).await {
for obj in team_list.objects {
if let Ok(team_objects) = self.client.list_all(&team_list_key).await {
for obj in team_objects {
if obj.key.starts_with("profiles/") && obj.key.ends_with("/manifest.json") {
if let Some(profile_id) = obj
.key
@@ -3341,3 +3422,138 @@ pub async fn set_extension_group_sync_enabled(
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_checkpoint_sqlite_wal_files() {
let temp_dir = tempfile::TempDir::new().unwrap();
let db_path = temp_dir.path().join("test.db");
// Create a SQLite database in WAL mode and insert data.
// Use std::mem::forget to prevent the connection destructor from running,
// which simulates a browser crash where WAL is not checkpointed.
{
let conn = rusqlite::Connection::open(&db_path).unwrap();
conn.pragma_update(None, "journal_mode", "WAL").unwrap();
conn.pragma_update(None, "wal_autocheckpoint", "0").unwrap();
conn
.execute(
"CREATE TABLE cookies (id INTEGER PRIMARY KEY, value TEXT)",
[],
)
.unwrap();
conn
.execute(
"INSERT INTO cookies (value) VALUES ('session_token_123')",
[],
)
.unwrap();
// Leak the connection to prevent auto-checkpoint on drop
std::mem::forget(conn);
}
// Verify WAL file exists and has data
let wal_path = temp_dir.path().join("test.db-wal");
assert!(wal_path.exists(), "WAL file should exist");
let wal_size = fs::metadata(&wal_path).unwrap().len();
assert!(wal_size > 0, "WAL file should be non-empty");
// Run checkpoint
checkpoint_sqlite_wal_files(temp_dir.path());
// After checkpoint, WAL should be truncated (empty)
let wal_size_after = fs::metadata(&wal_path).map(|m| m.len()).unwrap_or(0);
assert_eq!(
wal_size_after, 0,
"WAL should be truncated after checkpoint"
);
// Verify data is still accessible from the main database
let conn = rusqlite::Connection::open(&db_path).unwrap();
let value: String = conn
.query_row("SELECT value FROM cookies WHERE id = 1", [], |row| {
row.get(0)
})
.unwrap();
assert_eq!(value, "session_token_123");
}
#[test]
fn test_checkpoint_handles_missing_db() {
let temp_dir = tempfile::TempDir::new().unwrap();
// Create a WAL file without a corresponding database
let wal_path = temp_dir.path().join("missing.db-wal");
fs::write(&wal_path, b"fake wal data").unwrap();
// Should not panic
checkpoint_sqlite_wal_files(temp_dir.path());
}
#[test]
fn test_checkpoint_skips_empty_wal() {
let temp_dir = tempfile::TempDir::new().unwrap();
let db_path = temp_dir.path().join("test.db");
// Create a database and checkpoint immediately (WAL is empty)
{
let conn = rusqlite::Connection::open(&db_path).unwrap();
conn.pragma_update(None, "journal_mode", "WAL").unwrap();
conn
.execute("CREATE TABLE t (id INTEGER PRIMARY KEY)", [])
.unwrap();
}
// Create an empty WAL file
let wal_path = temp_dir.path().join("test.db-wal");
fs::write(&wal_path, b"").unwrap();
// Should skip empty WAL without error
checkpoint_sqlite_wal_files(temp_dir.path());
}
#[test]
fn test_checkpoint_nested_directories() {
let temp_dir = tempfile::TempDir::new().unwrap();
let nested_dir = temp_dir.path().join("profile").join("Default");
fs::create_dir_all(&nested_dir).unwrap();
let db_path = nested_dir.join("Cookies");
// Create a database with WAL data, leak connection to simulate crash
{
let conn = rusqlite::Connection::open(&db_path).unwrap();
conn.pragma_update(None, "journal_mode", "WAL").unwrap();
conn.pragma_update(None, "wal_autocheckpoint", "0").unwrap();
conn
.execute(
"CREATE TABLE cookies (host_key TEXT, name TEXT, value TEXT)",
[],
)
.unwrap();
conn
.execute(
"INSERT INTO cookies VALUES ('.example.com', 'session', 'abc')",
[],
)
.unwrap();
std::mem::forget(conn);
}
let wal_path = nested_dir.join("Cookies-wal");
assert!(wal_path.exists());
// Checkpoint from the top-level directory
checkpoint_sqlite_wal_files(temp_dir.path());
// Verify data is in the main database
let conn = rusqlite::Connection::open(&db_path).unwrap();
let count: i64 = conn
.query_row("SELECT COUNT(*) FROM cookies", [], |row| row.get(0))
.unwrap();
assert_eq!(count, 1);
}
}