Fix WebKit timestamp schema handling

This commit is contained in:
Janik Besendorf
2026-06-05 20:14:52 +02:00
parent e69a02169c
commit a77d3f74aa
2 changed files with 86 additions and 36 deletions
@@ -85,35 +85,32 @@ class WebkitResourceLoadStatistics(IOSExtraction):
try:
try:
cur.execute(
"""
SELECT
domainID,
registrableDomain,
lastSeen,
hadUserInteraction,
mostRecentUserInteractionTime,
mostRecentWebPushInteractionTime
from ObservedDomains;
"""
)
has_extra_timestamps = True
except sqlite3.OperationalError:
try:
cur.execute(
"""
SELECT
domainID,
registrableDomain,
lastSeen,
hadUserInteraction
from ObservedDomains;
"""
)
has_extra_timestamps = False
except sqlite3.OperationalError:
cur.execute("PRAGMA table_info(ObservedDomains);")
available_columns = {row[1] for row in cur}
required_columns = [
"domainID",
"registrableDomain",
"lastSeen",
"hadUserInteraction",
]
if not set(required_columns).issubset(available_columns):
return
optional_columns = [
column
for column in [
"mostRecentUserInteractionTime",
"mostRecentWebPushInteractionTime",
]
if column in available_columns
]
selected_columns = required_columns + optional_columns
cur.execute(
f"SELECT {', '.join(selected_columns)} FROM ObservedDomains;"
)
except sqlite3.OperationalError:
return
for row in cur:
result = {
"domain_id": row[0],
@@ -124,15 +121,19 @@ class WebkitResourceLoadStatistics(IOSExtraction):
"domain": domain,
"path": path,
}
if has_extra_timestamps:
result["most_recent_user_interaction_time"] = row[4]
result["most_recent_user_interaction_time_isodate"] = (
convert_unix_to_iso(row[4])
)
result["most_recent_web_push_interaction_time"] = row[5]
result["most_recent_web_push_interaction_time_isodate"] = (
convert_unix_to_iso(row[5])
)
for index, column in enumerate(optional_columns, start=4):
field = {
"mostRecentUserInteractionTime": (
"most_recent_user_interaction_time"
),
"mostRecentWebPushInteractionTime": (
"most_recent_web_push_interaction_time"
),
}[column]
timestamp = row[index]
result[field] = timestamp
if timestamp is not None and timestamp >= 0:
result[f"{field}_isodate"] = convert_unix_to_iso(timestamp)
self.results.append(result)
finally:
cur.close()
@@ -3,6 +3,8 @@
# Use of this software is governed by the MVT License 1.1 that can be found at
# https://license.mvt.re/1.1/
import sqlite3
from mvt.common.module import run_module
from mvt.ios.modules.mixed.webkit_resource_load_statistics import (
WebkitResourceLoadStatistics,
@@ -19,3 +21,50 @@ class TestWebkitResourceLoadStatisticsModule:
assert len(m.results) == 2
assert len(m.timeline) == 2
assert len(m.alertstore.alerts) == 0
results = {result["registrable_domain"]: result for result in m.results}
assert results["google.com"]["most_recent_user_interaction_time"] > 0
assert "most_recent_user_interaction_time_isodate" in results["google.com"]
assert results["gstatic.com"]["most_recent_user_interaction_time"] == -1.0
assert (
"most_recent_user_interaction_time_isodate"
not in results["gstatic.com"]
)
assert all(
"most_recent_web_push_interaction_time" not in result
for result in m.results
)
def test_webkit_full_timestamp_schema(self, tmp_path):
    """Check parsing of an ObservedDomains table with both timestamp columns.

    A throwaway SQLite database is created under ``tmp_path`` holding one
    row whose user-interaction timestamp is positive and whose web-push
    timestamp is ``-1.0``. Both raw values must appear in the result, but
    an ``_isodate`` key is expected only for the positive timestamp.
    """
    create_table_sql = """
        CREATE TABLE ObservedDomains (
            domainID INTEGER PRIMARY KEY,
            registrableDomain TEXT NOT NULL,
            lastSeen REAL NOT NULL,
            hadUserInteraction INTEGER NOT NULL,
            mostRecentUserInteractionTime REAL NOT NULL,
            mostRecentWebPushInteractionTime REAL NOT NULL
        );
    """
    insert_row_sql = """
        INSERT INTO ObservedDomains VALUES (?, ?, ?, ?, ?, ?);
    """
    # Column order follows the CREATE TABLE statement above; the final
    # value (-1.0) is the web-push timestamp that must not get an isodate.
    sample_row = (1, "example.com", 1634560250.0, 1, 1634560030.0, -1.0)

    # Build the fixture database on disk so the module can open it by path.
    db_file = tmp_path / "observations.db"
    connection = sqlite3.connect(db_file)
    connection.execute(create_table_sql)
    connection.execute(insert_row_sql, sample_row)
    connection.commit()
    connection.close()

    module = WebkitResourceLoadStatistics(target_path=str(tmp_path))
    module._process_observations_db(str(db_file), "", "observations.db")

    assert len(module.results) == 1
    entry = module.results[0]

    # Positive timestamp: raw value kept and an ISO date derived from it.
    assert entry["most_recent_user_interaction_time"] == 1634560030.0
    assert "most_recent_user_interaction_time_isodate" in entry

    # Negative timestamp: raw value kept, no ISO date emitted.
    assert entry["most_recent_web_push_interaction_time"] == -1.0
    assert "most_recent_web_push_interaction_time_isodate" not in entry