diff --git a/src/core/source/GitHubDownload.ts b/src/core/source/GitHubDownload.ts
index 76424aa..ffe7837 100644
--- a/src/core/source/GitHubDownload.ts
+++ b/src/core/source/GitHubDownload.ts
@@ -36,6 +36,10 @@ export default class GitHubDownload extends GitHubBase {
         cause: error as Error,
       });
     }
+    // Wipe any partial state from a previous failed extraction. Without
+    // this, half-extracted trees would survive and later be served as the
+    // canonical listing.
+    await storage.rm(this.data.repoId);
     await storage.mk(this.data.repoId);
     try {
       const downloadStream = got.stream(response.url);
@@ -53,7 +57,15 @@ export default class GitHubDownload extends GitHubBase {
         downloadStream,
         this.type
       );
+      // Write the completion marker last. Its presence — and only its
+      // presence — means "the cache for this repo is fully extracted and
+      // safe to serve." Any code path that finds files but no marker must
+      // treat the cache as incomplete and re-download.
+      await storage.write(this.data.repoId, COMPLETE_MARKER, "ok", this.type);
     } catch (error) {
+      // Best-effort cleanup of the partial extraction so the next call
+      // doesn't trip over half-written files.
+      await storage.rm(this.data.repoId).catch(() => undefined);
       throw new AnonymousError("unable_to_download", {
         httpStatus: 500,
         cause: error as Error,
@@ -62,12 +74,48 @@ export default class GitHubDownload extends GitHubBase {
     }
   }
 
+  private async isCacheComplete(): Promise<boolean> {
+    return (
+      (await storage.exists(this.data.repoId, COMPLETE_MARKER)) ===
+      FILE_TYPE.FILE
+    );
+  }
+
   async getFileContent(
     file: AnonymizedFile,
     progress?: (status: string) => void
   ): Promise<Readable> {
+    if (!(await this.isCacheComplete())) {
+      // will throw an error if the file is not in the repository
+      await file.originalPath();
+      await this.download(progress);
+      return storage.read(this.data.repoId, file.filePath);
+    }
     const exists = await storage.exists(this.data.repoId, file.filePath);
     if (exists === FILE_TYPE.FILE) {
+      // Validate the cached file size against the upstream tree size when
+      // it's known — guards against the same poisoned-cache class as in
+      // GitHubStream. cached.size >= expected is accepted (equal for
+      // normal files, larger for LFS-resolved blobs).
+      let expectedSize: number | undefined;
+      try {
+        expectedSize = await file.size();
+      } catch {
+        // not all callers populate a FileModel; fall back to reading
+      }
+      if (expectedSize != null && expectedSize > 0) {
+        try {
+          const stat = await storage.fileInfo(this.data.repoId, file.filePath);
+          if (stat.size != null && stat.size < expectedSize) {
+            await storage.rm(this.data.repoId, file.filePath);
+            await file.originalPath();
+            await this.download(progress);
+            return storage.read(this.data.repoId, file.filePath);
+          }
+        } catch {
+          // fall through to read; if read fails the caller surfaces the error
+        }
+      }
       return storage.read(this.data.repoId, file.filePath);
     } else if (exists === FILE_TYPE.FOLDER) {
       throw new AnonymousError("folder_not_supported", {
@@ -75,20 +123,19 @@
         object: file,
       });
     }
-    // will throw an error if the file is not in the repository
+    // marker present but file missing — shouldn't happen, but redownload
+    // rather than 404 on a stale partial cache.
     await file.originalPath();
-
-    // the cache is not ready, we need to download the repository
     await this.download(progress);
     return storage.read(this.data.repoId, file.filePath);
   }
 
   async getFiles(progress?: (status: string) => void) {
-    if ((await storage.exists(this.data.repoId)) === FILE_TYPE.NOT_FOUND) {
+    if (!(await this.isCacheComplete())) {
       await this.download(progress);
     }
     let nbFiles = 0;
-    return storage.listFiles(this.data.repoId, "", {
+    const all = await storage.listFiles(this.data.repoId, "", {
       onEntry: () => {
         if (progress) {
           nbFiles++;
@@ -96,9 +143,15 @@
         }
       },
     });
+    return all.filter((f) => f.name !== COMPLETE_MARKER);
   }
 }
 
+// Sentinel object/file written at the root of an extracted repo cache
+// once the extraction has finished successfully. Its presence means the
+// cache is complete. Hidden so it doesn't show up in user listings.
+const COMPLETE_MARKER = ".anon-complete";
+
 function humanFileSize(bytes: number, si = false, dp = 1) {
   const thresh = si ? 1000 : 1024;
diff --git a/src/core/source/GitHubStream.ts b/src/core/source/GitHubStream.ts
index 368fd28..9cefee1 100644
--- a/src/core/source/GitHubStream.ts
+++ b/src/core/source/GitHubStream.ts
@@ -114,7 +114,10 @@ export default class GitHubStream extends GitHubBase {
       });
       blobStream.on("end", () => decide());
       blobStream.on("error", (err) => {
-        if (decided) return;
+        // Always propagate — pre-decision this is the only listener; once a
+        // non-LFS decision is made, the inner branch attaches its own
+        // listener that will also fire, but we shouldn't rely on that being
+        // there if the code is later refactored.
         decided = true;
         out.destroy(err);
       });
diff --git a/src/core/storage/FileSystem.ts b/src/core/storage/FileSystem.ts
index 1b9aad1..b1a5ee7 100644
--- a/src/core/storage/FileSystem.ts
+++ b/src/core/storage/FileSystem.ts
@@ -153,13 +153,17 @@ export default class FileSystem extends StorageBase {
           size: stats.size,
         });
       }
+      // Don't synthesise a sha here. The previous value (stats.ino)
+      // wasn't a content hash — just an inode number — and any code
+      // that compared it to an upstream Git blob sha would silently
+      // disagree. Leave it undefined so callers either look up the
+      // real sha from FileModel/GitHub or skip sha-keyed paths.
       output2.push(
         new FileModel({
           name: file,
           path: dir,
           repoId: repoId,
           size: stats.size,
-          sha: stats.ino.toString(),
         })
       );
     }
diff --git a/src/core/storage/S3.ts b/src/core/storage/S3.ts
index 6d58797..baf6080 100644
--- a/src/core/storage/S3.ts
+++ b/src/core/storage/S3.ts
@@ -175,12 +175,13 @@ export default class S3Storage extends StorageBase {
   ): Promise<void> {
     if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
 
-    if (data instanceof Readable) {
-      data.on("error", (err) => {
-        console.error(`[ERROR] S3 write ${path}`, err);
-        this.rm(repoId, path);
-      });
-    }
+    // No fire-and-forget rm on stream error: Upload uses multipart and
+    // does not commit a partially-uploaded object, so there's nothing to
+    // clean up. The previous handler raced with retries and could delete
+    // a previously-good object on a transient source-stream hiccup. The
+    // size-validated read path in GitHubStream.getFileContentCache
+    // recovers from any object that does end up undersized for any
+    // reason.
 
     const params: PutObjectCommandInput = {
       Bucket: config.S3_BUCKET,
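
For context, here is a minimal standalone sketch of the completion-marker protocol that the GitHubDownload changes implement. The `Storage` interface, `extractWithMarker`, and the literal return values are illustrative stand-ins, not the project's actual API:

```ts
// Hypothetical storage shape mirroring the calls in the diff above.
interface Storage {
  exists(repoId: string, path?: string): Promise<"file" | "folder" | "not_found">;
  write(repoId: string, path: string, content: string): Promise<void>;
  rm(repoId: string, path?: string): Promise<void>;
}

const COMPLETE_MARKER = ".anon-complete";

// Wipe, extract, then write the marker last. A crash anywhere before the
// final write leaves no marker, so readers treat the cache as incomplete
// and re-download instead of serving a half-extracted tree.
async function extractWithMarker(
  storage: Storage,
  repoId: string,
  extract: () => Promise<void>
): Promise<void> {
  await storage.rm(repoId); // clear leftovers from a failed previous run
  try {
    await extract();
    await storage.write(repoId, COMPLETE_MARKER, "ok"); // the commit point
  } catch (err) {
    await storage.rm(repoId).catch(() => undefined); // best-effort cleanup
    throw err;
  }
}

// The cache is trustworthy only when the marker exists as a file.
async function isCacheComplete(
  storage: Storage,
  repoId: string
): Promise<boolean> {
  return (await storage.exists(repoId, COMPLETE_MARKER)) === "file";
}
```

The key design choice is that the marker write is the single atomic commit point: every earlier step is allowed to fail, and every reader checks the marker before trusting anything else in the tree.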
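The size-validation branch in getFileContent follows the same pattern as the GitHubStream cache read it references. A standalone sketch, again with a hypothetical `CacheStore` interface and `readWithSizeCheck` name standing in for the real storage layer:

```ts
import type { Readable } from "stream";

// Hypothetical cache-store shape; the diff uses storage.fileInfo/read/rm.
interface CacheStore {
  fileInfo(repoId: string, path: string): Promise<{ size?: number }>;
  read(repoId: string, path: string): Promise<Readable>;
  rm(repoId: string, path: string): Promise<void>;
}

// Serve the cached file unless it is smaller than the size the upstream
// tree reports. Equal is the normal case; larger is allowed because LFS
// pointers resolve to bigger blobs. Undersized means a truncated write
// poisoned the cache: evict the entry and refetch before reading.
async function readWithSizeCheck(
  store: CacheStore,
  repoId: string,
  filePath: string,
  expectedSize: number | undefined, // undefined: no FileModel, skip the check
  redownload: () => Promise<void>
): Promise<Readable> {
  if (expectedSize != null && expectedSize > 0) {
    const stat = await store.fileInfo(repoId, filePath).catch(() => undefined);
    if (stat?.size != null && stat.size < expectedSize) {
      await store.rm(repoId, filePath);
      await redownload();
    }
  }
  return store.read(repoId, filePath);
}
```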
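The S3 change relies on how the AWS SDK's `Upload` helper commits data: multipart uploads only materialize an object when CompleteMultipartUpload succeeds, and single-part puts are atomic, so a failed upload never leaves a visible partial object. A sketch using the real `Upload` class from `@aws-sdk/lib-storage`; the `uploadStream` wrapper itself is illustrative:

```ts
import { S3Client } from "@aws-sdk/client-s3";
import { Upload } from "@aws-sdk/lib-storage";
import type { Readable } from "stream";

// If the source stream errors, done() rejects and the object is never
// committed, so any previously stored object at the key stays intact.
// Deleting on the stream's "error" event, as the removed handler did,
// can race the SDK's own retries and destroy a good object after a
// transient hiccup.
async function uploadStream(
  client: S3Client,
  bucket: string,
  key: string,
  body: Readable
): Promise<void> {
  const upload = new Upload({
    client,
    params: { Bucket: bucket, Key: key, Body: body },
  });
  await upload.done(); // rejects on failure; nothing partial is visible
}
```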