monograph: fix monograph content sanitization

This commit is contained in:
Abdullah Atta
2026-02-25 15:39:25 +05:00
committed by Abdullah Atta
parent 9ae5db378d
commit 8d4336d1bc
5 changed files with 108 additions and 17 deletions

View File

@@ -35,6 +35,8 @@ using Notesnook.API.Authorization;
using Notesnook.API.Models;
using Notesnook.API.Services;
using Streetwriters.Common;
using Streetwriters.Common.Accessors;
using Streetwriters.Common.Enums;
using Streetwriters.Common.Helpers;
using Streetwriters.Common.Interfaces;
using Streetwriters.Common.Messages;
@@ -46,7 +48,7 @@ namespace Notesnook.API.Controllers
[ApiController]
[Route("monographs")]
[Authorize("Sync")]
public class MonographsController(Repository<Monograph> monographs, IURLAnalyzer analyzer, SyncDeviceService syncDeviceService, ILogger<MonographsController> logger) : ControllerBase
public class MonographsController(Repository<Monograph> monographs, IURLAnalyzer analyzer, SyncDeviceService syncDeviceService, WampServiceAccessor serviceAccessor, ILogger<MonographsController> logger) : ControllerBase
{
const string SVG_PIXEL = "<svg xmlns='http://www.w3.org/2000/svg' width='1' height='1'><circle r='9'/></svg>";
private const int MAX_DOC_SIZE = 15 * 1024 * 1024;
@@ -107,7 +109,11 @@ namespace Notesnook.API.Controllers
if (existingMonograph != null && !existingMonograph.Deleted) return await UpdateAsync(deviceId, monograph);
if (monograph.EncryptedContent == null)
monograph.CompressedContent = (await CleanupContentAsync(User, monograph.Content)).CompressBrotli();
{
var sanitizationLevel = User.IsUserSubscribed() ? ContentSanitizationLevel.Partial : ContentSanitizationLevel.Full;
monograph.CompressedContent = (await SanitizeContentAsync(monograph.Content, sanitizationLevel)).CompressBrotli();
monograph.ContentSanitizationLevel = sanitizationLevel;
}
monograph.UserId = userId;
monograph.DatePublished = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
@@ -158,8 +164,12 @@ namespace Notesnook.API.Controllers
if (monograph.EncryptedContent?.Cipher.Length > MAX_DOC_SIZE || monograph.CompressedContent?.Length > MAX_DOC_SIZE)
return base.BadRequest("Monograph is too big. Max allowed size is 15mb.");
var sanitizationLevel = ContentSanitizationLevel.Unknown;
if (monograph.EncryptedContent == null)
monograph.CompressedContent = (await CleanupContentAsync(User, monograph.Content)).CompressBrotli();
{
sanitizationLevel = User.IsUserSubscribed() ? ContentSanitizationLevel.Partial : ContentSanitizationLevel.Full;
monograph.CompressedContent = (await SanitizeContentAsync(monograph.Content, sanitizationLevel)).CompressBrotli();
}
else
monograph.Content = null;
@@ -173,6 +183,7 @@ namespace Notesnook.API.Controllers
.Set(m => m.SelfDestruct, monograph.SelfDestruct)
.Set(m => m.Title, monograph.Title)
.Set(m => m.Password, monograph.Password)
.Set(m => m.ContentSanitizationLevel, sanitizationLevel)
);
if (!result.IsAcknowledged) return BadRequest();
@@ -223,7 +234,22 @@ namespace Notesnook.API.Controllers
}
if (monograph.EncryptedContent == null)
{
var isContentUnsanitized = monograph.ContentSanitizationLevel == ContentSanitizationLevel.Partial || monograph.ContentSanitizationLevel == ContentSanitizationLevel.Unknown;
if (!Constants.IS_SELF_HOSTED && isContentUnsanitized && serviceAccessor.UserSubscriptionService != null && !await serviceAccessor.UserSubscriptionService.IsUserSubscribedAsync(Clients.Notesnook.Id, monograph.UserId!))
{
var cleaned = await SanitizeContentAsync(monograph.CompressedContent?.DecompressBrotli(), ContentSanitizationLevel.Full);
monograph.CompressedContent = cleaned.CompressBrotli();
await monographs.Collection.UpdateOneAsync(
CreateMonographFilter(monograph.UserId!, monograph),
Builders<Monograph>.Update
.Set(m => m.CompressedContent, monograph.CompressedContent)
.Set(m => m.ContentSanitizationLevel, ContentSanitizationLevel.Full)
);
}
monograph.Content = monograph.CompressedContent?.DecompressBrotli();
}
monograph.ItemId ??= monograph.Id;
return Ok(monograph);
}
@@ -241,7 +267,7 @@ namespace Notesnook.API.Controllers
if (monograph.SelfDestruct)
{
await monographs.Collection.ReplaceOneAsync(
CreateMonographFilter(monograph.UserId, monograph),
CreateMonographFilter(monograph.UserId!, monograph),
new Monograph
{
ItemId = id,
@@ -251,12 +277,12 @@ namespace Notesnook.API.Controllers
ViewCount = 0
}
);
await MarkMonographForSyncAsync(monograph.UserId, id);
await MarkMonographForSyncAsync(monograph.UserId!, id);
}
else if (!hasVisitedBefore)
{
await monographs.Collection.UpdateOneAsync(
CreateMonographFilter(monograph.UserId, monograph),
CreateMonographFilter(monograph.UserId!, monograph),
Builders<Monograph>.Update.Inc(m => m.ViewCount, 1)
);
@@ -329,7 +355,20 @@ namespace Notesnook.API.Controllers
await syncDeviceService.AddIdsToAllDevicesAsync(userId, [new(monographId, "monograph")]);
}
private async Task<string> CleanupContentAsync(ClaimsPrincipal user, string? content)
/// <summary>
/// (CSS selector, URL-bearing attribute) pairs inspected during content sanitization.
/// For each element matching <c>Selector</c>, the value of <c>Attribute</c> is passed to the
/// URL analyzer; when the URL is reported as unsafe, that attribute is removed from the element.
/// Elements whose attribute is missing or empty are skipped. Only the single attribute listed
/// for each selector is examined.
/// </summary>
private static readonly (string Selector, string Attribute)[] urlElements =
[
("a", "href"),
("img", "src"),
("iframe", "src"),
("embed", "src"),
("object", "data"),
("source", "src"),
("video", "src"),
("audio", "src"),
];
private async Task<string> SanitizeContentAsync(string? content, ContentSanitizationLevel level)
{
if (string.IsNullOrEmpty(content)) return string.Empty;
if (Constants.IS_SELF_HOSTED) return content;
@@ -338,31 +377,36 @@ namespace Notesnook.API.Controllers
var json = JsonSerializer.Deserialize<MonographContent>(content) ?? throw new Exception("Invalid monograph content.");
var html = json.Data;
if (user.IsUserSubscribed())
if (level == ContentSanitizationLevel.Full)
{
var config = Configuration.Default.WithDefaultLoader();
var context = BrowsingContext.New(config);
var document = await context.OpenAsync(r => r.Content(html));
foreach (var element in document.QuerySelectorAll("a"))
foreach (var (selector, attribute) in urlElements)
{
var href = element.GetAttribute("href");
if (string.IsNullOrEmpty(href)) continue;
if (!await analyzer.IsURLSafeAsync(href))
foreach (var element in document.QuerySelectorAll(selector))
{
logger.LogInformation("Malicious URL detected: {Url}", href);
element.RemoveAttribute("href");
var url = element.GetAttribute(attribute);
if (string.IsNullOrEmpty(url)) continue;
if (!await analyzer.IsURLSafeAsync(url))
{
logger.LogInformation("Malicious URL detected in <{Selector} {Attribute}>: {Url}", selector, attribute, url);
element.RemoveAttribute(attribute);
}
}
}
html = document.ToHtml();
}
else
else if (level == ContentSanitizationLevel.Full)
{
var config = Configuration.Default.WithDefaultLoader();
var context = BrowsingContext.New(config);
var document = await context.OpenAsync(r => r.Content(html));
foreach (var element in document.QuerySelectorAll("a,iframe,img,object,svg,button,link"))
{
foreach (var attr in element.Attributes)
foreach (var attr in element.Attributes.ToList())
element.RemoveAttribute(attr.Name);
}
html = document.ToHtml();

View File

@@ -0,0 +1,38 @@
/*
This file is part of the Notesnook Sync Server project (https://notesnook.com/)
Copyright (C) 2023 Streetwriters (Private) Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the Affero GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Affero GNU General Public License for more details.
You should have received a copy of the Affero GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace Notesnook.API.Models
{
/// <summary>
/// Describes how much sanitization has been applied to a monograph's stored content.
/// The value is stored on the monograph alongside its content, so the explicit numeric
/// values below should not be renumbered.
/// </summary>
public enum ContentSanitizationLevel
{
/// <summary>
/// No sanitization level has been recorded. This is the zero (default) value, and it is
/// also what the controller stores when a monograph is updated with encrypted content.
/// When serving a monograph, the controller treats this the same as <see cref="Partial"/>,
/// i.e. as content that may still need full sanitization.
/// </summary>
Unknown = 0,
/// <summary>
/// Full sanitization applied: links, iframes, images, and other embeds are stripped.
/// Applied to monographs published by free-tier users.
/// </summary>
Full = 1,
/// <summary>
/// Partial sanitization: only unsafe/malicious URLs are removed; rich content is preserved.
/// Applied to monographs published by subscribed users. Requires re-sanitization if the
/// publisher's subscription lapses.
/// </summary>
Partial = 2
}
}

View File

@@ -83,5 +83,8 @@ namespace Notesnook.API.Models
[JsonPropertyName("viewCount")]
public int ViewCount { get; set; }
/// <summary>
/// The sanitization level that was applied to this monograph's stored content.
/// Set by the controller when the monograph is published, updated, or re-sanitized on read.
/// Marked <c>[JsonIgnore]</c> so it is left out of the JSON representation of the monograph.
/// </summary>
[JsonIgnore]
public ContentSanitizationLevel ContentSanitizationLevel { get; set; }
}
}