mirror of
https://github.com/streetwriters/notesnook-sync-server.git
synced 2026-03-20 17:14:52 +00:00
monograph: fix monograph content sanitization
This commit is contained in:
committed by
Abdullah Atta
parent
9ae5db378d
commit
8d4336d1bc
@@ -35,6 +35,8 @@ using Notesnook.API.Authorization;
|
||||
using Notesnook.API.Models;
|
||||
using Notesnook.API.Services;
|
||||
using Streetwriters.Common;
|
||||
using Streetwriters.Common.Accessors;
|
||||
using Streetwriters.Common.Enums;
|
||||
using Streetwriters.Common.Helpers;
|
||||
using Streetwriters.Common.Interfaces;
|
||||
using Streetwriters.Common.Messages;
|
||||
@@ -46,7 +48,7 @@ namespace Notesnook.API.Controllers
|
||||
[ApiController]
|
||||
[Route("monographs")]
|
||||
[Authorize("Sync")]
|
||||
public class MonographsController(Repository<Monograph> monographs, IURLAnalyzer analyzer, SyncDeviceService syncDeviceService, ILogger<MonographsController> logger) : ControllerBase
|
||||
public class MonographsController(Repository<Monograph> monographs, IURLAnalyzer analyzer, SyncDeviceService syncDeviceService, WampServiceAccessor serviceAccessor, ILogger<MonographsController> logger) : ControllerBase
|
||||
{
|
||||
const string SVG_PIXEL = "<svg xmlns='http://www.w3.org/2000/svg' width='1' height='1'><circle r='9'/></svg>";
|
||||
private const int MAX_DOC_SIZE = 15 * 1024 * 1024;
|
||||
@@ -107,7 +109,11 @@ namespace Notesnook.API.Controllers
|
||||
if (existingMonograph != null && !existingMonograph.Deleted) return await UpdateAsync(deviceId, monograph);
|
||||
|
||||
if (monograph.EncryptedContent == null)
|
||||
monograph.CompressedContent = (await CleanupContentAsync(User, monograph.Content)).CompressBrotli();
|
||||
{
|
||||
var sanitizationLevel = User.IsUserSubscribed() ? ContentSanitizationLevel.Partial : ContentSanitizationLevel.Full;
|
||||
monograph.CompressedContent = (await SanitizeContentAsync(monograph.Content, sanitizationLevel)).CompressBrotli();
|
||||
monograph.ContentSanitizationLevel = sanitizationLevel;
|
||||
}
|
||||
monograph.UserId = userId;
|
||||
monograph.DatePublished = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
|
||||
|
||||
@@ -158,8 +164,12 @@ namespace Notesnook.API.Controllers
|
||||
if (monograph.EncryptedContent?.Cipher.Length > MAX_DOC_SIZE || monograph.CompressedContent?.Length > MAX_DOC_SIZE)
|
||||
return base.BadRequest("Monograph is too big. Max allowed size is 15mb.");
|
||||
|
||||
var sanitizationLevel = ContentSanitizationLevel.Unknown;
|
||||
if (monograph.EncryptedContent == null)
|
||||
monograph.CompressedContent = (await CleanupContentAsync(User, monograph.Content)).CompressBrotli();
|
||||
{
|
||||
sanitizationLevel = User.IsUserSubscribed() ? ContentSanitizationLevel.Partial : ContentSanitizationLevel.Full;
|
||||
monograph.CompressedContent = (await SanitizeContentAsync(monograph.Content, sanitizationLevel)).CompressBrotli();
|
||||
}
|
||||
else
|
||||
monograph.Content = null;
|
||||
|
||||
@@ -173,6 +183,7 @@ namespace Notesnook.API.Controllers
|
||||
.Set(m => m.SelfDestruct, monograph.SelfDestruct)
|
||||
.Set(m => m.Title, monograph.Title)
|
||||
.Set(m => m.Password, monograph.Password)
|
||||
.Set(m => m.ContentSanitizationLevel, sanitizationLevel)
|
||||
);
|
||||
if (!result.IsAcknowledged) return BadRequest();
|
||||
|
||||
@@ -223,7 +234,22 @@ namespace Notesnook.API.Controllers
|
||||
}
|
||||
|
||||
if (monograph.EncryptedContent == null)
|
||||
{
|
||||
var isContentUnsanitized = monograph.ContentSanitizationLevel == ContentSanitizationLevel.Partial || monograph.ContentSanitizationLevel == ContentSanitizationLevel.Unknown;
|
||||
if (!Constants.IS_SELF_HOSTED && isContentUnsanitized && serviceAccessor.UserSubscriptionService != null && !await serviceAccessor.UserSubscriptionService.IsUserSubscribedAsync(Clients.Notesnook.Id, monograph.UserId!))
|
||||
{
|
||||
var cleaned = await SanitizeContentAsync(monograph.CompressedContent?.DecompressBrotli(), ContentSanitizationLevel.Full);
|
||||
monograph.CompressedContent = cleaned.CompressBrotli();
|
||||
await monographs.Collection.UpdateOneAsync(
|
||||
CreateMonographFilter(monograph.UserId!, monograph),
|
||||
Builders<Monograph>.Update
|
||||
.Set(m => m.CompressedContent, monograph.CompressedContent)
|
||||
.Set(m => m.ContentSanitizationLevel, ContentSanitizationLevel.Full)
|
||||
);
|
||||
}
|
||||
monograph.Content = monograph.CompressedContent?.DecompressBrotli();
|
||||
}
|
||||
|
||||
monograph.ItemId ??= monograph.Id;
|
||||
return Ok(monograph);
|
||||
}
|
||||
@@ -241,7 +267,7 @@ namespace Notesnook.API.Controllers
|
||||
if (monograph.SelfDestruct)
|
||||
{
|
||||
await monographs.Collection.ReplaceOneAsync(
|
||||
CreateMonographFilter(monograph.UserId, monograph),
|
||||
CreateMonographFilter(monograph.UserId!, monograph),
|
||||
new Monograph
|
||||
{
|
||||
ItemId = id,
|
||||
@@ -251,12 +277,12 @@ namespace Notesnook.API.Controllers
|
||||
ViewCount = 0
|
||||
}
|
||||
);
|
||||
await MarkMonographForSyncAsync(monograph.UserId, id);
|
||||
await MarkMonographForSyncAsync(monograph.UserId!, id);
|
||||
}
|
||||
else if (!hasVisitedBefore)
|
||||
{
|
||||
await monographs.Collection.UpdateOneAsync(
|
||||
CreateMonographFilter(monograph.UserId, monograph),
|
||||
CreateMonographFilter(monograph.UserId!, monograph),
|
||||
Builders<Monograph>.Update.Inc(m => m.ViewCount, 1)
|
||||
);
|
||||
|
||||
@@ -329,7 +355,20 @@ namespace Notesnook.API.Controllers
|
||||
await syncDeviceService.AddIdsToAllDevicesAsync(userId, [new(monographId, "monograph")]);
|
||||
}
|
||||
|
||||
private async Task<string> CleanupContentAsync(ClaimsPrincipal user, string? content)
|
||||
// (selector, url-bearing attribute) pairs to inspect
|
||||
private static readonly (string Selector, string Attribute)[] urlElements =
|
||||
[
|
||||
("a", "href"),
|
||||
("img", "src"),
|
||||
("iframe", "src"),
|
||||
("embed", "src"),
|
||||
("object", "data"),
|
||||
("source", "src"),
|
||||
("video", "src"),
|
||||
("audio", "src"),
|
||||
];
|
||||
|
||||
private async Task<string> SanitizeContentAsync(string? content, ContentSanitizationLevel level)
|
||||
{
|
||||
if (string.IsNullOrEmpty(content)) return string.Empty;
|
||||
if (Constants.IS_SELF_HOSTED) return content;
|
||||
@@ -338,31 +377,36 @@ namespace Notesnook.API.Controllers
|
||||
var json = JsonSerializer.Deserialize<MonographContent>(content) ?? throw new Exception("Invalid monograph content.");
|
||||
var html = json.Data;
|
||||
|
||||
if (user.IsUserSubscribed())
|
||||
if (level == ContentSanitizationLevel.Full)
|
||||
{
|
||||
var config = Configuration.Default.WithDefaultLoader();
|
||||
var context = BrowsingContext.New(config);
|
||||
var document = await context.OpenAsync(r => r.Content(html));
|
||||
foreach (var element in document.QuerySelectorAll("a"))
|
||||
|
||||
foreach (var (selector, attribute) in urlElements)
|
||||
{
|
||||
var href = element.GetAttribute("href");
|
||||
if (string.IsNullOrEmpty(href)) continue;
|
||||
if (!await analyzer.IsURLSafeAsync(href))
|
||||
foreach (var element in document.QuerySelectorAll(selector))
|
||||
{
|
||||
logger.LogInformation("Malicious URL detected: {Url}", href);
|
||||
element.RemoveAttribute("href");
|
||||
var url = element.GetAttribute(attribute);
|
||||
if (string.IsNullOrEmpty(url)) continue;
|
||||
if (!await analyzer.IsURLSafeAsync(url))
|
||||
{
|
||||
logger.LogInformation("Malicious URL detected in <{Selector} {Attribute}>: {Url}", selector, attribute, url);
|
||||
element.RemoveAttribute(attribute);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
html = document.ToHtml();
|
||||
}
|
||||
else
|
||||
else if (level == ContentSanitizationLevel.Full)
|
||||
{
|
||||
var config = Configuration.Default.WithDefaultLoader();
|
||||
var context = BrowsingContext.New(config);
|
||||
var document = await context.OpenAsync(r => r.Content(html));
|
||||
foreach (var element in document.QuerySelectorAll("a,iframe,img,object,svg,button,link"))
|
||||
{
|
||||
foreach (var attr in element.Attributes)
|
||||
foreach (var attr in element.Attributes.ToList())
|
||||
element.RemoveAttribute(attr.Name);
|
||||
}
|
||||
html = document.ToHtml();
|
||||
|
||||
38
Notesnook.API/Models/ContentSanitizationLevel.cs
Normal file
38
Notesnook.API/Models/ContentSanitizationLevel.cs
Normal file
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
This file is part of the Notesnook Sync Server project (https://notesnook.com/)
|
||||
|
||||
Copyright (C) 2023 Streetwriters (Private) Limited
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the Affero GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
Affero GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the Affero GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
namespace Notesnook.API.Models
|
||||
{
|
||||
/// <summary>
/// Records which level of sanitization was applied to a monograph's
/// non-encrypted content when it was stored.
/// </summary>
public enum ContentSanitizationLevel
|
||||
{
|
||||
/// <summary>
/// No sanitization level has been recorded (the enum's zero value).
/// NOTE(review): the monographs controller appears to leave the level at this value
/// when a monograph is updated with encrypted content, and to treat it like
/// <see cref="Partial"/> when deciding whether to re-sanitize on read - confirm.
/// </summary>
Unknown = 0,
|
||||
/// <summary>
|
||||
/// Full sanitization applied: links, iframes, images, and other embeds are stripped.
|
||||
/// Applied to monographs published by free-tier users.
|
||||
/// </summary>
|
||||
Full = 1,
|
||||
|
||||
/// <summary>
|
||||
/// Partial sanitization: only unsafe/malicious URLs are removed; rich content is preserved.
|
||||
/// Applied to monographs published by subscribed users. Requires re-sanitization if the
|
||||
/// publisher's subscription lapses.
|
||||
/// </summary>
|
||||
Partial = 2
|
||||
}
|
||||
}
|
||||
@@ -83,5 +83,8 @@ namespace Notesnook.API.Models
|
||||
|
||||
[JsonPropertyName("viewCount")]
|
||||
public int ViewCount { get; set; }
|
||||
|
||||
[JsonIgnore]
|
||||
public ContentSanitizationLevel ContentSanitizationLevel { get; set; }
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user