refactor: replace kuchikiki with dom_query (#14959)

* test: add more unit-tests for `html` module

* refactor: remove html dependencies from `tauri-cli`

* feat: introduce `html-manipulation-2` feature

* Remove deprecation

* Use new feature flag

* Unroll `build` feature

* Introduce `build-2` feature

* Reduce diff

* Use `build-2` in more places

* Add docs

* Refactor `inject_script_hashes`

* Refactor `with_head`

* Rename serialize and parse functions

* Add changes file

* Remove unused function

* Update changelog

* Remove test

* Update wry

* Add todo comments
we don't have the git blame data in html2, better do it now or never
find it again

* refactor `with_head` to `ensure_head`

* Remove unused casts

* Avoid using format to construct html elements
which has the potential to get injected

* Feature gate `inline_isolation`

* Keep old prepends appends

* Fix `inline_isolation_replaces_src_with_content` test

* End meta tag

* Mirror test to old html module

* Use back to `append_html` for csp and link issue

* Try out dom query main branch

* Use nodes instead to avoid an extra clone

* Use wry 0.54.4 and dom_query 0.27

* Mark stability

* Remove `PatternObject`

---------

Co-authored-by: Tony <legendmastertony@gmail.com>
Co-authored-by: Tony <68118705+Legend-Master@users.noreply.github.com>
This commit is contained in:
Thomas Eizinger
2026-03-25 23:58:51 +11:00
committed by GitHub
parent 386312c73a
commit e032c3b342
25 changed files with 526 additions and 98 deletions

View File

@@ -0,0 +1,6 @@
---
"tauri-utils": minor:deps
---
Add new `html-manipulation-2` and `build-2` feature flags that use `dom_query` instead of `kuchikiki` for HTML parsing / manipulation.
This allows downstream users to remove `kuchikiki` and its dependencies from their dependency tree.

24
Cargo.lock generated
View File

@@ -1321,7 +1321,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c"
dependencies = [
"lazy_static",
"windows-sys 0.48.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -2423,7 +2423,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d"
dependencies = [
"libc",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -4423,7 +4423,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
dependencies = [
"cfg-if",
"windows-targets 0.48.5",
"windows-targets 0.52.6",
]
[[package]]
@@ -5529,7 +5529,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d8fae84b431384b68627d0f9b3b1245fcf9f46f6c0e3dc902e9dce64edd1967"
dependencies = [
"libc",
"windows-sys 0.48.0",
"windows-sys 0.60.2",
]
[[package]]
@@ -5946,7 +5946,7 @@ dependencies = [
"aes-gcm",
"aes-kw",
"argon2",
"base64 0.21.7",
"base64 0.22.1",
"bitfield",
"block-padding",
"blowfish",
@@ -7239,7 +7239,7 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys 0.4.15",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -7252,7 +7252,7 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys 0.9.4",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -7378,7 +7378,7 @@ dependencies = [
"security-framework 3.5.1",
"security-framework-sys",
"webpki-root-certs",
"windows-sys 0.52.0",
"windows-sys 0.60.2",
]
[[package]]
@@ -8888,7 +8888,6 @@ dependencies = [
"glob",
"handlebars",
"heck 0.5.0",
"html5ever 0.29.1",
"ignore",
"image",
"include_dir",
@@ -8901,7 +8900,6 @@ dependencies = [
"jsonrpsee-core",
"jsonrpsee-ws-client",
"jsonschema",
"kuchikiki",
"libc",
"local-ip-address",
"log",
@@ -9188,6 +9186,7 @@ dependencies = [
"brotli",
"cargo_metadata",
"ctor 0.2.9",
"dom_query",
"dunce",
"getrandom 0.3.3",
"glob",
@@ -9213,6 +9212,7 @@ dependencies = [
"serialize-to-javascript",
"swift-rs",
"tauri",
"tempfile",
"thiserror 2.0.12",
"toml 1.0.6+spec-1.1.0",
"url",
@@ -9254,7 +9254,7 @@ dependencies = [
"getrandom 0.2.15",
"once_cell",
"rustix 0.38.43",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -10609,7 +10609,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.59.0",
]
[[package]]

View File

@@ -28,7 +28,7 @@ anyhow = "1"
quote = { version = "1", optional = true }
tauri-codegen = { version = "2.5.5", path = "../tauri-codegen", optional = true }
tauri-utils = { version = "2.8.3", path = "../tauri-utils", features = [
"build",
"build-2",
"resources",
] }
cargo_toml = "0.22"

View File

@@ -63,7 +63,7 @@ tauri-utils = { version = "2.8.3", path = "../tauri-utils", features = [
"schema",
"config-json5",
"config-toml",
"html-manipulation",
"html-manipulation-2",
] }
toml = "1"
jsonschema = { version = "0.33", default-features = false }
@@ -89,8 +89,6 @@ env_logger = "0.11"
icns = { package = "tauri-icns", version = "0.1" }
image = { version = "0.25", default-features = false, features = ["ico"] }
axum = { version = "0.8", features = ["ws"] }
html5ever = "0.29"
kuchiki = { package = "kuchikiki", version = "=0.8.8-speedreader" }
tokio = { version = "1", features = ["macros", "sync"] }
common-path = "1"
serde-value = "0.7"

View File

@@ -7,8 +7,6 @@ use axum::{
http::{header, StatusCode, Uri},
response::{IntoResponse, Response},
};
use html5ever::{namespace_url, ns, LocalName, QualName};
use kuchiki::{traits::TendrilSink, NodeRef};
use std::{
net::{IpAddr, SocketAddr},
path::{Path, PathBuf},
@@ -128,30 +126,14 @@ async fn ws_handler(ws: WebSocketUpgrade, state: State<ServerState>) -> Response
}
fn inject_address(html_bytes: Vec<u8>, address: &SocketAddr) -> Vec<u8> {
fn with_html_head<F: FnOnce(&NodeRef)>(document: &mut NodeRef, f: F) {
if let Ok(ref node) = document.select_first("head") {
f(node.as_node())
} else {
let node = NodeRef::new_element(
QualName::new(None, ns!(html), LocalName::from("head")),
None,
);
f(&node);
document.prepend(node)
}
}
let document = tauri_utils::html2::parse_doc(String::from_utf8_lossy(&html_bytes).into_owned());
let mut document = kuchiki::parse_html()
.one(String::from_utf8_lossy(&html_bytes).into_owned())
.document_node;
with_html_head(&mut document, |head| {
let script = RELOAD_SCRIPT.replace("{{reload_url}}", &format!("ws://{address}/__tauri_cli"));
let script_el = NodeRef::new_element(QualName::new(None, ns!(html), "script".into()), None);
script_el.append(NodeRef::new_text(script));
head.prepend(script_el);
});
tauri_utils::html2::append_script_to_head(
&document,
&RELOAD_SCRIPT.replace("{{reload_url}}", &format!("ws://{address}/__tauri_cli")),
);
tauri_utils::html::serialize_node(&document)
tauri_utils::html2::serialize_doc(&document)
}
fn fs_read_scoped(path: PathBuf, scope: &Path) -> crate::Result<Vec<u8>> {

View File

@@ -21,7 +21,7 @@ syn = "2"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
tauri-utils = { version = "2.8.3", path = "../tauri-utils", features = [
"build",
"build-2",
] }
thiserror = "2"
walkdir = "2"

View File

@@ -25,7 +25,7 @@ use tauri_utils::{
},
assets::AssetKey,
config::{Config, FrontendDist, PatternKind},
html::{inject_nonce_token, parse as parse_html, serialize_node as serialize_html_node, NodeRef},
html2::{inject_nonce_token, parse_doc, serialize_doc, Document},
platform::Target,
tokens::{map_lit, str_lit},
};
@@ -44,27 +44,25 @@ pub struct ContextData {
pub test: bool,
}
fn inject_script_hashes(document: &NodeRef, key: &AssetKey, csp_hashes: &mut CspHashes) {
if let Ok(inline_script_elements) = document.select("script:not(:empty)") {
let mut scripts = Vec::new();
for inline_script_el in inline_script_elements {
let script = inline_script_el.as_node().text_contents();
let mut hasher = Sha256::new();
hasher.update(tauri_utils::html::normalize_script_for_csp(
script.as_bytes(),
));
let hash = hasher.finalize();
scripts.push(format!(
"'sha256-{}'",
base64::engine::general_purpose::STANDARD.encode(hash)
));
}
csp_hashes
.inline_scripts
.entry(key.clone().into())
.or_default()
.append(&mut scripts);
}
/// Records a CSP hash source for every non-empty inline `<script>` of `document`.
///
/// The text of each element matched by `script:not(:empty)` is normalized with
/// `normalize_script_for_csp`, hashed with SHA-256, base64-encoded and stored as
/// `'sha256-<hash>'` in `csp_hashes.inline_scripts` under `key`.
/// The entry for `key` is created even when the document has no matching scripts.
fn inject_script_hashes(document: &Document, key: &AssetKey, csp_hashes: &mut CspHashes) {
  let hashes = csp_hashes
    .inline_scripts
    .entry(key.clone().into())
    .or_default();
  for element in document.select("script:not(:empty)").iter() {
    let normalized = tauri_utils::html2::normalize_script_for_csp(element.text().as_bytes());
    let digest = Sha256::digest(normalized);
    let encoded = base64::engine::general_purpose::STANDARD.encode(digest);
    hashes.push(format!("'sha256-{encoded}'"));
  }
}
fn map_core_assets(
@@ -77,7 +75,7 @@ fn map_core_assets(
if path.extension() == Some(OsStr::new("html")) {
#[allow(clippy::collapsible_if)]
if csp {
let document = parse_html(String::from_utf8_lossy(input).into_owned());
let document = parse_doc(String::from_utf8_lossy(input).into_owned());
inject_nonce_token(&document, &dangerous_disable_asset_csp_modification);
@@ -85,7 +83,7 @@ fn map_core_assets(
inject_script_hashes(&document, key, csp_hashes);
}
*input = serialize_html_node(&document);
*input = serialize_doc(&document);
}
}
Ok(())
@@ -108,13 +106,13 @@ fn map_isolation(
move |key, path, input, csp_hashes| {
if path.extension() == Some(OsStr::new("html")) {
let isolation_html = parse_html(String::from_utf8_lossy(input).into_owned());
let isolation_html = parse_doc(String::from_utf8_lossy(input).into_owned());
// this is appended, so no need to reverse order it
tauri_utils::html::inject_codegen_isolation_script(&isolation_html);
tauri_utils::html2::inject_codegen_isolation_script(&isolation_html);
// temporary workaround for windows not loading assets
tauri_utils::html::inline_isolation(&isolation_html, &dir);
tauri_utils::html2::inline_isolation(&isolation_html, &dir);
inject_nonce_token(
&isolation_html,
@@ -125,7 +123,7 @@ fn map_isolation(
csp_hashes.styles.push(iframe_style_csp_hash.clone());
*input = isolation_html.to_string().as_bytes().to_vec()
*input = serialize_doc(&isolation_html)
}
Ok(())

View File

@@ -181,7 +181,7 @@ impl CspHashes {
let mut hasher = Sha256::new();
hasher.update(
&std::fs::read(path)
.map(|b| tauri_utils::html::normalize_script_for_csp(&b))
.map(|b| tauri_utils::html2::normalize_script_for_csp(&b))
.map_err(|error| EmbeddedAssetsError::AssetRead {
path: path.to_path_buf(),
error,

View File

@@ -28,7 +28,7 @@ runtime = []
anyhow = { version = "1", optional = true }
serde = { version = "1", optional = true }
tauri-utils = { version = "2.8.3", default-features = false, features = [
"build",
"build-2",
], path = "../tauri-utils" }
serde_json = { version = "1", optional = true }
glob = { version = "0.3", optional = true }

View File

@@ -24,6 +24,7 @@ brotli = { version = "8", optional = true, default-features = false, features =
url = { version = "2", features = ["serde"] }
html5ever = { version = "0.29", optional = true }
kuchiki = { package = "kuchikiki", version = "0.8.8-speedreader", optional = true }
dom_query = { version = "0.27", optional = true, default-features = false }
proc-macro2 = { version = "1", optional = true }
quote = { version = "1", optional = true }
# Our code requires at least 0.8.21 so don't change this to 0.8
@@ -59,6 +60,7 @@ swift-rs = { version = "1", optional = true, features = ["build"] }
getrandom = { version = "0.3", features = ["std"] }
serial_test = "3"
tauri = { path = "../tauri" }
tempfile = "3.15.0"
[features]
build = [
@@ -69,6 +71,15 @@ build = [
"swift-rs",
"html-manipulation",
]
# Same as `build` but uses `html-manipulation-2` to avoid the `kuchikiki` dependency.
build-2 = [
"proc-macro2",
"quote",
"cargo_metadata",
"schema",
"swift-rs",
"html-manipulation-2",
]
compression = ["brotli"]
schema = ["schemars"]
isolation = ["aes-gcm", "getrandom", "serialize-to-javascript"]
@@ -77,3 +88,4 @@ config-json5 = ["json5"]
config-toml = []
resources = ["walkdir"]
html-manipulation = ["dep:html5ever", "dep:kuchiki"]
html-manipulation-2 = ["dep:dom_query"]

View File

@@ -322,7 +322,7 @@ impl FromStr for CapabilityFile {
}
}
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
mod build {
use std::convert::identity;

View File

@@ -283,7 +283,7 @@ mod tests {
}
}
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
mod build {
use proc_macro2::TokenStream;
use quote::{quote, ToTokens, TokenStreamExt};

View File

@@ -126,7 +126,7 @@ impl Manifest {
}
}
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
mod build {
use proc_macro2::TokenStream;
use quote::{quote, ToTokens, TokenStreamExt};

View File

@@ -58,7 +58,7 @@ pub const ALLOWED_COMMANDS_FILE_NAME: &str = "allowed-commands.json";
/// the value is set to the config's directory
pub const REMOVE_UNUSED_COMMANDS_ENV_VAR: &str = "REMOVE_UNUSED_COMMANDS";
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
pub mod build;
pub mod capability;
pub mod identifier;
@@ -104,7 +104,7 @@ pub enum Error {
CreateDir(std::io::Error, PathBuf),
/// [`cargo_metadata`] was not able to complete successfully
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
#[error("failed to execute: {0}")]
Metadata(#[from] ::cargo_metadata::Error),
@@ -460,7 +460,7 @@ mod tests {
}
}
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
mod build_ {
use std::convert::identity;

View File

@@ -438,7 +438,7 @@ fn display_perm_key(prefix: &str) -> &str {
}
}
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
mod build {
use proc_macro2::TokenStream;
use quote::{quote, ToTokens, TokenStreamExt};

View File

@@ -145,7 +145,7 @@ impl From<toml::Value> for Value {
}
}
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
mod build {
use std::convert::identity;

View File

@@ -3354,7 +3354,7 @@ pub struct PluginConfig(pub HashMap<String, JsonValue>);
/// This allows for a build script to output the values in a `Config` to a `TokenStream`, which can
/// then be consumed by another crate. Useful for passing a config to both the build script and the
/// application using tauri while only parsing it once (in the build script).
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
mod build {
use super::*;
use crate::{literal_struct, tokens::*};

View File

@@ -281,6 +281,7 @@ pub fn inline_isolation(document: &NodeRef, dir: &Path) {
}
}
// TODO: Verify this, this is not found in the HTML spec, see https://github.com/tauri-apps/tauri/pull/14265#discussion_r2415396842
/// Normalize line endings in script content to match what the browser uses for CSP hashing.
///
/// According to the HTML spec, browsers normalize:
@@ -315,6 +316,13 @@ pub fn normalize_script_for_csp(input: &[u8]) -> Vec<u8> {
#[cfg(test)]
mod tests {
use std::io::Write;
use super::*;
use crate::{
assets::{SCRIPT_NONCE_TOKEN, STYLE_NONCE_TOKEN},
config,
};
#[test]
fn csp() {
@@ -322,12 +330,14 @@ mod tests {
"<html><head></head></html>".to_string(),
"<html></html>".to_string(),
];
for html in htmls {
let document = super::parse(html);
let document = parse(html);
let csp = "csp-string";
super::inject_csp(&document, csp);
inject_csp(&document, csp);
assert_eq!(
document.to_string(),
String::from_utf8(serialize_node(&document)).unwrap(),
format!(
r#"<html><head><meta http-equiv="Content-Security-Policy" content="{csp}"></head><body></body></html>"#,
)
@@ -336,12 +346,97 @@ mod tests {
}
#[test]
fn normalize_script_for_csp() {
fn normalize_script_for_csp_test() {
let js = "// Copyright 2019-2024 Tauri Programme within The Commons Conservancy\r// SPDX-License-Identifier: Apache-2.0\n// SPDX-License-Identifier: MIT\r\n\r\nwindow.__TAURI_ISOLATION_HOOK__ = (payload, options) => {\r\n return payload\r\n}\r\n";
let expected = "// Copyright 2019-2024 Tauri Programme within The Commons Conservancy\n// SPDX-License-Identifier: Apache-2.0\n// SPDX-License-Identifier: MIT\n\nwindow.__TAURI_ISOLATION_HOOK__ = (payload, options) => {\n return payload\n}\n";
assert_eq!(normalize_script_for_csp(js.as_bytes()), expected.as_bytes())
}
#[test]
fn parse_and_serialize_roundtrips() {
let htmls = [
"<html><head><title>Test</title></head><body><h1>Hello</h1></body></html>",
"<!DOCTYPE html><html><head></head><body></body></html>",
];
for html in htmls {
let parsed = parse(html.to_string());
let serialized = serialize_node(&parsed);
let result = String::from_utf8(serialized).unwrap();
assert_eq!(result, html);
}
}
#[test]
fn inject_nonce_to_scripts() {
let html = r#"<html><head><script src="http://example.com/script.js"></script></head><body></body></html>"#;
let document = parse(html.to_string());
inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(false));
assert_eq!(
super::normalize_script_for_csp(js.as_bytes()),
expected.as_bytes()
)
String::from_utf8(serialize_node(&document)).unwrap(),
format!(
r#"<html><head><script src="http://example.com/script.js" nonce="{SCRIPT_NONCE_TOKEN}"></script></head><body></body></html>"#
)
);
}
#[test]
fn inject_nonce_to_styles() {
let html = r#"<html><head><style>body { color: red; }</style></head><body></body></html>"#;
let document = parse(html.to_string());
inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(false));
assert_eq!(
String::from_utf8(serialize_node(&document)).unwrap(),
format!(
r#"<html><head><style nonce="{STYLE_NONCE_TOKEN}">body {{ color: red; }}</style></head><body></body></html>"#
)
);
}
#[test]
fn inject_nonce_skips_existing() {
let html = r#"<html><head><script src="http://example.com/script.js" nonce="existing"></script></head><body></body></html>"#;
let document = parse(html.to_string());
inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(false));
assert_eq!(String::from_utf8(serialize_node(&document)).unwrap(), html);
}
#[test]
fn inject_nonce_respects_disabled_modification() {
let html = r#"<html><head><script src="http://example.com/script.js"></script></head><body></body></html>"#;
let document = parse(html.to_string());
inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(true));
assert_eq!(
String::from_utf8(serialize_node(&document)).unwrap(),
r#"<html><head><script src="http://example.com/script.js"></script></head><body></body></html>"#
);
}
#[test]
fn inline_isolation_replaces_src_with_content() {
let temp_dir = tempfile::tempdir().unwrap();
let mut file = tempfile::NamedTempFile::with_suffix_in(".js", &temp_dir).unwrap();
file.write_all(b"console.log('test');").unwrap();
let file_name = file.path().file_name().unwrap().to_str().unwrap();
let html =
format!(r#"<html><head><script src="/{file_name}"></script></head><body></body></html>"#);
let document = parse(html);
inline_isolation(&document, temp_dir.path());
assert_eq!(
String::from_utf8(serialize_node(&document)).unwrap(),
r#"<html><head><script>console.log('test');</script></head><body></body></html>"#
);
}
}

View File

@@ -0,0 +1,335 @@
// Copyright 2019-2024 Tauri Programme within The Commons Conservancy
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: MIT
//! The module to process HTML in Tauri.
//!
//! # Stability
//!
//! This is a utility used internally by Tauri and is not considered part of the stable API.
//! If you use it, note that it may include breaking changes in the future.
use dom_query::NodeRef;
use crate::{
assets::{SCRIPT_NONCE_TOKEN, STYLE_NONCE_TOKEN},
config::DisabledCspModificationKind,
};
/// # Stability
///
/// This dependency might receive updates in minor releases.
pub use dom_query::Document;
/// Renders the whole document as HTML and returns it as bytes.
///
/// # Stability
///
/// [`Document`] comes from the [`dom_query`] dependency, which might receive updates in minor releases.
pub fn serialize_doc(document: &Document) -> Vec<u8> {
  let rendered = document.html();
  rendered.as_bytes().to_vec()
}
/// Parses the given HTML string.
///
/// # Stability
///
/// This dependency [`dom_query`] for [`Document`] might receive updates in minor releases.
pub fn parse_doc(html: String) -> Document {
Document::from(html)
}
/// Returns the document's `<head>` node.
///
/// When the document has no head, a new `<head>` element is created, inserted as the
/// first child of the root node returned by `html_root`, and returned.
fn ensure_head(document: &Document) -> NodeRef<'_> {
  if let Some(existing) = document.head() {
    return existing;
  }
  let root = document.html_root();
  let created = document.tree.new_element("head");
  root.prepend_child(&created);
  created
}
/// Sets `nonce="<token>"` on every element matched by `selector`.
///
/// Elements that already carry a `nonce` attribute are left untouched.
fn inject_nonce(document: &Document, selector: &str, token: &str) {
  let matched = document.select(selector);
  matched
    .nodes()
    .iter()
    // keep only the nodes that do not have a `nonce` attribute yet
    .filter(|node| node.attr("nonce").is_none())
    .for_each(|node| node.set_attr("nonce", token));
}
/// Adds the nonce placeholder tokens to scripts and styles of the document.
///
/// - `<script>` elements whose `src` starts with `http` receive [`SCRIPT_NONCE_TOKEN`],
///   provided the `script-src` directive may be modified.
/// - `<style>` elements receive [`STYLE_NONCE_TOKEN`], provided the `style-src`
///   directive may be modified.
///
/// Elements that already have a `nonce` attribute are not changed.
///
/// # Stability
///
/// [`Document`] comes from the [`dom_query`] dependency, which might receive updates in minor releases.
pub fn inject_nonce_token(
  document: &Document,
  dangerous_disable_asset_csp_modification: &DisabledCspModificationKind,
) {
  // (CSP directive, selector of the affected elements, nonce placeholder)
  let targets = [
    ("script-src", "script[src^='http']", SCRIPT_NONCE_TOKEN),
    ("style-src", "style", STYLE_NONCE_TOKEN),
  ];
  for (directive, selector, token) in targets {
    if dangerous_disable_asset_csp_modification.can_modify(directive) {
      inject_nonce(document, selector, token);
    }
  }
}
/// Adds a `<meta http-equiv="Content-Security-Policy">` element carrying `csp`
/// as the last child of the document's `<head>`.
///
/// The element is built through the DOM API (attributes are set on a new node),
/// not by formatting an HTML string.
///
/// # Stability
///
/// [`Document`] comes from the [`dom_query`] dependency, which might receive updates in minor releases.
pub fn inject_csp(document: &Document, csp: &str) {
  let head_node = ensure_head(document);
  let csp_meta = document.tree.new_element("meta");
  for (name, value) in [("http-equiv", "Content-Security-Policy"), ("content", csp)] {
    csp_meta.set_attr(name, value);
  }
  head_node.append_child(&csp_meta);
}
/// Inserts an inline `<script>` element containing `script` into the document's `<head>`.
///
/// Note: despite the function name, the new element is inserted as the *first* child
/// of `<head>` (it is prepended, not appended).
///
/// # Stability
///
/// [`Document`] comes from the [`dom_query`] dependency, which might receive updates in minor releases.
pub fn append_script_to_head(document: &Document, script: &str) {
  let head_node = ensure_head(document);
  let element = document.tree.new_element("script");
  element.set_text(script);
  head_node.prepend_child(&element);
}
/// Inserts the codegen-time Isolation JavaScript into the document.
///
/// The rendered [`IsolationJavascriptCodegen`](crate::pattern::isolation::IsolationJavascriptCodegen)
/// template is placed in a new `<script nonce="…">` element (using [`SCRIPT_NONCE_TOKEN`])
/// that becomes the first child of `<head>`.
///
/// Note: This function is not considered part of the stable API.
///
/// # Panics
///
/// Panics if the isolation script template cannot be rendered.
///
/// # Stability
///
/// [`Document`] comes from the [`dom_query`] dependency, which might receive updates in minor releases.
#[cfg(feature = "isolation")]
pub fn inject_codegen_isolation_script(document: &Document) {
  use crate::pattern::isolation::IsolationJavascriptCodegen;
  use serialize_to_javascript::DefaultTemplate;
  let head_node = ensure_head(document);
  let rendered = IsolationJavascriptCodegen {}
    .render_default(&Default::default())
    .expect("unable to render codegen isolation script template");
  let element = document.tree.new_element("script");
  element.set_attr("nonce", SCRIPT_NONCE_TOKEN);
  element.set_text(rendered.into_string());
  head_node.prepend_child(&element);
}
/// Temporary workaround for Windows not allowing requests.
///
/// Every `<script src="…">` element is turned into an inline script: the file referenced
/// by `src` is read relative to `dir`, its content becomes the element's text and the
/// `src` attribute is removed. A rooted `src` (e.g. `/index.js`) has its leading `/`
/// stripped before being joined onto `dir`.
///
/// Note: this does not prevent path traversal due to the isolation application expectation that it
/// is secure.
///
/// # Panics
///
/// Panics if a rooted `src` cannot be stripped of its `/` prefix, or if the referenced
/// file cannot be read as a UTF-8 string.
///
/// # Stability
///
/// [`Document`] comes from the [`dom_query`] dependency, which might receive updates in minor releases.
#[cfg(feature = "isolation")]
pub fn inline_isolation(document: &Document, dir: &std::path::Path) {
  for script in document.select("script[src]").nodes() {
    let Some(src) = script.attr("src") else {
      continue;
    };
    let requested = std::path::PathBuf::from(src.to_string());
    let relative: std::path::PathBuf = if requested.has_root() {
      requested
        .strip_prefix("/")
        .expect("Tauri \"Isolation\" Pattern only supports relative or absolute (`/`) paths.")
        .into()
    } else {
      requested
    };
    let content = std::fs::read_to_string(dir.join(relative)).expect("unable to find isolation file");
    script.set_text(content);
    script.remove_attr("src");
  }
}
// TODO: Verify this, this is not found in the HTML spec, see https://github.com/tauri-apps/tauri/pull/14265#discussion_r2415396842
/// Rewrites the line endings of a script so that only `\n` remains.
///
/// This is meant to mirror what the browser hashes when checking CSP script hashes:
/// - `\r\n` → `\n`
/// - `\r` → `\n`
///
/// Every other byte is copied through unchanged.
pub fn normalize_script_for_csp(input: &[u8]) -> Vec<u8> {
  let mut normalized = Vec::with_capacity(input.len());
  let mut bytes = input.iter().copied().peekable();
  while let Some(byte) = bytes.next() {
    if byte == b'\r' {
      // A CR always becomes a single LF; when it is the first half of a CRLF pair,
      // the LF is consumed here so the pair yields only one LF.
      bytes.next_if_eq(&b'\n');
      normalized.push(b'\n');
    } else {
      normalized.push(byte);
    }
  }
  normalized
}
#[cfg(test)]
mod tests {
  use super::*;
  use crate::{
    assets::{SCRIPT_NONCE_TOKEN, STYLE_NONCE_TOKEN},
    config,
  };

  /// Serializes `document` and decodes the bytes as UTF-8 so tests can compare strings.
  fn serialized(document: &Document) -> String {
    String::from_utf8(serialize_doc(document)).unwrap()
  }

  /// The CSP meta tag is injected whether or not the input already has a `<head>`.
  #[test]
  fn csp() {
    let htmls = ["<html><head></head></html>", "<html></html>"];
    for html in htmls {
      let document = parse_doc(html.to_string());
      let csp = "csp-string";
      inject_csp(&document, csp);
      assert_eq!(
        serialized(&document),
        format!(
          r#"<html><head><meta http-equiv="Content-Security-Policy" content="{csp}"></head><body></body></html>"#
        )
      );
    }
  }

  /// Lone `\r` and `\r\n` are both rewritten to `\n`.
  #[test]
  fn normalize_script_for_csp_test() {
    let js = "// Copyright 2019-2024 Tauri Programme within The Commons Conservancy\r// SPDX-License-Identifier: Apache-2.0\n// SPDX-License-Identifier: MIT\r\n\r\nwindow.__TAURI_ISOLATION_HOOK__ = (payload, options) => {\r\n return payload\r\n}\r\n";
    let expected = "// Copyright 2019-2024 Tauri Programme within The Commons Conservancy\n// SPDX-License-Identifier: Apache-2.0\n// SPDX-License-Identifier: MIT\n\nwindow.__TAURI_ISOLATION_HOOK__ = (payload, options) => {\n return payload\n}\n";
    assert_eq!(normalize_script_for_csp(js.as_bytes()), expected.as_bytes())
  }

  /// Parsing followed by serializing leaves well-formed documents unchanged.
  #[test]
  fn parse_and_serialize_roundtrips() {
    let htmls = [
      "<html><head><title>Test</title></head><body><h1>Hello</h1></body></html>",
      "<!DOCTYPE html><html><head></head><body></body></html>",
    ];
    for html in htmls {
      let parsed = parse_doc(html.to_string());
      assert_eq!(serialized(&parsed), html);
    }
  }

  /// External scripts receive the script nonce placeholder.
  #[test]
  fn inject_nonce_to_scripts() {
    let html = r#"<html><head><script src="http://example.com/script.js"></script></head><body></body></html>"#;
    let document = parse_doc(html.to_string());
    inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(false));
    assert_eq!(
      serialized(&document),
      format!(
        r#"<html><head><script src="http://example.com/script.js" nonce="{SCRIPT_NONCE_TOKEN}"></script></head><body></body></html>"#
      )
    );
  }

  /// Style elements receive the style nonce placeholder.
  #[test]
  fn inject_nonce_to_styles() {
    let html = r#"<html><head><style>body { color: red; }</style></head><body></body></html>"#;
    let document = parse_doc(html.to_string());
    inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(false));
    assert_eq!(
      serialized(&document),
      format!(
        r#"<html><head><style nonce="{STYLE_NONCE_TOKEN}">body {{ color: red; }}</style></head><body></body></html>"#
      )
    );
  }

  /// The injected script ends up inside `<head>`.
  #[test]
  fn append_script_to_head_test() {
    let html = r#"<html><head></head><body></body></html>"#;
    let document = parse_doc(html.to_string());
    append_script_to_head(&document, r#"console.log('Test')"#);
    // No interpolation is needed here, so compare against the literal directly.
    assert_eq!(
      serialized(&document),
      r#"<html><head><script>console.log('Test')</script></head><body></body></html>"#
    );
  }

  /// An existing `nonce` attribute is never overwritten.
  #[test]
  fn inject_nonce_skips_existing() {
    let html = r#"<html><head><script src="http://example.com/script.js" nonce="existing"></script></head><body></body></html>"#;
    let document = parse_doc(html.to_string());
    inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(false));
    assert_eq!(serialized(&document), html);
  }

  /// With CSP modification disabled, the document is left untouched.
  #[test]
  fn inject_nonce_respects_disabled_modification() {
    let html = r#"<html><head><script src="http://example.com/script.js"></script></head><body></body></html>"#;
    let document = parse_doc(html.to_string());
    inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(true));
    assert_eq!(
      serialized(&document),
      r#"<html><head><script src="http://example.com/script.js"></script></head><body></body></html>"#
    );
  }

  /// A `<script src>` pointing at a file in the isolation dir is replaced by its content.
  #[test]
  #[cfg(feature = "isolation")]
  fn inline_isolation_replaces_src_with_content() {
    use std::io::Write;
    let temp_dir = tempfile::tempdir().unwrap();
    let mut file = tempfile::NamedTempFile::with_suffix_in(".js", &temp_dir).unwrap();
    file.write_all(b"console.log('test');").unwrap();
    let file_name = file.path().file_name().unwrap().to_str().unwrap();
    let html =
      format!(r#"<html><head><script src="/{file_name}"></script></head><body></body></html>"#);
    let document = parse_doc(html);
    inline_isolation(&document, temp_dir.path());
    assert_eq!(
      serialized(&document),
      r#"<html><head><script>console.log('test');</script></head><body></body></html>"#
    );
  }
}

View File

@@ -26,6 +26,8 @@ pub mod config;
pub mod config_v1;
#[cfg(feature = "html-manipulation")]
pub mod html;
#[cfg(feature = "html-manipulation-2")]
pub mod html2;
pub mod io;
pub mod mime_type;
pub mod platform;
@@ -33,10 +35,10 @@ pub mod plugin;
/// Prepare application resources and sidecars.
#[cfg(feature = "resources")]
pub mod resources;
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
pub mod tokens;
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
pub mod build;
/// Application pattern.

View File

@@ -369,7 +369,7 @@ pub fn bundle_type() -> Option<BundleType> {
}
}
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
mod build {
use proc_macro2::TokenStream;
use quote::{quote, ToTokens, TokenStreamExt};

View File

@@ -3,10 +3,10 @@
// SPDX-License-Identifier: MIT
//! Compile-time and runtime types for Tauri plugins.
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
pub use build::*;
#[cfg(feature = "build")]
#[cfg(any(feature = "build", feature = "build-2"))]
mod build {
use std::{
env::vars_os,

View File

@@ -169,7 +169,7 @@ glob = "0.3"
heck = "0.5"
tauri-build = { path = "../tauri-build/", default-features = false, version = "2.5.6" }
tauri-utils = { path = "../tauri-utils/", version = "2.8.3", features = [
"build",
"build-2",
] }
[dev-dependencies]
@@ -222,7 +222,7 @@ macos-private-api = [
"tauri-runtime/macos-private-api",
"tauri-runtime-wry?/macos-private-api",
]
webview-data-url = ["data-url", "tauri-utils/html-manipulation"]
webview-data-url = ["data-url", "tauri-utils/html-manipulation-2"]
protocol-asset = ["http-range"]
config-json5 = ["tauri-macros/config-json5"]
config-toml = ["tauri-macros/config-toml"]

View File

@@ -460,9 +460,9 @@ impl<R: Runtime> WebviewManager<R> {
let html = String::from_utf8_lossy(&body).into_owned();
// naive way to check if it's an html
if html.contains('<') && html.contains('>') {
let document = tauri_utils::html::parse(html);
tauri_utils::html::inject_csp(&document, &csp.to_string());
url.set_path(&format!("{},{document}", mime::TEXT_HTML));
let document = tauri_utils::html2::parse_doc(html);
tauri_utils::html2::inject_csp(&document, &csp.to_string());
url.set_path(&format!("{},{}", mime::TEXT_HTML, document.html()));
}
}
}

View File

@@ -11,6 +11,6 @@ rust-version.workspace = true
publish = false
[dev-dependencies]
tauri-utils = { path = "../../tauri-utils/", features = ["build"] }
tauri-utils = { path = "../../tauri-utils/", features = ["build-2"] }
serde_json = "1"
insta = "1"