diff --git a/.changes/supersede-kuchikiki.md b/.changes/supersede-kuchikiki.md new file mode 100644 index 000000000..18974ff48 --- /dev/null +++ b/.changes/supersede-kuchikiki.md @@ -0,0 +1,6 @@ +--- +"tauri-utils": minor:deps +--- + +Add new `html-manipulation-2` and `build-2` feature flags that use `dom_query` instead of `kuchikiki` for HTML parsing / manipulation. +This allows downstream users to remove `kuchikiki` and its dependencies from their dependency tree. diff --git a/Cargo.lock b/Cargo.lock index 3ee41ee22..f0b7ea703 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1321,7 +1321,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" dependencies = [ "lazy_static", - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] @@ -2423,7 +2423,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -4423,7 +4423,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -5529,7 +5529,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d8fae84b431384b68627d0f9b3b1245fcf9f46f6c0e3dc902e9dce64edd1967" dependencies = [ "libc", - "windows-sys 0.48.0", + "windows-sys 0.60.2", ] [[package]] @@ -5946,7 +5946,7 @@ dependencies = [ "aes-gcm", "aes-kw", "argon2", - "base64 0.21.7", + "base64 0.22.1", "bitfield", "block-padding", "blowfish", @@ -7239,7 +7239,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -7252,7 +7252,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.9.4", - "windows-sys 0.52.0", + 
"windows-sys 0.59.0", ] [[package]] @@ -7378,7 +7378,7 @@ dependencies = [ "security-framework 3.5.1", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -8888,7 +8888,6 @@ dependencies = [ "glob", "handlebars", "heck 0.5.0", - "html5ever 0.29.1", "ignore", "image", "include_dir", @@ -8901,7 +8900,6 @@ dependencies = [ "jsonrpsee-core", "jsonrpsee-ws-client", "jsonschema", - "kuchikiki", "libc", "local-ip-address", "log", @@ -9188,6 +9186,7 @@ dependencies = [ "brotli", "cargo_metadata", "ctor 0.2.9", + "dom_query", "dunce", "getrandom 0.3.3", "glob", @@ -9213,6 +9212,7 @@ dependencies = [ "serialize-to-javascript", "swift-rs", "tauri", + "tempfile", "thiserror 2.0.12", "toml 1.0.6+spec-1.1.0", "url", @@ -9254,7 +9254,7 @@ dependencies = [ "getrandom 0.2.15", "once_cell", "rustix 0.38.43", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -10609,7 +10609,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/crates/tauri-build/Cargo.toml b/crates/tauri-build/Cargo.toml index 26a7f63d6..aa7242857 100644 --- a/crates/tauri-build/Cargo.toml +++ b/crates/tauri-build/Cargo.toml @@ -28,7 +28,7 @@ anyhow = "1" quote = { version = "1", optional = true } tauri-codegen = { version = "2.5.5", path = "../tauri-codegen", optional = true } tauri-utils = { version = "2.8.3", path = "../tauri-utils", features = [ - "build", + "build-2", "resources", ] } cargo_toml = "0.22" diff --git a/crates/tauri-cli/Cargo.toml b/crates/tauri-cli/Cargo.toml index 4f95db943..9e216f6c5 100644 --- a/crates/tauri-cli/Cargo.toml +++ b/crates/tauri-cli/Cargo.toml @@ -63,7 +63,7 @@ tauri-utils = { version = "2.8.3", path = "../tauri-utils", features = [ "schema", "config-json5", "config-toml", - "html-manipulation", + 
"html-manipulation-2", ] } toml = "1" jsonschema = { version = "0.33", default-features = false } @@ -89,8 +89,6 @@ env_logger = "0.11" icns = { package = "tauri-icns", version = "0.1" } image = { version = "0.25", default-features = false, features = ["ico"] } axum = { version = "0.8", features = ["ws"] } -html5ever = "0.29" -kuchiki = { package = "kuchikiki", version = "=0.8.8-speedreader" } tokio = { version = "1", features = ["macros", "sync"] } common-path = "1" serde-value = "0.7" diff --git a/crates/tauri-cli/src/dev/builtin_dev_server.rs b/crates/tauri-cli/src/dev/builtin_dev_server.rs index 1b49058ed..292b7459d 100644 --- a/crates/tauri-cli/src/dev/builtin_dev_server.rs +++ b/crates/tauri-cli/src/dev/builtin_dev_server.rs @@ -7,8 +7,6 @@ use axum::{ http::{header, StatusCode, Uri}, response::{IntoResponse, Response}, }; -use html5ever::{namespace_url, ns, LocalName, QualName}; -use kuchiki::{traits::TendrilSink, NodeRef}; use std::{ net::{IpAddr, SocketAddr}, path::{Path, PathBuf}, @@ -128,30 +126,14 @@ async fn ws_handler(ws: WebSocketUpgrade, state: State) -> Response } fn inject_address(html_bytes: Vec, address: &SocketAddr) -> Vec { - fn with_html_head(document: &mut NodeRef, f: F) { - if let Ok(ref node) = document.select_first("head") { - f(node.as_node()) - } else { - let node = NodeRef::new_element( - QualName::new(None, ns!(html), LocalName::from("head")), - None, - ); - f(&node); - document.prepend(node) - } - } + let document = tauri_utils::html2::parse_doc(String::from_utf8_lossy(&html_bytes).into_owned()); - let mut document = kuchiki::parse_html() - .one(String::from_utf8_lossy(&html_bytes).into_owned()) - .document_node; - with_html_head(&mut document, |head| { - let script = RELOAD_SCRIPT.replace("{{reload_url}}", &format!("ws://{address}/__tauri_cli")); - let script_el = NodeRef::new_element(QualName::new(None, ns!(html), "script".into()), None); - script_el.append(NodeRef::new_text(script)); - head.prepend(script_el); - }); + 
tauri_utils::html2::append_script_to_head( + &document, + &RELOAD_SCRIPT.replace("{{reload_url}}", &format!("ws://{address}/__tauri_cli")), + ); - tauri_utils::html::serialize_node(&document) + tauri_utils::html2::serialize_doc(&document) } fn fs_read_scoped(path: PathBuf, scope: &Path) -> crate::Result> { diff --git a/crates/tauri-codegen/Cargo.toml b/crates/tauri-codegen/Cargo.toml index e1154e676..78de83cf0 100644 --- a/crates/tauri-codegen/Cargo.toml +++ b/crates/tauri-codegen/Cargo.toml @@ -21,7 +21,7 @@ syn = "2" serde = { version = "1", features = ["derive"] } serde_json = "1" tauri-utils = { version = "2.8.3", path = "../tauri-utils", features = [ - "build", + "build-2", ] } thiserror = "2" walkdir = "2" diff --git a/crates/tauri-codegen/src/context.rs b/crates/tauri-codegen/src/context.rs index bc889bd8d..415b7413f 100644 --- a/crates/tauri-codegen/src/context.rs +++ b/crates/tauri-codegen/src/context.rs @@ -25,7 +25,7 @@ use tauri_utils::{ }, assets::AssetKey, config::{Config, FrontendDist, PatternKind}, - html::{inject_nonce_token, parse as parse_html, serialize_node as serialize_html_node, NodeRef}, + html2::{inject_nonce_token, parse_doc, serialize_doc, Document}, platform::Target, tokens::{map_lit, str_lit}, }; @@ -44,27 +44,25 @@ pub struct ContextData { pub test: bool, } -fn inject_script_hashes(document: &NodeRef, key: &AssetKey, csp_hashes: &mut CspHashes) { - if let Ok(inline_script_elements) = document.select("script:not(:empty)") { - let mut scripts = Vec::new(); - for inline_script_el in inline_script_elements { - let script = inline_script_el.as_node().text_contents(); - let mut hasher = Sha256::new(); - hasher.update(tauri_utils::html::normalize_script_for_csp( - script.as_bytes(), - )); - let hash = hasher.finalize(); - scripts.push(format!( - "'sha256-{}'", - base64::engine::general_purpose::STANDARD.encode(hash) - )); - } - csp_hashes - .inline_scripts - .entry(key.clone().into()) - .or_default() - .append(&mut scripts); - } +fn 
inject_script_hashes(document: &Document, key: &AssetKey, csp_hashes: &mut CspHashes) { + let script_elements = document.select("script:not(:empty)"); + + let scripts = script_elements + .iter() + .map(|element| { + let script = tauri_utils::html2::normalize_script_for_csp(element.text().as_bytes()); + let script_hash = Sha256::digest(script); + let hash_base64 = base64::engine::general_purpose::STANDARD.encode(script_hash); + + format!("'sha256-{hash_base64}'") + }) + .collect::>(); + + csp_hashes + .inline_scripts + .entry(key.clone().into()) + .or_default() + .extend(scripts); } fn map_core_assets( @@ -77,7 +75,7 @@ fn map_core_assets( if path.extension() == Some(OsStr::new("html")) { #[allow(clippy::collapsible_if)] if csp { - let document = parse_html(String::from_utf8_lossy(input).into_owned()); + let document = parse_doc(String::from_utf8_lossy(input).into_owned()); inject_nonce_token(&document, &dangerous_disable_asset_csp_modification); @@ -85,7 +83,7 @@ fn map_core_assets( inject_script_hashes(&document, key, csp_hashes); } - *input = serialize_html_node(&document); + *input = serialize_doc(&document); } } Ok(()) @@ -108,13 +106,13 @@ fn map_isolation( move |key, path, input, csp_hashes| { if path.extension() == Some(OsStr::new("html")) { - let isolation_html = parse_html(String::from_utf8_lossy(input).into_owned()); + let isolation_html = parse_doc(String::from_utf8_lossy(input).into_owned()); // this is appended, so no need to reverse order it - tauri_utils::html::inject_codegen_isolation_script(&isolation_html); + tauri_utils::html2::inject_codegen_isolation_script(&isolation_html); // temporary workaround for windows not loading assets - tauri_utils::html::inline_isolation(&isolation_html, &dir); + tauri_utils::html2::inline_isolation(&isolation_html, &dir); inject_nonce_token( &isolation_html, @@ -125,7 +123,7 @@ fn map_isolation( csp_hashes.styles.push(iframe_style_csp_hash.clone()); - *input = isolation_html.to_string().as_bytes().to_vec() + *input 
= serialize_doc(&isolation_html) } Ok(()) diff --git a/crates/tauri-codegen/src/embedded_assets.rs b/crates/tauri-codegen/src/embedded_assets.rs index ede7ee6d9..f3d1bd275 100644 --- a/crates/tauri-codegen/src/embedded_assets.rs +++ b/crates/tauri-codegen/src/embedded_assets.rs @@ -181,7 +181,7 @@ impl CspHashes { let mut hasher = Sha256::new(); hasher.update( &std::fs::read(path) - .map(|b| tauri_utils::html::normalize_script_for_csp(&b)) + .map(|b| tauri_utils::html2::normalize_script_for_csp(&b)) .map_err(|error| EmbeddedAssetsError::AssetRead { path: path.to_path_buf(), error, diff --git a/crates/tauri-plugin/Cargo.toml b/crates/tauri-plugin/Cargo.toml index 1f89c33f0..022492dce 100644 --- a/crates/tauri-plugin/Cargo.toml +++ b/crates/tauri-plugin/Cargo.toml @@ -28,7 +28,7 @@ runtime = [] anyhow = { version = "1", optional = true } serde = { version = "1", optional = true } tauri-utils = { version = "2.8.3", default-features = false, features = [ - "build", + "build-2", ], path = "../tauri-utils" } serde_json = { version = "1", optional = true } glob = { version = "0.3", optional = true } diff --git a/crates/tauri-utils/Cargo.toml b/crates/tauri-utils/Cargo.toml index 37cd0dd6d..0c4feb51f 100644 --- a/crates/tauri-utils/Cargo.toml +++ b/crates/tauri-utils/Cargo.toml @@ -24,6 +24,7 @@ brotli = { version = "8", optional = true, default-features = false, features = url = { version = "2", features = ["serde"] } html5ever = { version = "0.29", optional = true } kuchiki = { package = "kuchikiki", version = "0.8.8-speedreader", optional = true } +dom_query = { version = "0.27", optional = true, default-features = false } proc-macro2 = { version = "1", optional = true } quote = { version = "1", optional = true } # Our code requires at least 0.8.21 so don't change this to 0.8 @@ -59,6 +60,7 @@ swift-rs = { version = "1", optional = true, features = ["build"] } getrandom = { version = "0.3", features = ["std"] } serial_test = "3" tauri = { path = "../tauri" } +tempfile = 
"3.15.0" [features] build = [ @@ -69,6 +71,15 @@ build = [ "swift-rs", "html-manipulation", ] +# Same as `build` but uses `html-manipulation-2` to avoid the `kuchikiki` dependency. +build-2 = [ + "proc-macro2", + "quote", + "cargo_metadata", + "schema", + "swift-rs", + "html-manipulation-2", +] compression = ["brotli"] schema = ["schemars"] isolation = ["aes-gcm", "getrandom", "serialize-to-javascript"] @@ -77,3 +88,4 @@ config-json5 = ["json5"] config-toml = [] resources = ["walkdir"] html-manipulation = ["dep:html5ever", "dep:kuchiki"] +html-manipulation-2 = ["dep:dom_query"] diff --git a/crates/tauri-utils/src/acl/capability.rs b/crates/tauri-utils/src/acl/capability.rs index 4e7417d11..33a2c60c7 100644 --- a/crates/tauri-utils/src/acl/capability.rs +++ b/crates/tauri-utils/src/acl/capability.rs @@ -322,7 +322,7 @@ impl FromStr for CapabilityFile { } } -#[cfg(feature = "build")] +#[cfg(any(feature = "build", feature = "build-2"))] mod build { use std::convert::identity; diff --git a/crates/tauri-utils/src/acl/identifier.rs b/crates/tauri-utils/src/acl/identifier.rs index bb571c9fd..26c7326d2 100644 --- a/crates/tauri-utils/src/acl/identifier.rs +++ b/crates/tauri-utils/src/acl/identifier.rs @@ -283,7 +283,7 @@ mod tests { } } -#[cfg(feature = "build")] +#[cfg(any(feature = "build", feature = "build-2"))] mod build { use proc_macro2::TokenStream; use quote::{quote, ToTokens, TokenStreamExt}; diff --git a/crates/tauri-utils/src/acl/manifest.rs b/crates/tauri-utils/src/acl/manifest.rs index 8c4eed0fd..ff18df825 100644 --- a/crates/tauri-utils/src/acl/manifest.rs +++ b/crates/tauri-utils/src/acl/manifest.rs @@ -126,7 +126,7 @@ impl Manifest { } } -#[cfg(feature = "build")] +#[cfg(any(feature = "build", feature = "build-2"))] mod build { use proc_macro2::TokenStream; use quote::{quote, ToTokens, TokenStreamExt}; diff --git a/crates/tauri-utils/src/acl/mod.rs b/crates/tauri-utils/src/acl/mod.rs index 03c33d4fd..b9bb6a2d0 100644 --- a/crates/tauri-utils/src/acl/mod.rs 
+++ b/crates/tauri-utils/src/acl/mod.rs @@ -58,7 +58,7 @@ pub const ALLOWED_COMMANDS_FILE_NAME: &str = "allowed-commands.json"; /// the value is set to the config's directory pub const REMOVE_UNUSED_COMMANDS_ENV_VAR: &str = "REMOVE_UNUSED_COMMANDS"; -#[cfg(feature = "build")] +#[cfg(any(feature = "build", feature = "build-2"))] pub mod build; pub mod capability; pub mod identifier; @@ -104,7 +104,7 @@ pub enum Error { CreateDir(std::io::Error, PathBuf), /// [`cargo_metadata`] was not able to complete successfully - #[cfg(feature = "build")] + #[cfg(any(feature = "build", feature = "build-2"))] #[error("failed to execute: {0}")] Metadata(#[from] ::cargo_metadata::Error), @@ -460,7 +460,7 @@ mod tests { } } -#[cfg(feature = "build")] +#[cfg(any(feature = "build", feature = "build-2"))] mod build_ { use std::convert::identity; diff --git a/crates/tauri-utils/src/acl/resolved.rs b/crates/tauri-utils/src/acl/resolved.rs index 941019822..8aa4322f5 100644 --- a/crates/tauri-utils/src/acl/resolved.rs +++ b/crates/tauri-utils/src/acl/resolved.rs @@ -438,7 +438,7 @@ fn display_perm_key(prefix: &str) -> &str { } } -#[cfg(feature = "build")] +#[cfg(any(feature = "build", feature = "build-2"))] mod build { use proc_macro2::TokenStream; use quote::{quote, ToTokens, TokenStreamExt}; diff --git a/crates/tauri-utils/src/acl/value.rs b/crates/tauri-utils/src/acl/value.rs index 34c7efc48..24ac91eb7 100644 --- a/crates/tauri-utils/src/acl/value.rs +++ b/crates/tauri-utils/src/acl/value.rs @@ -145,7 +145,7 @@ impl From for Value { } } -#[cfg(feature = "build")] +#[cfg(any(feature = "build", feature = "build-2"))] mod build { use std::convert::identity; diff --git a/crates/tauri-utils/src/config.rs b/crates/tauri-utils/src/config.rs index 9ce530496..19035d11a 100644 --- a/crates/tauri-utils/src/config.rs +++ b/crates/tauri-utils/src/config.rs @@ -3354,7 +3354,7 @@ pub struct PluginConfig(pub HashMap); /// This allows for a build script to output the values in a `Config` to a 
`TokenStream`, which can /// then be consumed by another crate. Useful for passing a config to both the build script and the /// application using tauri while only parsing it once (in the build script). -#[cfg(feature = "build")] +#[cfg(any(feature = "build", feature = "build-2"))] mod build { use super::*; use crate::{literal_struct, tokens::*}; diff --git a/crates/tauri-utils/src/html.rs b/crates/tauri-utils/src/html.rs index 958ce5978..bc2aaf373 100644 --- a/crates/tauri-utils/src/html.rs +++ b/crates/tauri-utils/src/html.rs @@ -281,6 +281,7 @@ pub fn inline_isolation(document: &NodeRef, dir: &Path) { } } +// TODO: Verify this, this is not found in the HTML spec, see https://github.com/tauri-apps/tauri/pull/14265#discussion_r2415396842 /// Normalize line endings in script content to match what the browser uses for CSP hashing. /// /// According to the HTML spec, browsers normalize: @@ -315,6 +316,13 @@ pub fn normalize_script_for_csp(input: &[u8]) -> Vec { #[cfg(test)] mod tests { + use std::io::Write; + + use super::*; + use crate::{ + assets::{SCRIPT_NONCE_TOKEN, STYLE_NONCE_TOKEN}, + config, + }; #[test] fn csp() { @@ -322,12 +330,14 @@ mod tests { "".to_string(), "".to_string(), ]; + for html in htmls { - let document = super::parse(html); + let document = parse(html); let csp = "csp-string"; - super::inject_csp(&document, csp); + inject_csp(&document, csp); + assert_eq!( - document.to_string(), + String::from_utf8(serialize_node(&document)).unwrap(), format!( r#""#, ) @@ -336,12 +346,97 @@ mod tests { } #[test] - fn normalize_script_for_csp() { + fn normalize_script_for_csp_test() { let js = "// Copyright 2019-2024 Tauri Programme within The Commons Conservancy\r// SPDX-License-Identifier: Apache-2.0\n// SPDX-License-Identifier: MIT\r\n\r\nwindow.__TAURI_ISOLATION_HOOK__ = (payload, options) => {\r\n return payload\r\n}\r\n"; let expected = "// Copyright 2019-2024 Tauri Programme within The Commons Conservancy\n// SPDX-License-Identifier: Apache-2.0\n// 
SPDX-License-Identifier: MIT\n\nwindow.__TAURI_ISOLATION_HOOK__ = (payload, options) => {\n return payload\n}\n"; + + assert_eq!(normalize_script_for_csp(js.as_bytes()), expected.as_bytes()) + } + + #[test] + fn parse_and_serialize_roundtrips() { + let htmls = [ + "Test

Hello

", + "", + ]; + + for html in htmls { + let parsed = parse(html.to_string()); + let serialized = serialize_node(&parsed); + let result = String::from_utf8(serialized).unwrap(); + + assert_eq!(result, html); + } + } + + #[test] + fn inject_nonce_to_scripts() { + let html = r#""#; + + let document = parse(html.to_string()); + inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(false)); + assert_eq!( - super::normalize_script_for_csp(js.as_bytes()), - expected.as_bytes() - ) + String::from_utf8(serialize_node(&document)).unwrap(), + format!( + r#""# + ) + ); + } + + #[test] + fn inject_nonce_to_styles() { + let html = r#""#; + + let document = parse(html.to_string()); + inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(false)); + + assert_eq!( + String::from_utf8(serialize_node(&document)).unwrap(), + format!( + r#""# + ) + ); + } + + #[test] + fn inject_nonce_skips_existing() { + let html = r#""#; + + let document = parse(html.to_string()); + inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(false)); + + assert_eq!(String::from_utf8(serialize_node(&document)).unwrap(), html); + } + + #[test] + fn inject_nonce_respects_disabled_modification() { + let html = r#""#; + + let document = parse(html.to_string()); + inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(true)); + + assert_eq!( + String::from_utf8(serialize_node(&document)).unwrap(), + r#""# + ); + } + + #[test] + fn inline_isolation_replaces_src_with_content() { + let temp_dir = tempfile::tempdir().unwrap(); + let mut file = tempfile::NamedTempFile::with_suffix_in(".js", &temp_dir).unwrap(); + file.write_all(b"console.log('test');").unwrap(); + let file_name = file.path().file_name().unwrap().to_str().unwrap(); + + let html = + format!(r#""#); + let document = parse(html); + inline_isolation(&document, temp_dir.path()); + + assert_eq!( + String::from_utf8(serialize_node(&document)).unwrap(), + r#""# + ); } } diff --git 
a/crates/tauri-utils/src/html2.rs b/crates/tauri-utils/src/html2.rs new file mode 100644 index 000000000..7f591dc27 --- /dev/null +++ b/crates/tauri-utils/src/html2.rs @@ -0,0 +1,335 @@ +// Copyright 2019-2024 Tauri Programme within The Commons Conservancy +// SPDX-License-Identifier: Apache-2.0 +// SPDX-License-Identifier: MIT + +//! The module to process HTML in Tauri. +//! +//! # Stability +//! +//! This is utility used in Tauri internally and not considered part of the stable API. +//! If you use it, note that it may include breaking changes in the future. + +use dom_query::NodeRef; + +use crate::{ + assets::{SCRIPT_NONCE_TOKEN, STYLE_NONCE_TOKEN}, + config::DisabledCspModificationKind, +}; + +/// # Stability +/// +/// This dependency might receive updates in minor releases. +pub use dom_query::Document; + +/// Serializes the document to HTML. +/// +/// # Stability +/// +/// This dependency [`dom_query`] for [`Document`] might receive updates in minor releases. +pub fn serialize_doc(document: &Document) -> Vec { + document.html().as_bytes().to_vec() +} + +/// Parses the given HTML string. +/// +/// # Stability +/// +/// This dependency [`dom_query`] for [`Document`] might receive updates in minor releases. +pub fn parse_doc(html: String) -> Document { + Document::from(html) +} + +fn ensure_head(document: &Document) -> NodeRef<'_> { + document.head().unwrap_or_else(|| { + let html = document.html_root(); + let head = document.tree.new_element("head"); + html.prepend_child(&head); + head + }) +} + +fn inject_nonce(document: &Document, selector: &str, token: &str) { + let elements = document.select(selector); + for elem in elements.nodes() { + // if the node already has the `nonce` attribute, skip it + if elem.attr("nonce").is_some() { + continue; + } + elem.set_attr("nonce", token); + } +} + +/// Inject nonce tokens to all scripts and styles. +/// +/// # Stability +/// +/// This dependency [`dom_query`] for [`Document`] might receive updates in minor releases. 
+pub fn inject_nonce_token( + document: &Document, + dangerous_disable_asset_csp_modification: &DisabledCspModificationKind, +) { + if dangerous_disable_asset_csp_modification.can_modify("script-src") { + inject_nonce(document, "script[src^='http']", SCRIPT_NONCE_TOKEN); + } + if dangerous_disable_asset_csp_modification.can_modify("style-src") { + inject_nonce(document, "style", STYLE_NONCE_TOKEN); + } +} + +/// Injects a content security policy to the HTML. +/// +/// # Stability +/// +/// This dependency [`dom_query`] for [`Document`] might receive updates in minor releases. +pub fn inject_csp(document: &Document, csp: &str) { + let head = ensure_head(document); + let meta_tag = document.tree.new_element("meta"); + meta_tag.set_attr("http-equiv", "Content-Security-Policy"); + meta_tag.set_attr("content", csp); + head.append_child(&meta_tag); +} + +/// Injects an inline `<script>` with the given content as the first child of the document's `<head>`, creating the `<head>` if it is missing. +/// +/// # Stability +/// +/// This dependency [`dom_query`] for [`Document`] might receive updates in minor releases. +pub fn append_script_to_head(document: &Document, script: &str) { + let head = ensure_head(document); + let script_tag = document.tree.new_element("script"); + script_tag.set_text(script); + head.prepend_child(&script_tag); +} + +/// Injects the Isolation JavaScript to a codegen time document. +/// +/// Note: This function is not considered part of the stable API. +/// +/// # Stability +/// +/// This dependency [`dom_query`] for [`Document`] might receive updates in minor releases.
+#[cfg(feature = "isolation")] +pub fn inject_codegen_isolation_script(document: &Document) { + use crate::pattern::isolation::IsolationJavascriptCodegen; + use serialize_to_javascript::DefaultTemplate; + + let head = ensure_head(document); + + let script_content = IsolationJavascriptCodegen {} + .render_default(&Default::default()) + .expect("unable to render codegen isolation script template") + .into_string(); + + let script_tag = document.tree.new_element("script"); + script_tag.set_attr("nonce", SCRIPT_NONCE_TOKEN); + script_tag.set_text(script_content); + + head.prepend_child(&script_tag); +} + +/// Temporary workaround for Windows not allowing requests +/// +/// Note: this does not prevent path traversal due to the isolation application expectation that it +/// is secure. +/// +/// # Stability +/// +/// This dependency [`dom_query`] for [`Document`] might receive updates in minor releases. +#[cfg(feature = "isolation")] +pub fn inline_isolation(document: &Document, dir: &std::path::Path) { + let scripts = document.select("script[src]"); + + for script in scripts.nodes() { + let src = match script.attr("src") { + Some(s) => s.to_string(), + None => continue, + }; + + let mut path = std::path::PathBuf::from(src); + if path.has_root() { + path = path + .strip_prefix("/") + .expect("Tauri \"Isolation\" Pattern only supports relative or absolute (`/`) paths.") + .into(); + } + + let file = std::fs::read_to_string(dir.join(path)).expect("unable to find isolation file"); + + script.set_text(file); + script.remove_attr("src"); + } +} + +// TODO: Verify this, this is not found in the HTML spec, see https://github.com/tauri-apps/tauri/pull/14265#discussion_r2415396842 +/// Normalize line endings in script content to match what the browser uses for CSP hashing. 
+/// +/// According to the HTML spec, browsers normalize: +/// - `\r\n` → `\n` +/// - `\r` → `\n` +pub fn normalize_script_for_csp(input: &[u8]) -> Vec { + let mut output = Vec::with_capacity(input.len()); + + let mut i = 0; + while i < input.len() { + match input[i] { + b'\r' => { + if i + 1 < input.len() && input[i + 1] == b'\n' { + // CRLF → LF + output.push(b'\n'); + i += 2; + } else { + // Lone CR → LF + output.push(b'\n'); + i += 1; + } + } + _ => { + output.push(input[i]); + i += 1; + } + } + } + + output +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + assets::{SCRIPT_NONCE_TOKEN, STYLE_NONCE_TOKEN}, + config, + }; + + #[test] + fn csp() { + let htmls = vec![ + "".to_string(), + "".to_string(), + ]; + + for html in htmls { + let document = parse_doc(html); + let csp = "csp-string"; + inject_csp(&document, csp); + + assert_eq!( + String::from_utf8(serialize_doc(&document)).unwrap(), + format!( + r#""# + ) + ); + } + } + + #[test] + fn normalize_script_for_csp_test() { + let js = "// Copyright 2019-2024 Tauri Programme within The Commons Conservancy\r// SPDX-License-Identifier: Apache-2.0\n// SPDX-License-Identifier: MIT\r\n\r\nwindow.__TAURI_ISOLATION_HOOK__ = (payload, options) => {\r\n return payload\r\n}\r\n"; + let expected = "// Copyright 2019-2024 Tauri Programme within The Commons Conservancy\n// SPDX-License-Identifier: Apache-2.0\n// SPDX-License-Identifier: MIT\n\nwindow.__TAURI_ISOLATION_HOOK__ = (payload, options) => {\n return payload\n}\n"; + + assert_eq!(normalize_script_for_csp(js.as_bytes()), expected.as_bytes()) + } + + #[test] + fn parse_and_serialize_roundtrips() { + let htmls = [ + "Test

Hello

", + "", + ]; + + for html in htmls { + let parsed = parse_doc(html.to_string()); + let serialized = serialize_doc(&parsed); + let result = String::from_utf8(serialized).unwrap(); + + assert_eq!(result, html); + } + } + + #[test] + fn inject_nonce_to_scripts() { + let html = r#""#; + + let document = parse_doc(html.to_string()); + inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(false)); + + assert_eq!( + String::from_utf8(serialize_doc(&document)).unwrap(), + format!( + r#""# + ) + ); + } + + #[test] + fn inject_nonce_to_styles() { + let html = r#""#; + + let document = parse_doc(html.to_string()); + inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(false)); + + assert_eq!( + String::from_utf8(serialize_doc(&document)).unwrap(), + format!( + r#""# + ) + ); + } + + #[test] + fn append_script_to_head_test() { + let html = r#""#; + + let document = parse_doc(html.to_string()); + append_script_to_head(&document, r#"console.log('Test')"#); + + assert_eq!( + String::from_utf8(serialize_doc(&document)).unwrap(), + format!(r#""#) + ); + } + + #[test] + fn inject_nonce_skips_existing() { + let html = r#""#; + + let document = parse_doc(html.to_string()); + inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(false)); + + assert_eq!(String::from_utf8(serialize_doc(&document)).unwrap(), html); + } + + #[test] + fn inject_nonce_respects_disabled_modification() { + let html = r#""#; + + let document = parse_doc(html.to_string()); + inject_nonce_token(&document, &config::DisabledCspModificationKind::Flag(true)); + + assert_eq!( + String::from_utf8(serialize_doc(&document)).unwrap(), + r#""# + ); + } + + #[test] + #[cfg(feature = "isolation")] + fn inline_isolation_replaces_src_with_content() { + use std::io::Write; + + let temp_dir = tempfile::tempdir().unwrap(); + let mut file = tempfile::NamedTempFile::with_suffix_in(".js", &temp_dir).unwrap(); + file.write_all(b"console.log('test');").unwrap(); + let 
file_name = file.path().file_name().unwrap().to_str().unwrap(); + + let html = + format!(r#""#); + let document = parse_doc(html); + inline_isolation(&document, temp_dir.path()); + + assert_eq!( + String::from_utf8(serialize_doc(&document)).unwrap(), + r#""# + ); + } +} diff --git a/crates/tauri-utils/src/lib.rs b/crates/tauri-utils/src/lib.rs index 25e5a4f97..3b2a31ef5 100644 --- a/crates/tauri-utils/src/lib.rs +++ b/crates/tauri-utils/src/lib.rs @@ -26,6 +26,8 @@ pub mod config; pub mod config_v1; #[cfg(feature = "html-manipulation")] pub mod html; +#[cfg(feature = "html-manipulation-2")] +pub mod html2; pub mod io; pub mod mime_type; pub mod platform; @@ -33,10 +35,10 @@ pub mod plugin; /// Prepare application resources and sidecars. #[cfg(feature = "resources")] pub mod resources; -#[cfg(feature = "build")] +#[cfg(any(feature = "build", feature = "build-2"))] pub mod tokens; -#[cfg(feature = "build")] +#[cfg(any(feature = "build", feature = "build-2"))] pub mod build; /// Application pattern. diff --git a/crates/tauri-utils/src/platform.rs b/crates/tauri-utils/src/platform.rs index 4ce3d2fac..a83201bdc 100644 --- a/crates/tauri-utils/src/platform.rs +++ b/crates/tauri-utils/src/platform.rs @@ -369,7 +369,7 @@ pub fn bundle_type() -> Option { } } -#[cfg(feature = "build")] +#[cfg(any(feature = "build", feature = "build-2"))] mod build { use proc_macro2::TokenStream; use quote::{quote, ToTokens, TokenStreamExt}; diff --git a/crates/tauri-utils/src/plugin.rs b/crates/tauri-utils/src/plugin.rs index 8f178ca41..9fe35be2d 100644 --- a/crates/tauri-utils/src/plugin.rs +++ b/crates/tauri-utils/src/plugin.rs @@ -3,10 +3,10 @@ // SPDX-License-Identifier: MIT //! Compile-time and runtime types for Tauri plugins. 
-#[cfg(feature = "build")] +#[cfg(any(feature = "build", feature = "build-2"))] pub use build::*; -#[cfg(feature = "build")] +#[cfg(any(feature = "build", feature = "build-2"))] mod build { use std::{ env::vars_os, diff --git a/crates/tauri/Cargo.toml b/crates/tauri/Cargo.toml index 9326713e5..071076fa4 100644 --- a/crates/tauri/Cargo.toml +++ b/crates/tauri/Cargo.toml @@ -169,7 +169,7 @@ glob = "0.3" heck = "0.5" tauri-build = { path = "../tauri-build/", default-features = false, version = "2.5.6" } tauri-utils = { path = "../tauri-utils/", version = "2.8.3", features = [ - "build", + "build-2", ] } [dev-dependencies] @@ -222,7 +222,7 @@ macos-private-api = [ "tauri-runtime/macos-private-api", "tauri-runtime-wry?/macos-private-api", ] -webview-data-url = ["data-url", "tauri-utils/html-manipulation"] +webview-data-url = ["data-url", "tauri-utils/html-manipulation-2"] protocol-asset = ["http-range"] config-json5 = ["tauri-macros/config-json5"] config-toml = ["tauri-macros/config-toml"] diff --git a/crates/tauri/src/manager/webview.rs b/crates/tauri/src/manager/webview.rs index 98c470b76..4845f4fa6 100644 --- a/crates/tauri/src/manager/webview.rs +++ b/crates/tauri/src/manager/webview.rs @@ -460,9 +460,9 @@ impl WebviewManager { let html = String::from_utf8_lossy(&body).into_owned(); // naive way to check if it's an html if html.contains('<') && html.contains('>') { - let document = tauri_utils::html::parse(html); - tauri_utils::html::inject_csp(&document, &csp.to_string()); - url.set_path(&format!("{},{document}", mime::TEXT_HTML)); + let document = tauri_utils::html2::parse_doc(html); + tauri_utils::html2::inject_csp(&document, &csp.to_string()); + url.set_path(&format!("{},{}", mime::TEXT_HTML, document.html())); } } } diff --git a/crates/tests/acl/Cargo.toml b/crates/tests/acl/Cargo.toml index 0fe970b99..9e4f45b90 100644 --- a/crates/tests/acl/Cargo.toml +++ b/crates/tests/acl/Cargo.toml @@ -11,6 +11,6 @@ rust-version.workspace = true publish = false 
[dev-dependencies] -tauri-utils = { path = "../../tauri-utils/", features = ["build"] } +tauri-utils = { path = "../../tauri-utils/", features = ["build-2"] } serde_json = "1" insta = "1"