From 6c3da6d2901590eff4daa2d84a21a6883085776f Mon Sep 17 00:00:00 2001 From: aiueo13 <155184777+aiueo13@users.noreply.github.com> Date: Fri, 13 Feb 2026 10:41:43 +0900 Subject: [PATCH] fix(fs): use correct line detection for encodings in `readTextFileLines` (#3273) Co-authored-by: OkaYu --- plugins/fs/api-iife.js | 2 +- plugins/fs/guest-js/index.ts | 11 ++- plugins/fs/src/commands.rs | 169 +++++++++++++++++++++++++++++------ 3 files changed, 152 insertions(+), 30 deletions(-) diff --git a/plugins/fs/api-iife.js b/plugins/fs/api-iife.js index a92f8aaf3..a392145df 100644 --- a/plugins/fs/api-iife.js +++ b/plugins/fs/api-iife.js @@ -1 +1 @@ -if("__TAURI__"in window){var __TAURI_PLUGIN_FS__=function(t){"use strict";function e(t,e,n,i){if("function"==typeof e?t!==e||!i:!e.has(t))throw new TypeError("Cannot read private member from an object whose class did not declare it");return"m"===n?i:"a"===n?i.call(t):i?i.value:e.get(t)}function n(t,e,n,i,o){if("function"==typeof e||!e.has(t))throw new TypeError("Cannot write private member to an object whose class did not declare it");return e.set(t,n),n}var i,o,r,a,s;"function"==typeof SuppressedError&&SuppressedError;const c="__TAURI_TO_IPC_KEY__";class f{constructor(t){i.set(this,void 0),o.set(this,0),r.set(this,[]),a.set(this,void 0),n(this,i,t||(()=>{})),this.id=function(t,e=!1){return window.__TAURI_INTERNALS__.transformCallback(t,e)}((t=>{const s=t.index;if("end"in t)return void(s==e(this,o,"f")?this.cleanupCallback():n(this,a,s));const c=t.message;if(s==e(this,o,"f")){for(e(this,i,"f").call(this,c),n(this,o,e(this,o,"f")+1);e(this,o,"f")in e(this,r,"f");){const t=e(this,r,"f")[e(this,o,"f")];e(this,i,"f").call(this,t),delete e(this,r,"f")[e(this,o,"f")],n(this,o,e(this,o,"f")+1)}e(this,o,"f")===e(this,a,"f")&&this.cleanupCallback()}else e(this,r,"f")[s]=c}))}cleanupCallback(){window.__TAURI_INTERNALS__.unregisterCallback(this.id)}set onmessage(t){n(this,i,t)}get onmessage(){return e(this,i,"f")}[(i=new WeakMap,o=new 
WeakMap,r=new WeakMap,a=new WeakMap,c)](){return`__CHANNEL__:${this.id}`}toJSON(){return this[c]()}}async function l(t,e={},n){return window.__TAURI_INTERNALS__.invoke(t,e,n)}class u{get rid(){return e(this,s,"f")}constructor(t){s.set(this,void 0),n(this,s,t)}async close(){return l("plugin:resources|close",{rid:this.rid})}}var p,d;function w(t){return{isFile:t.isFile,isDirectory:t.isDirectory,isSymlink:t.isSymlink,size:t.size,mtime:null!==t.mtime?new Date(t.mtime):null,atime:null!==t.atime?new Date(t.atime):null,birthtime:null!==t.birthtime?new Date(t.birthtime):null,readonly:t.readonly,fileAttributes:t.fileAttributes,dev:t.dev,ino:t.ino,mode:t.mode,nlink:t.nlink,uid:t.uid,gid:t.gid,rdev:t.rdev,blksize:t.blksize,blocks:t.blocks}}s=new WeakMap,t.BaseDirectory=void 0,(p=t.BaseDirectory||(t.BaseDirectory={}))[p.Audio=1]="Audio",p[p.Cache=2]="Cache",p[p.Config=3]="Config",p[p.Data=4]="Data",p[p.LocalData=5]="LocalData",p[p.Document=6]="Document",p[p.Download=7]="Download",p[p.Picture=8]="Picture",p[p.Public=9]="Public",p[p.Video=10]="Video",p[p.Resource=11]="Resource",p[p.Temp=12]="Temp",p[p.AppConfig=13]="AppConfig",p[p.AppData=14]="AppData",p[p.AppLocalData=15]="AppLocalData",p[p.AppCache=16]="AppCache",p[p.AppLog=17]="AppLog",p[p.Desktop=18]="Desktop",p[p.Executable=19]="Executable",p[p.Font=20]="Font",p[p.Home=21]="Home",p[p.Runtime=22]="Runtime",p[p.Template=23]="Template",t.SeekMode=void 0,(d=t.SeekMode||(t.SeekMode={}))[d.Start=0]="Start",d[d.Current=1]="Current",d[d.End=2]="End";class h extends u{async read(t){if(0===t.byteLength)return 0;const e=await l("plugin:fs|read",{rid:this.rid,len:t.byteLength}),n=function(t){const e=new Uint8ClampedArray(t),n=e.byteLength;let i=0;for(let t=0;tt instanceof URL?t.toString():t)),options:n,onEvent:o}),a=new L(r);return()=>{a.close()}}return t.FileHandle=h,t.copyFile=async function(t,e,n){if(t instanceof URL&&"file:"!==t.protocol||e instanceof URL&&"file:"!==e.protocol)throw new TypeError("Must be a file URL.");await 
l("plugin:fs|copy_file",{fromPath:t instanceof URL?t.toString():t,toPath:e instanceof URL?e.toString():e,options:n})},t.create=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");const n=await l("plugin:fs|create",{path:t instanceof URL?t.toString():t,options:e});return new h(n)},t.exists=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");return await l("plugin:fs|exists",{path:t instanceof URL?t.toString():t,options:e})},t.lstat=async function(t,e){return w(await l("plugin:fs|lstat",{path:t instanceof URL?t.toString():t,options:e}))},t.mkdir=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");await l("plugin:fs|mkdir",{path:t instanceof URL?t.toString():t,options:e})},t.open=y,t.readDir=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");return await l("plugin:fs|read_dir",{path:t instanceof URL?t.toString():t,options:e})},t.readFile=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");const n=await l("plugin:fs|read_file",{path:t instanceof URL?t.toString():t,options:e});return n instanceof ArrayBuffer?new Uint8Array(n):Uint8Array.from(n)},t.readTextFile=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");const n=await l("plugin:fs|read_text_file",{path:t instanceof URL?t.toString():t,options:e}),i=n instanceof ArrayBuffer?n:Uint8Array.from(n);return new TextDecoder(e?.encoding??"utf-8").decode(i)},t.readTextFileLines=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");const n=t instanceof URL?t.toString():t;return await Promise.resolve({path:n,rid:null,async next(){null===this.rid&&(this.rid=await l("plugin:fs|read_text_file_lines",{path:n,options:e}));const t=await 
l("plugin:fs|read_text_file_lines_next",{rid:this.rid}),i=t instanceof ArrayBuffer?new Uint8Array(t):Uint8Array.from(t),o=1===i[i.byteLength-1];if(o)return this.rid=null,{value:null,done:o};return{value:new TextDecoder(e?.encoding??"utf-8").decode(i.slice(0,i.byteLength-1)),done:o}},[Symbol.asyncIterator](){return this}})},t.remove=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");await l("plugin:fs|remove",{path:t instanceof URL?t.toString():t,options:e})},t.rename=async function(t,e,n){if(t instanceof URL&&"file:"!==t.protocol||e instanceof URL&&"file:"!==e.protocol)throw new TypeError("Must be a file URL.");await l("plugin:fs|rename",{oldPath:t instanceof URL?t.toString():t,newPath:e instanceof URL?e.toString():e,options:n})},t.size=async function(t){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");return await l("plugin:fs|size",{path:t instanceof URL?t.toString():t})},t.stat=async function(t,e){return w(await l("plugin:fs|stat",{path:t instanceof URL?t.toString():t,options:e}))},t.truncate=async function(t,e,n){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");await l("plugin:fs|truncate",{path:t instanceof URL?t.toString():t,len:e,options:n})},t.watch=async function(t,e,n){return await R(t,e,{delayMs:2e3,...n})},t.watchImmediate=async function(t,e,n){return await R(t,e,{...n,delayMs:void 0})},t.writeFile=async function(t,e,n){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");if(e instanceof ReadableStream){const i=await y(t,{read:!1,create:!0,write:!0,...n}),o=e.getReader();try{for(;;){const{done:t,value:e}=await o.read();if(t)break;await i.write(e)}}finally{o.releaseLock(),await i.close()}}else await l("plugin:fs|write_file",e,{headers:{path:encodeURIComponent(t instanceof URL?t.toString():t),options:JSON.stringify(n)}})},t.writeTextFile=async function(t,e,n){if(t instanceof 
URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");const i=new TextEncoder;await l("plugin:fs|write_text_file",i.encode(e),{headers:{path:encodeURIComponent(t instanceof URL?t.toString():t),options:JSON.stringify(n)}})},t}({});Object.defineProperty(window.__TAURI__,"fs",{value:__TAURI_PLUGIN_FS__})} +if("__TAURI__"in window){var __TAURI_PLUGIN_FS__=function(t){"use strict";function e(t,e,n,i){if("function"==typeof e?t!==e||!i:!e.has(t))throw new TypeError("Cannot read private member from an object whose class did not declare it");return"m"===n?i:"a"===n?i.call(t):i?i.value:e.get(t)}function n(t,e,n,i,o){if("function"==typeof e||!e.has(t))throw new TypeError("Cannot write private member to an object whose class did not declare it");return e.set(t,n),n}var i,o,r,a,s;"function"==typeof SuppressedError&&SuppressedError;const c="__TAURI_TO_IPC_KEY__";class f{constructor(t){i.set(this,void 0),o.set(this,0),r.set(this,[]),a.set(this,void 0),n(this,i,t||(()=>{})),this.id=function(t,e=!1){return window.__TAURI_INTERNALS__.transformCallback(t,e)}((t=>{const s=t.index;if("end"in t)return void(s==e(this,o,"f")?this.cleanupCallback():n(this,a,s));const c=t.message;if(s==e(this,o,"f")){for(e(this,i,"f").call(this,c),n(this,o,e(this,o,"f")+1);e(this,o,"f")in e(this,r,"f");){const t=e(this,r,"f")[e(this,o,"f")];e(this,i,"f").call(this,t),delete e(this,r,"f")[e(this,o,"f")],n(this,o,e(this,o,"f")+1)}e(this,o,"f")===e(this,a,"f")&&this.cleanupCallback()}else e(this,r,"f")[s]=c}))}cleanupCallback(){window.__TAURI_INTERNALS__.unregisterCallback(this.id)}set onmessage(t){n(this,i,t)}get onmessage(){return e(this,i,"f")}[(i=new WeakMap,o=new WeakMap,r=new WeakMap,a=new WeakMap,c)](){return`__CHANNEL__:${this.id}`}toJSON(){return this[c]()}}async function l(t,e={},n){return window.__TAURI_INTERNALS__.invoke(t,e,n)}class u{get rid(){return e(this,s,"f")}constructor(t){s.set(this,void 0),n(this,s,t)}async close(){return l("plugin:resources|close",{rid:this.rid})}}var 
p,d;function w(t){return{isFile:t.isFile,isDirectory:t.isDirectory,isSymlink:t.isSymlink,size:t.size,mtime:null!==t.mtime?new Date(t.mtime):null,atime:null!==t.atime?new Date(t.atime):null,birthtime:null!==t.birthtime?new Date(t.birthtime):null,readonly:t.readonly,fileAttributes:t.fileAttributes,dev:t.dev,ino:t.ino,mode:t.mode,nlink:t.nlink,uid:t.uid,gid:t.gid,rdev:t.rdev,blksize:t.blksize,blocks:t.blocks}}s=new WeakMap,t.BaseDirectory=void 0,(p=t.BaseDirectory||(t.BaseDirectory={}))[p.Audio=1]="Audio",p[p.Cache=2]="Cache",p[p.Config=3]="Config",p[p.Data=4]="Data",p[p.LocalData=5]="LocalData",p[p.Document=6]="Document",p[p.Download=7]="Download",p[p.Picture=8]="Picture",p[p.Public=9]="Public",p[p.Video=10]="Video",p[p.Resource=11]="Resource",p[p.Temp=12]="Temp",p[p.AppConfig=13]="AppConfig",p[p.AppData=14]="AppData",p[p.AppLocalData=15]="AppLocalData",p[p.AppCache=16]="AppCache",p[p.AppLog=17]="AppLog",p[p.Desktop=18]="Desktop",p[p.Executable=19]="Executable",p[p.Font=20]="Font",p[p.Home=21]="Home",p[p.Runtime=22]="Runtime",p[p.Template=23]="Template",t.SeekMode=void 0,(d=t.SeekMode||(t.SeekMode={}))[d.Start=0]="Start",d[d.Current=1]="Current",d[d.End=2]="End";class h extends u{async read(t){if(0===t.byteLength)return 0;const e=await l("plugin:fs|read",{rid:this.rid,len:t.byteLength}),n=function(t){const e=new Uint8ClampedArray(t),n=e.byteLength;let i=0;for(let t=0;tt instanceof URL?t.toString():t)),options:n,onEvent:o}),a=new g(r);return()=>{a.close()}}return t.FileHandle=h,t.copyFile=async function(t,e,n){if(t instanceof URL&&"file:"!==t.protocol||e instanceof URL&&"file:"!==e.protocol)throw new TypeError("Must be a file URL.");await l("plugin:fs|copy_file",{fromPath:t instanceof URL?t.toString():t,toPath:e instanceof URL?e.toString():e,options:n})},t.create=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");const n=await l("plugin:fs|create",{path:t instanceof URL?t.toString():t,options:e});return new 
h(n)},t.exists=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");return await l("plugin:fs|exists",{path:t instanceof URL?t.toString():t,options:e})},t.lstat=async function(t,e){return w(await l("plugin:fs|lstat",{path:t instanceof URL?t.toString():t,options:e}))},t.mkdir=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");await l("plugin:fs|mkdir",{path:t instanceof URL?t.toString():t,options:e})},t.open=y,t.readDir=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");return await l("plugin:fs|read_dir",{path:t instanceof URL?t.toString():t,options:e})},t.readFile=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");const n=await l("plugin:fs|read_file",{path:t instanceof URL?t.toString():t,options:e});return n instanceof ArrayBuffer?new Uint8Array(n):Uint8Array.from(n)},t.readTextFile=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");const n=await l("plugin:fs|read_text_file",{path:t instanceof URL?t.toString():t,options:e}),i=n instanceof ArrayBuffer?n:Uint8Array.from(n);return new TextDecoder(e?.encoding??"utf-8").decode(i)},t.readTextFileLines=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");const n=t instanceof URL?t.toString():t;return await Promise.resolve({path:n,rid:null,async next(){const t=new TextDecoder(e?.encoding??"utf-8");if(null===this.rid){const i=t.encoding;this.rid=await l("plugin:fs|read_text_file_lines",{path:n,options:null!=e?{...e,encoding:i}:void 0})}const i=await l("plugin:fs|read_text_file_lines_next",{rid:this.rid}),o=i instanceof ArrayBuffer?new Uint8Array(i):Uint8Array.from(i),r=1===o[o.byteLength-1];if(r)return 
this.rid=null,{value:null,done:r};return{value:t.decode(o.slice(0,o.byteLength-1)),done:r}},[Symbol.asyncIterator](){return this}})},t.remove=async function(t,e){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");await l("plugin:fs|remove",{path:t instanceof URL?t.toString():t,options:e})},t.rename=async function(t,e,n){if(t instanceof URL&&"file:"!==t.protocol||e instanceof URL&&"file:"!==e.protocol)throw new TypeError("Must be a file URL.");await l("plugin:fs|rename",{oldPath:t instanceof URL?t.toString():t,newPath:e instanceof URL?e.toString():e,options:n})},t.size=async function(t){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");return await l("plugin:fs|size",{path:t instanceof URL?t.toString():t})},t.stat=async function(t,e){return w(await l("plugin:fs|stat",{path:t instanceof URL?t.toString():t,options:e}))},t.truncate=async function(t,e,n){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");await l("plugin:fs|truncate",{path:t instanceof URL?t.toString():t,len:e,options:n})},t.watch=async function(t,e,n){return await L(t,e,{delayMs:2e3,...n})},t.watchImmediate=async function(t,e,n){return await L(t,e,{...n,delayMs:void 0})},t.writeFile=async function(t,e,n){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");if(e instanceof ReadableStream){const i=await y(t,{read:!1,create:!0,write:!0,...n}),o=e.getReader();try{for(;;){const{done:t,value:e}=await o.read();if(t)break;await i.write(e)}}finally{o.releaseLock(),await i.close()}}else await l("plugin:fs|write_file",e,{headers:{path:encodeURIComponent(t instanceof URL?t.toString():t),options:JSON.stringify(n)}})},t.writeTextFile=async function(t,e,n){if(t instanceof URL&&"file:"!==t.protocol)throw new TypeError("Must be a file URL.");const i=new TextEncoder;await l("plugin:fs|write_text_file",i.encode(e),{headers:{path:encodeURIComponent(t instanceof 
URL?t.toString():t),options:JSON.stringify(n)}})},t}({});Object.defineProperty(window.__TAURI__,"fs",{value:__TAURI_PLUGIN_FS__})} diff --git a/plugins/fs/guest-js/index.ts b/plugins/fs/guest-js/index.ts index 64e53fa31..8a5d99752 100644 --- a/plugins/fs/guest-js/index.ts +++ b/plugins/fs/guest-js/index.ts @@ -812,10 +812,15 @@ async function readTextFileLines( rid: null as number | null, async next(): Promise> { + const decoder = new TextDecoder(options?.encoding ?? 'utf-8') + if (this.rid === null) { + // Use the normalized encoding label for options. + const encoding = decoder.encoding + this.rid = await invoke('plugin:fs|read_text_file_lines', { path: pathStr, - options + options: options != null ? { ...options, encoding } : undefined }) } @@ -840,9 +845,7 @@ async function readTextFileLines( return { value: null, done } } - const line = new TextDecoder(options?.encoding ?? 'utf-8').decode( - bytes.slice(0, bytes.byteLength - 1) - ) + const line = decoder.decode(bytes.slice(0, bytes.byteLength - 1)) return { value: line, diff --git a/plugins/fs/src/commands.rs b/plugins/fs/src/commands.rs index ad837a4c0..93e861a20 100644 --- a/plugins/fs/src/commands.rs +++ b/plugins/fs/src/commands.rs @@ -393,6 +393,14 @@ pub async fn read_file( .await } +#[derive(Debug, Default, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ReadTextFileOptions { + #[serde(flatten)] + base: BaseOptions, + encoding: Option, +} + // TODO, remove in v3, rely on `read_file` command instead #[tauri::command] pub async fn read_text_file( @@ -419,7 +427,7 @@ pub fn read_text_file_lines( global_scope: GlobalScope, command_scope: CommandScope, path: SafeFilePath, - options: Option, + options: Option, ) -> CommandResult { let resolved_path = resolve_path( "read-text-file-lines", @@ -427,7 +435,7 @@ pub fn read_text_file_lines( &global_scope, &command_scope, path, - options.as_ref().and_then(|o| o.base_dir), + options.as_ref().and_then(|o| o.base.base_dir), )?; let file = 
File::open(&resolved_path).map_err(|e| { @@ -437,12 +445,43 @@ pub fn read_text_file_lines( ) })?; + let encoding = options.as_ref().and_then(|o| o.encoding.as_deref()); + let (lf_bytes, cr_bytes) = lf_cr_bytes_for_encoding_label(encoding); let lines = BufReader::new(file); - let rid = webview.resources_table().add(StdLinesResource::new(lines)); + let rid = webview + .resources_table() + .add(StdLinesResource::new(lines, lf_bytes, cr_bytes)); Ok(rid) } +/// Returns the byte sequences that encode LF (`\n`) and CR (`\r`) in the encoding identified by `label`. +/// +/// The provided encoding label must be a normalized, lowercase string, +/// such as one obtained via `(new TextDecoder(encoding)).encoding`. +/// +/// +fn lf_cr_bytes_for_encoding_label(label: Option<&str>) -> (Vec, Vec) { + // Defaults to utf-8 + // https://developer.mozilla.org/ja/docs/Web/API/TextDecoder/TextDecoder#label + let label = label.unwrap_or("utf-8"); + + // Currently, according to the Web Standard, + // the ASCII-incompatible encodings are UTF-16LE/BE and ISO-2022-JP. + // However, line breaks in ISO-2022-JP can still be detected in the same way as in ASCII. 
+ // + // https://encoding.spec.whatwg.org/#security-background + if label == "utf-16le" { + return (vec![0x0A, 0x00], vec![0x0D, 0x00]); + } + if label == "utf-16be" { + return (vec![0x00, 0x0A], vec![0x00, 0x0D]); + } + + // ASCII-compatible + (vec![b'\n'], vec![b'\r']) +} + #[tauri::command] pub async fn read_text_file_lines_next( webview: Webview, @@ -1203,22 +1242,39 @@ impl StdFileResource { impl Resource for StdFileResource {} /// Same as [std::io::Lines] but with bytes -struct LinesBytes(T); +struct LinesBytes { + bytes: T, + lf_bytes: Vec, + cr_bytes: Vec, +} + +impl LinesBytes { + fn new(bytes: T, lf_bytes: Vec, cr_bytes: Vec) -> Self { + LinesBytes { + bytes, + lf_bytes, + cr_bytes, + } + } +} impl Iterator for LinesBytes { type Item = std::io::Result>; fn next(&mut self) -> Option>> { let mut buf = Vec::new(); - match self.0.read_until(b'\n', &mut buf) { + // Search for '\n' + match read_until_bytes(&mut self.bytes, &self.lf_bytes, &mut buf) { Ok(0) => None, Ok(_n) => { - if buf.last() == Some(&b'\n') { - buf.pop(); - if buf.last() == Some(&b'\r') { - buf.pop(); + // Remove '\n' or '\r\n' + if buf.ends_with(&self.lf_bytes) { + buf.truncate(buf.len() - self.lf_bytes.len()); + if buf.ends_with(&self.cr_bytes) { + buf.truncate(buf.len() - self.cr_bytes.len()); } } + Some(Ok(buf)) } Err(e) => Some(Err(e)), @@ -1226,11 +1282,35 @@ impl Iterator for LinesBytes { } } +fn read_until_bytes( + r: &mut impl BufRead, + bytes: &[u8], + buf: &mut Vec, +) -> std::io::Result { + let last_byte = *bytes + .last() + .ok_or_else(|| std::io::Error::other("invalid empty bytes"))?; + + if bytes.len() == 1 { + return r.read_until(last_byte, buf); + } + + let mut total_n = 0; + loop { + let n = r.read_until(last_byte, buf)?; + total_n += n; + + if n == 0 || buf.ends_with(bytes) { + return Ok(total_n); + } + } +} + struct StdLinesResource(Mutex>>); impl StdLinesResource { - fn new(lines: BufReader) -> Self { - Self(Mutex::new(LinesBytes(lines))) + fn new(lines: BufReader, 
lf_bytes: Vec, cr_bytes: Vec) -> Self { + Self(Mutex::new(LinesBytes::new(lines, lf_bytes, cr_bytes))) } fn with_lock>) -> R>(&self, mut f: F) -> R { @@ -1354,21 +1434,60 @@ mod test { #[test] fn test_lines_bytes() { - let base = String::from("line 1\nline2\nline 3\nline 4"); - let bytes = base.as_bytes(); + // UTF-8 + { + let base = String::from("line 1\nline2\nline 3\r\nline 4"); + let bytes = base.as_bytes(); - let string1 = base.lines().collect::(); - let string2 = BufReader::new(bytes) - .lines() - .map_while(Result::ok) - .collect::(); - let string3 = LinesBytes(BufReader::new(bytes)) - .flatten() - .flat_map(String::from_utf8) - .collect::(); + let string1 = base.lines().collect::(); + let string2 = BufReader::new(bytes) + .lines() + .map_while(Result::ok) + .collect::(); + let string3 = LinesBytes::new(BufReader::new(bytes), vec![b'\n'], vec![b'\r']) + .flatten() + .flat_map(String::from_utf8) + .collect::(); - assert_eq!(string1, string2); - assert_eq!(string1, string3); - assert_eq!(string2, string3); + assert_eq!(string1, string2); + assert_eq!(string1, string3); + assert_eq!(string2, string3); + } + + // UTF-16 LE + { + fn utf16(text: &str) -> Vec { + text.encode_utf16().flat_map(|u| u.to_le_bytes()).collect() + } + + let base = String::from("line 1\nline2\nline 3\r\nline 4\n"); + let bytes = utf16(&base); + + let mut lines = LinesBytes::new(BufReader::new(&bytes[..]), utf16("\n"), utf16("\r")); + assert_eq!(lines.next().map(Result::unwrap), Some(utf16("line 1"))); + assert_eq!(lines.next().map(Result::unwrap), Some(utf16("line2"))); + assert_eq!(lines.next().map(Result::unwrap), Some(utf16("line 3"))); + assert_eq!(lines.next().map(Result::unwrap), Some(utf16("line 4"))); + assert!(lines.next().is_none()); + } + + // UTF-16 BE + { + fn utf16(text: &str) -> Vec { + text.encode_utf16().flat_map(|u| u.to_be_bytes()).collect() + } + + // ਗ (U+0A17) encodes to 0x0A 0x17, + // which contains 0x0A but is not a line feed (U+000A = 0x00 0x0A). 
+ let base = String::from("line 1\nline2ਗ\nline 3\r\nline 4"); + let bytes = utf16(&base); + + let mut lines = LinesBytes::new(BufReader::new(&bytes[..]), utf16("\n"), utf16("\r")); + assert_eq!(lines.next().map(Result::unwrap), Some(utf16("line 1"))); + assert_eq!(lines.next().map(Result::unwrap), Some(utf16("line2ਗ"))); + assert_eq!(lines.next().map(Result::unwrap), Some(utf16("line 3"))); + assert_eq!(lines.next().map(Result::unwrap), Some(utf16("line 4"))); + assert!(lines.next().is_none()); + } } }