From d9f0d8a32865d962fd9b02165a6f077082129551 Mon Sep 17 00:00:00 2001 From: Phodal Huang Date: Thu, 10 Sep 2020 20:39:06 +0800 Subject: [PATCH] feat: split utf string for testing --- scie-scanner/src/scanner/old/mod.rs | 3 +++ scie-scanner/src/scanner/old/scie_onig.rs | 7 ++----- scie-scanner/src/scanner/old/scie_scanner.rs | 2 +- scie-scanner/src/scanner/onig_scanner.rs | 6 +++--- scie-scanner/src/scanner/onig_string.rs | 19 ++++++++++++------- .../vscode-onig/src/lib/utf-string.spec.ts | 7 ++++++- 6 files changed, 27 insertions(+), 17 deletions(-) diff --git a/scie-scanner/src/scanner/old/mod.rs b/scie-scanner/src/scanner/old/mod.rs index e69de29..20e2337 100644 --- a/scie-scanner/src/scanner/old/mod.rs +++ b/scie-scanner/src/scanner/old/mod.rs @@ -0,0 +1,3 @@ +pub mod scie_scanner; +pub mod scie_onig; +pub mod scie_error; \ No newline at end of file diff --git a/scie-scanner/src/scanner/old/scie_onig.rs b/scie-scanner/src/scanner/old/scie_onig.rs index e225001..5c2ef97 100644 --- a/scie-scanner/src/scanner/old/scie_onig.rs +++ b/scie-scanner/src/scanner/old/scie_onig.rs @@ -3,7 +3,7 @@ use std::ptr::null_mut; use onig::{Syntax, EncodedChars}; use std::sync::Mutex; -use crate::scanner::scie_error::ScieOnigError; +use crate::scanner::old::scie_error::ScieOnigError; lazy_static! { static ref REGEX_NEW_MUTEX: Mutex<()> = Mutex::new(()); @@ -21,9 +21,6 @@ pub struct ScieOnig { } impl ScieOnig { - pub fn new(pattern: &str) -> Result { - - } pub fn demo_new(pattern: &str) -> Result { let option = ScieOnigOptions::REGEX_OPTION_NONE; let syntax = Syntax::default(); @@ -69,7 +66,7 @@ impl ScieOnig { #[cfg(test)] mod tests { - use crate::scanner::scie_onig::ScieOnig; + use crate::scanner::old::scie_onig::ScieOnig; #[test] fn it_works() { diff --git a/scie-scanner/src/scanner/old/scie_scanner.rs b/scie-scanner/src/scanner/old/scie_scanner.rs index 8630a3b..0033e72 100644 --- a/scie-scanner/src/scanner/old/scie_scanner.rs +++ b/scie-scanner/src/scanner/old/scie_scanner.rs @@ -137,7 +137,7 @@ pub fn str_vec_to_string(iter: I) -> Vec #[cfg(test)] mod tests { - use crate::scanner::scie_scanner::{str_vec_to_string, ScieScanner}; + use crate::scanner::old::scie_scanner::{ScieScanner, str_vec_to_string}; #[test] fn should_handle_simple_regex() { diff --git a/scie-scanner/src/scanner/onig_scanner.rs b/scie-scanner/src/scanner/onig_scanner.rs index cdf69a0..5fcb2d6 100644 --- a/scie-scanner/src/scanner/onig_scanner.rs +++ b/scie-scanner/src/scanner/onig_scanner.rs @@ -7,9 +7,9 @@ pub struct OnigScanner { impl OnigScanner { pub fn new(pattens: Vec<&str>) -> Self { - let str_ptrs_arr: Vec = vec![]; - let str_len_arr: Vec = vec![]; - for x in pattens { + let _str_ptrs_arr: Vec = vec![]; + let _str_len_arr: Vec = vec![]; + for _x in pattens { } OnigScanner { _ptr: 0 } diff --git a/scie-scanner/src/scanner/onig_string.rs b/scie-scanner/src/scanner/onig_string.rs index bc02226..4d8cc99 100644 --- a/scie-scanner/src/scanner/onig_string.rs +++ b/scie-scanner/src/scanner/onig_string.rs @@ -1,3 +1,5 @@ +use unicode_segmentation::UnicodeSegmentation; + pub struct OnigString { pub utf16length: i32, pub utf8length: i32, @@ -10,12 +12,15 @@ pub struct OnigString { impl OnigString { pub fn new(str: String) -> Self { - let utf16Length = str.len(); - + let utf16length = str.len(); + let utf8_str = str.graphemes(true).collect::>().clone(); + println!("{:?}", utf8_str); + let utf8length = utf8_str.len(); + OnigString { - utf16length: 0, - utf8length: 0, + utf16length: utf16length as i32, + utf8length: utf8length as i32, utf16value: "".to_string(), utf8value: vec![], utf16offset_to_utf8: None, @@ -27,12 +32,12 @@ impl OnigString { #[cfg(test)] mod tests { - use crate::scanner::onig_scanner::OnigScanner; use crate::scanner::onig_string::OnigString; #[test] fn it_show_works_works() { - OnigString::new(String::from("")); - assert!(true) + let onig_string = OnigString::new(String::from("a💻bYX")); + assert_eq!(8, onig_string.utf16length); + assert_eq!(5, onig_string.utf8length); } } diff --git a/vscode-tests/vscode-onig/src/lib/utf-string.spec.ts b/vscode-tests/vscode-onig/src/lib/utf-string.spec.ts index 9a36d9d..9dafaff 100644 --- a/vscode-tests/vscode-onig/src/lib/utf-string.spec.ts +++ b/vscode-tests/vscode-onig/src/lib/utf-string.spec.ts @@ -4,6 +4,11 @@ import UtfString from './utf-string'; test('constructor', (t) => { const utfString = new UtfString("hello, world"); - t.log(utfString); t.is(utfString.utf8Length, 12); }); + +test('utf8 length', (t) => { + const utfString = new UtfString("a💻bYX"); + console.log(utfString); + t.is(utfString.utf8Length, 8); +}); -- GitLab