Skip to content

Commit c3c213d

Browse files
authored
fix: correctly handle cjk identifiers in strip_identifier (#138)
* fix: Correct UTF-8 boundary handling in strip_identifier
* test: Replace Japanese with Chinese in CJK test
  Change "変数名" (Japanese) to "变量名" (Chinese) for better CJK coverage.
* Reformat the Korean identifier test assertion to improve readability by breaking it across multiple lines.
1 parent 7ebed4d commit c3c213d

1 file changed

Lines changed: 19 additions & 4 deletions

File tree

src/js_identifiers.rs

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,26 +27,29 @@ fn is_valid_continue(c: char) -> bool {
2727
fn strip_identifier(s: &str) -> Option<&str> {
2828
let mut iter = s.char_indices();
2929
// Is the first character a valid starting character
30-
match iter.next() {
30+
let first_char_len = match iter.next() {
3131
Some((_, c)) => {
3232
if !is_valid_start(c) {
3333
return None;
3434
}
35+
c.len_utf8()
3536
}
3637
None => {
3738
return None;
3839
}
3940
};
4041
// Slice up to the last valid continuation character
41-
let mut end_idx = 0;
42+
// Initialize to end of first char to handle single-char and multibyte identifiers correctly
43+
let mut end_idx = first_char_len;
4244
for (i, c) in iter {
4345
if is_valid_continue(c) {
44-
end_idx = i;
46+
// Store the end byte index (start + char length) for proper UTF-8 boundary
47+
end_idx = i + c.len_utf8();
4548
} else {
4649
break;
4750
}
4851
}
49-
Some(&s[..=end_idx])
52+
Some(&s[..end_idx])
5053
}
5154

5255
pub fn is_valid_javascript_identifier(s: &str) -> bool {
@@ -75,11 +78,23 @@ mod tests {
7578
assert!(!is_valid_javascript_identifier("foo "));
7679
assert!(!is_valid_javascript_identifier("[123]"));
7780
assert!(!is_valid_javascript_identifier("foo.bar"));
81+
82+
// Non-ASCII identifiers
83+
assert!(is_valid_javascript_identifier("한글변수"));
84+
assert!(is_valid_javascript_identifier("变量名"));
85+
assert!(is_valid_javascript_identifier("ひらがな"));
86+
7887
// Should these pass?
7988
// assert!(is_valid_javascript_identifier("foo [bar]"));
8089
assert_eq!(get_javascript_token("foo "), Some("foo"));
8190
assert_eq!(get_javascript_token("f _hi"), Some("f"));
8291
assert_eq!(get_javascript_token("foo.bar"), Some("foo"));
8392
assert_eq!(get_javascript_token("[foo,bar]"), None);
93+
assert_eq!(
94+
get_javascript_token("결제사_연결():De"),
95+
Some("결제사_연결")
96+
);
97+
assert_eq!(get_javascript_token("变量名123"), Some("变量名123"));
98+
assert_eq!(get_javascript_token("へんすう_test"), Some("へんすう_test"));
8499
}
85100
}

0 commit comments

Comments
 (0)