Skip to content

Commit 61beb43

Browse files
committed
refactor(codegen): print string literals containing lone surrogates without reference to raw
1 parent 22f9406 commit 61beb43

File tree

3 files changed

+32
-13
lines changed

3 files changed

+32
-13
lines changed

crates/oxc_codegen/src/lib.rs

+26-7
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,12 @@ impl<'a> Codegen<'a> {
242242
self.code.print_str(s);
243243
}
244244

245+
/// Push a single `char` into the buffer.
///
/// Forwards `ch` unchanged to `self.code.print_char`; no escaping or quoting is applied here.
246+
#[inline]
247+
pub fn print_char(&mut self, ch: char) {
248+
self.code.print_char(ch);
249+
}
250+
245251
/// Print a single [`Expression`], adding it to the code generator's
246252
/// internal buffer. Unlike [`Codegen::build`], this does not consume `self`.
247253
#[inline]
@@ -578,14 +584,7 @@ impl<'a> Codegen<'a> {
578584

579585
fn print_string_literal(&mut self, s: &StringLiteral<'_>, allow_backtick: bool) {
580586
self.add_source_mapping(s.span);
581-
if s.lone_surrogates {
582-
self.print_str(s.raw.unwrap().as_str());
583-
return;
584-
}
585-
self.print_quoted_utf16(s, allow_backtick);
586-
}
587587

588-
fn print_quoted_utf16(&mut self, s: &StringLiteral<'_>, allow_backtick: bool) {
589588
let quote = if self.options.minify {
590589
let mut single_cost: i32 = 0;
591590
let mut double_cost: i32 = 0;
@@ -680,6 +679,26 @@ impl<'a> Codegen<'a> {
680679
}
681680
self.print_ascii_byte(b'$');
682681
}
682+
'\u{FFFD}' if s.lone_surrogates => {
683+
// If `lone_surrogates` is set, string contains lone surrogates which are escaped
684+
// using the lossy replacement character (U+FFFD) as an escape marker.
685+
// Each lone surrogate is encoded as `\u{FFFD}XXXX`, where `XXXX` is the code point as 4 lowercase hex digits; a literal U+FFFD is itself encoded as `\u{FFFD}fffd`.
686+
let hex1 = chars.next().unwrap();
687+
let hex2 = chars.next().unwrap();
688+
let hex3 = chars.next().unwrap();
689+
let hex4 = chars.next().unwrap();
690+
if [hex1, hex2, hex3, hex4] == ['f', 'f', 'f', 'd'] {
691+
// Actual lossy replacement character
692+
self.print_char('\u{FFFD}');
693+
} else {
694+
// Lossy replacement character representing a lone surrogate
695+
self.print_str("\\u");
696+
self.print_char(hex1);
697+
self.print_char(hex2);
698+
self.print_char(hex3);
699+
self.print_char(hex4);
700+
}
701+
}
683702
_ => self.print_str(c.encode_utf8([0; 4].as_mut())),
684703
}
685704
}

crates/oxc_codegen/tests/integration/esbuild.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -363,15 +363,15 @@ fn test_string() {
363363
test("let x = '\\U000123AB'", "let x = \"U000123AB\";\n");
364364
test("let x = '\\u{123AB}'", "let x = \"\u{123ab}\";\n");
365365
test("let x = '\\uD808\\uDFAB'", "let x = \"\u{123ab}\";\n");
366-
test("let x = '\\uD808'", "let x = '\\uD808';\n"); // lone surrogate
367-
test("let x = '\\uD808X'", "let x = '\\uD808X';\n");
368-
test("let x = '\\uDFAB'", "let x = '\\uDFAB';\n");
369-
test("let x = '\\uDFABX'", "let x = '\\uDFABX';\n");
366+
test("let x = '\\uD808'", "let x = \"\\ud808\";\n"); // lone surrogate
367+
test("let x = '\\uD808X'", "let x = \"\\ud808X\";\n");
368+
test("let x = '\\uDFAB'", "let x = \"\\udfab\";\n");
369+
test("let x = '\\uDFABX'", "let x = \"\\udfabX\";\n");
370370

371371
test("let x = '\\x80'", "let x = \"\u{80}\";\n");
372372
test("let x = '\\xFF'", "let x = \"ÿ\";\n");
373373
test("let x = '\\xF0\\x9F\\x8D\\x95'", "let x = \"ð\u{9f}\u{8d}\u{95}\";\n");
374-
test("let x = '\\uD801\\uDC02\\uDC03\\uD804'", "let x = '\\uD801\\uDC02\\uDC03\\uD804';\n"); // lossy
374+
test("let x = '\\uD801\\uDC02\\uDC03\\uD804'", "let x = \"𐐂\\udc03\\ud804\";\n"); // surrogates
375375
}
376376

377377
#[test]

crates/oxc_codegen/tests/integration/unit.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ fn unicode_escape() {
143143
test("console.log('こんにちは');", "console.log(\"こんにちは\");\n");
144144
test("console.log('안녕하세요');", "console.log(\"안녕하세요\");\n");
145145
test("console.log('🧑‍🤝‍🧑');", "console.log(\"🧑‍🤝‍🧑\");\n");
146-
test("console.log(\"\\uD800\\uD801\")", "console.log(\"\\uD800\\uD801\");\n");
146+
test("console.log(\"\\uD800\\uD801\")", "console.log(\"\\ud800\\ud801\");\n");
147147
}
148148

149149
#[test]

0 commit comments

Comments
 (0)