Skip to content

Commit d78c04e

Browse files
authored
feat(codegen): Revert "feat(codegen)!: print StringLiteral raw if minify option disabled (#10553) (#10569)
This reverts commit 10e1018. After experimenting with downstream crates like Rolldown, I discovered this introduces a footgun. It's really easy to change `StringLiteral::value` without also changing `StringLiteral::raw`, which results in incorrect codegen. After some consideration, I've decided against introducing an extra option for printing the raw value, it adds a significant amount of maintenance burden. (This issue alone is already consuming a lot of our time). We will need to three modes in the codegen: normal, minify and raw. For now, let's assume our codegen will not preserve the original source text. If preserving the original source text is a requirement, please use magic string or any other direct string manipulation methods.
1 parent f5332bf commit d78c04e

File tree

3 files changed

+63
-204
lines changed

3 files changed

+63
-204
lines changed

crates/oxc_codegen/src/str.rs

-92
Original file line numberDiff line numberDiff line change
@@ -28,98 +28,6 @@ impl Codegen<'_> {
2828
pub(crate) fn print_string_literal(&mut self, s: &StringLiteral<'_>, allow_backtick: bool) {
2929
self.add_source_mapping(s.span);
3030

31-
if !self.options.minify {
32-
if let Some(raw) = s.raw {
33-
self.print_string_literal_raw(&raw);
34-
return;
35-
}
36-
}
37-
38-
self.print_string_literal_encode(s, allow_backtick);
39-
}
40-
41-
/// Print a [`StringLiteral`], from its `raw` representation.
42-
///
43-
/// Only change made is to change quotes to what's specified in `options.single_quote`.
44-
fn print_string_literal_raw(&mut self, raw: &str) {
45-
let quote = self.quote;
46-
let raw_bytes = raw.as_bytes();
47-
48-
// Assertion here should remove the bounds checks from `raw_bytes[0]`,
49-
// `raw_bytes[raw_bytes.len() - 1]` and `&raw_bytes[1..raw_bytes.len() - 1]` below
50-
assert!(raw_bytes.len() >= 2);
51-
52-
// If `raw` already has desired quotes, print `raw` unchanged
53-
if raw_bytes[0] == quote as u8 {
54-
self.print_str(raw);
55-
return;
56-
}
57-
58-
// Escape quotes
59-
60-
// Cut off quotes from start and end of `raw`.
61-
// Check the last char of `raw` is ASCII. This ensures that trimmed `raw_bytes` is a valid
62-
// UTF-8 string, and doesn't end with an unfinished part of a unicode byte sequence.
63-
// This ensures the safety of `print_bytes_unchecked` calls below.
64-
// We have to do this check because we don't have a static guarantee that `raw` starts and ends
65-
// with an ASCII quote. Without this check, a bug in parser could cause undefined behavior here.
66-
assert!(raw_bytes[raw_bytes.len() - 1].is_ascii());
67-
let raw_bytes = &raw_bytes[1..raw_bytes.len() - 1];
68-
let mut bytes = raw_bytes.iter();
69-
let mut chunk_start = bytes.as_slice().as_ptr();
70-
71-
quote.print(self);
72-
73-
while let Some(&byte) = bytes.clone().next() {
74-
if byte == quote as u8 {
75-
// Print up to before quote, print slash, and start next chunk on the quote,
76-
// so quote gets pushed in next chunk.
77-
// Note: `byte` was peeked, not consumed, so `quote_ptr` points to the quote, not after it.
78-
let quote_ptr = bytes.as_slice().as_ptr();
79-
// SAFETY: `chunk_start` points to either start of string content or an ASCII quote char.
80-
// Either way, that's on a UTF-8 char boundary, and in bounds of `raw_bytes`.
81-
// `quote_ptr` points to an ASCII quote char, so also on a UTF-8 char boundary.
82-
// `quote_ptr >= chunk_start` because `bytes` only gets advanced,
83-
// and `chunk_start` is either start of string or a previous `quote_ptr`.
84-
unsafe {
85-
let chunk_len = quote_ptr.offset_from(chunk_start);
86-
let chunk_len = usize::try_from(chunk_len).unwrap_unchecked();
87-
let chunk = slice::from_raw_parts(chunk_start, chunk_len);
88-
self.code.print_bytes_unchecked(chunk);
89-
}
90-
self.print_ascii_byte(b'\\');
91-
chunk_start = quote_ptr;
92-
93-
// Consume the quote
94-
bytes.next().unwrap();
95-
} else if byte == b'\\' {
96-
// Consume slash and next byte.
97-
// Next byte might be an escaped quote which don't want to escape again e.g. `\"`.
98-
bytes.next().unwrap();
99-
bytes.next().unwrap();
100-
} else {
101-
// Consume the peeked byte
102-
bytes.next().unwrap();
103-
}
104-
}
105-
106-
// SAFETY: `chunk_start` points to either start of string content or an ASCII quote character.
107-
// Either way, that's on a UTF-8 char boundary, and in bounds of `raw_bytes`.
108-
// `bytes` is exhausted, so `bytes.as_slice().as_ptr()` points to end of `raw_bytes`.
109-
// `chunk_start` must be before it, or (if string is empty) equal to it.
110-
unsafe {
111-
let end_ptr = bytes.as_slice().as_ptr();
112-
let chunk_len = end_ptr.offset_from(chunk_start);
113-
let chunk_len = usize::try_from(chunk_len).unwrap_unchecked();
114-
let chunk = slice::from_raw_parts(chunk_start, chunk_len);
115-
self.code.print_bytes_unchecked(chunk);
116-
}
117-
118-
quote.print(self);
119-
}
120-
121-
/// Print a [`StringLiteral`], re-encoding from its `value`.
122-
fn print_string_literal_encode(&mut self, s: &StringLiteral<'_>, allow_backtick: bool) {
12331
// If `minify` option enabled, quote will be chosen depending on what produces shortest output.
12432
// What is the best quote to use will be determined when first character needing escape is found.
12533
// This avoids iterating through the string twice if it contains no quotes (common case).

crates/oxc_codegen/tests/integration/esbuild.rs

+44-81
Original file line numberDiff line numberDiff line change
@@ -333,89 +333,52 @@ fn test_nullish() {
333333

334334
#[test]
335335
fn test_string() {
336-
// No `minify` option
337-
338-
// Prints double-quoted strings as in original
339-
test("let x = \"\"", "let x = \"\";\n");
340-
test("let x = \"abc\"", "let x = \"abc\";\n");
341-
test("let x = \"\t\"", "let x = \"\t\";\n");
342-
test("let x = \"\\t\"", "let x = \"\\t\";\n");
343-
344-
// Converts single quote to double
345336
test("let x = ''", "let x = \"\";\n");
346-
test("let x = 'abc'", "let x = \"abc\";\n");
347-
test("let x = '\"'", "let x = \"\\\"\";\n");
348-
test("let x = 'abc\"'", "let x = \"abc\\\"\";\n");
349-
test("let x = 'abc\"\"\"'", "let x = \"abc\\\"\\\"\\\"\";\n");
350-
test("let x = '\"def'", "let x = \"\\\"def\";\n");
351-
test("let x = '\"\"\"def'", "let x = \"\\\"\\\"\\\"def\";\n");
352-
test("let x = 'abc\"def'", "let x = \"abc\\\"def\";\n");
353-
test("let x = 'abc\"\"\"def\"\"\"ghi'", "let x = \"abc\\\"\\\"\\\"def\\\"\\\"\\\"ghi\";\n");
354-
// Does not double-escape already-escaped quotes
355-
test("let x = '\\\"'", "let x = \"\\\"\";\n");
356-
test("let x = 'abc\\\"\\\"'", "let x = \"abc\\\"\\\"\";\n");
357-
test("let x = '\\\"\\\"def'", "let x = \"\\\"\\\"def\";\n");
358-
test("let x = 'abc\\\"\\\"def'", "let x = \"abc\\\"\\\"def\";\n");
359-
test("let x = '\\r\\n\"'", "let x = \"\\r\\n\\\"\";\n");
360-
test("let x = '\\\\\"'", "let x = \"\\\\\\\"\";\n");
361-
test("let x = '\\\\\\\"'", "let x = \"\\\\\\\"\";\n");
362-
// Does not escape other characters
337+
test("let x = '\\b'", "let x = \"\\b\";\n");
338+
test("let x = '\\f'", "let x = \"\\f\";\n");
363339
test("let x = '\t'", "let x = \"\t\";\n");
364-
// Prints other escapes as in original
365-
test("let x = '\\t'", "let x = \"\\t\";\n");
366-
test("let x = '\\x41'", "let x = \"\\x41\";\n");
367-
test("let x = '\\u{41}'", "let x = \"\\u{41}\";\n");
368-
test("let x = '\\uD800'", "let x = \"\\uD800\";\n");
369-
test("let x = '\\uD801\\uDC02'", "let x = \"\\uD801\\uDC02\";\n");
370-
371-
// `minify` option
372-
373-
// Escapes characters and chooses best quote character
374-
test_minify("let x = ''", "let x=``;");
375-
test_minify("let x = '\\b'", "let x=`\\b`;");
376-
test_minify("let x = '\\f'", "let x=`\\f`;");
377-
test_minify("let x = '\t'", "let x=`\t`;");
378-
test_minify("let x = '\\v'", "let x=`\\v`;");
379-
test_minify("let x = '\\n'", "let x=`\n`;");
380-
test_minify("let x = '\\r'", "let x=`\\r`;");
381-
test_minify("let x = '\\r\\n'", "let x=`\\r\n`;");
382-
test_minify("let x = '\\''", "let x=`'`;");
383-
test_minify("let x = '\"'", "let x=`\"`;");
384-
test_minify("let x = '`'", "let x=\"`\";");
385-
test_minify("let x = '\\'\"'", "let x=`'\"`;");
386-
test_minify("let x = '\\'`'", "let x=\"'`\";");
387-
test_minify("let x = '\"`'", "let x='\"`';");
388-
test_minify("let x = '\\\\'", "let x=`\\\\`;");
389-
test_minify("let x = '\x00'", "let x=`\\0`;");
390-
test_minify("let x = '\x00!'", "let x=`\\0!`;");
391-
test_minify("let x = '\x001'", "let x=`\\x001`;");
392-
test_minify("let x = '\\0'", "let x=`\\0`;");
393-
test_minify("let x = '\\0!'", "let x=`\\0!`;");
394-
test_minify("let x = '\x07'", "let x=`\\x07`;");
395-
test_minify("let x = '\x07!'", "let x=`\\x07!`;");
396-
test_minify("let x = '\x071'", "let x=`\\x071`;");
397-
test_minify("let x = '\\7'", "let x=`\\x07`;");
398-
test_minify("let x = '\\7!'", "let x=`\\x07!`;");
399-
test_minify("let x = '\\01'", "let x=`\x01`;");
400-
test_minify("let x = '\x10'", "let x=`\x10`;");
401-
test_minify("let x = '\\x10'", "let x=`\x10`;");
402-
test_minify("let x = '\x1B'", "let x=`\\x1B`;");
403-
test_minify("let x = '\\x1B'", "let x=`\\x1B`;");
404-
test_minify("let x = '\\x41'", "let x=`A`;");
405-
test_minify("let x = '\u{ABCD}'", "let x=`\u{ABCD}`;");
406-
test_minify("let x = '\\uABCD'", "let x=`\u{ABCD}`;");
407-
test_minify("let x = '\\U000123AB'", "let x=`U000123AB`;");
408-
test_minify("let x = '\\u{123AB}'", "let x=`\u{123ab}`;");
409-
test_minify("let x = '\\u{41}'", "let x=`A`;");
410-
test_minify("let x = '\\uD808\\uDFAB'", "let x=`\u{123ab}`;");
411-
test_minify("let x = '\\uD808'", "let x=`\\ud808`;"); // lone surrogate
412-
test_minify("let x = '\\uD808X'", "let x=`\\ud808X`;");
413-
test_minify("let x = '\\uDFAB'", "let x=`\\udfab`;");
414-
test_minify("let x = '\\uDFABX'", "let x=`\\udfabX`;");
415-
test_minify("let x = '\\x80'", "let x=`\u{80}`;");
416-
test_minify("let x = '\\xFF'", "let x=`ÿ`;");
417-
test_minify("let x = '\\xF0\\x9F\\x8D\\x95'", "let x=`ð\u{9f}\u{8d}\u{95}`;");
418-
test_minify("let x = '\\uD801\\uDC02\\uDC03\\uD804'", "let x=`𐐂\\udc03\\ud804`;"); // surrogates
340+
test("let x = '\\v'", "let x = \"\\v\";\n");
341+
test("let x = '\\n'", "let x = \"\\n\";\n");
342+
test("let x = '\\r'", "let x = \"\\r\";\n");
343+
test("let x = '\\r\\n'", "let x = \"\\r\\n\";\n");
344+
test("let x = '\\''", "let x = \"'\";\n");
345+
test("let x = '\"'", "let x = \"\\\"\";\n");
346+
test("let x = '`'", "let x = \"`\";\n");
347+
test("let x = '\\'\"'", "let x = \"'\\\"\";\n");
348+
test("let x = '\\'`'", "let x = \"'`\";\n");
349+
test("let x = '\"`'", "let x = \"\\\"`\";\n");
350+
test("let x = '\\\\'", "let x = \"\\\\\";\n");
351+
test("let x = '\x00'", "let x = \"\\0\";\n");
352+
test("let x = '\x00!'", "let x = \"\\0!\";\n");
353+
test("let x = '\x001'", "let x = \"\\x001\";\n");
354+
test("let x = '\\0'", "let x = \"\\0\";\n");
355+
test("let x = '\\0!'", "let x = \"\\0!\";\n");
356+
test("let x = '\x07'", "let x = \"\\x07\";\n");
357+
test("let x = '\x07!'", "let x = \"\\x07!\";\n");
358+
test("let x = '\x071'", "let x = \"\\x071\";\n");
359+
test("let x = '\\7'", "let x = \"\\x07\";\n");
360+
test("let x = '\\7!'", "let x = \"\\x07!\";\n");
361+
test("let x = '\\01'", "let x = \"\x01\";\n");
362+
test("let x = '\x10'", "let x = \"\x10\";\n");
363+
test("let x = '\\x10'", "let x = \"\x10\";\n");
364+
test("let x = '\x1B'", "let x = \"\\x1B\";\n");
365+
test("let x = '\\x1B'", "let x = \"\\x1B\";\n");
366+
test("let x = '\\x41'", "let x = \"A\";\n");
367+
test("let x = '\u{ABCD}'", "let x = \"\u{ABCD}\";\n");
368+
test("let x = '\\uABCD'", "let x = \"\u{ABCD}\";\n");
369+
test("let x = '\\U000123AB'", "let x = \"U000123AB\";\n");
370+
test("let x = '\\u{123AB}'", "let x = \"\u{123ab}\";\n");
371+
test("let x = '\\u{41}'", "let x = \"A\";\n");
372+
test("let x = '\\uD808\\uDFAB'", "let x = \"\u{123ab}\";\n");
373+
test("let x = '\\uD808'", "let x = \"\\ud808\";\n"); // lone surrogate
374+
test("let x = '\\uD808X'", "let x = \"\\ud808X\";\n");
375+
test("let x = '\\uDFAB'", "let x = \"\\udfab\";\n");
376+
test("let x = '\\uDFABX'", "let x = \"\\udfabX\";\n");
377+
378+
test("let x = '\\x80'", "let x = \"\u{80}\";\n");
379+
test("let x = '\\xFF'", "let x = \"ÿ\";\n");
380+
test("let x = '\\xF0\\x9F\\x8D\\x95'", "let x = \"ð\u{9f}\u{8d}\u{95}\";\n");
381+
test("let x = '\\uD801\\uDC02\\uDC03\\uD804'", "let x = \"𐐂\\udc03\\ud804\";\n"); // surrogates
419382
}
420383

421384
#[test]

crates/oxc_codegen/tests/integration/unit.rs

+19-31
Original file line numberDiff line numberDiff line change
@@ -150,13 +150,7 @@ fn unicode_escape() {
150150
test("console.log('こんにちは');", "console.log(\"こんにちは\");\n");
151151
test("console.log('안녕하세요');", "console.log(\"안녕하세요\");\n");
152152
test("console.log('🧑‍🤝‍🧑');", "console.log(\"🧑‍🤝‍🧑\");\n");
153-
test("console.log(\"\\uD800\\uD801\")", "console.log(\"\\uD800\\uD801\");\n");
154-
155-
test_minify("console.log('你好');", "console.log(`你好`);");
156-
test_minify("console.log('こんにちは');", "console.log(`こんにちは`);");
157-
test_minify("console.log('안녕하세요');", "console.log(`안녕하세요`);");
158-
test_minify("console.log('🧑‍🤝‍🧑');", "console.log(`🧑‍🤝‍🧑`);");
159-
test_minify("console.log(\"\\uD800\\uD801\")", "console.log(`\\ud800\\ud801`);");
153+
test("console.log(\"\\uD800\\uD801\")", "console.log(\"\\ud800\\ud801\");\n");
160154
}
161155

162156
#[test]
@@ -517,59 +511,53 @@ fn getter_setter() {
517511

518512
#[test]
519513
fn string() {
520-
// Uses quotes as requested in options
521-
let single_quote = CodegenOptions { single_quote: true, ..CodegenOptions::default() };
522-
test_options("let x = \"'\";", "let x = '\\'';\n", single_quote);
523-
let double_quote = CodegenOptions { single_quote: false, ..CodegenOptions::default() };
524-
test_options("let x = '\\\"';", "let x = \"\\\"\";\n", double_quote);
525-
526514
// `${` only escaped when quote is backtick
527-
test("let x = '${}';", "let x = \"${}\";\n");
528-
test_minify("let x = '${}';", "let x=\"${}\";");
515+
test("let x = \"${}\";", "let x = \"${}\";\n");
516+
test_minify("let x = \"${}\";", "let x=\"${}\";");
529517
test("let x = '\"\"${}';", "let x = \"\\\"\\\"${}\";\n");
530518
test_minify("let x = '\"\"${}';", "let x='\"\"${}';");
531-
test("let x = '\"\"\\'\\'${}';", "let x = \"\\\"\\\"\\'\\'${}\";\n");
519+
test("let x = '\"\"\\'\\'${}';", "let x = \"\\\"\\\"''${}\";\n");
532520
test_minify("let x = '\"\"\\'\\'${}';", "let x=`\"\"''\\${}`;");
533521
test_minify("let x = '\\'\\'\\'\"\"\"${}';", "let x=`'''\"\"\"\\${}`;");
534522

535523
// Lossy replacement character
536-
test("let x = '\\u{FFFD}';", "let x = \"\\u{FFFD}\";\n");
524+
test("let x = \"\\u{FFFD}\";", "let x = \"\";\n");
537525
test_minify("let x = \"\\u{FFFD}\";", "let x=`��`;");
538526
test(
539-
"let x = '� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} �';",
540-
"let x = \"� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD}\";\n",
527+
"let x = \"� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} �\";",
528+
"let x = \"� ��� � ���\";\n",
541529
);
542530
test_minify(
543-
"let x = '� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} �';",
531+
"let x = \"� ��� \\u{FFFD} \\u{FFFD}\\u{FFFD}\\u{FFFD} �\";",
544532
"let x=`� ��� � ��� �`;",
545533
);
546534
// Lone surrogates
547535
test(
548-
"let x = '\\uD800 \\uDBFF \\uDC00 \\uDFFF';",
549-
"let x = \"\\uD800 \\uDBFF \\uDC00 \\uDFFF\";\n",
536+
"let x = \"\\uD800 \\uDBFF \\uDC00 \\uDFFF\";",
537+
"let x = \"\\ud800 \\udbff \\udc00 \\udfff\";\n",
550538
);
551539
test_minify(
552-
"let x = '\\uD800 \\uDBFF \\uDC00 \\uDFFF';",
540+
"let x = \"\\uD800 \\uDBFF \\uDC00 \\uDFFF\";",
553541
"let x=`\\ud800 \\udbff \\udc00 \\udfff`;",
554542
);
555-
test("let x = '\\uD800\\u{41}';", "let x = \"\\uD800\\u{41}\";\n");
556-
test_minify("let x = '\\uD800\\u{41}';", "let x=`\\ud800A`;");
543+
test("let x = \"\\uD800\u{41}\";", "let x = \"\\ud800A\";\n");
544+
test_minify("let x = \"\\uD800\u{41}\";", "let x=`\\ud800A`;");
557545
// Invalid pairs
558546
test(
559-
"let x = '\\uD800\\uDBFF \\uDC00\\uDFFF';",
560-
"let x = \"\\uD800\\uDBFF \\uDC00\\uDFFF\";\n",
547+
"let x = \"\\uD800\\uDBFF \\uDC00\\uDFFF\";",
548+
"let x = \"\\ud800\\udbff \\udc00\\udfff\";\n",
561549
);
562550
test_minify(
563-
"let x = '\\uD800\\uDBFF \\uDC00\\uDFFF';",
551+
"let x = \"\\uD800\\uDBFF \\uDC00\\uDFFF\";",
564552
"let x=`\\ud800\\udbff \\udc00\\udfff`;",
565553
);
566554
// Lone surrogates and lossy replacement characters
567555
test(
568-
"let x = '��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}';",
569-
"let x = \"��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}\";\n",
556+
"let x = \"��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}\";",
557+
"let x = \"����\\ud800\\udbff����\\udc00\\udfff����\";\n",
570558
);
571559
test_minify(
572-
"let x = '��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}';",
560+
"let x = \"��\\u{FFFD}\\u{FFFD}\\uD800\\uDBFF��\\u{FFFD}\\u{FFFD}\\uDC00\\uDFFF��\\u{FFFD}\\u{FFFD}\";",
573561
"let x=`����\\ud800\\udbff����\\udc00\\udfff����`;",
574562
);
575563

0 commit comments

Comments
 (0)