Skip to content

Commit 025be46

Browse files
committed
fix(parser): store lone surrogates as escape sequence
1 parent 68f53e0 commit 025be46

File tree

21 files changed

+433
-253
lines changed

21 files changed

+433
-253
lines changed

crates/oxc_ast/src/ast/literal.rs

+6-2
Original file line number | Diff line number | Diff line change
@@ -85,10 +85,14 @@ pub struct StringLiteral<'a> {
8585
#[content_eq(skip)]
8686
pub raw: Option<Atom<'a>>,
8787

88-
/// The string value contains replacement character (U+FFFD).
88+
/// The string value contains lone surrogates.
89+
///
90+
/// `value` is encoded using `\u{FFFD}` (the lossy replacement character) as an escape character.
91+
/// Lone surrogates are encoded as `\u{FFFD}XXXX`, where `XXXX` is the code unit in hex.
92+
/// The lossy escape character itself is encoded as `\u{FFFD}fffd`.
8993
#[builder(default)]
9094
#[estree(skip)]
91-
pub lossy: bool,
95+
pub lone_surrogates: bool,
9296
}
9397

9498
/// BigInt literal

crates/oxc_ast/src/generated/assert_layouts.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -776,7 +776,7 @@ const _: () = {
776776
assert!(offset_of!(StringLiteral, span) == 0);
777777
assert!(offset_of!(StringLiteral, value) == 8);
778778
assert!(offset_of!(StringLiteral, raw) == 24);
779-
assert!(offset_of!(StringLiteral, lossy) == 40);
779+
assert!(offset_of!(StringLiteral, lone_surrogates) == 40);
780780

781781
assert!(size_of::<BigIntLiteral>() == 32);
782782
assert!(align_of::<BigIntLiteral>() == 8);
@@ -2167,7 +2167,7 @@ const _: () = {
21672167
assert!(offset_of!(StringLiteral, span) == 0);
21682168
assert!(offset_of!(StringLiteral, value) == 8);
21692169
assert!(offset_of!(StringLiteral, raw) == 16);
2170-
assert!(offset_of!(StringLiteral, lossy) == 24);
2170+
assert!(offset_of!(StringLiteral, lone_surrogates) == 24);
21712171

21722172
assert!(size_of::<BigIntLiteral>() == 20);
21732173
assert!(align_of::<BigIntLiteral>() == 4);

crates/oxc_ast/src/generated/ast_builder.rs

+91-52
Original file line number | Diff line number | Diff line change
@@ -276,27 +276,32 @@ impl<'a> AstBuilder<'a> {
276276
Expression::StringLiteral(self.alloc_string_literal(span, value, raw))
277277
}
278278

279-
/// Build an [`Expression::StringLiteral`] with `lossy`.
279+
/// Build an [`Expression::StringLiteral`] with `lone_surrogates`.
280280
///
281281
/// This node contains a [`StringLiteral`] that will be stored in the memory arena.
282282
///
283283
/// ## Parameters
284284
/// * `span`: Node location in source code
285285
/// * `value`: The value of the string.
286286
/// * `raw`: The raw string as it appears in source code.
287-
/// * `lossy`: The string value contains replacement character (U+FFFD).
287+
/// * `lone_surrogates`: The string value contains lone surrogates.
288288
#[inline]
289-
pub fn expression_string_literal_with_lossy<A>(
289+
pub fn expression_string_literal_with_lone_surrogates<A>(
290290
self,
291291
span: Span,
292292
value: A,
293293
raw: Option<Atom<'a>>,
294-
lossy: bool,
294+
lone_surrogates: bool,
295295
) -> Expression<'a>
296296
where
297297
A: IntoIn<'a, Atom<'a>>,
298298
{
299-
Expression::StringLiteral(self.alloc_string_literal_with_lossy(span, value, raw, lossy))
299+
Expression::StringLiteral(self.alloc_string_literal_with_lone_surrogates(
300+
span,
301+
value,
302+
raw,
303+
lone_surrogates,
304+
))
300305
}
301306

302307
/// Build an [`Expression::TemplateLiteral`].
@@ -7843,25 +7848,30 @@ impl<'a> AstBuilder<'a> {
78437848
ImportAttributeKey::StringLiteral(self.string_literal(span, value, raw))
78447849
}
78457850

7846-
/// Build an [`ImportAttributeKey::StringLiteral`] with `lossy`.
7851+
/// Build an [`ImportAttributeKey::StringLiteral`] with `lone_surrogates`.
78477852
///
78487853
/// ## Parameters
78497854
/// * `span`: Node location in source code
78507855
/// * `value`: The value of the string.
78517856
/// * `raw`: The raw string as it appears in source code.
7852-
/// * `lossy`: The string value contains replacement character (U+FFFD).
7857+
/// * `lone_surrogates`: The string value contains lone surrogates.
78537858
#[inline]
7854-
pub fn import_attribute_key_string_literal_with_lossy<A>(
7859+
pub fn import_attribute_key_string_literal_with_lone_surrogates<A>(
78557860
self,
78567861
span: Span,
78577862
value: A,
78587863
raw: Option<Atom<'a>>,
7859-
lossy: bool,
7864+
lone_surrogates: bool,
78607865
) -> ImportAttributeKey<'a>
78617866
where
78627867
A: IntoIn<'a, Atom<'a>>,
78637868
{
7864-
ImportAttributeKey::StringLiteral(self.string_literal_with_lossy(span, value, raw, lossy))
7869+
ImportAttributeKey::StringLiteral(self.string_literal_with_lone_surrogates(
7870+
span,
7871+
value,
7872+
raw,
7873+
lone_surrogates,
7874+
))
78657875
}
78667876

78677877
/// Build an [`ExportNamedDeclaration`].
@@ -8442,25 +8452,30 @@ impl<'a> AstBuilder<'a> {
84428452
ModuleExportName::StringLiteral(self.string_literal(span, value, raw))
84438453
}
84448454

8445-
/// Build a [`ModuleExportName::StringLiteral`] with `lossy`.
8455+
/// Build a [`ModuleExportName::StringLiteral`] with `lone_surrogates`.
84468456
///
84478457
/// ## Parameters
84488458
/// * `span`: Node location in source code
84498459
/// * `value`: The value of the string.
84508460
/// * `raw`: The raw string as it appears in source code.
8451-
/// * `lossy`: The string value contains replacement character (U+FFFD).
8461+
/// * `lone_surrogates`: The string value contains lone surrogates.
84528462
#[inline]
8453-
pub fn module_export_name_string_literal_with_lossy<A>(
8463+
pub fn module_export_name_string_literal_with_lone_surrogates<A>(
84548464
self,
84558465
span: Span,
84568466
value: A,
84578467
raw: Option<Atom<'a>>,
8458-
lossy: bool,
8468+
lone_surrogates: bool,
84598469
) -> ModuleExportName<'a>
84608470
where
84618471
A: IntoIn<'a, Atom<'a>>,
84628472
{
8463-
ModuleExportName::StringLiteral(self.string_literal_with_lossy(span, value, raw, lossy))
8473+
ModuleExportName::StringLiteral(self.string_literal_with_lone_surrogates(
8474+
span,
8475+
value,
8476+
raw,
8477+
lone_surrogates,
8478+
))
84648479
}
84658480

84668481
/// Build a [`V8IntrinsicExpression`].
@@ -8598,7 +8613,12 @@ impl<'a> AstBuilder<'a> {
85988613
where
85998614
A: IntoIn<'a, Atom<'a>>,
86008615
{
8601-
StringLiteral { span, value: value.into_in(self.allocator), raw, lossy: Default::default() }
8616+
StringLiteral {
8617+
span,
8618+
value: value.into_in(self.allocator),
8619+
raw,
8620+
lone_surrogates: Default::default(),
8621+
}
86028622
}
86038623

86048624
/// Build a [`StringLiteral`], and store it in the memory arena.
@@ -8622,50 +8642,53 @@ impl<'a> AstBuilder<'a> {
86228642
Box::new_in(self.string_literal(span, value, raw), self.allocator)
86238643
}
86248644

8625-
/// Build a [`StringLiteral`] with `lossy`.
8645+
/// Build a [`StringLiteral`] with `lone_surrogates`.
86268646
///
8627-
/// If you want the built node to be allocated in the memory arena, use [`AstBuilder::alloc_string_literal_with_lossy`] instead.
8647+
/// If you want the built node to be allocated in the memory arena, use [`AstBuilder::alloc_string_literal_with_lone_surrogates`] instead.
86288648
///
86298649
/// ## Parameters
86308650
/// * `span`: Node location in source code
86318651
/// * `value`: The value of the string.
86328652
/// * `raw`: The raw string as it appears in source code.
8633-
/// * `lossy`: The string value contains replacement character (U+FFFD).
8653+
/// * `lone_surrogates`: The string value contains lone surrogates.
86348654
#[inline]
8635-
pub fn string_literal_with_lossy<A>(
8655+
pub fn string_literal_with_lone_surrogates<A>(
86368656
self,
86378657
span: Span,
86388658
value: A,
86398659
raw: Option<Atom<'a>>,
8640-
lossy: bool,
8660+
lone_surrogates: bool,
86418661
) -> StringLiteral<'a>
86428662
where
86438663
A: IntoIn<'a, Atom<'a>>,
86448664
{
8645-
StringLiteral { span, value: value.into_in(self.allocator), raw, lossy }
8665+
StringLiteral { span, value: value.into_in(self.allocator), raw, lone_surrogates }
86468666
}
86478667

8648-
/// Build a [`StringLiteral`] with `lossy`, and store it in the memory arena.
8668+
/// Build a [`StringLiteral`] with `lone_surrogates`, and store it in the memory arena.
86498669
///
8650-
/// Returns a [`Box`] containing the newly-allocated node. If you want a stack-allocated node, use [`AstBuilder::string_literal_with_lossy`] instead.
8670+
/// Returns a [`Box`] containing the newly-allocated node. If you want a stack-allocated node, use [`AstBuilder::string_literal_with_lone_surrogates`] instead.
86518671
///
86528672
/// ## Parameters
86538673
/// * `span`: Node location in source code
86548674
/// * `value`: The value of the string.
86558675
/// * `raw`: The raw string as it appears in source code.
8656-
/// * `lossy`: The string value contains replacement character (U+FFFD).
8676+
/// * `lone_surrogates`: The string value contains lone surrogates.
86578677
#[inline]
8658-
pub fn alloc_string_literal_with_lossy<A>(
8678+
pub fn alloc_string_literal_with_lone_surrogates<A>(
86598679
self,
86608680
span: Span,
86618681
value: A,
86628682
raw: Option<Atom<'a>>,
8663-
lossy: bool,
8683+
lone_surrogates: bool,
86648684
) -> Box<'a, StringLiteral<'a>>
86658685
where
86668686
A: IntoIn<'a, Atom<'a>>,
86678687
{
8668-
Box::new_in(self.string_literal_with_lossy(span, value, raw, lossy), self.allocator)
8688+
Box::new_in(
8689+
self.string_literal_with_lone_surrogates(span, value, raw, lone_surrogates),
8690+
self.allocator,
8691+
)
86698692
}
86708693

86718694
/// Build a [`BigIntLiteral`].
@@ -9444,29 +9467,32 @@ impl<'a> AstBuilder<'a> {
94449467
JSXAttributeValue::StringLiteral(self.alloc_string_literal(span, value, raw))
94459468
}
94469469

9447-
/// Build a [`JSXAttributeValue::StringLiteral`] with `lossy`.
9470+
/// Build a [`JSXAttributeValue::StringLiteral`] with `lone_surrogates`.
94489471
///
94499472
/// This node contains a [`StringLiteral`] that will be stored in the memory arena.
94509473
///
94519474
/// ## Parameters
94529475
/// * `span`: Node location in source code
94539476
/// * `value`: The value of the string.
94549477
/// * `raw`: The raw string as it appears in source code.
9455-
/// * `lossy`: The string value contains replacement character (U+FFFD).
9478+
/// * `lone_surrogates`: The string value contains lone surrogates.
94569479
#[inline]
9457-
pub fn jsx_attribute_value_string_literal_with_lossy<A>(
9480+
pub fn jsx_attribute_value_string_literal_with_lone_surrogates<A>(
94589481
self,
94599482
span: Span,
94609483
value: A,
94619484
raw: Option<Atom<'a>>,
9462-
lossy: bool,
9485+
lone_surrogates: bool,
94639486
) -> JSXAttributeValue<'a>
94649487
where
94659488
A: IntoIn<'a, Atom<'a>>,
94669489
{
9467-
JSXAttributeValue::StringLiteral(
9468-
self.alloc_string_literal_with_lossy(span, value, raw, lossy),
9469-
)
9490+
JSXAttributeValue::StringLiteral(self.alloc_string_literal_with_lone_surrogates(
9491+
span,
9492+
value,
9493+
raw,
9494+
lone_surrogates,
9495+
))
94709496
}
94719497

94729498
/// Build a [`JSXAttributeValue::ExpressionContainer`].
@@ -9949,27 +9975,32 @@ impl<'a> AstBuilder<'a> {
99499975
TSEnumMemberName::String(self.alloc_string_literal(span, value, raw))
99509976
}
99519977

9952-
/// Build a [`TSEnumMemberName::String`] with `lossy`.
9978+
/// Build a [`TSEnumMemberName::String`] with `lone_surrogates`.
99539979
///
99549980
/// This node contains a [`StringLiteral`] that will be stored in the memory arena.
99559981
///
99569982
/// ## Parameters
99579983
/// * `span`: Node location in source code
99589984
/// * `value`: The value of the string.
99599985
/// * `raw`: The raw string as it appears in source code.
9960-
/// * `lossy`: The string value contains replacement character (U+FFFD).
9986+
/// * `lone_surrogates`: The string value contains lone surrogates.
99619987
#[inline]
9962-
pub fn ts_enum_member_name_string_with_lossy<A>(
9988+
pub fn ts_enum_member_name_string_with_lone_surrogates<A>(
99639989
self,
99649990
span: Span,
99659991
value: A,
99669992
raw: Option<Atom<'a>>,
9967-
lossy: bool,
9993+
lone_surrogates: bool,
99689994
) -> TSEnumMemberName<'a>
99699995
where
99709996
A: IntoIn<'a, Atom<'a>>,
99719997
{
9972-
TSEnumMemberName::String(self.alloc_string_literal_with_lossy(span, value, raw, lossy))
9998+
TSEnumMemberName::String(self.alloc_string_literal_with_lone_surrogates(
9999+
span,
10000+
value,
10001+
raw,
10002+
lone_surrogates,
10003+
))
997310004
}
997410005

997510006
/// Build a [`TSTypeAnnotation`].
@@ -10106,27 +10137,32 @@ impl<'a> AstBuilder<'a> {
1010610137
TSLiteral::StringLiteral(self.alloc_string_literal(span, value, raw))
1010710138
}
1010810139

10109-
/// Build a [`TSLiteral::StringLiteral`] with `lossy`.
10140+
/// Build a [`TSLiteral::StringLiteral`] with `lone_surrogates`.
1011010141
///
1011110142
/// This node contains a [`StringLiteral`] that will be stored in the memory arena.
1011210143
///
1011310144
/// ## Parameters
1011410145
/// * `span`: Node location in source code
1011510146
/// * `value`: The value of the string.
1011610147
/// * `raw`: The raw string as it appears in source code.
10117-
/// * `lossy`: The string value contains replacement character (U+FFFD).
10148+
/// * `lone_surrogates`: The string value contains lone surrogates.
1011810149
#[inline]
10119-
pub fn ts_literal_string_literal_with_lossy<A>(
10150+
pub fn ts_literal_string_literal_with_lone_surrogates<A>(
1012010151
self,
1012110152
span: Span,
1012210153
value: A,
1012310154
raw: Option<Atom<'a>>,
10124-
lossy: bool,
10155+
lone_surrogates: bool,
1012510156
) -> TSLiteral<'a>
1012610157
where
1012710158
A: IntoIn<'a, Atom<'a>>,
1012810159
{
10129-
TSLiteral::StringLiteral(self.alloc_string_literal_with_lossy(span, value, raw, lossy))
10160+
TSLiteral::StringLiteral(self.alloc_string_literal_with_lone_surrogates(
10161+
span,
10162+
value,
10163+
raw,
10164+
lone_surrogates,
10165+
))
1013010166
}
1013110167

1013210168
/// Build a [`TSLiteral::TemplateLiteral`].
@@ -13387,27 +13423,30 @@ impl<'a> AstBuilder<'a> {
1338713423
TSModuleDeclarationName::StringLiteral(self.string_literal(span, value, raw))
1338813424
}
1338913425

13390-
/// Build a [`TSModuleDeclarationName::StringLiteral`] with `lossy`.
13426+
/// Build a [`TSModuleDeclarationName::StringLiteral`] with `lone_surrogates`.
1339113427
///
1339213428
/// ## Parameters
1339313429
/// * `span`: Node location in source code
1339413430
/// * `value`: The value of the string.
1339513431
/// * `raw`: The raw string as it appears in source code.
13396-
/// * `lossy`: The string value contains replacement character (U+FFFD).
13432+
/// * `lone_surrogates`: The string value contains lone surrogates.
1339713433
#[inline]
13398-
pub fn ts_module_declaration_name_string_literal_with_lossy<A>(
13434+
pub fn ts_module_declaration_name_string_literal_with_lone_surrogates<A>(
1339913435
self,
1340013436
span: Span,
1340113437
value: A,
1340213438
raw: Option<Atom<'a>>,
13403-
lossy: bool,
13439+
lone_surrogates: bool,
1340413440
) -> TSModuleDeclarationName<'a>
1340513441
where
1340613442
A: IntoIn<'a, Atom<'a>>,
1340713443
{
13408-
TSModuleDeclarationName::StringLiteral(
13409-
self.string_literal_with_lossy(span, value, raw, lossy),
13410-
)
13444+
TSModuleDeclarationName::StringLiteral(self.string_literal_with_lone_surrogates(
13445+
span,
13446+
value,
13447+
raw,
13448+
lone_surrogates,
13449+
))
1341113450
}
1341213451

1341313452
/// Build a [`TSModuleDeclarationBody::TSModuleDeclaration`].

crates/oxc_ast/src/generated/derive_clone_in.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -4843,7 +4843,7 @@ impl<'new_alloc> CloneIn<'new_alloc> for StringLiteral<'_> {
48434843
span: CloneIn::clone_in(&self.span, allocator),
48444844
value: CloneIn::clone_in(&self.value, allocator),
48454845
raw: CloneIn::clone_in(&self.raw, allocator),
4846-
lossy: CloneIn::clone_in(&self.lossy, allocator),
4846+
lone_surrogates: CloneIn::clone_in(&self.lone_surrogates, allocator),
48474847
}
48484848
}
48494849

@@ -4852,7 +4852,7 @@ impl<'new_alloc> CloneIn<'new_alloc> for StringLiteral<'_> {
48524852
span: CloneIn::clone_in_with_semantic_ids(&self.span, allocator),
48534853
value: CloneIn::clone_in_with_semantic_ids(&self.value, allocator),
48544854
raw: CloneIn::clone_in_with_semantic_ids(&self.raw, allocator),
4855-
lossy: CloneIn::clone_in_with_semantic_ids(&self.lossy, allocator),
4855+
lone_surrogates: CloneIn::clone_in_with_semantic_ids(&self.lone_surrogates, allocator),
48564856
}
48574857
}
48584858
}

0 commit comments

Comments
 (0)