Skip to content

Commit 8a197e7

Browse files
committed
Write 4-byte characters (surrogate pairs) instead of escapes
1 parent 89b2381 commit 8a197e7

File tree

5 files changed

+49
-2
lines changed

5 files changed

+49
-2
lines changed

src/main/java/com/fasterxml/jackson/core/JsonGenerator.java

+2
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,8 @@ public enum Feature {
202202
*/
203203
WRITE_BIGDECIMAL_AS_PLAIN(false),
204204

205+
WRITE_UTF8_SURROGATES(false),
206+
205207
// // Schema/Validity support features
206208

207209
/**

src/main/java/com/fasterxml/jackson/core/StreamWriteFeature.java

+2
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ public enum StreamWriteFeature
7474
*/
7575
WRITE_BIGDECIMAL_AS_PLAIN(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN),
7676

77+
WRITE_UTF8_SURROGATES(JsonGenerator.Feature.WRITE_UTF8_SURROGATES),
78+
7779
// // Schema/Validity support features
7880

7981
/**

src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java

+32-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import java.io.*;
44
import java.math.BigDecimal;
55
import java.math.BigInteger;
6+
import java.nio.charset.StandardCharsets;
67

78
import com.fasterxml.jackson.core.*;
89
import com.fasterxml.jackson.core.io.CharTypes;
@@ -659,6 +660,10 @@ public void writeUTF8String(byte[] text, int offset, int len) throws IOException
659660
_outputBuffer[_outputTail++] = _quoteChar;
660661
}
661662

663+
private boolean isSurrogatePair(char ch) {
664+
return (ch & 0xD800) == 0xD800;
665+
}
666+
662667
/*
663668
/**********************************************************
664669
/* Output method implementations, unprocessed ("raw")
@@ -1489,6 +1494,8 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
14891494
final byte[] outputBuffer = _outputBuffer;
14901495
final int[] escCodes = _outputEscapes;
14911496

1497+
boolean writeSurrogates = Feature.WRITE_UTF8_SURROGATES.enabledIn(_features);
1498+
14921499
while (offset < end) {
14931500
int ch = cbuf[offset++];
14941501
if (ch <= 0x7F) {
@@ -1510,7 +1517,14 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
15101517
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
15111518
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
15121519
} else {
1513-
outputPtr = _outputMultiByteChar(ch, outputPtr);
1520+
// multibyte character
1521+
if (writeSurrogates && isSurrogatePair((char) ch) && offset < end) {
1522+
char highSurrogate = (char) ch;
1523+
char lowSurrogate = cbuf[offset++];
1524+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1525+
} else {
1526+
outputPtr = _outputMultiByteChar(ch, outputPtr);
1527+
}
15141528
}
15151529
}
15161530
_outputTail = outputPtr;
@@ -1527,6 +1541,8 @@ private final void _writeStringSegment2(final String text, int offset, final int
15271541
final byte[] outputBuffer = _outputBuffer;
15281542
final int[] escCodes = _outputEscapes;
15291543

1544+
boolean writeSurrogates = Feature.WRITE_UTF8_SURROGATES.enabledIn(_features);
1545+
15301546
while (offset < end) {
15311547
int ch = text.charAt(offset++);
15321548
if (ch <= 0x7F) {
@@ -1548,7 +1564,14 @@ private final void _writeStringSegment2(final String text, int offset, final int
15481564
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
15491565
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
15501566
} else {
1551-
outputPtr = _outputMultiByteChar(ch, outputPtr);
1567+
// multibyte character
1568+
if (writeSurrogates && isSurrogatePair((char) ch) && offset < end) {
1569+
char highSurrogate = (char) ch;
1570+
char lowSurrogate = text.charAt(offset++);
1571+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1572+
} else {
1573+
outputPtr = _outputMultiByteChar(ch, outputPtr);
1574+
}
15521575
}
15531576
}
15541577
_outputTail = outputPtr;
@@ -2133,6 +2156,13 @@ protected final void _outputSurrogates(int surr1, int surr2) throws IOException
21332156
bbuf[_outputTail++] = (byte) (0x80 | (c & 0x3f));
21342157
}
21352158

2159+
private int _outputSurrogatePair(char highSurrogate, char lowSurrogate, int outputPtr) {
2160+
String s = String.valueOf(highSurrogate) + lowSurrogate;
2161+
byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
2162+
System.arraycopy(bytes, 0, _outputBuffer, outputPtr, bytes.length);
2163+
return outputPtr + bytes.length;
2164+
}
2165+
21362166
/**
21372167
*
21382168
* @param ch

src/test/java/com/fasterxml/jackson/core/json/GeneratorFeaturesTest.java

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ void configDefaults() throws IOException
2828

2929
assertFalse(g.isEnabled(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN));
3030
assertFalse(g.isEnabled(StreamWriteFeature.WRITE_BIGDECIMAL_AS_PLAIN));
31+
assertFalse(g.isEnabled(StreamWriteFeature.WRITE_UTF8_SURROGATES));
3132

3233
assertTrue(g.canOmitFields());
3334
assertFalse(g.canWriteBinaryNatively());

src/test/java/com/fasterxml/jackson/core/json/StringGenerationTest.java

+12
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import java.util.Random;
88

9+
import static com.fasterxml.jackson.core.JsonGenerator.Feature;
910
import static org.junit.jupiter.api.Assertions.*;
1011

1112
import org.junit.jupiter.api.Test;
@@ -86,6 +87,17 @@ void longerRandomMultiChunk() throws Exception
8687
}
8788
}
8889

90+
@Test
91+
public void testWritingSurrogatePairs() throws IOException {
92+
ByteArrayOutputStream stream = new ByteArrayOutputStream();
93+
JsonGenerator generator = FACTORY.createGenerator(stream, JsonEncoding.UTF8).enable(Feature.WRITE_UTF8_SURROGATES);
94+
String string = "システム\uD867\uDE3D"; // システム𩸽
95+
generator.writeString(string);
96+
generator.flush();
97+
generator.close();
98+
assertEquals("\"" + string + "\"", stream.toString());
99+
}
100+
89101
/*
90102
/**********************************************************
91103
/* Internal methods

0 commit comments

Comments
 (0)