Skip to content

Commit 5eabe2e

Browse files
committed
Write 4-byte characters (surrogate pairs) instead of escapes
1 parent 89b2381 commit 5eabe2e

File tree

5 files changed

+45
-2
lines changed

5 files changed

+45
-2
lines changed

src/main/java/com/fasterxml/jackson/core/JsonGenerator.java

+2
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,8 @@ public enum Feature {
202202
*/
203203
WRITE_BIGDECIMAL_AS_PLAIN(false),
204204

205+
WRITE_UTF8_SURROGATES(false),
206+
205207
// // Schema/Validity support features
206208

207209
/**

src/main/java/com/fasterxml/jackson/core/StreamWriteFeature.java

+2
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ public enum StreamWriteFeature
7474
*/
7575
WRITE_BIGDECIMAL_AS_PLAIN(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN),
7676

77+
WRITE_UTF8_SURROGATES(JsonGenerator.Feature.WRITE_UTF8_SURROGATES),
78+
7779
// // Schema/Validity support features
7880

7981
/**

src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java

+28-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import java.io.*;
44
import java.math.BigDecimal;
55
import java.math.BigInteger;
6+
import java.nio.charset.StandardCharsets;
67

78
import com.fasterxml.jackson.core.*;
89
import com.fasterxml.jackson.core.io.CharTypes;
@@ -659,6 +660,10 @@ public void writeUTF8String(byte[] text, int offset, int len) throws IOException
659660
_outputBuffer[_outputTail++] = _quoteChar;
660661
}
661662

663+
private boolean isSurrogatePair(char ch) {
664+
return (ch & 0xD800) == 0xD800;
665+
}
666+
662667
/*
663668
/**********************************************************
664669
/* Output method implementations, unprocessed ("raw")
@@ -1510,7 +1515,14 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
15101515
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
15111516
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
15121517
} else {
1513-
outputPtr = _outputMultiByteChar(ch, outputPtr);
1518+
// multibyte character
1519+
if (Feature.WRITE_UTF8_SURROGATES.enabledIn(_features) && isSurrogatePair((char) ch) && offset < end) {
1520+
char highSurrogate = (char) ch;
1521+
char lowSurrogate = cbuf[offset++];
1522+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1523+
} else {
1524+
outputPtr = _outputMultiByteChar(ch, outputPtr);
1525+
}
15141526
}
15151527
}
15161528
_outputTail = outputPtr;
@@ -1548,7 +1560,14 @@ private final void _writeStringSegment2(final String text, int offset, final int
15481560
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
15491561
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
15501562
} else {
1551-
outputPtr = _outputMultiByteChar(ch, outputPtr);
1563+
// multibyte character
1564+
if (Feature.WRITE_UTF8_SURROGATES.enabledIn(_features) && isSurrogatePair((char) ch) && offset < end) {
1565+
char highSurrogate = (char) ch;
1566+
char lowSurrogate = text.charAt(offset++);
1567+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1568+
} else {
1569+
outputPtr = _outputMultiByteChar(ch, outputPtr);
1570+
}
15521571
}
15531572
}
15541573
_outputTail = outputPtr;
@@ -2133,6 +2152,13 @@ protected final void _outputSurrogates(int surr1, int surr2) throws IOException
21332152
bbuf[_outputTail++] = (byte) (0x80 | (c & 0x3f));
21342153
}
21352154

2155+
private int _outputSurrogatePair(char highSurrogate, char lowSurrogate, int outputPtr) {
2156+
String s = String.valueOf(highSurrogate) + lowSurrogate;
2157+
byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
2158+
System.arraycopy(bytes, 0, _outputBuffer, outputPtr, bytes.length);
2159+
return outputPtr + bytes.length;
2160+
}
2161+
21362162
/**
21372163
*
21382164
* @param ch

src/test/java/com/fasterxml/jackson/core/json/GeneratorFeaturesTest.java

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ void configDefaults() throws IOException
2828

2929
assertFalse(g.isEnabled(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN));
3030
assertFalse(g.isEnabled(StreamWriteFeature.WRITE_BIGDECIMAL_AS_PLAIN));
31+
assertFalse(g.isEnabled(StreamWriteFeature.WRITE_UTF8_SURROGATES));
3132

3233
assertTrue(g.canOmitFields());
3334
assertFalse(g.canWriteBinaryNatively());

src/test/java/com/fasterxml/jackson/core/json/StringGenerationTest.java

+12
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import java.util.Random;
88

9+
import static com.fasterxml.jackson.core.JsonGenerator.Feature;
910
import static org.junit.jupiter.api.Assertions.*;
1011

1112
import org.junit.jupiter.api.Test;
@@ -86,6 +87,17 @@ void longerRandomMultiChunk() throws Exception
8687
}
8788
}
8889

90+
@Test
91+
public void testWritingSurrogatePairs() throws IOException {
92+
ByteArrayOutputStream stream = new ByteArrayOutputStream();
93+
JsonGenerator generator = FACTORY.createGenerator(stream, JsonEncoding.UTF8).enable(Feature.WRITE_UTF8_SURROGATES);
94+
String string = "システム\uD867\uDE3D"; // システム𩸽
95+
generator.writeString(string);
96+
generator.flush();
97+
generator.close();
98+
assertEquals("\"" + string + "\"", stream.toString());
99+
}
100+
89101
/*
90102
/**********************************************************
91103
/* Internal methods

0 commit comments

Comments
 (0)