Skip to content

Commit e29638d

Browse files
committed
Add comment to code flow.
JAVA-5816
1 parent 759381d commit e29638d

File tree

2 files changed

+73
-44
lines changed

2 files changed

+73
-44
lines changed

driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java

Lines changed: 73 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -307,34 +307,59 @@ private static final class BufferPositionPair {
307307
}
308308

309309
protected int writeCharacters(final String str, final boolean checkNullTermination) {
310-
int len = str.length();
310+
int stringLength = str.length();
311311
int sp = 0;
312312
int prevPos = position;
313313

314-
ByteBuf buf = getCurrentByteBuffer();
315-
int currBufferPos = buf.position();
316-
int limit = buf.limit();
317-
int remaining = limit - currBufferPos;
314+
ByteBuf curBuffer = getCurrentByteBuffer();
315+
int curBufferPos = curBuffer.position();
316+
int curBufferLimit = curBuffer.limit();
317+
int remaining = curBufferLimit - curBufferPos;
318318

319-
if (buf.hasArray()) {
320-
byte[] dst = buf.array();
321-
int arrayOffset = buf.arrayOffset();
319+
if (curBuffer.hasArray()) {
320+
byte[] dst = curBuffer.array();
321+
int arrayOffset = curBuffer.arrayOffset();
322322
if (remaining >= str.length() + 1) {
323-
sp = writeOnArrayAscii(str, dst, arrayOffset + currBufferPos, checkNullTermination);
324-
currBufferPos += sp;
325-
if (sp == len) {
326-
dst[arrayOffset + currBufferPos++] = 0;
323+
// Write ASCII characters directly to the array until we hit a non-ASCII character
324+
sp = writeOnArrayAscii(str, dst, arrayOffset + curBufferPos, checkNullTermination);
325+
curBufferPos += sp;
326+
if (sp == stringLength) {
327+
dst[arrayOffset + curBufferPos++] = 0;
327328
position += sp + 1;
328-
buf.position(currBufferPos);
329+
curBuffer.position(curBufferPos);
329330
return sp + 1;
330331
}
331332
position += sp;
332-
buf.position(currBufferPos);
333+
curBuffer.position(curBufferPos);
333334
}
334335
}
335336

336-
while (sp < len) {
337-
remaining = limit - currBufferPos;
337+
// We get here when the buffer is not backed by an array, when the current buffer lacks room for the whole string plus its null terminator, or when the string contains non-ASCII characters.
338+
return writeOnBuffers(str,
339+
checkNullTermination,
340+
sp,
341+
stringLength,
342+
curBufferLimit,
343+
curBufferPos,
344+
curBuffer,
345+
prevPos);
346+
}
347+
348+
private int writeOnBuffers(final String str,
349+
final boolean checkNullTermination,
350+
final int stringPointer,
351+
final int stringLength,
352+
final int bufferLimit,
353+
final int bufferPos,
354+
final ByteBuf buffer,
355+
final int prevPos) {
356+
int remaining;
357+
int sp = stringPointer;
358+
int curBufferPos = bufferPos;
359+
int curBufferLimit = bufferLimit;
360+
ByteBuf curBuffer = buffer;
361+
while (sp < stringLength) {
362+
remaining = curBufferLimit - curBufferPos;
338363
int c = str.charAt(sp);
339364

340365
if (checkNullTermination && c == 0x0) {
@@ -344,43 +369,49 @@ protected int writeCharacters(final String str, final boolean checkNullTerminati
344369

345370
if (c < 0x80) {
346371
if (remaining == 0) {
347-
buf = getNextByteBuffer();
348-
currBufferPos = 0;
349-
limit = buf.limit();
372+
curBuffer = getNextByteBuffer();
373+
curBufferPos = 0;
374+
curBufferLimit = curBuffer.limit();
350375
}
351-
buf.put((byte) c);
352-
currBufferPos++;
376+
curBuffer.put((byte) c);
377+
curBufferPos++;
353378
position++;
354379
} else if (c < 0x800) {
355380
if (remaining < 2) {
356381
write((byte) (0xc0 + (c >> 6)));
357382
write((byte) (0x80 + (c & 0x3f)));
358383

359-
buf = getCurrentByteBuffer();
360-
currBufferPos = buf.position();
361-
limit = buf.limit();
384+
curBuffer = getCurrentByteBuffer();
385+
curBufferPos = curBuffer.position();
386+
curBufferLimit = curBuffer.limit();
362387
} else {
363-
buf.put((byte) (0xc0 + (c >> 6)));
364-
buf.put((byte) (0x80 + (c & 0x3f)));
365-
currBufferPos += 2;
388+
curBuffer.put((byte) (0xc0 + (c >> 6)));
389+
curBuffer.put((byte) (0x80 + (c & 0x3f)));
390+
curBufferPos += 2;
366391
position += 2;
367392
}
368393
} else {
394+
// Handle characters that need a 3- or 4-byte encoding (may involve surrogate pairs)
369395
c = Character.codePointAt(str, sp);
396+
/*
397+
Malformed surrogate pairs are encoded as-is (each unpaired surrogate as a 3-byte sequence), without substituting a replacement code point.
398+
This is a known deviation from the spec; the current behavior is kept for backward compatibility.
399+
Ticket: JAVA-5575
400+
*/
370401
if (c < 0x10000) {
371402
if (remaining < 3) {
372403
write((byte) (0xe0 + (c >> 12)));
373404
write((byte) (0x80 + ((c >> 6) & 0x3f)));
374405
write((byte) (0x80 + (c & 0x3f)));
375406

376-
buf = getCurrentByteBuffer();
377-
currBufferPos = buf.position();
378-
limit = buf.limit();
407+
curBuffer = getCurrentByteBuffer();
408+
curBufferPos = curBuffer.position();
409+
curBufferLimit = curBuffer.limit();
379410
} else {
380-
buf.put((byte) (0xe0 + (c >> 12)));
381-
buf.put((byte) (0x80 + ((c >> 6) & 0x3f)));
382-
buf.put((byte) (0x80 + (c & 0x3f)));
383-
currBufferPos += 3;
411+
curBuffer.put((byte) (0xe0 + (c >> 12)));
412+
curBuffer.put((byte) (0x80 + ((c >> 6) & 0x3f)));
413+
curBuffer.put((byte) (0x80 + (c & 0x3f)));
414+
curBufferPos += 3;
384415
position += 3;
385416
}
386417
} else {
@@ -390,15 +421,15 @@ protected int writeCharacters(final String str, final boolean checkNullTerminati
390421
write((byte) (0x80 + ((c >> 6) & 0x3f)));
391422
write((byte) (0x80 + (c & 0x3f)));
392423

393-
buf = getCurrentByteBuffer();
394-
currBufferPos = buf.position();
395-
limit = buf.limit();
424+
curBuffer = getCurrentByteBuffer();
425+
curBufferPos = curBuffer.position();
426+
curBufferLimit = curBuffer.limit();
396427
} else {
397-
buf.put((byte) (0xf0 + (c >> 18)));
398-
buf.put((byte) (0x80 + ((c >> 12) & 0x3f)));
399-
buf.put((byte) (0x80 + ((c >> 6) & 0x3f)));
400-
buf.put((byte) (0x80 + (c & 0x3f)));
401-
currBufferPos += 4;
428+
curBuffer.put((byte) (0xf0 + (c >> 18)));
429+
curBuffer.put((byte) (0x80 + ((c >> 12) & 0x3f)));
430+
curBuffer.put((byte) (0x80 + ((c >> 6) & 0x3f)));
431+
curBuffer.put((byte) (0x80 + (c & 0x3f)));
432+
curBufferPos += 4;
402433
position += 4;
403434
}
404435
}

driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -747,7 +747,6 @@ class Utf8StringTests {
747747
@MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders")
748748
void shouldWriteCStringAcrossBuffersUTF8(final BufferProvider bufferProvider) throws IOException {
749749
for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) {
750-
//given
751750
String str = new String(Character.toChars(codePoint)) + "a";
752751
byte[] expectedStringEncoding = str.getBytes(StandardCharsets.UTF_8);
753752
int bufferAllocationSize = expectedStringEncoding.length + "\u0000".length();
@@ -760,7 +759,6 @@ void shouldWriteCStringAcrossBuffersUTF8(final BufferProvider bufferProvider) th
760759
@MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders")
761760
void shouldWriteCStringAcrossBuffersUTF8WithBranch(final BufferProvider bufferProvider) throws IOException {
762761
for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) {
763-
//given
764762
String str = new String(Character.toChars(codePoint)) + "a";
765763
int bufferAllocationSize = str.getBytes(StandardCharsets.UTF_8).length + "\u0000".length();
766764
byte[] expectedEncoding = str.getBytes(StandardCharsets.UTF_8);

0 commit comments

Comments
 (0)