Skip to content

Commit ff28f65

Browse files
authored
feat: temp workaround for omg/48020 (#1521)
This should only be used by customers who are affected by omg/48020 (connection stuck after 10MB) and who use JsonStreamWriter directly, before the fix finishes rolling out on Friday 2/11. It can be enabled by calling setReconnectAfter10M(true) on the JsonStreamWriter builder. Write performance will be impacted by this change, but it works around the stuck connection.
1 parent 9fd7aca commit ff28f65

File tree

2 files changed

+97
-5
lines changed

2 files changed

+97
-5
lines changed

google-cloud-bigquerystorage/src/main/java/com/google/cloud/bigquery/storage/v1/JsonStreamWriter.java

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ public class JsonStreamWriter implements AutoCloseable {
5353
private Descriptor descriptor;
5454
private TableSchema tableSchema;
5555
private boolean ignoreUnknownFields = false;
56+
private boolean reconnectAfter10M = false;
57+
private long totalMessageSize = 0;
58+
private long absTotal = 0;
59+
private ProtoSchema protoSchema;
5660

5761
/**
5862
* Constructs the JsonStreamWriter
@@ -71,7 +75,9 @@ private JsonStreamWriter(Builder builder)
7175
} else {
7276
streamWriterBuilder = StreamWriter.newBuilder(builder.streamName, builder.client);
7377
}
74-
streamWriterBuilder.setWriterSchema(ProtoSchemaConverter.convert(this.descriptor));
78+
this.protoSchema = ProtoSchemaConverter.convert(this.descriptor);
79+
this.totalMessageSize = protoSchema.getSerializedSize();
80+
streamWriterBuilder.setWriterSchema(protoSchema);
7581
setStreamWriterSettings(
7682
builder.channelProvider,
7783
builder.credentialsProvider,
@@ -82,6 +88,7 @@ private JsonStreamWriter(Builder builder)
8288
this.streamName = builder.streamName;
8389
this.tableSchema = builder.tableSchema;
8490
this.ignoreUnknownFields = builder.ignoreUnknownFields;
91+
this.reconnectAfter10M = builder.reconnectAfter10M;
8592
}
8693

8794
/**
@@ -122,27 +129,46 @@ public ApiFuture<AppendRowsResponse> append(JSONArray jsonArr, long offset)
122129
this.tableSchema = updatedSchema;
123130
this.descriptor =
124131
BQTableSchemaToProtoDescriptor.convertBQTableSchemaToProtoDescriptor(updatedSchema);
132+
this.protoSchema = ProtoSchemaConverter.convert(this.descriptor);
133+
this.totalMessageSize = protoSchema.getSerializedSize();
125134
// Create a new underlying StreamWriter with the updated TableSchema and Descriptor
126-
this.streamWriter =
127-
streamWriterBuilder
128-
.setWriterSchema(ProtoSchemaConverter.convert(this.descriptor))
129-
.build();
135+
this.streamWriter = streamWriterBuilder.setWriterSchema(this.protoSchema).build();
130136
}
131137
}
132138

133139
ProtoRows.Builder rowsBuilder = ProtoRows.newBuilder();
134140
// Any error in convertJsonToProtoMessage will throw an
135141
// IllegalArgumentException/IllegalStateException/NullPointerException and will halt processing
136142
// of JSON data.
143+
long currentRequestSize = 0;
137144
for (int i = 0; i < jsonArr.length(); i++) {
138145
JSONObject json = jsonArr.getJSONObject(i);
139146
Message protoMessage =
140147
JsonToProtoMessage.convertJsonToProtoMessage(
141148
this.descriptor, this.tableSchema, json, ignoreUnknownFields);
142149
rowsBuilder.addSerializedRows(protoMessage.toByteString());
150+
currentRequestSize += protoMessage.getSerializedSize();
143151
}
144152
// Need to make sure refreshAppendAndSetDescriptor finish first before this can run
145153
synchronized (this) {
154+
this.totalMessageSize += currentRequestSize;
155+
this.absTotal += currentRequestSize;
156+
// Reconnect on every 9.5MB.
157+
if (this.totalMessageSize > 9500000 && this.reconnectAfter10M) {
158+
streamWriter.close();
159+
// Create a new underlying StreamWriter aka establish a new connection.
160+
this.streamWriter = streamWriterBuilder.setWriterSchema(protoSchema).build();
161+
this.totalMessageSize = this.protoSchema.getSerializedSize() + currentRequestSize;
162+
this.absTotal += currentRequestSize;
163+
// Allow first request to pass.
164+
}
165+
LOG.fine(
166+
"Sending a total of:"
167+
+ this.totalMessageSize
168+
+ " "
169+
+ currentRequestSize
170+
+ " "
171+
+ this.absTotal);
146172
final ApiFuture<AppendRowsResponse> appendResponseFuture =
147173
this.streamWriter.append(rowsBuilder.build(), offset);
148174
return appendResponseFuture;
@@ -264,6 +290,7 @@ public static final class Builder {
264290
private boolean createDefaultStream = false;
265291
private String traceId;
266292
private boolean ignoreUnknownFields = false;
293+
private boolean reconnectAfter10M = false;
267294

268295
private static String streamPatternString =
269296
"(projects/[^/]+/datasets/[^/]+/tables/[^/]+)/streams/[^/]+";
@@ -377,6 +404,19 @@ public Builder setIgnoreUnknownFields(boolean ignoreUnknownFields) {
377404
return this;
378405
}
379406

407+
/**
408+
* Setter for reconnectAfter10M, a temporary workaround for omg/48020. The fix for the omg is
409+
* supposed to roll out by 2/11/2022 Friday. If you set this to true, your writes will be slower
410+
* (0.75MB/s per connection), but they will not get stuck as a symptom of omg/48020.
411+
*
412+
* @param reconnectAfter10M whether to enable the reconnect workaround; defaults to false
413+
* @return this Builder, to allow call chaining
414+
*/
415+
public Builder setReconnectAfter10M(boolean reconnectAfter10M) {
416+
this.reconnectAfter10M = reconnectAfter10M;
417+
return this;
418+
}
419+
380420
/**
381421
* Builds JsonStreamWriter
382422
*

google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1/it/ITBigQueryWriteManualClientTest.java

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,58 @@ public void testJsonStreamWriterWithDefaultStream()
412412
}
413413
}
414414

415+
// Exercises the reconnectAfter10M workaround: appends several large batches through a
// JsonStreamWriter built with setReconnectAfter10M(true) and verifies that every append
// completes at the offset it was submitted with.
// This test runs about 1 min.
416+
@Test
417+
public void testJsonStreamWriterWithMessagesOver10M()
418+
throws IOException, InterruptedException, ExecutionException,
419+
Descriptors.DescriptorValidationException {
420+
String tableName = "TableLarge";
421+
TableId tableId = TableId.of(DATASET, tableName);
422+
// Destination table with a single STRING column.
Field col1 = Field.newBuilder("col1", StandardSQLTypeName.STRING).build();
423+
Schema schema = Schema.of(col1);
424+
TableInfo tableInfo = TableInfo.newBuilder(tableId, StandardTableDefinition.of(schema)).build();
425+
bigquery.create(tableInfo);
426+
TableName parent = TableName.of(ServiceOptions.getDefaultProjectId(), DATASET, tableName);
427+
428+
// Create a COMMITTED write stream on the new table; appends below use explicit offsets.
WriteStream writeStream =
429+
client.createWriteStream(
430+
CreateWriteStreamRequest.newBuilder()
431+
.setParent(parent.toString())
432+
.setWriteStream(
433+
WriteStream.newBuilder().setType(WriteStream.Type.COMMITTED).build())
434+
.build());
435+
// Number of append requests, and number of rows carried by each request.
int totalRequest = 10;
436+
int rowBatch = 40000;
437+
ArrayList<ApiFuture<AppendRowsResponse>> allResponses =
438+
new ArrayList<ApiFuture<AppendRowsResponse>>(totalRequest);
439+
// Sends a total of 30MB over the wire.
440+
try (JsonStreamWriter jsonStreamWriter =
441+
JsonStreamWriter.newBuilder(writeStream.getName(), writeStream.getTableSchema())
442+
.setReconnectAfter10M(true)
443+
.build()) {
444+
for (int k = 0; k < totalRequest; k++) {
445+
JSONObject row = new JSONObject();
446+
row.put("col1", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
447+
JSONArray jsonArr = new JSONArray();
448+
// 3MB batch: the same row repeated rowBatch times.
449+
for (int j = 0; j < rowBatch; j++) {
450+
jsonArr.put(row);
451+
}
452+
LOG.info("Appending: " + k + "/" + totalRequest);
453+
// Request k starts at row offset k * rowBatch; the future is collected and checked later.
allResponses.add(jsonStreamWriter.append(jsonArr, k * rowBatch));
454+
}
455+
}
456+
// The try-with-resources block above has already closed the writer at this point.
LOG.info("Waiting for all responses to come back");
457+
for (int i = 0; i < totalRequest; i++) {
458+
try {
459+
// Each response must report the same offset its request was appended at.
Assert.assertEquals(
460+
allResponses.get(i).get().getAppendResult().getOffset().getValue(), i * rowBatch);
461+
} catch (ExecutionException ex) {
462+
// A failed append surfaces here; report it as a test failure.
Assert.fail("Unexpected error " + ex);
463+
}
464+
}
465+
}
466+
415467
@Test
416468
public void testJsonStreamWriterSchemaUpdate()
417469
throws DescriptorValidationException, IOException, InterruptedException, ExecutionException {

0 commit comments

Comments
 (0)