diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/action/QueryChangeStreamAction.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/action/QueryChangeStreamAction.java index 3176abd9f247..2e65ec71a427 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/action/QueryChangeStreamAction.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/action/QueryChangeStreamAction.java @@ -303,14 +303,24 @@ public ProcessContinuation run( throw e; } - LOG.debug("[{}] change stream completed successfully", token); - if (tracker.tryClaim(endTimestamp)) { + LOG.debug( + "[{}] change stream completed successfully up to {}", token, changeStreamQueryEndTimestamp); + if (!tracker.tryClaim(changeStreamQueryEndTimestamp)) { + return ProcessContinuation.stop(); + } + + if (changeStreamQueryEndTimestamp.equals(endTimestamp)) { LOG.debug("[{}] Finishing partition", token); + // TODO: This should be performed after the commit succeeds. Since bundle finalizers are not + // guaranteed to be called, this needs to be performed in a subsequent fused stage. partitionMetadataDao.updateToFinished(token); metrics.decActivePartitionReadCounter(); LOG.info("[{}] After attempting to finish the partition", token); + return ProcessContinuation.stop(); } - return ProcessContinuation.stop(); + + LOG.info("[{}] Rescheduling partition where query completed due to not being finished", token); + return ProcessContinuation.resume(); } private BundleFinalizer.Callback updateWatermarkCallback( @@ -339,8 +349,8 @@ private boolean isTimestampOutOfRange(SpannerException e) { } // Return (now + 2 mins) as the end timestamp for reading change streams. This is only used if - // users want to run the connector forever. 
This approach works because Google Dataflow - // checkpoints every 5s or 5MB output provided and the change stream query has deadline for 1 min. + // users want to run the connector forever. If the end timestamp is reached, we will resume + // processing from that timestamp on a subsequent DoFn execution. private Timestamp getNextReadChangeStreamEndTimestamp() { final Timestamp current = Timestamp.now(); return Timestamp.ofTimeSecondsAndNanos(current.getSeconds() + 2 * 60, current.getNanos()); diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/action/QueryChangeStreamActionTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/action/QueryChangeStreamActionTest.java index 21f5a888b14b..436057fdf296 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/action/QueryChangeStreamActionTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/changestreams/action/QueryChangeStreamActionTest.java @@ -17,8 +17,10 @@ */ package org.apache.beam.sdk.io.gcp.spanner.changestreams.action; +import static org.apache.beam.sdk.io.gcp.spanner.changestreams.ChangeStreamsConstants.MAX_INCLUSIVE_END_AT; import static org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata.State.SCHEDULED; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; @@ -55,6 +57,7 @@ import org.joda.time.Instant; import org.junit.Before; import org.junit.Test; +import org.mockito.ArgumentCaptor; public class QueryChangeStreamActionTest { private static final String PARTITION_TOKEN = "partitionToken"; @@ -613,6 +616,53 @@ public void testQueryChangeStreamWithStreamFinished() { 
assertEquals(ProcessContinuation.stop(), result); verify(partitionMetadataDao).updateWatermark(PARTITION_TOKEN, WATERMARK_TIMESTAMP); verify(partitionMetadataDao).updateToFinished(PARTITION_TOKEN); + verify(metrics).decActivePartitionReadCounter(); + + verify(dataChangeRecordAction, never()).run(any(), any(), any(), any(), any(), any()); + verify(heartbeatRecordAction, never()).run(any(), any(), any(), any(), any()); + verify(childPartitionsRecordAction, never()).run(any(), any(), any(), any(), any()); + verify(partitionStartRecordAction, never()).run(any(), any(), any(), any(), any()); + verify(partitionEndRecordAction, never()).run(any(), any(), any(), any(), any()); + verify(partitionEventRecordAction, never()).run(any(), any(), any(), any(), any()); + } + + @Test + public void testQueryChangeStreamFinishedWithResume() { + partition = + PartitionMetadata.newBuilder() + .setPartitionToken(PARTITION_TOKEN) + .setParentTokens(Sets.newHashSet("parentToken")) + .setStartTimestamp(PARTITION_START_TIMESTAMP) + .setEndTimestamp(MAX_INCLUSIVE_END_AT) + .setHeartbeatMillis(PARTITION_HEARTBEAT_MILLIS) + .setState(SCHEDULED) + .setWatermark(WATERMARK_TIMESTAMP) + .setScheduledAt(Timestamp.now()) + .build(); + when(partitionMetadataMapper.from(any())).thenReturn(partition); + + final ChangeStreamResultSet changeStreamResultSet = mock(ChangeStreamResultSet.class); + final ArgumentCaptor<Timestamp> timestampCaptor = ArgumentCaptor.forClass(Timestamp.class); + when(changeStreamDao.changeStreamQuery( + eq(PARTITION_TOKEN), + eq(PARTITION_START_TIMESTAMP), + timestampCaptor.capture(), + eq(PARTITION_HEARTBEAT_MILLIS))) + .thenReturn(changeStreamResultSet); + when(changeStreamResultSet.next()).thenReturn(false); + when(watermarkEstimator.currentWatermark()).thenReturn(WATERMARK); + when(restrictionTracker.tryClaim(any(Timestamp.class))).thenReturn(true); + + final ProcessContinuation result = + action.run( + partition, restrictionTracker, outputReceiver, watermarkEstimator, bundleFinalizer);
+ assertEquals(ProcessContinuation.resume(), result); + assertNotEquals(MAX_INCLUSIVE_END_AT, timestampCaptor.getValue()); + + verify(restrictionTracker).tryClaim(timestampCaptor.getValue()); + verify(partitionMetadataDao).updateWatermark(PARTITION_TOKEN, WATERMARK_TIMESTAMP); + verify(partitionMetadataDao, never()).updateToFinished(PARTITION_TOKEN); + verify(metrics, never()).decActivePartitionReadCounter(); verify(dataChangeRecordAction, never()).run(any(), any(), any(), any(), any(), any()); verify(heartbeatRecordAction, never()).run(any(), any(), any(), any(), any());