
Commit 72d71ec

Flink SQL splitting tutorial (#1618)
* feat: Flink SQL splitting tutorial
* chore: fix typos in Flink SQL tutorials
* fix failing Flink SQL splitting test
1 parent 01be037 commit 72d71ec

78 files changed (+1224, -11 lines)


.semaphore/semaphore.yml

Lines changed: 3 additions & 0 deletions
@@ -493,3 +493,6 @@ blocks:
 - name: Flink SQL test for joins
   commands:
     - make -C _includes/tutorials/joining-stream-stream/flinksql/code tutorial
+- name: Flink SQL test for splitting
+  commands:
+    - make -C _includes/tutorials/splitting/flinksql/code tutorial
Lines changed: 158 additions & 0 deletions
@@ -0,0 +1,158 @@
+dev:
+  steps:
+    - title: Prerequisites
+      content:
+        - action: skip
+          render:
+            file: shared/markup/dev/docker-prerequisite.adoc
+
+    - title: Initialize the project
+      content:
+        - action: execute
+          file: tutorial-steps/dev/init.sh
+          render:
+            file: tutorials/splitting/flinksql/markup/dev/init.adoc
+
+    - title: Get Confluent Platform
+      content:
+        - action: make_file
+          file: docker-compose.yml
+          render:
+            file: tutorials/splitting/flinksql/markup/dev/make-docker-compose.adoc
+
+        - action: execute_async
+          file: tutorial-steps/dev/docker-compose-up.sh
+          render:
+            file: tutorials/splitting/flinksql/markup/dev/start-compose.adoc
+
+        - action: execute
+          file: tutorial-steps/dev/wait-for-containers.sh
+          render:
+            skip: true
+
+    - title: Write the program interactively using the CLI
+      content:
+        - action: docker_flinksql_cli_session
+          container: flink-sql-client
+          docker_bootup_file: tutorial-steps/dev/start-cli.sh
+          column_width: 20
+          render:
+            file: tutorials/splitting/flinksql/markup/dev/start-cli.adoc
+          stdin:
+            - file: tutorial-steps/dev/create-acting-events.sql
+              render:
+                file: tutorials/splitting/flinksql/markup/dev/create-acting-events.adoc
+
+            - file: tutorial-steps/dev/populate-acting-events.sql
+              render:
+                file: tutorials/splitting/flinksql/markup/dev/populate-acting-events.adoc
+
+            - file: tutorial-steps/dev/transient-query-drama.sql
+              render:
+                file: tutorials/splitting/flinksql/markup/dev/transient-query-drama.adoc
+
+            - file: tutorial-steps/dev/transient-query-other.sql
+              render:
+                file: tutorials/splitting/flinksql/markup/dev/transient-query-other.adoc
+
+            - file: tutorial-steps/dev/create-acting-events-drama.sql
+              render:
+                file: tutorials/splitting/flinksql/markup/dev/create-acting-events-drama.adoc
+
+            - file: tutorial-steps/dev/create-acting-events-fantasy.sql
+              render:
+                file: tutorials/splitting/flinksql/markup/dev/create-acting-events-fantasy.adoc
+
+            - file: tutorial-steps/dev/create-acting-events-other.sql
+              render:
+                file: tutorials/splitting/flinksql/markup/dev/create-acting-events-other.adoc
+
+          stdout:
+            directory: tutorial-steps/dev/outputs
+
+    - title: Validate output
+      content:
+        - action: execute
+          file: tutorial-steps/dev/validate-acting-events-fantasy.sh
+          stdout: tutorial-steps/dev/outputs/validate-acting-events-fantasy.log
+          render:
+            file: tutorials/splitting/flinksql/markup/dev/validate-acting-events-per-genre.adoc
+
+test:
+  steps:
+    - title: Decide what testing tools to use
+      content:
+        - action: skip
+          render:
+            file: tutorials/splitting/flinksql/markup/test/test-architecture.adoc
+
+    - title: Create the test skeleton
+      content:
+        - action: execute
+          file: tutorial-steps/test/make-test-dirs.sh
+          render:
+            file: tutorials/splitting/flinksql/markup/test/make-test-dirs.adoc
+
+        - action: make_file
+          file: build.gradle
+          render:
+            file: tutorials/splitting/flinksql/markup/test/make-build-gradle.adoc
+
+        - action: execute
+          file: tutorial-steps/test/gradle-wrapper.sh
+          render:
+            file: tutorials/splitting/flinksql/markup/test/make-gradle-wrapper.adoc
+
+    - title: Create SQL resources
+      content:
+        - action: make_file
+          file: src/test/resources/create-acting-events.sql.template
+          render:
+            file: tutorials/splitting/flinksql/markup/test/create-resource-create-acting-events.sql.template.adoc
+
+        - action: make_file
+          file: src/test/resources/populate-acting-events.sql
+          render:
+            file: tutorials/splitting/flinksql/markup/test/create-resource-populate-acting-events.sql.adoc
+
+        - action: make_file
+          file: src/test/resources/create-acting-events-drama.sql.template
+          render:
+            file: tutorials/splitting/flinksql/markup/test/create-resource-create-acting-events-drama.sql.template.adoc
+
+        - action: make_file
+          file: src/test/resources/query-acting-events-drama.sql
+          render:
+            file: tutorials/splitting/flinksql/markup/test/create-resource-query-acting-events-drama.sql.adoc
+
+        - action: make_file
+          file: src/test/resources/expected-acting-events-drama.txt
+          render:
+            file: tutorials/splitting/flinksql/markup/test/create-resource-expected-acting-events-drama.txt.adoc
+
+    - title: Write a test
+      content:
+        - action: make_file
+          file: src/test/java/io/confluent/developer/AbstractFlinkKafkaTest.java
+          render:
+            file: tutorials/splitting/flinksql/markup/test/make-test-base.adoc
+
+        - action: make_file
+          file: src/test/java/io/confluent/developer/FlinkSqlSplitStreamTest.java
+          render:
+            file: tutorials/splitting/flinksql/markup/test/make-test.adoc
+
+    - title: Invoke the test
+      content:
+        - action: execute
+          file: tutorial-steps/test/invoke-test.sh
+          render:
+            file: tutorials/splitting/flinksql/markup/test/invoke-test.adoc
+
+ccloud:
+  steps:
+    - title: Run your app to Confluent Cloud
+      content:
+        - action: skip
+          render:
+            file: shared/markup/ccloud/try-ccloud.adoc
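
The harness above only references the SQL step files by name. For orientation, a minimal sketch of the splitting pattern those steps implement might look like the following; the table names, columns, formats, and connector options here are assumptions for illustration, not the tutorial's actual statements (which live in the tutorial-steps/dev/*.sql files and likely use Avro with Schema Registry rather than JSON):

-- Illustrative sketch only: a source table of acting events plus one filtered
-- output table per genre. All names and connector options below are assumed.
CREATE TABLE acting_events (
  name  STRING,
  title STRING,
  genre STRING
) WITH (
  'connector' = 'kafka',
  'topic'     = 'acting-events',
  'properties.bootstrap.servers' = 'localhost:9092',
  'scan.startup.mode' = 'earliest-offset',
  'format'    = 'json'
);

CREATE TABLE acting_events_drama (
  name  STRING,
  title STRING
) WITH (
  'connector' = 'kafka',
  'topic'     = 'acting-events-drama',
  'properties.bootstrap.servers' = 'localhost:9092',
  'format'    = 'json'
);

-- Splitting is a filtered INSERT INTO ... SELECT per output table (drama, fantasy, other).
INSERT INTO acting_events_drama
  SELECT name, title FROM acting_events WHERE genre = 'drama';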

_data/tutorials.yaml

Lines changed: 1 addition & 0 deletions
@@ -46,6 +46,7 @@ splitting:
     kstreams: enabled
     kafka: enabled
     confluent: enabled
+    flinksql: enabled
 merging:
   title: How to merge many streams into one stream
   meta-description: merge many streams into one stream

_includes/tutorials/aggregating-count/flinksql/code/src/test/java/io/confluent/developer/AbstractFlinkKafkaTest.java

Lines changed: 1 addition & 1 deletion
@@ -141,7 +141,7 @@ protected static String tableauResults(TableResult tableResult) {
 
     // The given table result may come from a table backed by the Kafka or Upsert Kafka connector,
     // both of which perform unbounded (neverending) scans. So, in order to prevent tests from blocking
-    // on called to this method, we kick off a thread to kill the underlying job once output has
+    // on calls to this method, we kick off a thread to kill the underlying job once output has
     // been printed.
     //
     // Note: as of Flink 1.17.0, the Kafka connector will support bounded scanning, which would obviate
Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 Since the output of our transient query looks right, the next step is to make the query persistent. This looks exactly like the transient query, except we first create a new table with the https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/connectors/table/upsert-kafka/[Upsert Kafka] connector and then `INSERT INTO` the table. We use the Upsert Kafka connector because we only care about the most recent aggregate for a given title (the key column). The `INSERT INTO` statement returns to the CLI prompt right away, having created a persistent stream processing program running in the Flink cluster, continuously processing input records and updating the resulting `movie_ticket_sales_by_title` table.
 
-Now go ahead and tun the following two commands in your Flink SQL session:
+Now go ahead and run the following two commands in your Flink SQL session:
 +++++
 <pre class="snippet"><code class="sql">{% include_raw tutorials/aggregating-count/flinksql/code/tutorial-steps/dev/create-movie-sales-by-title.sql %}</code></pre>
 +++++
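
For readers skimming the diff, a hedged sketch of what such a persistent aggregation might look like follows; the authoritative statements are in the included create-movie-sales-by-title.sql, and the column names, topic, and connector options below are assumptions:

-- Sketch only: an Upsert Kafka table keyed by title, kept current by a
-- persistent INSERT INTO ... GROUP BY job. Names and options are assumed.
CREATE TABLE movie_ticket_sales_by_title (
  title        STRING,
  tickets_sold BIGINT,
  PRIMARY KEY (title) NOT ENFORCED
) WITH (
  'connector' = 'upsert-kafka',
  'topic'     = 'movie-ticket-sales-by-title',
  'properties.bootstrap.servers' = 'localhost:9092',
  'key.format'   = 'json',
  'value.format' = 'json'
);

INSERT INTO movie_ticket_sales_by_title
  SELECT title, COUNT(*) AS tickets_sold
  FROM movie_ticket_sales
  GROUP BY title;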

_includes/tutorials/aggregating-count/flinksql/markup/dev/create-movie-ticket-sales.adoc

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 This tutorial takes a stream of individual movie ticket sales events and counts the total number of tickets sold per movie. Not all ticket prices are the same (apparently some of these theaters are fancier than others), but the task of the Flink SQL query is just to group and count regardless of ticket price.
 
 This line of Flink SQL DDL creates a table and its underlying Kafka topic to represent the annual sales totals.
-Note that we are defining the schema for the table, which includes three fields: `title`, the name of the movie; `sale_ts`, the time at which the ticket was sold; and `ticket_total_value`, the price paid for the ticket. The statement also the underlying Kafka topic as `movie-ticket-sales`, that it should have a single partition (the default `num.partitions` configured in the broker), and defines Avro as its data format.
+Note that we are defining the schema for the table, which includes three fields: `title`, the name of the movie; `sale_ts`, the time at which the ticket was sold; and `ticket_total_value`, the price paid for the ticket. The statement also specifies the underlying Kafka topic as `movie-ticket-sales`, that it should have a single partition (the default `num.partitions` configured in the broker), and defines Avro as its data format.
 
 +++++
 <pre class="snippet"><code class="sql">{% include_raw tutorials/aggregating-count/flinksql/code/tutorial-steps/dev/create-movie-ticket-sales.sql %}</code></pre>
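
As a rough illustration of the DDL this paragraph describes (the authoritative version is the included create-movie-ticket-sales.sql), a sketch might look like the following; the column types and connector options are assumptions, and JSON is used here only to keep the sketch self-contained, whereas the tutorial itself uses Avro:

-- Sketch only: a three-column table over the movie-ticket-sales topic.
CREATE TABLE movie_ticket_sales (
  title              STRING,
  sale_ts            STRING,
  ticket_total_value INT
) WITH (
  'connector' = 'kafka',
  'topic'     = 'movie-ticket-sales',
  'properties.bootstrap.servers' = 'localhost:9092',
  'scan.startup.mode' = 'earliest-offset',
  'format'    = 'json'
);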

_includes/tutorials/aggregating-count/flinksql/markup/test/make-test.adoc

Lines changed: 1 addition & 1 deletion
@@ -4,4 +4,4 @@ Next, create the test implementation at `src/test/java/io/confluent/developer/Fl
 <pre class="snippet"><code class="java">{% include_raw tutorials/aggregating-count/flinksql/code/src/test/java/io/confluent/developer/FlinkSqlAggregatingCountTest.java %}</code></pre>
 +++++
 
-The test itself it straightforward to follow. It executes the SQL from our resource files, then runs a select statement against the final output `TABLE` of our application and compares the results to what's expected.
+The test itself is straightforward to follow. It executes the SQL from our resource files, then runs a select statement against the final output `TABLE` of our application and compares the results to what's expected.

_includes/tutorials/aggregating-count/ksql/markup/dev/create-movie-ticket-sales.adoc

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 This tutorial takes a stream of individual movie ticket sales events and counts the total number of tickets sold per movie. Not all ticket prices are the same (apparently some of these theaters are fancier than others), but the task of the ksqlDB query is just to group and count regardless of ticket price.
 
-This line of ksqlDB DDL creates a stream and its underlying Kafka topic to represent the annual sales totals. If the topic already exists, then ksqlDB simply registers is as the source of data underlying the new stream. The stream has three fields: `title`, the name of the movie; `sale_ts`, the time at which the ticket was sold; and `ticket_total_value`, the price paid for the ticket. The statement also the underlying Kafka topic as `movie-ticket-sales`, that it should have a single partition, and defines Avro as its data format.
+This line of ksqlDB DDL creates a stream and its underlying Kafka topic to represent the annual sales totals. If the topic already exists, then ksqlDB simply registers is as the source of data underlying the new stream. The stream has three fields: `title`, the name of the movie; `sale_ts`, the time at which the ticket was sold; and `ticket_total_value`, the price paid for the ticket. The statement also specifies the underlying Kafka topic as `movie-ticket-sales`, that it should have a single partition, and defines Avro as its data format.
 
 +++++
 <pre class="snippet"><code class="sql">{% include_raw tutorials/aggregating-count/ksql/code/tutorial-steps/dev/create-movie-ticket-sales.sql %}</code></pre>
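
A hedged ksqlDB sketch of the stream this paragraph describes might look like the following; the authoritative DDL is the included create-movie-ticket-sales.sql, and the field types are assumptions:

-- Sketch only; field types are assumed, the WITH options follow the prose above.
CREATE STREAM movie_ticket_sales (
  title VARCHAR,
  sale_ts VARCHAR,
  ticket_total_value INT
) WITH (
  KAFKA_TOPIC = 'movie-ticket-sales',
  PARTITIONS = 1,
  VALUE_FORMAT = 'AVRO'
);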

_includes/tutorials/aggregating-minmax/flinksql/code/src/test/java/io/confluent/developer/AbstractFlinkKafkaTest.java

Lines changed: 1 addition & 1 deletion
@@ -141,7 +141,7 @@ protected static String tableauResults(TableResult tableResult) {
 
     // The given table result may come from a table backed by the Kafka or Upsert Kafka connector,
     // both of which perform unbounded (neverending) scans. So, in order to prevent tests from blocking
-    // on called to this method, we kick off a thread to kill the underlying job once output has
+    // on calls to this method, we kick off a thread to kill the underlying job once output has
     // been printed.
     //
     // Note: as of Flink 1.17.0, the Kafka connector will support bounded scanning, which would obviate
Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 Since the output of our transient query looks right, the next step is to make the query persistent. This looks exactly like the transient query, except we first create a new table with the https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/connectors/table/upsert-kafka/[Upsert Kafka] connector and then `INSERT INTO` the table. We use the Upsert Kafka connector because we only care about the most recent aggregates for a given release year (the key column). The `INSERT INTO` statement returns to the CLI prompt right away, having created a persistent stream processing program running in the Flink cluster, continuously processing input records and updating the resulting `movie_sales_by_year` table.
 
-Now go ahead and tun the following two commands in your Flink SQL session:
+Now go ahead and run the following two commands in your Flink SQL session:
 +++++
 <pre class="snippet"><code class="sql">{% include_raw tutorials/aggregating-minmax/flinksql/code/tutorial-steps/dev/create-movie-sales-by-year.sql %}</code></pre>
 +++++
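
A hedged sketch of the min/max aggregation this paragraph describes follows; the real statements are in the included create-movie-sales-by-year.sql, and every name and option below is an assumption:

-- Sketch only: an Upsert Kafka table keyed by release year, fed by a
-- persistent MIN/MAX aggregation. Source table and column names are assumed.
CREATE TABLE movie_sales_by_year (
  release_year INT,
  min_total    INT,
  max_total    INT,
  PRIMARY KEY (release_year) NOT ENFORCED
) WITH (
  'connector' = 'upsert-kafka',
  'topic'     = 'movie-sales-by-year',
  'properties.bootstrap.servers' = 'localhost:9092',
  'key.format'   = 'json',
  'value.format' = 'json'
);

INSERT INTO movie_sales_by_year
  SELECT release_year, MIN(total_sales), MAX(total_sales)
  FROM movie_sales
  GROUP BY release_year;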

_includes/tutorials/aggregating-minmax/flinksql/markup/test/make-test.adoc

Lines changed: 1 addition & 1 deletion
@@ -4,4 +4,4 @@ Next, create the test implementation at `src/test/java/io/confluent/developer/Fl
 <pre class="snippet"><code class="java">{% include_raw tutorials/aggregating-minmax/flinksql/code/src/test/java/io/confluent/developer/FlinkSqlAggregatingMinMaxTest.java %}</code></pre>
 +++++
 
-The test itself it straightforward to follow. It executes the SQL from our resource files, then runs a select statement against the final output `TABLE` of our application and compares the results to what's expected.
+The test itself is straightforward to follow. It executes the SQL from our resource files, then runs a select statement against the final output `TABLE` of our application and compares the results to what's expected.

_includes/tutorials/joining-stream-stream/flinksql/code/src/test/java/io/confluent/developer/AbstractFlinkKafkaTest.java

Lines changed: 1 addition & 1 deletion
@@ -147,7 +147,7 @@ protected static String tableauResults(TableResult tableResult) {
 
     // The given table result may come from a table backed by the Kafka or Upsert Kafka connector,
     // both of which perform unbounded (neverending) scans. So, in order to prevent tests from blocking
-    // on called to this method, we kick off a thread to kill the underlying job once output has
+    // on calls to this method, we kick off a thread to kill the underlying job once output has
     // been printed.
     //
     // Note: as of Flink 1.17.0, the Kafka connector will support bounded scanning, which would obviate

_includes/tutorials/joining-stream-stream/flinksql/markup/dev/populate-shipped-orders-table.adoc

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 Since the output of our transient query looks right, the next step is to make the query persistent. This looks exactly like the transient query, except we first create a new table and then execute an `INSERT INTO` statement to populate the table. The `INSERT INTO` statement returns to the CLI prompt right away, having created a persistent stream processing program running in the Flink cluster, continuously processing input records and updating the resulting `shipped_orders` table.
 
-Now go ahead and tun the following two commands in your Flink SQL session:
+Now go ahead and run the following two commands in your Flink SQL session:
 
 +++++
 <pre class="snippet"><code class="sql">{% include_raw tutorials/joining-stream-stream/flinksql/code/tutorial-steps/dev/create-join-results-table.sql %}</code></pre>
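
A hedged sketch of the persistent interval join this paragraph describes follows; the real statements are in the included create-join-results-table.sql, and the table names, columns, and join window below are assumptions:

-- Sketch only: populate shipped_orders by joining orders to shipments that
-- arrive within an assumed seven-day window of the order timestamp.
-- The CREATE TABLE for shipped_orders is omitted here.
INSERT INTO shipped_orders
  SELECT o.id AS order_id,
         o.order_ts,
         s.id AS shipment_id,
         s.ship_ts
  FROM orders o
  INNER JOIN shipments s
    ON o.id = s.order_id
   AND s.ship_ts BETWEEN o.order_ts AND o.order_ts + INTERVAL '7' DAY;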

_includes/tutorials/joining-stream-stream/flinksql/markup/test/make-test.adoc

Lines changed: 1 addition & 1 deletion
@@ -4,4 +4,4 @@ Next, create the test implementation at `src/test/java/io/confluent/developer/Fl
 <pre class="snippet"><code class="java">{% include_raw tutorials/joining-stream-stream/flinksql/code/src/test/java/io/confluent/developer/FlinkSqlIntervalJoinTest.java %}</code></pre>
 +++++
 
-The test itself it straightforward to follow. It executes the SQL from our resource files, then runs a select statement against the final output `TABLE` of our application and compares the results to what's expected.
+The test itself is straightforward to follow. It executes the SQL from our resource files, then runs a select statement against the final output `TABLE` of our application and compares the results to what's expected.
Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+tutorial-steps/dev/outputs/
+
+# Ignore Gradle project-specific cache directory
+.gradle
+
+# Ignore Gradle build output directory
+build
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+STEPS_DIR := tutorial-steps
+DEV_OUTPUTS_DIR := $(STEPS_DIR)/dev/outputs
+TEMP_DIR := $(shell mktemp -d)
+SEQUENCE := "dev, test, ccloud"
+
+tutorial:
+	rm -r $(DEV_OUTPUTS_DIR) || true
+	mkdir $(DEV_OUTPUTS_DIR)
+	harness-runner ../../../../../_data/harnesses/splitting/flinksql.yml $(TEMP_DIR) $(SEQUENCE)
+	diff --strip-trailing-cr $(STEPS_DIR)/dev/expected-acting-events-fantasy.log $(DEV_OUTPUTS_DIR)/validate-acting-events-fantasy.log
+	reset
Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+buildscript {
+    repositories {
+        mavenCentral()
+    }
+}
+
+plugins {
+    id "java"
+}
+
+sourceCompatibility = JavaVersion.VERSION_11
+targetCompatibility = JavaVersion.VERSION_11
+version = "0.0.1"
+
+repositories {
+    mavenCentral()
+}
+
+dependencies {
+    testImplementation "com.google.guava:guava:31.1-jre"
+    testImplementation "junit:junit:4.13.2"
+    testImplementation 'org.testcontainers:testcontainers:1.17.6'
+    testImplementation 'org.testcontainers:kafka:1.17.6'
+    testImplementation "org.apache.flink:flink-sql-connector-kafka:1.16.1"
+    testImplementation "org.apache.flink:flink-sql-avro-confluent-registry:1.16.1"
+    testImplementation "org.apache.flink:flink-test-utils:1.16.1"
+    testImplementation "org.apache.flink:flink-test-utils-junit:1.16.1"
+    testImplementation "org.apache.flink:flink-table-api-java-bridge:1.16.1"
+    testImplementation "org.apache.flink:flink-table-planner_2.12:1.16.1"
+    testImplementation "org.apache.flink:flink-table-planner_2.12:1.16.1:tests"
+    testImplementation "org.apache.flink:flink-statebackend-rocksdb:1.16.1"
+}

0 commit comments
