diff --git a/.gitattributes b/.gitattributes
deleted file mode 100644
index c39158cf00..0000000000
--- a/.gitattributes
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (C) 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not
-# use this file except in compliance with the License. You may obtain a copy of
-# the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations under
-# the License.
-
-# The default behavior, which overrides 'core.autocrlf', is to use Git's
-# built-in heuristics to determine whether a particular file is text or binary.
-# Text files are automatically normalized to the user's platforms.
-* text=auto
-
-# Explicitly declare text files that should always be normalized and converted
-# to native line endings.
-.gitattributes text
-.gitignore text
-LICENSE text
-*.avsc text
-*.html text
-*.java text
-*.md text
-*.properties text
-*.proto text
-*.py text
-*.sh text
-*.xml text
-*.yml text
-
-# Declare files that will always have CRLF line endings on checkout.
-# *.sln text eol=crlf
-
-# Explicitly denote all files that are truly binary and should not be modified.
-# *.jpg binary
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index 2a27023c28..0000000000
--- a/.gitignore
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (C) 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not
-# use this file except in compliance with the License. You may obtain a copy of
-# the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations under
-# the License.
-
-target/
-
-# Ignore IntelliJ files.
-.idea/
-*.iml
-*.ipr
-*.iws
-
-# Ignore Eclipse files.
-.classpath
-.project
-.settings/
-
-# The build process generates the dependency-reduced POM, but it shouldn't be
-# committed.
-dependency-reduced-pom.xml
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 8fa5d9a932..0000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not
-# use this file except in compliance with the License. You may obtain a copy of
-# the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations under
-# the License.
-
-language: java
-
-sudo: false
-
-notifications:
- email:
- # Group email notifications are disabled for now, since we cannot do it on a per-branch basis.
- # Right now, it would trigger a notification for each fork, which generates a lot of spam.
- # recipients:
- # - dataflow-sdk-build-notifications+travis@google.com
- on_success: change
- on_failure: always
-
-matrix:
- include:
- # On OSX, run with default JDK only.
- - os: osx
- # On Linux, run with specific JDKs only.
- - os: linux
- env: CUSTOM_JDK="oraclejdk8"
- # The distribution does not build with Java 7 by design. We need to rewrite these tests
- # to, for example, build and install with Java 8 and then test examples with Java 7.
- # - os: linux
- # env: CUSTOM_JDK="oraclejdk7"
- # - os: linux
- # env: CUSTOM_JDK="openjdk7"
-
-before_install:
- - if [ "$TRAVIS_OS_NAME" == "osx" ]; then export JAVA_HOME=$(/usr/libexec/java_home); fi
- - if [ "$TRAVIS_OS_NAME" == "linux" ]; then jdk_switcher use "$CUSTOM_JDK"; fi
-
-install:
- - travis_retry mvn install clean -U -DskipTests=true
-
-script:
- # Verify that the project can be built and installed.
- - mvn install
- # Verify that starter and examples archetypes have the correct version of the NOTICE file.
- - diff -q NOTICE maven-archetypes/starter/src/main/resources/NOTICE
- - diff -q NOTICE maven-archetypes/examples/src/main/resources/NOTICE
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
deleted file mode 100644
index 9b616e5fe3..0000000000
--- a/CONTRIBUTING.md
+++ /dev/null
@@ -1,51 +0,0 @@
-
-
-Want to contribute? Great! First, read this page (including the small print at
-the end).
-
-Google Cloud Dataflow SDK is a distribution of Apache Beam. If you'd like to
-change anything under the `org.apache.beam.*` namespace, please submit that
-change directly to the [Apache Beam](https://github.com/apache/beam) project.
-
-This repository contains code to build the Dataflow distribution of Beam, and
-some Dataflow-specific code. Only changes to how the distribution is built, or
-the Dataflow-specific code under the `com.google.cloud.dataflow.*` namespace,
-can be merged here.
-
-### Before you contribute
-Before we can use your code, you must sign the
-[Google Individual Contributor License Agreement](https://developers.google.com/open-source/cla/individual?csw=1)
-(CLA), which you can do online. The CLA is necessary mainly because you own the
-copyright to your changes, even after your contribution becomes part of our
-codebase, so we need your permission to use and distribute your code. We also
-need to be sure of various other things. For instance that you'll tell us if you
-know that your code infringes on other people's patents. You don't have to sign
-the CLA until after you've submitted your code for review and a member has
-approved it, but you must do it before we can put your code into our codebase.
-
-Before you start working on a larger contribution, we recommend to get in touch
-with us first through the issue tracker with your idea so that we can help out
-and possibly guide you. Coordinating up front makes it much easier to avoid
-frustration later on.
-
-### Code reviews
-All submissions, including submissions by project members, require review. We
-use GitHub pull requests for this purpose.
-
-### The small print
-Contributions made by corporations are covered by a different agreement than
-the one above, the Software Grant and Corporate Contributor License Agreement.
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index d645695673..0000000000
--- a/LICENSE
+++ /dev/null
@@ -1,202 +0,0 @@
-
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/NOTICE b/NOTICE
deleted file mode 100644
index 981fde5a9e..0000000000
--- a/NOTICE
+++ /dev/null
@@ -1,5 +0,0 @@
-Google Cloud Dataflow SDK for Java
-Copyright 2017, Google Inc.
-
-This product includes software developed at
-The Apache Software Foundation (http://www.apache.org/).
diff --git a/README.md b/README.md
index a644049218..dfb630ad79 100644
--- a/README.md
+++ b/README.md
@@ -16,106 +16,34 @@
# Google Cloud Dataflow SDK for Java
-[Google Cloud Dataflow](https://cloud.google.com/dataflow/) provides a simple,
-powerful programming model for building both batch and streaming parallel data
-processing pipelines.
-
-Dataflow SDK for Java is a distribution of a portion of the
-[Apache Beam](https://beam.apache.org) project. This repository hosts the
-code to build this distribution and any Dataflow-specific code/modules. The
-underlying source code is hosted in the
-[Apache Beam repository](https://github.com/apache/beam).
-
-[General usage](https://cloud.google.com/dataflow/getting-started) of Google
-Cloud Dataflow does **not** require use of this repository. Instead:
-
-1. depend directly on a specific
-[version](https://cloud.google.com/dataflow/release-notes/java) of the SDK in
-the [Maven Central Repository](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.google.cloud.dataflow%22)
-by adding the following dependency to development
-environments like Eclipse or Apache Maven:
-
-
- com.google.cloud.dataflow
- google-cloud-dataflow-java-sdk-all
- version_number
-
-
-1. download the example pipelines from the separate
-[DataflowJavaSDK-examples](https://github.com/GoogleCloudPlatform/DataflowJavaSDK-examples)
-repository.
-
-
-
-## Status [](https://travis-ci.org/GoogleCloudPlatform/DataflowJavaSDK)
-
-This branch is a work-in-progress for the Dataflow SDK for Java, version 2.0.0.
-It is currently supported on the Cloud Dataflow service in Beta.
-
-
-
-## Overview
-
-The key concepts in this programming model are:
-
-* [`PCollection`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java):
-represents a collection of data, which could be bounded or unbounded in size.
-* [`PTransform`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java):
-represents a computation that transforms input PCollections into output
-PCollections.
-* [`Pipeline`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java):
-manages a directed acyclic graph of PTransforms and PCollections that is ready
-for execution.
-* [`PipelineRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java):
-specifies where and how the pipeline should execute.
-
-We provide two runners:
-
- 1. The [`DirectRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java)
-runs the pipeline on your local machine.
- 1. The [`DataflowRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java)
-submits the pipeline to the Dataflow Service, where it runs using managed
-resources in the [Google Cloud Platform](https://cloud.google.com) (GCP).
-
-The SDK is built to be extensible and support additional execution environments
-beyond local execution and the Google Cloud Dataflow Service. Apache Beam
-contains additional SDKs, runners, IO connectors, etc.
+[Google Cloud Dataflow](https://cloud.google.com/dataflow/) is a service for executing [Apache Beam](https://beam.apache.org) pipelines on Google Cloud Platform.
## Getting Started
-This repository consists of the following parts:
-
-* The [`sdk`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk)
-module provides a set of basic Java APIs to program against.
-* The [`examples`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples)
-module provides a few samples to get started. We recommend starting with the
-`WordCount` example.
+* [Quickstart Using Java](https://cloud.google.com/dataflow/docs/quickstarts/quickstart-java-maven) on Google Cloud Dataflow
+* [Java API Reference](https://beam.apache.org/documentation/sdks/javadoc/)
+* [Java Examples](https://github.com/apache/beam/tree/master/examples/java)
-The following command will build both the `sdk` and `example` modules and
-install them in your local Maven repository:
+## We moved to Apache Beam!
+Apache Beam Java SDK and the code development moved to the [Apache Beam repo](https://github.com/apache/beam/tree/master/sdks/java).
- mvn clean install
-
-After building and installing, you can execute the `WordCount` and other
-example pipelines by following the instructions in this
-[README](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/README.md).
+If you want to contribute to the project (please do!) use this [Apache Beam contributor's guide](http://beam.apache.org/contribution-guide/)
## Contact Us
-We welcome all usage-related questions on [Stack Overflow](http://stackoverflow.com/questions/tagged/google-cloud-dataflow)
+We welcome all usage-related questions on
+[Stack Overflow](https://stackoverflow.com/questions/tagged/google-cloud-dataflow)
tagged with `google-cloud-dataflow`.
-Please use [issue tracker](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/issues)
-on GitHub to report any bugs, comments or questions regarding SDK development.
+Please use the
+[issue tracker](https://issues.apache.org/jira/browse/BEAM)
+on Apache JIRA to report any bugs, comments or questions regarding SDK development.
## More Information
* [Google Cloud Dataflow](https://cloud.google.com/dataflow/)
* [Apache Beam](https://beam.apache.org/)
-* [Dataflow Concepts and Programming Model](https://cloud.google.com/dataflow/model/programming-model)
-* [Java API Reference](https://cloud.google.com/dataflow/java-sdk/JavaDoc/index)
+* [Dataflow Concepts and Programming Model](https://beam.apache.org/documentation/programming-guide/)
+* [Java API Reference](https://beam.apache.org/documentation/sdks/javadoc/)
+
+_Apache, Apache Beam and the orange letter B logo are either registered trademarks or trademarks of the Apache Software Foundation in the United States and/or other countries._
diff --git a/examples/pom.xml b/examples/pom.xml
deleted file mode 100644
index 4c8584a91c..0000000000
--- a/examples/pom.xml
+++ /dev/null
@@ -1,51 +0,0 @@
-
-
-
- 4.0.0
-
-
- com.google.cloud.dataflow
- google-cloud-dataflow-java-sdk-parent
- 2.0.0-beta1-SNAPSHOT
-
-
- google-cloud-dataflow-java-examples-all
- Google Cloud Dataflow Java Examples - All
- Google Cloud Dataflow SDK for Java is a distribution of Apache
- Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow
- service. This artifact includes all Dataflow Java SDK
- examples.
-
- jar
-
-
-
- com.google.cloud.dataflow
- google-cloud-dataflow-java-sdk-all
-
-
-
- org.apache.beam
- beam-examples-java
-
-
-
- org.apache.beam
- beam-examples-java8
-
-
-
diff --git a/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java b/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java
deleted file mode 100644
index 827aff8395..0000000000
--- a/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2017 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk;
-
-import org.apache.beam.examples.MinimalWordCountJava8;
-import org.apache.beam.examples.WordCount;
-
-/**
- * Mark the examples dependencies as used at compile time. This is also needed
- * to produce some content in the final JAR file.
- */
-class ExamplesDependencies {
- SdkDependencies sdkDependencies;
- WordCount wordCount;
- MinimalWordCountJava8 minimalWordCount;
-}
diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml
deleted file mode 100644
index fa781ac432..0000000000
--- a/maven-archetypes/examples/pom.xml
+++ /dev/null
@@ -1,45 +0,0 @@
-
-
-
- 4.0.0
-
-
- com.google.cloud.dataflow
- google-cloud-dataflow-java-sdk-parent
- 2.0.0-beta1-SNAPSHOT
- ../../pom.xml
-
-
- google-cloud-dataflow-java-archetypes-examples
- Google Cloud Dataflow SDK for Java - Examples Archetype
- Google Cloud Dataflow SDK for Java is a distribution of Apache
- Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow
- service. This archetype creates a project containing all the example
- pipelines.
-
- maven-archetype
-
-
-
-
- org.apache.maven.archetype
- archetype-packaging
- 2.4
-
-
-
-
diff --git a/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
deleted file mode 100644
index 2b9eb52d80..0000000000
--- a/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
+++ /dev/null
@@ -1,44 +0,0 @@
-
-
-
-
-
-
- 1.7
-
-
-
-
-
- src/main/java
-
- **/*.java
-
-
-
-
- src/test/java
-
- **/*.java
-
-
-
-
diff --git a/maven-archetypes/examples/src/main/resources/NOTICE b/maven-archetypes/examples/src/main/resources/NOTICE
deleted file mode 100644
index 981fde5a9e..0000000000
--- a/maven-archetypes/examples/src/main/resources/NOTICE
+++ /dev/null
@@ -1,5 +0,0 @@
-Google Cloud Dataflow SDK for Java
-Copyright 2017, Google Inc.
-
-This product includes software developed at
-The Apache Software Foundation (http://www.apache.org/).
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
deleted file mode 100644
index 163212d61a..0000000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
+++ /dev/null
@@ -1,229 +0,0 @@
-
-
-
- 4.0.0
-
- ${groupId}
- ${artifactId}
- ${version}
-
-
- UTF-8
-
-
-
-
- ossrh.snapshots
- Sonatype OSS Repository Hosting
- https://oss.sonatype.org/content/repositories/snapshots/
-
- false
-
-
- true
-
-
-
-
- jar
-
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
- 3.3
-
- ${targetPlatform}
- ${targetPlatform}
-
-
-
-
- org.apache.maven.plugins
- maven-shade-plugin
- 2.3
-
-
- package
-
- shade
-
-
- ${project.artifactId}-bundled-${project.version}
-
-
- *:*
-
-
-
-
- *:*
-
- META-INF/*.SF
- META-INF/*.DSA
- META-INF/*.RSA
-
-
-
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-surefire-plugin
- 2.18.1
-
- all
- 4
- true
-
-
-
- org.apache.maven.surefire
- surefire-junit47
- 2.18.1
-
-
-
-
-
-
-
-
- org.codehaus.mojo
- exec-maven-plugin
- 1.4.0
-
- false
-
-
-
-
-
-
-
-
-
- com.google.cloud.dataflow
- google-cloud-dataflow-java-sdk-all
- 2.0.0-beta1-SNAPSHOT
-
-
-
- com.google.api-client
- google-api-client
- 1.22.0
-
-
-
- com.google.guava
- guava-jdk5
-
-
-
-
-
-
- com.google.apis
- google-api-services-bigquery
- v2-rev295-1.22.0
-
-
-
- com.google.guava
- guava-jdk5
-
-
-
-
-
- com.google.http-client
- google-http-client
- 1.22.0
-
-
-
- com.google.guava
- guava-jdk5
-
-
-
-
-
- com.google.apis
- google-api-services-pubsub
- v1-rev10-1.22.0
-
-
-
- com.google.guava
- guava-jdk5
-
-
-
-
-
- joda-time
- joda-time
- 2.4
-
-
-
- com.google.guava
- guava
- 19.0
-
-
-
-
- org.slf4j
- slf4j-api
- 1.7.7
-
-
-
- org.slf4j
- slf4j-jdk14
- 1.7.14
-
- runtime
-
-
-
-
- org.hamcrest
- hamcrest-all
- 1.3
-
-
-
- junit
- junit
- 4.11
-
-
-
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
deleted file mode 100644
index 99ae79687c..0000000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package};
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.regex.Pattern;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.options.Default;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.testing.PAssert;
-import org.apache.beam.sdk.transforms.Aggregator;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.Sum;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-
-/**
- * An example that verifies word counts in Shakespeare and includes Beam best practices.
- *
- *
This class, {@link DebuggingWordCount}, is the third in a series of four successively more
- * detailed 'word count' examples. You may first want to take a look at {@link MinimalWordCount}
- * and {@link WordCount}. After you've looked at this example, then see the
- * {@link WindowedWordCount} pipeline, for introduction of additional concepts.
- *
- *
Basic concepts, also in the MinimalWordCount and WordCount examples:
- * Reading text files; counting a PCollection; executing a Pipeline both locally
- * and using a selected runner; defining DoFns.
- *
- *
New Concepts:
- *
- * 1. Logging using SLF4J, even in a distributed environment
- * 2. Creating a custom aggregator (runners have varying levels of support)
- * 3. Testing your Pipeline via PAssert
- *
- *
- *
To execute this pipeline locally, specify general pipeline configuration:
- *
The input file defaults to a public data set containing the text of of King Lear,
- * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}.
- *
- */
-public class DebuggingWordCount {
- /** A DoFn that filters for a specific key based upon a regular expression. */
- public static class FilterTextFn extends DoFn, KV> {
- /**
- * Concept #1: The logger below uses the fully qualified class name of FilterTextFn as the
- * logger. Depending on your SLF4J configuration, log statements will likely be qualified by
- * this name.
- *
- *
Note that this is entirely standard SLF4J usage. Some runners may provide a default SLF4J
- * configuration that is most appropriate for their logging integration.
- */
- private static final Logger LOG = LoggerFactory.getLogger(FilterTextFn.class);
-
- private final Pattern filter;
- public FilterTextFn(String pattern) {
- filter = Pattern.compile(pattern);
- }
-
- /**
- * Concept #2: A custom aggregator can track values in your pipeline as it runs. Each
- * runner provides varying levels of support for aggregators, and may expose them
- * in a dashboard, etc.
- */
- private final Aggregator matchedWords =
- createAggregator("matchedWords", new Sum.SumLongFn());
- private final Aggregator unmatchedWords =
- createAggregator("umatchedWords", new Sum.SumLongFn());
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- if (filter.matcher(c.element().getKey()).matches()) {
- // Log at the "DEBUG" level each element that we match. When executing this pipeline
- // these log lines will appear only if the log level is set to "DEBUG" or lower.
- LOG.debug("Matched: " + c.element().getKey());
- matchedWords.addValue(1L);
- c.output(c.element());
- } else {
- // Log at the "TRACE" level each element that is not matched. Different log levels
- // can be used to control the verbosity of logging providing an effective mechanism
- // to filter less important information.
- LOG.trace("Did not match: " + c.element().getKey());
- unmatchedWords.addValue(1L);
- }
- }
- }
-
- /**
- * Options supported by {@link DebuggingWordCount}.
- *
- *
Inherits standard configuration options and all options defined in
- * {@link WordCount.WordCountOptions}.
- */
- public interface WordCountOptions extends WordCount.WordCountOptions {
-
- @Description("Regex filter pattern to use in DebuggingWordCount. "
- + "Only words matching this pattern will be counted.")
- @Default.String("Flourish|stomach")
- String getFilterPattern();
- void setFilterPattern(String value);
- }
-
- public static void main(String[] args) {
- WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
- .as(WordCountOptions.class);
- Pipeline p = Pipeline.create(options);
-
- PCollection> filteredWords =
- p.apply("ReadLines", TextIO.Read.from(options.getInputFile()))
- .apply(new WordCount.CountWords())
- .apply(ParDo.of(new FilterTextFn(options.getFilterPattern())));
-
- /**
- * Concept #3: PAssert is a set of convenient PTransforms in the style of
- * Hamcrest's collection matchers that can be used when writing Pipeline level tests
- * to validate the contents of PCollections. PAssert is best used in unit tests
- * with small data sets but is demonstrated here as a teaching tool.
- *
- *
Below we verify that the set of filtered words matches our expected counts. Note
- * that PAssert does not provide any output and that successful completion of the
- * Pipeline implies that the expectations were met. Learn more at
- * https://cloud.google.com/dataflow/pipelines/testing-your-pipeline on how to test
- * your Pipeline and see {@link DebuggingWordCountTest} for an example unit test.
- */
- List> expectedResults = Arrays.asList(
- KV.of("Flourish", 3L),
- KV.of("stomach", 1L));
- PAssert.that(filteredWords).containsInAnyOrder(expectedResults);
-
- p.run().waitUntilFinish();
- }
-}
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java
deleted file mode 100644
index 97bd8243b8..0000000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package};
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.Count;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.MapElements;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.SimpleFunction;
-import org.apache.beam.sdk.values.KV;
-
-
-/**
- * An example that counts words in Shakespeare.
- *
- *
This class, {@link MinimalWordCount}, is the first in a series of four successively more
- * detailed 'word count' examples. Here, for simplicity, we don't show any error-checking or
- * argument processing, and focus on construction of the pipeline, which chains together the
- * application of core transforms.
- *
- *
Next, see the {@link WordCount} pipeline, then the {@link DebuggingWordCount}, and finally the
- * {@link WindowedWordCount} pipeline, for more detailed examples that introduce additional
- * concepts.
- *
- *
Concepts:
- *
- *
- * 1. Reading data from text files
- * 2. Specifying 'inline' transforms
- * 3. Counting items in a PCollection
- * 4. Writing data to text files
- *
- *
- *
No arguments are required to run this pipeline. It will be executed with the DirectRunner. You
- * can see the results in the output files in your current working directory, with names like
- * "wordcounts-00001-of-00005. When running on a distributed service, you would use an appropriate
- * file service.
- */
-public class MinimalWordCount {
-
- public static void main(String[] args) {
- // Create a PipelineOptions object. This object lets us set various execution
- // options for our pipeline, such as the runner you wish to use. This example
- // will run with the DirectRunner by default, based on the class path configured
- // in its dependencies.
- PipelineOptions options = PipelineOptionsFactory.create();
-
- // Create the Pipeline object with the options we defined above.
- Pipeline p = Pipeline.create(options);
-
- // Apply the pipeline's transforms.
-
- // Concept #1: Apply a root transform to the pipeline; in this case, TextIO.Read to read a set
- // of input text files. TextIO.Read returns a PCollection where each element is one line from
- // the input text (a set of Shakespeare's texts).
-
- // This example reads a public data set consisting of the complete works of Shakespeare.
- p.apply(TextIO.Read.from("gs://apache-beam-samples/shakespeare/*"))
-
- // Concept #2: Apply a ParDo transform to our PCollection of text lines. This ParDo invokes a
- // DoFn (defined in-line) on each element that tokenizes the text line into individual words.
- // The ParDo returns a PCollection, where each element is an individual word in
- // Shakespeare's collected texts.
- .apply("ExtractWords", ParDo.of(new DoFn() {
- @ProcessElement
- public void processElement(ProcessContext c) {
- for (String word : c.element().split("[^a-zA-Z']+")) {
- if (!word.isEmpty()) {
- c.output(word);
- }
- }
- }
- }))
-
- // Concept #3: Apply the Count transform to our PCollection of individual words. The Count
- // transform returns a new PCollection of key/value pairs, where each key represents a unique
- // word in the text. The associated value is the occurrence count for that word.
- .apply(Count.perElement())
-
- // Apply a MapElements transform that formats our PCollection of word counts into a printable
- // string, suitable for writing to an output file.
- .apply("FormatResults", MapElements.via(new SimpleFunction, String>() {
- @Override
- public String apply(KV input) {
- return input.getKey() + ": " + input.getValue();
- }
- }))
-
- // Concept #4: Apply a write transform, TextIO.Write, at the end of the pipeline.
- // TextIO.Write writes the contents of a PCollection (in this case, our PCollection of
- // formatted strings) to a series of text files.
- //
- // By default, it will write to a set of files with names like wordcount-00001-of-00005
- .apply(TextIO.Write.to("wordcounts"));
-
- // Run the pipeline.
- p.run().waitUntilFinish();
- }
-}
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java
deleted file mode 100644
index 052d7b6a0e..0000000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package};
-
-import java.io.IOException;
-import java.util.concurrent.ThreadLocalRandom;
-import ${package}.common.ExampleBigQueryTableOptions;
-import ${package}.common.ExampleOptions;
-import ${package}.common.WriteWindowedFilesDoFn;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.PipelineResult;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.options.Default;
-import org.apache.beam.sdk.options.DefaultValueFactory;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.GroupByKey;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.FixedWindows;
-import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
-import org.apache.beam.sdk.transforms.windowing.Window;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-
-/**
- * An example that counts words in text, and can run over either unbounded or bounded input
- * collections.
- *
- *
This class, {@link WindowedWordCount}, is the last in a series of four successively more
- * detailed 'word count' examples. First take a look at {@link MinimalWordCount},
- * {@link WordCount}, and {@link DebuggingWordCount}.
- *
- *
Basic concepts, also in the MinimalWordCount, WordCount, and DebuggingWordCount examples:
- * Reading text files; counting a PCollection; writing to GCS; executing a Pipeline both locally
- * and using a selected runner; defining DoFns; creating a custom aggregator;
- * user-defined PTransforms; defining PipelineOptions.
- *
- *
New Concepts:
- *
- * 1. Unbounded and bounded pipeline input modes
- * 2. Adding timestamps to data
- * 3. Windowing
- * 4. Re-using PTransforms over windowed PCollections
- * 5. Accessing the window of an element
- * 6. Writing data to per-window text files
- *
- *
- *
By default, the examples will run with the {@code DirectRunner}.
- * To change the runner, specify:
- *
- * See examples/java/README.md for instructions about how to configure different runners.
- *
- *
To execute this pipeline locally, specify a local output file (if using the
- * {@code DirectRunner}) or output prefix on a supported distributed file system.
- *
The input file defaults to a public data set containing the text of of King Lear,
- * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}.
- *
- *
By default, the pipeline will do fixed windowing, on 1-minute windows. You can
- * change this interval by setting the {@code --windowSize} parameter, e.g. {@code --windowSize=10}
- * for 10-minute windows.
- *
- *
The example will try to cancel the pipeline on the signal to terminate the process (CTRL-C).
- */
-public class WindowedWordCount {
- static final int WINDOW_SIZE = 10; // Default window duration in minutes
- /**
- * Concept #2: A DoFn that sets the data element timestamp. This is a silly method, just for
- * this example, for the bounded data case.
- *
- *
Imagine that many ghosts of Shakespeare are all typing madly at the same time to recreate
- * his masterworks. Each line of the corpus will get a random associated timestamp somewhere in a
- * 2-hour period.
- */
- static class AddTimestampFn extends DoFn {
- private static final Duration RAND_RANGE = Duration.standardHours(1);
- private final Instant minTimestamp;
- private final Instant maxTimestamp;
-
- AddTimestampFn(Instant minTimestamp, Instant maxTimestamp) {
- this.minTimestamp = minTimestamp;
- this.maxTimestamp = maxTimestamp;
- }
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- Instant randomTimestamp =
- new Instant(
- ThreadLocalRandom.current()
- .nextLong(minTimestamp.getMillis(), maxTimestamp.getMillis()));
-
- /**
- * Concept #2: Set the data element with that timestamp.
- */
- c.outputWithTimestamp(c.element(), new Instant(randomTimestamp));
- }
- }
-
- /** A {@link DefaultValueFactory} that returns the current system time. */
- public static class DefaultToCurrentSystemTime implements DefaultValueFactory {
- @Override
- public Long create(PipelineOptions options) {
- return System.currentTimeMillis();
- }
- }
-
- /** A {@link DefaultValueFactory} that returns the minimum timestamp plus one hour. */
- public static class DefaultToMinTimestampPlusOneHour implements DefaultValueFactory {
- @Override
- public Long create(PipelineOptions options) {
- return options.as(Options.class).getMinTimestampMillis()
- + Duration.standardHours(1).getMillis();
- }
- }
-
- /**
- * Options for {@link WindowedWordCount}.
- *
- *
Inherits standard example configuration options, which allow specification of the
- * runner, as well as the {@link WordCount.WordCountOptions} support for
- * specification of the input and output files.
- */
- public interface Options extends WordCount.WordCountOptions,
- ExampleOptions, ExampleBigQueryTableOptions {
- @Description("Fixed window duration, in minutes")
- @Default.Integer(WINDOW_SIZE)
- Integer getWindowSize();
- void setWindowSize(Integer value);
-
- @Description("Minimum randomly assigned timestamp, in milliseconds-since-epoch")
- @Default.InstanceFactory(DefaultToCurrentSystemTime.class)
- Long getMinTimestampMillis();
- void setMinTimestampMillis(Long value);
-
- @Description("Maximum randomly assigned timestamp, in milliseconds-since-epoch")
- @Default.InstanceFactory(DefaultToMinTimestampPlusOneHour.class)
- Long getMaxTimestampMillis();
- void setMaxTimestampMillis(Long value);
- }
-
- public static void main(String[] args) throws IOException {
- Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
- final String output = options.getOutput();
- final Duration windowSize = Duration.standardMinutes(options.getWindowSize());
- final Instant minTimestamp = new Instant(options.getMinTimestampMillis());
- final Instant maxTimestamp = new Instant(options.getMaxTimestampMillis());
-
- Pipeline pipeline = Pipeline.create(options);
-
- /**
- * Concept #1: the Beam SDK lets us run the same pipeline with either a bounded or
- * unbounded input source.
- */
- PCollection input = pipeline
- /** Read from the GCS file. */
- .apply(TextIO.Read.from(options.getInputFile()))
- // Concept #2: Add an element timestamp, using an artificial time just to show windowing.
- // See AddTimestampFn for more detail on this.
- .apply(ParDo.of(new AddTimestampFn(minTimestamp, maxTimestamp)));
-
- /**
- * Concept #3: Window into fixed windows. The fixed window size for this example defaults to 1
- * minute (you can change this with a command-line option). See the documentation for more
- * information on how fixed windows work, and for information on the other types of windowing
- * available (e.g., sliding windows).
- */
- PCollection windowedWords =
- input.apply(
- Window.into(
- FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))));
-
- /**
- * Concept #4: Re-use our existing CountWords transform that does not have knowledge of
- * windows over a PCollection containing windowed values.
- */
- PCollection> wordCounts = windowedWords.apply(new WordCount.CountWords());
-
- /**
- * Concept #5: Customize the output format using windowing information
- *
- *
At this point, the data is organized by window. We're writing text files and and have no
- * late data, so for simplicity we can use the window as the key and {@link GroupByKey} to get
- * one output file per window. (if we had late data this key would not be unique)
- *
- *
To access the window in a {@link DoFn}, add a {@link BoundedWindow} parameter. This will
- * be automatically detected and populated with the window for the current element.
- */
- PCollection>> keyedByWindow =
- wordCounts.apply(
- ParDo.of(
- new DoFn, KV>>() {
- @ProcessElement
- public void processElement(ProcessContext context, IntervalWindow window) {
- context.output(KV.of(window, context.element()));
- }
- }));
-
- /**
- * Concept #6: Format the results and write to a sharded file partitioned by window, using a
- * simple ParDo operation. Because there may be failures followed by retries, the
- * writes must be idempotent, but the details of writing to files is elided here.
- */
- keyedByWindow
- .apply(GroupByKey.>create())
- .apply(ParDo.of(new WriteWindowedFilesDoFn(output)));
-
- PipelineResult result = pipeline.run();
- try {
- result.waitUntilFinish();
- } catch (Exception exc) {
- result.cancel();
- }
- }
-
-}
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java
deleted file mode 100644
index 634dea1a0a..0000000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package};
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.options.Default;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.options.Validation.Required;
-import org.apache.beam.sdk.transforms.Aggregator;
-import org.apache.beam.sdk.transforms.Count;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.MapElements;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.SimpleFunction;
-import org.apache.beam.sdk.transforms.Sum;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-
-/**
- * An example that counts words in Shakespeare and includes Beam best practices.
- *
- *
This class, {@link WordCount}, is the second in a series of four successively more detailed
- * 'word count' examples. You may first want to take a look at {@link MinimalWordCount}.
- * After you've looked at this example, then see the {@link DebuggingWordCount}
- * pipeline, for introduction of additional concepts.
- *
- *
Basic concepts, also in the MinimalWordCount example:
- * Reading text files; counting a PCollection; writing to text files
- *
- *
New Concepts:
- *
- * 1. Executing a Pipeline both locally and using the selected runner
- * 2. Using ParDo with static DoFns defined out-of-line
- * 3. Building a composite transform
- * 4. Defining your own pipeline options
- *
- *
- *
Concept #1: you can execute this pipeline either locally or using by selecting another runner.
- * These are now command-line options and not hard-coded as they were in the MinimalWordCount
- * example.
- *
- *
To execute this pipeline, specify a local output file (if using the
- * {@code DirectRunner}) or output prefix on a supported distributed file system.
- *
The input file defaults to a public data set containing the text of of King Lear,
- * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}.
- */
-public class WordCount {
-
- /**
- * Concept #2: You can make your pipeline assembly code less verbose by defining your DoFns
- * statically out-of-line. This DoFn tokenizes lines of text into individual words; we pass it
- * to a ParDo in the pipeline.
- */
- static class ExtractWordsFn extends DoFn {
- private final Aggregator emptyLines =
- createAggregator("emptyLines", new Sum.SumLongFn());
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- if (c.element().trim().isEmpty()) {
- emptyLines.addValue(1L);
- }
-
- // Split the line into words.
- String[] words = c.element().split("[^a-zA-Z']+");
-
- // Output each word encountered into the output PCollection.
- for (String word : words) {
- if (!word.isEmpty()) {
- c.output(word);
- }
- }
- }
- }
-
- /** A SimpleFunction that converts a Word and Count into a printable string. */
- public static class FormatAsTextFn extends SimpleFunction, String> {
- @Override
- public String apply(KV input) {
- return input.getKey() + ": " + input.getValue();
- }
- }
-
- /**
- * A PTransform that converts a PCollection containing lines of text into a PCollection of
- * formatted word counts.
- *
- *
Concept #3: This is a custom composite transform that bundles two transforms (ParDo and
- * Count) as a reusable PTransform subclass. Using composite transforms allows for easy reuse,
- * modular testing, and an improved monitoring experience.
- */
- public static class CountWords extends PTransform,
- PCollection>> {
- @Override
- public PCollection> expand(PCollection lines) {
-
- // Convert lines of text into individual words.
- PCollection words = lines.apply(
- ParDo.of(new ExtractWordsFn()));
-
- // Count the number of times each word occurs.
- PCollection> wordCounts =
- words.apply(Count.perElement());
-
- return wordCounts;
- }
- }
-
- /**
- * Options supported by {@link WordCount}.
- *
- *
Concept #4: Defining your own configuration options. Here, you can add your own arguments
- * to be processed by the command-line parser, and specify default values for them. You can then
- * access the options values in your pipeline code.
- *
- *
Inherits standard configuration options.
- */
- public interface WordCountOptions extends PipelineOptions {
-
- /**
- * By default, this example reads from a public dataset containing the text of
- * King Lear. Set this option to choose a different input file or glob.
- */
- @Description("Path of the file to read from")
- @Default.String("gs://apache-beam-samples/shakespeare/kinglear.txt")
- String getInputFile();
- void setInputFile(String value);
-
- /**
- * Set this required option to specify where to write the output.
- */
- @Description("Path of the file to write to")
- @Required
- String getOutput();
- void setOutput(String value);
- }
-
- public static void main(String[] args) {
- WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
- .as(WordCountOptions.class);
- Pipeline p = Pipeline.create(options);
-
- // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the
- // static FormatAsTextFn() to the ParDo transform.
- p.apply("ReadLines", TextIO.Read.from(options.getInputFile()))
- .apply(new CountWords())
- .apply(MapElements.via(new FormatAsTextFn()))
- .apply("WriteCounts", TextIO.Write.to(options.getOutput()));
-
- p.run().waitUntilFinish();
- }
-}
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java
deleted file mode 100644
index 6b51074f44..0000000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package}.common;
-
-import com.google.api.services.bigquery.model.TableSchema;
-import org.apache.beam.sdk.options.Default;
-import org.apache.beam.sdk.options.DefaultValueFactory;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.GcpOptions;
-import org.apache.beam.sdk.options.PipelineOptions;
-
-/**
- * Options that can be used to configure BigQuery tables in Beam examples.
- * The project defaults to the project being used to run the example.
- */
-public interface ExampleBigQueryTableOptions extends GcpOptions {
- @Description("BigQuery dataset name")
- @Default.String("beam_examples")
- String getBigQueryDataset();
- void setBigQueryDataset(String dataset);
-
- @Description("BigQuery table name")
- @Default.InstanceFactory(BigQueryTableFactory.class)
- String getBigQueryTable();
- void setBigQueryTable(String table);
-
- @Description("BigQuery table schema")
- TableSchema getBigQuerySchema();
- void setBigQuerySchema(TableSchema schema);
-
- /**
- * Returns the job name as the default BigQuery table name.
- */
- class BigQueryTableFactory implements DefaultValueFactory {
- @Override
- public String create(PipelineOptions options) {
- return options.getJobName().replace('-', '_');
- }
- }
-}
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java
deleted file mode 100644
index 90f935c3ce..0000000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package}.common;
-
-import org.apache.beam.sdk.options.Default;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.PipelineOptions;
-
-/**
- * Options that can be used to configure the Beam examples.
- */
-public interface ExampleOptions extends PipelineOptions {
- @Description("Whether to keep jobs running after local process exit")
- @Default.Boolean(false)
- boolean getKeepJobsRunning();
- void setKeepJobsRunning(boolean keepJobsRunning);
-
- @Description("Number of workers to use when executing the injector pipeline")
- @Default.Integer(1)
- int getInjectorNumWorkers();
- void setInjectorNumWorkers(int numWorkers);
-}
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java
deleted file mode 100644
index daeb398f7f..0000000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package}.common;
-
-import org.apache.beam.sdk.options.Default;
-import org.apache.beam.sdk.options.DefaultValueFactory;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.GcpOptions;
-import org.apache.beam.sdk.options.PipelineOptions;
-
-/**
- * Options that can be used to configure Pub/Sub topic/subscription in Beam examples.
- */
-public interface ExamplePubsubTopicAndSubscriptionOptions extends ExamplePubsubTopicOptions {
- @Description("Pub/Sub subscription")
- @Default.InstanceFactory(PubsubSubscriptionFactory.class)
- String getPubsubSubscription();
- void setPubsubSubscription(String subscription);
-
- /**
- * Returns a default Pub/Sub subscription based on the project and the job names.
- */
- class PubsubSubscriptionFactory implements DefaultValueFactory {
- @Override
- public String create(PipelineOptions options) {
- return "projects/" + options.as(GcpOptions.class).getProject()
- + "/subscriptions/" + options.getJobName();
- }
- }
-}
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java
deleted file mode 100644
index 936bff5675..0000000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package}.common;
-
-import org.apache.beam.sdk.options.Default;
-import org.apache.beam.sdk.options.DefaultValueFactory;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.GcpOptions;
-import org.apache.beam.sdk.options.PipelineOptions;
-
-/**
- * Options that can be used to configure Pub/Sub topic in Beam examples.
- */
-public interface ExamplePubsubTopicOptions extends GcpOptions {
- @Description("Pub/Sub topic")
- @Default.InstanceFactory(PubsubTopicFactory.class)
- String getPubsubTopic();
- void setPubsubTopic(String topic);
-
- /**
- * Returns a default Pub/Sub topic based on the project and the job names.
- */
- class PubsubTopicFactory implements DefaultValueFactory {
- @Override
- public String create(PipelineOptions options) {
- return "projects/" + options.as(GcpOptions.class).getProject()
- + "/topics/" + options.getJobName();
- }
- }
-}
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java
deleted file mode 100644
index 570b3827b7..0000000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java
+++ /dev/null
@@ -1,352 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package}.common;
-
-import com.google.api.client.googleapis.json.GoogleJsonResponseException;
-import com.google.api.client.googleapis.services.AbstractGoogleClientRequest;
-import com.google.api.client.util.BackOff;
-import com.google.api.client.util.BackOffUtils;
-import com.google.api.client.util.Sleeper;
-import com.google.api.services.bigquery.Bigquery;
-import com.google.api.services.bigquery.Bigquery.Datasets;
-import com.google.api.services.bigquery.Bigquery.Tables;
-import com.google.api.services.bigquery.model.Dataset;
-import com.google.api.services.bigquery.model.DatasetReference;
-import com.google.api.services.bigquery.model.Table;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.api.services.pubsub.Pubsub;
-import com.google.api.services.pubsub.model.Subscription;
-import com.google.api.services.pubsub.model.Topic;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-import com.google.common.util.concurrent.Uninterruptibles;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-import org.apache.beam.sdk.PipelineResult;
-import org.apache.beam.sdk.options.BigQueryOptions;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PubsubOptions;
-import org.apache.beam.sdk.util.FluentBackoff;
-import org.apache.beam.sdk.util.Transport;
-import org.joda.time.Duration;
-
-/**
- * The utility class that sets up and tears down external resources,
- * and cancels the streaming pipelines once the program terminates.
- *
- *
It is used to run Beam examples.
- */
-public class ExampleUtils {
-
- private static final int SC_NOT_FOUND = 404;
-
- private final PipelineOptions options;
- private Bigquery bigQueryClient = null;
- private Pubsub pubsubClient = null;
- private Set pipelinesToCancel = Sets.newHashSet();
- private List pendingMessages = Lists.newArrayList();
-
- /**
- * Do resources and runner options setup.
- */
- public ExampleUtils(PipelineOptions options) {
- this.options = options;
- }
-
- /**
- * Sets up external resources that are required by the example,
- * such as Pub/Sub topics and BigQuery tables.
- *
- * @throws IOException if there is a problem setting up the resources
- */
- public void setup() throws IOException {
- Sleeper sleeper = Sleeper.DEFAULT;
- BackOff backOff =
- FluentBackoff.DEFAULT
- .withMaxRetries(3).withInitialBackoff(Duration.millis(200)).backoff();
- Throwable lastException = null;
- try {
- do {
- try {
- setupPubsub();
- setupBigQueryTable();
- return;
- } catch (GoogleJsonResponseException e) {
- lastException = e;
- }
- } while (BackOffUtils.next(sleeper, backOff));
- } catch (InterruptedException e) {
- Thread.currentThread().interrupt();
- // Ignore InterruptedException
- }
- throw new RuntimeException(lastException);
- }
-
- /**
- * Sets up the Google Cloud Pub/Sub topic.
- *
- *
If the topic doesn't exist, a new topic with the given name will be created.
- *
- * @throws IOException if there is a problem setting up the Pub/Sub topic
- */
- public void setupPubsub() throws IOException {
- ExamplePubsubTopicAndSubscriptionOptions pubsubOptions =
- options.as(ExamplePubsubTopicAndSubscriptionOptions.class);
- if (!pubsubOptions.getPubsubTopic().isEmpty()) {
- pendingMessages.add("**********************Set Up Pubsub************************");
- setupPubsubTopic(pubsubOptions.getPubsubTopic());
- pendingMessages.add("The Pub/Sub topic has been set up for this example: "
- + pubsubOptions.getPubsubTopic());
-
- if (!pubsubOptions.getPubsubSubscription().isEmpty()) {
- setupPubsubSubscription(
- pubsubOptions.getPubsubTopic(), pubsubOptions.getPubsubSubscription());
- pendingMessages.add("The Pub/Sub subscription has been set up for this example: "
- + pubsubOptions.getPubsubSubscription());
- }
- }
- }
-
- /**
- * Sets up the BigQuery table with the given schema.
- *
- *
If the table already exists, the schema has to match the given one. Otherwise, the example
- * will throw a RuntimeException. If the table doesn't exist, a new table with the given schema
- * will be created.
- *
- * @throws IOException if there is a problem setting up the BigQuery table
- */
- public void setupBigQueryTable() throws IOException {
- ExampleBigQueryTableOptions bigQueryTableOptions =
- options.as(ExampleBigQueryTableOptions.class);
- if (bigQueryTableOptions.getBigQueryDataset() != null
- && bigQueryTableOptions.getBigQueryTable() != null
- && bigQueryTableOptions.getBigQuerySchema() != null) {
- pendingMessages.add("******************Set Up Big Query Table*******************");
- setupBigQueryTable(bigQueryTableOptions.getProject(),
- bigQueryTableOptions.getBigQueryDataset(),
- bigQueryTableOptions.getBigQueryTable(),
- bigQueryTableOptions.getBigQuerySchema());
- pendingMessages.add("The BigQuery table has been set up for this example: "
- + bigQueryTableOptions.getProject()
- + ":" + bigQueryTableOptions.getBigQueryDataset()
- + "." + bigQueryTableOptions.getBigQueryTable());
- }
- }
-
- /**
- * Tears down external resources that can be deleted upon the example's completion.
- */
- private void tearDown() {
- pendingMessages.add("*************************Tear Down*************************");
- ExamplePubsubTopicAndSubscriptionOptions pubsubOptions =
- options.as(ExamplePubsubTopicAndSubscriptionOptions.class);
- if (!pubsubOptions.getPubsubTopic().isEmpty()) {
- try {
- deletePubsubTopic(pubsubOptions.getPubsubTopic());
- pendingMessages.add("The Pub/Sub topic has been deleted: "
- + pubsubOptions.getPubsubTopic());
- } catch (IOException e) {
- pendingMessages.add("Failed to delete the Pub/Sub topic : "
- + pubsubOptions.getPubsubTopic());
- }
- if (!pubsubOptions.getPubsubSubscription().isEmpty()) {
- try {
- deletePubsubSubscription(pubsubOptions.getPubsubSubscription());
- pendingMessages.add("The Pub/Sub subscription has been deleted: "
- + pubsubOptions.getPubsubSubscription());
- } catch (IOException e) {
- pendingMessages.add("Failed to delete the Pub/Sub subscription : "
- + pubsubOptions.getPubsubSubscription());
- }
- }
- }
-
- ExampleBigQueryTableOptions bigQueryTableOptions =
- options.as(ExampleBigQueryTableOptions.class);
- if (bigQueryTableOptions.getBigQueryDataset() != null
- && bigQueryTableOptions.getBigQueryTable() != null
- && bigQueryTableOptions.getBigQuerySchema() != null) {
- pendingMessages.add("The BigQuery table might contain the example's output, "
- + "and it is not deleted automatically: "
- + bigQueryTableOptions.getProject()
- + ":" + bigQueryTableOptions.getBigQueryDataset()
- + "." + bigQueryTableOptions.getBigQueryTable());
- pendingMessages.add("Please go to the Developers Console to delete it manually."
- + " Otherwise, you may be charged for its usage.");
- }
- }
-
- private void setupBigQueryTable(String projectId, String datasetId, String tableId,
- TableSchema schema) throws IOException {
- if (bigQueryClient == null) {
- bigQueryClient = Transport.newBigQueryClient(options.as(BigQueryOptions.class)).build();
- }
-
- Datasets datasetService = bigQueryClient.datasets();
- if (executeNullIfNotFound(datasetService.get(projectId, datasetId)) == null) {
- Dataset newDataset = new Dataset().setDatasetReference(
- new DatasetReference().setProjectId(projectId).setDatasetId(datasetId));
- datasetService.insert(projectId, newDataset).execute();
- }
-
- Tables tableService = bigQueryClient.tables();
- Table table = executeNullIfNotFound(tableService.get(projectId, datasetId, tableId));
- if (table == null) {
- Table newTable = new Table().setSchema(schema).setTableReference(
- new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId));
- tableService.insert(projectId, datasetId, newTable).execute();
- } else if (!table.getSchema().equals(schema)) {
- throw new RuntimeException(
- "Table exists and schemas do not match, expecting: " + schema.toPrettyString()
- + ", actual: " + table.getSchema().toPrettyString());
- }
- }
-
- private void setupPubsubTopic(String topic) throws IOException {
- if (pubsubClient == null) {
- pubsubClient = Transport.newPubsubClient(options.as(PubsubOptions.class)).build();
- }
- if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) == null) {
- pubsubClient.projects().topics().create(topic, new Topic().setName(topic)).execute();
- }
- }
-
- private void setupPubsubSubscription(String topic, String subscription) throws IOException {
- if (pubsubClient == null) {
- pubsubClient = Transport.newPubsubClient(options.as(PubsubOptions.class)).build();
- }
- if (executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) == null) {
- Subscription subInfo = new Subscription()
- .setAckDeadlineSeconds(60)
- .setTopic(topic);
- pubsubClient.projects().subscriptions().create(subscription, subInfo).execute();
- }
- }
-
- /**
- * Deletes the Google Cloud Pub/Sub topic.
- *
- * @throws IOException if there is a problem deleting the Pub/Sub topic
- */
- private void deletePubsubTopic(String topic) throws IOException {
- if (pubsubClient == null) {
- pubsubClient = Transport.newPubsubClient(options.as(PubsubOptions.class)).build();
- }
- if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) != null) {
- pubsubClient.projects().topics().delete(topic).execute();
- }
- }
-
- /**
- * Deletes the Google Cloud Pub/Sub subscription.
- *
- * @throws IOException if there is a problem deleting the Pub/Sub subscription
- */
- private void deletePubsubSubscription(String subscription) throws IOException {
- if (pubsubClient == null) {
- pubsubClient = Transport.newPubsubClient(options.as(PubsubOptions.class)).build();
- }
- if (executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) != null) {
- pubsubClient.projects().subscriptions().delete(subscription).execute();
- }
- }
-
- /**
- * Waits for the pipeline to finish and cancels it before the program exists.
- */
- public void waitToFinish(PipelineResult result) {
- pipelinesToCancel.add(result);
- if (!options.as(ExampleOptions.class).getKeepJobsRunning()) {
- addShutdownHook(pipelinesToCancel);
- }
- try {
- result.waitUntilFinish();
- } catch (UnsupportedOperationException e) {
- // Do nothing if the given PipelineResult doesn't support waitUntilFinish(),
- // such as EvaluationResults returned by DirectRunner.
- tearDown();
- printPendingMessages();
- } catch (Exception e) {
- throw new RuntimeException("Failed to wait the pipeline until finish: " + result);
- }
- }
-
- private void addShutdownHook(final Collection pipelineResults) {
- Runtime.getRuntime().addShutdownHook(new Thread() {
- @Override
- public void run() {
- tearDown();
- printPendingMessages();
- for (PipelineResult pipelineResult : pipelineResults) {
- try {
- pipelineResult.cancel();
- } catch (IOException e) {
- System.out.println("Failed to cancel the job.");
- System.out.println(e.getMessage());
- }
- }
-
- for (PipelineResult pipelineResult : pipelineResults) {
- boolean cancellationVerified = false;
- for (int retryAttempts = 6; retryAttempts > 0; retryAttempts--) {
- if (pipelineResult.getState().isTerminal()) {
- cancellationVerified = true;
- break;
- } else {
- System.out.println(
- "The example pipeline is still running. Verifying the cancellation.");
- }
- Uninterruptibles.sleepUninterruptibly(10, TimeUnit.SECONDS);
- }
- if (!cancellationVerified) {
- System.out.println("Failed to verify the cancellation for job: " + pipelineResult);
- }
- }
- }
- });
- }
-
- private void printPendingMessages() {
- System.out.println();
- System.out.println("***********************************************************");
- System.out.println("***********************************************************");
- for (String message : pendingMessages) {
- System.out.println(message);
- }
- System.out.println("***********************************************************");
- System.out.println("***********************************************************");
- }
-
- private static T executeNullIfNotFound(
- AbstractGoogleClientRequest request) throws IOException {
- try {
- return request.execute();
- } catch (GoogleJsonResponseException e) {
- if (e.getStatusCode() == SC_NOT_FOUND) {
- return null;
- } else {
- throw e;
- }
- }
- }
-}
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/WriteWindowedFilesDoFn.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/WriteWindowedFilesDoFn.java
deleted file mode 100644
index a08e6a9b0f..0000000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/WriteWindowedFilesDoFn.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package}.common;
-
-import com.google.common.annotations.VisibleForTesting;
-import java.io.OutputStream;
-import java.nio.channels.Channels;
-import java.nio.charset.StandardCharsets;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
-import org.apache.beam.sdk.util.IOChannelFactory;
-import org.apache.beam.sdk.util.IOChannelUtils;
-import org.apache.beam.sdk.values.KV;
-import org.joda.time.format.DateTimeFormatter;
-import org.joda.time.format.ISODateTimeFormat;
-
-/**
- * A {@link DoFn} that writes elements to files with names deterministically derived from the lower
- * and upper bounds of their key (an {@link IntervalWindow}).
- *
- *
This is test utility code, not for end-users, so examples can be focused
- * on their primary lessons.
- */
-public class WriteWindowedFilesDoFn
- extends DoFn>>, Void> {
-
- static final byte[] NEWLINE = "\n".getBytes(StandardCharsets.UTF_8);
- static final Coder STRING_CODER = StringUtf8Coder.of();
-
- private static DateTimeFormatter formatter = ISODateTimeFormat.hourMinute();
-
- private final String output;
-
- public WriteWindowedFilesDoFn(String output) {
- this.output = output;
- }
-
- @VisibleForTesting
- public static String fileForWindow(String output, IntervalWindow window) {
- return String.format(
- "%s-%s-%s", output, formatter.print(window.start()), formatter.print(window.end()));
- }
-
- @ProcessElement
- public void processElement(ProcessContext context) throws Exception {
- // Build a file name from the window
- IntervalWindow window = context.element().getKey();
- String outputShard = fileForWindow(output, window);
-
- // Open the file and write all the values
- IOChannelFactory factory = IOChannelUtils.getFactory(outputShard);
- OutputStream out = Channels.newOutputStream(factory.create(outputShard, "text/plain"));
- for (KV wordCount : context.element().getValue()) {
- STRING_CODER.encode(
- wordCount.getKey() + ": " + wordCount.getValue(), out, Coder.Context.OUTER);
- out.write(NEWLINE);
- }
- out.close();
- }
-}
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java
deleted file mode 100644
index 155242d996..0000000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package};
-
-import com.google.common.io.Files;
-import java.io.File;
-import java.nio.charset.StandardCharsets;
-import ${package}.DebuggingWordCount.WordCountOptions;
-import org.apache.beam.sdk.testing.TestPipeline;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/**
- * Tests for {@link DebuggingWordCount}.
- */
-@RunWith(JUnit4.class)
-public class DebuggingWordCountTest {
- @Rule public TemporaryFolder tmpFolder = new TemporaryFolder();
-
- @Test
- public void testDebuggingWordCount() throws Exception {
- File inputFile = tmpFolder.newFile();
- File outputFile = tmpFolder.newFile();
- Files.write(
- "stomach secret Flourish message Flourish here Flourish",
- inputFile,
- StandardCharsets.UTF_8);
- WordCountOptions options =
- TestPipeline.testingPipelineOptions().as(WordCountOptions.class);
- options.setInputFile(inputFile.getAbsolutePath());
- options.setOutput(outputFile.getAbsolutePath());
- DebuggingWordCount.main(TestPipeline.convertToArgs(options));
- }
-}
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java
deleted file mode 100644
index e86c2aac96..0000000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package};
-
-import java.util.Arrays;
-import java.util.List;
-import ${package}.WordCount.CountWords;
-import ${package}.WordCount.ExtractWordsFn;
-import ${package}.WordCount.FormatAsTextFn;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.testing.PAssert;
-import org.apache.beam.sdk.testing.RunnableOnService;
-import org.apache.beam.sdk.testing.TestPipeline;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.DoFnTester;
-import org.apache.beam.sdk.transforms.MapElements;
-import org.apache.beam.sdk.values.PCollection;
-import org.hamcrest.CoreMatchers;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/**
- * Tests of WordCount.
- */
-@RunWith(JUnit4.class)
-public class WordCountTest {
-
- /** Example test that tests a specific {@link DoFn}. */
- @Test
- public void testExtractWordsFn() throws Exception {
- DoFnTester extractWordsFn =
- DoFnTester.of(new ExtractWordsFn());
-
- Assert.assertThat(extractWordsFn.processBundle(" some input words "),
- CoreMatchers.hasItems("some", "input", "words"));
- Assert.assertThat(extractWordsFn.processBundle(" "),
- CoreMatchers.hasItems());
- Assert.assertThat(extractWordsFn.processBundle(" some ", " input", " words"),
- CoreMatchers.hasItems("some", "input", "words"));
- }
-
- static final String[] WORDS_ARRAY = new String[] {
- "hi there", "hi", "hi sue bob",
- "hi sue", "", "bob hi"};
-
- static final List WORDS = Arrays.asList(WORDS_ARRAY);
-
- static final String[] COUNTS_ARRAY = new String[] {
- "hi: 5", "there: 1", "sue: 2", "bob: 2"};
-
- /** Example test that tests a PTransform by using an in-memory input and inspecting the output. */
- @Test
- @Category(RunnableOnService.class)
- public void testCountWords() throws Exception {
- Pipeline p = TestPipeline.create();
-
- PCollection input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of()));
-
- PCollection output = input.apply(new CountWords())
- .apply(MapElements.via(new FormatAsTextFn()));
-
- PAssert.that(output).containsInAnyOrder(COUNTS_ARRAY);
- p.run().waitUntilFinish();
- }
-}
diff --git a/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties
deleted file mode 100644
index 1f3c9c5178..0000000000
--- a/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright (C) 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not
-# use this file except in compliance with the License. You may obtain a copy of
-# the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations under
-# the License.
-
-package=it.pkg
-version=0.1-SNAPSHOT
-groupId=archetype.it
-artifactId=basic
-targetPlatform=1.7
diff --git a/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt b/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt
deleted file mode 100644
index 0b5987362f..0000000000
--- a/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt
+++ /dev/null
@@ -1 +0,0 @@
-verify
diff --git a/maven-archetypes/starter/pom.xml b/maven-archetypes/starter/pom.xml
deleted file mode 100644
index c600393712..0000000000
--- a/maven-archetypes/starter/pom.xml
+++ /dev/null
@@ -1,45 +0,0 @@
-
-
-
- 4.0.0
-
-
- com.google.cloud.dataflow
- google-cloud-dataflow-java-sdk-parent
- 2.0.0-beta1-SNAPSHOT
- ../../pom.xml
-
-
- google-cloud-dataflow-java-archetypes-starter
- Google Cloud Dataflow SDK for Java - Starter Archetype
- Google Cloud Dataflow SDK for Java is a distribution of Apache
- Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow
- service. This archetype creates a simple starter pipeline to get started
- using the Google Cloud Dataflow SDK for Java.
-
- maven-archetype
-
-
-
-
- org.apache.maven.archetype
- archetype-packaging
- 2.4
-
-
-
-
diff --git a/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
deleted file mode 100644
index 4c22d5d68b..0000000000
--- a/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
+++ /dev/null
@@ -1,36 +0,0 @@
-
-
-
-
-
- 1.7
-
-
-
-
-
- src/main/java
-
- **/*.java
-
-
-
-
diff --git a/maven-archetypes/starter/src/main/resources/NOTICE b/maven-archetypes/starter/src/main/resources/NOTICE
deleted file mode 100644
index 981fde5a9e..0000000000
--- a/maven-archetypes/starter/src/main/resources/NOTICE
+++ /dev/null
@@ -1,5 +0,0 @@
-Google Cloud Dataflow SDK for Java
-Copyright 2017, Google Inc.
-
-This product includes software developed at
-The Apache Software Foundation (http://www.apache.org/).
diff --git a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
deleted file mode 100644
index 183b29062c..0000000000
--- a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
+++ /dev/null
@@ -1,92 +0,0 @@
-
-
-
- 4.0.0
-
- ${groupId}
- ${artifactId}
- ${version}
-
-
- UTF-8
-
-
-
-
- ossrh.snapshots
- Sonatype OSS Repository Hosting
- https://oss.sonatype.org/content/repositories/snapshots/
-
- false
-
-
- true
-
-
-
-
- jar
-
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
- 3.3
-
- ${targetPlatform}
- ${targetPlatform}
-
-
-
-
-
-
-
- org.codehaus.mojo
- exec-maven-plugin
- 1.4.0
-
- false
-
-
-
-
-
-
-
-
- com.google.cloud.dataflow
- google-cloud-dataflow-java-sdk-all
- 2.0.0-beta1-SNAPSHOT
-
-
-
-
- org.slf4j
- slf4j-api
- 1.7.7
-
-
- org.slf4j
- slf4j-jdk14
- 1.7.7
-
-
-
diff --git a/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java b/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java
deleted file mode 100644
index d6afdecf11..0000000000
--- a/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package};
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.MapElements;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.SimpleFunction;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * A starter example for writing Beam programs.
- *
- *
The example takes two strings, converts them to their upper-case
- * representation and logs them.
- *
- *
To run this starter example locally using DirectRunner, just
- * execute it without any additional parameters from your favorite development
- * environment.
- *
- *
To run this starter example using managed resource in Google Cloud
- * Platform, you should specify the following command-line options:
- * --project=
- * --stagingLocation=
- * --runner=DataflowRunner
- */
-public class StarterPipeline {
- private static final Logger LOG = LoggerFactory.getLogger(StarterPipeline.class);
-
- public static void main(String[] args) {
- Pipeline p = Pipeline.create(
- PipelineOptionsFactory.fromArgs(args).withValidation().create());
-
- p.apply(Create.of("Hello", "World"))
- .apply(MapElements.via(new SimpleFunction() {
- @Override
- public String apply(String input) {
- return input.toUpperCase();
- }
- }))
- .apply(ParDo.of(new DoFn() {
- @ProcessElement
- public void processElement(ProcessContext c) {
- LOG.info(c.element());
- }
- }));
-
- p.run();
- }
-}
diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties
deleted file mode 100644
index 1f3c9c5178..0000000000
--- a/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright (C) 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not
-# use this file except in compliance with the License. You may obtain a copy of
-# the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations under
-# the License.
-
-package=it.pkg
-version=0.1-SNAPSHOT
-groupId=archetype.it
-artifactId=basic
-targetPlatform=1.7
diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt b/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt
deleted file mode 100644
index 0b5987362f..0000000000
--- a/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt
+++ /dev/null
@@ -1 +0,0 @@
-verify
diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml b/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
deleted file mode 100644
index db4b1c9784..0000000000
--- a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
+++ /dev/null
@@ -1,92 +0,0 @@
-
-
-
- 4.0.0
-
- archetype.it
- basic
- 0.1-SNAPSHOT
-
-
- UTF-8
-
-
-
-
- ossrh.snapshots
- Sonatype OSS Repository Hosting
- https://oss.sonatype.org/content/repositories/snapshots/
-
- false
-
-
- true
-
-
-
-
- jar
-
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
- 3.3
-
- 1.7
- 1.7
-
-
-
-
-
-
-
- org.codehaus.mojo
- exec-maven-plugin
- 1.4.0
-
- false
-
-
-
-
-
-
-
-
- com.google.cloud.dataflow
- google-cloud-dataflow-java-sdk-all
- 2.0.0-beta1-SNAPSHOT
-
-
-
-
- org.slf4j
- slf4j-api
- 1.7.7
-
-
- org.slf4j
- slf4j-jdk14
- 1.7.7
-
-
-
diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java b/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java
deleted file mode 100644
index 4ae92e8ce6..0000000000
--- a/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package it.pkg;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.MapElements;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.SimpleFunction;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * A starter example for writing Beam programs.
- *
- *
The example takes two strings, converts them to their upper-case
- * representation and logs them.
- *
- *
To run this starter example locally using DirectRunner, just
- * execute it without any additional parameters from your favorite development
- * environment.
- *
- *
To run this starter example using managed resource in Google Cloud
- * Platform, you should specify the following command-line options:
- * --project=
- * --stagingLocation=
- * --runner=DataflowRunner
- */
-public class StarterPipeline {
- private static final Logger LOG = LoggerFactory.getLogger(StarterPipeline.class);
-
- public static void main(String[] args) {
- Pipeline p = Pipeline.create(
- PipelineOptionsFactory.fromArgs(args).withValidation().create());
-
- p.apply(Create.of("Hello", "World"))
- .apply(MapElements.via(new SimpleFunction() {
- @Override
- public String apply(String input) {
- return input.toUpperCase();
- }
- }))
- .apply(ParDo.of(new DoFn() {
- @ProcessElement
- public void processElement(ProcessContext c) {
- LOG.info(c.element());
- }
- }));
-
- p.run();
- }
-}
diff --git a/pom.xml b/pom.xml
deleted file mode 100644
index c3c64811f4..0000000000
--- a/pom.xml
+++ /dev/null
@@ -1,371 +0,0 @@
-
-
-
- 4.0.0
-
-
- com.google
- google
- 5
-
-
- com.google.cloud.dataflow
- google-cloud-dataflow-java-sdk-parent
- Google Cloud Dataflow SDK for Java - Parent
- Google Cloud Dataflow SDK for Java is a distribution of Apache
- Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow
- service. This artifact includes the parent POM for other Dataflow SDK
- artifacts.
- http://cloud.google.com/dataflow
- 2013
-
- 2.0.0-beta1-SNAPSHOT
-
-
-
- Apache License, Version 2.0
- http://www.apache.org/licenses/LICENSE-2.0.txt
- repo
-
-
-
-
-
- Google Inc.
- http://www.google.com
-
-
-
-
- scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git
- scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git
- git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git
- release-2.0.0-beta1
-
-
-
-
- ossrh
- https://oss.sonatype.org/content/repositories/snapshots
-
-
- ossrh
- https://oss.sonatype.org/service/local/staging/deploy/maven2/
-
-
-
-
- 3.0.3
-
-
-
- UTF-8
- ${maven.build.timestamp}
- yyyy-MM-dd HH:mm
-
- 0.4.0
-
-
- pom
-
- sdk
- examples
- maven-archetypes/starter
- maven-archetypes/examples
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-enforcer-plugin
- 1.4.1
-
-
- enforce-java
-
- enforce
-
-
-
-
-
- [1.8.0,)
-
-
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-clean-plugin
- 3.0.0
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
- 3.6.0
-
- 1.7
- 1.7
-
- -Xlint:all
- -Werror
- -Xlint:-options
-
- true
-
-
-
-
- org.apache.maven.plugins
- maven-checkstyle-plugin
- 2.17
-
-
- com.puppycrawl.tools
- checkstyle
- 6.19
-
-
-
- sdk/checkstyle.xml
- sdk/suppressions.xml
- true
- true
- true
- true
-
-
-
- verify
-
- check
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-jar-plugin
- 3.0.2
-
- true
-
-
-
- default-jar
-
- jar
-
-
-
- default-test-jar
-
- test-jar
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-source-plugin
- 3.0.1
-
-
- attach-sources
- compile
-
- jar
-
-
-
- attach-test-sources
- test-compile
-
- test-jar
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-dependency-plugin
- 3.0.0
-
-
-
- analyze-only
-
-
- true
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-surefire-plugin
- 2.19.1
-
-
-
- org.apache.maven.plugins
- maven-archetype-plugin
- 2.4
-
-
- org.apache.maven.shared
- maven-invoker
- 2.2
-
-
-
-
-
- default-integration-test
- install
-
- integration-test
-
-
- true
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-release-plugin
- 2.5.3
-
- true
- true
- deploy
-
-
-
-
- org.codehaus.mojo
- exec-maven-plugin
- 1.5.0
-
- false
-
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-enforcer-plugin
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
-
-
-
- org.apache.maven.plugins
- maven-source-plugin
-
-
-
- org.apache.maven.plugins
- maven-dependency-plugin
-
-
-
- org.apache.maven.plugins
- maven-surefire-plugin
-
-
-
- org.apache.maven.plugins
- maven-checkstyle-plugin
-
-
-
-
-
-
-
- com.google.cloud.dataflow
- google-cloud-dataflow-java-sdk-all
- ${project.version}
-
-
-
- org.apache.beam
- beam-sdks-java-core
- ${beam.version}
-
-
-
- org.apache.beam
- beam-sdks-java-io-google-cloud-platform
- ${beam.version}
-
-
-
- org.apache.beam
- beam-runners-direct-java
- ${beam.version}
-
-
-
- org.apache.beam
- beam-runners-google-cloud-dataflow-java
- ${beam.version}
-
-
-
- org.apache.beam
- beam-examples-java
- ${beam.version}
-
-
-
- org.apache.beam
- beam-examples-java8
- ${beam.version}
-
-
-
-
diff --git a/sdk/checkstyle.xml b/sdk/checkstyle.xml
deleted file mode 100644
index 1769c8bed9..0000000000
--- a/sdk/checkstyle.xml
+++ /dev/null
@@ -1,458 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/sdk/pom.xml b/sdk/pom.xml
deleted file mode 100644
index 8dd014f9f7..0000000000
--- a/sdk/pom.xml
+++ /dev/null
@@ -1,64 +0,0 @@
-
-
-
- 4.0.0
-
-
- com.google.cloud.dataflow
- google-cloud-dataflow-java-sdk-parent
- 2.0.0-beta1-SNAPSHOT
-
-
- google-cloud-dataflow-java-sdk-all
- Google Cloud Dataflow SDK for Java - All
- Google Cloud Dataflow SDK for Java is a distribution of Apache
- Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow
- service. This artifact includes entire Dataflow Java SDK.
-
- jar
-
-
-
-
- src/main/resources
- true
-
-
-
-
-
-
- org.apache.beam
- beam-sdks-java-core
-
-
-
- org.apache.beam
- beam-sdks-java-io-google-cloud-platform
-
-
-
- org.apache.beam
- beam-runners-direct-java
-
-
-
- org.apache.beam
- beam-runners-google-cloud-dataflow-java
-
-
-
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java
deleted file mode 100644
index 7bbfbe3729..0000000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (C) 2017 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk;
-
-import org.apache.beam.runners.dataflow.DataflowRunner;
-import org.apache.beam.runners.direct.DirectRunner;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
-
-/**
- * Mark the dependencies as used at compile time.
- */
-class SdkDependencies {
- private Pipeline p;
- private BigQueryIO bigQueryIO;
- private DirectRunner directRunner;
- private DataflowRunner dataflowRunner;
-}
diff --git a/sdk/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties b/sdk/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties
deleted file mode 100644
index 1af27fdd48..0000000000
--- a/sdk/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (C) 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not
-# use this file except in compliance with the License. You may obtain a copy of
-# the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations under
-# the License.
-
-environment.major.version=6
-
-worker.image.batch=dataflow.gcr.io/v1beta3/beam-java-batch:${project.version}-20170103
-worker.image.streaming=dataflow.gcr.io/v1beta3/beam-java-streaming:${project.version}-20170103
diff --git a/sdk/src/main/resources/org/apache/beam/sdk/sdk.properties b/sdk/src/main/resources/org/apache/beam/sdk/sdk.properties
deleted file mode 100644
index a9df3b5b5c..0000000000
--- a/sdk/src/main/resources/org/apache/beam/sdk/sdk.properties
+++ /dev/null
@@ -1,17 +0,0 @@
-# Copyright (C) 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not
-# use this file except in compliance with the License. You may obtain a copy of
-# the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations under
-# the License.
-
-name=Google Cloud Dataflow SDK for Java
-version=${pom.version}
-build.date=${timestamp}
diff --git a/sdk/suppressions.xml b/sdk/suppressions.xml
deleted file mode 100644
index c3635c9bb8..0000000000
--- a/sdk/suppressions.xml
+++ /dev/null
@@ -1,31 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-