diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index c39158cf00..0000000000 --- a/.gitattributes +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -# The default behavior, which overrides 'core.autocrlf', is to use Git's -# built-in heuristics to determine whether a particular file is text or binary. -# Text files are automatically normalized to the user's platforms. -* text=auto - -# Explicitly declare text files that should always be normalized and converted -# to native line endings. -.gitattributes text -.gitignore text -LICENSE text -*.avsc text -*.html text -*.java text -*.md text -*.properties text -*.proto text -*.py text -*.sh text -*.xml text -*.yml text - -# Declare files that will always have CRLF line endings on checkout. -# *.sln text eol=crlf - -# Explicitly denote all files that are truly binary and should not be modified. -# *.jpg binary diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 2a27023c28..0000000000 --- a/.gitignore +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -target/ - -# Ignore IntelliJ files. -.idea/ -*.iml -*.ipr -*.iws - -# Ignore Eclipse files. -.classpath -.project -.settings/ - -# The build process generates the dependency-reduced POM, but it shouldn't be -# committed. -dependency-reduced-pom.xml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 8fa5d9a932..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -language: java - -sudo: false - -notifications: - email: - # Group email notifications are disabled for now, since we cannot do it on a per-branch basis. - # Right now, it would trigger a notification for each fork, which generates a lot of spam. - # recipients: - # - dataflow-sdk-build-notifications+travis@google.com - on_success: change - on_failure: always - -matrix: - include: - # On OSX, run with default JDK only. - - os: osx - # On Linux, run with specific JDKs only. - - os: linux - env: CUSTOM_JDK="oraclejdk8" - # The distribution does not build with Java 7 by design. We need to rewrite these tests - # to, for example, build and install with Java 8 and then test examples with Java 7. - # - os: linux - # env: CUSTOM_JDK="oraclejdk7" - # - os: linux - # env: CUSTOM_JDK="openjdk7" - -before_install: - - if [ "$TRAVIS_OS_NAME" == "osx" ]; then export JAVA_HOME=$(/usr/libexec/java_home); fi - - if [ "$TRAVIS_OS_NAME" == "linux" ]; then jdk_switcher use "$CUSTOM_JDK"; fi - -install: - - travis_retry mvn install clean -U -DskipTests=true - -script: - # Verify that the project can be built and installed. - - mvn install - # Verify that starter and examples archetypes have the correct version of the NOTICE file. - - diff -q NOTICE maven-archetypes/starter/src/main/resources/NOTICE - - diff -q NOTICE maven-archetypes/examples/src/main/resources/NOTICE diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 9b616e5fe3..0000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,51 +0,0 @@ - - -Want to contribute? Great! First, read this page (including the small print at -the end). - -Google Cloud Dataflow SDK is a distribution of Apache Beam. If you'd like to -change anything under the `org.apache.beam.*` namespace, please submit that -change directly to the [Apache Beam](https://github.com/apache/beam) project. - -This repository contains code to build the Dataflow distribution of Beam, and -some Dataflow-specific code. Only changes to how the distribution is built, or -the Dataflow-specific code under the `com.google.cloud.dataflow.*` namespace, -can be merged here. - -### Before you contribute -Before we can use your code, you must sign the -[Google Individual Contributor License Agreement](https://developers.google.com/open-source/cla/individual?csw=1) -(CLA), which you can do online. The CLA is necessary mainly because you own the -copyright to your changes, even after your contribution becomes part of our -codebase, so we need your permission to use and distribute your code. We also -need to be sure of various other things. For instance that you'll tell us if you -know that your code infringes on other people's patents. You don't have to sign -the CLA until after you've submitted your code for review and a member has -approved it, but you must do it before we can put your code into our codebase. - -Before you start working on a larger contribution, we recommend to get in touch -with us first through the issue tracker with your idea so that we can help out -and possibly guide you. Coordinating up front makes it much easier to avoid -frustration later on. - -### Code reviews -All submissions, including submissions by project members, require review. We -use GitHub pull requests for this purpose. - -### The small print -Contributions made by corporations are covered by a different agreement than -the one above, the Software Grant and Corporate Contributor License Agreement. diff --git a/LICENSE b/LICENSE deleted file mode 100644 index d645695673..0000000000 --- a/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/NOTICE b/NOTICE deleted file mode 100644 index 981fde5a9e..0000000000 --- a/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Google Cloud Dataflow SDK for Java -Copyright 2017, Google Inc. - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). diff --git a/README.md b/README.md index a644049218..dfb630ad79 100644 --- a/README.md +++ b/README.md @@ -16,106 +16,34 @@ # Google Cloud Dataflow SDK for Java -[Google Cloud Dataflow](https://cloud.google.com/dataflow/) provides a simple, -powerful programming model for building both batch and streaming parallel data -processing pipelines. - -Dataflow SDK for Java is a distribution of a portion of the -[Apache Beam](https://beam.apache.org) project. This repository hosts the -code to build this distribution and any Dataflow-specific code/modules. The -underlying source code is hosted in the -[Apache Beam repository](https://github.com/apache/beam). - -[General usage](https://cloud.google.com/dataflow/getting-started) of Google -Cloud Dataflow does **not** require use of this repository. Instead: - -1. depend directly on a specific -[version](https://cloud.google.com/dataflow/release-notes/java) of the SDK in -the [Maven Central Repository](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.google.cloud.dataflow%22) -by adding the following dependency to development -environments like Eclipse or Apache Maven: - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - version_number - - -1. download the example pipelines from the separate -[DataflowJavaSDK-examples](https://github.com/GoogleCloudPlatform/DataflowJavaSDK-examples) -repository. - - - -## Status [![Build Status](https://travis-ci.org/GoogleCloudPlatform/DataflowJavaSDK.svg?branch=v2)](https://travis-ci.org/GoogleCloudPlatform/DataflowJavaSDK) - -This branch is a work-in-progress for the Dataflow SDK for Java, version 2.0.0. -It is currently supported on the Cloud Dataflow service in Beta. - - - -## Overview - -The key concepts in this programming model are: - -* [`PCollection`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java): -represents a collection of data, which could be bounded or unbounded in size. -* [`PTransform`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java): -represents a computation that transforms input PCollections into output -PCollections. -* [`Pipeline`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java): -manages a directed acyclic graph of PTransforms and PCollections that is ready -for execution. -* [`PipelineRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java): -specifies where and how the pipeline should execute. - -We provide two runners: - - 1. The [`DirectRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java) -runs the pipeline on your local machine. - 1. The [`DataflowRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java) -submits the pipeline to the Dataflow Service, where it runs using managed -resources in the [Google Cloud Platform](https://cloud.google.com) (GCP). - -The SDK is built to be extensible and support additional execution environments -beyond local execution and the Google Cloud Dataflow Service. Apache Beam -contains additional SDKs, runners, IO connectors, etc. +[Google Cloud Dataflow](https://cloud.google.com/dataflow/) is a service for executing [Apache Beam](https://beam.apache.org) pipelines on Google Cloud Platform. ## Getting Started -This repository consists of the following parts: - -* The [`sdk`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk) -module provides a set of basic Java APIs to program against. -* The [`examples`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples) -module provides a few samples to get started. We recommend starting with the -`WordCount` example. +* [Quickstart Using Java](https://cloud.google.com/dataflow/docs/quickstarts/quickstart-java-maven) on Google Cloud Dataflow +* [Java API Reference](https://beam.apache.org/documentation/sdks/javadoc/) +* [Java Examples](https://github.com/apache/beam/tree/master/examples/java) -The following command will build both the `sdk` and `example` modules and -install them in your local Maven repository: +## We moved to Apache Beam! +Apache Beam Java SDK and the code development moved to the [Apache Beam repo](https://github.com/apache/beam/tree/master/sdks/java). - mvn clean install - -After building and installing, you can execute the `WordCount` and other -example pipelines by following the instructions in this -[README](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/README.md). +If you want to contribute to the project (please do!) use this [Apache Beam contributor's guide](http://beam.apache.org/contribution-guide/) ## Contact Us -We welcome all usage-related questions on [Stack Overflow](http://stackoverflow.com/questions/tagged/google-cloud-dataflow) +We welcome all usage-related questions on +[Stack Overflow](https://stackoverflow.com/questions/tagged/google-cloud-dataflow) tagged with `google-cloud-dataflow`. -Please use [issue tracker](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/issues) -on GitHub to report any bugs, comments or questions regarding SDK development. +Please use the +[issue tracker](https://issues.apache.org/jira/browse/BEAM) +on Apache JIRA to report any bugs, comments or questions regarding SDK development. ## More Information * [Google Cloud Dataflow](https://cloud.google.com/dataflow/) * [Apache Beam](https://beam.apache.org/) -* [Dataflow Concepts and Programming Model](https://cloud.google.com/dataflow/model/programming-model) -* [Java API Reference](https://cloud.google.com/dataflow/java-sdk/JavaDoc/index) +* [Dataflow Concepts and Programming Model](https://beam.apache.org/documentation/programming-guide/) +* [Java API Reference](https://beam.apache.org/documentation/sdks/javadoc/) + +_Apache, Apache Beam and the orange letter B logo are either registered trademarks or trademarks of the Apache Software Foundation in the United States and/or other countries._ diff --git a/examples/pom.xml b/examples/pom.xml deleted file mode 100644 index 4c8584a91c..0000000000 --- a/examples/pom.xml +++ /dev/null @@ -1,51 +0,0 @@ - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-parent - 2.0.0-beta1-SNAPSHOT - - - google-cloud-dataflow-java-examples-all - Google Cloud Dataflow Java Examples - All - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This artifact includes all Dataflow Java SDK - examples. - - jar - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - - - - org.apache.beam - beam-examples-java - - - - org.apache.beam - beam-examples-java8 - - - diff --git a/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java b/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java deleted file mode 100644 index 827aff8395..0000000000 --- a/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2017 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.dataflow.sdk; - -import org.apache.beam.examples.MinimalWordCountJava8; -import org.apache.beam.examples.WordCount; - -/** - * Mark the examples dependencies as used at compile time. This is also needed - * to produce some content in the final JAR file. - */ -class ExamplesDependencies { - SdkDependencies sdkDependencies; - WordCount wordCount; - MinimalWordCountJava8 minimalWordCount; -} diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml deleted file mode 100644 index fa781ac432..0000000000 --- a/maven-archetypes/examples/pom.xml +++ /dev/null @@ -1,45 +0,0 @@ - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-parent - 2.0.0-beta1-SNAPSHOT - ../../pom.xml - - - google-cloud-dataflow-java-archetypes-examples - Google Cloud Dataflow SDK for Java - Examples Archetype - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This archetype creates a project containing all the example - pipelines. - - maven-archetype - - - - - org.apache.maven.archetype - archetype-packaging - 2.4 - - - - diff --git a/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml deleted file mode 100644 index 2b9eb52d80..0000000000 --- a/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml +++ /dev/null @@ -1,44 +0,0 @@ - - - - - - - 1.7 - - - - - - src/main/java - - **/*.java - - - - - src/test/java - - **/*.java - - - - diff --git a/maven-archetypes/examples/src/main/resources/NOTICE b/maven-archetypes/examples/src/main/resources/NOTICE deleted file mode 100644 index 981fde5a9e..0000000000 --- a/maven-archetypes/examples/src/main/resources/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Google Cloud Dataflow SDK for Java -Copyright 2017, Google Inc. - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml deleted file mode 100644 index 163212d61a..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml +++ /dev/null @@ -1,229 +0,0 @@ - - - - 4.0.0 - - ${groupId} - ${artifactId} - ${version} - - - UTF-8 - - - - - ossrh.snapshots - Sonatype OSS Repository Hosting - https://oss.sonatype.org/content/repositories/snapshots/ - - false - - - true - - - - - jar - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.3 - - ${targetPlatform} - ${targetPlatform} - - - - - org.apache.maven.plugins - maven-shade-plugin - 2.3 - - - package - - shade - - - ${project.artifactId}-bundled-${project.version} - - - *:* - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - 2.18.1 - - all - 4 - true - - - - org.apache.maven.surefire - surefire-junit47 - 2.18.1 - - - - - - - - - org.codehaus.mojo - exec-maven-plugin - 1.4.0 - - false - - - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - 2.0.0-beta1-SNAPSHOT - - - - com.google.api-client - google-api-client - 1.22.0 - - - - com.google.guava - guava-jdk5 - - - - - - - com.google.apis - google-api-services-bigquery - v2-rev295-1.22.0 - - - - com.google.guava - guava-jdk5 - - - - - - com.google.http-client - google-http-client - 1.22.0 - - - - com.google.guava - guava-jdk5 - - - - - - com.google.apis - google-api-services-pubsub - v1-rev10-1.22.0 - - - - com.google.guava - guava-jdk5 - - - - - - joda-time - joda-time - 2.4 - - - - com.google.guava - guava - 19.0 - - - - - org.slf4j - slf4j-api - 1.7.7 - - - - org.slf4j - slf4j-jdk14 - 1.7.14 - - runtime - - - - - org.hamcrest - hamcrest-all - 1.3 - - - - junit - junit - 4.11 - - - diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java deleted file mode 100644 index 99ae79687c..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import java.util.Arrays; -import java.util.List; -import java.util.regex.Pattern; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.Aggregator; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.Sum; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * An example that verifies word counts in Shakespeare and includes Beam best practices. - * - *

This class, {@link DebuggingWordCount}, is the third in a series of four successively more - * detailed 'word count' examples. You may first want to take a look at {@link MinimalWordCount} - * and {@link WordCount}. After you've looked at this example, then see the - * {@link WindowedWordCount} pipeline, for introduction of additional concepts. - * - *

Basic concepts, also in the MinimalWordCount and WordCount examples: - * Reading text files; counting a PCollection; executing a Pipeline both locally - * and using a selected runner; defining DoFns. - * - *

New Concepts: - *

- *   1. Logging using SLF4J, even in a distributed environment
- *   2. Creating a custom aggregator (runners have varying levels of support)
- *   3. Testing your Pipeline via PAssert
- * 
- * - *

To execute this pipeline locally, specify general pipeline configuration: - *

{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * 
- * - *

To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * - *

The input file defaults to a public data set containing the text of of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - * - */ -public class DebuggingWordCount { - /** A DoFn that filters for a specific key based upon a regular expression. */ - public static class FilterTextFn extends DoFn, KV> { - /** - * Concept #1: The logger below uses the fully qualified class name of FilterTextFn as the - * logger. Depending on your SLF4J configuration, log statements will likely be qualified by - * this name. - * - *

Note that this is entirely standard SLF4J usage. Some runners may provide a default SLF4J - * configuration that is most appropriate for their logging integration. - */ - private static final Logger LOG = LoggerFactory.getLogger(FilterTextFn.class); - - private final Pattern filter; - public FilterTextFn(String pattern) { - filter = Pattern.compile(pattern); - } - - /** - * Concept #2: A custom aggregator can track values in your pipeline as it runs. Each - * runner provides varying levels of support for aggregators, and may expose them - * in a dashboard, etc. - */ - private final Aggregator matchedWords = - createAggregator("matchedWords", new Sum.SumLongFn()); - private final Aggregator unmatchedWords = - createAggregator("umatchedWords", new Sum.SumLongFn()); - - @ProcessElement - public void processElement(ProcessContext c) { - if (filter.matcher(c.element().getKey()).matches()) { - // Log at the "DEBUG" level each element that we match. When executing this pipeline - // these log lines will appear only if the log level is set to "DEBUG" or lower. - LOG.debug("Matched: " + c.element().getKey()); - matchedWords.addValue(1L); - c.output(c.element()); - } else { - // Log at the "TRACE" level each element that is not matched. Different log levels - // can be used to control the verbosity of logging providing an effective mechanism - // to filter less important information. - LOG.trace("Did not match: " + c.element().getKey()); - unmatchedWords.addValue(1L); - } - } - } - - /** - * Options supported by {@link DebuggingWordCount}. - * - *

Inherits standard configuration options and all options defined in - * {@link WordCount.WordCountOptions}. - */ - public interface WordCountOptions extends WordCount.WordCountOptions { - - @Description("Regex filter pattern to use in DebuggingWordCount. " - + "Only words matching this pattern will be counted.") - @Default.String("Flourish|stomach") - String getFilterPattern(); - void setFilterPattern(String value); - } - - public static void main(String[] args) { - WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() - .as(WordCountOptions.class); - Pipeline p = Pipeline.create(options); - - PCollection> filteredWords = - p.apply("ReadLines", TextIO.Read.from(options.getInputFile())) - .apply(new WordCount.CountWords()) - .apply(ParDo.of(new FilterTextFn(options.getFilterPattern()))); - - /** - * Concept #3: PAssert is a set of convenient PTransforms in the style of - * Hamcrest's collection matchers that can be used when writing Pipeline level tests - * to validate the contents of PCollections. PAssert is best used in unit tests - * with small data sets but is demonstrated here as a teaching tool. - * - *

Below we verify that the set of filtered words matches our expected counts. Note - * that PAssert does not provide any output and that successful completion of the - * Pipeline implies that the expectations were met. Learn more at - * https://cloud.google.com/dataflow/pipelines/testing-your-pipeline on how to test - * your Pipeline and see {@link DebuggingWordCountTest} for an example unit test. - */ - List> expectedResults = Arrays.asList( - KV.of("Flourish", 3L), - KV.of("stomach", 1L)); - PAssert.that(filteredWords).containsInAnyOrder(expectedResults); - - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java deleted file mode 100644 index 97bd8243b8..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.apache.beam.sdk.values.KV; - - -/** - * An example that counts words in Shakespeare. - * - *

This class, {@link MinimalWordCount}, is the first in a series of four successively more - * detailed 'word count' examples. Here, for simplicity, we don't show any error-checking or - * argument processing, and focus on construction of the pipeline, which chains together the - * application of core transforms. - * - *

Next, see the {@link WordCount} pipeline, then the {@link DebuggingWordCount}, and finally the - * {@link WindowedWordCount} pipeline, for more detailed examples that introduce additional - * concepts. - * - *

Concepts: - * - *

- *   1. Reading data from text files
- *   2. Specifying 'inline' transforms
- *   3. Counting items in a PCollection
- *   4. Writing data to text files
- * 
- * - *

No arguments are required to run this pipeline. It will be executed with the DirectRunner. You - * can see the results in the output files in your current working directory, with names like - * "wordcounts-00001-of-00005. When running on a distributed service, you would use an appropriate - * file service. - */ -public class MinimalWordCount { - - public static void main(String[] args) { - // Create a PipelineOptions object. This object lets us set various execution - // options for our pipeline, such as the runner you wish to use. This example - // will run with the DirectRunner by default, based on the class path configured - // in its dependencies. - PipelineOptions options = PipelineOptionsFactory.create(); - - // Create the Pipeline object with the options we defined above. - Pipeline p = Pipeline.create(options); - - // Apply the pipeline's transforms. - - // Concept #1: Apply a root transform to the pipeline; in this case, TextIO.Read to read a set - // of input text files. TextIO.Read returns a PCollection where each element is one line from - // the input text (a set of Shakespeare's texts). - - // This example reads a public data set consisting of the complete works of Shakespeare. - p.apply(TextIO.Read.from("gs://apache-beam-samples/shakespeare/*")) - - // Concept #2: Apply a ParDo transform to our PCollection of text lines. This ParDo invokes a - // DoFn (defined in-line) on each element that tokenizes the text line into individual words. - // The ParDo returns a PCollection, where each element is an individual word in - // Shakespeare's collected texts. - .apply("ExtractWords", ParDo.of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - for (String word : c.element().split("[^a-zA-Z']+")) { - if (!word.isEmpty()) { - c.output(word); - } - } - } - })) - - // Concept #3: Apply the Count transform to our PCollection of individual words. The Count - // transform returns a new PCollection of key/value pairs, where each key represents a unique - // word in the text. The associated value is the occurrence count for that word. - .apply(Count.perElement()) - - // Apply a MapElements transform that formats our PCollection of word counts into a printable - // string, suitable for writing to an output file. - .apply("FormatResults", MapElements.via(new SimpleFunction, String>() { - @Override - public String apply(KV input) { - return input.getKey() + ": " + input.getValue(); - } - })) - - // Concept #4: Apply a write transform, TextIO.Write, at the end of the pipeline. - // TextIO.Write writes the contents of a PCollection (in this case, our PCollection of - // formatted strings) to a series of text files. - // - // By default, it will write to a set of files with names like wordcount-00001-of-00005 - .apply(TextIO.Write.to("wordcounts")); - - // Run the pipeline. - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java deleted file mode 100644 index 052d7b6a0e..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import java.io.IOException; -import java.util.concurrent.ThreadLocalRandom; -import ${package}.common.ExampleBigQueryTableOptions; -import ${package}.common.ExampleOptions; -import ${package}.common.WriteWindowedFilesDoFn; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.GroupByKey; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.joda.time.Duration; -import org.joda.time.Instant; - - -/** - * An example that counts words in text, and can run over either unbounded or bounded input - * collections. - * - *

This class, {@link WindowedWordCount}, is the last in a series of four successively more - * detailed 'word count' examples. First take a look at {@link MinimalWordCount}, - * {@link WordCount}, and {@link DebuggingWordCount}. - * - *

Basic concepts, also in the MinimalWordCount, WordCount, and DebuggingWordCount examples: - * Reading text files; counting a PCollection; writing to GCS; executing a Pipeline both locally - * and using a selected runner; defining DoFns; creating a custom aggregator; - * user-defined PTransforms; defining PipelineOptions. - * - *

New Concepts: - *

- *   1. Unbounded and bounded pipeline input modes
- *   2. Adding timestamps to data
- *   3. Windowing
- *   4. Re-using PTransforms over windowed PCollections
- *   5. Accessing the window of an element
- *   6. Writing data to per-window text files
- * 
- * - *

By default, the examples will run with the {@code DirectRunner}. - * To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * See examples/java/README.md for instructions about how to configure different runners. - * - *

To execute this pipeline locally, specify a local output file (if using the - * {@code DirectRunner}) or output prefix on a supported distributed file system. - *

{@code
- *   --output=[YOUR_LOCAL_FILE | YOUR_OUTPUT_PREFIX]
- * }
- * - *

The input file defaults to a public data set containing the text of of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - * - *

By default, the pipeline will do fixed windowing, on 1-minute windows. You can - * change this interval by setting the {@code --windowSize} parameter, e.g. {@code --windowSize=10} - * for 10-minute windows. - * - *

The example will try to cancel the pipeline on the signal to terminate the process (CTRL-C). - */ -public class WindowedWordCount { - static final int WINDOW_SIZE = 10; // Default window duration in minutes - /** - * Concept #2: A DoFn that sets the data element timestamp. This is a silly method, just for - * this example, for the bounded data case. - * - *

Imagine that many ghosts of Shakespeare are all typing madly at the same time to recreate - * his masterworks. Each line of the corpus will get a random associated timestamp somewhere in a - * 2-hour period. - */ - static class AddTimestampFn extends DoFn { - private static final Duration RAND_RANGE = Duration.standardHours(1); - private final Instant minTimestamp; - private final Instant maxTimestamp; - - AddTimestampFn(Instant minTimestamp, Instant maxTimestamp) { - this.minTimestamp = minTimestamp; - this.maxTimestamp = maxTimestamp; - } - - @ProcessElement - public void processElement(ProcessContext c) { - Instant randomTimestamp = - new Instant( - ThreadLocalRandom.current() - .nextLong(minTimestamp.getMillis(), maxTimestamp.getMillis())); - - /** - * Concept #2: Set the data element with that timestamp. - */ - c.outputWithTimestamp(c.element(), new Instant(randomTimestamp)); - } - } - - /** A {@link DefaultValueFactory} that returns the current system time. */ - public static class DefaultToCurrentSystemTime implements DefaultValueFactory { - @Override - public Long create(PipelineOptions options) { - return System.currentTimeMillis(); - } - } - - /** A {@link DefaultValueFactory} that returns the minimum timestamp plus one hour. */ - public static class DefaultToMinTimestampPlusOneHour implements DefaultValueFactory { - @Override - public Long create(PipelineOptions options) { - return options.as(Options.class).getMinTimestampMillis() - + Duration.standardHours(1).getMillis(); - } - } - - /** - * Options for {@link WindowedWordCount}. - * - *

Inherits standard example configuration options, which allow specification of the - * runner, as well as the {@link WordCount.WordCountOptions} support for - * specification of the input and output files. - */ - public interface Options extends WordCount.WordCountOptions, - ExampleOptions, ExampleBigQueryTableOptions { - @Description("Fixed window duration, in minutes") - @Default.Integer(WINDOW_SIZE) - Integer getWindowSize(); - void setWindowSize(Integer value); - - @Description("Minimum randomly assigned timestamp, in milliseconds-since-epoch") - @Default.InstanceFactory(DefaultToCurrentSystemTime.class) - Long getMinTimestampMillis(); - void setMinTimestampMillis(Long value); - - @Description("Maximum randomly assigned timestamp, in milliseconds-since-epoch") - @Default.InstanceFactory(DefaultToMinTimestampPlusOneHour.class) - Long getMaxTimestampMillis(); - void setMaxTimestampMillis(Long value); - } - - public static void main(String[] args) throws IOException { - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - final String output = options.getOutput(); - final Duration windowSize = Duration.standardMinutes(options.getWindowSize()); - final Instant minTimestamp = new Instant(options.getMinTimestampMillis()); - final Instant maxTimestamp = new Instant(options.getMaxTimestampMillis()); - - Pipeline pipeline = Pipeline.create(options); - - /** - * Concept #1: the Beam SDK lets us run the same pipeline with either a bounded or - * unbounded input source. - */ - PCollection input = pipeline - /** Read from the GCS file. */ - .apply(TextIO.Read.from(options.getInputFile())) - // Concept #2: Add an element timestamp, using an artificial time just to show windowing. - // See AddTimestampFn for more detail on this. - .apply(ParDo.of(new AddTimestampFn(minTimestamp, maxTimestamp))); - - /** - * Concept #3: Window into fixed windows. The fixed window size for this example defaults to 1 - * minute (you can change this with a command-line option). See the documentation for more - * information on how fixed windows work, and for information on the other types of windowing - * available (e.g., sliding windows). - */ - PCollection windowedWords = - input.apply( - Window.into( - FixedWindows.of(Duration.standardMinutes(options.getWindowSize())))); - - /** - * Concept #4: Re-use our existing CountWords transform that does not have knowledge of - * windows over a PCollection containing windowed values. - */ - PCollection> wordCounts = windowedWords.apply(new WordCount.CountWords()); - - /** - * Concept #5: Customize the output format using windowing information - * - *

At this point, the data is organized by window. We're writing text files and and have no - * late data, so for simplicity we can use the window as the key and {@link GroupByKey} to get - * one output file per window. (if we had late data this key would not be unique) - * - *

To access the window in a {@link DoFn}, add a {@link BoundedWindow} parameter. This will - * be automatically detected and populated with the window for the current element. - */ - PCollection>> keyedByWindow = - wordCounts.apply( - ParDo.of( - new DoFn, KV>>() { - @ProcessElement - public void processElement(ProcessContext context, IntervalWindow window) { - context.output(KV.of(window, context.element())); - } - })); - - /** - * Concept #6: Format the results and write to a sharded file partitioned by window, using a - * simple ParDo operation. Because there may be failures followed by retries, the - * writes must be idempotent, but the details of writing to files is elided here. - */ - keyedByWindow - .apply(GroupByKey.>create()) - .apply(ParDo.of(new WriteWindowedFilesDoFn(output))); - - PipelineResult result = pipeline.run(); - try { - result.waitUntilFinish(); - } catch (Exception exc) { - result.cancel(); - } - } - -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java deleted file mode 100644 index 634dea1a0a..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.options.Validation.Required; -import org.apache.beam.sdk.transforms.Aggregator; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.apache.beam.sdk.transforms.Sum; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; - -/** - * An example that counts words in Shakespeare and includes Beam best practices. - * - *

This class, {@link WordCount}, is the second in a series of four successively more detailed - * 'word count' examples. You may first want to take a look at {@link MinimalWordCount}. - * After you've looked at this example, then see the {@link DebuggingWordCount} - * pipeline, for introduction of additional concepts. - * - *

For a detailed walkthrough of this example, see - * - * http://beam.apache.org/use/walkthroughs/ - * - * - *

Basic concepts, also in the MinimalWordCount example: - * Reading text files; counting a PCollection; writing to text files - * - *

New Concepts: - *

- *   1. Executing a Pipeline both locally and using the selected runner
- *   2. Using ParDo with static DoFns defined out-of-line
- *   3. Building a composite transform
- *   4. Defining your own pipeline options
- * 
- * - *

Concept #1: you can execute this pipeline either locally or using by selecting another runner. - * These are now command-line options and not hard-coded as they were in the MinimalWordCount - * example. - * - *

To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * - *

To execute this pipeline, specify a local output file (if using the - * {@code DirectRunner}) or output prefix on a supported distributed file system. - *

{@code
- *   --output=[YOUR_LOCAL_FILE | YOUR_OUTPUT_PREFIX]
- * }
- * - *

The input file defaults to a public data set containing the text of of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - */ -public class WordCount { - - /** - * Concept #2: You can make your pipeline assembly code less verbose by defining your DoFns - * statically out-of-line. This DoFn tokenizes lines of text into individual words; we pass it - * to a ParDo in the pipeline. - */ - static class ExtractWordsFn extends DoFn { - private final Aggregator emptyLines = - createAggregator("emptyLines", new Sum.SumLongFn()); - - @ProcessElement - public void processElement(ProcessContext c) { - if (c.element().trim().isEmpty()) { - emptyLines.addValue(1L); - } - - // Split the line into words. - String[] words = c.element().split("[^a-zA-Z']+"); - - // Output each word encountered into the output PCollection. - for (String word : words) { - if (!word.isEmpty()) { - c.output(word); - } - } - } - } - - /** A SimpleFunction that converts a Word and Count into a printable string. */ - public static class FormatAsTextFn extends SimpleFunction, String> { - @Override - public String apply(KV input) { - return input.getKey() + ": " + input.getValue(); - } - } - - /** - * A PTransform that converts a PCollection containing lines of text into a PCollection of - * formatted word counts. - * - *

Concept #3: This is a custom composite transform that bundles two transforms (ParDo and - * Count) as a reusable PTransform subclass. Using composite transforms allows for easy reuse, - * modular testing, and an improved monitoring experience. - */ - public static class CountWords extends PTransform, - PCollection>> { - @Override - public PCollection> expand(PCollection lines) { - - // Convert lines of text into individual words. - PCollection words = lines.apply( - ParDo.of(new ExtractWordsFn())); - - // Count the number of times each word occurs. - PCollection> wordCounts = - words.apply(Count.perElement()); - - return wordCounts; - } - } - - /** - * Options supported by {@link WordCount}. - * - *

Concept #4: Defining your own configuration options. Here, you can add your own arguments - * to be processed by the command-line parser, and specify default values for them. You can then - * access the options values in your pipeline code. - * - *

Inherits standard configuration options. - */ - public interface WordCountOptions extends PipelineOptions { - - /** - * By default, this example reads from a public dataset containing the text of - * King Lear. Set this option to choose a different input file or glob. - */ - @Description("Path of the file to read from") - @Default.String("gs://apache-beam-samples/shakespeare/kinglear.txt") - String getInputFile(); - void setInputFile(String value); - - /** - * Set this required option to specify where to write the output. - */ - @Description("Path of the file to write to") - @Required - String getOutput(); - void setOutput(String value); - } - - public static void main(String[] args) { - WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() - .as(WordCountOptions.class); - Pipeline p = Pipeline.create(options); - - // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the - // static FormatAsTextFn() to the ParDo transform. - p.apply("ReadLines", TextIO.Read.from(options.getInputFile())) - .apply(new CountWords()) - .apply(MapElements.via(new FormatAsTextFn())) - .apply("WriteCounts", TextIO.Write.to(options.getOutput())); - - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java deleted file mode 100644 index 6b51074f44..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import com.google.api.services.bigquery.model.TableSchema; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.GcpOptions; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure BigQuery tables in Beam examples. - * The project defaults to the project being used to run the example. - */ -public interface ExampleBigQueryTableOptions extends GcpOptions { - @Description("BigQuery dataset name") - @Default.String("beam_examples") - String getBigQueryDataset(); - void setBigQueryDataset(String dataset); - - @Description("BigQuery table name") - @Default.InstanceFactory(BigQueryTableFactory.class) - String getBigQueryTable(); - void setBigQueryTable(String table); - - @Description("BigQuery table schema") - TableSchema getBigQuerySchema(); - void setBigQuerySchema(TableSchema schema); - - /** - * Returns the job name as the default BigQuery table name. - */ - class BigQueryTableFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return options.getJobName().replace('-', '_'); - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java deleted file mode 100644 index 90f935c3ce..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure the Beam examples. - */ -public interface ExampleOptions extends PipelineOptions { - @Description("Whether to keep jobs running after local process exit") - @Default.Boolean(false) - boolean getKeepJobsRunning(); - void setKeepJobsRunning(boolean keepJobsRunning); - - @Description("Number of workers to use when executing the injector pipeline") - @Default.Integer(1) - int getInjectorNumWorkers(); - void setInjectorNumWorkers(int numWorkers); -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java deleted file mode 100644 index daeb398f7f..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.GcpOptions; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure Pub/Sub topic/subscription in Beam examples. - */ -public interface ExamplePubsubTopicAndSubscriptionOptions extends ExamplePubsubTopicOptions { - @Description("Pub/Sub subscription") - @Default.InstanceFactory(PubsubSubscriptionFactory.class) - String getPubsubSubscription(); - void setPubsubSubscription(String subscription); - - /** - * Returns a default Pub/Sub subscription based on the project and the job names. - */ - class PubsubSubscriptionFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return "projects/" + options.as(GcpOptions.class).getProject() - + "/subscriptions/" + options.getJobName(); - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java deleted file mode 100644 index 936bff5675..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.GcpOptions; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure Pub/Sub topic in Beam examples. - */ -public interface ExamplePubsubTopicOptions extends GcpOptions { - @Description("Pub/Sub topic") - @Default.InstanceFactory(PubsubTopicFactory.class) - String getPubsubTopic(); - void setPubsubTopic(String topic); - - /** - * Returns a default Pub/Sub topic based on the project and the job names. - */ - class PubsubTopicFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return "projects/" + options.as(GcpOptions.class).getProject() - + "/topics/" + options.getJobName(); - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java deleted file mode 100644 index 570b3827b7..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import com.google.api.client.googleapis.json.GoogleJsonResponseException; -import com.google.api.client.googleapis.services.AbstractGoogleClientRequest; -import com.google.api.client.util.BackOff; -import com.google.api.client.util.BackOffUtils; -import com.google.api.client.util.Sleeper; -import com.google.api.services.bigquery.Bigquery; -import com.google.api.services.bigquery.Bigquery.Datasets; -import com.google.api.services.bigquery.Bigquery.Tables; -import com.google.api.services.bigquery.model.Dataset; -import com.google.api.services.bigquery.model.DatasetReference; -import com.google.api.services.bigquery.model.Table; -import com.google.api.services.bigquery.model.TableReference; -import com.google.api.services.bigquery.model.TableSchema; -import com.google.api.services.pubsub.Pubsub; -import com.google.api.services.pubsub.model.Subscription; -import com.google.api.services.pubsub.model.Topic; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.google.common.util.concurrent.Uninterruptibles; -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.options.BigQueryOptions; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PubsubOptions; -import org.apache.beam.sdk.util.FluentBackoff; -import org.apache.beam.sdk.util.Transport; -import org.joda.time.Duration; - -/** - * The utility class that sets up and tears down external resources, - * and cancels the streaming pipelines once the program terminates. - * - *

It is used to run Beam examples. - */ -public class ExampleUtils { - - private static final int SC_NOT_FOUND = 404; - - private final PipelineOptions options; - private Bigquery bigQueryClient = null; - private Pubsub pubsubClient = null; - private Set pipelinesToCancel = Sets.newHashSet(); - private List pendingMessages = Lists.newArrayList(); - - /** - * Do resources and runner options setup. - */ - public ExampleUtils(PipelineOptions options) { - this.options = options; - } - - /** - * Sets up external resources that are required by the example, - * such as Pub/Sub topics and BigQuery tables. - * - * @throws IOException if there is a problem setting up the resources - */ - public void setup() throws IOException { - Sleeper sleeper = Sleeper.DEFAULT; - BackOff backOff = - FluentBackoff.DEFAULT - .withMaxRetries(3).withInitialBackoff(Duration.millis(200)).backoff(); - Throwable lastException = null; - try { - do { - try { - setupPubsub(); - setupBigQueryTable(); - return; - } catch (GoogleJsonResponseException e) { - lastException = e; - } - } while (BackOffUtils.next(sleeper, backOff)); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - // Ignore InterruptedException - } - throw new RuntimeException(lastException); - } - - /** - * Sets up the Google Cloud Pub/Sub topic. - * - *

If the topic doesn't exist, a new topic with the given name will be created. - * - * @throws IOException if there is a problem setting up the Pub/Sub topic - */ - public void setupPubsub() throws IOException { - ExamplePubsubTopicAndSubscriptionOptions pubsubOptions = - options.as(ExamplePubsubTopicAndSubscriptionOptions.class); - if (!pubsubOptions.getPubsubTopic().isEmpty()) { - pendingMessages.add("**********************Set Up Pubsub************************"); - setupPubsubTopic(pubsubOptions.getPubsubTopic()); - pendingMessages.add("The Pub/Sub topic has been set up for this example: " - + pubsubOptions.getPubsubTopic()); - - if (!pubsubOptions.getPubsubSubscription().isEmpty()) { - setupPubsubSubscription( - pubsubOptions.getPubsubTopic(), pubsubOptions.getPubsubSubscription()); - pendingMessages.add("The Pub/Sub subscription has been set up for this example: " - + pubsubOptions.getPubsubSubscription()); - } - } - } - - /** - * Sets up the BigQuery table with the given schema. - * - *

If the table already exists, the schema has to match the given one. Otherwise, the example - * will throw a RuntimeException. If the table doesn't exist, a new table with the given schema - * will be created. - * - * @throws IOException if there is a problem setting up the BigQuery table - */ - public void setupBigQueryTable() throws IOException { - ExampleBigQueryTableOptions bigQueryTableOptions = - options.as(ExampleBigQueryTableOptions.class); - if (bigQueryTableOptions.getBigQueryDataset() != null - && bigQueryTableOptions.getBigQueryTable() != null - && bigQueryTableOptions.getBigQuerySchema() != null) { - pendingMessages.add("******************Set Up Big Query Table*******************"); - setupBigQueryTable(bigQueryTableOptions.getProject(), - bigQueryTableOptions.getBigQueryDataset(), - bigQueryTableOptions.getBigQueryTable(), - bigQueryTableOptions.getBigQuerySchema()); - pendingMessages.add("The BigQuery table has been set up for this example: " - + bigQueryTableOptions.getProject() - + ":" + bigQueryTableOptions.getBigQueryDataset() - + "." + bigQueryTableOptions.getBigQueryTable()); - } - } - - /** - * Tears down external resources that can be deleted upon the example's completion. - */ - private void tearDown() { - pendingMessages.add("*************************Tear Down*************************"); - ExamplePubsubTopicAndSubscriptionOptions pubsubOptions = - options.as(ExamplePubsubTopicAndSubscriptionOptions.class); - if (!pubsubOptions.getPubsubTopic().isEmpty()) { - try { - deletePubsubTopic(pubsubOptions.getPubsubTopic()); - pendingMessages.add("The Pub/Sub topic has been deleted: " - + pubsubOptions.getPubsubTopic()); - } catch (IOException e) { - pendingMessages.add("Failed to delete the Pub/Sub topic : " - + pubsubOptions.getPubsubTopic()); - } - if (!pubsubOptions.getPubsubSubscription().isEmpty()) { - try { - deletePubsubSubscription(pubsubOptions.getPubsubSubscription()); - pendingMessages.add("The Pub/Sub subscription has been deleted: " - + pubsubOptions.getPubsubSubscription()); - } catch (IOException e) { - pendingMessages.add("Failed to delete the Pub/Sub subscription : " - + pubsubOptions.getPubsubSubscription()); - } - } - } - - ExampleBigQueryTableOptions bigQueryTableOptions = - options.as(ExampleBigQueryTableOptions.class); - if (bigQueryTableOptions.getBigQueryDataset() != null - && bigQueryTableOptions.getBigQueryTable() != null - && bigQueryTableOptions.getBigQuerySchema() != null) { - pendingMessages.add("The BigQuery table might contain the example's output, " - + "and it is not deleted automatically: " - + bigQueryTableOptions.getProject() - + ":" + bigQueryTableOptions.getBigQueryDataset() - + "." + bigQueryTableOptions.getBigQueryTable()); - pendingMessages.add("Please go to the Developers Console to delete it manually." - + " Otherwise, you may be charged for its usage."); - } - } - - private void setupBigQueryTable(String projectId, String datasetId, String tableId, - TableSchema schema) throws IOException { - if (bigQueryClient == null) { - bigQueryClient = Transport.newBigQueryClient(options.as(BigQueryOptions.class)).build(); - } - - Datasets datasetService = bigQueryClient.datasets(); - if (executeNullIfNotFound(datasetService.get(projectId, datasetId)) == null) { - Dataset newDataset = new Dataset().setDatasetReference( - new DatasetReference().setProjectId(projectId).setDatasetId(datasetId)); - datasetService.insert(projectId, newDataset).execute(); - } - - Tables tableService = bigQueryClient.tables(); - Table table = executeNullIfNotFound(tableService.get(projectId, datasetId, tableId)); - if (table == null) { - Table newTable = new Table().setSchema(schema).setTableReference( - new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId)); - tableService.insert(projectId, datasetId, newTable).execute(); - } else if (!table.getSchema().equals(schema)) { - throw new RuntimeException( - "Table exists and schemas do not match, expecting: " + schema.toPrettyString() - + ", actual: " + table.getSchema().toPrettyString()); - } - } - - private void setupPubsubTopic(String topic) throws IOException { - if (pubsubClient == null) { - pubsubClient = Transport.newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) == null) { - pubsubClient.projects().topics().create(topic, new Topic().setName(topic)).execute(); - } - } - - private void setupPubsubSubscription(String topic, String subscription) throws IOException { - if (pubsubClient == null) { - pubsubClient = Transport.newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) == null) { - Subscription subInfo = new Subscription() - .setAckDeadlineSeconds(60) - .setTopic(topic); - pubsubClient.projects().subscriptions().create(subscription, subInfo).execute(); - } - } - - /** - * Deletes the Google Cloud Pub/Sub topic. - * - * @throws IOException if there is a problem deleting the Pub/Sub topic - */ - private void deletePubsubTopic(String topic) throws IOException { - if (pubsubClient == null) { - pubsubClient = Transport.newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) != null) { - pubsubClient.projects().topics().delete(topic).execute(); - } - } - - /** - * Deletes the Google Cloud Pub/Sub subscription. - * - * @throws IOException if there is a problem deleting the Pub/Sub subscription - */ - private void deletePubsubSubscription(String subscription) throws IOException { - if (pubsubClient == null) { - pubsubClient = Transport.newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) != null) { - pubsubClient.projects().subscriptions().delete(subscription).execute(); - } - } - - /** - * Waits for the pipeline to finish and cancels it before the program exists. - */ - public void waitToFinish(PipelineResult result) { - pipelinesToCancel.add(result); - if (!options.as(ExampleOptions.class).getKeepJobsRunning()) { - addShutdownHook(pipelinesToCancel); - } - try { - result.waitUntilFinish(); - } catch (UnsupportedOperationException e) { - // Do nothing if the given PipelineResult doesn't support waitUntilFinish(), - // such as EvaluationResults returned by DirectRunner. - tearDown(); - printPendingMessages(); - } catch (Exception e) { - throw new RuntimeException("Failed to wait the pipeline until finish: " + result); - } - } - - private void addShutdownHook(final Collection pipelineResults) { - Runtime.getRuntime().addShutdownHook(new Thread() { - @Override - public void run() { - tearDown(); - printPendingMessages(); - for (PipelineResult pipelineResult : pipelineResults) { - try { - pipelineResult.cancel(); - } catch (IOException e) { - System.out.println("Failed to cancel the job."); - System.out.println(e.getMessage()); - } - } - - for (PipelineResult pipelineResult : pipelineResults) { - boolean cancellationVerified = false; - for (int retryAttempts = 6; retryAttempts > 0; retryAttempts--) { - if (pipelineResult.getState().isTerminal()) { - cancellationVerified = true; - break; - } else { - System.out.println( - "The example pipeline is still running. Verifying the cancellation."); - } - Uninterruptibles.sleepUninterruptibly(10, TimeUnit.SECONDS); - } - if (!cancellationVerified) { - System.out.println("Failed to verify the cancellation for job: " + pipelineResult); - } - } - } - }); - } - - private void printPendingMessages() { - System.out.println(); - System.out.println("***********************************************************"); - System.out.println("***********************************************************"); - for (String message : pendingMessages) { - System.out.println(message); - } - System.out.println("***********************************************************"); - System.out.println("***********************************************************"); - } - - private static T executeNullIfNotFound( - AbstractGoogleClientRequest request) throws IOException { - try { - return request.execute(); - } catch (GoogleJsonResponseException e) { - if (e.getStatusCode() == SC_NOT_FOUND) { - return null; - } else { - throw e; - } - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/WriteWindowedFilesDoFn.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/WriteWindowedFilesDoFn.java deleted file mode 100644 index a08e6a9b0f..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/WriteWindowedFilesDoFn.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import com.google.common.annotations.VisibleForTesting; -import java.io.OutputStream; -import java.nio.channels.Channels; -import java.nio.charset.StandardCharsets; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.util.IOChannelFactory; -import org.apache.beam.sdk.util.IOChannelUtils; -import org.apache.beam.sdk.values.KV; -import org.joda.time.format.DateTimeFormatter; -import org.joda.time.format.ISODateTimeFormat; - -/** - * A {@link DoFn} that writes elements to files with names deterministically derived from the lower - * and upper bounds of their key (an {@link IntervalWindow}). - * - *

This is test utility code, not for end-users, so examples can be focused - * on their primary lessons. - */ -public class WriteWindowedFilesDoFn - extends DoFn>>, Void> { - - static final byte[] NEWLINE = "\n".getBytes(StandardCharsets.UTF_8); - static final Coder STRING_CODER = StringUtf8Coder.of(); - - private static DateTimeFormatter formatter = ISODateTimeFormat.hourMinute(); - - private final String output; - - public WriteWindowedFilesDoFn(String output) { - this.output = output; - } - - @VisibleForTesting - public static String fileForWindow(String output, IntervalWindow window) { - return String.format( - "%s-%s-%s", output, formatter.print(window.start()), formatter.print(window.end())); - } - - @ProcessElement - public void processElement(ProcessContext context) throws Exception { - // Build a file name from the window - IntervalWindow window = context.element().getKey(); - String outputShard = fileForWindow(output, window); - - // Open the file and write all the values - IOChannelFactory factory = IOChannelUtils.getFactory(outputShard); - OutputStream out = Channels.newOutputStream(factory.create(outputShard, "text/plain")); - for (KV wordCount : context.element().getValue()) { - STRING_CODER.encode( - wordCount.getKey() + ": " + wordCount.getValue(), out, Coder.Context.OUTER); - out.write(NEWLINE); - } - out.close(); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java deleted file mode 100644 index 155242d996..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import com.google.common.io.Files; -import java.io.File; -import java.nio.charset.StandardCharsets; -import ${package}.DebuggingWordCount.WordCountOptions; -import org.apache.beam.sdk.testing.TestPipeline; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests for {@link DebuggingWordCount}. - */ -@RunWith(JUnit4.class) -public class DebuggingWordCountTest { - @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); - - @Test - public void testDebuggingWordCount() throws Exception { - File inputFile = tmpFolder.newFile(); - File outputFile = tmpFolder.newFile(); - Files.write( - "stomach secret Flourish message Flourish here Flourish", - inputFile, - StandardCharsets.UTF_8); - WordCountOptions options = - TestPipeline.testingPipelineOptions().as(WordCountOptions.class); - options.setInputFile(inputFile.getAbsolutePath()); - options.setOutput(outputFile.getAbsolutePath()); - DebuggingWordCount.main(TestPipeline.convertToArgs(options)); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java deleted file mode 100644 index e86c2aac96..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import java.util.Arrays; -import java.util.List; -import ${package}.WordCount.CountWords; -import ${package}.WordCount.ExtractWordsFn; -import ${package}.WordCount.FormatAsTextFn; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.RunnableOnService; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.DoFnTester; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.values.PCollection; -import org.hamcrest.CoreMatchers; -import org.junit.Assert; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests of WordCount. - */ -@RunWith(JUnit4.class) -public class WordCountTest { - - /** Example test that tests a specific {@link DoFn}. */ - @Test - public void testExtractWordsFn() throws Exception { - DoFnTester extractWordsFn = - DoFnTester.of(new ExtractWordsFn()); - - Assert.assertThat(extractWordsFn.processBundle(" some input words "), - CoreMatchers.hasItems("some", "input", "words")); - Assert.assertThat(extractWordsFn.processBundle(" "), - CoreMatchers.hasItems()); - Assert.assertThat(extractWordsFn.processBundle(" some ", " input", " words"), - CoreMatchers.hasItems("some", "input", "words")); - } - - static final String[] WORDS_ARRAY = new String[] { - "hi there", "hi", "hi sue bob", - "hi sue", "", "bob hi"}; - - static final List WORDS = Arrays.asList(WORDS_ARRAY); - - static final String[] COUNTS_ARRAY = new String[] { - "hi: 5", "there: 1", "sue: 2", "bob: 2"}; - - /** Example test that tests a PTransform by using an in-memory input and inspecting the output. */ - @Test - @Category(RunnableOnService.class) - public void testCountWords() throws Exception { - Pipeline p = TestPipeline.create(); - - PCollection input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of())); - - PCollection output = input.apply(new CountWords()) - .apply(MapElements.via(new FormatAsTextFn())); - - PAssert.that(output).containsInAnyOrder(COUNTS_ARRAY); - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties deleted file mode 100644 index 1f3c9c5178..0000000000 --- a/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -package=it.pkg -version=0.1-SNAPSHOT -groupId=archetype.it -artifactId=basic -targetPlatform=1.7 diff --git a/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt b/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt deleted file mode 100644 index 0b5987362f..0000000000 --- a/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt +++ /dev/null @@ -1 +0,0 @@ -verify diff --git a/maven-archetypes/starter/pom.xml b/maven-archetypes/starter/pom.xml deleted file mode 100644 index c600393712..0000000000 --- a/maven-archetypes/starter/pom.xml +++ /dev/null @@ -1,45 +0,0 @@ - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-parent - 2.0.0-beta1-SNAPSHOT - ../../pom.xml - - - google-cloud-dataflow-java-archetypes-starter - Google Cloud Dataflow SDK for Java - Starter Archetype - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This archetype creates a simple starter pipeline to get started - using the Google Cloud Dataflow SDK for Java. - - maven-archetype - - - - - org.apache.maven.archetype - archetype-packaging - 2.4 - - - - diff --git a/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml deleted file mode 100644 index 4c22d5d68b..0000000000 --- a/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - 1.7 - - - - - - src/main/java - - **/*.java - - - - diff --git a/maven-archetypes/starter/src/main/resources/NOTICE b/maven-archetypes/starter/src/main/resources/NOTICE deleted file mode 100644 index 981fde5a9e..0000000000 --- a/maven-archetypes/starter/src/main/resources/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Google Cloud Dataflow SDK for Java -Copyright 2017, Google Inc. - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). diff --git a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml deleted file mode 100644 index 183b29062c..0000000000 --- a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml +++ /dev/null @@ -1,92 +0,0 @@ - - - - 4.0.0 - - ${groupId} - ${artifactId} - ${version} - - - UTF-8 - - - - - ossrh.snapshots - Sonatype OSS Repository Hosting - https://oss.sonatype.org/content/repositories/snapshots/ - - false - - - true - - - - - jar - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.3 - - ${targetPlatform} - ${targetPlatform} - - - - - - - - org.codehaus.mojo - exec-maven-plugin - 1.4.0 - - false - - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - 2.0.0-beta1-SNAPSHOT - - - - - org.slf4j - slf4j-api - 1.7.7 - - - org.slf4j - slf4j-jdk14 - 1.7.7 - - - diff --git a/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java b/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java deleted file mode 100644 index d6afdecf11..0000000000 --- a/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A starter example for writing Beam programs. - * - *

The example takes two strings, converts them to their upper-case - * representation and logs them. - * - *

To run this starter example locally using DirectRunner, just - * execute it without any additional parameters from your favorite development - * environment. - * - *

To run this starter example using managed resource in Google Cloud - * Platform, you should specify the following command-line options: - * --project= - * --stagingLocation= - * --runner=DataflowRunner - */ -public class StarterPipeline { - private static final Logger LOG = LoggerFactory.getLogger(StarterPipeline.class); - - public static void main(String[] args) { - Pipeline p = Pipeline.create( - PipelineOptionsFactory.fromArgs(args).withValidation().create()); - - p.apply(Create.of("Hello", "World")) - .apply(MapElements.via(new SimpleFunction() { - @Override - public String apply(String input) { - return input.toUpperCase(); - } - })) - .apply(ParDo.of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - LOG.info(c.element()); - } - })); - - p.run(); - } -} diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties deleted file mode 100644 index 1f3c9c5178..0000000000 --- a/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -package=it.pkg -version=0.1-SNAPSHOT -groupId=archetype.it -artifactId=basic -targetPlatform=1.7 diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt b/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt deleted file mode 100644 index 0b5987362f..0000000000 --- a/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt +++ /dev/null @@ -1 +0,0 @@ -verify diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml b/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml deleted file mode 100644 index db4b1c9784..0000000000 --- a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml +++ /dev/null @@ -1,92 +0,0 @@ - - - - 4.0.0 - - archetype.it - basic - 0.1-SNAPSHOT - - - UTF-8 - - - - - ossrh.snapshots - Sonatype OSS Repository Hosting - https://oss.sonatype.org/content/repositories/snapshots/ - - false - - - true - - - - - jar - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.3 - - 1.7 - 1.7 - - - - - - - - org.codehaus.mojo - exec-maven-plugin - 1.4.0 - - false - - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - 2.0.0-beta1-SNAPSHOT - - - - - org.slf4j - slf4j-api - 1.7.7 - - - org.slf4j - slf4j-jdk14 - 1.7.7 - - - diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java b/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java deleted file mode 100644 index 4ae92e8ce6..0000000000 --- a/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package it.pkg; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A starter example for writing Beam programs. - * - *

The example takes two strings, converts them to their upper-case - * representation and logs them. - * - *

To run this starter example locally using DirectRunner, just - * execute it without any additional parameters from your favorite development - * environment. - * - *

To run this starter example using managed resource in Google Cloud - * Platform, you should specify the following command-line options: - * --project= - * --stagingLocation= - * --runner=DataflowRunner - */ -public class StarterPipeline { - private static final Logger LOG = LoggerFactory.getLogger(StarterPipeline.class); - - public static void main(String[] args) { - Pipeline p = Pipeline.create( - PipelineOptionsFactory.fromArgs(args).withValidation().create()); - - p.apply(Create.of("Hello", "World")) - .apply(MapElements.via(new SimpleFunction() { - @Override - public String apply(String input) { - return input.toUpperCase(); - } - })) - .apply(ParDo.of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - LOG.info(c.element()); - } - })); - - p.run(); - } -} diff --git a/pom.xml b/pom.xml deleted file mode 100644 index c3c64811f4..0000000000 --- a/pom.xml +++ /dev/null @@ -1,371 +0,0 @@ - - - - 4.0.0 - - - com.google - google - 5 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-parent - Google Cloud Dataflow SDK for Java - Parent - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This artifact includes the parent POM for other Dataflow SDK - artifacts. - http://cloud.google.com/dataflow - 2013 - - 2.0.0-beta1-SNAPSHOT - - - - Apache License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt - repo - - - - - - Google Inc. - http://www.google.com - - - - - scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - release-2.0.0-beta1 - - - - - ossrh - https://oss.sonatype.org/content/repositories/snapshots - - - ossrh - https://oss.sonatype.org/service/local/staging/deploy/maven2/ - - - - - 3.0.3 - - - - UTF-8 - ${maven.build.timestamp} - yyyy-MM-dd HH:mm - - 0.4.0 - - - pom - - sdk - examples - maven-archetypes/starter - maven-archetypes/examples - - - - - - - org.apache.maven.plugins - maven-enforcer-plugin - 1.4.1 - - - enforce-java - - enforce - - - - - - [1.8.0,) - - - - - - - - - org.apache.maven.plugins - maven-clean-plugin - 3.0.0 - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.6.0 - - 1.7 - 1.7 - - -Xlint:all - -Werror - -Xlint:-options - - true - - - - - org.apache.maven.plugins - maven-checkstyle-plugin - 2.17 - - - com.puppycrawl.tools - checkstyle - 6.19 - - - - sdk/checkstyle.xml - sdk/suppressions.xml - true - true - true - true - - - - verify - - check - - - - - - - org.apache.maven.plugins - maven-jar-plugin - 3.0.2 - - true - - - - default-jar - - jar - - - - default-test-jar - - test-jar - - - - - - - org.apache.maven.plugins - maven-source-plugin - 3.0.1 - - - attach-sources - compile - - jar - - - - attach-test-sources - test-compile - - test-jar - - - - - - - org.apache.maven.plugins - maven-dependency-plugin - 3.0.0 - - - - analyze-only - - - true - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - 2.19.1 - - - - org.apache.maven.plugins - maven-archetype-plugin - 2.4 - - - org.apache.maven.shared - maven-invoker - 2.2 - - - - - - default-integration-test - install - - integration-test - - - true - - - - - - - org.apache.maven.plugins - maven-release-plugin - 2.5.3 - - true - true - deploy - - - - - org.codehaus.mojo - exec-maven-plugin - 1.5.0 - - false - - - - - - - - org.apache.maven.plugins - maven-enforcer-plugin - - - - org.apache.maven.plugins - maven-compiler-plugin - - - - org.apache.maven.plugins - maven-source-plugin - - - - org.apache.maven.plugins - maven-dependency-plugin - - - - org.apache.maven.plugins - maven-surefire-plugin - - - - org.apache.maven.plugins - maven-checkstyle-plugin - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - ${project.version} - - - - org.apache.beam - beam-sdks-java-core - ${beam.version} - - - - org.apache.beam - beam-sdks-java-io-google-cloud-platform - ${beam.version} - - - - org.apache.beam - beam-runners-direct-java - ${beam.version} - - - - org.apache.beam - beam-runners-google-cloud-dataflow-java - ${beam.version} - - - - org.apache.beam - beam-examples-java - ${beam.version} - - - - org.apache.beam - beam-examples-java8 - ${beam.version} - - - - diff --git a/sdk/checkstyle.xml b/sdk/checkstyle.xml deleted file mode 100644 index 1769c8bed9..0000000000 --- a/sdk/checkstyle.xml +++ /dev/null @@ -1,458 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sdk/pom.xml b/sdk/pom.xml deleted file mode 100644 index 8dd014f9f7..0000000000 --- a/sdk/pom.xml +++ /dev/null @@ -1,64 +0,0 @@ - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-parent - 2.0.0-beta1-SNAPSHOT - - - google-cloud-dataflow-java-sdk-all - Google Cloud Dataflow SDK for Java - All - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This artifact includes entire Dataflow Java SDK. - - jar - - - - - src/main/resources - true - - - - - - - org.apache.beam - beam-sdks-java-core - - - - org.apache.beam - beam-sdks-java-io-google-cloud-platform - - - - org.apache.beam - beam-runners-direct-java - - - - org.apache.beam - beam-runners-google-cloud-dataflow-java - - - diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java deleted file mode 100644 index 7bbfbe3729..0000000000 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) 2017 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.dataflow.sdk; - -import org.apache.beam.runners.dataflow.DataflowRunner; -import org.apache.beam.runners.direct.DirectRunner; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; - -/** - * Mark the dependencies as used at compile time. - */ -class SdkDependencies { - private Pipeline p; - private BigQueryIO bigQueryIO; - private DirectRunner directRunner; - private DataflowRunner dataflowRunner; -} diff --git a/sdk/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties b/sdk/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties deleted file mode 100644 index 1af27fdd48..0000000000 --- a/sdk/src/main/resources/org/apache/beam/runners/dataflow/dataflow.properties +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -environment.major.version=6 - -worker.image.batch=dataflow.gcr.io/v1beta3/beam-java-batch:${project.version}-20170103 -worker.image.streaming=dataflow.gcr.io/v1beta3/beam-java-streaming:${project.version}-20170103 diff --git a/sdk/src/main/resources/org/apache/beam/sdk/sdk.properties b/sdk/src/main/resources/org/apache/beam/sdk/sdk.properties deleted file mode 100644 index a9df3b5b5c..0000000000 --- a/sdk/src/main/resources/org/apache/beam/sdk/sdk.properties +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -name=Google Cloud Dataflow SDK for Java -version=${pom.version} -build.date=${timestamp} diff --git a/sdk/suppressions.xml b/sdk/suppressions.xml deleted file mode 100644 index c3635c9bb8..0000000000 --- a/sdk/suppressions.xml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - - -