From fd6e9409739b9024e578c5d20e314f6f98d9c6ce Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Thu, 12 Oct 2017 10:04:24 -0700 Subject: [PATCH] Modify TableRow2EntityFn due to nullable values (#5) --- pom.xml | 2 +- .../github/yuiskw/beam/TableRow2EntityFn.java | 36 ++++++++++++++++++- .../yuiskw/beam/BigQuery2DatastoreTest.java | 12 ++++--- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/pom.xml b/pom.xml index 8cf39d9..267495c 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ com.github.yuiskw bigquery-to-datastore - 0.1 + 0.2 jar diff --git a/src/main/java/com/github/yuiskw/beam/TableRow2EntityFn.java b/src/main/java/com/github/yuiskw/beam/TableRow2EntityFn.java index 5d2f3ff..86c7621 100644 --- a/src/main/java/com/github/yuiskw/beam/TableRow2EntityFn.java +++ b/src/main/java/com/github/yuiskw/beam/TableRow2EntityFn.java @@ -16,12 +16,20 @@ import org.joda.time.LocalDateTime; +/** + * This class is an Apache Beam function to convert TableRow to Entity. + */ public class TableRow2EntityFn extends DoFn { + /** Google Cloud Platform project ID */ private String projectId; + /** Google Datastore name space */ private String namespace; + /** Google Datastore parent paths */ private LinkedHashMap parents; + /** Google Datastore kind name */ private String kind; + /** BigQuery column for Google Datastore key */ private String keyColumn; public TableRow2EntityFn( @@ -49,6 +57,9 @@ public static Timestamp toTimestamp(Date date) { return timestamp; } + /** + * Convert TableRow to Entity + */ @ProcessElement public void processElement(ProcessContext c) { try { @@ -61,8 +72,16 @@ public void processElement(ProcessContext c) { } } + /** + * Convert an object to Datastore value + */ public Value convertToDatastoreValue(Object value) { Value v = null; + + if (value == null) { + return v; + } + if (value instanceof java.lang.Boolean) { v = Value.newBuilder().setBooleanValue(((Boolean) value).booleanValue()) .setExcludeFromIndexes(true).build(); @@ -140,13 
+159,22 @@ else if (value instanceof Map) { Map struct = (Map) value; for (String subKey : struct.keySet()) { Value subV = convertToDatastoreValue(struct.get(subKey)); - subEntityBuilder.putProperties(subKey, subV); + if (subV != null) { + subEntityBuilder.putProperties(subKey, subV); + } } v = Value.newBuilder().setEntityValue(subEntityBuilder.build()).build(); } return v; } + /** + * Convert TableRow to Entity + * + * @param row TableRow of bigquery + * @return converted Entity + * @throws ParseException + */ public Entity convertTableRowToEntity(TableRow row) throws ParseException { String keyName = row.get(keyColumn).toString(); Key key = getKey(keyName); @@ -196,6 +224,12 @@ public Key getKey(String name) { return keyBuilder.build(); } + /** + * Parse integer value + * + * @param value String + * @return parsed integer or null if given value is not integer + */ public static Integer parseInteger(String value) { Integer integer = null; try { diff --git a/src/test/java/com/github/yuiskw/beam/BigQuery2DatastoreTest.java b/src/test/java/com/github/yuiskw/beam/BigQuery2DatastoreTest.java index 205cba6..b6b9f05 100644 --- a/src/test/java/com/github/yuiskw/beam/BigQuery2DatastoreTest.java +++ b/src/test/java/com/github/yuiskw/beam/BigQuery2DatastoreTest.java @@ -41,6 +41,7 @@ public void testGetOptions() { SELECT "uuid1" AS uuid, False AS bool_value, + null AS nullable_value, 1 AS int_value, 1.23 AS float_value, "hoge" AS string_value, @@ -58,7 +59,8 @@ public void testGetOptions() { UNION ALL SELECT "uuid2" AS uuid, - False AS bool_value, + True AS bool_value, + 321 AS nullable_value, 1 AS int_value, 1.23 AS float_value, "hoge" AS string_value, @@ -66,8 +68,8 @@ public void testGetOptions() { CURRENT_TIME() AS time_value, CURRENT_TIMESTAMP() AS timestamp_value, [1, 2] AS int_array_value, - [1.23, 2.34, 3.45] AS float_array_value, - ["hoge", "fuga", "hoge2", "fuga2"] AS string_array_value, + [1.23, 2.34, 3.45, 4.56] AS float_array_value, + ["hoge", "fuga"] AS
string_array_value, STRUCT( 1 AS int_value, 1.23 AS float_value, @@ -94,9 +96,9 @@ public void testMain2() { String[] args = { "--project=test-project-id", "--inputBigQueryDataset=test_yu", - "--inputBigQueryTable=test_table", + "--inputBigQueryTable=table_table", "--outputDatastoreNamespace=test_double", - "--outputDatastoreKind=TestKind", + "--outputDatastoreKind=TestKind2", "--parentPaths=Parent1:p1,Parent2:p2", "--keyColumn=uuid", "--tempLocation=gs://test_yu/test-log/",