Skip to content

Commit

Permalink
Modified based on feedback - removing pandas
Browse files Browse the repository at this point in the history
Removing the pandas requirement and using PySpark's DataFrame.fillna directly
  • Loading branch information
rmattsampson authored Feb 27, 2023
1 parent 78e2178 commit 81cacc9
Showing 1 changed file with 3 additions and 7 deletions.
10 changes: 3 additions & 7 deletions examples/transforms/CustomTransform_FillEmptyStringsInAColumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from pyspark.sql.dataframe import DataFrame
from pyspark.sql.types import StructType
from pyspark.context import SparkContext
import pandas as pd
from pyspark.sql.functions import col,isnan, when, count, regexp_replace

def fill_empty_null_values_txn(
Expand All @@ -16,12 +15,9 @@ def fill_empty_null_values_txn(
_dyf = DynamicFrame.fromDF(modifiedDF, self.glue_ctx, self.name)
return _dyf
elif _df.filter(col(columnName).isNull()).count() > 0:
_pdf = _df.toPandas()
_pdf[columnName] = _pdf[columnName].fillna(newValue)
modifiedDF = gluectx.spark_session.createDataFrame(_pdf)
_dyf = DynamicFrame.fromDF(modifiedDF, self.glue_ctx, self.name)
_df = _df.fillna(value=newValue, subset=[columnName])
_dyf = DynamicFrame.fromDF(_df, self.glue_ctx, self.name)
return _dyf
return self


DynamicFrame.fill_empty_null_values_txn = fill_empty_null_values_txn
DynamicFrame.fill_empty_null_values_txn = fill_empty_null_values_txn

0 comments on commit 81cacc9

Please sign in to comment.