From 9f14743427b8f5041614916306d912d54e290d8e Mon Sep 17 00:00:00 2001 From: James Wexler Date: Tue, 14 May 2019 10:12:05 -0400 Subject: [PATCH 1/3] test --- .../python/base_generic_feature_statistics_generator.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/facets_overview/python/base_generic_feature_statistics_generator.py b/facets_overview/python/base_generic_feature_statistics_generator.py index 1c62b0e..b7562ba 100644 --- a/facets_overview/python/base_generic_feature_statistics_generator.py +++ b/facets_overview/python/base_generic_feature_statistics_generator.py @@ -19,6 +19,7 @@ import numpy as np import pandas as pd +import sys class BaseGenericFeatureStatisticsGenerator(object): @@ -273,7 +274,11 @@ def GetDatasetsProto(self, datasets, features=None, printable_val = val[1] else: try: - printable_val = val[1].decode('UTF-8', 'strict') + if (sys.version_info.major < 3 or + isinstance(data, (bytes, bytearray))): + printable_val = val[1].decode('UTF-8', 'strict') + else: + printable_val = val[1] except (UnicodeDecodeError, UnicodeEncodeError): printable_val = '__BYTES_VALUE__' bucket = featstats.rank_histogram.buckets.add( From 550118f780c761d4b4fef872f1d6c92e88364114 Mon Sep 17 00:00:00 2001 From: James Wexler Date: Tue, 14 May 2019 10:18:50 -0400 Subject: [PATCH 2/3] 2 --- ...se_generic_feature_statistics_generator.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/facets_overview/python/base_generic_feature_statistics_generator.py b/facets_overview/python/base_generic_feature_statistics_generator.py index b7562ba..3b7bc37 100644 --- a/facets_overview/python/base_generic_feature_statistics_generator.py +++ b/facets_overview/python/base_generic_feature_statistics_generator.py @@ -270,17 +270,14 @@ def GetDatasetsProto(self, datasets, features=None, sorted_vals = sorted(zip(counts, vals), reverse=True) sorted_vals = sorted_vals[:histogram_categorical_levels_count] for val_index, val in enumerate(sorted_vals): - if val[1].dtype.type is np.str_: - printable_val = val[1] - else: - try: - if (sys.version_info.major < 3 or - isinstance(data, (bytes, bytearray))): - printable_val = val[1].decode('UTF-8', 'strict') - else: - printable_val = val[1] - except (UnicodeDecodeError, UnicodeEncodeError): - printable_val = '__BYTES_VALUE__' + try: + if (sys.version_info.major < 3 or + isinstance(data, (bytes, bytearray))): + printable_val = val[1].decode('UTF-8', 'strict') + else: + printable_val = val[1] + except (UnicodeDecodeError, UnicodeEncodeError): + printable_val = '__BYTES_VALUE__' bucket = featstats.rank_histogram.buckets.add( low_rank=val_index, high_rank=val_index, From e40ec4cc45d4074ae7ec6952ff05507c0e05d16d Mon Sep 17 00:00:00 2001 From: James Wexler Date: Tue, 14 May 2019 10:20:59 -0400 Subject: [PATCH 3/3] fix --- .../python/base_generic_feature_statistics_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/facets_overview/python/base_generic_feature_statistics_generator.py b/facets_overview/python/base_generic_feature_statistics_generator.py index 3b7bc37..32eb9d3 100644 --- a/facets_overview/python/base_generic_feature_statistics_generator.py +++ b/facets_overview/python/base_generic_feature_statistics_generator.py @@ -272,7 +272,7 @@ def GetDatasetsProto(self, datasets, features=None, for val_index, val in enumerate(sorted_vals): try: if (sys.version_info.major < 3 or - isinstance(data, (bytes, bytearray))): + isinstance(val[1], (bytes, bytearray))): printable_val = val[1].decode('UTF-8', 'strict') else: printable_val = val[1]