Model Definition Summary
Feature column and Keras input layer definitions for the census income dataset:

```python
import tensorflow as tf

CATEGORICAL_FEATURE_KEYS = [
    "workclass",
    "education",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "native-country",
]

NUMERIC_FEATURE_KEYS = [
    "age",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
]

OPTIONAL_NUMERIC_FEATURE_KEYS = [
    "education-num",
]

LABEL_KEY = "label"


def get_feature_columns():
    """Build a numeric column per numeric feature and a hashed embedding
    column per categorical feature."""
    feature_columns = []
    for numeric_feature_key in NUMERIC_FEATURE_KEYS:
        numeric_feature = tf.feature_column.numeric_column(numeric_feature_key)
        feature_columns.append(numeric_feature)
    for categorical_feature_key in CATEGORICAL_FEATURE_KEYS:
        embedding_feature = tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_hash_bucket(
                categorical_feature_key, hash_bucket_size=64
            ),
            dimension=16,
        )
        feature_columns.append(embedding_feature)
    return feature_columns


def get_feature_input_layers():
    """Build one tf.keras.Input per feature, keyed by feature name."""
    feature_input_layers = {}
    for numeric_feature_key in NUMERIC_FEATURE_KEYS:
        feature_input_layers[numeric_feature_key] = tf.keras.Input(
            shape=(1,), name=numeric_feature_key, dtype=tf.float32
        )
    for categorical_feature_key in CATEGORICAL_FEATURE_KEYS:
        feature_input_layers[categorical_feature_key] = tf.keras.Input(
            shape=(1,), name=categorical_feature_key, dtype=tf.string
        )
    return feature_input_layers
```

Model definition with the Keras Sequential API:

```python
def custom_model(feature_columns):
    model = tf.keras.Sequential([
        tf.keras.layers.DenseFeatures(feature_columns=feature_columns),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ])
    return model

feature_columns = get_feature_columns()
model = custom_model(feature_columns)
```

The same model built with the Keras functional API:

```python
def custom_model(feature_columns, feature_inputs):
    feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
    x = feature_layer(feature_inputs)
    x = tf.keras.layers.Dense(16, activation="relu")(x)
    x = tf.keras.layers.Dense(16, activation="relu")(x)
    y = tf.keras.layers.Dense(1, activation="sigmoid")(x)
    model = tf.keras.Model(inputs=feature_inputs, outputs=y)
    return model

feature_columns = get_feature_columns()
feature_inputs = get_feature_input_layers()
model = custom_model(feature_columns, feature_inputs)
```

The same model defined by subclassing tf.keras.Model:

```python
class CustomModel(tf.keras.Model):
    def __init__(self, feature_columns):
        super(CustomModel, self).__init__(name="census_model")
        self.dense_features = tf.keras.layers.DenseFeatures(feature_columns)
        self.dense_1 = tf.keras.layers.Dense(16, activation="relu")
        self.dense_2 = tf.keras.layers.Dense(16, activation="relu")
        self.dense_3 = tf.keras.layers.Dense(1, activation="sigmoid")
    def call(self, inputs, training=False):
        x = self.dense_features(inputs)
        x = self.dense_1(x)
        x = self.dense_2(x)
        x = self.dense_3(x)
        return x
feature_columns = get_feature_columns()
model = CustomModel(feature_columns)
```

Open questions:

- Single shared embedding table vs. several separate embedding tables? Generally, the same value appearing in different columns of the source data should be treated as different values (see the sketch right after this item).
- How to aggregate multiple categorical inputs into one embedding table? Different inputs shouldn't be mapped to the same id.
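A minimal sketch of the two options, reusing the `education` and `native-country` columns from above. The shared-table API name varies across TF versions (`tf.feature_column.shared_embeddings` in TF 2.x, `shared_embedding_columns` in TF 1.x), so treat the exact symbol as an assumption:

```python
import tensorflow as tf

def hashed(key):
    return tf.feature_column.categorical_column_with_hash_bucket(key, hash_bucket_size=64)

# Separate embeddings: each column owns its own table, so the same string
# appearing in "education" and "native-country" maps to different vectors.
separate_columns = [
    tf.feature_column.embedding_column(hashed(key), dimension=16)
    for key in ["education", "native-country"]
]

# Single shared embedding: both columns look up one table, so equal hashed
# ids map to the same vector. Assumes the TF 2.x symbol name.
shared_columns = tf.feature_column.shared_embeddings(
    [hashed("education"), hashed("native-country")], dimension=16
)
```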
 
- For feature column input, all instances in a minibatch must have the same size. How to handle variable-length input, such as a clicked item id list? Two options (a sketch of the first follows this list):
  - Truncate/pad in dataset_fn
  - Truncate/pad in the feature column normalizer_fn
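A minimal sketch of the dataset_fn option. The feature name `clicked_item_ids`, the fixed length of 10, and the dataset_fn signature are illustrative assumptions; the training framework may pass extra arguments to dataset_fn:

```python
import tensorflow as tf

MAX_CLICK_LEN = 10  # assumed fixed length, for illustration only


def truncate_or_pad(ids):
    """Truncate or zero-pad a 1-D integer id tensor to MAX_CLICK_LEN so
    every instance in a minibatch has the same size."""
    ids = ids[:MAX_CLICK_LEN]
    pad_len = MAX_CLICK_LEN - tf.shape(ids)[0]
    return tf.pad(ids, [[0, pad_len]])


def dataset_fn(dataset):
    # Each dataset element is assumed to be a dict of per-example tensors;
    # "clicked_item_ids" is a hypothetical variable-length feature.
    def _fix_length(features):
        features["clicked_item_ids"] = truncate_or_pad(features["clicked_item_ids"])
        return features

    return dataset.map(_fix_length)
```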
 
- For sparse input, should we use embedding_column or a dense embedding layer?
- How to decide the hash_bucket_size for a categorical input?
  - Statistical approach: calculate the size of the categorical value set, then multiply it by a constant such as 2 or 3 to get the hash bucket size (see the sketch after this list).
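A minimal sketch of that heuristic; the pandas-based vocabulary counting and the multiplier of 3 are illustrative assumptions:

```python
import pandas as pd
import tensorflow as tf


def hashed_embedding_column(train_df: pd.DataFrame, key, multiplier=3, dimension=16):
    """Size the hash bucket from the observed vocabulary: count the distinct
    values of `key` in the training data, then multiply by a small constant
    to keep hash collisions rare."""
    vocab_size = train_df[key].nunique()
    hash_bucket_size = vocab_size * multiplier
    return tf.feature_column.embedding_column(
        tf.feature_column.categorical_column_with_hash_bucket(
            key, hash_bucket_size=hash_bucket_size
        ),
        dimension=dimension,
    )
```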
 
- Integration with SQLFlow: how to generate the feature column definition and dataset_fn code via code_gen?
  - TBD