|
2 | 2 | import os |
3 | 3 | import sys |
4 | 4 |
|
5 | | -from absl.testing import parameterized |
6 | 5 | from tensorflow.python.distribute import multi_process_lib |
7 | 6 | import multiprocessing |
8 | 7 | import tensorflow as tf |
@@ -73,79 +72,119 @@ def tearDownClass(cls): |
73 | 72 | super(ParameterServerStrategyV2Test, cls).tearDownClass() |
74 | 73 | cls.cluster.stop() |
75 | 74 |
|
76 | | - #@parameterized.parameters(True, False) |
77 | | - def testPerWorkerVariableCreation(self): |
| 75 | + def testPerWorkerTraining(self): |
78 | 76 | var_dtype = tf.dtypes.float32 |
79 | 77 | var_name = 'var' |
80 | | - shape = [1] #if define_shape else None |
81 | | - |
82 | | - # with self.strategy.scope(): |
83 | | - var = variables.Variable(initial_value=[0.0], |
84 | | - shape=shape, |
85 | | - dtype=var_dtype, |
86 | | - name=var_name, |
87 | | - per_worker_de_variable=True) |
88 | | - |
89 | | - # Use per-worker variable as a capture |
90 | | - @def_function.function |
91 | | - def worker_fn(): |
92 | | - var.assign_add(constant_op.constant([1.0])) |
93 | | - return var |
94 | | - |
95 | | - num_closures = 10 |
96 | | - for ix in range(num_closures): |
97 | | - self.coordinator.schedule(worker_fn) |
98 | | - # Read the PWV many times to ensure result is up-to-date |
99 | | - self.coordinator.join() |
100 | | - result_sum = sum(var.read_all()).numpy() |
101 | | - self.assertEqual(result_sum, ix + 1) |
102 | | - |
103 | | - for _ in range(num_closures): |
104 | | - self.coordinator.schedule(worker_fn) |
105 | | - self.coordinator.join() |
106 | | - |
107 | | - # Verify placement of variables |
108 | | - devices = [wv._get_values().device for wv in var._per_worker_vars._values] |
109 | | - expected_devices = [ |
110 | | - f'/job:worker/replica:0/task:{ix}/device:CPU:0' |
111 | | - for ix in range(self.strategy._num_workers) |
112 | | - ] # pylint: disable=protected-access |
113 | | - self.assertAllEqual(devices, expected_devices) |
114 | | - |
115 | | - result_sum = sum(var.read_all()).numpy() |
116 | | - self.assertEqual(result_sum, num_closures * 2) |
117 | | - |
118 | | - def testKerasFit(self): |
119 | | - embed_dim = 8 |
| 78 | + shape = [1] |
120 | 79 | with self.strategy.scope(): |
121 | | - model = Sequential([ |
122 | | - layers.Input(shape=(1,), dtype=tf.int32), |
123 | | - de.keras.layers.Embedding(embed_dim, key_dtype=tf.int32), |
124 | | - layers.Flatten(), |
125 | | - layers.Dense(1, activation='sigmoid') |
126 | | - ]) |
127 | | - optimizer = Adam(1E-3) |
128 | | - optimizer = de.DynamicEmbeddingOptimizer(optimizer) |
129 | | - model.compile(loss='binary_crossentropy', |
130 | | - optimizer=optimizer, |
131 | | - metrics=['accuracy']) |
132 | | - |
133 | | - ids = np.random.randint(0, 100, size=(64 * 2, 1)) |
134 | | - labels = np.random.randint(0, 2, size=(64 * 2, 1)) |
135 | | - |
136 | | - def dataset_fn(input_context): |
137 | | - global_batch_size = 32 |
138 | | - batch_size = input_context.get_per_replica_batch_size(global_batch_size) |
139 | | - dataset = tf.data.Dataset.from_tensor_slices((ids, labels)) |
140 | | - dataset = dataset.shard(input_context.num_input_pipelines, |
141 | | - input_context.input_pipeline_id) |
142 | | - dataset = dataset.batch(batch_size).repeat() |
143 | | - return dataset |
144 | | - |
145 | | - dataset = self.strategy.distribute_datasets_from_function(dataset_fn) |
146 | | - |
147 | | - history = model.fit(dataset, epochs=1, steps_per_epoch=len(ids) // 64) |
148 | | - self.assertIn('loss', history.history) |
| 80 | + var = variables.Variable(initial_value=[0.0], |
| 81 | + shape=shape, |
| 82 | + dtype=var_dtype, |
| 83 | + name=var_name, |
| 84 | + per_worker_variable=True) |
| 85 | + var._trainable = True |
| 86 | + with backprop.GradientTape(persistent=True) as tape: |
| 87 | + |
| 88 | + # Define the training step |
| 89 | + @tf.function |
| 90 | + def train_step(): |
| 91 | + with tf.GradientTape() as tape: |
| 92 | + # var._maybe_create_per_worker_vars() |
| 93 | + value = var.read_value() |
| 94 | + # if not var.trainable: |
| 95 | + tape.watch(value) # still need this with var._trainable = True set. |
| 96 | + y = value * 2.0 |
| 97 | + grad = tape.gradient(y, value) |
| 98 | + return grad |
| 99 | + |
| 100 | + @tf.function |
| 101 | + def train_step2(): |
| 102 | + with tf.GradientTape() as tape: |
| 103 | + var._maybe_create_per_worker_vars() |
| 104 | + value = var.value() |
| 105 | + # if not var.trainable: |
| 106 | + tape.watch(value) # still need this with var._trainable = True set. |
| 107 | + y = value * 2.0 |
| 108 | + grad = tape.gradient(y, value) |
| 109 | + return grad |
| 110 | + |
| 111 | + # Run the step and check the results |
| 112 | + grads = self.strategy.run(train_step2) |
| 113 | + print(f"grads :{grads}") |
| 114 | + print(f"var.read_all() {var.read_all()}") |
| 115 | + #@parameterized.parameters(True, False) |
| 116 | + # def testPerWorkerVariableCreation(self): |
| 117 | + # var_dtype = tf.dtypes.float32 |
| 118 | + # var_name = 'var' |
| 119 | + # shape = [1] #if define_shape else None |
| 120 | + # |
| 121 | + # with self.strategy.scope(): |
| 122 | + # var = variables.Variable(initial_value=[0.0], |
| 123 | + # shape=shape, |
| 124 | + # dtype=var_dtype, |
| 125 | + # name=var_name, |
| 126 | + # per_worker_de_variable=True) |
| 127 | + # |
| 128 | + # # Use per-worker variable as a capture |
| 129 | + # @def_function.function |
| 130 | + # def worker_fn(): |
| 131 | + # var.assign_add(constant_op.constant([1.0])) |
| 132 | + # return var |
| 133 | + # |
| 134 | + # num_closures = 10 |
| 135 | + # for ix in range(num_closures): |
| 136 | + # self.coordinator.schedule(worker_fn) |
| 137 | + # # Read the PWV many times to ensure result is up-to-date |
| 138 | + # self.coordinator.join() |
| 139 | + # result_sum = sum(var.read_all()).numpy() |
| 140 | + # self.assertEqual(result_sum, ix + 1) |
| 141 | + # |
| 142 | + # for _ in range(num_closures): |
| 143 | + # self.coordinator.schedule(worker_fn) |
| 144 | + # self.coordinator.join() |
| 145 | + # |
| 146 | + # # Verify placement of variables |
| 147 | + # devices = [wv._get_values().device for wv in var._per_worker_vars._values] |
| 148 | + # expected_devices = [ |
| 149 | + # f'/job:worker/replica:0/task:{ix}/device:CPU:0' |
| 150 | + # for ix in range(self.strategy._num_workers) |
| 151 | + # ] # pylint: disable=protected-access |
| 152 | + # self.assertAllEqual(devices, expected_devices) |
| 153 | + # |
| 154 | + # result_sum = sum(var.read_all()).numpy() |
| 155 | + # self.assertEqual(result_sum, num_closures * 2) |
| 156 | + |
| 157 | + # def testKerasFit(self): |
| 158 | + # embed_dim = 8 |
| 159 | + # with self.strategy.scope(): |
| 160 | + # model = Sequential([ |
| 161 | + # layers.Input(shape=(1,), dtype=tf.int32), |
| 162 | + # de.keras.layers.Embedding(embed_dim, key_dtype=tf.int32), |
| 163 | + # layers.Flatten(), |
| 164 | + # layers.Dense(1, activation='sigmoid') |
| 165 | + # ]) |
| 166 | + # optimizer = Adam(1E-3) |
| 167 | + # optimizer = de.DynamicEmbeddingOptimizer(optimizer) |
| 168 | + # model.compile(loss='binary_crossentropy', |
| 169 | + # optimizer=optimizer, |
| 170 | + # metrics=['accuracy']) |
| 171 | + # |
| 172 | + # ids = np.random.randint(0, 100, size=(64 * 2, 1)) |
| 173 | + # labels = np.random.randint(0, 2, size=(64 * 2, 1)) |
| 174 | + # |
| 175 | + # def dataset_fn(input_context): |
| 176 | + # global_batch_size = 32 |
| 177 | + # batch_size = input_context.get_per_replica_batch_size(global_batch_size) |
| 178 | + # dataset = tf.data.Dataset.from_tensor_slices((ids, labels)) |
| 179 | + # dataset = dataset.shard(input_context.num_input_pipelines, |
| 180 | + # input_context.input_pipeline_id) |
| 181 | + # dataset = dataset.batch(batch_size).repeat() |
| 182 | + # return dataset |
| 183 | + # |
| 184 | + # dataset = self.strategy.distribute_datasets_from_function(dataset_fn) |
| 185 | + # |
| 186 | + # history = model.fit(dataset, epochs=1, steps_per_epoch=len(ids) // 64) |
| 187 | + # self.assertIn('loss', history.history) |
149 | 188 |
|
150 | 189 |
|
151 | 190 | # borrow from multi_process_lib._set_spawn_exe_path and modify it for tf_recommenders_addons |
@@ -175,8 +214,8 @@ def guess_path(package_root): |
175 | 214 | multiprocessing.get_context().set_executable(sys.argv[0]) |
176 | 215 |
|
177 | 216 |
|
178 | | -# This is not for pytest |
179 | | -# bazel test //tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests:parameter_server_bzl |
| 217 | +# This is not for pytest. If the build is stale, first run: bazel clean --expunge |
| 218 | +# bazel test --test_output=all //tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests:parameter_server_bzl |
180 | 219 | if __name__ == "__main__": |
181 | 220 | multi_process_lib._set_spawn_exe_path = custom_set_spawn_exe_path |
182 | 221 | v2_compat.enable_v2_behavior() |
|
0 commit comments