diff --git a/python/deprecated/eval_sgf.py b/python/deprecated/eval_sgf.py old mode 100755 new mode 100644 index 13bcde60e..26381b19a --- a/python/deprecated/eval_sgf.py +++ b/python/deprecated/eval_sgf.py @@ -118,12 +118,12 @@ def play(pla,loc): print(board.to_string()) -saver = tf.train.Saver( +saver = tf.compat.v1.train.Saver( max_to_keep = 10000, save_relative_paths = True, ) -with tf.Session() as session: +with tf.compat.v1.Session() as session: if not debug: saver.restore(session, modelpath) diff --git a/python/deprecated/export_model.py b/python/deprecated/export_model.py old mode 100755 new mode 100644 index 522eeabc7..54141831e --- a/python/deprecated/export_model.py +++ b/python/deprecated/export_model.py @@ -49,7 +49,7 @@ def log(s): model = Model(model_config) total_parameters = 0 -for variable in tf.trainable_variables(): +for variable in tf.compat.v1.trainable_variables(): shape = variable.get_shape() variable_parameters = 1 for dim in shape: @@ -63,17 +63,17 @@ def log(s): print("Testing", flush=True) -saver = tf.train.Saver( +saver = tf.compat.v1.train.Saver( max_to_keep = 10000, save_relative_paths = True, ) #Some tensorflow options #tfconfig = tf.ConfigProto(log_device_placement=False,device_count={'GPU': 0}) -tfconfig = tf.ConfigProto(log_device_placement=False) +tfconfig = tf.compat.v1.ConfigProto(log_device_placement=False) #tfconfig.gpu_options.allow_growth = True #tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.4 -with tf.Session(config=tfconfig) as session: +with tf.compat.v1.Session(config=tfconfig) as session: saver.restore(session, model_file) sys.stdout.flush() @@ -85,7 +85,7 @@ def log(s): sys.stderr.flush() if not for_cuda: - tf.train.write_graph(session.graph_def,export_dir,filename_prefix + ".graph.pb") + tf.io.write_graph(session.graph_def,export_dir,filename_prefix + ".graph.pb") savepath = export_dir + "/" + filename_prefix saver.save(session, savepath + ".weights") with open(savepath + ".config.json","w") as f: @@ -112,7 +112,7 @@ def writeln(s): writeln(model.max_board_size) #y writeln(model.num_input_features) - variables = dict((variable.name,variable) for variable in tf.global_variables()) + variables = dict((variable.name,variable) for variable in tf.compat.v1.global_variables()) def get_weights(name): return np.array(variables[name+":0"].eval()) diff --git a/python/deprecated/find_poses.py b/python/deprecated/find_poses.py old mode 100755 new mode 100644 index 9e8bfbf8d..203130e52 --- a/python/deprecated/find_poses.py +++ b/python/deprecated/find_poses.py @@ -57,7 +57,7 @@ def log(s): policy_probs_output = tf.nn.softmax(model.policy_output) total_parameters = 0 -for variable in tf.trainable_variables(): +for variable in tf.compat.v1.trainable_variables(): shape = variable.get_shape() variable_parameters = 1 for dim in shape: @@ -94,17 +94,17 @@ def log(s): sgfhash_start = next_moves_start + next_moves_len sgfhash_len = 8 -saver = tf.train.Saver( +saver = tf.compat.v1.train.Saver( max_to_keep = 10000, save_relative_paths = True, ) #Some tensorflow options #tfconfig = tf.ConfigProto(log_device_placement=False,device_count={'GPU': 0}) -tfconfig = tf.ConfigProto(log_device_placement=False) +tfconfig = tf.compat.v1.ConfigProto(log_device_placement=False) #tfconfig.gpu_options.allow_growth = True #tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.4 -with tf.Session(config=tfconfig) as session: +with tf.compat.v1.Session(config=tfconfig) as session: saver.restore(session, model_file) log("Began session, loaded model") diff --git 
a/python/deprecated/mixmodels.py b/python/deprecated/mixmodels.py old mode 100755 new mode 100644 index 3de1f9672..64307f11d --- a/python/deprecated/mixmodels.py +++ b/python/deprecated/mixmodels.py @@ -45,7 +45,7 @@ def volume(variable): variables = {} total_parameters = 0 -for variable in tf.global_variables(): +for variable in tf.compat.v1.global_variables(): variable_parameters = volume(variable) total_parameters += variable_parameters variables[variable.name] = variable @@ -59,7 +59,7 @@ def volume(variable): print("Testing", flush=True) -saver = tf.train.Saver( +saver = tf.compat.v1.train.Saver( max_to_keep = 10000, save_relative_paths = True, ) @@ -67,8 +67,8 @@ def volume(variable): count = 0 accum_weights = {} -tfconfig = tf.ConfigProto(log_device_placement=False) -with tf.Session(config=tfconfig) as session: +tfconfig = tf.compat.v1.ConfigProto(log_device_placement=False) +with tf.compat.v1.Session(config=tfconfig) as session: for model_file in model_files: saver.restore(session, model_file) @@ -91,7 +91,7 @@ def run(fetches): assign_ops = dict([(name,variables[name].assign(accum_weights[name])) for name in accum_weights]) -with tf.Session(config=tfconfig) as session: +with tf.compat.v1.Session(config=tfconfig) as session: session.run(assign_ops) print("Saving to " + output_file) saver.save(session, output_file) diff --git a/python/deprecated/model.py b/python/deprecated/model.py index 8ccd27c2c..51c303067 100644 --- a/python/deprecated/model.py +++ b/python/deprecated/model.py @@ -26,7 +26,7 @@ def __init__(self,config): self.reg_variables = [] self.lr_adjusted_variables = {} - self.is_training = tf.placeholder(tf.bool,name="is_training") + self.is_training = tf.compat.v1.placeholder(tf.bool,name="is_training") #Accumulates outputs for printing stats about their activations self.outputs_by_layer = [] @@ -210,7 +210,7 @@ def addPrevPrevLadderFeature(loc,pos,workingMoves): # Build model ------------------------------------------------------------- def ensure_variable_exists(self,name): - for v in tf.trainable_variables(): + for v in tf.compat.v1.trainable_variables(): if v.name == name: return name raise Exception("Could not find variable " + name) @@ -227,7 +227,7 @@ def batchnorm(self,name,tensor): has_bias = True has_scale = False self.batch_norms[name] = (tensor.shape[-1].value,epsilon,has_bias,has_scale) - return tf.layers.batch_normalization( + return tf.compat.v1.layers.batch_normalization( tensor, axis=-1, #Because channels are our last axis, -1 refers to that via wacky python indexing momentum=0.99, @@ -246,7 +246,7 @@ def init_stdev(self,num_inputs,num_outputs): def init_weights(self, shape, num_inputs, num_outputs): stdev = self.init_stdev(num_inputs,num_outputs) / 1.0 - return tf.truncated_normal(shape=shape, stddev=stdev) + return tf.random.truncated_normal(shape=shape, stddev=stdev) def weight_variable_init_constant(self, name, shape, constant): init = tf.zeros(shape) @@ -268,7 +268,7 @@ def weight_variable(self, name, shape, num_inputs, num_outputs, scale_initial_we return variable def conv2d(self, x, w): - return tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='SAME') + return tf.nn.conv2d(input=x, filters=w, strides=[1,1,1,1], padding='SAME') def dilated_conv2d(self, x, w, dilation): return tf.nn.atrous_conv2d(x, w, rate = dilation, padding='SAME') @@ -279,8 +279,8 @@ def apply_symmetry(self,tensor,symmetries,inverse): transp = symmetries[2] rev_axes = tf.concat([ - tf.cond(ud, lambda: tf.constant([1]), lambda: tf.constant([],dtype='int32')), - tf.cond(lr, lambda: 
tf.constant([2]), lambda: tf.constant([],dtype='int32')), + tf.cond(pred=ud, true_fn=lambda: tf.constant([1]), false_fn=lambda: tf.constant([],dtype='int32')), + tf.cond(pred=lr, true_fn=lambda: tf.constant([2]), false_fn=lambda: tf.constant([],dtype='int32')), ], axis=0) if not inverse: @@ -289,14 +289,14 @@ def apply_symmetry(self,tensor,symmetries,inverse): assert(len(tensor.shape) == 4 or len(tensor.shape) == 3) if len(tensor.shape) == 3: tensor = tf.cond( - transp, - lambda: tf.transpose(tensor, [0,2,1]), - lambda: tensor) + pred=transp, + true_fn=lambda: tf.transpose(a=tensor, perm=[0,2,1]), + false_fn=lambda: tensor) else: tensor = tf.cond( - transp, - lambda: tf.transpose(tensor, [0,2,1,3]), - lambda: tensor) + pred=transp, + true_fn=lambda: tf.transpose(a=tensor, perm=[0,2,1,3]), + false_fn=lambda: tensor) if inverse: tensor = tf.reverse(tensor, rev_axes) @@ -321,14 +321,14 @@ def chain_pool(self,tensor,chains,num_chain_segments,empty,nonempty,mode): #Each one needs max_chain_idxs different buckets. num_segments_by_batch_and_channel = tf.fill([1,num_channels],1) * tf.expand_dims(num_chain_segments,axis=1) shift = tf.cumsum(tf.reshape(num_segments_by_batch_and_channel,[-1]),exclusive=True) - num_segments = tf.reduce_sum(num_chain_segments) * num_channels + num_segments = tf.reduce_sum(input_tensor=num_chain_segments) * num_channels shift = tf.reshape(shift,[-1,1,1,num_channels]) segments = tf.expand_dims(chains,3) + shift if mode == "sum": - pools = tf.unsorted_segment_sum(tensor,segments,num_segments=num_segments) + pools = tf.math.unsorted_segment_sum(tensor,segments,num_segments=num_segments) elif mode == "max": - pools = tf.unsorted_segment_max(tensor,segments,num_segments=num_segments) + pools = tf.math.unsorted_segment_max(tensor,segments,num_segments=num_segments) else: assert False @@ -367,13 +367,13 @@ def conv_weight_variable(self, name, diam1, diam2, in_channels, out_channels, sc weights = self.weight_variable(name,[diam1,diam2,in_channels,out_channels],in_channels*diam1*diam2,out_channels,scale_initial_weights,reg=reg) else: extra_initial_weight = self.init_weights([1,1,in_channels,out_channels], in_channels, out_channels) * emphasize_center_weight - extra_initial_weight = tf.pad(extra_initial_weight, [(radius1,radius1),(radius2,radius2),(0,0),(0,0)]) + extra_initial_weight = tf.pad(tensor=extra_initial_weight, paddings=[(radius1,radius1),(radius2,radius2),(0,0),(0,0)]) weights = self.weight_variable(name,[diam1,diam2,in_channels,out_channels],in_channels*diam1*diam2,out_channels,scale_initial_weights,extra_initial_weight,reg=reg) if emphasize_center_lr is not None: factor = tf.constant([emphasize_center_lr],dtype=tf.float32) factor = tf.reshape(factor,[1,1,1,1]) - factor = tf.pad(factor, [(radius1,radius1),(radius2,radius2),(0,0),(0,0)], constant_values=1.0) + factor = tf.pad(tensor=factor, paddings=[(radius1,radius1),(radius2,radius2),(0,0),(0,0)], constant_values=1.0) self.add_lr_factor(weights.name, factor) return weights @@ -399,7 +399,7 @@ def conv_only_extra_center_block(self, name, in_layer, diam, in_channels, out_ch radius = diam // 2 center_weights = self.weight_variable(name+"/wcenter",[1,1,in_channels,out_channels],in_channels,out_channels,scale_initial_weights=0.3*scale_initial_weights) weights = self.weight_variable(name+"/w",[diam,diam,in_channels,out_channels],in_channels*diam*diam,out_channels,scale_initial_weights) - weights = weights + tf.pad(center_weights,[(radius,radius),(radius,radius),(0,0),(0,0)]) + weights = weights + 
tf.pad(tensor=center_weights,paddings=[(radius,radius),(radius,radius),(0,0),(0,0)]) out_layer = self.conv2d(in_layer, weights) self.outputs_by_layer.append((name,out_layer)) return out_layer @@ -435,8 +435,8 @@ def global_res_conv_block(self, name, in_layer, diam, main_channels, mid_channel self.outputs_by_layer.append((name+"/conv1b",conv1b_layer)) trans1b_layer = self.parametric_relu(name+"/trans1b",(self.batchnorm(name+"/norm1b",conv1b_layer))) - trans1b_mean = tf.reduce_mean(trans1b_layer,axis=[1,2],keepdims=True) - trans1b_max = tf.reduce_max(trans1b_layer,axis=[1,2],keepdims=True) + trans1b_mean = tf.reduce_mean(input_tensor=trans1b_layer,axis=[1,2],keepdims=True) + trans1b_max = tf.reduce_max(input_tensor=trans1b_layer,axis=[1,2],keepdims=True) trans1b_pooled = tf.concat([trans1b_mean,trans1b_max],axis=3) remix_weights = self.weight_variable(name+"/w1r",[global_mid_channels*2,mid_channels],global_mid_channels*2,mid_channels, scale_initial_weights = 0.5) @@ -552,7 +552,7 @@ def skew_right(tensor): assert(tensor.shape[1].value == n) assert(tensor.shape[2].value == n) c = tensor.shape[3].value - tensor = tf.pad(tensor,[[0,0],[0,0],[0,n],[0,0]]) #Pad 19x19 -> 19x38 + tensor = tf.pad(tensor=tensor,paddings=[[0,0],[0,0],[0,n],[0,0]]) #Pad 19x19 -> 19x38 tensor = tf.reshape(tensor,[-1,2*n*n,c]) #Linearize tensor = tensor[:,:((2*n-1)*n),:] #Chop off the 19 zeroes on the end tensor = tf.reshape(tensor,[-1,n,2*n-1,c]) #Now we are skewed 19x37 as desired @@ -564,7 +564,7 @@ def unskew_right(tensor): assert(tensor.shape[2].value == 2*n-1) c = tensor.shape[3].value tensor = tf.reshape(tensor,[-1,n*(2*n-1),c]) #Linearize - tensor = tf.pad(tensor,[[0,0],[0,n],[0,0]]) #Pad 19*37 -> 19*38 + tensor = tf.pad(tensor=tensor,paddings=[[0,0],[0,n],[0,0]]) #Pad 19*37 -> 19*38 tensor = tf.reshape(tensor,[-1,n,2*n,c]) #Convert back to 19x38 tensor = tensor[:,:,:n,:] #Chop off the extra, now we are 19x19 return tensor @@ -575,7 +575,7 @@ def skew_left(tensor): assert(tensor.shape[1].value == n) assert(tensor.shape[2].value == n) c = tensor.shape[3].value - tensor = tf.pad(tensor,[[0,0],[1,1],[n-2,0],[0,0]]) #Pad 19x19 -> 21x36 + tensor = tf.pad(tensor=tensor,paddings=[[0,0],[1,1],[n-2,0],[0,0]]) #Pad 19x19 -> 21x36 tensor = tf.reshape(tensor,[-1,(n+2)*(2*n-2),c]) #Linearize tensor = tensor[:,(2*n-3):(-n+1),:] #Chop off the 35 extra zeroes on the start and the 18 at the end. tensor = tf.reshape(tensor,[-1,n,2*n-1,c]) #Now we are skewed 19x37 as desired @@ -588,7 +588,7 @@ def unskew_left(tensor): assert(tensor.shape[2].value == 2*n-1) c = tensor.shape[3].value tensor = tf.reshape(tensor,[-1,n*(2*n-1),c]) #Linearize - tensor = tf.pad(tensor,[[0,0],[2*n-3,n-1],[0,0]]) #Pad 19*37 -> 21*36 + tensor = tf.pad(tensor=tensor,paddings=[[0,0],[2*n-3,n-1],[0,0]]) #Pad 19*37 -> 21*36 tensor = tf.reshape(tensor,[-1,n+2,2*n-2,c]) #Convert back to 21x36 tensor = tensor[:,1:(n+1),(n-2):,:] #Chop off the extra, now we are 19x19 return tensor @@ -683,10 +683,10 @@ def build_model(self, use_ranks, include_policy, include_value, predict_pass): self.version = 2 #V2 features, no internal architecture change. 
#Input layer--------------------------------------------------------------------------------- - inputs = tf.placeholder(tf.float32, [None] + self.input_shape, name="inputs") - ranks = tf.placeholder(tf.float32, [None] + self.rank_shape, name="ranks") - symmetries = tf.placeholder(tf.bool, [3], name="symmetries") - include_history = tf.placeholder(tf.float32, [None] + [5], name="include_history") + inputs = tf.compat.v1.placeholder(tf.float32, [None] + self.input_shape, name="inputs") + ranks = tf.compat.v1.placeholder(tf.float32, [None] + self.rank_shape, name="ranks") + symmetries = tf.compat.v1.placeholder(tf.bool, [3], name="symmetries") + include_history = tf.compat.v1.placeholder(tf.float32, [None] + [5], name="include_history") self.inputs = inputs self.ranks = ranks self.symmetries = symmetries @@ -879,8 +879,8 @@ def build_model(self, use_ranks, include_policy, include_value, predict_pass): #Fold g1 down to single values for the board. #For stdev, add a tiny constant to ensure numeric stability - g1_mean = tf.reduce_mean(g1_layer,axis=[1,2],keepdims=True) - g1_max = tf.reduce_max(g1_layer,axis=[1,2],keepdims=True) + g1_mean = tf.reduce_mean(input_tensor=g1_layer,axis=[1,2],keepdims=True) + g1_max = tf.reduce_max(input_tensor=g1_layer,axis=[1,2],keepdims=True) g2_layer = tf.concat([g1_mean,g1_max],axis=3) #shape [b,1,1,2*convg1num_channels] g2_num_channels = 2*g1_num_channels self.outputs_by_layer.append(("g2",g2_layer)) @@ -921,7 +921,7 @@ def build_model(self, use_ranks, include_policy, include_value, predict_pass): if not predict_pass: #Simply add the pass output on with a large negative constant that's probably way more negative than anything #else the neural net would output. - policy_output = tf.pad(policy_output,[(0,0),(0,1)], constant_values = -10000., name="policy_output") + policy_output = tf.pad(tensor=policy_output,paddings=[(0,0),(0,1)], constant_values = -10000., name="policy_output") else: #Add pass move based on the global g values matmulpass = self.weight_variable("matmulpass",[g2_num_channels,1],g2_num_channels*8,1) @@ -935,7 +935,7 @@ def build_model(self, use_ranks, include_policy, include_value, predict_pass): else: #Don't include policy? Just set the policy output to all zeros. 
policy_output = tf.zeros_like(inputs[:,:,0]) - policy_output = tf.pad(policy_output,[(0,0),(0,1)]) + policy_output = tf.pad(tensor=policy_output,paddings=[(0,0),(0,1)]) self.policy_output = policy_output if include_value: @@ -947,7 +947,7 @@ def build_model(self, use_ranks, include_policy, include_value, predict_pass): self.v1_conv = ("v1",3,trunk_num_channels,v1_num_channels) self.v1_num_channels = v1_num_channels - v1_layer_pooled = tf.reduce_mean(v1_layer,axis=[1,2],keepdims=False) + v1_layer_pooled = tf.reduce_mean(input_tensor=v1_layer,axis=[1,2],keepdims=False) v1_size = v1_num_channels v2_size = 12 @@ -983,43 +983,43 @@ def __init__(self,model,for_optimization,require_last_move): value_output = model.value_output #Loss function - self.policy_targets = tf.placeholder(tf.float32, [None] + model.policy_target_shape) - self.value_target = tf.placeholder(tf.float32, [None] + model.value_target_shape) - self.target_weights_from_data = tf.placeholder(tf.float32, [None] + model.target_weights_shape) + self.policy_targets = tf.compat.v1.placeholder(tf.float32, [None] + model.policy_target_shape) + self.value_target = tf.compat.v1.placeholder(tf.float32, [None] + model.value_target_shape) + self.target_weights_from_data = tf.compat.v1.placeholder(tf.float32, [None] + model.target_weights_shape) if require_last_move == "all": - self.target_weights_used = self.target_weights_from_data * tf.reduce_sum(model.inputs[:,:,14],axis=[1]) + self.target_weights_used = self.target_weights_from_data * tf.reduce_sum(input_tensor=model.inputs[:,:,14],axis=[1]) elif require_last_move is True: - self.target_weights_used = self.target_weights_from_data * tf.reduce_sum(model.inputs[:,:,10],axis=[1]) + self.target_weights_used = self.target_weights_from_data * tf.reduce_sum(input_tensor=model.inputs[:,:,10],axis=[1]) else: self.target_weights_used = self.target_weights_from_data self.policy_loss = tf.reduce_sum( - self.target_weights_used * - tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.policy_targets, logits=policy_output) + input_tensor=self.target_weights_used * + tf.nn.softmax_cross_entropy_with_logits(labels=self.policy_targets, logits=policy_output) ) cross_entropy_value_loss = 1.4*tf.reduce_sum( - self.target_weights_used * + input_tensor=self.target_weights_used * tf.nn.softmax_cross_entropy_with_logits( - labels=tf.stack([(1+self.value_target)/2,(1-self.value_target)/2],axis=1), + labels=tf.stop_gradient(tf.stack([(1+self.value_target)/2,(1-self.value_target)/2],axis=1)), logits=tf.stack([value_output,tf.zeros_like(value_output)],axis=1) ) ) l2_value_loss = tf.reduce_sum( - self.target_weights_used * + input_tensor=self.target_weights_used * tf.square(self.value_target - tf.tanh(value_output)) ) self.value_loss = 0.5 * (cross_entropy_value_loss + l2_value_loss) # self.value_loss = l2_value_loss - self.weight_sum = tf.reduce_sum(self.target_weights_used) + self.weight_sum = tf.reduce_sum(input_tensor=self.target_weights_used) if for_optimization: #Prior/Regularization - self.l2_reg_coeff = tf.placeholder(tf.float32) + self.l2_reg_coeff = tf.compat.v1.placeholder(tf.float32) self.reg_loss = self.l2_reg_coeff * tf.add_n([tf.nn.l2_loss(variable) for variable in model.reg_variables]) * self.weight_sum #The loss to optimize @@ -1028,36 +1028,36 @@ def __init__(self,model,for_optimization,require_last_move): class Metrics: def __init__(self,model,target_vars,include_debug_stats): #Training results - policy_target_idxs = tf.argmax(target_vars.policy_targets, 1) - self.top1_prediction = 
tf.equal(tf.argmax(model.policy_output, 1), policy_target_idxs) - self.top4_prediction = tf.nn.in_top_k(model.policy_output,policy_target_idxs,4) - self.accuracy1 = tf.reduce_sum(target_vars.target_weights_used * tf.cast(self.top1_prediction, tf.float32)) - self.accuracy4 = tf.reduce_sum(target_vars.target_weights_used * tf.cast(self.top4_prediction, tf.float32)) - self.valueconf = tf.reduce_sum(tf.square(model.value_output)) + policy_target_idxs = tf.argmax(input=target_vars.policy_targets, axis=1) + self.top1_prediction = tf.equal(tf.argmax(input=model.policy_output, axis=1), policy_target_idxs) + self.top4_prediction = tf.nn.in_top_k(predictions=model.policy_output,targets=policy_target_idxs,k=4) + self.accuracy1 = tf.reduce_sum(input_tensor=target_vars.target_weights_used * tf.cast(self.top1_prediction, tf.float32)) + self.accuracy4 = tf.reduce_sum(input_tensor=target_vars.target_weights_used * tf.cast(self.top4_prediction, tf.float32)) + self.valueconf = tf.reduce_sum(input_tensor=tf.square(model.value_output)) #Debugging stats if include_debug_stats: def reduce_norm(x, axis=None, keepdims=False): - return tf.sqrt(tf.reduce_mean(tf.square(x), axis=axis, keepdims=keepdims)) + return tf.sqrt(tf.reduce_mean(input_tensor=tf.square(x), axis=axis, keepdims=keepdims)) def reduce_stdev(x, axis=None, keepdims=False): - m = tf.reduce_mean(x, axis=axis, keepdims=True) + m = tf.reduce_mean(input_tensor=x, axis=axis, keepdims=True) devs_squared = tf.square(x - m) - return tf.sqrt(tf.reduce_mean(devs_squared, axis=axis, keepdims=keepdims)) + return tf.sqrt(tf.reduce_mean(input_tensor=devs_squared, axis=axis, keepdims=keepdims)) self.activated_prop_by_layer = dict([ - (name,tf.reduce_mean(tf.count_nonzero(layer,axis=[1,2])/layer.shape[1].value/layer.shape[2].value, axis=0)) for (name,layer) in model.outputs_by_layer + (name,tf.reduce_mean(input_tensor=tf.math.count_nonzero(layer,axis=[1,2])/layer.shape[1].value/layer.shape[2].value, axis=0)) for (name,layer) in model.outputs_by_layer ]) self.mean_output_by_layer = dict([ - (name,tf.reduce_mean(layer,axis=[0,1,2])) for (name,layer) in model.outputs_by_layer + (name,tf.reduce_mean(input_tensor=layer,axis=[0,1,2])) for (name,layer) in model.outputs_by_layer ]) self.stdev_output_by_layer = dict([ (name,reduce_stdev(layer,axis=[0,1,2])**2) for (name,layer) in model.outputs_by_layer ]) self.mean_weights_by_var = dict([ - (v.name,tf.reduce_mean(v)) for v in tf.trainable_variables() + (v.name,tf.reduce_mean(input_tensor=v)) for v in tf.compat.v1.trainable_variables() ]) self.norm_weights_by_var = dict([ - (v.name,reduce_norm(v)) for v in tf.trainable_variables() + (v.name,reduce_norm(v)) for v in tf.compat.v1.trainable_variables() ]) diff --git a/python/deprecated/play.py b/python/deprecated/play.py old mode 100755 new mode 100644 index 30583d81a..17a4198ca --- a/python/deprecated/play.py +++ b/python/deprecated/play.py @@ -265,7 +265,7 @@ def run_gtp(session): layerdict = dict(model.outputs_by_layer) weightdict = dict() - for v in tf.trainable_variables(): + for v in tf.compat.v1.trainable_variables(): weightdict[v.name] = v rank_policy_command_lookup = dict() @@ -528,12 +528,12 @@ def add_input_feature_visualizations(layer_name, feature_idx, normalization_div) print('?%s ???\n\n' % (cmdid,), end='') sys.stdout.flush() -saver = tf.train.Saver( +saver = tf.compat.v1.train.Saver( max_to_keep = 10000, save_relative_paths = True, ) -with tf.Session() as session: +with tf.compat.v1.Session() as session: saver.restore(session, modelpath) run_gtp(session) diff 
--git a/python/deprecated/test.py b/python/deprecated/test.py old mode 100755 new mode 100644 index 0b8711d31..26adf1145 --- a/python/deprecated/test.py +++ b/python/deprecated/test.py @@ -57,7 +57,7 @@ def log(s): metrics = Metrics(model,target_vars,include_debug_stats=False) total_parameters = 0 -for variable in tf.trainable_variables(): +for variable in tf.compat.v1.trainable_variables(): shape = variable.get_shape() variable_parameters = 1 for dim in shape: @@ -94,17 +94,17 @@ def log(s): print("Testing", flush=True) -saver = tf.train.Saver( +saver = tf.compat.v1.train.Saver( max_to_keep = 10000, save_relative_paths = True, ) #Some tensorflow options #tfconfig = tf.ConfigProto(log_device_placement=False,device_count={'GPU': 0}) -tfconfig = tf.ConfigProto(log_device_placement=False) +tfconfig = tf.compat.v1.ConfigProto(log_device_placement=False) #tfconfig.gpu_options.allow_growth = True #tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.4 -with tf.Session(config=tfconfig) as session: +with tf.compat.v1.Session(config=tfconfig) as session: saver.restore(session, model_file) sys.stdout.flush() diff --git a/python/deprecated/testlossbyhash.py b/python/deprecated/testlossbyhash.py index 6b3ce3a00..2424e1096 100644 --- a/python/deprecated/testlossbyhash.py +++ b/python/deprecated/testlossbyhash.py @@ -52,7 +52,7 @@ def log(s): target_vars = Target_vars(model,for_optimization=False,require_last_move=require_last_move) total_parameters = 0 -for variable in tf.trainable_variables(): +for variable in tf.compat.v1.trainable_variables(): shape = variable.get_shape() variable_parameters = 1 for dim in shape: @@ -89,17 +89,17 @@ def log(s): print("Testing", flush=True) -saver = tf.train.Saver( +saver = tf.compat.v1.train.Saver( max_to_keep = 10000, save_relative_paths = True, ) #Some tensorflow options #tfconfig = tf.ConfigProto(log_device_placement=False,device_count={'GPU': 0}) -tfconfig = tf.ConfigProto(log_device_placement=False) +tfconfig = tf.compat.v1.ConfigProto(log_device_placement=False) #tfconfig.gpu_options.allow_growth = True #tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.4 -with tf.Session(config=tfconfig) as session: +with tf.compat.v1.Session(config=tfconfig) as session: saver.restore(session, model_file) sys.stdout.flush() diff --git a/python/deprecated/testmagnitudes.py b/python/deprecated/testmagnitudes.py old mode 100755 new mode 100644 index 44b019fe0..b66bfe295 --- a/python/deprecated/testmagnitudes.py +++ b/python/deprecated/testmagnitudes.py @@ -57,7 +57,7 @@ def log(s): metrics = Metrics(model,target_vars,include_debug_stats=False) total_parameters = 0 -for variable in tf.trainable_variables(): +for variable in tf.compat.v1.trainable_variables(): shape = variable.get_shape() variable_parameters = 1 for dim in shape: @@ -94,17 +94,17 @@ def log(s): print("Testing", flush=True) -saver = tf.train.Saver( +saver = tf.compat.v1.train.Saver( max_to_keep = 10000, save_relative_paths = True, ) #Some tensorflow options #tfconfig = tf.ConfigProto(log_device_placement=False,device_count={'GPU': 0}) -tfconfig = tf.ConfigProto(log_device_placement=False) +tfconfig = tf.compat.v1.ConfigProto(log_device_placement=False) #tfconfig.gpu_options.allow_growth = True #tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.4 -with tf.Session(config=tfconfig) as session: +with tf.compat.v1.Session(config=tfconfig) as session: saver.restore(session, model_file) sys.stdout.flush() @@ -185,12 +185,12 @@ def run_validation_in_batches_and_print(fetches): sys.stdout.flush() vmetrics 
= {} - for variable in tf.trainable_variables(): - vmetrics[variable.name + "/maxabsvalue"] = tf.reduce_max(tf.abs(variable)) + for variable in tf.compat.v1.trainable_variables(): + vmetrics[variable.name + "/maxabsvalue"] = tf.reduce_max(input_tensor=tf.abs(variable)) for (layername,tensor) in model.outputs_by_layer: - vmetrics[layername + "/maxabsvalue"] = tf.reduce_max(tf.abs(tensor)) + vmetrics[layername + "/maxabsvalue"] = tf.reduce_max(input_tensor=tf.abs(tensor)) for (layername,tensor) in model.other_internal_outputs: - vmetrics[layername + "/maxabsvalue"] = tf.reduce_max(tf.abs(tensor)) + vmetrics[layername + "/maxabsvalue"] = tf.reduce_max(input_tensor=tf.abs(tensor)) run_validation_in_batches_and_print(vmetrics) diff --git a/python/deprecated/train.py b/python/deprecated/train.py old mode 100755 new mode 100644 index 6ff9bacb1..4bf401d18 --- a/python/deprecated/train.py +++ b/python/deprecated/train.py @@ -98,11 +98,11 @@ def detaillog(s): target_vars = Target_vars(model,for_optimization=True,require_last_move=False) #Training operation -per_sample_learning_rate = tf.placeholder(tf.float32) +per_sample_learning_rate = tf.compat.v1.placeholder(tf.float32) lr_adjusted_variables = model.lr_adjusted_variables -update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) #collect batch norm update operations +update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS) #collect batch norm update operations with tf.control_dependencies(update_ops): - optimizer = tf.train.MomentumOptimizer(per_sample_learning_rate, momentum=0.9, use_nesterov=True) + optimizer = tf.compat.v1.train.MomentumOptimizer(per_sample_learning_rate, momentum=0.9, use_nesterov=True) gradients = optimizer.compute_gradients(target_vars.opt_loss) adjusted_gradients = [] for (grad,x) in gradients: @@ -118,13 +118,13 @@ def detaillog(s): metrics = Metrics(model,target_vars,include_debug_stats=True) def reduce_norm(x, axis=None, keepdims=False): - return tf.sqrt(tf.reduce_mean(tf.square(x), axis=axis, keepdims=keepdims)) + return tf.sqrt(tf.reduce_mean(input_tensor=tf.square(x), axis=axis, keepdims=keepdims)) relative_update_by_var = dict([ (v.name,per_sample_learning_rate * reduce_norm(grad) / (1e-10 + reduce_norm(v))) for (grad,v) in adjusted_gradients if grad is not None ]) total_parameters = 0 -for variable in tf.trainable_variables(): +for variable in tf.compat.v1.trainable_variables(): shape = variable.get_shape() variable_parameters = 1 for dim in shape: @@ -134,7 +134,7 @@ def reduce_norm(x, axis=None, keepdims=False): trainlog("Built model, %d total parameters" % total_parameters) -for update_op in tf.get_collection(tf.GraphKeys.UPDATE_OPS): +for update_op in tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS): trainlog("Additional update op on train step: %s" % update_op.name) # Open H5 file--------------------------------------------------------- @@ -227,20 +227,20 @@ def report_epoch_done(self,epoch): # Training ------------------------------------------------------------ -saver = tf.train.Saver( +saver = tf.compat.v1.train.Saver( max_to_keep = 10000, save_relative_paths = True, ) #Some tensorflow options -tfconfig = tf.ConfigProto(log_device_placement=False) +tfconfig = tf.compat.v1.ConfigProto(log_device_placement=False) #tfconfig.gpu_options.allow_growth = True #tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.4 -with tf.Session(config=tfconfig) as session: +with tf.compat.v1.Session(config=tfconfig) as session: if restart_file is not None: saver.restore(session, restart_file) 
else: - session.run(tf.global_variables_initializer()) + session.run(tf.compat.v1.global_variables_initializer()) sys.stdout.flush() sys.stderr.flush() diff --git a/python/deprecated/visualize.py b/python/deprecated/visualize.py old mode 100755 new mode 100644 index 0f1239490..779e2f565 --- a/python/deprecated/visualize.py +++ b/python/deprecated/visualize.py @@ -54,7 +54,7 @@ def volume(variable): return variable_parameters total_parameters = 0 -for variable in tf.global_variables(): +for variable in tf.compat.v1.global_variables(): variable_parameters = volume(variable) total_parameters += variable_parameters log("Model variable %s, %d parameters" % (variable.name,variable_parameters)) @@ -66,17 +66,17 @@ def volume(variable): print("Testing", flush=True) -saver = tf.train.Saver( +saver = tf.compat.v1.train.Saver( max_to_keep = 10000, save_relative_paths = True, ) #Some tensorflow options #tfconfig = tf.ConfigProto(log_device_placement=False,device_count={'GPU': 0}) -tfconfig = tf.ConfigProto(log_device_placement=False) +tfconfig = tf.compat.v1.ConfigProto(log_device_placement=False) #tfconfig.gpu_options.allow_growth = True #tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.4 -with tf.Session(config=tfconfig) as session: +with tf.compat.v1.Session(config=tfconfig) as session: saver.restore(session, model_file) sys.stdout.flush() @@ -91,7 +91,7 @@ def run(fetches): return session.run(fetches, feed_dict={}) if dump is not None: - variables = dict((variable.name,variable) for variable in tf.trainable_variables()) + variables = dict((variable.name,variable) for variable in tf.compat.v1.trainable_variables()) for name in dump.split(","): variable = variables[name] variable = np.array(variable.eval()) @@ -116,13 +116,13 @@ def run(fetches): for x0 in range(variable.shape[0]))) if conv_norm_by_xy is not None: - variables = dict((variable.name,variable) for variable in tf.trainable_variables()) + variables = dict((variable.name,variable) for variable in tf.compat.v1.trainable_variables()) for name in conv_norm_by_xy.split(","): variable = variables[name] #Should be x,y,in_channels,out_channels assert(len(variable.shape) == 4) - norms = tf.sqrt(tf.reduce_mean(variable*variable,axis=[2,3])) + norms = tf.sqrt(tf.reduce_mean(input_tensor=variable*variable,axis=[2,3])) norms = np.array(run(norms)) print(name + " " + str(volume(variable)) + " parameters") for y in range(norms.shape[1]): @@ -131,7 +131,7 @@ def run(fetches): print("") if conv_norm_by_channel is not None: - variables = dict((variable.name,variable) for variable in tf.trainable_variables()) + variables = dict((variable.name,variable) for variable in tf.compat.v1.trainable_variables()) #Each convolution weight variable has a set of channels it takes in as input and a set of channels it produces #as output. This is a dictionary of the mapping. 
@@ -178,7 +178,7 @@ def run(fetches): #Should be x,y,in_channels,out_channels assert(len(variable.shape) == 4) - norm = tf.sqrt(tf.reduce_mean(variable*variable,axis=[0,1])) + norm = tf.sqrt(tf.reduce_mean(input_tensor=variable*variable,axis=[0,1])) norm = np.array(run(norm)) norms[var_name] = norm diff --git a/python/export_model.py b/python/export_model.py index bf01b8460..8ce1e63da 100644 --- a/python/export_model.py +++ b/python/export_model.py @@ -15,6 +15,7 @@ from model import Model, ModelUtils import common +tf.compat.v1.disable_eager_execution() #Command and args------------------------------------------------------------------- description = """ @@ -89,7 +90,7 @@ def log(s): sys.stderr.flush() if not for_cuda: - tf.train.write_graph(session.graph_def,export_dir,filename_prefix + ".graph.pb") + tf.io.write_graph(session.graph_def,export_dir,filename_prefix + ".graph.pb") savepath = export_dir + "/" + filename_prefix saver.save(session, savepath + ".weights") with open(savepath + ".config.json","w") as f: diff --git a/python/genboard_run.py b/python/genboard_run.py old mode 100755 new mode 100644 diff --git a/python/genboard_train.py b/python/genboard_train.py old mode 100755 new mode 100644 diff --git a/python/model.py b/python/model.py index 59b9070db..1f7e532a7 100644 --- a/python/model.py +++ b/python/model.py @@ -117,15 +117,15 @@ def __init__(self,config,pos_len,placeholders,is_training=False): def assert_batched_shape(self,name,tensor,shape): if (len(tensor.shape) != len(shape)+1 or - [int(tensor.shape[i+1].value) for i in range(len(shape))] != [int(x) for x in shape]): + [int(tensor.shape.as_list()[i+1]) for i in range(len(shape))] != [int(x) for x in shape]): raise Exception("%s should have shape %s after a batch dimension but instead it had shape %s" % ( - name, str(shape), str([str(x.value) for x in tensor.shape]))) + name, str(shape), str([str(x) for x in tensor.shape]))) def assert_shape(self,name,tensor,shape): if (len(tensor.shape) != len(shape) or - [int(x.value) for x in tensor.shape] != [int(x) for x in shape]): + [int(x) for x in tensor.shape] != [int(x) for x in shape]): raise Exception("%s should have shape %s but instead it had shape %s" % ( - name, str(shape), str([str(x.value) for x in tensor.shape]))) + name, str(shape), str([str(x) for x in tensor.shape]))) def xy_to_tensor_pos(self,x,y): return y * self.pos_len + x @@ -509,34 +509,34 @@ def add_lr_factor(self,name,factor): def batchnorm_and_mask(self,name,tensor,mask,mask_sum,use_gamma_in_fixup=False): if self.use_fixup: - self.batch_norms[name] = (tensor.shape[-1].value,1e-20,True,use_gamma_in_fixup,self.use_fixup) + self.batch_norms[name] = (tensor.shape.as_list()[-1],1e-20,True,use_gamma_in_fixup,self.use_fixup) if use_gamma_in_fixup: - gamma = self.weight_variable_init_constant(name+"/gamma", [tensor.shape[3].value], 1.0) - beta = self.weight_variable_init_constant(name+"/beta", [tensor.shape[3].value], 0.0, reg="tiny") + gamma = self.weight_variable_init_constant(name+"/gamma", [tensor.shape.as_list()[3]], 1.0) + beta = self.weight_variable_init_constant(name+"/beta", [tensor.shape.as_list()[3]], 0.0, reg="tiny") return (tensor * gamma + beta) * mask else: - beta = self.weight_variable_init_constant(name+"/beta", [tensor.shape[3].value], 0.0, reg="tiny") + beta = self.weight_variable_init_constant(name+"/beta", [tensor.shape.as_list()[3]], 0.0, reg="tiny") return (tensor + beta) * mask epsilon = 0.001 has_bias = True has_scale = False - self.batch_norms[name] = 
(tensor.shape[-1].value,epsilon,has_bias,has_scale,self.use_fixup) + self.batch_norms[name] = (tensor.shape.as_list()[-1],epsilon,has_bias,has_scale,self.use_fixup) - num_channels = tensor.shape[3].value + num_channels = tensor.shape.as_list()[3] collections = [tf.compat.v1.GraphKeys.GLOBAL_VARIABLES,tf.compat.v1.GraphKeys.MODEL_VARIABLES,tf.compat.v1.GraphKeys.MOVING_AVERAGE_VARIABLES] #Define variables to keep track of the mean and variance moving_mean = tf.compat.v1.get_variable(initializer=tf.zeros([num_channels]),name=(name+"/moving_mean"),trainable=False,collections=collections) moving_var = tf.compat.v1.get_variable(initializer=tf.ones([num_channels]),name=(name+"/moving_variance"),trainable=False,collections=collections) - beta = self.weight_variable_init_constant(name+"/beta", [tensor.shape[3].value], 0.0, reg=False) + beta = self.weight_variable_init_constant(name+"/beta", [tensor.shape.as_list()[3]], 0.0, reg=False) #This is the mean, computed only over exactly the areas of the mask, weighting each spot equally, #even across different elements in the batch that might have different board sizes. - mean = tf.reduce_sum(tensor * mask,axis=[0,1,2]) / mask_sum + mean = tf.reduce_sum(input_tensor=tensor * mask,axis=[0,1,2]) / mask_sum zmtensor = tensor-mean #Similarly, the variance computed exactly only over those spots - var = tf.reduce_sum(tf.square(zmtensor * mask),axis=[0,1,2]) / mask_sum + var = tf.reduce_sum(input_tensor=tf.square(zmtensor * mask),axis=[0,1,2]) / mask_sum with tf.compat.v1.variable_scope(name): mean_op = tf.keras.backend.moving_average_update(moving_mean,mean,0.998) @@ -550,24 +550,9 @@ def training_f(): def inference_f(): return (moving_mean,moving_var) - use_mean,use_var = tf.cond(self.is_training_tensor,training_f,inference_f) + use_mean,use_var = tf.cond(pred=self.is_training_tensor,true_fn=training_f,false_fn=inference_f) return tf.nn.batch_normalization(tensor,use_mean,use_var,beta,None,epsilon) * mask - # def batchnorm(self,name,tensor): - # epsilon = 0.001 - # has_bias = True - # has_scale = False - # self.batch_norms[name] = (tensor.shape[-1].value,epsilon,has_bias,has_scale) - # return tf.layers.batch_normalization( - # tensor, - # axis=-1, #Because channels are our last axis, -1 refers to that via wacky python indexing - # momentum=0.99, - # epsilon=epsilon, - # center=has_bias, - # scale=has_scale, - # training=self.is_training_tensor, - # name=name, - # ) def init_stdev(self,num_inputs,num_outputs): #xavier @@ -604,7 +589,7 @@ def weight_variable(self, name, shape, num_inputs, num_outputs, scale_initial_we return variable def conv2d(self, x, w): - return tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='SAME') + return tf.nn.conv2d(input=x, filters=w, strides=[1,1,1,1], padding='SAME') def dilated_conv2d(self, x, w, dilation): return tf.nn.atrous_conv2d(x, w, rate = dilation, padding='SAME') @@ -616,31 +601,31 @@ def apply_symmetry(self,tensor,symmetries,inverse): if not inverse: tensor = tf.cond( - ud, - lambda: tf.reverse(tensor,[1]), - lambda: tensor + pred=ud, + true_fn=lambda: tf.reverse(tensor,[1]), + false_fn=lambda: tensor ) tensor = tf.cond( - lr, - lambda: tf.reverse(tensor,[2]), - lambda: tensor + pred=lr, + true_fn=lambda: tf.reverse(tensor,[2]), + false_fn=lambda: tensor ) tensor = tf.cond( - transp, - lambda: tf.transpose(tensor, [0,2,1,3]), - lambda: tensor) + pred=transp, + true_fn=lambda: tf.transpose(a=tensor, perm=[0,2,1,3]), + false_fn=lambda: tensor) if inverse: tensor = tf.cond( - ud, - lambda: tf.reverse(tensor,[1]), - lambda: 
tensor + pred=ud, + true_fn=lambda: tf.reverse(tensor,[1]), + false_fn=lambda: tensor ) tensor = tf.cond( - lr, - lambda: tf.reverse(tensor,[2]), - lambda: tensor + pred=lr, + true_fn=lambda: tf.reverse(tensor,[2]), + false_fn=lambda: tensor ) return tensor @@ -649,20 +634,14 @@ def apply_symmetry(self,tensor,symmetries,inverse): def relu(self, name, layer): assert(len(layer.shape) == 4) - #num_channels = layer.shape[3].value - #alphas = self.weight_variable_init_constant(name+"/relu",[1,1,1,num_channels],constant=0.0) return tf.nn.relu(layer) def relu_spatial1d(self, name, layer): assert(len(layer.shape) == 3) - #num_channels = layer.shape[1].value - #alphas = self.weight_variable_init_constant(name+"/relu",[1,num_channels],constant=0.0) return tf.nn.relu(layer) def relu_non_spatial(self, name, layer): assert(len(layer.shape) == 2) - #num_channels = layer.shape[1].value - #alphas = self.weight_variable_init_constant(name+"/relu",[1,num_channels],constant=0.0) return tf.nn.relu(layer) def merge_residual(self,name,trunk,residual): @@ -785,8 +764,8 @@ def global_pool(self, in_layer, mask_sum_hw, mask_sum_hw_sqrt): div = tf.reshape(mask_sum_hw,[-1,1,1,1]) div_sqrt = tf.reshape(mask_sum_hw_sqrt,[-1,1,1,1]) - layer_raw_mean = tf.reduce_sum(in_layer,axis=[1,2],keepdims=True) / div - layer_raw_max = tf.reduce_max(in_layer,axis=[1,2],keepdims=True) + layer_raw_mean = tf.reduce_sum(input_tensor=in_layer,axis=[1,2],keepdims=True) / div + layer_raw_max = tf.reduce_max(input_tensor=in_layer,axis=[1,2],keepdims=True) # 1, (x-14)/10, and (x-14)^2/100 - 0.1 are three orthogonal functions over [9,19], the range of reasonable board sizes. # We have the 14 in there since it's the midpoint of that range. The /10 is just sort of arbitrary normalization to keep things on the same scale. @@ -802,7 +781,7 @@ def value_head_pool(self, in_layer, mask_sum_hw, mask_sum_hw_sqrt): div = tf.reshape(mask_sum_hw,[-1,1]) div_sqrt = tf.reshape(mask_sum_hw_sqrt,[-1,1]) - layer_raw_mean = tf.reduce_sum(in_layer,axis=[1,2],keepdims=False) / div + layer_raw_mean = tf.reduce_sum(input_tensor=in_layer,axis=[1,2],keepdims=False) / div # 1, (x-14)/10, and (x-14)^2/100 - 0.1 are three orthogonal functions over [9,19], the range of reasonable board sizes. # We have the 14 in there since it's the midpoint of that range. 
The /10 and /100 are just sort of arbitrary normalization to keep things on the same scale @@ -931,17 +910,17 @@ def build_model(self,config,placeholders): assert(hist_matrix_base.dtype == tf.float32) assert(hist_matrix_builder.dtype == tf.float32) assert(len(hist_matrix_builder.shape) == 3) - assert(hist_matrix_builder.shape[0].value == 5) - assert(hist_matrix_builder.shape[1].value == self.num_bin_input_features) - assert(hist_matrix_builder.shape[2].value == self.num_bin_input_features) + assert(hist_matrix_builder.shape.as_list()[0] == 5) + assert(hist_matrix_builder.shape.as_list()[1] == self.num_bin_input_features) + assert(hist_matrix_builder.shape.as_list()[2] == self.num_bin_input_features) hist_filter_matrix = hist_matrix_base + tf.tensordot(include_history, hist_matrix_builder, axes=[[1],[0]]) #[batch,move] * [move,inc,outc] = [batch,inc,outc] cur_layer = tf.reshape(cur_layer,[-1,self.pos_len*self.pos_len,self.num_bin_input_features]) #[batch,xy,inc] cur_layer = tf.matmul(cur_layer,hist_filter_matrix) #[batch,xy,inc] * [batch,inc,outc] = [batch,xy,outc] cur_layer = tf.reshape(cur_layer,[-1,self.pos_len,self.pos_len,self.num_bin_input_features]) - assert(include_history.shape[1].value == 5) - transformed_global_inputs = global_inputs * tf.pad(include_history, [(0,0),(0,self.num_global_input_features - include_history.shape[1].value)], constant_values=1.0) + assert(include_history.shape.as_list()[1] == 5) + transformed_global_inputs = global_inputs * tf.pad(tensor=include_history, paddings=[(0,0),(0,self.num_global_input_features - include_history.shape.as_list()[1])], constant_values=1.0) self.transformed_bin_inputs = cur_layer self.transformed_global_inputs = transformed_global_inputs @@ -962,8 +941,8 @@ def build_model(self,config,placeholders): self.gpool_num_channels = gpool_num_channels mask = cur_layer[:,:,:,0:1] - mask_sum = tf.reduce_sum(mask) # Global sum - mask_sum_hw = tf.reduce_sum(mask,axis=[1,2,3]) # Sum per batch element + mask_sum = tf.reduce_sum(input_tensor=mask) # Global sum + mask_sum_hw = tf.reduce_sum(input_tensor=mask,axis=[1,2,3]) # Sum per batch element mask_sum_hw_sqrt = tf.sqrt(mask_sum_hw) #Initial convolutional layer------------------------------------------------------------------------------------- @@ -1137,7 +1116,7 @@ def scaletransform(tensor): #tf.where has a bug where nan values on the non-chosen side will still propagate nans back in gradients. 
#So we also abs the tensor, so that we never get a log of a negative value abstensor = tf.abs(tensor) - return tf.where(tensor > 0, 1.0 + tf.math.log(abstensor + 1.0), 1.0 / (1.0 + tf.math.log(abstensor + 1.0))) + return tf.compat.v1.where(tensor > 0, 1.0 + tf.math.log(abstensor + 1.0), 1.0 / (1.0 + tf.math.log(abstensor + 1.0))) scorebelief_len = self.scorebelief_target_shape[0] scorebelief_mid = self.pos_len*self.pos_len+Model.EXTRA_SCORE_DISTR_RADIUS @@ -1235,7 +1214,7 @@ def scaletransform(tensor): def huber_loss(x,y,delta): absdiff = tf.abs(x - y) - return tf.where(absdiff > delta, (0.5 * delta*delta) + delta * (absdiff - delta), 0.5 * absdiff * absdiff) + return tf.compat.v1.where(absdiff > delta, (0.5 * delta*delta) + delta * (absdiff - delta), 0.5 * absdiff * absdiff) class Target_vars: @@ -1342,42 +1321,42 @@ def __init__(self,model,for_optimization,placeholders): self.policy_loss_unreduced = self.policy_target_weight * ( - tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.policy_target, logits=policy_output[:,:,0]) + tf.nn.softmax_cross_entropy_with_logits(labels=self.policy_target, logits=policy_output[:,:,0]) ) self.policy1_loss_unreduced = self.policy_target_weight1 * 0.15 * ( - tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.policy_target1, logits=policy_output[:,:,1]) + tf.nn.softmax_cross_entropy_with_logits(labels=self.policy_target1, logits=policy_output[:,:,1]) ) - self.value_loss_unreduced = 1.20 * tf.nn.softmax_cross_entropy_with_logits_v2( + self.value_loss_unreduced = 1.20 * tf.nn.softmax_cross_entropy_with_logits( labels=self.value_target, logits=value_output ) self.td_value_loss_unreduced = tf.constant([0.55,0.55,0.15],dtype=tf.float32) * ( - tf.nn.softmax_cross_entropy_with_logits_v2( + tf.nn.softmax_cross_entropy_with_logits( labels=self.td_value_target, logits=td_value_prediction ) - # Subtract out the entropy, so as to get loss 0 at perfect prediction - tf.nn.softmax_cross_entropy_with_logits_v2( + tf.nn.softmax_cross_entropy_with_logits( labels=self.td_value_target, logits=tf.math.log(self.td_value_target + 1.0e-30) ) ) - self.td_value_loss_unreduced = tf.reduce_sum(self.td_value_loss_unreduced, axis=1) + self.td_value_loss_unreduced = tf.reduce_sum(input_tensor=self.td_value_loss_unreduced, axis=1) self.td_score_loss_unreduced = 0.0004 * self.ownership_target_weight * ( - tf.reduce_sum(huber_loss(self.td_score_target, td_score_prediction, delta = 12.0), axis=1) + tf.reduce_sum(input_tensor=huber_loss(self.td_score_target, td_score_prediction, delta = 12.0), axis=1) ) self.scorebelief_cdf_loss_unreduced = 0.020 * self.ownership_target_weight * ( tf.reduce_sum( - tf.square(tf.cumsum(self.scorebelief_target,axis=1) - tf.cumsum(tf.nn.softmax(scorebelief_output,axis=1),axis=1)), + input_tensor=tf.square(tf.cumsum(self.scorebelief_target,axis=1) - tf.cumsum(tf.nn.softmax(scorebelief_output,axis=1),axis=1)), axis=1 ) ) self.scorebelief_pdf_loss_unreduced = 0.020 * self.ownership_target_weight * ( - tf.nn.softmax_cross_entropy_with_logits_v2( + tf.nn.softmax_cross_entropy_with_logits( labels=self.scorebelief_target, logits=scorebelief_output ) @@ -1388,7 +1367,7 @@ def __init__(self,model,for_optimization,placeholders): #Not unlike the way that policy and value loss are also equal-weighted by batch element. 
self.ownership_loss_unreduced = 1.5 * self.ownership_target_weight * ( tf.reduce_sum( - tf.nn.softmax_cross_entropy_with_logits_v2( + input_tensor=tf.nn.softmax_cross_entropy_with_logits( labels=tf.stack([(1+self.ownership_target)/2,(1-self.ownership_target)/2],axis=3), logits=tf.stack([ownership_output,-ownership_output],axis=3) ) * tf.reshape(model.mask_before_symmetry,[-1,model.pos_len,model.pos_len]), @@ -1398,7 +1377,7 @@ def __init__(self,model,for_optimization,placeholders): self.scoring_loss_unreduced = 1.0 * self.scoring_target_weight * ( tf.reduce_sum( - tf.square(self.scoring_target - scoring_output) * tf.reshape(model.mask_before_symmetry,[-1,model.pos_len,model.pos_len]), + input_tensor=tf.square(self.scoring_target - scoring_output) * tf.reshape(model.mask_before_symmetry,[-1,model.pos_len,model.pos_len]), axis=[1,2] ) / model.mask_sum_hw ) @@ -1416,7 +1395,7 @@ def __init__(self,model,for_optimization,placeholders): #due to simply being farther in the future, so multiply by [1,0.25]. self.futurepos_loss_unreduced = 0.25 * self.futurepos_target_weight * ( tf.reduce_sum( - tf.square(tf.tanh(futurepos_output) - self.futurepos_target) + input_tensor=tf.square(tf.tanh(futurepos_output) - self.futurepos_target) * tf.reshape(model.mask_before_symmetry,[-1,model.pos_len,model.pos_len,1]) * tf.reshape(tf.constant([1,0.25],dtype=tf.float32),[1,1,1,2]), axis=[1,2,3] @@ -1427,10 +1406,10 @@ def __init__(self,model,for_optimization,placeholders): owned_target = tf.square(self.ownership_target) unowned_target = 1.0 - owned_target unowned_proportion = ( - tf.reduce_sum(unowned_target * tf.reshape(model.mask_before_symmetry,[-1,model.pos_len,model.pos_len]),axis=[1,2]) - / (1.0 + tf.reduce_sum(tf.reshape(model.mask_before_symmetry,[-1,model.pos_len,model.pos_len]),axis=[1,2])) + tf.reduce_sum(input_tensor=unowned_target * tf.reshape(model.mask_before_symmetry,[-1,model.pos_len,model.pos_len]),axis=[1,2]) + / (1.0 + tf.reduce_sum(input_tensor=tf.reshape(model.mask_before_symmetry,[-1,model.pos_len,model.pos_len]),axis=[1,2])) ) - unowned_proportion = tf.reduce_mean(unowned_proportion * self.ownership_target_weight) + unowned_proportion = tf.reduce_mean(input_tensor=unowned_proportion * self.ownership_target_weight) if model.is_training: moving_unowned_proportion = tf.compat.v1.get_variable(initializer=1.0,name=("moving_unowned_proportion"),trainable=False) moving_unowned_op = tf.keras.backend.moving_average_update(moving_unowned_proportion,unowned_proportion,0.998) @@ -1441,7 +1420,7 @@ def __init__(self,model,for_optimization,placeholders): self.seki_loss_unreduced = ( tf.reduce_sum( - tf.nn.softmax_cross_entropy_with_logits_v2( + input_tensor=tf.nn.softmax_cross_entropy_with_logits( labels=tf.stack([1.0-tf.square(self.seki_target), tf.nn.relu(self.seki_target), tf.nn.relu(-self.seki_target)],axis=3), logits=seki_output[:,:,:,0:3] ) * tf.reshape(model.mask_before_symmetry,[-1,model.pos_len,model.pos_len]), @@ -1450,7 +1429,7 @@ def __init__(self,model,for_optimization,placeholders): ) self.seki_loss_unreduced = self.seki_loss_unreduced + 0.5 * ( tf.reduce_sum( - tf.nn.softmax_cross_entropy_with_logits_v2( + input_tensor=tf.nn.softmax_cross_entropy_with_logits( labels=tf.stack([unowned_target, owned_target],axis=3), logits=tf.stack([seki_output[:,:,:,3],tf.zeros_like(self.ownership_target)],axis=3) ) * tf.reshape(model.mask_before_symmetry,[-1,model.pos_len,model.pos_len]), @@ -1461,7 +1440,7 @@ def __init__(self,model,for_optimization,placeholders): self.seki_weight_scale = 
seki_weight_scale #This is conditional upon there being a result - expected_score_from_belief = tf.reduce_sum(scorebelief_probs * model.score_belief_offset_vector,axis=1) + expected_score_from_belief = tf.reduce_sum(input_tensor=scorebelief_probs * model.score_belief_offset_vector,axis=1) #Huber will incentivize this to not actually converge to the mean, but rather something meanlike locally and something medianlike #for very large possible losses. This seems... okay - it might actually be what users want. @@ -1470,7 +1449,7 @@ def __init__(self,model,for_optimization,placeholders): self.variance_time_loss_unreduced = 0.0003 * huber_loss(self.variance_time_target, variance_time_prediction, delta = 50.0) stdev_of_belief = tf.sqrt(0.001 + tf.reduce_sum( - scorebelief_probs * tf.square( + input_tensor=scorebelief_probs * tf.square( tf.reshape(model.score_belief_offset_vector,[1,-1]) - tf.reshape(expected_score_from_belief,[-1,1]) ),axis=1)) beliefstdevdiff = stdev_of_belief - scorestdev_prediction @@ -1512,27 +1491,27 @@ def __init__(self,model,for_optimization,placeholders): self.scale_reg_loss_unreduced = tf.reshape(0.0004 * tf.add_n([tf.square(variable) for variable in model.prescale_variables]), [-1]) #self.scale_reg_loss_unreduced = tf.zeros_like(self.winloss_reg_loss_unreduced) - self.policy_loss = tf.reduce_sum(self.target_weight_used * self.policy_loss_unreduced, name="losses/policy_loss") - self.policy1_loss = tf.reduce_sum(self.target_weight_used * self.policy1_loss_unreduced, name="losses/policy1_loss") - self.value_loss = tf.reduce_sum(self.target_weight_used * self.value_loss_unreduced, name="losses/value_loss") - self.td_value_loss = tf.reduce_sum(self.target_weight_used * self.td_value_loss_unreduced, name="losses/td_value_loss") - self.td_score_loss = tf.reduce_sum(self.target_weight_used * self.td_score_loss_unreduced, name="losses/td_score_loss") - self.scoremean_loss = tf.reduce_sum(self.target_weight_used * self.scoremean_loss_unreduced, name="losses/scoremean_loss") - self.lead_loss = tf.reduce_sum(self.target_weight_used * self.lead_loss_unreduced, name="losses/lead_loss") - self.variance_time_loss = tf.reduce_sum(self.target_weight_used * self.variance_time_loss_unreduced, name="losses/variance_time_loss") - self.scorebelief_pdf_loss = tf.reduce_sum(self.target_weight_used * self.scorebelief_pdf_loss_unreduced, name="losses/scorebelief_pdf_loss") - self.scorebelief_cdf_loss = tf.reduce_sum(self.target_weight_used * self.scorebelief_cdf_loss_unreduced, name="losses/scorebelief_cdf_loss") - self.ownership_loss = tf.reduce_sum(self.target_weight_used * self.ownership_loss_unreduced, name="losses/ownership_loss") - self.scoring_loss = tf.reduce_sum(self.target_weight_used * self.scoring_loss_unreduced, name="losses/scoring_loss") - self.futurepos_loss = tf.reduce_sum(self.target_weight_used * self.futurepos_loss_unreduced, name="losses/futurepos_loss") - self.seki_loss = tf.reduce_sum(self.target_weight_used * self.seki_loss_unreduced, name="losses/seki_loss") - self.scorestdev_reg_loss = tf.reduce_sum(self.target_weight_used * self.scorestdev_reg_loss_unreduced, name="losses/scorestdev_reg_loss") - self.shortterm_value_error_loss = tf.reduce_sum(self.target_weight_used * self.shortterm_value_error_loss_unreduced, name="losses/sloss") - self.shortterm_score_error_loss = tf.reduce_sum(self.target_weight_used * self.shortterm_score_error_loss_unreduced, name="losses/shortterm_score_error_loss") + self.policy_loss = tf.reduce_sum(input_tensor=self.target_weight_used * 
self.policy_loss_unreduced, name="losses/policy_loss") + self.policy1_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.policy1_loss_unreduced, name="losses/policy1_loss") + self.value_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.value_loss_unreduced, name="losses/value_loss") + self.td_value_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.td_value_loss_unreduced, name="losses/td_value_loss") + self.td_score_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.td_score_loss_unreduced, name="losses/td_score_loss") + self.scoremean_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.scoremean_loss_unreduced, name="losses/scoremean_loss") + self.lead_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.lead_loss_unreduced, name="losses/lead_loss") + self.variance_time_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.variance_time_loss_unreduced, name="losses/variance_time_loss") + self.scorebelief_pdf_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.scorebelief_pdf_loss_unreduced, name="losses/scorebelief_pdf_loss") + self.scorebelief_cdf_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.scorebelief_cdf_loss_unreduced, name="losses/scorebelief_cdf_loss") + self.ownership_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.ownership_loss_unreduced, name="losses/ownership_loss") + self.scoring_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.scoring_loss_unreduced, name="losses/scoring_loss") + self.futurepos_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.futurepos_loss_unreduced, name="losses/futurepos_loss") + self.seki_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.seki_loss_unreduced, name="losses/seki_loss") + self.scorestdev_reg_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.scorestdev_reg_loss_unreduced, name="losses/scorestdev_reg_loss") + self.shortterm_value_error_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.shortterm_value_error_loss_unreduced, name="losses/sloss") + self.shortterm_score_error_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.shortterm_score_error_loss_unreduced, name="losses/shortterm_score_error_loss") # self.winloss_reg_loss = tf.reduce_sum(self.target_weight_used * self.winloss_reg_loss_unreduced, name="losses/winloss_reg_loss") - self.scale_reg_loss = tf.reduce_sum(self.target_weight_used * self.scale_reg_loss_unreduced, name="losses/scale_reg_loss") + self.scale_reg_loss = tf.reduce_sum(input_tensor=self.target_weight_used * self.scale_reg_loss_unreduced, name="losses/scale_reg_loss") - self.weight_sum = tf.reduce_sum(self.target_weight_used, name="losses/weight_sum") + self.weight_sum = tf.reduce_sum(input_tensor=self.target_weight_used, name="losses/weight_sum") if for_optimization: #Prior/Regularization @@ -1567,35 +1546,24 @@ def __init__(self,model,for_optimization,placeholders): self.scale_reg_loss ) - # self.opt_loss = tf.Print( - # self.opt_loss, - # [self.value_target[0:10], - # self.scorevalue_target[0:10], - # self.scorebelief_target[0] * tf.constant(model.score_belief_offset_vector,dtype=tf.float32), - # self.ownership_target_weight[0:10], - # self.selfkomi[0:10], - # ], - # summarize=2000 - # ) - class Metrics: def __init__(self,model,target_vars,include_debug_stats): #Training results - policy_target_idxs = tf.argmax(target_vars.policy_target, 1) - self.top1_prediction = 
tf.equal(tf.argmax(model.policy_output[:,:,0], 1), policy_target_idxs) - self.top4_prediction = tf.nn.in_top_k(model.policy_output[:,:,0],policy_target_idxs,4) + policy_target_idxs = tf.argmax(input=target_vars.policy_target, axis=1) + self.top1_prediction = tf.equal(tf.argmax(input=model.policy_output[:,:,0], axis=1), policy_target_idxs) + self.top4_prediction = tf.nn.in_top_k(predictions=model.policy_output[:,:,0],targets=policy_target_idxs,k=4) self.accuracy1_unreduced = tf.cast(self.top1_prediction, tf.float32) self.accuracy4_unreduced = tf.cast(self.top4_prediction, tf.float32) - self.value_entropy_unreduced = tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf.nn.softmax(model.value_output,axis=1), logits=model.value_output) + self.value_entropy_unreduced = tf.nn.softmax_cross_entropy_with_logits(labels=tf.nn.softmax(model.value_output,axis=1), logits=model.value_output) self.value_conf_unreduced = 4 * tf.square(tf.nn.sigmoid(model.value_output[:,0] - model.value_output[:,1]) - 0.5) self.policy_target_entropy_unreduced = target_vars.policy_target_weight * ( - -tf.reduce_sum(target_vars.policy_target * tf.math.log(target_vars.policy_target+(1e-20)), axis=1) + -tf.reduce_sum(input_tensor=target_vars.policy_target * tf.math.log(target_vars.policy_target+(1e-20)), axis=1) ) - self.accuracy1 = tf.reduce_sum(target_vars.target_weight_used * self.accuracy1_unreduced, name="metrics/accuracy1") - self.accuracy4 = tf.reduce_sum(target_vars.target_weight_used * self.accuracy4_unreduced, name="metrics/accuracy4") - self.value_entropy = tf.reduce_sum(target_vars.target_weight_used * self.value_entropy_unreduced, name="metrics/value_entropy") - self.value_conf = tf.reduce_sum(target_vars.target_weight_used * self.value_conf_unreduced, name="metrics/value_conf") - self.policy_target_entropy = tf.reduce_sum(target_vars.target_weight_used * self.policy_target_entropy_unreduced, name="metrics/policy_target_entropy") + self.accuracy1 = tf.reduce_sum(input_tensor=target_vars.target_weight_used * self.accuracy1_unreduced, name="metrics/accuracy1") + self.accuracy4 = tf.reduce_sum(input_tensor=target_vars.target_weight_used * self.accuracy4_unreduced, name="metrics/accuracy4") + self.value_entropy = tf.reduce_sum(input_tensor=target_vars.target_weight_used * self.value_entropy_unreduced, name="metrics/value_entropy") + self.value_conf = tf.reduce_sum(input_tensor=target_vars.target_weight_used * self.value_conf_unreduced, name="metrics/value_conf") + self.policy_target_entropy = tf.reduce_sum(input_tensor=target_vars.target_weight_used * self.policy_target_entropy_unreduced, name="metrics/policy_target_entropy") # self.shortterm_value_error_mean_unreduced = target_vars.shortterm_diff_value # self.shortterm_score_error_mean_unreduced = target_vars.shortterm_diff_score @@ -1607,24 +1575,24 @@ def __init__(self,model,target_vars,include_debug_stats): if include_debug_stats: def reduce_norm(x, axis=None, keepdims=False): - return tf.sqrt(tf.reduce_mean(tf.square(x), axis=axis, keepdims=keepdims)) + return tf.sqrt(tf.reduce_mean(input_tensor=tf.square(x), axis=axis, keepdims=keepdims)) def reduce_stdev(x, axis=None, keepdims=False): - m = tf.reduce_mean(x, axis=axis, keepdims=True) + m = tf.reduce_mean(input_tensor=x, axis=axis, keepdims=True) devs_squared = tf.square(x - m) - return tf.sqrt(tf.reduce_mean(devs_squared, axis=axis, keepdims=keepdims)) + return tf.sqrt(tf.reduce_mean(input_tensor=devs_squared, axis=axis, keepdims=keepdims)) self.activated_prop_by_layer = dict([ - 
(name,tf.reduce_mean(tf.count_nonzero(layer,axis=[1,2])/layer.shape[1].value/layer.shape[2].value, axis=0)) for (name,layer) in model.outputs_by_layer + (name,tf.reduce_mean(input_tensor=tf.math.count_nonzero(layer,axis=[1,2])/layer.shape.as_list()[1]/layer.shape.as_list()[2], axis=0)) for (name,layer) in model.outputs_by_layer ]) self.mean_output_by_layer = dict([ - (name,tf.reduce_mean(layer,axis=[0,1,2])) for (name,layer) in model.outputs_by_layer + (name,tf.reduce_mean(input_tensor=layer,axis=[0,1,2])) for (name,layer) in model.outputs_by_layer ]) self.stdev_output_by_layer = dict([ (name,reduce_stdev(layer,axis=[0,1,2])) for (name,layer) in model.outputs_by_layer ]) self.mean_weights_by_var = dict([ - (v.name,tf.reduce_mean(v)) for v in tf.compat.v1.trainable_variables() + (v.name,tf.reduce_mean(input_tensor=v)) for v in tf.compat.v1.trainable_variables() ]) self.norm_weights_by_var = dict([ (v.name,reduce_norm(v)) for v in tf.compat.v1.trainable_variables() @@ -1638,7 +1606,7 @@ def print_trainable_variables(logf): shape = variable.get_shape() variable_parameters = 1 for dim in shape: - variable_parameters *= dim.value + variable_parameters *= dim total_parameters += variable_parameters logf("Model variable: %s, %d parameters" % (variable.name,variable_parameters)) @@ -1664,7 +1632,7 @@ def build_model_from_tfrecords_features(features,mode,print_model,trainlog,model bitmasks = tf.reshape(tf.constant([128,64,32,16,8,4,2,1],dtype=tf.uint8),[1,1,1,8]) binchw = tf.reshape(tf.bitwise.bitwise_and(tf.expand_dims(binchwp,axis=3),bitmasks),[-1,num_bin_input_features,((pos_len*pos_len+7)//8)*8]) binchw = binchw[:,:,:pos_len*pos_len] - binhwc = tf.cast(tf.transpose(binchw, [0,2,1]),tf.float32) + binhwc = tf.cast(tf.transpose(a=binchw, perm=[0,2,1]),tf.float32) binhwc = tf.math.minimum(binhwc,tf.constant(1.0)) placeholders["bin_inputs"] = binhwc @@ -1679,11 +1647,11 @@ def build_model_from_tfrecords_features(features,mode,print_model,trainlog,model placeholders["include_history"] = features["gtnc"][:,36:41] policy_target0 = features["ptncm"][:,0,:] - policy_target0 = policy_target0 / tf.reduce_sum(policy_target0,axis=1,keepdims=True) + policy_target0 = policy_target0 / tf.reduce_sum(input_tensor=policy_target0,axis=1,keepdims=True) placeholders["policy_target"] = policy_target0 placeholders["policy_target_weight"] = features["gtnc"][:,26] policy_target1 = features["ptncm"][:,1,:] - policy_target1 = policy_target1 / tf.reduce_sum(policy_target1,axis=1,keepdims=True) + policy_target1 = policy_target1 / tf.reduce_sum(input_tensor=policy_target1,axis=1,keepdims=True) placeholders["policy_target1"] = policy_target1 placeholders["policy_target_weight1"] = features["gtnc"][:,28] @@ -1696,7 +1664,7 @@ def build_model_from_tfrecords_features(features,mode,print_model,trainlog,model placeholders["scorebelief_target"] = features["sdn"] / 100.0 placeholders["ownership_target"] = features["vtnchw"][:,0] placeholders["scoring_target"] = features["vtnchw"][:,4] / 120.0 - placeholders["futurepos_target"] = tf.transpose(features["vtnchw"][:,2:4], [0,2,3,1]) + placeholders["futurepos_target"] = tf.transpose(a=features["vtnchw"][:,2:4], perm=[0,2,3,1]) placeholders["seki_target"] = features["vtnchw"][:,1] placeholders["target_weight_from_data"] = features["gtnc"][:,25] diff --git a/python/shuffle.py b/python/shuffle.py old mode 100755 new mode 100644 index 33402cf85..140150743 --- a/python/shuffle.py +++ b/python/shuffle.py @@ -15,7 +15,8 @@ import numpy as np import tensorflow as tf -from tensorflow.python_io 
import TFRecordOptions,TFRecordCompressionType,TFRecordWriter +from tensorflow.compat.v1.io import TFRecordCompressionType +from tensorflow.io import TFRecordOptions,TFRecordWriter import tfrecordio diff --git a/python/test.py b/python/test.py index b01cea900..72a355c00 100644 --- a/python/test.py +++ b/python/test.py @@ -65,7 +65,7 @@ def log(s): dataset = dataset.flat_map(lambda fname: tf.data.TFRecordDataset(fname,compression_type="ZLIB")) parse_input = tfrecordio.make_tf_record_parser(model_config,pos_len,batch_size) dataset = dataset.map(parse_input) - iterator = dataset.make_one_shot_iterator() + iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) features = iterator.get_next() elif using_npz: features = tfrecordio.make_raw_input_feature_placeholders(model_config,pos_len,batch_size) diff --git a/python/tfrecordio.py b/python/tfrecordio.py index a091c96a9..4289a5f47 100644 --- a/python/tfrecordio.py +++ b/python/tfrecordio.py @@ -40,8 +40,8 @@ def make_tf_record_parser(model_config,pos_len,batch_size,multi_num_gpus=None): raw_input_features = make_raw_input_features(model_config,pos_len,batch_size) def parse_input(serialized_example): - example = tf.io.parse_single_example(serialized_example,raw_input_features) - binchwp = tf.decode_raw(example["binchwp"],tf.uint8) + example = tf.io.parse_single_example(serialized=serialized_example,features=raw_input_features) + binchwp = tf.io.decode_raw(example["binchwp"],tf.uint8) ginc = example["ginc"] ptncm = example["ptncm"] gtnc = example["gtnc"] diff --git a/python/train.py b/python/train.py old mode 100755 new mode 100644 index 69c2c0040..39bc435ab --- a/python/train.py +++ b/python/train.py @@ -24,6 +24,8 @@ import modelconfigs import tfrecordio + +tf.compat.v1.disable_eager_execution() #Command and args------------------------------------------------------------------- description = """ @@ -163,7 +165,7 @@ def trainlog(s): placeholder = tf.compat.v1.placeholder(variable.dtype,variable.shape) assign_ops.append(tf.compat.v1.assign(variable,placeholder)) swa_assign_placeholders[variable.name] = placeholder - swa_wvalues[variable.name] = np.zeros([elt.value for elt in variable.shape]) + swa_wvalues[variable.name] = np.zeros([elt for elt in variable.shape]) swa_assign_op = tf.group(*assign_ops) trainlog("Build SWA graph for SWA update and saving, %d variables" % len(swa_assign_placeholders)) @@ -243,7 +245,7 @@ def model_fn(features,labels,mode,params): synchronization=tf.VariableSynchronization.ON_READ, aggregation=tf.VariableAggregation.SUM ) - wsum_op = tf.assign_add(wsum,target_vars.weight_sum) + wsum_op = tf.compat.v1.assign_add(wsum,target_vars.weight_sum) eval_metric_ops={ #"wsum": (wsum.read_value(),wsum_op), "p0loss": tf.compat.v1.metrics.mean(target_vars.policy_loss_unreduced, weights=target_vars.target_weight_used), @@ -282,8 +284,8 @@ def model_fn(features,labels,mode,params): printed_model_yet = True def moving_mean(name,x,weights): - sumwx = tf.reduce_sum(x*weights,name="printstats/wx/"+name) - sumw = tf.reduce_sum(weights,name="printstats/w/"+name) + sumwx = tf.reduce_sum(input_tensor=x*weights,name="printstats/wx/"+name) + sumw = tf.reduce_sum(input_tensor=weights,name="printstats/w/"+name) moving_wx = tf.compat.v1.get_variable(initializer=tf.zeros([]),name=(name+"/moving_wx"),trainable=False) moving_w = tf.compat.v1.get_variable(initializer=tf.zeros([]),name=(name+"/moving_w"),trainable=False) diff --git a/python/visualize.py b/python/visualize.py index 14ac47d4c..1605880ca 100644 --- a/python/visualize.py +++ 
b/python/visualize.py @@ -46,7 +46,7 @@ def log(s): pos_len = 19 # shouldn't matter, all we're doing is exporting weights that don't depend on this if name_scope is not None: - with tf.name_scope(name_scope): + with tf.compat.v1.name_scope(name_scope): model = Model(model_config,pos_len,{}) else: model = Model(model_config,pos_len,{}) @@ -59,7 +59,7 @@ def volume(variable): return variable_parameters total_parameters = 0 -for variable in tf.global_variables(): +for variable in tf.compat.v1.global_variables(): variable_parameters = volume(variable) total_parameters += variable_parameters log("Model variable %s, %d parameters" % (variable.name,variable_parameters)) @@ -120,7 +120,7 @@ def run(fetches): if show_all_weight_magnitudes: print("name,sumsq,l2regstrength,meansq,rms") - for variable in tf.trainable_variables(): + for variable in tf.compat.v1.trainable_variables(): values = np.array(variable.eval()) sq = np.square(values) reg = np.sum(sq) if any(v.name == variable.name for v in model.reg_variables) else 0
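
The score-belief hunks in the loss section reduce a categorical belief over score bins to a mean and a standard deviation, with a small epsilon inside the square root for numerical stability. A minimal NumPy sketch of that computation, using illustrative names (`probs`, `offsets`) rather than the model's actual tensors:

import numpy as np

# probs: (batch, num_bins) categorical score belief, each row sums to 1
# offsets: (num_bins,) score value represented by each bin
probs = np.array([[0.2, 0.5, 0.3]])
offsets = np.array([-1.0, 0.0, 1.0])

expected_score = np.sum(probs * offsets, axis=1)
stdev_of_belief = np.sqrt(0.001 + np.sum(
    probs * np.square(offsets[None, :] - expected_score[:, None]),
    axis=1))
# i.e. sqrt(eps + sum_i p_i * (x_i - mean)^2), matching the TF expression above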
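
Every per-head loss above is reduced the same way: multiply the unreduced per-sample loss by `target_weight_used`, sum over the batch, and track `weight_sum` separately so a weighted mean can be recovered when reporting. A rough TF2 sketch of that pattern, with illustrative names:

import tensorflow as tf

def weighted_loss_sum(per_sample_loss, sample_weight, name):
    # Sum of weight * loss, as in the per-head reductions above
    return tf.reduce_sum(input_tensor=sample_weight * per_sample_loss, name=name)

per_sample_loss = tf.constant([0.5, 2.0, 1.0])
sample_weight = tf.constant([1.0, 0.0, 1.0])

loss_sum = weighted_loss_sum(per_sample_loss, sample_weight, "losses/example_loss")
weight_sum = tf.reduce_sum(input_tensor=sample_weight)
weighted_mean = loss_sum / weight_sum  # 0.75 here; rows with weight 0 drop out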
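
The `value_entropy` metric feeds the model's own softmax back in as the labels of `softmax_cross_entropy_with_logits`; cross-entropy of a distribution with itself is just its Shannon entropy, so this measures how uncertain the value head is. A short check of that identity (the logits here are made up):

import tensorflow as tf

logits = tf.constant([[2.0, 0.5, -1.0]])
probs = tf.nn.softmax(logits, axis=1)

xent = tf.nn.softmax_cross_entropy_with_logits(labels=probs, logits=logits)
entropy = -tf.reduce_sum(input_tensor=probs * tf.math.log(probs), axis=1)
# xent and entropy agree up to floating point rounding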
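
Several hunks drop `.value` (e.g. `dim.value` → `dim`, `elt.value` → `elt`, `layer.shape[1].value` → `layer.shape.as_list()[1]`). Under TF2 behavior a `TensorShape` yields plain ints (or `None`) instead of `Dimension` objects, so `.value` no longer exists. A quick illustration, assuming TF2:

import numpy as np
import tensorflow as tf

v = tf.Variable(tf.zeros([3, 5]))

n_params = 1
for dim in v.shape:
    n_params *= dim             # dim is already an int in TF2
assert n_params == 15

shape_list = v.shape.as_list()  # plain Python list, e.g. [3, 5]
weights_accum = np.zeros(shape_list)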
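
The `train.py`, `test.py`, and `tfrecordio.py` hunks keep the TF1-style graph input pipeline working under TF2 by disabling eager execution and routing the removed symbols through `tf.compat.v1` and `tf.io`. A condensed sketch of that combination; the path and feature spec below are placeholders, not the project's real ones:

import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # as at the top of train.py

feature_spec = {"binchwp": tf.io.FixedLenFeature([], tf.string)}

def parse_input(serialized_example):
    example = tf.io.parse_single_example(serialized=serialized_example, features=feature_spec)
    return tf.io.decode_raw(example["binchwp"], tf.uint8)

dataset = tf.data.TFRecordDataset("data.tfrecord", compression_type="ZLIB")  # placeholder path
dataset = dataset.map(parse_input)
iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
next_batch = iterator.get_next()

with tf.compat.v1.Session() as session:
    batch = session.run(next_batch)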