diff --git a/core/leras/optimizers/AdaBelief.py b/core/leras/optimizers/AdaBelief.py index aed4308..dd004f8 100644 --- a/core/leras/optimizers/AdaBelief.py +++ b/core/leras/optimizers/AdaBelief.py @@ -50,11 +50,11 @@ class AdaBelief(nn.OptimizerBase): updates = [] if self.clipnorm > 0.0: - norm = tf.sqrt( sum([tf.reduce_sum(tf.square(g)) for g,v in grads_vars])) + norm = tf.sqrt( sum([tf.reduce_sum(tf.square(tf.cast(g, tf.float32))) for g,v in grads_vars])) updates += [ state_ops.assign_add( self.iterations, 1) ] for i, (g,v) in enumerate(grads_vars): if self.clipnorm > 0.0: - g = self.tf_clip_norm(g, self.clipnorm, norm) + g = self.tf_clip_norm(g, self.clipnorm, tf.cast(norm, g.dtype) ) ms = self.ms_dict[ v.name ] vs = self.vs_dict[ v.name ] diff --git a/core/leras/optimizers/RMSprop.py b/core/leras/optimizers/RMSprop.py index 345b2a7..d2eb605 100644 --- a/core/leras/optimizers/RMSprop.py +++ b/core/leras/optimizers/RMSprop.py @@ -47,11 +47,11 @@ class RMSprop(nn.OptimizerBase): updates = [] if self.clipnorm > 0.0: - norm = tf.sqrt( sum([tf.reduce_sum(tf.square(g)) for g,v in grads_vars])) + norm = tf.sqrt( sum([tf.reduce_sum(tf.square(tf.cast(g, tf.float32))) for g,v in grads_vars])) updates += [ state_ops.assign_add( self.iterations, 1) ] for i, (g,v) in enumerate(grads_vars): if self.clipnorm > 0.0: - g = self.tf_clip_norm(g, self.clipnorm, norm) + g = self.tf_clip_norm(g, self.clipnorm, tf.cast(norm, g.dtype) ) a = self.accumulators_dict[ v.name ]