These notes cover the usage of the following four functions, their differences, and common usage scenarios. The first two create variables; the last two manage variable namespaces.
- tf.Variable
- tf.get_variable
- tf.variable_scope
- tf.name_scope
1. tf.Variable
A variable maintains state in the graph across calls to `run()`. You add a variable to the graph by constructing an instance of the class `Variable`.
The `Variable()` constructor requires an initial value for the variable, which can be a `Tensor` of any type and shape. The initial value defines the type and shape of the variable. After construction, the type and shape of the variable are fixed. The value can be changed using one of the assign methods. If you want to change the shape of a variable later you have to use an `assign` Op with `validate_shape=False`.
Just like any `Tensor`, variables created with `Variable()` can be used as inputs for other Ops in the graph. Additionally, all the operators overloaded for the `Tensor` class are carried over to variables, so you can also add nodes to the graph by just doing arithmetic on variables.
1.1 Syntax
tf.Variable(*args, **kwargs)
1.2 Parameters
- initial_value: A `Tensor`, or a Python object convertible to a `Tensor`, which is the initial value for the Variable. The initial value must have a shape specified unless `validate_shape` is set to False. Can also be a callable with no argument that returns the initial value when called. In that case, `dtype` must be specified. (Note that initializer functions from init_ops.py must first be bound to a shape before being used here.)
- trainable: If `True`, GradientTapes automatically watch uses of this variable. Defaults to `True`, unless `synchronization` is set to `ON_READ`, in which case it defaults to `False`.
- validate_shape: If `False`, allows the variable to be initialized with a value of unknown shape. If `True`, the default, the shape of `initial_value` must be known.
- caching_device: Optional device string describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements.
- name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically.
- variable_def: `VariableDef` protocol buffer. If not `None`, recreates the Variable object with its contents, referencing the variable's nodes in the graph, which must already exist. The graph is not changed. `variable_def` and the other arguments are mutually exclusive.
- dtype: If set, initial_value will be converted to the given type. If `None`, either the datatype will be kept (if `initial_value` is a Tensor) or `convert_to_tensor` will decide.
- import_scope: Optional `string`. Name scope to add to the Variable. Only used when initializing from protocol buffer.
- constraint: An optional projection function to be applied to the variable after being updated by an `Optimizer` (e.g. used to implement norm constraints or value constraints for layer weights). The function must take as input the unprojected Tensor representing the value of the variable and return the Tensor for the projected value (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training.
- synchronization: Indicates when a distributed variable will be aggregated. Accepted values are constants defined in the class `tf.VariableSynchronization`. By default the synchronization is set to `AUTO` and the current `DistributionStrategy` chooses when to synchronize.
- aggregation: Indicates how a distributed variable will be aggregated. Accepted values are constants defined in the class `tf.VariableAggregation`.
- shape: (optional) The shape of this variable. If None, the shape of `initial_value` will be used. When setting this argument to `tf.TensorShape(None)` (representing an unspecified shape), the variable can be assigned with values of different shapes (see the sketch after this list).
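The `tf.TensorShape(None)` case is easy to miss, so here is a minimal sketch of that behavior (TF 2.x eager mode; the concrete shapes are illustrative):
import tensorflow as tf

# With shape=tf.TensorShape(None) the variable's shape stays unspecified,
# so it can later be assigned values of different shapes.
v = tf.Variable(tf.zeros([2]), shape=tf.TensorShape(None))
print(v.shape)             # <unknown>
v.assign(tf.ones([3, 3]))  # OK; a fixed-shape variable would reject this
print(v.numpy().shape)     # (3, 3)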
1.3 Official usage example
import tensorflow as tf

# Create a variable. (The docs use the placeholders <initial-value> and
# <optional-name> here; a random 2x2 tensor stands in as an illustrative value.)
w = tf.Variable(tf.random.normal([2, 2]), name="w")

# Use the variable in the graph like any Tensor.
y = tf.matmul(w, tf.ones([2, 2]))

# The overloaded operators are available too.
z = tf.sigmoid(w + y)

# Assign a new value to the variable with `assign()` or a related method.
w.assign(w + 1.0)
w.assign_add(tf.ones([2, 2]))
2. tf.get_variable
Gets an existing variable with these parameters or creates a new one. (Removed from the top-level API in TensorFlow 2.0; still available as `tf.compat.v1.get_variable`.)
2.1 Syntax
tf.get_variable(
name,
shape=None,
dtype=None,
initializer=None,
regularizer=None,
trainable=None,
collections=None,
caching_device=None,
partitioner=None,
validate_shape=True,
use_resource=None,
custom_getter=None,
constraint=None,
synchronization=<VariableSynchronization.AUTO: 0>,
aggregation=<VariableAggregation.NONE: 0>,
)
2.2 Parameters
- name: The name of the new or existing variable.
- shape: Shape of the new or existing variable.
- dtype: Type of the new or existing variable (defaults to `DT_FLOAT`).
- initializer: Initializer for the variable if one is created. Can either be an initializer object or a Tensor. If it's a Tensor, its shape must be known unless `validate_shape` is False.
- regularizer: A (Tensor -> Tensor or None) function; the result of applying it on a newly created variable will be added to the collection `tf.GraphKeys.REGULARIZATION_LOSSES` and can be used for regularization (see the sketch after this list).
- trainable: If `True`, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
- collections: List of graph collections keys to add the Variable to. Defaults to `[GraphKeys.GLOBAL_VARIABLES]` (see `tf.Variable`).
- caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements.
- partitioner: Optional callable that accepts a fully defined `TensorShape` and `dtype` of the Variable to be created, and returns a list of partitions for each axis (currently only one axis can be partitioned).
- validate_shape: If False, allows the variable to be initialized with a value of unknown shape. If True, the default, the shape of `initial_value` must be known. For this to be used the initializer must be a Tensor and not an initializer object.
- use_resource: If False, creates a regular Variable. If True, creates an experimental ResourceVariable instead with well-defined semantics. Defaults to False (will later change to True). When eager execution is enabled this argument is always forced to be True.
- custom_getter: Callable that takes as a first argument the true getter, and allows overwriting the internal get_variable method. The signature of `custom_getter` should match that of this method, but the most future-proof version will allow for changes: `def custom_getter(getter, *args, **kwargs)`. Direct access to all `get_variable` parameters is also allowed: `def custom_getter(getter, name, *args, **kwargs)`. A simple identity custom getter that creates variables with modified names is: `def custom_getter(getter, name, *args, **kwargs): return getter(name + '_suffix', *args, **kwargs)`.
- constraint: An optional projection function to be applied to the variable after being updated by an `Optimizer` (e.g. used to implement norm constraints or value constraints for layer weights). The function must take as input the unprojected Tensor representing the value of the variable and return the Tensor for the projected value (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training.
- synchronization: Indicates when a distributed variable will be aggregated. Accepted values are constants defined in the class `tf.VariableSynchronization`. By default the synchronization is set to `AUTO` and the current `DistributionStrategy` chooses when to synchronize. If `synchronization` is set to `ON_READ`, `trainable` must not be set to `True`.
- aggregation: Indicates how a distributed variable will be aggregated. Accepted values are constants defined in the class `tf.VariableAggregation`.
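Before the official example, a minimal sketch of `initializer` and `regularizer` working together (run here via `tf.compat.v1` on TF 2.x; the name `w` and the 0.01 L2 weight are illustrative):
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# The regularizer is applied to the newly created variable and its result
# lands in the REGULARIZATION_LOSSES collection.
w = tf.get_variable(
    "w", shape=[2, 3],
    initializer=tf.glorot_uniform_initializer(),
    regularizer=lambda t: 0.01 * tf.nn.l2_loss(t),
)
print(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
# a list containing one regularization-loss tensor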
2.3 Official usage example
def foo():
    with tf.variable_scope("foo", reuse=tf.AUTO_REUSE):
        v = tf.get_variable("v", [1])
    return v

v1 = foo()  # Creates v.
v2 = foo()  # Gets the same, existing v.
assert v1 == v2
"""
If initializer is `None` (the default), the default initializer passed in
the variable scope will be used. If that one is `None` too, a
`glorot_uniform_initializer` will be used. The initializer can also be
a Tensor, in which case the variable is initialized to this value and shape.
"""
3. tf.variable_scope
A context manager for defining ops that create variables (layers). (Removed from the top-level API in TensorFlow 2.0; still available as `tf.compat.v1.variable_scope`.)
3.1 Syntax
tf.variable_scope(
name_or_scope,
default_name=None,
values=None,
initializer=None,
regularizer=None,
caching_device=None,
partitioner=None,
custom_getter=None,
reuse=None,
dtype=None,
use_resource=None,
constraint=None,
auxiliary_name_scope=True,
)
3.2 Parameters
- name_or_scope: `string` or `VariableScope`: the scope to open.
- default_name: The default name to use if the `name_or_scope` argument is `None`; this name will be uniquified. If `name_or_scope` is provided it won't be used, so it is not required and can be None.
- values: The list of `Tensor` arguments that are passed to the op function.
- initializer: default initializer for variables within this scope (see the sketch after this list).
- regularizer: default regularizer for variables within this scope.
- caching_device: default caching device for variables within this scope.
- partitioner: default partitioner for variables within this scope.
- custom_getter: default custom getter for variables within this scope.
- reuse: `True`, None, or `tf.compat.v1.AUTO_REUSE`; if `True`, we go into reuse mode for this scope as well as all sub-scopes; if `tf.compat.v1.AUTO_REUSE`, we create variables if they do not exist, and return them otherwise; if None, we inherit the parent scope's reuse flag. When eager execution is enabled, new variables are always created unless an EagerVariableStore or template is currently active.
- dtype: type of variables created in this scope (defaults to the type in the passed scope, or inherited from parent scope).
- use_resource: If False, all variables will be regular Variables. If True, experimental ResourceVariables with well-defined semantics will be used instead. Defaults to False (will later change to True). When eager execution is enabled this argument is always forced to be True.
- constraint: An optional projection function to be applied to the variable after being updated by an `Optimizer` (e.g. used to implement norm constraints or value constraints for layer weights). The function must take as input the unprojected Tensor representing the value of the variable and return the Tensor for the projected value (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training.
- auxiliary_name_scope: If `True`, we create an auxiliary name scope with the scope. If `False`, we don't create it. Note that the argument is not inherited, and it only takes effect once, when the scope is created. You should only use it for re-entering a premade variable scope.
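As a minimal sketch of how these scope-level defaults behave (via `tf.compat.v1` on TF 2.x; the scope name and the constant values are illustrative):
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# Variables created inside the scope inherit the scope's default
# initializer unless they pass their own.
with tf.variable_scope("scope", initializer=tf.constant_initializer(0.5)):
    v = tf.get_variable("v", [1])                                      # inherits the default
    w = tf.get_variable("w", [1], initializer=tf.zeros_initializer())  # overrides it

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(v), sess.run(w))  # [0.5] [0.]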
3.3 Official usage examples
- Simple example of how to create a new variable:
with tf.compat.v1.variable_scope("foo"):
with tf.compat.v1.variable_scope("bar"):
v = tf.compat.v1.get_variable("v", [1])
assert v.name == "foo/bar/v:0"
- Simple example of how to reenter a premade variable scope safely:
with tf.compat.v1.variable_scope("foo") as vs:
pass
# Re-enter the variable scope.
with tf.compat.v1.variable_scope(vs,
auxiliary_name_scope=False) as vs1:
# Restore the original name_scope.
with tf.name_scope(vs1.original_name_scope):
v = tf.compat.v1.get_variable("v", [1])
assert v.name == "foo/v:0"
c = tf.constant([1], name="c")
assert c.name == "foo/c:0"
- Basic example of sharing a variable with AUTO_REUSE:
def foo():
    with tf.compat.v1.variable_scope("foo", reuse=tf.compat.v1.AUTO_REUSE):
        v = tf.compat.v1.get_variable("v", [1])
    return v

v1 = foo()  # Creates v.
v2 = foo()  # Gets the same, existing v.
assert v1 == v2
- Basic example of sharing a variable with reuse=True:
with tf.compat.v1.variable_scope("foo"):
v = tf.compat.v1.get_variable("v", [1])
with tf.compat.v1.variable_scope("foo", reuse=True):
v1 = tf.compat.v1.get_variable("v", [1])
assert v1 == v
- Sharing a variable by capturing a scope and setting reuse:
with tf.compat.v1.variable_scope("foo") as scope:
v = tf.compat.v1.get_variable("v", [1])
scope.reuse_variables()
v1 = tf.compat.v1.get_variable("v", [1])
assert v1 == v
- To prevent accidental sharing of variables, we raise an exception when getting an existing variable in a non-reusing scope.
with tf.compat.v1.variable_scope("foo"):
v = tf.compat.v1.get_variable("v", [1])
v1 = tf.compat.v1.get_variable("v", [1])
# Raises ValueError("... v already exists ...").
- Similarly, we raise an exception when trying to get a variable that does not exist in reuse mode.
with tf.compat.v1.variable_scope("foo", reuse=True):
v = tf.compat.v1.get_variable("v", [1])
# Raises ValueError("... v does not exists ...").
4. tf.name_scope
A context manager for use when defining a Python op.
4.1 Syntax
tf.name_scope(name)
4.2 Official usage example
def my_op(a, b, c, name=None):
    with tf.name_scope("MyOp") as scope:
        a = tf.convert_to_tensor(a, name="a")
        b = tf.convert_to_tensor(b, name="b")
        c = tf.convert_to_tensor(c, name="c")
        # Define some computation that uses `a`, `b`, and `c`.
        return foo_op(..., name=scope)
When executed, the Tensors `a`, `b`, `c` will have names `MyOp/a`, `MyOp/b`, and `MyOp/c`.
If the scope name already exists, the name will be made unique by appending `_n`. For example, calling `my_op` a second time will generate `MyOp_1/a`, etc.
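A minimal runnable sketch of this uniquification (via `tf.compat.v1` on TF 2.x, since tensor names require graph mode; the names are illustrative):
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

with tf.name_scope("MyOp"):
    a = tf.constant(1.0, name="a")
with tf.name_scope("MyOp"):  # the scope name already exists...
    b = tf.constant(1.0, name="a")

print(a.name)  # MyOp/a:0
print(b.name)  # MyOp_1/a:0 -- ...so it was uniquified with a _1 suffix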
5. Differences between tf.get_variable and tf.Variable
- For tf.Variable, the variable name is optional and is passed via name='v'. tf.get_variable requires a variable name.
- When tf.Variable detects a naming conflict, it resolves it automatically by appending _1, _2, and so on. tf.get_variable does not resolve conflicts; it raises an error instead (see the sketch after this list).
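A minimal sketch of both behaviors side by side (via `tf.compat.v1` on TF 2.x; the names 'v' and 'w' are illustrative):
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# tf.Variable uniquifies clashing names automatically:
a = tf.Variable([1.0], name="v")
b = tf.Variable([1.0], name="v")
print(a.name, b.name)  # v:0 v_1:0

# tf.get_variable treats a clash as an error (unless reuse is enabled):
c = tf.get_variable("w", shape=[1])
try:
    tf.get_variable("w", shape=[1])
except ValueError as e:
    print(e)  # Variable w already exists, disallowed. ...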
6. Differences between tf.variable_scope and tf.name_scope
The main difference is that tf.get_variable is not affected by tf.name_scope:
with tf.variable_scope('foo'):
    a = tf.get_variable('bar', [1])
print(a.name)
# prints foo/bar:0

with tf.name_scope('a'):
    a = tf.Variable([1])
    print(a.name)
    # prints a/Variable:0
    b = tf.get_variable('b', [1])
    print(b.name)
    # prints b:0
From this output we can see clearly that the variable created by tf.get_variable is named b:0, not a/b:0. In other words, tf.get_variable is not constrained by tf.name_scope.
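By contrast, tf.variable_scope prefixes both kinds of names, because by default it also opens a name scope of the same name (a minimal sketch via `tf.compat.v1` on TF 2.x):
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

with tf.variable_scope("foo"):
    v = tf.get_variable("v", [1])
    c = tf.constant(1.0, name="c")

print(v.name)  # foo/v:0
print(c.name)  # foo/c:0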
7. The reuse argument of tf.variable_scope
This section focuses on the usage of reuse. When reuse is False or None (the default), variable names within the same tf.variable_scope must be unique; when reuse is True, tf.variable_scope can only fetch variables that have already been created.
with tf.variable_scope('foo'):
    v = tf.get_variable('v', [1], initializer=tf.constant_initializer(1.0))

with tf.variable_scope('foo'):
    v1 = tf.get_variable('v', [1])
# Raises ValueError("Variable foo/v already exists, disallowed. Did you mean to set reuse=True in VarScope?")
The error occurs because a variable with the same name is being created again in the namespace foo. To get a variable v1 under foo with name='v', simply set reuse to True. The second block above becomes:
with tf.variable_scope('foo', reuse=True):
    v1 = tf.get_variable('v', [1])
    print(v1.name)
    # prints foo/v:0
Once reuse is set to True, tf.variable_scope can only fetch variables that have already been created:
with tf.variable_scope('bar', reuse=True):
    v3 = tf.get_variable('v', [1])
# Raises ValueError("Variable bar/v does not exist, disallowed. Did you mean to set reuse=None in VarScope?")
In short: with reuse=False, tf.variable_scope creates variables; with reuse=True, it fetches existing variables. The sketch below shows the middle ground.
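reuse=tf.AUTO_REUSE combines the two modes: it creates the variable on the first call and fetches it afterwards (a minimal sketch via `tf.compat.v1` on TF 2.x; the scope name 'baz' is illustrative):
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

with tf.variable_scope("baz", reuse=tf.AUTO_REUSE):
    v = tf.get_variable("v", [1])   # created
with tf.variable_scope("baz", reuse=tf.AUTO_REUSE):
    v1 = tf.get_variable("v", [1])  # fetched
assert v1 is v  # the same Variable object is returned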