59 lines
3.1 KiB
Diff
59 lines
3.1 KiB
Diff
--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
|
|
+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
|
|
@@ -1,8 +1,7 @@
|
|
class BatchNorm2d(_BatchNorm):
|
|
r"""Applies Batch Normalization over a 4D input (a mini-batch of 2D inputs
|
|
with additional channel dimension) as described in the paper
|
|
- `Batch Normalization: Accelerating Deep Network Training by Reducing
|
|
- Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`__ .
|
|
+ `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`_ .
|
|
|
|
.. math::
|
|
|
|
@@ -10,9 +9,8 @@
|
|
|
|
The mean and standard-deviation are calculated per-dimension over
|
|
the mini-batches and :math:`\gamma` and :math:`\beta` are learnable parameter vectors
|
|
- of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are set
|
|
- to 1 and the elements of :math:`\beta` are set to 0. The standard-deviation is calculated
|
|
- via the biased estimator, equivalent to `torch.var(input, unbiased=False)`.
|
|
+ of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are sampled
|
|
+ from :math:`\mathcal{U}(0, 1)` and the elements of :math:`\beta` are set to 0.
|
|
|
|
Also by default, during training this layer keeps running estimates of its
|
|
computed mean and variance, which are then used for normalization during
|
|
@@ -27,7 +25,7 @@
|
|
This :attr:`momentum` argument is different from one used in optimizer
|
|
classes and the conventional notion of momentum. Mathematically, the
|
|
update rule for running statistics here is
|
|
- :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
|
|
+ :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
|
|
where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
|
|
new observed value.
|
|
|
|
@@ -46,10 +44,8 @@
|
|
learnable affine parameters. Default: ``True``
|
|
track_running_stats: a boolean value that when set to ``True``, this
|
|
module tracks the running mean and variance, and when set to ``False``,
|
|
- this module does not track such statistics, and initializes statistics
|
|
- buffers :attr:`running_mean` and :attr:`running_var` as ``None``.
|
|
- When these buffers are ``None``, this module always uses batch statistics
|
|
- in both training and eval modes. Default: ``True``
|
|
+ this module does not track such statistics and always uses batch
|
|
+ statistics in both training and eval modes. Default: ``True``
|
|
|
|
Shape:
|
|
- Input: :math:`(N, C, H, W)`
|
|
@@ -63,8 +59,12 @@
|
|
>>> m = nn.BatchNorm2d(100, affine=False)
|
|
>>> input = torch.randn(20, 100, 35, 45)
|
|
>>> output = m(input)
|
|
+
|
|
+ .. _`Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`:
|
|
+ https://arxiv.org/abs/1502.03167
|
|
"""
|
|
|
|
+ @weak_script_method
|
|
def _check_input_dim(self, input):
|
|
if input.dim() != 4:
|
|
raise ValueError('expected 4D input (got {}D input)'
|