delete *.patch
@@ -1,29 +0,0 @@
--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py
+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py
@@ -6,7 +6,7 @@

Args:
output_size: the target output size of the image of the form H x W.
- Can be a tuple (H, W) or a single H for a square image H x H.
+ Can be a tuple (H, W) or a single H for a square image H x H
H and W can be either a ``int``, or ``None`` which means the size will
be the same as that of the input.

@@ -20,14 +20,13 @@
>>> input = torch.randn(1, 64, 10, 9)
>>> output = m(input)
>>> # target output size of 10x7
- >>> m = nn.AdaptiveAvgPool2d((None, 7))
+ >>> m = nn.AdaptiveMaxPool2d((None, 7))
>>> input = torch.randn(1, 64, 10, 9)
>>> output = m(input)

"""

- output_size: _size_2_t
-
- def forward(self, input: Tensor) -> Tensor:
+ @weak_script_method
+ def forward(self, input):
return F.adaptive_avg_pool2d(input, self.output_size)
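For context: the patch above reverts the AdaptiveAvgPool2d docstring and forward signature to the older, pre-type-annotation form. A minimal sketch of what the `(None, 7)` output size does (illustrative only, not part of the patch; assumes a working torch install):

    import torch
    import torch.nn as nn

    m = nn.AdaptiveAvgPool2d((None, 7))  # None keeps the input height unchanged
    x = torch.randn(1, 64, 10, 9)
    y = m(x)
    print(y.shape)  # torch.Size([1, 64, 10, 7]): height 10 preserved, width pooled to 7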
@@ -1,59 +0,0 @@
--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
@@ -1,8 +1,7 @@
class BatchNorm1d(_BatchNorm):
r"""Applies Batch Normalization over a 2D or 3D input (a mini-batch of 1D
inputs with optional additional channel dimension) as described in the paper
- `Batch Normalization: Accelerating Deep Network Training by Reducing
- Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`__ .
+ `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`_ .

.. math::

@@ -10,9 +9,8 @@

The mean and standard-deviation are calculated per-dimension over
the mini-batches and :math:`\gamma` and :math:`\beta` are learnable parameter vectors
- of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are set
- to 1 and the elements of :math:`\beta` are set to 0. The standard-deviation is calculated
- via the biased estimator, equivalent to `torch.var(input, unbiased=False)`.
+ of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are sampled
+ from :math:`\mathcal{U}(0, 1)` and the elements of :math:`\beta` are set to 0.

Also by default, during training this layer keeps running estimates of its
computed mean and variance, which are then used for normalization during
@@ -27,7 +25,7 @@
This :attr:`momentum` argument is different from one used in optimizer
classes and the conventional notion of momentum. Mathematically, the
update rule for running statistics here is
- :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
+ :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momemtum} \times x_t`,
where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
new observed value.

@@ -46,10 +44,8 @@
learnable affine parameters. Default: ``True``
track_running_stats: a boolean value that when set to ``True``, this
module tracks the running mean and variance, and when set to ``False``,
- this module does not track such statistics, and initializes statistics
- buffers :attr:`running_mean` and :attr:`running_var` as ``None``.
- When these buffers are ``None``, this module always uses batch statistics.
- in both training and eval modes. Default: ``True``
+ this module does not track such statistics and always uses batch
+ statistics in both training and eval modes. Default: ``True``

Shape:
- Input: :math:`(N, C)` or :math:`(N, C, L)`
@@ -63,8 +59,12 @@
>>> m = nn.BatchNorm1d(100, affine=False)
>>> input = torch.randn(20, 100)
>>> output = m(input)
+
+ .. _`Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`:
+ https://arxiv.org/abs/1502.03167
"""

+ @weak_script_method
def _check_input_dim(self, input):
if input.dim() != 2 and input.dim() != 3:
raise ValueError('expected 2D or 3D input (got {}D input)'
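The running-statistics update rule quoted in the hunk (setting aside the reverted `momemtum` spelling) can be checked numerically; a minimal sketch, assuming the default momentum of 0.1 and a fresh module whose running mean starts at zero:

    import torch
    import torch.nn as nn

    bn = nn.BatchNorm1d(3)  # momentum defaults to 0.1
    x = torch.randn(20, 3)
    bn.train()
    _ = bn(x)
    # x_hat_new = (1 - momentum) * x_hat + momentum * x_t, starting from x_hat = 0
    expected = 0.9 * torch.zeros(3) + 0.1 * x.mean(dim=0)
    print(torch.allclose(bn.running_mean, expected))  # True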
@@ -1,59 +0,0 @@
--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
@@ -1,8 +1,7 @@
class BatchNorm2d(_BatchNorm):
r"""Applies Batch Normalization over a 4D input (a mini-batch of 2D inputs
with additional channel dimension) as described in the paper
- `Batch Normalization: Accelerating Deep Network Training by Reducing
- Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`__ .
+ `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`_ .

.. math::

@@ -10,9 +9,8 @@

The mean and standard-deviation are calculated per-dimension over
the mini-batches and :math:`\gamma` and :math:`\beta` are learnable parameter vectors
- of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are set
- to 1 and the elements of :math:`\beta` are set to 0. The standard-deviation is calculated
- via the biased estimator, equivalent to `torch.var(input, unbiased=False)`.
+ of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are sampled
+ from :math:`\mathcal{U}(0, 1)` and the elements of :math:`\beta` are set to 0.

Also by default, during training this layer keeps running estimates of its
computed mean and variance, which are then used for normalization during
@@ -27,7 +25,7 @@
This :attr:`momentum` argument is different from one used in optimizer
classes and the conventional notion of momentum. Mathematically, the
update rule for running statistics here is
- :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
+ :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momemtum} \times x_t`,
where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
new observed value.

@@ -46,10 +44,8 @@
learnable affine parameters. Default: ``True``
track_running_stats: a boolean value that when set to ``True``, this
module tracks the running mean and variance, and when set to ``False``,
- this module does not track such statistics, and initializes statistics
- buffers :attr:`running_mean` and :attr:`running_var` as ``None``.
- When these buffers are ``None``, this module always uses batch statistics.
- in both training and eval modes. Default: ``True``
+ this module does not track such statistics and always uses batch
+ statistics in both training and eval modes. Default: ``True``

Shape:
- Input: :math:`(N, C, H, W)`
@@ -63,8 +59,12 @@
>>> m = nn.BatchNorm2d(100, affine=False)
>>> input = torch.randn(20, 100, 35, 45)
>>> output = m(input)
+
+ .. _`Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`:
+ https://arxiv.org/abs/1502.03167
"""

+ @weak_script_method
def _check_input_dim(self, input):
if input.dim() != 4:
raise ValueError('expected 4D input (got {}D input)'
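One of the lines this patch deletes notes that the standard-deviation is computed via the biased estimator, equivalent to `torch.var(input, unbiased=False)`. A minimal sketch verifying that claim against a manual per-channel normalization (illustrative, assumes a recent torch where `var` accepts a dim tuple):

    import torch
    import torch.nn as nn

    bn = nn.BatchNorm2d(3, affine=False)
    x = torch.randn(8, 3, 5, 5)
    y = bn(x)
    # Normalization uses the biased variance over (N, H, W) per channel
    mean = x.mean(dim=(0, 2, 3), keepdim=True)
    var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True)
    manual = (x - mean) / torch.sqrt(var + bn.eps)
    print(torch.allclose(y, manual, atol=1e-5))  # True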
Conv2d.patch
@@ -1,140 +0,0 @@
--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/conv.py
+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/conv.py
@@ -15,8 +15,6 @@
:math:`N` is a batch size, :math:`C` denotes a number of channels,
:math:`H` is a height of input planes in pixels, and :math:`W` is
width in pixels.
-
- This module supports :ref:`TensorFloat32<tf32_on_ampere>`.

* :attr:`stride` controls the stride for the cross-correlation, a single
number or a tuple.
@@ -39,7 +37,7 @@
concatenated.
* At groups= :attr:`in_channels`, each input channel is convolved with
its own set of filters, of size:
- :math:`\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor`.
+ :math:`\left\lfloor\frac{C_\text{out}}{C_\text{in}}\right\rfloor`.

The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:

@@ -47,14 +45,14 @@
- a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
and the second `int` for the width dimension

- Note:
+ .. note::

Depending of the size of your kernel, several (of the last)
columns of the input might be lost, because it is a valid `cross-correlation`_,
and not a full `cross-correlation`_.
It is up to the user to add proper padding.

- Note:
+ .. note::

When `groups == in_channels` and `out_channels == K * in_channels`,
where `K` is a positive integer, this operation is also termed in
@@ -64,29 +62,17 @@
a depthwise convolution with a depthwise multiplier `K`, can be constructed by arguments
:math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`.

- Note:
- In some circumstances when using the CUDA backend with CuDNN, this operator
- may select a nondeterministic algorithm to increase performance. If this is
- undesirable, you can try to make the operation deterministic (potentially at
- a performance cost) by setting ``torch.backends.cudnn.deterministic =
- True``.
- Please see the notes on :doc:`/notes/randomness` for background.
-
+ .. include:: cudnn_deterministic.rst

Args:
in_channels (int): Number of channels in the input image
out_channels (int): Number of channels produced by the convolution
kernel_size (int or tuple): Size of the convolving kernel
stride (int or tuple, optional): Stride of the convolution. Default: 1
- padding (int or tuple, optional): Zero-padding added to both sides of
- the input. Default: 0
- padding_mode (string, optional): ``'zeros'``, ``'reflect'``,
- ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
+ padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
- groups (int, optional): Number of blocked connections from input
- channels to output channels. Default: 1
- bias (bool, optional): If ``True``, adds a learnable bias to the
- output. Default: ``True``
+ groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
+ bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``

Shape:
- Input: :math:`(N, C_{in}, H_{in}, W_{in})`
@@ -102,18 +88,16 @@

Attributes:
weight (Tensor): the learnable weights of the module of shape
- :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}},`
- :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]})`.
- The values of these weights are sampled from
- :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
- :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
- bias (Tensor): the learnable bias of the module of shape
- (out_channels). If :attr:`bias` is ``True``,
- then the values of these weights are
- sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
- :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
+ (out_channels, in_channels, kernel_size[0], kernel_size[1]).
+ The values of these weights are sampled from
+ :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
+ :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
+ bias (Tensor): the learnable bias of the module of shape (out_channels). If :attr:`bias` is ``True``,
+ then the values of these weights are
+ sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
+ :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`

- Examples:
+ Examples::

>>> # With square kernels and equal stride
>>> m = nn.Conv2d(16, 33, 3, stride=2)
@@ -130,34 +114,18 @@
.. _link:
https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
"""
- def __init__(
- self,
- in_channels: int,
- out_channels: int,
- kernel_size: _size_2_t,
- stride: _size_2_t = 1,
- padding: _size_2_t = 0,
- dilation: _size_2_t = 1,
- groups: int = 1,
- bias: bool = True,
- padding_mode: str = 'zeros' # TODO: refine this type
- ):
+ def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+ padding=0, dilation=1, groups=1, bias=True):
kernel_size = _pair(kernel_size)
stride = _pair(stride)
padding = _pair(padding)
dilation = _pair(dilation)
super(Conv2d, self).__init__(
in_channels, out_channels, kernel_size, stride, padding, dilation,
- False, _pair(0), groups, bias, padding_mode)
+ False, _pair(0), groups, bias)

- def _conv_forward(self, input, weight):
- if self.padding_mode != 'zeros':
- return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
- weight, self.bias, self.stride,
- _pair(0), self.dilation, self.groups)
- return F.conv2d(input, weight, self.bias, self.stride,
+ @weak_script_method
+ def forward(self, input):
+ return F.conv2d(input, self.weight, self.bias, self.stride,
self.padding, self.dilation, self.groups)

- def forward(self, input: Tensor) -> Tensor:
- return self._conv_forward(input, self.weight)
-
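The depthwise-convolution note kept by this hunk (`groups == in_channels`, `out_channels == K * in_channels`) is easy to see in the weight shapes; a minimal sketch, not part of the patch:

    import torch
    import torch.nn as nn

    # Depthwise convolution: groups == in_channels, out_channels == K * in_channels (K = 2)
    conv = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, groups=8)
    x = torch.randn(1, 8, 32, 32)
    y = conv(x)
    print(y.shape)            # torch.Size([1, 16, 30, 30])
    print(conv.weight.shape)  # torch.Size([16, 1, 3, 3]): in_channels / groups == 1 per filter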
@@ -1,97 +0,0 @@
--- /usr/local/lib/python3.5/dist-packages/torch/nn/parallel/data_parallel.py
+++ /usr/local/lib/python3.5/dist-packages/torch/nn/parallel/data_parallel.py
@@ -10,16 +10,13 @@

The batch size should be larger than the number of GPUs used.

- .. warning::
- It is recommended to use :class:`~torch.nn.parallel.DistributedDataParallel`,
- instead of this class, to do multi-GPU training, even if there is only a single
- node. See: :ref:`cuda-nn-ddp-instead` and :ref:`ddp`.
+ See also: :ref:`cuda-nn-dataparallel-instead`

Arbitrary positional and keyword inputs are allowed to be passed into
- DataParallel but some types are specially handled. tensors will be
- **scattered** on dim specified (default 0). tuple, list and dict types will
- be shallow copied. The other types will be shared among different threads
- and can be corrupted if written to in the model's forward pass.
+ DataParallel EXCEPT Tensors. All tensors will be scattered on dim
+ specified (default 0). Primitive types will be broadcasted, but all
+ other types will be a shallow copy and can be corrupted if written to in
+ the model's forward pass.

The parallelized :attr:`module` must have its parameters and buffers on
``device_ids[0]`` before running this :class:`~torch.nn.DataParallel`
@@ -27,9 +24,9 @@

.. warning::
In each forward, :attr:`module` is **replicated** on each device, so any
- updates to the running module in ``forward`` will be lost. For example,
+ updates to the runing module in ``forward`` will be lost. For example,
if :attr:`module` has a counter attribute that is incremented in each
- ``forward``, it will always stay at the initial value because the update
+ ``forward``, it will always stay at the initial value becasue the update
is done on the replicas which are destroyed after ``forward``. However,
:class:`~torch.nn.DataParallel` guarantees that the replica on
``device[0]`` will have its parameters and buffers sharing storage with
@@ -74,7 +71,7 @@
Example::

>>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2])
- >>> output = net(input_var)  # input_var can be on any device, including CPU
+ >>> output = net(input_var)
"""

# TODO: update notes/cuda.rst when this class handles 8+ GPUs well
@@ -82,15 +79,13 @@
def __init__(self, module, device_ids=None, output_device=None, dim=0):
super(DataParallel, self).__init__()

- device_type = _get_available_device_type()
- if device_type is None:
+ if not torch.cuda.is_available():
self.module = module
self.device_ids = []
return

if device_ids is None:
- device_ids = _get_all_device_indices()
-
+ device_ids = list(range(torch.cuda.device_count()))
if output_device is None:
output_device = device_ids[0]

@@ -98,23 +93,15 @@
self.module = module
self.device_ids = list(map(lambda x: _get_device_index(x, True), device_ids))
self.output_device = _get_device_index(output_device, True)
- self.src_device_obj = torch.device(device_type, self.device_ids[0])

_check_balance(self.device_ids)

if len(self.device_ids) == 1:
- self.module.to(self.src_device_obj)
+ self.module.cuda(device_ids[0])

def forward(self, *inputs, **kwargs):
if not self.device_ids:
return self.module(*inputs, **kwargs)
-
- for t in chain(self.module.parameters(), self.module.buffers()):
- if t.device != self.src_device_obj:
- raise RuntimeError("module must have its parameters and buffers "
- "on device {} (device_ids[0]) but found one of "
- "them on device: {}".format(self.src_device_obj, t.device))
-
inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
if len(self.device_ids) == 1:
return self.module(*inputs[0], **kwargs[0])
@@ -123,7 +110,7 @@
return self.gather(outputs, self.output_device)

def replicate(self, module, device_ids):
- return replicate(module, device_ids, not torch.is_grad_enabled())
+ return replicate(module, device_ids)

def scatter(self, inputs, kwargs, device_ids):
return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
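As the docstring in the hunk says, the module's parameters and buffers must live on `device_ids[0]` before wrapping, the batch is scattered on dim 0, and outputs are gathered onto the output device. A minimal usage sketch (guarded so it only exercises the GPU path where one exists):

    import torch
    import torch.nn as nn

    model = nn.Linear(10, 5)
    if torch.cuda.is_available():
        model = model.cuda(0)              # parameters must live on device_ids[0] first
        if torch.cuda.device_count() > 1:
            # Replicates the module on each GPU and scatters the batch on dim 0
            model = nn.DataParallel(model)
        x = torch.randn(20, 10).cuda(0)
        out = model(x)                      # gathered back onto output_device (device_ids[0])
        print(out.shape)                    # torch.Size([20, 5])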
@@ -1,22 +0,0 @@
--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/dropout.py
+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/dropout.py
@@ -18,8 +18,8 @@
inplace: If set to ``True``, will do this operation in-place. Default: ``False``

Shape:
- - Input: :math:`(*)`. Input can be of any shape
- - Output: :math:`(*)`. Output is of the same shape as input
+ - Input: `Any`. Input can be of any shape
+ - Output: `Same`. Output is of the same shape as input

Examples::

@@ -31,6 +31,7 @@
detectors: https://arxiv.org/abs/1207.0580
"""

- def forward(self, input: Tensor) -> Tensor:
+ @weak_script_method
+ def forward(self, input):
return F.dropout(input, self.p, self.training, self.inplace)
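`F.dropout` is gated on `self.training`, which is why the same module behaves differently in train and eval mode; a minimal sketch of the inverted-dropout scaling:

    import torch
    import torch.nn as nn

    drop = nn.Dropout(p=0.5)
    x = torch.ones(10)

    drop.train()
    print(drop(x))  # surviving elements are scaled by 1 / (1 - p) == 2.0, the rest are zeroed

    drop.eval()
    print(drop(x))  # identity at evaluation time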
Linear.patch
@@ -1,64 +0,0 @@
--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/linear.py
+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/linear.py
@@ -1,19 +1,17 @@
class Linear(Module):
r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b`
-
- This module supports :ref:`TensorFloat32<tf32_on_ampere>`.

Args:
in_features: size of each input sample
out_features: size of each output sample
- bias: If set to ``False``, the layer will not learn an additive bias.
+ bias: If set to False, the layer will not learn an additive bias.
Default: ``True``

Shape:
- - Input: :math:`(N, *, H_{in})` where :math:`*` means any number of
- additional dimensions and :math:`H_{in} = \text{in\_features}`
- - Output: :math:`(N, *, H_{out})` where all but the last dimension
- are the same shape as the input and :math:`H_{out} = \text{out\_features}`.
+ - Input: :math:`(N, *, \text{in\_features})` where :math:`*` means any number of
+ additional dimensions
+ - Output: :math:`(N, *, \text{out\_features})` where all but the last dimension
+ are the same shape as the input.

Attributes:
weight: the learnable weights of the module of shape
@@ -33,12 +31,9 @@
>>> print(output.size())
torch.Size([128, 30])
"""
- __constants__ = ['in_features', 'out_features']
- in_features: int
- out_features: int
- weight: Tensor
+ __constants__ = ['bias']

- def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
+ def __init__(self, in_features, out_features, bias=True):
super(Linear, self).__init__()
self.in_features = in_features
self.out_features = out_features
@@ -49,17 +44,18 @@
self.register_parameter('bias', None)
self.reset_parameters()

- def reset_parameters(self) -> None:
+ def reset_parameters(self):
init.kaiming_uniform_(self.weight, a=math.sqrt(5))
if self.bias is not None:
fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
bound = 1 / math.sqrt(fan_in)
init.uniform_(self.bias, -bound, bound)

- def forward(self, input: Tensor) -> Tensor:
+ @weak_script_method
+ def forward(self, input):
return F.linear(input, self.weight, self.bias)

- def extra_repr(self) -> str:
+ def extra_repr(self):
return 'in_features={}, out_features={}, bias={}'.format(
self.in_features, self.out_features, self.bias is not None
)
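The `reset_parameters` body shown in the hunk draws the bias from a uniform distribution bounded by `1 / sqrt(fan_in)`, where `fan_in` equals `in_features` for a Linear weight; a minimal sketch checking that bound:

    import math
    import torch
    import torch.nn as nn

    layer = nn.Linear(in_features=20, out_features=30)
    # Bias is drawn from U(-bound, bound) with bound = 1 / sqrt(fan_in) = 1 / sqrt(20)
    bound = 1 / math.sqrt(layer.in_features)
    print(layer.bias.min().item() >= -bound)  # True
    print(layer.bias.max().item() <= bound)   # True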
@@ -1,17 +0,0 @@
--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py
+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py
@@ -57,12 +57,8 @@
https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
"""

- kernel_size: _size_2_t
- stride: _size_2_t
- padding: _size_2_t
- dilation: _size_2_t
-
- def forward(self, input: Tensor) -> Tensor:
+ @weak_script_method
+ def forward(self, input):
return F.max_pool2d(input, self.kernel_size, self.stride,
self.padding, self.dilation, self.ceil_mode,
self.return_indices)
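The `forward` in this hunk threads `return_indices` through to `F.max_pool2d`; when it is set, the module returns the argmax locations alongside the pooled values. A minimal sketch, not part of the patch:

    import torch
    import torch.nn as nn

    pool = nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True)
    x = torch.randn(1, 1, 4, 4)
    y, idx = pool(x)           # idx can later be fed into nn.MaxUnpool2d
    print(y.shape, idx.shape)  # torch.Size([1, 1, 2, 2]) torch.Size([1, 1, 2, 2])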
PReLU.patch
@@ -1,37 +0,0 @@
--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py
+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py
@@ -37,9 +37,10 @@
- Output: :math:`(N, *)`, same shape as the input

Attributes:
- weight (Tensor): the learnable weights of shape (:attr:`num_parameters`).
+ weight (Tensor): the learnable weights of shape (attr:`num_parameters`).
+ The attr:`dtype` is default to

- .. image:: ../scripts/activation_images/PReLU.png
+ .. image:: scripts/activation_images/PReLU.png

Examples::

@@ -47,17 +48,16 @@
>>> input = torch.randn(2)
>>> output = m(input)
"""
- __constants__ = ['num_parameters']
- num_parameters: int

- def __init__(self, num_parameters: int = 1, init: float = 0.25) -> None:
+ def __init__(self, num_parameters=1, init=0.25):
self.num_parameters = num_parameters
super(PReLU, self).__init__()
self.weight = Parameter(torch.Tensor(num_parameters).fill_(init))

- def forward(self, input: Tensor) -> Tensor:
+ @weak_script_method
+ def forward(self, input):
return F.prelu(input, self.weight)

- def extra_repr(self) -> str:
+ def extra_repr(self):
return 'num_parameters={}'.format(self.num_parameters)
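As the `__init__` in the hunk shows, `num_parameters` sizes the learnable slope tensor: one shared slope by default, or one slope per channel. A minimal sketch, not part of the patch:

    import torch
    import torch.nn as nn

    # One learnable slope per channel instead of a single shared slope
    m = nn.PReLU(num_parameters=4, init=0.25)
    x = torch.randn(2, 4, 8)   # channel dimension has size 4
    y = m(x)
    print(m.weight.shape)  # torch.Size([4])
    print(y.shape)         # torch.Size([2, 4, 8])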
@@ -1,70 +0,0 @@
--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/container.py
+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/container.py
@@ -22,15 +22,7 @@
]))
"""

- @overload
- def __init__(self, *args: Module) -> None:
- ...
-
- @overload
- def __init__(self, arg: 'OrderedDict[str, Module]') -> None:
- ...
-
- def __init__(self, *args: Any):
+ def __init__(self, *args):
super(Sequential, self).__init__()
if len(args) == 1 and isinstance(args[0], OrderedDict):
for key, module in args[0].items():
@@ -48,18 +40,17 @@
idx %= size
return next(islice(iterator, idx, None))

- @_copy_to_script_wrapper
- def __getitem__(self: T, idx) -> T:
+ def __getitem__(self, idx):
if isinstance(idx, slice):
return self.__class__(OrderedDict(list(self._modules.items())[idx]))
else:
return self._get_item_by_idx(self._modules.values(), idx)

- def __setitem__(self, idx: int, module: Module) -> None:
+ def __setitem__(self, idx, module):
key = self._get_item_by_idx(self._modules.keys(), idx)
return setattr(self, key, module)

- def __delitem__(self, idx: Union[slice, int]) -> None:
+ def __delitem__(self, idx):
if isinstance(idx, slice):
for key in list(self._modules.keys())[idx]:
delattr(self, key)
@@ -67,26 +58,16 @@
key = self._get_item_by_idx(self._modules.keys(), idx)
delattr(self, key)

- @_copy_to_script_wrapper
- def __len__(self) -> int:
+ def __len__(self):
return len(self._modules)

- @_copy_to_script_wrapper
def __dir__(self):
keys = super(Sequential, self).__dir__()
keys = [key for key in keys if not key.isdigit()]
return keys

- @_copy_to_script_wrapper
- def __iter__(self) -> Iterator[Module]:
- return iter(self._modules.values())
-
- # NB: We can't really type check this function as the type of input
- # may change dynamically (as is tested in
- # TestScript.test_sequential_intermediary_types). Cannot annotate
- # with Any as TorchScript expects a more precise type
def forward(self, input):
- for module in self:
+ for module in self._modules.values():
input = module(input)
return input
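The `OrderedDict` branch of `__init__` and the `forward` loop in this hunk are easy to exercise together; a minimal sketch, not part of the patch:

    import torch
    import torch.nn as nn
    from collections import OrderedDict

    model = nn.Sequential(OrderedDict([
        ('fc1', nn.Linear(10, 20)),
        ('relu', nn.ReLU()),
        ('fc2', nn.Linear(20, 5)),
    ]))
    # forward simply threads the input through self._modules.values() in order
    out = model(torch.randn(3, 10))
    print(out.shape)  # torch.Size([3, 5])
    print(model[0])   # integer indexing resolves into the underlying OrderedDict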
@@ -1,29 +0,0 @@
--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py
+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py
@@ -2,7 +2,7 @@
r"""Applies the element-wise function:

.. math::
- \text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)}
+ \text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)}

Shape:
@@ -10,7 +10,7 @@
dimensions
- Output: :math:`(N, *)`, same shape as the input

- .. image:: ../scripts/activation_images/Sigmoid.png
+ .. image:: scripts/activation_images/Sigmoid.png

Examples::

@@ -19,6 +19,7 @@
>>> output = m(input)
"""

- def forward(self, input: Tensor) -> Tensor:
+ @weak_script_method
+ def forward(self, input):
return torch.sigmoid(input)
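The formula in the hunk, :math:`\text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)}`, is straightforward to verify numerically; a minimal sketch, not part of the patch:

    import torch

    x = torch.tensor([-1.0, 0.0, 1.0])
    y = torch.sigmoid(x)
    manual = 1 / (1 + torch.exp(-x))  # Sigmoid(x) = 1 / (1 + exp(-x))
    print(torch.allclose(y, manual))  # True
    print(y[1].item())                # 0.5 at x = 0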