diff --git a/AdaptiveAvgPool2d.patch b/AdaptiveAvgPool2d.patch
deleted file mode 100644
index e7dc4ac..0000000
--- a/AdaptiveAvgPool2d.patch
+++ /dev/null
@@ -1,29 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py
-@@ -6,7 +6,7 @@
- 
-     Args:
-         output_size: the target output size of the image of the form H x W.
--            Can be a tuple (H, W) or a single H for a square image H x H.
-+            Can be a tuple (H, W) or a single H for a square image H x H
-             H and W can be either a ``int``, or ``None`` which means the size will
-             be the same as that of the input.
- 
-@@ -20,14 +20,13 @@
-         >>> input = torch.randn(1, 64, 10, 9)
-         >>> output = m(input)
-         >>> # target output size of 10x7
--        >>> m = nn.AdaptiveAvgPool2d((None, 7))
-+        >>> m = nn.AdaptiveMaxPool2d((None, 7))
-         >>> input = torch.randn(1, 64, 10, 9)
-         >>> output = m(input)
- 
-     """
- 
--    output_size: _size_2_t
--
--    def forward(self, input: Tensor) -> Tensor:
-+    @weak_script_method
-+    def forward(self, input):
-         return F.adaptive_avg_pool2d(input, self.output_size)
- 
\ No newline at end of file
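Both target-size forms documented in the hunk above are easy to sanity-check. A minimal doctest-style sketch (not part of any patch; assumes only a standard torch install):

    >>> import torch
    >>> import torch.nn as nn
    >>> m = nn.AdaptiveAvgPool2d((5, 7))             # explicit (H, W) target
    >>> m(torch.randn(1, 64, 8, 9)).shape
    torch.Size([1, 64, 5, 7])
    >>> m = nn.AdaptiveAvgPool2d((None, 7))          # None keeps the input height
    >>> m(torch.randn(1, 64, 10, 9)).shape
    torch.Size([1, 64, 10, 7])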
diff --git a/BatchNorm1d.patch b/BatchNorm1d.patch
deleted file mode 100644
index f16cb73..0000000
--- a/BatchNorm1d.patch
+++ /dev/null
@@ -1,59 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
-@@ -1,8 +1,7 @@
- class BatchNorm1d(_BatchNorm):
-     r"""Applies Batch Normalization over a 2D or 3D input (a mini-batch of 1D
-     inputs with optional additional channel dimension) as described in the paper
--    `Batch Normalization: Accelerating Deep Network Training by Reducing
--    Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`__ .
-+    `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`_ .
- 
-     .. math::
- 
-@@ -10,9 +9,8 @@
- 
-     The mean and standard-deviation are calculated per-dimension over
-     the mini-batches and :math:`\gamma` and :math:`\beta` are learnable parameter vectors
--    of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are set
--    to 1 and the elements of :math:`\beta` are set to 0. The standard-deviation is calculated
--    via the biased estimator, equivalent to `torch.var(input, unbiased=False)`.
-+    of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are sampled
-+    from :math:`\mathcal{U}(0, 1)` and the elements of :math:`\beta` are set to 0.
- 
-     Also by default, during training this layer keeps running estimates of its
-     computed mean and variance, which are then used for normalization during
-@@ -27,7 +25,7 @@
-         This :attr:`momentum` argument is different from one used in optimizer
-         classes and the conventional notion of momentum. Mathematically, the
-         update rule for running statistics here is
--        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
-+        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momemtum} \times x_t`,
-         where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
-         new observed value.
- 
-@@ -46,10 +44,8 @@
-             learnable affine parameters. Default: ``True``
-         track_running_stats: a boolean value that when set to ``True``, this
-             module tracks the running mean and variance, and when set to ``False``,
--            this module does not track such statistics, and initializes statistics
--            buffers :attr:`running_mean` and :attr:`running_var` as ``None``.
--            When these buffers are ``None``, this module always uses batch statistics.
--            in both training and eval modes. Default: ``True``
-+            this module does not track such statistics and always uses batch
-+            statistics in both training and eval modes. Default: ``True``
- 
-     Shape:
-         - Input: :math:`(N, C)` or :math:`(N, C, L)`
-@@ -63,8 +59,12 @@
-         >>> m = nn.BatchNorm1d(100, affine=False)
-         >>> input = torch.randn(20, 100)
-         >>> output = m(input)
-+
-+    .. _`Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`:
-+        https://arxiv.org/abs/1502.03167
-     """
- 
-+    @weak_script_method
-     def _check_input_dim(self, input):
-         if input.dim() != 2 and input.dim() != 3:
-             raise ValueError('expected 2D or 3D input (got {}D input)'
\ No newline at end of file
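The running-statistics update rule quoted in the momentum hunk can be checked numerically: starting from a fresh running_mean of zero, one training-mode forward pass should leave exactly momentum * batch_mean behind. A doctest-style sketch (not part of the patch):

    >>> import torch
    >>> import torch.nn as nn
    >>> bn = nn.BatchNorm1d(3, momentum=0.1)         # running_mean starts at 0
    >>> x = torch.randn(20, 3)
    >>> _ = bn(x)                                    # one update of the running stats
    >>> torch.allclose(bn.running_mean, 0.1 * x.mean(dim=0))
    True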
diff --git a/BatchNorm2d.patch b/BatchNorm2d.patch
deleted file mode 100644
index c280325..0000000
--- a/BatchNorm2d.patch
+++ /dev/null
@@ -1,59 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
-@@ -1,8 +1,7 @@
- class BatchNorm2d(_BatchNorm):
-     r"""Applies Batch Normalization over a 4D input (a mini-batch of 2D inputs
-     with additional channel dimension) as described in the paper
--    `Batch Normalization: Accelerating Deep Network Training by Reducing
--    Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`__ .
-+    `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`_ .
- 
-     .. math::
- 
-@@ -10,9 +9,8 @@
- 
-     The mean and standard-deviation are calculated per-dimension over
-     the mini-batches and :math:`\gamma` and :math:`\beta` are learnable parameter vectors
--    of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are set
--    to 1 and the elements of :math:`\beta` are set to 0. The standard-deviation is calculated
--    via the biased estimator, equivalent to `torch.var(input, unbiased=False)`.
-+    of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are sampled
-+    from :math:`\mathcal{U}(0, 1)` and the elements of :math:`\beta` are set to 0.
- 
-     Also by default, during training this layer keeps running estimates of its
-     computed mean and variance, which are then used for normalization during
-@@ -27,7 +25,7 @@
-         This :attr:`momentum` argument is different from one used in optimizer
-         classes and the conventional notion of momentum. Mathematically, the
-         update rule for running statistics here is
--        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
-+        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momemtum} \times x_t`,
-         where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
-         new observed value.
- 
-@@ -46,10 +44,8 @@
-             learnable affine parameters. Default: ``True``
-         track_running_stats: a boolean value that when set to ``True``, this
-             module tracks the running mean and variance, and when set to ``False``,
--            this module does not track such statistics, and initializes statistics
--            buffers :attr:`running_mean` and :attr:`running_var` as ``None``.
--            When these buffers are ``None``, this module always uses batch statistics.
--            in both training and eval modes. Default: ``True``
-+            this module does not track such statistics and always uses batch
-+            statistics in both training and eval modes. Default: ``True``
- 
-     Shape:
-         - Input: :math:`(N, C, H, W)`
-@@ -63,8 +59,12 @@
-         >>> m = nn.BatchNorm2d(100, affine=False)
-         >>> input = torch.randn(20, 100, 35, 45)
-         >>> output = m(input)
-+
-+    .. _`Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`:
-+        https://arxiv.org/abs/1502.03167
-     """
- 
-+    @weak_script_method
-     def _check_input_dim(self, input):
-         if input.dim() != 4:
-             raise ValueError('expected 4D input (got {}D input)'
\ No newline at end of file
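The deleted sentence about the biased variance estimator describes behavior that can be confirmed directly; a sketch with affine=False so that only the normalization itself is exercised:

    >>> import torch
    >>> import torch.nn as nn
    >>> bn = nn.BatchNorm2d(2, affine=False)
    >>> x = torch.randn(8, 2, 4, 4)
    >>> mean = x.mean(dim=(0, 2, 3), keepdim=True)
    >>> var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True)   # biased estimator
    >>> torch.allclose(bn(x), (x - mean) / torch.sqrt(var + bn.eps))
    True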
diff --git a/Conv2d.patch b/Conv2d.patch
deleted file mode 100644
index a2228e4..0000000
--- a/Conv2d.patch
+++ /dev/null
@@ -1,140 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/conv.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/conv.py
-@@ -15,8 +15,6 @@
-     :math:`N` is a batch size, :math:`C` denotes a number of channels,
-     :math:`H` is a height of input planes in pixels, and :math:`W` is
-     width in pixels.
--
--    This module supports :ref:`TensorFloat32<tf32_on_ampere>`.
- 
-     * :attr:`stride` controls the stride for the cross-correlation, a single
-       number or a tuple.
-@@ -39,7 +37,7 @@
-       concatenated.
-     * At groups= :attr:`in_channels`, each input channel is convolved with
-       its own set of filters, of size:
--      :math:`\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor`.
-+      :math:`\left\lfloor\frac{C_\text{out}}{C_\text{in}}\right\rfloor`.
- 
-     The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
- 
-@@ -47,14 +45,14 @@
-     - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
-       and the second `int` for the width dimension
- 
--    Note:
-+    .. note::
- 
-         Depending of the size of your kernel, several (of the last)
-         columns of the input might be lost, because it is a valid `cross-correlation`_,
-         and not a full `cross-correlation`_.
-         It is up to the user to add proper padding.
- 
--    Note:
-+    .. note::
- 
-         When `groups == in_channels` and `out_channels == K * in_channels`,
-         where `K` is a positive integer, this operation is also termed in
-@@ -64,29 +62,17 @@
-     a depthwise convolution with a depthwise multiplier `K`, can be constructed by arguments
-     :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`.
- 
--    Note:
--        In some circumstances when using the CUDA backend with CuDNN, this operator
--        may select a nondeterministic algorithm to increase performance. If this is
--        undesirable, you can try to make the operation deterministic (potentially at
--        a performance cost) by setting ``torch.backends.cudnn.deterministic =
--        True``.
--        Please see the notes on :doc:`/notes/randomness` for background.
--
-+    .. include:: cudnn_deterministic.rst
- 
-     Args:
-         in_channels (int): Number of channels in the input image
-         out_channels (int): Number of channels produced by the convolution
-         kernel_size (int or tuple): Size of the convolving kernel
-         stride (int or tuple, optional): Stride of the convolution. Default: 1
--        padding (int or tuple, optional): Zero-padding added to both sides of
--            the input. Default: 0
--        padding_mode (string, optional): ``'zeros'``, ``'reflect'``,
--            ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
-+        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
-         dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
--        groups (int, optional): Number of blocked connections from input
--            channels to output channels. Default: 1
--        bias (bool, optional): If ``True``, adds a learnable bias to the
--            output. Default: ``True``
-+        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
-+        bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
- 
-     Shape:
-         - Input: :math:`(N, C_{in}, H_{in}, W_{in})`
-@@ -102,18 +88,16 @@
- 
-     Attributes:
-         weight (Tensor): the learnable weights of the module of shape
--            :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}},`
--            :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]})`.
--            The values of these weights are sampled from
--            :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
--            :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
--        bias (Tensor): the learnable bias of the module of shape
--            (out_channels). If :attr:`bias` is ``True``,
--            then the values of these weights are
--            sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
--            :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
-+            (out_channels, in_channels, kernel_size[0], kernel_size[1]).
-+            The values of these weights are sampled from
-+            :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
-+            :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
-+        bias (Tensor): the learnable bias of the module of shape (out_channels). If :attr:`bias` is ``True``,
-+            then the values of these weights are
-+            sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
-+            :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
- 
--    Examples:
-+    Examples::
- 
-         >>> # With square kernels and equal stride
-         >>> m = nn.Conv2d(16, 33, 3, stride=2)
-@@ -130,34 +114,18 @@
-     .. _link:
-         https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
-     """
--    def __init__(
--        self,
--        in_channels: int,
--        out_channels: int,
--        kernel_size: _size_2_t,
--        stride: _size_2_t = 1,
--        padding: _size_2_t = 0,
--        dilation: _size_2_t = 1,
--        groups: int = 1,
--        bias: bool = True,
--        padding_mode: str = 'zeros'  # TODO: refine this type
--    ):
-+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
-+                 padding=0, dilation=1, groups=1, bias=True):
-         kernel_size = _pair(kernel_size)
-         stride = _pair(stride)
-         padding = _pair(padding)
-         dilation = _pair(dilation)
-         super(Conv2d, self).__init__(
-             in_channels, out_channels, kernel_size, stride, padding, dilation,
--            False, _pair(0), groups, bias, padding_mode)
-+            False, _pair(0), groups, bias)
- 
--    def _conv_forward(self, input, weight):
--        if self.padding_mode != 'zeros':
--            return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
--                            weight, self.bias, self.stride,
--                            _pair(0), self.dilation, self.groups)
--        return F.conv2d(input, weight, self.bias, self.stride,
-+    @weak_script_method
-+    def forward(self, input):
-+        return F.conv2d(input, self.weight, self.bias, self.stride,
-                         self.padding, self.dilation, self.groups)
- 
--    def forward(self, input: Tensor) -> Tensor:
--        return self._conv_forward(input, self.weight)
--
\ No newline at end of file
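The docstring's non-square example agrees with the output-size formula in the Shape section, and the weight shape matches the Attributes entry on the old side of the hunk; a quick doctest-style check:

    >>> import torch
    >>> import torch.nn as nn
    >>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
    >>> m(torch.randn(20, 16, 50, 100)).shape   # H_out = (50 + 2*4 - 3*(3-1) - 1)//2 + 1 = 26
    torch.Size([20, 33, 26, 100])
    >>> m.weight.shape                          # (out_channels, in_channels/groups, kH, kW)
    torch.Size([33, 16, 3, 5])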
diff --git a/DataParallel.patch b/DataParallel.patch
deleted file mode 100644
index 8fddc8e..0000000
--- a/DataParallel.patch
+++ /dev/null
@@ -1,97 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/parallel/data_parallel.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/parallel/data_parallel.py
-@@ -10,16 +10,13 @@
- 
-     The batch size should be larger than the number of GPUs used.
- 
--    .. warning::
--        It is recommended to use :class:`~torch.nn.parallel.DistributedDataParallel`,
--        instead of this class, to do multi-GPU training, even if there is only a single
--        node. See: :ref:`cuda-nn-ddp-instead` and :ref:`ddp`.
-+    See also: :ref:`cuda-nn-dataparallel-instead`
- 
-     Arbitrary positional and keyword inputs are allowed to be passed into
--    DataParallel but some types are specially handled. tensors will be
--    **scattered** on dim specified (default 0). tuple, list and dict types will
--    be shallow copied. The other types will be shared among different threads
--    and can be corrupted if written to in the model's forward pass.
-+    DataParallel EXCEPT Tensors. All tensors will be scattered on dim
-+    specified (default 0). Primitive types will be broadcasted, but all
-+    other types will be a shallow copy and can be corrupted if written to in
-+    the model's forward pass.
- 
-     The parallelized :attr:`module` must have its parameters and buffers on
-     ``device_ids[0]`` before running this :class:`~torch.nn.DataParallel`
-@@ -27,9 +24,9 @@
- 
-     .. warning::
-         In each forward, :attr:`module` is **replicated** on each device, so any
--        updates to the running module in ``forward`` will be lost. For example,
-+        updates to the runing module in ``forward`` will be lost. For example,
-         if :attr:`module` has a counter attribute that is incremented in each
--        ``forward``, it will always stay at the initial value because the update
-+        ``forward``, it will always stay at the initial value becasue the update
-         is done on the replicas which are destroyed after ``forward``. However,
-         :class:`~torch.nn.DataParallel` guarantees that the replica on
-         ``device[0]`` will have its parameters and buffers sharing storage with
-@@ -74,7 +71,7 @@
-     Example::
- 
-         >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2])
--        >>> output = net(input_var)  # input_var can be on any device, including CPU
-+        >>> output = net(input_var)
-     """
- 
-     # TODO: update notes/cuda.rst when this class handles 8+ GPUs well
-@@ -82,15 +79,13 @@
-     def __init__(self, module, device_ids=None, output_device=None, dim=0):
-         super(DataParallel, self).__init__()
- 
--        device_type = _get_available_device_type()
--        if device_type is None:
-+        if not torch.cuda.is_available():
-             self.module = module
-             self.device_ids = []
-             return
- 
-         if device_ids is None:
--            device_ids = _get_all_device_indices()
--
-+            device_ids = list(range(torch.cuda.device_count()))
-         if output_device is None:
-             output_device = device_ids[0]
- 
-@@ -98,23 +93,15 @@
-         self.module = module
-         self.device_ids = list(map(lambda x: _get_device_index(x, True), device_ids))
-         self.output_device = _get_device_index(output_device, True)
--        self.src_device_obj = torch.device(device_type, self.device_ids[0])
- 
-         _check_balance(self.device_ids)
- 
-         if len(self.device_ids) == 1:
--            self.module.to(self.src_device_obj)
-+            self.module.cuda(device_ids[0])
- 
-     def forward(self, *inputs, **kwargs):
-         if not self.device_ids:
-             return self.module(*inputs, **kwargs)
--
--        for t in chain(self.module.parameters(), self.module.buffers()):
--            if t.device != self.src_device_obj:
--                raise RuntimeError("module must have its parameters and buffers "
--                                   "on device {} (device_ids[0]) but found one of "
--                                   "them on device: {}".format(self.src_device_obj, t.device))
--
-         inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
-         if len(self.device_ids) == 1:
-             return self.module(*inputs[0], **kwargs[0])
-@@ -123,7 +110,7 @@
-         return self.gather(outputs, self.output_device)
- 
-     def replicate(self, module, device_ids):
--        return replicate(module, device_ids, not torch.is_grad_enabled())
-+        return replicate(module, device_ids)
- 
-     def scatter(self, inputs, kwargs, device_ids):
-         return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
\ No newline at end of file
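Both versions of ``__init__`` shown above degrade to a plain wrapper when no GPU is visible: ``device_ids`` ends up empty and ``forward`` simply calls the wrapped module. A minimal sketch of that CPU-only fallback (toy model chosen purely for illustration):

    >>> import torch
    >>> import torch.nn as nn
    >>> net = nn.DataParallel(nn.Linear(10, 5))   # on a CPU-only host: no scatter/gather
    >>> net(torch.randn(20, 10)).shape
    torch.Size([20, 5])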
diff --git a/Dropout.patch b/Dropout.patch
deleted file mode 100644
index e4345bf..0000000
--- a/Dropout.patch
+++ /dev/null
@@ -1,22 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/dropout.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/dropout.py
-@@ -18,8 +18,8 @@
-         inplace: If set to ``True``, will do this operation in-place. Default: ``False``
- 
-     Shape:
--        - Input: :math:`(*)`. Input can be of any shape
--        - Output: :math:`(*)`. Output is of the same shape as input
-+        - Input: `Any`. Input can be of any shape
-+        - Output: `Same`. Output is of the same shape as input
- 
-     Examples::
- 
-@@ -31,6 +31,7 @@
-         detectors: https://arxiv.org/abs/1207.0580
-     """
- 
--    def forward(self, input: Tensor) -> Tensor:
-+    @weak_script_method
-+    def forward(self, input):
-         return F.dropout(input, self.p, self.training, self.inplace)
- 
\ No newline at end of file
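The Shape contract above (output mirrors the input) and the role of ``self.training`` in the forward shown in the hunk can both be seen in a few lines; a doctest-style sketch:

    >>> import torch
    >>> import torch.nn as nn
    >>> m = nn.Dropout(p=0.2)
    >>> x = torch.randn(20, 16)
    >>> m(x).shape                       # same shape as the input
    torch.Size([20, 16])
    >>> m.eval()(x).equal(x)             # in eval mode dropout is the identity
    True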
diff --git a/Linear.patch b/Linear.patch
deleted file mode 100644
index ef25bbe..0000000
--- a/Linear.patch
+++ /dev/null
@@ -1,64 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/linear.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/linear.py
-@@ -1,19 +1,17 @@
- class Linear(Module):
-     r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b`
--
--    This module supports :ref:`TensorFloat32<tf32_on_ampere>`.
- 
-     Args:
-         in_features: size of each input sample
-         out_features: size of each output sample
--        bias: If set to ``False``, the layer will not learn an additive bias.
-+        bias: If set to False, the layer will not learn an additive bias.
-             Default: ``True``
- 
-     Shape:
--        - Input: :math:`(N, *, H_{in})` where :math:`*` means any number of
--          additional dimensions and :math:`H_{in} = \text{in\_features}`
--        - Output: :math:`(N, *, H_{out})` where all but the last dimension
--          are the same shape as the input and :math:`H_{out} = \text{out\_features}`.
-+        - Input: :math:`(N, *, \text{in\_features})` where :math:`*` means any number of
-+          additional dimensions
-+        - Output: :math:`(N, *, \text{out\_features})` where all but the last dimension
-+          are the same shape as the input.
- 
-     Attributes:
-         weight: the learnable weights of the module of shape
-@@ -33,12 +31,9 @@
-         >>> print(output.size())
-         torch.Size([128, 30])
-     """
--    __constants__ = ['in_features', 'out_features']
--    in_features: int
--    out_features: int
--    weight: Tensor
-+    __constants__ = ['bias']
- 
--    def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
-+    def __init__(self, in_features, out_features, bias=True):
-         super(Linear, self).__init__()
-         self.in_features = in_features
-         self.out_features = out_features
-@@ -49,17 +44,18 @@
-         self.register_parameter('bias', None)
-         self.reset_parameters()
- 
--    def reset_parameters(self) -> None:
-+    def reset_parameters(self):
-         init.kaiming_uniform_(self.weight, a=math.sqrt(5))
-         if self.bias is not None:
-             fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
-             bound = 1 / math.sqrt(fan_in)
-             init.uniform_(self.bias, -bound, bound)
- 
--    def forward(self, input: Tensor) -> Tensor:
-+    @weak_script_method
-+    def forward(self, input):
-         return F.linear(input, self.weight, self.bias)
- 
--    def extra_repr(self) -> str:
-+    def extra_repr(self):
-         return 'in_features={}, out_features={}, bias={}'.format(
-             self.in_features, self.out_features, self.bias is not None
-         )
\ No newline at end of file
diff --git a/MaxPool2d.patch b/MaxPool2d.patch
deleted file mode 100644
index 5a991b0..0000000
--- a/MaxPool2d.patch
+++ /dev/null
@@ -1,17 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py
-@@ -57,12 +57,8 @@
-         https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
-     """
- 
--    kernel_size: _size_2_t
--    stride: _size_2_t
--    padding: _size_2_t
--    dilation: _size_2_t
--
--    def forward(self, input: Tensor) -> Tensor:
-+    @weak_script_method
-+    def forward(self, input):
-         return F.max_pool2d(input, self.kernel_size, self.stride,
-                             self.padding, self.dilation, self.ceil_mode,
-                             self.return_indices)
\ No newline at end of file
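The two patches above document :math:`y = xA^T + b` and the pooled output shape; both are quick to confirm (the weight is stored as (out_features, in_features), hence the transpose in the product):

    >>> import torch
    >>> import torch.nn as nn
    >>> m = nn.Linear(20, 30)
    >>> x = torch.randn(128, 20)
    >>> torch.allclose(m(x), x @ m.weight.t() + m.bias, atol=1e-6)
    True
    >>> nn.MaxPool2d(2)(torch.randn(1, 3, 8, 8)).shape
    torch.Size([1, 3, 4, 4])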
diff --git a/PReLU.patch b/PReLU.patch
deleted file mode 100644
index d74cce1..0000000
--- a/PReLU.patch
+++ /dev/null
@@ -1,37 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py
-@@ -37,9 +37,10 @@
-         - Output: :math:`(N, *)`, same shape as the input
- 
-     Attributes:
--        weight (Tensor): the learnable weights of shape (:attr:`num_parameters`).
-+        weight (Tensor): the learnable weights of shape (attr:`num_parameters`).
-+            The attr:`dtype` is default to
- 
--    .. image:: ../scripts/activation_images/PReLU.png
-+    .. image:: scripts/activation_images/PReLU.png
- 
-     Examples::
- 
-@@ -47,17 +48,16 @@
-         >>> input = torch.randn(2)
-         >>> output = m(input)
-     """
--    __constants__ = ['num_parameters']
--    num_parameters: int
- 
--    def __init__(self, num_parameters: int = 1, init: float = 0.25) -> None:
-+    def __init__(self, num_parameters=1, init=0.25):
-         self.num_parameters = num_parameters
-         super(PReLU, self).__init__()
-         self.weight = Parameter(torch.Tensor(num_parameters).fill_(init))
- 
--    def forward(self, input: Tensor) -> Tensor:
-+    @weak_script_method
-+    def forward(self, input):
-         return F.prelu(input, self.weight)
- 
--    def extra_repr(self) -> str:
-+    def extra_repr(self):
-         return 'num_parameters={}'.format(self.num_parameters)
- 
\ No newline at end of file
diff --git a/Sequential.patch b/Sequential.patch
deleted file mode 100644
index 6c7f6ac..0000000
--- a/Sequential.patch
+++ /dev/null
@@ -1,70 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/container.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/container.py
-@@ -22,15 +22,7 @@
-         ]))
-     """
- 
--    @overload
--    def __init__(self, *args: Module) -> None:
--        ...
--
--    @overload
--    def __init__(self, arg: 'OrderedDict[str, Module]') -> None:
--        ...
--
--    def __init__(self, *args: Any):
-+    def __init__(self, *args):
-         super(Sequential, self).__init__()
-         if len(args) == 1 and isinstance(args[0], OrderedDict):
-             for key, module in args[0].items():
-@@ -48,18 +40,17 @@
-             idx %= size
-         return next(islice(iterator, idx, None))
- 
--    @_copy_to_script_wrapper
--    def __getitem__(self: T, idx) -> T:
-+    def __getitem__(self, idx):
-         if isinstance(idx, slice):
-             return self.__class__(OrderedDict(list(self._modules.items())[idx]))
-         else:
-             return self._get_item_by_idx(self._modules.values(), idx)
- 
--    def __setitem__(self, idx: int, module: Module) -> None:
-+    def __setitem__(self, idx, module):
-         key = self._get_item_by_idx(self._modules.keys(), idx)
-         return setattr(self, key, module)
- 
--    def __delitem__(self, idx: Union[slice, int]) -> None:
-+    def __delitem__(self, idx):
-         if isinstance(idx, slice):
-             for key in list(self._modules.keys())[idx]:
-                 delattr(self, key)
-@@ -67,26 +58,16 @@
-             key = self._get_item_by_idx(self._modules.keys(), idx)
-             delattr(self, key)
- 
--    @_copy_to_script_wrapper
--    def __len__(self) -> int:
-+    def __len__(self):
-         return len(self._modules)
- 
--    @_copy_to_script_wrapper
-     def __dir__(self):
-         keys = super(Sequential, self).__dir__()
-         keys = [key for key in keys if not key.isdigit()]
-         return keys
- 
--    @_copy_to_script_wrapper
--    def __iter__(self) -> Iterator[Module]:
--        return iter(self._modules.values())
--
--    # NB: We can't really type check this function as the type of input
--    # may change dynamically (as is tested in
--    # TestScript.test_sequential_intermediary_types). Cannot annotate
--    # with Any as TorchScript expects a more precise type
-     def forward(self, input):
--        for module in self:
-+        for module in self._modules.values():
-             input = module(input)
-         return input
- 
\ No newline at end of file
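The container behavior patched above (``__getitem__`` and the iteration inside ``forward``) composes directly with PReLU from the previous patch; a small end-to-end sketch (layer sizes are arbitrary):

    >>> import torch
    >>> import torch.nn as nn
    >>> model = nn.Sequential(nn.Linear(10, 4), nn.PReLU(), nn.Linear(4, 2))
    >>> model[1]                         # indexing resolves to the registered submodule
    PReLU(num_parameters=1)
    >>> model(torch.randn(3, 10)).shape
    torch.Size([3, 2])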
diff --git a/Sigmoid.patch b/Sigmoid.patch
deleted file mode 100644
index 9ad9766..0000000
--- a/Sigmoid.patch
+++ /dev/null
@@ -1,29 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py
-@@ -2,7 +2,7 @@
-     r"""Applies the element-wise function:
- 
-     .. math::
--        \text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)}
-+        \text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)}
- 
- 
-     Shape:
-@@ -10,7 +10,7 @@
-         dimensions
-         - Output: :math:`(N, *)`, same shape as the input
- 
--    .. image:: ../scripts/activation_images/Sigmoid.png
-+    .. image:: scripts/activation_images/Sigmoid.png
- 
-     Examples::
- 
-@@ -19,6 +19,7 @@
-         >>> output = m(input)
-     """
- 
--    def forward(self, input: Tensor) -> Tensor:
-+    @weak_script_method
-+    def forward(self, input):
-         return torch.sigmoid(input)
- 
\ No newline at end of file
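Both sides of the first hunk define the same function (the :math:`\sigma(x)` on the old side is just alternative notation), and the formula checks out numerically:

    >>> import torch
    >>> import torch.nn as nn
    >>> x = torch.randn(2)
    >>> torch.allclose(nn.Sigmoid()(x), 1 / (1 + torch.exp(-x)))
    True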