From 04080733d8ec0bc4ceed835af41aee289c492e81 Mon Sep 17 00:00:00 2001
From: chenxuanhong
Date: Wed, 9 Jun 2021 13:14:36 +0800
Subject: [PATCH] delete *.patch

---
 AdaptiveAvgPool2d.patch |  29 ---------
 BatchNorm1d.patch       |  59 -----------------
 BatchNorm2d.patch       |  59 -----------------
 Conv2d.patch            | 140 ----------------------------------------
 DataParallel.patch      |  97 ----------------------------
 Dropout.patch           |  22 -------
 Linear.patch            |  64 ------------------
 MaxPool2d.patch         |  17 -----
 PReLU.patch             |  37 -----------
 Sequential.patch        |  70 --------------------
 Sigmoid.patch           |  29 ---------
 11 files changed, 623 deletions(-)
 delete mode 100644 AdaptiveAvgPool2d.patch
 delete mode 100644 BatchNorm1d.patch
 delete mode 100644 BatchNorm2d.patch
 delete mode 100644 Conv2d.patch
 delete mode 100644 DataParallel.patch
 delete mode 100644 Dropout.patch
 delete mode 100644 Linear.patch
 delete mode 100644 MaxPool2d.patch
 delete mode 100644 PReLU.patch
 delete mode 100644 Sequential.patch
 delete mode 100644 Sigmoid.patch

diff --git a/AdaptiveAvgPool2d.patch b/AdaptiveAvgPool2d.patch
deleted file mode 100644
index e7dc4ac..0000000
--- a/AdaptiveAvgPool2d.patch
+++ /dev/null
@@ -1,29 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py
-@@ -6,7 +6,7 @@
- 
-     Args:
-         output_size: the target output size of the image of the form H x W.
--                     Can be a tuple (H, W) or a single H for a square image H x H.
-+                     Can be a tuple (H, W) or a single H for a square image H x H
-                      H and W can be either a ``int``, or ``None`` which means the size will
-                      be the same as that of the input.
- 
-@@ -20,14 +20,13 @@
-         >>> input = torch.randn(1, 64, 10, 9)
-         >>> output = m(input)
-         >>> # target output size of 10x7
--        >>> m = nn.AdaptiveAvgPool2d((None, 7))
-+        >>> m = nn.AdaptiveMaxPool2d((None, 7))
-         >>> input = torch.randn(1, 64, 10, 9)
-         >>> output = m(input)
- 
-     """
- 
--    output_size: _size_2_t
--
--    def forward(self, input: Tensor) -> Tensor:
-+    @weak_script_method
-+    def forward(self, input):
-         return F.adaptive_avg_pool2d(input, self.output_size)
- 
\ No newline at end of file
diff --git a/BatchNorm1d.patch b/BatchNorm1d.patch
deleted file mode 100644
index f16cb73..0000000
--- a/BatchNorm1d.patch
+++ /dev/null
@@ -1,59 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
-@@ -1,8 +1,7 @@
- class BatchNorm1d(_BatchNorm):
-     r"""Applies Batch Normalization over a 2D or 3D input (a mini-batch of 1D
-     inputs with optional additional channel dimension) as described in the paper
--    `Batch Normalization: Accelerating Deep Network Training by Reducing
--    Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`__ .
-+    `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`_ .
- 
-     .. math::
- 
-@@ -10,9 +9,8 @@
- 
-     The mean and standard-deviation are calculated per-dimension over
-     the mini-batches and :math:`\gamma` and :math:`\beta` are learnable parameter vectors
--    of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are set
--    to 1 and the elements of :math:`\beta` are set to 0. The standard-deviation is calculated
--    via the biased estimator, equivalent to `torch.var(input, unbiased=False)`.
-+    of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are sampled
-+    from :math:`\mathcal{U}(0, 1)` and the elements of :math:`\beta` are set to 0.
- 
-     Also by default, during training this layer keeps running estimates of its
-     computed mean and variance, which are then used for normalization during
-@@ -27,7 +25,7 @@
-         This :attr:`momentum` argument is different from one used in optimizer
-         classes and the conventional notion of momentum. Mathematically, the
-         update rule for running statistics here is
--        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
-+        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momemtum} \times x_t`,
-         where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
-         new observed value.
- 
-@@ -46,10 +44,8 @@
-             learnable affine parameters. Default: ``True``
-         track_running_stats: a boolean value that when set to ``True``, this
-             module tracks the running mean and variance, and when set to ``False``,
--            this module does not track such statistics, and initializes statistics
--            buffers :attr:`running_mean` and :attr:`running_var` as ``None``.
--            When these buffers are ``None``, this module always uses batch statistics.
--            in both training and eval modes. Default: ``True``
-+            this module does not track such statistics and always uses batch
-+            statistics in both training and eval modes. Default: ``True``
- 
-     Shape:
-         - Input: :math:`(N, C)` or :math:`(N, C, L)`
-@@ -63,8 +59,12 @@
-         >>> m = nn.BatchNorm1d(100, affine=False)
-         >>> input = torch.randn(20, 100)
-         >>> output = m(input)
-+
-+    .. _`Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`:
-+        https://arxiv.org/abs/1502.03167
-     """
- 
-+    @weak_script_method
-     def _check_input_dim(self, input):
-         if input.dim() != 2 and input.dim() != 3:
-             raise ValueError('expected 2D or 3D input (got {}D input)'
\ No newline at end of file
diff --git a/BatchNorm2d.patch b/BatchNorm2d.patch
deleted file mode 100644
index c280325..0000000
--- a/BatchNorm2d.patch
+++ /dev/null
@@ -1,59 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py
-@@ -1,8 +1,7 @@
- class BatchNorm2d(_BatchNorm):
-     r"""Applies Batch Normalization over a 4D input (a mini-batch of 2D inputs
-     with additional channel dimension) as described in the paper
--    `Batch Normalization: Accelerating Deep Network Training by Reducing
--    Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`__ .
-+    `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`_ .
- 
-     .. math::
- 
-@@ -10,9 +9,8 @@
- 
-     The mean and standard-deviation are calculated per-dimension over
-     the mini-batches and :math:`\gamma` and :math:`\beta` are learnable parameter vectors
--    of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are set
--    to 1 and the elements of :math:`\beta` are set to 0. The standard-deviation is calculated
--    via the biased estimator, equivalent to `torch.var(input, unbiased=False)`.
-+    of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are sampled
-+    from :math:`\mathcal{U}(0, 1)` and the elements of :math:`\beta` are set to 0.
- 
-     Also by default, during training this layer keeps running estimates of its
-     computed mean and variance, which are then used for normalization during
-@@ -27,7 +25,7 @@
-         This :attr:`momentum` argument is different from one used in optimizer
-         classes and the conventional notion of momentum. Mathematically, the
-         update rule for running statistics here is
--        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
-+        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momemtum} \times x_t`,
-         where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
-         new observed value.
- 
-@@ -46,10 +44,8 @@
-             learnable affine parameters. Default: ``True``
-         track_running_stats: a boolean value that when set to ``True``, this
-             module tracks the running mean and variance, and when set to ``False``,
--            this module does not track such statistics, and initializes statistics
--            buffers :attr:`running_mean` and :attr:`running_var` as ``None``.
--            When these buffers are ``None``, this module always uses batch statistics.
--            in both training and eval modes. Default: ``True``
-+            this module does not track such statistics and always uses batch
-+            statistics in both training and eval modes. Default: ``True``
- 
-     Shape:
-         - Input: :math:`(N, C, H, W)`
-@@ -63,8 +59,12 @@
-         >>> m = nn.BatchNorm2d(100, affine=False)
-         >>> input = torch.randn(20, 100, 35, 45)
-         >>> output = m(input)
-+
-+    .. _`Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`:
-+        https://arxiv.org/abs/1502.03167
-     """
- 
-+    @weak_script_method
-     def _check_input_dim(self, input):
-         if input.dim() != 4:
-             raise ValueError('expected 4D input (got {}D input)'
\ No newline at end of file
diff --git a/Conv2d.patch b/Conv2d.patch
deleted file mode 100644
index a2228e4..0000000
--- a/Conv2d.patch
+++ /dev/null
@@ -1,140 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/conv.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/conv.py
-@@ -15,8 +15,6 @@
-     :math:`N` is a batch size, :math:`C` denotes a number of channels,
-     :math:`H` is a height of input planes in pixels, and :math:`W` is
-     width in pixels.
--
--    This module supports :ref:`TensorFloat32`.
- 
-     * :attr:`stride` controls the stride for the cross-correlation, a single
-       number or a tuple.
-@@ -39,7 +37,7 @@
-       concatenated.
-     * At groups= :attr:`in_channels`, each input channel is convolved with
-       its own set of filters, of size:
--      :math:`\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor`.
-+      :math:`\left\lfloor\frac{C_\text{out}}{C_\text{in}}\right\rfloor`.
- 
-     The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
- 
-@@ -47,14 +45,14 @@
-         - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
-           and the second `int` for the width dimension
- 
--    Note:
-+    .. note::
- 
-         Depending of the size of your kernel, several (of the last)
-         columns of the input might be lost, because it is a valid `cross-correlation`_,
-         and not a full `cross-correlation`_.
-         It is up to the user to add proper padding.
- 
--    Note:
-+    .. note::
- 
-         When `groups == in_channels` and `out_channels == K * in_channels`,
-         where `K` is a positive integer, this operation is also termed in
-@@ -64,29 +62,17 @@
-         a depthwise convolution with a depthwise multiplier `K`, can be constructed by arguments
-         :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`.
- 
--    Note:
--        In some circumstances when using the CUDA backend with CuDNN, this operator
--        may select a nondeterministic algorithm to increase performance. If this is
--        undesirable, you can try to make the operation deterministic (potentially at
--        a performance cost) by setting ``torch.backends.cudnn.deterministic =
--        True``.
--        Please see the notes on :doc:`/notes/randomness` for background.
--
-+    .. include:: cudnn_deterministic.rst
- 
-     Args:
-         in_channels (int): Number of channels in the input image
-         out_channels (int): Number of channels produced by the convolution
-         kernel_size (int or tuple): Size of the convolving kernel
-         stride (int or tuple, optional): Stride of the convolution. Default: 1
--        padding (int or tuple, optional): Zero-padding added to both sides of
--            the input. Default: 0
--        padding_mode (string, optional): ``'zeros'``, ``'reflect'``,
--            ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
-+        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
-         dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
--        groups (int, optional): Number of blocked connections from input
--            channels to output channels. Default: 1
--        bias (bool, optional): If ``True``, adds a learnable bias to the
--            output. Default: ``True``
-+        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
-+        bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
- 
-     Shape:
-         - Input: :math:`(N, C_{in}, H_{in}, W_{in})`
-@@ -102,18 +88,16 @@
- 
-     Attributes:
-         weight (Tensor): the learnable weights of the module of shape
--            :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}},`
--            :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]})`.
--            The values of these weights are sampled from
--            :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
--            :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
--        bias (Tensor): the learnable bias of the module of shape
--            (out_channels). If :attr:`bias` is ``True``,
--            then the values of these weights are
--            sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
--            :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
-+            (out_channels, in_channels, kernel_size[0], kernel_size[1]).
-+            The values of these weights are sampled from
-+            :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
-+            :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
-+        bias (Tensor): the learnable bias of the module of shape (out_channels). If :attr:`bias` is ``True``,
-+            then the values of these weights are
-+            sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
-+            :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
- 
--    Examples:
-+    Examples::
- 
-         >>> # With square kernels and equal stride
-         >>> m = nn.Conv2d(16, 33, 3, stride=2)
-@@ -130,34 +114,18 @@
-     .. _link:
-         https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
-     """
--    def __init__(
--        self,
--        in_channels: int,
--        out_channels: int,
--        kernel_size: _size_2_t,
--        stride: _size_2_t = 1,
--        padding: _size_2_t = 0,
--        dilation: _size_2_t = 1,
--        groups: int = 1,
--        bias: bool = True,
--        padding_mode: str = 'zeros'  # TODO: refine this type
--    ):
-+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
-+                 padding=0, dilation=1, groups=1, bias=True):
-         kernel_size = _pair(kernel_size)
-         stride = _pair(stride)
-         padding = _pair(padding)
-         dilation = _pair(dilation)
-         super(Conv2d, self).__init__(
-             in_channels, out_channels, kernel_size, stride, padding, dilation,
--            False, _pair(0), groups, bias, padding_mode)
-+            False, _pair(0), groups, bias)
- 
--    def _conv_forward(self, input, weight):
--        if self.padding_mode != 'zeros':
--            return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
--                            weight, self.bias, self.stride,
--                            _pair(0), self.dilation, self.groups)
--        return F.conv2d(input, weight, self.bias, self.stride,
-+    @weak_script_method
-+    def forward(self, input):
-+        return F.conv2d(input, self.weight, self.bias, self.stride,
-                         self.padding, self.dilation, self.groups)
- 
--    def forward(self, input: Tensor) -> Tensor:
--        return self._conv_forward(input, self.weight)
--
\ No newline at end of file
diff --git a/DataParallel.patch b/DataParallel.patch
deleted file mode 100644
index 8fddc8e..0000000
--- a/DataParallel.patch
+++ /dev/null
@@ -1,97 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/parallel/data_parallel.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/parallel/data_parallel.py
-@@ -10,16 +10,13 @@
- 
-     The batch size should be larger than the number of GPUs used.
- 
--    .. warning::
--        It is recommended to use :class:`~torch.nn.parallel.DistributedDataParallel`,
--        instead of this class, to do multi-GPU training, even if there is only a single
--        node. See: :ref:`cuda-nn-ddp-instead` and :ref:`ddp`.
-+    See also: :ref:`cuda-nn-dataparallel-instead`
- 
-     Arbitrary positional and keyword inputs are allowed to be passed into
--    DataParallel but some types are specially handled. tensors will be
--    **scattered** on dim specified (default 0). tuple, list and dict types will
--    be shallow copied. The other types will be shared among different threads
--    and can be corrupted if written to in the model's forward pass.
-+    DataParallel EXCEPT Tensors. All tensors will be scattered on dim
-+    specified (default 0). Primitive types will be broadcasted, but all
-+    other types will be a shallow copy and can be corrupted if written to in
-+    the model's forward pass.
- 
-     The parallelized :attr:`module` must have its parameters and buffers on
-     ``device_ids[0]`` before running this :class:`~torch.nn.DataParallel`
-@@ -27,9 +24,9 @@
- 
-     .. warning::
-         In each forward, :attr:`module` is **replicated** on each device, so any
--        updates to the running module in ``forward`` will be lost. For example,
-+        updates to the runing module in ``forward`` will be lost. For example,
-         if :attr:`module` has a counter attribute that is incremented in each
--        ``forward``, it will always stay at the initial value because the update
-+        ``forward``, it will always stay at the initial value becasue the update
-         is done on the replicas which are destroyed after ``forward``. However,
-         :class:`~torch.nn.DataParallel` guarantees that the replica on
-         ``device[0]`` will have its parameters and buffers sharing storage with
-@@ -74,7 +71,7 @@
-     Example::
- 
-         >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2])
--        >>> output = net(input_var)  # input_var can be on any device, including CPU
-+        >>> output = net(input_var)
-     """
- 
-     # TODO: update notes/cuda.rst when this class handles 8+ GPUs well
-@@ -82,15 +79,13 @@
-     def __init__(self, module, device_ids=None, output_device=None, dim=0):
-         super(DataParallel, self).__init__()
- 
--        device_type = _get_available_device_type()
--        if device_type is None:
-+        if not torch.cuda.is_available():
-             self.module = module
-             self.device_ids = []
-             return
- 
-         if device_ids is None:
--            device_ids = _get_all_device_indices()
--
-+            device_ids = list(range(torch.cuda.device_count()))
-         if output_device is None:
-             output_device = device_ids[0]
- 
-@@ -98,23 +93,15 @@
-         self.module = module
-         self.device_ids = list(map(lambda x: _get_device_index(x, True), device_ids))
-         self.output_device = _get_device_index(output_device, True)
--        self.src_device_obj = torch.device(device_type, self.device_ids[0])
- 
-         _check_balance(self.device_ids)
- 
-         if len(self.device_ids) == 1:
--            self.module.to(self.src_device_obj)
-+            self.module.cuda(device_ids[0])
- 
-     def forward(self, *inputs, **kwargs):
-         if not self.device_ids:
-             return self.module(*inputs, **kwargs)
--
--        for t in chain(self.module.parameters(), self.module.buffers()):
--            if t.device != self.src_device_obj:
--                raise RuntimeError("module must have its parameters and buffers "
--                                   "on device {} (device_ids[0]) but found one of "
--                                   "them on device: {}".format(self.src_device_obj, t.device))
--
-         inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
-         if len(self.device_ids) == 1:
-             return self.module(*inputs[0], **kwargs[0])
-@@ -123,7 +110,7 @@
-         return self.gather(outputs, self.output_device)
- 
-     def replicate(self, module, device_ids):
--        return replicate(module, device_ids, not torch.is_grad_enabled())
-+        return replicate(module, device_ids)
- 
-     def scatter(self, inputs, kwargs, device_ids):
-         return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
\ No newline at end of file
diff --git a/Dropout.patch b/Dropout.patch
deleted file mode 100644
index e4345bf..0000000
--- a/Dropout.patch
+++ /dev/null
@@ -1,22 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/dropout.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/dropout.py
-@@ -18,8 +18,8 @@
-         inplace: If set to ``True``, will do this operation in-place. Default: ``False``
- 
-     Shape:
--        - Input: :math:`(*)`. Input can be of any shape
--        - Output: :math:`(*)`. Output is of the same shape as input
-+        - Input: `Any`. Input can be of any shape
-+        - Output: `Same`. Output is of the same shape as input
- 
-     Examples::
- 
-@@ -31,6 +31,7 @@
-        detectors: https://arxiv.org/abs/1207.0580
-     """
- 
--    def forward(self, input: Tensor) -> Tensor:
-+    @weak_script_method
-+    def forward(self, input):
-         return F.dropout(input, self.p, self.training, self.inplace)
- 
\ No newline at end of file
diff --git a/Linear.patch b/Linear.patch
deleted file mode 100644
index ef25bbe..0000000
--- a/Linear.patch
+++ /dev/null
@@ -1,64 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/linear.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/linear.py
-@@ -1,19 +1,17 @@
- class Linear(Module):
-     r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b`
--
--    This module supports :ref:`TensorFloat32`.
- 
-     Args:
-         in_features: size of each input sample
-         out_features: size of each output sample
--        bias: If set to ``False``, the layer will not learn an additive bias.
-+        bias: If set to False, the layer will not learn an additive bias.
-             Default: ``True``
- 
-     Shape:
--        - Input: :math:`(N, *, H_{in})` where :math:`*` means any number of
--          additional dimensions and :math:`H_{in} = \text{in\_features}`
--        - Output: :math:`(N, *, H_{out})` where all but the last dimension
--          are the same shape as the input and :math:`H_{out} = \text{out\_features}`.
-+        - Input: :math:`(N, *, \text{in\_features})` where :math:`*` means any number of
-+          additional dimensions
-+        - Output: :math:`(N, *, \text{out\_features})` where all but the last dimension
-+          are the same shape as the input.
- 
-     Attributes:
-         weight: the learnable weights of the module of shape
-@@ -33,12 +31,9 @@
-         >>> print(output.size())
-         torch.Size([128, 30])
-     """
--    __constants__ = ['in_features', 'out_features']
--    in_features: int
--    out_features: int
--    weight: Tensor
-+    __constants__ = ['bias']
- 
--    def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
-+    def __init__(self, in_features, out_features, bias=True):
-         super(Linear, self).__init__()
-         self.in_features = in_features
-         self.out_features = out_features
-@@ -49,17 +44,18 @@
-             self.register_parameter('bias', None)
-         self.reset_parameters()
- 
--    def reset_parameters(self) -> None:
-+    def reset_parameters(self):
-         init.kaiming_uniform_(self.weight, a=math.sqrt(5))
-         if self.bias is not None:
-             fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
-             bound = 1 / math.sqrt(fan_in)
-             init.uniform_(self.bias, -bound, bound)
- 
--    def forward(self, input: Tensor) -> Tensor:
-+    @weak_script_method
-+    def forward(self, input):
-         return F.linear(input, self.weight, self.bias)
- 
--    def extra_repr(self) -> str:
-+    def extra_repr(self):
-         return 'in_features={}, out_features={}, bias={}'.format(
-             self.in_features, self.out_features, self.bias is not None
-         )
\ No newline at end of file
diff --git a/MaxPool2d.patch b/MaxPool2d.patch
deleted file mode 100644
index 5a991b0..0000000
--- a/MaxPool2d.patch
+++ /dev/null
@@ -1,17 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py
-@@ -57,12 +57,8 @@
-         https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
-     """
- 
--    kernel_size: _size_2_t
--    stride: _size_2_t
--    padding: _size_2_t
--    dilation: _size_2_t
--
--    def forward(self, input: Tensor) -> Tensor:
-+    @weak_script_method
-+    def forward(self, input):
-         return F.max_pool2d(input, self.kernel_size, self.stride,
-                             self.padding, self.dilation, self.ceil_mode,
-                             self.return_indices)
\ No newline at end of file
diff --git a/PReLU.patch b/PReLU.patch
deleted file mode 100644
index d74cce1..0000000
--- a/PReLU.patch
+++ /dev/null
@@ -1,37 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py
-@@ -37,9 +37,10 @@
-         - Output: :math:`(N, *)`, same shape as the input
- 
-     Attributes:
--        weight (Tensor): the learnable weights of shape (:attr:`num_parameters`).
-+        weight (Tensor): the learnable weights of shape (attr:`num_parameters`).
-+            The attr:`dtype` is default to
- 
--    .. image:: ../scripts/activation_images/PReLU.png
-+    .. image:: scripts/activation_images/PReLU.png
- 
-     Examples::
- 
-@@ -47,17 +48,16 @@
-         >>> input = torch.randn(2)
-         >>> output = m(input)
-     """
--    __constants__ = ['num_parameters']
--    num_parameters: int
- 
--    def __init__(self, num_parameters: int = 1, init: float = 0.25) -> None:
-+    def __init__(self, num_parameters=1, init=0.25):
-         self.num_parameters = num_parameters
-         super(PReLU, self).__init__()
-         self.weight = Parameter(torch.Tensor(num_parameters).fill_(init))
- 
--    def forward(self, input: Tensor) -> Tensor:
-+    @weak_script_method
-+    def forward(self, input):
-         return F.prelu(input, self.weight)
- 
--    def extra_repr(self) -> str:
-+    def extra_repr(self):
-         return 'num_parameters={}'.format(self.num_parameters)
- 
\ No newline at end of file
diff --git a/Sequential.patch b/Sequential.patch
deleted file mode 100644
index 6c7f6ac..0000000
--- a/Sequential.patch
+++ /dev/null
@@ -1,70 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/container.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/container.py
-@@ -22,15 +22,7 @@
-                 ]))
-     """
- 
--    @overload
--    def __init__(self, *args: Module) -> None:
--        ...
--
--    @overload
--    def __init__(self, arg: 'OrderedDict[str, Module]') -> None:
--        ...
--
--    def __init__(self, *args: Any):
-+    def __init__(self, *args):
-         super(Sequential, self).__init__()
-         if len(args) == 1 and isinstance(args[0], OrderedDict):
-             for key, module in args[0].items():
-@@ -48,18 +40,17 @@
-             idx %= size
-         return next(islice(iterator, idx, None))
- 
--    @_copy_to_script_wrapper
--    def __getitem__(self: T, idx) -> T:
-+    def __getitem__(self, idx):
-         if isinstance(idx, slice):
-             return self.__class__(OrderedDict(list(self._modules.items())[idx]))
-         else:
-             return self._get_item_by_idx(self._modules.values(), idx)
- 
--    def __setitem__(self, idx: int, module: Module) -> None:
-+    def __setitem__(self, idx, module):
-         key = self._get_item_by_idx(self._modules.keys(), idx)
-         return setattr(self, key, module)
- 
--    def __delitem__(self, idx: Union[slice, int]) -> None:
-+    def __delitem__(self, idx):
-         if isinstance(idx, slice):
-             for key in list(self._modules.keys())[idx]:
-                 delattr(self, key)
-@@ -67,26 +58,16 @@
-             key = self._get_item_by_idx(self._modules.keys(), idx)
-             delattr(self, key)
- 
--    @_copy_to_script_wrapper
--    def __len__(self) -> int:
-+    def __len__(self):
-         return len(self._modules)
- 
--    @_copy_to_script_wrapper
-     def __dir__(self):
-         keys = super(Sequential, self).__dir__()
-         keys = [key for key in keys if not key.isdigit()]
-         return keys
- 
--    @_copy_to_script_wrapper
--    def __iter__(self) -> Iterator[Module]:
--        return iter(self._modules.values())
--
--    # NB: We can't really type check this function as the type of input
--    # may change dynamically (as is tested in
--    # TestScript.test_sequential_intermediary_types). Cannot annotate
--    # with Any as TorchScript expects a more precise type
-     def forward(self, input):
--        for module in self:
-+        for module in self._modules.values():
-             input = module(input)
-         return input
- 
\ No newline at end of file
diff --git a/Sigmoid.patch b/Sigmoid.patch
deleted file mode 100644
index 9ad9766..0000000
--- a/Sigmoid.patch
+++ /dev/null
@@ -1,29 +0,0 @@
---- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py
-+++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py
-@@ -2,7 +2,7 @@
-     r"""Applies the element-wise function:
- 
-     .. math::
--        \text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)}
-+        \text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)}
- 
- 
-     Shape:
-@@ -10,7 +10,7 @@
-           dimensions
-         - Output: :math:`(N, *)`, same shape as the input
- 
--    .. image:: ../scripts/activation_images/Sigmoid.png
-+    .. image:: scripts/activation_images/Sigmoid.png
- 
-     Examples::
- 
-@@ -19,6 +19,7 @@
-         >>> input = torch.randn(2)
-         >>> output = m(input)
-     """
- 
--    def forward(self, input: Tensor) -> Tensor:
-+    @weak_script_method
-+    def forward(self, input):
-         return torch.sigmoid(input)
- 
\ No newline at end of file
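
Note: each file removed above is a unified diff whose ``---``/``+++`` headers
point into an installed PyTorch tree
(/usr/local/lib/python3.5/dist-packages/torch/...), rewriting a module's
docstring and ``forward`` back to an older, ``@weak_script_method``-based API.
The commit does not record how the patches were consumed; the sketch below
only assumes the standard ``patch`` utility, and the ``apply_patch`` helper is
hypothetical::

    import subprocess

    def apply_patch(patch_file, revert=False):
        """Apply (or reverse) one of the unified diffs with GNU patch.

        The diff headers carry absolute paths, so ``-p0`` keeps them
        intact; writing into dist-packages usually needs root privileges.
        """
        cmd = ["patch", "-p0"]
        if revert:
            cmd.append("-R")  # reverse-apply, restoring the stock sources
        with open(patch_file) as f:
            subprocess.run(cmd, stdin=f, check=True)

    # e.g. apply_patch("Conv2d.patch") to pin the installed sources, and
    # apply_patch("Conv2d.patch", revert=True) to undo it.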