diff --git a/.gitignore b/.gitignore index b6e4761..c38a594 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,7 @@ dmypy.json # Pyre type checker .pyre/ + +checkpoints/ +*.tar +*.zip diff --git a/AdaptiveAvgPool2d.patch b/AdaptiveAvgPool2d.patch new file mode 100644 index 0000000..e7dc4ac --- /dev/null +++ b/AdaptiveAvgPool2d.patch @@ -0,0 +1,29 @@ +--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py ++++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py +@@ -6,7 +6,7 @@ + + Args: + output_size: the target output size of the image of the form H x W. +- Can be a tuple (H, W) or a single H for a square image H x H. ++ Can be a tuple (H, W) or a single H for a square image H x H + H and W can be either a ``int``, or ``None`` which means the size will + be the same as that of the input. + +@@ -20,14 +20,13 @@ + >>> input = torch.randn(1, 64, 10, 9) + >>> output = m(input) + >>> # target output size of 10x7 +- >>> m = nn.AdaptiveAvgPool2d((None, 7)) ++ >>> m = nn.AdaptiveMaxPool2d((None, 7)) + >>> input = torch.randn(1, 64, 10, 9) + >>> output = m(input) + + """ + +- output_size: _size_2_t +- +- def forward(self, input: Tensor) -> Tensor: ++ @weak_script_method ++ def forward(self, input): + return F.adaptive_avg_pool2d(input, self.output_size) + \ No newline at end of file diff --git a/BatchNorm1d.patch b/BatchNorm1d.patch new file mode 100644 index 0000000..f16cb73 --- /dev/null +++ b/BatchNorm1d.patch @@ -0,0 +1,59 @@ +--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py ++++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py +@@ -1,8 +1,7 @@ + class BatchNorm1d(_BatchNorm): + r"""Applies Batch Normalization over a 2D or 3D input (a mini-batch of 1D + inputs with optional additional channel dimension) as described in the paper +- `Batch Normalization: Accelerating Deep Network Training by Reducing +- Internal Covariate Shift `__ . ++ `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`_ . + + .. math:: + +@@ -10,9 +9,8 @@ + + The mean and standard-deviation are calculated per-dimension over + the mini-batches and :math:`\gamma` and :math:`\beta` are learnable parameter vectors +- of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are set +- to 1 and the elements of :math:`\beta` are set to 0. The standard-deviation is calculated +- via the biased estimator, equivalent to `torch.var(input, unbiased=False)`. ++ of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are sampled ++ from :math:`\mathcal{U}(0, 1)` and the elements of :math:`\beta` are set to 0. + + Also by default, during training this layer keeps running estimates of its + computed mean and variance, which are then used for normalization during +@@ -27,7 +25,7 @@ + This :attr:`momentum` argument is different from one used in optimizer + classes and the conventional notion of momentum. Mathematically, the + update rule for running statistics here is +- :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`, ++ :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momemtum} \times x_t`, + where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the + new observed value. + +@@ -46,10 +44,8 @@ + learnable affine parameters. Default: ``True`` + track_running_stats: a boolean value that when set to ``True``, this + module tracks the running mean and variance, and when set to ``False``, +- this module does not track such statistics, and initializes statistics +- buffers :attr:`running_mean` and :attr:`running_var` as ``None``. +- When these buffers are ``None``, this module always uses batch statistics. +- in both training and eval modes. Default: ``True`` ++ this module does not track such statistics and always uses batch ++ statistics in both training and eval modes. Default: ``True`` + + Shape: + - Input: :math:`(N, C)` or :math:`(N, C, L)` +@@ -63,8 +59,12 @@ + >>> m = nn.BatchNorm1d(100, affine=False) + >>> input = torch.randn(20, 100) + >>> output = m(input) ++ ++ .. _`Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`: ++ https://arxiv.org/abs/1502.03167 + """ + ++ @weak_script_method + def _check_input_dim(self, input): + if input.dim() != 2 and input.dim() != 3: + raise ValueError('expected 2D or 3D input (got {}D input)' \ No newline at end of file diff --git a/BatchNorm2d.patch b/BatchNorm2d.patch new file mode 100644 index 0000000..c280325 --- /dev/null +++ b/BatchNorm2d.patch @@ -0,0 +1,59 @@ +--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py ++++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/batchnorm.py +@@ -1,8 +1,7 @@ + class BatchNorm2d(_BatchNorm): + r"""Applies Batch Normalization over a 4D input (a mini-batch of 2D inputs + with additional channel dimension) as described in the paper +- `Batch Normalization: Accelerating Deep Network Training by Reducing +- Internal Covariate Shift `__ . ++ `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`_ . + + .. math:: + +@@ -10,9 +9,8 @@ + + The mean and standard-deviation are calculated per-dimension over + the mini-batches and :math:`\gamma` and :math:`\beta` are learnable parameter vectors +- of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are set +- to 1 and the elements of :math:`\beta` are set to 0. The standard-deviation is calculated +- via the biased estimator, equivalent to `torch.var(input, unbiased=False)`. ++ of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are sampled ++ from :math:`\mathcal{U}(0, 1)` and the elements of :math:`\beta` are set to 0. + + Also by default, during training this layer keeps running estimates of its + computed mean and variance, which are then used for normalization during +@@ -27,7 +25,7 @@ + This :attr:`momentum` argument is different from one used in optimizer + classes and the conventional notion of momentum. Mathematically, the + update rule for running statistics here is +- :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`, ++ :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momemtum} \times x_t`, + where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the + new observed value. + +@@ -46,10 +44,8 @@ + learnable affine parameters. Default: ``True`` + track_running_stats: a boolean value that when set to ``True``, this + module tracks the running mean and variance, and when set to ``False``, +- this module does not track such statistics, and initializes statistics +- buffers :attr:`running_mean` and :attr:`running_var` as ``None``. +- When these buffers are ``None``, this module always uses batch statistics. +- in both training and eval modes. Default: ``True`` ++ this module does not track such statistics and always uses batch ++ statistics in both training and eval modes. Default: ``True`` + + Shape: + - Input: :math:`(N, C, H, W)` +@@ -63,8 +59,12 @@ + >>> m = nn.BatchNorm2d(100, affine=False) + >>> input = torch.randn(20, 100, 35, 45) + >>> output = m(input) ++ ++ .. _`Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`: ++ https://arxiv.org/abs/1502.03167 + """ + ++ @weak_script_method + def _check_input_dim(self, input): + if input.dim() != 4: + raise ValueError('expected 4D input (got {}D input)' \ No newline at end of file diff --git a/Conv2d.patch b/Conv2d.patch new file mode 100644 index 0000000..a2228e4 --- /dev/null +++ b/Conv2d.patch @@ -0,0 +1,140 @@ +--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/conv.py ++++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/conv.py +@@ -15,8 +15,6 @@ + :math:`N` is a batch size, :math:`C` denotes a number of channels, + :math:`H` is a height of input planes in pixels, and :math:`W` is + width in pixels. +- +- This module supports :ref:`TensorFloat32`. + + * :attr:`stride` controls the stride for the cross-correlation, a single + number or a tuple. +@@ -39,7 +37,7 @@ + concatenated. + * At groups= :attr:`in_channels`, each input channel is convolved with + its own set of filters, of size: +- :math:`\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor`. ++ :math:`\left\lfloor\frac{C_\text{out}}{C_\text{in}}\right\rfloor`. + + The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be: + +@@ -47,14 +45,14 @@ + - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension, + and the second `int` for the width dimension + +- Note: ++ .. note:: + + Depending of the size of your kernel, several (of the last) + columns of the input might be lost, because it is a valid `cross-correlation`_, + and not a full `cross-correlation`_. + It is up to the user to add proper padding. + +- Note: ++ .. note:: + + When `groups == in_channels` and `out_channels == K * in_channels`, + where `K` is a positive integer, this operation is also termed in +@@ -64,29 +62,17 @@ + a depthwise convolution with a depthwise multiplier `K`, can be constructed by arguments + :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`. + +- Note: +- In some circumstances when using the CUDA backend with CuDNN, this operator +- may select a nondeterministic algorithm to increase performance. If this is +- undesirable, you can try to make the operation deterministic (potentially at +- a performance cost) by setting ``torch.backends.cudnn.deterministic = +- True``. +- Please see the notes on :doc:`/notes/randomness` for background. +- ++ .. include:: cudnn_deterministic.rst + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 +- padding (int or tuple, optional): Zero-padding added to both sides of +- the input. Default: 0 +- padding_mode (string, optional): ``'zeros'``, ``'reflect'``, +- ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` ++ padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0 + dilation (int or tuple, optional): Spacing between kernel elements. Default: 1 +- groups (int, optional): Number of blocked connections from input +- channels to output channels. Default: 1 +- bias (bool, optional): If ``True``, adds a learnable bias to the +- output. Default: ``True`` ++ groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1 ++ bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True`` + + Shape: + - Input: :math:`(N, C_{in}, H_{in}, W_{in})` +@@ -102,18 +88,16 @@ + + Attributes: + weight (Tensor): the learnable weights of the module of shape +- :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}},` +- :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]})`. +- The values of these weights are sampled from +- :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where +- :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}` +- bias (Tensor): the learnable bias of the module of shape +- (out_channels). If :attr:`bias` is ``True``, +- then the values of these weights are +- sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where +- :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}` ++ (out_channels, in_channels, kernel_size[0], kernel_size[1]). ++ The values of these weights are sampled from ++ :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where ++ :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}` ++ bias (Tensor): the learnable bias of the module of shape (out_channels). If :attr:`bias` is ``True``, ++ then the values of these weights are ++ sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where ++ :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}` + +- Examples: ++ Examples:: + + >>> # With square kernels and equal stride + >>> m = nn.Conv2d(16, 33, 3, stride=2) +@@ -130,34 +114,18 @@ + .. _link: + https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md + """ +- def __init__( +- self, +- in_channels: int, +- out_channels: int, +- kernel_size: _size_2_t, +- stride: _size_2_t = 1, +- padding: _size_2_t = 0, +- dilation: _size_2_t = 1, +- groups: int = 1, +- bias: bool = True, +- padding_mode: str = 'zeros' # TODO: refine this type +- ): ++ def __init__(self, in_channels, out_channels, kernel_size, stride=1, ++ padding=0, dilation=1, groups=1, bias=True): + kernel_size = _pair(kernel_size) + stride = _pair(stride) + padding = _pair(padding) + dilation = _pair(dilation) + super(Conv2d, self).__init__( + in_channels, out_channels, kernel_size, stride, padding, dilation, +- False, _pair(0), groups, bias, padding_mode) ++ False, _pair(0), groups, bias) + +- def _conv_forward(self, input, weight): +- if self.padding_mode != 'zeros': +- return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode), +- weight, self.bias, self.stride, +- _pair(0), self.dilation, self.groups) +- return F.conv2d(input, weight, self.bias, self.stride, ++ @weak_script_method ++ def forward(self, input): ++ return F.conv2d(input, self.weight, self.bias, self.stride, + self.padding, self.dilation, self.groups) + +- def forward(self, input: Tensor) -> Tensor: +- return self._conv_forward(input, self.weight) +- \ No newline at end of file diff --git a/DataParallel.patch b/DataParallel.patch new file mode 100644 index 0000000..8fddc8e --- /dev/null +++ b/DataParallel.patch @@ -0,0 +1,97 @@ +--- /usr/local/lib/python3.5/dist-packages/torch/nn/parallel/data_parallel.py ++++ /usr/local/lib/python3.5/dist-packages/torch/nn/parallel/data_parallel.py +@@ -10,16 +10,13 @@ + + The batch size should be larger than the number of GPUs used. + +- .. warning:: +- It is recommended to use :class:`~torch.nn.parallel.DistributedDataParallel`, +- instead of this class, to do multi-GPU training, even if there is only a single +- node. See: :ref:`cuda-nn-ddp-instead` and :ref:`ddp`. ++ See also: :ref:`cuda-nn-dataparallel-instead` + + Arbitrary positional and keyword inputs are allowed to be passed into +- DataParallel but some types are specially handled. tensors will be +- **scattered** on dim specified (default 0). tuple, list and dict types will +- be shallow copied. The other types will be shared among different threads +- and can be corrupted if written to in the model's forward pass. ++ DataParallel EXCEPT Tensors. All tensors will be scattered on dim ++ specified (default 0). Primitive types will be broadcasted, but all ++ other types will be a shallow copy and can be corrupted if written to in ++ the model's forward pass. + + The parallelized :attr:`module` must have its parameters and buffers on + ``device_ids[0]`` before running this :class:`~torch.nn.DataParallel` +@@ -27,9 +24,9 @@ + + .. warning:: + In each forward, :attr:`module` is **replicated** on each device, so any +- updates to the running module in ``forward`` will be lost. For example, ++ updates to the runing module in ``forward`` will be lost. For example, + if :attr:`module` has a counter attribute that is incremented in each +- ``forward``, it will always stay at the initial value because the update ++ ``forward``, it will always stay at the initial value becasue the update + is done on the replicas which are destroyed after ``forward``. However, + :class:`~torch.nn.DataParallel` guarantees that the replica on + ``device[0]`` will have its parameters and buffers sharing storage with +@@ -74,7 +71,7 @@ + Example:: + + >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) +- >>> output = net(input_var) # input_var can be on any device, including CPU ++ >>> output = net(input_var) + """ + + # TODO: update notes/cuda.rst when this class handles 8+ GPUs well +@@ -82,15 +79,13 @@ + def __init__(self, module, device_ids=None, output_device=None, dim=0): + super(DataParallel, self).__init__() + +- device_type = _get_available_device_type() +- if device_type is None: ++ if not torch.cuda.is_available(): + self.module = module + self.device_ids = [] + return + + if device_ids is None: +- device_ids = _get_all_device_indices() +- ++ device_ids = list(range(torch.cuda.device_count())) + if output_device is None: + output_device = device_ids[0] + +@@ -98,23 +93,15 @@ + self.module = module + self.device_ids = list(map(lambda x: _get_device_index(x, True), device_ids)) + self.output_device = _get_device_index(output_device, True) +- self.src_device_obj = torch.device(device_type, self.device_ids[0]) + + _check_balance(self.device_ids) + + if len(self.device_ids) == 1: +- self.module.to(self.src_device_obj) ++ self.module.cuda(device_ids[0]) + + def forward(self, *inputs, **kwargs): + if not self.device_ids: + return self.module(*inputs, **kwargs) +- +- for t in chain(self.module.parameters(), self.module.buffers()): +- if t.device != self.src_device_obj: +- raise RuntimeError("module must have its parameters and buffers " +- "on device {} (device_ids[0]) but found one of " +- "them on device: {}".format(self.src_device_obj, t.device)) +- + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + return self.module(*inputs[0], **kwargs[0]) +@@ -123,7 +110,7 @@ + return self.gather(outputs, self.output_device) + + def replicate(self, module, device_ids): +- return replicate(module, device_ids, not torch.is_grad_enabled()) ++ return replicate(module, device_ids) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) \ No newline at end of file diff --git a/Dropout.patch b/Dropout.patch new file mode 100644 index 0000000..e4345bf --- /dev/null +++ b/Dropout.patch @@ -0,0 +1,22 @@ +--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/dropout.py ++++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/dropout.py +@@ -18,8 +18,8 @@ + inplace: If set to ``True``, will do this operation in-place. Default: ``False`` + + Shape: +- - Input: :math:`(*)`. Input can be of any shape +- - Output: :math:`(*)`. Output is of the same shape as input ++ - Input: `Any`. Input can be of any shape ++ - Output: `Same`. Output is of the same shape as input + + Examples:: + +@@ -31,6 +31,7 @@ + detectors: https://arxiv.org/abs/1207.0580 + """ + +- def forward(self, input: Tensor) -> Tensor: ++ @weak_script_method ++ def forward(self, input): + return F.dropout(input, self.p, self.training, self.inplace) + \ No newline at end of file diff --git a/Linear.patch b/Linear.patch new file mode 100644 index 0000000..ef25bbe --- /dev/null +++ b/Linear.patch @@ -0,0 +1,64 @@ +--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/linear.py ++++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/linear.py +@@ -1,19 +1,17 @@ + class Linear(Module): + r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b` +- +- This module supports :ref:`TensorFloat32`. + + Args: + in_features: size of each input sample + out_features: size of each output sample +- bias: If set to ``False``, the layer will not learn an additive bias. ++ bias: If set to False, the layer will not learn an additive bias. + Default: ``True`` + + Shape: +- - Input: :math:`(N, *, H_{in})` where :math:`*` means any number of +- additional dimensions and :math:`H_{in} = \text{in\_features}` +- - Output: :math:`(N, *, H_{out})` where all but the last dimension +- are the same shape as the input and :math:`H_{out} = \text{out\_features}`. ++ - Input: :math:`(N, *, \text{in\_features})` where :math:`*` means any number of ++ additional dimensions ++ - Output: :math:`(N, *, \text{out\_features})` where all but the last dimension ++ are the same shape as the input. + + Attributes: + weight: the learnable weights of the module of shape +@@ -33,12 +31,9 @@ + >>> print(output.size()) + torch.Size([128, 30]) + """ +- __constants__ = ['in_features', 'out_features'] +- in_features: int +- out_features: int +- weight: Tensor ++ __constants__ = ['bias'] + +- def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None: ++ def __init__(self, in_features, out_features, bias=True): + super(Linear, self).__init__() + self.in_features = in_features + self.out_features = out_features +@@ -49,17 +44,18 @@ + self.register_parameter('bias', None) + self.reset_parameters() + +- def reset_parameters(self) -> None: ++ def reset_parameters(self): + init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + if self.bias is not None: + fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight) + bound = 1 / math.sqrt(fan_in) + init.uniform_(self.bias, -bound, bound) + +- def forward(self, input: Tensor) -> Tensor: ++ @weak_script_method ++ def forward(self, input): + return F.linear(input, self.weight, self.bias) + +- def extra_repr(self) -> str: ++ def extra_repr(self): + return 'in_features={}, out_features={}, bias={}'.format( + self.in_features, self.out_features, self.bias is not None + ) \ No newline at end of file diff --git a/MaxPool2d.patch b/MaxPool2d.patch new file mode 100644 index 0000000..5a991b0 --- /dev/null +++ b/MaxPool2d.patch @@ -0,0 +1,17 @@ +--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py ++++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/pooling.py +@@ -57,12 +57,8 @@ + https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md + """ + +- kernel_size: _size_2_t +- stride: _size_2_t +- padding: _size_2_t +- dilation: _size_2_t +- +- def forward(self, input: Tensor) -> Tensor: ++ @weak_script_method ++ def forward(self, input): + return F.max_pool2d(input, self.kernel_size, self.stride, + self.padding, self.dilation, self.ceil_mode, + self.return_indices) \ No newline at end of file diff --git a/PReLU.patch b/PReLU.patch new file mode 100644 index 0000000..d74cce1 --- /dev/null +++ b/PReLU.patch @@ -0,0 +1,37 @@ +--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py ++++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py +@@ -37,9 +37,10 @@ + - Output: :math:`(N, *)`, same shape as the input + + Attributes: +- weight (Tensor): the learnable weights of shape (:attr:`num_parameters`). ++ weight (Tensor): the learnable weights of shape (attr:`num_parameters`). ++ The attr:`dtype` is default to + +- .. image:: ../scripts/activation_images/PReLU.png ++ .. image:: scripts/activation_images/PReLU.png + + Examples:: + +@@ -47,17 +48,16 @@ + >>> input = torch.randn(2) + >>> output = m(input) + """ +- __constants__ = ['num_parameters'] +- num_parameters: int + +- def __init__(self, num_parameters: int = 1, init: float = 0.25) -> None: ++ def __init__(self, num_parameters=1, init=0.25): + self.num_parameters = num_parameters + super(PReLU, self).__init__() + self.weight = Parameter(torch.Tensor(num_parameters).fill_(init)) + +- def forward(self, input: Tensor) -> Tensor: ++ @weak_script_method ++ def forward(self, input): + return F.prelu(input, self.weight) + +- def extra_repr(self) -> str: ++ def extra_repr(self): + return 'num_parameters={}'.format(self.num_parameters) + \ No newline at end of file diff --git a/README.md b/README.md index d714539..639fb56 100644 --- a/README.md +++ b/README.md @@ -32,12 +32,19 @@ Currently, only the test code is available, and training scripts are coming soon ## Usage ### To test the pretrained model ``` -python test_one_image.py --isTrain false --name people --Arc_path models/BEST_checkpoint.tar --pic_a_path crop_224/mars.jpg --pic_b_path crop_224/ds.jpg --output_path output/ +python test_one_image.py --isTrain false --name people --Arc_path arcface_model/arcface_checkpoint.tar --pic_a_path crop_224/6.jpg --pic_b_path crop_224/ds.jpg --output_path output/ ``` ---name refers to the checkpoint name. +--name refers to the SimSwap training logs name. ## Pretrained model + +### Usage +There are two archive files in the drive: **checkpoints.zip** and **arcface_checkpoint.tar** + +- **Copy the arcface_checkpoint.tar into ./arcface_model** +- **Unzip checkpoints.zip, place it in the root dir ./** + [[Google Drive]](https://dl.acm.org/doi/10.1145/3394171.3413630) [[Baidu Drive]](https://pan.baidu.com/s/1wFV11RVZMHqd-ky4YpLdcA) Password: ```jd2v``` diff --git a/Sequential.patch b/Sequential.patch new file mode 100644 index 0000000..6c7f6ac --- /dev/null +++ b/Sequential.patch @@ -0,0 +1,70 @@ +--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/container.py ++++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/container.py +@@ -22,15 +22,7 @@ + ])) + """ + +- @overload +- def __init__(self, *args: Module) -> None: +- ... +- +- @overload +- def __init__(self, arg: 'OrderedDict[str, Module]') -> None: +- ... +- +- def __init__(self, *args: Any): ++ def __init__(self, *args): + super(Sequential, self).__init__() + if len(args) == 1 and isinstance(args[0], OrderedDict): + for key, module in args[0].items(): +@@ -48,18 +40,17 @@ + idx %= size + return next(islice(iterator, idx, None)) + +- @_copy_to_script_wrapper +- def __getitem__(self: T, idx) -> T: ++ def __getitem__(self, idx): + if isinstance(idx, slice): + return self.__class__(OrderedDict(list(self._modules.items())[idx])) + else: + return self._get_item_by_idx(self._modules.values(), idx) + +- def __setitem__(self, idx: int, module: Module) -> None: ++ def __setitem__(self, idx, module): + key = self._get_item_by_idx(self._modules.keys(), idx) + return setattr(self, key, module) + +- def __delitem__(self, idx: Union[slice, int]) -> None: ++ def __delitem__(self, idx): + if isinstance(idx, slice): + for key in list(self._modules.keys())[idx]: + delattr(self, key) +@@ -67,26 +58,16 @@ + key = self._get_item_by_idx(self._modules.keys(), idx) + delattr(self, key) + +- @_copy_to_script_wrapper +- def __len__(self) -> int: ++ def __len__(self): + return len(self._modules) + +- @_copy_to_script_wrapper + def __dir__(self): + keys = super(Sequential, self).__dir__() + keys = [key for key in keys if not key.isdigit()] + return keys + +- @_copy_to_script_wrapper +- def __iter__(self) -> Iterator[Module]: +- return iter(self._modules.values()) +- +- # NB: We can't really type check this function as the type of input +- # may change dynamically (as is tested in +- # TestScript.test_sequential_intermediary_types). Cannot annotate +- # with Any as TorchScript expects a more precise type + def forward(self, input): +- for module in self: ++ for module in self._modules.values(): + input = module(input) + return input + \ No newline at end of file diff --git a/Sigmoid.patch b/Sigmoid.patch new file mode 100644 index 0000000..9ad9766 --- /dev/null +++ b/Sigmoid.patch @@ -0,0 +1,29 @@ +--- /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py ++++ /usr/local/lib/python3.5/dist-packages/torch/nn/modules/activation.py +@@ -2,7 +2,7 @@ + r"""Applies the element-wise function: + + .. math:: +- \text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)} ++ \text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)} + + + Shape: +@@ -10,7 +10,7 @@ + dimensions + - Output: :math:`(N, *)`, same shape as the input + +- .. image:: ../scripts/activation_images/Sigmoid.png ++ .. image:: scripts/activation_images/Sigmoid.png + + Examples:: + +@@ -19,6 +19,7 @@ + >>> output = m(input) + """ + +- def forward(self, input: Tensor) -> Tensor: ++ @weak_script_method ++ def forward(self, input): + return torch.sigmoid(input) + \ No newline at end of file diff --git a/crop_224/1_source.jpg b/crop_224/1_source.jpg new file mode 100644 index 0000000..de06097 Binary files /dev/null and b/crop_224/1_source.jpg differ diff --git a/crop_224/2.jpg b/crop_224/2.jpg new file mode 100644 index 0000000..1a5fbf1 Binary files /dev/null and b/crop_224/2.jpg differ diff --git a/crop_224/6.jpg b/crop_224/6.jpg new file mode 100644 index 0000000..958c554 Binary files /dev/null and b/crop_224/6.jpg differ diff --git a/crop_224/cage.jpg b/crop_224/cage.jpg new file mode 100644 index 0000000..5ee0f57 Binary files /dev/null and b/crop_224/cage.jpg differ diff --git a/crop_224/dnl.jpg b/crop_224/dnl.jpg new file mode 100644 index 0000000..e502d08 Binary files /dev/null and b/crop_224/dnl.jpg differ diff --git a/crop_224/ds.jpg b/crop_224/ds.jpg new file mode 100644 index 0000000..ba3b965 Binary files /dev/null and b/crop_224/ds.jpg differ diff --git a/crop_224/gdg.jpg b/crop_224/gdg.jpg new file mode 100644 index 0000000..8e15a7b Binary files /dev/null and b/crop_224/gdg.jpg differ diff --git a/crop_224/gy.jpg b/crop_224/gy.jpg new file mode 100644 index 0000000..0be6964 Binary files /dev/null and b/crop_224/gy.jpg differ diff --git a/crop_224/hzc.jpg b/crop_224/hzc.jpg new file mode 100644 index 0000000..b76e509 Binary files /dev/null and b/crop_224/hzc.jpg differ diff --git a/crop_224/hzxc.jpg b/crop_224/hzxc.jpg new file mode 100644 index 0000000..abc5581 Binary files /dev/null and b/crop_224/hzxc.jpg differ diff --git a/crop_224/james.jpg b/crop_224/james.jpg new file mode 100644 index 0000000..a36e305 Binary files /dev/null and b/crop_224/james.jpg differ diff --git a/crop_224/jl.jpg b/crop_224/jl.jpg new file mode 100644 index 0000000..dbe942f Binary files /dev/null and b/crop_224/jl.jpg differ diff --git a/crop_224/lcw.jpg b/crop_224/lcw.jpg new file mode 100644 index 0000000..30955c1 Binary files /dev/null and b/crop_224/lcw.jpg differ diff --git a/crop_224/ljm.jpg b/crop_224/ljm.jpg new file mode 100644 index 0000000..d452733 Binary files /dev/null and b/crop_224/ljm.jpg differ diff --git a/crop_224/ljm2.jpg b/crop_224/ljm2.jpg new file mode 100644 index 0000000..cdab34d Binary files /dev/null and b/crop_224/ljm2.jpg differ diff --git a/crop_224/ljm3.jpg b/crop_224/ljm3.jpg new file mode 100644 index 0000000..e977708 Binary files /dev/null and b/crop_224/ljm3.jpg differ diff --git a/crop_224/mars2.jpg b/crop_224/mars2.jpg new file mode 100644 index 0000000..19088a5 Binary files /dev/null and b/crop_224/mars2.jpg differ diff --git a/crop_224/mouth_open.jpg b/crop_224/mouth_open.jpg new file mode 100644 index 0000000..241eac1 Binary files /dev/null and b/crop_224/mouth_open.jpg differ diff --git a/crop_224/mtdm.jpg b/crop_224/mtdm.jpg new file mode 100644 index 0000000..dca6be5 Binary files /dev/null and b/crop_224/mtdm.jpg differ diff --git a/crop_224/trump.jpg b/crop_224/trump.jpg new file mode 100644 index 0000000..442fe0e Binary files /dev/null and b/crop_224/trump.jpg differ diff --git a/crop_224/wlh.jpg b/crop_224/wlh.jpg new file mode 100644 index 0000000..a22ab0b Binary files /dev/null and b/crop_224/wlh.jpg differ diff --git a/crop_224/zjl.jpg b/crop_224/zjl.jpg new file mode 100644 index 0000000..e3719dc Binary files /dev/null and b/crop_224/zjl.jpg differ diff --git a/crop_224/zrf.jpg b/crop_224/zrf.jpg new file mode 100644 index 0000000..0166ae1 Binary files /dev/null and b/crop_224/zrf.jpg differ diff --git a/crop_224/zxy.jpg b/crop_224/zxy.jpg new file mode 100644 index 0000000..c696347 Binary files /dev/null and b/crop_224/zxy.jpg differ diff --git a/output/result.jpg b/output/result.jpg new file mode 100644 index 0000000..edd5322 Binary files /dev/null and b/output/result.jpg differ diff --git a/train.py b/train.py deleted file mode 100644 index 691d8c4..0000000 --- a/train.py +++ /dev/null @@ -1,148 +0,0 @@ -import time -import os -import numpy as np -import torch -from torch.autograd import Variable -from collections import OrderedDict -from subprocess import call -import fractions -from options.train_options import TrainOptions -from data.data_loader import CreateDataLoader -from data.dataset_class import FaceDataSet -from torch.utils.data import DataLoader -from models.models import create_model -import util.util as util -from util.visualizer import Visualizer -import cv2 -from torchvision import transforms - -def lcm(a,b): return abs(a * b)/fractions.gcd(a,b) if a and b else 0 - - -detransformer = transforms.Compose([ - transforms.Normalize([0, 0, 0], [1/0.229, 1/0.224, 1/0.225]), - transforms.Normalize([-0.485, -0.456, -0.406], [1, 1, 1]) - ]) - -opt = TrainOptions().parse() -iter_path = os.path.join(opt.checkpoints_dir, opt.name, 'iter.txt') - -if opt.continue_train: - try: - start_epoch, epoch_iter = np.loadtxt(iter_path , delimiter=',', dtype=int) - except: - start_epoch, epoch_iter = 1, 0 - print('Resuming from epoch %d at iteration %d' % (start_epoch, epoch_iter)) -else: - start_epoch, epoch_iter = 1, 0 - -opt.print_freq = lcm(opt.print_freq, opt.batchSize) -if opt.debug: - opt.display_freq = 1 - opt.print_freq = 1 - opt.niter = 1 - opt.niter_decay = 0 - opt.max_dataset_size = 10 - - -dataset = FaceDataSet('people_list.txt', opt.batchSize) -data_loader = DataLoader(dataset, batch_size = opt.batchSize, shuffle=True) -dataset_size = len(data_loader) - -device = torch.device("cuda:0") - - -model = create_model(opt) -visualizer = Visualizer(opt) - -optimizer_G, optimizer_D = model.module.optimizer_G, model.module.optimizer_D - -total_steps = (start_epoch-1) * 8608 + epoch_iter - -display_delta = total_steps % opt.display_freq -print_delta = total_steps % opt.print_freq -save_delta = total_steps % opt.save_latest_freq - -loss_avg = 0 -refresh_count = 0 - -for epoch in range(start_epoch, opt.niter + opt.niter_decay + 1): - epoch_start_time = time.time() - if epoch != start_epoch: - epoch_iter = epoch_iter % dataset_size - for i, (img_id, img_att, latent_id, latent_att, data_type) in enumerate(data_loader): - if total_steps % opt.print_freq == print_delta: - iter_start_time = time.time() - total_steps += opt.batchSize - epoch_iter += opt.batchSize - - # convert numpy to tensor - img_id = img_id.to(device) - img_att = img_att.to(device) - latent_id = latent_id.to(device) - latent_att = latent_att.to(device) - - - # whether to collect output images - save_fake = total_steps % opt.display_freq == display_delta - - ############## Forward Pass ###################### - - losses, img_fake = model(img_id, img_att, latent_id, latent_att, for_G=True) - - # update Generator weights - losses = [ torch.mean(x) if not isinstance(x, int) else x for x in losses ] - loss_dict = dict(zip(model.module.loss_names, losses)) - - loss_G = loss_dict['G_GAN'] + loss_dict.get('G_GAN_Feat', 0) + loss_dict['G_ID'] * opt.lambda_id - if data_type[0] == 0: - loss_G += loss_dict['G_Rec'] - - optimizer_G.zero_grad() - loss_G.backward(retain_graph=True) - optimizer_G.step() - - loss_D = (loss_dict['D_fake'] + loss_dict['D_real']) * 0.5 + loss_dict['D_GP'] - optimizer_D.zero_grad() - loss_D.backward() - optimizer_D.step() - - ############## Display results and errors ########## - ### print out errors - if total_steps % opt.print_freq == print_delta: - errors = {k: v.data.item() if not isinstance(v, int) else v for k, v in loss_dict.items()} - t = (time.time() - iter_start_time) / opt.print_freq - visualizer.print_current_errors(epoch, epoch_iter, errors, t) - visualizer.plot_current_errors(errors, total_steps) - - ### display output images - if save_fake: - '''visuals = OrderedDict([('input_label', util.tensor2label(data['label'][0], opt.label_nc)), - ('synthesized_image', util.tensor2im(generated.data[0])), - ('real_image', util.tensor2im(data['image'][0]))])''' - for i in range(img_id.shape[0]): - if i == 0: - row1 = img_id[i] - row2 = img_att[i] - row3 = img_fake[i] - else: - row1 = torch.cat([row1, img_id[i]], dim=2) - row2 = torch.cat([row2, img_att[i]], dim=2) - row3 = torch.cat([row3, img_fake[i]], dim=2) - full = torch.cat([row1, row2, row3], dim=1).detach() - full = full.permute(1, 2, 0) - output = full.to('cpu') - output = np.array(output)*255 - output = output[..., ::-1] - cv2.imwrite('samples/step_'+str(total_steps)+'.jpg', output) - - ### save latest model - if total_steps % opt.save_latest_freq == save_delta: - print('saving the latest model (epoch %d, total_steps %d)' % (epoch, total_steps)) - model.module.save('latest') - np.savetxt(iter_path, (epoch, epoch_iter), delimiter=',', fmt='%d') - - # end of epoch - iter_end_time = time.time() - print('End of epoch %d / %d \t Time Taken: %d sec' % - (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time)) \ No newline at end of file