From dc8b8bca5a71f08b7a5ac23a98857da9bd9d4e67 Mon Sep 17 00:00:00 2001 From: Sam Khoze <68170403+SamKhoze@users.noreply.github.com> Date: Tue, 18 Jun 2024 13:21:08 -0700 Subject: [PATCH] Add files via upload --- TTS/vc/configs/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 176 bytes TTS/vc/configs/freevc_config.py | 278 +++++++ TTS/vc/configs/shared_configs.py | 155 ++++ TTS/vc/models/__init__.py | 17 + .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 1598 bytes TTS/vc/models/base_vc.py | 429 ++++++++++ TTS/vc/models/freevc.py | 562 +++++++++++++ TTS/vc/modules/__init__.py | 0 TTS/vc/modules/freevc/__init__.py | 0 TTS/vc/modules/freevc/commons.py | 164 ++++ TTS/vc/modules/freevc/mel_processing.py | 125 +++ TTS/vc/modules/freevc/modules.py | 387 +++++++++ .../freevc/speaker_encoder/__init__.py | 0 .../modules/freevc/speaker_encoder/audio.py | 65 ++ .../modules/freevc/speaker_encoder/hparams.py | 31 + .../freevc/speaker_encoder/speaker_encoder.py | 175 ++++ TTS/vc/modules/freevc/wavlm/__init__.py | 35 + TTS/vc/modules/freevc/wavlm/config.json | 99 +++ TTS/vc/modules/freevc/wavlm/modules.py | 768 ++++++++++++++++++ TTS/vc/modules/freevc/wavlm/wavlm.py | 719 ++++++++++++++++ TTS/vocoder/README.md | 39 + TTS/vocoder/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 173 bytes TTS/vocoder/configs/__init__.py | 17 + .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 1371 bytes .../fullband_melgan_config.cpython-311.pyc | Bin 0 -> 6538 bytes .../hifigan_config.cpython-311.pyc | Bin 0 -> 6975 bytes .../__pycache__/melgan_config.cpython-311.pyc | Bin 0 -> 6465 bytes .../multiband_melgan_config.cpython-311.pyc | Bin 0 -> 9352 bytes .../parallel_wavegan_config.cpython-311.pyc | Bin 0 -> 8891 bytes .../shared_configs.cpython-311.pyc | Bin 0 -> 9967 bytes .../univnet_config.cpython-311.pyc | Bin 0 -> 8206 bytes .../wavegrad_config.cpython-311.pyc | Bin 0 -> 4805 bytes .../wavernn_config.cpython-311.pyc | Bin 0 -> 5431 bytes TTS/vocoder/configs/fullband_melgan_config.py | 106 +++ TTS/vocoder/configs/hifigan_config.py | 136 ++++ TTS/vocoder/configs/melgan_config.py | 106 +++ .../configs/multiband_melgan_config.py | 144 ++++ .../configs/parallel_wavegan_config.py | 134 +++ TTS/vocoder/configs/shared_configs.py | 182 +++++ TTS/vocoder/configs/univnet_config.py | 161 ++++ TTS/vocoder/configs/wavegrad_config.py | 90 ++ TTS/vocoder/configs/wavernn_config.py | 102 +++ TTS/vocoder/datasets/__init__.py | 58 ++ .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 2614 bytes .../__pycache__/gan_dataset.cpython-311.pyc | Bin 0 -> 8249 bytes .../__pycache__/preprocess.cpython-311.pyc | Bin 0 -> 5342 bytes .../wavegrad_dataset.cpython-311.pyc | Bin 0 -> 8414 bytes .../wavernn_dataset.cpython-311.pyc | Bin 0 -> 8458 bytes TTS/vocoder/datasets/gan_dataset.py | 152 ++++ TTS/vocoder/datasets/preprocess.py | 75 ++ TTS/vocoder/datasets/wavegrad_dataset.py | 151 ++++ TTS/vocoder/datasets/wavernn_dataset.py | 118 +++ TTS/vocoder/layers/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 180 bytes .../layers/__pycache__/losses.cpython-311.pyc | Bin 0 -> 19347 bytes .../__pycache__/wavegrad.cpython-311.pyc | Bin 0 -> 12964 bytes TTS/vocoder/layers/hifigan.py | 56 ++ TTS/vocoder/layers/losses.py | 368 +++++++++ TTS/vocoder/layers/lvc_block.py | 198 +++++ TTS/vocoder/layers/melgan.py | 43 + TTS/vocoder/layers/parallel_wavegan.py | 77 ++ TTS/vocoder/layers/pqmf.py | 53 ++ TTS/vocoder/layers/qmf.dat | 640 +++++++++++++++ TTS/vocoder/layers/upsample.py | 102 +++ 
TTS/vocoder/layers/wavegrad.py | 166 ++++ TTS/vocoder/models/__init__.py | 154 ++++ .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 7655 bytes .../__pycache__/base_vocoder.cpython-311.pyc | Bin 0 -> 2657 bytes .../hifigan_discriminator.cpython-311.pyc | Bin 0 -> 11892 bytes .../hifigan_generator.cpython-311.pyc | Bin 0 -> 13759 bytes .../__pycache__/wavegrad.cpython-311.pyc | Bin 0 -> 22882 bytes .../__pycache__/wavernn.cpython-311.pyc | Bin 0 -> 40105 bytes TTS/vocoder/models/base_vocoder.py | 55 ++ .../models/fullband_melgan_generator.py | 33 + TTS/vocoder/models/gan.py | 374 +++++++++ TTS/vocoder/models/hifigan_discriminator.py | 217 +++++ TTS/vocoder/models/hifigan_generator.py | 301 +++++++ TTS/vocoder/models/melgan_discriminator.py | 84 ++ TTS/vocoder/models/melgan_generator.py | 95 +++ .../models/melgan_multiscale_discriminator.py | 50 ++ .../models/multiband_melgan_generator.py | 41 + .../models/parallel_wavegan_discriminator.py | 187 +++++ .../models/parallel_wavegan_generator.py | 164 ++++ .../models/random_window_discriminator.py | 203 +++++ TTS/vocoder/models/univnet_discriminator.py | 95 +++ TTS/vocoder/models/univnet_generator.py | 157 ++++ TTS/vocoder/models/wavegrad.py | 345 ++++++++ TTS/vocoder/models/wavernn.py | 646 +++++++++++++++ TTS/vocoder/pqmf_output.wav | Bin 0 -> 83812 bytes TTS/vocoder/utils/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 179 bytes .../__pycache__/distribution.cpython-311.pyc | Bin 0 -> 8671 bytes .../__pycache__/generic_utils.cpython-311.pyc | Bin 0 -> 4051 bytes TTS/vocoder/utils/distribution.py | 154 ++++ TTS/vocoder/utils/generic_utils.py | 72 ++ 97 files changed, 10910 insertions(+) create mode 100644 TTS/vc/configs/__init__.py create mode 100644 TTS/vc/configs/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/vc/configs/freevc_config.py create mode 100644 TTS/vc/configs/shared_configs.py create mode 100644 TTS/vc/models/__init__.py create mode 100644 TTS/vc/models/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/vc/models/base_vc.py create mode 100644 TTS/vc/models/freevc.py create mode 100644 TTS/vc/modules/__init__.py create mode 100644 TTS/vc/modules/freevc/__init__.py create mode 100644 TTS/vc/modules/freevc/commons.py create mode 100644 TTS/vc/modules/freevc/mel_processing.py create mode 100644 TTS/vc/modules/freevc/modules.py create mode 100644 TTS/vc/modules/freevc/speaker_encoder/__init__.py create mode 100644 TTS/vc/modules/freevc/speaker_encoder/audio.py create mode 100644 TTS/vc/modules/freevc/speaker_encoder/hparams.py create mode 100644 TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py create mode 100644 TTS/vc/modules/freevc/wavlm/__init__.py create mode 100644 TTS/vc/modules/freevc/wavlm/config.json create mode 100644 TTS/vc/modules/freevc/wavlm/modules.py create mode 100644 TTS/vc/modules/freevc/wavlm/wavlm.py create mode 100644 TTS/vocoder/README.md create mode 100644 TTS/vocoder/__init__.py create mode 100644 TTS/vocoder/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/vocoder/configs/__init__.py create mode 100644 TTS/vocoder/configs/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/vocoder/configs/__pycache__/fullband_melgan_config.cpython-311.pyc create mode 100644 TTS/vocoder/configs/__pycache__/hifigan_config.cpython-311.pyc create mode 100644 TTS/vocoder/configs/__pycache__/melgan_config.cpython-311.pyc create mode 100644 TTS/vocoder/configs/__pycache__/multiband_melgan_config.cpython-311.pyc create mode 100644 
TTS/vocoder/configs/__pycache__/parallel_wavegan_config.cpython-311.pyc create mode 100644 TTS/vocoder/configs/__pycache__/shared_configs.cpython-311.pyc create mode 100644 TTS/vocoder/configs/__pycache__/univnet_config.cpython-311.pyc create mode 100644 TTS/vocoder/configs/__pycache__/wavegrad_config.cpython-311.pyc create mode 100644 TTS/vocoder/configs/__pycache__/wavernn_config.cpython-311.pyc create mode 100644 TTS/vocoder/configs/fullband_melgan_config.py create mode 100644 TTS/vocoder/configs/hifigan_config.py create mode 100644 TTS/vocoder/configs/melgan_config.py create mode 100644 TTS/vocoder/configs/multiband_melgan_config.py create mode 100644 TTS/vocoder/configs/parallel_wavegan_config.py create mode 100644 TTS/vocoder/configs/shared_configs.py create mode 100644 TTS/vocoder/configs/univnet_config.py create mode 100644 TTS/vocoder/configs/wavegrad_config.py create mode 100644 TTS/vocoder/configs/wavernn_config.py create mode 100644 TTS/vocoder/datasets/__init__.py create mode 100644 TTS/vocoder/datasets/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/vocoder/datasets/__pycache__/gan_dataset.cpython-311.pyc create mode 100644 TTS/vocoder/datasets/__pycache__/preprocess.cpython-311.pyc create mode 100644 TTS/vocoder/datasets/__pycache__/wavegrad_dataset.cpython-311.pyc create mode 100644 TTS/vocoder/datasets/__pycache__/wavernn_dataset.cpython-311.pyc create mode 100644 TTS/vocoder/datasets/gan_dataset.py create mode 100644 TTS/vocoder/datasets/preprocess.py create mode 100644 TTS/vocoder/datasets/wavegrad_dataset.py create mode 100644 TTS/vocoder/datasets/wavernn_dataset.py create mode 100644 TTS/vocoder/layers/__init__.py create mode 100644 TTS/vocoder/layers/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/vocoder/layers/__pycache__/losses.cpython-311.pyc create mode 100644 TTS/vocoder/layers/__pycache__/wavegrad.cpython-311.pyc create mode 100644 TTS/vocoder/layers/hifigan.py create mode 100644 TTS/vocoder/layers/losses.py create mode 100644 TTS/vocoder/layers/lvc_block.py create mode 100644 TTS/vocoder/layers/melgan.py create mode 100644 TTS/vocoder/layers/parallel_wavegan.py create mode 100644 TTS/vocoder/layers/pqmf.py create mode 100644 TTS/vocoder/layers/qmf.dat create mode 100644 TTS/vocoder/layers/upsample.py create mode 100644 TTS/vocoder/layers/wavegrad.py create mode 100644 TTS/vocoder/models/__init__.py create mode 100644 TTS/vocoder/models/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/vocoder/models/__pycache__/base_vocoder.cpython-311.pyc create mode 100644 TTS/vocoder/models/__pycache__/hifigan_discriminator.cpython-311.pyc create mode 100644 TTS/vocoder/models/__pycache__/hifigan_generator.cpython-311.pyc create mode 100644 TTS/vocoder/models/__pycache__/wavegrad.cpython-311.pyc create mode 100644 TTS/vocoder/models/__pycache__/wavernn.cpython-311.pyc create mode 100644 TTS/vocoder/models/base_vocoder.py create mode 100644 TTS/vocoder/models/fullband_melgan_generator.py create mode 100644 TTS/vocoder/models/gan.py create mode 100644 TTS/vocoder/models/hifigan_discriminator.py create mode 100644 TTS/vocoder/models/hifigan_generator.py create mode 100644 TTS/vocoder/models/melgan_discriminator.py create mode 100644 TTS/vocoder/models/melgan_generator.py create mode 100644 TTS/vocoder/models/melgan_multiscale_discriminator.py create mode 100644 TTS/vocoder/models/multiband_melgan_generator.py create mode 100644 TTS/vocoder/models/parallel_wavegan_discriminator.py create mode 100644 
TTS/vocoder/models/parallel_wavegan_generator.py create mode 100644 TTS/vocoder/models/random_window_discriminator.py create mode 100644 TTS/vocoder/models/univnet_discriminator.py create mode 100644 TTS/vocoder/models/univnet_generator.py create mode 100644 TTS/vocoder/models/wavegrad.py create mode 100644 TTS/vocoder/models/wavernn.py create mode 100644 TTS/vocoder/pqmf_output.wav create mode 100644 TTS/vocoder/utils/__init__.py create mode 100644 TTS/vocoder/utils/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/vocoder/utils/__pycache__/distribution.cpython-311.pyc create mode 100644 TTS/vocoder/utils/__pycache__/generic_utils.cpython-311.pyc create mode 100644 TTS/vocoder/utils/distribution.py create mode 100644 TTS/vocoder/utils/generic_utils.py diff --git a/TTS/vc/configs/__init__.py b/TTS/vc/configs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/vc/configs/__pycache__/__init__.cpython-311.pyc b/TTS/vc/configs/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..84205b7928892b3edea09b36196fe3da9abbeea7 GIT binary patch literal 176 zcmZ3^%ge<81nFs6X(0MBh=2h`DC095kTIPhg&~+hlhJP_LlF~@{~09t%UM6PIJKx) zzcR5nL*FH}IJ+djK;Jn(H?1<%Q$M-1xFkO}J}*BdwOBtSBv`*JSwA^HFD)~@SU)~K uGcU6wK3=b&@)w6qZhlH>PO4oIE6_}kt;PI6;sY}yBjX1K7*WIw6axUBohqRK literal 0 HcmV?d00001 diff --git a/TTS/vc/configs/freevc_config.py b/TTS/vc/configs/freevc_config.py new file mode 100644 index 0000000..207181b --- /dev/null +++ b/TTS/vc/configs/freevc_config.py @@ -0,0 +1,278 @@ +from dataclasses import dataclass, field +from typing import List, Optional + +from coqpit import Coqpit + +from TTS.vc.configs.shared_configs import BaseVCConfig + + +@dataclass +class FreeVCAudioConfig(Coqpit): + """Audio configuration + + Args: + max_wav_value (float): + The maximum value of the waveform. + + input_sample_rate (int): + The sampling rate of the input waveform. + + output_sample_rate (int): + The sampling rate of the output waveform. + + filter_length (int): + The length of the filter. + + hop_length (int): + The hop length. + + win_length (int): + The window length. + + n_mel_channels (int): + The number of mel channels. + + mel_fmin (float): + The minimum frequency of the mel filterbank. + + mel_fmax (Optional[float]): + The maximum frequency of the mel filterbank. + """ + + max_wav_value: float = field(default=32768.0) + input_sample_rate: int = field(default=16000) + output_sample_rate: int = field(default=24000) + filter_length: int = field(default=1280) + hop_length: int = field(default=320) + win_length: int = field(default=1280) + n_mel_channels: int = field(default=80) + mel_fmin: float = field(default=0.0) + mel_fmax: Optional[float] = field(default=None) + + +@dataclass +class FreeVCArgs(Coqpit): + """FreeVC model arguments + + Args: + spec_channels (int): + The number of channels in the spectrogram. + + inter_channels (int): + The number of channels in the intermediate layers. + + hidden_channels (int): + The number of channels in the hidden layers. + + filter_channels (int): + The number of channels in the filter layers. + + n_heads (int): + The number of attention heads. + + n_layers (int): + The number of layers. + + kernel_size (int): + The size of the kernel. + + p_dropout (float): + The dropout probability. + + resblock (str): + The type of residual block. + + resblock_kernel_sizes (List[int]): + The kernel sizes for the residual blocks. + + resblock_dilation_sizes (List[List[int]]): + The dilation sizes for the residual blocks. 
+ + upsample_rates (List[int]): + The upsample rates. + + upsample_initial_channel (int): + The number of channels in the initial upsample layer. + + upsample_kernel_sizes (List[int]): + The kernel sizes for the upsample layers. + + n_layers_q (int): + The number of layers in the quantization network. + + use_spectral_norm (bool): + Whether to use spectral normalization. + + gin_channels (int): + The number of channels in the global conditioning vector. + + ssl_dim (int): + The dimension of the self-supervised learning embedding. + + use_spk (bool): + Whether to use external speaker encoder. + """ + + spec_channels: int = field(default=641) + inter_channels: int = field(default=192) + hidden_channels: int = field(default=192) + filter_channels: int = field(default=768) + n_heads: int = field(default=2) + n_layers: int = field(default=6) + kernel_size: int = field(default=3) + p_dropout: float = field(default=0.1) + resblock: str = field(default="1") + resblock_kernel_sizes: List[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates: List[int] = field(default_factory=lambda: [10, 8, 2, 2]) + upsample_initial_channel: int = field(default=512) + upsample_kernel_sizes: List[int] = field(default_factory=lambda: [16, 16, 4, 4]) + n_layers_q: int = field(default=3) + use_spectral_norm: bool = field(default=False) + gin_channels: int = field(default=256) + ssl_dim: int = field(default=1024) + use_spk: bool = field(default=False) + num_spks: int = field(default=0) + segment_size: int = field(default=8960) + + +@dataclass +class FreeVCConfig(BaseVCConfig): + """Defines parameters for FreeVC End2End TTS model. + + Args: + model (str): + Model name. Do not change unless you know what you are doing. + + model_args (FreeVCArgs): + Model architecture arguments. Defaults to `FreeVCArgs()`. + + audio (FreeVCAudioConfig): + Audio processing configuration. Defaults to `FreeVCAudioConfig()`. + + grad_clip (List): + Gradient clipping thresholds for each optimizer. Defaults to `[1000.0, 1000.0]`. + + lr_gen (float): + Initial learning rate for the generator. Defaults to 0.0002. + + lr_disc (float): + Initial learning rate for the discriminator. Defaults to 0.0002. + + lr_scheduler_gen (str): + Name of the learning rate scheduler for the generator. One of the `torch.optim.lr_scheduler.*`. Defaults to + `ExponentialLR`. + + lr_scheduler_gen_params (dict): + Parameters for the learning rate scheduler of the generator. Defaults to `{'gamma': 0.999875, "last_epoch":-1}`. + + lr_scheduler_disc (str): + Name of the learning rate scheduler for the discriminator. One of the `torch.optim.lr_scheduler.*`. Defaults to + `ExponentialLR`. + + lr_scheduler_disc_params (dict): + Parameters for the learning rate scheduler of the discriminator. Defaults to `{'gamma': 0.999875, "last_epoch":-1}`. + + scheduler_after_epoch (bool): + If true, step the schedulers after each epoch else after each step. Defaults to `False`. + + optimizer (str): + Name of the optimizer to use with both the generator and the discriminator networks. One of the + `torch.optim.*`. Defaults to `AdamW`. + + kl_loss_alpha (float): + Loss weight for KL loss. Defaults to 1.0. + + disc_loss_alpha (float): + Loss weight for the discriminator loss. Defaults to 1.0. + + gen_loss_alpha (float): + Loss weight for the generator loss. Defaults to 1.0. + + feat_loss_alpha (float): + Loss weight for the feature matching loss. Defaults to 1.0. 
+ + mel_loss_alpha (float): + Loss weight for the mel loss. Defaults to 45.0. + + return_wav (bool): + If true, data loader returns the waveform as well as the other outputs. Do not change. Defaults to `True`. + + compute_linear_spec (bool): + If true, the linear spectrogram is computed and returned alongside the mel output. Do not change. Defaults to `True`. + + use_weighted_sampler (bool): + If true, use weighted sampler with bucketing for balancing samples between datasets used in training. Defaults to `False`. + + weighted_sampler_attrs (dict): + Key retuned by the formatter to be used for weighted sampler. For example `{"root_path": 2.0, "speaker_name": 1.0}` sets sample probabilities + by overweighting `root_path` by 2.0. Defaults to `{}`. + + weighted_sampler_multipliers (dict): + Weight each unique value of a key returned by the formatter for weighted sampling. + For example `{"root_path":{"/raid/datasets/libritts-clean-16khz-bwe-coqui_44khz/LibriTTS/train-clean-100/":1.0, "/raid/datasets/libritts-clean-16khz-bwe-coqui_44khz/LibriTTS/train-clean-360/": 0.5}`. + It will sample instances from `train-clean-100` 2 times more than `train-clean-360`. Defaults to `{}`. + + r (int): + Number of spectrogram frames to be generated at a time. Do not change. Defaults to `1`. + + add_blank (bool): + If true, a blank token is added in between every character. Defaults to `True`. + + test_sentences (List[List]): + List of sentences with speaker and language information to be used for testing. + + language_ids_file (str): + Path to the language ids file. + + use_language_embedding (bool): + If true, language embedding is used. Defaults to `False`. + + Note: + Check :class:`TTS.tts.configs.shared_configs.BaseTTSConfig` for the inherited parameters. + + Example: + + >>> from TTS.vc.configs.freevc_config import FreeVCConfig + >>> config = FreeVCConfig() + """ + + model: str = "freevc" + # model specific params + model_args: FreeVCArgs = field(default_factory=FreeVCArgs) + audio: FreeVCAudioConfig = field(default_factory=FreeVCAudioConfig) + + # optimizer + # TODO with training support + + # loss params + # TODO with training support + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + weighted_sampler_attrs: dict = field(default_factory=lambda: {}) + weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + speakers_file: str = None + speaker_embedding_channels: int = 256 + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: List[str] = None + d_vector_dim: int = None + + def __post_init__(self): + for key, val in self.model_args.items(): + if hasattr(self, key): + self[key] = val diff --git a/TTS/vc/configs/shared_configs.py b/TTS/vc/configs/shared_configs.py new file mode 100644 index 0000000..74164a7 --- /dev/null +++ b/TTS/vc/configs/shared_configs.py @@ -0,0 +1,155 @@ +from dataclasses import asdict, dataclass, field +from typing import Dict, List + +from coqpit import Coqpit, check_argument + +from TTS.config import BaseAudioConfig, BaseDatasetConfig, BaseTrainingConfig + + +@dataclass +class BaseVCConfig(BaseTrainingConfig): + """Shared parameters among all the tts models. + + Args: + + audio (BaseAudioConfig): + Audio processor config object instance. 
+ + batch_group_size (int): + Size of the batch groups used for bucketing. By default, the dataloader orders samples by the sequence + length for a more efficient and stable training. If `batch_group_size > 1` then it performs bucketing to + prevent using the same batches for each epoch. + + loss_masking (bool): + enable / disable masking loss values against padded segments of samples in a batch. + + min_text_len (int): + Minimum length of input text to be used. All shorter samples will be ignored. Defaults to 0. + + max_text_len (int): + Maximum length of input text to be used. All longer samples will be ignored. Defaults to float("inf"). + + min_audio_len (int): + Minimum length of input audio to be used. All shorter samples will be ignored. Defaults to 0. + + max_audio_len (int): + Maximum length of input audio to be used. All longer samples will be ignored. The maximum length in the + dataset defines the VRAM used in the training. Hence, pay attention to this value if you encounter an + OOM error in training. Defaults to float("inf"). + + compute_f0 (int): + (Not in use yet). + + compute_energy (int): + (Not in use yet). + + compute_linear_spec (bool): + If True data loader computes and returns linear spectrograms alongside the other data. + + precompute_num_workers (int): + Number of workers to precompute features. Defaults to 0. + + use_noise_augment (bool): + Augment the input audio with random noise. + + start_by_longest (bool): + If True, the data loader will start loading the longest batch first. It is useful for checking OOM issues. + Defaults to False. + + shuffle (bool): + If True, the data loader will shuffle the dataset when there is not sampler defined. Defaults to True. + + drop_last (bool): + If True, the data loader will drop the last batch if it is not complete. It helps to prevent + issues that emerge from the partial batch statistics. Defaults to True. + + add_blank (bool): + Add blank characters between each other two characters. It improves performance for some models at expense + of slower run-time due to the longer input sequence. + + datasets (List[BaseDatasetConfig]): + List of datasets used for training. If multiple datasets are provided, they are merged and used together + for training. + + optimizer (str): + Optimizer used for the training. Set one from `torch.optim.Optimizer` or `TTS.utils.training`. + Defaults to ``. + + optimizer_params (dict): + Optimizer kwargs. Defaults to `{"betas": [0.8, 0.99], "weight_decay": 0.0}` + + lr_scheduler (str): + Learning rate scheduler for the training. Use one from `torch.optim.Scheduler` schedulers or + `TTS.utils.training`. Defaults to ``. + + lr_scheduler_params (dict): + Parameters for the generator learning rate scheduler. Defaults to `{"warmup": 4000}`. + + test_sentences (List[str]): + List of sentences to be used at testing. Defaults to '[]' + + eval_split_max_size (int): + Number maximum of samples to be used for evaluation in proportion split. Defaults to None (Disabled). + + eval_split_size (float): + If between 0.0 and 1.0 represents the proportion of the dataset to include in the evaluation set. + If > 1, represents the absolute number of evaluation samples. Defaults to 0.01 (1%). + + use_speaker_weighted_sampler (bool): + Enable / Disable the batch balancer by speaker. Defaults to ```False```. + + speaker_weighted_sampler_alpha (float): + Number that control the influence of the speaker sampler weights. Defaults to ```1.0```. 
+ + use_language_weighted_sampler (bool): + Enable / Disable the batch balancer by language. Defaults to ```False```. + + language_weighted_sampler_alpha (float): + Number that control the influence of the language sampler weights. Defaults to ```1.0```. + + use_length_weighted_sampler (bool): + Enable / Disable the batch balancer by audio length. If enabled the dataset will be divided + into 10 buckets considering the min and max audio of the dataset. The sampler weights will be + computed forcing to have the same quantity of data for each bucket in each training batch. Defaults to ```False```. + + length_weighted_sampler_alpha (float): + Number that control the influence of the length sampler weights. Defaults to ```1.0```. + """ + + audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + # training params + batch_group_size: int = 0 + loss_masking: bool = None + # dataloading + min_audio_len: int = 1 + max_audio_len: int = float("inf") + min_text_len: int = 1 + max_text_len: int = float("inf") + compute_f0: bool = False + compute_energy: bool = False + compute_linear_spec: bool = False + precompute_num_workers: int = 0 + use_noise_augment: bool = False + start_by_longest: bool = False + shuffle: bool = False + drop_last: bool = False + # dataset + datasets: List[BaseDatasetConfig] = field(default_factory=lambda: [BaseDatasetConfig()]) + # optimizer + optimizer: str = "radam" + optimizer_params: dict = None + # scheduler + lr_scheduler: str = None + lr_scheduler_params: dict = field(default_factory=lambda: {}) + # testing + test_sentences: List[str] = field(default_factory=lambda: []) + # evaluation + eval_split_max_size: int = None + eval_split_size: float = 0.01 + # weighted samplers + use_speaker_weighted_sampler: bool = False + speaker_weighted_sampler_alpha: float = 1.0 + use_language_weighted_sampler: bool = False + language_weighted_sampler_alpha: float = 1.0 + use_length_weighted_sampler: bool = False + length_weighted_sampler_alpha: float = 1.0 diff --git a/TTS/vc/models/__init__.py b/TTS/vc/models/__init__.py new file mode 100644 index 0000000..5a09b4e --- /dev/null +++ b/TTS/vc/models/__init__.py @@ -0,0 +1,17 @@ +import importlib +import re +from typing import Dict, List, Union + + +def to_camel(text): + text = text.capitalize() + return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), text) + + +def setup_model(config: "Coqpit", samples: Union[List[List], List[Dict]] = None) -> "BaseVC": + print(" > Using model: {}".format(config.model)) + # fetch the right model implementation. 
+ if "model" in config and config["model"].lower() == "freevc": + MyModel = importlib.import_module("TTS.vc.models.freevc").FreeVC + model = MyModel.init_from_config(config, samples) + return model diff --git a/TTS/vc/models/__pycache__/__init__.cpython-311.pyc b/TTS/vc/models/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c2814658e27f663963e006e75963a7520cae95d GIT binary patch literal 1598 zcmZ`&&1)M+6rcUnSDnbJiCfodRj?axN-UhHg_Pir$TW!?n?Ph!)DN-VUAgP*YPT~h zw^n7)!G}-^?WH(}9&{*fiZ5;n)Q2AOC#;0PV!%-7DK~?mhn)I$H(Fb?emnE#y?HZl zelzd4zYGk-5y)D3y}ZmK^tb2qB>K`gserMARHQNnDh4A{WWZ+)*5rzu$rpK3C<+Wx zxouPw)ws&5f+}i)#%uiRy?h8f?$j%@r?whVqh(QzZ41SS$~-_);<>kLN#ZC!uPYAa z@9D&$!g58oDhd;3L`VRegMRWcs2x;CO&I?xyD+sEPH!Q{+w)S^;o^a)jLL39j$MfE zg)0CKw^U0`%)I@%RGRoizFM2RTKr7vgW)ZH75d42Q0K!?nOy+C7ha$lS^+LW!lq8n&u76{@zaVTqxPNxVqr#jWfz(J;ww%H)geyha{6mYvO8W_fe@ZdR!h z$1+P5OVvoWPh1DMp>*; zj#O6xHScI&I{|R~3aq5j-g(%2L=if<0;++!iHqOQd^^)hjCK;EjrpU*z<2Mvsd4vm z9@?`)E3wc?EV%pvzTl-16{}L#*Ql7czJzmWgvh3CXoQMbbE>#P#alAbR`N<%ao80y z=pS%)kaMOaD)avZs-siR^cS?>gCByeqYZ=?>tLPIy%}-~jtr&0>?-%rM~SmeEL?)t zJ+!{4eE(VN-mCet#n#zor01O->!{A`Le{c{g+MP#{ZgTiAInho|Jh`+#+)Cc) zByT(tx`|}tp3gvuS(2joEr5WNps46<+VG^ Init speaker_embedding layer.") + self.speaker_embedding = nn.Embedding(self.num_speakers, self.embedded_speaker_dim) + self.speaker_embedding.weight.data.normal_(0, 0.3) + + def get_aux_input(self, **kwargs) -> Dict: + """Prepare and return `aux_input` used by `forward()`""" + return {"speaker_id": None, "style_wav": None, "d_vector": None, "language_id": None} + + def get_aux_input_from_test_sentences(self, sentence_info): + if hasattr(self.config, "model_args"): + config = self.config.model_args + else: + config = self.config + + # extract speaker and language info + text, speaker_name, style_wav, language_name = None, None, None, None + + if isinstance(sentence_info, list): + if len(sentence_info) == 1: + text = sentence_info[0] + elif len(sentence_info) == 2: + text, speaker_name = sentence_info + elif len(sentence_info) == 3: + text, speaker_name, style_wav = sentence_info + elif len(sentence_info) == 4: + text, speaker_name, style_wav, language_name = sentence_info + else: + text = sentence_info + + # get speaker id/d_vector + speaker_id, d_vector, language_id = None, None, None + if self.speaker_manager is not None: + if config.use_d_vector_file: + if speaker_name is None: + d_vector = self.speaker_manager.get_random_embedding() + else: + d_vector = self.speaker_manager.get_d_vector_by_name(speaker_name) + elif config.use_speaker_embedding: + if speaker_name is None: + speaker_id = self.speaker_manager.get_random_id() + else: + speaker_id = self.speaker_manager.name_to_id[speaker_name] + + # get language id + if self.language_manager is not None and config.use_language_embedding and language_name is not None: + language_id = self.language_manager.name_to_id[language_name] + + return { + "text": text, + "speaker_id": speaker_id, + "style_wav": style_wav, + "d_vector": d_vector, + "language_id": language_id, + } + + def format_batch(self, batch: Dict) -> Dict: + """Generic batch formatting for `VCDataset`. + + You must override this if you use a custom dataset. 
+ + Args: + batch (Dict): [description] + + Returns: + Dict: [description] + """ + # setup input batch + text_input = batch["token_id"] + text_lengths = batch["token_id_lengths"] + speaker_names = batch["speaker_names"] + linear_input = batch["linear"] + mel_input = batch["mel"] + mel_lengths = batch["mel_lengths"] + stop_targets = batch["stop_targets"] + item_idx = batch["item_idxs"] + d_vectors = batch["d_vectors"] + speaker_ids = batch["speaker_ids"] + attn_mask = batch["attns"] + waveform = batch["waveform"] + pitch = batch["pitch"] + energy = batch["energy"] + language_ids = batch["language_ids"] + max_text_length = torch.max(text_lengths.float()) + max_spec_length = torch.max(mel_lengths.float()) + + # compute durations from attention masks + durations = None + if attn_mask is not None: + durations = torch.zeros(attn_mask.shape[0], attn_mask.shape[2]) + for idx, am in enumerate(attn_mask): + # compute raw durations + c_idxs = am[:, : text_lengths[idx], : mel_lengths[idx]].max(1)[1] + # c_idxs, counts = torch.unique_consecutive(c_idxs, return_counts=True) + c_idxs, counts = torch.unique(c_idxs, return_counts=True) + dur = torch.ones([text_lengths[idx]]).to(counts.dtype) + dur[c_idxs] = counts + # smooth the durations and set any 0 duration to 1 + # by cutting off from the largest duration indeces. + extra_frames = dur.sum() - mel_lengths[idx] + largest_idxs = torch.argsort(-dur)[:extra_frames] + dur[largest_idxs] -= 1 + assert ( + dur.sum() == mel_lengths[idx] + ), f" [!] total duration {dur.sum()} vs spectrogram length {mel_lengths[idx]}" + durations[idx, : text_lengths[idx]] = dur + + # set stop targets wrt reduction factor + stop_targets = stop_targets.view(text_input.shape[0], stop_targets.size(1) // self.config.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze(2) + stop_target_lengths = torch.divide(mel_lengths, self.config.r).ceil_() + + return { + "text_input": text_input, + "text_lengths": text_lengths, + "speaker_names": speaker_names, + "mel_input": mel_input, + "mel_lengths": mel_lengths, + "linear_input": linear_input, + "stop_targets": stop_targets, + "stop_target_lengths": stop_target_lengths, + "attn_mask": attn_mask, + "durations": durations, + "speaker_ids": speaker_ids, + "d_vectors": d_vectors, + "max_text_length": float(max_text_length), + "max_spec_length": float(max_spec_length), + "item_idx": item_idx, + "waveform": waveform, + "pitch": pitch, + "energy": energy, + "language_ids": language_ids, + "audio_unique_names": batch["audio_unique_names"], + } + + def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1): + weights = None + data_items = dataset.samples + + if getattr(config, "use_language_weighted_sampler", False): + alpha = getattr(config, "language_weighted_sampler_alpha", 1.0) + print(" > Using Language weighted sampler with alpha:", alpha) + weights = get_language_balancer_weights(data_items) * alpha + + if getattr(config, "use_speaker_weighted_sampler", False): + alpha = getattr(config, "speaker_weighted_sampler_alpha", 1.0) + print(" > Using Speaker weighted sampler with alpha:", alpha) + if weights is not None: + weights += get_speaker_balancer_weights(data_items) * alpha + else: + weights = get_speaker_balancer_weights(data_items) * alpha + + if getattr(config, "use_length_weighted_sampler", False): + alpha = getattr(config, "length_weighted_sampler_alpha", 1.0) + print(" > Using Length weighted sampler with alpha:", alpha) + if weights is not None: + weights += get_length_balancer_weights(data_items) * 
alpha + else: + weights = get_length_balancer_weights(data_items) * alpha + + if weights is not None: + sampler = WeightedRandomSampler(weights, len(weights)) + else: + sampler = None + + # sampler for DDP + if sampler is None: + sampler = DistributedSampler(dataset) if num_gpus > 1 else None + else: # If a sampler is already defined use this sampler and DDP sampler together + sampler = DistributedSamplerWrapper(sampler) if num_gpus > 1 else sampler + + return sampler + + def get_data_loader( + self, + config: Coqpit, + assets: Dict, + is_eval: bool, + samples: Union[List[Dict], List[List]], + verbose: bool, + num_gpus: int, + rank: int = None, + ) -> "DataLoader": + if is_eval and not config.run_eval: + loader = None + else: + # setup multi-speaker attributes + if self.speaker_manager is not None: + if hasattr(config, "model_args"): + speaker_id_mapping = ( + self.speaker_manager.name_to_id if config.model_args.use_speaker_embedding else None + ) + d_vector_mapping = self.speaker_manager.embeddings if config.model_args.use_d_vector_file else None + config.use_d_vector_file = config.model_args.use_d_vector_file + else: + speaker_id_mapping = self.speaker_manager.name_to_id if config.use_speaker_embedding else None + d_vector_mapping = self.speaker_manager.embeddings if config.use_d_vector_file else None + else: + speaker_id_mapping = None + d_vector_mapping = None + + # setup multi-lingual attributes + if self.language_manager is not None: + language_id_mapping = self.language_manager.name_to_id if self.args.use_language_embedding else None + else: + language_id_mapping = None + + # init dataloader + dataset = TTSDataset( + outputs_per_step=config.r if "r" in config else 1, + compute_linear_spec=config.model.lower() == "tacotron" or config.compute_linear_spec, + compute_f0=config.get("compute_f0", False), + f0_cache_path=config.get("f0_cache_path", None), + compute_energy=config.get("compute_energy", False), + energy_cache_path=config.get("energy_cache_path", None), + samples=samples, + ap=self.ap, + return_wav=config.return_wav if "return_wav" in config else False, + batch_group_size=0 if is_eval else config.batch_group_size * config.batch_size, + min_text_len=config.min_text_len, + max_text_len=config.max_text_len, + min_audio_len=config.min_audio_len, + max_audio_len=config.max_audio_len, + phoneme_cache_path=config.phoneme_cache_path, + precompute_num_workers=config.precompute_num_workers, + use_noise_augment=False if is_eval else config.use_noise_augment, + verbose=verbose, + speaker_id_mapping=speaker_id_mapping, + d_vector_mapping=d_vector_mapping if config.use_d_vector_file else None, + tokenizer=None, + start_by_longest=config.start_by_longest, + language_id_mapping=language_id_mapping, + ) + + # wait all the DDP process to be ready + if num_gpus > 1: + dist.barrier() + + # sort input sequences from short to long + dataset.preprocess_samples() + + # get samplers + sampler = self.get_sampler(config, dataset, num_gpus) + + loader = DataLoader( + dataset, + batch_size=config.eval_batch_size if is_eval else config.batch_size, + shuffle=config.shuffle if sampler is None else False, # if there is no other sampler + collate_fn=dataset.collate_fn, + drop_last=config.drop_last, # setting this False might cause issues in AMP training. 
+ sampler=sampler, + num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers, + pin_memory=False, + ) + return loader + + def _get_test_aux_input( + self, + ) -> Dict: + d_vector = None + if self.config.use_d_vector_file: + d_vector = [self.speaker_manager.embeddings[name]["embedding"] for name in self.speaker_manager.embeddings] + d_vector = (random.sample(sorted(d_vector), 1),) + + aux_inputs = { + "speaker_id": None + if not self.config.use_speaker_embedding + else random.sample(sorted(self.speaker_manager.name_to_id.values()), 1), + "d_vector": d_vector, + "style_wav": None, # TODO: handle GST style input + } + return aux_inputs + + def test_run(self, assets: Dict) -> Tuple[Dict, Dict]: + """Generic test run for `vc` models used by `Trainer`. + + You can override this for a different behaviour. + + Args: + assets (dict): A dict of training assets. For `vc` models, it must include `{'audio_processor': ap}`. + + Returns: + Tuple[Dict, Dict]: Test figures and audios to be projected to Tensorboard. + """ + print(" | > Synthesizing test sentences.") + test_audios = {} + test_figures = {} + test_sentences = self.config.test_sentences + aux_inputs = self._get_test_aux_input() + for idx, sen in enumerate(test_sentences): + if isinstance(sen, list): + aux_inputs = self.get_aux_input_from_test_sentences(sen) + sen = aux_inputs["text"] + outputs_dict = synthesis( + self, + sen, + self.config, + "cuda" in str(next(self.parameters()).device), + speaker_id=aux_inputs["speaker_id"], + d_vector=aux_inputs["d_vector"], + style_wav=aux_inputs["style_wav"], + use_griffin_lim=True, + do_trim_silence=False, + ) + test_audios["{}-audio".format(idx)] = outputs_dict["wav"] + test_figures["{}-prediction".format(idx)] = plot_spectrogram( + outputs_dict["outputs"]["model_outputs"], self.ap, output_fig=False + ) + test_figures["{}-alignment".format(idx)] = plot_alignment( + outputs_dict["outputs"]["alignments"], output_fig=False + ) + return test_figures, test_audios + + def on_init_start(self, trainer): + """Save the speaker.pth and language_ids.json at the beginning of the training. 
Also update both paths.""" + if self.speaker_manager is not None: + output_path = os.path.join(trainer.output_path, "speakers.pth") + self.speaker_manager.save_ids_to_file(output_path) + trainer.config.speakers_file = output_path + # some models don't have `model_args` set + if hasattr(trainer.config, "model_args"): + trainer.config.model_args.speakers_file = output_path + trainer.config.save_json(os.path.join(trainer.output_path, "config.json")) + print(f" > `speakers.pth` is saved to {output_path}.") + print(" > `speakers_file` is updated in the config.json.") + + if self.language_manager is not None: + output_path = os.path.join(trainer.output_path, "language_ids.json") + self.language_manager.save_ids_to_file(output_path) + trainer.config.language_ids_file = output_path + if hasattr(trainer.config, "model_args"): + trainer.config.model_args.language_ids_file = output_path + trainer.config.save_json(os.path.join(trainer.output_path, "config.json")) + print(f" > `language_ids.json` is saved to {output_path}.") + print(" > `language_ids_file` is updated in the config.json.") diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py new file mode 100644 index 0000000..8bb9989 --- /dev/null +++ b/TTS/vc/models/freevc.py @@ -0,0 +1,562 @@ +from typing import Dict, List, Optional, Tuple, Union + +import librosa +import numpy as np +import torch +from coqpit import Coqpit +from torch import nn +from torch.nn import Conv1d, Conv2d, ConvTranspose1d +from torch.nn import functional as F +from torch.nn.utils import spectral_norm +from torch.nn.utils.parametrizations import weight_norm +from torch.nn.utils.parametrize import remove_parametrizations + +import TTS.vc.modules.freevc.commons as commons +import TTS.vc.modules.freevc.modules as modules +from TTS.tts.utils.speakers import SpeakerManager +from TTS.utils.io import load_fsspec +from TTS.vc.configs.freevc_config import FreeVCConfig +from TTS.vc.models.base_vc import BaseVC +from TTS.vc.modules.freevc.commons import get_padding, init_weights +from TTS.vc.modules.freevc.mel_processing import mel_spectrogram_torch +from TTS.vc.modules.freevc.speaker_encoder.speaker_encoder import SpeakerEncoder as SpeakerEncoderEx +from TTS.vc.modules.freevc.wavlm import get_wavlm + + +class ResidualCouplingBlock(nn.Module): + def __init__(self, channels, hidden_channels, kernel_size, dilation_rate, n_layers, n_flows=4, gin_channels=0): + super().__init__() + self.channels = channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.n_flows = n_flows + self.gin_channels = gin_channels + + self.flows = nn.ModuleList() + for i in range(n_flows): + self.flows.append( + modules.ResidualCouplingLayer( + channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=gin_channels, + mean_only=True, + ) + ) + self.flows.append(modules.Flip()) + + def forward(self, x, x_mask, g=None, reverse=False): + if not reverse: + for flow in self.flows: + x, _ = flow(x, x_mask, g=g, reverse=reverse) + else: + for flow in reversed(self.flows): + x = flow(x, x_mask, g=g, reverse=reverse) + return x + + +class Encoder(nn.Module): + def __init__( + self, in_channels, out_channels, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=0 + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + 
self.n_layers = n_layers + self.gin_channels = gin_channels + + self.pre = nn.Conv1d(in_channels, hidden_channels, 1) + self.enc = modules.WN(hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=gin_channels) + self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) + + def forward(self, x, x_lengths, g=None): + x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) + x = self.pre(x) * x_mask + x = self.enc(x, x_mask, g=g) + stats = self.proj(x) * x_mask + m, logs = torch.split(stats, self.out_channels, dim=1) + z = (m + torch.randn_like(m) * torch.exp(logs)) * x_mask + return z, m, logs, x_mask + + +class Generator(torch.nn.Module): + def __init__( + self, + initial_channel, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + gin_channels=0, + ): + super(Generator, self).__init__() + self.num_kernels = len(resblock_kernel_sizes) + self.num_upsamples = len(upsample_rates) + self.conv_pre = Conv1d(initial_channel, upsample_initial_channel, 7, 1, padding=3) + resblock = modules.ResBlock1 if resblock == "1" else modules.ResBlock2 + + self.ups = nn.ModuleList() + for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): + self.ups.append( + weight_norm( + ConvTranspose1d( + upsample_initial_channel // (2**i), + upsample_initial_channel // (2 ** (i + 1)), + k, + u, + padding=(k - u) // 2, + ) + ) + ) + + self.resblocks = nn.ModuleList() + for i in range(len(self.ups)): + ch = upsample_initial_channel // (2 ** (i + 1)) + for j, (k, d) in enumerate(zip(resblock_kernel_sizes, resblock_dilation_sizes)): + self.resblocks.append(resblock(ch, k, d)) + + self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False) + self.ups.apply(init_weights) + + if gin_channels != 0: + self.cond = nn.Conv1d(gin_channels, upsample_initial_channel, 1) + + def forward(self, x, g=None): + x = self.conv_pre(x) + if g is not None: + x = x + self.cond(g) + + for i in range(self.num_upsamples): + x = F.leaky_relu(x, modules.LRELU_SLOPE) + x = self.ups[i](x) + xs = None + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i * self.num_kernels + j](x) + else: + xs += self.resblocks[i * self.num_kernels + j](x) + x = xs / self.num_kernels + x = F.leaky_relu(x) + x = self.conv_post(x) + x = torch.tanh(x) + + return x + + def remove_weight_norm(self): + print("Removing weight norm...") + for l in self.ups: + remove_parametrizations(l, "weight") + for l in self.resblocks: + remove_parametrizations(l, "weight") + + +class DiscriminatorP(torch.nn.Module): + def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False): + super(DiscriminatorP, self).__init__() + self.period = period + self.use_spectral_norm = use_spectral_norm + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList( + [ + norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(get_padding(kernel_size, 1), 0))), + ] + ) + self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0))) + + def forward(self, x): + fmap = [] + + # 1d to 2d + b, c, 
t = x.shape + if t % self.period != 0: # pad first + n_pad = self.period - (t % self.period) + x = F.pad(x, (0, n_pad), "reflect") + t = t + n_pad + x = x.view(b, c, t // self.period, self.period) + + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, modules.LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class DiscriminatorS(torch.nn.Module): + def __init__(self, use_spectral_norm=False): + super(DiscriminatorS, self).__init__() + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList( + [ + norm_f(Conv1d(1, 16, 15, 1, padding=7)), + norm_f(Conv1d(16, 64, 41, 4, groups=4, padding=20)), + norm_f(Conv1d(64, 256, 41, 4, groups=16, padding=20)), + norm_f(Conv1d(256, 1024, 41, 4, groups=64, padding=20)), + norm_f(Conv1d(1024, 1024, 41, 4, groups=256, padding=20)), + norm_f(Conv1d(1024, 1024, 5, 1, padding=2)), + ] + ) + self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1)) + + def forward(self, x): + fmap = [] + + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, modules.LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class MultiPeriodDiscriminator(torch.nn.Module): + def __init__(self, use_spectral_norm=False): + super(MultiPeriodDiscriminator, self).__init__() + periods = [2, 3, 5, 7, 11] + + discs = [DiscriminatorS(use_spectral_norm=use_spectral_norm)] + discs = discs + [DiscriminatorP(i, use_spectral_norm=use_spectral_norm) for i in periods] + self.discriminators = nn.ModuleList(discs) + + def forward(self, y, y_hat): + y_d_rs = [] + y_d_gs = [] + fmap_rs = [] + fmap_gs = [] + for i, d in enumerate(self.discriminators): + y_d_r, fmap_r = d(y) + y_d_g, fmap_g = d(y_hat) + y_d_rs.append(y_d_r) + y_d_gs.append(y_d_g) + fmap_rs.append(fmap_r) + fmap_gs.append(fmap_g) + + return y_d_rs, y_d_gs, fmap_rs, fmap_gs + + +class SpeakerEncoder(torch.nn.Module): + def __init__(self, mel_n_channels=80, model_num_layers=3, model_hidden_size=256, model_embedding_size=256): + super(SpeakerEncoder, self).__init__() + self.lstm = nn.LSTM(mel_n_channels, model_hidden_size, model_num_layers, batch_first=True) + self.linear = nn.Linear(model_hidden_size, model_embedding_size) + self.relu = nn.ReLU() + + def forward(self, mels): + self.lstm.flatten_parameters() + _, (hidden, _) = self.lstm(mels) + embeds_raw = self.relu(self.linear(hidden[-1])) + return embeds_raw / torch.norm(embeds_raw, dim=1, keepdim=True) + + def compute_partial_slices(self, total_frames, partial_frames, partial_hop): + mel_slices = [] + for i in range(0, total_frames - partial_frames, partial_hop): + mel_range = torch.arange(i, i + partial_frames) + mel_slices.append(mel_range) + + return mel_slices + + def embed_utterance(self, mel, partial_frames=128, partial_hop=64): + mel_len = mel.size(1) + last_mel = mel[:, -partial_frames:] + + if mel_len > partial_frames: + mel_slices = self.compute_partial_slices(mel_len, partial_frames, partial_hop) + mels = list(mel[:, s] for s in mel_slices) + mels.append(last_mel) + mels = torch.stack(tuple(mels), 0).squeeze(1) + + with torch.no_grad(): + partial_embeds = self(mels) + embed = torch.mean(partial_embeds, axis=0).unsqueeze(0) + # embed = embed / torch.linalg.norm(embed, 2) + else: + with torch.no_grad(): + embed = self(last_mel) + + return embed + + +class FreeVC(BaseVC): + """ + + Papaer:: + https://arxiv.org/abs/2210.15418# + + Paper Abstract:: + Voice conversion (VC) can be achieved by 
first extracting source content information and target speaker + information, and then reconstructing waveform with these information. However, current approaches normally + either extract dirty content information with speaker information leaked in, or demand a large amount of + annotated data for training. Besides, the quality of reconstructed waveform can be degraded by the + mismatch between conversion model and vocoder. In this paper, we adopt the end-to-end framework of VITS for + high-quality waveform reconstruction, and propose strategies for clean content information extraction without + text annotation. We disentangle content information by imposing an information bottleneck to WavLM features, + and propose the spectrogram-resize based data augmentation to improve the purity of extracted content + information. Experimental results show that the proposed method outperforms the latest VC models trained with + annotated data and has greater robustness. + + Original Code:: + https://github.com/OlaWod/FreeVC + + Examples: + >>> from TTS.vc.configs.freevc_config import FreeVCConfig + >>> from TTS.vc.models.freevc import FreeVC + >>> config = FreeVCConfig() + >>> model = FreeVC(config) + """ + + def __init__(self, config: Coqpit, speaker_manager: SpeakerManager = None): + super().__init__(config, None, speaker_manager, None) + + self.init_multispeaker(config) + + self.spec_channels = self.args.spec_channels + self.inter_channels = self.args.inter_channels + self.hidden_channels = self.args.hidden_channels + self.filter_channels = self.args.filter_channels + self.n_heads = self.args.n_heads + self.n_layers = self.args.n_layers + self.kernel_size = self.args.kernel_size + self.p_dropout = self.args.p_dropout + self.resblock = self.args.resblock + self.resblock_kernel_sizes = self.args.resblock_kernel_sizes + self.resblock_dilation_sizes = self.args.resblock_dilation_sizes + self.upsample_rates = self.args.upsample_rates + self.upsample_initial_channel = self.args.upsample_initial_channel + self.upsample_kernel_sizes = self.args.upsample_kernel_sizes + self.segment_size = self.args.segment_size + self.gin_channels = self.args.gin_channels + self.ssl_dim = self.args.ssl_dim + self.use_spk = self.args.use_spk + + self.enc_p = Encoder(self.args.ssl_dim, self.inter_channels, self.hidden_channels, 5, 1, 16) + self.dec = Generator( + self.inter_channels, + self.resblock, + self.resblock_kernel_sizes, + self.resblock_dilation_sizes, + self.upsample_rates, + self.upsample_initial_channel, + self.upsample_kernel_sizes, + gin_channels=self.gin_channels, + ) + self.enc_q = Encoder( + self.spec_channels, self.inter_channels, self.hidden_channels, 5, 1, 16, gin_channels=self.gin_channels + ) + self.flow = ResidualCouplingBlock( + self.inter_channels, self.hidden_channels, 5, 1, 4, gin_channels=self.gin_channels + ) + if not self.use_spk: + self.enc_spk = SpeakerEncoder(model_hidden_size=self.gin_channels, model_embedding_size=self.gin_channels) + else: + self.load_pretrained_speaker_encoder() + + self.wavlm = get_wavlm() + + @property + def device(self): + return next(self.parameters()).device + + def load_pretrained_speaker_encoder(self): + """Load pretrained speaker encoder model as mentioned in the paper.""" + print(" > Loading pretrained speaker encoder model ...") + self.enc_spk_ex = SpeakerEncoderEx( + "https://github.com/coqui-ai/TTS/releases/download/v0.13.0_models/speaker_encoder.pt" + ) + + def init_multispeaker(self, config: Coqpit): + """Initialize multi-speaker modules of a model. 
A model can be trained either with a speaker embedding layer + or with external `d_vectors` computed from a speaker encoder model. + + You must provide a `speaker_manager` at initialization to set up the multi-speaker modules. + + Args: + config (Coqpit): Model configuration. + data (List, optional): Dataset items to infer number of speakers. Defaults to None. + """ + self.num_spks = self.args.num_spks + if self.speaker_manager: + self.num_spks = self.speaker_manager.num_spks + + def forward( + self, + c: torch.Tensor, + spec: torch.Tensor, + g: Optional[torch.Tensor] = None, + mel: Optional[torch.Tensor] = None, + c_lengths: Optional[torch.Tensor] = None, + spec_lengths: Optional[torch.Tensor] = None, + ) -> Tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor], + ]: + """ + Forward pass of the model. + + Args: + c: WavLM features. Shape: (batch_size, c_seq_len). + spec: The input spectrogram. Shape: (batch_size, spec_seq_len, spec_dim). + g: The speaker embedding. Shape: (batch_size, spk_emb_dim). + mel: The input mel-spectrogram for the speaker encoder. Shape: (batch_size, mel_seq_len, mel_dim). + c_lengths: The lengths of the WavLM features. Shape: (batch_size,). + spec_lengths: The lengths of the spectrogram. Shape: (batch_size,). + + Returns: + o: The output spectrogram. Shape: (batch_size, spec_seq_len, spec_dim). + ids_slice: The slice indices. Shape: (batch_size, num_slices). + spec_mask: The spectrogram mask. Shape: (batch_size, spec_seq_len). + (z, z_p, m_p, logs_p, m_q, logs_q): A tuple of latent variables. + """ + + # If c_lengths is None, set it to the length of the last dimension of c + if c_lengths is None: + c_lengths = (torch.ones(c.size(0)) * c.size(-1)).to(c.device) + + # If spec_lengths is None, set it to the length of the last dimension of spec + if spec_lengths is None: + spec_lengths = (torch.ones(spec.size(0)) * spec.size(-1)).to(spec.device) + + # If use_spk is False, compute g from mel using enc_spk + g = None + if not self.use_spk: + g = self.enc_spk(mel).unsqueeze(-1) + + # Compute m_p, logs_p, z, m_q, logs_q, and spec_mask using enc_p and enc_q + _, m_p, logs_p, _ = self.enc_p(c, c_lengths) + z, m_q, logs_q, spec_mask = self.enc_q(spec.transpose(1, 2), spec_lengths, g=g) + + # Compute z_p using flow + z_p = self.flow(z, spec_mask, g=g) + + # Randomly slice z and compute o using dec + z_slice, ids_slice = commons.rand_slice_segments(z, spec_lengths, self.segment_size) + o = self.dec(z_slice, g=g) + + return o, ids_slice, spec_mask, (z, z_p, m_p, logs_p, m_q, logs_q) + + @torch.no_grad() + def inference(self, c, g=None, mel=None, c_lengths=None): + """ + Inference pass of the model + + Args: + c (torch.Tensor): Input tensor. Shape: (batch_size, c_seq_len). + g (torch.Tensor): Speaker embedding tensor. Shape: (batch_size, spk_emb_dim). + mel (torch.Tensor): Mel-spectrogram tensor. Shape: (batch_size, mel_seq_len, mel_dim). + c_lengths (torch.Tensor): Lengths of the input tensor. Shape: (batch_size,). + + Returns: + torch.Tensor: Output tensor. + """ + if c_lengths == None: + c_lengths = (torch.ones(c.size(0)) * c.size(-1)).to(c.device) + if not self.use_spk: + g = self.enc_spk.embed_utterance(mel) + g = g.unsqueeze(-1) + z_p, m_p, logs_p, c_mask = self.enc_p(c, c_lengths) + z = self.flow(z_p, c_mask, g=g, reverse=True) + o = self.dec(z * c_mask, g=g) + return o + + def extract_wavlm_features(self, y): + """Extract WavLM features from an audio tensor. 
+ + Args: + y (torch.Tensor): Audio tensor. Shape: (batch_size, audio_seq_len). + """ + + with torch.no_grad(): + c = self.wavlm.extract_features(y)[0] + c = c.transpose(1, 2) + return c + + def load_audio(self, wav): + """Read and format the input audio.""" + if isinstance(wav, str): + wav, _ = librosa.load(wav, sr=self.config.audio.input_sample_rate) + if isinstance(wav, np.ndarray): + wav = torch.from_numpy(wav).to(self.device) + if isinstance(wav, torch.Tensor): + wav = wav.to(self.device) + if isinstance(wav, list): + wav = torch.from_numpy(np.array(wav)).to(self.device) + return wav.float() + + @torch.inference_mode() + def voice_conversion(self, src, tgt): + """ + Voice conversion pass of the model. + + Args: + src (str or torch.Tensor): Source utterance. + tgt (str or torch.Tensor): Target utterance. + + Returns: + torch.Tensor: Output tensor. + """ + + wav_tgt = self.load_audio(tgt).cpu().numpy() + wav_tgt, _ = librosa.effects.trim(wav_tgt, top_db=20) + + if self.config.model_args.use_spk: + g_tgt = self.enc_spk_ex.embed_utterance(wav_tgt) + g_tgt = torch.from_numpy(g_tgt)[None, :, None].to(self.device) + else: + wav_tgt = torch.from_numpy(wav_tgt).unsqueeze(0).to(self.device) + mel_tgt = mel_spectrogram_torch( + wav_tgt, + self.config.audio.filter_length, + self.config.audio.n_mel_channels, + self.config.audio.input_sample_rate, + self.config.audio.hop_length, + self.config.audio.win_length, + self.config.audio.mel_fmin, + self.config.audio.mel_fmax, + ) + # src + wav_src = self.load_audio(src) + c = self.extract_wavlm_features(wav_src[None, :]) + + if self.config.model_args.use_spk: + audio = self.inference(c, g=g_tgt) + else: + audio = self.inference(c, mel=mel_tgt.transpose(1, 2)) + audio = audio[0][0].data.cpu().float().numpy() + return audio + + def eval_step(): + ... + + @staticmethod + def init_from_config(config: FreeVCConfig, samples: Union[List[List], List[Dict]] = None, verbose=True): + model = FreeVC(config) + return model + + def load_checkpoint(self, config, checkpoint_path, eval=False, strict=True, cache=False): + state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) + self.load_state_dict(state["model"], strict=strict) + if eval: + self.eval() + + def train_step(): + ... 
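As a quick orientation for reviewers, here is a minimal usage sketch of the FreeVC class added above, in the same doctest style as its class docstring. The checkpoint and audio paths ("freevc.pth", "source.wav", "target.wav") are placeholders and not files shipped with this patch; note that constructing the model also pulls the WavLM backbone via get_wavlm() (see the wavlm module further down).

    >>> from TTS.vc.configs.freevc_config import FreeVCConfig
    >>> from TTS.vc.models.freevc import FreeVC
    >>> config = FreeVCConfig()
    >>> model = FreeVC.init_from_config(config)
    >>> model.load_checkpoint(config, "freevc.pth", eval=True)  # placeholder checkpoint path
    >>> wav = model.voice_conversion(src="source.wav", tgt="target.wav")  # returns a numpy waveform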
diff --git a/TTS/vc/modules/__init__.py b/TTS/vc/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/vc/modules/freevc/__init__.py b/TTS/vc/modules/freevc/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/vc/modules/freevc/commons.py b/TTS/vc/modules/freevc/commons.py new file mode 100644 index 0000000..e799cc2 --- /dev/null +++ b/TTS/vc/modules/freevc/commons.py @@ -0,0 +1,164 @@ +import math + +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F + + +def init_weights(m, mean=0.0, std=0.01): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + m.weight.data.normal_(mean, std) + + +def get_padding(kernel_size, dilation=1): + return int((kernel_size * dilation - dilation) / 2) + + +def convert_pad_shape(pad_shape): + l = pad_shape[::-1] + pad_shape = [item for sublist in l for item in sublist] + return pad_shape + + +def intersperse(lst, item): + result = [item] * (len(lst) * 2 + 1) + result[1::2] = lst + return result + + +def kl_divergence(m_p, logs_p, m_q, logs_q): + """KL(P||Q)""" + kl = (logs_q - logs_p) - 0.5 + kl += 0.5 * (torch.exp(2.0 * logs_p) + ((m_p - m_q) ** 2)) * torch.exp(-2.0 * logs_q) + return kl + + +def rand_gumbel(shape): + """Sample from the Gumbel distribution, protect from overflows.""" + uniform_samples = torch.rand(shape) * 0.99998 + 0.00001 + return -torch.log(-torch.log(uniform_samples)) + + +def rand_gumbel_like(x): + g = rand_gumbel(x.size()).to(dtype=x.dtype, device=x.device) + return g + + +def slice_segments(x, ids_str, segment_size=4): + ret = torch.zeros_like(x[:, :, :segment_size]) + for i in range(x.size(0)): + idx_str = ids_str[i] + idx_end = idx_str + segment_size + ret[i] = x[i, :, idx_str:idx_end] + return ret + + +def rand_slice_segments(x, x_lengths=None, segment_size=4): + b, d, t = x.size() + if x_lengths is None: + x_lengths = t + ids_str_max = x_lengths - segment_size + 1 + ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long) + ret = slice_segments(x, ids_str, segment_size) + return ret, ids_str + + +def rand_spec_segments(x, x_lengths=None, segment_size=4): + b, d, t = x.size() + if x_lengths is None: + x_lengths = t + ids_str_max = x_lengths - segment_size + ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long) + ret = slice_segments(x, ids_str, segment_size) + return ret, ids_str + + +def get_timing_signal_1d(length, channels, min_timescale=1.0, max_timescale=1.0e4): + position = torch.arange(length, dtype=torch.float) + num_timescales = channels // 2 + log_timescale_increment = math.log(float(max_timescale) / float(min_timescale)) / (num_timescales - 1) + inv_timescales = min_timescale * torch.exp( + torch.arange(num_timescales, dtype=torch.float) * -log_timescale_increment + ) + scaled_time = position.unsqueeze(0) * inv_timescales.unsqueeze(1) + signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], 0) + signal = F.pad(signal, [0, 0, 0, channels % 2]) + signal = signal.view(1, channels, length) + return signal + + +def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): + b, channels, length = x.size() + signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale) + return x + signal.to(dtype=x.dtype, device=x.device) + + +def cat_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4, axis=1): + b, channels, length = x.size() + signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale) + return 
torch.cat([x, signal.to(dtype=x.dtype, device=x.device)], axis) + + +def subsequent_mask(length): + mask = torch.tril(torch.ones(length, length)).unsqueeze(0).unsqueeze(0) + return mask + + +@torch.jit.script +def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels): + n_channels_int = n_channels[0] + in_act = input_a + input_b + t_act = torch.tanh(in_act[:, :n_channels_int, :]) + s_act = torch.sigmoid(in_act[:, n_channels_int:, :]) + acts = t_act * s_act + return acts + + +def shift_1d(x): + x = F.pad(x, convert_pad_shape([[0, 0], [0, 0], [1, 0]]))[:, :, :-1] + return x + + +def sequence_mask(length, max_length=None): + if max_length is None: + max_length = length.max() + x = torch.arange(max_length, dtype=length.dtype, device=length.device) + return x.unsqueeze(0) < length.unsqueeze(1) + + +def generate_path(duration, mask): + """ + duration: [b, 1, t_x] + mask: [b, 1, t_y, t_x] + """ + device = duration.device + + b, _, t_y, t_x = mask.shape + cum_duration = torch.cumsum(duration, -1) + + cum_duration_flat = cum_duration.view(b * t_x) + path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype) + path = path.view(b, t_x, t_y) + path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1] + path = path.unsqueeze(1).transpose(2, 3) * mask + return path + + +def clip_grad_value_(parameters, clip_value, norm_type=2): + if isinstance(parameters, torch.Tensor): + parameters = [parameters] + parameters = list(filter(lambda p: p.grad is not None, parameters)) + norm_type = float(norm_type) + if clip_value is not None: + clip_value = float(clip_value) + + total_norm = 0 + for p in parameters: + param_norm = p.grad.data.norm(norm_type) + total_norm += param_norm.item() ** norm_type + if clip_value is not None: + p.grad.data.clamp_(min=-clip_value, max=clip_value) + total_norm = total_norm ** (1.0 / norm_type) + return total_norm diff --git a/TTS/vc/modules/freevc/mel_processing.py b/TTS/vc/modules/freevc/mel_processing.py new file mode 100644 index 0000000..2dcbf21 --- /dev/null +++ b/TTS/vc/modules/freevc/mel_processing.py @@ -0,0 +1,125 @@ +import torch +import torch.utils.data +from librosa.filters import mel as librosa_mel_fn + +MAX_WAV_VALUE = 32768.0 + + +def dynamic_range_compression_torch(x, C=1, clip_val=1e-5): + """ + PARAMS + ------ + C: compression factor + """ + return torch.log(torch.clamp(x, min=clip_val) * C) + + +def dynamic_range_decompression_torch(x, C=1): + """ + PARAMS + ------ + C: compression factor used to compress + """ + return torch.exp(x) / C + + +def spectral_normalize_torch(magnitudes): + output = dynamic_range_compression_torch(magnitudes) + return output + + +def spectral_de_normalize_torch(magnitudes): + output = dynamic_range_decompression_torch(magnitudes) + return output + + +mel_basis = {} +hann_window = {} + + +def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False): + if torch.min(y) < -1.0: + print("min value is ", torch.min(y)) + if torch.max(y) > 1.0: + print("max value is ", torch.max(y)) + + global hann_window + dtype_device = str(y.dtype) + "_" + str(y.device) + wnsize_dtype_device = str(win_size) + "_" + dtype_device + if wnsize_dtype_device not in hann_window: + hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) + + y = torch.nn.functional.pad( + y.unsqueeze(1), (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), mode="reflect" + ) + y = y.squeeze(1) + + spec = torch.stft( + y, + n_fft, + hop_length=hop_size, + win_length=win_size, + 
window=hann_window[wnsize_dtype_device], + center=center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=False, + ) + + spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) + return spec + + +def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax): + global mel_basis + dtype_device = str(spec.dtype) + "_" + str(spec.device) + fmax_dtype_device = str(fmax) + "_" + dtype_device + if fmax_dtype_device not in mel_basis: + mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax) + mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device) + spec = torch.matmul(mel_basis[fmax_dtype_device], spec) + spec = spectral_normalize_torch(spec) + return spec + + +def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False): + if torch.min(y) < -1.0: + print("min value is ", torch.min(y)) + if torch.max(y) > 1.0: + print("max value is ", torch.max(y)) + + global mel_basis, hann_window + dtype_device = str(y.dtype) + "_" + str(y.device) + fmax_dtype_device = str(fmax) + "_" + dtype_device + wnsize_dtype_device = str(win_size) + "_" + dtype_device + if fmax_dtype_device not in mel_basis: + mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax) + mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device) + if wnsize_dtype_device not in hann_window: + hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) + + y = torch.nn.functional.pad( + y.unsqueeze(1), (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), mode="reflect" + ) + y = y.squeeze(1) + + spec = torch.stft( + y, + n_fft, + hop_length=hop_size, + win_length=win_size, + window=hann_window[wnsize_dtype_device], + center=center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=False, + ) + + spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) + + spec = torch.matmul(mel_basis[fmax_dtype_device], spec) + spec = spectral_normalize_torch(spec) + + return spec diff --git a/TTS/vc/modules/freevc/modules.py b/TTS/vc/modules/freevc/modules.py new file mode 100644 index 0000000..9bb5499 --- /dev/null +++ b/TTS/vc/modules/freevc/modules.py @@ -0,0 +1,387 @@ +import torch +from torch import nn +from torch.nn import Conv1d +from torch.nn import functional as F +from torch.nn.utils.parametrizations import weight_norm +from torch.nn.utils.parametrize import remove_parametrizations + +import TTS.vc.modules.freevc.commons as commons +from TTS.vc.modules.freevc.commons import get_padding, init_weights + +LRELU_SLOPE = 0.1 + + +class LayerNorm(nn.Module): + def __init__(self, channels, eps=1e-5): + super().__init__() + self.channels = channels + self.eps = eps + + self.gamma = nn.Parameter(torch.ones(channels)) + self.beta = nn.Parameter(torch.zeros(channels)) + + def forward(self, x): + x = x.transpose(1, -1) + x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps) + return x.transpose(1, -1) + + +class ConvReluNorm(nn.Module): + def __init__(self, in_channels, hidden_channels, out_channels, kernel_size, n_layers, p_dropout): + super().__init__() + self.in_channels = in_channels + self.hidden_channels = hidden_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.n_layers = n_layers + self.p_dropout = p_dropout + assert n_layers > 1, "Number of layers should be larger than 0." 
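+ # Stack of Conv1d -> LayerNorm -> ReLU -> Dropout layers; the final 1x1 projection is zero-initialized, so the residual add in forward() makes the block start out as an identity mapping.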
+ + self.conv_layers = nn.ModuleList() + self.norm_layers = nn.ModuleList() + self.conv_layers.append(nn.Conv1d(in_channels, hidden_channels, kernel_size, padding=kernel_size // 2)) + self.norm_layers.append(LayerNorm(hidden_channels)) + self.relu_drop = nn.Sequential(nn.ReLU(), nn.Dropout(p_dropout)) + for _ in range(n_layers - 1): + self.conv_layers.append(nn.Conv1d(hidden_channels, hidden_channels, kernel_size, padding=kernel_size // 2)) + self.norm_layers.append(LayerNorm(hidden_channels)) + self.proj = nn.Conv1d(hidden_channels, out_channels, 1) + self.proj.weight.data.zero_() + self.proj.bias.data.zero_() + + def forward(self, x, x_mask): + x_org = x + for i in range(self.n_layers): + x = self.conv_layers[i](x * x_mask) + x = self.norm_layers[i](x) + x = self.relu_drop(x) + x = x_org + self.proj(x) + return x * x_mask + + +class DDSConv(nn.Module): + """ + Dialted and Depth-Separable Convolution + """ + + def __init__(self, channels, kernel_size, n_layers, p_dropout=0.0): + super().__init__() + self.channels = channels + self.kernel_size = kernel_size + self.n_layers = n_layers + self.p_dropout = p_dropout + + self.drop = nn.Dropout(p_dropout) + self.convs_sep = nn.ModuleList() + self.convs_1x1 = nn.ModuleList() + self.norms_1 = nn.ModuleList() + self.norms_2 = nn.ModuleList() + for i in range(n_layers): + dilation = kernel_size**i + padding = (kernel_size * dilation - dilation) // 2 + self.convs_sep.append( + nn.Conv1d(channels, channels, kernel_size, groups=channels, dilation=dilation, padding=padding) + ) + self.convs_1x1.append(nn.Conv1d(channels, channels, 1)) + self.norms_1.append(LayerNorm(channels)) + self.norms_2.append(LayerNorm(channels)) + + def forward(self, x, x_mask, g=None): + if g is not None: + x = x + g + for i in range(self.n_layers): + y = self.convs_sep[i](x * x_mask) + y = self.norms_1[i](y) + y = F.gelu(y) + y = self.convs_1x1[i](y) + y = self.norms_2[i](y) + y = F.gelu(y) + y = self.drop(y) + x = x + y + return x * x_mask + + +class WN(torch.nn.Module): + def __init__(self, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=0, p_dropout=0): + super(WN, self).__init__() + assert kernel_size % 2 == 1 + self.hidden_channels = hidden_channels + self.kernel_size = (kernel_size,) + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.gin_channels = gin_channels + self.p_dropout = p_dropout + + self.in_layers = torch.nn.ModuleList() + self.res_skip_layers = torch.nn.ModuleList() + self.drop = nn.Dropout(p_dropout) + + if gin_channels != 0: + cond_layer = torch.nn.Conv1d(gin_channels, 2 * hidden_channels * n_layers, 1) + self.cond_layer = torch.nn.utils.parametrizations.weight_norm(cond_layer, name="weight") + + for i in range(n_layers): + dilation = dilation_rate**i + padding = int((kernel_size * dilation - dilation) / 2) + in_layer = torch.nn.Conv1d( + hidden_channels, 2 * hidden_channels, kernel_size, dilation=dilation, padding=padding + ) + in_layer = torch.nn.utils.parametrizations.weight_norm(in_layer, name="weight") + self.in_layers.append(in_layer) + + # last one is not necessary + if i < n_layers - 1: + res_skip_channels = 2 * hidden_channels + else: + res_skip_channels = hidden_channels + + res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1) + res_skip_layer = torch.nn.utils.parametrizations.weight_norm(res_skip_layer, name="weight") + self.res_skip_layers.append(res_skip_layer) + + def forward(self, x, x_mask, g=None, **kwargs): + output = torch.zeros_like(x) + n_channels_tensor = 
torch.IntTensor([self.hidden_channels]) + + if g is not None: + g = self.cond_layer(g) + + for i in range(self.n_layers): + x_in = self.in_layers[i](x) + if g is not None: + cond_offset = i * 2 * self.hidden_channels + g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :] + else: + g_l = torch.zeros_like(x_in) + + acts = commons.fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor) + acts = self.drop(acts) + + res_skip_acts = self.res_skip_layers[i](acts) + if i < self.n_layers - 1: + res_acts = res_skip_acts[:, : self.hidden_channels, :] + x = (x + res_acts) * x_mask + output = output + res_skip_acts[:, self.hidden_channels :, :] + else: + output = output + res_skip_acts + return output * x_mask + + def remove_weight_norm(self): + if self.gin_channels != 0: + remove_parametrizations(self.cond_layer, "weight") + for l in self.in_layers: + remove_parametrizations(l, "weight") + for l in self.res_skip_layers: + remove_parametrizations(l, "weight") + + +class ResBlock1(torch.nn.Module): + def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)): + super(ResBlock1, self).__init__() + self.convs1 = nn.ModuleList( + [ + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[2], + padding=get_padding(kernel_size, dilation[2]), + ) + ), + ] + ) + self.convs1.apply(init_weights) + + self.convs2 = nn.ModuleList( + [ + weight_norm( + Conv1d(channels, channels, kernel_size, 1, dilation=1, padding=get_padding(kernel_size, 1)) + ), + weight_norm( + Conv1d(channels, channels, kernel_size, 1, dilation=1, padding=get_padding(kernel_size, 1)) + ), + weight_norm( + Conv1d(channels, channels, kernel_size, 1, dilation=1, padding=get_padding(kernel_size, 1)) + ), + ] + ) + self.convs2.apply(init_weights) + + def forward(self, x, x_mask=None): + for c1, c2 in zip(self.convs1, self.convs2): + xt = F.leaky_relu(x, LRELU_SLOPE) + if x_mask is not None: + xt = xt * x_mask + xt = c1(xt) + xt = F.leaky_relu(xt, LRELU_SLOPE) + if x_mask is not None: + xt = xt * x_mask + xt = c2(xt) + x = xt + x + if x_mask is not None: + x = x * x_mask + return x + + def remove_weight_norm(self): + for l in self.convs1: + remove_parametrizations(l, "weight") + for l in self.convs2: + remove_parametrizations(l, "weight") + + +class ResBlock2(torch.nn.Module): + def __init__(self, channels, kernel_size=3, dilation=(1, 3)): + super(ResBlock2, self).__init__() + self.convs = nn.ModuleList( + [ + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]), + ) + ), + ] + ) + self.convs.apply(init_weights) + + def forward(self, x, x_mask=None): + for c in self.convs: + xt = F.leaky_relu(x, LRELU_SLOPE) + if x_mask is not None: + xt = xt * x_mask + xt = c(xt) + x = xt + x + if x_mask is not None: + x = x * x_mask + return x + + def remove_weight_norm(self): + for l in self.convs: + remove_parametrizations(l, "weight") + + +class Log(nn.Module): + def forward(self, x, x_mask, reverse=False, **kwargs): + if not reverse: + y = torch.log(torch.clamp_min(x, 1e-5)) * 
x_mask + logdet = torch.sum(-y, [1, 2]) + return y, logdet + else: + x = torch.exp(x) * x_mask + return x + + +class Flip(nn.Module): + def forward(self, x, *args, reverse=False, **kwargs): + x = torch.flip(x, [1]) + if not reverse: + logdet = torch.zeros(x.size(0)).to(dtype=x.dtype, device=x.device) + return x, logdet + else: + return x + + +class ElementwiseAffine(nn.Module): + def __init__(self, channels): + super().__init__() + self.channels = channels + self.m = nn.Parameter(torch.zeros(channels, 1)) + self.logs = nn.Parameter(torch.zeros(channels, 1)) + + def forward(self, x, x_mask, reverse=False, **kwargs): + if not reverse: + y = self.m + torch.exp(self.logs) * x + y = y * x_mask + logdet = torch.sum(self.logs * x_mask, [1, 2]) + return y, logdet + else: + x = (x - self.m) * torch.exp(-self.logs) * x_mask + return x + + +class ResidualCouplingLayer(nn.Module): + def __init__( + self, + channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + p_dropout=0, + gin_channels=0, + mean_only=False, + ): + assert channels % 2 == 0, "channels should be divisible by 2" + super().__init__() + self.channels = channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.half_channels = channels // 2 + self.mean_only = mean_only + + self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1) + self.enc = WN( + hidden_channels, kernel_size, dilation_rate, n_layers, p_dropout=p_dropout, gin_channels=gin_channels + ) + self.post = nn.Conv1d(hidden_channels, self.half_channels * (2 - mean_only), 1) + self.post.weight.data.zero_() + self.post.bias.data.zero_() + + def forward(self, x, x_mask, g=None, reverse=False): + x0, x1 = torch.split(x, [self.half_channels] * 2, 1) + h = self.pre(x0) * x_mask + h = self.enc(h, x_mask, g=g) + stats = self.post(h) * x_mask + if not self.mean_only: + m, logs = torch.split(stats, [self.half_channels] * 2, 1) + else: + m = stats + logs = torch.zeros_like(m) + + if not reverse: + x1 = m + x1 * torch.exp(logs) * x_mask + x = torch.cat([x0, x1], 1) + logdet = torch.sum(logs, [1, 2]) + return x, logdet + else: + x1 = (x1 - m) * torch.exp(-logs) * x_mask + x = torch.cat([x0, x1], 1) + return x diff --git a/TTS/vc/modules/freevc/speaker_encoder/__init__.py b/TTS/vc/modules/freevc/speaker_encoder/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/vc/modules/freevc/speaker_encoder/audio.py b/TTS/vc/modules/freevc/speaker_encoder/audio.py new file mode 100644 index 0000000..52f6fd0 --- /dev/null +++ b/TTS/vc/modules/freevc/speaker_encoder/audio.py @@ -0,0 +1,65 @@ +import struct +from pathlib import Path +from typing import Optional, Union + +# import webrtcvad +import librosa +import numpy as np +from scipy.ndimage.morphology import binary_dilation + +from TTS.vc.modules.freevc.speaker_encoder.hparams import * + +int16_max = (2**15) - 1 + + +def preprocess_wav(fpath_or_wav: Union[str, Path, np.ndarray], source_sr: Optional[int] = None): + """ + Applies the preprocessing operations used in training the Speaker Encoder to a waveform + either on disk or in memory. The waveform will be resampled to match the data hyperparameters. + + :param fpath_or_wav: either a filepath to an audio file (many extensions are supported, not + just .wav), either the waveform as a numpy array of floats. + :param source_sr: if passing an audio waveform, the sampling rate of the waveform before + preprocessing. 
After preprocessing, the waveform's sampling rate will match the data + hyperparameters. If passing a filepath, the sampling rate will be automatically detected and + this argument will be ignored. + """ + # Load the wav from disk if needed + if isinstance(fpath_or_wav, str) or isinstance(fpath_or_wav, Path): + wav, source_sr = librosa.load(fpath_or_wav, sr=None) + else: + wav = fpath_or_wav + + # Resample the wav if needed + if source_sr is not None and source_sr != sampling_rate: + wav = librosa.resample(wav, source_sr, sampling_rate) + + # Apply the preprocessing: normalize volume and shorten long silences + wav = normalize_volume(wav, audio_norm_target_dBFS, increase_only=True) + wav = trim_long_silences(wav) + + return wav + + +def wav_to_mel_spectrogram(wav): + """ + Derives a mel spectrogram ready to be used by the encoder from a preprocessed audio waveform. + Note: this not a log-mel spectrogram. + """ + frames = librosa.feature.melspectrogram( + y=wav, + sr=sampling_rate, + n_fft=int(sampling_rate * mel_window_length / 1000), + hop_length=int(sampling_rate * mel_window_step / 1000), + n_mels=mel_n_channels, + ) + return frames.astype(np.float32).T + + +def normalize_volume(wav, target_dBFS, increase_only=False, decrease_only=False): + if increase_only and decrease_only: + raise ValueError("Both increase only and decrease only are set") + dBFS_change = target_dBFS - 10 * np.log10(np.mean(wav**2)) + if (dBFS_change < 0 and increase_only) or (dBFS_change > 0 and decrease_only): + return wav + return wav * (10 ** (dBFS_change / 20)) diff --git a/TTS/vc/modules/freevc/speaker_encoder/hparams.py b/TTS/vc/modules/freevc/speaker_encoder/hparams.py new file mode 100644 index 0000000..2c536ae --- /dev/null +++ b/TTS/vc/modules/freevc/speaker_encoder/hparams.py @@ -0,0 +1,31 @@ +## Mel-filterbank +mel_window_length = 25 # In milliseconds +mel_window_step = 10 # In milliseconds +mel_n_channels = 40 + + +## Audio +sampling_rate = 16000 +# Number of spectrogram frames in a partial utterance +partials_n_frames = 160 # 1600 ms + + +## Voice Activation Detection +# Window size of the VAD. Must be either 10, 20 or 30 milliseconds. +# This sets the granularity of the VAD. Should not need to be changed. +vad_window_length = 30 # In milliseconds +# Number of frames to average together when performing the moving average smoothing. +# The larger this value, the larger the VAD variations must be to not get smoothed out. +vad_moving_average_width = 8 +# Maximum number of consecutive silent frames a segment can have. +vad_max_silence_length = 6 + + +## Audio volume normalization +audio_norm_target_dBFS = -30 + + +## Model parameters +model_hidden_size = 256 +model_embedding_size = 256 +model_num_layers = 3 diff --git a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py b/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py new file mode 100644 index 0000000..2e21a14 --- /dev/null +++ b/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py @@ -0,0 +1,175 @@ +from pathlib import Path +from time import perf_counter as timer +from typing import List, Union + +import numpy as np +import torch +from torch import nn + +from TTS.utils.io import load_fsspec +from TTS.vc.modules.freevc.speaker_encoder import audio +from TTS.vc.modules.freevc.speaker_encoder.hparams import * + + +class SpeakerEncoder(nn.Module): + def __init__(self, weights_fpath, device: Union[str, torch.device] = None, verbose=True): + """ + :param device: either a torch device or the name of a torch device (e.g. "cpu", "cuda"). 
+ If None, defaults to cuda if it is available on your machine, otherwise the model will + run on cpu. Outputs are always returned on the cpu, as numpy arrays. + """ + super().__init__() + + # Define the network + self.lstm = nn.LSTM(mel_n_channels, model_hidden_size, model_num_layers, batch_first=True) + self.linear = nn.Linear(model_hidden_size, model_embedding_size) + self.relu = nn.ReLU() + + # Get the target device + if device is None: + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + elif isinstance(device, str): + device = torch.device(device) + self.device = device + + # Load the pretrained model'speaker weights + # weights_fpath = Path(__file__).resolve().parent.joinpath("pretrained.pt") + # if not weights_fpath.exists(): + # raise Exception("Couldn't find the voice encoder pretrained model at %s." % + # weights_fpath) + + start = timer() + checkpoint = load_fsspec(weights_fpath, map_location="cpu") + + self.load_state_dict(checkpoint["model_state"], strict=False) + self.to(device) + + if verbose: + print("Loaded the voice encoder model on %s in %.2f seconds." % (device.type, timer() - start)) + + def forward(self, mels: torch.FloatTensor): + """ + Computes the embeddings of a batch of utterance spectrograms. + :param mels: a batch of mel spectrograms of same duration as a float32 tensor of shape + (batch_size, n_frames, n_channels) + :return: the embeddings as a float 32 tensor of shape (batch_size, embedding_size). + Embeddings are positive and L2-normed, thus they lay in the range [0, 1]. + """ + # Pass the input through the LSTM layers and retrieve the final hidden state of the last + # layer. Apply a cutoff to 0 for negative values and L2 normalize the embeddings. + _, (hidden, _) = self.lstm(mels) + embeds_raw = self.relu(self.linear(hidden[-1])) + return embeds_raw / torch.norm(embeds_raw, dim=1, keepdim=True) + + @staticmethod + def compute_partial_slices(n_samples: int, rate, min_coverage): + """ + Computes where to split an utterance waveform and its corresponding mel spectrogram to + obtain partial utterances of each. Both the waveform and the + mel spectrogram slices are returned, so as to make each partial utterance waveform + correspond to its spectrogram. + + The returned ranges may be indexing further than the length of the waveform. It is + recommended that you pad the waveform with zeros up to wav_slices[-1].stop. + + :param n_samples: the number of samples in the waveform + :param rate: how many partial utterances should occur per second. Partial utterances must + cover the span of the entire utterance, thus the rate should not be lower than the inverse + of the duration of a partial utterance. By default, partial utterances are 1.6s long and + the minimum rate is thus 0.625. + :param min_coverage: when reaching the last partial utterance, it may or may not have + enough frames. If at least of are present, + then the last partial utterance will be considered by zero-padding the audio. Otherwise, + it will be discarded. If there aren't enough frames for one partial utterance, + this parameter is ignored so that the function always returns at least one slice. + :return: the waveform slices and mel spectrogram slices as lists of array slices. Index + respectively the waveform and the mel spectrogram with these slices to obtain the partial + utterances. 
+ """ + assert 0 < min_coverage <= 1 + + # Compute how many frames separate two partial utterances + samples_per_frame = int((sampling_rate * mel_window_step / 1000)) + n_frames = int(np.ceil((n_samples + 1) / samples_per_frame)) + frame_step = int(np.round((sampling_rate / rate) / samples_per_frame)) + assert 0 < frame_step, "The rate is too high" + assert frame_step <= partials_n_frames, "The rate is too low, it should be %f at least" % ( + sampling_rate / (samples_per_frame * partials_n_frames) + ) + + # Compute the slices + wav_slices, mel_slices = [], [] + steps = max(1, n_frames - partials_n_frames + frame_step + 1) + for i in range(0, steps, frame_step): + mel_range = np.array([i, i + partials_n_frames]) + wav_range = mel_range * samples_per_frame + mel_slices.append(slice(*mel_range)) + wav_slices.append(slice(*wav_range)) + + # Evaluate whether extra padding is warranted or not + last_wav_range = wav_slices[-1] + coverage = (n_samples - last_wav_range.start) / (last_wav_range.stop - last_wav_range.start) + if coverage < min_coverage and len(mel_slices) > 1: + mel_slices = mel_slices[:-1] + wav_slices = wav_slices[:-1] + + return wav_slices, mel_slices + + def embed_utterance(self, wav: np.ndarray, return_partials=False, rate=1.3, min_coverage=0.75): + """ + Computes an embedding for a single utterance. The utterance is divided in partial + utterances and an embedding is computed for each. The complete utterance embedding is the + L2-normed average embedding of the partial utterances. + + TODO: independent batched version of this function + + :param wav: a preprocessed utterance waveform as a numpy array of float32 + :param return_partials: if True, the partial embeddings will also be returned along with + the wav slices corresponding to each partial utterance. + :param rate: how many partial utterances should occur per second. Partial utterances must + cover the span of the entire utterance, thus the rate should not be lower than the inverse + of the duration of a partial utterance. By default, partial utterances are 1.6s long and + the minimum rate is thus 0.625. + :param min_coverage: when reaching the last partial utterance, it may or may not have + enough frames. If at least of are present, + then the last partial utterance will be considered by zero-padding the audio. Otherwise, + it will be discarded. If there aren't enough frames for one partial utterance, + this parameter is ignored so that the function always returns at least one slice. + :return: the embedding as a numpy array of float32 of shape (model_embedding_size,). If + is True, the partial utterances as a numpy array of float32 of shape + (n_partials, model_embedding_size) and the wav partials as a list of slices will also be + returned. + """ + # Compute where to split the utterance into partials and pad the waveform with zeros if + # the partial utterances cover a larger range. 
+ wav_slices, mel_slices = self.compute_partial_slices(len(wav), rate, min_coverage) + max_wave_length = wav_slices[-1].stop + if max_wave_length >= len(wav): + wav = np.pad(wav, (0, max_wave_length - len(wav)), "constant") + + # Split the utterance into partials and forward them through the model + mel = audio.wav_to_mel_spectrogram(wav) + mels = np.array([mel[s] for s in mel_slices]) + with torch.no_grad(): + mels = torch.from_numpy(mels).to(self.device) + partial_embeds = self(mels).cpu().numpy() + + # Compute the utterance embedding from the partial embeddings + raw_embed = np.mean(partial_embeds, axis=0) + embed = raw_embed / np.linalg.norm(raw_embed, 2) + + if return_partials: + return embed, partial_embeds, wav_slices + return embed + + def embed_speaker(self, wavs: List[np.ndarray], **kwargs): + """ + Compute the embedding of a collection of wavs (presumably from the same speaker) by + averaging their embedding and L2-normalizing it. + + :param wavs: list of wavs a numpy arrays of float32. + :param kwargs: extra arguments to embed_utterance() + :return: the embedding as a numpy array of float32 of shape (model_embedding_size,). + """ + raw_embed = np.mean([self.embed_utterance(wav, return_partials=False, **kwargs) for wav in wavs], axis=0) + return raw_embed / np.linalg.norm(raw_embed, 2) diff --git a/TTS/vc/modules/freevc/wavlm/__init__.py b/TTS/vc/modules/freevc/wavlm/__init__.py new file mode 100644 index 0000000..6edada4 --- /dev/null +++ b/TTS/vc/modules/freevc/wavlm/__init__.py @@ -0,0 +1,35 @@ +import os +import urllib.request + +import torch + +from TTS.utils.generic_utils import get_user_data_dir +from TTS.vc.modules.freevc.wavlm.wavlm import WavLM, WavLMConfig + +model_uri = "https://github.com/coqui-ai/TTS/releases/download/v0.13.0_models/WavLM-Large.pt" + + +def get_wavlm(device="cpu"): + """Download the model and return the model object.""" + + output_path = get_user_data_dir("tts") + + output_path = os.path.join(output_path, "wavlm") + if not os.path.exists(output_path): + os.makedirs(output_path) + + output_path = os.path.join(output_path, "WavLM-Large.pt") + if not os.path.exists(output_path): + print(f" > Downloading WavLM model to {output_path} ...") + urllib.request.urlretrieve(model_uri, output_path) + + checkpoint = torch.load(output_path, map_location=torch.device(device)) + cfg = WavLMConfig(checkpoint["cfg"]) + wavlm = WavLM(cfg).to(device) + wavlm.load_state_dict(checkpoint["model"]) + wavlm.eval() + return wavlm + + +if __name__ == "__main__": + wavlm = get_wavlm() diff --git a/TTS/vc/modules/freevc/wavlm/config.json b/TTS/vc/modules/freevc/wavlm/config.json new file mode 100644 index 0000000..c6f851b --- /dev/null +++ b/TTS/vc/modules/freevc/wavlm/config.json @@ -0,0 +1,99 @@ +{ + "_name_or_path": "./wavlm-large/", + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "WavLMModel" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 768, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + 
"feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.075, + "mask_time_selection": "static", + "max_bucket_distance": 800, + "model_type": "wavlm", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_buckets": 320, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_ctc_classes": 80, + "num_feat_extract_layers": 7, + "num_hidden_layers": 24, + "num_negatives": 100, + "output_hidden_size": 1024, + "pad_token_id": 0, + "proj_codevector_dim": 768, + "replace_prob": 0.5, + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "torch_dtype": "float32", + "transformers_version": "4.15.0.dev0", + "use_weighted_layer_sum": false, + "vocab_size": 32 + } \ No newline at end of file diff --git a/TTS/vc/modules/freevc/wavlm/modules.py b/TTS/vc/modules/freevc/wavlm/modules.py new file mode 100644 index 0000000..37c1a6e --- /dev/null +++ b/TTS/vc/modules/freevc/wavlm/modules.py @@ -0,0 +1,768 @@ +# -------------------------------------------------------- +# WavLM: Large-Scale Self-Supervised Pre-training for Full Stack Speech Processing (https://arxiv.org/abs/2110.13900.pdf) +# Github source: https://github.com/microsoft/unilm/tree/master/wavlm +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +import math +import warnings +from typing import Dict, Optional, Tuple + +import torch +import torch.nn.functional as F +from torch import Tensor, nn +from torch.nn import Parameter + + +class TransposeLast(nn.Module): + def __init__(self, deconstruct_idx=None): + super().__init__() + self.deconstruct_idx = deconstruct_idx + + def forward(self, x): + if self.deconstruct_idx is not None: + x = x[self.deconstruct_idx] + return x.transpose(-2, -1) + + +class Fp32LayerNorm(nn.LayerNorm): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def forward(self, input): + output = F.layer_norm( + input.float(), + self.normalized_shape, + self.weight.float() if self.weight is not None else None, + self.bias.float() if self.bias is not None else None, + self.eps, + ) + return output.type_as(input) + + +class Fp32GroupNorm(nn.GroupNorm): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def forward(self, input): + output = F.group_norm( + input.float(), + self.num_groups, + self.weight.float() if self.weight is not None else None, + self.bias.float() if self.bias is not None else None, + self.eps, + ) + return output.type_as(input) + + +class GradMultiply(torch.autograd.Function): + @staticmethod + def forward(ctx, x, scale): + ctx.scale = scale + res = x.new(x) + return res + + @staticmethod + def backward(ctx, grad): + return grad * ctx.scale, None + + +class 
SamePad(nn.Module): + def __init__(self, kernel_size, causal=False): + super().__init__() + if causal: + self.remove = kernel_size - 1 + else: + self.remove = 1 if kernel_size % 2 == 0 else 0 + + def forward(self, x): + if self.remove > 0: + x = x[:, :, : -self.remove] + return x + + +class Swish(nn.Module): + """Swish function""" + + def __init__(self): + """Construct an MultiHeadedAttention object.""" + super(Swish, self).__init__() + self.act = torch.nn.Sigmoid() + + def forward(self, x): + return x * self.act(x) + + +class GLU_Linear(nn.Module): + def __init__(self, input_dim, output_dim, glu_type="sigmoid", bias_in_glu=True): + super(GLU_Linear, self).__init__() + + self.glu_type = glu_type + self.output_dim = output_dim + + if glu_type == "sigmoid": + self.glu_act = torch.nn.Sigmoid() + elif glu_type == "swish": + self.glu_act = Swish() + elif glu_type == "relu": + self.glu_act = torch.nn.ReLU() + elif glu_type == "gelu": + self.glu_act = torch.nn.GELU() + + if bias_in_glu: + self.linear = nn.Linear(input_dim, output_dim * 2, True) + else: + self.linear = nn.Linear(input_dim, output_dim * 2, False) + + def forward(self, x): + # to be consistent with GLU_Linear, we assume the input always has the #channel (#dim) in the last dimension of the tensor, so need to switch the dimension first for 1D-Conv case + x = self.linear(x) + + if self.glu_type == "bilinear": + x = x[:, :, 0 : self.output_dim] * x[:, :, self.output_dim : self.output_dim * 2] + else: + x = x[:, :, 0 : self.output_dim] * self.glu_act(x[:, :, self.output_dim : self.output_dim * 2]) + + return x + + +def gelu_accurate(x): + if not hasattr(gelu_accurate, "_a"): + gelu_accurate._a = math.sqrt(2 / math.pi) + return 0.5 * x * (1 + torch.tanh(gelu_accurate._a * (x + 0.044715 * torch.pow(x, 3)))) + + +def gelu(x: torch.Tensor) -> torch.Tensor: + return torch.nn.functional.gelu(x.float()).type_as(x) + + +def get_activation_fn(activation: str): + """Returns the activation function corresponding to `activation`""" + + if activation == "relu": + return F.relu + elif activation == "gelu": + return gelu + elif activation == "gelu_fast": + warnings.warn("--activation-fn=gelu_fast has been renamed to gelu_accurate") + return gelu_accurate + elif activation == "gelu_accurate": + return gelu_accurate + elif activation == "tanh": + return torch.tanh + elif activation == "linear": + return lambda x: x + elif activation == "glu": + return lambda x: x + else: + raise RuntimeError("--activation-fn {} not supported".format(activation)) + + +def init_bert_params(module): + """ + Initialize the weights specific to the BERT Model. + This overrides the default initializations depending on the specified arguments. + 1. If normal_init_linear_weights is set then weights of linear + layer will be initialized using the normal distribution and + bais will be set to the specified value. + 2. If normal_init_embed_weights is set then weights of embedding + layer will be initialized using the normal distribution. + 3. If normal_init_proj_weights is set then weights of + in_project_weight for MultiHeadAttention initialized using + the normal distribution (to be validated). 
+ """ + + def normal_(data): + # with FSDP, module params will be on CUDA, so we cast them back to CPU + # so that the RNG is consistent with and without FSDP + data.copy_(data.cpu().normal_(mean=0.0, std=0.02).to(data.device)) + + if isinstance(module, nn.Linear): + normal_(module.weight.data) + if module.bias is not None: + module.bias.data.zero_() + if isinstance(module, nn.Embedding): + normal_(module.weight.data) + if module.padding_idx is not None: + module.weight.data[module.padding_idx].zero_() + if isinstance(module, MultiheadAttention): + normal_(module.q_proj.weight.data) + normal_(module.k_proj.weight.data) + normal_(module.v_proj.weight.data) + + +def quant_noise(module, p, block_size): + """ + Wraps modules and applies quantization noise to the weights for + subsequent quantization with Iterative Product Quantization as + described in "Training with Quantization Noise for Extreme Model Compression" + + Args: + - module: nn.Module + - p: amount of Quantization Noise + - block_size: size of the blocks for subsequent quantization with iPQ + + Remarks: + - Module weights must have the right sizes wrt the block size + - Only Linear, Embedding and Conv2d modules are supported for the moment + - For more detail on how to quantize by blocks with convolutional weights, + see "And the Bit Goes Down: Revisiting the Quantization of Neural Networks" + - We implement the simplest form of noise here as stated in the paper + which consists in randomly dropping blocks + """ + + # if no quantization noise, don't register hook + if p <= 0: + return module + + # supported modules + assert isinstance(module, (nn.Linear, nn.Embedding, nn.Conv2d)) + + # test whether module.weight has the right sizes wrt block_size + is_conv = module.weight.ndim == 4 + + # 2D matrix + if not is_conv: + assert module.weight.size(1) % block_size == 0, "Input features must be a multiple of block sizes" + + # 4D matrix + else: + # 1x1 convolutions + if module.kernel_size == (1, 1): + assert module.in_channels % block_size == 0, "Input channels must be a multiple of block sizes" + # regular convolutions + else: + k = module.kernel_size[0] * module.kernel_size[1] + assert k % block_size == 0, "Kernel size must be a multiple of block size" + + def _forward_pre_hook(mod, input): + # no noise for evaluation + if mod.training: + if not is_conv: + # gather weight and sizes + weight = mod.weight + in_features = weight.size(1) + out_features = weight.size(0) + + # split weight matrix into blocks and randomly drop selected blocks + mask = torch.zeros(in_features // block_size * out_features, device=weight.device) + mask.bernoulli_(p) + mask = mask.repeat_interleave(block_size, -1).view(-1, in_features) + + else: + # gather weight and sizes + weight = mod.weight + in_channels = mod.in_channels + out_channels = mod.out_channels + + # split weight matrix into blocks and randomly drop selected blocks + if mod.kernel_size == (1, 1): + mask = torch.zeros( + int(in_channels // block_size * out_channels), + device=weight.device, + ) + mask.bernoulli_(p) + mask = mask.repeat_interleave(block_size, -1).view(-1, in_channels) + else: + mask = torch.zeros(weight.size(0), weight.size(1), device=weight.device) + mask.bernoulli_(p) + mask = mask.unsqueeze(2).unsqueeze(3).repeat(1, 1, mod.kernel_size[0], mod.kernel_size[1]) + + # scale weights and apply mask + mask = mask.to(torch.bool) # x.bool() is not currently supported in TorchScript + s = 1 / (1 - p) + mod.weight.data = s * weight.masked_fill(mask, 0) + + 
module.register_forward_pre_hook(_forward_pre_hook) + return module + + +class MultiheadAttention(nn.Module): + """Multi-headed attention. + + See "Attention Is All You Need" for more details. + """ + + def __init__( + self, + embed_dim, + num_heads, + kdim=None, + vdim=None, + dropout=0.0, + bias=True, + add_bias_kv=False, + add_zero_attn=False, + self_attention=False, + encoder_decoder_attention=False, + q_noise=0.0, + qn_block_size=8, + has_relative_attention_bias=False, + num_buckets=32, + max_distance=128, + gru_rel_pos=False, + rescale_init=False, + ): + super().__init__() + self.embed_dim = embed_dim + self.kdim = kdim if kdim is not None else embed_dim + self.vdim = vdim if vdim is not None else embed_dim + self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim + + self.num_heads = num_heads + self.dropout_module = nn.Dropout(dropout) + + self.has_relative_attention_bias = has_relative_attention_bias + self.num_buckets = num_buckets + self.max_distance = max_distance + if self.has_relative_attention_bias: + self.relative_attention_bias = nn.Embedding(num_buckets, num_heads) + + self.head_dim = embed_dim // num_heads + self.q_head_dim = self.head_dim + self.k_head_dim = self.head_dim + assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" + self.scaling = self.head_dim**-0.5 + + self.self_attention = self_attention + self.encoder_decoder_attention = encoder_decoder_attention + + assert not self.self_attention or self.qkv_same_dim, ( + "Self-attention requires query, key and " "value to be of the same size" + ) + + k_bias = True + if rescale_init: + k_bias = False + + k_embed_dim = embed_dim + q_embed_dim = embed_dim + + self.k_proj = quant_noise(nn.Linear(self.kdim, k_embed_dim, bias=k_bias), q_noise, qn_block_size) + self.v_proj = quant_noise(nn.Linear(self.vdim, embed_dim, bias=bias), q_noise, qn_block_size) + self.q_proj = quant_noise(nn.Linear(embed_dim, q_embed_dim, bias=bias), q_noise, qn_block_size) + + self.out_proj = quant_noise(nn.Linear(embed_dim, embed_dim, bias=bias), q_noise, qn_block_size) + + if add_bias_kv: + self.bias_k = Parameter(torch.Tensor(1, 1, embed_dim)) + self.bias_v = Parameter(torch.Tensor(1, 1, embed_dim)) + else: + self.bias_k = self.bias_v = None + + self.add_zero_attn = add_zero_attn + + self.gru_rel_pos = gru_rel_pos + if self.gru_rel_pos: + self.grep_linear = nn.Linear(self.q_head_dim, 8) + self.grep_a = nn.Parameter(torch.ones(1, num_heads, 1, 1)) + + self.reset_parameters() + + def reset_parameters(self): + if self.qkv_same_dim: + # Empirically observed the convergence to be much better with + # the scaled initialization + nn.init.xavier_uniform_(self.k_proj.weight, gain=1 / math.sqrt(2)) + nn.init.xavier_uniform_(self.v_proj.weight, gain=1 / math.sqrt(2)) + nn.init.xavier_uniform_(self.q_proj.weight, gain=1 / math.sqrt(2)) + else: + nn.init.xavier_uniform_(self.k_proj.weight) + nn.init.xavier_uniform_(self.v_proj.weight) + nn.init.xavier_uniform_(self.q_proj.weight) + + nn.init.xavier_uniform_(self.out_proj.weight) + if self.out_proj.bias is not None: + nn.init.constant_(self.out_proj.bias, 0.0) + if self.bias_k is not None: + nn.init.xavier_normal_(self.bias_k) + if self.bias_v is not None: + nn.init.xavier_normal_(self.bias_v) + if self.has_relative_attention_bias: + nn.init.xavier_normal_(self.relative_attention_bias.weight) + + def _relative_positions_bucket(self, relative_positions, bidirectional=True): + num_buckets = self.num_buckets + max_distance = self.max_distance + 
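# T5-style relative position bucketing: half of the buckets cover small exact offsets, the rest grow logarithmically out to max_distance. +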
relative_buckets = 0 + + if bidirectional: + num_buckets = num_buckets // 2 + relative_buckets += (relative_positions > 0).to(torch.long) * num_buckets + relative_positions = torch.abs(relative_positions) + else: + relative_positions = -torch.min(relative_positions, torch.zeros_like(relative_positions)) + + max_exact = num_buckets // 2 + is_small = relative_positions < max_exact + + relative_postion_if_large = max_exact + ( + torch.log(relative_positions.float() / max_exact) + / math.log(max_distance / max_exact) + * (num_buckets - max_exact) + ).to(torch.long) + relative_postion_if_large = torch.min( + relative_postion_if_large, torch.full_like(relative_postion_if_large, num_buckets - 1) + ) + + relative_buckets += torch.where(is_small, relative_positions, relative_postion_if_large) + return relative_buckets + + def compute_bias(self, query_length, key_length): + context_position = torch.arange(query_length, dtype=torch.long)[:, None] + memory_position = torch.arange(key_length, dtype=torch.long)[None, :] + relative_position = memory_position - context_position + relative_position_bucket = self._relative_positions_bucket(relative_position, bidirectional=True) + relative_position_bucket = relative_position_bucket.to(self.relative_attention_bias.weight.device) + values = self.relative_attention_bias(relative_position_bucket) + values = values.permute([2, 0, 1]) + return values + + def forward( + self, + query, + key: Optional[Tensor], + value: Optional[Tensor], + key_padding_mask: Optional[Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + need_weights: bool = True, + static_kv: bool = False, + attn_mask: Optional[Tensor] = None, + before_softmax: bool = False, + need_head_weights: bool = False, + position_bias: Optional[Tensor] = None, + ) -> Tuple[Tensor, Optional[Tensor], Optional[Tensor]]: + """Input shape: Time x Batch x Channel + + Args: + key_padding_mask (ByteTensor, optional): mask to exclude + keys that are pads, of shape `(batch, src_len)`, where + padding elements are indicated by 1s. + need_weights (bool, optional): return the attention weights, + averaged over heads (default: False). + attn_mask (ByteTensor, optional): typically used to + implement causal attention, where the mask prevents the + attention from looking forward in time (default: None). + before_softmax (bool, optional): return the raw attention + weights and values before the attention softmax. + need_head_weights (bool, optional): return the attention + weights for each head. Implies *need_weights*. Default: + return the average attention weights over all heads. + """ + if need_head_weights: + need_weights = True + + is_tpu = query.device.type == "xla" + + tgt_len, bsz, embed_dim = query.size() + src_len = tgt_len + assert embed_dim == self.embed_dim + assert list(query.size()) == [tgt_len, bsz, embed_dim] + if key is not None: + src_len, key_bsz, _ = key.size() + if not torch.jit.is_scripting(): + assert key_bsz == bsz + assert value is not None + assert src_len, bsz == value.shape[:2] + + if self.has_relative_attention_bias and position_bias is None: + position_bias = self.compute_bias(tgt_len, src_len) + position_bias = position_bias.unsqueeze(0).repeat(bsz, 1, 1, 1).view(bsz * self.num_heads, tgt_len, src_len) + + if ( + not is_tpu # don't use PyTorch version on TPUs + and incremental_state is None + and not static_kv + # A workaround for quantization to work. Otherwise JIT compilation + # treats bias in linear module as method. 
+ and not torch.jit.is_scripting() + and self.q_head_dim == self.head_dim + ): + assert key is not None and value is not None + assert attn_mask is None + + attn_mask_rel_pos = None + if position_bias is not None: + attn_mask_rel_pos = position_bias + if self.gru_rel_pos: + query_layer = query.transpose(0, 1) + new_x_shape = query_layer.size()[:-1] + (self.num_heads, -1) + query_layer = query_layer.view(*new_x_shape) + query_layer = query_layer.permute(0, 2, 1, 3) + _B, _H, _L, __ = query_layer.size() + + gate_a, gate_b = torch.sigmoid( + self.grep_linear(query_layer).view(_B, _H, _L, 2, 4).sum(-1, keepdim=False) + ).chunk(2, dim=-1) + gate_a_1 = gate_a * (gate_b * self.grep_a - 1.0) + 2.0 + attn_mask_rel_pos = gate_a_1.view(bsz * self.num_heads, -1, 1) * position_bias + + attn_mask_rel_pos = attn_mask_rel_pos.view((-1, tgt_len, tgt_len)) + k_proj_bias = self.k_proj.bias + if k_proj_bias is None: + k_proj_bias = torch.zeros_like(self.q_proj.bias) + + x, attn = F.multi_head_attention_forward( + query, + key, + value, + self.embed_dim, + self.num_heads, + torch.empty([0]), + torch.cat((self.q_proj.bias, self.k_proj.bias, self.v_proj.bias)), + self.bias_k, + self.bias_v, + self.add_zero_attn, + self.dropout_module.p, + self.out_proj.weight, + self.out_proj.bias, + self.training, + # self.training or self.dropout_module.apply_during_inference, + key_padding_mask, + need_weights, + attn_mask_rel_pos, + use_separate_proj_weight=True, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + ) + return x, attn, position_bias + + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + if saved_state is not None and "prev_key" in saved_state: + # previous time steps are cached - no need to recompute + # key and value if they are static + if static_kv: + assert self.encoder_decoder_attention and not self.self_attention + key = value = None + else: + saved_state = None + + if self.self_attention: + q = self.q_proj(query) + k = self.k_proj(query) + v = self.v_proj(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.q_proj(query) + if key is None: + assert value is None + k = v = None + else: + k = self.k_proj(key) + v = self.v_proj(key) + + else: + assert key is not None and value is not None + q = self.q_proj(query) + k = self.k_proj(key) + v = self.v_proj(value) + q *= self.scaling + + if self.bias_k is not None: + assert self.bias_v is not None + k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + key_padding_mask.new_zeros(key_padding_mask.size(0), 1), + ], + dim=1, + ) + + q = q.contiguous().view(tgt_len, bsz * self.num_heads, self.q_head_dim).transpose(0, 1) + if k is not None: + k = k.contiguous().view(-1, bsz * self.num_heads, self.k_head_dim).transpose(0, 1) + if v is not None: + v = v.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1) + + if saved_state is not None: + # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) + if "prev_key" in saved_state: + _prev_key = saved_state["prev_key"] + assert _prev_key is not None + prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + k = prev_key + else: + assert k is not None + k = 
torch.cat([prev_key, k], dim=1) + src_len = k.size(1) + if "prev_value" in saved_state: + _prev_value = saved_state["prev_value"] + assert _prev_value is not None + prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + v = prev_value + else: + assert v is not None + v = torch.cat([prev_value, v], dim=1) + prev_key_padding_mask: Optional[Tensor] = None + if "prev_key_padding_mask" in saved_state: + prev_key_padding_mask = saved_state["prev_key_padding_mask"] + assert k is not None and v is not None + key_padding_mask = MultiheadAttention._append_prev_key_padding_mask( + key_padding_mask=key_padding_mask, + prev_key_padding_mask=prev_key_padding_mask, + batch_size=bsz, + src_len=k.size(1), + static_kv=static_kv, + ) + + saved_state["prev_key"] = k.view(bsz, self.num_heads, -1, self.head_dim) + saved_state["prev_value"] = v.view(bsz, self.num_heads, -1, self.head_dim) + saved_state["prev_key_padding_mask"] = key_padding_mask + # In this branch incremental_state is never None + assert incremental_state is not None + incremental_state = self._set_input_buffer(incremental_state, saved_state) + assert k is not None + assert k.size(1) == src_len + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. + if key_padding_mask is not None and key_padding_mask.dim() == 0: + key_padding_mask = None + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if self.add_zero_attn: + assert v is not None + src_len += 1 + k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + if attn_mask is not None: + attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + torch.zeros(key_padding_mask.size(0), 1).type_as(key_padding_mask), + ], + dim=1, + ) + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + + assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + attn_weights += attn_mask + + if key_padding_mask is not None: + # don't attend to padding symbols + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + if not is_tpu: + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), + float("-inf"), + ) + else: + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.masked_fill(key_padding_mask, float("-inf")) + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + if before_softmax: + return attn_weights, v, position_bias + + if position_bias is not None: + if self.gru_rel_pos == 1: + query_layer = q.view(bsz, self.num_heads, tgt_len, self.q_head_dim) + _B, _H, _L, __ = query_layer.size() + gate_a, gate_b = torch.sigmoid( + self.grep_linear(query_layer).view(_B, _H, _L, 2, 4).sum(-1, keepdim=False) + ).chunk(2, dim=-1) + gate_a_1 = gate_a * (gate_b * self.grep_a - 1.0) + 2.0 + position_bias = gate_a_1.view(bsz * self.num_heads, -1, 1) * position_bias + + position_bias = position_bias.view(attn_weights.size()) + + attn_weights = attn_weights + position_bias + + attn_weights_float = F.softmax(attn_weights, dim=-1) + attn_weights = 
attn_weights_float.type_as(attn_weights) + attn_probs = self.dropout_module(attn_weights) + + assert v is not None + attn = torch.bmm(attn_probs, v) + assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + attn = self.out_proj(attn) + attn_weights: Optional[Tensor] = None + if need_weights: + attn_weights = attn_weights_float.view(bsz, self.num_heads, tgt_len, src_len).transpose(1, 0) + if not need_head_weights: + # average attention weights over heads + attn_weights = attn_weights.mean(dim=0) + + return attn, attn_weights, position_bias + + @staticmethod + def _append_prev_key_padding_mask( + key_padding_mask: Optional[Tensor], + prev_key_padding_mask: Optional[Tensor], + batch_size: int, + src_len: int, + static_kv: bool, + ) -> Optional[Tensor]: + # saved key padding masks have shape (bsz, seq_len) + if prev_key_padding_mask is not None and static_kv: + new_key_padding_mask = prev_key_padding_mask + elif prev_key_padding_mask is not None and key_padding_mask is not None: + new_key_padding_mask = torch.cat([prev_key_padding_mask.float(), key_padding_mask.float()], dim=1) + # During incremental decoding, as the padding token enters and + # leaves the frame, there will be a time when prev or current + # is None + elif prev_key_padding_mask is not None: + if src_len > prev_key_padding_mask.size(1): + filler = torch.zeros( + (batch_size, src_len - prev_key_padding_mask.size(1)), + device=prev_key_padding_mask.device, + ) + new_key_padding_mask = torch.cat([prev_key_padding_mask.float(), filler.float()], dim=1) + else: + new_key_padding_mask = prev_key_padding_mask.float() + elif key_padding_mask is not None: + if src_len > key_padding_mask.size(1): + filler = torch.zeros( + (batch_size, src_len - key_padding_mask.size(1)), + device=key_padding_mask.device, + ) + new_key_padding_mask = torch.cat([filler.float(), key_padding_mask.float()], dim=1) + else: + new_key_padding_mask = key_padding_mask.float() + else: + new_key_padding_mask = prev_key_padding_mask + return new_key_padding_mask + + def _get_input_buffer( + self, incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] + ) -> Dict[str, Optional[Tensor]]: + result = self.get_incremental_state(incremental_state, "attn_state") + if result is not None: + return result + else: + empty_result: Dict[str, Optional[Tensor]] = {} + return empty_result + + def _set_input_buffer( + self, + incremental_state: Dict[str, Dict[str, Optional[Tensor]]], + buffer: Dict[str, Optional[Tensor]], + ): + return self.set_incremental_state(incremental_state, "attn_state", buffer) + + def apply_sparse_mask(self, attn_weights, tgt_len: int, src_len: int, bsz: int): + return attn_weights diff --git a/TTS/vc/modules/freevc/wavlm/wavlm.py b/TTS/vc/modules/freevc/wavlm/wavlm.py new file mode 100644 index 0000000..fc93bd4 --- /dev/null +++ b/TTS/vc/modules/freevc/wavlm/wavlm.py @@ -0,0 +1,719 @@ +# -------------------------------------------------------- +# WavLM: Large-Scale Self-Supervised Pre-training for Full Stack Speech Processing (https://arxiv.org/abs/2110.13900.pdf) +# Github source: https://github.com/microsoft/unilm/tree/master/wavlm +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +import logging +import math +from typing import List, Optional, Tuple + +import numpy as np +import 
torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import LayerNorm + +from TTS.vc.modules.freevc.wavlm.modules import ( + Fp32GroupNorm, + Fp32LayerNorm, + GLU_Linear, + GradMultiply, + MultiheadAttention, + SamePad, + TransposeLast, + get_activation_fn, + init_bert_params, +) + +logger = logging.getLogger(__name__) + + +def compute_mask_indices( + shape: Tuple[int, int], + padding_mask: Optional[torch.Tensor], + mask_prob: float, + mask_length: int, + mask_type: str = "static", + mask_other: float = 0.0, + min_masks: int = 0, + no_overlap: bool = False, + min_space: int = 0, +) -> np.ndarray: + """ + Computes random mask spans for a given shape + + Args: + shape: the the shape for which to compute masks. + should be of size 2 where first element is batch size and 2nd is timesteps + padding_mask: optional padding mask of the same size as shape, which will prevent masking padded elements + mask_prob: probability for each token to be chosen as start of the span to be masked. this will be multiplied by + number of timesteps divided by length of mask span to mask approximately this percentage of all elements. + however due to overlaps, the actual number will be smaller (unless no_overlap is True) + mask_type: how to compute mask lengths + static = fixed size + uniform = sample from uniform distribution [mask_other, mask_length*2] + normal = sample from normal distribution with mean mask_length and stdev mask_other. mask is min 1 element + poisson = sample from possion distribution with lambda = mask length + min_masks: minimum number of masked spans + no_overlap: if false, will switch to an alternative recursive algorithm that prevents spans from overlapping + min_space: only used if no_overlap is True, this is how many elements to keep unmasked between spans + """ + + bsz, all_sz = shape + mask = np.full((bsz, all_sz), False) + + all_num_mask = int( + # add a random number for probabilistic rounding + mask_prob * all_sz / float(mask_length) + + np.random.rand() + ) + + all_num_mask = max(min_masks, all_num_mask) + + mask_idcs = [] + for i in range(bsz): + if padding_mask is not None: + sz = all_sz - padding_mask[i].long().sum().item() + num_mask = int( + # add a random number for probabilistic rounding + mask_prob * sz / float(mask_length) + + np.random.rand() + ) + num_mask = max(min_masks, num_mask) + else: + sz = all_sz + num_mask = all_num_mask + + if mask_type == "static": + lengths = np.full(num_mask, mask_length) + elif mask_type == "uniform": + lengths = np.random.randint(mask_other, mask_length * 2 + 1, size=num_mask) + elif mask_type == "normal": + lengths = np.random.normal(mask_length, mask_other, size=num_mask) + lengths = [max(1, int(round(x))) for x in lengths] + elif mask_type == "poisson": + lengths = np.random.poisson(mask_length, size=num_mask) + lengths = [int(round(x)) for x in lengths] + else: + raise Exception("unknown mask selection " + mask_type) + + if sum(lengths) == 0: + lengths[0] = min(mask_length, sz - 1) + + if no_overlap: + mask_idc = [] + + def arrange(s, e, length, keep_length): + span_start = np.random.randint(s, e - length) + mask_idc.extend(span_start + i for i in range(length)) + + new_parts = [] + if span_start - s - min_space >= keep_length: + new_parts.append((s, span_start - min_space + 1)) + if e - span_start - keep_length - min_space > keep_length: + new_parts.append((span_start + length + min_space, e)) + return new_parts + + parts = [(0, sz)] + min_length = min(lengths) + for length in sorted(lengths, reverse=True): + 
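+                # Greedy span placement: weight each remaining part by its free room
+                # (zero if the span plus min_space cannot fit), sample one part in
+                # proportion to that weight, carve the span out of it with arrange(),
+                # and keep the leftover sub-parts available for the remaining spans.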
+                lens = np.fromiter(
+                    (e - s if e - s >= length + min_space else 0 for s, e in parts),
+                    int,  # np.int was removed in NumPy 1.24+; the builtin int is equivalent here
+                )
+                l_sum = np.sum(lens)
+                if l_sum == 0:
+                    break
+                probs = lens / np.sum(lens)
+                c = np.random.choice(len(parts), p=probs)
+                s, e = parts.pop(c)
+                parts.extend(arrange(s, e, length, min_length))
+            mask_idc = np.asarray(mask_idc)
+        else:
+            min_len = min(lengths)
+            if sz - min_len <= num_mask:
+                min_len = sz - num_mask - 1
+
+            mask_idc = np.random.choice(sz - min_len, num_mask, replace=False)
+
+            mask_idc = np.asarray([mask_idc[j] + offset for j in range(len(mask_idc)) for offset in range(lengths[j])])
+
+        mask_idcs.append(np.unique(mask_idc[mask_idc < sz]))
+
+    min_len = min([len(m) for m in mask_idcs])
+    for i, mask_idc in enumerate(mask_idcs):
+        if len(mask_idc) > min_len:
+            mask_idc = np.random.choice(mask_idc, min_len, replace=False)
+        mask[i, mask_idc] = True
+
+    return mask
+
+
+class WavLMConfig:
+    def __init__(self, cfg=None):
+        self.extractor_mode: str = "default"  # mode for feature extractor. default has a single group norm with d groups in the first conv block, whereas layer_norm has layer norms in every block (meant to use with normalize=True)
+        self.encoder_layers: int = 12  # num encoder layers in the transformer
+
+        self.encoder_embed_dim: int = 768  # encoder embedding dimension
+        self.encoder_ffn_embed_dim: int = 3072  # encoder embedding dimension for FFN
+        self.encoder_attention_heads: int = 12  # num encoder attention heads
+        self.activation_fn: str = "gelu"  # activation function to use
+
+        self.layer_norm_first: bool = False  # apply layernorm first in the transformer
+        self.conv_feature_layers: str = "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2"  # string describing convolutional feature extraction layers in form of a python list that contains [(dim, kernel_size, stride), ...]
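+        # WavLM.__init__ evaluates this string with eval(cfg.conv_feature_layers);
+        # the default expands to seven (dim, kernel_size, stride) tuples, e.g.
+        # (512, 10, 5) for the first block, which configure ConvFeatureExtractionModel.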
+ self.conv_bias: bool = False # include bias in conv encoder + self.feature_grad_mult: float = 1.0 # multiply feature extractor var grads by this + + self.normalize: bool = False # normalize input to have 0 mean and unit variance during training + + # dropouts + self.dropout: float = 0.1 # dropout probability for the transformer + self.attention_dropout: float = 0.1 # dropout probability for attention weights + self.activation_dropout: float = 0.0 # dropout probability after activation in FFN + self.encoder_layerdrop: float = 0.0 # probability of dropping a tarnsformer layer + self.dropout_input: float = 0.0 # dropout to apply to the input (after feat extr) + self.dropout_features: float = 0.0 # dropout to apply to the features (after feat extr) + + # masking + self.mask_length: int = 10 # mask length + self.mask_prob: float = 0.65 # probability of replacing a token with mask + self.mask_selection: str = "static" # how to choose mask length + self.mask_other: float = ( + 0 # secondary mask argument (used for more complex distributions), see help in compute_mask_indicesh + ) + self.no_mask_overlap: bool = False # whether to allow masks to overlap + self.mask_min_space: int = 1 # min space between spans (if no overlap is enabled) + + # channel masking + self.mask_channel_length: int = 10 # length of the mask for features (channels) + self.mask_channel_prob: float = 0.0 # probability of replacing a feature with 0 + self.mask_channel_selection: str = "static" # how to choose mask length for channel masking + self.mask_channel_other: float = ( + 0 # secondary mask argument (used for more complex distributions), see help in compute_mask_indices + ) + self.no_mask_channel_overlap: bool = False # whether to allow channel masks to overlap + self.mask_channel_min_space: int = 1 # min space between spans (if no overlap is enabled) + + # positional embeddings + self.conv_pos: int = 128 # number of filters for convolutional positional embeddings + self.conv_pos_groups: int = 16 # number of groups for convolutional positional embedding + + # relative position embedding + self.relative_position_embedding: bool = False # apply relative position embedding + self.num_buckets: int = 320 # number of buckets for relative position embedding + self.max_distance: int = 1280 # maximum distance for relative position embedding + self.gru_rel_pos: bool = False # apply gated relative position embedding + + if cfg is not None: + self.update(cfg) + + def update(self, cfg: dict): + self.__dict__.update(cfg) + + +class WavLM(nn.Module): + def __init__( + self, + cfg: WavLMConfig, + ) -> None: + super().__init__() + logger.info(f"WavLM Config: {cfg.__dict__}") + + self.cfg = cfg + feature_enc_layers = eval(cfg.conv_feature_layers) + self.embed = feature_enc_layers[-1][0] + + self.feature_extractor = ConvFeatureExtractionModel( + conv_layers=feature_enc_layers, + dropout=0.0, + mode=cfg.extractor_mode, + conv_bias=cfg.conv_bias, + ) + + self.post_extract_proj = ( + nn.Linear(self.embed, cfg.encoder_embed_dim) if self.embed != cfg.encoder_embed_dim else None + ) + + self.mask_prob = cfg.mask_prob + self.mask_selection = cfg.mask_selection + self.mask_other = cfg.mask_other + self.mask_length = cfg.mask_length + self.no_mask_overlap = cfg.no_mask_overlap + self.mask_min_space = cfg.mask_min_space + + self.mask_channel_prob = cfg.mask_channel_prob + self.mask_channel_selection = cfg.mask_channel_selection + self.mask_channel_other = cfg.mask_channel_other + self.mask_channel_length = cfg.mask_channel_length + 
self.no_mask_channel_overlap = cfg.no_mask_channel_overlap + self.mask_channel_min_space = cfg.mask_channel_min_space + + self.dropout_input = nn.Dropout(cfg.dropout_input) + self.dropout_features = nn.Dropout(cfg.dropout_features) + + self.feature_grad_mult = cfg.feature_grad_mult + + self.mask_emb = nn.Parameter(torch.FloatTensor(cfg.encoder_embed_dim).uniform_()) + + self.encoder = TransformerEncoder(cfg) + self.layer_norm = LayerNorm(self.embed) + + def apply_mask(self, x, padding_mask): + B, T, C = x.shape + if self.mask_prob > 0: + mask_indices = compute_mask_indices( + (B, T), + padding_mask, + self.mask_prob, + self.mask_length, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + ) + mask_indices = torch.from_numpy(mask_indices).to(x.device) + x[mask_indices] = self.mask_emb + else: + mask_indices = None + + if self.mask_channel_prob > 0: + mask_channel_indices = compute_mask_indices( + (B, C), + None, + self.mask_channel_prob, + self.mask_channel_length, + self.mask_channel_selection, + self.mask_channel_other, + no_overlap=self.no_mask_channel_overlap, + min_space=self.mask_channel_min_space, + ) + mask_channel_indices = torch.from_numpy(mask_channel_indices).to(x.device).unsqueeze(1).expand(-1, T, -1) + x[mask_channel_indices] = 0 + + return x, mask_indices + + def forward_padding_mask( + self, + features: torch.Tensor, + padding_mask: torch.Tensor, + ) -> torch.Tensor: + extra = padding_mask.size(1) % features.size(1) + if extra > 0: + padding_mask = padding_mask[:, :-extra] + padding_mask = padding_mask.view(padding_mask.size(0), features.size(1), -1) + # padding_mask = padding_mask.all(-1) + padding_mask = padding_mask.any(-1) + return padding_mask + + def extract_features( + self, + source: torch.Tensor, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = False, + ret_conv: bool = False, + output_layer: Optional[int] = None, + ret_layer_results: bool = False, + ): + if self.feature_grad_mult > 0: + features = self.feature_extractor(source) + if self.feature_grad_mult != 1.0: + features = GradMultiply.apply(features, self.feature_grad_mult) + else: + with torch.no_grad(): + features = self.feature_extractor(source) + + features = features.transpose(1, 2) + features = self.layer_norm(features) + + if padding_mask is not None: + padding_mask = self.forward_padding_mask(features, padding_mask) + + if self.post_extract_proj is not None: + features = self.post_extract_proj(features) + + features = self.dropout_input(features) + + if mask: + x, mask_indices = self.apply_mask(features, padding_mask) + else: + x = features + + # feature: (B, T, D), float + # target: (B, T), long + # x: (B, T, D), float + # padding_mask: (B, T), bool + # mask_indices: (B, T), bool + x, layer_results = self.encoder( + x, padding_mask=padding_mask, layer=None if output_layer is None else output_layer - 1 + ) + + res = {"x": x, "padding_mask": padding_mask, "features": features, "layer_results": layer_results} + + feature = res["features"] if ret_conv else res["x"] + if ret_layer_results: + feature = (feature, res["layer_results"]) + return feature, res["padding_mask"] + + +class ConvFeatureExtractionModel(nn.Module): + def __init__( + self, + conv_layers: List[Tuple[int, int, int]], + dropout: float = 0.0, + mode: str = "default", + conv_bias: bool = False, + conv_type: str = "default", + ): + super().__init__() + + assert mode in {"default", "layer_norm"} + + def block( + n_in, + n_out, + k, + stride, + 
is_layer_norm=False, + is_group_norm=False, + conv_bias=False, + ): + def make_conv(): + conv = nn.Conv1d(n_in, n_out, k, stride=stride, bias=conv_bias) + nn.init.kaiming_normal_(conv.weight) + return conv + + assert (is_layer_norm and is_group_norm) == False, "layer norm and group norm are exclusive" + + if is_layer_norm: + return nn.Sequential( + make_conv(), + nn.Dropout(p=dropout), + nn.Sequential( + TransposeLast(), + Fp32LayerNorm(dim, elementwise_affine=True), + TransposeLast(), + ), + nn.GELU(), + ) + elif is_group_norm: + return nn.Sequential( + make_conv(), + nn.Dropout(p=dropout), + Fp32GroupNorm(dim, dim, affine=True), + nn.GELU(), + ) + else: + return nn.Sequential(make_conv(), nn.Dropout(p=dropout), nn.GELU()) + + self.conv_type = conv_type + if self.conv_type == "default": + in_d = 1 + self.conv_layers = nn.ModuleList() + for i, cl in enumerate(conv_layers): + assert len(cl) == 3, "invalid conv definition: " + str(cl) + (dim, k, stride) = cl + + self.conv_layers.append( + block( + in_d, + dim, + k, + stride, + is_layer_norm=mode == "layer_norm", + is_group_norm=mode == "default" and i == 0, + conv_bias=conv_bias, + ) + ) + in_d = dim + elif self.conv_type == "conv2d": + in_d = 1 + self.conv_layers = nn.ModuleList() + for i, cl in enumerate(conv_layers): + assert len(cl) == 3 + (dim, k, stride) = cl + + self.conv_layers.append(torch.nn.Conv2d(in_d, dim, k, stride)) + self.conv_layers.append(torch.nn.ReLU()) + in_d = dim + elif self.conv_type == "custom": + in_d = 1 + idim = 80 + self.conv_layers = nn.ModuleList() + for i, cl in enumerate(conv_layers): + assert len(cl) == 3 + (dim, k, stride) = cl + self.conv_layers.append(torch.nn.Conv2d(in_d, dim, k, stride, padding=1)) + self.conv_layers.append(torch.nn.LayerNorm([dim, idim])) + self.conv_layers.append(torch.nn.ReLU()) + in_d = dim + if (i + 1) % 2 == 0: + self.conv_layers.append(torch.nn.MaxPool2d(2, stride=2, ceil_mode=True)) + idim = int(math.ceil(idim / 2)) + else: + pass + + def forward(self, x, mask=None): + # BxT -> BxCxT + x = x.unsqueeze(1) + if self.conv_type == "custom": + for conv in self.conv_layers: + if isinstance(conv, nn.LayerNorm): + x = x.transpose(1, 2) + x = conv(x).transpose(1, 2) + else: + x = conv(x) + x = x.transpose(2, 3).contiguous() + x = x.view(x.size(0), -1, x.size(-1)) + else: + for conv in self.conv_layers: + x = conv(x) + if self.conv_type == "conv2d": + b, c, t, f = x.size() + x = x.transpose(2, 3).contiguous().view(b, c * f, t) + return x + + +class TransformerEncoder(nn.Module): + def __init__(self, args): + super().__init__() + + self.dropout = args.dropout + self.embedding_dim = args.encoder_embed_dim + + self.pos_conv = nn.Conv1d( + self.embedding_dim, + self.embedding_dim, + kernel_size=args.conv_pos, + padding=args.conv_pos // 2, + groups=args.conv_pos_groups, + ) + dropout = 0 + std = math.sqrt((4 * (1.0 - dropout)) / (args.conv_pos * self.embedding_dim)) + nn.init.normal_(self.pos_conv.weight, mean=0, std=std) + nn.init.constant_(self.pos_conv.bias, 0) + + self.pos_conv = nn.utils.parametrizations.weight_norm(self.pos_conv, name="weight", dim=2) + self.pos_conv = nn.Sequential(self.pos_conv, SamePad(args.conv_pos), nn.GELU()) + + if hasattr(args, "relative_position_embedding"): + self.relative_position_embedding = args.relative_position_embedding + self.num_buckets = args.num_buckets + self.max_distance = args.max_distance + else: + self.relative_position_embedding = False + self.num_buckets = 0 + self.max_distance = 0 + + self.layers = nn.ModuleList( + [ + 
TransformerSentenceEncoderLayer( + embedding_dim=self.embedding_dim, + ffn_embedding_dim=args.encoder_ffn_embed_dim, + num_attention_heads=args.encoder_attention_heads, + dropout=self.dropout, + attention_dropout=args.attention_dropout, + activation_dropout=args.activation_dropout, + activation_fn=args.activation_fn, + layer_norm_first=args.layer_norm_first, + has_relative_attention_bias=(self.relative_position_embedding and i == 0), + num_buckets=self.num_buckets, + max_distance=self.max_distance, + gru_rel_pos=args.gru_rel_pos, + ) + for i in range(args.encoder_layers) + ] + ) + + self.layer_norm_first = args.layer_norm_first + self.layer_norm = LayerNorm(self.embedding_dim) + self.layerdrop = args.encoder_layerdrop + + self.apply(init_bert_params) + + def forward(self, x, padding_mask=None, streaming_mask=None, layer=None): + x, layer_results = self.extract_features(x, padding_mask, streaming_mask, layer) + + if self.layer_norm_first and layer is None: + x = self.layer_norm(x) + + return x, layer_results + + def extract_features(self, x, padding_mask=None, streaming_mask=None, tgt_layer=None): + if padding_mask is not None: + x[padding_mask] = 0 + + x_conv = self.pos_conv(x.transpose(1, 2)) + x_conv = x_conv.transpose(1, 2) + x += x_conv + + if not self.layer_norm_first: + x = self.layer_norm(x) + + x = F.dropout(x, p=self.dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + layer_results = [] + z = None + if tgt_layer is not None: + layer_results.append((x, z)) + r = None + pos_bias = None + for i, layer in enumerate(self.layers): + dropout_probability = np.random.random() + if not self.training or (dropout_probability > self.layerdrop): + x, z, pos_bias = layer( + x, + self_attn_padding_mask=padding_mask, + need_weights=False, + self_attn_mask=streaming_mask, + pos_bias=pos_bias, + ) + if tgt_layer is not None: + layer_results.append((x, z)) + if i == tgt_layer: + r = x + break + + if r is not None: + x = r + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + return x, layer_results + + +class TransformerSentenceEncoderLayer(nn.Module): + """ + Implements a Transformer Encoder Layer used in BERT/XLM style pre-trained + models. 
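+    Besides the standard pre-/post-LayerNorm variants (selected with
+    `layer_norm_first`), the layer supports a GLU feed-forward block
+    (`activation_fn="glu"`) and an optionally gated relative position bias that
+    is computed in the first encoder layer and handed to the following layers
+    via `pos_bias`.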
+ """ + + def __init__( + self, + embedding_dim: float = 768, + ffn_embedding_dim: float = 3072, + num_attention_heads: float = 8, + dropout: float = 0.1, + attention_dropout: float = 0.1, + activation_dropout: float = 0.1, + activation_fn: str = "relu", + layer_norm_first: bool = False, + has_relative_attention_bias: bool = False, + num_buckets: int = 0, + max_distance: int = 0, + rescale_init: bool = False, + gru_rel_pos: bool = False, + ) -> None: + super().__init__() + # Initialize parameters + self.embedding_dim = embedding_dim + self.dropout = dropout + self.activation_dropout = activation_dropout + + # Initialize blocks + self.activation_name = activation_fn + self.activation_fn = get_activation_fn(activation_fn) + self.self_attn = MultiheadAttention( + self.embedding_dim, + num_attention_heads, + dropout=attention_dropout, + self_attention=True, + has_relative_attention_bias=has_relative_attention_bias, + num_buckets=num_buckets, + max_distance=max_distance, + rescale_init=rescale_init, + gru_rel_pos=gru_rel_pos, + ) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(self.activation_dropout) + self.dropout3 = nn.Dropout(dropout) + + self.layer_norm_first = layer_norm_first + + # layer norm associated with the self attention layer + self.self_attn_layer_norm = LayerNorm(self.embedding_dim) + + if self.activation_name == "glu": + self.fc1 = GLU_Linear(self.embedding_dim, ffn_embedding_dim, "swish") + else: + self.fc1 = nn.Linear(self.embedding_dim, ffn_embedding_dim) + self.fc2 = nn.Linear(ffn_embedding_dim, self.embedding_dim) + + # layer norm associated with the position wise feed-forward NN + self.final_layer_norm = LayerNorm(self.embedding_dim) + + def forward( + self, + x: torch.Tensor, + self_attn_mask: torch.Tensor = None, + self_attn_padding_mask: torch.Tensor = None, + need_weights: bool = False, + pos_bias=None, + ): + """ + LayerNorm is applied either before or after the self-attention/ffn + modules similar to the original Transformer imlementation. + """ + residual = x + + if self.layer_norm_first: + x = self.self_attn_layer_norm(x) + x, attn, pos_bias = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + need_weights=False, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + x = self.dropout1(x) + x = residual + x + + residual = x + x = self.final_layer_norm(x) + if self.activation_name == "glu": + x = self.fc1(x) + else: + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + else: + x, attn, pos_bias = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + need_weights=need_weights, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + + x = self.dropout1(x) + x = residual + x + + x = self.self_attn_layer_norm(x) + + residual = x + if self.activation_name == "glu": + x = self.fc1(x) + else: + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + x = self.final_layer_norm(x) + + return x, attn, pos_bias diff --git a/TTS/vocoder/README.md b/TTS/vocoder/README.md new file mode 100644 index 0000000..b9fb17c --- /dev/null +++ b/TTS/vocoder/README.md @@ -0,0 +1,39 @@ +# Mozilla TTS Vocoders (Experimental) + +Here there are vocoder model implementations which can be combined with the other TTS models. 
+
+Currently, the following models are implemented:
+
+- Melgan
+- MultiBand-Melgan
+- ParallelWaveGAN
+- GAN-TTS (Discriminator Only)
+
+It is also easy to adapt other vocoder models, as we provide a flexible and modular (but not too modular) framework.
+
+## Training a model
+
+You can see an example [Colab Notebook]() (coming soon) that trains MelGAN on the LJSpeech dataset.
+
+In order to train a new model, gather all wav files into a folder and set `data_path` in `config.json` to that folder.
+
+You need to define the other relevant parameters in your `config.json` and then start training with the following command.
+
+```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --config_path path/to/config.json```
+
+Example config files can be found under the `tts/vocoder/configs/` folder.
+
+You can continue a previous training run with the following command.
+
+```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --continue_path path/to/your/model/folder```
+
+You can fine-tune a pre-trained model with the following command.
+
+```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --restore_path path/to/your/model.pth```
+
+Restoring a model starts a new training run in a different folder; it only loads the model weights from the given checkpoint file. Continuing a training run, by contrast, resumes in the same directory where the previous run left off.
+
+You can also follow your training runs on TensorBoard, as you do with our TTS models.
+
+## Acknowledgement
+Thanks to @kan-bayashi for his [repository](https://github.com/kan-bayashi/ParallelWaveGAN), which was the starting point of our work.
diff --git a/TTS/vocoder/__init__.py b/TTS/vocoder/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/TTS/vocoder/__pycache__/__init__.cpython-311.pyc b/TTS/vocoder/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a6bac699d605fe351ecd4be673568cad2d3573a0
GIT binary patch
literal 173
zcmZ3^%ge<81nFs6X(0MBh=2h`DC095kTIPhg&~+hlhJP_LlF~@{~09t%RxW1IJKx)
zzcR5nL*FH}IJ+djK;Jn(H?1<%Q$M-1xFkO}J}*BdwOBtSBv`*JKN-j_(vOeN%*!l^
pkJl@x{Ka9Do1apelWJGQ3N#R8WidaH_`uA_$oPQ)Miemv#Q+YLDW3oU

literal 0
HcmV?d00001

diff --git a/TTS/vocoder/configs/__init__.py b/TTS/vocoder/configs/__init__.py
new file mode 100644
index 0000000..b5e11b9
--- /dev/null
+++ b/TTS/vocoder/configs/__init__.py
@@ -0,0 +1,17 @@
+import importlib
+import os
+from inspect import isclass
+
+# import all files under configs/
+configs_dir = os.path.dirname(__file__)
+for file in os.listdir(configs_dir):
+    path = os.path.join(configs_dir, file)
+    if not file.startswith("_") and not file.startswith(".") and (file.endswith(".py") or os.path.isdir(path)):
+        config_name = file[: file.find(".py")] if file.endswith(".py") else file
+        module = importlib.import_module("TTS.vocoder.configs."
+ config_name) + for attribute_name in dir(module): + attribute = getattr(module, attribute_name) + + if isclass(attribute): + # Add the class to this package's variables + globals()[attribute_name] = attribute diff --git a/TTS/vocoder/configs/__pycache__/__init__.cpython-311.pyc b/TTS/vocoder/configs/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b04c0b64312d47c97c9514ab3c03919c293b420 GIT binary patch literal 1371 zcma)5L1@!Z7=CHmrdifzb*@gw97A!d+6E?Wm*QZK;RMIPZD3GB*SvPw(xl{NO6O%4 zp^U;R2v!m7X-;<#yy!(3Ui6qqzz{EjCr=i<$WFerjkE>9*W~^G|NY=t>DaKAOew)0pGL)PvF=m`|>1D5&#nGey^++@Vs^lgL?!3 z2l#C+p7Oa>AuonhXMPsk`N`8Ax{XdfzGoo?={?-v0e+i{_d~kIcu~6*$<8y-`+qY1 zUCe#V>}}Wbd@<0)Jk`;2*yFWZ*rl!&|5sfbhO|gvcBEa;^SgU_;5+H1fP$-d7S9~4 z_j#ZggzOd$I&Z*7G3ywNcB1%uoa6g(<~WSP{;r7+V4Q>lofTRbx_E34pK#zE31EK; z`{l86T_kRTIQ7HLEW`iATIOBGar8C@f>mg6s^^)-hQB~3}^p%W5>tRzE0aF~{YfKv`7OGtMe zzngXZ>#C$UJxEV$I=Uxefe=*0rWcSQd6h|tguH~SFss_-)rJB85(MGVa`>#V;yw^#BAZ*ZQc#jR}?Z(dr zLq_LQ4+1t?&h4r@YIV(wj9HPfZN~16m1fGIz0O^@gZR>E}Jte z*33!+Gx*yUpZ>zyZ<+3UzS5vAIl_kJ* zOoR^{IC5(r`atx7i3#S=KVg1?d{fkkh&lNt0#1FJ?`4&>A=aSf(o?<6_cC9;H8cIM zJ$u?2KH1FonVD|J{zu66r!{(ceH|}9GLxAxo8@AuST3H5$Cw#66Lun}rL-6mu{gWJ zOzklN(wWXZNIYDva}ou=U%Y1@!8XvtW@Hn(Eb5evF6 z7*bq0fBlx5c1_`Z<2o5Do34z5m+cu|Z-C`TCRi$FvQ+#gZcuEs3qSZ`%4N_A} zwJEHHus9Pt%+|;3KW)#!`GO`?pVUs#4vrm)V`plY;%KjM>}tZXTXF1G9D7oG#9r|4 zRai%=Lt*CBsIO09gHd0< z!iJ*00fikPOs#jgiS&a?`Vqx(Ff}9&KqjuR(bTZQ#uRoaHKMTbsPCY{4lC?PiYshF zVUwv*g(Ves6qY^~bYCvm_KM+{3&PGCPH0JE=U&Dxij3t5$@7M1pGyyoT;3LELyTJH=jVCGb8~!YX>n?sIISE;ax0g2JztI4D1#bq zdw`Ih|FJ3MWHOxXyqA?{D{MJ9rp+g%?fNqeaU^7FJ~h4={cpp<*AAQrX|y!m9refcRjsU z9Dc(Q+|8)D>ZqV$($sRTsuZJ~I;z)EF4u`snUmHx5lW)xAR|ODY|+SCA+H-U6j4{^ zmVDB*(th$RUxGgB$RfQ|z2ZHR{4lnHJfWvo4aX6-96QTrPao%FIpbkn+sxcaw3_Y% zM=I~pGe#OZBl^FiCwlJubGagTRye|YuU;z@kxrYb6iU2wUwCWWh1EUoxXy_hkB$~< z$;zyxU3j9jkn-R-hU5mX%mZ(5>r}y)BSTb3YMf)m@YAchv>pjQX*p1I-6R*Nm#bdo zqHsdrGvJ3#c8Y&zcv(n$->?gaPV_@i4?y5&tzLrJ(^Yqr;+}2`r`dGp3#R3ADY7}? 
z_#8x8e>IGa&z<ufJPx)-{gwx+6@X9jU)Ke_-4f$cDKo6(Av*1%FVKMh4juo{|G)lvC==n-PXz@Pw>4){<8c^>U|; zG?xES@>)*h0Gya;(kQ=#Qc#+!OnS`+k%KS!&W#%jTzDQ5yA>uRxh8#)<9|87z#ClO zap4@~d|WtgA-nn^@ui>fb=#FlCLg16k_JLAE?r*YipxbFM)tJrwa^(?*7%=Mi)>ri zx1oLtRh1k+C`Vz1ii=M2e3B?KWDKPV6{$)9Zz9OhV2BVMu>`9T~yST)ysx08-juSSI?=m`6)g{>be}dK3^biYIv7>!FC_*Rrahd z^CJDpw)<|D6QA$7S?sIcDautVW4%L`2IW-3tg&=gBdJNTMslrafBAT}D;Zs0Y~{J1~kVDh1eTArV$mnKIuB0h{HI zFDPCU6$Tv*hn%2A1yykH11F83tM23w9x&=|2{asl>89XyCjf3SVrD$c%*4+zQ~N1_ z^J&ZVt$pa(!YP?06I(>?QG)`>M6jpsd=c!fJy`^uZ=Wds%{*MBu-EvqUvEv{lsMH+ zuNiW6`l66`eK$Y-jhoA?-MluPE=b?aVe4oLIgP`^baZr`j*hO=4LZ@o_Nn~ZuRFfB zjogZ9%s(2Y!*b)nc51u(GK7$Q0PF0RvHzxv#`n>~?-u#pb>{V;50@<+W_aL?jzw$1 zU=y`Q$v@cFc<2Z^D)H9D0Hl0n0MhD@49br|`M5m#DGv2(*OS`T4)l1#09!Bu^F{~| zHSUM@74r2bwqUPQ$m!VH>MORJzAKfsLqLo1dI`{yKcJd#K_a^VpqhWsU-9OxBEOAi zJyc83+pwx={T?nZSkPWQURjKzp)EQmSj9GAJ1Ym0rC|SC2ZUfJHL1OKu!mkNW$SX# zqwt#jq{_(91ITJOb3rGygn4m$gBNsBTSG1iS!;v8d3+3FS=;7;k>Wqc8sa3Guo%L_ zVlbE20cwQHL;P%&3CyVm>#wk&^$WmFU}1_@kE1s%GMyAfD5;z5bD_z=sd8eu2D*|T-oF(uF0+cCL z4hq9osfAidx@BS5Z5M zXHz>@nmGQfQ0l*1?7v%1#3x%{F@QG&irUCCt<2&rtqPz6Z>{XW)cTF5i$(1Oo=xpU z>Coh}Q>Fgf#s1siA1D5C;vWb9;Ks#9{&vS4SL{aOD%b8HVk80#S-c6+p2Z=LL*8bnm*JpzdiY@lIVGNR~tTBxpW6b*eF zJ=KPBkPdEKsLH1S>Kcsdq4TxQK-acKIL>UyK# Os{}Y654_EzmiK=zQm!=s literal 0 HcmV?d00001 diff --git a/TTS/vocoder/configs/__pycache__/hifigan_config.cpython-311.pyc b/TTS/vocoder/configs/__pycache__/hifigan_config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a3db592e614ba2122b1d91cd8fe958a02b24287b GIT binary patch literal 6975 zcmcIp&2QYs73c19^=ZYH^=ZkDMzS1BiL%lvmg*R(QrofP6qcM;a*A${P}FjE#fD3+ zNv>_V2%tj_xfJj%I^-ba;G}R5xfJMM(DUlHQZkoJ5S}zr zB{?T2B%Vl0?@C(gGf9)V{2(Ps5ApiH2lza!BT_D{rB|itwtcFZPI{f1>Z*A|bsR5U z(7B;aC%pbks>9#CxblIOw=`~Fw#Q0li+7`cm*~-*JHmT_9SN|P z1lA+4mvTo1b~I=^Ca_~cTd%-+1MIlKjti_W*T?%YW`BSU2y8%LgSkP04FzpO0vi_C ziQKTjMg%sRJ0Y->go(ABYO$6Pv6h!bkFngS=)nXwo;xY934xu)woH0G*K`P5HGQG0 z|2QT|@9=_da)*^wTP<;y+YT#OHoICej7zGiv1M*xv)FBaw=!K_48JRP)l%8u7krFr zmX?-S!L~|lZEZDEC(et#b(mf%Teiz$lZ$ccJ$*v<%Q(r@v_JkuyXag9=}XWWgH1WE zJsnljK+B@R#41?D;hI>6!wsHyb+gFaP0nn+xal%~0M%u>sk^#r==W4tx6BNKDAbDK zI?T1$`lc_b^-MUiraO6CFX^W0TDH>22D@Q$W);L7F)GNCWU}5U6w%SiysE?~*JIO% zbJDub2COu92#Jc^e|^ z!)OH)m9isrSt+P_?5i_zfqi&>hT)mV^U=&9JwD;K$qmKP?{U#ByEwz>J%{Jej)CGS z`AyZt2$*sqJKtg^n>!nZmH$L(p6QJl_6y9FZ8b~SNz)CX*b##tW@i|>E_^h@S``l+ z&D|<w1c{FY*Hv(?&kkD@%ikV^U>60 zRa2Zz@^LNJdQsEhXDm}S;AByTGg8ZCLx<78(U4zKefESNI!p|N^}EmxwjQi1?jP7( zG&kgc+Y&dtgCef3Dt%w`lELU81N4?Dv2uc~Y z1`d5P9X2i~)dB(qtiJ@xz`4e@x4>1m0!55()|y@6alZ3v9)mv@zq%PH0HaPcY2XTc zF38`9lfLJI$iz#!bK}M`YT7V!yq(%$nSpX+b$i`4~5?N^$d9>^pA3 zRSe64r+OZd6E|Rbb?xdJ6TK`1B!m;g*z%QeV@tR?!{Em6AbxVcB3e9?kIKddwo@T~ zUWgUP@91;m3sSfY)`GKTghriG0%Qb8A)b%364VmJFC-+|+hvX!R&Q0f3d6r5c8{b% zQhSy#NH>1mq}Qk?i{|;gRUu=n7#hoOS{8PX>moM#gwUwv)hoX2C}pZUFfl?9P;0-Q?DSKdoHaqvWipY`4`^1>sdzqT-z!l zrF<$Ylnp*b!<&Q?npsm`5EE`>RZ%7zstO!!jCj38B48?og6qfexp{=}`MGmQ0uY#6 zg~d&)OtA)G#P2abhYsfe7Uo)!-_c#wTHu8x&MIb1EgUR%Dg3pm8l%HDYoS7TUt!&1Dv%5E{5|;rE4^Doq^5hFlGt z{ZGqvOCG~{$^XCmY}Uw+Mk&1z)K4MNfVb#WH{U2dKTtdecfPy-`S@#$3k``~Q#?3y zZ|ea-gpV)){46re&Yh1mFy^8TQ@&O{>L+Bcg$TX&;N0w`g)w;pEzi+jU+f(2A3(j{ zI&HY?-qEn=)5NZ9|hP?QVEwc5^CvV_foA)rQ-lP?t`T1)IQC)-%g+#02Oq{W%p4<8o*0rr`x<^ z%_m%MAgpOTS9-%?b<_FO8wk&ou?|CFb^NI5^@I&{YW%ZgkAa)Vc%|=GW^X#UW18Jk zoz2;Ixbul?m1i$orNY+D_h$1I$F)j0&1l@2#T~_LaFH|{TqMobcg4pu<*n~K-ZIqE zhNdpv8@$$dx0R{auVYr{ZGdg5#%6Yp>>jC|d1LqE-H&Tiuhq`X?&fxLwW;hko!@kR z`zHPUO{c%=*=w~^?4YN8yz4*$*p_UDF>rl(RP%3%g8PCkZ?H)(f=GSU>!Dy81`kM` z(1uK1xWzT>55-)(^*q@-PBp%F3hVl)uE`_#T^mi%CdKiNg_8z8!yEp=yg`dSNF92A 
z;HJI47T$5+AmYI$E`pKvU#G^tMs9+Zn4xrnN*Ky83b=MoXh|)l$#~N+1_|cSA53eiAFublPmGIj9l)>w$CkOSC zdROSE4;bdK#|caTJWa9gq0M;(0E#s+XO5{_T>NyrZp5pYl2sg|0Le#p_bY@l*^d zA5oNB71an@+7(5!@`_@UbiEX=M!lnog1aT86?F69pou($-9tdQkx8PI>3+>VBZ_o@ zwP`u_Spri8rU|@C;57hT;ZxpY&rk*Jww)m`M}XX(y+nXa&HfbuGJ0V(zAVX4*#zi6 z56;)v)9r@81Nk?JbjN`tU?C|TAKkw4`5RSv1kav4^4-{CRUX>utIGH;?#YX_(aiRZ z`>RzsgJ)09)J{+BE*vD=PmsL=q&g?t9!UUC2w;$%cd!xZHUZFp(y%njwwLdptIA_| z_T;hJ#M!ERVh0uYp52qr)=rIA<^KCxRmOLGPadz0O;+W>9l0vwJGm!M)))#yJN;2b zbC-iQX^hb*JeubfjDKH-2aep~slV>;>;C$C z_2ZviT^$m>Z07Ho-hN5?FCmpL9#mefq4K??Nm|5^a*Ll`cg-DFDUy1tWRJA0oE_DL4ggW1_U-7^bHDZB3E*ko!(DPZHHwnn}H4nXZL)I&vtY$d@B#s zT;AXpe2iL_mX=t?wsLG^V|}(loEHA!Fg=&IY?p=#rxe3f7=1eS`#Or5q(AzyopmmR zRJ&W7`9x|6o`oNlVFWh=E(u$v}lRz^%wM+N_cn99O=-~?8k zDk?6agd{c2v8B4{ZN6QzcZZU^vbKGTG;`0l?U93)>S2e}i zrg(0$)@4mYR9U8KAdjGq&{OkyLx+M9RuqM*FP?A>hlzo(eiu%~HiK2whXb3h&JF!2 zrlkX_1%G`rTb5-sD4d(>mciK^xw@#?s;MCo1t)`8RcUlHUvSxd^&UsA%gu@a@yT@f zgP>G1$c6Bf94LcSQfJ-_SKWfmoqA(!*acoMcUnzj`5z>&ZUzp(h>0c*@{2D8Ww&tB z8!m`U{FCq8yt&G_Z6j^#enN6~9GByAzZl>{b62sX94sHojdud0UA z5DetMN=~HBkMSu|*Hz#3*(z~U!`s{ow)UgJZE4e~snlM-VDmq-4KVS90+H~?x0h{HIPbgj!6?$?1M&Y%HSn?*`bHVWX z>Mjiay`t{QFXJ{#*Zx<%0C$9jd)mc)f4lCjKSaeHh~v{-vK+=WaQ;ug>kL&i33D?tJT7`MJwhF0*s%#$3AK zxK<9wMvXgjxH!xOx6`@cb~+a*z(;5EJ3qI5X{fm^O``ymRI%WF_{8+z&>CC? z^kOTp_VCWJ;T?J74&b#@lQ>>`T~rNsrmNn7z-x|-BI#oNAgevEdA-!)r@qP=&g-ML zhIHez*2Z@l3kW}QZef;DR4QhHaiTv)8e$?Dvk!wtmtZCxuR&5WcJKfV2W0|_0LhsC zXSC}-VNiQcG9c5$z>X3zdu!`;ES&NRTTR2rNvic&h7+*{gPDjog2C`!3i zp6n<~02O2Qgenfqn8gVAUi`tU>GBhqP2T4Xzim?vIOU7pkvDcludm5wbYReUgAH~G zvaHuvYnq^OEt&Az6ouY~6vgXMD4Ys-NKw2)igLH08bMF1qG(oHQS5e_A%-_5uUk>@ z+JR#TU1}Y4b)w5AE8D|FB-6u+O~$q-2_&d7hBpBFIH6MnKBG37E=~3c!b#iq41pwp zlLS5|aEibg0%rm6>PqJ+`y4fVLEt=r3j}^gfWl3r8b3N)2_-3w;+G&Wh3`=4k<8NZYJ@?>dZ zswfZc$we8zQ~UB%iJ>yI*I#WI!*qfrnD)h3V((TtDvwZv0L0?sSS!G50)nMknh=BV>bxf)(Xl-=R3sycfhUFP>L$@Jv^P*~m;&kO-b$6Fk-{hSJ%s#XEgAPZ z!$*DY*p#S*FMEmF2en{doOQNwA=Q)swOfcY01705_-6I9s-nOkw~N@ s%@muJ7t-ZoeSx{6o#kjc5;^h?q8TELeva6B4$lSeH1|02$m%m4rY literal 0 HcmV?d00001 diff --git a/TTS/vocoder/configs/__pycache__/multiband_melgan_config.cpython-311.pyc b/TTS/vocoder/configs/__pycache__/multiband_melgan_config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60702af1b936c12a7308f8ea62802e17e1098116 GIT binary patch literal 9352 zcmc&4O>f-BmAgyHmSn}UW!aJ}$zy%5Bd4u!o@a(LjMlEpl*sNE@UG`V)N2tz0071y62!D1;WM0_T)d-+Sb8m#ej`7!JA~ zz2W1`n|brT=FQN5YHe*6;Fn2%l>S?_%&f8Os)sJ<>ELb5?=SQO%oTlgj(b(<8~mQ$K+ zS#CpG<(d+YxJRza7QZpO@Sc$}6mDKK^t75u1w+6?dGM#{D6+rG12a^YR8(?idc8IV;gdI*Erm@=*BW{k=8M~dv?jVYz$sRlYNAZ_-!W$8}%_pSlq`>5_XEPfn+~nulm>kVPifvNZ4t_NM4CLYZ;=oj1$E~ za+u^jN!V0!gs?M&O(#bQoAKM?gqZIYW~(c*uFP{S zBkP`F%5NVLgg1Fw)w#uTvMFb|%}tA?4U-YURgVA+nyq;jooH@m@SFQs&gL|J(Zle~ zrAwDs+BC9kX=yQ0?wlg~x0ssE8K&)xS<3{CS02G5U~km#IvV%pn>91m#egmgo~E%; z%QoYcmpI@&HR!M^mbbV<%d)t}Q?{ySn7zuGsb*Gf<_#d*Ox0Cel{NLgY^#QzU?3V9 zXTU`|}%h{uUUtXU~pp8n&CP(chyMH!`DVVWq|nS*0{OB3@{jz>4tu~${lVek-Cebp8m9oD{v(#cQ2b}qbr7?)wmqj+!J+4rjy z7ED&-@F77~rntKq&HZJFF|A_1l{k+^UHO=X>91eFw?k~hFmK&Dj=2#T{( zs~mLCeHB(OcCp-4v)Ef9RR#m>RB8>2v|%mUJa_Bes>Rpxu$v6t3GaYm3kq9Uz*dNf zkVHXd89AGkNh~uUtE{SV%Qj%I#ppE&);BRR#-{xGj9*Xt^^9MirTW8a4-Bozv)Pv+ z*pL;6*2R|hAY9xgL-;YQVSd7G`vYaEi;Y=V*+ubT=x zG4+(wkmSN1orRu$A{g+4yLXz)b{_V>u(HB?67rprA%g8~7gTlM02mVCq<&85ae=vM zFzHPjI_dC-wsZUTJmaPb`MByOk=BM~^DKLJcAnKF9l8O=fyT$UZsaqoyRi>r{O^Dp)WS+TS&<&^Ze4tV@Swr3QMgT2566~5Hw8Yb;NV#;+!WtJam@B zD2?DBheB(uK9OCQHLEI3M3+P_lCl)MtBoo4To-E%-TkJKo?XPFlqp!aN5+N5OV zcfda43H!D{rZ3^%=a>2q*BfnjN~zlj(rMdIV5Z>s 
zH8n8}=^z~2FdJMoa=3}X{^a$T!u)0$z{~_Ciz--gr5Q4tF*XVL4|ReJCzz|kt(RX0 zIe8Vkehcg~aDd07Ic`dDQiqTn1XWVu3ho4)S@|p+&@(KbqdvA_%UUoY#u_rWI_@!% zn5pbn)nS6PD%Efp8gkJKDleJqx;zH+Vz{{ReAdwID#s%d)XR{l&s*i%WU5+vUR>D& zcV2Y(`S`02BUL+6P59VVx8-<2(I$u&KZ^_}C(c(ausWpd%3!@1@)FsrBSN>09Fg`> z?^?rcAg^*yJm;dyZ4ad&-hD^t+FXP?RJ=NWdKduQ=tq=j25#;D7*WJ8VsORTu&~tz zEh=2J3h~$$b~j%sgLuqs4c$h$2deL#+`~IBn7$HBoarh22|vC+e#e4)(eVx0S{;9r zTX$_EH-62?rZ?`~98cxpP?v=)R^iq-+#rqnm!ISQ<>z>+{o$bEU0M3~WE*aG z9l`p0fZM%;v%urrgTZ4RWuPKR87T741yDo;6cJToUqnF_;sPjOKeUhnwm>1?6MQmh ztG#)0UkP6}kC7JS};2RJWoIz`dCK+6a?oz$uE}e%;upKJo&gsYZm1TV#(c) z$pc<$Wlmq~`S(B0c3*ZIy=y0TAJiyAw}I|e+@|0HS)r%HelMPXekBZ&jZdIS-&vdVS0Jp$7&Z= zUjCs_1FC!POE$+~(idjiLgGmTs4=&52O~a6==bx}uXCGiq!BPIY zdk9TBbhLJx@D-g}%><6tv)eJyZN=U}Cgye!T6IdI90NQWdQ~4$+{5_B+v&^47PlSW z)$FbwY4vs%lDP;RqEkKX22hSMo~hLY{&-9+z}HaqYlt*k__-FsPajlU`2A}OKN7=> z-$JF1K{xX{y>J`7OxA6wN?P5P^-7BR!YchWOkkY=0G4{KD29wq7V} zd}m5YnYd-1h9TUxKm-0}Z(=-kJ9kofvBQrl;uZJMj!e*PuakgnVYqOQ)+ooH%x$mM zkNb)>Np7=*8w6bO!t()1x|f$Vzh$2!DMm_?%qE;62H)r0gOUWFgCOTA;~9RhziExguU7nq7#a08K_O@icCp#K#8u24MDM+aQ8b1g8*8Avl9z2EjQ1 zZVWEd%=3s|KyVSkYY5OX%r_8RLU0+u6$G;gt|GXFU>*Vf9|H3>f_D(Si(nA}CJ5xV z-ZljuD3}QFe;-)?0X-F}{%wnkk%s--0>O`>!r{Tf+=DZYH~^(64s16HY&PPE$2Qv? z@d}ipc%?Kl_9$QKyz6w{-Ht^^8^0F-{D8m_`yPqgLbRcg05rf;qtJ7{aQoq+Bc6j& z6wj3gMjy>M7xGvJ)`|mb-wmueV)y2ojtKQyQCur^bQRuupg3X|l%m)LV{{xZTz@bJ z-Jlf3?h@cP9^7%n<4}s?@lr=m;pPL&5qqE%#hy}c|K=U%Od3mXrr4YLt~cX|M;^kM zP-lu_rqmP17b!=CI$jjxrNQ{4xl*U@bn39IAzaoFE^7#ub$oMa=d$`K zMe$T==+vV-!2rWJz%ULl3e;7o%2```? zE{emY-jlvupgvg?PnJ#q{n1U)5uqL|ii0IKg!(_?h)@p|#UYTbPXcv+QS5)>EqvYS zTnEYRVc{oe01gxSa}JN7h~Nd(BSmqfggrVQ=3#tqiqc7VIJVhQc`*oGyEo@5B(*)f zuAU>H`7gv9-In10#&FBTFiuYH_E&#l_4DI|wF);>ik}r*W&`#nUkdvdB+Fi)xnwRw z7YOBobsxZXEE0*7gi)tnc_LhM!Ua?nZP||Aj7A2wh5FFxnfRWXejxaL3@AS9+DSu8 F{vRn*^RWN` literal 0 HcmV?d00001 diff --git a/TTS/vocoder/configs/__pycache__/parallel_wavegan_config.cpython-311.pyc b/TTS/vocoder/configs/__pycache__/parallel_wavegan_config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfaae4037a9fbb120825eb0317dccf34acc76557 GIT binary patch literal 8891 zcmcgyOK{u983sv6w&k~FS+brhS+*oUC`x|CPU80?NiEy8q}XMeLJ+Z}(0~LDfVSmK zr|OIkKI&Gb(}O36w3&1=Irf-ij^IoV9Guej(9v|JRmM5x(tiH}1PO|E9FJ3a^@IOD zu>XGlWB*HJ%9aOGh=Gpx@Z=%dM*>b2Vbf)JaytEAD7ZGO-jdSA!jUc^p_2vV6x9>Lh(s&;%ykQQH-YiD6g(EgM z8L>r-*dj)3O>gDfFn*ihwx_oXZinD@rgsQ#m*CpdJNYj3+%33{bi3g82(B}|TlDM_ z+}?DDkfJ;2xkvQu5nOw^Q*gaOTbJPag0{Va>kqhY!3_jlkKh<_0*k>WE9(_28xkY- zrTch4*|Zv<(VwEZ~L&cQD}g3GR^K4yT6&cO>BU3oa?R@$><) z>J)K8!igpljtB`S#fYQnQ8D6JIO2Gd5o2P+iD1M*G2&!6;H);yCmrn&*}wkvm$!rxx+1+Wlf7U3}E+s`%bpC zF?`NjIs3HuMqD12Z?I9@vBqkZ)G#F)3V1Io*<2GZwzf+Og0Dk=aS*%nr>$-J+Bv3$F!7sQP_Nj)Yyg6Rrfb==2ggpX8ngId` zF5lzUA~T^cixtem5rn&{Jp^*$q9IhS+Dv8PJkXyRSq+<82V>Z|PRU_i zJ$zO4B2yERVee<7$mTvADiv)tUo^OqRWm?|@O$srVHTA453?Z~G5-UqKMIC`wz#d# z8)i@muxoZWuR56p#nvBkHmVn()QDIYsFjIU;R&#D$5LVDg9eRu%O;ma5GSe3XKz zgQ_KeeWUZHX*2-N3+lYV**LkC_-3gE4NfmcGKf`^MlTdg4kIr{;L9h4Gy2_h7_aE5 zW)T=+)_L>{LRUn+S*|)I7)MlDGggU5<<6)XEdPb%)eC_EFd9UY2BFNCf}&|S>DvgQ zz%HYb?cBIA#kgf5hU?*H3nq+%jF0hxS;{RuPkg+iXe*AXz$q*z z8tj@`!Szyp9+*LBF_KyQZb_$z02>gm2pwR3fR4)1pnKcNI*MW1h@QUQYY~rP&pO2u zCXix!=E@8c!%S>C*e(oX(RYdY#mH_)(M{|IQ7P6d(=N@=!>9mA7Jl#|HW-K!4D_Y( z)#>kiO(dJbwx?gfG*jHb#`j{9<>4IY96>2!uT4NA3!CDYVft=~s|Y`rg$)rJgxU{* zK^FRQlfFeGS#6$Qm?a7cB|~GG1=ED~xGuIHza#8=YWlK|9VIH&Fe=^P#V+#9kJjC_2}a`_%giB> zd!{IF=~?|5vNV7b`dMSLQu8E{#fY3SG!M9U1Ul@q1tptx#O^wgn#ArpnL75`VRn3C zlD60dvq+m5_9wr`WC|UQfgDd!^L-swTx&j&Ld(P?QFMC0F4?I`d!LJ(ax`n0D&U(u$7oDv~945~tf^RyFiW|#8FD|4(=6nz}iQH{vr{A_c*d!&-bYjM9m 
zObD$i4H8CPIqe6PS1ffC7p{w{6fUm*uxR9VwbV)g^(qhzWUD3kllAENapeW1^P|I0 zCtrOSsl%=zd^}gT)p#MIO&BkJ5)A7ktPd#96|Y$Yd4<9~13<%4pfpx>*N8J*t)5_l zL_RncBB50+N}U;wH#`hI;&hlwrc$Z8Ock%EgH`kQU#dSlzWf|!(ZK=qA+nuOoFg&| zNl}r!QLS!BO~d-yp_oEYbt8+eXYRbF=5N=XGs4Ll!g2(-+7wMrkaYH$=Z-1@ZYqgb zoJyWLC8Dheor=aY>LN}(z_KpF0!bU$t5!02ptAbKPFI^67JIE;RV;jgxC)?EhYs7Y z?8^^|W&wv6+yY#?Rk!>`DV~SI&FGazWV)yg)dXu0s^>=KMdv=j~{`8t_`g zhU@h=HbhS&UVB7T|H>e8C5ww#aV`6lZk2Im9Mj@@3>RgI>t0*!K4q0!Kb5QKA2?=l{Gyr9F3!F^o+;Umna9yof7s=xqZlmD0ih%ML0m2vbx87EIMVQ!n9fGYpbc&P>P@-4;iO<_bHGbk$ zJq3F0RM(KK_(SXSgNT=eitYv+RYgPH@sDGm2`D{OctV%ZTbDKxfp?81Rv%!{>Kc(r zkgc`R1%w*W_nJ7KIS6>sh&dP7IXfbKcH%$+g$sB&YTE zH0UPC)V%h3^#ql32If6fLAfdb=Ri%1&0Ej|K%mhjRl`Yj}|$&V4pGG870 z@;!H+yRT$_`Pg|~Auc6jD0J8dD zd=QFS#18LmBL5UyyVq9R&|i9ezrzeR1*;|LtBdZhG}pQ3Ubv4^U0!G7BC4z08EGJ` zzUcC-cW7;-Tc7;6fcc0drXfqxpZERsihFCuWwR(%fxFhpRh76SOzRYJQR?SH*T;@{ z8x@6q{ZSOJO`)v>xg)-BQIxwS)d*TPD2irg6vd+L)=S`5ByYQ-s0Cb3id!)oO|;`# zq*#k0x+hcmVs#QfKxB|A6S#G^hKSuqWSHt?x|Or`6Hnp58Y6O$$RQ$!i5wx4Br;Cq zB#~1f_@S3>5UkTw@j4OuLCt!T$QdGMiJT*HfyfmiZxOjct&??oxgdn^4Aw`17}#jfn>MhEcSC}p|3GMaQ{_5{7~OqS(jrE_5EXUkaA07_XN zz&d*dmZp}EVIe4GxxX?rf|swL0?(1MJW}Z!bmiU4nk(ZuSe6GX1H-PoX9df~bGR%I zSJ-|+YPT!nxxXy$uk;NCL+~6b%R}Gv4XwOL%SG1CXe91r=4vs7J?g33K?-bETtJF${^iS1#8iYwQ64I(n9_JVuu) zX5gFD)A@5St8xfhFDnRSn5sr2Q_eYvmua=~ix%$4O_r5C{LUe442 z8XkLc**$%q%J75o@Pn_1AK>LHGp>y1gR=af(g#61mZ4fa1%&$#KAEj_&AMH)Fr5Qr zItR#f4gjs*6|K>9TBUuXQMLIfiM&;K!S%6LZ*%xxEVymaE+rh^+h#A|Hd<4HEZQQ6 z5n~!$3OTeNV9r=`=tBQn!hQ&{mWah-6=~FMR=$x^ZnOaM_~y0vZ)361HK{pudsE+v Srtbv(Hh~e3_O2fzmiIqnbAV9* literal 0 HcmV?d00001 diff --git a/TTS/vocoder/configs/__pycache__/shared_configs.cpython-311.pyc b/TTS/vocoder/configs/__pycache__/shared_configs.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96822f1fff272ffca1354dc8276ea67247bb8d0b GIT binary patch literal 9967 zcmcgyO>i5>l^y^C{>vgof}}`-Y(kVoN;Lm0c`aFMl|)mrDiY%mw5IR<6t8z)c*E1OW z%BJjfpy8W&-LL!o_0Q|q`stxVJraJY1@IJo$cYM4rX_6MmNZCL< zkPXIz0m6fsP*#r1*>F4@ka!>{U6r)Zdy*#e@JAs@`V@ce?<2fEts+vq!=nv@wu5(k zC2gnHxgpJU?NRHwpwq3XwwlPOmgR(#I?re*JA7HS_@#oT8>>bxsi&MHRIq8PdQQ)! 
z+_Hqv9UoP>zI+VieThr)fF{L*x1eD>6qGpR|5@;(0A$9W`-9A3-l>Hy0M+Dp7w-lZ z7Fb8ThxY>O6j)chj~@coEwG;WGXm=sSYQ0Gpgbh7XX5<=JM3Wt0_zu;96usB3<&H< zd{ER43T!Ao#3SGk5!i5iSa292jAYkF-;-LTAK{~Daa6SEfj!5Z0kYBSM#9jz>6%E5 z!Q&fg)#Mt>tEQUeHa9J%W{q5mshJG3)12LQTd*wJWvqoBhTkPKWt|sagsJ2RHrwQr zxf+3JB8c*)k>HkPm@FY2$Ml?KtGNVUU@!BeTFBUzZLqDTu4d=9T)xy%!Qx8J&;iv# zD$8>=o82~yOba`=jx^A7`GW09c}KU?%v5ulk!7Nn8e;~|SJaHf7k$n6Z8f7<`HXHW zmVS@3Sv_YrFQuwRI!c$Yyz|rVkBoC9guPS<5z?dVkF@ zY`6xMU_iRq-WX@pY=K?!oaDM7sb{za#)X{5Or9v1mVTROc39Gcwsu;#nCbTir~7jH z`jUOC92zCwipA=DkmRaeF!=$*hD}NAJj5}wa03~}ZOnwUja=4^9Isi%;_oOKo@;0I z>pYjL8Rd?8o5S2$W^v*5IuEqhW_f9;&I5T>Q>?UMVkEWtckM2^fD1GYD~oU{=~Lxs zG0_kMz)q_h-FAV$Jp4kq{%eNOzP|VI0RyoV233S`Y6lic&2M5JXumPLu=JMZzR%IJIB=~ zW_1iOkJvUEtwW=s&H#5bHk-^CYV%BggX^iZ%`~1+cWMGKBX8?jEDk1{wGA_oUJwNf zH><@>GukirXJ5m-Zj@~6lBQk}c^11P^iza>s9aiD`r(#X89oU(hc$O~R+4H0TJAVuvEp2D23nTm zJ&G@!!%a)4Be@&782jr^U;PTA{|~ljr_-A|W{}!<`;*e?^5%@@bo*md?>`Bm4%eZ7 zL<8}2f_F40ENtd<`=)9M1SlBzvL65;PCyH$f?7zEKL}!z46mpD`@iV7cQN(ztG~N9 z^z%PmG)F**m?JE#=JeH@=@ZGEq|Wds{dIlumWAzeaYwb%i!XEQ2ey%4Ts5-Eomw>9;pdt)s-_d>Pq1#q?o z0gBS+iC>@j{LRufFTI$=Qu9=}V^k2M0+~j&Fs;zRWK`2t1r(-QeF)Twb^rF|sc_f6MBs8j znmo39>$8ic=xRB-`Y^g$T)Tg=2AUur1;AH z1eo9{%aO|PIO@S~98Xyue{^J|c;)_Tt$qJs@$&s}@n-Snf84A@Cfv@d%%ZW~Tczic z^h8tTXzF1!Rg#AuXeAl%R9QfLxP~GdwTyR^NnidM_G%7%wO|~9FYoY!+)myF%nfqo zc(=ehL2*wSjtV&@4ddrnm7`;wj`?_L}3c3z6JkRs5v_* zGuo4ttZFCHPoZr!Iu-3G{5yzRnPr@+c;YG8Twi9?!ZwW*;-0T%;*Uke-LBh5bk}LH zdIX5yZI;CWRL5}-VKxm9LPF2%|c2+jbUT@t4Q9cy5%`uQc)EUyp*(OW;PJEJsM z>*+-Qx>??ksMvb90i`2tj5diZ&*1n%o5y@Bfu&~i2zE`?<{(;GSvtGahDsO;^}aDAadOIc7pt^y8m0D90 z89mQtGrHA$O1;FWkUAwgof3IOo2NYl!`S4=tYkDwXCaS<)I#l`jkH&8$6byY5?-`c zW1C&7rX(!CpW0j^JJjbp-aLzO{Z_3$V=5`iG_^bTU*$QD@d7b~=E(EuwytKtS~V%% zjC7*ivM(T`u+q>y@+FD({lAPF6Dfq;x8`9XYeQ!_Pnr63#74EMDIIaWUJ2%R;3v9Z z6<7Hthp*kuW6b5q5Z7O;^EnLZcS0upsys!TwaaJMXx*A%O=7$oOR3qcD(1+!bLX&( z;iAA+L|y=e&n`DCO!cz4{`mhe5%uKzuSvcGnTa}hN;@_1{4Q)&bE2BWrGRVEwt@W{ zX-?CX!l6PQS!r58_>)9J`jWbHC$#WVqrqB3{`@=HWLv&12Z~p}f8&Z6w_Yg5)I6>m z?&^q4DMyM$t(pjI2Q{t z=&3Dcw3e<>8kp8T-G;&z-P>SYWLCzw1Mk*av2EPEBrzpXN@+Z9;w+V*D;Z`P*lQ9v zSvH;e>fGUkK<$LZvb*UV5+}gwA=8PpJrFwmA_S;Av^htpr12o-3{qKBKlU{Ju&PXb76hD2g>F0?0sT9=R_(vd@vP;eKG_Vx*^0L+E-e?hta8OY34 zQQ&mAA*0h<3wbQ3w-(%3Zjz3YpmmwLX`uI_?eY$O-ttEvv5>=+ic)3xWJx@g@wwfX zc3-NDpDu}Kzf+#>*_Qx{Uhmy#;MDoUjV>E3LL|Z>lv#8Wyp-O>Us&tF4R+@TU9d;j z`lcRx9YS9M)t&^m?Y)XGH`m+cbc#X5Gfr<^yy%35F`S;7Eu5a3Dack7&pBPy$gTRN z-uop{qa=^Sd9U5Ws^&vn%yP;6 zR|0VYZxVQm0NLBr2qX!l2&4g=iLVL)Or7YOCc*-h5g>W2H!zoqb^rF|%K^HZ5b%|t zg!KQ~{pWC3;9eK(cSz$?#TyUKl;lY~WqGm^J5iF4?xFzi6J_~CB|7cp{PCVH%hQ!9 zTt^J=%C$QTHiP?w-Jz0<_e@!yX=wsBQ;d4yC3y-@S)Qt#IEfpLT@>JbvMish5Jluc zp(NwY$}+1=$EaIeWZ)eu%dyG~NXK^9YE7mgiJ-unZc`r7g~fhQK1xeBK&WG~;|mGk zF#(~*zSI%!5C9eU>X)WZd>Gyjc2gSyq26h<0eDP6D7G*4QX2qN;A>18WySA381$8a zexuO`g^%C<`0dijh4RRSha(qolLB2X;3>-&Dnnzqc7fz$c*^ovB{Gg{oLm02Ol9IU zNtY zTc7K4dj04v6;m0)f-} VQhQh$pZ`KsJr>Y!-%Kq`^>@S&QuhD= literal 0 HcmV?d00001 diff --git a/TTS/vocoder/configs/__pycache__/univnet_config.cpython-311.pyc b/TTS/vocoder/configs/__pycache__/univnet_config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..369f5c43c235c81954deb7adf16ff335414e6bdd GIT binary patch literal 8206 zcmd^EOK%*<5uV+hC6}ZqQsP5=icN|XNn7Ge56iM6OV-OTQcg%Y_6EjqxYN5FYj$Qe zGb@Wq4DcWzGLVBViHSf!1Rop(&LM~V03UNmfZaeq76Swjl0!fwhc%=_;8VWpnSBhk zdKva9!(mNVcXd~FRdrQ&?{C}NTO~Z1^hfE{K}q^2A(f{oC_G4_@R_7ZTEvjDkz^zr zO-3VxM~zrkPRgPjGve8%WK%@qk*IW0(&U?x7UxZ0#3boU{QJ)r_qXql#6fo>#D;qo>_X&N?97W&st_$&!nnDp-^G+ 
zz>7%I&ml0Ek`YZxMlVC~WK4@DWe$O~*sCDcxL@Mmf0Da+JNoYuJ=&AIc?YoF0_#Y23am3|+as_&LEBz|?G3OlfprB~ zx4^mu)|2evy%@8XFd@ah22$)3QuK)){mDMjV?bbo$$o(i1#JTY8xF8Rf$a~lA%Tq$ zCOEMMoQ4IbQPE>8xnJ}+5MUz$8yDC_k_qh{46spwO%f)?o@y}mm>7Fn^mrk8Kv?EQ z!fr|Fh<4~^A~}9bx*kbRz+Db|?Uzmcn#oek+{B<>(Gx$j#qncBvPO-FYW!cKg@=S#|C49_bdNya-Ze=W=sTily z(eHPwpuqns2r)!lo(4uHCAwA1GTod9u+~6r!H#5v#=gihK z>n`&LP+g{*x~r>(enWM2%ba0Qftoj5hq)G84JN)iQ)R5_PRiD^x~aOBt<;ObE}5KJ zX~82z1s#%3R_m1_Iyy5iC?U$#5c?`mn%86?%FYf|q6{~={Y0Uv6oEjEfwonLsjSKeDzri~swz~e($uN8TIZ`(5l-l^ zDUCwcN%n5dCj%{4JNR;iScN294PieGVKjp=xQdeH#@I==V(0mxM+qF^>IxLhLA0#3W*C!1buh$aX^1%lO3n z%XO@Cs-`&W6s#J|^^B$=U|6PVAWEW)sG{a_h7RvX#GnA9`r-+XahMng^LN)(ldT4` z3NLgRs;W-%MAOm%)x1BysWri>YBmXEJOGZC0jK$#2C?&L9CiIx|z$n>|^yB zMKQ*(qkECK^a8Jsywtj(p+`(Hh03tqLmVsJEf*D_%q3?n}Gu` z>O@R~HSJ45fxF7|O&3Ha{>gVPU0P(^wvm){Kh!!Kj?1&`M`sq(p`%jf4 zzqST*DkRTyiQ@PjeQA6}iloLGNVbR^r@sz1i`>XSyU!$97H$dR=TZ{cc9Da_^3^<7 zvGC6e>yb7{Yfn=K`Np?n`Udr6H9j9$d0LElLu09R%YyZ|E|Q#22#;D^KJV*}_B@pU zCPoO-b!b)8Zxk-5hO=WakpC)K5jLO4r$}8FeAj1-)SD_E=U%YgGkcXWuQ)lLdT!gj zGta1>Yg-xY2_Gv;-rq|$@}nKfsV=h}2|lFU+N!E;MXRoYKpP@nZIIDLDyAg)6Ol#P z!rajlhuE?Cg%2BK-CAPydo0WyVhcwBj?Fb9uO(?getZs;^9uyg>BEp{pVU%ly#byy z^+pQ+mP(*xg8cxr@c+K8shOeXejjsj5|~OGmP#AUn$HC-NZB|jP@tszjuQd0TQxt4 zHMvGfl`|)w#f~e(@;O1{TCQq@+(5*S2Jvog=2$I-2H{)8eX&fH9TJ9yTn>W$cPn*6 z8LNCL{$G5$XcR|ldvl?v??R)2Y_;RrLcR9N^y#feO9mAj5cZv6!|Qt~I>Y+PTE*2?)X`WUni9 z!uF4r-ku6=)eXFkYSE_&U5d4V8C{G$Ag;K>i)c|TrpX!H&3zuxnzUxrG;0a1m9qH0;0dFP`aEuMH=_6wV4J&UoG~EAQ5@ zj&&XT^}6PE9KE$#duKiuoKw?k3fB{k*JE5m##c9Rk)v>Iw3Ri(N?q|fsQL=G!9$$s zyq+sLTm&gQ$M5i~!R4Ay*R0uA4l?~tvJXJu2PE`2OS6|9Fqqv?o%Pvs+_~afx!JQ; zHobBA&Dm7majh(lG#YniapN!>Tno+mw>{2m#S%U~liTv$whS$5p_(6VN@!BbcR#V0WI2qw}8*~v3y-ty7(Nx(TdwZ$Ik4e?KE~?uR z2mG$}3oip1@yMsW&MM;|T7-O(TuAuj01BT;@WsgI5x}T`G5Do?CkAhfFTwL9Q0o%> zPsi;cx#D$n+Ab4V0GN*H@1tJ-1IX+nqQYzPw@_YN-R{V1tK0hs7w`e;)t(@55a4lk z`vh&yBmnHzJ6RBKX>fY$)Yhre;EM(EmYZcV7eHXHdM)@jLN+9oyuL@Q5Gcv+qUJr1 zQ_0znNPfgPIHR(HYV#f9d*)|%qI)|%eQFzDiV2|#hxQz@y}pW7s#LJo&) z29xX?EDf?!Pk(7(xHPc8)Z16;8!Yt=mG`ueMF1)m3G2o;O6Ks{p@0A5_`qvklVQ{K zp4W^o#oi^a6D0>%16tnTe%qc<4N-+$oAfGli!13sw;2&|??i<|2TSDsYni|NnV$c= z<|{Pn#n-s2B5wN0NYQvo-FR-L+##iq7i~&krT%o}t7Jjd3a@ay6%BM+ITx3}H!vN4 z`ugeinB)!NDkA47u7!}H+HN=#rK-Pq1hW$}%mMjjs;A>#+{x#-?X@dP&T?FZF2fbYi;F8PFJb1h z!qx04qKj7z{`3(6GM&fkW%yznjwE6YZZi|DySC ziQ&H{hKq@jdx?=kVx;uaD}TIk@1?hH$@dZm?p`V;juyo0$L)h&LQ&{u9}>1Il%?TM zDDP5~t9jK3TACF_vr>v;(=T#f48IU~9g2c{icLA)+c{|3C7N)LY41kKCNHx|Phy^S zoJ!LGo=o>Z_6vl+NZ=5G!vtms%mUyiHab4oC#d3O0w)Q)LV)yb|A4?71l}a@7J;`3 zyhGq!0zV``>LWV{)uy;%D9Uuiejl|CIlyUO$zlgVpqU!3LjO=^?XN5^N8*XHBp@7O z@8IV7TSp4=0N$cJfHl4O*6mn9?!#M@`|gh%FUb40x(YJB$BXju(%{VIrQ6E|c?NG$ zo+%xex_hh~ZR@8D3J_}_Z~9sSct`+)Zk;Pj?eQi7(0~$?21hm*Zyznl`|%d#{iU(V zg51A_3VbJv@?>dvv>HQ<2zB5CrZO( zK^J_-it<>wNg5GVyf(`4*w*E8R3=#o#G3X)R)B{DMEA1PM6v>)0j1s2W$oR6N%u1tRvu~>hY<b(@CnfgWcB<3APtz|P z;q(rti^J)A!|6b0_@;|;x-GQ# zA$d*iMh*uTv6j5C`d^xYecz0;j>|x7Uj1JO?I!9fy!!x^#tG1d0=q$fp`WtTKxrvB zzXB-7B9TZ*nkqDUOVYkV_`NTkErefqP_(Ta{U9146+He^=$`u8Z+_^1zK)^e&Hjd6 Hh1~xGlC?S( literal 0 HcmV?d00001 diff --git a/TTS/vocoder/configs/__pycache__/wavegrad_config.cpython-311.pyc b/TTS/vocoder/configs/__pycache__/wavegrad_config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00e0a12db9d294def4afde14ee96d04d824f1761 GIT binary patch literal 4805 zcmbUlON;K_}$5ui^<`k~yIq#u^p zNN$`>z}%?B#&Y{4HZHM=+@!?zNo+E=pB(_o{ULTxVh1F4FgGPBrw~IK^yw#Lj}+5v 
z2G-BCX$}pbIV5RjCD&wbmL*}uoW$mHa}rBQ>~L;gp5jQG+cySqOG$3;$Q2Z9l=k54rX;m;?9zZaY6Vbmj1I?^5(NT=6@e*@Y(-FXauVFUjG?mlRJZ>4Z56q;?p1+)$ss*h823gv29J5cC%+|I{Mzd5mgzOo1co>R{ zP^Y2&3XWB&x)_2?bE_O&g5l92!@KL_a!)3yK8^NyIMMzh6f$fp( zeK5O5x0nl6hE^DdI%8=-c!F*_RcdM5)Mns15UA)X`7OiFx>cLCgFjQ*&f_d(56FgX z!7xpROy5EaJKUVZ6QoPc)#mdA59Gs z4cDZTLGfdKY}m=J{9*F8w*t$(g`lfanoQ$uo^8LyYX{cNJQUeNIpb7Z!^*VG$z|>A zK1BW&lxyQIcV9s{l^LM6G5qanTLz0KiVl^21&xeP*`6!{gHOM_b2;O z=gZT$cCs(g_1w=Dpuju&>wN&=n{`*h6|M|;jt6w~x{^Pa3 zJ$iD<_$N?*8uN$3^aGtBg#3|qsC-KJJIE4#Dua}+1Ax&ORwJCkus=O0F@FuU@HybZ=Mfx2kV1e*4gkeo8o>g9uN3nZ zOo~|mHPV<(VVg@ehm`balK`lNvQ7<1)g@R2PxF#L(#|!dlWT|n_4jl0m;9*7{jsi) z^2fU)spptzdHlWJ4*;!Sg0HaOM)cSxQLw+V5pMM2(sIJs{dt5K?@h#r@#0qsF>3P( zV$k0V5#!ItNAP1YF2lF9P9}fbi#$bwLSaAH_ z<`W3uOPp}jkspK0DM#PpQwXMUOu<_gN3l7YlOL6Xn5p<8;>Qq=%TmB0vI$>Ca1w!Z z8j3yXFT>bFFa$rb1E0OMuD_eV)VQhh-ecjL{dGKUcWAeKlcUJ(Fq5WvR0XvwjEx^`tJ?kUsI>dJH@xdq0JHBwFg1?hNKxBvhE literal 0 HcmV?d00001 diff --git a/TTS/vocoder/configs/__pycache__/wavernn_config.cpython-311.pyc b/TTS/vocoder/configs/__pycache__/wavernn_config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ee7379f138f7305c78f3235759d7559c83cbfd30 GIT binary patch literal 5431 zcmbtY&2QYs73c19C0Vj8f5`f@haQR^5)|p7e?rkykGnt)0#8AEGI~*v9*Ug$dqXaFSJ@;kx*84n zX5P&Ee)EQ(kB$y&_|>YvuKw+Wru~zE_HQ72`NqJ@?=(kq3a;iAjDlA*iUop;ZpqUP zUA>pwvNvE16f|BaYPU5<|3q`jT>q@3Xe__fJPK_)EHCHsDh3e#}zcDpyS2~1)WgPN#mq~PAO>IIHmSHt)L-eTtO2V zbXq}^3Ysz|)VgT}%@~sknpMylV~S6MmpKK^8#4+zs~~2~Dj9x45XtPE`$YSW)Mxk{ z)}L1^F2E!U$!MnHROi-DM>XvhuL_?_R<}aS;}H)fs|F#<5ZRN|NR?rR-`h_uukP~e z!wRC2wY4=?4FiuoeE6W!#Uqr%k#Ya{o0>8qb0V{SR z$)bR5?WR7sl?qJz)4G0i@5fO+PPb)2RS_bky|=lUD~)Y=f$MB3mhOqCQ5|4HQfQ+*7MEN=l1L#3;0& zTfbIe_kDMdS@pUrY_(QYNfprJjT`JD@pW;lqxT*e$p#O@n0FbV#(f@IQ4pG`3H$iK zeP8iKEauqoE|$I>L?)sr~V;1-DPt32E9Mr8w`Q(!<==$2}l9%Q<9H z(-EFLcb#3mbb0x^2vz%s=k36QgY!1hrOU7P*(BuBgj4-6k{^2}A#S^Y{g@cKwAyDY zIG1U~PfedkKp`ji9;CP~RUY~g%r;ilvf(^L_Wdi%>~*vcdX({-tsLC8B6|lGe(8W{ zFA#neuHoCf!tO*&);V`1i|Z=(@;SD>hbdP?d1ewmr4Zr6>m{#w+^2~3otz&sWIYUQ zj=0Z~S&k=sc%h&jgYY4{ONf6bEUfOSUVb9U$5~j1ZkHfZw}TU1bgF1qc8Y zuy?9GH1~oS41u$Vy<@qOFRN{^CEIv(>*g|z3(A*Z+YNTf=eIc+g`yGgZjYZTLv|8Q z1o!5VnHmrw0ND>kzyl<&c0%nIwykOkg4MWf^lrwE$bE?csRrx+Uz8E{kT&-Y3Q|81DpZxjzFe$?@M=^E zY9+HA2QdbxAwNw+fbt{C4o|sqh6WEyF|8`f64-^(Q5FU{KNKNVZqd;ydsRjWq@y$9 z+qMwgFUKz;B{c)1MMN|^;CtV&4-_xntHtZVQQG;pK!-ix8IWMIFz}N*)U0UNvzzqp5P# z_qz0E>}|sw&__Fnm48vuNULL{nZs38cf8P9!0Um%gX4fq&drF+sGl!)2&MRJ*Ah|c zIAQE7<@)?-lu=bg9|)vixKx&mS!6v%WXS-vVI(}Hu{3cZ)XAfwpvR49`xF;EW+G;k z$y8{&7HpXZu-dZMXqUPYLoKu%({@FjEmqyYI-KTiV_G2LGeW7SZVeyaLBWVKq{`m7 z!B#7)%ZP;z21NuFRU9bYRqK_?YA%Tz_NRA;s$SPZPCzUYIh0g~^C*AqN+)96(3Er- z##qs}{_WIUF1NybPnNIBDzGh_zV=QfsKW%6d{R|m-9l`ks*J>gh}ZAF*Ht-nv++L^ zP8~Wun%esKT+Q-43&p@{pc#|D@6>Bbsu2h zP8`38(Sg(zUud}1Cc~XmM`&dX3}$D-JIRUfWa#8bp8S%rLy6?YWn3VWu|r}1f<7N4 z6a7QAhB^F>uO~-x7q7jxapqtDxHj`Dsg};X(i397l&Q5rrDQ zJ^ZoYlzv~tWmVtI2z~YDQc0|jmNe0r8PLREFCL+<3T0(Rs5`gcpS17-?S_Bt+s&0n z5?9%kJxlJa+~V?a6x3HX0`f8fVut_%rL>%6ankH^oZZOb0i76dKLi)ZF z9;NOWbz|rfo$hxbsW~a3;0p0gNDEI=H%=X~9Zpa;N!=86Gt`}-ZVny3+)xS&=V^dZ zw@9NpU0{^fm4M+ZG&9uh2Js?8xND>9??7KKl!p#9)!!;=CubXPKl@ozpFwNsGY13O z*$bcYzr5R=UvJH?e>uP2`1$jZroN8W(%1JV=Nj)mKh@Od&|3N&&`(b{Za;h2)Thu| z`qci+*~aGcji!DUt)-vcpPg^qfBvAU&!e^Ud8{}!)p+L_FecGj`sDt^G-#BX`ZQWg zpWZ(;i5;<`JwJcGsh@rxHFZ4CxAgP-6ElrF&j4pU@1J2H0TviqOK1DDb1BJ;$*+cn z4@z2j_S206tymsV-Ph`ESeu%~R&i&mc(C>hDWz;PK;#3@fn&h`0bQ;|CsuDE6v z2_bTemdtb`s{AG^S%{?Wrc8c;HYPt;Zh(kMJ$Ws>hB5jSCx46Xpj0Rn_O-=kzxI{3 v-t2Ar+T~_%g8_>p2gP3%3Qr0L+V?-2;|pJ>)BjE%Uzf1(lj*)o72p2_ceFiC literal 0 HcmV?d00001 diff --git 
a/TTS/vocoder/configs/fullband_melgan_config.py b/TTS/vocoder/configs/fullband_melgan_config.py new file mode 100644 index 0000000..2ab83aa --- /dev/null +++ b/TTS/vocoder/configs/fullband_melgan_config.py @@ -0,0 +1,106 @@ +from dataclasses import dataclass, field + +from .shared_configs import BaseGANVocoderConfig + + +@dataclass +class FullbandMelganConfig(BaseGANVocoderConfig): + """Defines parameters for FullBand MelGAN vocoder. + + Example: + + >>> from TTS.vocoder.configs import FullbandMelganConfig + >>> config = FullbandMelganConfig() + + Args: + model (str): + Model name used for selecting the right model at initialization. Defaults to `fullband_melgan`. + discriminator_model (str): One of the discriminators from `TTS.vocoder.models.*_discriminator`. Defaults to + 'melgan_multiscale_discriminator`. + discriminator_model_params (dict): The discriminator model parameters. Defaults to + '{"base_channels": 16, "max_channels": 1024, "downsample_factors": [4, 4, 4, 4]}` + generator_model (str): One of the generators from TTS.vocoder.models.*`. Every other non-GAN vocoder model is + considered as a generator too. Defaults to `melgan_generator`. + batch_size (int): + Batch size used at training. Larger values use more memory. Defaults to 16. + seq_len (int): + Audio segment length used at training. Larger values use more memory. Defaults to 8192. + pad_short (int): + Additional padding applied to the audio samples shorter than `seq_len`. Defaults to 0. + use_noise_augment (bool): + enable / disable random noise added to the input waveform. The noise is added after computing the + features. Defaults to True. + use_cache (bool): + enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is + not large enough. Defaults to True. + use_stft_loss (bool): + enable / disable use of STFT loss originally used by ParallelWaveGAN model. Defaults to True. + use_subband_stft (bool): + enable / disable use of subband loss computation originally used by MultiBandMelgan model. Defaults to True. + use_mse_gan_loss (bool): + enable / disable using Mean Squeare Error GAN loss. Defaults to True. + use_hinge_gan_loss (bool): + enable / disable using Hinge GAN loss. You should choose either Hinge or MSE loss for training GAN models. + Defaults to False. + use_feat_match_loss (bool): + enable / disable using Feature Matching loss originally used by MelGAN model. Defaults to True. + use_l1_spec_loss (bool): + enable / disable using L1 spectrogram loss originally used by HifiGAN model. Defaults to False. + stft_loss_params (dict): STFT loss parameters. Default to + `{"n_ffts": [1024, 2048, 512], "hop_lengths": [120, 240, 50], "win_lengths": [600, 1200, 240]}` + stft_loss_weight (float): STFT loss weight that multiplies the computed loss before summing up the total + model loss. Defaults to 0.5. + subband_stft_loss_weight (float): + Subband STFT loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. + mse_G_loss_weight (float): + MSE generator loss weight that multiplies the computed loss before summing up the total loss. faults to 2.5. + hinge_G_loss_weight (float): + Hinge generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. + feat_match_loss_weight (float): + Feature matching loss weight that multiplies the computed loss before summing up the total loss. faults to 108. 
+ l1_spec_loss_weight (float):
+ L1 spectrogram loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+ """
+
+ model: str = "fullband_melgan"
+
+ # Model specific params
+ discriminator_model: str = "melgan_multiscale_discriminator"
+ discriminator_model_params: dict = field(
+ default_factory=lambda: {"base_channels": 16, "max_channels": 512, "downsample_factors": [4, 4, 4]}
+ )
+ generator_model: str = "melgan_generator"
+ generator_model_params: dict = field(
+ default_factory=lambda: {"upsample_factors": [8, 8, 2, 2], "num_res_blocks": 4}
+ )
+
+ # Training - overrides
+ batch_size: int = 16
+ seq_len: int = 8192
+ pad_short: int = 2000
+ use_noise_augment: bool = True
+ use_cache: bool = True
+
+ # LOSS PARAMETERS - overrides
+ use_stft_loss: bool = True
+ use_subband_stft_loss: bool = False
+ use_mse_gan_loss: bool = True
+ use_hinge_gan_loss: bool = False
+ use_feat_match_loss: bool = True # requires MelGAN Discriminators (MelGAN and HifiGAN)
+ use_l1_spec_loss: bool = False
+
+ stft_loss_params: dict = field(
+ default_factory=lambda: {
+ "n_ffts": [1024, 2048, 512],
+ "hop_lengths": [120, 240, 50],
+ "win_lengths": [600, 1200, 240],
+ }
+ )
+
+ # loss weights - overrides
+ stft_loss_weight: float = 0.5
+ subband_stft_loss_weight: float = 0
+ mse_G_loss_weight: float = 2.5
+ hinge_G_loss_weight: float = 0
+ feat_match_loss_weight: float = 108
+ l1_spec_loss_weight: float = 0.0
diff --git a/TTS/vocoder/configs/hifigan_config.py b/TTS/vocoder/configs/hifigan_config.py
new file mode 100644
index 0000000..9a102f0
--- /dev/null
+++ b/TTS/vocoder/configs/hifigan_config.py
@@ -0,0 +1,136 @@
+from dataclasses import dataclass, field
+
+from TTS.vocoder.configs.shared_configs import BaseGANVocoderConfig
+
+
+@dataclass
+class HifiganConfig(BaseGANVocoderConfig):
+ """Defines parameters for the HiFiGAN vocoder.
+
+ Example:
+
+ >>> from TTS.vocoder.configs import HifiganConfig
+ >>> config = HifiganConfig()
+
+ Args:
+ model (str):
+ Model name used for selecting the right model at initialization. Defaults to `hifigan`.
+ discriminator_model (str): One of the discriminators from `TTS.vocoder.models.*_discriminator`. Defaults to
+ `hifigan_discriminator`.
+ generator_model (str): One of the generators from `TTS.vocoder.models.*`. Every other non-GAN vocoder model is
+ considered as a generator too. Defaults to `hifigan_generator`.
+ generator_model_params (dict): Parameters of the generator model. Defaults to
+ `
+ {
+ "upsample_factors": [8, 8, 2, 2],
+ "upsample_kernel_sizes": [16, 16, 4, 4],
+ "upsample_initial_channel": 512,
+ "resblock_kernel_sizes": [3, 7, 11],
+ "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+ "resblock_type": "1",
+ }
+ `
+ batch_size (int):
+ Batch size used at training. Larger values use more memory. Defaults to 16.
+ seq_len (int):
+ Audio segment length used at training. Larger values use more memory. Defaults to 8192.
+ pad_short (int):
+ Additional padding applied to the audio samples shorter than `seq_len`. Defaults to 0.
+ use_noise_augment (bool):
+ enable / disable random noise added to the input waveform. The noise is added after computing the
+ features. Defaults to True.
+ use_cache (bool):
+ enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is
+ not large enough. Defaults to True.
+ use_stft_loss (bool):
+ enable / disable use of STFT loss originally used by ParallelWaveGAN model. Defaults to False.
+ use_subband_stft (bool): + enable / disable use of subband loss computation originally used by MultiBandMelgan model. Defaults to True. + use_mse_gan_loss (bool): + enable / disable using Mean Squeare Error GAN loss. Defaults to True. + use_hinge_gan_loss (bool): + enable / disable using Hinge GAN loss. You should choose either Hinge or MSE loss for training GAN models. + Defaults to False. + use_feat_match_loss (bool): + enable / disable using Feature Matching loss originally used by MelGAN model. Defaults to True. + use_l1_spec_loss (bool): + enable / disable using L1 spectrogram loss originally used by HifiGAN model. Defaults to False. + stft_loss_params (dict): + STFT loss parameters. Default to + `{ + "n_ffts": [1024, 2048, 512], + "hop_lengths": [120, 240, 50], + "win_lengths": [600, 1200, 240] + }` + l1_spec_loss_params (dict): + L1 spectrogram loss parameters. Default to + `{ + "use_mel": True, + "sample_rate": 22050, + "n_fft": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mels": 80, + "mel_fmin": 0.0, + "mel_fmax": None, + }` + stft_loss_weight (float): STFT loss weight that multiplies the computed loss before summing up the total + model loss. Defaults to 0.5. + subband_stft_loss_weight (float): + Subband STFT loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. + mse_G_loss_weight (float): + MSE generator loss weight that multiplies the computed loss before summing up the total loss. faults to 2.5. + hinge_G_loss_weight (float): + Hinge generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. + feat_match_loss_weight (float): + Feature matching loss weight that multiplies the computed loss before summing up the total loss. faults to 108. + l1_spec_loss_weight (float): + L1 spectrogram loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. 
+ """ + + model: str = "hifigan" + # model specific params + discriminator_model: str = "hifigan_discriminator" + generator_model: str = "hifigan_generator" + generator_model_params: dict = field( + default_factory=lambda: { + "upsample_factors": [8, 8, 2, 2], + "upsample_kernel_sizes": [16, 16, 4, 4], + "upsample_initial_channel": 512, + "resblock_kernel_sizes": [3, 7, 11], + "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]], + "resblock_type": "1", + } + ) + + # LOSS PARAMETERS - overrides + use_stft_loss: bool = False + use_subband_stft_loss: bool = False + use_mse_gan_loss: bool = True + use_hinge_gan_loss: bool = False + use_feat_match_loss: bool = True # requires MelGAN Discriminators (MelGAN and HifiGAN) + use_l1_spec_loss: bool = True + + # loss weights - overrides + stft_loss_weight: float = 0 + subband_stft_loss_weight: float = 0 + mse_G_loss_weight: float = 1 + hinge_G_loss_weight: float = 0 + feat_match_loss_weight: float = 108 + l1_spec_loss_weight: float = 45 + l1_spec_loss_params: dict = field( + default_factory=lambda: { + "use_mel": True, + "sample_rate": 22050, + "n_fft": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mels": 80, + "mel_fmin": 0.0, + "mel_fmax": None, + } + ) + + # optimizer parameters + lr: float = 1e-4 + wd: float = 1e-6 diff --git a/TTS/vocoder/configs/melgan_config.py b/TTS/vocoder/configs/melgan_config.py new file mode 100644 index 0000000..dc35b6f --- /dev/null +++ b/TTS/vocoder/configs/melgan_config.py @@ -0,0 +1,106 @@ +from dataclasses import dataclass, field + +from TTS.vocoder.configs.shared_configs import BaseGANVocoderConfig + + +@dataclass +class MelganConfig(BaseGANVocoderConfig): + """Defines parameters for MelGAN vocoder. + + Example: + + >>> from TTS.vocoder.configs import MelganConfig + >>> config = MelganConfig() + + Args: + model (str): + Model name used for selecting the right model at initialization. Defaults to `melgan`. + discriminator_model (str): One of the discriminators from `TTS.vocoder.models.*_discriminator`. Defaults to + 'melgan_multiscale_discriminator`. + discriminator_model_params (dict): The discriminator model parameters. Defaults to + '{"base_channels": 16, "max_channels": 1024, "downsample_factors": [4, 4, 4, 4]}` + generator_model (str): One of the generators from TTS.vocoder.models.*`. Every other non-GAN vocoder model is + considered as a generator too. Defaults to `melgan_generator`. + batch_size (int): + Batch size used at training. Larger values use more memory. Defaults to 16. + seq_len (int): + Audio segment length used at training. Larger values use more memory. Defaults to 8192. + pad_short (int): + Additional padding applied to the audio samples shorter than `seq_len`. Defaults to 0. + use_noise_augment (bool): + enable / disable random noise added to the input waveform. The noise is added after computing the + features. Defaults to True. + use_cache (bool): + enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is + not large enough. Defaults to True. + use_stft_loss (bool): + enable / disable use of STFT loss originally used by ParallelWaveGAN model. Defaults to True. + use_subband_stft (bool): + enable / disable use of subband loss computation originally used by MultiBandMelgan model. Defaults to True. + use_mse_gan_loss (bool): + enable / disable using Mean Squeare Error GAN loss. Defaults to True. + use_hinge_gan_loss (bool): + enable / disable using Hinge GAN loss. You should choose either Hinge or MSE loss for training GAN models. 
+ Defaults to False. + use_feat_match_loss (bool): + enable / disable using Feature Matching loss originally used by MelGAN model. Defaults to True. + use_l1_spec_loss (bool): + enable / disable using L1 spectrogram loss originally used by HifiGAN model. Defaults to False. + stft_loss_params (dict): STFT loss parameters. Default to + `{"n_ffts": [1024, 2048, 512], "hop_lengths": [120, 240, 50], "win_lengths": [600, 1200, 240]}` + stft_loss_weight (float): STFT loss weight that multiplies the computed loss before summing up the total + model loss. Defaults to 0.5. + subband_stft_loss_weight (float): + Subband STFT loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. + mse_G_loss_weight (float): + MSE generator loss weight that multiplies the computed loss before summing up the total loss. faults to 2.5. + hinge_G_loss_weight (float): + Hinge generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. + feat_match_loss_weight (float): + Feature matching loss weight that multiplies the computed loss before summing up the total loss. faults to 108. + l1_spec_loss_weight (float): + L1 spectrogram loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. + """ + + model: str = "melgan" + + # Model specific params + discriminator_model: str = "melgan_multiscale_discriminator" + discriminator_model_params: dict = field( + default_factory=lambda: {"base_channels": 16, "max_channels": 1024, "downsample_factors": [4, 4, 4, 4]} + ) + generator_model: str = "melgan_generator" + generator_model_params: dict = field( + default_factory=lambda: {"upsample_factors": [8, 8, 2, 2], "num_res_blocks": 3} + ) + + # Training - overrides + batch_size: int = 16 + seq_len: int = 8192 + pad_short: int = 2000 + use_noise_augment: bool = True + use_cache: bool = True + + # LOSS PARAMETERS - overrides + use_stft_loss: bool = True + use_subband_stft_loss: bool = False + use_mse_gan_loss: bool = True + use_hinge_gan_loss: bool = False + use_feat_match_loss: bool = True # requires MelGAN Discriminators (MelGAN and HifiGAN) + use_l1_spec_loss: bool = False + + stft_loss_params: dict = field( + default_factory=lambda: { + "n_ffts": [1024, 2048, 512], + "hop_lengths": [120, 240, 50], + "win_lengths": [600, 1200, 240], + } + ) + + # loss weights - overrides + stft_loss_weight: float = 0.5 + subband_stft_loss_weight: float = 0 + mse_G_loss_weight: float = 2.5 + hinge_G_loss_weight: float = 0 + feat_match_loss_weight: float = 108 + l1_spec_loss_weight: float = 0 diff --git a/TTS/vocoder/configs/multiband_melgan_config.py b/TTS/vocoder/configs/multiband_melgan_config.py new file mode 100644 index 0000000..7631135 --- /dev/null +++ b/TTS/vocoder/configs/multiband_melgan_config.py @@ -0,0 +1,144 @@ +from dataclasses import dataclass, field + +from TTS.vocoder.configs.shared_configs import BaseGANVocoderConfig + + +@dataclass +class MultibandMelganConfig(BaseGANVocoderConfig): + """Defines parameters for MultiBandMelGAN vocoder. + + Example: + + >>> from TTS.vocoder.configs import MultibandMelganConfig + >>> config = MultibandMelganConfig() + + Args: + model (str): + Model name used for selecting the right model at initialization. Defaults to `multiband_melgan`. + discriminator_model (str): One of the discriminators from `TTS.vocoder.models.*_discriminator`. Defaults to + 'melgan_multiscale_discriminator`. + discriminator_model_params (dict): The discriminator model parameters. 
Defaults to + '{ + "base_channels": 16, + "max_channels": 512, + "downsample_factors": [4, 4, 4] + }` + generator_model (str): One of the generators from TTS.vocoder.models.*`. Every other non-GAN vocoder model is + considered as a generator too. Defaults to `melgan_generator`. + generator_model_param (dict): + The generator model parameters. Defaults to `{"upsample_factors": [8, 4, 2], "num_res_blocks": 4}`. + use_pqmf (bool): + enable / disable PQMF modulation for multi-band training. Defaults to True. + lr_gen (float): + Initial learning rate for the generator model. Defaults to 0.0001. + lr_disc (float): + Initial learning rate for the discriminator model. Defaults to 0.0001. + optimizer (torch.optim.Optimizer): + Optimizer used for the training. Defaults to `AdamW`. + optimizer_params (dict): + Optimizer kwargs. Defaults to `{"betas": [0.8, 0.99], "weight_decay": 0.0}` + lr_scheduler_gen (torch.optim.Scheduler): + Learning rate scheduler for the generator. Defaults to `MultiStepLR`. + lr_scheduler_gen_params (dict): + Parameters for the generator learning rate scheduler. Defaults to + `{"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]}`. + lr_scheduler_disc (torch.optim.Scheduler): + Learning rate scheduler for the discriminator. Defaults to `MultiStepLR`. + lr_scheduler_dict_params (dict): + Parameters for the discriminator learning rate scheduler. Defaults to + `{"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]}`. + batch_size (int): + Batch size used at training. Larger values use more memory. Defaults to 16. + seq_len (int): + Audio segment length used at training. Larger values use more memory. Defaults to 8192. + pad_short (int): + Additional padding applied to the audio samples shorter than `seq_len`. Defaults to 0. + use_noise_augment (bool): + enable / disable random noise added to the input waveform. The noise is added after computing the + features. Defaults to True. + use_cache (bool): + enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is + not large enough. Defaults to True. + steps_to_start_discriminator (int): + Number of steps required to start training the discriminator. Defaults to 0. + use_stft_loss (bool):` + enable / disable use of STFT loss originally used by ParallelWaveGAN model. Defaults to True. + use_subband_stft (bool): + enable / disable use of subband loss computation originally used by MultiBandMelgan model. Defaults to True. + use_mse_gan_loss (bool): + enable / disable using Mean Squeare Error GAN loss. Defaults to True. + use_hinge_gan_loss (bool): + enable / disable using Hinge GAN loss. You should choose either Hinge or MSE loss for training GAN models. + Defaults to False. + use_feat_match_loss (bool): + enable / disable using Feature Matching loss originally used by MelGAN model. Defaults to True. + use_l1_spec_loss (bool): + enable / disable using L1 spectrogram loss originally used by HifiGAN model. Defaults to False. + stft_loss_params (dict): STFT loss parameters. Default to + `{"n_ffts": [1024, 2048, 512], "hop_lengths": [120, 240, 50], "win_lengths": [600, 1200, 240]}` + stft_loss_weight (float): STFT loss weight that multiplies the computed loss before summing up the total + model loss. Defaults to 0.5. + subband_stft_loss_weight (float): + Subband STFT loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. 
+ mse_G_loss_weight (float):
+ MSE generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 2.5.
+ hinge_G_loss_weight (float):
+ Hinge generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+ feat_match_loss_weight (float):
+ Feature matching loss weight that multiplies the computed loss before summing up the total loss. Defaults to 108.
+ l1_spec_loss_weight (float):
+ L1 spectrogram loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+ """
+
+ model: str = "multiband_melgan"
+
+ # Model specific params
+ discriminator_model: str = "melgan_multiscale_discriminator"
+ discriminator_model_params: dict = field(
+ default_factory=lambda: {"base_channels": 16, "max_channels": 512, "downsample_factors": [4, 4, 4]}
+ )
+ generator_model: str = "multiband_melgan_generator"
+ generator_model_params: dict = field(default_factory=lambda: {"upsample_factors": [8, 4, 2], "num_res_blocks": 4})
+ use_pqmf: bool = True
+
+ # optimizer - overrides
+ lr_gen: float = 0.0001 # Initial learning rate.
+ lr_disc: float = 0.0001 # Initial learning rate.
+ optimizer: str = "AdamW"
+ optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "weight_decay": 0.0})
+ lr_scheduler_gen: str = "MultiStepLR" # one of the schedulers from https://pytorch.org/docs/stable/optim.html
+ lr_scheduler_gen_params: dict = field(
+ default_factory=lambda: {"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]}
+ )
+ lr_scheduler_disc: str = "MultiStepLR" # one of the schedulers from https://pytorch.org/docs/stable/optim.html
+ lr_scheduler_disc_params: dict = field(
+ default_factory=lambda: {"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]}
+ )
+
+ # Training - overrides
+ batch_size: int = 64
+ seq_len: int = 16384
+ pad_short: int = 2000
+ use_noise_augment: bool = False
+ use_cache: bool = True
+ steps_to_start_discriminator: int = 200000
+
+ # LOSS PARAMETERS - overrides
+ use_stft_loss: bool = True
+ use_subband_stft_loss: bool = True
+ use_mse_gan_loss: bool = True
+ use_hinge_gan_loss: bool = False
+ use_feat_match_loss: bool = False # requires MelGAN Discriminators (MelGAN and HifiGAN)
+ use_l1_spec_loss: bool = False
+
+ subband_stft_loss_params: dict = field(
+ default_factory=lambda: {"n_ffts": [384, 683, 171], "hop_lengths": [30, 60, 10], "win_lengths": [150, 300, 60]}
+ )
+
+ # loss weights - overrides
+ stft_loss_weight: float = 0.5
+ subband_stft_loss_weight: float = 0
+ mse_G_loss_weight: float = 2.5
+ hinge_G_loss_weight: float = 0
+ feat_match_loss_weight: float = 108
+ l1_spec_loss_weight: float = 0
diff --git a/TTS/vocoder/configs/parallel_wavegan_config.py b/TTS/vocoder/configs/parallel_wavegan_config.py
new file mode 100644
index 0000000..6059d7f
--- /dev/null
+++ b/TTS/vocoder/configs/parallel_wavegan_config.py
@@ -0,0 +1,134 @@
+from dataclasses import dataclass, field
+
+from .shared_configs import BaseGANVocoderConfig
+
+
+@dataclass
+class ParallelWaveganConfig(BaseGANVocoderConfig):
+ """Defines parameters for ParallelWavegan vocoder.
+
+ Args:
+ model (str):
+ Model name used for selecting the right configuration at initialization. Defaults to `parallel_wavegan`.
+ discriminator_model (str): One of the discriminators from `TTS.vocoder.models.*_discriminator`. Defaults to
+ `parallel_wavegan_discriminator`.
+ discriminator_model_params (dict): The discriminator model kwargs.
Defaults to + '{"num_layers": 10}` + generator_model (str): One of the generators from TTS.vocoder.models.*`. Every other non-GAN vocoder model is + considered as a generator too. Defaults to `parallel_wavegan_generator`. + generator_model_param (dict): + The generator model kwargs. Defaults to `{"upsample_factors": [4, 4, 4, 4], "stacks": 3, "num_res_blocks": 30}`. + batch_size (int): + Batch size used at training. Larger values use more memory. Defaults to 16. + seq_len (int): + Audio segment length used at training. Larger values use more memory. Defaults to 8192. + pad_short (int): + Additional padding applied to the audio samples shorter than `seq_len`. Defaults to 0. + use_noise_augment (bool): + enable / disable random noise added to the input waveform. The noise is added after computing the + features. Defaults to True. + use_cache (bool): + enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is + not large enough. Defaults to True. + steps_to_start_discriminator (int): + Number of steps required to start training the discriminator. Defaults to 0. + use_stft_loss (bool):` + enable / disable use of STFT loss originally used by ParallelWaveGAN model. Defaults to True. + use_subband_stft (bool): + enable / disable use of subband loss computation originally used by MultiBandMelgan model. Defaults to True. + use_mse_gan_loss (bool): + enable / disable using Mean Squeare Error GAN loss. Defaults to True. + use_hinge_gan_loss (bool): + enable / disable using Hinge GAN loss. You should choose either Hinge or MSE loss for training GAN models. + Defaults to False. + use_feat_match_loss (bool): + enable / disable using Feature Matching loss originally used by MelGAN model. Defaults to True. + use_l1_spec_loss (bool): + enable / disable using L1 spectrogram loss originally used by HifiGAN model. Defaults to False. + stft_loss_params (dict): STFT loss parameters. Default to + `{"n_ffts": [1024, 2048, 512], "hop_lengths": [120, 240, 50], "win_lengths": [600, 1200, 240]}` + stft_loss_weight (float): STFT loss weight that multiplies the computed loss before summing up the total + model loss. Defaults to 0.5. + subband_stft_loss_weight (float): + Subband STFT loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. + mse_G_loss_weight (float): + MSE generator loss weight that multiplies the computed loss before summing up the total loss. faults to 2.5. + hinge_G_loss_weight (float): + Hinge generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. + feat_match_loss_weight (float): + Feature matching loss weight that multiplies the computed loss before summing up the total loss. faults to 0. + l1_spec_loss_weight (float): + L1 spectrogram loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. + lr_gen (float): + Generator model initial learning rate. Defaults to 0.0002. + lr_disc (float): + Discriminator model initial learning rate. Defaults to 0.0002. + optimizer (torch.optim.Optimizer): + Optimizer used for the training. Defaults to `AdamW`. + optimizer_params (dict): + Optimizer kwargs. Defaults to `{"betas": [0.8, 0.99], "weight_decay": 0.0}` + lr_scheduler_gen (torch.optim.Scheduler): + Learning rate scheduler for the generator. Defaults to `ExponentialLR`. + lr_scheduler_gen_params (dict): + Parameters for the generator learning rate scheduler. Defaults to `{"gamma": 0.5, "step_size": 200000, "last_epoch": -1}`. 
+ lr_scheduler_disc (torch.optim.Scheduler): + Learning rate scheduler for the discriminator. Defaults to `ExponentialLR`. + lr_scheduler_dict_params (dict): + Parameters for the discriminator learning rate scheduler. Defaults to `{"gamma": 0.5, "step_size": 200000, "last_epoch": -1}`. + """ + + model: str = "parallel_wavegan" + + # Model specific params + discriminator_model: str = "parallel_wavegan_discriminator" + discriminator_model_params: dict = field(default_factory=lambda: {"num_layers": 10}) + generator_model: str = "parallel_wavegan_generator" + generator_model_params: dict = field( + default_factory=lambda: {"upsample_factors": [4, 4, 4, 4], "stacks": 3, "num_res_blocks": 30} + ) + + # Training - overrides + batch_size: int = 6 + seq_len: int = 25600 + pad_short: int = 2000 + use_noise_augment: bool = False + use_cache: bool = True + steps_to_start_discriminator: int = 200000 + target_loss: str = "loss_1" + + # LOSS PARAMETERS - overrides + use_stft_loss: bool = True + use_subband_stft_loss: bool = False + use_mse_gan_loss: bool = True + use_hinge_gan_loss: bool = False + use_feat_match_loss: bool = False # requires MelGAN Discriminators (MelGAN and HifiGAN) + use_l1_spec_loss: bool = False + + stft_loss_params: dict = field( + default_factory=lambda: { + "n_ffts": [1024, 2048, 512], + "hop_lengths": [120, 240, 50], + "win_lengths": [600, 1200, 240], + } + ) + + # loss weights - overrides + stft_loss_weight: float = 0.5 + subband_stft_loss_weight: float = 0 + mse_G_loss_weight: float = 2.5 + hinge_G_loss_weight: float = 0 + feat_match_loss_weight: float = 0 + l1_spec_loss_weight: float = 0 + + # optimizer overrides + lr_gen: float = 0.0002 # Initial learning rate. + lr_disc: float = 0.0002 # Initial learning rate. + optimizer: str = "AdamW" + optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "weight_decay": 0.0}) + lr_scheduler_gen: str = "StepLR" # one of the schedulers from https:#pytorch.org/docs/stable/optim.html + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.5, "step_size": 200000, "last_epoch": -1}) + lr_scheduler_disc: str = "StepLR" # one of the schedulers from https:#pytorch.org/docs/stable/optim.html + lr_scheduler_disc_params: dict = field( + default_factory=lambda: {"gamma": 0.5, "step_size": 200000, "last_epoch": -1} + ) + scheduler_after_epoch: bool = False diff --git a/TTS/vocoder/configs/shared_configs.py b/TTS/vocoder/configs/shared_configs.py new file mode 100644 index 0000000..a558cfc --- /dev/null +++ b/TTS/vocoder/configs/shared_configs.py @@ -0,0 +1,182 @@ +from dataclasses import dataclass, field + +from TTS.config import BaseAudioConfig, BaseTrainingConfig + + +@dataclass +class BaseVocoderConfig(BaseTrainingConfig): + """Shared parameters among all the vocoder models. + Args: + audio (BaseAudioConfig): + Audio processor config instance. Defaultsto `BaseAudioConfig()`. + use_noise_augment (bool): + Augment the input audio with random noise. Defaults to False/ + eval_split_size (int): + Number of instances used for evaluation. Defaults to 10. + data_path (str): + Root path of the training data. All the audio files found recursively from this root path are used for + training. Defaults to `""`. + feature_path (str): + Root path to the precomputed feature files. Defaults to None. + seq_len (int): + Length of the waveform segments used for training. Defaults to 1000. + pad_short (int): + Extra padding for the waveforms shorter than `seq_len`. Defaults to 0. 
+ conv_pad (int):
+ Extra padding applied to the feature frames to compensate for the convolution of the edge frames.
+ Defaults to 0.
+ use_cache (bool):
+ enable / disable in memory caching of the computed features. If the RAM is not large enough, it may cause OOM.
+ Defaults to False.
+ epochs (int):
+ Number of training epochs. Defaults to 10000.
+ wd (float):
+ Weight decay. Defaults to 0.0.
+ optimizer (torch.optim.Optimizer):
+ Optimizer used for the training. Defaults to `AdamW`.
+ optimizer_params (dict):
+ Optimizer kwargs. Defaults to `{"betas": [0.8, 0.99], "weight_decay": 0.0}`
+ """
+
+ audio: BaseAudioConfig = field(default_factory=BaseAudioConfig)
+ # dataloading
+ use_noise_augment: bool = False # enable/disable random noise augmentation in spectrograms.
+ eval_split_size: int = 10 # number of samples used for evaluation.
+ # dataset
+ data_path: str = "" # root data path. It finds all wav files recursively from there.
+ feature_path: str = None # if you use precomputed features
+ seq_len: int = 1000 # signal length used in training.
+ pad_short: int = 0 # additional padding for short wavs
+ conv_pad: int = 0 # additional padding against convolutions applied to spectrograms
+ use_cache: bool = False # use in memory cache to keep the computed features. This might cause OOM.
+ # OPTIMIZER
+ epochs: int = 10000 # total number of epochs to train.
+ wd: float = 0.0 # Weight decay weight.
+ optimizer: str = "AdamW"
+ optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "weight_decay": 0.0})
+
+
+@dataclass
+class BaseGANVocoderConfig(BaseVocoderConfig):
+ """Base config class used among all the GAN based vocoders.
+ Args:
+ use_stft_loss (bool):
+ enable / disable the use of STFT loss. Defaults to True.
+ use_subband_stft_loss (bool):
+ enable / disable the use of Subband STFT loss. Defaults to True.
+ use_mse_gan_loss (bool):
+ enable / disable the use of Mean Squared Error based GAN loss. Defaults to True.
+ use_hinge_gan_loss (bool):
+ enable / disable the use of Hinge GAN loss. Defaults to True.
+ use_feat_match_loss (bool):
+ enable / disable feature matching loss. Defaults to True.
+ use_l1_spec_loss (bool):
+ enable / disable L1 spectrogram loss. Defaults to True.
+ stft_loss_weight (float):
+ Loss weight that multiplies the computed loss value. Defaults to 0.
+ subband_stft_loss_weight (float):
+ Loss weight that multiplies the computed loss value. Defaults to 0.
+ mse_G_loss_weight (float):
+ Loss weight that multiplies the computed loss value. Defaults to 1.
+ hinge_G_loss_weight (float):
+ Loss weight that multiplies the computed loss value. Defaults to 0.
+ feat_match_loss_weight (float):
+ Loss weight that multiplies the computed loss value. Defaults to 100.
+ l1_spec_loss_weight (float):
+ Loss weight that multiplies the computed loss value. Defaults to 45.
+ stft_loss_params (dict):
+ Parameters for the STFT loss. Defaults to `{"n_ffts": [1024, 2048, 512], "hop_lengths": [120, 240, 50], "win_lengths": [600, 1200, 240]}`.
+ l1_spec_loss_params (dict):
+ Parameters for the L1 spectrogram loss. Defaults to
+ `{
+ "use_mel": True,
+ "sample_rate": 22050,
+ "n_fft": 1024,
+ "hop_length": 256,
+ "win_length": 1024,
+ "n_mels": 80,
+ "mel_fmin": 0.0,
+ "mel_fmax": None,
+ }`
+ target_loss (str):
+ Target loss name that defines the quality of the model. Defaults to `loss_0`.
+ grad_clip (list):
+ A list of gradient clipping thresholds for each optimizer. Any value less than 0 disables clipping.
+ Defaults to [5, 5].
+ lr_gen (float): + Generator model initial learning rate. Defaults to 0.0002. + lr_disc (float): + Discriminator model initial learning rate. Defaults to 0.0002. + lr_scheduler_gen (torch.optim.Scheduler): + Learning rate scheduler for the generator. Defaults to `ExponentialLR`. + lr_scheduler_gen_params (dict): + Parameters for the generator learning rate scheduler. Defaults to `{"gamma": 0.999, "last_epoch": -1}`. + lr_scheduler_disc (torch.optim.Scheduler): + Learning rate scheduler for the discriminator. Defaults to `ExponentialLR`. + lr_scheduler_disc_params (dict): + Parameters for the discriminator learning rate scheduler. Defaults to `{"gamma": 0.999, "last_epoch": -1}`. + scheduler_after_epoch (bool): + Whether to update the learning rate schedulers after each epoch. Defaults to True. + use_pqmf (bool): + enable / disable PQMF for subband approximation at training. Defaults to False. + steps_to_start_discriminator (int): + Number of steps required to start training the discriminator. Defaults to 0. + diff_samples_for_G_and_D (bool): + enable / disable use of different training samples for the generator and the discriminator iterations. + Enabling it results in slower iterations but faster convergance in some cases. Defaults to False. + """ + + model: str = "gan" + + # LOSS PARAMETERS + use_stft_loss: bool = True + use_subband_stft_loss: bool = True + use_mse_gan_loss: bool = True + use_hinge_gan_loss: bool = True + use_feat_match_loss: bool = True # requires MelGAN Discriminators (MelGAN and HifiGAN) + use_l1_spec_loss: bool = True + + # loss weights + stft_loss_weight: float = 0 + subband_stft_loss_weight: float = 0 + mse_G_loss_weight: float = 1 + hinge_G_loss_weight: float = 0 + feat_match_loss_weight: float = 100 + l1_spec_loss_weight: float = 45 + + stft_loss_params: dict = field( + default_factory=lambda: { + "n_ffts": [1024, 2048, 512], + "hop_lengths": [120, 240, 50], + "win_lengths": [600, 1200, 240], + } + ) + + l1_spec_loss_params: dict = field( + default_factory=lambda: { + "use_mel": True, + "sample_rate": 22050, + "n_fft": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mels": 80, + "mel_fmin": 0.0, + "mel_fmax": None, + } + ) + + target_loss: str = "loss_0" # loss value to pick the best model to save after each epoch + + # optimizer + grad_clip: float = field(default_factory=lambda: [5, 5]) + lr_gen: float = 0.0002 # Initial learning rate. + lr_disc: float = 0.0002 # Initial learning rate. + lr_scheduler_gen: str = "ExponentialLR" # one of the schedulers from https:#pytorch.org/docs/stable/optim.html + lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999, "last_epoch": -1}) + lr_scheduler_disc: str = "ExponentialLR" # one of the schedulers from https:#pytorch.org/docs/stable/optim.html + lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999, "last_epoch": -1}) + scheduler_after_epoch: bool = True + + use_pqmf: bool = False # enable/disable using pqmf for multi-band training. (Multi-band MelGAN) + steps_to_start_discriminator = 0 # start training the discriminator after this number of steps. + diff_samples_for_G_and_D: bool = False # use different samples for G and D training steps. 
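For orientation, here is a minimal usage sketch of the config classes introduced above; it is not part of the patch. It assumes the classes are re-exported from `TTS.vocoder.configs` (as the docstring examples suggest) and that training is driven elsewhere in the code base; the dataset path below is a hypothetical placeholder.

# Illustrative sketch only: build GAN vocoder configs and override a few fields.
from TTS.vocoder.configs import HifiganConfig, MultibandMelganConfig

# Instantiate a HiFiGAN config; fields inherited from the base configs can be
# overridden through the dataclass constructor.
config = HifiganConfig(
    batch_size=32,
    data_path="/data/wavs",  # hypothetical dataset root (BaseVocoderConfig.data_path)
)

# Loss switches and weights defined in BaseGANVocoderConfig can be tuned per run.
config.use_l1_spec_loss = True
config.l1_spec_loss_weight = 45.0

# Multi-band MelGAN enables PQMF and the sub-band STFT loss by default.
mb_config = MultibandMelganConfig()
assert mb_config.use_pqmf and mb_config.use_subband_stft_loss

Note that every dict-valued default in these dataclasses is declared with `field(default_factory=...)` rather than a dict literal, since dataclasses disallow mutable defaults; this also keeps separate config instances from sharing the same dict object.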
diff --git a/TTS/vocoder/configs/univnet_config.py b/TTS/vocoder/configs/univnet_config.py
new file mode 100644
index 0000000..67f324c
--- /dev/null
+++ b/TTS/vocoder/configs/univnet_config.py
@@ -0,0 +1,161 @@
+from dataclasses import dataclass, field
+from typing import Dict
+
+from TTS.vocoder.configs.shared_configs import BaseGANVocoderConfig
+
+
+@dataclass
+class UnivnetConfig(BaseGANVocoderConfig):
+ """Defines parameters for UnivNet vocoder.
+
+ Example:
+
+ >>> from TTS.vocoder.configs import UnivnetConfig
+ >>> config = UnivnetConfig()
+
+ Args:
+ model (str):
+ Model name used for selecting the right model at initialization. Defaults to `univnet`.
+ discriminator_model (str): One of the discriminators from `TTS.vocoder.models.*_discriminator`. Defaults to
+ `univnet_discriminator`.
+ generator_model (str): One of the generators from `TTS.vocoder.models.*`. Every other non-GAN vocoder model is
+ considered as a generator too. Defaults to `univnet_generator`.
+ generator_model_params (dict): Parameters of the generator model. Defaults to
+ `
+ {
+ "in_channels": 64,
+ "out_channels": 1,
+ "hidden_channels": 32,
+ "cond_channels": 80,
+ "upsample_factors": [8, 8, 4],
+ "lvc_layers_each_block": 4,
+ "lvc_kernel_size": 3,
+ "kpnet_hidden_channels": 64,
+ "kpnet_conv_size": 3,
+ "dropout": 0.0,
+ }
+ `
+ batch_size (int):
+ Batch size used at training. Larger values use more memory. Defaults to 32.
+ seq_len (int):
+ Audio segment length used at training. Larger values use more memory. Defaults to 8192.
+ pad_short (int):
+ Additional padding applied to the audio samples shorter than `seq_len`. Defaults to 0.
+ use_noise_augment (bool):
+ enable / disable random noise added to the input waveform. The noise is added after computing the
+ features. Defaults to True.
+ use_cache (bool):
+ enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is
+ not large enough. Defaults to True.
+ use_stft_loss (bool):
+ enable / disable use of STFT loss originally used by ParallelWaveGAN model. Defaults to True.
+ use_subband_stft_loss (bool):
+ enable / disable use of subband loss computation originally used by MultiBandMelgan model. Defaults to False.
+ use_mse_gan_loss (bool):
+ enable / disable using Mean Squared Error GAN loss. Defaults to True.
+ use_hinge_gan_loss (bool):
+ enable / disable using Hinge GAN loss. You should choose either Hinge or MSE loss for training GAN models.
+ Defaults to False.
+ use_feat_match_loss (bool):
+ enable / disable using Feature Matching loss originally used by MelGAN model. Defaults to False.
+ use_l1_spec_loss (bool):
+ enable / disable using L1 spectrogram loss originally used by HifiGAN model. Defaults to False.
+ stft_loss_params (dict):
+ STFT loss parameters. Defaults to
+ `{
+ "n_ffts": [1024, 2048, 512],
+ "hop_lengths": [120, 240, 50],
+ "win_lengths": [600, 1200, 240]
+ }`
+ l1_spec_loss_params (dict):
+ L1 spectrogram loss parameters. Defaults to
+ `{
+ "use_mel": True,
+ "sample_rate": 22050,
+ "n_fft": 1024,
+ "hop_length": 256,
+ "win_length": 1024,
+ "n_mels": 80,
+ "mel_fmin": 0.0,
+ "mel_fmax": None,
+ }`
+ stft_loss_weight (float): STFT loss weight that multiplies the computed loss before summing up the total
+ model loss. Defaults to 2.5.
+ subband_stft_loss_weight (float):
+ Subband STFT loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0.
+ mse_G_loss_weight (float):
+ MSE generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 1.
+ hinge_G_loss_weight (float): + Hinge generator loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. + feat_match_loss_weight (float): + Feature matching loss weight that multiplies the computed loss before summing up the total loss. faults to 108. + l1_spec_loss_weight (float): + L1 spectrogram loss weight that multiplies the computed loss before summing up the total loss. Defaults to 0. + """ + + model: str = "univnet" + batch_size: int = 32 + # model specific params + discriminator_model: str = "univnet_discriminator" + generator_model: str = "univnet_generator" + generator_model_params: Dict = field( + default_factory=lambda: { + "in_channels": 64, + "out_channels": 1, + "hidden_channels": 32, + "cond_channels": 80, + "upsample_factors": [8, 8, 4], + "lvc_layers_each_block": 4, + "lvc_kernel_size": 3, + "kpnet_hidden_channels": 64, + "kpnet_conv_size": 3, + "dropout": 0.0, + } + ) + + # LOSS PARAMETERS - overrides + use_stft_loss: bool = True + use_subband_stft_loss: bool = False + use_mse_gan_loss: bool = True + use_hinge_gan_loss: bool = False + use_feat_match_loss: bool = False # requires MelGAN Discriminators (MelGAN and univnet) + use_l1_spec_loss: bool = False + + # loss weights - overrides + stft_loss_weight: float = 2.5 + stft_loss_params: Dict = field( + default_factory=lambda: { + "n_ffts": [1024, 2048, 512], + "hop_lengths": [120, 240, 50], + "win_lengths": [600, 1200, 240], + } + ) + subband_stft_loss_weight: float = 0 + mse_G_loss_weight: float = 1 + hinge_G_loss_weight: float = 0 + feat_match_loss_weight: float = 0 + l1_spec_loss_weight: float = 0 + l1_spec_loss_params: Dict = field( + default_factory=lambda: { + "use_mel": True, + "sample_rate": 22050, + "n_fft": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mels": 80, + "mel_fmin": 0.0, + "mel_fmax": None, + } + ) + + # optimizer parameters + lr_gen: float = 1e-4 # Initial learning rate. + lr_disc: float = 1e-4 # Initial learning rate. + lr_scheduler_gen: str = None # one of the schedulers from https:#pytorch.org/docs/stable/optim.html + # lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999, "last_epoch": -1}) + lr_scheduler_disc: str = None # one of the schedulers from https:#pytorch.org/docs/stable/optim.html + # lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999, "last_epoch": -1}) + optimizer_params: Dict = field(default_factory=lambda: {"betas": [0.5, 0.9], "weight_decay": 0.0}) + steps_to_start_discriminator: int = 200000 + + def __post_init__(self): + super().__post_init__() + self.generator_model_params["cond_channels"] = self.audio.num_mels diff --git a/TTS/vocoder/configs/wavegrad_config.py b/TTS/vocoder/configs/wavegrad_config.py new file mode 100644 index 0000000..c39813a --- /dev/null +++ b/TTS/vocoder/configs/wavegrad_config.py @@ -0,0 +1,90 @@ +from dataclasses import dataclass, field + +from TTS.vocoder.configs.shared_configs import BaseVocoderConfig +from TTS.vocoder.models.wavegrad import WavegradArgs + + +@dataclass +class WavegradConfig(BaseVocoderConfig): + """Defines parameters for WaveGrad vocoder. + Example: + + >>> from TTS.vocoder.configs import WavegradConfig + >>> config = WavegradConfig() + + Args: + model (str): + Model name used for selecting the right model at initialization. Defaults to `wavegrad`. + generator_model (str): One of the generators from TTS.vocoder.models.*`. Every other non-GAN vocoder model is + considered as a generator too. Defaults to `wavegrad`. 
+ model_params (WavegradArgs): Model parameters. Check `WavegradArgs` for default values. + target_loss (str): + Target loss name that defines the quality of the model. Defaults to `avg_wavegrad_loss`. + epochs (int): + Number of epochs to traing the model. Defaults to 10000. + batch_size (int): + Batch size used at training. Larger values use more memory. Defaults to 96. + seq_len (int): + Audio segment length used at training. Larger values use more memory. Defaults to 6144. + use_cache (bool): + enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is + not large enough. Defaults to True. + mixed_precision (bool): + enable / disable mixed precision training. Default is True. + eval_split_size (int): + Number of samples used for evalutaion. Defaults to 50. + train_noise_schedule (dict): + Training noise schedule. Defaults to + `{"min_val": 1e-6, "max_val": 1e-2, "num_steps": 1000}` + test_noise_schedule (dict): + Inference noise schedule. For a better performance, you may need to use `bin/tune_wavegrad.py` to find a + better schedule. Defaults to + ` + { + "min_val": 1e-6, + "max_val": 1e-2, + "num_steps": 50, + } + ` + grad_clip (float): + Gradient clipping threshold. If <= 0.0, no clipping is applied. Defaults to 1.0 + lr (float): + Initila leraning rate. Defaults to 1e-4. + lr_scheduler (str): + One of the learning rate schedulers from `torch.optim.scheduler.*`. Defaults to `MultiStepLR`. + lr_scheduler_params (dict): + kwargs for the scheduler. Defaults to `{"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]}` + """ + + model: str = "wavegrad" + # Model specific params + generator_model: str = "wavegrad" + model_params: WavegradArgs = field(default_factory=WavegradArgs) + target_loss: str = "loss" # loss value to pick the best model to save after each epoch + + # Training - overrides + epochs: int = 10000 + batch_size: int = 96 + seq_len: int = 6144 + use_cache: bool = True + mixed_precision: bool = True + eval_split_size: int = 50 + + # NOISE SCHEDULE PARAMS + train_noise_schedule: dict = field(default_factory=lambda: {"min_val": 1e-6, "max_val": 1e-2, "num_steps": 1000}) + + test_noise_schedule: dict = field( + default_factory=lambda: { # inference noise schedule. Try TTS/bin/tune_wavegrad.py to find the optimal values. + "min_val": 1e-6, + "max_val": 1e-2, + "num_steps": 50, + } + ) + + # optimizer overrides + grad_clip: float = 1.0 + lr: float = 1e-4 # Initial learning rate. + lr_scheduler: str = "MultiStepLR" # one of the schedulers from https:#pytorch.org/docs/stable/optim.html + lr_scheduler_params: dict = field( + default_factory=lambda: {"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]} + ) diff --git a/TTS/vocoder/configs/wavernn_config.py b/TTS/vocoder/configs/wavernn_config.py new file mode 100644 index 0000000..f39400e --- /dev/null +++ b/TTS/vocoder/configs/wavernn_config.py @@ -0,0 +1,102 @@ +from dataclasses import dataclass, field + +from TTS.vocoder.configs.shared_configs import BaseVocoderConfig +from TTS.vocoder.models.wavernn import WavernnArgs + + +@dataclass +class WavernnConfig(BaseVocoderConfig): + """Defines parameters for Wavernn vocoder. + Example: + + >>> from TTS.vocoder.configs import WavernnConfig + >>> config = WavernnConfig() + + Args: + model (str): + Model name used for selecting the right model at initialization. Defaults to `wavernn`. + mode (str): + Output mode of the WaveRNN vocoder. 
`mold` for Mixture of Logistic Distribution, `gauss` for a single + Gaussian Distribution and `bits` for quantized bits as the model's output. + mulaw (bool): + enable / disable the use of Mulaw quantization for training. Only applicable if `mode == 'bits'`. Defaults + to `True`. + generator_model (str): + One of the generators from TTS.vocoder.models.*`. Every other non-GAN vocoder model is + considered as a generator too. Defaults to `WaveRNN`. + wavernn_model_params (dict): + kwargs for the WaveRNN model. Defaults to + `{ + "rnn_dims": 512, + "fc_dims": 512, + "compute_dims": 128, + "res_out_dims": 128, + "num_res_blocks": 10, + "use_aux_net": True, + "use_upsample_net": True, + "upsample_factors": [4, 8, 8] + }` + batched (bool): + enable / disable the batched inference. It speeds up the inference by splitting the input into segments and + processing the segments in a batch. Then it merges the outputs with a certain overlap and smoothing. If + you set it False, without CUDA, it is too slow to be practical. Defaults to True. + target_samples (int): + Size of the segments in batched mode. Defaults to 11000. + overlap_sampels (int): + Size of the overlap between consecutive segments. Defaults to 550. + batch_size (int): + Batch size used at training. Larger values use more memory. Defaults to 256. + seq_len (int): + Audio segment length used at training. Larger values use more memory. Defaults to 1280. + + use_noise_augment (bool): + enable / disable random noise added to the input waveform. The noise is added after computing the + features. Defaults to True. + use_cache (bool): + enable / disable in memory caching of the computed features. It can cause OOM error if the system RAM is + not large enough. Defaults to True. + mixed_precision (bool): + enable / disable mixed precision training. Default is True. + eval_split_size (int): + Number of samples used for evalutaion. Defaults to 50. + num_epochs_before_test (int): + Number of epochs waited to run the next evalution. Since inference takes some time, it is better to + wait some number of epochs not ot waste training time. Defaults to 10. + grad_clip (float): + Gradient clipping threshold. If <= 0.0, no clipping is applied. Defaults to 4.0 + lr (float): + Initila leraning rate. Defaults to 1e-4. + lr_scheduler (str): + One of the learning rate schedulers from `torch.optim.scheduler.*`. Defaults to `MultiStepLR`. + lr_scheduler_params (dict): + kwargs for the scheduler. Defaults to `{"gamma": 0.5, "milestones": [200000, 400000, 600000]}` + """ + + model: str = "wavernn" + + # Model specific params + model_args: WavernnArgs = field(default_factory=WavernnArgs) + target_loss: str = "loss" + + # Inference + batched: bool = True + target_samples: int = 11000 + overlap_samples: int = 550 + + # Training - overrides + epochs: int = 10000 + batch_size: int = 256 + seq_len: int = 1280 + use_noise_augment: bool = False + use_cache: bool = True + mixed_precision: bool = True + eval_split_size: int = 50 + num_epochs_before_test: int = ( + 10 # number of epochs to wait until the next test run (synthesizing a full audio clip). + ) + + # optimizer overrides + grad_clip: float = 4.0 + lr: float = 1e-4 # Initial learning rate. 
+ lr_scheduler: str = "MultiStepLR" # one of the schedulers from https:#pytorch.org/docs/stable/optim.html + lr_scheduler_params: dict = field(default_factory=lambda: {"gamma": 0.5, "milestones": [200000, 400000, 600000]}) diff --git a/TTS/vocoder/datasets/__init__.py b/TTS/vocoder/datasets/__init__.py new file mode 100644 index 0000000..871eb0d --- /dev/null +++ b/TTS/vocoder/datasets/__init__.py @@ -0,0 +1,58 @@ +from typing import List + +from coqpit import Coqpit +from torch.utils.data import Dataset + +from TTS.utils.audio import AudioProcessor +from TTS.vocoder.datasets.gan_dataset import GANDataset +from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data +from TTS.vocoder.datasets.wavegrad_dataset import WaveGradDataset +from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset + + +def setup_dataset(config: Coqpit, ap: AudioProcessor, is_eval: bool, data_items: List, verbose: bool) -> Dataset: + if config.model.lower() in "gan": + dataset = GANDataset( + ap=ap, + items=data_items, + seq_len=config.seq_len, + hop_len=ap.hop_length, + pad_short=config.pad_short, + conv_pad=config.conv_pad, + return_pairs=config.diff_samples_for_G_and_D if "diff_samples_for_G_and_D" in config else False, + is_training=not is_eval, + return_segments=not is_eval, + use_noise_augment=config.use_noise_augment, + use_cache=config.use_cache, + verbose=verbose, + ) + dataset.shuffle_mapping() + elif config.model.lower() == "wavegrad": + dataset = WaveGradDataset( + ap=ap, + items=data_items, + seq_len=config.seq_len, + hop_len=ap.hop_length, + pad_short=config.pad_short, + conv_pad=config.conv_pad, + is_training=not is_eval, + return_segments=True, + use_noise_augment=False, + use_cache=config.use_cache, + verbose=verbose, + ) + elif config.model.lower() == "wavernn": + dataset = WaveRNNDataset( + ap=ap, + items=data_items, + seq_len=config.seq_len, + hop_len=ap.hop_length, + pad=config.model_params.pad, + mode=config.model_params.mode, + mulaw=config.model_params.mulaw, + is_training=not is_eval, + verbose=verbose, + ) + else: + raise ValueError(f" [!] Dataset for model {config.model.lower()} cannot be found.") + return dataset diff --git a/TTS/vocoder/datasets/__pycache__/__init__.cpython-311.pyc b/TTS/vocoder/datasets/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10ae7ca4b2a1e7ba3269d5014a44285162513156 GIT binary patch literal 2614 zcmbUiU27XhaL@WUoj$A&JGK=&j$AtlvK#BcG|hu)Nt4usG^TAn3Nqz-(XDJb_o3{a z92^t~ArEQkW1xmo@RQr(JoKRt{TXK?@UbNjC=~kUlJq4{omuIWsCC+=%iPTD-0bYk z?9A@BsZ^Z6Sgx*CFUAP@1t-Bsc9^GM0P_{mi7pw$l**EXe4o*0hRPwqhm5cpDMth! 
[... base85 GIT binary patch data for the remaining compiled files under TTS/vocoder/datasets/__pycache__/*.cpython-311.pyc omitted ...]
zwpU>g5jPB>tb+TD_B=nEy`F1Xl9rPBuBF?{o|VHZ(s#F>4#>Vi*>gf~rl~t!dBGq$ zq5@DTp{XlPOHeAOEkwLNa=}5Mx0<~HNxK0_v=-kq3PUiIYJtE5mxzH60&nnF0A-($ z`b0PkUY>{wv=q=0PDB1}aKb@*aUTFgNkDs6Enf>u^Xs18HBawy%koLt)312?_0QW; zfz%;9a}+Wd|0X=+M4nkNY;g*50+e+mBG2U2j76@q!zl)o{*|cRp;B+kDI8PcF-zV_H4H?fb(B0@>eX$mlZBOHPF} z2Hz?rgQGe63y^}!^J7iPgqWz7U%-P8!%c+n0pS8P@R>C-(-BP~>QN#}Bo~nBM6|e! zE*5W%m?<0sISdiwxS~COl%8LI(V3;QolBxFaYvz^(YrS9x!`NN=6>Q{!IG=T@& zF86-)f|h&73Z5~g^|yK-f;<652MGdcb5I~#X|B01Q0+X#8F>NJo2D4daLEz62Ej;C znumjrH4MiEss9K$-oVXA0xrYU`jc2W&d0@sAmEWl7zThLVd=f2YTd_)_>}fhHIweX zIt^9kt3e67$AY)Y7=zOu9HUfAC=nFk3Stb$vCXrk;zb-A*;1GUAcDrHt^jjFI&tDe zDOO%m&f8+;rTQz_aRcD z=H!OUtGM>$Z^*7b#nqQNS#-DO&f+iMgt*8z6@JyHxc!;Y%;-j=Cue$K|E6DV+^aP1 zg)^T068-w&FUqd>6xVyD;d}Bh{XxZb5Qq2V_T*d0FOcURM)QF}=MgwMtb32Gd5_87 z4;Al+virl;>(9Ht_x)??MMxezE4$AXE?kt|7vapgV`r|D{Lbdi!LMiOHyPW8!>Kr! zyi<1UQ5?zpx5PtU=$4`#K#V*|3#e*>(Ztm(>79s+3QOq~6EW`w& zr7v*B!X-j1OD?K}E?^P?)wysOK{o=FMYXYXX%^z6QI-Wo&E5928UT1b_F8^2N|2|@Ic zet20K7cm+l3TUZGkV8WTni*{-4B-A`8_+0lTo9m$c3S)#z^0L==_1uyC_hE2;STu~ isqRAc^O71ayzbew#AzD#{Cb$3_!If^-*5O%*7(1b##nLy literal 0 HcmV?d00001 diff --git a/TTS/vocoder/datasets/gan_dataset.py b/TTS/vocoder/datasets/gan_dataset.py new file mode 100644 index 0000000..50c38c4 --- /dev/null +++ b/TTS/vocoder/datasets/gan_dataset.py @@ -0,0 +1,152 @@ +import glob +import os +import random +from multiprocessing import Manager + +import numpy as np +import torch +from torch.utils.data import Dataset + + +class GANDataset(Dataset): + """ + GAN Dataset searchs for all the wav files under root path + and converts them to acoustic features on the fly and returns + random segments of (audio, feature) couples. + """ + + def __init__( + self, + ap, + items, + seq_len, + hop_len, + pad_short, + conv_pad=2, + return_pairs=False, + is_training=True, + return_segments=True, + use_noise_augment=False, + use_cache=False, + verbose=False, + ): + super().__init__() + self.ap = ap + self.item_list = items + self.compute_feat = not isinstance(items[0], (tuple, list)) + self.seq_len = seq_len + self.hop_len = hop_len + self.pad_short = pad_short + self.conv_pad = conv_pad + self.return_pairs = return_pairs + self.is_training = is_training + self.return_segments = return_segments + self.use_cache = use_cache + self.use_noise_augment = use_noise_augment + self.verbose = verbose + + assert seq_len % hop_len == 0, " [!] seq_len has to be a multiple of hop_len." 
+ self.feat_frame_len = seq_len // hop_len + (2 * conv_pad) + + # map G and D instances + self.G_to_D_mappings = list(range(len(self.item_list))) + self.shuffle_mapping() + + # cache acoustic features + if use_cache: + self.create_feature_cache() + + def create_feature_cache(self): + self.manager = Manager() + self.cache = self.manager.list() + self.cache += [None for _ in range(len(self.item_list))] + + @staticmethod + def find_wav_files(path): + return glob.glob(os.path.join(path, "**", "*.wav"), recursive=True) + + def __len__(self): + return len(self.item_list) + + def __getitem__(self, idx): + """Return different items for Generator and Discriminator and + cache acoustic features""" + + # set the seed differently for each worker + if torch.utils.data.get_worker_info(): + random.seed(torch.utils.data.get_worker_info().seed) + + if self.return_segments: + item1 = self.load_item(idx) + if self.return_pairs: + idx2 = self.G_to_D_mappings[idx] + item2 = self.load_item(idx2) + return item1, item2 + return item1 + item1 = self.load_item(idx) + return item1 + + def _pad_short_samples(self, audio, mel=None): + """Pad samples shorter than the output sequence length""" + if len(audio) < self.seq_len: + audio = np.pad(audio, (0, self.seq_len - len(audio)), mode="constant", constant_values=0.0) + + if mel is not None and mel.shape[1] < self.feat_frame_len: + pad_value = self.ap.melspectrogram(np.zeros([self.ap.win_length]))[:, 0] + mel = np.pad( + mel, + ([0, 0], [0, self.feat_frame_len - mel.shape[1]]), + mode="constant", + constant_values=pad_value.mean(), + ) + return audio, mel + + def shuffle_mapping(self): + random.shuffle(self.G_to_D_mappings) + + def load_item(self, idx): + """load (audio, feat) couple""" + if self.compute_feat: + # compute features from wav + wavpath = self.item_list[idx] + # print(wavpath) + + if self.use_cache and self.cache[idx] is not None: + audio, mel = self.cache[idx] + else: + audio = self.ap.load_wav(wavpath) + mel = self.ap.melspectrogram(audio) + audio, mel = self._pad_short_samples(audio, mel) + else: + # load precomputed features + wavpath, feat_path = self.item_list[idx] + + if self.use_cache and self.cache[idx] is not None: + audio, mel = self.cache[idx] + else: + audio = self.ap.load_wav(wavpath) + mel = np.load(feat_path) + audio, mel = self._pad_short_samples(audio, mel) + + # correct the audio length wrt padding applied in stft + audio = np.pad(audio, (0, self.hop_len), mode="edge") + audio = audio[: mel.shape[-1] * self.hop_len] + assert ( + mel.shape[-1] * self.hop_len == audio.shape[-1] + ), f" [!] 
{mel.shape[-1] * self.hop_len} vs {audio.shape[-1]}" + + audio = torch.from_numpy(audio).float().unsqueeze(0) + mel = torch.from_numpy(mel).float().squeeze(0) + + if self.return_segments: + max_mel_start = mel.shape[1] - self.feat_frame_len + mel_start = random.randint(0, max_mel_start) + mel_end = mel_start + self.feat_frame_len + mel = mel[:, mel_start:mel_end] + + audio_start = mel_start * self.hop_len + audio = audio[:, audio_start : audio_start + self.seq_len] + + if self.use_noise_augment and self.is_training and self.return_segments: + audio = audio + (1 / 32768) * torch.randn_like(audio) + return (mel, audio) diff --git a/TTS/vocoder/datasets/preprocess.py b/TTS/vocoder/datasets/preprocess.py new file mode 100644 index 0000000..503bb04 --- /dev/null +++ b/TTS/vocoder/datasets/preprocess.py @@ -0,0 +1,75 @@ +import glob +import os +from pathlib import Path + +import numpy as np +from coqpit import Coqpit +from tqdm import tqdm + +from TTS.utils.audio import AudioProcessor +from TTS.utils.audio.numpy_transforms import mulaw_encode, quantize + + +def preprocess_wav_files(out_path: str, config: Coqpit, ap: AudioProcessor): + """Process wav and compute mel and quantized wave signal. + It is mainly used by WaveRNN dataloader. + + Args: + out_path (str): Parent folder path to save the files. + config (Coqpit): Model config. + ap (AudioProcessor): Audio processor. + """ + os.makedirs(os.path.join(out_path, "quant"), exist_ok=True) + os.makedirs(os.path.join(out_path, "mel"), exist_ok=True) + wav_files = find_wav_files(config.data_path) + for path in tqdm(wav_files): + wav_name = Path(path).stem + quant_path = os.path.join(out_path, "quant", wav_name + ".npy") + mel_path = os.path.join(out_path, "mel", wav_name + ".npy") + y = ap.load_wav(path) + mel = ap.melspectrogram(y) + np.save(mel_path, mel) + if isinstance(config.mode, int): + quant = ( + mulaw_encode(wav=y, mulaw_qc=config.mode) + if config.model_args.mulaw + else quantize(x=y, quantize_bits=config.mode) + ) + np.save(quant_path, quant) + + +def find_wav_files(data_path, file_ext="wav"): + wav_paths = glob.glob(os.path.join(data_path, "**", f"*.{file_ext}"), recursive=True) + return wav_paths + + +def find_feat_files(data_path): + feat_paths = glob.glob(os.path.join(data_path, "**", "*.npy"), recursive=True) + return feat_paths + + +def load_wav_data(data_path, eval_split_size, file_ext="wav"): + wav_paths = find_wav_files(data_path, file_ext=file_ext) + assert len(wav_paths) > 0, f" [!] {data_path} is empty." + np.random.seed(0) + np.random.shuffle(wav_paths) + return wav_paths[:eval_split_size], wav_paths[eval_split_size:] + + +def load_wav_feat_data(data_path, feat_path, eval_split_size): + wav_paths = find_wav_files(data_path) + feat_paths = find_feat_files(feat_path) + + wav_paths.sort(key=lambda x: Path(x).stem) + feat_paths.sort(key=lambda x: Path(x).stem) + + assert len(wav_paths) == len(feat_paths), f" [!] 
{len(wav_paths)} vs {feat_paths}" + for wav, feat in zip(wav_paths, feat_paths): + wav_name = Path(wav).stem + feat_name = Path(feat).stem + assert wav_name == feat_name + + items = list(zip(wav_paths, feat_paths)) + np.random.seed(0) + np.random.shuffle(items) + return items[:eval_split_size], items[eval_split_size:] diff --git a/TTS/vocoder/datasets/wavegrad_dataset.py b/TTS/vocoder/datasets/wavegrad_dataset.py new file mode 100644 index 0000000..305fe43 --- /dev/null +++ b/TTS/vocoder/datasets/wavegrad_dataset.py @@ -0,0 +1,151 @@ +import glob +import os +import random +from multiprocessing import Manager +from typing import List, Tuple + +import numpy as np +import torch +from torch.utils.data import Dataset + + +class WaveGradDataset(Dataset): + """ + WaveGrad Dataset searchs for all the wav files under root path + and converts them to acoustic features on the fly and returns + random segments of (audio, feature) couples. + """ + + def __init__( + self, + ap, + items, + seq_len, + hop_len, + pad_short, + conv_pad=2, + is_training=True, + return_segments=True, + use_noise_augment=False, + use_cache=False, + verbose=False, + ): + super().__init__() + self.ap = ap + self.item_list = items + self.seq_len = seq_len if return_segments else None + self.hop_len = hop_len + self.pad_short = pad_short + self.conv_pad = conv_pad + self.is_training = is_training + self.return_segments = return_segments + self.use_cache = use_cache + self.use_noise_augment = use_noise_augment + self.verbose = verbose + + if return_segments: + assert seq_len % hop_len == 0, " [!] seq_len has to be a multiple of hop_len." + self.feat_frame_len = seq_len // hop_len + (2 * conv_pad) + + # cache acoustic features + if use_cache: + self.create_feature_cache() + + def create_feature_cache(self): + self.manager = Manager() + self.cache = self.manager.list() + self.cache += [None for _ in range(len(self.item_list))] + + @staticmethod + def find_wav_files(path): + return glob.glob(os.path.join(path, "**", "*.wav"), recursive=True) + + def __len__(self): + return len(self.item_list) + + def __getitem__(self, idx): + item = self.load_item(idx) + return item + + def load_test_samples(self, num_samples: int) -> List[Tuple]: + """Return test samples. + + Args: + num_samples (int): Number of samples to return. + + Returns: + List[Tuple]: melspectorgram and audio. 
+ + Shapes: + - melspectrogram (Tensor): :math:`[C, T]` + - audio (Tensor): :math:`[T_audio]` + """ + samples = [] + return_segments = self.return_segments + self.return_segments = False + for idx in range(num_samples): + mel, audio = self.load_item(idx) + samples.append([mel, audio]) + self.return_segments = return_segments + return samples + + def load_item(self, idx): + """load (audio, feat) couple""" + # compute features from wav + wavpath = self.item_list[idx] + + if self.use_cache and self.cache[idx] is not None: + audio = self.cache[idx] + else: + audio = self.ap.load_wav(wavpath) + + if self.return_segments: + # correct audio length wrt segment length + if audio.shape[-1] < self.seq_len + self.pad_short: + audio = np.pad( + audio, (0, self.seq_len + self.pad_short - len(audio)), mode="constant", constant_values=0.0 + ) + assert ( + audio.shape[-1] >= self.seq_len + self.pad_short + ), f"{audio.shape[-1]} vs {self.seq_len + self.pad_short}" + + # correct the audio length wrt hop length + p = (audio.shape[-1] // self.hop_len + 1) * self.hop_len - audio.shape[-1] + audio = np.pad(audio, (0, p), mode="constant", constant_values=0.0) + + if self.use_cache: + self.cache[idx] = audio + + if self.return_segments: + max_start = len(audio) - self.seq_len + start = random.randint(0, max_start) + end = start + self.seq_len + audio = audio[start:end] + + if self.use_noise_augment and self.is_training and self.return_segments: + audio = audio + (1 / 32768) * torch.randn_like(audio) + + mel = self.ap.melspectrogram(audio) + mel = mel[..., :-1] # ignore the padding + + audio = torch.from_numpy(audio).float() + mel = torch.from_numpy(mel).float().squeeze(0) + return (mel, audio) + + @staticmethod + def collate_full_clips(batch): + """This is used in tune_wavegrad.py. + It pads sequences to the max length.""" + max_mel_length = max([b[0].shape[1] for b in batch]) if len(batch) > 1 else batch[0][0].shape[1] + max_audio_length = max([b[1].shape[0] for b in batch]) if len(batch) > 1 else batch[0][1].shape[0] + + mels = torch.zeros([len(batch), batch[0][0].shape[0], max_mel_length]) + audios = torch.zeros([len(batch), max_audio_length]) + + for idx, b in enumerate(batch): + mel = b[0] + audio = b[1] + mels[idx, :, : mel.shape[1]] = mel + audios[idx, : audio.shape[0]] = audio + + return mels, audios diff --git a/TTS/vocoder/datasets/wavernn_dataset.py b/TTS/vocoder/datasets/wavernn_dataset.py new file mode 100644 index 0000000..a67c5b3 --- /dev/null +++ b/TTS/vocoder/datasets/wavernn_dataset.py @@ -0,0 +1,118 @@ +import numpy as np +import torch +from torch.utils.data import Dataset + +from TTS.utils.audio.numpy_transforms import mulaw_encode, quantize + + +class WaveRNNDataset(Dataset): + """ + WaveRNN Dataset searchs for all the wav files under root path + and converts them to acoustic features on the fly. 
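+
+    Note (added for clarity; behaviour inferred from ``load_item`` below and
+    ``WaveRNNLoss``): ``mode`` selects the target representation -- "mold" for a
+    discretized mixture of logistics, "gauss" for a single Gaussian, or an int
+    bit depth combined with mu-law encoding when ``mulaw=True`` and linear
+    quantization otherwise.
+
+    Example (a minimal usage sketch with assumed values; ``ap`` is a configured
+    AudioProcessor, ``wav_paths`` a list of wav file paths, and ``DataLoader``
+    comes from ``torch.utils.data``)::
+
+        dataset = WaveRNNDataset(ap, wav_paths, seq_len=1280, hop_len=256,
+                                 pad=2, mode=10, mulaw=True)
+        loader = DataLoader(dataset, batch_size=32, collate_fn=dataset.collate)
+        x_input, mels, y_coarse = next(iter(loader))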
+ """ + + def __init__( + self, ap, items, seq_len, hop_len, pad, mode, mulaw, is_training=True, verbose=False, return_segments=True + ): + super().__init__() + self.ap = ap + self.compute_feat = not isinstance(items[0], (tuple, list)) + self.item_list = items + self.seq_len = seq_len + self.hop_len = hop_len + self.mel_len = seq_len // hop_len + self.pad = pad + self.mode = mode + self.mulaw = mulaw + self.is_training = is_training + self.verbose = verbose + self.return_segments = return_segments + + assert self.seq_len % self.hop_len == 0 + + def __len__(self): + return len(self.item_list) + + def __getitem__(self, index): + item = self.load_item(index) + return item + + def load_test_samples(self, num_samples): + samples = [] + return_segments = self.return_segments + self.return_segments = False + for idx in range(num_samples): + mel, audio, _ = self.load_item(idx) + samples.append([mel, audio]) + self.return_segments = return_segments + return samples + + def load_item(self, index): + """ + load (audio, feat) couple if feature_path is set + else compute it on the fly + """ + if self.compute_feat: + wavpath = self.item_list[index] + audio = self.ap.load_wav(wavpath) + if self.return_segments: + min_audio_len = 2 * self.seq_len + (2 * self.pad * self.hop_len) + else: + min_audio_len = audio.shape[0] + (2 * self.pad * self.hop_len) + if audio.shape[0] < min_audio_len: + print(" [!] Instance is too short! : {}".format(wavpath)) + audio = np.pad(audio, [0, min_audio_len - audio.shape[0] + self.hop_len]) + mel = self.ap.melspectrogram(audio) + + if self.mode in ["gauss", "mold"]: + x_input = audio + elif isinstance(self.mode, int): + x_input = ( + mulaw_encode(wav=audio, mulaw_qc=self.mode) + if self.mulaw + else quantize(x=audio, quantize_bits=self.mode) + ) + else: + raise RuntimeError("Unknown dataset mode - ", self.mode) + + else: + wavpath, feat_path = self.item_list[index] + mel = np.load(feat_path.replace("/quant/", "/mel/")) + + if mel.shape[-1] < self.mel_len + 2 * self.pad: + print(" [!] Instance is too short! 
: {}".format(wavpath)) + self.item_list[index] = self.item_list[index + 1] + feat_path = self.item_list[index] + mel = np.load(feat_path.replace("/quant/", "/mel/")) + if self.mode in ["gauss", "mold"]: + x_input = self.ap.load_wav(wavpath) + elif isinstance(self.mode, int): + x_input = np.load(feat_path.replace("/mel/", "/quant/")) + else: + raise RuntimeError("Unknown dataset mode - ", self.mode) + + return mel, x_input, wavpath + + def collate(self, batch): + mel_win = self.seq_len // self.hop_len + 2 * self.pad + max_offsets = [x[0].shape[-1] - (mel_win + 2 * self.pad) for x in batch] + + mel_offsets = [np.random.randint(0, offset) for offset in max_offsets] + sig_offsets = [(offset + self.pad) * self.hop_len for offset in mel_offsets] + + mels = [x[0][:, mel_offsets[i] : mel_offsets[i] + mel_win] for i, x in enumerate(batch)] + + coarse = [x[1][sig_offsets[i] : sig_offsets[i] + self.seq_len + 1] for i, x in enumerate(batch)] + + mels = np.stack(mels).astype(np.float32) + if self.mode in ["gauss", "mold"]: + coarse = np.stack(coarse).astype(np.float32) + coarse = torch.FloatTensor(coarse) + x_input = coarse[:, : self.seq_len] + elif isinstance(self.mode, int): + coarse = np.stack(coarse).astype(np.int64) + coarse = torch.LongTensor(coarse) + x_input = 2 * coarse[:, : self.seq_len].float() / (2**self.mode - 1.0) - 1.0 + y_coarse = coarse[:, 1:] + mels = torch.FloatTensor(mels) + return x_input, mels, y_coarse diff --git a/TTS/vocoder/layers/__init__.py b/TTS/vocoder/layers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/vocoder/layers/__pycache__/__init__.cpython-311.pyc b/TTS/vocoder/layers/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..406b4798790e43e5a321791cd438117de46e8925 GIT binary patch literal 180 zcmZ3^%ge<81nFs6X(0MBh=2h`DC095kTIPhg&~+hlhJP_LlF~@{~09t%UwUTIJKx) zzcR5nL*FH}IJ+djK;Jn(H?1<%Q$M-1xFkO}J}*BdwOBtSBv`*JKN-j_($7h(1Zj$o u&&c4k#S*bg?@&8PTQixMTZD5^z`Oi3JBFH$dBq%2FaB+7ET#con9HM{Am zre$%OGs?uXX*t>%W>$vVvz!c(I1UwiG1?%D2#a8$O#-a334$u2u zEfCC~-S6D$s;+*B<5?KkMJ|hn_i^r{?mhRMbI(1+Kdz~9a|lzD*C)U83dj8uCG=p= zN1kuCaNNgSgp2TTZkiwGr!C_ap3;`Mb=-=-*0^=rHg02iwzz$|YTUu%_PBG}HSS{Z zsN4X|pX-jW{cSeX}PzBO8?XIOQ>s$T=EfnhZOt8opiMuyb{tmZYani$qP zV707))f{R4fE!;IX#;FwHQG_5V~rZE4674ZU29;qF|75#@~?r_9s(TpxOVp3v2&=<8HtG#LNpbdi$+4z zvG+sqCDMYU!sJS&Q^aDpW#bYh^tpiUN^Z^fVA0i{N{aX!M0TO$0pHN#Ev z5z7b8aa+WSd9i1>_xbTEjMSz&spnH@eD1$f%uoI7@SjY~%%=QP(L_`Tr=tFFBH|aK z;kf^1_`T?4QkWL~VIk_DNG9Hk3aMzs?MFx@{o+h?A|)iJgfK8BlG8K5iuiBFQrG;a z`dKSy83tO?>Y|5=R4|?O@tZ&wn4_BmQG9#Uqjr zpW*N4dV*Url^B^^F%HkA`7{^O=bKAgkTUjb+KQCXd#bw73Tj!>*5bVe+$tR-!ky*r z^J4*rY8PkmqN+0#iX~#HP)M~WLX(pz)qO2F6N*O@Q>kmJ`(_M8z~X`fO{g|8HJJ+7 z@i!Wu6nw}M>Il?Rp(_-ch=)Zn6cTARXti$t>S*x17!}0et+044I2;vkq>?kiq2%=B zt@9^?6SHC}IUPzQBT+GU?%dhnd&vpJgy#$QF+R z{L5`_#aVO5ao6$0+49)gB0Jj@XPe|~%QdxSY|-v?Eg_>kr1{TK{<*QP^H`syax~wohd?W;+HZxf zg;OejOLeeVXjHY{0&o&Rgr>t&XoF(ngh=y5l!hQQQ(Xr*$IY-1ne!RclFw;Jv6u$9 z&E@KwKMwpfkUc8bcPsVX8Aqny-oBroU%a#=e)9I>+yCMG z7ndH~l=}`UeFx=+LrTM;jPtAdb@#Sq&o1rx|r?VF33XwC@t(=_vM{I~1BuMrPv__k=?4&W7Xp5=ST$ zNlt`9jQWMm^maA*JvI@I$3vm}ywF51MTB>Vy}m%afC1Vu;#mM}4gF`;VzH0%s~m$) zyZzNwt^%la*f*?lYXSBWtd|LNU<93#pKn7}sdR`qn0&B*;DUUpDv}RQwSIIqo{F8F z2*;!Ps?Gfmme(gzrvG1$)VObrabVps&Pf2<^1k)!_c32zuM*e=0E~;sk39gI@Ev3n z2^v(>SdtH`{%r`F6uQyFb$toBB~z|m%9RKy2o{5Vqzu&(Os;|u8s@G9-$Lv2ws|{P z1b5x2xnyE>g`QF*SMmX@2`r^ps1>s~p1Zq~M4LdP2)Kk={LywzC&j2Q)ji5~*(qo` zs++M7(vU>eIv1N!tHLug(L{vNbik=`p(-X2MYXyh5s@f}M6pBwYI~8^hp{6_KW$4g 
z+0|Z7;f55W?IQj*z)PjrO56Iytxwwe9=G+$Z9%0iNWyHTZ|8&NhwYDU$bD}peQ&IC zyz3M;H$mAG$Q)f5%+BRJzPqQNc)A~Zy8meJ^3ex_59j2KBg)1R+4Gv>c}-%^s3nZwDw93N9{E#l(V*i3NL+Sa7KBQ~jjMpMtEP zdkeZfMf^#e2K<_apFabHML!hC2_beB=YUwke(;bw=_(M&#Szv1uoGCDAAgW`&B>br3gC94_rhJHOyR0|)t>xCjeoMV~laz%mB? zyIKrS&p>0KbCc?vj>bci)3Jme3%{>|cOh3)9kXJTVgb8}C%%T3nLf`{vVdFLD#t0; zN5D|&g%gxKNnnHktH?ACnjvG;nE6~oVaW{sb_mGgIRFsg|El6#wUzYrbzR_T9%$w{ zPhHN_Oi#1JoECXhBO zbP!-7t_v~f*rYxen4Zv0$&Cbv?TIuq+Nx=iiB!tDdee&K`P=J+Oe_AaXv*MU882I= zfNgAS>|AAbbrjtR1YV`iF9Dd?Rn9gk*(O)3xCAfotJYe(dzE_`@Z0Szq}RS2Xq<}r zD&kc7{(MfeR?4Xg#tmlHz-k1nYO6^^%wD|;1BRi!ima$adG%v{{nMoC`U%+&y+0cF zCudWzgXAp}$H*qZ>K*|`-)J{@s?yJ2z5(uT!tvZqJ!^kn!IZ{wYtcW?gW2Os_*JNt|IpUp3yk=F;6 z^}z>qf8P41tq=SEW5=I$$le2r_kiR)pzXveVJo^60tB`Z=p{fy7D!N+X%l0VN#G)W z#bE%`_FT2__F+;z2|BIzgHU0M1&hu*tZB2Au0XyJ$NZAH7S&_{8^s{fVvV<$ ztmZ9gSU|Mnh>h`T2xo}mIL4zo6VaQYWFjgGBoyyk87--9aUv-|q{9NLx(F{c8NLw} zsXNe=iDCQnx7I0$h)1MFggV;TbZ;a(w>gb50mja26ZP4^U)d$OoUo|Tq!KDnFkdXG=gO(6IjW@f*MVxJ+`p- zEn_t42&)J0+n5AaZPOq@ZK-WSG(Ia_MsbDZt1rBgzJPR;5NYq-=Duody*KjncNgDV znphr@HxJ0`_bBW4$c=lI#=XUhCy+`ngb0vLNh6Rj#R$alTeec&lC68S%1Vn=4zyON zZqZ8V+u%1g`GZP_+1^}Vv^Og(A12rc)E$xh^Y;P14b{8yQh$cl3xcOq$=3F*S!E`f zr8@9R^hWyjg!$mUMH2*!{nOZW=FblOb^06_s_^%LA`*iI_k5Wuywcjf*!D@sVh2>* z+6;c!%2XEae}|^lP;o)5OXk@HwieQsf~~oP-0>DBYw^o1*~BAhrOE<$q)s%YTdBBh zSe(_jup(Bt6HqY-8JoE$u_1kcNjZz_#%0;jGmdr7X_oT@1tf2;%^_bjM=OS zSE&*6@>d~)ZQTEi$ZyPdEx(rUS}Tj{%G;Z9a*U&dwDF=QPF|V(BB&-r2{5iquLbdo zV3NQZ3ojy{1>lM20Z0a~#lqJvU5jMKDsuc;8Y+RiG=fsuta1F8(1b<~RJVv6^v(Nx z6H3hIAa(}galvcVv@p|2s165}sc_<0G@P0hqQoR8bgDul?B19W$Nz_1z3NmyjaOg{ zXBvYQX#$G)cw5=q84Nv36}|yOZ~dfwvHj%?-GOG<1pOrR>R5-!GzGO-jE2(HMz60t#!v`kl`5{sjIv%U~ z%B^YA&8?bARyc!pa8Pzd6SLFomS5P195&Z^6D=GSz`;Y4pd3+k6Zh8RjD2hQE>zEF zTVs*;RX(J;jB2W@P|JnZ*={Y_v90ARm_L5~0yV_n0RZ1`YRXtL!|^Yze-V^&qI%0R8t-nPoE1vDN(Ef*c5Y{+2r`um75 zdbJeMDwi_RQZe-iThkmUuYTT&ivS+Z3Z-1x0;Pa104#SK(nZ#SbO}Fa*8rA84Qr>Q z3>O79?GP5Q%7I6jL* z8odc<*c}sN2{9FhFNtc4;}%IcNCjk!rGuMxTw|22I?)R|>Iv@xB~PltEp3xq`jnQw%t^XgaLJwxnG?8NaQZ$P%2vybo0P^)vU5|$nQ=aC>|8jRy&*Sl zQyRBHQ)%eHJwtWF?bB?%iv&BVzHF%vVWA4F2`tL~jhB4dET+>~_F|F%IrK0_R5kCw zVp_-)Uh0LwE)Sjau6Z|Djdm|YhI5z3f?z&(vzYzv=CrlQz+OTajA&4^2&cq7Hbugnn!7ZC`B;EcZV6 zp1gTT**pa1x!k?ThkQpJN6iklX=D-j|#`S2C;b!HBR~%g7k~c;2vWvct{$kV7 zL`rk5i^L`;qXH#S_bORmSYzeQGe^7vHnVYy{;RkA*WvBO`kS~GPE5hBL+*Kn(Zc}F zN)HQD;vkEW3?K4uJ)BAjSEyS*Xc%- zg-`h(lV2wJJFW3^Coi=J=PqzgF~@->_EG-?`aC^V${^!y4=He}vo0d< zzd{}31b{lm8l%oDYt*4DMHW!USaV?jsSdp);xihid7-OE7R>oK*n5eV5$=b4if;4r zAwQQ!o&1cfH_CCn&91oX$A0yS|MG?_{-JOpkxcnMh80Y`YYTd1kf#erE{HWfN=~C_C z*+?v@HWW>l);_yzRz2nt{VIkZ(tNpSvl-WU#4YBU4Y^E&XjlRM31#BZwsYU8`1$YlzQn+@x&Wqs2*mAL5@@A9-e{$P1Q4rq8|VTB)};wcd`sBSoUp zF?yDHW$O_M&y56?_2dRer893xgY-~u0}`H#m$lvwoRuzIln&5CFE=RRd7F)-J9lJU zdOIW?p=T+qY(602iC&|68!Nv-h_wlA^e!$XdDk(1r86jX9>w!ugR*N_i)M!Jj^(O7 zN;OVXyz2_T(sMxSc?-{@I_2p8ePJf!h)M}+&4MTfXfFoc#FyYw#pN0v#G7r_#Zp)5p7PX1{78{rin9oez5 zdft<+PP-$v{pcm_LCnr#@VY7}V8HOuWy2f`>fZ;8BBSV1_ z0%_}SZAlg6HHTv;XZoZID`~pu@uhv}#l?DY0nbpN(mvyzDBn<;sQugJ?fdk%D}Jl_ zx^x}dcN^^+Dpk6!sC`4V61JsnzeTIkptorLO9$$b9_rKe=%L!^!BD-@^`;)2Fd3jB zV0!X1>W$oGeEat{R-J7>yz5!%r##C#M7gUJQ^i*+LoS?K>n5fObFO&bMS1}*; zy~jf&eT{RsT6a9Jk`!f?h&bWL7@+Dt9-4~gbs5BUYf_*|6Z*@9V-sQ263)~6FsaYv z|1T-|hXj~n@Fx^w>cGz^M#dDP+i)W9hf2d+=Ko&^*0diP{nF|p*AZdLJ|79&0}Z{H1nnud;#bc)W(WaT_G0} zT~j>GUQLTICYimGNK{7$(2`)f1!^kR1@iV}I~b|B3ww*%?qE!IYVN?zEtzUq7(70u z8Un=Ty2$4OeZTDbldgyMhhh48RL6c^dldd}lYcq+<(dB)`|H@F*Q5idmA$V&*?aNv z-iy+uae41$W$$I_%G*z_Og_FcDNS9Iuf&uqF*$Hu30#*wn=_}f+jHCZW?qw8w!#bC z99_45X_L~u=i&Y@x69vsQ|`VXue*>rkslKTmDM%E=i1jlY2W_1ef#qH2Z!bM0i}H) 
zbE>?N!G~|j-6!RBCyn}KT6WFwxqYub*?0Q!zSGj{m!!+@$os;|zOd{WkUV}U8@b&l zXka^KPro^;4eX#abU_+V$U_OaJ1MVA8cnmIy~Zy=5bs~<=v%(=aQ9!lDtDYzI!z6X**)4WakD`%uC;@F$o@Uz(K$Zdl z6~E#Hz`xuEscAo}mMXh*l_TKuGmos7z+*hVE`ArDltDY((|;{s17p2XhepsHNkS;RpP=}-=Ja5D%`+s5B!I4*gX?zS=@aRT9jf(~3 z7vd&-9KKB&05MSFq*_27!;D~VOK38GS5sEU&^zeZoc9hF=mrR%u#o)LK7nDJoU!g0 zmg|zj~T=zyCPAIw_ ziaX<%-$dcgoX(u4Pg!tf2cTJKjg`P~X@(i~tRkXaWIz+PL8n1l3qr8uCKaar!iwWc z0`&`@%`+oEZeQme^eIZj%5;Y+#56tBftaR;TGMuRIYIiDF`|ln+4PZA>`S};aH3sJ z_jDnqX^3vbG!3zEIYKwInm%WPKI6`RBx7Y41;W2YN7`jo?J!e2^h3YeP)M_TRXSap zGYvVZ`i4tQCQas?LhYh#qdwDI@L>X@Pr5$PNYhX<0@a#AL286HUQ%_0zXDL*B*L|u z2i2>^3*F^2jE+=~CgJn=b$Y5$!^kq~RJ`+G7qD}!|8Q^a zJHr$W%ATF(C^|TWPnt5rPrJHxu~iUJl_k^P(Bcr-L$ff7h)FW#Y|InI=CNaURsbFA zND!?BG#NzgPvF-G`i6Oa2w7%ll?WfTMr~31q@~qBp0wmIVm#wj5o^>jX~9*DZQRKm zSzOp8xXh*h1MtQevooqS^q&YqLINMtz*E0S6j8slm7j0^)S}S^>=Y4($Je5TkKvE;=Mc z*%hd2jU`gr*Du7{Ro@W21I44rOU~RPtwMFbNjLaM1tBTmqd7Vc3jaWWhFL-8HJaAm zkapz9B>n&uUMlT!)h*d$kE^?->h6`!L9!)f@RO@)%T7J6>5*!BRyw!oxhpm6vNeAs z$~C=84Rb9yxYF6Zc=1W+z~jyVxpS}5xmQR1s=h<&d{wSLpwu6bJeu3STBRL|CX8vv zzzk9xKw=GN&~iHlKGIBEV>}2SpHLEW4e*+4PTA)TR47tMbR06Bff~$Hmgj;AnlZBK z6Blgw$@f3}em1&vM)qz|yj#E%ea%^0=KGSfQ&V@{rJgR-i)w{?h42(HO@*P?UBmn3 z@2Qx;RqC4T?YH`^QE=8Az9yBbU_%yl7A^CF9Y_WGO?S@6o-t`TD#+0b%K!;kD zx;29v1c42f-oiwiPWG=3b|ex)6&17tsDAv%iygD6SX|t}tkgRgPADaW6C$Q^TGamM zMi-&w|1m}$Jwo0mv8$+?ObBde(56tUv@e{s{{axBevT3tBXEI0m;h4(nf}L|hJKgw znDqUKVvLKk`3zB#uAbO(|H`=^JP`k-;DGSY$iqBy;-7<&K;h!~9M>gPdY*Eg+w3RD zHA=-#j@u*^KTo*s2A=xX#g(LS)K4WDs%ouZ)tyHHRXx$_KMWOIbzLv*UEe8wT_X5u- MpXQ(xgAw-s0ksztj{pDw literal 0 HcmV?d00001 diff --git a/TTS/vocoder/layers/__pycache__/wavegrad.cpython-311.pyc b/TTS/vocoder/layers/__pycache__/wavegrad.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..413ffdb599f75e2b983cbfae8ff8a27367046ef0 GIT binary patch literal 12964 zcmds7Z*1Gvb|)!PqHS4{ejU`Ninqo$w1oEx~H{~96lRTU7O!1>UO)=E#l)&Af z1h>dcRGCBEaZ%Jg_%qUI6=~&xRy?#KGsBvaRyCwm6||~;N-M8OP1F_`QxiTjC)C`a z?!e7Gvw#})6MkOcSNjxx0n*9`t^7}ERVUDIQ{liP7PLN>=H;;Ef=9eIUks751YLv-RW3)iu;746q z>MA|T!g!4CKAuX>?iFINQdYwC^TXfce+2U9^yk1bZL4=mVZy4(&~YlmEYW{P(^Q(Z zxKR_a;V`dr^31d->3k#- zPsY=ch=jA#S-jC}Qd0U-YCM&UCL+4qkiG7n6Qz_K(b*51+4r>4fPy>qHJHY2A~ zQ?RarDEFQ__jd1WDh7F}Hxa#tovuV@#c?Sr^h{rqYJlUM-&m&}bJ}PL^4mbZy-uz3 zjepAB^elKZzA@LX@Zo$15aZh5s~0>E`Q|mgId|grX_fEN_%4O-BITd@KJhK4Z(URQ zR*i2}%<_%Sh&h(zwv(sMXF zT*gCtVJvowp@v)FSR67aan9h zh_hm%C6-DGq$hNF6zHU=m^l}i35D8#_m9zN(3C10Qr=Q~Xlf;=vC&k9&QKB4#KGL) z&Kgg^I?Ke;s5gZ)VHu3T2+SSmf3L`cidbfYW)gM{$FudYUf3ecOXy9FWv)&}Vwa-H zq?nL(&s6kkBq1hs`nb-eQ&Q}b&Jjb=nQ1Z1N_E()1xYIsq7bHO7&<}Jy16K0Q|^|E z2ce_4Zao?{2|}C#y6^bNaI5aA6gH!XW0e+dczL*=UuPc2LId}!G>GixoEyl z4Yp~)HYoCB;YSq5u{QYBWNprZJAwPXIno|r6~B|T~Mz3h!>s~ zBU4e>&#r=Zjn9d?TN0;5D0q={CCa4JSM?fVc%XJ*crrdHI9j4ix@RmUfh!i~LR%>54}6rygT#sl4qG-s$L$v z|N6?{{h?J_^&Zx|hZXPPb$?)SAop{vzE}14uC#yAx!Ov8=T|TPcJ}XQA4LD|+LzZ> z&#(4qeWxDwy|vc&mNIfy8+lK8e^eVeuUxpO{4A>WeW>+)sEBb*oK*WJRsW>IPZ~OE zUk&t^L`Ra;Tow8^X))%H{inLxTteuiBvSLSIs2l%QK<~2IZPgUC~`~JgK{4DOs;dLqmBw zC5t)(&KoT;>?!rxdyp?NQXX28Jx=F^_-rICN>egQ3(OCb;m==!g1Oc$%+G1zICJ@5 zfPho44KAF|HRNAXYxiiid$P<%edDboc{+daZ+Csst<)b?>yK*nM<3RoT&q9%Rh?RY zTB|>u<+0VF+&jN{b?J?jx|N{^wAy}5Yd`j|{p4EvNws}QYajY*RBeA(t$$Cee=o~_ zy}kKXJkKxpt1bJ~?fbRu`?E}TFZo5Y!eU)k$R4~|v++!=)`QP|zWZ0ZbC>h2YOq}k zwm%GZt_3@nX*Kwg7JSLE*@S7bOt6JQfJ8}+NYDrAo=8MU#Q=KZ7AW;1LE%XuB+WqV z(CGy%M1qK17J%5G(>gn#umhVc$F*-#m59&9C1}J~7OnPh-J8_Yhyk;3*ckj-$#f)T zFsm3_2n;|gR&WuR#X4XXr?aoc-+0rp38UE(2iFI@e+Yn-www@dyiRvz2TN(2ITqs& zfL9PB3Cw-h9dN_<;Lk|V({eBsdolt$2cuW$Z@HJW&0$Pf`3mlVDnd6~Tt}9tcQi 
zi&6kc0qL4d2zW7>;7vu~TZ@)pLESL0l2srTq3%ZGOUsAV#sga80dO+48=>cKhx0=# z)vJRKE-Rr^YUq>}It4ZUjpi3_AIgueG_PL%o1G6@zZy}RĥwB|Ee7UILNjd*b9 z8ox8oF8Q^V11f(|;}0tQ!9u8LRB0_g(OM3we80x`D}28_T>QD#(5>>jHGa3k@3vKF z4V_xUZk6Ao@p}~WDu>-@pTy#S4;g^nR3849r&8OUccrmcAx)T`h|0JC+&VVec^B}s zwKe;IKX7GAf*;_zwI87JmQEM@BL}Bm0~mJQXPtoN1DMS`JI`g<3^&FQApCEC@oP-~ z@Kd6frn0((_!Mlr$6)}Seoe1Vh|$Sw5lKwUNXMXzZ0tH0yEK!WlukfF*lldlxG|ep zSdK*#qE4p}RL95C@DB@QoIKf^3!5`;GIL&{a|JZKA4=pn5J1D;z^7+EIg=Yyy*o7T zjx4?5tzCE{SC>1kdRsJaOP0nKZ+`M-t~GaF^@cTX7}_*Ef9s?C{)cV**4p;1461Dh zw6+7Q^y+1$VNh)t)EWkFdb3=1X5IhXqL|Gne6yhn7!8;S-78UrQOG!BV@GsTvcTX> zY+RtFr3|I2fJce20L(Bk+ptSQ+bj8n;WTL>hAg(EiG!P{H|kz0Ai0QSYj z!1$Dy(+b3G}3D9S{_LnXUJ>Y~%riE9zNK(-Tsl$HM!d+r7T@(At93ri_AbU+KC!veR| zkxy%#2i4|&t+}7LB86{+i-_ZHDMqqEqjNaDKSHu4K01muNDvpxA`lxs+GMJ@o=s{C zvYq3eN61tlK88E8Nm+zvxI=hbnUHCdaUF2RT1q^o0_O&iS%yMTfH5&RCRai1cX8(+ z0DA-~q_>cq0b(8>7PQfaMjeSJg$OZg3GL(rHp5f3>(uY~+C?r$e^#aPP1gq@sJ(vD z&@QO8lDX#J7qhwFIhIJpCSzziX2M;YXzU5#{)3}7Hf{60c5E|N;=N^Ni24O>Kj_M^ zPMsI%N%H(gOYr}89EZDjA;B`?!5BP~{a7Pn;i@zoh?%K~n$o2lTo7j3j!K+&_qjVD z!+Vw-9laSCAA8%5Wt?}Vy~i2nlfsi(Sn3&OYmVnG9_+w)w!6~tww$f)ND6w}?n--a zFUk1m-f3tq?*Ok|8Rhw#a$UbU_t!7y-&Fm5 zn!ius$>s0c@CO(BZ_Y2w=jk{xoUmixVYx(ClnBL<^S%bfcFc?`>9=@nRf_zhmf}~3qQk!p*ENa zO@_D|1bWU5(sl|4p_?5E*c99L3JQHOoOJ=8;>pZB3xLr+o;fa6>P{-1ccJo^@N}L| zSzC3+{k>=DX7J3{a*Ym0<90fOdAGw{-30hLoEx;T<^(~b<&-k+@+T=7m$ON>Y+Q17 z2J)NtIA-Aa0cK!1;~)9Si{n21$=w+b+2!3CH(*Zp@T32L1Q4?u#37wRf-cPf|5Xw? z1HC4mgu}Gy6nHpM3S%wtD`S{@1IZ;Mxa&&@)!`@!@u~qF2xdU^pnI;uyG&r9yTHK_ z7@@O}>yvTjMigKk(vdj8SMW=PNFV~OWc%4_lW8}qIr*CEU_bdVgaHJ2sLt-Rd1{2ZN+Y(+}@m+e@FFpYu@fHW00--t%F}2 zxp^haB7^<;ZOfeM?a{nFTQYb71|H7s)x0m{qGo9GECX-VZ{1tF$lg2&&*KkddBbPf zEQ+#xRsu?$KI=PT8_O2SJ0A3VBsneo1a;vs2{h?>#va0En7pmO*%!{vA$!ldb@CFJ zNivEQu=@l6BxP~e!hqv=&E}1q9RIgwhfw0K%ZT=jw@n@+%ttIpbwK$_A{+*w`@|nQJN-Pror`YK?pUyUaE@b23j+#u^xzMs&~!ne8=MMVI>+K%GV~Exw1s2OtlmHVT&<1YoGVdwINHEn1&)yrz({20 zNMp{@aA^i_?W*mH3s1@|9e``-cq&2DPXpIf@9HtpA%_8VBU zEq`L^j2hanh4!N^9wy}GQ15}^lKgcz|fbI)j->~B?>ORi!y1(M?#@ zOpvSIq_!YkbzI#hWswdK9vfIBXyuA!&Kxm0GHwipv=S~WrtclMpD^Nf)C)7!KnCsa zaR)s14-vNmth0TNlD1Cm@!bpAKwvaSL~YH+9<>)iZ5apHgajJi#()p8Mb7qdFp`Bg zF9Q1vt>D9-(pKXoC%W)F7IzYv!VB0g5SbB_B33faa4$m6{&kFj+_0-*geYcd90^)t zm@|;uYgmBCd`5E;JED6;l;#K>jm5J<$3KBK@@GImBP)Ch^Q@5|zAy-!QUj;7z^P)0 zHu4p%1cubWkQNx)x^&aSVkGFsNDv4{f?4<(8-}T@FvBA9@sbV0@g0f%D;L}MD|66) z9=>I&^!<#}al@g#wL}oRSSp1vMpX04|IRASPgkr3B=pDI}+WfZGmZ9(|3WHW(I2NhHL9V-R2-LXhLaZmgO{ zg2#o17sqL#0+|@a0Qm(CDuDc7K^ys(K;XFFA6U4uC@p-H6K=zYC*dq>qWTflw^Q@& z1XSpL`e44jX>QkZ*GKm zaV^lE9b6A=TjUm_pYgYNKskf>>C8TW>xCd6ov)eD42W;~C&>K&0nF$_BpDtFmJuJZ z@)kV{way*c0Gow{EC-vnvBM;g|8cNcvVE3)Elk$!_b`*n>*h3!|Eu_rtR`ABtS0)M z%BxB663WFgOMHhh*H~E1GJN%yP%ES1{}}iR*VDjg2Kd+#4*fAqMndv2P7Pg@0}fRq z9NL03@?66vWs&DtghLkbvK$QjG-AvM#|;OKSD^Le`_i6dvS(~288g1pEu7%m3ea=v zJFTWO>3Bl!Dg9EOESesw*h(~P6rH~AHjanMmjmQYkM5d)kHp<_Oo~sZjZ>Kqpdpcw z^gibBtq;MHf8dW8A50&WK7lgybMiRQO&3km>y-C8`K?nsl?vB7^_=3o)~SPv^IE5l zD$Z-u&C>8GbvenNU3mW+EPqn0^lhi%E1+_c3*K(}2FsrmEBl&gSjBRZ?OQnX4VFJC zR`yej=SGu8*KINkT?-onUHgrTVuDslbw4sIC&nb9{7JELmTsj%Vakco7|Ne)QArl! 
FKLNa@(~1B9 literal 0 HcmV?d00001 diff --git a/TTS/vocoder/layers/hifigan.py b/TTS/vocoder/layers/hifigan.py new file mode 100644 index 0000000..8dd7513 --- /dev/null +++ b/TTS/vocoder/layers/hifigan.py @@ -0,0 +1,56 @@ +from torch import nn +from torch.nn.utils.parametrize import remove_parametrizations + + +# pylint: disable=dangerous-default-value +class ResStack(nn.Module): + def __init__(self, kernel, channel, padding, dilations=[1, 3, 5]): + super().__init__() + resstack = [] + for dilation in dilations: + resstack += [ + nn.LeakyReLU(0.2), + nn.ReflectionPad1d(dilation), + nn.utils.parametrizations.weight_norm( + nn.Conv1d(channel, channel, kernel_size=kernel, dilation=dilation) + ), + nn.LeakyReLU(0.2), + nn.ReflectionPad1d(padding), + nn.utils.parametrizations.weight_norm(nn.Conv1d(channel, channel, kernel_size=1)), + ] + self.resstack = nn.Sequential(*resstack) + + self.shortcut = nn.utils.parametrizations.weight_norm(nn.Conv1d(channel, channel, kernel_size=1)) + + def forward(self, x): + x1 = self.shortcut(x) + x2 = self.resstack(x) + return x1 + x2 + + def remove_weight_norm(self): + remove_parametrizations(self.shortcut, "weight") + remove_parametrizations(self.resstack[2], "weight") + remove_parametrizations(self.resstack[5], "weight") + remove_parametrizations(self.resstack[8], "weight") + remove_parametrizations(self.resstack[11], "weight") + remove_parametrizations(self.resstack[14], "weight") + remove_parametrizations(self.resstack[17], "weight") + + +class MRF(nn.Module): + def __init__(self, kernels, channel, dilations=[1, 3, 5]): # # pylint: disable=dangerous-default-value + super().__init__() + self.resblock1 = ResStack(kernels[0], channel, 0, dilations) + self.resblock2 = ResStack(kernels[1], channel, 6, dilations) + self.resblock3 = ResStack(kernels[2], channel, 12, dilations) + + def forward(self, x): + x1 = self.resblock1(x) + x2 = self.resblock2(x) + x3 = self.resblock3(x) + return x1 + x2 + x3 + + def remove_weight_norm(self): + self.resblock1.remove_weight_norm() + self.resblock2.remove_weight_norm() + self.resblock3.remove_weight_norm() diff --git a/TTS/vocoder/layers/losses.py b/TTS/vocoder/layers/losses.py new file mode 100644 index 0000000..74cfc72 --- /dev/null +++ b/TTS/vocoder/layers/losses.py @@ -0,0 +1,368 @@ +from typing import Dict, Union + +import torch +from torch import nn +from torch.nn import functional as F + +from TTS.utils.audio.torch_transforms import TorchSTFT +from TTS.vocoder.utils.distribution import discretized_mix_logistic_loss, gaussian_loss + +################################# +# GENERATOR LOSSES +################################# + + +class STFTLoss(nn.Module): + """STFT loss. Input generate and real waveforms are converted + to spectrograms compared with L1 and Spectral convergence losses. + It is from ParallelWaveGAN paper https://arxiv.org/pdf/1910.11480.pdf""" + + def __init__(self, n_fft, hop_length, win_length): + super().__init__() + self.n_fft = n_fft + self.hop_length = hop_length + self.win_length = win_length + self.stft = TorchSTFT(n_fft, hop_length, win_length) + + def forward(self, y_hat, y): + y_hat_M = self.stft(y_hat) + y_M = self.stft(y) + # magnitude loss + loss_mag = F.l1_loss(torch.log(y_M), torch.log(y_hat_M)) + # spectral convergence loss + loss_sc = torch.norm(y_M - y_hat_M, p="fro") / torch.norm(y_M, p="fro") + return loss_mag, loss_sc + + +class MultiScaleSTFTLoss(torch.nn.Module): + """Multi-scale STFT loss. 
Input generate and real waveforms are converted + to spectrograms compared with L1 and Spectral convergence losses. + It is from ParallelWaveGAN paper https://arxiv.org/pdf/1910.11480.pdf""" + + def __init__(self, n_ffts=(1024, 2048, 512), hop_lengths=(120, 240, 50), win_lengths=(600, 1200, 240)): + super().__init__() + self.loss_funcs = torch.nn.ModuleList() + for n_fft, hop_length, win_length in zip(n_ffts, hop_lengths, win_lengths): + self.loss_funcs.append(STFTLoss(n_fft, hop_length, win_length)) + + def forward(self, y_hat, y): + N = len(self.loss_funcs) + loss_sc = 0 + loss_mag = 0 + for f in self.loss_funcs: + lm, lsc = f(y_hat, y) + loss_mag += lm + loss_sc += lsc + loss_sc /= N + loss_mag /= N + return loss_mag, loss_sc + + +class L1SpecLoss(nn.Module): + """L1 Loss over Spectrograms as described in HiFiGAN paper https://arxiv.org/pdf/2010.05646.pdf""" + + def __init__( + self, sample_rate, n_fft, hop_length, win_length, mel_fmin=None, mel_fmax=None, n_mels=None, use_mel=True + ): + super().__init__() + self.use_mel = use_mel + self.stft = TorchSTFT( + n_fft, + hop_length, + win_length, + sample_rate=sample_rate, + mel_fmin=mel_fmin, + mel_fmax=mel_fmax, + n_mels=n_mels, + use_mel=use_mel, + ) + + def forward(self, y_hat, y): + y_hat_M = self.stft(y_hat) + y_M = self.stft(y) + # magnitude loss + loss_mag = F.l1_loss(torch.log(y_M), torch.log(y_hat_M)) + return loss_mag + + +class MultiScaleSubbandSTFTLoss(MultiScaleSTFTLoss): + """Multiscale STFT loss for multi band model outputs. + From MultiBand-MelGAN paper https://arxiv.org/abs/2005.05106""" + + # pylint: disable=no-self-use + def forward(self, y_hat, y): + y_hat = y_hat.view(-1, 1, y_hat.shape[2]) + y = y.view(-1, 1, y.shape[2]) + return super().forward(y_hat.squeeze(1), y.squeeze(1)) + + +class MSEGLoss(nn.Module): + """Mean Squared Generator Loss""" + + # pylint: disable=no-self-use + def forward(self, score_real): + loss_fake = F.mse_loss(score_real, score_real.new_ones(score_real.shape)) + return loss_fake + + +class HingeGLoss(nn.Module): + """Hinge Discriminator Loss""" + + # pylint: disable=no-self-use + def forward(self, score_real): + # TODO: this might be wrong + loss_fake = torch.mean(F.relu(1.0 - score_real)) + return loss_fake + + +################################## +# DISCRIMINATOR LOSSES +################################## + + +class MSEDLoss(nn.Module): + """Mean Squared Discriminator Loss""" + + def __init__( + self, + ): + super().__init__() + self.loss_func = nn.MSELoss() + + # pylint: disable=no-self-use + def forward(self, score_fake, score_real): + loss_real = self.loss_func(score_real, score_real.new_ones(score_real.shape)) + loss_fake = self.loss_func(score_fake, score_fake.new_zeros(score_fake.shape)) + loss_d = loss_real + loss_fake + return loss_d, loss_real, loss_fake + + +class HingeDLoss(nn.Module): + """Hinge Discriminator Loss""" + + # pylint: disable=no-self-use + def forward(self, score_fake, score_real): + loss_real = torch.mean(F.relu(1.0 - score_real)) + loss_fake = torch.mean(F.relu(1.0 + score_fake)) + loss_d = loss_real + loss_fake + return loss_d, loss_real, loss_fake + + +class MelganFeatureLoss(nn.Module): + def __init__( + self, + ): + super().__init__() + self.loss_func = nn.L1Loss() + + # pylint: disable=no-self-use + def forward(self, fake_feats, real_feats): + loss_feats = 0 + num_feats = 0 + for idx, _ in enumerate(fake_feats): + for fake_feat, real_feat in zip(fake_feats[idx], real_feats[idx]): + loss_feats += self.loss_func(fake_feat, real_feat) + num_feats += 1 + loss_feats 
= loss_feats / num_feats + return loss_feats + + +##################################### +# LOSS WRAPPERS +##################################### + + +def _apply_G_adv_loss(scores_fake, loss_func): + """Compute G adversarial loss function + and normalize values""" + adv_loss = 0 + if isinstance(scores_fake, list): + for score_fake in scores_fake: + fake_loss = loss_func(score_fake) + adv_loss += fake_loss + adv_loss /= len(scores_fake) + else: + fake_loss = loss_func(scores_fake) + adv_loss = fake_loss + return adv_loss + + +def _apply_D_loss(scores_fake, scores_real, loss_func): + """Compute D loss func and normalize loss values""" + loss = 0 + real_loss = 0 + fake_loss = 0 + if isinstance(scores_fake, list): + # multi-scale loss + for score_fake, score_real in zip(scores_fake, scores_real): + total_loss, real_loss_, fake_loss_ = loss_func(score_fake=score_fake, score_real=score_real) + loss += total_loss + real_loss += real_loss_ + fake_loss += fake_loss_ + # normalize loss values with number of scales (discriminators) + loss /= len(scores_fake) + real_loss /= len(scores_real) + fake_loss /= len(scores_fake) + else: + # single scale loss + total_loss, real_loss, fake_loss = loss_func(scores_fake, scores_real) + loss = total_loss + return loss, real_loss, fake_loss + + +################################## +# MODEL LOSSES +################################## + + +class GeneratorLoss(nn.Module): + """Generator Loss Wrapper. Based on model configuration it sets a right set of loss functions and computes + losses. It allows to experiment with different combinations of loss functions with different models by just + changing configurations. + + Args: + C (AttrDict): model configuration. + """ + + def __init__(self, C): + super().__init__() + assert not ( + C.use_mse_gan_loss and C.use_hinge_gan_loss + ), " [!] Cannot use HingeGANLoss and MSEGANLoss together." 
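+        # Illustrative flag combinations (assumptions, not taken from this
+        # diff): a MultiBand-MelGAN style config typically enables
+        # use_stft_loss, use_subband_stft_loss and use_mse_gan_loss, while a
+        # HiFiGAN style config leans on use_l1_spec_loss, use_feat_match_loss
+        # and use_mse_gan_loss. Any flag or weight missing from ``C`` falls
+        # back to False / 0.0 below.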
+ + self.use_stft_loss = C.use_stft_loss if "use_stft_loss" in C else False + self.use_subband_stft_loss = C.use_subband_stft_loss if "use_subband_stft_loss" in C else False + self.use_mse_gan_loss = C.use_mse_gan_loss if "use_mse_gan_loss" in C else False + self.use_hinge_gan_loss = C.use_hinge_gan_loss if "use_hinge_gan_loss" in C else False + self.use_feat_match_loss = C.use_feat_match_loss if "use_feat_match_loss" in C else False + self.use_l1_spec_loss = C.use_l1_spec_loss if "use_l1_spec_loss" in C else False + + self.stft_loss_weight = C.stft_loss_weight if "stft_loss_weight" in C else 0.0 + self.subband_stft_loss_weight = C.subband_stft_loss_weight if "subband_stft_loss_weight" in C else 0.0 + self.mse_gan_loss_weight = C.mse_G_loss_weight if "mse_G_loss_weight" in C else 0.0 + self.hinge_gan_loss_weight = C.hinge_G_loss_weight if "hinde_G_loss_weight" in C else 0.0 + self.feat_match_loss_weight = C.feat_match_loss_weight if "feat_match_loss_weight" in C else 0.0 + self.l1_spec_loss_weight = C.l1_spec_loss_weight if "l1_spec_loss_weight" in C else 0.0 + + if C.use_stft_loss: + self.stft_loss = MultiScaleSTFTLoss(**C.stft_loss_params) + if C.use_subband_stft_loss: + self.subband_stft_loss = MultiScaleSubbandSTFTLoss(**C.subband_stft_loss_params) + if C.use_mse_gan_loss: + self.mse_loss = MSEGLoss() + if C.use_hinge_gan_loss: + self.hinge_loss = HingeGLoss() + if C.use_feat_match_loss: + self.feat_match_loss = MelganFeatureLoss() + if C.use_l1_spec_loss: + assert C.audio["sample_rate"] == C.l1_spec_loss_params["sample_rate"] + self.l1_spec_loss = L1SpecLoss(**C.l1_spec_loss_params) + + def forward( + self, y_hat=None, y=None, scores_fake=None, feats_fake=None, feats_real=None, y_hat_sub=None, y_sub=None + ): + gen_loss = 0 + adv_loss = 0 + return_dict = {} + + # STFT Loss + if self.use_stft_loss: + stft_loss_mg, stft_loss_sc = self.stft_loss(y_hat[:, :, : y.size(2)].squeeze(1), y.squeeze(1)) + return_dict["G_stft_loss_mg"] = stft_loss_mg + return_dict["G_stft_loss_sc"] = stft_loss_sc + gen_loss = gen_loss + self.stft_loss_weight * (stft_loss_mg + stft_loss_sc) + + # L1 Spec loss + if self.use_l1_spec_loss: + l1_spec_loss = self.l1_spec_loss(y_hat, y) + return_dict["G_l1_spec_loss"] = l1_spec_loss + gen_loss = gen_loss + self.l1_spec_loss_weight * l1_spec_loss + + # subband STFT Loss + if self.use_subband_stft_loss: + subband_stft_loss_mg, subband_stft_loss_sc = self.subband_stft_loss(y_hat_sub, y_sub) + return_dict["G_subband_stft_loss_mg"] = subband_stft_loss_mg + return_dict["G_subband_stft_loss_sc"] = subband_stft_loss_sc + gen_loss = gen_loss + self.subband_stft_loss_weight * (subband_stft_loss_mg + subband_stft_loss_sc) + + # multiscale MSE adversarial loss + if self.use_mse_gan_loss and scores_fake is not None: + mse_fake_loss = _apply_G_adv_loss(scores_fake, self.mse_loss) + return_dict["G_mse_fake_loss"] = mse_fake_loss + adv_loss = adv_loss + self.mse_gan_loss_weight * mse_fake_loss + + # multiscale Hinge adversarial loss + if self.use_hinge_gan_loss and not scores_fake is not None: + hinge_fake_loss = _apply_G_adv_loss(scores_fake, self.hinge_loss) + return_dict["G_hinge_fake_loss"] = hinge_fake_loss + adv_loss = adv_loss + self.hinge_gan_loss_weight * hinge_fake_loss + + # Feature Matching Loss + if self.use_feat_match_loss and not feats_fake is None: + feat_match_loss = self.feat_match_loss(feats_fake, feats_real) + return_dict["G_feat_match_loss"] = feat_match_loss + adv_loss = adv_loss + self.feat_match_loss_weight * feat_match_loss + return_dict["loss"] = gen_loss + 
adv_loss + return_dict["G_gen_loss"] = gen_loss + return_dict["G_adv_loss"] = adv_loss + return return_dict + + +class DiscriminatorLoss(nn.Module): + """Like ```GeneratorLoss```""" + + def __init__(self, C): + super().__init__() + assert not ( + C.use_mse_gan_loss and C.use_hinge_gan_loss + ), " [!] Cannot use HingeGANLoss and MSEGANLoss together." + + self.use_mse_gan_loss = C.use_mse_gan_loss + self.use_hinge_gan_loss = C.use_hinge_gan_loss + + if C.use_mse_gan_loss: + self.mse_loss = MSEDLoss() + if C.use_hinge_gan_loss: + self.hinge_loss = HingeDLoss() + + def forward(self, scores_fake, scores_real): + loss = 0 + return_dict = {} + + if self.use_mse_gan_loss: + mse_D_loss, mse_D_real_loss, mse_D_fake_loss = _apply_D_loss( + scores_fake=scores_fake, scores_real=scores_real, loss_func=self.mse_loss + ) + return_dict["D_mse_gan_loss"] = mse_D_loss + return_dict["D_mse_gan_real_loss"] = mse_D_real_loss + return_dict["D_mse_gan_fake_loss"] = mse_D_fake_loss + loss += mse_D_loss + + if self.use_hinge_gan_loss: + hinge_D_loss, hinge_D_real_loss, hinge_D_fake_loss = _apply_D_loss( + scores_fake=scores_fake, scores_real=scores_real, loss_func=self.hinge_loss + ) + return_dict["D_hinge_gan_loss"] = hinge_D_loss + return_dict["D_hinge_gan_real_loss"] = hinge_D_real_loss + return_dict["D_hinge_gan_fake_loss"] = hinge_D_fake_loss + loss += hinge_D_loss + + return_dict["loss"] = loss + return return_dict + + +class WaveRNNLoss(nn.Module): + def __init__(self, wave_rnn_mode: Union[str, int]): + super().__init__() + if wave_rnn_mode == "mold": + self.loss_func = discretized_mix_logistic_loss + elif wave_rnn_mode == "gauss": + self.loss_func = gaussian_loss + elif isinstance(wave_rnn_mode, int): + self.loss_func = torch.nn.CrossEntropyLoss() + else: + raise ValueError(" [!] 
Unknown mode for Wavernn.") + + def forward(self, y_hat, y) -> Dict: + loss = self.loss_func(y_hat, y) + return {"loss": loss} diff --git a/TTS/vocoder/layers/lvc_block.py b/TTS/vocoder/layers/lvc_block.py new file mode 100644 index 0000000..8913a11 --- /dev/null +++ b/TTS/vocoder/layers/lvc_block.py @@ -0,0 +1,198 @@ +import torch +import torch.nn.functional as F + + +class KernelPredictor(torch.nn.Module): + """Kernel predictor for the location-variable convolutions""" + + def __init__( # pylint: disable=dangerous-default-value + self, + cond_channels, + conv_in_channels, + conv_out_channels, + conv_layers, + conv_kernel_size=3, + kpnet_hidden_channels=64, + kpnet_conv_size=3, + kpnet_dropout=0.0, + kpnet_nonlinear_activation="LeakyReLU", + kpnet_nonlinear_activation_params={"negative_slope": 0.1}, + ): + """ + Args: + cond_channels (int): number of channel for the conditioning sequence, + conv_in_channels (int): number of channel for the input sequence, + conv_out_channels (int): number of channel for the output sequence, + conv_layers (int): + kpnet_ + """ + super().__init__() + + self.conv_in_channels = conv_in_channels + self.conv_out_channels = conv_out_channels + self.conv_kernel_size = conv_kernel_size + self.conv_layers = conv_layers + + l_w = conv_in_channels * conv_out_channels * conv_kernel_size * conv_layers + l_b = conv_out_channels * conv_layers + + padding = (kpnet_conv_size - 1) // 2 + self.input_conv = torch.nn.Sequential( + torch.nn.Conv1d(cond_channels, kpnet_hidden_channels, 5, padding=(5 - 1) // 2, bias=True), + getattr(torch.nn, kpnet_nonlinear_activation)(**kpnet_nonlinear_activation_params), + ) + + self.residual_conv = torch.nn.Sequential( + torch.nn.Dropout(kpnet_dropout), + torch.nn.Conv1d(kpnet_hidden_channels, kpnet_hidden_channels, kpnet_conv_size, padding=padding, bias=True), + getattr(torch.nn, kpnet_nonlinear_activation)(**kpnet_nonlinear_activation_params), + torch.nn.Conv1d(kpnet_hidden_channels, kpnet_hidden_channels, kpnet_conv_size, padding=padding, bias=True), + getattr(torch.nn, kpnet_nonlinear_activation)(**kpnet_nonlinear_activation_params), + torch.nn.Dropout(kpnet_dropout), + torch.nn.Conv1d(kpnet_hidden_channels, kpnet_hidden_channels, kpnet_conv_size, padding=padding, bias=True), + getattr(torch.nn, kpnet_nonlinear_activation)(**kpnet_nonlinear_activation_params), + torch.nn.Conv1d(kpnet_hidden_channels, kpnet_hidden_channels, kpnet_conv_size, padding=padding, bias=True), + getattr(torch.nn, kpnet_nonlinear_activation)(**kpnet_nonlinear_activation_params), + torch.nn.Dropout(kpnet_dropout), + torch.nn.Conv1d(kpnet_hidden_channels, kpnet_hidden_channels, kpnet_conv_size, padding=padding, bias=True), + getattr(torch.nn, kpnet_nonlinear_activation)(**kpnet_nonlinear_activation_params), + torch.nn.Conv1d(kpnet_hidden_channels, kpnet_hidden_channels, kpnet_conv_size, padding=padding, bias=True), + getattr(torch.nn, kpnet_nonlinear_activation)(**kpnet_nonlinear_activation_params), + ) + + self.kernel_conv = torch.nn.Conv1d(kpnet_hidden_channels, l_w, kpnet_conv_size, padding=padding, bias=True) + self.bias_conv = torch.nn.Conv1d(kpnet_hidden_channels, l_b, kpnet_conv_size, padding=padding, bias=True) + + def forward(self, c): + """ + Args: + c (Tensor): the conditioning sequence (batch, cond_channels, cond_length) + Returns: + """ + batch, _, cond_length = c.shape + + c = self.input_conv(c) + c = c + self.residual_conv(c) + k = self.kernel_conv(c) + b = self.bias_conv(c) + + kernels = k.contiguous().view( + batch, self.conv_layers, 
self.conv_in_channels, self.conv_out_channels, self.conv_kernel_size, cond_length + ) + bias = b.contiguous().view(batch, self.conv_layers, self.conv_out_channels, cond_length) + return kernels, bias + + +class LVCBlock(torch.nn.Module): + """the location-variable convolutions""" + + def __init__( + self, + in_channels, + cond_channels, + upsample_ratio, + conv_layers=4, + conv_kernel_size=3, + cond_hop_length=256, + kpnet_hidden_channels=64, + kpnet_conv_size=3, + kpnet_dropout=0.0, + ): + super().__init__() + + self.cond_hop_length = cond_hop_length + self.conv_layers = conv_layers + self.conv_kernel_size = conv_kernel_size + self.convs = torch.nn.ModuleList() + + self.upsample = torch.nn.ConvTranspose1d( + in_channels, + in_channels, + kernel_size=upsample_ratio * 2, + stride=upsample_ratio, + padding=upsample_ratio // 2 + upsample_ratio % 2, + output_padding=upsample_ratio % 2, + ) + + self.kernel_predictor = KernelPredictor( + cond_channels=cond_channels, + conv_in_channels=in_channels, + conv_out_channels=2 * in_channels, + conv_layers=conv_layers, + conv_kernel_size=conv_kernel_size, + kpnet_hidden_channels=kpnet_hidden_channels, + kpnet_conv_size=kpnet_conv_size, + kpnet_dropout=kpnet_dropout, + ) + + for i in range(conv_layers): + padding = (3**i) * int((conv_kernel_size - 1) / 2) + conv = torch.nn.Conv1d( + in_channels, in_channels, kernel_size=conv_kernel_size, padding=padding, dilation=3**i + ) + + self.convs.append(conv) + + def forward(self, x, c): + """forward propagation of the location-variable convolutions. + Args: + x (Tensor): the input sequence (batch, in_channels, in_length) + c (Tensor): the conditioning sequence (batch, cond_channels, cond_length) + + Returns: + Tensor: the output sequence (batch, in_channels, in_length) + """ + in_channels = x.shape[1] + kernels, bias = self.kernel_predictor(c) + + x = F.leaky_relu(x, 0.2) + x = self.upsample(x) + + for i in range(self.conv_layers): + y = F.leaky_relu(x, 0.2) + y = self.convs[i](y) + y = F.leaky_relu(y, 0.2) + + k = kernels[:, i, :, :, :, :] + b = bias[:, i, :, :] + y = self.location_variable_convolution(y, k, b, 1, self.cond_hop_length) + x = x + torch.sigmoid(y[:, :in_channels, :]) * torch.tanh(y[:, in_channels:, :]) + return x + + @staticmethod + def location_variable_convolution(x, kernel, bias, dilation, hop_size): + """perform location-variable convolution operation on the input sequence (x) using the local convolution kernl. + Time: 414 μs ± 309 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each), test on NVIDIA V100. + Args: + x (Tensor): the input sequence (batch, in_channels, in_length). + kernel (Tensor): the local convolution kernel (batch, in_channel, out_channels, kernel_size, kernel_length) + bias (Tensor): the bias for the local convolution (batch, out_channels, kernel_length) + dilation (int): the dilation of convolution. + hop_size (int): the hop_size of the conditioning sequence. + Returns: + (Tensor): the output sequence after performing local convolution. (batch, out_channels, in_length). 
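+        Example (added for clarity; illustrative shapes for the dilation=1,
+        hop_size=256, kernel_size=3 case that ``LVCBlock.forward`` uses above):
+            x:      (batch, in_channels, kernel_length * 256)
+            kernel: (batch, in_channels, out_channels, 3, kernel_length)
+            bias:   (batch, out_channels, kernel_length)
+            output: (batch, out_channels, kernel_length * 256)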
+ """ + batch, _, in_length = x.shape + batch, _, out_channels, kernel_size, kernel_length = kernel.shape + + assert in_length == ( + kernel_length * hop_size + ), f"length of (x, kernel) is not matched, {in_length} vs {kernel_length * hop_size}" + + padding = dilation * int((kernel_size - 1) / 2) + x = F.pad(x, (padding, padding), "constant", 0) # (batch, in_channels, in_length + 2*padding) + x = x.unfold(2, hop_size + 2 * padding, hop_size) # (batch, in_channels, kernel_length, hop_size + 2*padding) + + if hop_size < dilation: + x = F.pad(x, (0, dilation), "constant", 0) + x = x.unfold( + 3, dilation, dilation + ) # (batch, in_channels, kernel_length, (hop_size + 2*padding)/dilation, dilation) + x = x[:, :, :, :, :hop_size] + x = x.transpose(3, 4) # (batch, in_channels, kernel_length, dilation, (hop_size + 2*padding)/dilation) + x = x.unfold(4, kernel_size, 1) # (batch, in_channels, kernel_length, dilation, _, kernel_size) + + o = torch.einsum("bildsk,biokl->bolsd", x, kernel) + o = o + bias.unsqueeze(-1).unsqueeze(-1) + o = o.contiguous().view(batch, out_channels, -1) + return o diff --git a/TTS/vocoder/layers/melgan.py b/TTS/vocoder/layers/melgan.py new file mode 100644 index 0000000..7ad41a0 --- /dev/null +++ b/TTS/vocoder/layers/melgan.py @@ -0,0 +1,43 @@ +from torch import nn +from torch.nn.utils.parametrizations import weight_norm +from torch.nn.utils.parametrize import remove_parametrizations + + +class ResidualStack(nn.Module): + def __init__(self, channels, num_res_blocks, kernel_size): + super().__init__() + + assert (kernel_size - 1) % 2 == 0, " [!] kernel_size has to be odd." + base_padding = (kernel_size - 1) // 2 + + self.blocks = nn.ModuleList() + for idx in range(num_res_blocks): + layer_kernel_size = kernel_size + layer_dilation = layer_kernel_size**idx + layer_padding = base_padding * layer_dilation + self.blocks += [ + nn.Sequential( + nn.LeakyReLU(0.2), + nn.ReflectionPad1d(layer_padding), + weight_norm( + nn.Conv1d(channels, channels, kernel_size=kernel_size, dilation=layer_dilation, bias=True) + ), + nn.LeakyReLU(0.2), + weight_norm(nn.Conv1d(channels, channels, kernel_size=1, bias=True)), + ) + ] + + self.shortcuts = nn.ModuleList( + [weight_norm(nn.Conv1d(channels, channels, kernel_size=1, bias=True)) for _ in range(num_res_blocks)] + ) + + def forward(self, x): + for block, shortcut in zip(self.blocks, self.shortcuts): + x = shortcut(x) + block(x) + return x + + def remove_weight_norm(self): + for block, shortcut in zip(self.blocks, self.shortcuts): + remove_parametrizations(block[2], "weight") + remove_parametrizations(block[4], "weight") + remove_parametrizations(shortcut, "weight") diff --git a/TTS/vocoder/layers/parallel_wavegan.py b/TTS/vocoder/layers/parallel_wavegan.py new file mode 100644 index 0000000..51142e5 --- /dev/null +++ b/TTS/vocoder/layers/parallel_wavegan.py @@ -0,0 +1,77 @@ +import torch +from torch.nn import functional as F + + +class ResidualBlock(torch.nn.Module): + """Residual block module in WaveNet.""" + + def __init__( + self, + kernel_size=3, + res_channels=64, + gate_channels=128, + skip_channels=64, + aux_channels=80, + dropout=0.0, + dilation=1, + bias=True, + use_causal_conv=False, + ): + super().__init__() + self.dropout = dropout + # no future time stamps available + if use_causal_conv: + padding = (kernel_size - 1) * dilation + else: + assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size." 
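+            # Illustrative padding arithmetic (example values are assumptions):
+            # with kernel_size=3 and dilation=2, the causal branch above pads
+            # (3 - 1) * 2 = 4 samples on both sides and the forward pass trims
+            # the tail back to the residual length, while this non-causal
+            # branch pads (3 - 1) // 2 * 2 = 2 samples on each side so the
+            # convolution output keeps the input length.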
+ padding = (kernel_size - 1) // 2 * dilation + self.use_causal_conv = use_causal_conv + + # dilation conv + self.conv = torch.nn.Conv1d( + res_channels, gate_channels, kernel_size, padding=padding, dilation=dilation, bias=bias + ) + + # local conditioning + if aux_channels > 0: + self.conv1x1_aux = torch.nn.Conv1d(aux_channels, gate_channels, 1, bias=False) + else: + self.conv1x1_aux = None + + # conv output is split into two groups + gate_out_channels = gate_channels // 2 + self.conv1x1_out = torch.nn.Conv1d(gate_out_channels, res_channels, 1, bias=bias) + self.conv1x1_skip = torch.nn.Conv1d(gate_out_channels, skip_channels, 1, bias=bias) + + def forward(self, x, c): + """ + x: B x D_res x T + c: B x D_aux x T + """ + residual = x + x = F.dropout(x, p=self.dropout, training=self.training) + x = self.conv(x) + + # remove future time steps if use_causal_conv conv + x = x[:, :, : residual.size(-1)] if self.use_causal_conv else x + + # split into two part for gated activation + splitdim = 1 + xa, xb = x.split(x.size(splitdim) // 2, dim=splitdim) + + # local conditioning + if c is not None: + assert self.conv1x1_aux is not None + c = self.conv1x1_aux(c) + ca, cb = c.split(c.size(splitdim) // 2, dim=splitdim) + xa, xb = xa + ca, xb + cb + + x = torch.tanh(xa) * torch.sigmoid(xb) + + # for skip connection + s = self.conv1x1_skip(x) + + # for residual connection + x = (self.conv1x1_out(x) + residual) * (0.5**2) + + return x, s diff --git a/TTS/vocoder/layers/pqmf.py b/TTS/vocoder/layers/pqmf.py new file mode 100644 index 0000000..6253efb --- /dev/null +++ b/TTS/vocoder/layers/pqmf.py @@ -0,0 +1,53 @@ +import numpy as np +import torch +import torch.nn.functional as F +from scipy import signal as sig + + +# adapted from +# https://github.com/kan-bayashi/ParallelWaveGAN/tree/master/parallel_wavegan +class PQMF(torch.nn.Module): + def __init__(self, N=4, taps=62, cutoff=0.15, beta=9.0): + super().__init__() + + self.N = N + self.taps = taps + self.cutoff = cutoff + self.beta = beta + + QMF = sig.firwin(taps + 1, cutoff, window=("kaiser", beta)) + H = np.zeros((N, len(QMF))) + G = np.zeros((N, len(QMF))) + for k in range(N): + constant_factor = ( + (2 * k + 1) * (np.pi / (2 * N)) * (np.arange(taps + 1) - ((taps - 1) / 2)) + ) # TODO: (taps - 1) -> taps + phase = (-1) ** k * np.pi / 4 + H[k] = 2 * QMF * np.cos(constant_factor + phase) + + G[k] = 2 * QMF * np.cos(constant_factor - phase) + + H = torch.from_numpy(H[:, None, :]).float() + G = torch.from_numpy(G[None, :, :]).float() + + self.register_buffer("H", H) + self.register_buffer("G", G) + + updown_filter = torch.zeros((N, N, N)).float() + for k in range(N): + updown_filter[k, k, 0] = 1.0 + self.register_buffer("updown_filter", updown_filter) + self.N = N + + self.pad_fn = torch.nn.ConstantPad1d(taps // 2, 0.0) + + def forward(self, x): + return self.analysis(x) + + def analysis(self, x): + return F.conv1d(x, self.H, padding=self.taps // 2, stride=self.N) + + def synthesis(self, x): + x = F.conv_transpose1d(x, self.updown_filter * self.N, stride=self.N) + x = F.conv1d(x, self.G, padding=self.taps // 2) + return x diff --git a/TTS/vocoder/layers/qmf.dat b/TTS/vocoder/layers/qmf.dat new file mode 100644 index 0000000..17eab13 --- /dev/null +++ b/TTS/vocoder/layers/qmf.dat @@ -0,0 +1,640 @@ + 0.0000000e+000 + -5.5252865e-004 + -5.6176926e-004 + -4.9475181e-004 + -4.8752280e-004 + -4.8937912e-004 + -5.0407143e-004 + -5.2265643e-004 + -5.4665656e-004 + -5.6778026e-004 + -5.8709305e-004 + -6.1327474e-004 + -6.3124935e-004 + -6.5403334e-004 + 
-6.7776908e-004 + -6.9416146e-004 + -7.1577365e-004 + -7.2550431e-004 + -7.4409419e-004 + -7.4905981e-004 + -7.6813719e-004 + -7.7248486e-004 + -7.8343323e-004 + -7.7798695e-004 + -7.8036647e-004 + -7.8014496e-004 + -7.7579773e-004 + -7.6307936e-004 + -7.5300014e-004 + -7.3193572e-004 + -7.2153920e-004 + -6.9179375e-004 + -6.6504151e-004 + -6.3415949e-004 + -5.9461189e-004 + -5.5645764e-004 + -5.1455722e-004 + -4.6063255e-004 + -4.0951215e-004 + -3.5011759e-004 + -2.8969812e-004 + -2.0983373e-004 + -1.4463809e-004 + -6.1733441e-005 + 1.3494974e-005 + 1.0943831e-004 + 2.0430171e-004 + 2.9495311e-004 + 4.0265402e-004 + 5.1073885e-004 + 6.2393761e-004 + 7.4580259e-004 + 8.6084433e-004 + 9.8859883e-004 + 1.1250155e-003 + 1.2577885e-003 + 1.3902495e-003 + 1.5443220e-003 + 1.6868083e-003 + 1.8348265e-003 + 1.9841141e-003 + 2.1461584e-003 + 2.3017255e-003 + 2.4625617e-003 + 2.6201759e-003 + 2.7870464e-003 + 2.9469448e-003 + 3.1125421e-003 + 3.2739613e-003 + 3.4418874e-003 + 3.6008268e-003 + 3.7603923e-003 + 3.9207432e-003 + 4.0819753e-003 + 4.2264269e-003 + 4.3730720e-003 + 4.5209853e-003 + 4.6606461e-003 + 4.7932561e-003 + 4.9137604e-003 + 5.0393023e-003 + 5.1407354e-003 + 5.2461166e-003 + 5.3471681e-003 + 5.4196776e-003 + 5.4876040e-003 + 5.5475715e-003 + 5.5938023e-003 + 5.6220643e-003 + 5.6455197e-003 + 5.6389200e-003 + 5.6266114e-003 + 5.5917129e-003 + 5.5404364e-003 + 5.4753783e-003 + 5.3838976e-003 + 5.2715759e-003 + 5.1382275e-003 + 4.9839688e-003 + 4.8109469e-003 + 4.6039530e-003 + 4.3801862e-003 + 4.1251642e-003 + 3.8456408e-003 + 3.5401247e-003 + 3.2091886e-003 + 2.8446758e-003 + 2.4508540e-003 + 2.0274176e-003 + 1.5784683e-003 + 1.0902329e-003 + 5.8322642e-004 + 2.7604519e-005 + -5.4642809e-004 + -1.1568136e-003 + -1.8039473e-003 + -2.4826724e-003 + -3.1933778e-003 + -3.9401124e-003 + -4.7222596e-003 + -5.5337211e-003 + -6.3792293e-003 + -7.2615817e-003 + -8.1798233e-003 + -9.1325330e-003 + -1.0115022e-002 + -1.1131555e-002 + -1.2185000e-002 + -1.3271822e-002 + -1.4390467e-002 + -1.5540555e-002 + -1.6732471e-002 + -1.7943338e-002 + -1.9187243e-002 + -2.0453179e-002 + -2.1746755e-002 + -2.3068017e-002 + -2.4416099e-002 + -2.5787585e-002 + -2.7185943e-002 + -2.8607217e-002 + -3.0050266e-002 + -3.1501761e-002 + -3.2975408e-002 + -3.4462095e-002 + -3.5969756e-002 + -3.7481285e-002 + -3.9005368e-002 + -4.0534917e-002 + -4.2064909e-002 + -4.3609754e-002 + -4.5148841e-002 + -4.6684303e-002 + -4.8216572e-002 + -4.9738576e-002 + -5.1255616e-002 + -5.2763075e-002 + -5.4245277e-002 + -5.5717365e-002 + -5.7161645e-002 + -5.8591568e-002 + -5.9983748e-002 + -6.1345517e-002 + -6.2685781e-002 + -6.3971590e-002 + -6.5224711e-002 + -6.6436751e-002 + -6.7607599e-002 + -6.8704383e-002 + -6.9763024e-002 + -7.0762871e-002 + -7.1700267e-002 + -7.2568258e-002 + -7.3362026e-002 + -7.4100364e-002 + -7.4745256e-002 + -7.5313734e-002 + -7.5800836e-002 + -7.6199248e-002 + -7.6499217e-002 + -7.6709349e-002 + -7.6817398e-002 + -7.6823001e-002 + -7.6720492e-002 + -7.6505072e-002 + -7.6174832e-002 + -7.5730576e-002 + -7.5157626e-002 + -7.4466439e-002 + -7.3640601e-002 + -7.2677464e-002 + -7.1582636e-002 + -7.0353307e-002 + -6.8966401e-002 + -6.7452502e-002 + -6.5769067e-002 + -6.3944481e-002 + -6.1960278e-002 + -5.9816657e-002 + -5.7515269e-002 + -5.5046003e-002 + -5.2409382e-002 + -4.9597868e-002 + -4.6630331e-002 + -4.3476878e-002 + -4.0145828e-002 + -3.6641812e-002 + -3.2958393e-002 + -2.9082401e-002 + -2.5030756e-002 + -2.0799707e-002 + -1.6370126e-002 + -1.1762383e-002 + -6.9636862e-003 + -1.9765601e-003 + 
3.2086897e-003 + 8.5711749e-003 + 1.4128883e-002 + 1.9883413e-002 + 2.5822729e-002 + 3.1953127e-002 + 3.8277657e-002 + 4.4780682e-002 + 5.1480418e-002 + 5.8370533e-002 + 6.5440985e-002 + 7.2694330e-002 + 8.0137293e-002 + 8.7754754e-002 + 9.5553335e-002 + 1.0353295e-001 + 1.1168269e-001 + 1.2000780e-001 + 1.2850029e-001 + 1.3715518e-001 + 1.4597665e-001 + 1.5496071e-001 + 1.6409589e-001 + 1.7338082e-001 + 1.8281725e-001 + 1.9239667e-001 + 2.0212502e-001 + 2.1197359e-001 + 2.2196527e-001 + 2.3206909e-001 + 2.4230169e-001 + 2.5264803e-001 + 2.6310533e-001 + 2.7366340e-001 + 2.8432142e-001 + 2.9507167e-001 + 3.0590986e-001 + 3.1682789e-001 + 3.2781137e-001 + 3.3887227e-001 + 3.4999141e-001 + 3.6115899e-001 + 3.7237955e-001 + 3.8363500e-001 + 3.9492118e-001 + 4.0623177e-001 + 4.1756969e-001 + 4.2891199e-001 + 4.4025538e-001 + 4.5159965e-001 + 4.6293081e-001 + 4.7424532e-001 + 4.8552531e-001 + 4.9677083e-001 + 5.0798175e-001 + 5.1912350e-001 + 5.3022409e-001 + 5.4125534e-001 + 5.5220513e-001 + 5.6307891e-001 + 5.7385241e-001 + 5.8454032e-001 + 5.9511231e-001 + 6.0557835e-001 + 6.1591099e-001 + 6.2612427e-001 + 6.3619801e-001 + 6.4612697e-001 + 6.5590163e-001 + 6.6551399e-001 + 6.7496632e-001 + 6.8423533e-001 + 6.9332824e-001 + 7.0223887e-001 + 7.1094104e-001 + 7.1944626e-001 + 7.2774489e-001 + 7.3582118e-001 + 7.4368279e-001 + 7.5131375e-001 + 7.5870808e-001 + 7.6586749e-001 + 7.7277809e-001 + 7.7942875e-001 + 7.8583531e-001 + 7.9197358e-001 + 7.9784664e-001 + 8.0344858e-001 + 8.0876950e-001 + 8.1381913e-001 + 8.1857760e-001 + 8.2304199e-001 + 8.2722753e-001 + 8.3110385e-001 + 8.3469374e-001 + 8.3797173e-001 + 8.4095414e-001 + 8.4362383e-001 + 8.4598185e-001 + 8.4803158e-001 + 8.4978052e-001 + 8.5119715e-001 + 8.5230470e-001 + 8.5310209e-001 + 8.5357206e-001 + 8.5373856e-001 + 8.5357206e-001 + 8.5310209e-001 + 8.5230470e-001 + 8.5119715e-001 + 8.4978052e-001 + 8.4803158e-001 + 8.4598185e-001 + 8.4362383e-001 + 8.4095414e-001 + 8.3797173e-001 + 8.3469374e-001 + 8.3110385e-001 + 8.2722753e-001 + 8.2304199e-001 + 8.1857760e-001 + 8.1381913e-001 + 8.0876950e-001 + 8.0344858e-001 + 7.9784664e-001 + 7.9197358e-001 + 7.8583531e-001 + 7.7942875e-001 + 7.7277809e-001 + 7.6586749e-001 + 7.5870808e-001 + 7.5131375e-001 + 7.4368279e-001 + 7.3582118e-001 + 7.2774489e-001 + 7.1944626e-001 + 7.1094104e-001 + 7.0223887e-001 + 6.9332824e-001 + 6.8423533e-001 + 6.7496632e-001 + 6.6551399e-001 + 6.5590163e-001 + 6.4612697e-001 + 6.3619801e-001 + 6.2612427e-001 + 6.1591099e-001 + 6.0557835e-001 + 5.9511231e-001 + 5.8454032e-001 + 5.7385241e-001 + 5.6307891e-001 + 5.5220513e-001 + 5.4125534e-001 + 5.3022409e-001 + 5.1912350e-001 + 5.0798175e-001 + 4.9677083e-001 + 4.8552531e-001 + 4.7424532e-001 + 4.6293081e-001 + 4.5159965e-001 + 4.4025538e-001 + 4.2891199e-001 + 4.1756969e-001 + 4.0623177e-001 + 3.9492118e-001 + 3.8363500e-001 + 3.7237955e-001 + 3.6115899e-001 + 3.4999141e-001 + 3.3887227e-001 + 3.2781137e-001 + 3.1682789e-001 + 3.0590986e-001 + 2.9507167e-001 + 2.8432142e-001 + 2.7366340e-001 + 2.6310533e-001 + 2.5264803e-001 + 2.4230169e-001 + 2.3206909e-001 + 2.2196527e-001 + 2.1197359e-001 + 2.0212502e-001 + 1.9239667e-001 + 1.8281725e-001 + 1.7338082e-001 + 1.6409589e-001 + 1.5496071e-001 + 1.4597665e-001 + 1.3715518e-001 + 1.2850029e-001 + 1.2000780e-001 + 1.1168269e-001 + 1.0353295e-001 + 9.5553335e-002 + 8.7754754e-002 + 8.0137293e-002 + 7.2694330e-002 + 6.5440985e-002 + 5.8370533e-002 + 5.1480418e-002 + 4.4780682e-002 + 3.8277657e-002 + 3.1953127e-002 + 2.5822729e-002 + 1.9883413e-002 + 1.4128883e-002 + 
8.5711749e-003 + 3.2086897e-003 + -1.9765601e-003 + -6.9636862e-003 + -1.1762383e-002 + -1.6370126e-002 + -2.0799707e-002 + -2.5030756e-002 + -2.9082401e-002 + -3.2958393e-002 + -3.6641812e-002 + -4.0145828e-002 + -4.3476878e-002 + -4.6630331e-002 + -4.9597868e-002 + -5.2409382e-002 + -5.5046003e-002 + -5.7515269e-002 + -5.9816657e-002 + -6.1960278e-002 + -6.3944481e-002 + -6.5769067e-002 + -6.7452502e-002 + -6.8966401e-002 + -7.0353307e-002 + -7.1582636e-002 + -7.2677464e-002 + -7.3640601e-002 + -7.4466439e-002 + -7.5157626e-002 + -7.5730576e-002 + -7.6174832e-002 + -7.6505072e-002 + -7.6720492e-002 + -7.6823001e-002 + -7.6817398e-002 + -7.6709349e-002 + -7.6499217e-002 + -7.6199248e-002 + -7.5800836e-002 + -7.5313734e-002 + -7.4745256e-002 + -7.4100364e-002 + -7.3362026e-002 + -7.2568258e-002 + -7.1700267e-002 + -7.0762871e-002 + -6.9763024e-002 + -6.8704383e-002 + -6.7607599e-002 + -6.6436751e-002 + -6.5224711e-002 + -6.3971590e-002 + -6.2685781e-002 + -6.1345517e-002 + -5.9983748e-002 + -5.8591568e-002 + -5.7161645e-002 + -5.5717365e-002 + -5.4245277e-002 + -5.2763075e-002 + -5.1255616e-002 + -4.9738576e-002 + -4.8216572e-002 + -4.6684303e-002 + -4.5148841e-002 + -4.3609754e-002 + -4.2064909e-002 + -4.0534917e-002 + -3.9005368e-002 + -3.7481285e-002 + -3.5969756e-002 + -3.4462095e-002 + -3.2975408e-002 + -3.1501761e-002 + -3.0050266e-002 + -2.8607217e-002 + -2.7185943e-002 + -2.5787585e-002 + -2.4416099e-002 + -2.3068017e-002 + -2.1746755e-002 + -2.0453179e-002 + -1.9187243e-002 + -1.7943338e-002 + -1.6732471e-002 + -1.5540555e-002 + -1.4390467e-002 + -1.3271822e-002 + -1.2185000e-002 + -1.1131555e-002 + -1.0115022e-002 + -9.1325330e-003 + -8.1798233e-003 + -7.2615817e-003 + -6.3792293e-003 + -5.5337211e-003 + -4.7222596e-003 + -3.9401124e-003 + -3.1933778e-003 + -2.4826724e-003 + -1.8039473e-003 + -1.1568136e-003 + -5.4642809e-004 + 2.7604519e-005 + 5.8322642e-004 + 1.0902329e-003 + 1.5784683e-003 + 2.0274176e-003 + 2.4508540e-003 + 2.8446758e-003 + 3.2091886e-003 + 3.5401247e-003 + 3.8456408e-003 + 4.1251642e-003 + 4.3801862e-003 + 4.6039530e-003 + 4.8109469e-003 + 4.9839688e-003 + 5.1382275e-003 + 5.2715759e-003 + 5.3838976e-003 + 5.4753783e-003 + 5.5404364e-003 + 5.5917129e-003 + 5.6266114e-003 + 5.6389200e-003 + 5.6455197e-003 + 5.6220643e-003 + 5.5938023e-003 + 5.5475715e-003 + 5.4876040e-003 + 5.4196776e-003 + 5.3471681e-003 + 5.2461166e-003 + 5.1407354e-003 + 5.0393023e-003 + 4.9137604e-003 + 4.7932561e-003 + 4.6606461e-003 + 4.5209853e-003 + 4.3730720e-003 + 4.2264269e-003 + 4.0819753e-003 + 3.9207432e-003 + 3.7603923e-003 + 3.6008268e-003 + 3.4418874e-003 + 3.2739613e-003 + 3.1125421e-003 + 2.9469448e-003 + 2.7870464e-003 + 2.6201759e-003 + 2.4625617e-003 + 2.3017255e-003 + 2.1461584e-003 + 1.9841141e-003 + 1.8348265e-003 + 1.6868083e-003 + 1.5443220e-003 + 1.3902495e-003 + 1.2577885e-003 + 1.1250155e-003 + 9.8859883e-004 + 8.6084433e-004 + 7.4580259e-004 + 6.2393761e-004 + 5.1073885e-004 + 4.0265402e-004 + 2.9495311e-004 + 2.0430171e-004 + 1.0943831e-004 + 1.3494974e-005 + -6.1733441e-005 + -1.4463809e-004 + -2.0983373e-004 + -2.8969812e-004 + -3.5011759e-004 + -4.0951215e-004 + -4.6063255e-004 + -5.1455722e-004 + -5.5645764e-004 + -5.9461189e-004 + -6.3415949e-004 + -6.6504151e-004 + -6.9179375e-004 + -7.2153920e-004 + -7.3193572e-004 + -7.5300014e-004 + -7.6307936e-004 + -7.7579773e-004 + -7.8014496e-004 + -7.8036647e-004 + -7.7798695e-004 + -7.8343323e-004 + -7.7248486e-004 + -7.6813719e-004 + -7.4905981e-004 + -7.4409419e-004 + -7.2550431e-004 + -7.1577365e-004 + 
-6.9416146e-004 + -6.7776908e-004 + -6.5403334e-004 + -6.3124935e-004 + -6.1327474e-004 + -5.8709305e-004 + -5.6778026e-004 + -5.4665656e-004 + -5.2265643e-004 + -5.0407143e-004 + -4.8937912e-004 + -4.8752280e-004 + -4.9475181e-004 + -5.6176926e-004 + -5.5252865e-004 diff --git a/TTS/vocoder/layers/upsample.py b/TTS/vocoder/layers/upsample.py new file mode 100644 index 0000000..e169db0 --- /dev/null +++ b/TTS/vocoder/layers/upsample.py @@ -0,0 +1,102 @@ +import torch +from torch.nn import functional as F + + +class Stretch2d(torch.nn.Module): + def __init__(self, x_scale, y_scale, mode="nearest"): + super().__init__() + self.x_scale = x_scale + self.y_scale = y_scale + self.mode = mode + + def forward(self, x): + """ + x (Tensor): Input tensor (B, C, F, T). + Tensor: Interpolated tensor (B, C, F * y_scale, T * x_scale), + """ + return F.interpolate(x, scale_factor=(self.y_scale, self.x_scale), mode=self.mode) + + +class UpsampleNetwork(torch.nn.Module): + # pylint: disable=dangerous-default-value + def __init__( + self, + upsample_factors, + nonlinear_activation=None, + nonlinear_activation_params={}, + interpolate_mode="nearest", + freq_axis_kernel_size=1, + use_causal_conv=False, + ): + super().__init__() + self.use_causal_conv = use_causal_conv + self.up_layers = torch.nn.ModuleList() + for scale in upsample_factors: + # interpolation layer + stretch = Stretch2d(scale, 1, interpolate_mode) + self.up_layers += [stretch] + + # conv layer + assert (freq_axis_kernel_size - 1) % 2 == 0, "Not support even number freq axis kernel size." + freq_axis_padding = (freq_axis_kernel_size - 1) // 2 + kernel_size = (freq_axis_kernel_size, scale * 2 + 1) + if use_causal_conv: + padding = (freq_axis_padding, scale * 2) + else: + padding = (freq_axis_padding, scale) + conv = torch.nn.Conv2d(1, 1, kernel_size=kernel_size, padding=padding, bias=False) + self.up_layers += [conv] + + # nonlinear + if nonlinear_activation is not None: + nonlinear = getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params) + self.up_layers += [nonlinear] + + def forward(self, c): + """ + c : (B, C, T_in). + Tensor: (B, C, T_upsample) + """ + c = c.unsqueeze(1) # (B, 1, C, T) + for f in self.up_layers: + c = f(c) + return c.squeeze(1) # (B, C, T') + + +class ConvUpsample(torch.nn.Module): + # pylint: disable=dangerous-default-value + def __init__( + self, + upsample_factors, + nonlinear_activation=None, + nonlinear_activation_params={}, + interpolate_mode="nearest", + freq_axis_kernel_size=1, + aux_channels=80, + aux_context_window=0, + use_causal_conv=False, + ): + super().__init__() + self.aux_context_window = aux_context_window + self.use_causal_conv = use_causal_conv and aux_context_window > 0 + # To capture wide-context information in conditional features + kernel_size = aux_context_window + 1 if use_causal_conv else 2 * aux_context_window + 1 + # NOTE(kan-bayashi): Here do not use padding because the input is already padded + self.conv_in = torch.nn.Conv1d(aux_channels, aux_channels, kernel_size=kernel_size, bias=False) + self.upsample = UpsampleNetwork( + upsample_factors=upsample_factors, + nonlinear_activation=nonlinear_activation, + nonlinear_activation_params=nonlinear_activation_params, + interpolate_mode=interpolate_mode, + freq_axis_kernel_size=freq_axis_kernel_size, + use_causal_conv=use_causal_conv, + ) + + def forward(self, c): + """ + c : (B, C, T_in). 
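+ (conv_in is unpadded, so 2 * aux_context_window frames of T_in are consumed before upsampling)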
+ Tensor: (B, C, T_upsampled), + """ + c_ = self.conv_in(c) + c = c_[:, :, : -self.aux_context_window] if self.use_causal_conv else c_ + return self.upsample(c) diff --git a/TTS/vocoder/layers/wavegrad.py b/TTS/vocoder/layers/wavegrad.py new file mode 100644 index 0000000..9f1512c --- /dev/null +++ b/TTS/vocoder/layers/wavegrad.py @@ -0,0 +1,166 @@ +import torch +import torch.nn.functional as F +from torch import nn +from torch.nn.utils.parametrizations import weight_norm +from torch.nn.utils.parametrize import remove_parametrizations + + +class Conv1d(nn.Conv1d): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + nn.init.orthogonal_(self.weight) + nn.init.zeros_(self.bias) + + +class PositionalEncoding(nn.Module): + """Positional encoding with noise level conditioning""" + + def __init__(self, n_channels, max_len=10000): + super().__init__() + self.n_channels = n_channels + self.max_len = max_len + self.C = 5000 + self.pe = torch.zeros(0, 0) + + def forward(self, x, noise_level): + if x.shape[2] > self.pe.shape[1]: + self.init_pe_matrix(x.shape[1], x.shape[2], x) + return x + noise_level[..., None, None] + self.pe[:, : x.size(2)].repeat(x.shape[0], 1, 1) / self.C + + def init_pe_matrix(self, n_channels, max_len, x): + pe = torch.zeros(max_len, n_channels) + position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) + div_term = torch.pow(10000, torch.arange(0, n_channels, 2).float() / n_channels) + + pe[:, 0::2] = torch.sin(position / div_term) + pe[:, 1::2] = torch.cos(position / div_term) + self.pe = pe.transpose(0, 1).to(x) + + +class FiLM(nn.Module): + def __init__(self, input_size, output_size): + super().__init__() + self.encoding = PositionalEncoding(input_size) + self.input_conv = nn.Conv1d(input_size, input_size, 3, padding=1) + self.output_conv = nn.Conv1d(input_size, output_size * 2, 3, padding=1) + + nn.init.xavier_uniform_(self.input_conv.weight) + nn.init.xavier_uniform_(self.output_conv.weight) + nn.init.zeros_(self.input_conv.bias) + nn.init.zeros_(self.output_conv.bias) + + def forward(self, x, noise_scale): + o = self.input_conv(x) + o = F.leaky_relu(o, 0.2) + o = self.encoding(o, noise_scale) + shift, scale = torch.chunk(self.output_conv(o), 2, dim=1) + return shift, scale + + def remove_weight_norm(self): + remove_parametrizations(self.input_conv, "weight") + remove_parametrizations(self.output_conv, "weight") + + def apply_weight_norm(self): + self.input_conv = weight_norm(self.input_conv) + self.output_conv = weight_norm(self.output_conv) + + +@torch.jit.script +def shif_and_scale(x, scale, shift): + o = shift + scale * x + return o + + +class UBlock(nn.Module): + def __init__(self, input_size, hidden_size, factor, dilation): + super().__init__() + assert isinstance(dilation, (list, tuple)) + assert len(dilation) == 4 + + self.factor = factor + self.res_block = Conv1d(input_size, hidden_size, 1) + self.main_block = nn.ModuleList( + [ + Conv1d(input_size, hidden_size, 3, dilation=dilation[0], padding=dilation[0]), + Conv1d(hidden_size, hidden_size, 3, dilation=dilation[1], padding=dilation[1]), + ] + ) + self.out_block = nn.ModuleList( + [ + Conv1d(hidden_size, hidden_size, 3, dilation=dilation[2], padding=dilation[2]), + Conv1d(hidden_size, hidden_size, 3, dilation=dilation[3], padding=dilation[3]), + ] + ) + + def forward(self, x, shift, scale): + x_inter = F.interpolate(x, size=x.shape[-1] * self.factor) + res = self.res_block(x_inter) + o = F.leaky_relu(x_inter, 0.2) + o = F.interpolate(o, size=x.shape[-1] * self.factor) + o = 
self.main_block[0](o) + o = shif_and_scale(o, scale, shift) + o = F.leaky_relu(o, 0.2) + o = self.main_block[1](o) + res2 = res + o + o = shif_and_scale(res2, scale, shift) + o = F.leaky_relu(o, 0.2) + o = self.out_block[0](o) + o = shif_and_scale(o, scale, shift) + o = F.leaky_relu(o, 0.2) + o = self.out_block[1](o) + o = o + res2 + return o + + def remove_weight_norm(self): + remove_parametrizations(self.res_block, "weight") + for _, layer in enumerate(self.main_block): + if len(layer.state_dict()) != 0: + remove_parametrizations(layer, "weight") + for _, layer in enumerate(self.out_block): + if len(layer.state_dict()) != 0: + remove_parametrizations(layer, "weight") + + def apply_weight_norm(self): + self.res_block = weight_norm(self.res_block) + for idx, layer in enumerate(self.main_block): + if len(layer.state_dict()) != 0: + self.main_block[idx] = weight_norm(layer) + for idx, layer in enumerate(self.out_block): + if len(layer.state_dict()) != 0: + self.out_block[idx] = weight_norm(layer) + + +class DBlock(nn.Module): + def __init__(self, input_size, hidden_size, factor): + super().__init__() + self.factor = factor + self.res_block = Conv1d(input_size, hidden_size, 1) + self.main_block = nn.ModuleList( + [ + Conv1d(input_size, hidden_size, 3, dilation=1, padding=1), + Conv1d(hidden_size, hidden_size, 3, dilation=2, padding=2), + Conv1d(hidden_size, hidden_size, 3, dilation=4, padding=4), + ] + ) + + def forward(self, x): + size = x.shape[-1] // self.factor + res = self.res_block(x) + res = F.interpolate(res, size=size) + o = F.interpolate(x, size=size) + for layer in self.main_block: + o = F.leaky_relu(o, 0.2) + o = layer(o) + return o + res + + def remove_weight_norm(self): + remove_parametrizations(self.res_block, "weight") + for _, layer in enumerate(self.main_block): + if len(layer.state_dict()) != 0: + remove_parametrizations(layer, "weight") + + def apply_weight_norm(self): + self.res_block = weight_norm(self.res_block) + for idx, layer in enumerate(self.main_block): + if len(layer.state_dict()) != 0: + self.main_block[idx] = weight_norm(layer) diff --git a/TTS/vocoder/models/__init__.py b/TTS/vocoder/models/__init__.py new file mode 100644 index 0000000..6590161 --- /dev/null +++ b/TTS/vocoder/models/__init__.py @@ -0,0 +1,154 @@ +import importlib +import re + +from coqpit import Coqpit + + +def to_camel(text): + text = text.capitalize() + return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), text) + + +def setup_model(config: Coqpit): + """Load models directly from configuration.""" + if "discriminator_model" in config and "generator_model" in config: + MyModel = importlib.import_module("TTS.vocoder.models.gan") + MyModel = getattr(MyModel, "GAN") + else: + MyModel = importlib.import_module("TTS.vocoder.models." + config.model.lower()) + if config.model.lower() == "wavernn": + MyModel = getattr(MyModel, "Wavernn") + elif config.model.lower() == "gan": + MyModel = getattr(MyModel, "GAN") + elif config.model.lower() == "wavegrad": + MyModel = getattr(MyModel, "Wavegrad") + else: + try: + MyModel = getattr(MyModel, to_camel(config.model)) + except ModuleNotFoundError as e: + raise ValueError(f"Model {config.model} not exist!") from e + print(" > Vocoder Model: {}".format(config.model)) + return MyModel.init_from_config(config) + + +def setup_generator(c): + """TODO: use config object as arguments""" + print(" > Generator Model: {}".format(c.generator_model)) + MyModel = importlib.import_module("TTS.vocoder.models." 
+ c.generator_model.lower()) + MyModel = getattr(MyModel, to_camel(c.generator_model)) + # this is to preserve the Wavernn class name (instead of Wavernn) + if c.generator_model.lower() in "hifigan_generator": + model = MyModel(in_channels=c.audio["num_mels"], out_channels=1, **c.generator_model_params) + elif c.generator_model.lower() in "melgan_generator": + model = MyModel( + in_channels=c.audio["num_mels"], + out_channels=1, + proj_kernel=7, + base_channels=512, + upsample_factors=c.generator_model_params["upsample_factors"], + res_kernel=3, + num_res_blocks=c.generator_model_params["num_res_blocks"], + ) + elif c.generator_model in "melgan_fb_generator": + raise ValueError("melgan_fb_generator is now fullband_melgan_generator") + elif c.generator_model.lower() in "multiband_melgan_generator": + model = MyModel( + in_channels=c.audio["num_mels"], + out_channels=4, + proj_kernel=7, + base_channels=384, + upsample_factors=c.generator_model_params["upsample_factors"], + res_kernel=3, + num_res_blocks=c.generator_model_params["num_res_blocks"], + ) + elif c.generator_model.lower() in "fullband_melgan_generator": + model = MyModel( + in_channels=c.audio["num_mels"], + out_channels=1, + proj_kernel=7, + base_channels=512, + upsample_factors=c.generator_model_params["upsample_factors"], + res_kernel=3, + num_res_blocks=c.generator_model_params["num_res_blocks"], + ) + elif c.generator_model.lower() in "parallel_wavegan_generator": + model = MyModel( + in_channels=1, + out_channels=1, + kernel_size=3, + num_res_blocks=c.generator_model_params["num_res_blocks"], + stacks=c.generator_model_params["stacks"], + res_channels=64, + gate_channels=128, + skip_channels=64, + aux_channels=c.audio["num_mels"], + dropout=0.0, + bias=True, + use_weight_norm=True, + upsample_factors=c.generator_model_params["upsample_factors"], + ) + elif c.generator_model.lower() in "univnet_generator": + model = MyModel(**c.generator_model_params) + else: + raise NotImplementedError(f"Model {c.generator_model} not implemented!") + return model + + +def setup_discriminator(c): + """TODO: use config objekt as arguments""" + print(" > Discriminator Model: {}".format(c.discriminator_model)) + if "parallel_wavegan" in c.discriminator_model: + MyModel = importlib.import_module("TTS.vocoder.models.parallel_wavegan_discriminator") + else: + MyModel = importlib.import_module("TTS.vocoder.models." 
+ c.discriminator_model.lower()) + MyModel = getattr(MyModel, to_camel(c.discriminator_model.lower())) + if c.discriminator_model in "hifigan_discriminator": + model = MyModel() + if c.discriminator_model in "random_window_discriminator": + model = MyModel( + cond_channels=c.audio["num_mels"], + hop_length=c.audio["hop_length"], + uncond_disc_donwsample_factors=c.discriminator_model_params["uncond_disc_donwsample_factors"], + cond_disc_downsample_factors=c.discriminator_model_params["cond_disc_downsample_factors"], + cond_disc_out_channels=c.discriminator_model_params["cond_disc_out_channels"], + window_sizes=c.discriminator_model_params["window_sizes"], + ) + if c.discriminator_model in "melgan_multiscale_discriminator": + model = MyModel( + in_channels=1, + out_channels=1, + kernel_sizes=(5, 3), + base_channels=c.discriminator_model_params["base_channels"], + max_channels=c.discriminator_model_params["max_channels"], + downsample_factors=c.discriminator_model_params["downsample_factors"], + ) + if c.discriminator_model == "residual_parallel_wavegan_discriminator": + model = MyModel( + in_channels=1, + out_channels=1, + kernel_size=3, + num_layers=c.discriminator_model_params["num_layers"], + stacks=c.discriminator_model_params["stacks"], + res_channels=64, + gate_channels=128, + skip_channels=64, + dropout=0.0, + bias=True, + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + ) + if c.discriminator_model == "parallel_wavegan_discriminator": + model = MyModel( + in_channels=1, + out_channels=1, + kernel_size=3, + num_layers=c.discriminator_model_params["num_layers"], + conv_channels=64, + dilation_factor=1, + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + bias=True, + ) + if c.discriminator_model == "univnet_discriminator": + model = MyModel() + return model diff --git a/TTS/vocoder/models/__pycache__/__init__.cpython-311.pyc b/TTS/vocoder/models/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40c5c0e6c9ef811b931acf0049bb7b7b846bfe1b GIT binary patch literal 7655 zcmcIpTWs6b876h3E|zRdv24jVDM=m6S2#}6CUKnj-dvibs}rZ$5|uV-hp|MdBo)V= z-5Et6+@aVqpg^-N9WqQLsL0Lw%7MhXY8K)^tOtr+~~A$chBwEs{yQnAyd z=#GzmlIQ&AKi~P!&vQN=f9`fW2}m>3H>b}82;yJ(p^!}Zo5!c%<`aS;7&1m&&0Wc> zBs`m9rnvd4nIstV2gFqiV`0tHCdT@K<*JpjL5Yo_KvE3(77?*;VRPprq-q;Vyqn~t zDH0dtlNGLJ_&vS?;uB(#NW=BnvDW&tnYWK17`ja8^!FKPe=^ZA9U z)|27uk!b7B>CT0L&a2lV1{he`2fxP`LF^8LAy)zXdT{{h;$0q#b}@p5?oH>nuwuld zT4#hrDydpi$s{X)OUSs0YeG-Qx-W^WAa>8w;*IWMR-Ba*$?l;F%kNC?(=i zKEbeJ_xSi*-M11`a4&SnLB_=HXq4kQDH`oc&Of#H#pw7XL-#KPr9^a!j9hOF?yEBJ0wr-V#~Tn+$=^^+ETEhzZ$1~0jkRT ze_0!hZB|1kRF@&A%8~QyFKyeti|uE;!Y{qFZO1OQozLLD;2?}+lUyaA>iaeN@)+Rn6Ux2eA%+v!DJeD|nidlA zQ0@&$3ADr|_%7AQaN?A}#W|jq5<*mKqE^nZJPR6yf(1V;99{W^qbp~&Ylh}k^Vq=o zr@kF3RcemD#R@#HQWNw|X`4*7;N0H#&?dK!Lf-V&BX2}* z8&teQ$UC$`ZF&QD52Oz(-UyVhP#;p?`g&z$KRz|1k0oe-oCg&VCPR z+l<@5&36`u>H|aimM&D+GR_- zSO>2EN9*2U7xBpD9PTwgstF8tnICmo;oh(}RDq(lH+%`J@+X9T{btCWH)wcci>toz z%`-lcgoL-Te7ad&_3^$_vXIv8Dg|9w`{B04Sa#YCBzH-Bk<_nMdh13+7H|xdu(eO>^@i89gyHcttj0^UFN~1U5$Eb>t@IwBp0-J}P zJDzIB&Ug)|f8==h?UzR9`$2S8K=+e)oB#7mquT@MMzq+oLX7P?vj*SQlxCdqGqsE# zYIly>Iwrsb3~hCOzaLzMw{5-n#x0rnftSW#5BNQ~cZY+oj8a_Y-VeS`s@26>gWf*y zjmC1m0nulIoxXWt|M-RB3&%n!k-tHF3GCUHB~ zZQ&K1TZa8y4Lrv{k{0M#jEzM#qGNEXO_XRD?bazUw6zT4Kk(Dic*L#uKyGkR5k8u< z5eg?jORd1;F3y2_hL%d6QQhJ!mn_v*(5c&{CyEggNtiR$GRe`RS_#1tonyI~8xnjL z3h|sh4~0hPcA;8hWx%vzERH3X34cLEDuh>{PH4dbuaQC%7R^}f#Ug|S zroXC{PBC0U^%r9+$B?4fPjOM$k9B-N%TAX%&0TCtrl2qV|3%yF%vO#y#- 
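For reference, a minimal sketch of how the factory functions above are typically driven from a training config. The attribute names it reads (generator_model, generator_model_params, discriminator_model, discriminator_model_params, audio["num_mels"]) come from setup_generator() and setup_discriminator(); the concrete model names and parameter values are illustrative assumptions, and in a full training run the config would be a Coqpit-based vocoder config rather than a bare namespace.

from types import SimpleNamespace

from TTS.vocoder.models import setup_discriminator, setup_generator

# a plain namespace stands in for the Coqpit-based training config in this sketch
c = SimpleNamespace(
    audio={"num_mels": 80},
    generator_model="melgan_generator",
    generator_model_params={"upsample_factors": [8, 8, 2, 2], "num_res_blocks": 3},
    discriminator_model="melgan_multiscale_discriminator",
    discriminator_model_params={
        "base_channels": 16,
        "max_channels": 1024,
        "downsample_factors": [4, 4, 4, 4],
    },
)

generator = setup_generator(c)          # resolves and builds MelganGenerator from c.generator_model
discriminator = setup_discriminator(c)  # resolves and builds MelganMultiscaleDiscriminator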
ztJFYnUkh4#*oIsWnvgO34rxtl$kz0;`O{4w`8~v9u z{g;&f3DiHKv`i?TE68(Yd2G{Db7$t$+iN6hJ*qSxL(Rt&&vE2AzC89VWk*!q2Gx|I zniML8sF1w>Rhhy$TUC39lB<~Xz-MD?XVCsvAJXsy`I{beb}DzfV*SvXbvThDxZwz8 z93jQA7diIIEw9NAoHY~Ky1+du?L>9$*+38;vcbk|V-spTEFXCtHJ;Aa)@AD(P<^j_ zXc*OxY**T=o&P03Ji!8YuXjWhZ+hzFzy-x~5qU1k_KQ0K7?d42{}cdL ziU5pb9FQG2|3AAZ8%5rKI&Wotdyubv?RDfkoDI}t&juSn>t1a-hyoqiJ+;~DnruTz z@9*t;4|Xt!Cs?dlVSG(F2OGzCRCeH;4})I0s`qXf1rNd%GH&*rCoJ#1u-1dx`{0_( zrAcvBEuYg)G7Kyge~D@=^-mywYL;r`qp(ZW?7~~izIc~NzH1jvi+=Zd5oYw`8SaZ% zH}baUv0%mu<9Fg++Q`6{44j>@f!15k@Hnc(HemcEo?$nXm-wh-mvSBR(DMYwwTr%y z*TR0ij`wUWmEp#m_MpZ=Dw#^A%3!^E7wg_#%6+Bs`+HWO*K1AwJg2+ZGv;9>Kh%~i z8OrNQ<-pE@@T+GUm`0|FY39iLp;h?4UN4^7l#Mi;?=$?veS+P}f>oyB{@ztUwq8V6 zW_MrS?#?lS8QK0_7};zYBMSl}8`dAalvtTsRZb!ndUiNsDOZXbPI7Y8uBJfk40UR~ z0PF{jHs(0E%#}6lgwRo)HxkKcjOAyf8){REhcggHQ;RYQe$Ghg)CPU+9A8%JFVz}o zV@1Am%yPttO?7K}1%4x30u;h!EGDqPC;YP%FCAx-c4?*ciN)8N8+pM$uqYX^Se}caZlTIO8j^$`c!{Lz&hgrF8_gjwsEe zsCiWJj3Lk1@>mu#%mz86XO+KmeC$}U7HUq&6wWy!6|BE^Al-(7oxlq}p1Zq5{_lq|e2qG2&DLwV3;ucDv&^NOySppT((d_KZoA)*E zz4@(v?Cy>sXp5Pp%&$>|eiMs&3;UDZi!gbD3}oOO%HuS~Vjjo^^1*ZvqX3#jM(`mr zLNvG)L}(ixucbpqcm^dRzl&u8yOC@5or3AWtoLJ$(^;mO7G*bWgXVN!M7{$4V)rym zz6H}1rLlq10RyLlMj#z}gfbXRD(Ns7jbs8w_+cm=eT44ev?2_;okIVdt%GW&8Cz+z9JEm=MVq3(q3&hTld3nBhUw+zlKooXOt59@OcF}|`lva(01w@&?hk=c`b0f}9!m)q}?inxkjQJ>r;o3NJmYSr*Ol z`G&<4@*`h|^rg zjp(+OF&EtqRh7k}s$6(a$m;7Gsau>fo?6j(HZ@N9ZOC-$nw`(A+?q=1P&js8wV;Z4 zYIb%e<;Nl=ZSd3r6r$?W#tJJ;T)+GMP31aP_iX@n_!$`fUPUjJ?(d@CM_(v|&y~R) zWvHSIm6f5Y()B_)^jtZ#KDVQcRFsi&z3+k;V%;#bk}!?Q59cZc@S zr@api&25~x3a6{PaF+sC!dw#fG-n;;t zd4B(x9|{TWfR+?OKp6zVKe+(e9l00B(wTLhHYvm$Sn+hG;+lE~d? zR^yuEFxD^RLLy}BfSAVSv|N!+FlMtvh;;$Ulf^0glSVgry)YZSXTp4(;GH|loDSEYMczwCtGYY+$JozGd<$3bTFOi+(WF8axRcwFo*gD$G^m zM>fuv;|b_hGP2oOA?Me|A5XoCzr8V6j=vAxCtnccHTczns6Pqxzal7pxO{kWr|){D z?|QlGI_rf9Bs$nZc)2lEz;`hx($uA@cZyohPg+z}4O<6t?B?g=6Q8NGUw$zm@s%K-B?^1}*v^pbv8GqD4@k zJ?9QN=n zh>Xa_m|^_3#h5rd%(8fFkJ;k(VFw&LhMhL1hY_9cGonjk#^5ZYjJXO&kL(Qd1U^O? 
zc2j*f=zCsC-$V6lK)?2t^lPZT7xaCvq+d(*>p;I=Vk?f?E3$)3>!ww_h`v@N`>TgyWk z62>K&mnJ20hwoQL`AH#`miRD9#(8B_;-iUFTH$XAlTtWI;=GU$c_EdGMJ4#=Wkm>$ z^T{wDN+u?gv9y4fCbt`g`;v-uoOXfTM8o_@iVkChkIMXEvth)veUiV{*~TAkQ!5ZGE!cJu#+T>7@NNs#r5|k06@6#kA7yG`=w+u-M zIZ0ZNue+Z7PzNL>O%f$%Tr>1<9OuQ-Sv7i+dAJZ>7UHQG&JUfYn}QM=Z8tCK+U2OM z6pVnC82rR}!Xc&!%r~81NeGP=rp{Otd`!3_k^HCzM}?GBGN{|f`DiY z+Yb6k($s-KuwUZn@S==z>}aEb2kxd*rC`JQ|5 z6ncO=Qd51dBTnnw!@8oe1@CE<```h~weay)w+`04=IUU;E&BRG=0bo5T%w!d^WrcF z>$b?iBAsv*0;K=RKgwkr$*>Rl}Wvt`*uZB9V$`X+~?g;W| z$2mzJSCXlYv&ne)&eaPYp|q?dC)oStH{q4bEG!a#T!S=Uf$&e6}Pr-qq%EN%G!a?73%^(4mVkg+Yqb%a zSgHlbxH(A!2sp+squ!@GveKU3MoX0!DNuQ&ZPt* z#3hA9r;PBSdv8h6$f#m8*Y+mGbWB2js=LmD$3HCUPU`vf8vF{TlClEf&ml{(Fxig% zc*iA@kYYhOIwk2Y87zyE?oZ27(Au*%A}PWA_3ADhOfan11cRZNAj`p^j8}=F3(8%x z2`>|;bShBcHtd8(`LAF`|NAcU%u{#I{lNW?o~^(4Y|VL^HBYnZXk&0Nmg^ta`p4CHLVY)>22!~|N(0GFXxzk#X`o?YaPhI6{-Wi_DW0*VHbOKV)ZXrOUHSsY*LR1ck8n)tgPsDUfFz!eQ7 zH=uC?YhG7f{d2pgsrDrU0;aikt$}Ig=bApV{ibGd-&6MK(WTDiu*&!5_+Aalf8;EsO8M3`w?h8=@Sqg;o!Y5TMT;-E8Af?}uOJdqv8Ga1rR z+ET8Ejs~Kh8I>UF(ev=!3@fr3R~Vxi)D_Y2Gz)fIQ=kDovkPX-#d8WJW6wC&^#=R6 zbS#hHyeH~qYGx2FMfXWSXq>Hw!%%UI1Ao?(^xLf_ytp|4`}1>3d-GnN;% z@CvQgS?0mX46|TH_iDP|#HzBTXlcxh4luuV(NQ+HFxs#U9KBQU!fiLt(xj zJ9t#xF&UL^kvBj~uZ>B<_?;k;Vrkubd7$g^)!^Xecl*0^7vvFALL_@p%LzG4XjFHH zV}hbciB=EUht=$D-GP`{XK(6kNM{w@nLwAVv$0b5%v1LaXD7)mfrwL0>s)lc{t%dy z{|m%jW^F6OHDn!YE@pG%?EZW1ESv4j3M*b;R(bHVd+*E%zxVD|y}MT$x6WQ#=v+{K z^RrLi$u+*2?Rv)fXPq;`+@86sbIl7s$#EST*O5KF!ue;qKB$2MU){s*``t60pyQtt z=f`u~S{L8?{6x-oO!FPfvS0eQ%r>tv_L}Wf7-zCGdp>Z?I3j9)%bxlC4|-vAR~BqJ z-(JnP7rNZU&FstG%6qw1J3*XHV|e#tHwNnj#NS|0uMGwPj6kr3V_z^hkrrb47I!cx zCPTp>4S?hn-kG|5oD4~^STOjQB?oYF5l{TF5i8L>5EQthjMcK5 zrv9mrRa1>ahKe<_=39FE&$Uwb@gsuEoba#&X`(o?fyohGNz$zP4j)x8qZFgzutcPU zf|({{?vPUBCNxf!d4Wj0oQ^~!S%EYh&b=w|DM!1PF?CL=w{>iT0U<LS~)3E?A!!i2{o+#Gtn zGEU*O@ z;uCZ+v;a^!c^$w8f-su;uLBgc%{2|_^#W*>myByY0Lj=ZXu8*HJ~)*D(t$z+RyY7d zdhJl55`#p!u2TNUvVMvT6#5Iph6O8>a8Xq_DT~ehzz$X?s<5wDMiQ-dy+#5*B@sc9 z$Yr=MskRI;C$A&b57KDb>3FGA{#99TOlTCi#(GDF(#Hxzcbn!rw&FISO>Nz+=)5 z|7m6vX(VnQ?l@h{K7rVxbYnNurXFU$P-mYd0B49Q?1rH4pb8sTNy z84RaD5%9SL1V2n<*fbX=nnCY6xBE<^QQJr?HVgV9Z5hza41H@qMBZzoUFzV4_^{G{D zc&QEFcd+3V>%fLb{4O@?#QN`Sqn_GeztcF-#wNHc?&^0XvMy=?Ich?!DzjY=mO1#a z)vcEjw-g5X`wS|u32yExWy>zY5oyGqt$2h;XcbrMD%Jsc>WYwLIwccK&{r97hpQYM zkpPOVC~%2VJk{!551@8n6EbdO7mD2|cn}6h*eTN7lctufjuJ#g2SQ;L4g|thWnyp8 zL{onPs`3PkuOgcIMoiw>#fHTzK*a@)=K{wyklYE4J5l7hkZC)-II-kkKBESDa)BNV zBzIop&X?;nEDxxG^SQuz4J3C#<1Tzt9dYr@Q+DZ%<(q2YLN0JY1Ib;~xQj(Q$h!?Z z^)DS>26FFWE^tu;$z9U8OIDpLV5d_JoXiDIY9P5&8h2`KlLu+JTBPMbP+G3ZO3QUV z6_))fe>TUT)j)1Lr)@h|nU+JpMiFBZ+>bmh$BaRxQ7Cz3ht&;WY?%kIIl7+<+J?kXQDpFJ3zwMTloS z6Wy4%vI5Ui$XV&(Cq7(80v{e5(`TQ?qtAS%i`^OS1UVbPhHt#Rn4>3tyufS$8!s5)+M5EaB=KA{XG2$WKrpaG*E> z{J=&$3f%>mNa{7GCnNm;?K?%iCN2qy6dcQsZ3Y0KVILQ(CFmOoWBDpHqyKN=4EdKJ zDn|bTm-4hx<&NgKqZ)SG~b{BBKsnRe$tj^=3%z5pzAF)+4I>rQEi#wk`Y$@$n^$_**b4*9(kF z#$i6j+6M8oG-5hL8$Pn! 
zSO4<8L_cdxvoIw2zXfd&14~bOHR^@_0Z7J#vl2^7V{1SW}Hg*RqJYS=C=k|8-=EA*t~9pJ zoSW@~=PDDQ9?=?G7x&~E+qA~E8TK_1EgMHm0k0axuW|HcEQ<}JB??cCGXl}Ff^705 z%Ti4kaS}TU!N)A$HcmF4H~t!84Uz{75zD#~Z{gzx`2PkE4jFVeDlHF=0n=%L*QRM&o&>(ZEn*xx8aX0yy$JUJR!l0qfKopmXlrrrir$7MniZrEcm_JsrmW*bvZSCPz^GEq2ul z0&Z5}wcUH z?lS&-M4!}9ewKo5x(?|mA2WtUoIm)ZmhtrT6j9(O-dVX5g7k@MWF*|d0A{8X7`}=axo{(kIpBbW6R58^LchN3wNYk%pIQpqvu%va)U-XrT+&{ CXkknM literal 0 HcmV?d00001 diff --git a/TTS/vocoder/models/__pycache__/hifigan_generator.cpython-311.pyc b/TTS/vocoder/models/__pycache__/hifigan_generator.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10f90ceece8cb14b81c0a4299785e378bfd3e60d GIT binary patch literal 13759 zcmeG@ZEO=~nlt`L#<7#wAz$Q!3?CtJ2?-9gKuUqo5THQ9M`*WU>C|{8iHT#UGZPA_ zt+FRh7f_^{Q+my=*i-hda6$A3E1h&*X)Ez_@1FlE)5s7tglg-g{&?yi%f?!#A4qkdA~2lXpos(ygfuY&s3 zFIB&Wr_WMB{}UW;C`fCLcs%IP%HB-GFNb)o8h=N`a9o;BNJ5B*3Ke5B@rWEv#KSRo z@_r~p$0uYio)D+tu|X835|;&TIxL2#1X+yEhOw%IHDie|&y7jav=ABp)h{vq?m)zm zUnl;!;EM}?)(hY>N;Z*0QAsLE&%!nVgl**MSuaK1h4TAm9aw~8=m~ZQV>zR78CF40 zYBVq5JC^;^sh)F^AWA(~!_q|0fFMoEiRqp<6H{YX&mHTD%t&%#3Rb}jQqRc9*`CXZ z2;{|{DS$DlXCgWl9S_I3aUm{P1yj5vB z-G9ryNZ;}-p1$$1;%!sCZ5dbFGbpDb2#jQG_Ol)E=5Ju-0yRqW)To1}N1c5695qJs zuIrvr7w-lFa`R;X%jT#n^k_M(n$bLG1nE#L5t$5yUx&rQQYn|sPH28+Fen>LK+g}4-?{KU!VSnudcA$ z_p>okh|ORI`welSLbkV%-HF**1h(qEh3dVWS-qDd)e~5K8-fIx0FId4{ufWl3Vnh7 z;<(gj4jh>X$KyZ=Y$uXd9~+;Ux+sWjVvL=L^1Kjd%?e`)k(DQeB24ON?l$|I6h!Eb zlcKYN(Iv@4heeIqv1m+&p^P0OJrz3QqcQ!6v7ND~gj2&GoS0F(G>Ja9=39gguiadyE`K$#B?>l%(Mg4o1?+JJuU7DY??jQb~us>@FOM`^W=G`oej) z44&buCp!~)iBahnjA|+Skwr!I{IEUYY5P9#^!J(1cj5l0 z-1{oHWDH#L6fkM`kXR2dM5M!e4vjI+rdDpq52W#+M{`Rv(}Jim92bp8WsVc^L}(Q! z6Z}j}H~}YH#7z?ML=ussl_A@eLiyj`dv47o39&JaF&L8OweT9T3A^)fTqG8jB#x8t z@Zrh7CZYl&wgQ-~G380OF^!GzUiud}&;Jdgjj6m*K41Pd)A)#KRG21}Y05B7EA@>F zQ{VWTS8AKGjV$?W&Ni;kZVWDO{JeRE-L`!E%f^Qrf8VUIhgJ4)uClzY{wW3EIfAE7 zPoU;G1>oCSD$w$i=p?tTt+MK=b7Mora|*zmYu@|qAWb!_H`ckhXX$6lAAQ;X2m0U6 zzL()HWnxpx*|>T(o~cbJwF$L0@zm+SWdZoMOkdVtx=pUS0w?XmNdv%1&%vJ# z?XvTrlxvNiT;I`?v$UtA3zS6|H$~5%OQMcT&;H63zjon{2jB|ZIU>ZRga{He8lRq# zS(!W*B<&eNo)O~~odBEZZTi^6jJ#T#vlHQIp{U7~KKA^fF80kXcI1LNpi<1#5m07Gtl%e!LOcx#A;HsY$^q){}&4tnwno{CDNB|gKYVpV@NC0 z4Va))M?JD_g8zGtJdR^|142h?P2RwH;0&)ZLK92hU zY=V4AImhvd2uK&?_hJu%R{?01dBG47Vlj@pM~m(7R>IvnsLO|SSQP=PfzlWN5D3O! 
z&f#-2ISN5b$X$`6)&gU6wL6fbOt8u229&%AkcEum#Nimw!T*~z z#t9?F)DJXk#vOLyZZ$B7QtYxQ#UdIwK}JOHiO!X9H^dDHiZos~(Rjs;P-Kd{Rn^yC zQ+@pi>wX1bZS}RTVd270MSaaR)S(jBP!CER08rv8=HTxIO5Cclvn%DW%Fco!2a1_# z0GB9cPNJ9<8HXun(rKzw+fkjmM-K&EVn6OrCj#)PQsOHB;4LaA;x+&p9TB_XS3;c- z)g@hupuj0;CB5Bwt>o{pCibX{%BCFUL_Gumh{{qOq$V9m_ui^or~{Sb<+Y4H=>Lal z8A!8Y2%3YyCF>@-!cLqts>sITP>>c>g5K^@mEhOd1buzxIt_Wgs!FhixIX|Lh_4~o zhyBq2@WNUsH6bEAwlL6rk#^oxKlqjB8TWwZz-R_~^VGX8k4 z>Em?rKJahSqhKBg?n@?>-i5XW#}Z$Dy~vF;TFx`z6=gi&CI{y_J?bI;(F#!et2F;n z-Fth)aNddy&>gYGMBc=72l6t5x z>-8*NFWHPAppu*{=V{)-JHK$hkH5&&b)g2i9!POhyeepJX&{@+rwA%j_1ReQuz}zfG*mY6AVK04VupuS@7{FG*JK zlf1c54A>}3R?os(XJLi<6YoIV(0mt2`jhY_!G$uc^bB6SHzhVSYBc zu&mNPiv0=j^Rra-YHLZ=tci0sSjn)qf;Y6J&+P=;>j(^_Y60iwj914w2#cy@4dAZU z9=Edh3>fO3f7iA@R(h>Lk1o<{vhs`a7x`y_vP^1BR`ARuIWuCj4g01qB3S|WaoFbt zJ)oULQqn8c$=i)5pbYAgc^-I=9aA)w^hpgxwavMd*5-lr;5XL3*u)m1;UMrQbG!AX zayUMLH3|V4oWx6|?(RZ0=wbF43z3|pZ83!kYvqh-4snx1`Na%jKOF@$m3nrfanA6C z8ljd#^v3GyF=IFdpASw1Tstr5z~=Z_qX%Q!oQ$!!PH^IalZoW>;hy);!hy{bjHFfKgMl`wU>!op141F(v?vfq5z&B2_=H%IniHC6 z9swF-1YZzQ0BdEiW+Bdts3B<{gXksA4Ma9BXdb;s;DA~MWO<#fiYO(5^}2u1Yhf>% z&&n&bN~1q=?(uGf&Wbtv3D)Y&0_&O3>Wnqoj*8Z3zAhqe&2N_EX+=DN%c>+ZHway; zHL%24e3)9Dxl-Hfs={mBH;pl-s?kx6o)J+a1OM}cMsp%+(z?SCX#h#DBw5Y$^~GAb zX|4YwG?xZIQ310BBQBn*RpL$AjlFX&mD#Y;ydl%JOKE;pZGIJBG_JI=nT;W(wO4H= zHJZL@+8exA4ZT0}BVBoLO1F>CVnf$L`GDA!YNhx_MYxe@b0{Do1%eRRr8PHrIdS zGFZGr_lwR8!0C)?c>WZi6XStpw^nl@@p1 z-g7IpkWvEeYM>ph=RN|!{~ib~hadFAOb*|RDS_9)b>F)6j+EJUFmvc#rDIg>7*$%& ztF7n3J6#El0NnDV>07U*`_ms}{k02KU;Eo1`P-K!mL`_P2cZXHW$PYw>mJ3wSM~4B zkPq0Zm367ng0e)b;F82SR1j1t>#lC~?c5azP9&mz_)0nF|9&mJ+1CXAEo#YvVFS}(6qH~}E-c7mtd-VFIzjz+ z&i;=n?n_luYtV5_2t#~PSR?p|taRKGq8yJUF2WTrV9z{gIEgsu{6#IzVP7V0`C<67 z0Lsf${;n5}_R^tCtZalqF|Zl31vbTZ0GC6-D)AKjYUOYREiB6-c`KfSSK=^&P6Vb4 zQ9O;0LkLC?ybVBegLVXh#)URWu-xin9M|ZCD3ChDpwZ|{5}t9K7$l3&v9Uzv(jE@5L5!MsDW2fT7J6^@#rTV*o8T*^lt_5Z> zu;5+nzg4wR1+Quw=MJybH7)K^o3|--U20ty1jQ#QSH?l1QivzcZmP5<3 z+H*kJcHrSgW!sx-%fQ3aO3M+o*0rCrS+)VdQ_=BidD1zx=JxW8iYIy9QEFcgf^m)~<>r0i0{c?7x}^?8WE$r+-9I z@deF40$OpfLUX`5!?Q}%bFGXQE=MDRIEim?TSdgU<^-F8XqO~-3=Z*4STq2@>I>IA zranL0RC4spGA|HPF0_LFq6WGY-*(lvU8lXHs;}*9-{wacH(z$C zo%pn@|OfL4{_;~)~46{LxnwYc(!ir6rfE#P!_%9g276X-Ej(S8?0CFU~F@& z(s7DvnkS*OCW*E=hh`hj;O%S_6}sxr;;rrO?ryTx!E((#ErM;Yb6-4eI`_qZhE5U4 zhsD(c9-B+Hw>0k2g8Tf}&!A@DD})#TtQrQvIvcQ<=!Bbr zU-~M)_OXwAtm50G`Zi_AXN757a(!O8d_`gQsLY-WvqyhZ^?*^By(+UeV|)l*3`R@@ z!@LlMgmwKFWbA~ITqBfLOfX5;7eyY&6sRRjCOJUIW=gtwXF-=JAv~wJ6I#9$*;olh zbi76V6|ypuAj|L8ix&a#!@7^sAf7Sl`u7@*9 z-$}LaWaivx#(!S%pI80o=iFJw_chb^Ddl3|FpAM$;@XPy;aU&@}Md)nVGbz${chC8f`;b!5j*Y*h-37EtoMP#mL=J80PT)nR&O R;mX3tPf6+XHR=%T{x7TQ3-T{c zmO|~g3lEkFj7FLDBHGs5VY8WxvdN(CG>dxCEY`C#)4J%k;1zBm_M!p|6hYVM14@%Y zgDueQ?>m>g)qe^ggj!{Hhmy)wFU56Ar%a+EH6 z@#Tw+CXRcT<2l|G=MtunDPazoP2@JmEeUJLny`gz346%S{48-t$cZpZ+!b=+X^qz; zYD2Xw%ocYg+#xr6x5w)e^`Uz9?udI5-jJ8QJL3%rU&zPaUGc_5Q>cl(*TkC>{*XV> z5^7=Y+IVZCE!4)|-SKsaKq$c8>*DQ+^`Z5Nj!;KpLuf-H7z#4~`gmtzV+c(_ojmcb zM0cp$#0e%dcbwzBw>iE+sQtjgaUbEYT_51_QOUuDdRR;!Vm1mJjWI*LyyZOC*Yt#9 z_nB1}A4x}|@rWd;w$Ydn=ld+G^++t5R;{OEQd+fLn4E|UXZlR4eK>V(B9_LRIhh2k zy&=TL#?#?sN=(4nA_|Grbs;IJdR~rP7mkY&p0bvN zbf1;#m%P4%SM5g*$5YX(s`Xgx)M?dzk+~>`!-)7&Dw@)&;hu=6(qU1MCgW)-s^bnX zC9D?5lE8&bJQp%w#Fz?M1gl`<%?A*Tw}kA>Wo0f~$RRipW@j!($i>2(%;gHzFjoz^ z%$!im*WTtnc7K4e`Vs!x6>0E-`1LE|d-V7iB`fmxtcc&B$H!P% z5#NjWh8h{_)<|jjCXA(K)lI!IhTb?Nj!92mM1WcwONOK4kz`VcOHZhcsyj8A*1e9Y zo=HgvR}7&iL5k#=3P;h6db}9*hU&R#^fh#s8R@9Yz98!GE;oj5de4Mvna}iDpWK3< z*ah%}LnWR7`P_$`*b9#@@aO*a_~1oJ5T(JXh%`QUM3AngQxk*3sl@2i#peg3lTtdB zzyRk3Y4E~@^Mlv5(KDEUPh1+jQEc_V#MIw955yyh5k7Koy15b=110ZvlqS^y+~V@X 
zFKhQLPTT|^8qT~h8o@vek4B>DlsKg}@*^~e!<9JurmeP#J;+r9-SkH7*8nw~wVo9$ zZYF0Rz#5fak08C$m-EAP-_k0rHctKqHGE>Fk_SuR3H714oxrbI!w#g8sN-*O4?XlN zRnI#)Au-}oM5%F#+mV8e8NyF#%xEJA(EOgs0Bsme7K|p5I#H{p{$}u{20d|7q+hcc zL8OtWH*RsccKXd{==b0<{T>~lU;b60cn3#Ijr8e_#gc%pIT-{Y3HTPJ55^VT-pvbC+y+IH*IY**Rc z-g)bV*$a7VCvMr=SqQ9$`+VNI9=B{=UkG$)?vA{*L$-DldbZ>)=J&r$ch4(w&nrsL zD|u_nT+>|i9*V&GE3);KVyw&g{g?ASugN{HDLt=M#KQY)vh}sX#!b1N{Jt0H-uR-t z@kM3hi+O9)EI+sFZY*!b`-`&m#ln`s`JO`SXufrH*<#*m|C|H(l0e?-pR>+g$aUR& z^`2*$Gu!M8AVTqtIkhQB2r^9UdU(+F#WFDV`xu<1^@mm;-Yi&0%~@{L#9MB=LpI)e zn+w@_8=wR0it~y~OU!W};HAtF@LPDtZIjV2RKq)wu9kNJy7?NwI$D=P^)8Oz%GcdC zg*<${;N?B|dxZwUD|koEsQHR#s)~a^3tzIMEKI-P%Q0_WlgX+1C4~$y*_S?2lfM1VxxAPr< z>xB+}!|f{kg*L#KYD74&^1&5l2B~LQuATfwzDwxjyM>K>kI;xRdr@W=zX^GFBbR=@ zZ$;{!EO*ls>IIqEraDW@$K>B(x-I`;_Cav@2Y22Ik~c9tnv1PK12sWhFGeN=F}N)_ zo=#6lLxY16@n-D$KuR1NjEqQwJGXCtdSLtZy*qZ*u-Hd$GO8#IF&CwOJ{e2LBJtRC zEIAfTj|;(K-g<`N835EcWG7+RMDo8r2`h$@mvf+Q%SA4`?$ho%Za;E@L&B5B|^7zOr=14jssLkL1J z7(!}`mQrh=6hsG8cT2%h;RZdV*v(S^Kr+&%FHOOtH!+1{)I14Iy)FQW`g8{-e^pui zL9mEOO{19=#$QKoL`2N!Rq&uUmHIu-#s(p>JTie^F@-wEV^AyS3BQu<;O z(LQvPBwU+B8>r|Ms-!U# zlo-U)MQfY||1XZ&IPMQk?{aCZl%>nK7UU{IN-;PtV=_u!CirByuwF*SbcOO~*A?P< z*o7GSd%Cvd%W+rirFTO*N6FV%7IGm5kzG6om%d^%7t?T1Aam)ZFqd0PRSY9pf?X`E z#5HK?JcclD$(8OZy&(^ip|-wK2yf;sA6q{_OFzSv+Y2ucPmm!Y6;-`0&!l}v1Yg{dVO;1%DI-` z(tFGA8FR*xu^ROz#`a;vSZ2(3c6?)Oki5VyLpyY=0~>3#mAG+s8Eb&n_0L$)EZQ*ngRkCt-o~3&nMvKHS{U-R81f+b zon33>(OU{Pq|)cB=+AID62w+qM(RlV#q;dnXFtP)+b5u?nuH6bgkvJY^H@`!5HalA ztlFeWP<4_nV_+E}DALeU-E1Az)P9mCHky+YsugSv&xAwOF`k+T$AzR?%a|3sf;mxL zSlz>HF^8Bbq$8>=5}z24s4n&hW2sfm(H;2jM8UDKL`0<7!h})Pu4#!xqMd5ZX(r(^ zZKXp~n@Fl25^NK>9hy2r(ga;5&ywt- z$;oz2|HLF?)yZ@+P*SN4$S^)tsl@7Ys#YwlqZ(?=v8ZZJ;m?n%=1KbVsw*63+SYJb zqIG~)e_XdTk$D(C(+wp7V4#HP1Mrrvfs}o9i(7Kmz3I5)Sah~7I9uoZvNNbSgL!AL z;BU`b@6;AH^e7v4DjN=Gt%|dKsip1ijk)X0XIO4IqO=@ABzGaOfr48LZR_EzFEq8{ z;VLw@;Zala^!ed(6p9Zm?3Z?~W&PaQoLvd-Qi8(|&*on|oA;lS{pS?_IYh6e0%vmP zwxKnoTQrx@p?p=k3#_ZwjVN|QR z*n?-fdM6OSoRGozTt8?*~3)E^Na5G1$X-&U&w9Djr`H& zdzWSR7R9|K@7_|*zjfW(?OM0K;Pxr*4X9J;Wqavm=jRSsV41U1^8nsFLCu>z zyj;W8HB>k8{!Tf#M+xqc-Fp@H-n@Hnu@ay8+vi?V)^AtVA9ygT96YNWjO3$yKAFn< zCuIMG;-6q0(AqI4=JqIExQqSMdQNUVr?gW4)IF|kP-;7r+JR-x=xNG|>a5o*>N`Ev`9uth}n(tQk|rx;jH<1|xJuU$}Ww zS;+v#>}!^+w7zJ7tXJk@=DF`xs4Ebrx5KyyVj&r1i~^Ps)WrU4)!X*wtfOU{g`sx>-mnbui4w9;x!wW2l6tTlE99Wzd?O&LeVXaBtU z(y3d{h}2DNt>|-!r0Ewg5_pNgc>?DMyh7k*0v8Cp3V`UB@K8-R)!GU{va~2j(MViS zJ<@n=G!18j zB1``l;1*X{*Pit}c6;AG`PRuf)0=1RoXMIB&Xz@I$AYtCE;E_Znd=}`?Lg#(+zI!wC{Ks)QuwM!6C(`mX18<(p9+~Yac$;T!vype4 zcb(Zo1$RK3Zn@pL-SejT{c_h{rE9P3-lw?t<=I`TO|3Q~v!~j=9=xOsUZTmlg8|vSHG5QU+&X(AS5qu)Ze1=Ud$%jz?b$t$g@sF(&H?pM8)farIBqdY6(hPj??Y>z*#L6$)!U8SdTpocW%t4=ezTs zLD@5?cm@|eyB9pWWzQbPv*-SG#d9!gEjZna&ftPGm}`=qy^6CpU%FX8oQYPVgTy@6 z5d|RjT`=vpo~jgZD?6itW>!!-p2Pw@N3e)`2k{l#i_@S;RrJ7|95R%o)643PS)H2k zRn+X8#lmnf=X5%Uw(Hjl8iav!MW^=PkZxJh#O@WZw62b3YlSXe8|f);nT8n7Tlp#x z5bMNyOqA3G@k|9iAzX1+bfMf3!sT@Fyi^l#7he00o`Pabg_DLj zJB(2(R;nj|higo$*z#Zg*W$}FeT}LOO2X*4Y7?>IBq3)|Ma1KXpw=G4yiZ?%mNzA; zuF0eX)s`?VsE+j1gb4t;w3;o(nD%G-|1zULI* zbJ^kS@Mpdj#n+V^mVKKQ-{wW%&IRAj`#WUcQ;P4YMc*?EzGohqWZz-McQ`v-sBd`t z>RVUmI^Rs*NoFmO9hA1d#kT&1w*L9Ea@(NNHkhqnYViM|G`IWS&^zC~``!5lxn;}z z;rnK}VVBadYnijTHZp*f%ICkkX|D6`mYgNWzdtq~dH>4&-T(5yPYyh^JUsiTKL7G7 z%5$$SKKGr4=f0DVj3|-ueC&!6xhg*wSDuT@yAsN-gxs5ug9+J}MAbar*&T09W+#`L z+LWg5+(I2^@MrDdI zV}-)XVyKVKd#+WsvHjl=uhtBsRg6BN2B2lOoUUdiES0Vm*s>XF2b^eEbWnNRgMXEwOYU zjnhp!rK|A$17s%kGFtaIut5oIT?`B?1P10MIj~C!>|(l(rP}(ebZ6?#gLe+jon5Hy z$k%ohe9iA}{XR(II|FwIvcpU6hC5)B=PulPeLfXUxo7Keic}1~6U4&>5>?g6eD|$e~Wm 
zO0}bBS}y~{PbW4;qGEJa?#QfBmib#OOD8dFlx6uA%hDHuHOjJni)HC7GR6pZMQ^3n z$J3|@@T@b-#yE_SQYyW^%!iGRVoIHz=4mO?Z~*9yq%HU&PtMtJ?w-pa?QjrFYL7_7i9*vseCn>9Os>`V&r_7oa|Ftq zC^4LUHjU9dK|v=8=rJXlzh&N1Ou<zURQh9eMvz*?&~=A0=YAA=mg$ zGFwZ!+;w+P<(hsJcrWndb03G~&V5SfKDp^>rRixD;95fPTwJ<>i^`b6J%M#7Z@5CXJJ7|Y^~uh0-JA#m__gGF zW2q+YRqoE^8{AFHtK1FKok*peib<}McVq4T9=3FiW$N!mODo3@S3z=Dj3mQIt6V_7 zE@pW#SP_#pJ4+gD)vlSI_qkP60-*0d=5(9$t90(_Nm3ju7$=fxv@uS?2$dail=wJbHTGU@7cO+ zwloYbHEo?gdLNYZfYNjz&+f05S~{UcZa`KgP-tpfZ0cWV>X(}al%|1v>BgQ=!{Aq6 zeYNc1y#2p)b8Q<@n?AXuvQ@7%NG+yI}t`J1wH zpW@t?cka`CD&rs;ZV3mQxSw*~Lk;GiHgp`?Y5VC86X0Jx7XcGsbk$|!A2Q`c6*1Lgt{|odzx8FK3FN^C*ol_-Vdo9@q9Id|G7`7NIh_kU>2KmTXhQ9ExNTG)?5^UB~vrsSm zV+@5d(G~BJGem%GCo*bENww`nourCCfS-|&QljZ~Iw4(Qm+{w>qIWeqYIbd*{kkpR zh>l`|ZPB}F!MjQJ_9@=JytnVG&lp+O3E{VsolePqA1*d{m7q4oP zRk@#37DKT~TCp$0+sXoIRfv7f5Jx$r3S%&I%jv>(sb2JiF&oDBM>=WOHc!{Ap|e~< zRShdcj5Vwb`PORK=Q^WWq7#!i1L{shU?`S?zze&0)fSCFnpSPhzD#X|Nj{x5rCr^U zl&v~o@*KV%iQ^@5lU_6}WD`u!5nznmS0~bLS5b=Qh3l~>>hXB}N zBDPYsk-?iJkxavc?$VZyR-JP4=~kVjv5J;MmXK5$U<3^kb3^;w3xCn`Y5&7XdFZq< zbUJ@Al=ohey_Xd4C1??8_qlz+**6n{Y{NF1IfH|x=|m)WzSzyLQ*o_;vTyjW;Zd@8@#~o zZhh9YZ09y@)igNwugSenE4@$4&aSL2dtwkxa2T8TZQ)6SWEV7>}J~1hUG)}@Z zyVR(@N}!_DQ*1PlCw*YxJolmL3_J7qVQ~YOVecV=#h()R8G)YztlYWP5P`xgc5Zb) zD~kGpmLKfg+BsKU*2-#a7@JlEv)T;c1!I%T;kojHeL#*Wf&60e}kM> zV9Lo#BSe+0VoP3iC|~@yNTqdZEn6UnObVh%E9B~Ci|;e`rD`(M`2eKQF2l68tasz!k^3O zaziD$GOnr|Ytt>|@Jwx{2F5B*qhKMq*q}q3nCh$7))7Yf{sYB)NP*_agj#Rp#zfL=VJlfXjH=qT7q;w( zpHdJ^{3k>h?K^OsbP}idRJRr-d_4umO`;v3@}fg;ngzWK|LMB&iUVBW^DHGxn8$UE zd6hGJEZ= z3l=We9O<4jvEPn4{ARb@ty}a47renAHs$yqrQS=)y?d43z1pUS()o<+eOB>4ODR`o zF}vw*P4>`ZEQhau|MlD={u!BYEeD)^^0xFDiizhl6-Q+w}nK!Ry`rO{|7Xt+gL}u}DkT88D*1mi zPB)lyh_~`uVP!;ipRnM$N8Gx#!cGzW^Z?1lKL@~&ouY?@F=whN!N{VKKXcQe-$**l z#F_FZTm~w=w^NgMA}7X8!W{kVottybbJy;*W_Q4%#a(Bxes5bhZvx9qw{r_ai0X(* zVPd&1h$?bX9XL!il9B|~N$0o5CMKn*1OBCpR!oMn!zh=Gmn%^=<1lD=9U0=g^bt`( z9voj!6e`luTw_nBihAb7q}yQF{5Y>>oM>-PUHHY#CM-& zTxs117D^VUp~l3evy0`(T7uGDP*1rH@pL{WS!?ju>x476_78x^k91rTUxeY|I3wm! z!i8hdmKN{=(GF(r5>`&(8l9=rj8$DYsfU9R_z;8YgvG#h%zM}lpyQZO0X>H5(FEME zB#g0R=4u0deo0%CBa_SzR<7`3N*D2epeXH-hh42VD$j*0UDSDJ_#mH^|?htp~j1RPK~H3o(LR+a^!ze&=&;$Hvt+z zjC~is!kgMcc@{Z-;_UVOeH5T^`z1f1h>>pY%pp6c-5m9{>^o zDPM`Yg4l@SrBM|REepgg6-%nn$5e&vrD^t8$OFep%1p+Rue0E3DEQiN#P04%rD;;8P}{zpgt`lNE|N};8F+2iz~2!JmMWbMm7&g;+Ctz69X_5B~AfkX!@ zgr!QI$QpWJvJ@x_L`3-vt_snqN?5`R>?LlE2G^I$9A=+aG3@jaf*8dO;-#9?Frulno|?o1h>I15@KV7Psa|%l zmp(K?-&+IcJ>6C+d7wJzWoj>JRB0`3nX}8D4T@(&-noHQS<^C@j~&CB#;|Wk^uA5v zQ%E4v9;dh$Ky`$}d@72g#Iy?~4iliUC0-*i37|TXsW5%5M3X+=A%7BLG)*jR`-uO7 z0IdNc5l^Pq5ckpBQ2?apBcd3Y5+9Kl5p3129T*n>9r^rw0x|){P7+51QG-5)6~$K! 
z$8m=FZz%A;5ukY>4iUg7B~c~;CxBX4Jk%Q%;_+}8h>ai;L+rDfSVH(_NbjZqGy3bR zTR@!lTQ)b@oXZ@6fqq-h-1TJ+kF~*NQ@yQWnbSegY}~6I|@vDQ~9ve&rbkNNWJ+o}_+h71- z1l#Lu`*YT14v+gd@`}fUo`=?l{KFb$=p=JjgAR+W4)k!@LeK8a+H>cS_B`^O_B?xm zJs-R3v+3D$cSCs$IahGmhBV8Z5!4uvV6yF)OGANe+i~CgiHp5F5FQ?R)cx>;a_|M_ zE(0A&h`wz9wdz&3dR=_Br>GB}n%m7D^Ktg52AfT`abUj!+D+KhCm$W!IlFJ!Y_fHd zZzt8%cl)yDp#@oOTj!1}b9m$~utyo_u@SiJZnODwqstr~_j&esi1={jFTA2Xqhs1r z!f=FtHP~aZZ6>4_0r1T#7Nm-4KrgT4Q4V5n8l7O2Xp>q?t1LShFGwPhGxn`H)eaj+ z+J<4%n@vA9#}_FAP4D7vPLzF_R3jp6if>@&dIplofys0%E)7(FlvE^+OkKBTEI~8+ z@oEvYJgA0@Gf~^Cy$AU6Crud#qc#_pm4V_j%<`zDG4wGlEn|Esis-1?RQY|aqK-yl ztdkn33}UBYrGXp8uTN^PD@SxZZDu|ntJ^_TOkjYfwHs@En3tV@8?fWt3%a`r~ z=f1^$1+Fb`ybD}Y-gpS=ctxEH-1fZjE^tTk zUw5xo&wY8$a6{{Fu3a|Qn(*1cm0)(y=j8h(0ZcJ-?d>)#Zu_LwgqgQG%)aqC3;A-5 Ia5fqLAAGFjqyPW_ literal 0 HcmV?d00001 diff --git a/TTS/vocoder/models/__pycache__/wavernn.cpython-311.pyc b/TTS/vocoder/models/__pycache__/wavernn.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..26eb7f3e17d1a863545b9965c30d4ba4a99a6d8f GIT binary patch literal 40105 zcmeIb3vgRknkIJf21t+~K=AznMNuLpiV`VGl&F^_QIz$fEK9N^$}$WQAPI>CsS8jK z8gy%B+%u5ET|+8*1bfxn=w6S_UTd@LTK7iP@h0ktCvmE_D!9y@5Q~g9O0upUCsPSM z_9VSsn@Ya_T-=L$K~gQdXD3yuTwI)c?z#VYpYxyp|IdHU`M9X4fWtLBbY@leG0T8u%sOCY zZn{z1KmqexI8ex*_OYTty^b3z9&j)S<7mm4bHK^$rqRN&qJbi2H;=l;N(V}r-7@MP zD;p?dcI#;QSj9jEv)e{1$EpUZn7v@MdaP!khS>{8Yscya>X_X=T0hn>(7^0Pqn@$G zfktL89$homG|&J2>ju{0z2qHkV7;C@%=w+~aDG>y=t_%RC?6R(?i2i_ zYoL`ul>*BB9HgPah3K$q%4d7}6PHn0`LgGJ> zv%n3sGsrqX)(4ujkOQ0jy3<^1!&d~|->T2p{Jw~9aMTwLXG}xEz^K2~kTLcK2O}Bd z@nATTF`b#57!9=QGUkKh*Cv7y*z}dI8J_FHc4S{?ooOcp=x4 ziP7byl)I{#rr4y!SPr4@xeeiJWf#dvB^>24X-~iIPRyI3P%yDcPLE$ z$iq3`^}wtB{S;p~KnWl9Un$QgM%5j8fw@#84d@1p+UV;h#Y5xo!Z4WufTY~MO2@zp#Ml$Qtd-_M8Ar1 z|A70EKB}8SpZf^VpC~@4b)p9J{lC!_p{xzzTj7j39Py7&qOfo9!AJmkfCC>44QFiE zeSFY&c@#xO5qUhn>DzoZ9N@#7Z~4L_n|lM{tC8`E%?HQFhHjlbvUzYa96_c-D2edq zGiOe3zAlx@<}tX8hBx0pN%Emk`@}64L%=u2*CE(S{KKs%g z`*ua?=}}!2>GJCz89qWye?qm*t|;eK?BCg^5|hR!*}%OZb!VINl62Wv;h z{;#MI`hKR*6kbK;21dPMwDyega?lrUwPj4<$%z1;v3kA1P%z^4@-{$IXAWKO@Mlbe z^pqZ*)R7{CBmLw27(NwVrWdp&ys2M02|wQS*xFxRm@&UM3QIbuRWej zl^qhw4vA%lV#b+*=X|NDd$@OQtx&UBtl1nhiq^`f%Un{{O5A)s;?}e`N;PY5&tQ4L zMRn8qD3^qN`4!EpziIUImD}KQNKO5!TA{W@un?8@r%u805D~Z1-hKq zO_A@WOciAt>tJAX)a(73j^9ElrVjLy)M`Dj@?=mm2_J@mb}Ijt^d{4mC2j?y*kY<( z;+|))^sSg8e%H8vbW6R>l5lr#pV_UPYqgUQNvWw4!SE zR z7mcaP<3i=D~jQ_eB>ZJ&3fE_2Mb1y|5rziE0h{`p}iMP>-Ag|^ryDnPU~ih zzssm`n&UeWBu0z_c<=|w*h)qh8HdPt8Af)<;J3k^(G6)v3x;>6oVliu9V&JKHXML~ zp#r1C@w>+p&O0Y(PD-P|$-5^L`vrTwXs^#7Bb;TkHB_Bt%s`!%IODo_<2~c7F}9yo z@w+GPo=7wb_FB&YC2kedZ813s z?XPY~5mWGk_$$}vgvD}g@twx4p__Ih7{~8&raH)j?PiSz2k|_m8^Z_bNcy4tZQBhAQ zAT2#&_LzNn-y~emilr8HO;lYH4$Cj?i1Ip8IIW*HOdF?7@IkF)+2zmcQ>#Gf^Lrk` zQwmibsNX<8j83^irZ*}pc70^Vn1{b~fj-o&LiN!&Y*dFsV|Ub~4vSp0qcr$1a1j4D zuCFM5HXyZ{LEsY)VVewvuT2I5u$uY6M8Fr3DwC=;W4s;=+yG`EVlmtEFK2Xv8Qn-m zcOy&*mWBk0t|IDd%9*R|EEYHh=kP5Uzyf9E4_>?f+P(9$=kFB6bg_<@FYZj+i{3qU z_gF$tsA19GykKum`UHE6Xm24zQS|Q1cVCX5O6U?D3ID^QP2 z6^sI-=_R0y-)>4#Qv`$fE0^3#z6&SqI6EQ9Ge8vH80W7JDx?mARl33yg8tjU5UY?r zc}?E0t5Wg=9HKPns5!q$;wu`abVWg^<;cLOh3||w%`t?z(-;g7chxF4nzcFgX zvp_?V0#Y0MbfG5BVmV~L9%0&|1%A_`ygW|ZqxLIGSzIZReI%W-P5ty+qBg%iR|8Xp z%LP%W2PH>Im)f^%j}~gtm=2}(Tm9Jc{mlF^BYLg<{CNb-a16hnY*ZJ0pdQxZnX!Pj z^h4;yH{;|3-UtMTM<5F&DJcm^ z!8Y>56}A>jRJ3gfSW%*l9h2EUD5WRN6LRB^kU>-he}aswFd&nm1oMPZ_y9RrvWdwU ziAt9SOKK4k2F;r4VY4!?zbk_$Wih-H0~b)$-?%wj@k#;~h(`9zH3&63#hRUu*N8QH zn8YMqUX|z-YukkKHX@zz6F zbj`-OUa@WWW1q0*C2`G5v0iZYPpfJk*1g~GumM38r_0uYO0u;uBX%fVUXdtEI^VBK 
zo_fD7Rkl9Xm#(NxG{>*R4nHk#NScz@gz{FgycMsdWwSdIr;@t4>yK}IaW++YS|~j& zmY$CFGD;rw-J*TXf_+VL>s+Y9k+$dJQstCK|};?-i_sD4OK;^+PxB5Jj=j>(Urboh&8jKM&+ z%u+wL2srBS#5g4HA^J)UAWP$BbYmFaHrHry{?rW=S6kDEt>NK>8Fq z#6mDNW?MztRsLY({f!CVz4qDmMc0M}*M_+c!L>D7ZF=t_|c= zmV=jGH{IWq)Ft7mS#&kW4#o~XbCu&umXgxl>7VHbaRfH1o^epjSb>cydiTiPBXQrI z<1^sQ*l&M=evyZ7&G{c)oqIjiurpP)Q?&O;q}E$^-vSlp6C57V;Yl{79L*_f^K+;x zqIq~SUZsK{oT{O&s!Tu^D;V4DCb0C+Go&~vWcn+YJnnxNvdSvgora#G7HPo$n|}`D zZ-br!O|vS;MNB*+sw<>9T5!AqDBu zay+awNFBO&RXsq_m{B}$d%XLL+SIuLq5Ps)ev!V%@LP_|30bkqyT2TnUwBbuKCO?a zP;ZWN%W-Y1@@Oh_xzg+tL)J=FA+5^gH~LLlL^hp8*55NN8q~dFT4;t$i;NYv0?1gg zuY`>KW@@wyEx%xHvS``TpD~XHLjfOu1`|zoMk#rRe1nm3K4YY5%uJDQG&mgc4vu58 zix20J_;A7EKbqK6@bNyhR@M}$i8PG zZ8p6Q=64mdq>zyJ^9%`jQ}7z(O+A*bZ~irm+}!e6%(Tb7sRTWnsD4Tj_wC#p-5n(1BrZTo z>BsTMo~+V?ht1`CeEjg_7}%X~dqDxCI6{*6-qV7H-P*(4$i07Z>~etjj1PH6f`0Hv zp1~1c2(!dtPcYF$=$Ka=4GyyA7967NquloF@vNuxtXGw^k}sf?H>uXM1r0$@e)k|$^@=3_ zk%9-_fG^5fFoZs*boC)Um}8P)X)C`yk|_Tu}EUM6L9$I6&O5G`2DTr z{7!o5A%o`aG8V7bKRyU$qPz5Fz=)G^c)b``AdaJI2;7u(3JD1jCeK*#>VuT^~vVcjw5uhIV!9Q` z-WZ~lli@1AeJIwIdv}!FJ{T*#ef;+E-#eZztGIn67FO~gS)?XHg`jC`b~w=W%`d^; zsMYZNx`1IwQpVH2V;?a34N%52`PchR@8|~1ely-IehW-%z~;BUlc#70LdoWD@fW-`OWPoUc07^v~D36u@l{Y~&+>tBm9u0tpr{Ogwa zu4l@T4ZsZ>GnQ<3&zOPVhJwQyoi!T@f}xZ@1nT%cB8YqeomNW{+#Q9^t{hJrAv}}4 zw45kkMjEdx`I$ocAgrM7&92bVmErvQt4u({V>(3x-?<9BQGqAnNiO_q^>mS+WMjioIv?3e{{Mp6- zDAFL?IW9NRqX|_>v6dt8TXNipIu}=z=^d%u!Uab;zg3$Hl*9U5yqehDvb)ArNrT3< zY(?1e6ph=wqAJ_3aBHg*1T_MtdT^e|G3Y9iSVhz8IObsmQeX~p37f_l+d$3Cr zqtYT*4rOImf5D@|WtwoFZ1CK73}2I@?+{<(it(*nf=a0%{MF-IBZs)G?3&~+n!K$X zmlDeNjBD-pj7!0l%TnJghv>JT*4EC-IZ)c$|0mj4Zk$kB!HVto`?jZC_)5($tAX#^ zeqJQ(|G@2~wRrNcfRvYus&ZV{Nj^9ngz(if;lr8)&nC}EBr*~1*}U1u-wa-FALoZR z`!0t!ckI~G*}i2**Vb(c19@_2C^#5|&mb)=BDt5uoaTZU4n{^MFSkRSw0VdG0OP!W zbCy$=1Z2eWkw7gc8Y35pw(_)O7dQ2IE@J7-ARn9{Hs(^!=c~S8IFRS})k*N-V}V0F zKhCe}@z71S8Y$1~-o1N0v_8p$MLO-WAVEqC1ao5(<9vk4`@rrUWYnKh5|n82olW>2 zEm%wIGC)aOsKB*I9$Rx+dxcfT{>$LIAZFJV&*=bWK#@z7JPQYm_uxn{a6RA&qXq(- z#)me61K;FBav|1Y;zNkDe6+%=&2v2%h7LXxUqk|si@@$5A0xiqa|5+SK4@ha_=Cnl_4B%FHN& z7#s-&QS4zRdf@}mbfJW^OtJDnoAcZl2}0OMDU0Aw&QtgWBVqEr=+Io>ADGVVi*%Yl;jXHj;v5urYiJ#4!?4?-E$hN5kr2e zHztpQ&YDsXdRn#-whmiIa8fp&&$?6fwgqQXP$rU#1Rl@RhS9KUXTS-P1<{%;+svap zaH`95vGI~TS%5Z7I&optQ~A$HK!bLgkDdO!Q`J-Sq#f&QM2nYkFzOnNuH`Z zk}({9^(+?Vg+d+7>}1l^jA3Z7gN#lxw#ut6k`(e57};rRwVq%JbvOj0=v(|L!5uV6 z`hhR$Gzx-=a>42mt)7(C^JQUK+&>$f4gJZWP`E)X+(4>({kn8rJrt>J9~KLB?NI&e zkM*aEoHG|@-bh?uC~8g>HK*&EwC*<+iq@ry*1@CX{-vuv)v)`q|Fhs{A;EP*be%}q zPkia}q+C6?liNSsz1Y;f(9|t7?G&4KO0H1mut4zo&?Z*4&o>H{Tj5ajtk5yDQ!H#= z;>vBsUzWILi=Y7>6zg{gB|F6uQfn;P^Q8RqT5HV9(#-u$- z-=XuUd{kSO>1Y}vs&KC8WmxEf8}6(u!UTM7+lp1xjdVHPOs$x3+W2=IpXm;lR77@i z?&z7X*ZMm0jwLu|O|EHvj9vV@Y&!wNJ65%Jz2B(KU2op(1r~prHHu6Y&DsQuT2DSt zuhhECr)vHpYVoU-Kk|Iqmibh{!lzl0TITDr7A*6rf`w0&h&M0J!c|JGO;g*77$2=u z5n0P6Oiu`gF?;k0{?Y{?A@}VpMu-P7RK)T&*;sww3Z3 zqSPa=SClm!FPwVCM_%gl6a<&2ewFsIdddq{PkBTUgkh3GnF>JJDpL*G)M?w4_KlUp zcPrDv?~yWP{_wk{EU5@OpXU#=b_M-xNed%V2bJyvPe%MHa;AJt^L4|J#$U1{+xja? zn3AIJJ^f4(7Fm)S0M>;66M7gj`t~guU63s~(NKB6DcDUKGsGzGoobSay{t+ilmx~> zREPJ%A_HLqB`)kipufJ&rOJ0n_uQ#?WOf?ME-G3SH#WF^g<*`7sQe3LQ;7GU=`se8 z!mVZe0J;AVM#eN1;K#%K$K=5D0`!sbjM*Q!9vlqt|CC&b{N?)y)-Z^*giO)?=j8U! z$jvnb7BmtFdBG}X`CI-|`nr{jE;4>W#y=(BM&6qB;g^T);j*Nb! 
zlPPazEJU{l{Wn3$!`0V`|5p6FG6o2s_>%}IOjzShl94EvN`c>zSFGh!g#QhODHI_V zA3rePH)F-RZPpeuJuPz0T!gOtA*ft}e%&S%Z5NBSFBa`wDBAhB@v|18=%841Fs6S| zxU{?S!HN4Pl4}L`deOc9jxA=44Q44{4>qE(b*1GxM8*B0`LXWjEuT^y3gx@R@?EjR zw3_}D{jfxme%;BAhdpx^td%K9)pVxHIz{K!*nz)$s3j{`;-0xBI@T^3^hLY=-l3Tz z@oNv>y8l+P^WG0;e=uj9yPPWTn7@{??-cA-mM8lG`+dTHv7n_LQo0`OxW6N@|K6_IUGml`&wfY$l>eJ|Zzgni-kNy}`|hwF z-#W7dmyYl>4BPwb~MHvJ3ldKPp+r~i^LN&fdb&(R~BbQSP(9#L~c}>_V3QL=YmFcZW zjuhk~HM!W3#}}Yyv@ztU39Y;Kjp&%1Vu4?mRi-EdMW~GAxpa_H^>Y6!kMsV#OjzTA!nx5C;iR`+F6u`M6WN+y~;&tz7on ze)T$XJY6tdIBnOo&D>OOfu^*SNl$HwCKYvnTs~Hy#Ebq8;wWA%4)A{%jz3Yj<`+z} zLz90+!GUjryGPFL-MUz9tZqmT-8gIHpzOMwM_R^nmZ+ig*HQO!al@sDckAT^Hh1bJ z{#3K3(YZ?cN@MldQqT5|HCmD%>+8UM)2`{#Y4>#5D!!{UrMO1UQPdH2MqS|YjE@Sm zr5UPN-3Ms!W$@D|*JP6%uKF2uX#32x18YSsh`C4`Bls=2F@9^cI;vbXO{FWP3A~nL zx*V}tqvg@!yx58nTk-dd&An=DjtI$pv)7eo*@m)NC)?G}{Iba}sa0xo^>Qft9w}R| zPLXW??I}yP$gt{XzQ1W;ITdzpm0|sb+0o}tj) zqLs9Lz-xM;-+IIc>q410xUU)YuGkZtN`v`X7rrmLYYTb=Aw!^n2+ z7@j2FciE+dkyZy^AqJUU{vx8dXul81HvOEou1yzf#VgZ)c~srBBUBx^D#wVJu&vli zeOp_?zpukM=b$Zd6?NZA;KS{?aB{Xg_0hs8B%#{vigXw^XZ|6y%h^vGx{eDKHgO$T zgnvVS({O>iq0?~}K+S`mM_I_%6-A9QYSQNZMf)$dhB760$lT?Xcgx?@Ppk!BPH~z-R&4F4l4Fy9RqgGOL*RRUr~XtE znU+HWKG(*fVE;!Y+FAt9sA_>%XbC#IKkWZHrvmFA3a; zn$7K(L#t*YBdvNik4)QhVYh6Bp9Umx>Qpk{^ul;gpA|6Hl7LY?^Kl0|a}d6ur(Z`j z54~w;5g>r9W2=*-rPWLjY2<$iSpGjHBMKv<^Jn+ExSgGD{*1zEmPKX35VQ>fq{gs< z3~~So5AOy1uOWl1?U(K+y2R340e<0q4I*XH*_sCDRPZ>84vq#Ap~x?PG7 zCa+*3Nli%ryKsaxzag~pp6(d>KgldEl@iY@m4#d;YHIs6?g8+{j9b|qL6X24SjICt zAIrN|_G;TrI$Jj<_`OqWpOP^b`Hp=&3a(~Go@0*U}etZg>lIhs5W8TMpmPc2l zZ)#J@?tcdF{2!C?kH{Dz<3ku<`C(w=aKjDX^^BFRz|{EArZ;9p^Px7Gt0V*slp zGe**L%Giek5$`ZR>7|WunfO%_wf+$B8B1s!-+X?a+9ywX4si1Oc;0s_Q`m>i*&}D5 zMKsRyzoPFyqVGnOLnKq6lHC5OU5DNo5{{*XZPA zzmec4V}%+L)UKezV!$$1>}5dfFk2g>J@@{MAS_G`CV-u)BBTix$yl-_l`%_;m0242 zNUn}ERy2_SEsM>VFH0M_TA;N=4on!%|5uckFUX*_nz51i9N7s*GKMiPw!-ptFtd}H zTbbDf6RQ9xuyq4gy+*?P|H6Di`3gEwLG0XM$rfeC$})?jF*L&ew-gOwl`yg2iu8R2 z%S$i4#ANarn}S=WoXdLMF6v@E7Id*+1Gzvd`>yS-?M@+W29kD`VnbQd_qbO$TNpDz z{JH4xEMTu1=r9P5jiO^?%($ZCmyqgTy5pVkE^)m&8}04K9#0)riM4;C|FA&vjD2*9 z&gq_3)QS}?b8excU94zdtms&%==ep`FZ_?Y{_}mm+$ZchF77%mY(F8=eMaa!D|Tkp zCE~_sj?$T1iT!uqj=i0(sCn@A{kO5zVMPmqiyNW-@?+Zvw&a@kiyjuu>4eJGxigP0 ziktR|n~n$-M`ABOE3J62@BY4I*T;K*x_7>D{*=(vEjD#OKJ(dy&n~4-or~`iO3#a> z&;y~Miasbx`acf+G&Fzeu})amBd+TaDtC#MyRa!lZ@lT516zI>iVo|Lj>U>i3l*E@ z{PTT}4+|B&Vnr`fQ(7M1m#(f~tlqLvy=A_1e(3SAP~9h1_r(uBYi$0w>Zesdsrj%5 z`?tpW;)fHbNOE73crAJ6!wX__x8U3%I(IBOUs`a!^u#SV`$T77>;QJ`{p~O%WwV8e zzT|$PWSv;DZn30&p`?8-B9wHAC0()otN>|BfzubA)M4&q_C#ElmIoMV-1xJzUv&KH z{O9NKCe-$ewf%A1mu1z7?t5>{zOlq@D0)fvH8bI{yc+u%(a$H1ONs8;vA6*RJ-bsZ z#pXmh=Lub!p>PRJKWW8N*mPWk>E00UO{`5Clm2Hrc9T-eiIjWny+gA{64#PV$w(qJ z--SOk=Y}7 zEvt&3Ni-&2y?1f;VqE{sU6#<_>x28Yout}wG*y1;y})b`Y3@zh=B{BHMJVqO%R696 zy>cpUko-3#_-RC6F0MRp52>#ZJ~5ys&wPiy8Tbq zEgpDv;lQh@(`SSOXT<|&7Y}$B4tP^<`ci{_;Xpt<5D@By#JZt)U)o*G=FQf9yz!?S z=Pv(b^M}Ypr|9lXxjWO9^^27)3zaQ%&bj?UWjmD07Avs}Me0!xKpUx37qi87lXo{Q$pRV zsk7&Wy7Q_u3+`6Y-I{W@=8DNBRJMtgZSno_{a;qEORevH;{QDOc}S=}D^{OPxz9c; zb;rZ^x)Z+pyW_jjSQp)E7u;);r!dV}{bf^)6tT)XIOUvRdAsB9FRU81uqW==aR9#r42 zzE?Y2i)5U?1Y_Gi$t?A z2I~JJ;D1lXZ(wAsay+y(sT4AK4339E$H+91q>u1daF{BUsU8)j`D3DcuHh?)9t;S& zdxw4mZP@p(KD;`2NT}}+N;~IY6H0eI_6wy4(1fZQlfCaZ%ym85C2rg;ZhTp&dO247 z)LNXjpGp@M$II^wBr1~Ig{oFG@EwoeNZF5nZ8F#jzU5$mGEf_gEg3W-YWX4tjJ1uQ zB2lZWKx$hBo2>LWIe(B3J#28U4!A{$-*{$v> z#mez5P?6J5LBR{Y^k^HbMG$<{#$Yk+l~h_7`H(NPq5_Ee^3HCU)~&MLaenA#t>7W4 zE)zKl^pq=TO>r8s)9?u2Cryrq{dR6(|+RBrTidNz2nv9FZF-T+pPjpnUOyy@BFxp;8| zr8Cmm=4o$l_l$%tT~ZQ5rFd=xk{u7rHZo2tW3FQnA6C^N`zzLm#`mG&o@@&DAcAc2 
z_H;;pBj6aS7as?6q8`ItZs3qyv$%|a(nKqu`7MjybF|2Jg(kc=l}{E7^ceK27}t3yKj z#7Oi1CmFvc<0OoXP1?Jf_;9P-{;)JfuE>lX5*3`@f^+K##3n66L_3#uj5bR}B1z7(bTQaci1BF8^uyPbxpepvSjA4&1QvA)-pFVe}gvAckheY(iNWM;khAUZI@87?Xg9u*asrQU6-^A?zWf> ze(V^F?(U84O~c~6(;qvMF0Tb`)BXNiLivWd3qtvJ=%|&rA2{wilDcH~d_~+Lt>1|4 zmzc&2bLM&H{JEdk2rauGj|k4gqVw>g^XP)}=ocOTa);nNBRbE-%%GZF74chwvr%+5 z(g3xQ8Q9epgJC)Lf39i;ONO1NT@{IS_u7*k$s0e}CAiwL*RwGnA6cO zv01F#0M-@9QpJuuBceze<lGZO- zuwLmPM2T2;MZ0xLVx>Z9$3f}EE-ircj*NImx9~)&WNeC4M*bS4ugZtu2#z|!F?NzC z$hstZ)=`3$9D@*zXABT%f||-oHN#Z$@=nZ8VOKWLYF7(C1#0PP8^GckncQSS)(B5J1SxhNqU7jF*^!JFRjp&Rl5|FiUQyk$(z#QTaGz+Mkdcl`ARB09Rg9|kh*P^T1%_{` zWh5#{c+Fnw)~TgRBns;~$77lVR2L@YKy^UO4jLbR3ba)TO^ckRJ+p$w17 z8KV|5Y5`SZ>gTDn80w`1F)s(8)f|xAaLOzIa;nF9s296=Gla_o$%L$Vs^5$*!h7k7 z9bs{3K*kK=Gq7uzx{Zu7C0zM$;4pVa zSCVdMN?!lq?Zn$Sk7$3waPRQF!g%3)$9(wb-B1l?@~u-z+TGsSDmq&ioox%wwz*S} z&I``%qH{a3pN>>%bE=T;^oEUufY@&^aS&3G!|}1?Df*>K*J0;+Y+H}Dcky#~y)kdP z!J`0Qj$eykmQY%J2hKoZJe`ET)Rn6=yhrFJ)bMib8q;J$^Fo87V*^2GGP0R;*Rsb5 zN~{)ZT$tko>xVEvxa*^425DKP!I>ipGQ^4jB{h%o_Gh2AYWypR5y#{hqn1^}0Nf`E z?6gf22J#?XeiOoAo8eF%@)~o&bNv8iuHdIo<3~Z)&@;lyiRXr&gDu4Bnta&8D7K3t zBHoX3#r$0U{9=K2elbh%>kHv)1?WOGv?j$$jYLhWmMma$x^>VdBqAewP&kS+tPqt`5~Ile%gW(%Z0I6 zKFe8PnKza>OH(Mc=p4(2eVz`i!KQhu98a*Ey}0FRVY6Gevh2kHTGI_`Gq;o*l&!S% zJiA7F6`E7RA}>afYdH-W|Hw~2O%d%0O|)kxi8K-Z2jM8>66n$9lz6pVDQc1-rxnj^ zmKB0_)y%D$uN9e*Ta4gGHFT^*zF)+L&ec9_RnhU3P--r;g#+yx>IsCff5G?!8cf6^ zZ8ieRnb;S$FGY5RzjlL=TvBidV;eOMOoA~+EBk^Ot<}=fDSN?;SezgAymWyG29IMPJPuV7`Wgw~SI^;NVIcZ_eNS9|L zWswrz%JP>>IatD60;IuNmoc$=%_%DkOW5JrzW!D#<{ybgWz)#gIQ=!g^54LK8pAan z=kxr8ldD83O(s%fz#lJfz)EO+n85*PICL+FIZxUSCTD~P_p+j@J6I&rb_Wh*!{XIfyt616^vmzvXXiu)6>rORp6z(4X)s}^z z?-%fEi#cMB8AoD=U~MAn9S70VI}#UIs!Mx>(jKw2howWyt6?h5U~731Ud`@~pG|Cy zUr5;-aAPBUFmk6p>|)dqxnN}s%x4O{-KUXRDUAST4tp@y4Uv??a8{G-z!MhLC0d zZ-4}WY#;Msj|{B$BXJlG^oCd}fK7dAS7;p4M?eh2j_Qg4U^!c?$Q?_vEW8+y*$RE6A!ShTqh z4WKN-A^65wRc6jvn>hI|nm=oMGAZ<&5PMFf&JLtXE(#?V#gdDduD9A3t+fl*S}^L} z58n{1ZKAa;Wo-k|Ovh6-(;=cbu;$LOnPahIn75vtPTAMeUAlJ3gzpUK8FOOPFqR6^ zIG?d$voEHk0Cfl)2nJc3hC!nWqU!x-2pN_pI&##>G-*+(w7x(y>(rIb8V5WtzrdUs z@`L~yHVsv-P-ZzwIij%m`62{6RCp0A*~nr4H*V%lXAj0qZyDPJxLW+n94}p;j)jjcJEFes1P%=$VvOCaT5KwU94V zH>5lrVs&TC@eG16`+a-jd=e@RZDL6qxW?uCuo!IH)9$JVC-0vmB{^xncG10c!M%0< zfZ*;H-QBT6u|v=5H!aq0U8vtG)Nd2(x5ZATt6LYVw=Pr@RCTvl-HrI%b@3yKK+-F? zw~6j;5a*TDCrv5Gx;b0Q(UG!tEN2^)?m(rsy1arIMQs(!@**971jQEoO6sK+~PFg00sV=Nw{K4HHBqozV!LdbjY)M(SNSzy6R@hbzI$J72Ok zl96%RR`9BdkPdvY!mDCRONCqVdS^kjVC6oi;m)|E3S%Ij*-P7hwIQokIHICH!C$(j zY1&K?8_KTqgqAjUB_}#m#DyAHWok>~S}cd7)$*P$j2e?FE>ng=TzThF;!I3ak|?-k z7pBCl|A6xwe`JU>=>ra&rON>M}}x&Fz;ME=(Fo) zXv)*fFrOuRCLLNL$*HLAP8Dq?VjkLo5q>f0f`QH1`pC~#s5gQ|A~x0CBud)m!`7+54>iIB?iPR zhvUeypNc~8$p9u$M9iTaO501{ z?Z4aq%-xZd4`DP^}c+2s_qz&<1sEzr!>thwWeS>oksYw@DBX2DvMIF#%btQ$q^#*}p< zbFE&mRwuThjjesOUfi%puBk!m;?m(kd{i5Io& zc`BPti10S|4AhWwb_a+d&_}0cZh}6lZb<4Lw!|Fis=B!yUpP{Z^K@I!XB%TM?4>F^ z!_!)Vj0IW;m$A`f#!Azt!xNL?LDksH(b&YAV;6w)MtF^^8cHUh;B!qYYRNS&xKm-X zOEpHTT@|`o1FNJYYOrI4$0OZG80|k%Mtmr-n6Vm)h0QzFYhUPO=|_2M2SZ@}KrfJr zR`!CV*}&t>=#Uw)D&sT4G)Zd&qt@KmjB}+8yajN|7|nbsV>9A2Yd6=&s9!)b*M6L- z$A%aam?i9hNGePjOLogcX>Y?nfD=<)$I*IW3|7*%w0w|S&>DQBO&cxYk;$PU9DPg; z4adwEu=c?w2>EfU$%4W0QS1T}@D7nQhpE`}nL?^nI@KH-SYpjUFytKzjE(cRu>T9C ziKp?Or6{|qKw2w|iudN9p&ZLVBw z?Mb_9le+sS(iQcyKS0Rop8cuHHxp*Da(k+B`;+V9;WxpJFFC9(#0LYrCb|k@W~9HQ ze5HAH>Znp@g(Xi)v!Z68mW-463RX;r^$}WYR(7 ze4C2g4NsUJc2p<29`1a&S8!|)9UD^C4H7ac%6dE~9n(TF$d{5C4_xvyvvR_9{Eq>P z%S)B61_66e3)MU2QmWWHcSSgQ~n_tQ}h}o;|FByCbvB>Sn){9 zLRhIW<5%?ImneZ>-$wqW-;(}SovBxsxW2?O%Zofl^i3w5$*dShb(eJ}tczG?zHBg? 
zpcSbYB`pM|7-w{x*|nrMn<@#W5*v`3UGKauIh3$Z1q3e}%6D?8WZ$=uR~X7CasUGB z*|%h{m>}8G8m=N!XVSRD;W6)KkH^hVj8FVe3dEjc>^PU13jBJNzd+G(zEw4drD!wIy;cUc10+#f-cd!#L+W(MtZ7Er6dVp$=+Zpe^yeyWWp0olyx{qF?8`W1D!Oes4Dw;fvn=MKSTx;y+I65v*X!AVos zCV~;x>oX;i2OO(68g8eU*}$2p&3A5BR>@&*g~qpxn+`RI;AB$CuXGH6#1A^?ljO{( zZ)wYAb_#;ZwH>4G#4VyBLSZ5q!#p9gj6LTWYsyGIkXfrfKY8 z1VW}t4Vpdll%#st1CEA@OoRG6BO$`=v_ZHxdpra0BB**bRJLqX#}e>FnMrF2>j>O@ z0^4|F-XSSWZWTxFg~NLrHZe`E6)8x8sK#=9|z9E&4oV)Qd=_UbhO^|Q9G$^F~h#|;HKT0^+9DS4o8Ak1=b)h#+4 zoV?tKU!DzqP43_3KE9@N6Yz3FN;urV&3$a=^o2|MRvku|wW799}U%4Y29*X+x;t9)mz{Qm(0z~xi` literal 0 HcmV?d00001 diff --git a/TTS/vocoder/models/base_vocoder.py b/TTS/vocoder/models/base_vocoder.py new file mode 100644 index 0000000..0bcbe7b --- /dev/null +++ b/TTS/vocoder/models/base_vocoder.py @@ -0,0 +1,55 @@ +from coqpit import Coqpit + +from TTS.model import BaseTrainerModel + +# pylint: skip-file + + +class BaseVocoder(BaseTrainerModel): + """Base `vocoder` class. Every new `vocoder` model must inherit this. + + It defines `vocoder` specific functions on top of `Model`. + + Notes on input/output tensor shapes: + Any input or output tensor of the model must be shaped as + + - 3D tensors `batch x time x channels` + - 2D tensors `batch x channels` + - 1D tensors `batch x 1` + """ + + MODEL_TYPE = "vocoder" + + def __init__(self, config): + super().__init__() + self._set_model_args(config) + + def _set_model_args(self, config: Coqpit): + """Setup model args based on the config type. + + If the config is for training with a name like "*Config", then the model args are embeded in the + config.model_args + + If the config is for the model with a name like "*Args", then we assign the directly. + """ + # don't use isintance not to import recursively + if "Config" in config.__class__.__name__: + if "characters" in config: + _, self.config, num_chars = self.get_characters(config) + self.config.num_chars = num_chars + if hasattr(self.config, "model_args"): + config.model_args.num_chars = num_chars + if "model_args" in config: + self.args = self.config.model_args + # This is for backward compatibility + if "model_params" in config: + self.args = self.config.model_params + else: + self.config = config + if "model_args" in config: + self.args = self.config.model_args + # This is for backward compatibility + if "model_params" in config: + self.args = self.config.model_params + else: + raise ValueError("config must be either a *Config or *Args") diff --git a/TTS/vocoder/models/fullband_melgan_generator.py b/TTS/vocoder/models/fullband_melgan_generator.py new file mode 100644 index 0000000..ee25559 --- /dev/null +++ b/TTS/vocoder/models/fullband_melgan_generator.py @@ -0,0 +1,33 @@ +import torch + +from TTS.vocoder.models.melgan_generator import MelganGenerator + + +class FullbandMelganGenerator(MelganGenerator): + def __init__( + self, + in_channels=80, + out_channels=1, + proj_kernel=7, + base_channels=512, + upsample_factors=(2, 8, 2, 2), + res_kernel=3, + num_res_blocks=4, + ): + super().__init__( + in_channels=in_channels, + out_channels=out_channels, + proj_kernel=proj_kernel, + base_channels=base_channels, + upsample_factors=upsample_factors, + res_kernel=res_kernel, + num_res_blocks=num_res_blocks, + ) + + @torch.no_grad() + def inference(self, cond_features): + cond_features = cond_features.to(self.layers[1].weight.device) + cond_features = torch.nn.functional.pad( + cond_features, (self.inference_padding, self.inference_padding), "replicate" + ) + return self.layers(cond_features) diff --git a/TTS/vocoder/models/gan.py 
b/TTS/vocoder/models/gan.py new file mode 100644 index 0000000..19c30e9 --- /dev/null +++ b/TTS/vocoder/models/gan.py @@ -0,0 +1,374 @@ +from inspect import signature +from typing import Dict, List, Tuple + +import numpy as np +import torch +from coqpit import Coqpit +from torch import nn +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler +from trainer.trainer_utils import get_optimizer, get_scheduler + +from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_fsspec +from TTS.vocoder.datasets.gan_dataset import GANDataset +from TTS.vocoder.layers.losses import DiscriminatorLoss, GeneratorLoss +from TTS.vocoder.models import setup_discriminator, setup_generator +from TTS.vocoder.models.base_vocoder import BaseVocoder +from TTS.vocoder.utils.generic_utils import plot_results + + +class GAN(BaseVocoder): + def __init__(self, config: Coqpit, ap: AudioProcessor = None): + """Wrap a generator and a discriminator network. It provides a compatible interface for the trainer. + It also helps mixing and matching different generator and disciminator networks easily. + + To implement a new GAN models, you just need to define the generator and the discriminator networks, the rest + is handled by the `GAN` class. + + Args: + config (Coqpit): Model configuration. + ap (AudioProcessor): 🐸TTS AudioProcessor instance. Defaults to None. + + Examples: + Initializing the GAN model with HifiGAN generator and discriminator. + >>> from TTS.vocoder.configs import HifiganConfig + >>> config = HifiganConfig() + >>> model = GAN(config) + """ + super().__init__(config) + self.config = config + self.model_g = setup_generator(config) + self.model_d = setup_discriminator(config) + self.train_disc = False # if False, train only the generator. + self.y_hat_g = None # the last generator prediction to be passed onto the discriminator + self.ap = ap + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Run the generator's forward pass. + + Args: + x (torch.Tensor): Input tensor. + + Returns: + torch.Tensor: output of the GAN generator network. + """ + return self.model_g.forward(x) + + def inference(self, x: torch.Tensor) -> torch.Tensor: + """Run the generator's inference pass. + + Args: + x (torch.Tensor): Input tensor. + Returns: + torch.Tensor: output of the GAN generator network. + """ + return self.model_g.inference(x) + + def train_step(self, batch: Dict, criterion: Dict, optimizer_idx: int) -> Tuple[Dict, Dict]: + """Compute model outputs and the loss values. `optimizer_idx` selects the generator or the discriminator for + network on the current pass. + + Args: + batch (Dict): Batch of samples returned by the dataloader. + criterion (Dict): Criterion used to compute the losses. + optimizer_idx (int): ID of the optimizer in use on the current pass. + + Raises: + ValueError: `optimizer_idx` is an unexpected value. + + Returns: + Tuple[Dict, Dict]: model outputs and the computed loss values. + """ + outputs = {} + loss_dict = {} + + x = batch["input"] + y = batch["waveform"] + + if optimizer_idx not in [0, 1]: + raise ValueError(" [!] 
Unexpected `optimizer_idx`.") + + if optimizer_idx == 0: + # DISCRIMINATOR optimization + + # generator pass + y_hat = self.model_g(x)[:, :, : y.size(2)] + + # cache for generator loss + # pylint: disable=W0201 + self.y_hat_g = y_hat + self.y_hat_sub = None + self.y_sub_g = None + + # PQMF formatting + if y_hat.shape[1] > 1: + self.y_hat_sub = y_hat + y_hat = self.model_g.pqmf_synthesis(y_hat) + self.y_hat_g = y_hat # save for generator loss + self.y_sub_g = self.model_g.pqmf_analysis(y) + + scores_fake, feats_fake, feats_real = None, None, None + + if self.train_disc: + # use different samples for G and D trainings + if self.config.diff_samples_for_G_and_D: + x_d = batch["input_disc"] + y_d = batch["waveform_disc"] + # use a different sample than generator + with torch.no_grad(): + y_hat = self.model_g(x_d) + + # PQMF formatting + if y_hat.shape[1] > 1: + y_hat = self.model_g.pqmf_synthesis(y_hat) + else: + # use the same samples as generator + x_d = x.clone() + y_d = y.clone() + y_hat = self.y_hat_g + + # run D with or without cond. features + if len(signature(self.model_d.forward).parameters) == 2: + D_out_fake = self.model_d(y_hat.detach().clone(), x_d) + D_out_real = self.model_d(y_d, x_d) + else: + D_out_fake = self.model_d(y_hat.detach()) + D_out_real = self.model_d(y_d) + + # format D outputs + if isinstance(D_out_fake, tuple): + # self.model_d returns scores and features + scores_fake, feats_fake = D_out_fake + if D_out_real is None: + scores_real, feats_real = None, None + else: + scores_real, feats_real = D_out_real + else: + # model D returns only scores + scores_fake = D_out_fake + scores_real = D_out_real + + # compute losses + loss_dict = criterion[optimizer_idx](scores_fake, scores_real) + outputs = {"model_outputs": y_hat} + + if optimizer_idx == 1: + # GENERATOR loss + scores_fake, feats_fake, feats_real = None, None, None + if self.train_disc: + if len(signature(self.model_d.forward).parameters) == 2: + D_out_fake = self.model_d(self.y_hat_g, x) + else: + D_out_fake = self.model_d(self.y_hat_g) + D_out_real = None + + if self.config.use_feat_match_loss: + with torch.no_grad(): + D_out_real = self.model_d(y) + + # format D outputs + if isinstance(D_out_fake, tuple): + scores_fake, feats_fake = D_out_fake + if D_out_real is None: + feats_real = None + else: + _, feats_real = D_out_real + else: + scores_fake = D_out_fake + feats_fake, feats_real = None, None + + # compute losses + loss_dict = criterion[optimizer_idx]( + self.y_hat_g, y, scores_fake, feats_fake, feats_real, self.y_hat_sub, self.y_sub_g + ) + outputs = {"model_outputs": self.y_hat_g} + return outputs, loss_dict + + def _log(self, name: str, ap: AudioProcessor, batch: Dict, outputs: Dict) -> Tuple[Dict, Dict]: + """Logging shared by the training and evaluation. + + Args: + name (str): Name of the run. `train` or `eval`, + ap (AudioProcessor): Audio processor used in training. + batch (Dict): Batch used in the last train/eval step. + outputs (Dict): Model outputs from the last train/eval step. + + Returns: + Tuple[Dict, Dict]: log figures and audio samples. 
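+
+        Note:
+            `outputs` holds one entry per optimizer, so index 0 corresponds to the
+            discriminator pass and index 1 to the generator pass.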
+ """ + y_hat = outputs[0]["model_outputs"] if self.train_disc else outputs[1]["model_outputs"] + y = batch["waveform"] + figures = plot_results(y_hat, y, ap, name) + sample_voice = y_hat[0].squeeze(0).detach().cpu().numpy() + audios = {f"{name}/audio": sample_voice} + return figures, audios + + def train_log( + self, batch: Dict, outputs: Dict, logger: "Logger", assets: Dict, steps: int # pylint: disable=unused-argument + ) -> Tuple[Dict, np.ndarray]: + """Call `_log()` for training.""" + figures, audios = self._log("eval", self.ap, batch, outputs) + logger.eval_figures(steps, figures) + logger.eval_audios(steps, audios, self.ap.sample_rate) + + @torch.no_grad() + def eval_step(self, batch: Dict, criterion: nn.Module, optimizer_idx: int) -> Tuple[Dict, Dict]: + """Call `train_step()` with `no_grad()`""" + self.train_disc = True # Avoid a bug in the Training with the missing discriminator loss + return self.train_step(batch, criterion, optimizer_idx) + + def eval_log( + self, batch: Dict, outputs: Dict, logger: "Logger", assets: Dict, steps: int # pylint: disable=unused-argument + ) -> Tuple[Dict, np.ndarray]: + """Call `_log()` for evaluation.""" + figures, audios = self._log("eval", self.ap, batch, outputs) + logger.eval_figures(steps, figures) + logger.eval_audios(steps, audios, self.ap.sample_rate) + + def load_checkpoint( + self, + config: Coqpit, + checkpoint_path: str, + eval: bool = False, # pylint: disable=unused-argument, redefined-builtin + cache: bool = False, + ) -> None: + """Load a GAN checkpoint and initialize model parameters. + + Args: + config (Coqpit): Model config. + checkpoint_path (str): Checkpoint file path. + eval (bool, optional): If true, load the model for inference. If falseDefaults to False. + """ + state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) + # band-aid for older than v0.0.15 GAN models + if "model_disc" in state: + self.model_g.load_checkpoint(config, checkpoint_path, eval) + else: + self.load_state_dict(state["model"]) + if eval: + self.model_d = None + if hasattr(self.model_g, "remove_weight_norm"): + self.model_g.remove_weight_norm() + + def on_train_step_start(self, trainer) -> None: + """Enable the discriminator training based on `steps_to_start_discriminator` + + Args: + trainer (Trainer): Trainer object. + """ + self.train_disc = trainer.total_steps_done >= self.config.steps_to_start_discriminator + + def get_optimizer(self) -> List: + """Initiate and return the GAN optimizers based on the config parameters. + + It returnes 2 optimizers in a list. First one is for the generator and the second one is for the discriminator. + + Returns: + List: optimizers. + """ + optimizer1 = get_optimizer( + self.config.optimizer, self.config.optimizer_params, self.config.lr_gen, self.model_g + ) + optimizer2 = get_optimizer( + self.config.optimizer, self.config.optimizer_params, self.config.lr_disc, self.model_d + ) + return [optimizer2, optimizer1] + + def get_lr(self) -> List: + """Set the initial learning rates for each optimizer. + + Returns: + List: learning rates for each optimizer. + """ + return [self.config.lr_disc, self.config.lr_gen] + + def get_scheduler(self, optimizer) -> List: + """Set the schedulers for each optimizer. + + Args: + optimizer (List[`torch.optim.Optimizer`]): List of optimizers. + + Returns: + List: Schedulers, one for each optimizer. 
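+
+        Note:
+            The returned order matches `get_optimizer()`: index 0 is configured from
+            `lr_scheduler_disc` and index 1 from `lr_scheduler_gen`.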
+ """ + scheduler1 = get_scheduler(self.config.lr_scheduler_gen, self.config.lr_scheduler_gen_params, optimizer[0]) + scheduler2 = get_scheduler(self.config.lr_scheduler_disc, self.config.lr_scheduler_disc_params, optimizer[1]) + return [scheduler2, scheduler1] + + @staticmethod + def format_batch(batch: List) -> Dict: + """Format the batch for training. + + Args: + batch (List): Batch out of the dataloader. + + Returns: + Dict: formatted model inputs. + """ + if isinstance(batch[0], list): + x_G, y_G = batch[0] + x_D, y_D = batch[1] + return {"input": x_G, "waveform": y_G, "input_disc": x_D, "waveform_disc": y_D} + x, y = batch + return {"input": x, "waveform": y} + + def get_data_loader( # pylint: disable=no-self-use, unused-argument + self, + config: Coqpit, + assets: Dict, + is_eval: True, + samples: List, + verbose: bool, + num_gpus: int, + rank: int = None, # pylint: disable=unused-argument + ): + """Initiate and return the GAN dataloader. + + Args: + config (Coqpit): Model config. + ap (AudioProcessor): Audio processor. + is_eval (True): Set the dataloader for evaluation if true. + samples (List): Data samples. + verbose (bool): Log information if true. + num_gpus (int): Number of GPUs in use. + rank (int): Rank of the current GPU. Defaults to None. + + Returns: + DataLoader: Torch dataloader. + """ + dataset = GANDataset( + ap=self.ap, + items=samples, + seq_len=config.seq_len, + hop_len=self.ap.hop_length, + pad_short=config.pad_short, + conv_pad=config.conv_pad, + return_pairs=config.diff_samples_for_G_and_D if "diff_samples_for_G_and_D" in config else False, + is_training=not is_eval, + return_segments=not is_eval, + use_noise_augment=config.use_noise_augment, + use_cache=config.use_cache, + verbose=verbose, + ) + dataset.shuffle_mapping() + sampler = DistributedSampler(dataset, shuffle=True) if num_gpus > 1 else None + loader = DataLoader( + dataset, + batch_size=1 if is_eval else config.batch_size, + shuffle=num_gpus == 0, + drop_last=False, + sampler=sampler, + num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers, + pin_memory=False, + ) + return loader + + def get_criterion(self): + """Return criterions for the optimizers""" + return [DiscriminatorLoss(self.config), GeneratorLoss(self.config)] + + @staticmethod + def init_from_config(config: Coqpit, verbose=True) -> "GAN": + ap = AudioProcessor.init_from_config(config, verbose=verbose) + return GAN(config, ap=ap) diff --git a/TTS/vocoder/models/hifigan_discriminator.py b/TTS/vocoder/models/hifigan_discriminator.py new file mode 100644 index 0000000..7447a5f --- /dev/null +++ b/TTS/vocoder/models/hifigan_discriminator.py @@ -0,0 +1,217 @@ +# adopted from https://github.com/jik876/hifi-gan/blob/master/models.py +import torch +from torch import nn +from torch.nn import functional as F + +LRELU_SLOPE = 0.1 + + +class DiscriminatorP(torch.nn.Module): + """HiFiGAN Periodic Discriminator + + Takes every Pth value from the input waveform and applied a stack of convoluations. + + Note: + if `period` is 2 + `waveform = [1, 2, 3, 4, 5, 6 ...] --> [1, 3, 5 ... ] --> convs -> score, feat` + + Args: + x (Tensor): input waveform. + + Returns: + [Tensor]: discriminator scores per sample in the batch. + [List[Tensor]]: list of features from each convolutional layer. 
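+
+    Example:
+        A rough sketch of the 1D-to-2D period reshaping done in `forward`
+        (illustrative values only):
+
+        >>> import torch
+        >>> x = torch.arange(6.0).view(1, 1, 6)  # [B, 1, T] = [1, 1, 6]
+        >>> x.view(1, 1, 3, 2).shape  # [B, 1, T // period, period] for period=2
+        torch.Size([1, 1, 3, 2])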
+ + Shapes: + x: [B, 1, T] + """ + + def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False): + super().__init__() + self.period = period + get_padding = lambda k, d: int((k * d - d) / 2) + norm_f = nn.utils.spectral_norm if use_spectral_norm else nn.utils.parametrizations.weight_norm + self.convs = nn.ModuleList( + [ + norm_f(nn.Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(nn.Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(nn.Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(nn.Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(nn.Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(2, 0))), + ] + ) + self.conv_post = norm_f(nn.Conv2d(1024, 1, (3, 1), 1, padding=(1, 0))) + + def forward(self, x): + """ + Args: + x (Tensor): input waveform. + + Returns: + [Tensor]: discriminator scores per sample in the batch. + [List[Tensor]]: list of features from each convolutional layer. + + Shapes: + x: [B, 1, T] + """ + feat = [] + + # 1d to 2d + b, c, t = x.shape + if t % self.period != 0: # pad first + n_pad = self.period - (t % self.period) + x = F.pad(x, (0, n_pad), "reflect") + t = t + n_pad + x = x.view(b, c, t // self.period, self.period) + + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, LRELU_SLOPE) + feat.append(x) + x = self.conv_post(x) + feat.append(x) + x = torch.flatten(x, 1, -1) + + return x, feat + + +class MultiPeriodDiscriminator(torch.nn.Module): + """HiFiGAN Multi-Period Discriminator (MPD) + Wrapper for the `PeriodDiscriminator` to apply it in different periods. + Periods are suggested to be prime numbers to reduce the overlap between each discriminator. + """ + + def __init__(self, use_spectral_norm=False): + super().__init__() + self.discriminators = nn.ModuleList( + [ + DiscriminatorP(2, use_spectral_norm=use_spectral_norm), + DiscriminatorP(3, use_spectral_norm=use_spectral_norm), + DiscriminatorP(5, use_spectral_norm=use_spectral_norm), + DiscriminatorP(7, use_spectral_norm=use_spectral_norm), + DiscriminatorP(11, use_spectral_norm=use_spectral_norm), + ] + ) + + def forward(self, x): + """ + Args: + x (Tensor): input waveform. + + Returns: + [List[Tensor]]: list of scores from each discriminator. + [List[List[Tensor]]]: list of list of features from each discriminator's each convolutional layer. + + Shapes: + x: [B, 1, T] + """ + scores = [] + feats = [] + for _, d in enumerate(self.discriminators): + score, feat = d(x) + scores.append(score) + feats.append(feat) + return scores, feats + + +class DiscriminatorS(torch.nn.Module): + """HiFiGAN Scale Discriminator. + It is similar to `MelganDiscriminator` but with a specific architecture explained in the paper. + + Args: + use_spectral_norm (bool): if `True` swith to spectral norm instead of weight norm. 
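+
+    Example:
+        A minimal shape check (illustrative; the stacked strided convolutions
+        downsample time by a factor of 64):
+
+        >>> import torch
+        >>> disc = DiscriminatorS()
+        >>> score, feats = disc(torch.randn(1, 1, 6400))
+        >>> score.shape, len(feats)
+        (torch.Size([1, 100]), 8)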
+ + """ + + def __init__(self, use_spectral_norm=False): + super().__init__() + norm_f = nn.utils.spectral_norm if use_spectral_norm else nn.utils.parametrizations.weight_norm + self.convs = nn.ModuleList( + [ + norm_f(nn.Conv1d(1, 128, 15, 1, padding=7)), + norm_f(nn.Conv1d(128, 128, 41, 2, groups=4, padding=20)), + norm_f(nn.Conv1d(128, 256, 41, 2, groups=16, padding=20)), + norm_f(nn.Conv1d(256, 512, 41, 4, groups=16, padding=20)), + norm_f(nn.Conv1d(512, 1024, 41, 4, groups=16, padding=20)), + norm_f(nn.Conv1d(1024, 1024, 41, 1, groups=16, padding=20)), + norm_f(nn.Conv1d(1024, 1024, 5, 1, padding=2)), + ] + ) + self.conv_post = norm_f(nn.Conv1d(1024, 1, 3, 1, padding=1)) + + def forward(self, x): + """ + Args: + x (Tensor): input waveform. + + Returns: + Tensor: discriminator scores. + List[Tensor]: list of features from the convolutiona layers. + """ + feat = [] + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, LRELU_SLOPE) + feat.append(x) + x = self.conv_post(x) + feat.append(x) + x = torch.flatten(x, 1, -1) + return x, feat + + +class MultiScaleDiscriminator(torch.nn.Module): + """HiFiGAN Multi-Scale Discriminator. + It is similar to `MultiScaleMelganDiscriminator` but specially tailored for HiFiGAN as in the paper. + """ + + def __init__(self): + super().__init__() + self.discriminators = nn.ModuleList( + [ + DiscriminatorS(use_spectral_norm=True), + DiscriminatorS(), + DiscriminatorS(), + ] + ) + self.meanpools = nn.ModuleList([nn.AvgPool1d(4, 2, padding=2), nn.AvgPool1d(4, 2, padding=2)]) + + def forward(self, x): + """ + Args: + x (Tensor): input waveform. + + Returns: + List[Tensor]: discriminator scores. + List[List[Tensor]]: list of list of features from each layers of each discriminator. + """ + scores = [] + feats = [] + for i, d in enumerate(self.discriminators): + if i != 0: + x = self.meanpools[i - 1](x) + score, feat = d(x) + scores.append(score) + feats.append(feat) + return scores, feats + + +class HifiganDiscriminator(nn.Module): + """HiFiGAN discriminator wrapping MPD and MSD.""" + + def __init__(self): + super().__init__() + self.mpd = MultiPeriodDiscriminator() + self.msd = MultiScaleDiscriminator() + + def forward(self, x): + """ + Args: + x (Tensor): input waveform. + + Returns: + List[Tensor]: discriminator scores. + List[List[Tensor]]: list of list of features from each layers of each discriminator. + """ + scores, feats = self.mpd(x) + scores_, feats_ = self.msd(x) + return scores + scores_, feats + feats_ diff --git a/TTS/vocoder/models/hifigan_generator.py b/TTS/vocoder/models/hifigan_generator.py new file mode 100644 index 0000000..9247532 --- /dev/null +++ b/TTS/vocoder/models/hifigan_generator.py @@ -0,0 +1,301 @@ +# adopted from https://github.com/jik876/hifi-gan/blob/master/models.py +import torch +from torch import nn +from torch.nn import Conv1d, ConvTranspose1d +from torch.nn import functional as F +from torch.nn.utils.parametrizations import weight_norm +from torch.nn.utils.parametrize import remove_parametrizations + +from TTS.utils.io import load_fsspec + +LRELU_SLOPE = 0.1 + + +def get_padding(k, d): + return int((k * d - d) / 2) + + +class ResBlock1(torch.nn.Module): + """Residual Block Type 1. It has 3 convolutional layers in each convolutional block. 
+ + Network:: + + x -> lrelu -> conv1_1 -> conv1_2 -> conv1_3 -> z -> lrelu -> conv2_1 -> conv2_2 -> conv2_3 -> o -> + -> o + |--------------------------------------------------------------------------------------------------| + + + Args: + channels (int): number of hidden channels for the convolutional layers. + kernel_size (int): size of the convolution filter in each layer. + dilations (list): list of dilation value for each conv layer in a block. + """ + + def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)): + super().__init__() + self.convs1 = nn.ModuleList( + [ + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[2], + padding=get_padding(kernel_size, dilation[2]), + ) + ), + ] + ) + + self.convs2 = nn.ModuleList( + [ + weight_norm( + Conv1d(channels, channels, kernel_size, 1, dilation=1, padding=get_padding(kernel_size, 1)) + ), + weight_norm( + Conv1d(channels, channels, kernel_size, 1, dilation=1, padding=get_padding(kernel_size, 1)) + ), + weight_norm( + Conv1d(channels, channels, kernel_size, 1, dilation=1, padding=get_padding(kernel_size, 1)) + ), + ] + ) + + def forward(self, x): + """ + Args: + x (Tensor): input tensor. + Returns: + Tensor: output tensor. + Shapes: + x: [B, C, T] + """ + for c1, c2 in zip(self.convs1, self.convs2): + xt = F.leaky_relu(x, LRELU_SLOPE) + xt = c1(xt) + xt = F.leaky_relu(xt, LRELU_SLOPE) + xt = c2(xt) + x = xt + x + return x + + def remove_weight_norm(self): + for l in self.convs1: + remove_parametrizations(l, "weight") + for l in self.convs2: + remove_parametrizations(l, "weight") + + +class ResBlock2(torch.nn.Module): + """Residual Block Type 2. It has 1 convolutional layers in each convolutional block. + + Network:: + + x -> lrelu -> conv1-> -> z -> lrelu -> conv2-> o -> + -> o + |---------------------------------------------------| + + + Args: + channels (int): number of hidden channels for the convolutional layers. + kernel_size (int): size of the convolution filter in each layer. + dilations (list): list of dilation value for each conv layer in a block. 
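+
+    Example:
+        The dilated convolutions are padded so that the time dimension is preserved
+        (a minimal illustrative check):
+
+        >>> import torch
+        >>> block = ResBlock2(channels=64, kernel_size=3, dilation=(1, 3))
+        >>> block(torch.randn(1, 64, 100)).shape
+        torch.Size([1, 64, 100])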
+ """ + + def __init__(self, channels, kernel_size=3, dilation=(1, 3)): + super().__init__() + self.convs = nn.ModuleList( + [ + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]), + ) + ), + ] + ) + + def forward(self, x): + for c in self.convs: + xt = F.leaky_relu(x, LRELU_SLOPE) + xt = c(xt) + x = xt + x + return x + + def remove_weight_norm(self): + for l in self.convs: + remove_parametrizations(l, "weight") + + +class HifiganGenerator(torch.nn.Module): + def __init__( + self, + in_channels, + out_channels, + resblock_type, + resblock_dilation_sizes, + resblock_kernel_sizes, + upsample_kernel_sizes, + upsample_initial_channel, + upsample_factors, + inference_padding=5, + cond_channels=0, + conv_pre_weight_norm=True, + conv_post_weight_norm=True, + conv_post_bias=True, + ): + r"""HiFiGAN Generator with Multi-Receptive Field Fusion (MRF) + + Network: + x -> lrelu -> upsampling_layer -> resblock1_k1x1 -> z1 -> + -> z_sum / #resblocks -> lrelu -> conv_post_7x1 -> tanh -> o + .. -> zI ---| + resblockN_kNx1 -> zN ---' + + Args: + in_channels (int): number of input tensor channels. + out_channels (int): number of output tensor channels. + resblock_type (str): type of the `ResBlock`. '1' or '2'. + resblock_dilation_sizes (List[List[int]]): list of dilation values in each layer of a `ResBlock`. + resblock_kernel_sizes (List[int]): list of kernel sizes for each `ResBlock`. + upsample_kernel_sizes (List[int]): list of kernel sizes for each transposed convolution. + upsample_initial_channel (int): number of channels for the first upsampling layer. This is divided by 2 + for each consecutive upsampling layer. + upsample_factors (List[int]): upsampling factors (stride) for each upsampling layer. + inference_padding (int): constant padding applied to the input at inference time. Defaults to 5. + """ + super().__init__() + self.inference_padding = inference_padding + self.num_kernels = len(resblock_kernel_sizes) + self.num_upsamples = len(upsample_factors) + # initial upsampling layers + self.conv_pre = weight_norm(Conv1d(in_channels, upsample_initial_channel, 7, 1, padding=3)) + resblock = ResBlock1 if resblock_type == "1" else ResBlock2 + # upsampling layers + self.ups = nn.ModuleList() + for i, (u, k) in enumerate(zip(upsample_factors, upsample_kernel_sizes)): + self.ups.append( + weight_norm( + ConvTranspose1d( + upsample_initial_channel // (2**i), + upsample_initial_channel // (2 ** (i + 1)), + k, + u, + padding=(k - u) // 2, + ) + ) + ) + # MRF blocks + self.resblocks = nn.ModuleList() + for i in range(len(self.ups)): + ch = upsample_initial_channel // (2 ** (i + 1)) + for _, (k, d) in enumerate(zip(resblock_kernel_sizes, resblock_dilation_sizes)): + self.resblocks.append(resblock(ch, k, d)) + # post convolution layer + self.conv_post = weight_norm(Conv1d(ch, out_channels, 7, 1, padding=3, bias=conv_post_bias)) + if cond_channels > 0: + self.cond_layer = nn.Conv1d(cond_channels, upsample_initial_channel, 1) + + if not conv_pre_weight_norm: + remove_parametrizations(self.conv_pre, "weight") + + if not conv_post_weight_norm: + remove_parametrizations(self.conv_post, "weight") + + def forward(self, x, g=None): + """ + Args: + x (Tensor): feature input tensor. + g (Tensor): global conditioning input tensor. + + Returns: + Tensor: output waveform. 
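+
+        Example:
+            The output length is the input length times the product of `upsample_factors`
+            (a small illustrative configuration, not a released one):
+
+            >>> import torch
+            >>> gen = HifiganGenerator(80, 1, "1", [[1, 3, 5]] * 3, [3, 7, 11], [16, 16, 4, 4], 128, [8, 8, 2, 2])
+            >>> gen(torch.randn(1, 80, 10)).shape
+            torch.Size([1, 1, 2560])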
+ + Shapes: + x: [B, C, T] + Tensor: [B, 1, T] + """ + o = self.conv_pre(x) + if hasattr(self, "cond_layer"): + o = o + self.cond_layer(g) + for i in range(self.num_upsamples): + o = F.leaky_relu(o, LRELU_SLOPE) + o = self.ups[i](o) + z_sum = None + for j in range(self.num_kernels): + if z_sum is None: + z_sum = self.resblocks[i * self.num_kernels + j](o) + else: + z_sum += self.resblocks[i * self.num_kernels + j](o) + o = z_sum / self.num_kernels + o = F.leaky_relu(o) + o = self.conv_post(o) + o = torch.tanh(o) + return o + + @torch.no_grad() + def inference(self, c): + """ + Args: + x (Tensor): conditioning input tensor. + + Returns: + Tensor: output waveform. + + Shapes: + x: [B, C, T] + Tensor: [B, 1, T] + """ + c = c.to(self.conv_pre.weight.device) + c = torch.nn.functional.pad(c, (self.inference_padding, self.inference_padding), "replicate") + return self.forward(c) + + def remove_weight_norm(self): + print("Removing weight norm...") + for l in self.ups: + remove_parametrizations(l, "weight") + for l in self.resblocks: + l.remove_weight_norm() + remove_parametrizations(self.conv_pre, "weight") + remove_parametrizations(self.conv_post, "weight") + + def load_checkpoint( + self, config, checkpoint_path, eval=False, cache=False + ): # pylint: disable=unused-argument, redefined-builtin + state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) + self.load_state_dict(state["model"]) + if eval: + self.eval() + assert not self.training + self.remove_weight_norm() diff --git a/TTS/vocoder/models/melgan_discriminator.py b/TTS/vocoder/models/melgan_discriminator.py new file mode 100644 index 0000000..e41467d --- /dev/null +++ b/TTS/vocoder/models/melgan_discriminator.py @@ -0,0 +1,84 @@ +import numpy as np +from torch import nn +from torch.nn.utils.parametrizations import weight_norm + + +class MelganDiscriminator(nn.Module): + def __init__( + self, + in_channels=1, + out_channels=1, + kernel_sizes=(5, 3), + base_channels=16, + max_channels=1024, + downsample_factors=(4, 4, 4, 4), + groups_denominator=4, + ): + super().__init__() + self.layers = nn.ModuleList() + + layer_kernel_size = np.prod(kernel_sizes) + layer_padding = (layer_kernel_size - 1) // 2 + + # initial layer + self.layers += [ + nn.Sequential( + nn.ReflectionPad1d(layer_padding), + weight_norm(nn.Conv1d(in_channels, base_channels, layer_kernel_size, stride=1)), + nn.LeakyReLU(0.2, inplace=True), + ) + ] + + # downsampling layers + layer_in_channels = base_channels + for downsample_factor in downsample_factors: + layer_out_channels = min(layer_in_channels * downsample_factor, max_channels) + layer_kernel_size = downsample_factor * 10 + 1 + layer_padding = (layer_kernel_size - 1) // 2 + layer_groups = layer_in_channels // groups_denominator + self.layers += [ + nn.Sequential( + weight_norm( + nn.Conv1d( + layer_in_channels, + layer_out_channels, + kernel_size=layer_kernel_size, + stride=downsample_factor, + padding=layer_padding, + groups=layer_groups, + ) + ), + nn.LeakyReLU(0.2, inplace=True), + ) + ] + layer_in_channels = layer_out_channels + + # last 2 layers + layer_padding1 = (kernel_sizes[0] - 1) // 2 + layer_padding2 = (kernel_sizes[1] - 1) // 2 + self.layers += [ + nn.Sequential( + weight_norm( + nn.Conv1d( + layer_out_channels, + layer_out_channels, + kernel_size=kernel_sizes[0], + stride=1, + padding=layer_padding1, + ) + ), + nn.LeakyReLU(0.2, inplace=True), + ), + weight_norm( + nn.Conv1d( + layer_out_channels, out_channels, kernel_size=kernel_sizes[1], stride=1, padding=layer_padding2 + ) + 
), + ] + + def forward(self, x): + feats = [] + for layer in self.layers: + x = layer(x) + feats.append(x) + return x, feats diff --git a/TTS/vocoder/models/melgan_generator.py b/TTS/vocoder/models/melgan_generator.py new file mode 100644 index 0000000..bb3fee7 --- /dev/null +++ b/TTS/vocoder/models/melgan_generator.py @@ -0,0 +1,95 @@ +import torch +from torch import nn +from torch.nn.utils.parametrizations import weight_norm + +from TTS.utils.io import load_fsspec +from TTS.vocoder.layers.melgan import ResidualStack + + +class MelganGenerator(nn.Module): + def __init__( + self, + in_channels=80, + out_channels=1, + proj_kernel=7, + base_channels=512, + upsample_factors=(8, 8, 2, 2), + res_kernel=3, + num_res_blocks=3, + ): + super().__init__() + + # assert model parameters + assert (proj_kernel - 1) % 2 == 0, " [!] proj_kernel should be an odd number." + + # setup additional model parameters + base_padding = (proj_kernel - 1) // 2 + act_slope = 0.2 + self.inference_padding = 2 + + # initial layer + layers = [] + layers += [ + nn.ReflectionPad1d(base_padding), + weight_norm(nn.Conv1d(in_channels, base_channels, kernel_size=proj_kernel, stride=1, bias=True)), + ] + + # upsampling layers and residual stacks + for idx, upsample_factor in enumerate(upsample_factors): + layer_in_channels = base_channels // (2**idx) + layer_out_channels = base_channels // (2 ** (idx + 1)) + layer_filter_size = upsample_factor * 2 + layer_stride = upsample_factor + layer_output_padding = upsample_factor % 2 + layer_padding = upsample_factor // 2 + layer_output_padding + layers += [ + nn.LeakyReLU(act_slope), + weight_norm( + nn.ConvTranspose1d( + layer_in_channels, + layer_out_channels, + layer_filter_size, + stride=layer_stride, + padding=layer_padding, + output_padding=layer_output_padding, + bias=True, + ) + ), + ResidualStack(channels=layer_out_channels, num_res_blocks=num_res_blocks, kernel_size=res_kernel), + ] + + layers += [nn.LeakyReLU(act_slope)] + + # final layer + layers += [ + nn.ReflectionPad1d(base_padding), + weight_norm(nn.Conv1d(layer_out_channels, out_channels, proj_kernel, stride=1, bias=True)), + nn.Tanh(), + ] + self.layers = nn.Sequential(*layers) + + def forward(self, c): + return self.layers(c) + + def inference(self, c): + c = c.to(self.layers[1].weight.device) + c = torch.nn.functional.pad(c, (self.inference_padding, self.inference_padding), "replicate") + return self.layers(c) + + def remove_weight_norm(self): + for _, layer in enumerate(self.layers): + if len(layer.state_dict()) != 0: + try: + nn.utils.parametrize.remove_parametrizations(layer, "weight") + except ValueError: + layer.remove_weight_norm() + + def load_checkpoint( + self, config, checkpoint_path, eval=False, cache=False + ): # pylint: disable=unused-argument, redefined-builtin + state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) + self.load_state_dict(state["model"]) + if eval: + self.eval() + assert not self.training + self.remove_weight_norm() diff --git a/TTS/vocoder/models/melgan_multiscale_discriminator.py b/TTS/vocoder/models/melgan_multiscale_discriminator.py new file mode 100644 index 0000000..b4909f3 --- /dev/null +++ b/TTS/vocoder/models/melgan_multiscale_discriminator.py @@ -0,0 +1,50 @@ +from torch import nn + +from TTS.vocoder.models.melgan_discriminator import MelganDiscriminator + + +class MelganMultiscaleDiscriminator(nn.Module): + def __init__( + self, + in_channels=1, + out_channels=1, + num_scales=3, + kernel_sizes=(5, 3), + base_channels=16, + max_channels=1024, + 
downsample_factors=(4, 4, 4), + pooling_kernel_size=4, + pooling_stride=2, + pooling_padding=2, + groups_denominator=4, + ): + super().__init__() + + self.discriminators = nn.ModuleList( + [ + MelganDiscriminator( + in_channels=in_channels, + out_channels=out_channels, + kernel_sizes=kernel_sizes, + base_channels=base_channels, + max_channels=max_channels, + downsample_factors=downsample_factors, + groups_denominator=groups_denominator, + ) + for _ in range(num_scales) + ] + ) + + self.pooling = nn.AvgPool1d( + kernel_size=pooling_kernel_size, stride=pooling_stride, padding=pooling_padding, count_include_pad=False + ) + + def forward(self, x): + scores = [] + feats = [] + for disc in self.discriminators: + score, feat = disc(x) + scores.append(score) + feats.append(feat) + x = self.pooling(x) + return scores, feats diff --git a/TTS/vocoder/models/multiband_melgan_generator.py b/TTS/vocoder/models/multiband_melgan_generator.py new file mode 100644 index 0000000..25d6590 --- /dev/null +++ b/TTS/vocoder/models/multiband_melgan_generator.py @@ -0,0 +1,41 @@ +import torch + +from TTS.vocoder.layers.pqmf import PQMF +from TTS.vocoder.models.melgan_generator import MelganGenerator + + +class MultibandMelganGenerator(MelganGenerator): + def __init__( + self, + in_channels=80, + out_channels=4, + proj_kernel=7, + base_channels=384, + upsample_factors=(2, 8, 2, 2), + res_kernel=3, + num_res_blocks=3, + ): + super().__init__( + in_channels=in_channels, + out_channels=out_channels, + proj_kernel=proj_kernel, + base_channels=base_channels, + upsample_factors=upsample_factors, + res_kernel=res_kernel, + num_res_blocks=num_res_blocks, + ) + self.pqmf_layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0) + + def pqmf_analysis(self, x): + return self.pqmf_layer.analysis(x) + + def pqmf_synthesis(self, x): + return self.pqmf_layer.synthesis(x) + + @torch.no_grad() + def inference(self, cond_features): + cond_features = cond_features.to(self.layers[1].weight.device) + cond_features = torch.nn.functional.pad( + cond_features, (self.inference_padding, self.inference_padding), "replicate" + ) + return self.pqmf_synthesis(self.layers(cond_features)) diff --git a/TTS/vocoder/models/parallel_wavegan_discriminator.py b/TTS/vocoder/models/parallel_wavegan_discriminator.py new file mode 100644 index 0000000..d02af75 --- /dev/null +++ b/TTS/vocoder/models/parallel_wavegan_discriminator.py @@ -0,0 +1,187 @@ +import math + +import torch +from torch import nn +from torch.nn.utils.parametrize import remove_parametrizations + +from TTS.vocoder.layers.parallel_wavegan import ResidualBlock + + +class ParallelWaveganDiscriminator(nn.Module): + """PWGAN discriminator as in https://arxiv.org/abs/1910.11480. + It classifies each audio window real/fake and returns a sequence + of predictions. + It is a stack of convolutional blocks with dilation. + """ + + # pylint: disable=dangerous-default-value + def __init__( + self, + in_channels=1, + out_channels=1, + kernel_size=3, + num_layers=10, + conv_channels=64, + dilation_factor=1, + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + bias=True, + ): + super().__init__() + assert (kernel_size - 1) % 2 == 0, " [!] does not support even number kernel size." + assert dilation_factor > 0, " [!] dilation factor must be > 0." 
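+        # The dilation pattern below widens the receptive field layer by layer: with the
+        # default `dilation_factor=1` the i-th layer uses dilation `i` (linear growth),
+        # otherwise it grows geometrically as `dilation_factor ** i`.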
+ self.conv_layers = nn.ModuleList() + conv_in_channels = in_channels + for i in range(num_layers - 1): + if i == 0: + dilation = 1 + else: + dilation = i if dilation_factor == 1 else dilation_factor**i + conv_in_channels = conv_channels + padding = (kernel_size - 1) // 2 * dilation + conv_layer = [ + nn.Conv1d( + conv_in_channels, + conv_channels, + kernel_size=kernel_size, + padding=padding, + dilation=dilation, + bias=bias, + ), + getattr(nn, nonlinear_activation)(inplace=True, **nonlinear_activation_params), + ] + self.conv_layers += conv_layer + padding = (kernel_size - 1) // 2 + last_conv_layer = nn.Conv1d(conv_in_channels, out_channels, kernel_size=kernel_size, padding=padding, bias=bias) + self.conv_layers += [last_conv_layer] + self.apply_weight_norm() + + def forward(self, x): + """ + x : (B, 1, T). + Returns: + Tensor: (B, 1, T) + """ + for f in self.conv_layers: + x = f(x) + return x + + def apply_weight_norm(self): + def _apply_weight_norm(m): + if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d)): + torch.nn.utils.parametrizations.weight_norm(m) + + self.apply(_apply_weight_norm) + + def remove_weight_norm(self): + def _remove_weight_norm(m): + try: + # print(f"Weight norm is removed from {m}.") + remove_parametrizations(m, "weight") + except ValueError: # this module didn't have weight norm + return + + self.apply(_remove_weight_norm) + + +class ResidualParallelWaveganDiscriminator(nn.Module): + # pylint: disable=dangerous-default-value + def __init__( + self, + in_channels=1, + out_channels=1, + kernel_size=3, + num_layers=30, + stacks=3, + res_channels=64, + gate_channels=128, + skip_channels=64, + dropout=0.0, + bias=True, + nonlinear_activation="LeakyReLU", + nonlinear_activation_params={"negative_slope": 0.2}, + ): + super().__init__() + assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size." + + self.in_channels = in_channels + self.out_channels = out_channels + self.num_layers = num_layers + self.stacks = stacks + self.kernel_size = kernel_size + self.res_factor = math.sqrt(1.0 / num_layers) + + # check the number of num_layers and stacks + assert num_layers % stacks == 0 + layers_per_stack = num_layers // stacks + + # define first convolution + self.first_conv = nn.Sequential( + nn.Conv1d(in_channels, res_channels, kernel_size=1, padding=0, dilation=1, bias=True), + getattr(nn, nonlinear_activation)(inplace=True, **nonlinear_activation_params), + ) + + # define residual blocks + self.conv_layers = nn.ModuleList() + for layer in range(num_layers): + dilation = 2 ** (layer % layers_per_stack) + conv = ResidualBlock( + kernel_size=kernel_size, + res_channels=res_channels, + gate_channels=gate_channels, + skip_channels=skip_channels, + aux_channels=-1, + dilation=dilation, + dropout=dropout, + bias=bias, + use_causal_conv=False, + ) + self.conv_layers += [conv] + + # define output layers + self.last_conv_layers = nn.ModuleList( + [ + getattr(nn, nonlinear_activation)(inplace=True, **nonlinear_activation_params), + nn.Conv1d(skip_channels, skip_channels, kernel_size=1, padding=0, dilation=1, bias=True), + getattr(nn, nonlinear_activation)(inplace=True, **nonlinear_activation_params), + nn.Conv1d(skip_channels, out_channels, kernel_size=1, padding=0, dilation=1, bias=True), + ] + ) + + # apply weight norm + self.apply_weight_norm() + + def forward(self, x): + """ + x: (B, 1, T). 
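+        Returns:
+            Tensor: (B, out_channels, T)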
+ """ + x = self.first_conv(x) + + skips = 0 + for f in self.conv_layers: + x, h = f(x, None) + skips += h + skips *= self.res_factor + + # apply final layers + x = skips + for f in self.last_conv_layers: + x = f(x) + return x + + def apply_weight_norm(self): + def _apply_weight_norm(m): + if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d)): + torch.nn.utils.parametrizations.weight_norm(m) + + self.apply(_apply_weight_norm) + + def remove_weight_norm(self): + def _remove_weight_norm(m): + try: + print(f"Weight norm is removed from {m}.") + remove_parametrizations(m, "weight") + except ValueError: # this module didn't have weight norm + return + + self.apply(_remove_weight_norm) diff --git a/TTS/vocoder/models/parallel_wavegan_generator.py b/TTS/vocoder/models/parallel_wavegan_generator.py new file mode 100644 index 0000000..8338d94 --- /dev/null +++ b/TTS/vocoder/models/parallel_wavegan_generator.py @@ -0,0 +1,164 @@ +import math + +import numpy as np +import torch +from torch.nn.utils.parametrize import remove_parametrizations + +from TTS.utils.io import load_fsspec +from TTS.vocoder.layers.parallel_wavegan import ResidualBlock +from TTS.vocoder.layers.upsample import ConvUpsample + + +class ParallelWaveganGenerator(torch.nn.Module): + """PWGAN generator as in https://arxiv.org/pdf/1910.11480.pdf. + It is similar to WaveNet with no causal convolution. + It is conditioned on an aux feature (spectrogram) to generate + an output waveform from an input noise. + """ + + # pylint: disable=dangerous-default-value + def __init__( + self, + in_channels=1, + out_channels=1, + kernel_size=3, + num_res_blocks=30, + stacks=3, + res_channels=64, + gate_channels=128, + skip_channels=64, + aux_channels=80, + dropout=0.0, + bias=True, + use_weight_norm=True, + upsample_factors=[4, 4, 4, 4], + inference_padding=2, + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.aux_channels = aux_channels + self.num_res_blocks = num_res_blocks + self.stacks = stacks + self.kernel_size = kernel_size + self.upsample_factors = upsample_factors + self.upsample_scale = np.prod(upsample_factors) + self.inference_padding = inference_padding + self.use_weight_norm = use_weight_norm + + # check the number of layers and stacks + assert num_res_blocks % stacks == 0 + layers_per_stack = num_res_blocks // stacks + + # define first convolution + self.first_conv = torch.nn.Conv1d(in_channels, res_channels, kernel_size=1, bias=True) + + # define conv + upsampling network + self.upsample_net = ConvUpsample(upsample_factors=upsample_factors) + + # define residual blocks + self.conv_layers = torch.nn.ModuleList() + for layer in range(num_res_blocks): + dilation = 2 ** (layer % layers_per_stack) + conv = ResidualBlock( + kernel_size=kernel_size, + res_channels=res_channels, + gate_channels=gate_channels, + skip_channels=skip_channels, + aux_channels=aux_channels, + dilation=dilation, + dropout=dropout, + bias=bias, + ) + self.conv_layers += [conv] + + # define output layers + self.last_conv_layers = torch.nn.ModuleList( + [ + torch.nn.ReLU(inplace=True), + torch.nn.Conv1d(skip_channels, skip_channels, kernel_size=1, bias=True), + torch.nn.ReLU(inplace=True), + torch.nn.Conv1d(skip_channels, out_channels, kernel_size=1, bias=True), + ] + ) + + # apply weight norm + if use_weight_norm: + self.apply_weight_norm() + + def forward(self, c): + """ + c: (B, C ,T'). 
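+        The noise input is drawn internally; T equals T' times `prod(upsample_factors)`.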
+ o: Output tensor (B, out_channels, T) + """ + # random noise + x = torch.randn([c.shape[0], 1, c.shape[2] * self.upsample_scale]) + x = x.to(self.first_conv.bias.device) + + # perform upsampling + if c is not None and self.upsample_net is not None: + c = self.upsample_net(c) + assert ( + c.shape[-1] == x.shape[-1] + ), f" [!] Upsampling scale does not match the expected output. {c.shape} vs {x.shape}" + + # encode to hidden representation + x = self.first_conv(x) + skips = 0 + for f in self.conv_layers: + x, h = f(x, c) + skips += h + skips *= math.sqrt(1.0 / len(self.conv_layers)) + + # apply final layers + x = skips + for f in self.last_conv_layers: + x = f(x) + + return x + + @torch.no_grad() + def inference(self, c): + c = c.to(self.first_conv.weight.device) + c = torch.nn.functional.pad(c, (self.inference_padding, self.inference_padding), "replicate") + return self.forward(c) + + def remove_weight_norm(self): + def _remove_weight_norm(m): + try: + # print(f"Weight norm is removed from {m}.") + remove_parametrizations(m, "weight") + except ValueError: # this module didn't have weight norm + return + + self.apply(_remove_weight_norm) + + def apply_weight_norm(self): + def _apply_weight_norm(m): + if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d)): + torch.nn.utils.parametrizations.weight_norm(m) + # print(f"Weight norm is applied to {m}.") + + self.apply(_apply_weight_norm) + + @staticmethod + def _get_receptive_field_size(layers, stacks, kernel_size, dilation=lambda x: 2**x): + assert layers % stacks == 0 + layers_per_cycle = layers // stacks + dilations = [dilation(i % layers_per_cycle) for i in range(layers)] + return (kernel_size - 1) * sum(dilations) + 1 + + @property + def receptive_field_size(self): + return self._get_receptive_field_size(self.layers, self.stacks, self.kernel_size) + + def load_checkpoint( + self, config, checkpoint_path, eval=False, cache=False + ): # pylint: disable=unused-argument, redefined-builtin + state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) + self.load_state_dict(state["model"]) + if eval: + self.eval() + assert not self.training + if self.use_weight_norm: + self.remove_weight_norm() diff --git a/TTS/vocoder/models/random_window_discriminator.py b/TTS/vocoder/models/random_window_discriminator.py new file mode 100644 index 0000000..79b68e9 --- /dev/null +++ b/TTS/vocoder/models/random_window_discriminator.py @@ -0,0 +1,203 @@ +import numpy as np +from torch import nn + + +class GBlock(nn.Module): + def __init__(self, in_channels, cond_channels, downsample_factor): + super().__init__() + + self.in_channels = in_channels + self.cond_channels = cond_channels + self.downsample_factor = downsample_factor + + self.start = nn.Sequential( + nn.AvgPool1d(downsample_factor, stride=downsample_factor), + nn.ReLU(), + nn.Conv1d(in_channels, in_channels * 2, kernel_size=3, padding=1), + ) + self.lc_conv1d = nn.Conv1d(cond_channels, in_channels * 2, kernel_size=1) + self.end = nn.Sequential( + nn.ReLU(), nn.Conv1d(in_channels * 2, in_channels * 2, kernel_size=3, dilation=2, padding=2) + ) + self.residual = nn.Sequential( + nn.Conv1d(in_channels, in_channels * 2, kernel_size=1), + nn.AvgPool1d(downsample_factor, stride=downsample_factor), + ) + + def forward(self, inputs, conditions): + outputs = self.start(inputs) + self.lc_conv1d(conditions) + outputs = self.end(outputs) + residual_outputs = self.residual(inputs) + outputs = outputs + residual_outputs + + return outputs + + +class DBlock(nn.Module): + def __init__(self, 
in_channels, out_channels, downsample_factor): + super().__init__() + + self.in_channels = in_channels + self.downsample_factor = downsample_factor + self.out_channels = out_channels + + self.donwsample_layer = nn.AvgPool1d(downsample_factor, stride=downsample_factor) + self.layers = nn.Sequential( + nn.ReLU(), + nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1), + nn.ReLU(), + nn.Conv1d(out_channels, out_channels, kernel_size=3, dilation=2, padding=2), + ) + self.residual = nn.Sequential( + nn.Conv1d(in_channels, out_channels, kernel_size=1), + ) + + def forward(self, inputs): + if self.downsample_factor > 1: + outputs = self.layers(self.donwsample_layer(inputs)) + self.donwsample_layer(self.residual(inputs)) + else: + outputs = self.layers(inputs) + self.residual(inputs) + return outputs + + +class ConditionalDiscriminator(nn.Module): + def __init__(self, in_channels, cond_channels, downsample_factors=(2, 2, 2), out_channels=(128, 256)): + super().__init__() + + assert len(downsample_factors) == len(out_channels) + 1 + + self.in_channels = in_channels + self.cond_channels = cond_channels + self.downsample_factors = downsample_factors + self.out_channels = out_channels + + self.pre_cond_layers = nn.ModuleList() + self.post_cond_layers = nn.ModuleList() + + # layers before condition features + self.pre_cond_layers += [DBlock(in_channels, 64, 1)] + in_channels = 64 + for i, channel in enumerate(out_channels): + self.pre_cond_layers.append(DBlock(in_channels, channel, downsample_factors[i])) + in_channels = channel + + # condition block + self.cond_block = GBlock(in_channels, cond_channels, downsample_factors[-1]) + + # layers after condition block + self.post_cond_layers += [ + DBlock(in_channels * 2, in_channels * 2, 1), + DBlock(in_channels * 2, in_channels * 2, 1), + nn.AdaptiveAvgPool1d(1), + nn.Conv1d(in_channels * 2, 1, kernel_size=1), + ] + + def forward(self, inputs, conditions): + batch_size = inputs.size()[0] + outputs = inputs.view(batch_size, self.in_channels, -1) + for layer in self.pre_cond_layers: + outputs = layer(outputs) + outputs = self.cond_block(outputs, conditions) + for layer in self.post_cond_layers: + outputs = layer(outputs) + + return outputs + + +class UnconditionalDiscriminator(nn.Module): + def __init__(self, in_channels, base_channels=64, downsample_factors=(8, 4), out_channels=(128, 256)): + super().__init__() + + self.downsample_factors = downsample_factors + self.in_channels = in_channels + self.downsample_factors = downsample_factors + self.out_channels = out_channels + + self.layers = nn.ModuleList() + self.layers += [DBlock(self.in_channels, base_channels, 1)] + in_channels = base_channels + for i, factor in enumerate(downsample_factors): + self.layers.append(DBlock(in_channels, out_channels[i], factor)) + in_channels *= 2 + self.layers += [ + DBlock(in_channels, in_channels, 1), + DBlock(in_channels, in_channels, 1), + nn.AdaptiveAvgPool1d(1), + nn.Conv1d(in_channels, 1, kernel_size=1), + ] + + def forward(self, inputs): + batch_size = inputs.size()[0] + outputs = inputs.view(batch_size, self.in_channels, -1) + for layer in self.layers: + outputs = layer(outputs) + return outputs + + +class RandomWindowDiscriminator(nn.Module): + """Random Window Discriminator as described in + http://arxiv.org/abs/1909.11646""" + + def __init__( + self, + cond_channels, + hop_length, + uncond_disc_donwsample_factors=(8, 4), + cond_disc_downsample_factors=((8, 4, 2, 2, 2), (8, 4, 2, 2), (8, 4, 2), (8, 4), (4, 2, 2)), + cond_disc_out_channels=((128, 128, 
256, 256), (128, 256, 256), (128, 256), (256,), (128, 256)), + window_sizes=(512, 1024, 2048, 4096, 8192), + ): + super().__init__() + self.cond_channels = cond_channels + self.window_sizes = window_sizes + self.hop_length = hop_length + self.base_window_size = self.hop_length * 2 + self.ks = [ws // self.base_window_size for ws in window_sizes] + + # check arguments + assert len(cond_disc_downsample_factors) == len(cond_disc_out_channels) == len(window_sizes) + for ws in window_sizes: + assert ws % hop_length == 0 + + for idx, cf in enumerate(cond_disc_downsample_factors): + assert np.prod(cf) == hop_length // self.ks[idx] + + # define layers + self.unconditional_discriminators = nn.ModuleList([]) + for k in self.ks: + layer = UnconditionalDiscriminator( + in_channels=k, base_channels=64, downsample_factors=uncond_disc_donwsample_factors + ) + self.unconditional_discriminators.append(layer) + + self.conditional_discriminators = nn.ModuleList([]) + for idx, k in enumerate(self.ks): + layer = ConditionalDiscriminator( + in_channels=k, + cond_channels=cond_channels, + downsample_factors=cond_disc_downsample_factors[idx], + out_channels=cond_disc_out_channels[idx], + ) + self.conditional_discriminators.append(layer) + + def forward(self, x, c): + scores = [] + feats = [] + # unconditional pass + for window_size, layer in zip(self.window_sizes, self.unconditional_discriminators): + index = np.random.randint(x.shape[-1] - window_size) + + score = layer(x[:, :, index : index + window_size]) + scores.append(score) + + # conditional pass + for window_size, layer in zip(self.window_sizes, self.conditional_discriminators): + frame_size = window_size // self.hop_length + lc_index = np.random.randint(c.shape[-1] - frame_size) + sample_index = lc_index * self.hop_length + x_sub = x[:, :, sample_index : (lc_index + frame_size) * self.hop_length] + c_sub = c[:, :, lc_index : lc_index + frame_size] + + score = layer(x_sub, c_sub) + scores.append(score) + return scores, feats diff --git a/TTS/vocoder/models/univnet_discriminator.py b/TTS/vocoder/models/univnet_discriminator.py new file mode 100644 index 0000000..497d67a --- /dev/null +++ b/TTS/vocoder/models/univnet_discriminator.py @@ -0,0 +1,95 @@ +import torch +import torch.nn.functional as F +from torch import nn +from torch.nn.utils import spectral_norm +from torch.nn.utils.parametrizations import weight_norm + +from TTS.utils.audio.torch_transforms import TorchSTFT +from TTS.vocoder.models.hifigan_discriminator import MultiPeriodDiscriminator + +LRELU_SLOPE = 0.1 + + +class SpecDiscriminator(nn.Module): + """docstring for Discriminator.""" + + def __init__(self, fft_size=1024, hop_length=120, win_length=600, use_spectral_norm=False): + super().__init__() + norm_f = weight_norm if use_spectral_norm is False else spectral_norm + self.fft_size = fft_size + self.hop_length = hop_length + self.win_length = win_length + self.stft = TorchSTFT(fft_size, hop_length, win_length) + self.discriminators = nn.ModuleList( + [ + norm_f(nn.Conv2d(1, 32, kernel_size=(3, 9), padding=(1, 4))), + norm_f(nn.Conv2d(32, 32, kernel_size=(3, 9), stride=(1, 2), padding=(1, 4))), + norm_f(nn.Conv2d(32, 32, kernel_size=(3, 9), stride=(1, 2), padding=(1, 4))), + norm_f(nn.Conv2d(32, 32, kernel_size=(3, 9), stride=(1, 2), padding=(1, 4))), + norm_f(nn.Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))), + ] + ) + + self.out = norm_f(nn.Conv2d(32, 1, 3, 1, 1)) + + def forward(self, y): + fmap = [] + with torch.no_grad(): + y = y.squeeze(1) + y = self.stft(y) + y = 
y.unsqueeze(1) + for _, d in enumerate(self.discriminators): + y = d(y) + y = F.leaky_relu(y, LRELU_SLOPE) + fmap.append(y) + + y = self.out(y) + fmap.append(y) + + return torch.flatten(y, 1, -1), fmap + + +class MultiResSpecDiscriminator(torch.nn.Module): + def __init__( # pylint: disable=dangerous-default-value + self, fft_sizes=[1024, 2048, 512], hop_sizes=[120, 240, 50], win_lengths=[600, 1200, 240], window="hann_window" + ): + super().__init__() + self.discriminators = nn.ModuleList( + [ + SpecDiscriminator(fft_sizes[0], hop_sizes[0], win_lengths[0], window), + SpecDiscriminator(fft_sizes[1], hop_sizes[1], win_lengths[1], window), + SpecDiscriminator(fft_sizes[2], hop_sizes[2], win_lengths[2], window), + ] + ) + + def forward(self, x): + scores = [] + feats = [] + for d in self.discriminators: + score, feat = d(x) + scores.append(score) + feats.append(feat) + + return scores, feats + + +class UnivnetDiscriminator(nn.Module): + """Univnet discriminator wrapping MPD and MSD.""" + + def __init__(self): + super().__init__() + self.mpd = MultiPeriodDiscriminator() + self.msd = MultiResSpecDiscriminator() + + def forward(self, x): + """ + Args: + x (Tensor): input waveform. + + Returns: + List[Tensor]: discriminator scores. + List[List[Tensor]]: list of list of features from each layers of each discriminator. + """ + scores, feats = self.mpd(x) + scores_, feats_ = self.msd(x) + return scores + scores_, feats + feats_ diff --git a/TTS/vocoder/models/univnet_generator.py b/TTS/vocoder/models/univnet_generator.py new file mode 100644 index 0000000..5e66b70 --- /dev/null +++ b/TTS/vocoder/models/univnet_generator.py @@ -0,0 +1,157 @@ +from typing import List + +import numpy as np +import torch +import torch.nn.functional as F +from torch.nn.utils import parametrize + +from TTS.vocoder.layers.lvc_block import LVCBlock + +LRELU_SLOPE = 0.1 + + +class UnivnetGenerator(torch.nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + hidden_channels: int, + cond_channels: int, + upsample_factors: List[int], + lvc_layers_each_block: int, + lvc_kernel_size: int, + kpnet_hidden_channels: int, + kpnet_conv_size: int, + dropout: float, + use_weight_norm=True, + ): + """Univnet Generator network. + + Paper: https://arxiv.org/pdf/2106.07889.pdf + + Args: + in_channels (int): Number of input tensor channels. + out_channels (int): Number of channels of the output tensor. + hidden_channels (int): Number of hidden network channels. + cond_channels (int): Number of channels of the conditioning tensors. + upsample_factors (List[int]): List of uplsample factors for the upsampling layers. + lvc_layers_each_block (int): Number of LVC layers in each block. + lvc_kernel_size (int): Kernel size of the LVC layers. + kpnet_hidden_channels (int): Number of hidden channels in the key-point network. + kpnet_conv_size (int): Number of convolution channels in the key-point network. + dropout (float): Dropout rate. + use_weight_norm (bool, optional): Enable/disable weight norm. Defaults to True. 
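+
+        Example:
+            The product of `upsample_factors` is the number of waveform samples produced
+            per conditioning frame, so it typically matches the feature hop length
+            (a sketch, not the published configuration):
+
+            >>> import numpy as np
+            >>> int(np.prod([8, 8, 4]))
+            256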
+ """ + + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.cond_channels = cond_channels + self.upsample_scale = np.prod(upsample_factors) + self.lvc_block_nums = len(upsample_factors) + + # define first convolution + self.first_conv = torch.nn.Conv1d( + in_channels, hidden_channels, kernel_size=7, padding=(7 - 1) // 2, dilation=1, bias=True + ) + + # define residual blocks + self.lvc_blocks = torch.nn.ModuleList() + cond_hop_length = 1 + for n in range(self.lvc_block_nums): + cond_hop_length = cond_hop_length * upsample_factors[n] + lvcb = LVCBlock( + in_channels=hidden_channels, + cond_channels=cond_channels, + upsample_ratio=upsample_factors[n], + conv_layers=lvc_layers_each_block, + conv_kernel_size=lvc_kernel_size, + cond_hop_length=cond_hop_length, + kpnet_hidden_channels=kpnet_hidden_channels, + kpnet_conv_size=kpnet_conv_size, + kpnet_dropout=dropout, + ) + self.lvc_blocks += [lvcb] + + # define output layers + self.last_conv_layers = torch.nn.ModuleList( + [ + torch.nn.Conv1d( + hidden_channels, out_channels, kernel_size=7, padding=(7 - 1) // 2, dilation=1, bias=True + ), + ] + ) + + # apply weight norm + if use_weight_norm: + self.apply_weight_norm() + + def forward(self, c): + """Calculate forward propagation. + Args: + c (Tensor): Local conditioning auxiliary features (B, C ,T'). + Returns: + Tensor: Output tensor (B, out_channels, T) + """ + # random noise + x = torch.randn([c.shape[0], self.in_channels, c.shape[2]]) + x = x.to(self.first_conv.bias.device) + x = self.first_conv(x) + + for n in range(self.lvc_block_nums): + x = self.lvc_blocks[n](x, c) + + # apply final layers + for f in self.last_conv_layers: + x = F.leaky_relu(x, LRELU_SLOPE) + x = f(x) + x = torch.tanh(x) + return x + + def remove_weight_norm(self): + """Remove weight normalization module from all of the layers.""" + + def _remove_weight_norm(m): + try: + # print(f"Weight norm is removed from {m}.") + parametrize.remove_parametrizations(m, "weight") + except ValueError: # this module didn't have weight norm + return + + self.apply(_remove_weight_norm) + + def apply_weight_norm(self): + """Apply weight normalization module from all of the layers.""" + + def _apply_weight_norm(m): + if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d)): + torch.nn.utils.parametrizations.weight_norm(m) + # print(f"Weight norm is applied to {m}.") + + self.apply(_apply_weight_norm) + + @staticmethod + def _get_receptive_field_size(layers, stacks, kernel_size, dilation=lambda x: 2**x): + assert layers % stacks == 0 + layers_per_cycle = layers // stacks + dilations = [dilation(i % layers_per_cycle) for i in range(layers)] + return (kernel_size - 1) * sum(dilations) + 1 + + @property + def receptive_field_size(self): + """Return receptive field size.""" + return self._get_receptive_field_size(self.layers, self.stacks, self.kernel_size) + + @torch.no_grad() + def inference(self, c): + """Perform inference. + Args: + c (Tensor): Local conditioning auxiliary features :math:`(B, C, T)`. 
+ Returns: + Tensor: Output tensor (T, out_channels) + """ + x = torch.randn([c.shape[0], self.in_channels, c.shape[2]]) + x = x.to(self.first_conv.bias.device) + + c = c.to(next(self.parameters())) + return self.forward(c) diff --git a/TTS/vocoder/models/wavegrad.py b/TTS/vocoder/models/wavegrad.py new file mode 100644 index 0000000..c1166e0 --- /dev/null +++ b/TTS/vocoder/models/wavegrad.py @@ -0,0 +1,345 @@ +from dataclasses import dataclass, field +from typing import Dict, List, Tuple + +import numpy as np +import torch +from coqpit import Coqpit +from torch import nn +from torch.nn.utils.parametrizations import weight_norm +from torch.nn.utils.parametrize import remove_parametrizations +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler +from trainer.trainer_utils import get_optimizer, get_scheduler + +from TTS.utils.io import load_fsspec +from TTS.vocoder.datasets import WaveGradDataset +from TTS.vocoder.layers.wavegrad import Conv1d, DBlock, FiLM, UBlock +from TTS.vocoder.models.base_vocoder import BaseVocoder +from TTS.vocoder.utils.generic_utils import plot_results + + +@dataclass +class WavegradArgs(Coqpit): + in_channels: int = 80 + out_channels: int = 1 + use_weight_norm: bool = False + y_conv_channels: int = 32 + x_conv_channels: int = 768 + dblock_out_channels: List[int] = field(default_factory=lambda: [128, 128, 256, 512]) + ublock_out_channels: List[int] = field(default_factory=lambda: [512, 512, 256, 128, 128]) + upsample_factors: List[int] = field(default_factory=lambda: [4, 4, 4, 2, 2]) + upsample_dilations: List[List[int]] = field( + default_factory=lambda: [[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 4, 8], [1, 2, 4, 8], [1, 2, 4, 8]] + ) + + +class Wavegrad(BaseVocoder): + """🐸 🌊 WaveGrad 🌊 model. + Paper - https://arxiv.org/abs/2009.00713 + + Examples: + Initializing the model. + + >>> from TTS.vocoder.configs import WavegradConfig + >>> config = WavegradConfig() + >>> model = Wavegrad(config) + + Paper Abstract: + This paper introduces WaveGrad, a conditional model for waveform generation which estimates gradients of the + data density. The model is built on prior work on score matching and diffusion probabilistic models. It starts + from a Gaussian white noise signal and iteratively refines the signal via a gradient-based sampler conditioned + on the mel-spectrogram. WaveGrad offers a natural way to trade inference speed for sample quality by adjusting + the number of refinement steps, and bridges the gap between non-autoregressive and autoregressive models in + terms of audio quality. We find that it can generate high fidelity audio samples using as few as six iterations. + Experiments reveal WaveGrad to generate high fidelity audio, outperforming adversarial non-autoregressive + baselines and matching a strong likelihood-based autoregressive baseline using fewer sequential operations. + Audio samples are available at this https URL. 
+ """ + + # pylint: disable=dangerous-default-value + def __init__(self, config: Coqpit): + super().__init__(config) + self.config = config + self.use_weight_norm = config.model_params.use_weight_norm + self.hop_len = np.prod(config.model_params.upsample_factors) + self.noise_level = None + self.num_steps = None + self.beta = None + self.alpha = None + self.alpha_hat = None + self.c1 = None + self.c2 = None + self.sigma = None + + # dblocks + self.y_conv = Conv1d(1, config.model_params.y_conv_channels, 5, padding=2) + self.dblocks = nn.ModuleList([]) + ic = config.model_params.y_conv_channels + for oc, df in zip(config.model_params.dblock_out_channels, reversed(config.model_params.upsample_factors)): + self.dblocks.append(DBlock(ic, oc, df)) + ic = oc + + # film + self.film = nn.ModuleList([]) + ic = config.model_params.y_conv_channels + for oc in reversed(config.model_params.ublock_out_channels): + self.film.append(FiLM(ic, oc)) + ic = oc + + # ublocksn + self.ublocks = nn.ModuleList([]) + ic = config.model_params.x_conv_channels + for oc, uf, ud in zip( + config.model_params.ublock_out_channels, + config.model_params.upsample_factors, + config.model_params.upsample_dilations, + ): + self.ublocks.append(UBlock(ic, oc, uf, ud)) + ic = oc + + self.x_conv = Conv1d(config.model_params.in_channels, config.model_params.x_conv_channels, 3, padding=1) + self.out_conv = Conv1d(oc, config.model_params.out_channels, 3, padding=1) + + if config.model_params.use_weight_norm: + self.apply_weight_norm() + + def forward(self, x, spectrogram, noise_scale): + shift_and_scale = [] + + x = self.y_conv(x) + shift_and_scale.append(self.film[0](x, noise_scale)) + + for film, layer in zip(self.film[1:], self.dblocks): + x = layer(x) + shift_and_scale.append(film(x, noise_scale)) + + x = self.x_conv(spectrogram) + for layer, (film_shift, film_scale) in zip(self.ublocks, reversed(shift_and_scale)): + x = layer(x, film_shift, film_scale) + x = self.out_conv(x) + return x + + def load_noise_schedule(self, path): + beta = np.load(path, allow_pickle=True).item()["beta"] # pylint: disable=unexpected-keyword-arg + self.compute_noise_level(beta) + + @torch.no_grad() + def inference(self, x, y_n=None): + """ + Shapes: + x: :math:`[B, C , T]` + y_n: :math:`[B, 1, T]` + """ + if y_n is None: + y_n = torch.randn(x.shape[0], 1, self.hop_len * x.shape[-1]) + else: + y_n = torch.FloatTensor(y_n).unsqueeze(0).unsqueeze(0) + y_n = y_n.type_as(x) + sqrt_alpha_hat = self.noise_level.to(x) + for n in range(len(self.alpha) - 1, -1, -1): + y_n = self.c1[n] * (y_n - self.c2[n] * self.forward(y_n, x, sqrt_alpha_hat[n].repeat(x.shape[0]))) + if n > 0: + z = torch.randn_like(y_n) + y_n += self.sigma[n - 1] * z + y_n.clamp_(-1.0, 1.0) + return y_n + + def compute_y_n(self, y_0): + """Compute noisy audio based on noise schedule""" + self.noise_level = self.noise_level.to(y_0) + if len(y_0.shape) == 3: + y_0 = y_0.squeeze(1) + s = torch.randint(0, self.num_steps - 1, [y_0.shape[0]]) + l_a, l_b = self.noise_level[s], self.noise_level[s + 1] + noise_scale = l_a + torch.rand(y_0.shape[0]).to(y_0) * (l_b - l_a) + noise_scale = noise_scale.unsqueeze(1) + noise = torch.randn_like(y_0) + noisy_audio = noise_scale * y_0 + (1.0 - noise_scale**2) ** 0.5 * noise + return noise.unsqueeze(1), noisy_audio.unsqueeze(1), noise_scale[:, 0] + + def compute_noise_level(self, beta): + """Compute noise schedule parameters""" + self.num_steps = len(beta) + alpha = 1 - beta + alpha_hat = np.cumprod(alpha) + noise_level = np.concatenate([[1.0], alpha_hat**0.5], 
axis=0) + noise_level = alpha_hat**0.5 + + # pylint: disable=not-callable + self.beta = torch.tensor(beta.astype(np.float32)) + self.alpha = torch.tensor(alpha.astype(np.float32)) + self.alpha_hat = torch.tensor(alpha_hat.astype(np.float32)) + self.noise_level = torch.tensor(noise_level.astype(np.float32)) + + self.c1 = 1 / self.alpha**0.5 + self.c2 = (1 - self.alpha) / (1 - self.alpha_hat) ** 0.5 + self.sigma = ((1.0 - self.alpha_hat[:-1]) / (1.0 - self.alpha_hat[1:]) * self.beta[1:]) ** 0.5 + + def remove_weight_norm(self): + for _, layer in enumerate(self.dblocks): + if len(layer.state_dict()) != 0: + try: + remove_parametrizations(layer, "weight") + except ValueError: + layer.remove_weight_norm() + + for _, layer in enumerate(self.film): + if len(layer.state_dict()) != 0: + try: + remove_parametrizations(layer, "weight") + except ValueError: + layer.remove_weight_norm() + + for _, layer in enumerate(self.ublocks): + if len(layer.state_dict()) != 0: + try: + remove_parametrizations(layer, "weight") + except ValueError: + layer.remove_weight_norm() + + remove_parametrizations(self.x_conv, "weight") + remove_parametrizations(self.out_conv, "weight") + remove_parametrizations(self.y_conv, "weight") + + def apply_weight_norm(self): + for _, layer in enumerate(self.dblocks): + if len(layer.state_dict()) != 0: + layer.apply_weight_norm() + + for _, layer in enumerate(self.film): + if len(layer.state_dict()) != 0: + layer.apply_weight_norm() + + for _, layer in enumerate(self.ublocks): + if len(layer.state_dict()) != 0: + layer.apply_weight_norm() + + self.x_conv = weight_norm(self.x_conv) + self.out_conv = weight_norm(self.out_conv) + self.y_conv = weight_norm(self.y_conv) + + def load_checkpoint( + self, config, checkpoint_path, eval=False, cache=False + ): # pylint: disable=unused-argument, redefined-builtin + state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) + self.load_state_dict(state["model"]) + if eval: + self.eval() + assert not self.training + if self.config.model_params.use_weight_norm: + self.remove_weight_norm() + betas = np.linspace( + config["test_noise_schedule"]["min_val"], + config["test_noise_schedule"]["max_val"], + config["test_noise_schedule"]["num_steps"], + ) + self.compute_noise_level(betas) + else: + betas = np.linspace( + config["train_noise_schedule"]["min_val"], + config["train_noise_schedule"]["max_val"], + config["train_noise_schedule"]["num_steps"], + ) + self.compute_noise_level(betas) + + def train_step(self, batch: Dict, criterion: Dict) -> Tuple[Dict, Dict]: + # format data + x = batch["input"] + y = batch["waveform"] + + # set noise scale + noise, x_noisy, noise_scale = self.compute_y_n(y) + + # forward pass + noise_hat = self.forward(x_noisy, x, noise_scale) + + # compute losses + loss = criterion(noise, noise_hat) + return {"model_output": noise_hat}, {"loss": loss} + + def train_log( # pylint: disable=no-self-use + self, batch: Dict, outputs: Dict, logger: "Logger", assets: Dict, steps: int # pylint: disable=unused-argument + ) -> Tuple[Dict, np.ndarray]: + pass + + @torch.no_grad() + def eval_step(self, batch: Dict, criterion: nn.Module) -> Tuple[Dict, Dict]: + return self.train_step(batch, criterion) + + def eval_log( # pylint: disable=no-self-use + self, batch: Dict, outputs: Dict, logger: "Logger", assets: Dict, steps: int # pylint: disable=unused-argument + ) -> None: + pass + + def test(self, assets: Dict, test_loader: "DataLoader", outputs=None): # pylint: disable=unused-argument + # setup noise schedule and 
inference + ap = assets["audio_processor"] + noise_schedule = self.config["test_noise_schedule"] + betas = np.linspace(noise_schedule["min_val"], noise_schedule["max_val"], noise_schedule["num_steps"]) + self.compute_noise_level(betas) + samples = test_loader.dataset.load_test_samples(1) + for sample in samples: + x = sample[0] + x = x[None, :, :].to(next(self.parameters()).device) + y = sample[1] + y = y[None, :] + # compute voice + y_pred = self.inference(x) + # compute spectrograms + figures = plot_results(y_pred, y, ap, "test") + # Sample audio + sample_voice = y_pred[0].squeeze(0).detach().cpu().numpy() + return figures, {"test/audio": sample_voice} + + def get_optimizer(self): + return get_optimizer(self.config.optimizer, self.config.optimizer_params, self.config.lr, self) + + def get_scheduler(self, optimizer): + return get_scheduler(self.config.lr_scheduler, self.config.lr_scheduler_params, optimizer) + + @staticmethod + def get_criterion(): + return torch.nn.L1Loss() + + @staticmethod + def format_batch(batch: Dict) -> Dict: + # return a whole audio segment + m, y = batch[0], batch[1] + y = y.unsqueeze(1) + return {"input": m, "waveform": y} + + def get_data_loader(self, config: Coqpit, assets: Dict, is_eval: True, samples: List, verbose: bool, num_gpus: int): + ap = assets["audio_processor"] + dataset = WaveGradDataset( + ap=ap, + items=samples, + seq_len=self.config.seq_len, + hop_len=ap.hop_length, + pad_short=self.config.pad_short, + conv_pad=self.config.conv_pad, + is_training=not is_eval, + return_segments=True, + use_noise_augment=False, + use_cache=config.use_cache, + verbose=verbose, + ) + sampler = DistributedSampler(dataset) if num_gpus > 1 else None + loader = DataLoader( + dataset, + batch_size=self.config.batch_size, + shuffle=num_gpus <= 1, + drop_last=False, + sampler=sampler, + num_workers=self.config.num_eval_loader_workers if is_eval else self.config.num_loader_workers, + pin_memory=False, + ) + return loader + + def on_epoch_start(self, trainer): # pylint: disable=unused-argument + noise_schedule = self.config["train_noise_schedule"] + betas = np.linspace(noise_schedule["min_val"], noise_schedule["max_val"], noise_schedule["num_steps"]) + self.compute_noise_level(betas) + + @staticmethod + def init_from_config(config: "WavegradConfig"): + return Wavegrad(config) diff --git a/TTS/vocoder/models/wavernn.py b/TTS/vocoder/models/wavernn.py new file mode 100644 index 0000000..7f74ba3 --- /dev/null +++ b/TTS/vocoder/models/wavernn.py @@ -0,0 +1,646 @@ +import sys +import time +from dataclasses import dataclass, field +from typing import Dict, List, Tuple + +import numpy as np +import torch +import torch.nn.functional as F +from coqpit import Coqpit +from torch import nn +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler + +from TTS.tts.utils.visual import plot_spectrogram +from TTS.utils.audio import AudioProcessor +from TTS.utils.audio.numpy_transforms import mulaw_decode +from TTS.utils.io import load_fsspec +from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset +from TTS.vocoder.layers.losses import WaveRNNLoss +from TTS.vocoder.models.base_vocoder import BaseVocoder +from TTS.vocoder.utils.distribution import sample_from_discretized_mix_logistic, sample_from_gaussian + + +def stream(string, variables): + sys.stdout.write(f"\r{string}" % variables) + + +# pylint: disable=abstract-method +# relates https://github.com/pytorch/pytorch/issues/42305 +class ResBlock(nn.Module): + def __init__(self, dims): + 
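+ # two 1x1 convolutions with batch normalization; forward() adds the input back as a residual connection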
super().__init__() + self.conv1 = nn.Conv1d(dims, dims, kernel_size=1, bias=False) + self.conv2 = nn.Conv1d(dims, dims, kernel_size=1, bias=False) + self.batch_norm1 = nn.BatchNorm1d(dims) + self.batch_norm2 = nn.BatchNorm1d(dims) + + def forward(self, x): + residual = x + x = self.conv1(x) + x = self.batch_norm1(x) + x = F.relu(x) + x = self.conv2(x) + x = self.batch_norm2(x) + return x + residual + + +class MelResNet(nn.Module): + def __init__(self, num_res_blocks, in_dims, compute_dims, res_out_dims, pad): + super().__init__() + k_size = pad * 2 + 1 + self.conv_in = nn.Conv1d(in_dims, compute_dims, kernel_size=k_size, bias=False) + self.batch_norm = nn.BatchNorm1d(compute_dims) + self.layers = nn.ModuleList() + for _ in range(num_res_blocks): + self.layers.append(ResBlock(compute_dims)) + self.conv_out = nn.Conv1d(compute_dims, res_out_dims, kernel_size=1) + + def forward(self, x): + x = self.conv_in(x) + x = self.batch_norm(x) + x = F.relu(x) + for f in self.layers: + x = f(x) + x = self.conv_out(x) + return x + + +class Stretch2d(nn.Module): + def __init__(self, x_scale, y_scale): + super().__init__() + self.x_scale = x_scale + self.y_scale = y_scale + + def forward(self, x): + b, c, h, w = x.size() + x = x.unsqueeze(-1).unsqueeze(3) + x = x.repeat(1, 1, 1, self.y_scale, 1, self.x_scale) + return x.view(b, c, h * self.y_scale, w * self.x_scale) + + +class UpsampleNetwork(nn.Module): + def __init__( + self, + feat_dims, + upsample_scales, + compute_dims, + num_res_blocks, + res_out_dims, + pad, + use_aux_net, + ): + super().__init__() + self.total_scale = np.cumproduct(upsample_scales)[-1] + self.indent = pad * self.total_scale + self.use_aux_net = use_aux_net + if use_aux_net: + self.resnet = MelResNet(num_res_blocks, feat_dims, compute_dims, res_out_dims, pad) + self.resnet_stretch = Stretch2d(self.total_scale, 1) + self.up_layers = nn.ModuleList() + for scale in upsample_scales: + k_size = (1, scale * 2 + 1) + padding = (0, scale) + stretch = Stretch2d(scale, 1) + conv = nn.Conv2d(1, 1, kernel_size=k_size, padding=padding, bias=False) + conv.weight.data.fill_(1.0 / k_size[1]) + self.up_layers.append(stretch) + self.up_layers.append(conv) + + def forward(self, m): + if self.use_aux_net: + aux = self.resnet(m).unsqueeze(1) + aux = self.resnet_stretch(aux) + aux = aux.squeeze(1) + aux = aux.transpose(1, 2) + else: + aux = None + m = m.unsqueeze(1) + for f in self.up_layers: + m = f(m) + m = m.squeeze(1)[:, :, self.indent : -self.indent] + return m.transpose(1, 2), aux + + +class Upsample(nn.Module): + def __init__(self, scale, pad, num_res_blocks, feat_dims, compute_dims, res_out_dims, use_aux_net): + super().__init__() + self.scale = scale + self.pad = pad + self.indent = pad * scale + self.use_aux_net = use_aux_net + self.resnet = MelResNet(num_res_blocks, feat_dims, compute_dims, res_out_dims, pad) + + def forward(self, m): + if self.use_aux_net: + aux = self.resnet(m) + aux = torch.nn.functional.interpolate(aux, scale_factor=self.scale, mode="linear", align_corners=True) + aux = aux.transpose(1, 2) + else: + aux = None + m = torch.nn.functional.interpolate(m, scale_factor=self.scale, mode="linear", align_corners=True) + m = m[:, :, self.indent : -self.indent] + m = m * 0.045 # empirically found + + return m.transpose(1, 2), aux + + +@dataclass +class WavernnArgs(Coqpit): + """🐸 WaveRNN model arguments. + + rnn_dims (int): + Number of hidden channels in RNN layers. Defaults to 512. + fc_dims (int): + Number of hidden channels in fully-conntected layers. Defaults to 512. 
+ compute_dims (int): + Number of hidden channels in the feature ResNet. Defaults to 128. + res_out_dims (int): + Number of hidden channels in the feature ResNet output. Defaults to 128. + num_res_blocks (int): + Number of residual blocks in the ResNet. Defaults to 10. + use_aux_net (bool): + enable/disable the feature ResNet. Defaults to True. + use_upsample_net (bool): + enable/disable the upsampling network. If False, basic upsampling is used. Defaults to True. + upsample_factors (list): + Upsampling factors. The product of the values must equal the `hop_length`. Defaults to ```[4, 8, 8]```. + mode (str): + Output mode of the WaveRNN vocoder. `mold` for Mixture of Logistic Distribution, `gauss` for a single + Gaussian Distribution and `bits` for quantized bits as the model's output. + mulaw (bool): + enable/disable the use of Mulaw quantization for training. Only applicable if `mode == 'bits'`. Defaults + to `True`. + pad (int): + Padding applied to the input feature frames against the convolution layers of the feature network. + Defaults to 2. + """ + + rnn_dims: int = 512 + fc_dims: int = 512 + compute_dims: int = 128 + res_out_dims: int = 128 + num_res_blocks: int = 10 + use_aux_net: bool = True + use_upsample_net: bool = True + upsample_factors: List[int] = field(default_factory=lambda: [4, 8, 8]) + mode: str = "mold" # mold [string], gauss [string], bits [int] + mulaw: bool = True # apply mulaw if mode is bits + pad: int = 2 + feat_dims: int = 80 + + +class Wavernn(BaseVocoder): + def __init__(self, config: Coqpit): + """🐸 WaveRNN model. + Original paper - https://arxiv.org/abs/1802.08435 + Official implementation - https://github.com/fatchord/WaveRNN + + Args: + config (Coqpit): Model configuration. + + Raises: + RuntimeError: If the given output `mode` is not recognised. + + Examples: + >>> from TTS.vocoder.configs import WavernnConfig + >>> config = WavernnConfig() + >>> model = Wavernn(config) + + Paper Abstract: + Sequential models achieve state-of-the-art results in audio, visual and textual domains with respect to + both estimating the data distribution and generating high-quality samples. Efficient sampling for this + class of models has however remained an elusive problem. With a focus on text-to-speech synthesis, we + describe a set of general techniques for reducing sampling time while maintaining high output quality. + We first describe a single-layer recurrent neural network, the WaveRNN, with a dual softmax layer that + matches the quality of the state-of-the-art WaveNet model. The compact form of the network makes it + possible to generate 24kHz 16-bit audio 4x faster than real time on a GPU. Second, we apply a weight + pruning technique to reduce the number of weights in the WaveRNN. We find that, for a constant number of + parameters, large sparse networks perform better than small dense networks and this relationship holds for + sparsity levels beyond 96%. The small number of weights in a Sparse WaveRNN makes it possible to sample + high-fidelity audio on a mobile CPU in real time. Finally, we propose a new generation scheme based on + subscaling that folds a long sequence into a batch of shorter sequences and allows one to generate multiple + samples at once. The Subscale WaveRNN produces 16 samples per step without loss of quality and offers an + orthogonal method for increasing sampling efficiency. 
+ """ + super().__init__(config) + + if isinstance(self.args.mode, int): + self.n_classes = 2**self.args.mode + elif self.args.mode == "mold": + self.n_classes = 3 * 10 + elif self.args.mode == "gauss": + self.n_classes = 2 + else: + raise RuntimeError("Unknown model mode value - ", self.args.mode) + + self.ap = AudioProcessor(**config.audio.to_dict()) + self.aux_dims = self.args.res_out_dims // 4 + + if self.args.use_upsample_net: + assert ( + np.cumproduct(self.args.upsample_factors)[-1] == config.audio.hop_length + ), " [!] upsample scales needs to be equal to hop_length" + self.upsample = UpsampleNetwork( + self.args.feat_dims, + self.args.upsample_factors, + self.args.compute_dims, + self.args.num_res_blocks, + self.args.res_out_dims, + self.args.pad, + self.args.use_aux_net, + ) + else: + self.upsample = Upsample( + config.audio.hop_length, + self.args.pad, + self.args.num_res_blocks, + self.args.feat_dims, + self.args.compute_dims, + self.args.res_out_dims, + self.args.use_aux_net, + ) + if self.args.use_aux_net: + self.I = nn.Linear(self.args.feat_dims + self.aux_dims + 1, self.args.rnn_dims) + self.rnn1 = nn.GRU(self.args.rnn_dims, self.args.rnn_dims, batch_first=True) + self.rnn2 = nn.GRU(self.args.rnn_dims + self.aux_dims, self.args.rnn_dims, batch_first=True) + self.fc1 = nn.Linear(self.args.rnn_dims + self.aux_dims, self.args.fc_dims) + self.fc2 = nn.Linear(self.args.fc_dims + self.aux_dims, self.args.fc_dims) + self.fc3 = nn.Linear(self.args.fc_dims, self.n_classes) + else: + self.I = nn.Linear(self.args.feat_dims + 1, self.args.rnn_dims) + self.rnn1 = nn.GRU(self.args.rnn_dims, self.args.rnn_dims, batch_first=True) + self.rnn2 = nn.GRU(self.args.rnn_dims, self.args.rnn_dims, batch_first=True) + self.fc1 = nn.Linear(self.args.rnn_dims, self.args.fc_dims) + self.fc2 = nn.Linear(self.args.fc_dims, self.args.fc_dims) + self.fc3 = nn.Linear(self.args.fc_dims, self.n_classes) + + def forward(self, x, mels): + bsize = x.size(0) + h1 = torch.zeros(1, bsize, self.args.rnn_dims).to(x.device) + h2 = torch.zeros(1, bsize, self.args.rnn_dims).to(x.device) + mels, aux = self.upsample(mels) + + if self.args.use_aux_net: + aux_idx = [self.aux_dims * i for i in range(5)] + a1 = aux[:, :, aux_idx[0] : aux_idx[1]] + a2 = aux[:, :, aux_idx[1] : aux_idx[2]] + a3 = aux[:, :, aux_idx[2] : aux_idx[3]] + a4 = aux[:, :, aux_idx[3] : aux_idx[4]] + + x = ( + torch.cat([x.unsqueeze(-1), mels, a1], dim=2) + if self.args.use_aux_net + else torch.cat([x.unsqueeze(-1), mels], dim=2) + ) + x = self.I(x) + res = x + self.rnn1.flatten_parameters() + x, _ = self.rnn1(x, h1) + + x = x + res + res = x + x = torch.cat([x, a2], dim=2) if self.args.use_aux_net else x + self.rnn2.flatten_parameters() + x, _ = self.rnn2(x, h2) + + x = x + res + x = torch.cat([x, a3], dim=2) if self.args.use_aux_net else x + x = F.relu(self.fc1(x)) + + x = torch.cat([x, a4], dim=2) if self.args.use_aux_net else x + x = F.relu(self.fc2(x)) + return self.fc3(x) + + def inference(self, mels, batched=None, target=None, overlap=None): + self.eval() + output = [] + start = time.time() + rnn1 = self.get_gru_cell(self.rnn1) + rnn2 = self.get_gru_cell(self.rnn2) + + with torch.no_grad(): + if isinstance(mels, np.ndarray): + mels = torch.FloatTensor(mels).to(str(next(self.parameters()).device)) + + if mels.ndim == 2: + mels = mels.unsqueeze(0) + wave_len = (mels.size(-1) - 1) * self.config.audio.hop_length + + mels = self.pad_tensor(mels.transpose(1, 2), pad=self.args.pad, side="both") + mels, aux = self.upsample(mels.transpose(1, 2)) + + if 
batched: + mels = self.fold_with_overlap(mels, target, overlap) + if aux is not None: + aux = self.fold_with_overlap(aux, target, overlap) + + b_size, seq_len, _ = mels.size() + + h1 = torch.zeros(b_size, self.args.rnn_dims).type_as(mels) + h2 = torch.zeros(b_size, self.args.rnn_dims).type_as(mels) + x = torch.zeros(b_size, 1).type_as(mels) + + if self.args.use_aux_net: + d = self.aux_dims + aux_split = [aux[:, :, d * i : d * (i + 1)] for i in range(4)] + + for i in range(seq_len): + m_t = mels[:, i, :] + + if self.args.use_aux_net: + a1_t, a2_t, a3_t, a4_t = (a[:, i, :] for a in aux_split) + + x = torch.cat([x, m_t, a1_t], dim=1) if self.args.use_aux_net else torch.cat([x, m_t], dim=1) + x = self.I(x) + h1 = rnn1(x, h1) + + x = x + h1 + inp = torch.cat([x, a2_t], dim=1) if self.args.use_aux_net else x + h2 = rnn2(inp, h2) + + x = x + h2 + x = torch.cat([x, a3_t], dim=1) if self.args.use_aux_net else x + x = F.relu(self.fc1(x)) + + x = torch.cat([x, a4_t], dim=1) if self.args.use_aux_net else x + x = F.relu(self.fc2(x)) + + logits = self.fc3(x) + + if self.args.mode == "mold": + sample = sample_from_discretized_mix_logistic(logits.unsqueeze(0).transpose(1, 2)) + output.append(sample.view(-1)) + x = sample.transpose(0, 1).type_as(mels) + elif self.args.mode == "gauss": + sample = sample_from_gaussian(logits.unsqueeze(0).transpose(1, 2)) + output.append(sample.view(-1)) + x = sample.transpose(0, 1).type_as(mels) + elif isinstance(self.args.mode, int): + posterior = F.softmax(logits, dim=1) + distrib = torch.distributions.Categorical(posterior) + + sample = 2 * distrib.sample().float() / (self.n_classes - 1.0) - 1.0 + output.append(sample) + x = sample.unsqueeze(-1) + else: + raise RuntimeError("Unknown model mode value - ", self.args.mode) + + if i % 100 == 0: + self.gen_display(i, seq_len, b_size, start) + + output = torch.stack(output).transpose(0, 1) + output = output.cpu() + if batched: + output = output.numpy() + output = output.astype(np.float64) + + output = self.xfade_and_unfold(output, target, overlap) + else: + output = output[0] + + if self.args.mulaw and isinstance(self.args.mode, int): + output = mulaw_decode(wav=output, mulaw_qc=self.args.mode) + + # Fade-out at the end to avoid signal cutting out suddenly + fade_out = np.linspace(1, 0, 20 * self.config.audio.hop_length) + output = output[:wave_len] + + if wave_len > len(fade_out): + output[-20 * self.config.audio.hop_length :] *= fade_out + + self.train() + return output + + def gen_display(self, i, seq_len, b_size, start): + gen_rate = (i + 1) / (time.time() - start) * b_size / 1000 + realtime_ratio = gen_rate * 1000 / self.config.audio.sample_rate + stream( + "%i/%i -- batch_size: %i -- gen_rate: %.1f kHz -- x_realtime: %.1f ", + (i * b_size, seq_len * b_size, b_size, gen_rate, realtime_ratio), + ) + + def fold_with_overlap(self, x, target, overlap): + """Fold the tensor with overlap for quick batched inference. + Overlap will be used for crossfading in xfade_and_unfold() + Args: + x (tensor) : Upsampled conditioning features. + shape=(1, timesteps, features) + target (int) : Target timesteps for each index of batch + overlap (int) : Timesteps for both xfade and rnn warmup + Return: + (tensor) : shape=(num_folds, target + 2 * overlap, features) + Details: + x = [[h1, h2, ... 
hn]] + Where each h is a vector of conditioning features + Eg: target=2, overlap=1 with x.size(1)=10 + folded = [[h1, h2, h3, h4], + [h4, h5, h6, h7], + [h7, h8, h9, h10]] + """ + + _, total_len, features = x.size() + + # Calculate variables needed + num_folds = (total_len - overlap) // (target + overlap) + extended_len = num_folds * (overlap + target) + overlap + remaining = total_len - extended_len + + # Pad if some time steps poking out + if remaining != 0: + num_folds += 1 + padding = target + 2 * overlap - remaining + x = self.pad_tensor(x, padding, side="after") + + folded = torch.zeros(num_folds, target + 2 * overlap, features).to(x.device) + + # Get the values for the folded tensor + for i in range(num_folds): + start = i * (target + overlap) + end = start + target + 2 * overlap + folded[i] = x[:, start:end, :] + + return folded + + @staticmethod + def get_gru_cell(gru): + gru_cell = nn.GRUCell(gru.input_size, gru.hidden_size) + gru_cell.weight_hh.data = gru.weight_hh_l0.data + gru_cell.weight_ih.data = gru.weight_ih_l0.data + gru_cell.bias_hh.data = gru.bias_hh_l0.data + gru_cell.bias_ih.data = gru.bias_ih_l0.data + return gru_cell + + @staticmethod + def pad_tensor(x, pad, side="both"): + # NB - this is just a quick method i need right now + # i.e., it won't generalise to other shapes/dims + b, t, c = x.size() + total = t + 2 * pad if side == "both" else t + pad + padded = torch.zeros(b, total, c).to(x.device) + if side in ("before", "both"): + padded[:, pad : pad + t, :] = x + elif side == "after": + padded[:, :t, :] = x + return padded + + @staticmethod + def xfade_and_unfold(y, target, overlap): + """Applies a crossfade and unfolds into a 1d array. + Args: + y (ndarry) : Batched sequences of audio samples + shape=(num_folds, target + 2 * overlap) + dtype=np.float64 + overlap (int) : Timesteps for both xfade and rnn warmup + Return: + (ndarry) : audio samples in a 1d array + shape=(total_len) + dtype=np.float64 + Details: + y = [[seq1], + [seq2], + [seq3]] + Apply a gain envelope at both ends of the sequences + y = [[seq1_in, seq1_target, seq1_out], + [seq2_in, seq2_target, seq2_out], + [seq3_in, seq3_target, seq3_out]] + Stagger and add up the groups of samples: + [seq1_in, seq1_target, (seq1_out + seq2_in), seq2_target, ...] 
+ """ + + num_folds, length = y.shape + target = length - 2 * overlap + total_len = num_folds * (target + overlap) + overlap + + # Need some silence for the rnn warmup + silence_len = overlap // 2 + fade_len = overlap - silence_len + silence = np.zeros((silence_len), dtype=np.float64) + + # Equal power crossfade + t = np.linspace(-1, 1, fade_len, dtype=np.float64) + fade_in = np.sqrt(0.5 * (1 + t)) + fade_out = np.sqrt(0.5 * (1 - t)) + + # Concat the silence to the fades + fade_in = np.concatenate([silence, fade_in]) + fade_out = np.concatenate([fade_out, silence]) + + # Apply the gain to the overlap samples + y[:, :overlap] *= fade_in + y[:, -overlap:] *= fade_out + + unfolded = np.zeros((total_len), dtype=np.float64) + + # Loop to add up all the samples + for i in range(num_folds): + start = i * (target + overlap) + end = start + target + 2 * overlap + unfolded[start:end] += y[i] + + return unfolded + + def load_checkpoint( + self, config, checkpoint_path, eval=False, cache=False + ): # pylint: disable=unused-argument, redefined-builtin + state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) + self.load_state_dict(state["model"]) + if eval: + self.eval() + assert not self.training + + def train_step(self, batch: Dict, criterion: Dict) -> Tuple[Dict, Dict]: + mels = batch["input"] + waveform = batch["waveform"] + waveform_coarse = batch["waveform_coarse"] + + y_hat = self.forward(waveform, mels) + if isinstance(self.args.mode, int): + y_hat = y_hat.transpose(1, 2).unsqueeze(-1) + else: + waveform_coarse = waveform_coarse.float() + waveform_coarse = waveform_coarse.unsqueeze(-1) + # compute losses + loss_dict = criterion(y_hat, waveform_coarse) + return {"model_output": y_hat}, loss_dict + + def eval_step(self, batch: Dict, criterion: Dict) -> Tuple[Dict, Dict]: + return self.train_step(batch, criterion) + + @torch.no_grad() + def test( + self, assets: Dict, test_loader: "DataLoader", output: Dict # pylint: disable=unused-argument + ) -> Tuple[Dict, Dict]: + ap = self.ap + figures = {} + audios = {} + samples = test_loader.dataset.load_test_samples(1) + for idx, sample in enumerate(samples): + x = torch.FloatTensor(sample[0]) + x = x.to(next(self.parameters()).device) + y_hat = self.inference(x, self.config.batched, self.config.target_samples, self.config.overlap_samples) + x_hat = ap.melspectrogram(y_hat) + figures.update( + { + f"test_{idx}/ground_truth": plot_spectrogram(x.T), + f"test_{idx}/prediction": plot_spectrogram(x_hat.T), + } + ) + audios.update({f"test_{idx}/audio": y_hat}) + # audios.update({f"real_{idx}/audio": y_hat}) + return figures, audios + + def test_log( + self, outputs: Dict, logger: "Logger", assets: Dict, steps: int # pylint: disable=unused-argument + ) -> Tuple[Dict, np.ndarray]: + figures, audios = outputs + logger.eval_figures(steps, figures) + logger.eval_audios(steps, audios, self.ap.sample_rate) + + @staticmethod + def format_batch(batch: Dict) -> Dict: + waveform = batch[0] + mels = batch[1] + waveform_coarse = batch[2] + return {"input": mels, "waveform": waveform, "waveform_coarse": waveform_coarse} + + def get_data_loader( # pylint: disable=no-self-use + self, + config: Coqpit, + assets: Dict, + is_eval: True, + samples: List, + verbose: bool, + num_gpus: int, + ): + ap = self.ap + dataset = WaveRNNDataset( + ap=ap, + items=samples, + seq_len=config.seq_len, + hop_len=ap.hop_length, + pad=config.model_args.pad, + mode=config.model_args.mode, + mulaw=config.model_args.mulaw, + is_training=not is_eval, + verbose=verbose, + ) + 
sampler = DistributedSampler(dataset, shuffle=True) if num_gpus > 1 else None + loader = DataLoader( + dataset, + batch_size=1 if is_eval else config.batch_size, + shuffle=num_gpus == 0, + collate_fn=dataset.collate, + sampler=sampler, + num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers, + pin_memory=True, + ) + return loader + + def get_criterion(self): + # define train functions + return WaveRNNLoss(self.args.mode) + + @staticmethod + def init_from_config(config: "WavernnConfig"): + return Wavernn(config) diff --git a/TTS/vocoder/pqmf_output.wav b/TTS/vocoder/pqmf_output.wav new file mode 100644 index 0000000000000000000000000000000000000000..8a77747b00198a4adfd6c398998517df5b4bdb8d GIT binary patch literal 83812 zcmXVX1(*~^*LB;t&+N?3x*H2D?ywNtB}j0$0KqLlg1fuBdvHi_ch_Zo*2det`|tOA z{;j8{Yo~jvZha`i_ zF9ATMBm~d^dcXph05jkM>_7w%4HnYA%CjMWl|!;uE>tYc zm%l5PevscO>81Qu$-R}|%U_kZs8H_lDdnItBa4dtw*RxhtXQE}EKOEytEn7WK#F2- zhLTGFVwDuBB)f8t8Gx0aO8&e2Q~4}Vu2w1canDORzeI?xDc3giOqfnR_wKqn>F zK{=WL4S^a;N>!{&RqTpTYEM*JFQ~kGh4PFa@*DY%d`aFTZIJuwPN^T-& z${BL9oFu2nv2ucv&yjm8*Z0T|m9|BI#=vagIba0`fjhz1pbn}7^@G+yH=*ay1LzdA z8X5&PgLKfp;22O3o&Y)n0eQXLLiR~pr8bfzJ`?wf^Ti=z2eGD@CdP@7SSq{`$^@s_ zPV6T(5o1M8FpJ~F0Nd8^c$UCJp z>5^itOR-)Q^kQRithiNtCEBFX(sQY$d|R#y`~#$bQ@~4LDd>cHL93wK&^zcpv=Qn6 zQQ&289@rApfwzIdKuDe^$H4kpm<2olL?9XL1darkfJc-kyaHc}CJ{$W!_RDEC^uorlt{PGa+t1`B}DOZz$)<9k0 znLI%00|vQ~JYJq5uarl~_2q2YrW~!65*I7q=E(1)SyD4eE14Dhvn5zMBihA&f?b#) zBuWS6KY_MDd)XpClgEMU;WH{iou-+uo~UZ4l2l_gvi5h~AKLlqBgi;d3(L?Xhyyub ztSpN+1dT9nfe{-j}RoqwZ3s=E~xitPO4~Yk*1xio$$Q_gs`w7Sc>%*PkJZKO! z3?zUoaGBCOyMx()Q_hkIp}w$;f6Omb$|+<2We%`r<~!4bxkHbmtaO6-R`$ayfaXG$ zv{3GgBK^hOxv&09ovxjz59{mebF>SPFjxx|fL)*( zbV&=iRqPoWqIBdC{4UW0KZw6X*M&!6d0{(xDbSbA01l~!s*9<+bPsu_`iymh3v)-r z_KWJqP_z#gi}z68`7cT! z7ZPm=m|VzQmixgas@tk}%2;&5Tj9a#0s4CSt~#^&I@lF_DA!ZG%XmpEOknr0ZD|jE ziMWR`co2I;_^}6Aw?IAaHP{XQsXk2b-1apl5I52t zYe`WLmLdQ*G9EswG9!=WgIo#LG_)4GPJN-eQnQGG#Ab zyf9Wc%=ZV~>Qm}>$Yij$($mg}r^Rs~3}2S>C7*DZ(lM9Fmdr91VEToYdaE;$;xhep z^-|o8|3wya-L%bZJ)^FsSYtoxYr{*x-DXvSGp#uJa@00cq2e>XLLXIq4c+w*wVlCL z0jz2Rh(QV z&JgZOIIt3crAq!V)s?7$cSbLg!>CTgqd@Bb6gYrum>c2g=o{GpH_*6M&8aMPyknJ9 z8yAdS9*?K~5?^IM9C0vmcC0^TTimROd&Zf_Ik_uzUV6?#yq)bw^$4d{6qjaI?eh-s z=KDVSzM^}nwwMZQNp@s%pvmfhx|?yLse$>6X}msGU)|i=l4l-o2&uY3O@Z;kD0Uzn zO-&;^69~G||I`aqkySf<&#O`@-dEN0^s36P{OX_ScS($48Zg4R2s=l!_CArnRXdu~ zJ3Fi9)zoc{HTH(_J=1&ColxUo>}7kT=1=jf2!cJOQXo>SLu3T{mOd|7TY9-{VVR*~ zM5Qx0o#4>tf&4%RY#7&BwMTczlx|Hi_cD}f2B@~`E3D0J7tH(Ax!^YGJD*QqrMi>% zDGzZDvj?|&7kGzO=2xmJb3JX!PkD-}%z?WVU6HUbAF6_zRvDlR)=SZyYi!TnoINL} zL&_|t9DO4hs=21o-5Ns@lAOQkV<4Nl1kP0Y$QiyAe;i2hj4nP{_Ptd0+^#69AcA$# zSpNubJKwT!p0F1_jWpIgQN^m>A*rAmUaxIsoM>pIE|hX5WiA!GY<=oBHIgVHmWFEu z=T|MQTI%UsR#2Ac{k19!4MDS+*6>kcSKzW{oB3m0VdkccHaQ=wUyM8BS{PrI-k|Q} zdbt_S1U%AXyri0;W}r`SEIgDOfi?ACD?L{Fyxdg+c|LmdWj%uh!A1T*0&$_0cp^VV zx&unu>-y=2`Pvc6Jh@qYS9475M7n^#gO%c5W;jlvOVHLtukftEjHZ!GB&Zz6Fr%7F5PjeNgq*wPe>Rs2AaU)5N*r7S2 z9*n5r-s($04xNgw@upS|_hx!J6+SA8DT@qkC70lB!s_sgP%7?W&P&_1HOTUiUVwxKO4q+F#-rn{aw|?$&M)p?eztONg&CT`*~Hu8+2BpChN-sga>kgPlp0qXe8}*| z0x7>|#MWBV6sUJSD?J^Ks%_JoVzpW|jtrGDsZ2~0i1SXWI$HWN|8C)hqOTRTL&L*8 zf}=t=1E)iSh#pi^xs!U8Y96c=k1(r*58^$+%P-=tunBxRRRg^gTpWz>&GX@&MWy>o zHbR8={~CFHXtcf`*yvI24b~>XkA>JwCmY?gZB+fau(OnC60F8 zGH2^MXvQONz_~0#O~<~6&A~|(a>2g5wEWhdpV$K+?XoebUJjh&@32!9Z=~X&jHSA;gXlP}0k@i&$osiY>|A;h zwVBz-MX_D!-)WQ@O{(!bm_%3S&Tu`MJ4{PHM<|u%K?Ai#hJmJUMz{5o?YP6~+~}1Sx}&No%mQ_!9YP204Z~1_$?aro{42U3ToxD`JP}+J810`|RaFj` z**r|izS1hs`!b}WN%>!8+seFU?(&o6@5+Z%4)F;=7j}jW(Z{*2;#%1Tty2GOIA>k! 
z>Jw#)y&OL_se4LNdcX7s8Rs%1v%Y6sO)p73lDs16ZCv~4Qs-t{j(M5!srH*HAHENs zl81-_yNuRTsbnUx4h@DL2f<)UK<97ZbyfAR+*w|;ETWVweo#`w^TRW+EZ}+N340!S z#(QpfUX`7$nCCs|UlY2GRgl-1{k%~Mfki4=KgHsRsOjz>lNX^bLo>`C_&cy5K9=7Ox6*^9HkSC6iKAbVZbwDjgF_2ZG~ zJx;{_+PcE*(eKslg8u|K@g(1c?LyBZlZfV+Ba|HQ`nveW`mR*jD+YN=i}|AeiqOKJ zg})b#EnZL(EXgYUQJPq0D1Tgb((~2>S2%p*LlbZpJ&QTdH5Qu5pCGk%vMJx*I_gV| zFK$CZ&7|F_Ovcfy`_Q-BoF)r=bl+N+3qqn)d_PW-crhxvA?v~~UoG14d z8i*2D6uz5ndoCn3J(sjwE77lk*dhDrqGI3_|=`>5`;jFIN1GAT9 zcggyknVMEB<#OWn*pX3 zwJKMY{a!k&_&{M%LH)vJg&9R;QA*Loq8ml~ilvfpX}Kq*yhp{Ls@MLRp>w!I6>;~) zC%}C~tp!Yvt+iZ-qS@H~3Dc6!r6gxO%B;+ivcT-*Y6ml$WXww)n=&FHFS^j>b{w(X zGqpF|(q?LsRehk7zyz_9oxrT7G}Joc2)ZWpFtE`FS9K_VUV5UqRnf17aKV6l{O6kd zkp-RtO;P`%1I48!inlC_E03%k=Bpo^g+v<{lA%Fw4--bJ8MDY5I~(-I4k z?xp5ubg8DQ{wBLt_RP$YnPbzorj#e1jZKa&cK%^sZuOWC8_sF7)LY;;K)Elu;S`f&unLi_cLBZvMUkgtZo+;9ms7s%h&hq>y z+fu3bCx#yo&*^>qO|d4RL*{7IrmeOik$=aGjW;FvlaHq6WX#DtozWA?-> zAfWqcHhU!_T}$N*A^p1l?9J~_Wt=KZ%y9)yyia> z^Vx#cMSF_pmcH_2m31wDT=B>YhY)-XJ%Xz*4F;YgJ+#-2qir7}x<|#u4v2r8I4HR^ z^>W6A%>1kgS!G#ZW^Bfg)Q!n?6T>lQ-RqsF9c}H=)=?%ze^dPrycsB$yNZdzJFX}D znO;sj2$uvjevP+IrMrB4DP8=r=xRZ1{=*;XKe!)P^Q`$l3)U80E3RJpzI3kVL7A^2 z-uG*0H8zFr&V_}bJQ3ce`KiBVNpk$@?iBkZ{&-?`k~(=<+P(}vvqRRQtXf&QneOy0 zNmCP;nD6c$u4j%8_Pe$R=7omK+W%D5pk+X%bVckY=5v$TFx3cu5t<%I^0~d?%IoC= zJ&jA)!ifb`-i{y7@^1fJpPyRrXW{B1Yf0l$b6MRAv2wHTOi)DIlY80sJS;~-%T()i z=|;i=Ip(-FyN^Zhi2W_TMIxP4p6p41QV*rpNxhabDMd_V<2Od9Mi|nsjtKBzyf)%R4V={Y~i=D&nN>vD*P;<_qX=CE2oz?FKb^46*Ed(t^RraXHov~ z!WqS9O42>{vZdvCWwG~w-xK_d{z(eVSH7<_3@m_`s88wM7-B6uY%3#5T}L7(yLUz9 z##rM%#{Cn&BVkWs`$Sv9!MIB?2cus`c6W}oH?`_4P0V#o9SvdaQuQ0;06Y(B25tiw z`JQx7nJe$pxfF)KM^}d|fkN+^%B=ESPw!Gy$)n=W#kY%_mb@v!N&{uBEAlGuc~|)p zgJ(k<(Urso8sP2-(Q*%HG4fhHTRTc0HZC;pw7#*eapX8X&aJKyk+a?1qpn6Rh#nH1 z8%?>ly52>sw5QoVS=O63nnoCZ=mXmRn%e6Asyw6yl8#iterOJuBje&Rp*uf`)zEK= zZdf4HBrw^xxN2bKzZE$Zx5`hHC6^U>T9u70e^7D1YQ9ew_#9jmlEY^(gltPUV_OUD z7;R^ z!D2A#$7!2szNp-4zbamxrgo}7s9LD9RSV#w&?Im^kS>RXF_F}5^aGU|*;dapJ?dtU=<)moLdtu|eIN7GHy zRC7*qNZVUCTc_1!Xx*9$)j|Y>dx1UW3qmXIAl;JOjE~07pqtRvs1VLTbJ6o?3HmEG z1D{M@q^{E`Ohe`f)0zF7ozA{v`Y@}RAd|=0;o5!W{5BU+oFp-yz$-e;wz(H^V zoT0j`j?rf5Abn@O#W38EWwe^irk=Nd7~$t3(^xG-4dkMNaOt*V+=wZn_}9{XzhSNrY3;$W}v z_3(BDC0Ijw=o)M~S1P=degeZ4j4%Q|0*_FQP_0(C&^*%i)t%Jc(GJnp(RR@3^&Wk7 zL!llpV}QbLp;VXKITzV>HXveN_n3 z8k!Hhl!C%G{vn&nOrX@{Vf;QiD7+`wDL6P-JM=o-AM1hd!p*n?AB+zqZje68#Jp25 z5k0qoOXNWzB>2TOl0(6nlK}#F0$ze@!v>@Z*{ABRzOA08nW#1C=ICbV*61$kg1R>P zUiu+=QrA>#lF9)ZbJcrd_S=p!Bz} zCZO~@PGe9GSp#YR(;U`}*61`F)OS>+2o8H72`m8a%KfExLN5P;UB*t}!GG`vx`DGUnffY6Gs&UI%OF+b?1^dEGlGchj!YqERf&m}nZTutd zJ6Fy<aPkQhmhsSD5O%UuK0AD51Z0b#xHoxjKZ%hqH2v2)mD_HQPUSZ$M!1yBe0Jv<*7q^hOfrrx7ot-h|Vr#Y@UrD>_Bb=L$<%_ryt`qO! 
zdkI5@;lethSjZ7uiJ2lI0t!bGFQ$m&MNqmUbx>G`f8_{;Y3K&>;0owBxIZ#gHB#Ym zqBMOpD>Ww-YmRFQHC?rbwR5%4HIQbGy0KcN-mS8$79$p9HT)8)1T(0ZxfHI2(m(HT_4va| z?`$XB5L$>2#2AIW$d~#mY~VZ~3H$+`h5EtYU>g#TG)DeFJ|f*zcU6(<{_45vp=wsO zLN!RWRP{pTS3OhO^aIirc?kD{Dd;iuUTOIO&lJgy;WGZQYap73D<^ez&+qwa0W6MnUAbPb|Slx z1IRpN8uBO78A(J6;6GsD)Xe zM)=M~b5}S*!8fjPYJMU=UN|aD7XA@hi*2QA5}@!%ZRBYR4>=MT4_pCsz)%p<4KZYE z)euNB^_N@17maDDX>)mp>@r9jsdW|W5pDRXxBO{RUeh_u2j&)YnE9QltBjBN+-Ie= z_wz|Yp2Fs-q#Q{nT^FlKh0;7A73u`vhR?&pU=LY<)hYREA-^qX~~byj_) z{ukqXqut=t{m?wrI`rlG-g=K#!GINe1}pVB4W0!X;pSjX+8A5uRoZ+N4gUq@LaPyv zYO?ALq>(mr&zSK{nA;~U16lxM#CY~Qb(jnh>C{jfr?!!8@MCBrTp%=5H>w{g;;XTT z=qzjsv7CBOnaNaq6?z(NLS5jlNY7-GxRK8gmP(_Qn7_i*&T+^&*)hR3)cVqjI!M=B z_sFQ+$WD%st)G3Z{jjZ#<&NQ}wzmpbyly0f!po35U^j6pQ-LE`3Sne=a{IZlOd8dM z7=zsid&4}o2e;un&{pC3=npKNh#(7y$@oEZdU!J05iiA`U|rGK;cfU4ri1WG;TPYE zbws~FN-@Y|!`+B2G0WqQMR$nY;5Z#|(%n6-dD4wUU3^;1h^Vy4a}kGazgS{SQ+4f- zD23%KRJ>n%31ePhaKPmq;VTW*$3NiBFinW^ZL9iFF`)9AFFlkQUJ}X*CI#DvdWHW` zu)eqG9P|}x#5!Zu(C*=*sKi`@dMRt3Y?u^Ri^ssxTHJOq_EhTh%r7Ysu`67lvoQKc zvbI_{+nCuoabJ|mwZeYWQq9;}|5jU}Y7Aoh64paC!#f53DgRmWy2M$2)w@3!AJ|@b zp>%fv)|XW%#Szp$m44qrz!k@Gcoj4O=$ zR5~yh_UJm=d&QeF^femf{FlByen(W@m`7=HPM>-!YiDF#i9hb{5rNp!E&WaJ4ZXBY zp(U&in-*y0EiAuKa;mV0+ zVrDz%7&Cxs#DqYO|A)VBI1~UYca;_v!uid9H6wc%S)xRYSfY(^EGv1%E-ii zG3kyckwX*qW=yKX*BX`CC276e8nMIOEc#^h*T@5wN!rm+SD`IEh1eQi8`|XGS6RO- zx^z;>mBI%F(|&>lGfOX3boS;~t**FKeywV%zbCqd7)&0e`_msOl>AJ!p!@Jgg<|$T zm5NWMd+{+UT6S17Gd+cyCf~lIFsp3i%;Aa6Y0EX-|C2ppq*VDZ_O`MmxNs;j?KiJ=;yFC z==V)4`&gLv^Us1Yo)6_j<-v+w6;LHpneY8Obf0)iXEE{Q2cnQV!!43l%aqhgTqgAA zU$ZONHS`qrskjuJrGqsi|ge$|Cx7Nh0~Ml$*Tx>Klz8Fn~6SD3H~&Qh8_?I|6JN9{NS#0 zOPO6vcP2nzrz?1a@Sq_p9~JhGgyPL zgB?KrBo2{J$Uo>-ObVMx_h%pRL0v8FVDS%{$5K^W9ev!B?Y*4~BI`JZMD()!9ecUj zhML#XH^s{ChtV$+y2W&i$%#E2B}TkA?N|F0U5Za=ChZcx(jnC0uU_u+tS)&_`oMcF z@XR~P)3ESWVbAi%-ob&bVF2rZx`I=y#`w<>yZHnG;RcZ-h$Yk@YAw}_?aWnUesU-H zQr$_-7;y=M;j-$hBiik-k5`bWzay?hoV29GHLTX5`mbq&VrzP&7=yq!-U_*5Ax!3)@eL*?&$8-mNj^TNi^ zSl`&njlS_@Jt0|K%JTS1Vk~)&>dn}=mf{@tSN1WTsJ*SbB7f%9_(XPO5(M%y3Qdg>AT(1d%bw-W0|H;Ighycbj9>f;(7wbi}X)!bH~HzTZqSr&pb zfxpDNR4S$q?exB_{I~qDKRGXQS6T3HNL~BorU}ROPrd_eQDNzY;T+ud%{X*nQ$6iOQ`($Jbm)mke za|ndMUUCy~9ng#)#>~LzKrP>i%Ae&c`~%T5p()eZ>Y*JvY?;GY9t6+`<|l{6P99&Ih_d$x;g0h z1l_Ob1ZMjxebs{}(S80O<(rFkmQ43opc%Lp{Wq+|enIa9ZiY59qrjVRb1{#$a)Tuh zc%>jQmB3l3yKJXudY9!l3D47(Mf)uu&3ha#-FsZaqIA*sqFcL~ z>piM%NP=pKsxR^hh!vGZN@!znk?*KKI(Re8pxga96bJ7h+39rV1uhNIUl%NKgj%DKhU_r zkg6(#`Wo*>wN5Kb>gp=DoHws^wTZkNH7d47bZO*e(^b_;_!<(a`CGFR86kCJj-d_1 zw%~-IC%gr1LY9X1Ry{BOSy3%a5M9X_YNirHUdK;{?_sYw63B!YA%_{tEaol<7(bf7 zB<&Hq@xAfR;SR(h`JJ{o5X*V}c%XrF3tne{%xm2AI%3GBe>WmE{JXCx7CRK#j4@1}u_LJ06 zcJeN|fd0zD9LyCn{S*``hrNh>2+bsZ0K2vKfr0$M;Gxj(QY+-TAz*24o@mV1R)=P( z+Bh2}wofgHO}5`Lx3e-4S*}LWdt<}yE%u z(Id{qj7)WDyF5YhK~eO0HeS(okKq~q6MvYSNxnm)h!uQ+)}>l5&cN6B*02wdAG$_X zpDEK&SLcD2f^|$0F|(5J_y&$7V{_wwmg|mxT$A=&B+{87q}8Qgl{E(rC%`D6xGap{Cs#e3NbUqkd)4!p&}_4)kwil zy0A<6g~B_ELI+^uxDjd}d_~-j_6hA4LDhZ3ICD*d-I$}(sD7%ZTK|RVXzf$YOQ@+lh8szd#1_iA-2!(D9iT z)LJC0;Z0Xe)wDl!xvC`KSAD(6dhv_n?>iP5svB}GCnKu6PDbu`<=KBR88ijRaYV2A zsr#<_1)L|9&~{oNpWwI1+Dtz-fy_iLf!_X?*zeRy`XX(iG=vd*jQ)!rq}~We#JXHh zax8I{JgTftXR)u?>ztD=P`G*3TN zqAhcv#7|;NNF&*WnoJy3*r6j>2>(fM6Z(iBS&A$rAJL74CHy7s8T*bE>A841VPpSL zT}7Odj+pLc>3@*Ex&zjK^b6Gk)i@|a7fe^9_9V`ZpW*0fxUZXPy%*Umwj_3R)MH16 zak_fHYQ4I>p_QpbJq(QC?MnS8kk^PCluRF`75#DWi1(>48ubw}g_EE0b!g9UM))MQ zjIJrxm#Wz5#1ZTZaf*Gf>>#jn8`weAQEUnp%N|0?p^M@`d_y3a@2DD~uVLAvzpSZL z$EYC9?>0K-zk~y^n;ntHF2=czgVAH+_rzwp&RSaP&nS#gJ#7ucK7A?@EkO)UoTPNh zIC@RKCIfiyaQ8q{Utxg6{v!L3&+&6;D%uSlht(q+u|E~0eLwvI*W%NuLELM;mB6w& 
ztbC<0$Z0p{>NctqHG9?b^*?npkzLX=@*Qp=SCI?Jy+kI35+!K2KppR$fEQa!wWVGw zsMU6KC0Z1IgYRSK3I?HoYJ(nY$p{AGSn0Bnz4%?;9 zlo$OCkEdQ!$s|r}#?bJe!PegQ{xCY7s>Up(X5eqp*=SGn8Znv8l?KY4xDHBOdX5OP zhyaT5LTC0OQ6Kw)HsDR_cW|NHhFB2n#?^+0>WA4Tn_KCfng;MbbvwuBxNV7^*qRY< zOlwTP*<#$!O-nh&2`Ng?Rbp^A*CtQSbQRWjwb2f2!wo${e>x_0N=d8 zDr^<4W=;`0EE3hCbJ0wE6J02*7OFADSPHrgzets_Ab(InQHJBGn4S9sZ--mUt*PEv z56Pq%X~?s6vh+3X*R?>Z>yll6C$>mQjs45M$@Iu{-X0b8TU=(`pUxb^2sjE-tJ`S0 zYpbhU0#o=C#Defy)JXQHN(e3SXLwo285-a>hT7w=$uYE-{D?osop_-#x87slixmn^ zB2!(-PgG|%lShSJ3coXw44~cdhx}2M4%SH&dOL(kr!~(k)g0B#mBzX{oobY>yX%;u zWa%7t!8%Z%XEfWR-QS|WyG{1p+JQi^G!E3mHB@JTQ(SL)L-#Xh&|t ztK*4ifM~&P=hsTr#8!M~?kV+%oWllOImj$F>mdl=^(gHDcdWd%~0a;{v;x zb1)+h;xYsM{c7P~mD7@En`drks;A3U57A$7k4tl8MkSKA>dG4Skz-Ai5L4hjVLPLp zDAU4WWhLAeJ|h3hHX(xBGWQ)id)Bx%sW0Z!;SHZ=~ZlUqgR=%Oa z3+|WRD?4-6^9+e%Bk_miF!DI{iR?-?Ky|*izQWJ|@tR!3y}-n(@x*RIo>aaB@VM6-J)KgK_%p(l{npyk1sNZAiI_sElupif+w}~e3C{!trWNTv1z*heN z{2BFui{N+BtLV$*9Ws%F#JNCYXbx~+ZYLqaS}uuuC!_$)fFIS zOr8$E^bhpa!A;7Zs!RNZ@XK%?zBe2)4YVz`owl_ygf+_y-(7c-Yi2x8bh*k*^Yuov z#v!_gMh>!J>NyIsQN?=*{$I#mBUf+k&#xSlX&Ei(Q3o{1(1K_*?RF1L;rL1iT-y zljuhDMIWM3p%tNfVHa~pn9Q}OhlJLX2-sgY&auw^(0b0)LC5JkItmk_(%Yu7(LXGa z#&gCM_Bi(q_aBbUx{-1l1-mNXkIM&uVca6(NnmVXKI$fUCWrn&eWY4YY3xFF2)|u4 zNIIF8&PkiamwZR=D?6J1qQu6jir<-qA3-}|`FH?tPD~7s2~7*ur6AtTePkw~@k~=_ zv382}jAguakad;uxxr(bAKxQ=LAoxc*g_cAnkG4NBlFzT98>g(@;%C}a6LtU7udjM z5a)dV`38lnkv?`6Q=QsFs>y*gs%RzODSH8rNEa1kr-d|K;)Np|%kjKhpx8%bKjH`e z1Gf;2;tRWD9kDv0bfSo_&&?Cs;~d!(+Mw=krK~C|Y#(Jp_3bV7qfIFj(vHWaICh#o z8T%^huiKGlBFc?numSC$W7w@?AE1(dNio4!eqCrDPSQP?xy)KxL!YF(F@yP<(l@!i zTtfoHHNqz0Dc3_$!Z-zsKr*+;iNq+}KpY}eWPP#^-WVGaK0@bnEx8YD4XhV830a{X zWNl*R%~jTqhGT~Pwk5IF6npYwceJ&mxwpC4x;>)I72&97NR&akoVmeyr6F=0H=qL0(N=~}Eyh?4$N*uZbXzrqdCuizKW*&zE(Q6|JO4k8JErmVHP z61$1bM0a98`V!s3vg``s7Tq7ukiNs!3_eTHY_toOA%?An_y{6?bjq`Yey%Cj3+DIc zBi0el>8=yD%i6KxcJ>n2Q1~g$5?6EM$?D;jp_Awhaz2yCoM(Pvf^Hslua&*x8(&ZA!HvF5wuq79WFeB)p39JY;g%KiQk?Bi6+|=KG0B(swaN+$UV&Z?Pyj+Q?x zfOW0?Uc?7$ZS5`T4}OPuPr5C)62qK|UWsL6xx@ixJ@*IOj2X+UVY_hwZlZ#Vz7<#= z5GW-+sL$=A_fVr~MPI@I)P7}`8Hj(w|HA!56!}SW;274JUC%#2oAJ+tXEnA{xv-v;UV&{E_*?17zFLsu%NsA@Bv_f1e45min z)$mPJZEiEWmz~Rv=Hj`3nRvD-j|-=TDf}6(8P}JMq)`Pmw$s<><)oWP!FOPh!~vzZ zY$QGt+wiZ{9&Rv;^CKvpiG^ywse0Z(853>&jec#iaaF{KnC5X$+`S?`IAW|_%(+(H zVRCh{PSzY%v?6KXQMpW-CT)^#a1W^6R2p-DYs)U8pEC`)<9r!+idB^V>@fDRqAi=k zZD*I#lrn-v!b(md3i0juPGv0b!RHgVi9g7b_}|1>Hbc<`Yq>rQB@YB1A~$po^aa*7 zmUenn-`i?(HH|*xUhRD0%4!CYHzkFb+(&UR%4=BT3E7|O!R+jgKQlTpNY zqB#kZMq&`2h96h<(isSdyh|=oX7z1kHocAYG55Gawkh{ho(#C*-!*46O$`V1N3>&% z?X0AIs&jV4g@`WB65B27QrklNUydCXk72pqpa26_n@L!!hZ;tJ6V z&%;__xoAFG153is6C=q)N=v__K>8m#fw{}pKYk7 z8b_HO*8gm^9ql9fM%0Pu>{w{OWE*A$tdGoJjDz$wG$qJu_y<%4ZUW~4o8$;dFP!8D z@X<s+4ybC5#s9Nblt?U}JcwvLpDGZiGQFPBnkCbg}hx+>LnYRJ-ark3<}Ew6LSr z-Iku_O~yX@kDA-6Zpd}$Kk$~K`f|uO#Wuo8u0A`9A?a827Wy(ZmGtBHFj9#Ma?ms3 z1L3vdE#b}K6XEt~Kdc6EnY==6pqn!JOkZ{)`-E-DJ>}Z+U-(L)m*iA*YFe-mnyVr; zCjD7Mtf`&3$RgTmMr?2zT@#%bBibm;-7I@6+wYberV2xuuD*7k`Ww<1X2E}y{qAac zt0?hPxq9qMrZw{)okiEDekh|i5`T~N!0w|R&~_+-O~d}ha4Zs!BkW{z>KpxxS*$Rc z!xdfMP4=RqTRP2dP#B`1@Kn4Zy_H9T{o#wMY^_HB+HlTv-cn(^=-3wVi*v1Wp0lO1 zBI1Q(wtbAXw)woVp5c+gaMe|p!W`HNXd?Ggb{1XdGZb{;9NmiggFHeEA~@WFPsARe zm(Vg)h2>)ZV3QQTa+YXF&LNvoQ|W6=4P_1SiF>c8lIC(W_m11G?7+Uq{iE;_toT?# zau0wj;j1c2W7E5h4@_#y9qTjuBggiLy%F^zMnp_-sO(nTUP~MEcO$3&Q3qROZTbRG5z zm(TYX>L}}o4vJQExHw2G6cU8d{A|9Z;8FG#jaJldPr&Z*4%H*gWnHA9w{f27hoYgL zX^XU%+qT;_*+AQP>mo}(b3@Z!L)`x(=_uj|xc0bryE%{3vju%SW&Pj$i9tTNh|lEn`GvxtVx+WI+K1l%^U!#hDJdmZ$`;#6 ze@lOXtu+B$nQycuGlV_N<;&s~C6%$NSa3qCYL06nw1>1iv=g+YwMR8aG*KG0`lYhE zLNC9^wPlyl2|vLHB|A1y2q_eyc#$#0gnOD#wRu-U%T8R*dOfB 
zXWSu6>0!E=&Z3K`hhCxcXg|7^=0jWI64+}U$X)V*ynwn!KXREoBi*1gupV}IhKjTX z(}am+)J!T<3jEn$^bz&beKdkDg!V>T(iMzFL0Szqph2`j-9af<5mUvn;x1tlG%ynW zr*`2Hzlb};E#ez-9_;BUlt1bcFA+&a@JqLlLu5Z`4%V!PzNcogn7n`vNNv)WoPYwz zJjpJ;7l)xwQ%&|sy`k^W6N)8?WEF02PW+Tas?#Mjm#$_0V0?5etxYfD6K;W8#4UXC zX!2F+23?4SQhjJ!{17?|1AtDh7Q0}DmxRqiM{%miN;}09@QYm}mYjwb$ZRlV58%BY zgZH`+>{l-|H2#y?kpIYZu>R^wHE;(oNy|tClu|aLQ@$T*1I>`fWI9;8CY)8CQ1G7B z%q!Xr7I1~gp%$_q+{?A37`X_`El;jMy`UdXRX^|n)nYg3UsRVKh_l42LIrUn{drCb4fB2Q!)pXR1;Qc|(@qbG?vSN{8{Ta;YlL&sH%Gd9y+M zEQ)v*_r-@`w9dr44gwdoDi~~oaMrTK^Wt;ylh{?fCDezKM^pT(4PYa86;23;#5p*V z1HmuOmX4EZR8OISK#$Q8%rRy*Q-*28?1TbI5wi&vl)?-H_c)yyLcfx7^c|SIQ|LVC z#SDa3SxVkYky1L?q0@l-XA66U2-wXv(TcrG6*%!3c!sM)4=~G2u?cisrim-WMCq|~ zR&t4VVedz=>i6O(@u;{3`aQFyS7LWDRjLO~mvA~0Y)%si($S2O*^l>CvM<<~>>?(I z4rabW(V{9dkq)3I=qK8m_9Ew?9}@>XosLpxkrUS=(K!!3>1wc)lchS+5|I(7h)bb{ za$b5O_7Go+55Uo$Bi$4CVLijKOC>}J3M|{CsnXx#Qo$*NOShy^SY>Z%30Z)5SV2aB zS9+ak!WJ=g@l+NwpTTxri>Gy)dB;p=7SNMqHPJ%tV>KB8izp?XgN=3;PYB5(2Y+W0 z+lbA;JPt=pzAE0pe%+PKc!yuGjG#D4ngC~eEVYtCVa5N! zcGnU$ZI90!K}*wG^fVp7+++l1BU6caPhW$@n!vOL8{7u9m$f+Mjc_(+L)B#>yzX1^ zmUu+e!8*1>#b-56bzMBE#nKVz0^P!|Rixwi7yrOgyFxdmo9Gul!(wI%ccGG_hexY{ zmFI~)q|V@uc9ce-H{uehLf7EaF2%W-Momzxd4biuVJ<^Ar8#qquA|p!38>`6Gb`v( zr~^50mSj*IGD}DCId?*VXR1h{VY3=1>zH^?tPDQ(UMS1-l@3c9;(_wfQP?wsjOqwZ z`964*1vq!Pu%kjTSG*(k5+kAW(h(=XDVo8S{|}Y0x6(v-!${hL4#e7y&_BWJoyBIb zADO?H3d~8SEjyZZFixntWRRI;Jy{0jr2SF_>7qCgue1ZNWydSE5r>OwMVI&;uX!Bo z-E^FjVbGZQ19~$(VP%EzXb!O5heAteI($oMk|U)`Ys4|&ZV!dZO>-$zTq(}QN~cKG z$XT)h4D1jxk<^D`%UbZJ+c4kgO=_mo(Q{D9Ok(bVr+pm0s|Wm53b}yudjyIK6#6@I zu?1H3`&s#fGO&p}u_2zqL+P1x2r4>P@tl4$o~s}pWJ<@S1JWLJ9$fyPuQKD@KM@6d zHI&+8-zz(%$9P|L)K`&rUL3qNI^euFpd{TQdhwMUx zapGJiNd@45_dzT;g&nfNr&WRPxPo`gLS%m=ot6$tQSfHP$SL@O3)s0W(E4d0Rf2|- zOAO$Yoflt;Sz<{j4Ap^7PXhLG9CF*;SW7jU1K)p|RE4HZ4`v)xiN?|>_@hh`Pt&kh z%ki2ep-W^F>r0hk0j=O&?g>Rgf;bVLR41vVcKG|1@cZo%vHhee)!^qc=xnGF?S?v0 zTbe{;=yT{%)gWo45B{4E_Wum5eI1_V59nX%NC6a$!pSVc!kW{ljuzpS%F`9}5~KK^uty;k|GJXXP%w1Hw3QyLdnxA}$egu-c8%Nhkx= zAphVT{tZ2&pLl(dze!9MrnPfUsWvUmQD)f3JaVRdrg2 z_NATZNaRXApenS2mZcxCE34qOw_^WGBj>0`G}3o5ONgh(<<(e@OyTGGYl}6>6Lcfg z5ys&)9|qe5hw**E;U6fbGm{nll*7O(C+N$`AtRw76CrJqUr|DJTAbx2R9I5VFcL6=DR(k+~;He@!HK?@^^dqlf1^~q0KBo-HkQj zl7qr+zBaJ+iouv*L0~2?6P^hv(ph1I^jcgboP-+EAu@x`CgJQM^k4L1)O0?xn!X_S z=vg9XmNKPC111@-*NaebK2*jk17SNZP9{I#TgQ@K(jz(!w%>!gi3eR(lV~p{hDm05 zn!=25jW3Bok=m~HLU#%9fBR|NCqOS?u3kcGy20C zlZ!a_GhmZFr7*G*p7R@gT!z$3>VVHV0luRqo>wBe12%{v{LWADr_@IBklXNRy~tT< z9%ac5@vt-+dU?m-p&v>Yr3|Qn&B9M!6<@BNN>=&(I4md1Q5Ga={D)~TZIsLUpd6Ubadx5 zpv}>D)ruAa_x33mhd+^dz7nrskDg<%YeVU!r!-w`F8qOv`2+dwM{Y3<=nV1_CnuWm zlCQKk9Zuen2jWxFf}C+PG7y~Mm5qRdSk>?M^GdP8Kk&Z07A3XU=T7XP?3(3Yx?$W_@C{>_i>KJ8d zG`R$CJs7vuZ-@)=(ht~OBvE2j3*b*1A?~)5bT~PkX%6CeZ_;swMX@ajhq2_vMDuqG93q8tgQqbP%3BOdpo7i21K z8D|hFmgCevBor(Algj8~bnaOspQsV5!beQNjb{$x_A-(yNyz3aNMFQAtn-;zS1Jo% zy#}|4XUIv~OOHf!V#3ELaP|-2#N2_M9zzCm6%l$L@{x4J{yMmGEg<_yRk8y&2|sLY z4kAr?T9qEg^V)!$Txr-sW8i!na3`ohr_y@#Kb(#Z=-G-xw5g9*DhqG*1Gy7L-nE?k zhm186_nJ$Hpj{9>h9L)tN2XW^mBO0hQsm!8WS=jkSX2{FA^NvQciJ!HakFuo(j%X# zhSe@Zwq=ppLM?J8k;pIDPGg+Td~zB0$3Dz%dLPf}9?o$U(hiwqJG={r+h=V?%~n7L zyaPGsAf^l3gVV?^%5r3k{H1J}tdr~sC*yeFa$C>^nIauThW3-c4NbY9zNOx(V0N8z ze|G1f;%W9Q@P_z$`pWpT{mTMr!7afN{8+&w_>h;|q~h@J)o?#~CN;&mBJefQ++$8A zcPK2X9@;_r%Es{_DWNw))nRW#V?y1={sx2YhU&EZBRiA+E8XS01yXz?Jkf52YrIqL z%!QK9O!Te(1b%WobTHPL&v zGSDq}haV+=Vpb}P>j#8=jOYD1Yc<=an_!@cBJx$#YUCmrEuCLB#&JfoJS0(ghE%tTy z#|8dFuj)qB5u<~3195>u|44tXx0d&lf2FvcOIFp;4l^_in-|r*c#Q;a38D15vNy}N zDbv1GixR2v3!}${gsWzg$G#HIBc_^<$DSdLK2@9nCs8*9q@MYO!1ueuMm&3 zZxv2;Wy9R?h?se?lMCH)(p8518~Usoi%F1-m9c;-5yELG9Z 
diff --git a/TTS/vocoder/utils/__init__.py b/TTS/vocoder/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/TTS/vocoder/utils/__pycache__/__init__.cpython-311.pyc b/TTS/vocoder/utils/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..85d79174f0983d82a8e6b2fdfe457f62f4306d05
Binary files /dev/null and b/TTS/vocoder/utils/__pycache__/__init__.cpython-311.pyc differ
diff --git a/TTS/vocoder/utils/__pycache__/distribution.cpython-311.pyc b/TTS/vocoder/utils/__pycache__/distribution.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fbd17839216464c776613fa11291bc97801a4f6e
Binary files /dev/null and b/TTS/vocoder/utils/__pycache__/distribution.cpython-311.pyc differ
diff --git a/TTS/vocoder/utils/__pycache__/generic_utils.cpython-311.pyc b/TTS/vocoder/utils/__pycache__/generic_utils.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5e0454728e44ed70d3b4ca49ccd6920821850449
Binary files /dev/null and b/TTS/vocoder/utils/__pycache__/generic_utils.cpython-311.pyc differ
diff --git a/TTS/vocoder/utils/distribution.py b/TTS/vocoder/utils/distribution.py
new file mode 100644
index 0000000..fe706ba
--- /dev/null
+++ b/TTS/vocoder/utils/distribution.py
@@ -0,0 +1,154 @@
+import math
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from torch.distributions.normal import Normal
+
+
+def gaussian_loss(y_hat, y, log_std_min=-7.0):
+    assert y_hat.dim() == 3
+    assert y_hat.size(2) == 2
+    mean = y_hat[:, :, :1]
+    log_std = torch.clamp(y_hat[:, :, 1:], min=log_std_min)
+    # TODO: replace with pytorch dist
+    log_probs = -0.5 * (-math.log(2.0 * math.pi) - 2.0 * log_std - torch.pow(y - mean, 2) * torch.exp((-2.0 * log_std)))
+    return log_probs.squeeze().mean()
+
+
+def sample_from_gaussian(y_hat, log_std_min=-7.0, scale_factor=1.0):
+    assert y_hat.size(2) == 2
+    mean = y_hat[:, :, :1]
+    log_std = torch.clamp(y_hat[:, :, 1:], min=log_std_min)
+    dist = Normal(
+        mean,
+        torch.exp(log_std),
+    )
+    sample = dist.sample()
+    sample = torch.clamp(torch.clamp(sample, min=-scale_factor), max=scale_factor)
+    del dist
+    return sample
+
+
+def log_sum_exp(x):
+    """numerically stable log_sum_exp implementation that prevents overflow"""
+    # TF ordering
+    axis = len(x.size()) - 1
+    m, _ = torch.max(x, dim=axis)
+    m2, _ = torch.max(x, dim=axis, keepdim=True)
+    return m + torch.log(torch.sum(torch.exp(x - m2), dim=axis))
+
+
+# It is adapted from https://github.com/r9y9/wavenet_vocoder/blob/master/wavenet_vocoder/mixture.py
+def discretized_mix_logistic_loss(y_hat, y, num_classes=65536, log_scale_min=None, reduce=True):
+    if log_scale_min is None:
+        log_scale_min = float(np.log(1e-14))
+    y_hat = y_hat.permute(0, 2, 1)
+    assert y_hat.dim() == 3
+    assert y_hat.size(1) % 3 == 0
+    nr_mix = y_hat.size(1) // 3
+
+    # (B x T x C)
+    y_hat = y_hat.transpose(1, 2)
+
+    # unpack parameters. (B, T, num_mixtures) x 3
+    logit_probs = y_hat[:, :, :nr_mix]
+    means = y_hat[:, :, nr_mix : 2 * nr_mix]
+    log_scales = torch.clamp(y_hat[:, :, 2 * nr_mix : 3 * nr_mix], min=log_scale_min)
+
+    # B x T x 1 -> B x T x num_mixtures
+    y = y.expand_as(means)
+
+    centered_y = y - means
+    inv_stdv = torch.exp(-log_scales)
+    plus_in = inv_stdv * (centered_y + 1.0 / (num_classes - 1))
+    cdf_plus = torch.sigmoid(plus_in)
+    min_in = inv_stdv * (centered_y - 1.0 / (num_classes - 1))
+    cdf_min = torch.sigmoid(min_in)
+
+    # log probability for edge case of 0 (before scaling)
+    # equivalent: torch.log(F.sigmoid(plus_in))
+    log_cdf_plus = plus_in - F.softplus(plus_in)
+
+    # log probability for edge case of 255 (before scaling)
+    # equivalent: (1 - F.sigmoid(min_in)).log()
+    log_one_minus_cdf_min = -F.softplus(min_in)
+
+    # probability for all other cases
+    cdf_delta = cdf_plus - cdf_min
+
+    mid_in = inv_stdv * centered_y
+    # log probability in the center of the bin, to be used in extreme cases
+    # (not actually used in our code)
+    log_pdf_mid = mid_in - log_scales - 2.0 * F.softplus(mid_in)
+
+    # tf equivalent
+
+    # log_probs = tf.where(x < -0.999, log_cdf_plus,
+    #                      tf.where(x > 0.999, log_one_minus_cdf_min,
+    #                               tf.where(cdf_delta > 1e-5,
+    #                                        tf.log(tf.maximum(cdf_delta, 1e-12)),
+    #                                        log_pdf_mid - np.log(127.5))))
+
+    # TODO: cdf_delta <= 1e-5 actually can happen. How can we choose the value
+    # for num_classes=65536 case? 1e-7? not sure..
+    inner_inner_cond = (cdf_delta > 1e-5).float()
+
+    inner_inner_out = inner_inner_cond * torch.log(torch.clamp(cdf_delta, min=1e-12)) + (1.0 - inner_inner_cond) * (
+        log_pdf_mid - np.log((num_classes - 1) / 2)
+    )
+    inner_cond = (y > 0.999).float()
+    inner_out = inner_cond * log_one_minus_cdf_min + (1.0 - inner_cond) * inner_inner_out
+    cond = (y < -0.999).float()
+    log_probs = cond * log_cdf_plus + (1.0 - cond) * inner_out
+
+    log_probs = log_probs + F.log_softmax(logit_probs, -1)
+
+    if reduce:
+        return -torch.mean(log_sum_exp(log_probs))
+    return -log_sum_exp(log_probs).unsqueeze(-1)
+
+
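The loss above expects the mixture parameters on the last axis of the network output and a single-channel target. The snippet below is a minimal usage sketch, not part of the patch: the batch size, sequence length, and mixture count are illustrative assumptions, and it presumes the patched TTS package is importable.

    import torch
    from TTS.vocoder.utils.distribution import discretized_mix_logistic_loss

    nr_mix = 10                                   # 3 * nr_mix channels: mixture logits, means, log-scales
    y_hat = torch.randn(2, 100, 3 * nr_mix)       # (B, T, 3 * nr_mix) network output
    y = torch.rand(2, 100, 1) * 2.0 - 1.0         # (B, T, 1) target samples in [-1, 1]
    loss = discretized_mix_logistic_loss(y_hat, y)  # scalar tensor when reduce=True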
+def sample_from_discretized_mix_logistic(y, log_scale_min=None):
+    """
+    Sample from discretized mixture of logistic distributions
+    Args:
+        y (Tensor): :math:`[B, C, T]`
+        log_scale_min (float): Log scale minimum value
+    Returns:
+        Tensor: sample in the range [-1, 1].
+    """
+    if log_scale_min is None:
+        log_scale_min = float(np.log(1e-14))
+    assert y.size(1) % 3 == 0
+    nr_mix = y.size(1) // 3
+
+    # B x T x C
+    y = y.transpose(1, 2)
+    logit_probs = y[:, :, :nr_mix]
+
+    # sample mixture indicator from softmax
+    temp = logit_probs.data.new(logit_probs.size()).uniform_(1e-5, 1.0 - 1e-5)
+    temp = logit_probs.data - torch.log(-torch.log(temp))
+    _, argmax = temp.max(dim=-1)
+
+    # (B, T) -> (B, T, nr_mix)
+    one_hot = to_one_hot(argmax, nr_mix)
+    # select logistic parameters
+    means = torch.sum(y[:, :, nr_mix : 2 * nr_mix] * one_hot, dim=-1)
+    log_scales = torch.clamp(torch.sum(y[:, :, 2 * nr_mix : 3 * nr_mix] * one_hot, dim=-1), min=log_scale_min)
+    # sample from logistic & clip to interval
+    # we don't actually round to the nearest 8bit value when sampling
+    u = means.data.new(means.size()).uniform_(1e-5, 1.0 - 1e-5)
+    x = means + torch.exp(log_scales) * (torch.log(u) - torch.log(1.0 - u))
+
+    x = torch.clamp(torch.clamp(x, min=-1.0), max=1.0)
+
+    return x
+
+
+def to_one_hot(tensor, n, fill_with=1.0):
+    # we perform one hot encoding with respect to the last axis
+    one_hot = torch.FloatTensor(tensor.size() + (n,)).zero_().type_as(tensor)
+    one_hot.scatter_(len(tensor.size()), tensor.unsqueeze(-1), fill_with)
+    return one_hot
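Both samplers are meant for inference, turning the network's per-step distribution parameters back into waveform values. A usage sketch, not part of the patch: shapes are illustrative assumptions, and it presumes the patched TTS package is importable.

    import torch
    from TTS.vocoder.utils.distribution import sample_from_discretized_mix_logistic, sample_from_gaussian

    # mixture-of-logistics head: (B, C, T) with C = 3 * num_mixtures
    mol_out = torch.randn(2, 30, 100)
    wav_mol = sample_from_discretized_mix_logistic(mol_out)  # (B, T), clipped to [-1, 1]

    # Gaussian head: (B, T, 2), last dim holds mean and log-std as unpacked by sample_from_gaussian
    gauss_out = torch.randn(2, 100, 2)
    wav_gauss = sample_from_gaussian(gauss_out)              # (B, T, 1), clipped to [-1, 1] by the default scale_factor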
diff --git a/TTS/vocoder/utils/generic_utils.py b/TTS/vocoder/utils/generic_utils.py
new file mode 100644
index 0000000..63a0af4
--- /dev/null
+++ b/TTS/vocoder/utils/generic_utils.py
@@ -0,0 +1,72 @@
+from typing import Dict
+
+import numpy as np
+import torch
+from matplotlib import pyplot as plt
+
+from TTS.tts.utils.visual import plot_spectrogram
+from TTS.utils.audio import AudioProcessor
+
+
+def interpolate_vocoder_input(scale_factor, spec):
+    """Interpolate spectrogram by the scale factor.
+    It is mainly used to match the sampling rates of
+    the tts and vocoder models.
+
+    Args:
+        scale_factor (float): scale factor to interpolate the spectrogram
+        spec (np.array): spectrogram to be interpolated
+
+    Returns:
+        torch.Tensor: interpolated spectrogram.
+    """
+    print(" > before interpolation :", spec.shape)
+    spec = torch.tensor(spec).unsqueeze(0).unsqueeze(0)  # pylint: disable=not-callable
+    spec = torch.nn.functional.interpolate(
+        spec, scale_factor=scale_factor, recompute_scale_factor=True, mode="bilinear", align_corners=False
+    ).squeeze(0)
+    print(" > after interpolation :", spec.shape)
+    return spec
+
+
+def plot_results(y_hat: torch.Tensor, y: torch.Tensor, ap: AudioProcessor, name_prefix: str = None) -> Dict:
+    """Plot the predicted and the real waveform and their spectrograms.
+
+    Args:
+        y_hat (torch.Tensor): Predicted waveform.
+        y (torch.Tensor): Real waveform.
+        ap (AudioProcessor): Audio processor used to process the waveform.
+        name_prefix (str, optional): Name prefix used to name the figures. Defaults to None.
+
+    Returns:
+        Dict: output figures keyed by the name of the figures.
+    """
+    if name_prefix is None:
+        name_prefix = ""
+
+    # select an instance from batch
+    y_hat = y_hat[0].squeeze().detach().cpu().numpy()
+    y = y[0].squeeze().detach().cpu().numpy()
+
+    spec_fake = ap.melspectrogram(y_hat).T
+    spec_real = ap.melspectrogram(y).T
+    spec_diff = np.abs(spec_fake - spec_real)
+
+    # plot figure and save it
+    fig_wave = plt.figure()
+    plt.subplot(2, 1, 1)
+    plt.plot(y)
+    plt.title("groundtruth speech")
+    plt.subplot(2, 1, 2)
+    plt.plot(y_hat)
+    plt.title("generated speech")
+    plt.tight_layout()
+    plt.close()
+
+    figures = {
+        name_prefix + "spectrogram/fake": plot_spectrogram(spec_fake),
+        name_prefix + "spectrogram/real": plot_spectrogram(spec_real),
+        name_prefix + "spectrogram/diff": plot_spectrogram(spec_diff),
+        name_prefix + "speech_comparison": fig_wave,
+    }
+    return figures
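interpolate_vocoder_input is driven by the mismatch between the TTS model's and the vocoder's sampling rates; passing a two-element scale factor stretches only the time axis of the spectrogram. The sketch below is not part of the patch: the sample rates and spectrogram shape are illustrative assumptions, and it presumes the patched TTS package is importable. plot_results additionally needs a configured AudioProcessor, so it is not exercised here.

    import numpy as np
    from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input

    tts_sample_rate = 16000       # assumed rate of the TTS model
    vocoder_sample_rate = 22050   # assumed rate of the vocoder
    scale_factor = [1.0, vocoder_sample_rate / tts_sample_rate]  # keep mel bins, stretch time

    spec = np.random.rand(80, 250).astype(np.float32)             # dummy (num_mels, T) spectrogram
    vocoder_input = interpolate_vocoder_input(scale_factor, spec)  # tensor of shape (1, 80, ~345)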