From 6af40bc6cf5569c8054a32ff43402f5e4dcfcc4e Mon Sep 17 00:00:00 2001
From: Sam Khoze <68170403+SamKhoze@users.noreply.github.com>
Date: Tue, 18 Jun 2024 13:18:37 -0700
Subject: [PATCH] Add files via upload

---
 TTS/VERSION | 1 +
 TTS/api.py | 458 +++++++++++++
 TTS/bin/__init__.py | 0
 TTS/bin/collect_env_info.py | 48 ++
 TTS/bin/compute_attention_masks.py | 165 +++++
 TTS/bin/compute_embeddings.py | 197 ++++++
 TTS/bin/compute_statistics.py | 96 +++
 TTS/bin/eval_encoder.py | 88 +++
 TTS/bin/extract_tts_spectrograms.py | 287 ++++++++
 TTS/bin/find_unique_chars.py | 45 ++
 TTS/bin/find_unique_phonemes.py | 74 ++
 TTS/bin/remove_silence_using_vad.py | 124 ++++
 TTS/bin/resample.py | 90 +++
 TTS/bin/synthesize.py | 494 ++++++++++++++
 TTS/bin/train_encoder.py | 332 +++++++++
 TTS/bin/train_tts.py | 71 ++
 TTS/bin/train_vocoder.py | 77 +++
 TTS/bin/tune_wavegrad.py | 103 +++
 TTS/config/__init__.py | 135 ++++
 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 6650 bytes
 .../shared_configs.cpython-311.pyc | Bin 0 -> 11855 bytes
 TTS/config/shared_configs.py | 268 ++++++++
 TTS/encoder/README.md | 18 +
 TTS/encoder/__init__.py | 0
 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 173 bytes
 .../__pycache__/losses.cpython-311.pyc | Bin 0 -> 13636 bytes
 TTS/encoder/configs/base_encoder_config.py | 61 ++
 TTS/encoder/configs/emotion_encoder_config.py | 12 +
 TTS/encoder/configs/speaker_encoder_config.py | 11 +
 TTS/encoder/dataset.py | 147 ++++
 TTS/encoder/losses.py | 226 +++++++
 .../__pycache__/base_encoder.cpython-311.pyc | Bin 0 -> 8120 bytes
 .../models/__pycache__/lstm.cpython-311.pyc | Bin 0 -> 6648 bytes
 .../models/__pycache__/resnet.cpython-311.pyc | Bin 0 -> 11976 bytes
 TTS/encoder/models/base_encoder.py | 161 +++++
 TTS/encoder/models/lstm.py | 99 +++
 TTS/encoder/models/resnet.py | 198 ++++++
 TTS/encoder/requirements.txt | 2 +
 TTS/encoder/utils/__init__.py | 0
 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 179 bytes
 .../__pycache__/generic_utils.cpython-311.pyc | Bin 0 -> 7424 bytes
 TTS/encoder/utils/generic_utils.py | 136 ++++
 TTS/encoder/utils/prepare_voxceleb.py | 219 ++++++
 TTS/encoder/utils/training.py | 99 +++
 TTS/encoder/utils/visual.py | 50 ++
 TTS/model.py | 59 ++
 TTS/utils/__init__.py | 0
 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 171 bytes
 .../__pycache__/generic_utils.cpython-311.pyc | Bin 0 -> 15661 bytes
 TTS/utils/__pycache__/io.cpython-311.pyc | Bin 0 -> 4685 bytes
 TTS/utils/__pycache__/manage.cpython-311.pyc | Bin 0 -> 36630 bytes
 .../__pycache__/samplers.cpython-311.pyc | Bin 0 -> 12400 bytes
 .../__pycache__/synthesizer.cpython-311.pyc | Bin 0 -> 24501 bytes
 TTS/utils/audio/__init__.py | 1 +
 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 255 bytes
 .../numpy_transforms.cpython-311.pyc | Bin 0 -> 24021 bytes
 .../__pycache__/processor.cpython-311.pyc | Bin 0 -> 27818 bytes
 .../torch_transforms.cpython-311.pyc | Bin 0 -> 7009 bytes
 TTS/utils/audio/numpy_transforms.py | 485 ++++++++++++++
 TTS/utils/audio/processor.py | 633 ++++++++++++++++++
 TTS/utils/audio/torch_transforms.py | 165 +++++
 TTS/utils/callbacks.py | 105 +++
 TTS/utils/capacitron_optimizer.py | 67 ++
 TTS/utils/distribute.py | 20 +
 TTS/utils/download.py | 206 ++++++
 TTS/utils/downloaders.py | 126 ++++
 TTS/utils/generic_utils.py | 239 +++++++
 TTS/utils/io.py | 70 ++
 TTS/utils/manage.py | 621 +++++++++++++++++
 TTS/utils/radam.py | 105 +++
 TTS/utils/samplers.py | 201 ++++++
 TTS/utils/synthesizer.py | 505 ++++++++++++++
 TTS/utils/training.py | 44 ++
 TTS/utils/vad.py | 88 +++
 74 files changed, 8332 insertions(+)
 create mode
100644 TTS/VERSION create mode 100644 TTS/api.py create mode 100644 TTS/bin/__init__.py create mode 100644 TTS/bin/collect_env_info.py create mode 100644 TTS/bin/compute_attention_masks.py create mode 100644 TTS/bin/compute_embeddings.py create mode 100644 TTS/bin/compute_statistics.py create mode 100644 TTS/bin/eval_encoder.py create mode 100644 TTS/bin/extract_tts_spectrograms.py create mode 100644 TTS/bin/find_unique_chars.py create mode 100644 TTS/bin/find_unique_phonemes.py create mode 100644 TTS/bin/remove_silence_using_vad.py create mode 100644 TTS/bin/resample.py create mode 100644 TTS/bin/synthesize.py create mode 100644 TTS/bin/train_encoder.py create mode 100644 TTS/bin/train_tts.py create mode 100644 TTS/bin/train_vocoder.py create mode 100644 TTS/bin/tune_wavegrad.py create mode 100644 TTS/config/__init__.py create mode 100644 TTS/config/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/config/__pycache__/shared_configs.cpython-311.pyc create mode 100644 TTS/config/shared_configs.py create mode 100644 TTS/encoder/README.md create mode 100644 TTS/encoder/__init__.py create mode 100644 TTS/encoder/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/encoder/__pycache__/losses.cpython-311.pyc create mode 100644 TTS/encoder/configs/base_encoder_config.py create mode 100644 TTS/encoder/configs/emotion_encoder_config.py create mode 100644 TTS/encoder/configs/speaker_encoder_config.py create mode 100644 TTS/encoder/dataset.py create mode 100644 TTS/encoder/losses.py create mode 100644 TTS/encoder/models/__pycache__/base_encoder.cpython-311.pyc create mode 100644 TTS/encoder/models/__pycache__/lstm.cpython-311.pyc create mode 100644 TTS/encoder/models/__pycache__/resnet.cpython-311.pyc create mode 100644 TTS/encoder/models/base_encoder.py create mode 100644 TTS/encoder/models/lstm.py create mode 100644 TTS/encoder/models/resnet.py create mode 100644 TTS/encoder/requirements.txt create mode 100644 TTS/encoder/utils/__init__.py create mode 100644 TTS/encoder/utils/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/encoder/utils/__pycache__/generic_utils.cpython-311.pyc create mode 100644 TTS/encoder/utils/generic_utils.py create mode 100644 TTS/encoder/utils/prepare_voxceleb.py create mode 100644 TTS/encoder/utils/training.py create mode 100644 TTS/encoder/utils/visual.py create mode 100644 TTS/model.py create mode 100644 TTS/utils/__init__.py create mode 100644 TTS/utils/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/utils/__pycache__/generic_utils.cpython-311.pyc create mode 100644 TTS/utils/__pycache__/io.cpython-311.pyc create mode 100644 TTS/utils/__pycache__/manage.cpython-311.pyc create mode 100644 TTS/utils/__pycache__/samplers.cpython-311.pyc create mode 100644 TTS/utils/__pycache__/synthesizer.cpython-311.pyc create mode 100644 TTS/utils/audio/__init__.py create mode 100644 TTS/utils/audio/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/utils/audio/__pycache__/numpy_transforms.cpython-311.pyc create mode 100644 TTS/utils/audio/__pycache__/processor.cpython-311.pyc create mode 100644 TTS/utils/audio/__pycache__/torch_transforms.cpython-311.pyc create mode 100644 TTS/utils/audio/numpy_transforms.py create mode 100644 TTS/utils/audio/processor.py create mode 100644 TTS/utils/audio/torch_transforms.py create mode 100644 TTS/utils/callbacks.py create mode 100644 TTS/utils/capacitron_optimizer.py create mode 100644 TTS/utils/distribute.py create mode 100644 TTS/utils/download.py create mode 100644 TTS/utils/downloaders.py create 
mode 100644 TTS/utils/generic_utils.py create mode 100644 TTS/utils/io.py create mode 100644 TTS/utils/manage.py create mode 100644 TTS/utils/radam.py create mode 100644 TTS/utils/samplers.py create mode 100644 TTS/utils/synthesizer.py create mode 100644 TTS/utils/training.py create mode 100644 TTS/utils/vad.py diff --git a/TTS/VERSION b/TTS/VERSION new file mode 100644 index 0000000..2157409 --- /dev/null +++ b/TTS/VERSION @@ -0,0 +1 @@ +0.22.0 diff --git a/TTS/api.py b/TTS/api.py new file mode 100644 index 0000000..7abc188 --- /dev/null +++ b/TTS/api.py @@ -0,0 +1,458 @@ +import tempfile +import warnings +from pathlib import Path +from typing import Union + +import numpy as np +from torch import nn + +from TTS.utils.audio.numpy_transforms import save_wav +from TTS.utils.manage import ModelManager +from TTS.utils.synthesizer import Synthesizer +from TTS.config import load_config + + +class TTS(nn.Module): + """TODO: Add voice conversion and Capacitron support.""" + + def __init__( + self, + model_name: str = "", + model_path: str = None, + config_path: str = None, + vocoder_path: str = None, + vocoder_config_path: str = None, + progress_bar: bool = True, + gpu=False, + ): + """🐸TTS python interface that allows to load and use the released models. + + Example with a multi-speaker model: + >>> from TTS.api import TTS + >>> tts = TTS(TTS.list_models()[0]) + >>> wav = tts.tts("This is a test! This is also a test!!", speaker=tts.speakers[0], language=tts.languages[0]) + >>> tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.languages[0], file_path="output.wav") + + Example with a single-speaker model: + >>> tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False, gpu=False) + >>> tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path="output.wav") + + Example loading a model from a path: + >>> tts = TTS(model_path="/path/to/checkpoint_100000.pth", config_path="/path/to/config.json", progress_bar=False, gpu=False) + >>> tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path="output.wav") + + Example voice cloning with YourTTS in English, French and Portuguese: + >>> tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True) + >>> tts.tts_to_file("This is voice cloning.", speaker_wav="my/cloning/audio.wav", language="en", file_path="thisisit.wav") + >>> tts.tts_to_file("C'est le clonage de la voix.", speaker_wav="my/cloning/audio.wav", language="fr", file_path="thisisit.wav") + >>> tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", language="pt", file_path="thisisit.wav") + + Example Fairseq TTS models (uses ISO language codes in https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html): + >>> tts = TTS(model_name="tts_models/eng/fairseq/vits", progress_bar=False, gpu=True) + >>> tts.tts_to_file("This is a test.", file_path="output.wav") + + Args: + model_name (str, optional): Model name to load. You can list models by ```tts.models```. Defaults to None. + model_path (str, optional): Path to the model checkpoint. Defaults to None. + config_path (str, optional): Path to the model config. Defaults to None. + vocoder_path (str, optional): Path to the vocoder checkpoint. Defaults to None. + vocoder_config_path (str, optional): Path to the vocoder config. Defaults to None. + progress_bar (bool, optional): Whether to pring a progress bar while downloading a model. Defaults to True. + gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. 
Defaults to False. + """ + super().__init__() + self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar, verbose=False) + self.config = load_config(config_path) if config_path else None + self.synthesizer = None + self.voice_converter = None + self.model_name = "" + if gpu: + warnings.warn("`gpu` will be deprecated. Please use `tts.to(device)` instead.") + + if model_name is not None and len(model_name) > 0: + if "tts_models" in model_name: + self.load_tts_model_by_name(model_name, gpu) + elif "voice_conversion_models" in model_name: + self.load_vc_model_by_name(model_name, gpu) + else: + self.load_model_by_name(model_name, gpu) + + if model_path: + self.load_tts_model_by_path( + model_path, config_path, vocoder_path=vocoder_path, vocoder_config=vocoder_config_path, gpu=gpu + ) + + @property + def models(self): + return self.manager.list_tts_models() + + @property + def is_multi_speaker(self): + if hasattr(self.synthesizer.tts_model, "speaker_manager") and self.synthesizer.tts_model.speaker_manager: + return self.synthesizer.tts_model.speaker_manager.num_speakers > 1 + return False + + @property + def is_multi_lingual(self): + # Not sure what sets this to None, but applied a fix to prevent crashing. + if ( + isinstance(self.model_name, str) + and "xtts" in self.model_name + or self.config + and ("xtts" in self.config.model or len(self.config.languages) > 1) + ): + return True + if hasattr(self.synthesizer.tts_model, "language_manager") and self.synthesizer.tts_model.language_manager: + return self.synthesizer.tts_model.language_manager.num_languages > 1 + return False + + @property + def speakers(self): + if not self.is_multi_speaker: + return None + return self.synthesizer.tts_model.speaker_manager.speaker_names + + @property + def languages(self): + if not self.is_multi_lingual: + return None + return self.synthesizer.tts_model.language_manager.language_names + + @staticmethod + def get_models_file_path(): + return Path(__file__).parent / ".models.json" + + def list_models(self): + return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False, verbose=False) + + def download_model_by_name(self, model_name: str): + model_path, config_path, model_item = self.manager.download_model(model_name) + if "fairseq" in model_name or (model_item is not None and isinstance(model_item["model_url"], list)): + # return model directory if there are multiple files + # we assume that the model knows how to load itself + return None, None, None, None, model_path + if model_item.get("default_vocoder") is None: + return model_path, config_path, None, None, None + vocoder_path, vocoder_config_path, _ = self.manager.download_model(model_item["default_vocoder"]) + return model_path, config_path, vocoder_path, vocoder_config_path, None + + def load_model_by_name(self, model_name: str, gpu: bool = False): + """Load one of the 🐸TTS models by name. + + Args: + model_name (str): Model name to load. You can list models by ```tts.models```. + gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False. + """ + self.load_tts_model_by_name(model_name, gpu) + + def load_vc_model_by_name(self, model_name: str, gpu: bool = False): + """Load one of the voice conversion models by name. + + Args: + model_name (str): Model name to load. You can list models by ```tts.models```. + gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False. 
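+
+        Example (a minimal, illustrative sketch; the FreeVC model name below is the same
+        one `tts_with_vc` falls back to, and the wav paths are placeholders):
+            >>> tts = TTS()
+            >>> tts.load_vc_model_by_name("voice_conversion_models/multilingual/vctk/freevc24", gpu=False)
+            >>> tts.voice_conversion_to_file(source_wav="source.wav", target_wav="target.wav", file_path="converted.wav")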
+ """ + self.model_name = model_name + model_path, config_path, _, _, _ = self.download_model_by_name(model_name) + self.voice_converter = Synthesizer(vc_checkpoint=model_path, vc_config=config_path, use_cuda=gpu) + + def load_tts_model_by_name(self, model_name: str, gpu: bool = False): + """Load one of 🐸TTS models by name. + + Args: + model_name (str): Model name to load. You can list models by ```tts.models```. + gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False. + + TODO: Add tests + """ + self.synthesizer = None + self.model_name = model_name + + model_path, config_path, vocoder_path, vocoder_config_path, model_dir = self.download_model_by_name( + model_name + ) + + # init synthesizer + # None values are fetch from the model + self.synthesizer = Synthesizer( + tts_checkpoint=model_path, + tts_config_path=config_path, + tts_speakers_file=None, + tts_languages_file=None, + vocoder_checkpoint=vocoder_path, + vocoder_config=vocoder_config_path, + encoder_checkpoint=None, + encoder_config=None, + model_dir=model_dir, + use_cuda=gpu, + ) + + def load_tts_model_by_path( + self, model_path: str, config_path: str, vocoder_path: str = None, vocoder_config: str = None, gpu: bool = False + ): + """Load a model from a path. + + Args: + model_path (str): Path to the model checkpoint. + config_path (str): Path to the model config. + vocoder_path (str, optional): Path to the vocoder checkpoint. Defaults to None. + vocoder_config (str, optional): Path to the vocoder config. Defaults to None. + gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False. + """ + + self.synthesizer = Synthesizer( + tts_checkpoint=model_path, + tts_config_path=config_path, + tts_speakers_file=None, + tts_languages_file=None, + vocoder_checkpoint=vocoder_path, + vocoder_config=vocoder_config, + encoder_checkpoint=None, + encoder_config=None, + use_cuda=gpu, + ) + + def _check_arguments( + self, + speaker: str = None, + language: str = None, + speaker_wav: str = None, + emotion: str = None, + speed: float = None, + **kwargs, + ) -> None: + """Check if the arguments are valid for the model.""" + # check for the coqui tts models + if self.is_multi_speaker and (speaker is None and speaker_wav is None): + raise ValueError("Model is multi-speaker but no `speaker` is provided.") + if self.is_multi_lingual and language is None: + raise ValueError("Model is multi-lingual but no `language` is provided.") + if not self.is_multi_speaker and speaker is not None and "voice_dir" not in kwargs: + raise ValueError("Model is not multi-speaker but `speaker` is provided.") + if not self.is_multi_lingual and language is not None: + raise ValueError("Model is not multi-lingual but `language` is provided.") + if not emotion is None and not speed is None: + raise ValueError("Emotion and speed can only be used with Coqui Studio models. Which is discontinued.") + + def tts( + self, + text: str, + speaker: str = None, + language: str = None, + speaker_wav: str = None, + emotion: str = None, + speed: float = None, + split_sentences: bool = True, + **kwargs, + ): + """Convert text to speech. + + Args: + text (str): + Input text to synthesize. + speaker (str, optional): + Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by + `tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None. + language (str): Language of the text. If None, the default language of the speaker is used. 
Language is only + supported by `XTTS` model. + speaker_wav (str, optional): + Path to a reference wav file to use for voice cloning with supporting models like YourTTS. + Defaults to None. + emotion (str, optional): + Emotion to use for 🐸Coqui Studio models. If None, Studio models use "Neutral". Defaults to None. + speed (float, optional): + Speed factor to use for 🐸Coqui Studio models, between 0 and 2.0. If None, Studio models use 1.0. + Defaults to None. + split_sentences (bool, optional): + Split text into sentences, synthesize them separately and concatenate the file audio. + Setting it False uses more VRAM and possibly hit model specific text length or VRAM limits. Only + applicable to the 🐸TTS models. Defaults to True. + kwargs (dict, optional): + Additional arguments for the model. + """ + self._check_arguments( + speaker=speaker, language=language, speaker_wav=speaker_wav, emotion=emotion, speed=speed, **kwargs + ) + wav = self.synthesizer.tts( + text=text, + speaker_name=speaker, + language_name=language, + speaker_wav=speaker_wav, + reference_wav=None, + style_wav=None, + style_text=None, + reference_speaker_name=None, + split_sentences=split_sentences, + **kwargs, + ) + return wav + + def tts_to_file( + self, + text: str, + speaker: str = None, + language: str = None, + speaker_wav: str = None, + emotion: str = None, + speed: float = 1.0, + pipe_out=None, + file_path: str = "output.wav", + split_sentences: bool = True, + **kwargs, + ): + """Convert text to speech. + + Args: + text (str): + Input text to synthesize. + speaker (str, optional): + Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by + `tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None. + language (str, optional): + Language code for multi-lingual models. You can check whether loaded model is multi-lingual + `tts.is_multi_lingual` and list available languages by `tts.languages`. Defaults to None. + speaker_wav (str, optional): + Path to a reference wav file to use for voice cloning with supporting models like YourTTS. + Defaults to None. + emotion (str, optional): + Emotion to use for 🐸Coqui Studio models. Defaults to "Neutral". + speed (float, optional): + Speed factor to use for 🐸Coqui Studio models, between 0.0 and 2.0. Defaults to None. + pipe_out (BytesIO, optional): + Flag to stdout the generated TTS wav file for shell pipe. + file_path (str, optional): + Output file path. Defaults to "output.wav". + split_sentences (bool, optional): + Split text into sentences, synthesize them separately and concatenate the file audio. + Setting it False uses more VRAM and possibly hit model specific text length or VRAM limits. Only + applicable to the 🐸TTS models. Defaults to True. + kwargs (dict, optional): + Additional arguments for the model. + """ + self._check_arguments(speaker=speaker, language=language, speaker_wav=speaker_wav, **kwargs) + + wav = self.tts( + text=text, + speaker=speaker, + language=language, + speaker_wav=speaker_wav, + split_sentences=split_sentences, + **kwargs, + ) + self.synthesizer.save_wav(wav=wav, path=file_path, pipe_out=pipe_out) + return file_path + + def voice_conversion( + self, + source_wav: str, + target_wav: str, + ): + """Voice conversion with FreeVC. Convert source wav to target speaker. + + Args:`` + source_wav (str): + Path to the source wav file. + target_wav (str):` + Path to the target wav file. 
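+
+        Example (rough sketch; the wav paths are placeholders, and saving the result mirrors
+        what `voice_conversion_to_file` does with `save_wav` and the converter's output sample rate):
+            >>> from TTS.utils.audio.numpy_transforms import save_wav
+            >>> tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
+            >>> wav = tts.voice_conversion(source_wav="source.wav", target_wav="target.wav")
+            >>> save_wav(wav=wav, path="converted.wav", sample_rate=tts.voice_converter.vc_config.audio.output_sample_rate)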
+ """ + wav = self.voice_converter.voice_conversion(source_wav=source_wav, target_wav=target_wav) + return wav + + def voice_conversion_to_file( + self, + source_wav: str, + target_wav: str, + file_path: str = "output.wav", + ): + """Voice conversion with FreeVC. Convert source wav to target speaker. + + Args: + source_wav (str): + Path to the source wav file. + target_wav (str): + Path to the target wav file. + file_path (str, optional): + Output file path. Defaults to "output.wav". + """ + wav = self.voice_conversion(source_wav=source_wav, target_wav=target_wav) + save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate) + return file_path + + def tts_with_vc( + self, + text: str, + language: str = None, + speaker_wav: str = None, + speaker: str = None, + split_sentences: bool = True, + ): + """Convert text to speech with voice conversion. + + It combines tts with voice conversion to fake voice cloning. + + - Convert text to speech with tts. + - Convert the output wav to target speaker with voice conversion. + + Args: + text (str): + Input text to synthesize. + language (str, optional): + Language code for multi-lingual models. You can check whether loaded model is multi-lingual + `tts.is_multi_lingual` and list available languages by `tts.languages`. Defaults to None. + speaker_wav (str, optional): + Path to a reference wav file to use for voice cloning with supporting models like YourTTS. + Defaults to None. + speaker (str, optional): + Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by + `tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None. + split_sentences (bool, optional): + Split text into sentences, synthesize them separately and concatenate the file audio. + Setting it False uses more VRAM and possibly hit model specific text length or VRAM limits. Only + applicable to the 🐸TTS models. Defaults to True. + """ + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp: + # Lazy code... save it to a temp file to resample it while reading it for VC + self.tts_to_file( + text=text, speaker=speaker, language=language, file_path=fp.name, split_sentences=split_sentences + ) + if self.voice_converter is None: + self.load_vc_model_by_name("voice_conversion_models/multilingual/vctk/freevc24") + wav = self.voice_converter.voice_conversion(source_wav=fp.name, target_wav=speaker_wav) + return wav + + def tts_with_vc_to_file( + self, + text: str, + language: str = None, + speaker_wav: str = None, + file_path: str = "output.wav", + speaker: str = None, + split_sentences: bool = True, + ): + """Convert text to speech with voice conversion and save to file. + + Check `tts_with_vc` for more details. + + Args: + text (str): + Input text to synthesize. + language (str, optional): + Language code for multi-lingual models. You can check whether loaded model is multi-lingual + `tts.is_multi_lingual` and list available languages by `tts.languages`. Defaults to None. + speaker_wav (str, optional): + Path to a reference wav file to use for voice cloning with supporting models like YourTTS. + Defaults to None. + file_path (str, optional): + Output file path. Defaults to "output.wav". + speaker (str, optional): + Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by + `tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None. 
+ split_sentences (bool, optional): + Split text into sentences, synthesize them separately and concatenate the file audio. + Setting it False uses more VRAM and possibly hit model specific text length or VRAM limits. Only + applicable to the 🐸TTS models. Defaults to True. + """ + wav = self.tts_with_vc( + text=text, language=language, speaker_wav=speaker_wav, speaker=speaker, split_sentences=split_sentences + ) + save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate) diff --git a/TTS/bin/__init__.py b/TTS/bin/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/bin/collect_env_info.py b/TTS/bin/collect_env_info.py new file mode 100644 index 0000000..662fcd0 --- /dev/null +++ b/TTS/bin/collect_env_info.py @@ -0,0 +1,48 @@ +"""Get detailed info about the working environment.""" +import os +import platform +import sys + +import numpy +import torch + +sys.path += [os.path.abspath(".."), os.path.abspath(".")] +import json + +import TTS + + +def system_info(): + return { + "OS": platform.system(), + "architecture": platform.architecture(), + "version": platform.version(), + "processor": platform.processor(), + "python": platform.python_version(), + } + + +def cuda_info(): + return { + "GPU": [torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())], + "available": torch.cuda.is_available(), + "version": torch.version.cuda, + } + + +def package_info(): + return { + "numpy": numpy.__version__, + "PyTorch_version": torch.__version__, + "PyTorch_debug": torch.version.debug, + "TTS": TTS.__version__, + } + + +def main(): + details = {"System": system_info(), "CUDA": cuda_info(), "Packages": package_info()} + print(json.dumps(details, indent=4, sort_keys=True)) + + +if __name__ == "__main__": + main() diff --git a/TTS/bin/compute_attention_masks.py b/TTS/bin/compute_attention_masks.py new file mode 100644 index 0000000..9ab520b --- /dev/null +++ b/TTS/bin/compute_attention_masks.py @@ -0,0 +1,165 @@ +import argparse +import importlib +import os +from argparse import RawTextHelpFormatter + +import numpy as np +import torch +from torch.utils.data import DataLoader +from tqdm import tqdm + +from TTS.config import load_config +from TTS.tts.datasets.TTSDataset import TTSDataset +from TTS.tts.models import setup_model +from TTS.tts.utils.text.characters import make_symbols, phonemes, symbols +from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_checkpoint + +if __name__ == "__main__": + # pylint: disable=bad-option-value + parser = argparse.ArgumentParser( + description="""Extract attention masks from trained Tacotron/Tacotron2 models. +These masks can be used for different purposes including training a TTS model with a Duration Predictor.\n\n""" + """Each attention mask is written to the same path as the input wav file with ".npy" file extension. +(e.g. 
path/bla.wav (wav file) --> path/bla.npy (attention mask))\n""" + """ +Example run: + CUDA_VISIBLE_DEVICE="0" python TTS/bin/compute_attention_masks.py + --model_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/checkpoint_200000.pth + --config_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/config.json + --dataset_metafile metadata.csv + --data_path /root/LJSpeech-1.1/ + --batch_size 32 + --dataset ljspeech + --use_cuda True +""", + formatter_class=RawTextHelpFormatter, + ) + parser.add_argument("--model_path", type=str, required=True, help="Path to Tacotron/Tacotron2 model file ") + parser.add_argument( + "--config_path", + type=str, + required=True, + help="Path to Tacotron/Tacotron2 config file.", + ) + parser.add_argument( + "--dataset", + type=str, + default="", + required=True, + help="Target dataset processor name from TTS.tts.dataset.preprocess.", + ) + + parser.add_argument( + "--dataset_metafile", + type=str, + default="", + required=True, + help="Dataset metafile inclusing file paths with transcripts.", + ) + parser.add_argument("--data_path", type=str, default="", help="Defines the data path. It overwrites config.json.") + parser.add_argument("--use_cuda", type=bool, default=False, help="enable/disable cuda.") + + parser.add_argument( + "--batch_size", default=16, type=int, help="Batch size for the model. Use batch_size=1 if you have no CUDA." + ) + args = parser.parse_args() + + C = load_config(args.config_path) + ap = AudioProcessor(**C.audio) + + # if the vocabulary was passed, replace the default + if "characters" in C.keys(): + symbols, phonemes = make_symbols(**C.characters) + + # load the model + num_chars = len(phonemes) if C.use_phonemes else len(symbols) + # TODO: handle multi-speaker + model = setup_model(C) + model, _ = load_checkpoint(model, args.model_path, args.use_cuda, True) + + # data loader + preprocessor = importlib.import_module("TTS.tts.datasets.formatters") + preprocessor = getattr(preprocessor, args.dataset) + meta_data = preprocessor(args.data_path, args.dataset_metafile) + dataset = TTSDataset( + model.decoder.r, + C.text_cleaner, + compute_linear_spec=False, + ap=ap, + meta_data=meta_data, + characters=C.characters if "characters" in C.keys() else None, + add_blank=C["add_blank"] if "add_blank" in C.keys() else False, + use_phonemes=C.use_phonemes, + phoneme_cache_path=C.phoneme_cache_path, + phoneme_language=C.phoneme_language, + enable_eos_bos=C.enable_eos_bos_chars, + ) + + dataset.sort_and_filter_items(C.get("sort_by_audio_len", default=False)) + loader = DataLoader( + dataset, + batch_size=args.batch_size, + num_workers=4, + collate_fn=dataset.collate_fn, + shuffle=False, + drop_last=False, + ) + + # compute attentions + file_paths = [] + with torch.no_grad(): + for data in tqdm(loader): + # setup input data + text_input = data[0] + text_lengths = data[1] + linear_input = data[3] + mel_input = data[4] + mel_lengths = data[5] + stop_targets = data[6] + item_idxs = data[7] + + # dispatch data to GPU + if args.use_cuda: + text_input = text_input.cuda() + text_lengths = text_lengths.cuda() + mel_input = mel_input.cuda() + mel_lengths = mel_lengths.cuda() + + model_outputs = model.forward(text_input, text_lengths, mel_input) + + alignments = model_outputs["alignments"].detach() + for idx, alignment in enumerate(alignments): + item_idx = item_idxs[idx] + # interpolate if r > 1 + alignment = ( + torch.nn.functional.interpolate( + alignment.transpose(0, 1).unsqueeze(0), + size=None, + 
scale_factor=model.decoder.r, + mode="nearest", + align_corners=None, + recompute_scale_factor=None, + ) + .squeeze(0) + .transpose(0, 1) + ) + # remove paddings + alignment = alignment[: mel_lengths[idx], : text_lengths[idx]].cpu().numpy() + # set file paths + wav_file_name = os.path.basename(item_idx) + align_file_name = os.path.splitext(wav_file_name)[0] + "_attn.npy" + file_path = item_idx.replace(wav_file_name, align_file_name) + # save output + wav_file_abs_path = os.path.abspath(item_idx) + file_abs_path = os.path.abspath(file_path) + file_paths.append([wav_file_abs_path, file_abs_path]) + np.save(file_path, alignment) + + # ourput metafile + metafile = os.path.join(args.data_path, "metadata_attn_mask.txt") + + with open(metafile, "w", encoding="utf-8") as f: + for p in file_paths: + f.write(f"{p[0]}|{p[1]}\n") + print(f" >> Metafile created: {metafile}") diff --git a/TTS/bin/compute_embeddings.py b/TTS/bin/compute_embeddings.py new file mode 100644 index 0000000..5b5a37d --- /dev/null +++ b/TTS/bin/compute_embeddings.py @@ -0,0 +1,197 @@ +import argparse +import os +from argparse import RawTextHelpFormatter + +import torch +from tqdm import tqdm + +from TTS.config import load_config +from TTS.config.shared_configs import BaseDatasetConfig +from TTS.tts.datasets import load_tts_samples +from TTS.tts.utils.managers import save_file +from TTS.tts.utils.speakers import SpeakerManager + + +def compute_embeddings( + model_path, + config_path, + output_path, + old_speakers_file=None, + old_append=False, + config_dataset_path=None, + formatter_name=None, + dataset_name=None, + dataset_path=None, + meta_file_train=None, + meta_file_val=None, + disable_cuda=False, + no_eval=False, +): + use_cuda = torch.cuda.is_available() and not disable_cuda + + if config_dataset_path is not None: + c_dataset = load_config(config_dataset_path) + meta_data_train, meta_data_eval = load_tts_samples(c_dataset.datasets, eval_split=not no_eval) + else: + c_dataset = BaseDatasetConfig() + c_dataset.formatter = formatter_name + c_dataset.dataset_name = dataset_name + c_dataset.path = dataset_path + if meta_file_train is not None: + c_dataset.meta_file_train = meta_file_train + if meta_file_val is not None: + c_dataset.meta_file_val = meta_file_val + meta_data_train, meta_data_eval = load_tts_samples(c_dataset, eval_split=not no_eval) + + if meta_data_eval is None: + samples = meta_data_train + else: + samples = meta_data_train + meta_data_eval + + encoder_manager = SpeakerManager( + encoder_model_path=model_path, + encoder_config_path=config_path, + d_vectors_file_path=old_speakers_file, + use_cuda=use_cuda, + ) + + class_name_key = encoder_manager.encoder_config.class_name_key + + # compute speaker embeddings + if old_speakers_file is not None and old_append: + speaker_mapping = encoder_manager.embeddings + else: + speaker_mapping = {} + + for fields in tqdm(samples): + class_name = fields[class_name_key] + audio_file = fields["audio_file"] + embedding_key = fields["audio_unique_name"] + + # Only update the speaker name when the embedding is already in the old file. 
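+        # (Illustrative note: each mapping entry built below has the form
+        #  {"name": <class/speaker name>, "embedding": <embedding vector>}, keyed by the
+        #  sample's `audio_unique_name`; the whole dict is finally written with `save_file`,
+        #  e.g. as `speakers.pth`.)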
+ if embedding_key in speaker_mapping: + speaker_mapping[embedding_key]["name"] = class_name + continue + + if old_speakers_file is not None and embedding_key in encoder_manager.clip_ids: + # get the embedding from the old file + embedd = encoder_manager.get_embedding_by_clip(embedding_key) + else: + # extract the embedding + embedd = encoder_manager.compute_embedding_from_clip(audio_file) + + # create speaker_mapping if target dataset is defined + speaker_mapping[embedding_key] = {} + speaker_mapping[embedding_key]["name"] = class_name + speaker_mapping[embedding_key]["embedding"] = embedd + + if speaker_mapping: + # save speaker_mapping if target dataset is defined + if os.path.isdir(output_path): + mapping_file_path = os.path.join(output_path, "speakers.pth") + else: + mapping_file_path = output_path + + if os.path.dirname(mapping_file_path) != "": + os.makedirs(os.path.dirname(mapping_file_path), exist_ok=True) + + save_file(speaker_mapping, mapping_file_path) + print("Speaker embeddings saved at:", mapping_file_path) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="""Compute embedding vectors for each audio file in a dataset and store them keyed by `{dataset_name}#{file_path}` in a .pth file\n\n""" + """ + Example runs: + python TTS/bin/compute_embeddings.py --model_path speaker_encoder_model.pth --config_path speaker_encoder_config.json --config_dataset_path dataset_config.json + + python TTS/bin/compute_embeddings.py --model_path speaker_encoder_model.pth --config_path speaker_encoder_config.json --formatter_name coqui --dataset_path /path/to/vctk/dataset --dataset_name my_vctk --meta_file_train /path/to/vctk/metafile_train.csv --meta_file_val /path/to/vctk/metafile_eval.csv + """, + formatter_class=RawTextHelpFormatter, + ) + parser.add_argument( + "--model_path", + type=str, + help="Path to model checkpoint file. It defaults to the released speaker encoder.", + default="https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar", + ) + parser.add_argument( + "--config_path", + type=str, + help="Path to model config file. It defaults to the released speaker encoder config.", + default="https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json", + ) + parser.add_argument( + "--config_dataset_path", + type=str, + help="Path to dataset config file. You either need to provide this or `formatter_name`, `dataset_name` and `dataset_path` arguments.", + default=None, + ) + parser.add_argument( + "--output_path", + type=str, + help="Path for output `pth` or `json` file.", + default="speakers.pth", + ) + parser.add_argument( + "--old_file", + type=str, + help="The old existing embedding file, from which the embeddings will be directly loaded for already computed audio clips.", + default=None, + ) + parser.add_argument( + "--old_append", + help="Append new audio clip embeddings to the old embedding file, generate a new non-duplicated merged embedding file. Default False", + default=False, + action="store_true", + ) + parser.add_argument("--disable_cuda", type=bool, help="Flag to disable cuda.", default=False) + parser.add_argument("--no_eval", help="Do not compute eval?. Default False", default=False, action="store_true") + parser.add_argument( + "--formatter_name", + type=str, + help="Name of the formatter to use. You either need to provide this or `config_dataset_path`", + default=None, + ) + parser.add_argument( + "--dataset_name", + type=str, + help="Name of the dataset to use. 
You either need to provide this or `config_dataset_path`", + default=None, + ) + parser.add_argument( + "--dataset_path", + type=str, + help="Path to the dataset. You either need to provide this or `config_dataset_path`", + default=None, + ) + parser.add_argument( + "--meta_file_train", + type=str, + help="Path to the train meta file. If not set, dataset formatter uses the default metafile if it is defined in the formatter. You either need to provide this or `config_dataset_path`", + default=None, + ) + parser.add_argument( + "--meta_file_val", + type=str, + help="Path to the evaluation meta file. If not set, dataset formatter uses the default metafile if it is defined in the formatter. You either need to provide this or `config_dataset_path`", + default=None, + ) + args = parser.parse_args() + + compute_embeddings( + args.model_path, + args.config_path, + args.output_path, + old_speakers_file=args.old_file, + old_append=args.old_append, + config_dataset_path=args.config_dataset_path, + formatter_name=args.formatter_name, + dataset_name=args.dataset_name, + dataset_path=args.dataset_path, + meta_file_train=args.meta_file_train, + meta_file_val=args.meta_file_val, + disable_cuda=args.disable_cuda, + no_eval=args.no_eval, + ) diff --git a/TTS/bin/compute_statistics.py b/TTS/bin/compute_statistics.py new file mode 100644 index 0000000..3ab7ea7 --- /dev/null +++ b/TTS/bin/compute_statistics.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import argparse +import glob +import os + +import numpy as np +from tqdm import tqdm + +# from TTS.utils.io import load_config +from TTS.config import load_config +from TTS.tts.datasets import load_tts_samples +from TTS.utils.audio import AudioProcessor + + +def main(): + """Run preprocessing process.""" + parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogtram features.") + parser.add_argument("config_path", type=str, help="TTS config file path to define audio processin parameters.") + parser.add_argument("out_path", type=str, help="save path (directory and filename).") + parser.add_argument( + "--data_path", + type=str, + required=False, + help="folder including the target set of wavs overriding dataset config.", + ) + args, overrides = parser.parse_known_args() + + CONFIG = load_config(args.config_path) + CONFIG.parse_known_args(overrides, relaxed_parser=True) + + # load config + CONFIG.audio.signal_norm = False # do not apply earlier normalization + CONFIG.audio.stats_path = None # discard pre-defined stats + + # load audio processor + ap = AudioProcessor(**CONFIG.audio.to_dict()) + + # load the meta data of target dataset + if args.data_path: + dataset_items = glob.glob(os.path.join(args.data_path, "**", "*.wav"), recursive=True) + else: + dataset_items = load_tts_samples(CONFIG.datasets)[0] # take only train data + print(f" > There are {len(dataset_items)} files.") + + mel_sum = 0 + mel_square_sum = 0 + linear_sum = 0 + linear_square_sum = 0 + N = 0 + for item in tqdm(dataset_items): + # compute features + wav = ap.load_wav(item if isinstance(item, str) else item["audio_file"]) + linear = ap.spectrogram(wav) + mel = ap.melspectrogram(wav) + + # compute stats + N += mel.shape[1] + mel_sum += mel.sum(1) + linear_sum += linear.sum(1) + mel_square_sum += (mel**2).sum(axis=1) + linear_square_sum += (linear**2).sum(axis=1) + + mel_mean = mel_sum / N + mel_scale = np.sqrt(mel_square_sum / N - mel_mean**2) + linear_mean = linear_sum / N + linear_scale = np.sqrt(linear_square_sum / N - linear_mean**2) + + 
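+    # The per-bin means/scales computed above are packed into a dict and written with
+    # np.save below. Illustrative note: a stats file saved this way can be read back with
+    # np.load(out_path, allow_pickle=True).item().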
output_file_path = args.out_path + stats = {} + stats["mel_mean"] = mel_mean + stats["mel_std"] = mel_scale + stats["linear_mean"] = linear_mean + stats["linear_std"] = linear_scale + + print(f" > Avg mel spec mean: {mel_mean.mean()}") + print(f" > Avg mel spec scale: {mel_scale.mean()}") + print(f" > Avg linear spec mean: {linear_mean.mean()}") + print(f" > Avg linear spec scale: {linear_scale.mean()}") + + # set default config values for mean-var scaling + CONFIG.audio.stats_path = output_file_path + CONFIG.audio.signal_norm = True + # remove redundant values + del CONFIG.audio.max_norm + del CONFIG.audio.min_level_db + del CONFIG.audio.symmetric_norm + del CONFIG.audio.clip_norm + stats["audio_config"] = CONFIG.audio.to_dict() + np.save(output_file_path, stats, allow_pickle=True) + print(f" > stats saved to {output_file_path}") + + +if __name__ == "__main__": + main() diff --git a/TTS/bin/eval_encoder.py b/TTS/bin/eval_encoder.py new file mode 100644 index 0000000..60fed13 --- /dev/null +++ b/TTS/bin/eval_encoder.py @@ -0,0 +1,88 @@ +import argparse +from argparse import RawTextHelpFormatter + +import torch +from tqdm import tqdm + +from TTS.config import load_config +from TTS.tts.datasets import load_tts_samples +from TTS.tts.utils.speakers import SpeakerManager + + +def compute_encoder_accuracy(dataset_items, encoder_manager): + class_name_key = encoder_manager.encoder_config.class_name_key + map_classid_to_classname = getattr(encoder_manager.encoder_config, "map_classid_to_classname", None) + + class_acc_dict = {} + + # compute embeddings for all wav_files + for item in tqdm(dataset_items): + class_name = item[class_name_key] + wav_file = item["audio_file"] + + # extract the embedding + embedd = encoder_manager.compute_embedding_from_clip(wav_file) + if encoder_manager.encoder_criterion is not None and map_classid_to_classname is not None: + embedding = torch.FloatTensor(embedd).unsqueeze(0) + if encoder_manager.use_cuda: + embedding = embedding.cuda() + + class_id = encoder_manager.encoder_criterion.softmax.inference(embedding).item() + predicted_label = map_classid_to_classname[str(class_id)] + else: + predicted_label = None + + if class_name is not None and predicted_label is not None: + is_equal = int(class_name == predicted_label) + if class_name not in class_acc_dict: + class_acc_dict[class_name] = [is_equal] + else: + class_acc_dict[class_name].append(is_equal) + else: + raise RuntimeError("Error: class_name or/and predicted_label are None") + + acc_avg = 0 + for key, values in class_acc_dict.items(): + acc = sum(values) / len(values) + print("Class", key, "Accuracy:", acc) + acc_avg += acc + + print("Average Accuracy:", acc_avg / len(class_acc_dict)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="""Compute the accuracy of the encoder.\n\n""" + """ + Example runs: + python TTS/bin/eval_encoder.py emotion_encoder_model.pth emotion_encoder_config.json dataset_config.json + """, + formatter_class=RawTextHelpFormatter, + ) + parser.add_argument("model_path", type=str, help="Path to model checkpoint file.") + parser.add_argument( + "config_path", + type=str, + help="Path to model config file.", + ) + + parser.add_argument( + "config_dataset_path", + type=str, + help="Path to dataset config file.", + ) + parser.add_argument("--use_cuda", type=bool, help="flag to set cuda.", default=True) + parser.add_argument("--eval", type=bool, help="compute eval.", default=True) + + args = parser.parse_args() + + c_dataset = 
load_config(args.config_dataset_path) + + meta_data_train, meta_data_eval = load_tts_samples(c_dataset.datasets, eval_split=args.eval) + items = meta_data_train + meta_data_eval + + enc_manager = SpeakerManager( + encoder_model_path=args.model_path, encoder_config_path=args.config_path, use_cuda=args.use_cuda + ) + + compute_encoder_accuracy(items, enc_manager) diff --git a/TTS/bin/extract_tts_spectrograms.py b/TTS/bin/extract_tts_spectrograms.py new file mode 100644 index 0000000..c604862 --- /dev/null +++ b/TTS/bin/extract_tts_spectrograms.py @@ -0,0 +1,287 @@ +#!/usr/bin/env python3 +"""Extract Mel spectrograms with teacher forcing.""" + +import argparse +import os + +import numpy as np +import torch +from torch.utils.data import DataLoader +from tqdm import tqdm + +from TTS.config import load_config +from TTS.tts.datasets import TTSDataset, load_tts_samples +from TTS.tts.models import setup_model +from TTS.tts.utils.speakers import SpeakerManager +from TTS.tts.utils.text.tokenizer import TTSTokenizer +from TTS.utils.audio import AudioProcessor +from TTS.utils.audio.numpy_transforms import quantize +from TTS.utils.generic_utils import count_parameters + +use_cuda = torch.cuda.is_available() + + +def setup_loader(ap, r, verbose=False): + tokenizer, _ = TTSTokenizer.init_from_config(c) + dataset = TTSDataset( + outputs_per_step=r, + compute_linear_spec=False, + samples=meta_data, + tokenizer=tokenizer, + ap=ap, + batch_group_size=0, + min_text_len=c.min_text_len, + max_text_len=c.max_text_len, + min_audio_len=c.min_audio_len, + max_audio_len=c.max_audio_len, + phoneme_cache_path=c.phoneme_cache_path, + precompute_num_workers=0, + use_noise_augment=False, + verbose=verbose, + speaker_id_mapping=speaker_manager.name_to_id if c.use_speaker_embedding else None, + d_vector_mapping=speaker_manager.embeddings if c.use_d_vector_file else None, + ) + + if c.use_phonemes and c.compute_input_seq_cache: + # precompute phonemes to have a better estimate of sequence lengths. 
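+            # (passes `c.num_loader_workers` so the phoneme pre-computation can run in
+            #  multiple loader workers before `preprocess_samples` is called)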
+ dataset.compute_input_seq(c.num_loader_workers) + dataset.preprocess_samples() + + loader = DataLoader( + dataset, + batch_size=c.batch_size, + shuffle=False, + collate_fn=dataset.collate_fn, + drop_last=False, + sampler=None, + num_workers=c.num_loader_workers, + pin_memory=False, + ) + return loader + + +def set_filename(wav_path, out_path): + wav_file = os.path.basename(wav_path) + file_name = wav_file.split(".")[0] + os.makedirs(os.path.join(out_path, "quant"), exist_ok=True) + os.makedirs(os.path.join(out_path, "mel"), exist_ok=True) + os.makedirs(os.path.join(out_path, "wav_gl"), exist_ok=True) + os.makedirs(os.path.join(out_path, "wav"), exist_ok=True) + wavq_path = os.path.join(out_path, "quant", file_name) + mel_path = os.path.join(out_path, "mel", file_name) + wav_gl_path = os.path.join(out_path, "wav_gl", file_name + ".wav") + wav_path = os.path.join(out_path, "wav", file_name + ".wav") + return file_name, wavq_path, mel_path, wav_gl_path, wav_path + + +def format_data(data): + # setup input data + text_input = data["token_id"] + text_lengths = data["token_id_lengths"] + mel_input = data["mel"] + mel_lengths = data["mel_lengths"] + item_idx = data["item_idxs"] + d_vectors = data["d_vectors"] + speaker_ids = data["speaker_ids"] + attn_mask = data["attns"] + avg_text_length = torch.mean(text_lengths.float()) + avg_spec_length = torch.mean(mel_lengths.float()) + + # dispatch data to GPU + if use_cuda: + text_input = text_input.cuda(non_blocking=True) + text_lengths = text_lengths.cuda(non_blocking=True) + mel_input = mel_input.cuda(non_blocking=True) + mel_lengths = mel_lengths.cuda(non_blocking=True) + if speaker_ids is not None: + speaker_ids = speaker_ids.cuda(non_blocking=True) + if d_vectors is not None: + d_vectors = d_vectors.cuda(non_blocking=True) + if attn_mask is not None: + attn_mask = attn_mask.cuda(non_blocking=True) + return ( + text_input, + text_lengths, + mel_input, + mel_lengths, + speaker_ids, + d_vectors, + avg_text_length, + avg_spec_length, + attn_mask, + item_idx, + ) + + +@torch.no_grad() +def inference( + model_name, + model, + ap, + text_input, + text_lengths, + mel_input, + mel_lengths, + speaker_ids=None, + d_vectors=None, +): + if model_name == "glow_tts": + speaker_c = None + if speaker_ids is not None: + speaker_c = speaker_ids + elif d_vectors is not None: + speaker_c = d_vectors + outputs = model.inference_with_MAS( + text_input, + text_lengths, + mel_input, + mel_lengths, + aux_input={"d_vectors": speaker_c, "speaker_ids": speaker_ids}, + ) + model_output = outputs["model_outputs"] + model_output = model_output.detach().cpu().numpy() + + elif "tacotron" in model_name: + aux_input = {"speaker_ids": speaker_ids, "d_vectors": d_vectors} + outputs = model(text_input, text_lengths, mel_input, mel_lengths, aux_input) + postnet_outputs = outputs["model_outputs"] + # normalize tacotron output + if model_name == "tacotron": + mel_specs = [] + postnet_outputs = postnet_outputs.data.cpu().numpy() + for b in range(postnet_outputs.shape[0]): + postnet_output = postnet_outputs[b] + mel_specs.append(torch.FloatTensor(ap.out_linear_to_mel(postnet_output.T).T)) + model_output = torch.stack(mel_specs).cpu().numpy() + + elif model_name == "tacotron2": + model_output = postnet_outputs.detach().cpu().numpy() + return model_output + + +def extract_spectrograms( + data_loader, model, ap, output_path, quantize_bits=0, save_audio=False, debug=False, metada_name="metada.txt" +): + model.eval() + export_metadata = [] + for _, data in tqdm(enumerate(data_loader), 
total=len(data_loader)): + # format data + ( + text_input, + text_lengths, + mel_input, + mel_lengths, + speaker_ids, + d_vectors, + _, + _, + _, + item_idx, + ) = format_data(data) + + model_output = inference( + c.model.lower(), + model, + ap, + text_input, + text_lengths, + mel_input, + mel_lengths, + speaker_ids, + d_vectors, + ) + + for idx in range(text_input.shape[0]): + wav_file_path = item_idx[idx] + wav = ap.load_wav(wav_file_path) + _, wavq_path, mel_path, wav_gl_path, wav_path = set_filename(wav_file_path, output_path) + + # quantize and save wav + if quantize_bits > 0: + wavq = quantize(wav, quantize_bits) + np.save(wavq_path, wavq) + + # save TTS mel + mel = model_output[idx] + mel_length = mel_lengths[idx] + mel = mel[:mel_length, :].T + np.save(mel_path, mel) + + export_metadata.append([wav_file_path, mel_path]) + if save_audio: + ap.save_wav(wav, wav_path) + + if debug: + print("Audio for debug saved at:", wav_gl_path) + wav = ap.inv_melspectrogram(mel) + ap.save_wav(wav, wav_gl_path) + + with open(os.path.join(output_path, metada_name), "w", encoding="utf-8") as f: + for data in export_metadata: + f.write(f"{data[0]}|{data[1]+'.npy'}\n") + + +def main(args): # pylint: disable=redefined-outer-name + # pylint: disable=global-variable-undefined + global meta_data, speaker_manager + + # Audio processor + ap = AudioProcessor(**c.audio) + + # load data instances + meta_data_train, meta_data_eval = load_tts_samples( + c.datasets, eval_split=args.eval, eval_split_max_size=c.eval_split_max_size, eval_split_size=c.eval_split_size + ) + + # use eval and training partitions + meta_data = meta_data_train + meta_data_eval + + # init speaker manager + if c.use_speaker_embedding: + speaker_manager = SpeakerManager(data_items=meta_data) + elif c.use_d_vector_file: + speaker_manager = SpeakerManager(d_vectors_file_path=c.d_vector_file) + else: + speaker_manager = None + + # setup model + model = setup_model(c) + + # restore model + model.load_checkpoint(c, args.checkpoint_path, eval=True) + + if use_cuda: + model.cuda() + + num_params = count_parameters(model) + print("\n > Model has {} parameters".format(num_params), flush=True) + # set r + r = 1 if c.model.lower() == "glow_tts" else model.decoder.r + own_loader = setup_loader(ap, r, verbose=True) + + extract_spectrograms( + own_loader, + model, + ap, + args.output_path, + quantize_bits=args.quantize_bits, + save_audio=args.save_audio, + debug=args.debug, + metada_name="metada.txt", + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--config_path", type=str, help="Path to config file for training.", required=True) + parser.add_argument("--checkpoint_path", type=str, help="Model file to be restored.", required=True) + parser.add_argument("--output_path", type=str, help="Path to save mel specs", required=True) + parser.add_argument("--debug", default=False, action="store_true", help="Save audio files for debug") + parser.add_argument("--save_audio", default=False, action="store_true", help="Save audio files") + parser.add_argument("--quantize_bits", type=int, default=0, help="Save quantized audio files if non-zero") + parser.add_argument("--eval", type=bool, help="compute eval.", default=True) + args = parser.parse_args() + + c = load_config(args.config_path) + c.audio.trim_silence = False + main(args) diff --git a/TTS/bin/find_unique_chars.py b/TTS/bin/find_unique_chars.py new file mode 100644 index 0000000..ea16974 --- /dev/null +++ b/TTS/bin/find_unique_chars.py @@ -0,0 +1,45 @@ +"""Find all 
the unique characters in a dataset""" +import argparse +from argparse import RawTextHelpFormatter + +from TTS.config import load_config +from TTS.tts.datasets import load_tts_samples + + +def main(): + # pylint: disable=bad-option-value + parser = argparse.ArgumentParser( + description="""Find all the unique characters or phonemes in a dataset.\n\n""" + """ + Example runs: + + python TTS/bin/find_unique_chars.py --config_path config.json + """, + formatter_class=RawTextHelpFormatter, + ) + parser.add_argument("--config_path", type=str, help="Path to dataset config file.", required=True) + args = parser.parse_args() + + c = load_config(args.config_path) + + # load all datasets + train_items, eval_items = load_tts_samples( + c.datasets, eval_split=True, eval_split_max_size=c.eval_split_max_size, eval_split_size=c.eval_split_size + ) + + items = train_items + eval_items + + texts = "".join(item["text"] for item in items) + chars = set(texts) + lower_chars = filter(lambda c: c.islower(), chars) + chars_force_lower = [c.lower() for c in chars] + chars_force_lower = set(chars_force_lower) + + print(f" > Number of unique characters: {len(chars)}") + print(f" > Unique characters: {''.join(sorted(chars))}") + print(f" > Unique lower characters: {''.join(sorted(lower_chars))}") + print(f" > Unique all forced to lower characters: {''.join(sorted(chars_force_lower))}") + + +if __name__ == "__main__": + main() diff --git a/TTS/bin/find_unique_phonemes.py b/TTS/bin/find_unique_phonemes.py new file mode 100644 index 0000000..4bd7a78 --- /dev/null +++ b/TTS/bin/find_unique_phonemes.py @@ -0,0 +1,74 @@ +"""Find all the unique characters in a dataset""" +import argparse +import multiprocessing +from argparse import RawTextHelpFormatter + +from tqdm.contrib.concurrent import process_map + +from TTS.config import load_config +from TTS.tts.datasets import load_tts_samples +from TTS.tts.utils.text.phonemizers import Gruut + + +def compute_phonemes(item): + text = item["text"] + ph = phonemizer.phonemize(text).replace("|", "") + return set(list(ph)) + + +def main(): + # pylint: disable=W0601 + global c, phonemizer + # pylint: disable=bad-option-value + parser = argparse.ArgumentParser( + description="""Find all the unique characters or phonemes in a dataset.\n\n""" + """ + Example runs: + + python TTS/bin/find_unique_phonemes.py --config_path config.json + """, + formatter_class=RawTextHelpFormatter, + ) + parser.add_argument("--config_path", type=str, help="Path to dataset config file.", required=True) + args = parser.parse_args() + + c = load_config(args.config_path) + + # load all datasets + train_items, eval_items = load_tts_samples( + c.datasets, eval_split=True, eval_split_max_size=c.eval_split_max_size, eval_split_size=c.eval_split_size + ) + items = train_items + eval_items + print("Num items:", len(items)) + + language_list = [item["language"] for item in items] + is_lang_def = all(language_list) + + if not c.phoneme_language or not is_lang_def: + raise ValueError("Phoneme language must be defined in config.") + + if not language_list.count(language_list[0]) == len(language_list): + raise ValueError( + "Currently, just one phoneme language per config file is supported !! Please split the dataset config into different configs and run it individually for each language !!" 
+ ) + + phonemizer = Gruut(language=language_list[0], keep_puncs=True) + + phonemes = process_map(compute_phonemes, items, max_workers=multiprocessing.cpu_count(), chunksize=15) + phones = [] + for ph in phonemes: + phones.extend(ph) + + phones = set(phones) + lower_phones = filter(lambda c: c.islower(), phones) + phones_force_lower = [c.lower() for c in phones] + phones_force_lower = set(phones_force_lower) + + print(f" > Number of unique phonemes: {len(phones)}") + print(f" > Unique phonemes: {''.join(sorted(phones))}") + print(f" > Unique lower phonemes: {''.join(sorted(lower_phones))}") + print(f" > Unique all forced to lower phonemes: {''.join(sorted(phones_force_lower))}") + + +if __name__ == "__main__": + main() diff --git a/TTS/bin/remove_silence_using_vad.py b/TTS/bin/remove_silence_using_vad.py new file mode 100644 index 0000000..a1eaf4c --- /dev/null +++ b/TTS/bin/remove_silence_using_vad.py @@ -0,0 +1,124 @@ +import argparse +import glob +import multiprocessing +import os +import pathlib + +import torch +from tqdm import tqdm + +from TTS.utils.vad import get_vad_model_and_utils, remove_silence + +torch.set_num_threads(1) + + +def adjust_path_and_remove_silence(audio_path): + output_path = audio_path.replace(os.path.join(args.input_dir, ""), os.path.join(args.output_dir, "")) + # ignore if the file exists + if os.path.exists(output_path) and not args.force: + return output_path, False + + # create all directory structure + pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True) + # remove the silence and save the audio + output_path, is_speech = remove_silence( + model_and_utils, + audio_path, + output_path, + trim_just_beginning_and_end=args.trim_just_beginning_and_end, + use_cuda=args.use_cuda, + ) + return output_path, is_speech + + +def preprocess_audios(): + files = sorted(glob.glob(os.path.join(args.input_dir, args.glob), recursive=True)) + print("> Number of files: ", len(files)) + if not args.force: + print("> Ignoring files that already exist in the output idrectory.") + + if args.trim_just_beginning_and_end: + print("> Trimming just the beginning and the end with nonspeech parts.") + else: + print("> Trimming all nonspeech parts.") + + filtered_files = [] + if files: + # create threads + # num_threads = multiprocessing.cpu_count() + # process_map(adjust_path_and_remove_silence, files, max_workers=num_threads, chunksize=15) + + if args.num_processes > 1: + with multiprocessing.Pool(processes=args.num_processes) as pool: + results = list( + tqdm( + pool.imap_unordered(adjust_path_and_remove_silence, files), + total=len(files), + desc="Processing audio files", + ) + ) + for output_path, is_speech in results: + if not is_speech: + filtered_files.append(output_path) + else: + for f in tqdm(files): + output_path, is_speech = adjust_path_and_remove_silence(f) + if not is_speech: + filtered_files.append(output_path) + + # write files that do not have speech + with open(os.path.join(args.output_dir, "filtered_files.txt"), "w", encoding="utf-8") as f: + for file in filtered_files: + f.write(str(file) + "\n") + else: + print("> No files Found !") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end True" + ) + parser.add_argument("-i", "--input_dir", type=str, help="Dataset root dir", required=True) + parser.add_argument("-o", "--output_dir", type=str, help="Output Dataset dir", 
default="") + parser.add_argument("-f", "--force", default=False, action="store_true", help="Force the replace of exists files") + parser.add_argument( + "-g", + "--glob", + type=str, + default="**/*.wav", + help="path in glob format for acess wavs from input_dir. ex: wav48/*/*.wav", + ) + parser.add_argument( + "-t", + "--trim_just_beginning_and_end", + type=bool, + default=True, + help="If True this script will trim just the beginning and end nonspeech parts. If False all nonspeech parts will be trim. Default True", + ) + parser.add_argument( + "-c", + "--use_cuda", + type=bool, + default=False, + help="If True use cuda", + ) + parser.add_argument( + "--use_onnx", + type=bool, + default=False, + help="If True use onnx", + ) + parser.add_argument( + "--num_processes", + type=int, + default=1, + help="Number of processes to use", + ) + args = parser.parse_args() + + if args.output_dir == "": + args.output_dir = args.input_dir + + # load the model and utils + model_and_utils = get_vad_model_and_utils(use_cuda=args.use_cuda, use_onnx=args.use_onnx) + preprocess_audios() diff --git a/TTS/bin/resample.py b/TTS/bin/resample.py new file mode 100644 index 0000000..a3f2848 --- /dev/null +++ b/TTS/bin/resample.py @@ -0,0 +1,90 @@ +import argparse +import glob +import os +from argparse import RawTextHelpFormatter +from multiprocessing import Pool +from shutil import copytree + +import librosa +import soundfile as sf +from tqdm import tqdm + + +def resample_file(func_args): + filename, output_sr = func_args + y, sr = librosa.load(filename, sr=output_sr) + sf.write(filename, y, sr) + + +def resample_files(input_dir, output_sr, output_dir=None, file_ext="wav", n_jobs=10): + if output_dir: + print("Recursively copying the input folder...") + copytree(input_dir, output_dir) + input_dir = output_dir + + print("Resampling the audio files...") + audio_files = glob.glob(os.path.join(input_dir, f"**/*.{file_ext}"), recursive=True) + print(f"Found {len(audio_files)} files...") + audio_files = list(zip(audio_files, len(audio_files) * [output_sr])) + with Pool(processes=n_jobs) as p: + with tqdm(total=len(audio_files)) as pbar: + for _, _ in enumerate(p.imap_unordered(resample_file, audio_files)): + pbar.update() + + print("Done !") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="""Resample a folder recusively with librosa + Can be used in place or create a copy of the folder as an output.\n\n + Example run: + python TTS/bin/resample.py + --input_dir /root/LJSpeech-1.1/ + --output_sr 22050 + --output_dir /root/resampled_LJSpeech-1.1/ + --file_ext wav + --n_jobs 24 + """, + formatter_class=RawTextHelpFormatter, + ) + + parser.add_argument( + "--input_dir", + type=str, + default=None, + required=True, + help="Path of the folder containing the audio files to resample", + ) + + parser.add_argument( + "--output_sr", + type=int, + default=22050, + required=False, + help="Samlple rate to which the audio files should be resampled", + ) + + parser.add_argument( + "--output_dir", + type=str, + default=None, + required=False, + help="Path of the destination folder. 
If not defined, the operation is done in place", + ) + + parser.add_argument( + "--file_ext", + type=str, + default="wav", + required=False, + help="Extension of the audio files to resample", + ) + + parser.add_argument( + "--n_jobs", type=int, default=None, help="Number of threads to use, by default it uses all cores" + ) + + args = parser.parse_args() + + resample_files(args.input_dir, args.output_sr, args.output_dir, args.file_ext, args.n_jobs) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py new file mode 100644 index 0000000..b86252a --- /dev/null +++ b/TTS/bin/synthesize.py @@ -0,0 +1,494 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import argparse +import contextlib +import sys +from argparse import RawTextHelpFormatter + +# pylint: disable=redefined-outer-name, unused-argument +from pathlib import Path + +description = """ +Synthesize speech on command line. + +You can either use your trained model or choose a model from the provided list. + +If you don't specify any models, then it uses LJSpeech based English model. + +#### Single Speaker Models + +- List provided models: + + ``` + $ tts --list_models + ``` + +- Get model info (for both tts_models and vocoder_models): + + - Query by type/name: + The model_info_by_name uses the name as it from the --list_models. + ``` + $ tts --model_info_by_name "///" + ``` + For example: + ``` + $ tts --model_info_by_name tts_models/tr/common-voice/glow-tts + $ tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2 + ``` + - Query by type/idx: + The model_query_idx uses the corresponding idx from --list_models. + + ``` + $ tts --model_info_by_idx "/" + ``` + + For example: + + ``` + $ tts --model_info_by_idx tts_models/3 + ``` + + - Query info for model info by full name: + ``` + $ tts --model_info_by_name "///" + ``` + +- Run TTS with default models: + + ``` + $ tts --text "Text for TTS" --out_path output/path/speech.wav + ``` + +- Run TTS and pipe out the generated TTS wav file data: + + ``` + $ tts --text "Text for TTS" --pipe_out --out_path output/path/speech.wav | aplay + ``` + +- Run a TTS model with its default vocoder model: + + ``` + $ tts --text "Text for TTS" --model_name "///" --out_path output/path/speech.wav + ``` + + For example: + + ``` + $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --out_path output/path/speech.wav + ``` + +- Run with specific TTS and vocoder models from the list: + + ``` + $ tts --text "Text for TTS" --model_name "///" --vocoder_name "///" --out_path output/path/speech.wav + ``` + + For example: + + ``` + $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --vocoder_name "vocoder_models/en/ljspeech/univnet" --out_path output/path/speech.wav + ``` + +- Run your own TTS model (Using Griffin-Lim Vocoder): + + ``` + $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav + ``` + +- Run your own TTS and Vocoder models: + + ``` + $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav + --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json + ``` + +#### Multi-speaker Models + +- List the available speakers and choose a among them: + + ``` + $ tts --model_name "//" --list_speaker_idxs + ``` + +- Run the multi-speaker TTS model with the target speaker ID: + + ``` + $ tts --text "Text for TTS." 
--out_path output/path/speech.wav --model_name "//" --speaker_idx + ``` + +- Run your own multi-speaker TTS model: + + ``` + $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx + ``` + +### Voice Conversion Models + +``` +$ tts --out_path output/path/speech.wav --model_name "//" --source_wav --target_wav +``` +""" + + +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ("yes", "true", "t", "y", "1"): + return True + if v.lower() in ("no", "false", "f", "n", "0"): + return False + raise argparse.ArgumentTypeError("Boolean value expected.") + + +def main(): + parser = argparse.ArgumentParser( + description=description.replace(" ```\n", ""), + formatter_class=RawTextHelpFormatter, + ) + + parser.add_argument( + "--list_models", + type=str2bool, + nargs="?", + const=True, + default=False, + help="list available pre-trained TTS and vocoder models.", + ) + + parser.add_argument( + "--model_info_by_idx", + type=str, + default=None, + help="model info using query format: /", + ) + + parser.add_argument( + "--model_info_by_name", + type=str, + default=None, + help="model info using query format: ///", + ) + + parser.add_argument("--text", type=str, default=None, help="Text to generate speech.") + + # Args for running pre-trained TTS models. + parser.add_argument( + "--model_name", + type=str, + default="tts_models/en/ljspeech/tacotron2-DDC", + help="Name of one of the pre-trained TTS models in format //", + ) + parser.add_argument( + "--vocoder_name", + type=str, + default=None, + help="Name of one of the pre-trained vocoder models in format //", + ) + + # Args for running custom models + parser.add_argument("--config_path", default=None, type=str, help="Path to model config file.") + parser.add_argument( + "--model_path", + type=str, + default=None, + help="Path to model file.", + ) + parser.add_argument( + "--out_path", + type=str, + default="tts_output.wav", + help="Output wav file path.", + ) + parser.add_argument("--use_cuda", type=bool, help="Run model on CUDA.", default=False) + parser.add_argument("--device", type=str, help="Device to run model on.", default="cpu") + parser.add_argument( + "--vocoder_path", + type=str, + help="Path to vocoder model file. If it is not defined, model uses GL as vocoder. 
Please make sure that you installed vocoder library before (WaveRNN).", + default=None, + ) + parser.add_argument("--vocoder_config_path", type=str, help="Path to vocoder model config file.", default=None) + parser.add_argument( + "--encoder_path", + type=str, + help="Path to speaker encoder model file.", + default=None, + ) + parser.add_argument("--encoder_config_path", type=str, help="Path to speaker encoder config file.", default=None) + parser.add_argument( + "--pipe_out", + help="stdout the generated TTS wav file for shell pipe.", + type=str2bool, + nargs="?", + const=True, + default=False, + ) + + # args for multi-speaker synthesis + parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None) + parser.add_argument("--language_ids_file_path", type=str, help="JSON file for multi-lingual model.", default=None) + parser.add_argument( + "--speaker_idx", + type=str, + help="Target speaker ID for a multi-speaker TTS model.", + default=None, + ) + parser.add_argument( + "--language_idx", + type=str, + help="Target language ID for a multi-lingual TTS model.", + default=None, + ) + parser.add_argument( + "--speaker_wav", + nargs="+", + help="wav file(s) to condition a multi-speaker TTS model with a Speaker Encoder. You can give multiple file paths. The d_vectors is computed as their average.", + default=None, + ) + parser.add_argument("--gst_style", help="Wav path file for GST style reference.", default=None) + parser.add_argument( + "--capacitron_style_wav", type=str, help="Wav path file for Capacitron prosody reference.", default=None + ) + parser.add_argument("--capacitron_style_text", type=str, help="Transcription of the reference.", default=None) + parser.add_argument( + "--list_speaker_idxs", + help="List available speaker ids for the defined multi-speaker model.", + type=str2bool, + nargs="?", + const=True, + default=False, + ) + parser.add_argument( + "--list_language_idxs", + help="List available language ids for the defined multi-lingual model.", + type=str2bool, + nargs="?", + const=True, + default=False, + ) + # aux args + parser.add_argument( + "--save_spectogram", + type=bool, + help="If true save raw spectogram for further (vocoder) processing in out_path.", + default=False, + ) + parser.add_argument( + "--reference_wav", + type=str, + help="Reference wav file to convert in the voice of the speaker_idx or speaker_wav", + default=None, + ) + parser.add_argument( + "--reference_speaker_idx", + type=str, + help="speaker ID of the reference_wav speaker (If not provided the embedding will be computed using the Speaker Encoder).", + default=None, + ) + parser.add_argument( + "--progress_bar", + type=str2bool, + help="If true shows a progress bar for the model download. 
Defaults to True", + default=True, + ) + + # voice conversion args + parser.add_argument( + "--source_wav", + type=str, + default=None, + help="Original audio file to convert in the voice of the target_wav", + ) + parser.add_argument( + "--target_wav", + type=str, + default=None, + help="Target audio file to convert in the voice of the source_wav", + ) + + parser.add_argument( + "--voice_dir", + type=str, + default=None, + help="Voice dir for tortoise model", + ) + + args = parser.parse_args() + + # print the description if either text or list_models is not set + check_args = [ + args.text, + args.list_models, + args.list_speaker_idxs, + args.list_language_idxs, + args.reference_wav, + args.model_info_by_idx, + args.model_info_by_name, + args.source_wav, + args.target_wav, + ] + if not any(check_args): + parser.parse_args(["-h"]) + + pipe_out = sys.stdout if args.pipe_out else None + + with contextlib.redirect_stdout(None if args.pipe_out else sys.stdout): + # Late-import to make things load faster + from TTS.api import TTS + from TTS.utils.manage import ModelManager + from TTS.utils.synthesizer import Synthesizer + + # load model manager + path = Path(__file__).parent / "../.models.json" + manager = ModelManager(path, progress_bar=args.progress_bar) + api = TTS() + + tts_path = None + tts_config_path = None + speakers_file_path = None + language_ids_file_path = None + vocoder_path = None + vocoder_config_path = None + encoder_path = None + encoder_config_path = None + vc_path = None + vc_config_path = None + model_dir = None + + # CASE1 #list : list pre-trained TTS models + if args.list_models: + manager.list_models() + sys.exit() + + # CASE2 #info : model info for pre-trained TTS models + if args.model_info_by_idx: + model_query = args.model_info_by_idx + manager.model_info_by_idx(model_query) + sys.exit() + + if args.model_info_by_name: + model_query_full_name = args.model_info_by_name + manager.model_info_by_full_name(model_query_full_name) + sys.exit() + + # CASE3: load pre-trained model paths + if args.model_name is not None and not args.model_path: + model_path, config_path, model_item = manager.download_model(args.model_name) + # tts model + if model_item["model_type"] == "tts_models": + tts_path = model_path + tts_config_path = config_path + if "default_vocoder" in model_item: + args.vocoder_name = ( + model_item["default_vocoder"] if args.vocoder_name is None else args.vocoder_name + ) + + # voice conversion model + if model_item["model_type"] == "voice_conversion_models": + vc_path = model_path + vc_config_path = config_path + + # tts model with multiple files to be loaded from the directory path + if model_item.get("author", None) == "fairseq" or isinstance(model_item["model_url"], list): + model_dir = model_path + tts_path = None + tts_config_path = None + args.vocoder_name = None + + # load vocoder + if args.vocoder_name is not None and not args.vocoder_path: + vocoder_path, vocoder_config_path, _ = manager.download_model(args.vocoder_name) + + # CASE4: set custom model paths + if args.model_path is not None: + tts_path = args.model_path + tts_config_path = args.config_path + speakers_file_path = args.speakers_file_path + language_ids_file_path = args.language_ids_file_path + + if args.vocoder_path is not None: + vocoder_path = args.vocoder_path + vocoder_config_path = args.vocoder_config_path + + if args.encoder_path is not None: + encoder_path = args.encoder_path + encoder_config_path = args.encoder_config_path + + device = args.device + if args.use_cuda: + device = "cuda" 
+ + # load models + synthesizer = Synthesizer( + tts_path, + tts_config_path, + speakers_file_path, + language_ids_file_path, + vocoder_path, + vocoder_config_path, + encoder_path, + encoder_config_path, + vc_path, + vc_config_path, + model_dir, + args.voice_dir, + ).to(device) + + # query speaker ids of a multi-speaker model. + if args.list_speaker_idxs: + print( + " > Available speaker ids: (Set --speaker_idx flag to one of these values to use the multi-speaker model." + ) + print(synthesizer.tts_model.speaker_manager.name_to_id) + return + + # query langauge ids of a multi-lingual model. + if args.list_language_idxs: + print( + " > Available language ids: (Set --language_idx flag to one of these values to use the multi-lingual model." + ) + print(synthesizer.tts_model.language_manager.name_to_id) + return + + # check the arguments against a multi-speaker model. + if synthesizer.tts_speakers_file and (not args.speaker_idx and not args.speaker_wav): + print( + " [!] Looks like you use a multi-speaker model. Define `--speaker_idx` to " + "select the target speaker. You can list the available speakers for this model by `--list_speaker_idxs`." + ) + return + + # RUN THE SYNTHESIS + if args.text: + print(" > Text: {}".format(args.text)) + + # kick it + if tts_path is not None: + wav = synthesizer.tts( + args.text, + speaker_name=args.speaker_idx, + language_name=args.language_idx, + speaker_wav=args.speaker_wav, + reference_wav=args.reference_wav, + style_wav=args.capacitron_style_wav, + style_text=args.capacitron_style_text, + reference_speaker_name=args.reference_speaker_idx, + ) + elif vc_path is not None: + wav = synthesizer.voice_conversion( + source_wav=args.source_wav, + target_wav=args.target_wav, + ) + elif model_dir is not None: + wav = synthesizer.tts( + args.text, speaker_name=args.speaker_idx, language_name=args.language_idx, speaker_wav=args.speaker_wav + ) + + # save the results + print(" > Saving output to {}".format(args.out_path)) + synthesizer.save_wav(wav, args.out_path, pipe_out=pipe_out) + + +if __name__ == "__main__": + main() diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py new file mode 100644 index 0000000..a32ad00 --- /dev/null +++ b/TTS/bin/train_encoder.py @@ -0,0 +1,332 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os +import sys +import time +import traceback + +import torch +from torch.utils.data import DataLoader +from trainer.io import copy_model_files, save_best_model, save_checkpoint +from trainer.torch import NoamLR +from trainer.trainer_utils import get_optimizer + +from TTS.encoder.dataset import EncoderDataset +from TTS.encoder.utils.generic_utils import setup_encoder_model +from TTS.encoder.utils.training import init_training +from TTS.encoder.utils.visual import plot_embeddings +from TTS.tts.datasets import load_tts_samples +from TTS.utils.audio import AudioProcessor +from TTS.utils.generic_utils import count_parameters, remove_experiment_folder +from TTS.utils.samplers import PerfectBatchSampler +from TTS.utils.training import check_update + +torch.backends.cudnn.enabled = True +torch.backends.cudnn.benchmark = True +torch.manual_seed(54321) +use_cuda = torch.cuda.is_available() +num_gpus = torch.cuda.device_count() +print(" > Using CUDA: ", use_cuda) +print(" > Number of GPUs: ", num_gpus) + + +def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False): + num_utter_per_class = c.num_utter_per_class if not is_val else c.eval_num_utter_per_class + num_classes_in_batch = c.num_classes_in_batch if 
not is_val else c.eval_num_classes_in_batch + + dataset = EncoderDataset( + c, + ap, + meta_data_eval if is_val else meta_data_train, + voice_len=c.voice_len, + num_utter_per_class=num_utter_per_class, + num_classes_in_batch=num_classes_in_batch, + verbose=verbose, + augmentation_config=c.audio_augmentation if not is_val else None, + use_torch_spec=c.model_params.get("use_torch_spec", False), + ) + # get classes list + classes = dataset.get_class_list() + + sampler = PerfectBatchSampler( + dataset.items, + classes, + batch_size=num_classes_in_batch * num_utter_per_class, # total batch size + num_classes_in_batch=num_classes_in_batch, + num_gpus=1, + shuffle=not is_val, + drop_last=True, + ) + + if len(classes) < num_classes_in_batch: + if is_val: + raise RuntimeError( + f"config.eval_num_classes_in_batch ({num_classes_in_batch}) need to be <= {len(classes)} (Number total of Classes in the Eval dataset) !" + ) + raise RuntimeError( + f"config.num_classes_in_batch ({num_classes_in_batch}) need to be <= {len(classes)} (Number total of Classes in the Train dataset) !" + ) + + # set the classes to avoid get wrong class_id when the number of training and eval classes are not equal + if is_val: + dataset.set_classes(train_classes) + + loader = DataLoader( + dataset, + num_workers=c.num_loader_workers, + batch_sampler=sampler, + collate_fn=dataset.collate_fn, + ) + + return loader, classes, dataset.get_map_classid_to_classname() + + +def evaluation(model, criterion, data_loader, global_step): + eval_loss = 0 + for _, data in enumerate(data_loader): + with torch.no_grad(): + # setup input data + inputs, labels = data + + # agroup samples of each class in the batch. perfect sampler produces [3,2,1,3,2,1] we need [3,3,2,2,1,1] + labels = torch.transpose( + labels.view(c.eval_num_utter_per_class, c.eval_num_classes_in_batch), 0, 1 + ).reshape(labels.shape) + inputs = torch.transpose( + inputs.view(c.eval_num_utter_per_class, c.eval_num_classes_in_batch, -1), 0, 1 + ).reshape(inputs.shape) + + # dispatch data to GPU + if use_cuda: + inputs = inputs.cuda(non_blocking=True) + labels = labels.cuda(non_blocking=True) + + # forward pass model + outputs = model(inputs) + + # loss computation + loss = criterion( + outputs.view(c.eval_num_classes_in_batch, outputs.shape[0] // c.eval_num_classes_in_batch, -1), labels + ) + + eval_loss += loss.item() + + eval_avg_loss = eval_loss / len(data_loader) + # save stats + dashboard_logger.eval_stats(global_step, {"loss": eval_avg_loss}) + # plot the last batch in the evaluation + figures = { + "UMAP Plot": plot_embeddings(outputs.detach().cpu().numpy(), c.num_classes_in_batch), + } + dashboard_logger.eval_figures(global_step, figures) + return eval_avg_loss + + +def train(model, optimizer, scheduler, criterion, data_loader, eval_data_loader, global_step): + model.train() + best_loss = {"train_loss": None, "eval_loss": float("inf")} + avg_loader_time = 0 + end_time = time.time() + for epoch in range(c.epochs): + tot_loss = 0 + epoch_time = 0 + for _, data in enumerate(data_loader): + start_time = time.time() + + # setup input data + inputs, labels = data + # agroup samples of each class in the batch. 
perfect sampler produces [3,2,1,3,2,1] we need [3,3,2,2,1,1] + labels = torch.transpose(labels.view(c.num_utter_per_class, c.num_classes_in_batch), 0, 1).reshape( + labels.shape + ) + inputs = torch.transpose(inputs.view(c.num_utter_per_class, c.num_classes_in_batch, -1), 0, 1).reshape( + inputs.shape + ) + # ToDo: move it to a unit test + # labels_converted = torch.transpose(labels.view(c.num_utter_per_class, c.num_classes_in_batch), 0, 1).reshape(labels.shape) + # inputs_converted = torch.transpose(inputs.view(c.num_utter_per_class, c.num_classes_in_batch, -1), 0, 1).reshape(inputs.shape) + # idx = 0 + # for j in range(0, c.num_classes_in_batch, 1): + # for i in range(j, len(labels), c.num_classes_in_batch): + # if not torch.all(labels[i].eq(labels_converted[idx])) or not torch.all(inputs[i].eq(inputs_converted[idx])): + # print("Invalid") + # print(labels) + # exit() + # idx += 1 + # labels = labels_converted + # inputs = inputs_converted + + loader_time = time.time() - end_time + global_step += 1 + + # setup lr + if c.lr_decay: + scheduler.step() + optimizer.zero_grad() + + # dispatch data to GPU + if use_cuda: + inputs = inputs.cuda(non_blocking=True) + labels = labels.cuda(non_blocking=True) + + # forward pass model + outputs = model(inputs) + + # loss computation + loss = criterion( + outputs.view(c.num_classes_in_batch, outputs.shape[0] // c.num_classes_in_batch, -1), labels + ) + loss.backward() + grad_norm, _ = check_update(model, c.grad_clip) + optimizer.step() + + step_time = time.time() - start_time + epoch_time += step_time + + # acumulate the total epoch loss + tot_loss += loss.item() + + # Averaged Loader Time + num_loader_workers = c.num_loader_workers if c.num_loader_workers > 0 else 1 + avg_loader_time = ( + 1 / num_loader_workers * loader_time + (num_loader_workers - 1) / num_loader_workers * avg_loader_time + if avg_loader_time != 0 + else loader_time + ) + current_lr = optimizer.param_groups[0]["lr"] + + if global_step % c.steps_plot_stats == 0: + # Plot Training Epoch Stats + train_stats = { + "loss": loss.item(), + "lr": current_lr, + "grad_norm": grad_norm, + "step_time": step_time, + "avg_loader_time": avg_loader_time, + } + dashboard_logger.train_epoch_stats(global_step, train_stats) + figures = { + "UMAP Plot": plot_embeddings(outputs.detach().cpu().numpy(), c.num_classes_in_batch), + } + dashboard_logger.train_figures(global_step, figures) + + if global_step % c.print_step == 0: + print( + " | > Step:{} Loss:{:.5f} GradNorm:{:.5f} " + "StepTime:{:.2f} LoaderTime:{:.2f} AvGLoaderTime:{:.2f} LR:{:.6f}".format( + global_step, loss.item(), grad_norm, step_time, loader_time, avg_loader_time, current_lr + ), + flush=True, + ) + + if global_step % c.save_step == 0: + # save model + save_checkpoint( + c, model, optimizer, None, global_step, epoch, OUT_PATH, criterion=criterion.state_dict() + ) + + end_time = time.time() + + print("") + print( + ">>> Epoch:{} AvgLoss: {:.5f} GradNorm:{:.5f} " + "EpochTime:{:.2f} AvGLoaderTime:{:.2f} ".format( + epoch, tot_loss / len(data_loader), grad_norm, epoch_time, avg_loader_time + ), + flush=True, + ) + # evaluation + if c.run_eval: + model.eval() + eval_loss = evaluation(model, criterion, eval_data_loader, global_step) + print("\n\n") + print("--> EVAL PERFORMANCE") + print( + " | > Epoch:{} AvgLoss: {:.5f} ".format(epoch, eval_loss), + flush=True, + ) + # save the best checkpoint + best_loss = save_best_model( + {"train_loss": None, "eval_loss": eval_loss}, + best_loss, + c, + model, + optimizer, + None, + global_step, + epoch, 
+ OUT_PATH, + criterion=criterion.state_dict(), + ) + model.train() + + return best_loss, global_step + + +def main(args): # pylint: disable=redefined-outer-name + # pylint: disable=global-variable-undefined + global meta_data_train + global meta_data_eval + global train_classes + + ap = AudioProcessor(**c.audio) + model = setup_encoder_model(c) + + optimizer = get_optimizer(c.optimizer, c.optimizer_params, c.lr, model) + + # pylint: disable=redefined-outer-name + meta_data_train, meta_data_eval = load_tts_samples(c.datasets, eval_split=True) + + train_data_loader, train_classes, map_classid_to_classname = setup_loader(ap, is_val=False, verbose=True) + if c.run_eval: + eval_data_loader, _, _ = setup_loader(ap, is_val=True, verbose=True) + else: + eval_data_loader = None + + num_classes = len(train_classes) + criterion = model.get_criterion(c, num_classes) + + if c.loss == "softmaxproto" and c.model != "speaker_encoder": + c.map_classid_to_classname = map_classid_to_classname + copy_model_files(c, OUT_PATH, new_fields={}) + + if args.restore_path: + criterion, args.restore_step = model.load_checkpoint( + c, args.restore_path, eval=False, use_cuda=use_cuda, criterion=criterion + ) + print(" > Model restored from step %d" % args.restore_step, flush=True) + else: + args.restore_step = 0 + + if c.lr_decay: + scheduler = NoamLR(optimizer, warmup_steps=c.warmup_steps, last_epoch=args.restore_step - 1) + else: + scheduler = None + + num_params = count_parameters(model) + print("\n > Model has {} parameters".format(num_params), flush=True) + + if use_cuda: + model = model.cuda() + criterion.cuda() + + global_step = args.restore_step + _, global_step = train(model, optimizer, scheduler, criterion, train_data_loader, eval_data_loader, global_step) + + +if __name__ == "__main__": + args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training() + + try: + main(args) + except KeyboardInterrupt: + remove_experiment_folder(OUT_PATH) + try: + sys.exit(0) + except SystemExit: + os._exit(0) # pylint: disable=protected-access + except Exception: # pylint: disable=broad-except + remove_experiment_folder(OUT_PATH) + traceback.print_exc() + sys.exit(1) diff --git a/TTS/bin/train_tts.py b/TTS/bin/train_tts.py new file mode 100644 index 0000000..bdb4f6f --- /dev/null +++ b/TTS/bin/train_tts.py @@ -0,0 +1,71 @@ +import os +from dataclasses import dataclass, field + +from trainer import Trainer, TrainerArgs + +from TTS.config import load_config, register_config +from TTS.tts.datasets import load_tts_samples +from TTS.tts.models import setup_model + + +@dataclass +class TrainTTSArgs(TrainerArgs): + config_path: str = field(default=None, metadata={"help": "Path to the config file."}) + + +def main(): + """Run `tts` model training directly by a `config.json` file.""" + # init trainer args + train_args = TrainTTSArgs() + parser = train_args.init_argparse(arg_prefix="") + + # override trainer args from comman-line args + args, config_overrides = parser.parse_known_args() + train_args.parse_args(args) + + # load config.json and register + if args.config_path or args.continue_path: + if args.config_path: + # init from a file + config = load_config(args.config_path) + if len(config_overrides) > 0: + config.parse_known_args(config_overrides, relaxed_parser=True) + elif args.continue_path: + # continue from a prev experiment + config = load_config(os.path.join(args.continue_path, "config.json")) + if len(config_overrides) > 0: + config.parse_known_args(config_overrides, relaxed_parser=True) + else: + # init from 
console args + from TTS.config.shared_configs import BaseTrainingConfig # pylint: disable=import-outside-toplevel + + config_base = BaseTrainingConfig() + config_base.parse_known_args(config_overrides) + config = register_config(config_base.model)() + + # load training samples + train_samples, eval_samples = load_tts_samples( + config.datasets, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, + ) + + # init the model from config + model = setup_model(config, train_samples + eval_samples) + + # init the trainer and 🚀 + trainer = Trainer( + train_args, + model.config, + config.output_path, + model=model, + train_samples=train_samples, + eval_samples=eval_samples, + parse_command_line_args=False, + ) + trainer.fit() + + +if __name__ == "__main__": + main() diff --git a/TTS/bin/train_vocoder.py b/TTS/bin/train_vocoder.py new file mode 100644 index 0000000..32ecd7b --- /dev/null +++ b/TTS/bin/train_vocoder.py @@ -0,0 +1,77 @@ +import os +from dataclasses import dataclass, field + +from trainer import Trainer, TrainerArgs + +from TTS.config import load_config, register_config +from TTS.utils.audio import AudioProcessor +from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data +from TTS.vocoder.models import setup_model + + +@dataclass +class TrainVocoderArgs(TrainerArgs): + config_path: str = field(default=None, metadata={"help": "Path to the config file."}) + + +def main(): + """Run `tts` model training directly by a `config.json` file.""" + # init trainer args + train_args = TrainVocoderArgs() + parser = train_args.init_argparse(arg_prefix="") + + # override trainer args from comman-line args + args, config_overrides = parser.parse_known_args() + train_args.parse_args(args) + + # load config.json and register + if args.config_path or args.continue_path: + if args.config_path: + # init from a file + config = load_config(args.config_path) + if len(config_overrides) > 0: + config.parse_known_args(config_overrides, relaxed_parser=True) + elif args.continue_path: + # continue from a prev experiment + config = load_config(os.path.join(args.continue_path, "config.json")) + if len(config_overrides) > 0: + config.parse_known_args(config_overrides, relaxed_parser=True) + else: + # init from console args + from TTS.config.shared_configs import BaseTrainingConfig # pylint: disable=import-outside-toplevel + + config_base = BaseTrainingConfig() + config_base.parse_known_args(config_overrides) + config = register_config(config_base.model)() + + # load training samples + if "feature_path" in config and config.feature_path: + # load pre-computed features + print(f" > Loading features from: {config.feature_path}") + eval_samples, train_samples = load_wav_feat_data(config.data_path, config.feature_path, config.eval_split_size) + else: + # load data raw wav files + eval_samples, train_samples = load_wav_data(config.data_path, config.eval_split_size) + + # setup audio processor + ap = AudioProcessor(**config.audio) + + # init the model from config + model = setup_model(config) + + # init the trainer and 🚀 + trainer = Trainer( + train_args, + config, + config.output_path, + model=model, + train_samples=train_samples, + eval_samples=eval_samples, + training_assets={"audio_processor": ap}, + parse_command_line_args=False, + ) + trainer.fit() + + +if __name__ == "__main__": + main() diff --git a/TTS/bin/tune_wavegrad.py b/TTS/bin/tune_wavegrad.py new file mode 100644 index 0000000..09582ce --- /dev/null +++ b/TTS/bin/tune_wavegrad.py @@ 
-0,0 +1,103 @@ +"""Search a good noise schedule for WaveGrad for a given number of inference iterations""" +import argparse +from itertools import product as cartesian_product + +import numpy as np +import torch +from torch.utils.data import DataLoader +from tqdm import tqdm + +from TTS.config import load_config +from TTS.utils.audio import AudioProcessor +from TTS.vocoder.datasets.preprocess import load_wav_data +from TTS.vocoder.datasets.wavegrad_dataset import WaveGradDataset +from TTS.vocoder.models import setup_model + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model_path", type=str, help="Path to model checkpoint.") + parser.add_argument("--config_path", type=str, help="Path to model config file.") + parser.add_argument("--data_path", type=str, help="Path to data directory.") + parser.add_argument("--output_path", type=str, help="path for output file including file name and extension.") + parser.add_argument( + "--num_iter", + type=int, + help="Number of model inference iterations that you like to optimize noise schedule for.", + ) + parser.add_argument("--use_cuda", action="store_true", help="enable CUDA.") + parser.add_argument("--num_samples", type=int, default=1, help="Number of datasamples used for inference.") + parser.add_argument( + "--search_depth", + type=int, + default=3, + help="Search granularity. Increasing this increases the run-time exponentially.", + ) + + # load config + args = parser.parse_args() + config = load_config(args.config_path) + + # setup audio processor + ap = AudioProcessor(**config.audio) + + # load dataset + _, train_data = load_wav_data(args.data_path, 0) + train_data = train_data[: args.num_samples] + dataset = WaveGradDataset( + ap=ap, + items=train_data, + seq_len=-1, + hop_len=ap.hop_length, + pad_short=config.pad_short, + conv_pad=config.conv_pad, + is_training=True, + return_segments=False, + use_noise_augment=False, + use_cache=False, + verbose=True, + ) + loader = DataLoader( + dataset, + batch_size=1, + shuffle=False, + collate_fn=dataset.collate_full_clips, + drop_last=False, + num_workers=config.num_loader_workers, + pin_memory=False, + ) + + # setup the model + model = setup_model(config) + if args.use_cuda: + model.cuda() + + # setup optimization parameters + base_values = sorted(10 * np.random.uniform(size=args.search_depth)) + print(f" > base values: {base_values}") + exponents = 10 ** np.linspace(-6, -1, num=args.num_iter) + best_error = float("inf") + best_schedule = None # pylint: disable=C0103 + total_search_iter = len(base_values) ** args.num_iter + for base in tqdm(cartesian_product(base_values, repeat=args.num_iter), total=total_search_iter): + beta = exponents * base + model.compute_noise_level(beta) + for data in loader: + mel, audio = data + y_hat = model.inference(mel.cuda() if args.use_cuda else mel) + + if args.use_cuda: + y_hat = y_hat.cpu() + y_hat = y_hat.numpy() + + mel_hat = [] + for i in range(y_hat.shape[0]): + m = ap.melspectrogram(y_hat[i, 0])[:, :-1] + mel_hat.append(torch.from_numpy(m)) + + mel_hat = torch.stack(mel_hat) + mse = torch.sum((mel - mel_hat) ** 2).mean() + if mse.item() < best_error: + best_error = mse.item() + best_schedule = {"beta": beta} + print(f" > Found a better schedule. 
- MSE: {mse.item()}") + np.save(args.output_path, best_schedule) diff --git a/TTS/config/__init__.py b/TTS/config/__init__.py new file mode 100644 index 0000000..c5a6dd6 --- /dev/null +++ b/TTS/config/__init__.py @@ -0,0 +1,135 @@ +import json +import os +import re +from typing import Dict + +import fsspec +import yaml +from coqpit import Coqpit + +from TTS.config.shared_configs import * +from TTS.utils.generic_utils import find_module + + +def read_json_with_comments(json_path): + """for backward compat.""" + # fallback to json + with fsspec.open(json_path, "r", encoding="utf-8") as f: + input_str = f.read() + # handle comments but not urls with // + input_str = re.sub(r"(\"(?:[^\"\\]|\\.)*\")|(/\*(?:.|[\\n\\r])*?\*/)|(//.*)", lambda m: m.group(1) or m.group(2) or "", input_str) + return json.loads(input_str) + +def register_config(model_name: str) -> Coqpit: + """Find the right config for the given model name. + + Args: + model_name (str): Model name. + + Raises: + ModuleNotFoundError: No matching config for the model name. + + Returns: + Coqpit: config class. + """ + config_class = None + config_name = model_name + "_config" + + # TODO: fix this + if model_name == "xtts": + from TTS.tts.configs.xtts_config import XttsConfig + + config_class = XttsConfig + paths = ["TTS.tts.configs", "TTS.vocoder.configs", "TTS.encoder.configs", "TTS.vc.configs"] + for path in paths: + try: + config_class = find_module(path, config_name) + except ModuleNotFoundError: + pass + if config_class is None: + raise ModuleNotFoundError(f" [!] Config for {model_name} cannot be found.") + return config_class + + +def _process_model_name(config_dict: Dict) -> str: + """Format the model name as expected. It is a band-aid for the old `vocoder` model names. + + Args: + config_dict (Dict): A dictionary including the config fields. + + Returns: + str: Formatted modelname. + """ + model_name = config_dict["model"] if "model" in config_dict else config_dict["generator_model"] + model_name = model_name.replace("_generator", "").replace("_discriminator", "") + return model_name + + +def load_config(config_path: str) -> Coqpit: + """Import `json` or `yaml` files as TTS configs. First, load the input file as a `dict` and check the model name + to find the corresponding Config class. Then initialize the Config. + + Args: + config_path (str): path to the config file. + + Raises: + TypeError: given config file has an unknown type. + + Returns: + Coqpit: TTS config object. + """ + config_dict = {} + ext = os.path.splitext(config_path)[1] + if ext in (".yml", ".yaml"): + with fsspec.open(config_path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + elif ext == ".json": + try: + with fsspec.open(config_path, "r", encoding="utf-8") as f: + data = json.load(f) + except json.decoder.JSONDecodeError: + # backwards compat. + data = read_json_with_comments(config_path) + else: + raise TypeError(f" [!] Unknown config file type {ext}") + config_dict.update(data) + model_name = _process_model_name(config_dict) + config_class = register_config(model_name.lower()) + config = config_class() + config.from_dict(config_dict) + return config + + +def check_config_and_model_args(config, arg_name, value): + """Check the give argument in `config.model_args` if exist or in `config` for + the given value. + + Return False if the argument does not exist in `config.model_args` or `config`. + This is to patch up the compatibility between models with and without `model_args`. + + TODO: Remove this in the future with a unified approach. 
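+
+    Example (illustrative; assumes `config` was created with `load_config()`):
+
+        >>> check_config_and_model_args(config, "use_speaker_embedding", True)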
+ """ + if hasattr(config, "model_args"): + if arg_name in config.model_args: + return config.model_args[arg_name] == value + if hasattr(config, arg_name): + return config[arg_name] == value + return False + + +def get_from_config_or_model_args(config, arg_name): + """Get the given argument from `config.model_args` if exist or in `config`.""" + if hasattr(config, "model_args"): + if arg_name in config.model_args: + return config.model_args[arg_name] + return config[arg_name] + + +def get_from_config_or_model_args_with_default(config, arg_name, def_val): + """Get the given argument from `config.model_args` if exist or in `config`.""" + if hasattr(config, "model_args"): + if arg_name in config.model_args: + return config.model_args[arg_name] + if hasattr(config, arg_name): + return config[arg_name] + return def_val diff --git a/TTS/config/__pycache__/__init__.cpython-311.pyc b/TTS/config/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f244eacaeb4079105e2020ebda2a52db561a42af GIT binary patch literal 6650 zcmb_g+iw$B8b4!??@1iT2?0XO$pqVD&WwRr zj$~EUD!a-(7ytub=E88h^uf(|(cM;5c%TaPZSzwV`Cb zx)&fjlv&Q!%*CQj_cip!^{@o}fd2=zHGq^{+0M91g)3t0>J5=fLQGAjMLleC^hZZV zkH2y7K4-e+R7{nW)a-b6|6rtV@Yu-oyf}3dn7eBL#SGP7>G5YHEa=nJU0BkCR1&ALDc4kM`i7*uBL}isLA-k`)7t@8BJG{(UdC5 z+VJG$#BfYa#g*CNXjDlldNdkNFZ}2}oDh>&B=KlAL}gKm&TDEadPC9Yq9DkmoYJ-M z;e;9!6WUS8SY12e)7l;&d16E^J&D9tBQZmgi;;LK5;r1QWB%%s`D?55*9@u^=k?OO zj+HmFdGZVf!IqiMxTdA$n8~YYIc4(rWz$Aw)2?N%m^|`eIufcVX(7AmVJyL(&Y4`? z^eCxxMvrPb)o>dGL7*M*HoITKJ>3bdZSdFr1SC&>a1w9(a?i@eg8kmC;n`pG>@Rut zuM^Jk%A?+LXV>kqPsctvee3j+XWb5Utbvj5u*>&I>UA&5J%9fEMb+@#K=78+=zh<* zkcDTRfT4}qz9S@X%Jw~@`7>)i|MJL+QuGNWpJ4EU)(Sk|>3Y-tXa0!mG2xE5OfQ(b zoQS5xq-;8=tY>JdNjDuZ0tNHzk3hi5abV8i(6Ufkv_Z~(m6Ig+z4 zampeeZV=1#b6h^y zX!epTM?NEx`*VA>0r#LumT!5R?aHd<&?lAAfN=o zUN0zsaGcI+BP@rdx`2d1P?OMz@Ggq1ali#p(d339&;eZNxT=q-nUr*rQk9Mfw))3;e zelXWCRD`XrS~iFk*Hv(1GHtG8zOA9MjrEMxI<*Vay_W=QW+usOK!}N{l&T9?WT=1@ zheN#Cv*C+jWUj&~-32^S+-fuU0$GKeRKT_ti`gh!t?8?jaHH4+rUQ*wGkNyH*>Jn6 z2JENutfJ{Mt$y=$=+F&+?GHfmWPK~?+?qdE?$~i>q1Z8)e`noE+#M?)th#y(S5MhJ zVl+QbyL*>f$}LfYJ^7R6_O5lpI|7U>@#{9bC$QG@vx0tay4dq(sprk5b4%x*ulf7H zdOgsqO3E$mOQYZmJOQxmr>-D;Y^|-==)3T63W)U-+a^kF6NY=D?CUo8Zmk74{$2ae zZ4bRi9Dm`CIBK$xAdWYgE5<=)t{*`L%{51^6Y9p;ybWrslmfGnZ1pmW(ACDKw`AWK zj|)xh)dED_4RIUi1QT>-|30Qt(0gWYbv+kEO^|Pb%hqKnES%8=MH55_x+&=mQITo} zswO01rlO@Y4TcK3{{l;`C0nDT{j+jPrlPJ=gimt9^hRqn)DKcJ+oF=9#i){0QrHa) z)J5fVLX62F8+`+MFqPNPc&i#5jHam?lQqqnllf@+3+S*P{@Qy$K#M!b(4L3y7@=eE ztn}RZRVlE`2<-Z5+W6&_(#crLB+$Pa=r5dm*jEhfD+TsF2^?7s95IfL8}D5x z1|~{@3FzkQGWafvk+`N@XjVk+VdnFWsn;5KC;5 z4ODfs(ZAPW9Yz|7|I*P6>i}HZUjpnlM1)0Na^kN!=DZwSrhX0lxGJ$N1+nXeVDl%q z!cDKFd3QCJ`(L>_1Q8O!M-fIadLWW}DN-?*lGzAu15B0oFZ}OsiBEW)KX{2!ID>7Q|#?2E0K+23MpB5WXtW zRtpPb3f1)21%!~ybuiG#`eIj6m_a8qBY^u7Vsmor>IQGee3h=kfC?Iksg%lET1_!$ zbizUD z8iojSxV)5*NnK5;H&TKQeJfmS@|0DGQWu<{UYQ3EZ{ZY*8Wgf+`S3zAk#&So+!QT? 
z5yD&$o*k1N?sbP}&EZ*YTB%GrJd)$6>^QpaIJxFHx!Nl^PRWi_vC<8Qk-e5EmA1Fb z+uLJIs-h(8pU$=(F=O;4q2L&pZbv%?9wB$G7Z^G}fTy?}?D){83p%r4R$D96)@dVQmV z5}LhNsBN+?9d{8}P`GgjPO#?S1j1wJ!XVgJDebqXD?DnwA7`@(fBAt1@~-k=!7 zWzqR>8q+&B+dnaXGN)4|s3W3=+8QU+>`c7A+$Pn$F4w#cfPmeoYh7|IJEXe9a@}Fb zs2~X8h{W{BOiwcNErJlV6&{BEzJYqd5K~daP`Ik#O#y!jnhhZK>Bt>zyXINc1(TxV zW>CT3dr${XOW%TF5#^HBC91iL$UrfigOgO}#5}b@ViJ)Z1%o9kn8pdeh3cXQ2r*#G zx83#L@y0JmWv|F(ufz;b$|~<(y>m6*6u&H$?UKuOK~_cO!in2AV&$rA4d9^AToW+h zQ(%gHh?z+K4St2Dm=)zdl5H0AQIz||iQQ(dL6hwzL>&?K28YE^H!Iu88S&0wuro(L zBi{-$NRDF}%Xt#%QA0Cm#}Pe62K|syo5Vw?e`H=$Lqf-dpTI3{C=uzVk*4Vs#oi*H z6!l7Si*JgmO6tFxOo*mIgB7PSVSe^0dGuH$bLJ~i5f8Z(RKQHo-%+UV=(1xQu literal 0 HcmV?d00001 diff --git a/TTS/encoder/configs/base_encoder_config.py b/TTS/encoder/configs/base_encoder_config.py new file mode 100644 index 0000000..ebbaa04 --- /dev/null +++ b/TTS/encoder/configs/base_encoder_config.py @@ -0,0 +1,61 @@ +from dataclasses import asdict, dataclass, field +from typing import Dict, List + +from coqpit import MISSING + +from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig, BaseTrainingConfig + + +@dataclass +class BaseEncoderConfig(BaseTrainingConfig): + """Defines parameters for a Generic Encoder model.""" + + model: str = None + audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) + datasets: List[BaseDatasetConfig] = field(default_factory=lambda: [BaseDatasetConfig()]) + # model params + model_params: Dict = field( + default_factory=lambda: { + "model_name": "lstm", + "input_dim": 80, + "proj_dim": 256, + "lstm_dim": 768, + "num_lstm_layers": 3, + "use_lstm_with_projection": True, + } + ) + + audio_augmentation: Dict = field(default_factory=lambda: {}) + + # training params + epochs: int = 10000 + loss: str = "angleproto" + grad_clip: float = 3.0 + lr: float = 0.0001 + optimizer: str = "radam" + optimizer_params: Dict = field(default_factory=lambda: {"betas": [0.9, 0.999], "weight_decay": 0}) + lr_decay: bool = False + warmup_steps: int = 4000 + + # logging params + tb_model_param_stats: bool = False + steps_plot_stats: int = 10 + save_step: int = 1000 + print_step: int = 20 + run_eval: bool = False + + # data loader + num_classes_in_batch: int = MISSING + num_utter_per_class: int = MISSING + eval_num_classes_in_batch: int = None + eval_num_utter_per_class: int = None + + num_loader_workers: int = MISSING + voice_len: float = 1.6 + + def check_values(self): + super().check_values() + c = asdict(self) + assert ( + c["model_params"]["input_dim"] == self.audio.num_mels + ), " [!] model input dimendion must be equal to melspectrogram dimension." 
diff --git a/TTS/encoder/configs/emotion_encoder_config.py b/TTS/encoder/configs/emotion_encoder_config.py new file mode 100644 index 0000000..5eda267 --- /dev/null +++ b/TTS/encoder/configs/emotion_encoder_config.py @@ -0,0 +1,12 @@ +from dataclasses import asdict, dataclass + +from TTS.encoder.configs.base_encoder_config import BaseEncoderConfig + + +@dataclass +class EmotionEncoderConfig(BaseEncoderConfig): + """Defines parameters for Emotion Encoder model.""" + + model: str = "emotion_encoder" + map_classid_to_classname: dict = None + class_name_key: str = "emotion_name" diff --git a/TTS/encoder/configs/speaker_encoder_config.py b/TTS/encoder/configs/speaker_encoder_config.py new file mode 100644 index 0000000..6dceb00 --- /dev/null +++ b/TTS/encoder/configs/speaker_encoder_config.py @@ -0,0 +1,11 @@ +from dataclasses import asdict, dataclass + +from TTS.encoder.configs.base_encoder_config import BaseEncoderConfig + + +@dataclass +class SpeakerEncoderConfig(BaseEncoderConfig): + """Defines parameters for Speaker Encoder model.""" + + model: str = "speaker_encoder" + class_name_key: str = "speaker_name" diff --git a/TTS/encoder/dataset.py b/TTS/encoder/dataset.py new file mode 100644 index 0000000..582b1fe --- /dev/null +++ b/TTS/encoder/dataset.py @@ -0,0 +1,147 @@ +import random + +import torch +from torch.utils.data import Dataset + +from TTS.encoder.utils.generic_utils import AugmentWAV + + +class EncoderDataset(Dataset): + def __init__( + self, + config, + ap, + meta_data, + voice_len=1.6, + num_classes_in_batch=64, + num_utter_per_class=10, + verbose=False, + augmentation_config=None, + use_torch_spec=None, + ): + """ + Args: + ap (TTS.tts.utils.AudioProcessor): audio processor object. + meta_data (list): list of dataset instances. + seq_len (int): voice segment length in seconds. + verbose (bool): print diagnostic information. 
+ """ + super().__init__() + self.config = config + self.items = meta_data + self.sample_rate = ap.sample_rate + self.seq_len = int(voice_len * self.sample_rate) + self.num_utter_per_class = num_utter_per_class + self.ap = ap + self.verbose = verbose + self.use_torch_spec = use_torch_spec + self.classes, self.items = self.__parse_items() + + self.classname_to_classid = {key: i for i, key in enumerate(self.classes)} + + # Data Augmentation + self.augmentator = None + self.gaussian_augmentation_config = None + if augmentation_config: + self.data_augmentation_p = augmentation_config["p"] + if self.data_augmentation_p and ("additive" in augmentation_config or "rir" in augmentation_config): + self.augmentator = AugmentWAV(ap, augmentation_config) + + if "gaussian" in augmentation_config.keys(): + self.gaussian_augmentation_config = augmentation_config["gaussian"] + + if self.verbose: + print("\n > DataLoader initialization") + print(f" | > Classes per Batch: {num_classes_in_batch}") + print(f" | > Number of instances : {len(self.items)}") + print(f" | > Sequence length: {self.seq_len}") + print(f" | > Num Classes: {len(self.classes)}") + print(f" | > Classes: {self.classes}") + + def load_wav(self, filename): + audio = self.ap.load_wav(filename, sr=self.ap.sample_rate) + return audio + + def __parse_items(self): + class_to_utters = {} + for item in self.items: + path_ = item["audio_file"] + class_name = item[self.config.class_name_key] + if class_name in class_to_utters.keys(): + class_to_utters[class_name].append(path_) + else: + class_to_utters[class_name] = [ + path_, + ] + + # skip classes with number of samples >= self.num_utter_per_class + class_to_utters = {k: v for (k, v) in class_to_utters.items() if len(v) >= self.num_utter_per_class} + + classes = list(class_to_utters.keys()) + classes.sort() + + new_items = [] + for item in self.items: + path_ = item["audio_file"] + class_name = item["emotion_name"] if self.config.model == "emotion_encoder" else item["speaker_name"] + # ignore filtered classes + if class_name not in classes: + continue + # ignore small audios + if self.load_wav(path_).shape[0] - self.seq_len <= 0: + continue + + new_items.append({"wav_file_path": path_, "class_name": class_name}) + + return classes, new_items + + def __len__(self): + return len(self.items) + + def get_num_classes(self): + return len(self.classes) + + def get_class_list(self): + return self.classes + + def set_classes(self, classes): + self.classes = classes + self.classname_to_classid = {key: i for i, key in enumerate(self.classes)} + + def get_map_classid_to_classname(self): + return dict((c_id, c_n) for c_n, c_id in self.classname_to_classid.items()) + + def __getitem__(self, idx): + return self.items[idx] + + def collate_fn(self, batch): + # get the batch class_ids + labels = [] + feats = [] + for item in batch: + utter_path = item["wav_file_path"] + class_name = item["class_name"] + + # get classid + class_id = self.classname_to_classid[class_name] + # load wav file + wav = self.load_wav(utter_path) + offset = random.randint(0, wav.shape[0] - self.seq_len) + wav = wav[offset : offset + self.seq_len] + + if self.augmentator is not None and self.data_augmentation_p: + if random.random() < self.data_augmentation_p: + wav = self.augmentator.apply_one(wav) + + if not self.use_torch_spec: + mel = self.ap.melspectrogram(wav) + feats.append(torch.FloatTensor(mel)) + else: + feats.append(torch.FloatTensor(wav)) + + labels.append(class_id) + + feats = torch.stack(feats) + labels = torch.LongTensor(labels) 
+ + return feats, labels diff --git a/TTS/encoder/losses.py b/TTS/encoder/losses.py new file mode 100644 index 0000000..5b5aa0f --- /dev/null +++ b/TTS/encoder/losses.py @@ -0,0 +1,226 @@ +import torch +import torch.nn.functional as F +from torch import nn + + +# adapted from https://github.com/cvqluu/GE2E-Loss +class GE2ELoss(nn.Module): + def __init__(self, init_w=10.0, init_b=-5.0, loss_method="softmax"): + """ + Implementation of the Generalized End-to-End loss defined in https://arxiv.org/abs/1710.10467 [1] + Accepts an input of size (N, M, D) + where N is the number of speakers in the batch, + M is the number of utterances per speaker, + and D is the dimensionality of the embedding vector (e.g. d-vector) + Args: + - init_w (float): defines the initial value of w in Equation (5) of [1] + - init_b (float): definies the initial value of b in Equation (5) of [1] + """ + super().__init__() + # pylint: disable=E1102 + self.w = nn.Parameter(torch.tensor(init_w)) + # pylint: disable=E1102 + self.b = nn.Parameter(torch.tensor(init_b)) + self.loss_method = loss_method + + print(" > Initialized Generalized End-to-End loss") + + assert self.loss_method in ["softmax", "contrast"] + + if self.loss_method == "softmax": + self.embed_loss = self.embed_loss_softmax + if self.loss_method == "contrast": + self.embed_loss = self.embed_loss_contrast + + # pylint: disable=R0201 + def calc_new_centroids(self, dvecs, centroids, spkr, utt): + """ + Calculates the new centroids excluding the reference utterance + """ + excl = torch.cat((dvecs[spkr, :utt], dvecs[spkr, utt + 1 :])) + excl = torch.mean(excl, 0) + new_centroids = [] + for i, centroid in enumerate(centroids): + if i == spkr: + new_centroids.append(excl) + else: + new_centroids.append(centroid) + return torch.stack(new_centroids) + + def calc_cosine_sim(self, dvecs, centroids): + """ + Make the cosine similarity matrix with dims (N,M,N) + """ + cos_sim_matrix = [] + for spkr_idx, speaker in enumerate(dvecs): + cs_row = [] + for utt_idx, utterance in enumerate(speaker): + new_centroids = self.calc_new_centroids(dvecs, centroids, spkr_idx, utt_idx) + # vector based cosine similarity for speed + cs_row.append( + torch.clamp( + torch.mm( + utterance.unsqueeze(1).transpose(0, 1), + new_centroids.transpose(0, 1), + ) + / (torch.norm(utterance) * torch.norm(new_centroids, dim=1)), + 1e-6, + ) + ) + cs_row = torch.cat(cs_row, dim=0) + cos_sim_matrix.append(cs_row) + return torch.stack(cos_sim_matrix) + + # pylint: disable=R0201 + def embed_loss_softmax(self, dvecs, cos_sim_matrix): + """ + Calculates the loss on each embedding $L(e_{ji})$ by taking softmax + """ + N, M, _ = dvecs.shape + L = [] + for j in range(N): + L_row = [] + for i in range(M): + L_row.append(-F.log_softmax(cos_sim_matrix[j, i], 0)[j]) + L_row = torch.stack(L_row) + L.append(L_row) + return torch.stack(L) + + # pylint: disable=R0201 + def embed_loss_contrast(self, dvecs, cos_sim_matrix): + """ + Calculates the loss on each embedding $L(e_{ji})$ by contrast loss with closest centroid + """ + N, M, _ = dvecs.shape + L = [] + for j in range(N): + L_row = [] + for i in range(M): + centroids_sigmoids = torch.sigmoid(cos_sim_matrix[j, i]) + excl_centroids_sigmoids = torch.cat((centroids_sigmoids[:j], centroids_sigmoids[j + 1 :])) + L_row.append(1.0 - torch.sigmoid(cos_sim_matrix[j, i, j]) + torch.max(excl_centroids_sigmoids)) + L_row = torch.stack(L_row) + L.append(L_row) + return torch.stack(L) + + def forward(self, x, _label=None): + """ + Calculates the GE2E loss for an input of dimensions 
(num_speakers, num_utts_per_speaker, dvec_feats) + """ + + assert x.size()[1] >= 2 + + centroids = torch.mean(x, 1) + cos_sim_matrix = self.calc_cosine_sim(x, centroids) + torch.clamp(self.w, 1e-6) + cos_sim_matrix = self.w * cos_sim_matrix + self.b + L = self.embed_loss(x, cos_sim_matrix) + return L.mean() + + +# adapted from https://github.com/clovaai/voxceleb_trainer/blob/master/loss/angleproto.py +class AngleProtoLoss(nn.Module): + """ + Implementation of the Angular Prototypical loss defined in https://arxiv.org/abs/2003.11982 + Accepts an input of size (N, M, D) + where N is the number of speakers in the batch, + M is the number of utterances per speaker, + and D is the dimensionality of the embedding vector + Args: + - init_w (float): defines the initial value of w + - init_b (float): definies the initial value of b + """ + + def __init__(self, init_w=10.0, init_b=-5.0): + super().__init__() + # pylint: disable=E1102 + self.w = nn.Parameter(torch.tensor(init_w)) + # pylint: disable=E1102 + self.b = nn.Parameter(torch.tensor(init_b)) + self.criterion = torch.nn.CrossEntropyLoss() + + print(" > Initialized Angular Prototypical loss") + + def forward(self, x, _label=None): + """ + Calculates the AngleProto loss for an input of dimensions (num_speakers, num_utts_per_speaker, dvec_feats) + """ + + assert x.size()[1] >= 2 + + out_anchor = torch.mean(x[:, 1:, :], 1) + out_positive = x[:, 0, :] + num_speakers = out_anchor.size()[0] + + cos_sim_matrix = F.cosine_similarity( + out_positive.unsqueeze(-1).expand(-1, -1, num_speakers), + out_anchor.unsqueeze(-1).expand(-1, -1, num_speakers).transpose(0, 2), + ) + torch.clamp(self.w, 1e-6) + cos_sim_matrix = cos_sim_matrix * self.w + self.b + label = torch.arange(num_speakers).to(cos_sim_matrix.device) + L = self.criterion(cos_sim_matrix, label) + return L + + +class SoftmaxLoss(nn.Module): + """ + Implementation of the Softmax loss as defined in https://arxiv.org/abs/2003.11982 + Args: + - embedding_dim (float): speaker embedding dim + - n_speakers (float): number of speakers + """ + + def __init__(self, embedding_dim, n_speakers): + super().__init__() + + self.criterion = torch.nn.CrossEntropyLoss() + self.fc = nn.Linear(embedding_dim, n_speakers) + + print("Initialised Softmax Loss") + + def forward(self, x, label=None): + # reshape for compatibility + x = x.reshape(-1, x.size()[-1]) + label = label.reshape(-1) + + x = self.fc(x) + L = self.criterion(x, label) + + return L + + def inference(self, embedding): + x = self.fc(embedding) + activations = torch.nn.functional.softmax(x, dim=1).squeeze(0) + class_id = torch.argmax(activations) + return class_id + + +class SoftmaxAngleProtoLoss(nn.Module): + """ + Implementation of the Softmax AnglePrototypical loss as defined in https://arxiv.org/abs/2009.14153 + Args: + - embedding_dim (float): speaker embedding dim + - n_speakers (float): number of speakers + - init_w (float): defines the initial value of w + - init_b (float): definies the initial value of b + """ + + def __init__(self, embedding_dim, n_speakers, init_w=10.0, init_b=-5.0): + super().__init__() + + self.softmax = SoftmaxLoss(embedding_dim, n_speakers) + self.angleproto = AngleProtoLoss(init_w, init_b) + + print("Initialised SoftmaxAnglePrototypical Loss") + + def forward(self, x, label=None): + """ + Calculates the SoftmaxAnglePrototypical loss for an input of dimensions (num_speakers, num_utts_per_speaker, dvec_feats) + """ + + Lp = self.angleproto(x) + + Ls = self.softmax(x, label) + + return Ls + Lp diff --git 
a/TTS/encoder/models/__pycache__/base_encoder.cpython-311.pyc b/TTS/encoder/models/__pycache__/base_encoder.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..51ea66b4ce060e7a2bd528863f7dc2cfcfe8031a GIT binary patch literal 8120 [compiled __pycache__ bytecode; binary payload not shown]
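To make the loss interfaces in TTS/encoder/losses.py concrete, the following self-contained sketch (not part of the patch) feeds random embeddings shaped (num_speakers, num_utterances_per_speaker, embedding_dim) through the three main losses; every size below is an arbitrary assumption chosen only to satisfy the `x.size()[1] >= 2` check.

# Illustrative sketch: exercising the speaker-verification losses with random embeddings.
import torch

from TTS.encoder.losses import AngleProtoLoss, GE2ELoss, SoftmaxAngleProtoLoss

N, M, D = 4, 5, 256                      # speakers, utterances per speaker, embedding dim
dvecs = torch.randn(N, M, D)

print("GE2E:", GE2ELoss(loss_method="softmax")(dvecs).item())
print("AngleProto:", AngleProtoLoss()(dvecs).item())

# SoftmaxAngleProtoLoss also needs a per-utterance class (speaker) label.
labels = torch.arange(N).repeat_interleave(M).reshape(N, M)
print("Softmax+AngleProto:", SoftmaxAngleProtoLoss(embedding_dim=D, n_speakers=N)(dvecs, labels).item())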
zZzdkvzDenK=k$(xXL>U*OpMb&0mhPy$jq{@7A)wd4EJSo_5$%nTU1|5x*O(eiP3eAg$7U+e}ZygWm-%dUp%{3fQ8V;xp2Ob@M zO#jm{rJ;MN{={Pai6{NJ`V&h1MYaB-;_p-aebAot-<1(FyRzWf?o@p{VI5$C$7hbu zb>7)~d+!(f=l5s2=MQCV3cEvPcjzC>xKD-?s=_t+fqU5bs43@b zQ+)5KzV~20mA*7XY;A~Uqj3_VkB7XgPr}{dFrSRT6;3n@NH{=5=SBPlf*lB25bOeA zzFKw|3lX4mB>f`*)3Nx9&E{mlmj_TmRoCYIHTlY|=~OzEui2R+fBD+FeATA>rh0ID zR;u?q>sP3afK0ob5M(fbufn-=g|dR}c06M@fVa)*U!h(L^hg0t0vtZv*7~m91~*09 z3&Hw!a^5Yq55NuQaG^{@qg8XACD-%xJ9fLVF%tTg(utyW`jL50kK(*9{syubP>}u= zz>1xw={&{WB)>dWle7M Model fully restored. ") + except (KeyError, RuntimeError) as error: + # If eval raise the error + if eval: + raise error + + print(" > Partial model initialization.") + model_dict = self.state_dict() + model_dict = set_init_dict(model_dict, state["model"], c) + self.load_state_dict(model_dict) + del model_dict + + # load the criterion for restore_path + if criterion is not None and "criterion" in state: + try: + criterion.load_state_dict(state["criterion"]) + except (KeyError, RuntimeError) as error: + print(" > Criterion load ignored because of:", error) + + # instance and load the criterion for the encoder classifier in inference time + if ( + eval + and criterion is None + and "criterion" in state + and getattr(config, "map_classid_to_classname", None) is not None + ): + criterion = self.get_criterion(config, len(config.map_classid_to_classname)) + criterion.load_state_dict(state["criterion"]) + + if use_cuda: + self.cuda() + if criterion is not None: + criterion = criterion.cuda() + + if eval: + self.eval() + assert not self.training + + if not eval: + return criterion, state["step"] + return criterion diff --git a/TTS/encoder/models/lstm.py b/TTS/encoder/models/lstm.py new file mode 100644 index 0000000..51852b5 --- /dev/null +++ b/TTS/encoder/models/lstm.py @@ -0,0 +1,99 @@ +import torch +from torch import nn + +from TTS.encoder.models.base_encoder import BaseEncoder + + +class LSTMWithProjection(nn.Module): + def __init__(self, input_size, hidden_size, proj_size): + super().__init__() + self.input_size = input_size + self.hidden_size = hidden_size + self.proj_size = proj_size + self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True) + self.linear = nn.Linear(hidden_size, proj_size, bias=False) + + def forward(self, x): + self.lstm.flatten_parameters() + o, (_, _) = self.lstm(x) + return self.linear(o) + + +class LSTMWithoutProjection(nn.Module): + def __init__(self, input_dim, lstm_dim, proj_dim, num_lstm_layers): + super().__init__() + self.lstm = nn.LSTM(input_size=input_dim, hidden_size=lstm_dim, num_layers=num_lstm_layers, batch_first=True) + self.linear = nn.Linear(lstm_dim, proj_dim, bias=True) + self.relu = nn.ReLU() + + def forward(self, x): + _, (hidden, _) = self.lstm(x) + return self.relu(self.linear(hidden[-1])) + + +class LSTMSpeakerEncoder(BaseEncoder): + def __init__( + self, + input_dim, + proj_dim=256, + lstm_dim=768, + num_lstm_layers=3, + use_lstm_with_projection=True, + use_torch_spec=False, + audio_config=None, + ): + super().__init__() + self.use_lstm_with_projection = use_lstm_with_projection + self.use_torch_spec = use_torch_spec + self.audio_config = audio_config + self.proj_dim = proj_dim + + layers = [] + # choise LSTM layer + if use_lstm_with_projection: + layers.append(LSTMWithProjection(input_dim, lstm_dim, proj_dim)) + for _ in range(num_lstm_layers - 1): + layers.append(LSTMWithProjection(proj_dim, lstm_dim, proj_dim)) + self.layers = nn.Sequential(*layers) + else: + self.layers = LSTMWithoutProjection(input_dim, lstm_dim, proj_dim, 
num_lstm_layers) + + self.instancenorm = nn.InstanceNorm1d(input_dim) + + if self.use_torch_spec: + self.torch_spec = self.get_torch_mel_spectrogram_class(audio_config) + else: + self.torch_spec = None + + self._init_layers() + + def _init_layers(self): + for name, param in self.layers.named_parameters(): + if "bias" in name: + nn.init.constant_(param, 0.0) + elif "weight" in name: + nn.init.xavier_normal_(param) + + def forward(self, x, l2_norm=True): + """Forward pass of the model. + + Args: + x (Tensor): Raw waveform signal or spectrogram frames. If input is a waveform, `torch_spec` must be `True` + to compute the spectrogram on-the-fly. + l2_norm (bool): Whether to L2-normalize the outputs. + + Shapes: + - x: :math:`(N, 1, T_{in})` or :math:`(N, D_{spec}, T_{in})` + """ + with torch.no_grad(): + with torch.cuda.amp.autocast(enabled=False): + if self.use_torch_spec: + x.squeeze_(1) + x = self.torch_spec(x) + x = self.instancenorm(x).transpose(1, 2) + d = self.layers(x) + if self.use_lstm_with_projection: + d = d[:, -1] + if l2_norm: + d = torch.nn.functional.normalize(d, p=2, dim=1) + return d diff --git a/TTS/encoder/models/resnet.py b/TTS/encoder/models/resnet.py new file mode 100644 index 0000000..5eafcd6 --- /dev/null +++ b/TTS/encoder/models/resnet.py @@ -0,0 +1,198 @@ +import torch +from torch import nn + +# from TTS.utils.audio.torch_transforms import TorchSTFT +from TTS.encoder.models.base_encoder import BaseEncoder + + +class SELayer(nn.Module): + def __init__(self, channel, reduction=8): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), + nn.Sigmoid(), + ) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + return x * y + + +class SEBasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=8): + super(SEBasicBlock, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.se = SELayer(planes, reduction) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.relu(out) + out = self.bn1(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.se(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + return out + + +class ResNetSpeakerEncoder(BaseEncoder): + """Implementation of the model H/ASP without batch normalization in speaker embedding. 
This model was proposed in: https://arxiv.org/abs/2009.14153 + Adapted from: https://github.com/clovaai/voxceleb_trainer + """ + + # pylint: disable=W0102 + def __init__( + self, + input_dim=64, + proj_dim=512, + layers=[3, 4, 6, 3], + num_filters=[32, 64, 128, 256], + encoder_type="ASP", + log_input=False, + use_torch_spec=False, + audio_config=None, + ): + super(ResNetSpeakerEncoder, self).__init__() + + self.encoder_type = encoder_type + self.input_dim = input_dim + self.log_input = log_input + self.use_torch_spec = use_torch_spec + self.audio_config = audio_config + self.proj_dim = proj_dim + + self.conv1 = nn.Conv2d(1, num_filters[0], kernel_size=3, stride=1, padding=1) + self.relu = nn.ReLU(inplace=True) + self.bn1 = nn.BatchNorm2d(num_filters[0]) + + self.inplanes = num_filters[0] + self.layer1 = self.create_layer(SEBasicBlock, num_filters[0], layers[0]) + self.layer2 = self.create_layer(SEBasicBlock, num_filters[1], layers[1], stride=(2, 2)) + self.layer3 = self.create_layer(SEBasicBlock, num_filters[2], layers[2], stride=(2, 2)) + self.layer4 = self.create_layer(SEBasicBlock, num_filters[3], layers[3], stride=(2, 2)) + + self.instancenorm = nn.InstanceNorm1d(input_dim) + + if self.use_torch_spec: + self.torch_spec = self.get_torch_mel_spectrogram_class(audio_config) + else: + self.torch_spec = None + + outmap_size = int(self.input_dim / 8) + + self.attention = nn.Sequential( + nn.Conv1d(num_filters[3] * outmap_size, 128, kernel_size=1), + nn.ReLU(), + nn.BatchNorm1d(128), + nn.Conv1d(128, num_filters[3] * outmap_size, kernel_size=1), + nn.Softmax(dim=2), + ) + + if self.encoder_type == "SAP": + out_dim = num_filters[3] * outmap_size + elif self.encoder_type == "ASP": + out_dim = num_filters[3] * outmap_size * 2 + else: + raise ValueError("Undefined encoder") + + self.fc = nn.Linear(out_dim, proj_dim) + + self._init_layers() + + def _init_layers(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def create_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + # pylint: disable=R0201 + def new_parameter(self, *size): + out = nn.Parameter(torch.FloatTensor(*size)) + nn.init.xavier_normal_(out) + return out + + def forward(self, x, l2_norm=False): + """Forward pass of the model. + + Args: + x (Tensor): Raw waveform signal or spectrogram frames. If input is a waveform, `torch_spec` must be `True` + to compute the spectrogram on-the-fly. + l2_norm (bool): Whether to L2-normalize the outputs. 
+ + Shapes: + - x: :math:`(N, 1, T_{in})` or :math:`(N, D_{spec}, T_{in})` + """ + x.squeeze_(1) + # if you torch spec compute it otherwise use the mel spec computed by the AP + if self.use_torch_spec: + x = self.torch_spec(x) + + if self.log_input: + x = (x + 1e-6).log() + x = self.instancenorm(x).unsqueeze(1) + + x = self.conv1(x) + x = self.relu(x) + x = self.bn1(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = x.reshape(x.size()[0], -1, x.size()[-1]) + + w = self.attention(x) + + if self.encoder_type == "SAP": + x = torch.sum(x * w, dim=2) + elif self.encoder_type == "ASP": + mu = torch.sum(x * w, dim=2) + sg = torch.sqrt((torch.sum((x**2) * w, dim=2) - mu**2).clamp(min=1e-5)) + x = torch.cat((mu, sg), 1) + + x = x.view(x.size()[0], -1) + x = self.fc(x) + + if l2_norm: + x = torch.nn.functional.normalize(x, p=2, dim=1) + return x diff --git a/TTS/encoder/requirements.txt b/TTS/encoder/requirements.txt new file mode 100644 index 0000000..a486cc4 --- /dev/null +++ b/TTS/encoder/requirements.txt @@ -0,0 +1,2 @@ +umap-learn +numpy>=1.17.0 diff --git a/TTS/encoder/utils/__init__.py b/TTS/encoder/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/encoder/utils/__pycache__/__init__.cpython-311.pyc b/TTS/encoder/utils/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ae8731445fea368b7141b7d803d8099ae6abe356 GIT binary patch literal 179 zcmZ3^%ge<81nFs6X(0MBh=2h`DC095kTIPhg&~+hlhJP_LlF~@{~09t%S}JDIJKx) zzcR5nL*FH}IJ+djK;Jn(H?1<%Q$M-1xFkO}J}*BdwOBtSBv?N+FB!-#(l0H^%qiB7 vkI&4@EQycTE2#X%VUwGmQks)$SHuc57G!ZTKalvq%*e?2fdNJoF$2W_MqexQ literal 0 HcmV?d00001 diff --git a/TTS/encoder/utils/__pycache__/generic_utils.cpython-311.pyc b/TTS/encoder/utils/__pycache__/generic_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..68db90c1306626cd6db6d50f8ee2a2769c0c9ebe GIT binary patch literal 7424 zcmb_hUu+Xen(wyT{~gS^6U~grxI2l1|IyuH zCYGZ`D^9XbdB_8I!|pPYSDIzR!s;U3KJaiS@o+2U;ZEwd>MpI8kfL4jFuE5bcsT9T ze${Qay9vzhNZVUH|`Aru~KjN8Np z$$nlm(WbD*138-3BC)LHX>+5^nz?9(FUM(0uXT}t64<6b`fvXhTh0@ojjd6G^Gz-eMM!QC?NIe8UJ=T0h66N$c~ zR^}`&mvt64>WgODVzh;(o>+~XHA@P6NVC3Zfj)n=W-VIFO$|wn9LTg8C6qml+D6V` zr?<{G_L8C20SMo&t;8Mw2gsiBc~zr9lo|2tpH!I@?CWc z34(U;2OFiL#n}3n1ntz@`Ag!PJ!7ZLdu^O(*WWtt*?)pr?`qf&Qs(JQ7B?9IWfmr^cv(<10ie z@61Kno}lRzpSsP+E-tr{rMY-M!7t4f?0tQMeX%bSw;@_OSSGo`a<~z};-y+R8YxE3 zMy_+I>|$iRCLl7COL0u37W)!>DwiFNe3{~xB89d{VueoSxJW9?@ri7b;UWucE)$W> zY>JibKoqToUY*!$)7QR7V)Gt}(N@`%$jdD&922kiDW1h9$<$3|m6L-;ZakUGE~FOa zz#D9SHP3LeGX;H9ZIvlHmE>h>BA;inv`j6gb2nsDj*}_uDVbW%rLwY9(G*XoI9|4~ zOg^1RGO~rsr&GLa=9s)}$+M{}j$nUSM&kko|^cHEB&+uM{KA-~rjA@l-W-7+po9>`<+zQ!IyLXQcN0 z`SRd(j$ygM)daURc%I>I^11xrL@u+idVP8@xx(?e47iwPxWT!(*+E9#UI$nBRGJ%H zWU>sKO2(B^EWaw-?E6tW-Kbp?g+ zz)tvBC45W_A4lQig6p{GIxa=Wp3bA_ROvi&AC@{JsPp938tS}Ix*~ZyUlMIiEmB`> zgF?O|y8}uZl{yZfj**>?(Mrdt*fEAW#;Sy=`K&@VCZ4x-l@Forv8S`Bd;AY4#I`Ba zHdQ6a;8`iui9*MALZ>RBQ&0V3=nM*-5khDF|3z2Nx8dInKY8z8$9_BZwCPzv?7xQk zuL+;u5W13LR}yt4H$1!Dhc>8t-rY#7O4yn|BvraGB}IC7B14tP(AK_ZCNVOBA`?52 z=}Kh!zq|kIuo(F+%VKBRheCN?lV1oBON#hiTj1SS!hZA$LzDff9@C0{0N=-5fs%|bkoR93vPPX?%uHvRO|zyJ%;Qt zq5i&tSw$o@qyPnshVbvz&wv01BbFgmAGQffT3d+L8Y;=!6lA28p`C`R*iI;EHDwLE z)TQE-Cf|(HIR4__Bh!p*gHRVAzdjkYus&$a;;l<=uC4b*Tw(FH%eDfEl57to9JV#W z4q)~nAca2Ny80_2QC&sn^nx&?RlZqE@V3k}v`rI3yh z$?b!x9e1?ij&4Rp_X*@aA=KZh8CodK*cb>lAYU9+6tA}cg;N2^Ykme~rsWM>&8HaM z7C?g0hqSo>LXErvoIcjfybd)suq2F9{cHlryMYM|tuaDZ9%!ve(YK~`DE6IgEDh;K 
z^k*!sQAJY&tJ2oSS8EoU08~idF*^BnKTTryzR1^TNJZJDw15LimLpq%L!#%X-_7mC)Rs{nE!|G&U(1^o3^gb!yulX3KCiK3}! z#d8>tm}BNUJh0#4Ilp*xqVxa$c(VNi*^^0SXl7dz#Qq^;;|u_$}ownY-JN!I+uZ?aUzpXGi(Q_0;mE^z$Gk(qJUSH z5_v{N74~#4LB|0h04`KMh_e7m0J>yT3pq9e0OS_S>mY?_Hw%@y4ChyIZ98xe`a!SDiRl?`!RY>WA6yEosZ6_S9grl21Tjzg2iB3!k;iwp% zLgA_H_U-6)I||R0F70~5$a|nnB5!y3lZrPccw>?;^dhwXfk$V`)=!adQdd@O1=GUV zw6Lc%jW_l7Z_YiK`t`MMKi-^w+WhD>^UcPPN2>Sv2CI>^*q%6z*aVG z+P6-Lz3)FwiM^BC!{U)EVq``PT}7d*!nIGu(5IzouuSlFN&60OI5(WTzV`bm(RT#r zt5(9@D!BGZ{jD!8iG2- zYe#s{-?)s>bG2YyglnX|fyQVb4n}aXqC40%LOcW(xWZ{1^7R|k6N~Y>48p%jAO0cm zSZ9^o$1{NH;p9gtX%mar3wstda7 z_(1UN7d;;$&xa+mZ7H*dY)ILxTsH#h{QK#^fiM;6NHR z>qc!77#udD9&2R>oQ%_}@m!WE1aw2Baq&$sHx1+t@f=6d@(FbCv=|&k!BL2OuHgM} z#nmmix|NV{u;M>hz99PhkiSoG_w9Q9JKpXJ#JADS7129_yd#2rMDg+r9E0IZnN4Jv zcwBbI@tj>rUlWk>8Q_QIaww464d~#7S@$|9buiw-O8tU!X?DEc(D#)}|A**Pe%vd#sgcQ*A1J!Rx{tOut48-7T?!Z$!S0_Bym{Xs^Sw zdj8udFm0pl#%T%PP<16w91kJjK8BzAtdZHfdYM1i3(lGs5^vmA*A>8jA{uDipy?f6 zct@{XZ|Rk$;~n38zbD)eVcZ^TW4A^Y@qa#=W@IN0)oEop$W)r+GqNL<&9CrrI+c;_ zc{aDK=5Upg^I~MFl+uY+xaE;O_*#n3vB{-4muHd}qb7C(RtP{EW_hg00G2|YWI=WCtY^P(m(lIO?pLlj%?6{0N zE=%D94?3$&wr1x`xXb}O2c%>vSpd;FeaO*y|5N1X+i?t497CdG7&(TwmMV_3g5&IS z-yx~B6SYP+`%&vjDcFI6y_@YQctYjwv>vUr9^Jh8bVO_&N3G-EtKMbB>VW{9#OkXa z!rgr5BPE#ByvTk5o{4(cHb~eICd%e*fg#VYD*H5m{bUOAIIhTTaD5l6xi_ZzKZa+9 zI!@uY)T)z!#RDf>b2pZmB(H=|WmwqHu||9epaj=H+s8mAgyyp>9xxoGxPJoz7X~CL z5w1JRpF}wBD1Q>sbVvEyC0c~~D-j{V`0f%X1>-9bqk{3RS{F$YBJbPjM(azZ=C!_l P&fG#aSBZBJrBi Using Additive Noise Augmentation: with {len(additive_files)} audios instances from {self.additive_noise_types}" + ) + + self.use_rir = False + + if "rir" in augmentation_config.keys(): + self.rir_config = augmentation_config["rir"] + if self.rir_config["rir_path"]: + self.rir_files = glob.glob(os.path.join(self.rir_config["rir_path"], "**/*.wav"), recursive=True) + self.use_rir = True + + print(f" | > Using RIR Noise Augmentation: with {len(self.rir_files)} audios instances") + + self.create_augmentation_global_list() + + def create_augmentation_global_list(self): + if self.use_additive_noise: + self.global_noise_list = self.additive_noise_types + else: + self.global_noise_list = [] + if self.use_rir: + self.global_noise_list.append("RIR_AUG") + + def additive_noise(self, noise_type, audio): + clean_db = 10 * np.log10(np.mean(audio**2) + 1e-4) + + noise_list = random.sample( + self.noise_list[noise_type], + random.randint( + self.additive_noise_config[noise_type]["min_num_noises"], + self.additive_noise_config[noise_type]["max_num_noises"], + ), + ) + + audio_len = audio.shape[0] + noises_wav = None + for noise in noise_list: + noiseaudio = self.ap.load_wav(noise, sr=self.ap.sample_rate)[:audio_len] + + if noiseaudio.shape[0] < audio_len: + continue + + noise_snr = random.uniform( + self.additive_noise_config[noise_type]["min_snr_in_db"], + self.additive_noise_config[noise_type]["max_num_noises"], + ) + noise_db = 10 * np.log10(np.mean(noiseaudio**2) + 1e-4) + noise_wav = np.sqrt(10 ** ((clean_db - noise_db - noise_snr) / 10)) * noiseaudio + + if noises_wav is None: + noises_wav = noise_wav + else: + noises_wav += noise_wav + + # if all possible files is less than audio, choose other files + if noises_wav is None: + return self.additive_noise(noise_type, audio) + + return audio + noises_wav + + def reverberate(self, audio): + audio_len = audio.shape[0] + + rir_file = random.choice(self.rir_files) + rir = self.ap.load_wav(rir_file, sr=self.ap.sample_rate) + rir = rir / np.sqrt(np.sum(rir**2)) + return signal.convolve(audio, rir, 
mode=self.rir_config["conv_mode"])[:audio_len] + + def apply_one(self, audio): + noise_type = random.choice(self.global_noise_list) + if noise_type == "RIR_AUG": + return self.reverberate(audio) + + return self.additive_noise(noise_type, audio) + + +def setup_encoder_model(config: "Coqpit"): + if config.model_params["model_name"].lower() == "lstm": + model = LSTMSpeakerEncoder( + config.model_params["input_dim"], + config.model_params["proj_dim"], + config.model_params["lstm_dim"], + config.model_params["num_lstm_layers"], + use_torch_spec=config.model_params.get("use_torch_spec", False), + audio_config=config.audio, + ) + elif config.model_params["model_name"].lower() == "resnet": + model = ResNetSpeakerEncoder( + input_dim=config.model_params["input_dim"], + proj_dim=config.model_params["proj_dim"], + log_input=config.model_params.get("log_input", False), + use_torch_spec=config.model_params.get("use_torch_spec", False), + audio_config=config.audio, + ) + return model diff --git a/TTS/encoder/utils/prepare_voxceleb.py b/TTS/encoder/utils/prepare_voxceleb.py new file mode 100644 index 0000000..b93baf9 --- /dev/null +++ b/TTS/encoder/utils/prepare_voxceleb.py @@ -0,0 +1,219 @@ +# coding=utf-8 +# Copyright (C) 2020 ATHENA AUTHORS; Yiping Peng; Ne Luo +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +# Only support eager mode and TF>=2.0.0 +# pylint: disable=no-member, invalid-name, relative-beyond-top-level +# pylint: disable=too-many-locals, too-many-statements, too-many-arguments, too-many-instance-attributes +""" voxceleb 1 & 2 """ + +import hashlib +import os +import subprocess +import sys +import zipfile + +import pandas +import soundfile as sf +from absl import logging + +SUBSETS = { + "vox1_dev_wav": [ + "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partaa", + "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partab", + "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partac", + "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partad", + ], + "vox1_test_wav": ["https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_test_wav.zip"], + "vox2_dev_aac": [ + "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partaa", + "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partab", + "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partac", + "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partad", + "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partae", + "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partaf", + "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partag", + "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partah", + ], + "vox2_test_aac": ["https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_test_aac.zip"], +} + +MD5SUM = { + "vox1_dev_wav": "ae63e55b951748cc486645f532ba230b", + "vox2_dev_aac": "bbc063c46078a602ca71605645c2a402", + "vox1_test_wav": "185fdc63c3c739954633d50379a3d102", + "vox2_test_aac": "0d2b3ea430a821c33263b5ea37ede312", +} + +USER = {"user": "", "password": ""} + +speaker_id_dict = {} + + +def download_and_extract(directory, subset, urls): + """Download and extract the given split of dataset. + + Args: + directory: the directory where to put the downloaded data. + subset: subset name of the corpus. + urls: the list of urls to download the data file. 
+ """ + os.makedirs(directory, exist_ok=True) + + try: + for url in urls: + zip_filepath = os.path.join(directory, url.split("/")[-1]) + if os.path.exists(zip_filepath): + continue + logging.info("Downloading %s to %s" % (url, zip_filepath)) + subprocess.call( + "wget %s --user %s --password %s -O %s" % (url, USER["user"], USER["password"], zip_filepath), + shell=True, + ) + + statinfo = os.stat(zip_filepath) + logging.info("Successfully downloaded %s, size(bytes): %d" % (url, statinfo.st_size)) + + # concatenate all parts into zip files + if ".zip" not in zip_filepath: + zip_filepath = "_".join(zip_filepath.split("_")[:-1]) + subprocess.call("cat %s* > %s.zip" % (zip_filepath, zip_filepath), shell=True) + zip_filepath += ".zip" + extract_path = zip_filepath.strip(".zip") + + # check zip file md5sum + with open(zip_filepath, "rb") as f_zip: + md5 = hashlib.md5(f_zip.read()).hexdigest() + if md5 != MD5SUM[subset]: + raise ValueError("md5sum of %s mismatch" % zip_filepath) + + with zipfile.ZipFile(zip_filepath, "r") as zfile: + zfile.extractall(directory) + extract_path_ori = os.path.join(directory, zfile.infolist()[0].filename) + subprocess.call("mv %s %s" % (extract_path_ori, extract_path), shell=True) + finally: + # os.remove(zip_filepath) + pass + + +def exec_cmd(cmd): + """Run a command in a subprocess. + Args: + cmd: command line to be executed. + Return: + int, the return code. + """ + try: + retcode = subprocess.call(cmd, shell=True) + if retcode < 0: + logging.info(f"Child was terminated by signal {retcode}") + except OSError as e: + logging.info(f"Execution failed: {e}") + retcode = -999 + return retcode + + +def decode_aac_with_ffmpeg(aac_file, wav_file): + """Decode a given AAC file into WAV using ffmpeg. + Args: + aac_file: file path to input AAC file. + wav_file: file path to output WAV file. + Return: + bool, True if success. + """ + cmd = f"ffmpeg -i {aac_file} {wav_file}" + logging.info(f"Decoding aac file using command line: {cmd}") + ret = exec_cmd(cmd) + if ret != 0: + logging.error(f"Failed to decode aac file with retcode {ret}") + logging.error("Please check your ffmpeg installation.") + return False + return True + + +def convert_audio_and_make_label(input_dir, subset, output_dir, output_file): + """Optionally convert AAC to WAV and make speaker labels. + Args: + input_dir: the directory which holds the input dataset. + subset: the name of the specified subset. e.g. vox1_dev_wav + output_dir: the directory to place the newly generated csv files. + output_file: the name of the newly generated csv file. e.g. vox1_dev_wav.csv + """ + + logging.info("Preprocessing audio and label for subset %s" % subset) + source_dir = os.path.join(input_dir, subset) + + files = [] + # Convert all AAC file into WAV format. At the same time, generate the csv + for root, _, filenames in os.walk(source_dir): + for filename in filenames: + name, ext = os.path.splitext(filename) + if ext.lower() == ".wav": + _, ext2 = os.path.splitext(name) + if ext2: + continue + wav_file = os.path.join(root, filename) + elif ext.lower() == ".m4a": + # Convert AAC to WAV. 
+ aac_file = os.path.join(root, filename) + wav_file = aac_file + ".wav" + if not os.path.exists(wav_file): + if not decode_aac_with_ffmpeg(aac_file, wav_file): + raise RuntimeError("Audio decoding failed.") + else: + continue + speaker_name = root.split(os.path.sep)[-2] + if speaker_name not in speaker_id_dict: + num = len(speaker_id_dict) + speaker_id_dict[speaker_name] = num + # wav_filesize = os.path.getsize(wav_file) + wav_length = len(sf.read(wav_file)[0]) + files.append((os.path.abspath(wav_file), wav_length, speaker_id_dict[speaker_name], speaker_name)) + + # Write to CSV file which contains four columns: + # "wav_filename", "wav_length_ms", "speaker_id", "speaker_name". + csv_file_path = os.path.join(output_dir, output_file) + df = pandas.DataFrame(data=files, columns=["wav_filename", "wav_length_ms", "speaker_id", "speaker_name"]) + df.to_csv(csv_file_path, index=False, sep="\t") + logging.info("Successfully generated csv file {}".format(csv_file_path)) + + +def processor(directory, subset, force_process): + """download and process""" + urls = SUBSETS + if subset not in urls: + raise ValueError(subset, "is not in voxceleb") + + subset_csv = os.path.join(directory, subset + ".csv") + if not force_process and os.path.exists(subset_csv): + return subset_csv + + logging.info("Downloading and process the voxceleb in %s", directory) + logging.info("Preparing subset %s", subset) + download_and_extract(directory, subset, urls[subset]) + convert_audio_and_make_label(directory, subset, directory, subset + ".csv") + logging.info("Finished downloading and processing") + return subset_csv + + +if __name__ == "__main__": + logging.set_verbosity(logging.INFO) + if len(sys.argv) != 4: + print("Usage: python prepare_data.py save_directory user password") + sys.exit() + + DIR, USER["user"], USER["password"] = sys.argv[1], sys.argv[2], sys.argv[3] + for SUBSET in SUBSETS: + processor(DIR, SUBSET, False) diff --git a/TTS/encoder/utils/training.py b/TTS/encoder/utils/training.py new file mode 100644 index 0000000..ff8f271 --- /dev/null +++ b/TTS/encoder/utils/training.py @@ -0,0 +1,99 @@ +import os +from dataclasses import dataclass, field + +from coqpit import Coqpit +from trainer import TrainerArgs, get_last_checkpoint +from trainer.io import copy_model_files +from trainer.logging import logger_factory +from trainer.logging.console_logger import ConsoleLogger + +from TTS.config import load_config, register_config +from TTS.tts.utils.text.characters import parse_symbols +from TTS.utils.generic_utils import get_experiment_folder_path, get_git_branch + + +@dataclass +class TrainArgs(TrainerArgs): + config_path: str = field(default=None, metadata={"help": "Path to the config file."}) + + +def getarguments(): + train_config = TrainArgs() + parser = train_config.init_argparse(arg_prefix="") + return parser + + +def process_args(args, config=None): + """Process parsed comand line arguments and initialize the config if not provided. + Args: + args (argparse.Namespace or dict like): Parsed input arguments. + config (Coqpit): Model config. If none, it is generated from `args`. Defaults to None. + Returns: + c (TTS.utils.io.AttrDict): Config paramaters. + out_path (str): Path to save models and logging. + audio_path (str): Path to save generated test audios. + c_logger (TTS.utils.console_logger.ConsoleLogger): Class that does + logging to the console. + dashboard_logger (WandbLogger or TensorboardLogger): Class that does the dashboard Logging + TODO: + - Interactive config definition. 
+ """ + if isinstance(args, tuple): + args, coqpit_overrides = args + if args.continue_path: + # continue a previous training from its output folder + experiment_path = args.continue_path + args.config_path = os.path.join(args.continue_path, "config.json") + args.restore_path, best_model = get_last_checkpoint(args.continue_path) + if not args.best_path: + args.best_path = best_model + # init config if not already defined + if config is None: + if args.config_path: + # init from a file + config = load_config(args.config_path) + else: + # init from console args + from TTS.config.shared_configs import BaseTrainingConfig # pylint: disable=import-outside-toplevel + + config_base = BaseTrainingConfig() + config_base.parse_known_args(coqpit_overrides) + config = register_config(config_base.model)() + # override values from command-line args + config.parse_known_args(coqpit_overrides, relaxed_parser=True) + experiment_path = args.continue_path + if not experiment_path: + experiment_path = get_experiment_folder_path(config.output_path, config.run_name) + audio_path = os.path.join(experiment_path, "test_audios") + config.output_log_path = experiment_path + # setup rank 0 process in distributed training + dashboard_logger = None + if args.rank == 0: + new_fields = {} + if args.restore_path: + new_fields["restore_path"] = args.restore_path + new_fields["github_branch"] = get_git_branch() + # if model characters are not set in the config file + # save the default set to the config file for future + # compatibility. + if config.has("characters") and config.characters is None: + used_characters = parse_symbols() + new_fields["characters"] = used_characters + copy_model_files(config, experiment_path, new_fields) + dashboard_logger = logger_factory(config, experiment_path) + c_logger = ConsoleLogger() + return config, experiment_path, audio_path, c_logger, dashboard_logger + + +def init_arguments(): + train_config = TrainArgs() + parser = train_config.init_argparse(arg_prefix="") + return parser + + +def init_training(config: Coqpit = None): + """Initialization of a training run.""" + parser = init_arguments() + args = parser.parse_known_args() + config, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = process_args(args, config) + return args[0], config, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger diff --git a/TTS/encoder/utils/visual.py b/TTS/encoder/utils/visual.py new file mode 100644 index 0000000..6575b86 --- /dev/null +++ b/TTS/encoder/utils/visual.py @@ -0,0 +1,50 @@ +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import umap + +matplotlib.use("Agg") + + +colormap = ( + np.array( + [ + [76, 255, 0], + [0, 127, 70], + [255, 0, 0], + [255, 217, 38], + [0, 135, 255], + [165, 0, 165], + [255, 167, 255], + [0, 255, 255], + [255, 96, 38], + [142, 76, 0], + [33, 0, 127], + [0, 0, 0], + [183, 183, 183], + ], + dtype=float, + ) + / 255 +) + + +def plot_embeddings(embeddings, num_classes_in_batch): + num_utter_per_class = embeddings.shape[0] // num_classes_in_batch + + # if necessary get just the first 10 classes + if num_classes_in_batch > 10: + num_classes_in_batch = 10 + embeddings = embeddings[: num_classes_in_batch * num_utter_per_class] + + model = umap.UMAP() + projection = model.fit_transform(embeddings) + ground_truth = np.repeat(np.arange(num_classes_in_batch), num_utter_per_class) + colors = [colormap[i] for i in ground_truth] + fig, ax = plt.subplots(figsize=(16, 10)) + _ = ax.scatter(projection[:, 0], projection[:, 1], c=colors) + plt.gca().set_aspect("equal", 
"datalim") + plt.title("UMAP projection") + plt.tight_layout() + plt.savefig("umap") + return fig diff --git a/TTS/model.py b/TTS/model.py new file mode 100644 index 0000000..ae6be7b --- /dev/null +++ b/TTS/model.py @@ -0,0 +1,59 @@ +from abc import abstractmethod +from typing import Dict + +import torch +from coqpit import Coqpit +from trainer import TrainerModel + +# pylint: skip-file + + +class BaseTrainerModel(TrainerModel): + """BaseTrainerModel model expanding TrainerModel with required functions by 🐸TTS. + + Every new 🐸TTS model must inherit it. + """ + + @staticmethod + @abstractmethod + def init_from_config(config: Coqpit): + """Init the model and all its attributes from the given config. + + Override this depending on your model. + """ + ... + + @abstractmethod + def inference(self, input: torch.Tensor, aux_input={}) -> Dict: + """Forward pass for inference. + + It must return a dictionary with the main model output and all the auxiliary outputs. The key ```model_outputs``` + is considered to be the main output and you can add any other auxiliary outputs as you want. + + We don't use `*kwargs` since it is problematic with the TorchScript API. + + Args: + input (torch.Tensor): [description] + aux_input (Dict): Auxiliary inputs like speaker embeddings, durations etc. + + Returns: + Dict: [description] + """ + outputs_dict = {"model_outputs": None} + ... + return outputs_dict + + @abstractmethod + def load_checkpoint( + self, config: Coqpit, checkpoint_path: str, eval: bool = False, strict: bool = True, cache=False + ) -> None: + """Load a model checkpoint gile and get ready for training or inference. + + Args: + config (Coqpit): Model configuration. + checkpoint_path (str): Path to the model checkpoint file. + eval (bool, optional): If true, init model for inference else for training. Defaults to False. + strict (bool, optional): Match all checkpoint keys to model's keys. Defaults to True. + cache (bool, optional): If True, cache the file locally for subsequent calls. It is cached under `get_user_data_dir()/tts_cache`. Defaults to False. + """ + ... 
diff --git a/TTS/utils/__init__.py b/TTS/utils/__init__.py new file mode 100644 index 0000000..e69de29
diff --git a/TTS/utils/__pycache__/__init__.cpython-311.pyc b/TTS/utils/__pycache__/__init__.cpython-311.pyc new file mode 100644 GIT binary patch literal 171 [compiled __pycache__ bytecode; binary payload not shown]
diff --git a/TTS/utils/__pycache__/generic_utils.cpython-311.pyc b/TTS/utils/__pycache__/generic_utils.cpython-311.pyc new file mode 100644 GIT binary patch literal 15661 [compiled __pycache__ bytecode; binary payload not shown]
diff --git a/TTS/utils/__pycache__/io.cpython-311.pyc b/TTS/utils/__pycache__/io.cpython-311.pyc new file mode 100644 GIT binary patch literal 4685 [compiled __pycache__ bytecode; binary payload not shown]
diff --git a/TTS/utils/__pycache__/manage.cpython-311.pyc b/TTS/utils/__pycache__/manage.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b154639857141ef4d9298daef9839c3a410bc12 GIT binary patch literal 36630
zisoNd+#xurD3c56re`E{gQFZonGg`7PllYrdr-eH2S9U&!|$deaWddx0oA+ka|9Kh z5U{7hpCkAS1m8#CM}UXGg+D9<)GFeK0CJVzQ3oe))c34rfuxisoi-c zGb-b0pKYJUQ;VnJ#Ns88p@*Oup&!Ak2#nOzJm`YO(~Gd4oK6XU1zG+E?$U9FRoAA4 z!*`A@9M%6S0v=b_e$};KX7-cLos3!`Z#mfEg;)n(&|lB$Sgreg&RTu6X-)Itp_JNV zd2_a`HIFT<3$@Ngn#x*)_rKNQQqbqmI_M__)*3VpjL{{Qfo#T!J7+iauCn2iwR4sS zwFhGySx3II^|RI@=xdv?9q66)8QVaPbvYSfz<|RuwpHewS?B+A>owaxx;;chJ-7+` z+B+j6EEuQxkl5YcIW6*0F|>C-$9A#popFAKCn6vu_QNNzqX>(RkeCUy&vvy>!!ca8 z{d9l(g??aZTC<^NWPDo6*uvkLgAI`hjlPKsGhDvHy2S=D6X1o^9KhV?;T_L;=!;-5 zFX6y0#A177^F%lTq}F%laYB+Apto<0jZ5>?K77o=<32DTxOhHB%h0EH60$H7HL`KEgT5NzEiq zin!Yci^_7)7ng`pIu+I%6aF5u{2kol{{#SACJsLyetcDN>{J~)bH`Twn^k{{+ zghR2ns`ggd-ik4vyPie(^TZa_zD2fgA&r@#F|43f@$7&jFfUdT3d8l(28Ho|ZB0ZH zg<5qvM-Er0KnHzn(VP)>Ah-)av*D%{9QDz+$gHqG4_qQJn2w8b@;@-mI!gZ4NB&wT z)MNBs1P2fdA$T3Z6$E$$3pUl@2)9)~J|N6s^elqg2yi!xIM~SQUsqKqVm0Q>@X3p0 zT zDOCZD9y<7xssPqP!(S6C!0(`sV&)QPG|^|!?YIV=7TS;bOTf?2JC`m2%hEfSMe=w! zLmsuj>xPU`(BkyY{5W~kg6$6aHF~K5TYrttAJ?B7z{X8<%TuZX_V{TiQvn@bI=Cb~ zrQoqVN*)h6@~8!^Z8TU%>%l)^+&`}+oQE;*0|TWD?9{$PKi&G6mq*{m9svJ81CTlK Fe*vm!te*e? literal 0 HcmV?d00001 diff --git a/TTS/utils/__pycache__/io.cpython-311.pyc b/TTS/utils/__pycache__/io.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..decdb5608a45a46c79f59a6c95865684d5a80310 GIT binary patch literal 4685 zcmcH+TWl29_1>MCec9_>KLCS)48~9vIqRemYS~f}W8#DWM#Z)imPV8D%w2oz*`3Xu zSv$7Yl2swqM9$-*6_(mcRzx8YRLv(JtwbIl{ai;IWi^r|q<-~pk%@%z)pPIcGY0xn z@64RJk8{pF_nvdlIrnxX5=Ky_)3fRAAVPmB@zG`E;A1c3w%_b;?bp)G?Q^G1rW8HTxm=agfyErAm zw1V4tnPkJ5zrRaDuIKB<|DDHQIio%3e6gR9W-ZG%xt|;&1RkJ!0 zP!!$L6ot#;Mykuxh-oUyr3!=K7aapf;xR z4zaanW0XL;D16(m17Jq%dxja$)+r0vKn34T`~k+O0FP2PRULtG;<30unOkJ4GO~sP z>VX;)S5e|3jj3U!- zPzv_04-|v(jiUg(x7@pbRa}d#!o!si_ftkn_b|}Q0P|jy%|iDv&#qX02RM5W%Jab@7CtNY7Ix}v2M$IXvrKt{hKv&XKZHBmhN}N2+ zw)h)nqRcwCOP~huHc+jVhGL%VQ3~i5(A=r3FL&@M)Xn~_#vIh!O1&%=d^BAUZQWrq zRm&_hC0=w~K)*wO@aaCM78;@7VP;k>y3oq8wPj#kyeqW^v=eAqa_Zp=q1pP(1qr;q z9}E%~1ZI$$PYoS(AEJ9(02>oZL|A)j+( z!}jQOIiJ;Gm2dAz2#&>vAk-sK-Spo>aJbpr>|+_E=|o6aJ3NfD3BOgzJK0KOKogn-rAFHgjW=Xs7^yz ztxM|q=v3V2hU&0XPz4@v!T&Y;Q-~{CIg)h+*l-%) zXk2JlWL5#5jgfL*^a!iu9XFr?RJen*WO|HCN4zyQLNE*c-z3Kh8 zBHe3O%k46_;&vI{@}8rA622My`MtT#!8b~SZ)_erUpjWaxcfr6b5|uGc7z`y0FM}` zASpD8D?!u|-RkcBO>$i=cE1YmCnkV&|9mpcEMZe-A=_f}bIwz%V?#X84d(KH{*s|7mqD;geZ4li19BK0n!W zW}ooIKJm=Kz!!%wgQt2%4hUZ!5JwILz8t^+N42_p1`1|D%++r%fxd6RO`$n;EntU@ zRm*H6vg1}aT@dg3TAlQ=ls$hKIn30mWdPCB7&&lbsuhRdqr8?<$NX>RZ{`HD2)7`6 zxC5V8mZd8wD=r|<*%#0TTl0=mko2ZIOzZj}v+>a$plp)9!Jsw42V8;U*27W*6QLcr z2!fUm@+4`@6|`JF?rR)fNSv5X-H3N5siwF5RJ#)M7-l@o&ntb2q4_;u>~>$|HEul6lnxfATZNJPQ6>o6A{7 z)=_w_5`>$X!tf}|hN?nw9y|Pt$G^dVjsnF=`0TF$te}ctl;-hPtZ)6&UyuA{@P2+X zd9jqdSe#51V^=q0S4*+0tJ13Uc&qIo%dO_&Of}%vb1%FfE(Q1RAphpA>7wNnWBJWk zz7)$d4y#f{VwCJe`H>XTN;q1(gK@R0C%0t1Zu#wOyz=;&yC7kwVaU4Qj% zd73wz#T;BkDLY@3nJQ(clCon!UtoaibPrso68t)qm?l|54Nc)K?)d4;43C+aE3$l= zC#dwttn(D_oU$y_GZ41ppCT{gWS5-pgQvX9(rJ1Tnk*Z%=Kxj&jBy!-R`^p!eZ|^a zM$Z@j)%yTNR`^p!?JN9wfKC)!Ur;t?SEnb#^>C?o5F!DfKqzP+cK9*m)jPqe^N{sE zY7Fg06wl*w*X~N74>R}6U^yJA2(0p;YtOo{{^myhX5q7mdq?gI_b0#lW$~3?l#-W= z6IY60Lg<+HdV NL~w5fJq2)O{1Z#tOcDS9 literal 0 HcmV?d00001 diff --git a/TTS/utils/__pycache__/manage.cpython-311.pyc b/TTS/utils/__pycache__/manage.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b154639857141ef4d9298daef9839c3a410bc12 GIT binary patch literal 36630 zcmeIbdvF^^nkNVl08tlgh>ME>lV4TkJF_ys{PKHb{mgDJ(cyP%?9Q0?EuHRv zp@+}nV>u&Z&zh-*>UG*LWKtf#n!uz7+T z;U+927WQlmmrPhktP{2o+k}0@&iwS@(veb@-ZA39)f9G4I7gf;tTWQ87}% 
zp3Py`MCC{&?zy0a{gni*__GD=!O{;cBUMGZ6S|<|9bM2Fs{W}#r~3u|<===qsK2Od zFZ&fe4!7%5##aN;+wDauZggt;;n;XMlq%*YqI@XSZb%si$48?n;F-&)BQt? z-H?vPE5@pR#9+|{+vap*`k?V03r4Hy9o>j2*d8>$qaP^_a(FTaEr49G1ke(+0+s}A zfYzWL&=xEOw9^Q~m~{d=Le5}WsBEk#SpJUjrzrXtN?k_EDI{1Csu(i{UGEr2T&m|v zJXZ#*kVBP{Lv>yb)j>C6xDlh~9bHcDHSn#?^Q{fmAw^x#1Lz6X1J)xR>JV%QHH4@x zMKwwdQLim|Wo}X8Q@y=;;a-+oBjp&>j}-^Eq9u(;)09`2t?+Hm^KF9fwmjcv__pNv zZi8=Yo^Q*X?ty-!6|=b`Wji|+4291ICIh!Z{P6Khqb8+wI-L>sgaCg1Hip6v_3!JV zxMqIZ?o6mcGBI>HU9Q)~4RM{nC=(tx;z>a;@uEkSQA;LF2ZZ@kB4(iboYOvp;(2;oR1iI&pyKHO4{Y6TP8lPi@R*XC#jQnL zo;rl58N+)hsm1Rf4F^y$e}rC|m;D|+>+@Yk{UW}Hfyiy&U?_4oIyLPZn3@=Sc=?oX zbS4s=n($Ae%@N1N3!(@8TZ>_ zoy+|Z_fPuY>z69pgw8&(qHk_68^2k!wMw>DA>)3HO6x|I5sL@vdo~-XQQEnfJ2Ek% zm3hOK?V{M&1uV2)^~muCBHnkl2!ApKeJy#!HgLnx}t0)cHFGRfU_JzmB{>!P4ZPRs}hy+<+K=z_iFmn5HCWF zjjRnO)&^6Y+F**K4Z-4%tlHFi-DJayr~jxVH+9^w3D&1R%q#yRb6)zQpg9KYJX`em z-#kt|-iRBpFml7OjuP*1U?Sulo8l(|(F5M-!|9MO9GJW{gQe0(BxxiR^)cl5tf;#? zrT4~)4tSsGeX&aKwLq9g@i#5=yP@EXU(t+gw=iUwGEMX2lTkKF_&WGA?8w*C z9gX%>ap(bP=SZqJFg+cb3?kW9@-AV;`Kc;tTnmDEW-^)y2ou3=q)MjD=^Ca?EKSNG zCmov!hy9aiSIU}M2r#!JwEod-XFoPOQ#L>4=4Z(wv6{0gI-y4$bR0<|3uxRUUDE1! z+_$0ATaM`0ZDm6FA<=eNvKl|hgQr|{mBiTr3?`O=8q-Ydxe(sa7$77 zx_n>0KG`N;=g+NgZT_V7XI($;T8v7*BP)l*trw-O7w3nP)phU1H+06z>-zP&`VaSh z-2XxUVy9TQN2=Sip)=IL+tVm)ol4vVfLj`ouPe>+^+~mS6)s*$-)x-{J=2nBT43%g zN!R+{qG7%p$*}XosE%SZ^ZV)1$c8k(1NSMXJj&R> zWMhlp3GWEuHyV-hfON^D4)akSlx!Nr8Vd|eZImmN)m{J$(7Jl87|E*YIct)0uW?&e zxh)Cfns>*lcgIr4@?Ft;TJoM2xgm)g61btHV~fCTk^4{G1B7i^|2@DB>pwmEuSkti zu}*Z;DzZt>p|7<)hc!i?P*xmdtNh}|++Ka$6USDZKZq#p1{sG?kMe!wVUoM`74o1? zWF5@+;x0nfrtVDlr&T?_-I{Jxg}gy2&jVep&IR%<#I z4=jC0tQnAM2HOqRl7Sd;)XoepR=b2vpXuuj0+i;}X3B&-`ERs(l^# z%o=8m7M*%6$?ZGzvN2e6m%U5AEUH>HgNEE*4jM1&CaartdYvX-9(8^dYu!cN1H)C_ zeSNS9D?ZT<3Vm@;$KO^z#|@gb{wmgbRcv)=yjT-+VAlL1G2>=Us#dJ?MG5p~{x5F8 z`lx4XvOaER_Z-m}x&06~1&i7GXnNfCc<}MaWTmD*%q$=7qk{d19iHhfE1lP-vM`FE zWRp~&d{8F#VA-}L@*4o3yitI%Fbaxi&9j`QmR73eM@6}9)5Nh+U;K_ZcIv6$5vMdx z{g_u)(DAbGFV1D#k8!j|Q(Nb^$t~!}=3J<*lMYQ;Wx4$pEdMs8qb++i`B!|K+=AuL zZwt*%`fqo|7G5Q)%NxBN@?tMJ%8yS+$EPN}Q)A4FtwG)~eriIqw_!UV?@VNT@|KrH z@s9@|Ja5M&M|x`}#6QgJnfheE=)+LIFMW?p`juT1--W@<_AoJDf4UTw0hQ&A+@6{V z2fa5#-hg*{Dl$%tPcIr00?VdxIM({y6q8C!whBM9XJQi(>3!w_i5iM{C#RxbMsgxH z>kL`}dSd(JK9So(RS7*94TXXc7G-i~;%11a-V59djE4g^!y&oq5%0BF(GBl2N4h9R zI(tDISND4}1hf}Ci|Hvo8VXWCS-&?;uA-x5FC{S%tB8}bs$SVMClaXMAhM{-^#R|O zURh*nYBJ>rhQF{_oWgz05w%PLPx*c~J5?p4G z_*s`^h-#{$Otvs;CQWK9FHl~GcTsk`3G4wVL~)m;hu7rTl>g>KdiAk}vud(CFMN!0 z`gd6R9_co0y1M!~N3vzdTFc(mmc7dcv87LH=@Yr8xsv&F8%1VI#d%@=Q&bk62)Hj0aojA;3TqODJ|^$E7V zq^*4J_}uYtzWGKbq^y+-E~%znu#!uh;XJFo#+|nd%@hz1|TMjM{ zN?VSAzAi%;!2F41<2I=g)=ttC!^)(S1d7UlH6_lJ2I&TcW#d!Mb4GFi}7s^j3IsjxR z+J+?CkYF2PO-ov97p|>Z+XQPHYx(sj*M#!xvJ-7LB-;%k#WZZ^9=X_m{Od`ZT z`mp(kLHAdNvLn?7&DMp&7>W3EK(KZ633%lH3K9lIL?D7dBSKvv=kMv}_56<*(Wdyt zRXlzc9wAnBHjO%5O|r4LN8`lwDVUc!u9{WE(rF2nB4VQC6A_wa)9?aB4G7-fnGa*n zPl24rFq%CDkICWZkKAE3aqJxzpr?P*Px9!P`?__l= zD?rktE$tPtFMdD7m%_k$K@yDd6f$(!amLH@jz_!-MK(<&S%xaXZJG{I=rmf}a|yLU zARPV}0b+jfCkUJ*aEicb0z(AO0K~Wq6%=#j(punCfMM7ZE6I{SF|$k~dHJK1!vMfA zHu-1ifmrH{Drqm4*?}_KM5aYpJLS!O4y8fd9=7dI@B6E@WQ5Y8vF7Ix8%nJ2Be%#~ z8Iq%`Z>{slYUh!aCb9Fl)OlRw+CXg#F0>|gC$n^c2)e*gF&~wxTNg{D>R!RoD>{0U z6}1aNsi9{vCN&&b-X%307Ag*l6^E161s61-dPuAuT6fnA4Sh=`0C1xF70LaI;C_X9 z_b(fThJHBFeMoX265NNF_raxBq2VB$=dGB2^2@QMUME5?)y-#rOOV+h5 z4oY2p%Og_P$tNzUYxv21sq1y&$_=UO4WW)4ID$#_{OOy9o1%ME za*qlb_m!(gsJ)W74*-Xh>`E!+o7yX)>#F3sDr6iZBTgiYATi;VJLIdH-~`PH_Z5+u z1<5i{W^nm@I3ffXF+kfVlJO*P&Hv>R13*Vbv9OGe0^zXtO$Zg=%xql(lR+;Ziq7zp z-hh{OLA1ePGsa7(Ad*EpS!D8N27hS90&gn&yHp88w`o@Tr+S{GTDWY*B|=MLdSW^l 
z+btV2=Fv>ZoKTW#Mo7F0+P_5SbgensRvm4N9?`K!a_kYfJ-^@nie~$Z_RnvocT#`H z{Z^VTCf#@ZRkW8Mp_Z%qt%CK8HfGG5-*?&=%ajYP#G3vS+V_i&1Crx_z#aJgcHnok z9e8gF>PlW{K|wknAua?PW01)Hos7R~Rval^rHgxRhnC76I+~6Yt1Hl#nZUo?q#KWl zjseLrAaDcu-AK|VHgDg-jaHJKJ6EAIq9Pl?Rd1Wst9^20v+mgh(bqQQoQenmS0?Au?!F5;f!S!}{ z1I!!Q=4{8ph)}fyZn0Uu0*cO^l5?lP+=hV?7?2OkEdSjp8?Zk^WnS{#9e(b++s-5? z*)4)f>aN~!VGPvH8a92)@}{UbmneQ*_jt$SBa9DdidAXfb(-Xh#G0P-f-|U{A4R*_ zdmiS4xK+!sc!l!H5nb8Y8&=Ax&40<6u>S^GM*cZK`aSoG+%AdRC2+ek@7Z8EtOw@= zFy~Dd*(rcvA%2U#j6w;{pIE0QygyM30Jj*FuS>7XSJ8Dyavc(w!*HN<29~!WTQnON z8L9JlG&C`)yf@UHC{8xN!|;XjD`qUVP>DG1vRj`7BIW;n3a?)%-jLN$K05w;YN-z9 zvnHoP8$xZ2!jd&@S!xXk)OMeyM58+N<@3(A8raBGV2i|I%f?zNH$9j)euKF&^5irW z1KOc)$zz+==(GkaOO|-aSM*`)kDuP9w~ds7pz6cCyv!@j7G^%O;#rosF{cYk zK?}>qRq*+6yAfR8D0u%2Kc`ILsryi#vz<9LaBTSEG5_TYXD)&^ zyNws2p)saObRaK?VD&Oh;~W~JK#YG+xqJ=)8lzNat6V67ju>1bw$v`Q_N}!ZU2Q!o zwhl_IgHMW9TTctErx};;$YRIRRjK2UP<{kXv>lafM+Mu_q@!k`U2-(9IXYGy9ipR4 za&-ODCOG=w7(>N_p^&PZ@u&k9v<$WCS0 z&R2zuV}$rA(RNz0ofa~V&3Jwv+M-djA`vQ^>Q8W^T@K<9if0FY6kEp3Pb>Q7n?bm% zLNl>Q;ZmIgWg6?q&07FB(pj-hw*%%uPQZj`(Nz@TaC!$v9*{ zk!WCYGz1+R(zL1cXY_m01B3M3B24EoRh|utjZZU$$xqW{0F5oylUg#MDw`r*Hk)m+ zrn71pK;cMVAJ za!t7Knsi}Qv;`$wP{_Eaq{Ao+?FHW<;`MeuiAv|*6m0>?77#K{CJmJLfwKR9I~Iy> zkI@+5BlO}(=R^DARIeHl<(IZ)lb4o>@V`?oV*u$FmT*f|9e<4XwL`M*p!YSReqT#% zQtQFxYhb6Jz9!nPOSbDm#;M-c;sdF9-*V?lR621*v|W{KSA~pw&dd69RD#xMXshTG zxX=Fub7a}qG0BLLj(?+iT4yyK6Y621WC$j^iKua~nlat8+SsH6vB|_OW|N)<7W);N z^zFvsXRVo~W0cuT+zLcP_X7`mBBKF*tOqK^V}Wr#5_-$W=(d=-CmP^;ZpFYxV|+EO z0-k#2A|9K$Y)ZGAWv*LV36Hn|Y!;_VE?+q1fAzw#cMggFgQ`ryYI``H!ciSptZxy1uTXJ{*p?yw2w`;y^ z{>Xa$mJeV5`1%Le7dw_+!k*)@6YEb%^(ROJnzPB8P=a^o(hvkYM=prmMTxs8WSl&D z)D#}QU;6*`KL#N4CZdT*{|GxSqV;TeRtZrup_4wM-O}MIO&BG73-m@oA!U~h#4RR4 zgq-`NBM313^@d2sSH~)^& zLy>%+>i8)nQPTu=D1fFBKT3*esd`P=f1s4_06fw?-P!^U=|Pd(lC;(>#L`mJmhHlh zBT~!JxzqFC5p7L^ttn}*T&NZ7O>oJU?MrT{<$yY{YM~=>U8>zF*vTbZ&Zz?{=3|L2 zsoE#l$t7FPqys@e6x0V)_H-kdmyBN5qu)EB&4m4P0@cp{D_@pXq`2w#U)w&D+uFqY(IF< zrlr|(^0#Nl;cW3_QLuEjnEE8>KntDF$I{PoXk}TjJXjHQF-@wbjA%lp(}~b`y9Yc#?nW-LXc*sL};f>k{F8kcL$qy$>d| z@i0-3hiQR%nD&x+))K6b8}Xhv#H`Qu!z4_>W+DFCMB_Y1SCRDX;+8lUFBu~ZoQ7fk zAEHeD3IV3_LW-d=+rWVL=+*Aw0kTMoy&Js^`3#%DB&gv-;Se;fq5ksFdo~mecxNV| zz~MbEV{`;hPxW|T18ElZW~Q-BJu$<*vA}5P=G4?(7{i1D{AhM1AVWo%kc?0ILJtBk znuBS{#6%=#iveXyQt04`uwoJ&=wr`BZ%^^EwC53d&jI|3UPY|*-t}PT^_~pizegee z89&(Op`es)0vageVJM}{1i~>pd+JW_`%}h&SI?e_*=fsiSYE^eCZku}nLrlYrGHlJ){$?Sw;Rv-AEft&J!#djN~Ha!!;-LZ0CWI7bM z3ofLe5FMpNyN9T4)b5lSnM4Dy2C_w`B7QyugG4?QWGGecoeqkXUKA^dD=6pKf{wkZ_11YP?1b! zK$dYaC@Ty*{8=PSo7fzFdQPYj>sw?E065y}%IwsVHQT^usiNY~FzG~at38<3K9eaxU z)ioyTI+EVzr>-|wW(C)qvP-&d($h`ZC0zk}3dk<$^3#)Fc1hQ5db%w;w6d;z!)l|Z z0?eI8V;gka53KnPt@;kF7)0M7wxBBg7bpiqLuj!>s@c61lxhwxpO9(>S1wC6L+ISr z9qCSbTHh+v9)nxlC102J%Gc$Pe1&>b&w=GTLeELK`HqF_QaO}(I-UgQ%*o1{waV7j zN(dOm%3i6m7Zee8%ZC<^A(KtG`3jqAnz5;(7*UyDR#Rb5u+2g4GmDn6-c1Hj zhJ>od*%ZM%0~}^SZq@b%jC!n1x*bsAzh5M)M(cG~p-PQ?x((V5b}R;^vF;<0yG)ai zjJ&-tDGR`8F+?}Sy^4l-AY=&N5O@y^muIDfe^`_tSwhFQg8y@MYQ{SO`#=(;jE(aX zEali#7)%}D*#og5Zyd30J z|ADE=AhbrwP=5H*@pdlPO7r*oE>FbZ;aUT*`Ex2igR`Ol>>FY(+VG73{7! 
zFc~5KnSLyXiO*DGt$8*55=-ZsjP9XIB6~W ze&4%&f7t)Hf8pY))hocDv#fIdcEYfDODNkTmhF+sVBw>;U(+u)CY@#Pn}1*y+*d{C zh~yj*oFgOyxkk)#1%A!$f1~nIhvwfyH!|N-_IX-SJr7kn*W65`9PR(yTy?bF@OiuO zXt(+E?Rr3sx*HJ|Y@hYN;YQ)4PttxTOOx?qFNL~brJ$XxkeMO(%uj{-sN}pKELZUI zeu01a7irR@rW&(fVWgD5Z%Csh>==L;it$hKICE^5%r737b+Z|Uz|_qd;<-42|EDOL z{|F%E95{FW@+tqNa~J(bPFy&4Y%rzYfw5nlJNA7P*&Ld@H_k&**nl%9AgyB@I>Mj) z&j7UCo{}sY%8}M(0|sb#%hEcm<+fS~-K?!}i>H^~mb#APZoRtZz5DOSe-KZc6LudH ztB*_7$LCJcihuw6Z@>HY!fnyfDmhwd%y|BI=Vu50r2psr!hthl=UJ)qtZ@F)YUd@P z^Ab$h=A+39_xx_?*UUxbeY)}nP<4eRw}M(-lm0ph*{XW#imY+T5(;+rV2> zE!5y~6l`TQiw1DILT)d{jo+jFn(=Mpef|BkjS_08$~Rkjya;4K&Rd}K4cbeFFX1#6 zj+qDer|!o3z1JFVWLMtjF50vfVeVpC&g|)WVs0#OgK4{1ENSVj9wN${Lxo`zYePir z^TJrK5t?Cxu_a#K9um8qf*5C}y_8Mu6gPp*3=WHl@JnbM|8szpiJyQ^%5n_m3(V$2 zmd(FI5grnVQ!I`)Hz6{+bzYF~vhism@zVeq1G`7$p>Xn3`cSTUT|GOaCPe4f_^}9Q zS%IwXu@23$_}sW>37cF5pfe0g9$V*!AiU>%BIlDhA8wvHD!%{ScfY$3TRbQ__DYVu zX?}l2HCt4zVnvr!(M6)+oBDNU`Fq?#o#@;qIkySUZR=HA1n-+-RY0l=2+jaoR`)E{ z38j1BL~gId?G?Da8%0)&E9r1AoE97{a7m~eIXhRKu-aX=iq1jFIVdofENe`R2xUIF z=ftT2J}hg6V{uwnouJy6hD7IK$$40C9{%Q=uN>8qqm7#3$^s@CJG~5Ld{Wz2boSS6 z?P_84jw1d2$3$$ssR2QqzMHq4}sz6UFH3N?+JHfrOF;_B?}H2p0mhy$|*h1 zCyOWczxOtT$>3E$W{w?41SVUdNiVh>lea>_9`A)v z28R*a8;|z%^x))_;;{%AFr)k@C@E!yxB<9=$q{iP3(j|es1fG}$oT4CB1+m0NoHaz z_=o?LQrqNu`om!Kgi|GaXkscFBFRiGa$k(zyKKl3f$5L0$lBv4b9%kly_dNrd&_vZ2|#bXD_W73-Xyh1pd{m*D7PIPWf) zQRR1KC)&c2EiBl=Oq$!XYJ>Xp@_`iy4K7Nyi$cax3%Zd$(u8sGr@p!ENE& zkIa=SP`3gto_x-cV1-JF7ejoahnU0|wA|D4-e3u8tLHte)_UHJI@{EB&D`gXUv{X0 zg^61VlkCOulHB(IaZ5?OXp9cAu?GHEpy3Ruou;$d(&D`Z6JpuWmkc}7K_t=6p5(;P zJ=1xowJxpyB`Z`Zs$axxZ+*_eIIWtPp`Kekh@R1e_k|{X;X7FSLXgWq!Y~!Z4!kw1 zqIL`m>BDr@7>|$f@QAghF!-2lcFuuS-QEt8VZfeu`eAem48L?1Y-yXu8D~+JD%((M zM7PX9yv3v=NXlsRv|qvK5v;HwKeT}$F`1l(p*I*6Y=;E3ITq%<7YJiB!6ZsJYYiu| zjp9HyEc`v$7H+15M=v5{vLx4p`GBAZJ(vzdmP8!Pn^X5xZ)-TlksyR5I{d%Jc%t(* zDJlOKc!+gmsgap!h@3*;9gae-2A#Id{%e$us{}<7t1e91^e{;m3@5DU)aVr0pO81D zj5uKKF59+8T@)DodYl#5z!BR+gu|(sU3pFbhkLN;8C7S|LNL z0Iv-sjey7m@G^A#Ji0vx)^?v--F@mwhq(K)wEMEi?S#1hT;lTL@vo}f@7;Po{DW{} zm(;X->APap390JDoORu5f4ukm{qOcKbc)s%$=dR@uG(@*pLA3Tj>h!{?}xWO4u23{ zJh)sUHVjA&1M}APib^m{`+v}nQ|2mqq>3Io3<<{-)zmNCT`~wYyTqDZQVk5SO_gW$ z%lhON@5lE(xc5g7K76paXLZXCVapD7ve6rhCBoJ>WG8z3lE*K2{7H{@&C|c?>0gdL zIUu}#Q}m2Vo>750cB0V`r5ci*=s6>K&Iq0}8&)bxE>MPFv%-JFpxS@4dqxfAW+F%a z=C`G~N+>J8mM8&`o#=X9a=k9NUdN$QYpy-3u02b!l><+{Bf73hu4{tp8u)I`DritJ z(r%;3P*%fY_O7~mmrk#gJQ)&QS0vXJfw^xsiu5eP{2=PVlp+sduUmCUcFN8an^UH5 zReTpu6=8}23L0tu3Qs$ zotJitH--}O8Pjpt6c5EL80w{$Q_ipg93LD=GVg3T}yWa$63v}_*IG@ece7fK z5O#`ZNi-BBURR@r@2ikVn{Wb&JEb9eX7!I-k&_|K|Mh;$+=?C#$MugK8c|u!PWgKv z336iO2-6S)amI?UcoxUjj$^aMaCTRvXfa8Wu)RIZ1OzRt~zclgWD-dMp`4| zdA2%dbGg9YU(N!@&Qrby73MB4z>VyQjI=anvtbe;RehDE81|&GQW+~!aZFVpt*(}N zCIz{etVBh@{GVg6#kv^Z?<`4kSO>5}L&k9YT(Ce-#30fFmvDfc^8CA_gR53Y*dMCo zktjH3+4JyHC9ly!dW`3%c-UHy0&+&#fNGmc@68$FPaiZ%i=|c?#g>+s_KYjlPJ5A1 z_>Yk@P`FE1Rg0}6fOWm5?!(%|kXW-_s=+Z=M#}&&9KpF$;DrgE^MZq1R=uus>7iJE zM5;eBUxJ-vS?$8aYFVdH)~QC~7O~-|)Npj(`V9i?syepzhw zGTT%#5=h?hj)wRs_NiGyNhU<(?R8)ba(hLd_uTz(0sPOH+pH)RZ3YfSW>?0fh1%V{ zTsYeUW=JnN7j>-&*JYE%963Aum_3c8W$U+?O?f0$OJ@dyj|bD?4z&+ODNWY`O9=7x zW6r=#aD1wVu@L0#sXge&7cH>8W~)WaEiXU;9Le3I2}vnc$&Z++6$?q#W-5_chU6@% zsj+RU>^F~=s>=;ib!M#PIeDn!4X4Sw>^jN+_drEBVpz4;Z!eLRLTUZv{|5@r?BG>h zRFfVMn*NY`ufA{)z9~Y@O?lG6ndaa{u|ASRy_<%n)ARomRqH80gjlM?Gm?Ky)s+8t z@XIj(P`54e-_mMrWK05VW?_$U;^Q+PoPnHm&k50UQu3U{&T+lD<&zUXJM-f+OLfar zV)L-nJS=dH>unvMng7K0bKBCjl^(I}ywrAH;I^sOaN)>BvGtPFdP(4#*E_pE+xsVd zKkr+%3ddd(J71SNUl+Kx^_I3zZv8C$hJ+GP zz{w}DPJd0}UK6<2lmMZ2m$2uYaQ?c;-H^B&0(V0jbWyncrpN^(E+BA$oRUsFK_xCr 
z++~5gtfZonhMqtp?5f0F6}YQP04wQ&aLF%nZ%W*o0{5mi=r!TWsK^B+E+}w8RY^ka z9%0{R>5d2kHw6D3k-IB#cLnaQD$BjX{!7AZ0g<~YaW@6-rYh*DFz6QoVUe4VxCtTS zWEx&AeNbrlzr{GAQ_c9leL2!VOWE`PUM)HXl1Hw;z`y)s#}XGN26D*;qgvpadzdba zvso_umACK&mYr9~FIQ{MNyGEk^<2;wF-If?l@QG1>f733wHHoTjq}@t7{gn%)dSV zcCx8+@#rUg3)}+t)ZMU9yimMOqT|GOmUe#fU8!O3U%12ye8r<5GUT%T$sPNbO6E^4 zTw5&v&@Z~W1y}de?fbMIXo!1j;x(bV6E5khoj*<)j^QZ$hV8#>7Ay8j6{NINwlmqW za}l8n2Sitk;A(l=z9apBlPTA1o>iM?0S2J$lC53HxD6k=g7rrhlBDmyy|DIv zv@x6CowCe*uG|v+VToH3q~DzP%hehHCVaUXsM>Z%k%H&e0P^S^P_Fc5i@)Q_+(Wr? z3lvN{GtaQ8fVpa+LqrOv|6W#=(vqmk=90bBO6~E+->VV~zyew7B4_5<;?}q=ZKmmr_NvmsUmF+W@rM{SFV>^s zO?#9ij(NXLTC_>~bEWFq7V_Ou5<;ebzt5b}0~0|j=mGmPk0Q3GwO+Pp%-BLkF2N+E z7#z(|VB#5VEAdf{Xg93wZh=(~;z;)^9SE~j(fH9%4JlJ}Dhg#V;|x9xmMS6WXFFTj z{*uUoRMGvL!?7BuwJ|390q;?KC<fGukBrAc_@){TEU3)7dK@pILB257>qSqUp;TbNUUV)3R-S>$XJTlkuNT{CGluY%w6VoRnHlid#=f zTThXCY)w5Z?i_U+uqznXuXpbd_6^|>5MlI|(0yC%zAbg%{-I5BcC33E(p%K}W$rIa z|5fQqMBFzd?HhV>RN8k=^t>v0UWI`otZF6_m&CFTsSG5D(Y|dlnDo?tJp94%;sMdK zNAl2zbm&_>0P};%>L!$1c}&0VZkF6T*WCM8-TVKdX*u{W+yA^>bPr1ILBTx;G41=O ze{gysiVxc0b3p=g(B`GD+Monh3>5b0FIfYzsF;XQ$-s6r7!84Fu)s3?NgWF0uqt`DH`1cd~Y0UaYTdrH@a|PrxKp zVpgFr4E<)^Q6)JViB*r5WB}9r+qZaIDBlMs+V)Gf{eo@(dU=ghPUfUI)!ed;3gL9M zbsyJ$Q2R%o4?U2VTX1>^z}%^ zCsuLLs72g+LE3vkth*@HT?Ex!QMd45wW3w1XkB-C7OsF4cWuX~oCMeQZ=TlFW~)$S zIYiAkgeov|mf3e_Ke~`UfTDZ3Mywx@>IX!}pyU{&?mD#Ytok9JHil8EDmwcmXTRX= z|ArbWXBfGRd9rY{>{zev^Y$a2!E(bF#Wp;AQEnWpGJjEJp!*s<-Pcyr{f??*6^1X( z6z9td*K%UV6%*HN4=dwY&I8ZG1z(oPQZtdXM5dOM18E-Ll(gQI zI8^5jQd=WCo3*`MT{nsSLS((RWwZ9UEpAsxZ92Fj{Ud*ETo9VGYGQnoTD(vXXmi%~ zFKzCKoaMhpRW9*=3&{V508y#@ZwdUL1pWg7b|455r2KyZ$fHOhb5|(R+8m0M|Gy}i zLXGnOH$7%~U0NEH|9>d-KNC>VoyeZ3EWQZ4nG^>zP^Lw~8|cw;0rV~-j%AfaiP9D$ zgprprU0KR=y zK%^9zXb496xbOHQnJzc(q?PG<(OYkRJ=i@^(jI0@6!FS92CTkAS2D zriiX;Mlsk-)OtRw{mId^@Tq>q@}%(#o7j9-YCbF03`;e`psFq14CIiN%jf^B4>}~( zLsIn+h)cz$`Rp%u{@LEO-hovdlYdI=9hQ2B#p-iX^|`r|Nk{dVB0d)M`_`=!6{Y&RR>k}ROB*#9H+b?nZg^bG(g2&A&LJ$uMAxQTb zLXhrvbR26nd|7A1{g=tHb0PHLDjD*v}}uPPLhIVUv22a%hn-_?0Sqfw)oifpj31 z{2vbM%26%RqH!1p(g*caL<)GIoVH9x$PiCg;mLvHXb!>-;(@uF2@RelI354e`?u;f0~o-zl47}uYbIiEr~Yul9*lj zau!Eg2(v5XaJy5r?wzHuoHdTPGxAzU%$D2*vB0|5ZcXb*QT0mB-nk=B>$iRU#s_aK4v6*JrTXnl5wxnVWeqEz&p zkUS^Aq<8_2NDd>Tm@zUcAQ#e?P3YHm>=ydY39pR^x9$o%!s3pwv?Kh(Gm@+4?~>PH zMkR2NR2qCucfU~{vjI7GY}v>p%7*>NjWTp4>(mI%&(F(_92j()zHl1{J(e%F=m9kr zfW$Rpm`}hB$)h1d!Cn4OyYYaW_Z(!as-sE%;Dq*=zbNIL{%P??1fL*=G?q$D|Kj@1 zWAk+y3Ad(9E$&jJ)GU7{Z8Z*GVg%@M9BpH~8+sUFg3puz+(SkY;!DNmiH9)U$DwBi z3di6C6_zIfuld^6)Reqc`DBQU?+R5L+`igqloBek{rZ3!%-lH#S=>dpPdog_DgO1myfKr zy&|-|A|C@nXUZ_aA5KIaK8`aRSR>opQkY15Yw+=f@7X|rHuS#xP@;-_OKc9 zkA?%0$Y=;3H}|9A&4|QK>O3qnSZT~NK6L{HoWUUb<`G_r{0D~Pqq7@DHWRnuDK)|7 zcr)NFHbLa80H@PbyP?y9W}^vom;!9Y^r!%5tLYw8a#iqx-fJQ)T@@VEn@kV}E1=Y5 zg4MAC@HmJBDhMB(4X1$8ViTS8uYyipars7(-87_MG;ZkVs)k=15d6+{Il@MR(R5S4 z1fe@!R|e#30cbOtM)Zr9C}c#xVvw%|AmUv@aVltIwJ9;xCP1v>k_Adl$k*+oW-H*P zo+X2s5xyF4s#KzqM=c97n^0>2C5SxAU8a2-It6ri}!QZ**9*#VTi$YT#H*=qum6hH|gk3l(EgXzGA4nPSbkHd19(}Z;aKnWv{ zW|n0!bvA$!l<{D`S-_AEn_lfT{0dDqMwa|DHtA^k>u8diNdgiczsVDdhxqFD3=YH0 zZlF`eup@;zqKrC-2P@-%e?xcwf&gvrQzba)5+{4&u(w;X_6iYxscQ12{31QKLX#nW zeALgLcrvr)Cn-wt)Xh6ku4L8Z|B$>}2y6p*#?jZ{fED}sZqyV`7ugO7&c9xt)L9>~ zzogFci2bQY9W?E~#r4Hg`{T_DAe5sdGGHe@R`Xpmk4m2Zcf| zsoO1R9ciDO(I>6FNgGZc+ncoRPL|`~gTqPJv81haqoTxFgh}WudSu(EJ!jG5r5A?t Pd%k8Nzu6?5_2~Z#a;kBk literal 0 HcmV?d00001 diff --git a/TTS/utils/__pycache__/samplers.cpython-311.pyc b/TTS/utils/__pycache__/samplers.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d93fa4b8d8fdfa7b85aec7b94b680abb934bb651 GIT binary patch literal 
12400 zcmcgSTWl0pmR0?%w%fF^4fxq9g8?@&HYPkB<6tlZLc&07n8cGzQ|>C;4ef5HstmZd z&5{+Xv?K3s6M@nrWh8$@S);&;kBJm%XIDr&TII6~)hex)EFta3PJUL-Ix9ul{n&Hv zt*WkS+e{LTwz~S%Ji;A2`^56)R$#$vZH=L%Ui0U-5VXJZ&mSd z&?kkL64I2ER25#4-kX7&IG{eukIShkUf?I<*QF$%fI0A~aXy|D<1tC;t|A=_$`eW- zxx?;^GQJZlNBW>G?Jp+TT9Vo^b22z5Tpj$cz6Ibu`!S=kaAO>w!S!?VfmyVbSTC6j z`x(6d+$v+?O@@hDV~ET!^MJ)g@hUUZKy)q|jVI%3G+Oc-I7697krLx_2)Zo=qS07F zP?Ts?LBx3e_74L+mlR1>dS(S>vS&zAuBoZ%o|CDm@!3mfdtx(+nwp9xQ=+8wjEoHT z%&75%(xcEdR=THWA8UFX^h zTzhd_IO|ymX5qIAHJO+-TX^unpHT4EaD%Kt*nz#n*(s@F4da++u3FS_($4ub#5m~> ztmHcH7#cf$3I4150o-TLF)R$9Vcy+5$IiR18pAQLAWhc&W#(g+WftmiS>zzRj7`0Qpuf=k_5E}EX4_leuZE}c!&(V z&Z?644LEuaj((ddl9B01kG4Eg9v}E!zt(eH?>VkT2KC4wlxU$7I@ID$5FFOfm@Mxe z=*_u3GR#$r5u8hB($*z|lx4^)+^ID{%90t7>%a4RMM2Z-iFnY{J@r>y4-<# z{aT3ELwteb36c*uq(V}dl3*uQMWa(GaVCNH!D#fo86jbmz$juW7LAfUAh$w)au`6V z#@J;sDUpaqfyA3}JVfs}K~F|SK>&vZF#teH^S9#idbt$_!6ujYg%xHUz_H$w>1#vhjR(0w9O|L=K}$Da+xiQvC%<9+zUO zT?q*P1Cu6l(GV#;OXB~M=RYKo_ViU|B}OPZ6;CB>5FrUo$IgEDaRzF|iMzo-&VOMGWMiHbo@ zsX~HJ&PjlJO*hI)S;C=}^WD8b&9kX8<+YUAd!IFI>WA>ZC+)Bk_;>=)8B3)SfKPm!ACYIIE(;OZ{-A{uvt4`&`jT(N6%#F|jx2X6 z%Bkrn=nQpjr&y%$q%59{rBLsy5?wXg{u|`^k<+Crs&sJM!~)1{#^Hf0^55!XsCb|K zAn23Rhg;`SC;j2)tTz~;G(&Ej0w?;bCm0>?9T)y*!f_lx*sYm?7_QoPm4%}HAg1k zlzO07PG@{_LpBKZkoyjoaaF1?Fbu4jB;#0q&CEJCWH_QAtX zpWR99XBPl6yWv6`o=;Bxc53O3FL&r&16ueMJ^TuN4UkMOQYF_w3!+3z0SOpFk_8pK z3s?uGkDNsfsK=}q)$Q>7NmOQA6N?!6-Mw^M+0%$bzw&S=# zbz)4nK98q5p{3Fc;_`bS@qce+0-J1Bb(_ZR*16pUZuhg`)q}-3sFljIc$M@sZ6hz6DTgds}aNJ3F*+K70Nd;Tp;$Umb?4);6rS4P)W6 zmYrFT&NUXBcYxCnDz?A{A9o(b>-GXU*KQe zU2~oQh{8Fqio41|s=ejG3`@N$ zvTm;TkN*F!z{2N%7fMTVD6`L-f#|tLxI&(MUM>52r(vz<0+|53v$%-ZFCK#eK9d5d zdovIpsi+I>9c{rDaP$w)f9)z;9nW6@3;!zns5r9u2kr0B#%OvFZH#Kg(8b^?+jVi} zGK2oD180uvAtEW|h>PlAR#A}mdFsx|&s9d1PO_zHFG3FrI%J?}YkTRLzj(hPC*0k3 zZ=3ash>O!B#FmK!<)hGmdX@*_uH>T@bIE6@AEcm+A%T(y+;&wyfX{~z97cdn6eu+G z^GjZ$OG3g)girl=cD?ik%zz5BK>-#|7mr9p}K#$efOtr zk7m9Y{<7isVeQ}<{oom`=d9jy_NxQ>IZ10D*W1Sn!7!Z9+IJ!(bX@HCv>V8$)%NPO zz0i56A?sQ3Y!0MY9BdGk$w93tqBliW7=KNefVHLV^w)bghZy~g|xV0r$jlW`JbIwbLU5@lTZ2My7pN5|_ z{2{CzyPzMtpdG%bAHG-^{-r)VrA3l@Bw1+KwXy@Jva*Y*ZCLor6^09Rlt&o+@K*NL za#L&m(r=nSXliLv9@Q^TYaP>2s<*zEJGH#6Ie+kO`d)f*@S|I~TgC8>e6{&o z*$lW4go>fI?5&j`)3_^lYpuFN(Szc3bqv4Vy50o(&`&;y0ehpgQJ%Cg{IKzt2q zSpvFJ$vB1k+Q=CWPa-+cG- zy~~Agx3;DG@n&ty{=(r`wT4%rwBn@3t%nK+lqw}R`1~?T6%cbMA>>N{sGu_fLny7% zEzd#qJ#6tZ0I>dGo9HbsB1fIoTFw@@MqpD14sTx8-@F2Ev^qft7!vf5P~Zd;F{J0n zX*mKH`2_?!0ocP-=%&aBNO&y+H!!OCTX9ukmUnS93bPRy zVt0Chhb?fPb$Q1il=Y_}L7SlCP&~-YgxS$ALRL58gQi|#Z`uQcTwKI7fI2I`4b`v1 zUqMq2h*=%HeTHr&c(xAYWjxl6j!7QOO-SBx7m1F!@6?R?NPfaEdLcT-kt{_3Hi!?h z6e(Gi#90DQ5+4NY|tF&V?)U3tZ_@?Oyw|; z0{BP4t>HT%7ERJWP_H$5wWbhLdBF{E;sHa2J25i$B=KAL!y= z>f-lz@x5LAzIQ1JOPtwD%#xD8B*-j;75@`%Ae>bZ(*6wI{+x`@ye;FCalw-{rj-q` z%w-UrHuu0OsVn99oZIfOWWaT}!#s3X82oNz=#0CvEntEBjT}ZW$gwN2keGu*aAO5$ zT8YoW&OuOS-n~c~SAvYoiS)P&%)~a!OqAb&1`!Xj5~~XNc za|1Jf3$2xI03hQEp>yD8>4Z8}pyR8~jWLO#%+JoYv zQ^y1Y3MA2%A|l$7XfjJa$u{})1J_U4av<&G;B`?a3_S36FbpvjZ!=5nOM?$PZZlNE zvocP{u`Ea;vMi`*3@g0~9AR)L(6FTk5WE`jWQj#g(WGX%d=e_GHOzkkk#pEVD*&Kz z_y|3h4&>Fv!B1{jFDUkYTU5w{(HgS!)b#e@_LIcYCVIs%ope)DB3`OiMHqn8E_u@N z=_n1kpo4Oy%pRNI5fWT>scg~sLnu?`0AP&8nYV1vxUkNJ3tSlDA`pTI6dQTCfL&^E z3^2wQ@h6@?1hhl1>4#o}M=f+t51q?;mP1=|;^L;oO@&a87V6PMJy}nY5aICwn7bA} zqKA)Q7^9^?&b6F{`<}>`J#ga;mD{a4=LRaL%U*?6@;HKubQwuqtwRjrPJ9Fn6yzDq zmDT5l^oHG9s8bJh7P!toJBn!9$z=H|Q6!epmdf=a=G%S55bY^5n|E~n*21j<*G3ts zO6IZGb_ClI>_C9(Uq)h9B=6)7d{=1+{R&?qKoeC#vjI56{H?fZ&=NurW?p)Eh4Fch z60mr)e828pE$amr$pVnW183=ZfXs*>5(6Bp19=;dRrLKTb9j50^Q#<^tt58YEHA7vrXbCV{XkN+ z-0>J$Vv@&)5%_R3R 
z{+CjGViLIMP5Vbfoj1r^r3)yWG#_l18`^&5E-1DqY@Z}`8iifNv^|DYrs?A@%EyEP z$6f+c&*F30pW@-i<~W?XL}f!&G;d83gef*H?yY( ziT&q3PriLP9SM~6HkIT^vXXyNPy|(#<;(Cy!)r*n(k7EljGY{yp)1(Um1{l!Bu<&t)t$-?hq>s6O>?xL(wcKH8|3}b9`3nHwLxQEwnPAzrxMwM@aYuCS zNP#<2VFGMxFLd;2+xoJ>i)eKJ#e!@wNx=JbXuM>C;>K zvac_P_beTL{Q8rfTKJ3}KJ#4N4?z){!uVgf`GEPrOVb!&?ZY!YAgG`E&};ex^WI9E zy)xwM@Dza8?CA!3c-_ur4vqp}{MPyzbDoUH z2m@_dm^L_VeUlcABFobVZTyuMn76fMe*$;48B96>_tcfZz2@{Jde-L{Vj|h%M^^Cp z3G|>~R0X)G(A-DQrC9#JCr1~DKN+xItu>)M(1_URJlt3l0xR8_%cfR3G0&-uZbZaL zDA@x+X2lr=+Xua+f9fp$1sW-B0B{!JK6)KE1WQP+LA%yx7KXVzrh#(9}cjT>TV)GqNkUI+9Hr#sTcHT@14hGxi;R z)ifrVpV1qnuAKYZ^geu4eiQqC8Zal!Q1VO(>SW0ilj4arJ_#}So7dfS#OnW7=tUU< zfU&#K!T^fdBg>FJQ14~5`VPImW6AR{q}3lT1P_DbvM~GG`uxZz?`TbX^rk(K2Z?G; z@YUZjJ#?(V9ito^+}Kk8}(?Ov&|mC*9FaJ$b7yk>z~w-+e#JV2&Q@QZ|Gbav&l z07^b}b{ech+gx$#rABIlQ}Y}5t7(JNt&NhOd_+S(C)tgU#KR(v6QKmA`RJG!7lA+r z{it$4ejgq&exmFIxZ-A6w#Zc9Cch$6bDR8%OlM)^^BMDt!p7%{ua-4Uc}jaq>89OL?HqMEc#=EAfVAb z$zYNIIp-FuSVhTh_vEqp*kbY4t@}9lbjBnC6(>U2k!mP=rndZr6 z;-T3;eC{x52+2{&x5~;O~UL3;yo7nei~jskTLG z%D^;y?3k>H*D~I?Y0Aj>J~n>|mH0}n72pBpZ`9zUS%sYyE%0LX1R=s}2eFrGNnw?(Z7)U6%DUCd^t17h5j7>0)2*q&OZ z^W&OH57PzjAq>-}gZOTM(6GKInJr8&r0;`&Km5bkKUH~c%mBdlE>d?4lRm(*L8*Sc zj2rWDR-(p^0)}L`NbF`jc55~b!!5bUJHfEXY&3II@?Zcr8;{C+gftRYK9j7ogg z2yZGWXHD>CR^!a@rZ6-rTLW*kYFsV6)v0fF@MclpEbwMk->mRP7$$~fZSYpF#yQ~4 zuDmf=Gl%X??Fka+(#5%zw*&?GJjxqM*`U5*`@G5sLF-m+A_N%3o+jOk10 z*RRyRg*?qld{$TgtR3R~Y8*AKwv6r1Y*WH1YPwbN=L3pQ`_zvZXFEwTRm|ZdL)O3! zLXF$B`4m5lurcen3RtpH{Z+itz^{Et{p;E5OkKAAHpLD>KXoavwa;ullme9Sl}ZgiwwG|? z1Lz?wRK@=zpueOI2%Su=mbn0s;jwrs&PJ1T>DRfZ^u)wCP3E?#Xe>@orCE4`NfIZ; zLHb07&H%IH=qL?*j*g}n@Rd24jV@n=7m+w+vYn69z$N3coAeZ$o}pm^rf1po?F18N z=uG@hX3(Z)o`5(Eq7xj=B;+Jeoivn_gvp;yPtjZ)re3HHN6&GI)D8OD>|O3UbFHdG z6=J#Mbc&8gp$1BB@XSV8n7xyCHN~HpqVJ~X=$kOn)3G?4i6&B5^;iPBCw3Fk$6(gy zpi3|rOKQ5AFZGr%7M0!V5kC&kuuvYLdn!(7*V~8`_;CeWf8SsYz$rBX49+Eo$y=JY->9sYZN8?k`xg=BMPFG8MjfPa0^3n#aFGeR_ z{7`p+#7qP1+dvI~LqDQ1s?hQyOZX8DQN$j)|9U!|)ZuwN6}_H}4>1W2pEL?bgSv4( z8clKg-gbGYQE?2`O$;OmQv{q9lae*B%{~kGATmf4Dgl&V4zgbMQ3J{@d&aO zsUba=fh9pi8&)?JO|V@2y>Pcw!_CcttZ9kB${~@7L|BwASQIK)lqFacAy||hSQHFc zlmJ+ql~|nTS)AfooNpy7k~bk)$)RAq{Az*jmVK@=W<+M9DG*IrSnp_rSIMn#5jEbC znwzOevXTRnD~UMC&O~mZge>3yu4F^t@(4>cQCxSiIQ2;mIZ3%q&`~7E?U>Xg`^sGz!D)qvybUT$H7x9s z%v?M<^+$9r3PGe9y1NzV=) zPR~r;J%3^-HpgY4jwz7wxgnTihvqVgBsWxKRS+oR-9fSC6D|T+fXM3@02R=L0r;<(y zSKfmIn69K9TRT2migY(VgT)|uZ!HEp zO^=yqiTY)bzfkT!KFHjxL`jb+K=$|BgNidHZR|BlhkQd-p$ch`nRwOn%Jt z7AfV-FU~A&S#Eg(rR)>!`*{04EF~rO4)brG6?@N>bNEVBEwrKN@-Lm>T|ICY-M$s~ zrh@8#Wl zmDK!!331@OCV9z$rO9P}2WUb`W~9@Yy0CyCTzZ+{|J@)s`la5qnl1}M;bWs-aD|nz zY5p-#cUcgTMK6s+f)XN14L1_E<0(}Z(TFnZg%PYB;r<|aWTGqrR~QS-DGab*b^3do ztXr#unai2Gv+=9L^ktx>7@Htc%vCkZMA#@<6nIiiEE$icAgG2&iIR0(VNziOdjL9v z!{*cM2)OJU;LNw`$WO=DW-&c7V4y)Cd$NIpd`>i${O?| z4@(vk8_>bf)IkNYP7S~qVQ~TfuqpZ(RR1(6=?H=ly~H}1K{M7{2Wd62EPu)~LPY}I z1}hMlCWUX~dc zO-I*qMKs3LulB{?!|0G8aawSltwtjG;EYiQ+wd8 zYDz0l;#o{Q0}fDA8`avAe^+SPA-2F&b>F$_?_Fjdr+EK9!M{)R?;|sg#eUzhV(BSZ zdh*Pp)RP&(GAdd|dCO?A-iy;lCouN=XJpnc*lE#D^U7T^K{QE2C^V+h1r(1>C%h5V z;#Cx>SD1hrV?%nZP??r8*U_a)fsjh|=QpG&Eu5@@aAl}rMKey+2`Y@~T4({ev^C5c z3H7R}@~RR=NsUi^3qeP30_uf-*gXHQRB9}XS&Ed8YxpRv&RHS=YM0L!<21-eb2D)o zs1&5)3nxIEfLekIjonJ6-{GyHbhL`|JQ9P@t22{XGPFOnwtSIshQ1uT^bFkpR z1tl)S5wMz}g4`3oHA>D3x)RyY9KRh+j)g5Ob{D%79n>sJZX$dW5mGLJ?Ze<^f~Jau zS%Jl#z?=mCO)~zsVE{pWL?adRhv8+uOE;V7Kq^9O4$^Ty1qbNBMg@94+x!rPBiCw6 zP;41o&OH85XxT5e>_@sN0u4AFvVfB(R%}HjmiD90G%?&~hl1pa& zI2(~>5kg>3K|US7SMi;YLKVM%6H?E&ug7`>s$l#>NW}AX4`Zz#!AdRmsX?$Jv@~6l{&qAWXNdPwHC(X z8huJ&`D*Ol*q|&TA2sw!ZA4 
z3@Sd#mlhUd(K8U#yH{#Ft?mHjHLOE{Lxg3uPavPVa{|l-7?yTQRi-{N{aK^`8U|xM zrE8?X|L^5h6vFWOiAHQz^n+zBkTQi5a@VwNy@X3dR8@R^g@mf6sm#V$iN6mKSF)p6 zJbM#V!!vQvGReLK?C*}vZ$3wY@kLSUG4Ux-;ekLRixtS-%M6q~4hS{zTLXI*p6mcP zVTUX-qZk3E2hrqQ{0PgYS@r^^KmmfN>!bnDih*WK-Xo4Enp|YOWp-HUmkOhm>TX84 zXePswq9i*JP|A6U0#K2>EBe9>B=4UVtXgDo4puXhWmi{N^uU&l00ior9*m3}Q z^P0;mx_U9|2+^~=S+KvUs!P^n@r5w%%%cyVI-gGv*8R3 zSp5btE9@Vp1(o^h7@K+it?{xpN7;!W*jSOj0VB&&jg0+d6Ob?nj($G@`$P&4Dd*<~ zB^!y3fG&rV0@cVaLm>;>v2Y!G6?#&ZzOR8#YOPK#gTgw=3hyYU$9WSJHhbZrRoGmC z_xYVV6n|}U`!@(H1I|6_+xmvwjtBemr-k~hV*S<Qn#IC(!*E{&$!IQgWMNs8D1gz|r^+e@vBZeb( zx!rPS=`=>FNaBYPufoCC)EF_f49LO>FoV!wqAIUHGW;1DNFc70ODBVCa;y_PR15>S zs{ge_2-9P8_4HfTHdWxfmI%SrfLsp$a9#8_&^_vet!(I#Ue!!&+DThUBlme_y3(qd z^hRKW6i|#WXRYJ`eUqx1Qs9!ZD|A&|%>POKlZ-rMQH*=LKT7$eMZ8zc>+@%C?WU+MK6R`)*3)OhWX|3*(B&}P>Wx-zd)l4#aIHgrmB8@1JMxeHOgp5Odu5q zG@v5dJHQ^nOjK*-v7uTYjXN(x4$$ncGxewtTG+^Lg+@qq<=sYj$5}x#qd^6S9aGjk zN zgPlIdKMs7$^f}N1${)YhV25MWWfYjZvvBJ$rPKN zlWJfee)g_pOMtNxmx+Sm6YTE+sel#qENqsuZ1gUR!$@|_m4!j26tZW9Lxb&;Rq~ix zuaZ{`oBs>MC@T2|%I<-Mz;>a&L#*#WVY$25(D-TJulha-KL~$1^s6DEAtW}077wr1 zH|Anr>=5c{v7W|+^lF3eLI2YBPqsYR0?`(CZhR?&jPW=>ewnvi5iD0k%N6j*cUc(X zh&dD5SA#tOh?1~w0>4gGkL@7Dg(UxaybiQBI;2O1<2dzO!x%7*FhG&qvcW9O&Shg3 zr4R2wEPEG|*kmli>QrgFvJgGrrkzZxq5V%t!J+lzJ*wE)^5E)9d@B*1&nPb(|}s;V`hO(M=S@ICOW`aKo_R$XeNModT;$62%>gjk(o2 z?&8qG0mDgMnYbg7wSlo^SJuX8tjD$TYSy;#E=sk5EoyNWobY#Ps!TwE9LRN$}&A$)_{ zF#-T^kUB%{~S)Vd{MP;L2|nxpNb*e_Lt!tMvjwct=N9rdQBa%!=Y}g+ex{ zQC7n<7Tl>+h{~XqNBQce*nJBg-59#ECKy8;tyjX$SCB{n22VX_1vs8Bs@3(LkAO}W zJRf-8H@r{XF&R3)l?Ma%C^6p>I8wWz9dkpsTkkqy6mU%kWLZ;~Mpt zNAyvZK2Tv*4jKHcW8=~`K75d^+Ts9C+_caD5ZaUuWXyBaf;a2sHf3-ZseEZu z?$YAGdx9^Z!?> z46x9cZDax!8TCy#SwUP_?DWPF7mQ%N*D2k!=Z81=p%L*K5b4#`g?-u;2z6>f*%h&Xf z;)9erxhb#Va!MK~&!$hc;D^?z^q0ql^|~wT&*Qpe-IcB4QdN+fboE4Y336qd^y%2` z*`^AO>hh?R%T(nl_atC8Y;rce4n86kkhHIZmrVe3hf-hdvkHHkm`>}8nJ?1bP zzO~T$5-GQ}D$hnzZfk1S#`UPkMp;W`+!g|FWS>eXr6kZOPhR0za*!%GLqL zcV@lWK(_U9=u7CQuhhOmeqZMCTY$%F`s5#Y`9JV-1%s65|7Bjj^^fM|f1iBZuHCDh zR=TQ6me1DDgN(J%rdv_j^^~op-^y&jXk-VJ4Hkkr37|XM_UF_W>$E4jG(Fk2Y!LHq zlrn1v1%BUL8wi=|0nVA|jYMpaetRxOM?ndx8lK5VHWj61`B3H`q1&%$G*PRl)m9O` zt4?cLj-?ZfypaqC_7Yo^K(dR&>5pJ_b?L$D)L-l2E7AG_)X>){L1_T$Z~M=u;6NMh zLu{sr`Wbg5uIgGu^vh*iait-c6}7%)EB9nN1sfmwcCA05$Yt)rNikBYe5OHH42i9% zy%GAc@rl=iNluscZvd2h^g=l_K~B@Eb-)4+M`xq4M21bLeyAKaUOpB4()z0@^lhj< z2k`m@DZ_+nP{!hF+$A{JtQ=F8 zO`YgCm~(*r(=|kwYji)^wzvyH@O_jp4d@W1uc^I=<6KTtJ%nMbE$jo6jyaruZ!ST$ zLk^xhF*x5&kI)miK}fm|cS>}fMUK~vNj4nQ2yE0P7|DDGEKrWZk)3)xfhZrHa1{ycH)$v7=xwE}smK(lhJ1%^>~qj)3+m zNS0vkTq}2h5=1(n+|rFf&%a43bSA-(lj-G*oW^9G@D9??@g$hfiq7FIt)inftiFF&B0-J z!ZzUyP5DbE^nABsC0DkNQ_pd#XSV>O|6mxLUsQ{z49P1>c;#RyT%7EDsKcRftZNF| zZCv^J1G>RbVI`O!7!V~TJ%4@Oh$wM@sAVYYhDFzf8w`sA0wydN7s;j=1x9CC7BL&w z%94#!KSl#KQ%MXzA&1K-frJ9q5pWC}&o*$9MLCZP2awAt0A1NbkSA=DYGi8(1S8cR zK09{w#4%zT!Tth6tm-@j#gfiNejvHfvf~Dum;#)PBoj02eN0@hR3Bok)EWGx1@1z7ay-jz^PQGVo5r z7ck)T@-WTb0NCL@U}6BFEGlT=G$9x^WLR9l%jcX;v*}s+&^EDeLmd_|-el3j49-J? 
zEhpM%kmg7p>{+EMV}|$9`G^34FH^B|=&O}?z2)=a zNY|3(RQxWnKb6e4;&(ZA5d#CU^bCwdyaH0Tz>g&pv&3>nGN*8$ZTKOKnK*vgLaJrr zw*jeS?D#oAVR5O6!5KK=E+Z(1Ax1FCsb}ITH0?+NSC$MD>`jcYszI_fPZt8gHNU*Q zU0dGUw$7!v_vYep$Z1u-Fd`YD!*O*C<{fgRj71>&Qc@Fkk=i?4~rq&0SKDqqhGVWV@7R_MpTcS)>=h7MS00ZHYk@6fjg2ANM z2?mp1Cmf%4I7=4F?k-U^R)5jw&+S|sExOtk#E{5xOa)}UA%i&u{p5P{6?V} z%&3IsonrIOoO#vm$#wty)csS7r@-pc-n`Vy*VAwpgQ5H>G5E&f=>4&xz2TQ9e{nMR zQ^6jBlis{NRBWSRQyq5Pp`K_t42S<(tqs4qxIFRrU2)5)=U0TDNwH^=gce^ZVG2f)cqnG(eyl^zmAC1Eq_6Pe#?~W2>@mw;j^>2Rc6#I{^ z^j|9UUn=^$pL9R#DVeD7q~Qf+3^zgR!%e)e=XbSKc++DGzx9yNe^~54{A~1jC;pxt z6Z=m?JlxPQFduCfJNJv7C!g;XJKxEj%$@w!7rNdSx+cV~2_bl144&sNToi-vj*DzB9GN3 zx@0sozGrx0GBr1RPk{sD=4mL|sGczI?u2^{d&BH#$eSNId3t9#2W#Npeiv*z8d9NjO_ z$(dHYtxG!|jx6sLyt_s3?h@tqG!@%-Ein&MU~*ZatQ}3oAPt|Ic=%y4G*k@rJsN>; zS`@p7ik*Wc7sQezB{vo5EP0gI+_BuT-}$MI{$hLA!<3r8v*%H;7}`?o+JtWZpbV(g zSl`j~f&%AzbaKZ^eyS~$|H;FVoTb(s#YkcU|aX#6E`ib+3Vu_y=O&?v=jLLfXEOfxP zGgkJVF6=%1{9R%16>;wsq2sF9aW!`wzs|8gzgzw?2jGKe@QMHHj=w(g&1wFv3&Q9{ zar9zg>qQ>!Yg6DBJNkZ|{XF~G2M<5U9fwZE9B*LY2cLf+bPS0dLl7JYtpqj|0-J=u z7BR3TXMvjddX~+PU1I+}-i!CM?r(;~LqO*V@z6B9uXb!&KK_LHIwkHrCUhJZJC5g$ z17?3=UV7&<=R>F9?#-Ej7M46qrlt62^@2N;Gk;=!!5IMcN_Py@rXk+m2RH?X#EW=n zrKd!>9Zi4P{KWTlySNn&k6abP?}}mIdJtUnhD(&o)AX0q{GPM_;jIZ_@Vq#9o&*6- zKlo5=2JE!~V_n-hj5V5A*?Ft5^H#BK8y+2K#iIkQYpsFrYmB>synj2)3A^@ueUslm zDeSx~?z{}sRlXB{OUJ&)^pFH@;L308c>IAlbVeL{hrg5-hh};IX1Le3?fiOB+;(nd zTe7e%S@icnA=`1n*$xxV5CCuPDZ)gZB2_E41Rn;A?VE~$o?^>Dk?t+_>?wvuN?u#D z^92RY_vkDhD>YI6j>Y3eZwp|(W71Fz4J>bcG`tepQ3&l2LOaFK&Xv$`Av7$6_KKms zE1^S$&>@xqpIe#Zo63Em#j+p{>$Ah^T{WZLhpXwc@S>Fa!9ls z;w^^&sTb=x{Akp_-PU~4M*WS=cFJS=8{h6zcGJJJ*MJ`mv3Vf+$2((3*`Gp2Sf1(g zkKr{DZp2So5sgknvPJML@f^sp5y>8jya&n!IfnfLAV_BXL>Kw)5s15)XeJSZg~-h` zL%zkr?!nsOIeHciLRj2kWv`%vM*J*psFNj_WWvQ5?0r%!8f3EHfCJx-lE2mz12f!6 z1P%}FgHUpK=zwen>&B$$;NBiL3ebLk4`f{V2UywBS%kYYRP5@;yPYn(!H!}}fZV}i zOM59eVW`2+?~uouKTxWX18S+(V5v^_g8~~|h zNA@>hxL5Z3D1hH6`qrt|um7 zmNi{ZGvx8yOdi#aA5ttImJ@==4R6mztJ7d;$seqlH7o;2d_IM`;Pd%}{NyiykdFZy zxFfYScsNVrx*KYG5fVJ)*7VA)=`FVg4q-r)#_6|0hS4Y9$0x9VAV%*rIcly#3Ft13 z=DiOg75bN5h+3%8q$^AgsKscfL`9+b~X z$)70*U;wN^%YoeBZTMP9`Ac!+J06l#NkNoP99xILE+8~?IhT(6UuwSk1m zDgq@Ufe{_r2)RVEOP7d6$Qx0hNVR7OCndd0lu~kMj{t-}SoX?nc7!d$E2_x3E8xP( zA%mevS?`fwk*dE(enqO8*WYVY1Fzgg%6^ahG#(#M>FzbEi`U;pY8$V=i_{)oe;284 zUVp=^Za8h=sZ%ezi_`$Gzl+pQc>P_ZcJcbVNFC($cab{6>+e$SwK@Y(#>Q^0>p}Ph MiTnN~DM;`CFRQ38X#fBK literal 0 HcmV?d00001 diff --git a/TTS/utils/audio/__init__.py b/TTS/utils/audio/__init__.py new file mode 100644 index 0000000..f18f221 --- /dev/null +++ b/TTS/utils/audio/__init__.py @@ -0,0 +1 @@ +from TTS.utils.audio.processor import AudioProcessor diff --git a/TTS/utils/audio/__pycache__/__init__.cpython-311.pyc b/TTS/utils/audio/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1060a4383481e54e8a358e7c1d7e63c767a8293a GIT binary patch literal 255 zcmZ3^%ge<81nFs6X{JE>F^B^LOi;#WAs}NqLkdF*V-7L-^Lm*nTh=jErQ7V87e(+8WU4>nIfK0Y%q uvm`!Vub}c5hfQvNN@-52T@eS+P(~mwmIV?Ym>C%vZ!nl#z=n$0fGPmOz(z6v literal 0 HcmV?d00001 diff --git a/TTS/utils/audio/__pycache__/numpy_transforms.cpython-311.pyc b/TTS/utils/audio/__pycache__/numpy_transforms.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1856323875aff393dc5fe0f5566d47dc4b045c47 GIT binary patch literal 24021 zcmc(Hd2kz7nqT82hz59xq$o<1T9ho25)V;_MNyY2$vSLVV~aXGvNaUQ20=(5Ky`yU zI8e;-dJqCW)khjTjf-o3R*#8E;dS(Y)z*8&juUs zq*9YqCcp2!KF}bgk!L5hP2l0{*YEt^cfId@%^wDXegRix`qe&Ux8y#kmUh>pNG8Uw6zs=0E2j 
z6VHh#?=t^9cRaE;9FQyU_2FBIuODAA?7bK?hA|S51MhgxRq3^YcU*E6%2eY^b;YnN z90&(SoGXk9!iZ0PDhHU$t%#pvaejL z8MW%=mGY`PjEUvuN~7mEVf;tJp|r#@poW@(%g=I~DRy+xUs*dhy+bZ(q1Y-i_~`@anQPs*(4i zwKaNc)Y3lCti2_pm8Dw<$otWPgce?q`!Rm|x|L(`0Lrdqy&aSfEuvNG=|wiirD*;z z>bB}5s2_{l0^sY{(E4*FBR&b-+9`+XP+eLj8n3J~1{p7LKW?5{?ayjD@ZlP&BSopbN!ENhKwu!4wfm z!K;Lm6H0s-FuOcRsW3hKauUgRgp})BjVV_O`fJiD2$vkl6jbL+!rKnyx+l?=?7HN( za+3t=*F{#EZnCJm_(};WN6MLUr(E~kKS19fQ~&G&W;~OxADD>7WQnFV9v)6AiHH&! zlZHZSRPFNnC0r-j@YOzcLmBb$uDBdhl+g9QC)A@-C%&NJD3k!DPE{#*;)B3t!5imD zbdQWh;{~LlYwfN=MJzg`B-Bu$Vk8<%h84Bo!kF5V@t05(#s;~nD>QIt?dA$3y zimKh$L+WVvp|E->nHcXrm>3(me)?GV@PwL7j19&Ua#-y?b!wn{A{mXT-JuCNn&`&T zj$a>4DxtVKl2FFfuJP-I>LEf64R(;>Lm_2eg|GT`BvV3hmEfz%Rjgd7UY^-Kw^m!Z z`C;|uobjRDHm$lf``W|m4LRfUNUWRP|GBvOp}0D`dH(eWqq)=Pb8;*vuGYk{yf~I~ zk9~>81z@Cl1S=69jD_QoSz=cIi_;VtVCgD6%v!aT(}{eF#5U=0Q3bK zn8f`Jcgmeb&f@C_$okmKR{bx*uCnL7?MPC~`o(A!EvB54pr;>z20u3G@P@}$(vvhP z4&~kUvNS4t&JAzMlk$!@L7lG2-@)RIPb9{qS#-g!U zR1FU&;_?$omNEMq(Ri{$LUO*3wWl|-M6(G+8aQ?MR2L)p$sXyAwdc_nt%iYoNgYj0 zfN$jJm8cxnADT-QVOVJR`A0DA``2m`lf3aycp$yd$CE zWI`$UF;RW#3w}&qhgA|BjMgPdX`%{s#`Fi13B8cA9uHdaRij8ir!_)VOXlxr!Op3} z3xPULeVQocMJeY#O`q1&n)`GffG9nxtxpeTuV@XO^P<+UTdUoZuiZ2C5@nzJ{&REQ z?{$9EsWo(KwOjJFTc%!G5F65yIdN^yy;f;PZ^NZBi0FmBUw#7#sL&xhF0toIebS^B zL9irPs*Dx}1Y0???ZxHpI!}B-A{PjE#>WeuSR&H1rQpKPU1z~@t>6X+hgE98K%9FJ z50x(D^x&&*L^35TREX2c+mln1kE&~DuVoxF-<XQqGVt=YG7zST=i@(UWULa4dxQabQca7RMWb)A$=_sR}R=yn)ib`c(k zO2OE|m)}J~;s*;42r7!D<%wO;7(~K^kkkGQ3ehHpN=3^>>X}_KRH+@iDX0COgcbyh zJLJR+4<_PQ!b(yir5{aBfSs>|ViRFiN+u*ZJRBW@Zq#*QKNOCo2(L+PmM(1XllsjD zN^~g~BE++VI;o9wf1h-O?l`x1Nr%ECp@~?Mb=hOpAflDpAT-eU@kAt~M3bXTIfu)t zYI$+_3CwTk#Z|NZg3{{OxoCc1k#5gt+Yp^7P7NL-RN!B+4J0GqI_E)}-2({5M5 zeldkDbdCvUO|gOI%nKsKAqo+v?23@6MwySza!hcn1wHhSkH@Y{<4U+QJVrZ+DC6Jc zXjo!KrKCn9@ldQwf>Am$5hDsw!t%s$SfV1s2}KFVz)^`fI4pTJ9F9xza5OSHl$cPW z@rYhq=fr1|KV-@pz3Rq$2&Peq$T~R@Pe#YWFDgnx>62bn!sf^c*&`CBpoWt))Mphb z`ZU6_ebS8Rxm;PrHv!}8zX8m)o%^jSt)x>Riaa+#4jZtR1lZ&*!EqaVN44Pbu9w!@4hM~lu zE=tLQ~_oL{!^b_`ErR05t?;O#LiqF;|SS2i@aey$;VZV}cT zg-UY+RdADBR9-^E!}zj`_|TNeWK!5=a*1uYgvkWCTvZ~MLlW35CaJ@rSXhch&Cl zDt#M~Ku9+Ufht(N?U_@W*qRqxbM98g2zQ+dmEdg>WLrzq^!tx^^4m52zKZ-#IqcH^ zhO2Z>dXXrOU9vsj+^Lc`#%?m&???wsZZj>%G72K9Dj~&EYk+ek+2fIa_S4V*>_4ae zd@r0Fy}0MA<^|n`ViEYFqVX%rF3Km(+3qeuI@>7*@!axFl<8iPcn<5z`0W(xzm2c% zws;(ofH4yM0S@}-e z2z}}BhbQlx$-XkbO>5biZ`rAdy?L=W=kDb~(ot|-4P8-AqdSu?X|IduF1!Sb0K7t63nR(k}!=h zxq_CI26$K|49*(GCC29_SDzY0*^X!8>jBFF*HzXE$p*HV=Pz^_P4f(C?_NBj@pO|@ zLw5mOVCuKvxPF(MZ*Da?lGKGlXh(M zH_2%bMQ89;$*O~#s1quKIbRDt(<+km2)~)?f8_JedZ#bXRL)jT^)HBlTfH}X-#>8M zpfTOzgScXyFVYHLG|%k3e*~b6WL!GQI2p;$W<+76iyb;bj&J=Hf z{9-1l7&YiOS(1CvY}eirlLhS-G1-MOcT$zmlX4dAV%C;mew{MB3V9Wo6lP1MoTFQ^ zV3arMxyM2iTPWo`J^BU`mBs}l$S{Na`f8C-58=^wsLZQKri7pQ0#8_~WjWu059{tU zW*r}_xU*s|id%f{KhJ-kvVIXXjO%~FVpp~RV2R25#J}c39>)!-ge-m+U-q58U0^dA z0h4?Z4t7WiwrGrp_gW&jM9Oll4m2 z+YOkUVuTfg^DR0CzF>>QrbR-Rqnk*=!nR2L5_6zCC5%Tzt}0nFFrIvYTa`B}XT(_% z>>xJg#BO{t{h5gmjslqT@{F9j+b}ViW~Kg?#^5BL{AL)0LW7(o4>rHxftJ`$rmhqmytQ`e@&tJ6qFO6D_^68;g%uarODY!>N z@%SmYY>9RGAS}EHO-h^WHD9Z zE8_bl83J}$S7HJ4lN_7~k%#=A;|HkuG1X=l*ugpZ!{huEg}JY#t4e5m9AwUI%7dff z;Y-qJGCAJY-L0RtbS0EXx11R69)mZkI}(nE>C8(uACuzf8cmMHBpkI&jA2^vh$TZf zH_;n?>fsQH#8~*6gu|7gu!1IVWTe}uIx1^0bmd7Hf{d)E=Oh=~eM{lK45i${g@|D6 zX|gC6tklXqd{y#?LT#-V0?TIiEcnHRs>X$7E%d2fu~6NBPqEq;@ctF{i{8Jcq$v12 zUe2JN@-J>b!U#K_6JJwp8nB%KPN`h8QS7J(n+M(oM;j6Aif;E$9&Gu$$vy=BP=3YCSyD*Xy zBbpe=i;`0=Raaryn@*J1CnJfdt z+Ek64* z9L=Ez=Gn{<17*BCPSoZXg zaQ^5GuWi)k3}pLks*P5XL@xTpu;4M~Zy)25EAwkHID0JRfv~MOaoy-c|9ydx_+&40 zcbzqi5BR#`iKH|Vje`&+a?)JCIvQ5s!5_F39mg&&!rXyJj(7dsXs@i{;(mo`$y#m~ 
z(s5P6IUYq|0gf&cV+A+;!evJO1iDG1y?8*!?_WBj5v#AGP&SE z3z0DH;*nUG%i9XBo-d;_zAx~yhiaj6V9;ooIJ)2%Ty!YObT~`M381>MU{k`QVD;>o zG})L9@4fW>x{p?Tw0ge&$IZF5d$jsJT5xYZxOeLCqnf(g-gNT4!0o{2HR~SMto!~E zt)?Si(=m1Ikq^6!PJGgpnZsI5E1)d48k9rIJUF{MeR*cz?7q*%7MvCEeQZ&1de%_I zs-v3kSl)Lm=R3wv_LgNl#<5qhF5NpbI6Fv%x3Xl~@u+s$?Z%7~Tiw^cFXuMw!e{O^ zB=Z~ijn>kaZ|T#T`n1~J`P$vn-baDjT%ZS^^s%g0tKR@VHdq5zz#8#^dBowBE3>QQamFGLcx#ZKYflW!S&8;n*4!KyIt;Pe{0F6laIqsq_A zC8MAMKXy{mi{s3Td>Z_ayj{ZK`cNV{3O_ZbY?NX9Rg_ayoWtRqzodrR&V452C{`%^ z0vnC1^y9G*$uP(1)P-KlcSH?Du?Qm=Bo#Pl%tHA#TKNKB^$ZfYi2Xt!IJ-05GuxMG z*2Hz$kS1=N>(l%@?mK=`@xbvDzve$Mb#Os!Dhduy<3e>!dfoJu>EMD`g|p$_^gyQn zz1OpFg?_I>YuK0*H|E?Mx!K@jBQ5yHqsie>9ynyeI8L>cA_@FL&=@7NDw(mU{9Qay z{vIXMNK6_bTZg`kM*c4V(1>G-qBj;FoH|*)%Pz!mDq@OLP|(UKOn+?9O7hUCe%bcI zM4jxii`3tB$exrFsW;_7T9NV~^`+cMD?wp?qBOu_%7rwL@*)kUDv(ym)ktgPTBLQd zA1NYZkv7PUNSDh^NLNt$uH#MgeWUV*|Ar_xrz%taYJqfqsuDtZRp@`?p+SWV-JrfL zAUg8-mTd^>6b6xAaG8fFwu6F>IAO!>gX0;Z7?RHRea@2Zpz?!YL-?1waZG0rw{#@Q zA_(hX_*Ehj2?weC=*Kz@HVT)XBI|{b#m=m)!j><{I6ywZt#Dw1g|evTsS~p=hZC`h zu{g|nV`x?YiqQbKLrVj2tkNYBb4Kxgej3FNMS(`zSd^648Ih5=)TSmC9C5$O#fwCn zDTx-r3c{gL@m-bFHk3%jQ2HR-WI+y%^J>qsBVp)TyebT{aMG3d)>zh)oNEk8FJ7am zm3s8|?b|1v?LRYk_}K9mrM-}02nZtj+Ka;o7Tneq=5^XOUO`krXgIvF-NZLlX0Ats zrgaS`;v>!(p6YATNgig)a$W2uPTY*u&O2$TQ%wc<0@;p zjm??~sJ+i`-_l{yXEeUI$3E21II7z_Xxp@jnzfe$&!J#wybG>InS3N|28KE$RoRON zC8X2-M&Ax;6O}H>JZ~GgzAjY85b7ga+R!fV_Jkg-HGo6R3)XBdfY?D$ICaH=4T$lA zRrf;IQdOeyr&i&cnL3Cah3rLzOwU4qhluegFyWYhZw)v;cQ6Uf?OpYpH1dzpK z^$VG@s&N1uf?hJj5cV&m=)Ry13d@EPvE<|!JP=lif*%clW9jrb^4$;U`osN&We^%X zhA~PHSo~__f404O>rZ~s{{o^~uQK&Bt|W$j{>}P7IPza6-`QWNiYHJdF%cOZqz8(M zF1fksR$v9_9>_Kth*wTb48_9j0j_r~m9&n^wNgU*i)kpXwU$#g=g5|VcqI`X4#Q0n zQZFgrLarJ>Vu%|0#MGt(g~#Ed;L?8djta71t0f z2~Dfg$>LhTn!9o8z_gftt+-xjT1ntkN^{pg7K2mAn9VU;two5e!&$F}#7waT?*_MN1kPy$Na(U?Ceng^uewOA| zFDprQbZdCGNZxT%--VY7n4l|oc_$@d>Ri-S8KRCqSLoPaE0|V22>`yX)$CYpWzM&9 z`ZALAVI=rquN!Pghi2ZGedAk)rw&fn(!Toa`zLRo$sEkqesFlMHrKFOYuKDe8tlji zI}r0L)=s^`ghF)TSHJp|t}njWk5SXfTcIMUM8_x!kL^CH5tWbaD74rx_DcxY*g=z` z5KrazkPPF?E|Mu!fytFk7`R=o;FSZ{68@En((Oj7LS|vj2%AC(gZTC&0?U)w9J2+G zLWsq}HYJRbYzxEfJ3X+B`}Ir3nZ%f;EGpF&6-EM(F4F|T!ZVA#LrRR3)es`vdO+{q zEQPRnD;CxkuG+aBkz%|}39F6GX(^DCQWVoKe`iI3~b+<4E8) zL!Tf()gJ4*rK{KKFA@zIA+@$nSa2;)q!7BwTonKzmROXpPV96<$UqqJ_6|-Y;w2ju zGX&SxKH1H*Ahb3JBi93Szj9YSPYkjHEg%+djB<=h|GSwr1!3W-WLqA3Q{!x>lCJUB}|{ z*QG<5wV5%kd2_CDbH1)a6Fc)_=X~WuaaT^XDv*(JLyKRkqsd-C~H5a^u4$O)$)uAJnG6au~K8xQ7WL;PhA zaTuHpN(6qW-$iqHfx?Torbr#!#4X>v(7+`|&&}IJM^hkOneRety z8-D7b{yr58-gM0!d9W$>^}Uwi?GO4*sB1@!)jAd z-1#ks_DeCgdj!k zTJEER6vrDKj?!5zWE|F;g+r30(u4DLSCLJMZ|T~#<5?UiXZZLb!i~^JSZR8k6p01w zT+aa&%14wOMxwfqn1TZPK$<^Z!c)eSe~c$rXt`Sf!2Mb;1cHBGP4~=PnZ0sr^5*1B zYBr^bD>E|U4~^i%2#Vs`{opPS8!F&kRLH9<2zB`<$hR#mkt18$?*hQT3P6Mz_9Dok z{RrY6N*2>PWVcxrWy_DfjEm|Ld)sQ$9NC38U04abov_49Zg8^V#ZI(C410pTV)F%Y z7I$#F`AN?|q>SIEgu()h2q2m#XDf$NaD>Wz-9Eg%$npD6QSf&NzefS!#UG!4`dX%S z=7#2Lp{NM|Y{mP_(gW``-EPXfhOnPgT5aq1hdvsa8<<~v_ssX=AH}tr?V4{#-nWCo zf2z{8GdrhFa53pvjBS&VX#F|1hn?4)ml&26oJDd-4a6M|0S@Y?I--FcNBMs71|m{` zEzjhCTau+nQNu`zODA6$FjVtX?4)O7ErYl$_BsVo3Ez@@0Pi4z>c~bxhjt61bL&n- zwm(B8FAepHtQ7qa#yZ=FOt!NlMRPFvyN57tj28zwckSX5U6}{=aX>*nJDk zQ{lehi7zkZXUkVnzC$p|r`(`0XXyW+@sr^YUTI+VG!0UZXflE_=#0v+m+=mWF(LRg z4a*sNu>*l-Ox&AmY!U^VKY1e6SmLTG;hokv49O6kt}#8qx+LiUPl7*?CfASs0~zcKfcYXKBA+7D}z_qM{v{*s(`WmN1#b=M-$z0 z=3>0|E!-~J>iZ1exAHIWI77_!0st^qGu|LC3LekepH(-`reOW9%vJBus(0k8cg(nP zaNIN9^9!#~Q#X4xeNC&D^3@X0w$78Ga7oW03N1Pw9>mrWk>hxuz+@$2BW!-TBK#ld znrs{dw*WFO*&cFjWDBL+a4P_&oOSG~&d!ER*+R*Yb8l&z)@J|GSlMN76SZB+M3Pjw ze%UQ9gogx-Cy4kZbg=6pR5ib{C7XOB4_2BN_O!?D6mTFnOZZxJEe>!=xh|SxdQuPB 
zl95?W+Q)C7Edpms>U@RS(0_SYG#i-&nkO`dn@zRVX-UjHkt0e^n z2?e`f;j9ti!uA%tVg1EfC`A@gBE8Ys(;@Yor+!ovwn)p71DRgLCSDO6NL)kn#Nx6T*V8& z__ri#IDDz#9h$(NOHuwgiWwWOKc;#_@5%#8*e2eG+RXmL{}2GgYc1Y+`Di^@%<^Zb z`Ujv7^-lpnCgJ2_8Q$_3&aVHF^Pcxd%DpS|!*{=V@0(iN?tI&BZS|h~>OETH9<6%s z)X~S)%Wu7P^Q{k+I}lH8`RcZ*qYD+)MZxXqdDO7#y|3T?`m`VMH?zLHujzB&>W9A7 znaR1`_kEhLKkw_$`T7@pb+@W-R%aZVuQ~5)#=G_lzM9$U&wVWqeJz=bznA!KLR;OP zU)`a1%S0@6b@1}A zDGO=8M>2tJ3VfI{H&fa1gp)?O)$9`qkQc-#+nrE;gM?55HRs?v{gul}(Y;8>g~X$KZFGVa;&d_v;5VfctOnMI=z`Y6wi@QzE}{)kC{`3Yi0FaV z7whAiY&!{SkKXXKgg1*8xI6*tkiCP*Wp9_Ykcc#6;wFc_*@7)vDdizO>Ok^@rNW!; zv{e5dCH^%nRX+fP*V~qgHj-)AOkg&URy5y=Oeov>!N`y5?lsQ4?ykJIQft{he_3nk zMch4mi$sruGm%XsY9c$18@oHr+L@EtLMXwzATGPoUiu#&CDE2~S(d+@<$zlEi72Kd zAi-|yU!nr6K2Gh!vFJE|0p}CS*5Cgn3Rh|MOy?)3=fR%mhoVVUS-_)*ge&3`ggt`b z5K@+&cyCu`bgohpx8}vIpNqTTl>BMyPueu`U|u|!b06d~D`+g|Ahl)Uh`Mv(%LLbY z0NjWpq5^TTw_@OT&`E~-IA~^}N{r(*oj|0L_$Q|JBluqqc;(!T3ZfpDRQ=^!WmEB^ z8DqR%(kte_iZES6P;Oikb7SCwZVgfoJz^_MXg1wdK4W|e1+xAJTGdtnY*&nbP~n<6l5u4AYE5l(Jz5j_ zo97)%{ojctmX8kI88O2tW$7O-EodPb&u>2zHU?0-dW`RB>$Btvmd#N4a}3z-gu~n3 zMzhL)!qdN?c}Z+uk43yAKeHpOrCHbN!fF&fUp3@jc3)qAasn;3~dO z5zLC4a@bzU1A-dA&IQjw!6MN{sz;Ern>C|2vVb z1WAQv?lC5~3tb3F$2SQ<9{D|M-J2F@9v5peHgV-=>5DOW1KM zJE&k@XXcA!P7{UvA8endkYU8M6qA39u^AmOq%rbg2w_Jczx3@v)Y?QWyifTVssW>j zd&aJ#=yEt53qoVg{#g*3a`w+-p=yf#EeLfvEqgZM7q$eSE#N-pbLRM0xKElCs1TyfI!|rjo|gYa}HrGW;>;A7B>lN z*49d}eK&`^Mjq@_>WvXo? z>65q5vAel2`}wr)vu5jQ2~;KQJAm~KIM!K7rtS_3BWvFIV?P6Oub#}miUw`P`0Z+8 z!=?wU=Uvn+PcqIssEE7yp&w%w0CHzu&!4$ql_-I#&_W3=2vxj3q0adeG_)kfo@<}S z0iK%>ZzO5_8y|M}K>36N)-w)O2|Id;Ft%6;pDnWHUSydMhCacNZAJKPKzOeG{xWI@ zwT;B+a?3mj4w;{B{jAS=S_0K02#eUNv6(*gEemx`3(J-lYnvS~AC2U3Mddq_MGxI_ z?jp!1faNL)@)INy#951=Dj`@?tR|>N2-foU>N&SHP_EfFhxXAh_!#&Af;Em&5P+5H zV_+Fx>ac==d=w1yQCk74$xDjx!U18|J{k-h5Jr+|y@Q@`H}7FTpC0(^ zCF^Mkv|8A<^HYd{WkljelEEAR&mCkx57x7v-1))$`G{4h1nLl8a2#@c3V~khfT%JO zJ^|ciRhCJD2zR+q*eX*3wRs(0&RhWIPz65^>ON^^KzUNBfBl5Gi|HspJam+ugU{efpf!=bUc+Wo2c#fM;@IYGU|JLHKVJP#;Im^YvfD z^PwOKk|iXBEu$6-`K=*q*fwel+ehszYzsL?9f-GuN=8fB+c{ds-sS9V4^@O+qb`=# z5vmMVjaG%-qwcV0)Dx~Atq#|W)`V+EYr}P;b>aHa`f$T&L%4CYG2ArT6mA}E4!4Z9 zgj+{j!)>E&;r7vXi(nOw3sTAZf>avl_}C^0ck#!c(N4)WEO?z?QqXHrN)KI&2Vy7B zdaa86NN_x^I7VisLxCZDsv3^_Ba&a1hR6M(fb8`sW&ZHAFCO(tV~TrhCK!@@;XueY z=8pwqO8Iy+JUtT+_$K<5GHHyGA+=I21p?vei!4dyL@*-x0ugCC8jQr1ib***fhb=n z7*?F2s9*A3@n2Rds7&OJO$Wx6ns6`_3dREC6cY+WCgT^CifLId(=CT%zRS_jOgP|+ zMCGtjLHQ^(ic##b_(WWB#pPhw7Yic)ctEKzq#ng3S;xq>NV1|Ax(|cZp>LQLLEp`yGEK$q^Xm{Rm)k2H1$%~s%h3EO@q|EYMKp5 z(`-06VLXjs!Wj+cGhC*WeVn7@Z`C~EB zFHd5%N8;oQh_OIij7LQ&FyWsG#l_41&`cmkF@cDGEEEv^;zTHLHAvoIBp46+L%~^p zJQ$5=rAH?WgLSs&7!LR&!N}yqOo+AGpKG1g zu3R4m<;j@dhZwM7DBzR*@qpL^^tVNfPE#}dA+Mf@IvYnh=#!s@Ok}xGmq_{g#3Q_y ztbaq%NFY}eWs?KE2C-)>8VxK2* z3IY~l7|7VF!xM^5`X*)~;>qE+5cm|yq!!TnmE=EJVN(Swmph|f?UcH|a7?Xf6b}={+ z_l4IOQ1_9yvsa!5Hum7TIdc4lRGC_7v-8LFL*IK5OlA zVB#4e4JiXM@jn1nj5C0fz}9~`8kEE_|M;azIXV-Ou$;kc^@%4V;$$Eakg<~YZSO~p z@u&>&#KHiiARmFJKPaooS3ye?fWugDb`6|&43L(C0hXAE0gae>=(qK6A22Ml>F5<; zgjI3rQFSddvLO1Wv1BpfxgHo7^H>0N2E^l>B|RMso2$-4wgwJV?3n=T`gC;u1D#pL zCkS+PurY8v0k>&u#jQHT zYK7lZU)QdXZ6qiCYfUOkp<))sF&X#;V)~3sf(c-^n^rV-s=jILfr;@Hdt!0R1O28tX|tIV0GF{S7=v#GcG&J=yNd0-k%2V;)6lWs zJ0sJ5NVcv2OTjJ(|DejpzN~{!Da~QokuUclINqgx#k&-L75MwQ0ixm$g@h#`__WUn zD?%1En6M#a)k1cJG;wXhF>R43FDmCxq68sT{469&5h~HXl_NwovnNr3kS0(}xDYDS zzEvWm3Ot2G6GGHl_9Utha%mwqLYg2kQJrf~mG->`-`!fM4k1W)_>-tbs9FuhH>f^A zNYsCf{@>NdK=9S7UlR?;MdQn!gy)7;vPP;*xjhBB>$UWa$laiYnh|Q$LM;e2X`xnx zG>K=T4WSn8TRTFnTBrk|HZ9bNP`efq5$e!FcFA^$&81uuKcL16!j!1`k$Ro#kF+Q3 zl0BE+DLG8v-0`DoIsw9bbC2J%B%C+4#*gbM62g=Y2f~yNA;OfF=w9i^fGk}Z@FrS{ 
zI=j^b@N07vsY*DF;Y67!r9*OFVo(|@KP52- z0b2ERzYu*Uk%vw8~2Wlmuc66=#a`gh%DZu=48tsWYe z>)qEkn98e6di6ZIFVUUY@G;WgWwTE4`SsrP#$Pd{H_V5z$M9<C#vu#B)6z>#&<$Ax9d%ma((0VP1k#_d#`sPSFbU(>Q8J;Y)bSb zydMMq+|_Up6jg*3G#nuj6lp7vg|rpOLK^;%gkdPi_@CD^BvY3sd1WgHXG=J;m1$9a zN!V$&LwsZ1TjEUQQY3mMB3@9Xco(>wx6f-++^2r}km!v2BP8D(3y2{S%F5u$Kyx*bMz8pLyLdS!&WhpzaX1iXo41*1agd2_ zHPLooU*BxGm_V9$ukU^Ly4NAEM=KPkT8`pWMWBjP-Oj1LU}VJGmrJioc;$^OyE+z% z%h(gS3m;{|FpA449g=BbD-I?zkqIR$)dkEqhDsMJC{^a2zf5aK-b6Xe)vZ`CqWk?< z6$jgBD`ncoNvS0Lo3^V#r*eC8rA*t5DDFJYTq(_Q$V#P&iB)P%d>v;B6^|(r!>3dr zM&-H`Cp3|arBTWmBf+>3#TuDbECIz1&1FC-f0a}QM`alzE}~pYxeftJxh`*1?3ev= z3_~cB^3s8AM>wXGkfg#Nmx;_OWi)#^Z75S`vMLo9R1`DQz8Ke{ZiZ4a5Sn0fDicOw z(cTole3E)ZF=PG6x>G6069C!d7hj;a(0um zhn&6Syg&|dFYJ~a_Wb_nR)|LB*w$RK2rJke6x*gN$M+7{qUz~SM2YBgl-5c zr!7L;ra61Y*_3S(=j=Bt^ImN|skYw5K}yk_?bt%^)@)}Vz1y;FJv=^_v%UodlxJHv zQfyOp-FA9+Xkj&XlekcsM2UJ`E!_yYv;6hAxntp4y4jm)_9C__+rF8KX~}kON_B2s zur1hr^U8zWzxLhtu@aiQSR%DWjU5QOv^?u~QC=}C_S3svYf7%cyuv3Hwewe(D>_pZ zosXMZZrkUM+&npVGFw@{(6U^)K2^Cs+tkA2zNl$SweL^Y9LUrhNVyI?si;p?w12U6 zFe`Rv#DPyoGDlxaO-yAbF0a_E{Z(HHaK0fY?b?C@&Mu7H_AU5U%7soZ1+yJJ?B#A< zh%LLjQvBDwFYWHiF#i*m2btoxuP?i{q}VT8G5#R_@JFeNar%wCmKpJL|5rAlvNyxh zd{NUdAOGp*Tbt)fvlTTfg01ZN$FAyB%|P0XQp9i+OsR;*_C3yFPeL@O>LQ`o@{;dt+(== zm5P#<@~;Fq-;hHyv*Hn44J$%P*`|f(*ekp5#s6^jZ_a*xD!uP~X5aa=YtP(?`N@TM z7f(H~WL=&|uJ&bD`@+S=(xui1Q)$=9jO%2|bu!zze(p%d1&|XlW@obP>+l-Nc5S5C z&J_EtRH7DEhwgk0P#2UE?5v^CR7ws<-ytlnu^jkFYz47&U!R5pn-(x}mj$^tt}*Fg zLv4~ZVF?m@r^Vi}{Ma%q6i>y!dF?}rgRM__2t|J)c&##V;qqH>;2$MFeTh+XpvpXt zm*Qb-cS6Qm=Z&82%v;6z3H~P)F$m{|@T8(qi zoW?u%Brw`$CQ|;#3Ck2?uqlaT)%Gc1wWjQ9zTDQK8*y4`$rAOe@r}~7jiH$l)Z>TI zm#5ZA6plTTZ5CTaL(STz@(-ywuCb5kt=-&0Y$s^+P%D3Cv7y6wQk=$u&VpSuhARYW z<#lYdbt@S8poFcq&Q8x_%o0}S8e>rs%crs6^13)NRJmtL+tAWBUN5bJ>epCx^F|Bh z!4kswo!6UJPwUg_!Kz!UY^*`A!+#9RawwQqH&<#yoxaBSH>DJoasGH*RvfX5eki_(wU@`?C}nX7*<-|BDwYw>y;rIn6>F5@ zG7cMO5g7xi^Ms7+dyz`B6Tj`X$!Dni3lvi_%;b_W!ZDZvj&sYj8Jlg&L$=x%8$c`u zW{G&F7NN50C&xZG_9rK9o|v;_E8Lli)~|%-vidLFAiWE3rrnz}?#&=Vm6t5p+PYiM zFKoNDH(7V5eYv(TRojPQR~2R>%h{>_lMK1douKUAyxN6w!Y!k`GuEnjV1@~ z>|ffJUO%v0Kai>)K$c8v@1xe|ms_8IV0maux9-c(??9^Vz!xnWQ=5Y6mZ?n3RH|<3 ziK}wHE>qo=c6H4SW*Zt8x-%`$r5m1Gs>^IUlfJ-aemg>YyGZtCsBdE5EW?I~u^*bTrH# zTPYEmI_4{%G`HVwUu;M>Z`HC|st>ZP$yXO&rd;Xv1DW;%^T)D{?MPo+H+O;&oZ?jG z-+UV?LwEvH!yizYeQQz~>r*lQccC&?otl_&xE>EY`1CrzJC%8+n%AH*1{x#3hLO~$ z3n+jl41_RG!}5#*Xmo;604-EV0Z^j$brw^EuqG-6y#h62^hr;H7@}gvbd-;%3F0(5 zL~)F|=Sln8a#L&N|&=Q8x84-~s%#|E~q{B> zok`W5$wU7-W*;zM@XmF&uV7M;@L@x1IcaBy;BNbnDxNg|*I| zWmJKQV@#xh7;d&vi(n!?G;O_fdCzk5IJ9&2gacZ) z(g_DpTBYBMK|h_-{F9)|%&HPY7>vb(bk|t{9GD2RYsV&&#;;E#m4vI{fk0UXkRM#1iY>AWoc{qmtx+ zTgAqs8lO!3J3ToDosx!iE_wv#NL(U-wOvyE*F zZ)X~}%(4gVh!UQ)p5vk6TsEE>!oK=_8CLf1p12zq8k5g0&OX?eavx7Qk8|u<1m4EMS7{HS z5r$0vJv?S)YDNZ7abeK;0~Dh9iz8J?C-RW5#e}E}%GOXL%GUf(DH|xsjM$-&qy4|t ze;&;`sUCB{;Rv=<6r*EPumy|pwF`4C2KkW7Ya4WCKV{%~3OO;_3^QzJ-teCx1V|=m z_-@!nV<)adwMZXHKaejh7&)?bR)a<%FS}MX&SAhNhk9ycFY}t9Zk?aniZx3a^4jGR zOn+{B4y6wI1lttGivex0t#uJ-ixIRLmiK7{XIt}@w^2nD#T>>6FWwM-+aR=h(=D4b zEfD=fFW_|DbU$*gTXwEX_ADMxI|nk(fs}LLar<-W_TEf;FV_z^UCMtkTylP@8v{gW;*SrdtoDH%3xtAxWWKg#sEgpnz^)(TO(Y@+j7_ikFx2Wu&+=aC`mKT#HwdO$P!eFON$|8Tyqp}#Y}lD{ z?@GIOW!$?`&RvBfh_!$MW%8#OP|Vh!!zlw9CRs+9=#$?=7SQnuytp3oefUiMtLLr9 z_Q%x6?z|;aG}Vu(pQKy3A$;L(UKmUs$#jDz?M%COX52ed&Yd~vzh;7dl8iGN@By+z zqHi$KkSyHHMXra5gw|oC*PevMEa|vzG09v=O0D5@_-Rnt1HVRx;3w3=9x&~Q*Nr@w zj98Q1%Uk2xzF!dHwQ4R%C5V?`4=4{>dRJ>LN~huG-waZXd$zp&zb#VtY$ak-AS5f* zGL7$q&0Kdwk31WfJsXqnq&?n@ z$2&Lp*yYJqxw!_kvGvxEvdpP(&eU&$9<-A5pp~Qttt_9jtW>e$){{22<*}#vmxpd2 zOK$t<f?kONZl}{lc--H7}8V#Jh`g$Tn$(EA*WAd@w%XGkh9sRaSsBHS>x!bQM 
z2S0lK_Un*j^`%^cY1d%JHJH=Hh_GfTJ7U2iBRY6z=t12>EUv?8&*6;c@Z8{&XIoA3 zpP}(4Y6rl7z+z9##gm zhCQ>;9hiZPEeDORN>&3U*XXI5o8I~09kC?nt_<7k{=@tPa2^!`Bj^`Z8idUiF{=Dv zN-qu&!hnAqHcE0IgHi^;UZ)&GJu)p|9h)mIpTQK*VBz)0NQO-Z;{s_G7)Dnt=koy4 zO`MVedCzv_L1f{$}Cd+P=B?26ox4e%xnN_{R5f)7e5_b?mwF9KROqGevz?%>BrXnDRXJ3hL>mjA?+vbOaSh4R+ye$RSjrciGJZ#cBx9Rc zJ`I|Rz#z=|b+cv|hch^5+@zs)y+rKn(R-bD0}R?3C-kBYgJujv`;lh`LtOJ>Lw1h; zX!${yRRknuLjOF`N95}zhhbR;lex|AZHoOFITy(JIh?%Bu2Ms=XKwH8x+36dPVyN+ z=nWKSgiv4F1-q`^r$gvtSL4DUgiakP*Y31ycgD3Fz^U0${Q*#`nl;iID-N+O|92qP zYJ}Lh#?yoM*I=^&K4v6OmU-cb3dHSfhpQ#rk)Rp}>EjuPhWB$GX&)2i*a&Ro*9}jHuYC;mi zOH=^UP|~GKTt_7a!_!eYE*`?M2u7oUw4w5QdNyWx-{)t?U2=KNw7noJRfIO?)zG6J zDDhw2ziR+?ifT9**}tu@+5Tx1VXmlW^JXf#FfFDWiT!kllix4bqmCL39p(M_U^pJ+ zePi!VaWkbhM7~wf2@Y1QR1PI{>0J+M(yP~vJ)i)!(~AHyXKGFI-9OopM>nECson&m`h_8wk=4vqf6E4j_s+I9b_nR zI^{W&_MFLh&Ol^S*+|kLT@a-ExVd~wN2X;9Ofpug%c_9U;Cw^QoOPu^@YKweF{1Df zKmUlHFApj<4D2}JH1*L)1Zk1;aau{-0quj^R=wrk%j8 zV{wt)S%^z&;uk?jp^xAf0{Ed002aZWh8)B2wNRjKD8v*5m#Zl%RMvss+D!n(?voOS zb4}4w!H@};nT%uAhw{$J_YOdDVJw+Wj^(=I!zGJF?y?b2PQrQ%c#_wG+kY;i&+Ov0 zi@+7^OdPu;49XGIi$e11?C;xcKxRd`DcC3P6;D5Pko6{m65qpI^5aVc@`^jJ2lAiN z_d{@?k#m{S>Kkr#p|G<43W>}sg|f}Inej`X>FUaIfK_Z4k}(aPr#TwuulDaDIIb&NG9wL1>Z(&<>DB{PBl4o!MO@xVnPRB4F7sLmSMl zy7TMM#SB?+&CoWQMKSD`aW)>K$VQ03>^7t#{Y@*4E5{jZbTvc*sWS0Ga3EDCUZ_Ong*1)^MbLnRp)MCpuLEC)50^X?0g>Jod1d@BwA}F7}bWavP(@h6+Lu^z!^Dphq{J8 zu`rqfjbQ`&DE}HxjJlK)_m}HefE1v9{ZkK(In4~9xsK@uQ|`{RyEEhNOgTFlHfHAn zO-R^|^lWfx89)eVy@8turr_t#b+fP!#t`|NPYk$i_BTZb97ib8TipFY*I;1u8K1^4 zlUYsC^L-6G)e4-)?#C)R>OSK?*r;KF71SkbhzNGcKEz-4Kh|}&R_jC(# zJuGHu~WU@X@>QbP3AxVeB|1U69=K_9Qf<)_;G3GhZ@M+GNGc>E-{Rxu^^;kFfQI(Pd zRd30^|HXK1(m8(DCo_zIX&o+)qto%k;ZhVW7Xk^CHyH`yzBzv+9=gUZZo!FgBIdz3 z*)qtmOTgW35wiDV<`|sm)${W^@eDYtkd0A6ub9oE+I^jV6WWRuUfCxOv-^M0bap!` z?Y?q@g6^0vXUV(ur6@aLM53p;Ib$K_92&u5y^p!&^vet1>}FCg1c+)|Md#KZLAI5~UC`D=36;c8m{9B1T&`mFxnQ0xsjW{D0fzu@4q zx{EnPXyOSn0ZlXsuBy2Mk3CJvo(I-XM^c_sY0s&Q=M>kUQmj3*`vf;(PI>%kk3Zw_ zLz_zKsLmC^UUt^9VzpJCwSb3tRJUQdZbQzQ-Ls?o{6sUP$Rf9q)*+-m#VJ_}sMj>BIUO?G^mfg-&E#cSQD*>%631-Al}6vf3BC9>@PH2r>l;W!~+*RFoL$p)`TsBD~JrL zWQSQ_hl^H9id%h1dA1=)Y1geko>phlpib7CWbTo<|DYSR)N+q*7qe#n(6?vlpF|oN zhyQJ0`eCqS9Mdz-jEkxn^5yEl;ql<~wN)`FVM+5w@O+NVqrzhFoAD0$Z+e$0L(b4M7;yS|NSFe5A$x_UmgFiv+o~dGL<23bsjQ>Ld$>E zAUr4KJ+9}7#JBB|i5bDsN}2@48jLEX7;Wm9;t_sG(QOk<8&!1X<9d~Uh%xy8dvb@$uTjR!N02S44AZahABV(!F~#+HS^UyLty z|HgX1#DnX_K|DdvTI#(^IvZL)z);^4muL!8pyZ? zQm%o=&dQrkT!Qh)*|F^GScorfPi;7yJeNGTG?3YG_|w3%?}*goz(p9dRhKD24HK}^4R(2lGcdobI2~}YAtTxj zZEI;x9ILo+sk={YhN?24M5Yb$=ZGlSARBX1>YRb&Z(V|?G2>oeWKZB-H*{=>U86L_ z9+^x(huG~$JYq<0LySzfn2ih_lRNM}HZpQ-2yBsM1g#sTuW!IXPl#}ME!gx74SCno zF$fw5pHC_G(Oso8xYXVU69(TqGyYKSODT>@M8|zT`88z4{Wo-&WE@AdFGeNVMf*cWlu?95bhW2m8I%RP< zSD-$D`zB5xG-sI;=N`WHsr=d1)XT$}mtRl4;Y;oCWqL30Bt_iaRgMF=OXha@ij91|Tknl> z-?0;)O>y7b7cy^OWZoxbl`9S;TM^7om&@^da{C?h3ZA8|`{)%s`A(bFu`4Ox!SND! 
zmd5T+F;Biz-ryMEU5AHv9iA1z?DV@G14-+h@)ZG|Mfn~Ig(u(XDsh~#EN;JthLP_< z*F&_7d_`Oj#q+k4@6(#kT8gCNxjj{m+QoAiDU3tygF_EbF;BiDIvr@A=G3io)GQpj zjkNIaM!~~E2IqvOhH}A4*4#l0;Q162tVSAqJzB}EjyeW@We#zE?AFwZ08jGV9kdW0 z-O*r?VjC)Gj^OZ`;8~hs;m@sqhbE|B4ZgN!M>WAvbrMtyuSFP-;`N~BAzG?_Gx&DZ zIcT#&F7G+{7RPzaQjEV-ue{DZMcgeVXdSwb*WxSe^;CzW3_=uMQGSRQXE{n>m2LAJ?>jMZz5s zF?I|^VgFl-K?B#v3fE5y$7Eth*v({2`NOTwNPNMOBGYLzEXa&~h|A_CJ`6oFi-bCg zW5|=li%JPUN5qk%8Dak3xsTI=1M+`HQt&fE>;hbD1ud4W(2z3!vO-6y*zbu@b%XuO z3NMtwQrqtgPp+9B*tyrrqI3M=^m$T*dG7DBjad-aJuh^Gwp7Nc+-~R#m CE{@3n literal 0 HcmV?d00001 diff --git a/TTS/utils/audio/__pycache__/torch_transforms.cpython-311.pyc b/TTS/utils/audio/__pycache__/torch_transforms.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09048350f36215b59fb17e42d988e6a6e55146ec GIT binary patch literal 7009 zcmb_hZ)_CD72mx-zW=_nv5n6_vS5mRiDSnRNyG(6px}fiF(fu^WtuGO+d1Ewz1?GW z5C6%bQi-aU$ZARI2tK5$qpH*hSK>>heC(%w_MwwFt%OuHjUv8OrX*67PknFpZg0^2z)Jm-64k?(r0Fc{DB47>duv$OntS z{01}<4z)~qWPZvU;r&^8i~<8tⅇuUVMER^t1zCIS zq4rRjo9Cy(U}3@vP3kn0o0uG%l*Ay%P3U`fu7Qz0uECZWQ#WM*=%7ly-O0m>0NW2EgWvYMF9n!=!>nW;4E$3`q;W0S(1 zqRIN4U?_`(Nk+r!VsJ=KFNiA9X3X591Zc(lG?)SAXQ`AYhA>UcIYKl6r3e{aE#@`D zB@L9hr40#s0p};Fn+#*G*DbTPyMz=ZS)7wD2!qqQu5KyfL{^xjMIyjx*V@i0I1zh6 zo^wquC5#9!q=toKBr6qF(-2Hu7?V_kxb)WT88l3~rPfK$w`_f+C6)k`)x@1 zf=Wb+`f%5`wAPg93^D6&$KFDx!}nVCjO&_vA@W2Ovw1~(yl@zxwSB+Rv2^%;T=SVHF%aNfuIv#g8YD59y0^0cTb8j_F0 z82A*>%@ngJgB7|UABFR-T_6-^itb;Rxb^04K3_OeoRQG4vZ^j^?Y3-8{lExwIe;5m zGxMhwqmCVIU4qf!=UoT7pwEG0K0(>>`2u*qW-_-$kj_gpnmudR)P@8yJTp8bjIv=w zBXnkANO*y9ObrY$Gs8m5hHN@k=m0tM+!OZTv_XV7Cf`KKfEAJe%Y|N3KV^yraRCqy z)_+2kG_v5DE({YjcuN>W#Y4h?!A1-KJW#>*-Yt;=(6l*RRxT*G>980kRU~Z&?4Znw zIik$u06nJ{Snr%71KffwY~8@hX-Sh?BG8}&uqdEalBz692$m_A$ryn^H5f;zk{N<9 zNQAShGEH?u8V3AOha0bwv*?5lS4U18de9nOD8jA@s#suyhFh$dAdeA?;TEA>RRx(C z8LF_5g|jk>g@R5^A~TV~1xYOuqeU>FZ$^cn4mfxA+2^#vu%czpZl2)D#&I1IR3cq~ za|NoJi>pFdl@vfzNC%iH1tOiZm7x*n(k))K$KKXG_7umgh$~2pQx7C6_f7%%+D=(E zUbWin7cw1o3!?#`m@+;12hVYz@>e+%o{iF)^sKmg8Y4Kabqvpy_!1|ga#l(n;84EJ zd4cmeoDaB0ddQX9fNOL50>GhdtdxSlK?+spLcoO_E({z?Ke4400WRY7MS+VtTnxCF z!?gp~?r?G7;tkH+-KcTgtg~7k&iqlc-Q3r#m;BdZ#y6X@_Fjjzy6LQ!%&&Pijo?H` zlNQ`3`hZRZ;5#p_!>b3mf z#)MiyQG|ePilXIb>5&y~rhZl!a*TT7W(Df`mlbj48CDR{1(SkCO`5mb?67GC>Y>ew zxyi)5PA@|&1OTWxmsV}(%vv}7oT z6?RZSwi4oWQBh?Ktafjz)AAWa%~ITBid#z)NN_vp(?~jzY(s)uL2(BtEzg+wuO;t3hq!|eN#&BVQqQ@?FI(LcT#`e7i?Z%ftzPrSI zyN?UCe;8N_+zIyH4)#`pebr#!da$q7-B9#;Sp~u8-eq;Y|G=ML{_Bgk_Eq{1RQnHn@l2)v zc(wod`su0lo->u6Gu56mOl)!&lT)`R(z(I$p}nF6(7Mr!&l2CNxzyHb7t`Y)JKebzt}_b}epp$H zJ|v|LFg~{UH6RLHt%%^tPMzC1CPILkkcSzNwHRjBbR}E%R|7X_!9{D5T~kwO$TLBz z0QYW!5Y)VEzDvv^)!=i3`$hY$^doF=>6^$&1+wYI`LuB6s4y=~zK2QanRH$^{gNZK*3VYtFp8>WMh`WleE z%2qpNmWz0kQpvoX8qK7ap8wZx7mgpk!CPL4W+|Q_eky7CG`PoEp`vEMHI^(A7BMU@ zT(4cB_ZMJm*+K=u0JPXd4Zd$x<@o!uFDT|*l5NztZLorpsSf7C;QxsbR zh~1Wh5yl6wu$Q=vUM?D2dg;nLAH7n3rIzTp8u=u0?a0UN%kAYOwMcyF;0EUjo#gNK zB(LrNEWPSmi&u8M{KaUc?_j0pP_^e!`SrVr?&V-L(SIi~a62)u%CEkD>u2lF9{T%< z^~69WF5pUOBiIB>!G~VL z=|-WB>>91^8of1INxWE1yvSCq8rg|s57iP~SL2_=SNPS*wfC>Y zD~W^E#6cJlPn3fcv&alOccWl%V1eyFP=R#3(GJg9XWWVttX$m~s_;!#p#I3P1?|lw z+ZCRm-3d@ocS6(}e;vH)rW3{>7`ec2)(JqH!EeP0Ee(tC*_yIKqU*psOI$tUset7% zsD02-kL6>jCQwK-y_hljFzHLRTY>uR&S2EDq1E70y!3 z!ixFAg5_slA=ueVdDsbKbpsb!5ZFngd!WA9=gPu1rJKKpej^Ly5_h+2$MVTLT?4nf z25Rwb)%er3&YtCGz^#{GsdaQO@3KjvYdOkl8*#3C@IDvp2KNS14t?9n^*voprdCsx zbSb1ml*m`obk{qojM{k{|Bo9^MhpX|!>%qehPaUVtrj8bE zHTBY_$&^^`22XV<3Qes1-Hf(xhbI3YF7kifc|!1F>TDOG}1eGyq+Dq18Y< zdCm53%xftUG}EX6fonzMx8Zr&AKc)Olz6VArxxE?>*}p_bb3q1^zdNLqv;jTa_Bzm`(}#~%#{BD$uGVW literal 0 HcmV?d00001 diff --git 
diff --git a/TTS/utils/audio/numpy_transforms.py b/TTS/utils/audio/numpy_transforms.py
new file mode 100644
index 0000000..af88569
--- /dev/null
+++ b/TTS/utils/audio/numpy_transforms.py
@@ -0,0 +1,485 @@
+from io import BytesIO
+from typing import Tuple
+
+import librosa
+import numpy as np
+import scipy
+import soundfile as sf
+from librosa import magphase, pyin
+
+# For using kwargs
+# pylint: disable=unused-argument
+
+
+def build_mel_basis(
+    *,
+    sample_rate: int = None,
+    fft_size: int = None,
+    num_mels: int = None,
+    mel_fmax: int = None,
+    mel_fmin: int = None,
+    **kwargs,
+) -> np.ndarray:
+    """Build melspectrogram basis.
+
+    Returns:
+        np.ndarray: melspectrogram basis.
+    """
+    if mel_fmax is not None:
+        assert mel_fmax <= sample_rate // 2
+        assert mel_fmax - mel_fmin > 0
+    return librosa.filters.mel(sr=sample_rate, n_fft=fft_size, n_mels=num_mels, fmin=mel_fmin, fmax=mel_fmax)
+
+
+def millisec_to_length(
+    *, frame_length_ms: int = None, frame_shift_ms: int = None, sample_rate: int = None, **kwargs
+) -> Tuple[int, int]:
+    """Compute hop and window length from milliseconds.
+
+    Returns:
+        Tuple[int, int]: window length and hop length for STFT.
+    """
+    factor = frame_length_ms / frame_shift_ms
+    assert (factor).is_integer(), " [!] frame_shift_ms should divide frame_length_ms"
+    win_length = int(frame_length_ms / 1000.0 * sample_rate)
+    hop_length = int(win_length / float(factor))
+    return win_length, hop_length
+
+
+def _log(x, base):
+    if base == 10:
+        return np.log10(x)
+    return np.log(x)
+
+
+def _exp(x, base):
+    if base == 10:
+        return np.power(10, x)
+    return np.exp(x)
+
+
+def amp_to_db(*, x: np.ndarray = None, gain: float = 1, base: int = 10, **kwargs) -> np.ndarray:
+    """Convert amplitude values to decibels.
+
+    Args:
+        x (np.ndarray): Amplitude spectrogram.
+        gain (float): Gain factor. Defaults to 1.
+        base (int): Logarithm base. Defaults to 10.
+
+    Returns:
+        np.ndarray: Decibels spectrogram.
+    """
+    assert (x < 0).sum() == 0, " [!] Input values must be non-negative."
+    return gain * _log(np.maximum(1e-8, x), base)
+
+
+# pylint: disable=no-self-use
+def db_to_amp(*, x: np.ndarray = None, gain: float = 1, base: int = 10, **kwargs) -> np.ndarray:
+    """Convert decibels spectrogram to amplitude spectrogram.
+
+    Args:
+        x (np.ndarray): Decibels spectrogram.
+        gain (float): Gain factor. Defaults to 1.
+        base (int): Logarithm base. Defaults to 10.
+
+    Returns:
+        np.ndarray: Amplitude spectrogram.
+    """
+    return _exp(x / gain, base)
+
+
+def preemphasis(*, x: np.ndarray, coef: float = 0.97, **kwargs) -> np.ndarray:
+    """Apply pre-emphasis to the audio signal. Useful to reduce the correlation between neighbouring signal values.
+
+    Args:
+        x (np.ndarray): Audio signal.
+
+    Raises:
+        RuntimeError: Preemphasis coeff is set to 0.
+
+    Returns:
+        np.ndarray: Decorrelated audio signal.
+    """
+    if coef == 0:
+        raise RuntimeError(" [!] Preemphasis is set 0.0.")
+    return scipy.signal.lfilter([1, -coef], [1], x)
+
+
+def deemphasis(*, x: np.ndarray = None, coef: float = 0.97, **kwargs) -> np.ndarray:
+    """Reverse pre-emphasis."""
+    if coef == 0:
+        raise RuntimeError(" [!] Preemphasis is set 0.0.")
+    return scipy.signal.lfilter([1], [1, -coef], x)
+
+
+def spec_to_mel(*, spec: np.ndarray, mel_basis: np.ndarray = None, **kwargs) -> np.ndarray:
+    """Convert a full scale linear spectrogram output of a network to a melspectrogram.
+
+    Args:
+        spec (np.ndarray): Normalized full scale linear spectrogram.
+
+    Shapes:
+        - spec: :math:`[C, T]`
+
+    Returns:
+        np.ndarray: Normalized melspectrogram.
+ """ + return np.dot(mel_basis, spec) + + +def mel_to_spec(*, mel: np.ndarray = None, mel_basis: np.ndarray = None, **kwargs) -> np.ndarray: + """Convert a melspectrogram to full scale spectrogram.""" + assert (mel < 0).sum() == 0, " [!] Input values must be non-negative." + inv_mel_basis = np.linalg.pinv(mel_basis) + return np.maximum(1e-10, np.dot(inv_mel_basis, mel)) + + +def wav_to_spec(*, wav: np.ndarray = None, **kwargs) -> np.ndarray: + """Compute a spectrogram from a waveform. + + Args: + wav (np.ndarray): Waveform. Shape :math:`[T_wav,]` + + Returns: + np.ndarray: Spectrogram. Shape :math:`[C, T_spec]`. :math:`T_spec == T_wav / hop_length` + """ + D = stft(y=wav, **kwargs) + S = np.abs(D) + return S.astype(np.float32) + + +def wav_to_mel(*, wav: np.ndarray = None, mel_basis=None, **kwargs) -> np.ndarray: + """Compute a melspectrogram from a waveform.""" + D = stft(y=wav, **kwargs) + S = spec_to_mel(spec=np.abs(D), mel_basis=mel_basis, **kwargs) + return S.astype(np.float32) + + +def spec_to_wav(*, spec: np.ndarray, power: float = 1.5, **kwargs) -> np.ndarray: + """Convert a spectrogram to a waveform using Griffi-Lim vocoder.""" + S = spec.copy() + return griffin_lim(spec=S**power, **kwargs) + + +def mel_to_wav(*, mel: np.ndarray = None, power: float = 1.5, **kwargs) -> np.ndarray: + """Convert a melspectrogram to a waveform using Griffi-Lim vocoder.""" + S = mel.copy() + S = mel_to_spec(mel=S, mel_basis=kwargs["mel_basis"]) # Convert back to linear + return griffin_lim(spec=S**power, **kwargs) + + +### STFT and ISTFT ### +def stft( + *, + y: np.ndarray = None, + fft_size: int = None, + hop_length: int = None, + win_length: int = None, + pad_mode: str = "reflect", + window: str = "hann", + center: bool = True, + **kwargs, +) -> np.ndarray: + """Librosa STFT wrapper. + + Check http://librosa.org/doc/main/generated/librosa.stft.html argument details. + + Returns: + np.ndarray: Complex number array. + """ + return librosa.stft( + y=y, + n_fft=fft_size, + hop_length=hop_length, + win_length=win_length, + pad_mode=pad_mode, + window=window, + center=center, + ) + + +def istft( + *, + y: np.ndarray = None, + hop_length: int = None, + win_length: int = None, + window: str = "hann", + center: bool = True, + **kwargs, +) -> np.ndarray: + """Librosa iSTFT wrapper. + + Check http://librosa.org/doc/main/generated/librosa.istft.html argument details. + + Returns: + np.ndarray: Complex number array. + """ + return librosa.istft(y, hop_length=hop_length, win_length=win_length, center=center, window=window) + + +def griffin_lim(*, spec: np.ndarray = None, num_iter=60, **kwargs) -> np.ndarray: + angles = np.exp(2j * np.pi * np.random.rand(*spec.shape)) + S_complex = np.abs(spec).astype(complex) + y = istft(y=S_complex * angles, **kwargs) + if not np.isfinite(y).all(): + print(" [!] Waveform is not finite everywhere. Skipping the GL.") + return np.array([0.0]) + for _ in range(num_iter): + angles = np.exp(1j * np.angle(stft(y=y, **kwargs))) + y = istft(y=S_complex * angles, **kwargs) + return y + + +def compute_stft_paddings( + *, x: np.ndarray = None, hop_length: int = None, pad_two_sides: bool = False, **kwargs +) -> Tuple[int, int]: + """Compute paddings used by Librosa's STFT. 
Compute right padding (final frame) or both sides padding + (first and final frames)""" + pad = (x.shape[0] // hop_length + 1) * hop_length - x.shape[0] + if not pad_two_sides: + return 0, pad + return pad // 2, pad // 2 + pad % 2 + + +def compute_f0( + *, + x: np.ndarray = None, + pitch_fmax: float = None, + pitch_fmin: float = None, + hop_length: int = None, + win_length: int = None, + sample_rate: int = None, + stft_pad_mode: str = "reflect", + center: bool = True, + **kwargs, +) -> np.ndarray: + """Compute pitch (f0) of a waveform using the same parameters used for computing melspectrogram. + + Args: + x (np.ndarray): Waveform. Shape :math:`[T_wav,]` + pitch_fmax (float): Pitch max value. + pitch_fmin (float): Pitch min value. + hop_length (int): Number of frames between STFT columns. + win_length (int): STFT window length. + sample_rate (int): Audio sampling rate. + stft_pad_mode (str): Padding mode for STFT. + center (bool): Centered padding. + + Returns: + np.ndarray: Pitch. Shape :math:`[T_pitch,]`. :math:`T_pitch == T_wav / hop_length` + + Examples: + >>> WAV_FILE = filename = librosa.example('vibeace') + >>> from TTS.config import BaseAudioConfig + >>> from TTS.utils.audio import AudioProcessor + >>> conf = BaseAudioConfig(pitch_fmax=640, pitch_fmin=1) + >>> ap = AudioProcessor(**conf) + >>> wav = ap.load_wav(WAV_FILE, sr=ap.sample_rate)[:5 * ap.sample_rate] + >>> pitch = ap.compute_f0(wav) + """ + assert pitch_fmax is not None, " [!] Set `pitch_fmax` before caling `compute_f0`." + assert pitch_fmin is not None, " [!] Set `pitch_fmin` before caling `compute_f0`." + + f0, voiced_mask, _ = pyin( + y=x.astype(np.double), + fmin=pitch_fmin, + fmax=pitch_fmax, + sr=sample_rate, + frame_length=win_length, + win_length=win_length // 2, + hop_length=hop_length, + pad_mode=stft_pad_mode, + center=center, + n_thresholds=100, + beta_parameters=(2, 18), + boltzmann_parameter=2, + resolution=0.1, + max_transition_rate=35.92, + switch_prob=0.01, + no_trough_prob=0.01, + ) + f0[~voiced_mask] = 0.0 + + return f0 + + +def compute_energy(y: np.ndarray, **kwargs) -> np.ndarray: + """Compute energy of a waveform using the same parameters used for computing melspectrogram. + Args: + x (np.ndarray): Waveform. Shape :math:`[T_wav,]` + Returns: + np.ndarray: energy. Shape :math:`[T_energy,]`. :math:`T_energy == T_wav / hop_length` + Examples: + >>> WAV_FILE = filename = librosa.example('vibeace') + >>> from TTS.config import BaseAudioConfig + >>> from TTS.utils.audio import AudioProcessor + >>> conf = BaseAudioConfig() + >>> ap = AudioProcessor(**conf) + >>> wav = ap.load_wav(WAV_FILE, sr=ap.sample_rate)[:5 * ap.sample_rate] + >>> energy = ap.compute_energy(wav) + """ + x = stft(y=y, **kwargs) + mag, _ = magphase(x) + energy = np.sqrt(np.sum(mag**2, axis=0)) + return energy + + +### Audio Processing ### +def find_endpoint( + *, + wav: np.ndarray = None, + trim_db: float = -40, + sample_rate: int = None, + min_silence_sec=0.8, + gain: float = None, + base: int = None, + **kwargs, +) -> int: + """Find the last point without silence at the end of a audio signal. + + Args: + wav (np.ndarray): Audio signal. + threshold_db (int, optional): Silence threshold in decibels. Defaults to -40. + min_silence_sec (float, optional): Ignore silences that are shorter then this in secs. Defaults to 0.8. + gian (float, optional): Gain to be used to convert trim_db to trim_amp. Defaults to None. + base (int, optional): Base of the logarithm used to convert trim_db to trim_amp. Defaults to 10. 
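A short sketch of the functional pitch and energy extractors above; the pitch bounds and frame settings are illustrative and should match whatever mel configuration is used elsewhere.

import numpy as np
from TTS.utils.audio.numpy_transforms import compute_energy, compute_f0

wav = np.random.uniform(-1, 1, 22050).astype(np.float32)
f0 = compute_f0(x=wav, pitch_fmin=65, pitch_fmax=600, hop_length=256,
                win_length=1024, sample_rate=22050)              # pyin-based pitch track
energy = compute_energy(wav, fft_size=1024, hop_length=256, win_length=1024)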
+ + Returns: + int: Last point without silence. + """ + window_length = int(sample_rate * min_silence_sec) + hop_length = int(window_length / 4) + threshold = db_to_amp(x=-trim_db, gain=gain, base=base) + for x in range(hop_length, len(wav) - window_length, hop_length): + if np.max(wav[x : x + window_length]) < threshold: + return x + hop_length + return len(wav) + + +def trim_silence( + *, + wav: np.ndarray = None, + sample_rate: int = None, + trim_db: float = None, + win_length: int = None, + hop_length: int = None, + **kwargs, +) -> np.ndarray: + """Trim silent parts with a threshold and 0.01 sec margin""" + margin = int(sample_rate * 0.01) + wav = wav[margin:-margin] + return librosa.effects.trim(wav, top_db=trim_db, frame_length=win_length, hop_length=hop_length)[0] + + +def volume_norm(*, x: np.ndarray = None, coef: float = 0.95, **kwargs) -> np.ndarray: + """Normalize the volume of an audio signal. + + Args: + x (np.ndarray): Raw waveform. + coef (float): Coefficient to rescale the maximum value. Defaults to 0.95. + + Returns: + np.ndarray: Volume normalized waveform. + """ + return x / abs(x).max() * coef + + +def rms_norm(*, wav: np.ndarray = None, db_level: float = -27.0, **kwargs) -> np.ndarray: + r = 10 ** (db_level / 20) + a = np.sqrt((len(wav) * (r**2)) / np.sum(wav**2)) + return wav * a + + +def rms_volume_norm(*, x: np.ndarray, db_level: float = -27.0, **kwargs) -> np.ndarray: + """Normalize the volume based on RMS of the signal. + + Args: + x (np.ndarray): Raw waveform. + db_level (float): Target dB level in RMS. Defaults to -27.0. + + Returns: + np.ndarray: RMS normalized waveform. + """ + assert -99 <= db_level <= 0, " [!] db_level should be between -99 and 0" + wav = rms_norm(wav=x, db_level=db_level) + return wav + + +def load_wav(*, filename: str, sample_rate: int = None, resample: bool = False, **kwargs) -> np.ndarray: + """Read a wav file using Librosa and optionally resample, silence trim, volume normalize. + + Resampling slows down loading the file significantly. Therefore it is recommended to resample the file before. + + Args: + filename (str): Path to the wav file. + sr (int, optional): Sampling rate for resampling. Defaults to None. + resample (bool, optional): Resample the audio file when loading. Slows down the I/O time. Defaults to False. + + Returns: + np.ndarray: Loaded waveform. + """ + if resample: + # loading with resampling. It is significantly slower. + x, _ = librosa.load(filename, sr=sample_rate) + else: + # SF is faster than librosa for loading files + x, _ = sf.read(filename) + return x + + +def save_wav(*, wav: np.ndarray, path: str, sample_rate: int = None, pipe_out=None, **kwargs) -> None: + """Save float waveform to a file using Scipy. + + Args: + wav (np.ndarray): Waveform with float values in range [-1, 1] to save. + path (str): Path to a output file. + sr (int, optional): Sampling rate used for saving to the file. Defaults to None. + pipe_out (BytesIO, optional): Flag to stdout the generated TTS wav file for shell pipe. 
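A loading-and-normalization sketch with the helpers above; the file name is a placeholder and the trimming threshold is illustrative.

from TTS.utils.audio.numpy_transforms import load_wav, rms_volume_norm, trim_silence

wav = load_wav(filename="example.wav", sample_rate=22050, resample=True)   # placeholder path
wav = trim_silence(wav=wav, sample_rate=22050, trim_db=45, win_length=1024, hop_length=256)
wav = rms_volume_norm(x=wav, db_level=-27.0)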
+ """ + wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav)))) + + wav_norm = wav_norm.astype(np.int16) + if pipe_out: + wav_buffer = BytesIO() + scipy.io.wavfile.write(wav_buffer, sample_rate, wav_norm) + wav_buffer.seek(0) + pipe_out.buffer.write(wav_buffer.read()) + scipy.io.wavfile.write(path, sample_rate, wav_norm) + + +def mulaw_encode(*, wav: np.ndarray, mulaw_qc: int, **kwargs) -> np.ndarray: + mu = 2**mulaw_qc - 1 + signal = np.sign(wav) * np.log(1 + mu * np.abs(wav)) / np.log(1.0 + mu) + signal = (signal + 1) / 2 * mu + 0.5 + return np.floor( + signal, + ) + + +def mulaw_decode(*, wav, mulaw_qc: int, **kwargs) -> np.ndarray: + """Recovers waveform from quantized values.""" + mu = 2**mulaw_qc - 1 + x = np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1) + return x + + +def encode_16bits(*, x: np.ndarray, **kwargs) -> np.ndarray: + return np.clip(x * 2**15, -(2**15), 2**15 - 1).astype(np.int16) + + +def quantize(*, x: np.ndarray, quantize_bits: int, **kwargs) -> np.ndarray: + """Quantize a waveform to a given number of bits. + + Args: + x (np.ndarray): Waveform to quantize. Must be normalized into the range `[-1, 1]`. + quantize_bits (int): Number of quantization bits. + + Returns: + np.ndarray: Quantized waveform. + """ + return (x + 1.0) * (2**quantize_bits - 1) / 2 + + +def dequantize(*, x, quantize_bits, **kwargs) -> np.ndarray: + """Dequantize a waveform from the given number of bits.""" + return 2 * x / (2**quantize_bits - 1) - 1 diff --git a/TTS/utils/audio/processor.py b/TTS/utils/audio/processor.py new file mode 100644 index 0000000..c53bad5 --- /dev/null +++ b/TTS/utils/audio/processor.py @@ -0,0 +1,633 @@ +from io import BytesIO +from typing import Dict, Tuple + +import librosa +import numpy as np +import scipy.io.wavfile +import scipy.signal + +from TTS.tts.utils.helpers import StandardScaler +from TTS.utils.audio.numpy_transforms import ( + amp_to_db, + build_mel_basis, + compute_f0, + db_to_amp, + deemphasis, + find_endpoint, + griffin_lim, + load_wav, + mel_to_spec, + millisec_to_length, + preemphasis, + rms_volume_norm, + spec_to_mel, + stft, + trim_silence, + volume_norm, +) + +# pylint: disable=too-many-public-methods + + +class AudioProcessor(object): + """Audio Processor for TTS. + + Note: + All the class arguments are set to default values to enable a flexible initialization + of the class with the model config. They are not meaningful for all the arguments. + + Args: + sample_rate (int, optional): + target audio sampling rate. Defaults to None. + + resample (bool, optional): + enable/disable resampling of the audio clips when the target sampling rate does not match the original sampling rate. Defaults to False. + + num_mels (int, optional): + number of melspectrogram dimensions. Defaults to None. + + log_func (int, optional): + log exponent used for converting spectrogram aplitude to DB. + + min_level_db (int, optional): + minimum db threshold for the computed melspectrograms. Defaults to None. + + frame_shift_ms (int, optional): + milliseconds of frames between STFT columns. Defaults to None. + + frame_length_ms (int, optional): + milliseconds of STFT window length. Defaults to None. + + hop_length (int, optional): + number of frames between STFT columns. Used if ```frame_shift_ms``` is None. Defaults to None. + + win_length (int, optional): + STFT window length. Used if ```frame_length_ms``` is None. Defaults to None. + + ref_level_db (int, optional): + reference DB level to avoid background noise. In general <20DB corresponds to the air noise. 
Defaults to None. + + fft_size (int, optional): + FFT window size for STFT. Defaults to 1024. + + power (int, optional): + Exponent value applied to the spectrogram before GriffinLim. Defaults to None. + + preemphasis (float, optional): + Preemphasis coefficient. Preemphasis is disabled if == 0.0. Defaults to 0.0. + + signal_norm (bool, optional): + enable/disable signal normalization. Defaults to None. + + symmetric_norm (bool, optional): + enable/disable symmetric normalization. If set True normalization is performed in the range [-k, k] else [0, k], Defaults to None. + + max_norm (float, optional): + ```k``` defining the normalization range. Defaults to None. + + mel_fmin (int, optional): + minimum filter frequency for computing melspectrograms. Defaults to None. + + mel_fmax (int, optional): + maximum filter frequency for computing melspectrograms. Defaults to None. + + pitch_fmin (int, optional): + minimum filter frequency for computing pitch. Defaults to None. + + pitch_fmax (int, optional): + maximum filter frequency for computing pitch. Defaults to None. + + spec_gain (int, optional): + gain applied when converting amplitude to DB. Defaults to 20. + + stft_pad_mode (str, optional): + Padding mode for STFT. Defaults to 'reflect'. + + clip_norm (bool, optional): + enable/disable clipping the our of range values in the normalized audio signal. Defaults to True. + + griffin_lim_iters (int, optional): + Number of GriffinLim iterations. Defaults to None. + + do_trim_silence (bool, optional): + enable/disable silence trimming when loading the audio signal. Defaults to False. + + trim_db (int, optional): + DB threshold used for silence trimming. Defaults to 60. + + do_sound_norm (bool, optional): + enable/disable signal normalization. Defaults to False. + + do_amp_to_db_linear (bool, optional): + enable/disable amplitude to dB conversion of linear spectrograms. Defaults to True. + + do_amp_to_db_mel (bool, optional): + enable/disable amplitude to dB conversion of mel spectrograms. Defaults to True. + + do_rms_norm (bool, optional): + enable/disable RMS volume normalization when loading an audio file. Defaults to False. + + db_level (int, optional): + dB level used for rms normalization. The range is -99 to 0. Defaults to None. + + stats_path (str, optional): + Path to the computed stats file. Defaults to None. + + verbose (bool, optional): + enable/disable logging. Defaults to True. 
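Putting the arguments above together, a minimal construction-and-use sketch; the values mirror a common 22.05 kHz setup and are illustrative rather than defaults mandated by this class.

import numpy as np
from TTS.utils.audio.processor import AudioProcessor

ap = AudioProcessor(sample_rate=22050, num_mels=80, fft_size=1024, win_length=1024,
                    hop_length=256, mel_fmin=0, mel_fmax=8000, min_level_db=-100,
                    power=1.5, griffin_lim_iters=30)
wav = np.random.uniform(-1, 1, 22050).astype(np.float32)   # noise stands in for speech
mel = ap.melspectrogram(wav)                                # [num_mels, T]
wav_hat = ap.inv_melspectrogram(mel)                        # Griffin-Lim reconstruction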
+ + """ + + def __init__( + self, + sample_rate=None, + resample=False, + num_mels=None, + log_func="np.log10", + min_level_db=None, + frame_shift_ms=None, + frame_length_ms=None, + hop_length=None, + win_length=None, + ref_level_db=None, + fft_size=1024, + power=None, + preemphasis=0.0, + signal_norm=None, + symmetric_norm=None, + max_norm=None, + mel_fmin=None, + mel_fmax=None, + pitch_fmax=None, + pitch_fmin=None, + spec_gain=20, + stft_pad_mode="reflect", + clip_norm=True, + griffin_lim_iters=None, + do_trim_silence=False, + trim_db=60, + do_sound_norm=False, + do_amp_to_db_linear=True, + do_amp_to_db_mel=True, + do_rms_norm=False, + db_level=None, + stats_path=None, + verbose=True, + **_, + ): + # setup class attributed + self.sample_rate = sample_rate + self.resample = resample + self.num_mels = num_mels + self.log_func = log_func + self.min_level_db = min_level_db or 0 + self.frame_shift_ms = frame_shift_ms + self.frame_length_ms = frame_length_ms + self.ref_level_db = ref_level_db + self.fft_size = fft_size + self.power = power + self.preemphasis = preemphasis + self.griffin_lim_iters = griffin_lim_iters + self.signal_norm = signal_norm + self.symmetric_norm = symmetric_norm + self.mel_fmin = mel_fmin or 0 + self.mel_fmax = mel_fmax + self.pitch_fmin = pitch_fmin + self.pitch_fmax = pitch_fmax + self.spec_gain = float(spec_gain) + self.stft_pad_mode = stft_pad_mode + self.max_norm = 1.0 if max_norm is None else float(max_norm) + self.clip_norm = clip_norm + self.do_trim_silence = do_trim_silence + self.trim_db = trim_db + self.do_sound_norm = do_sound_norm + self.do_amp_to_db_linear = do_amp_to_db_linear + self.do_amp_to_db_mel = do_amp_to_db_mel + self.do_rms_norm = do_rms_norm + self.db_level = db_level + self.stats_path = stats_path + # setup exp_func for db to amp conversion + if log_func == "np.log": + self.base = np.e + elif log_func == "np.log10": + self.base = 10 + else: + raise ValueError(" [!] unknown `log_func` value.") + # setup stft parameters + if hop_length is None: + # compute stft parameters from given time values + self.win_length, self.hop_length = millisec_to_length( + frame_length_ms=self.frame_length_ms, frame_shift_ms=self.frame_shift_ms, sample_rate=self.sample_rate + ) + else: + # use stft parameters from config file + self.hop_length = hop_length + self.win_length = win_length + assert min_level_db != 0.0, " [!] min_level_db is 0" + assert ( + self.win_length <= self.fft_size + ), f" [!] 
win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}" + members = vars(self) + if verbose: + print(" > Setting up Audio Processor...") + for key, value in members.items(): + print(" | > {}:{}".format(key, value)) + # create spectrogram utils + self.mel_basis = build_mel_basis( + sample_rate=self.sample_rate, + fft_size=self.fft_size, + num_mels=self.num_mels, + mel_fmax=self.mel_fmax, + mel_fmin=self.mel_fmin, + ) + # setup scaler + if stats_path and signal_norm: + mel_mean, mel_std, linear_mean, linear_std, _ = self.load_stats(stats_path) + self.setup_scaler(mel_mean, mel_std, linear_mean, linear_std) + self.signal_norm = True + self.max_norm = None + self.clip_norm = None + self.symmetric_norm = None + + @staticmethod + def init_from_config(config: "Coqpit", verbose=True): + if "audio" in config: + return AudioProcessor(verbose=verbose, **config.audio) + return AudioProcessor(verbose=verbose, **config) + + ### normalization ### + def normalize(self, S: np.ndarray) -> np.ndarray: + """Normalize values into `[0, self.max_norm]` or `[-self.max_norm, self.max_norm]` + + Args: + S (np.ndarray): Spectrogram to normalize. + + Raises: + RuntimeError: Mean and variance is computed from incompatible parameters. + + Returns: + np.ndarray: Normalized spectrogram. + """ + # pylint: disable=no-else-return + S = S.copy() + if self.signal_norm: + # mean-var scaling + if hasattr(self, "mel_scaler"): + if S.shape[0] == self.num_mels: + return self.mel_scaler.transform(S.T).T + elif S.shape[0] == self.fft_size / 2: + return self.linear_scaler.transform(S.T).T + else: + raise RuntimeError(" [!] Mean-Var stats does not match the given feature dimensions.") + # range normalization + S -= self.ref_level_db # discard certain range of DB assuming it is air noise + S_norm = (S - self.min_level_db) / (-self.min_level_db) + if self.symmetric_norm: + S_norm = ((2 * self.max_norm) * S_norm) - self.max_norm + if self.clip_norm: + S_norm = np.clip( + S_norm, -self.max_norm, self.max_norm # pylint: disable=invalid-unary-operand-type + ) + return S_norm + else: + S_norm = self.max_norm * S_norm + if self.clip_norm: + S_norm = np.clip(S_norm, 0, self.max_norm) + return S_norm + else: + return S + + def denormalize(self, S: np.ndarray) -> np.ndarray: + """Denormalize spectrogram values. + + Args: + S (np.ndarray): Spectrogram to denormalize. + + Raises: + RuntimeError: Mean and variance are incompatible. + + Returns: + np.ndarray: Denormalized spectrogram. + """ + # pylint: disable=no-else-return + S_denorm = S.copy() + if self.signal_norm: + # mean-var scaling + if hasattr(self, "mel_scaler"): + if S_denorm.shape[0] == self.num_mels: + return self.mel_scaler.inverse_transform(S_denorm.T).T + elif S_denorm.shape[0] == self.fft_size / 2: + return self.linear_scaler.inverse_transform(S_denorm.T).T + else: + raise RuntimeError(" [!] 
Mean-Var stats does not match the given feature dimensions.") + if self.symmetric_norm: + if self.clip_norm: + S_denorm = np.clip( + S_denorm, -self.max_norm, self.max_norm # pylint: disable=invalid-unary-operand-type + ) + S_denorm = ((S_denorm + self.max_norm) * -self.min_level_db / (2 * self.max_norm)) + self.min_level_db + return S_denorm + self.ref_level_db + else: + if self.clip_norm: + S_denorm = np.clip(S_denorm, 0, self.max_norm) + S_denorm = (S_denorm * -self.min_level_db / self.max_norm) + self.min_level_db + return S_denorm + self.ref_level_db + else: + return S_denorm + + ### Mean-STD scaling ### + def load_stats(self, stats_path: str) -> Tuple[np.array, np.array, np.array, np.array, Dict]: + """Loading mean and variance statistics from a `npy` file. + + Args: + stats_path (str): Path to the `npy` file containing + + Returns: + Tuple[np.array, np.array, np.array, np.array, Dict]: loaded statistics and the config used to + compute them. + """ + stats = np.load(stats_path, allow_pickle=True).item() # pylint: disable=unexpected-keyword-arg + mel_mean = stats["mel_mean"] + mel_std = stats["mel_std"] + linear_mean = stats["linear_mean"] + linear_std = stats["linear_std"] + stats_config = stats["audio_config"] + # check all audio parameters used for computing stats + skip_parameters = ["griffin_lim_iters", "stats_path", "do_trim_silence", "ref_level_db", "power"] + for key in stats_config.keys(): + if key in skip_parameters: + continue + if key not in ["sample_rate", "trim_db"]: + assert ( + stats_config[key] == self.__dict__[key] + ), f" [!] Audio param {key} does not match the value used for computing mean-var stats. {stats_config[key]} vs {self.__dict__[key]}" + return mel_mean, mel_std, linear_mean, linear_std, stats_config + + # pylint: disable=attribute-defined-outside-init + def setup_scaler( + self, mel_mean: np.ndarray, mel_std: np.ndarray, linear_mean: np.ndarray, linear_std: np.ndarray + ) -> None: + """Initialize scaler objects used in mean-std normalization. + + Args: + mel_mean (np.ndarray): Mean for melspectrograms. + mel_std (np.ndarray): STD for melspectrograms. + linear_mean (np.ndarray): Mean for full scale spectrograms. + linear_std (np.ndarray): STD for full scale spectrograms. + """ + self.mel_scaler = StandardScaler() + self.mel_scaler.set_stats(mel_mean, mel_std) + self.linear_scaler = StandardScaler() + self.linear_scaler.set_stats(linear_mean, linear_std) + + ### Preemphasis ### + def apply_preemphasis(self, x: np.ndarray) -> np.ndarray: + """Apply pre-emphasis to the audio signal. Useful to reduce the correlation between neighbouring signal values. + + Args: + x (np.ndarray): Audio signal. + + Raises: + RuntimeError: Preemphasis coeff is set to 0. + + Returns: + np.ndarray: Decorrelated audio signal. + """ + return preemphasis(x=x, coef=self.preemphasis) + + def apply_inv_preemphasis(self, x: np.ndarray) -> np.ndarray: + """Reverse pre-emphasis.""" + return deemphasis(x=x, coef=self.preemphasis) + + ### SPECTROGRAMs ### + def spectrogram(self, y: np.ndarray) -> np.ndarray: + """Compute a spectrogram from a waveform. + + Args: + y (np.ndarray): Waveform. + + Returns: + np.ndarray: Spectrogram. 
+ """ + if self.preemphasis != 0: + y = self.apply_preemphasis(y) + D = stft( + y=y, + fft_size=self.fft_size, + hop_length=self.hop_length, + win_length=self.win_length, + pad_mode=self.stft_pad_mode, + ) + if self.do_amp_to_db_linear: + S = amp_to_db(x=np.abs(D), gain=self.spec_gain, base=self.base) + else: + S = np.abs(D) + return self.normalize(S).astype(np.float32) + + def melspectrogram(self, y: np.ndarray) -> np.ndarray: + """Compute a melspectrogram from a waveform.""" + if self.preemphasis != 0: + y = self.apply_preemphasis(y) + D = stft( + y=y, + fft_size=self.fft_size, + hop_length=self.hop_length, + win_length=self.win_length, + pad_mode=self.stft_pad_mode, + ) + S = spec_to_mel(spec=np.abs(D), mel_basis=self.mel_basis) + if self.do_amp_to_db_mel: + S = amp_to_db(x=S, gain=self.spec_gain, base=self.base) + + return self.normalize(S).astype(np.float32) + + def inv_spectrogram(self, spectrogram: np.ndarray) -> np.ndarray: + """Convert a spectrogram to a waveform using Griffi-Lim vocoder.""" + S = self.denormalize(spectrogram) + S = db_to_amp(x=S, gain=self.spec_gain, base=self.base) + # Reconstruct phase + W = self._griffin_lim(S**self.power) + return self.apply_inv_preemphasis(W) if self.preemphasis != 0 else W + + def inv_melspectrogram(self, mel_spectrogram: np.ndarray) -> np.ndarray: + """Convert a melspectrogram to a waveform using Griffi-Lim vocoder.""" + D = self.denormalize(mel_spectrogram) + S = db_to_amp(x=D, gain=self.spec_gain, base=self.base) + S = mel_to_spec(mel=S, mel_basis=self.mel_basis) # Convert back to linear + W = self._griffin_lim(S**self.power) + return self.apply_inv_preemphasis(W) if self.preemphasis != 0 else W + + def out_linear_to_mel(self, linear_spec: np.ndarray) -> np.ndarray: + """Convert a full scale linear spectrogram output of a network to a melspectrogram. + + Args: + linear_spec (np.ndarray): Normalized full scale linear spectrogram. + + Returns: + np.ndarray: Normalized melspectrogram. + """ + S = self.denormalize(linear_spec) + S = db_to_amp(x=S, gain=self.spec_gain, base=self.base) + S = spec_to_mel(spec=np.abs(S), mel_basis=self.mel_basis) + S = amp_to_db(x=S, gain=self.spec_gain, base=self.base) + mel = self.normalize(S) + return mel + + def _griffin_lim(self, S): + return griffin_lim( + spec=S, + num_iter=self.griffin_lim_iters, + hop_length=self.hop_length, + win_length=self.win_length, + fft_size=self.fft_size, + pad_mode=self.stft_pad_mode, + ) + + def compute_f0(self, x: np.ndarray) -> np.ndarray: + """Compute pitch (f0) of a waveform using the same parameters used for computing melspectrogram. + + Args: + x (np.ndarray): Waveform. + + Returns: + np.ndarray: Pitch. 
+ + Examples: + >>> WAV_FILE = filename = librosa.example('vibeace') + >>> from TTS.config import BaseAudioConfig + >>> from TTS.utils.audio import AudioProcessor + >>> conf = BaseAudioConfig(pitch_fmax=640, pitch_fmin=1) + >>> ap = AudioProcessor(**conf) + >>> wav = ap.load_wav(WAV_FILE, sr=ap.sample_rate)[:5 * ap.sample_rate] + >>> pitch = ap.compute_f0(wav) + """ + # align F0 length to the spectrogram length + if len(x) % self.hop_length == 0: + x = np.pad(x, (0, self.hop_length // 2), mode=self.stft_pad_mode) + + f0 = compute_f0( + x=x, + pitch_fmax=self.pitch_fmax, + pitch_fmin=self.pitch_fmin, + hop_length=self.hop_length, + win_length=self.win_length, + sample_rate=self.sample_rate, + stft_pad_mode=self.stft_pad_mode, + center=True, + ) + + return f0 + + ### Audio Processing ### + def find_endpoint(self, wav: np.ndarray, min_silence_sec=0.8) -> int: + """Find the last point without silence at the end of a audio signal. + + Args: + wav (np.ndarray): Audio signal. + threshold_db (int, optional): Silence threshold in decibels. Defaults to -40. + min_silence_sec (float, optional): Ignore silences that are shorter then this in secs. Defaults to 0.8. + + Returns: + int: Last point without silence. + """ + return find_endpoint( + wav=wav, + trim_db=self.trim_db, + sample_rate=self.sample_rate, + min_silence_sec=min_silence_sec, + gain=self.spec_gain, + base=self.base, + ) + + def trim_silence(self, wav): + """Trim silent parts with a threshold and 0.01 sec margin""" + return trim_silence( + wav=wav, + sample_rate=self.sample_rate, + trim_db=self.trim_db, + win_length=self.win_length, + hop_length=self.hop_length, + ) + + @staticmethod + def sound_norm(x: np.ndarray) -> np.ndarray: + """Normalize the volume of an audio signal. + + Args: + x (np.ndarray): Raw waveform. + + Returns: + np.ndarray: Volume normalized waveform. + """ + return volume_norm(x=x) + + def rms_volume_norm(self, x: np.ndarray, db_level: float = None) -> np.ndarray: + """Normalize the volume based on RMS of the signal. + + Args: + x (np.ndarray): Raw waveform. + + Returns: + np.ndarray: RMS normalized waveform. + """ + if db_level is None: + db_level = self.db_level + return rms_volume_norm(x=x, db_level=db_level) + + ### save and load ### + def load_wav(self, filename: str, sr: int = None) -> np.ndarray: + """Read a wav file using Librosa and optionally resample, silence trim, volume normalize. + + Resampling slows down loading the file significantly. Therefore it is recommended to resample the file before. + + Args: + filename (str): Path to the wav file. + sr (int, optional): Sampling rate for resampling. Defaults to None. + + Returns: + np.ndarray: Loaded waveform. + """ + if sr is not None: + x = load_wav(filename=filename, sample_rate=sr, resample=True) + else: + x = load_wav(filename=filename, sample_rate=self.sample_rate, resample=self.resample) + if self.do_trim_silence: + try: + x = self.trim_silence(x) + except ValueError: + print(f" [!] File cannot be trimmed for silence - {filename}") + if self.do_sound_norm: + x = self.sound_norm(x) + if self.do_rms_norm: + x = self.rms_volume_norm(x, self.db_level) + return x + + def save_wav(self, wav: np.ndarray, path: str, sr: int = None, pipe_out=None) -> None: + """Save a waveform to a file using Scipy. + + Args: + wav (np.ndarray): Waveform to save. + path (str): Path to a output file. + sr (int, optional): Sampling rate used for saving to the file. Defaults to None. + pipe_out (BytesIO, optional): Flag to stdout the generated TTS wav file for shell pipe. 
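A second sketch covering pitch, end-pointing, and file I/O on the same class; the paths are placeholders and the constructor values are again only illustrative, as in the sketch above.

from TTS.utils.audio.processor import AudioProcessor

ap = AudioProcessor(sample_rate=22050, num_mels=80, fft_size=1024, win_length=1024,
                    hop_length=256, min_level_db=-100, pitch_fmin=65, pitch_fmax=600,
                    trim_db=45, do_rms_norm=True, db_level=-27.0)
wav = ap.load_wav("speech.wav")     # placeholder path; normalized per the flags above
f0 = ap.compute_f0(wav)             # frame-aligned pitch track
end = ap.find_endpoint(wav)         # sample index of the last non-silent point
ap.save_wav(wav, "out.wav")         # RMS-normalized and written as 16-bit PCM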
+ """ + if self.do_rms_norm: + wav_norm = self.rms_volume_norm(wav, self.db_level) * 32767 + else: + wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav)))) + + wav_norm = wav_norm.astype(np.int16) + if pipe_out: + wav_buffer = BytesIO() + scipy.io.wavfile.write(wav_buffer, sr if sr else self.sample_rate, wav_norm) + wav_buffer.seek(0) + pipe_out.buffer.write(wav_buffer.read()) + scipy.io.wavfile.write(path, sr if sr else self.sample_rate, wav_norm) + + def get_duration(self, filename: str) -> float: + """Get the duration of a wav file using Librosa. + + Args: + filename (str): Path to the wav file. + """ + return librosa.get_duration(filename=filename) diff --git a/TTS/utils/audio/torch_transforms.py b/TTS/utils/audio/torch_transforms.py new file mode 100644 index 0000000..fd40ebb --- /dev/null +++ b/TTS/utils/audio/torch_transforms.py @@ -0,0 +1,165 @@ +import librosa +import torch +from torch import nn + + +class TorchSTFT(nn.Module): # pylint: disable=abstract-method + """Some of the audio processing funtions using Torch for faster batch processing. + + Args: + + n_fft (int): + FFT window size for STFT. + + hop_length (int): + number of frames between STFT columns. + + win_length (int, optional): + STFT window length. + + pad_wav (bool, optional): + If True pad the audio with (n_fft - hop_length) / 2). Defaults to False. + + window (str, optional): + The name of a function to create a window tensor that is applied/multiplied to each frame/window. Defaults to "hann_window" + + sample_rate (int, optional): + target audio sampling rate. Defaults to None. + + mel_fmin (int, optional): + minimum filter frequency for computing melspectrograms. Defaults to None. + + mel_fmax (int, optional): + maximum filter frequency for computing melspectrograms. Defaults to None. + + n_mels (int, optional): + number of melspectrogram dimensions. Defaults to None. + + use_mel (bool, optional): + If True compute the melspectrograms otherwise. Defaults to False. + + do_amp_to_db_linear (bool, optional): + enable/disable amplitude to dB conversion of linear spectrograms. Defaults to False. + + spec_gain (float, optional): + gain applied when converting amplitude to DB. Defaults to 1.0. + + power (float, optional): + Exponent for the magnitude spectrogram, e.g., 1 for energy, 2 for power, etc. Defaults to None. + + use_htk (bool, optional): + Use HTK formula in mel filter instead of Slaney. + + mel_norm (None, 'slaney', or number, optional): + If 'slaney', divide the triangular mel weights by the width of the mel band + (area normalization). + + If numeric, use `librosa.util.normalize` to normalize each filter by to unit l_p norm. + See `librosa.util.normalize` for a full description of supported norm values + (including `+-np.inf`). + + Otherwise, leave all the triangles aiming for a peak value of 1.0. Defaults to "slaney". 
+ """ + + def __init__( + self, + n_fft, + hop_length, + win_length, + pad_wav=False, + window="hann_window", + sample_rate=None, + mel_fmin=0, + mel_fmax=None, + n_mels=80, + use_mel=False, + do_amp_to_db=False, + spec_gain=1.0, + power=None, + use_htk=False, + mel_norm="slaney", + normalized=False, + ): + super().__init__() + self.n_fft = n_fft + self.hop_length = hop_length + self.win_length = win_length + self.pad_wav = pad_wav + self.sample_rate = sample_rate + self.mel_fmin = mel_fmin + self.mel_fmax = mel_fmax + self.n_mels = n_mels + self.use_mel = use_mel + self.do_amp_to_db = do_amp_to_db + self.spec_gain = spec_gain + self.power = power + self.use_htk = use_htk + self.mel_norm = mel_norm + self.window = nn.Parameter(getattr(torch, window)(win_length), requires_grad=False) + self.mel_basis = None + self.normalized = normalized + if use_mel: + self._build_mel_basis() + + def __call__(self, x): + """Compute spectrogram frames by torch based stft. + + Args: + x (Tensor): input waveform + + Returns: + Tensor: spectrogram frames. + + Shapes: + x: [B x T] or [:math:`[B, 1, T]`] + """ + if x.ndim == 2: + x = x.unsqueeze(1) + if self.pad_wav: + padding = int((self.n_fft - self.hop_length) / 2) + x = torch.nn.functional.pad(x, (padding, padding), mode="reflect") + # B x D x T x 2 + o = torch.stft( + x.squeeze(1), + self.n_fft, + self.hop_length, + self.win_length, + self.window, + center=True, + pad_mode="reflect", # compatible with audio.py + normalized=self.normalized, + onesided=True, + return_complex=False, + ) + M = o[:, :, :, 0] + P = o[:, :, :, 1] + S = torch.sqrt(torch.clamp(M**2 + P**2, min=1e-8)) + + if self.power is not None: + S = S**self.power + + if self.use_mel: + S = torch.matmul(self.mel_basis.to(x), S) + if self.do_amp_to_db: + S = self._amp_to_db(S, spec_gain=self.spec_gain) + return S + + def _build_mel_basis(self): + mel_basis = librosa.filters.mel( + sr=self.sample_rate, + n_fft=self.n_fft, + n_mels=self.n_mels, + fmin=self.mel_fmin, + fmax=self.mel_fmax, + htk=self.use_htk, + norm=self.mel_norm, + ) + self.mel_basis = torch.from_numpy(mel_basis).float() + + @staticmethod + def _amp_to_db(x, spec_gain=1.0): + return torch.log(torch.clamp(x, min=1e-5) * spec_gain) + + @staticmethod + def _db_to_amp(x, spec_gain=1.0): + return torch.exp(x) / spec_gain diff --git a/TTS/utils/callbacks.py b/TTS/utils/callbacks.py new file mode 100644 index 0000000..511d215 --- /dev/null +++ b/TTS/utils/callbacks.py @@ -0,0 +1,105 @@ +class TrainerCallback: + @staticmethod + def on_init_start(trainer) -> None: + if hasattr(trainer.model, "module"): + if hasattr(trainer.model.module, "on_init_start"): + trainer.model.module.on_init_start(trainer) + else: + if hasattr(trainer.model, "on_init_start"): + trainer.model.on_init_start(trainer) + + if hasattr(trainer.criterion, "on_init_start"): + trainer.criterion.on_init_start(trainer) + + if hasattr(trainer.optimizer, "on_init_start"): + trainer.optimizer.on_init_start(trainer) + + @staticmethod + def on_init_end(trainer) -> None: + if hasattr(trainer.model, "module"): + if hasattr(trainer.model.module, "on_init_end"): + trainer.model.module.on_init_end(trainer) + else: + if hasattr(trainer.model, "on_init_end"): + trainer.model.on_init_end(trainer) + + if hasattr(trainer.criterion, "on_init_end"): + trainer.criterion.on_init_end(trainer) + + if hasattr(trainer.optimizer, "on_init_end"): + trainer.optimizer.on_init_end(trainer) + + @staticmethod + def on_epoch_start(trainer) -> None: + if hasattr(trainer.model, "module"): + if 
hasattr(trainer.model.module, "on_epoch_start"): + trainer.model.module.on_epoch_start(trainer) + else: + if hasattr(trainer.model, "on_epoch_start"): + trainer.model.on_epoch_start(trainer) + + if hasattr(trainer.criterion, "on_epoch_start"): + trainer.criterion.on_epoch_start(trainer) + + if hasattr(trainer.optimizer, "on_epoch_start"): + trainer.optimizer.on_epoch_start(trainer) + + @staticmethod + def on_epoch_end(trainer) -> None: + if hasattr(trainer.model, "module"): + if hasattr(trainer.model.module, "on_epoch_end"): + trainer.model.module.on_epoch_end(trainer) + else: + if hasattr(trainer.model, "on_epoch_end"): + trainer.model.on_epoch_end(trainer) + + if hasattr(trainer.criterion, "on_epoch_end"): + trainer.criterion.on_epoch_end(trainer) + + if hasattr(trainer.optimizer, "on_epoch_end"): + trainer.optimizer.on_epoch_end(trainer) + + @staticmethod + def on_train_step_start(trainer) -> None: + if hasattr(trainer.model, "module"): + if hasattr(trainer.model.module, "on_train_step_start"): + trainer.model.module.on_train_step_start(trainer) + else: + if hasattr(trainer.model, "on_train_step_start"): + trainer.model.on_train_step_start(trainer) + + if hasattr(trainer.criterion, "on_train_step_start"): + trainer.criterion.on_train_step_start(trainer) + + if hasattr(trainer.optimizer, "on_train_step_start"): + trainer.optimizer.on_train_step_start(trainer) + + @staticmethod + def on_train_step_end(trainer) -> None: + if hasattr(trainer.model, "module"): + if hasattr(trainer.model.module, "on_train_step_end"): + trainer.model.module.on_train_step_end(trainer) + else: + if hasattr(trainer.model, "on_train_step_end"): + trainer.model.on_train_step_end(trainer) + + if hasattr(trainer.criterion, "on_train_step_end"): + trainer.criterion.on_train_step_end(trainer) + + if hasattr(trainer.optimizer, "on_train_step_end"): + trainer.optimizer.on_train_step_end(trainer) + + @staticmethod + def on_keyboard_interrupt(trainer) -> None: + if hasattr(trainer.model, "module"): + if hasattr(trainer.model.module, "on_keyboard_interrupt"): + trainer.model.module.on_keyboard_interrupt(trainer) + else: + if hasattr(trainer.model, "on_keyboard_interrupt"): + trainer.model.on_keyboard_interrupt(trainer) + + if hasattr(trainer.criterion, "on_keyboard_interrupt"): + trainer.criterion.on_keyboard_interrupt(trainer) + + if hasattr(trainer.optimizer, "on_keyboard_interrupt"): + trainer.optimizer.on_keyboard_interrupt(trainer) diff --git a/TTS/utils/capacitron_optimizer.py b/TTS/utils/capacitron_optimizer.py new file mode 100644 index 0000000..7206ffd --- /dev/null +++ b/TTS/utils/capacitron_optimizer.py @@ -0,0 +1,67 @@ +from typing import Generator + +from trainer.trainer_utils import get_optimizer + + +class CapacitronOptimizer: + """Double optimizer class for the Capacitron model.""" + + def __init__(self, config: dict, model_params: Generator) -> None: + self.primary_params, self.secondary_params = self.split_model_parameters(model_params) + + optimizer_names = list(config.optimizer_params.keys()) + optimizer_parameters = list(config.optimizer_params.values()) + + self.primary_optimizer = get_optimizer( + optimizer_names[0], + optimizer_parameters[0], + config.lr, + parameters=self.primary_params, + ) + + self.secondary_optimizer = get_optimizer( + optimizer_names[1], + self.extract_optimizer_parameters(optimizer_parameters[1]), + optimizer_parameters[1]["lr"], + parameters=self.secondary_params, + ) + + self.param_groups = self.primary_optimizer.param_groups + + def first_step(self): + 
self.secondary_optimizer.step() + self.secondary_optimizer.zero_grad() + self.primary_optimizer.zero_grad() + + def step(self): + # Update param groups to display the correct learning rate + self.param_groups = self.primary_optimizer.param_groups + self.primary_optimizer.step() + + def zero_grad(self, set_to_none=False): + self.primary_optimizer.zero_grad(set_to_none) + self.secondary_optimizer.zero_grad(set_to_none) + + def load_state_dict(self, state_dict): + self.primary_optimizer.load_state_dict(state_dict[0]) + self.secondary_optimizer.load_state_dict(state_dict[1]) + + def state_dict(self): + return [self.primary_optimizer.state_dict(), self.secondary_optimizer.state_dict()] + + @staticmethod + def split_model_parameters(model_params: Generator) -> list: + primary_params = [] + secondary_params = [] + for name, param in model_params: + if param.requires_grad: + if name == "capacitron_vae_layer.beta": + secondary_params.append(param) + else: + primary_params.append(param) + return [iter(primary_params), iter(secondary_params)] + + @staticmethod + def extract_optimizer_parameters(params: dict) -> dict: + """Extract parameters that are not the learning rate""" + return {k: v for k, v in params.items() if k != "lr"} diff --git a/TTS/utils/distribute.py b/TTS/utils/distribute.py new file mode 100644 index 0000000..a51ef76 --- /dev/null +++ b/TTS/utils/distribute.py @@ -0,0 +1,20 @@ +# edited from https://github.com/fastai/imagenet-fast/blob/master/imagenet_nv/distributed.py +import torch +import torch.distributed as dist + + +def reduce_tensor(tensor, num_gpus): + rt = tensor.clone() + dist.all_reduce(rt, op=dist.reduce_op.SUM) + rt /= num_gpus + return rt + + +def init_distributed(rank, num_gpus, group_name, dist_backend, dist_url): + assert torch.cuda.is_available(), "Distributed mode requires CUDA." + + # Set cuda device so everything is done on the right GPU. + torch.cuda.set_device(rank % torch.cuda.device_count()) + + # Initialize distributed communication + dist.init_process_group(dist_backend, init_method=dist_url, world_size=num_gpus, rank=rank, group_name=group_name) diff --git a/TTS/utils/download.py b/TTS/utils/download.py new file mode 100644 index 0000000..3f06b57 --- /dev/null +++ b/TTS/utils/download.py @@ -0,0 +1,206 @@ +# Adapted from https://github.com/pytorch/audio/ + +import hashlib +import logging +import os +import tarfile +import urllib +import urllib.request +import zipfile +from os.path import expanduser +from typing import Any, Iterable, List, Optional + +from torch.utils.model_zoo import tqdm + + +def stream_url( + url: str, start_byte: Optional[int] = None, block_size: int = 32 * 1024, progress_bar: bool = True +) -> Iterable: + """Stream url by chunk + + Args: + url (str): Url. + start_byte (int or None, optional): Start streaming at that point (Default: ``None``). + block_size (int, optional): Size of chunks to stream (Default: ``32 * 1024``). + progress_bar (bool, optional): Display a progress bar (Default: ``True``). 
+ """ + + # If we already have the whole file, there is no need to download it again + req = urllib.request.Request(url, method="HEAD") + with urllib.request.urlopen(req) as response: + url_size = int(response.info().get("Content-Length", -1)) + if url_size == start_byte: + return + + req = urllib.request.Request(url) + if start_byte: + req.headers["Range"] = "bytes={}-".format(start_byte) + + with urllib.request.urlopen(req) as upointer, tqdm( + unit="B", + unit_scale=True, + unit_divisor=1024, + total=url_size, + disable=not progress_bar, + ) as pbar: + num_bytes = 0 + while True: + chunk = upointer.read(block_size) + if not chunk: + break + yield chunk + num_bytes += len(chunk) + pbar.update(len(chunk)) + + +def download_url( + url: str, + download_folder: str, + filename: Optional[str] = None, + hash_value: Optional[str] = None, + hash_type: str = "sha256", + progress_bar: bool = True, + resume: bool = False, +) -> None: + """Download file to disk. + + Args: + url (str): Url. + download_folder (str): Folder to download file. + filename (str or None, optional): Name of downloaded file. If None, it is inferred from the url + (Default: ``None``). + hash_value (str or None, optional): Hash for url (Default: ``None``). + hash_type (str, optional): Hash type, among "sha256" and "md5" (Default: ``"sha256"``). + progress_bar (bool, optional): Display a progress bar (Default: ``True``). + resume (bool, optional): Enable resuming download (Default: ``False``). + """ + + req = urllib.request.Request(url, method="HEAD") + req_info = urllib.request.urlopen(req).info() # pylint: disable=consider-using-with + + # Detect filename + filename = filename or req_info.get_filename() or os.path.basename(url) + filepath = os.path.join(download_folder, filename) + if resume and os.path.exists(filepath): + mode = "ab" + local_size: Optional[int] = os.path.getsize(filepath) + + elif not resume and os.path.exists(filepath): + raise RuntimeError("{} already exists. Delete the file manually and retry.".format(filepath)) + else: + mode = "wb" + local_size = None + + if hash_value and local_size == int(req_info.get("Content-Length", -1)): + with open(filepath, "rb") as file_obj: + if validate_file(file_obj, hash_value, hash_type): + return + raise RuntimeError("The hash of {} does not match. Delete the file manually and retry.".format(filepath)) + + with open(filepath, mode) as fpointer: + for chunk in stream_url(url, start_byte=local_size, progress_bar=progress_bar): + fpointer.write(chunk) + + with open(filepath, "rb") as file_obj: + if hash_value and not validate_file(file_obj, hash_value, hash_type): + raise RuntimeError("The hash of {} does not match. Delete the file manually and retry.".format(filepath)) + + +def validate_file(file_obj: Any, hash_value: str, hash_type: str = "sha256") -> bool: + """Validate a given file object with its hash. + + Args: + file_obj: File object to read from. + hash_value (str): Hash for url. + hash_type (str, optional): Hash type, among "sha256" and "md5" (Default: ``"sha256"``). + + Returns: + bool: return True if its a valid file, else False. 
+ """ + + if hash_type == "sha256": + hash_func = hashlib.sha256() + elif hash_type == "md5": + hash_func = hashlib.md5() + else: + raise ValueError + + while True: + # Read by chunk to avoid filling memory + chunk = file_obj.read(1024**2) + if not chunk: + break + hash_func.update(chunk) + + return hash_func.hexdigest() == hash_value + + +def extract_archive(from_path: str, to_path: Optional[str] = None, overwrite: bool = False) -> List[str]: + """Extract archive. + Args: + from_path (str): the path of the archive. + to_path (str or None, optional): the root path of the extraced files (directory of from_path) + (Default: ``None``) + overwrite (bool, optional): overwrite existing files (Default: ``False``) + + Returns: + list: List of paths to extracted files even if not overwritten. + """ + + if to_path is None: + to_path = os.path.dirname(from_path) + + try: + with tarfile.open(from_path, "r") as tar: + logging.info("Opened tar file %s.", from_path) + files = [] + for file_ in tar: # type: Any + file_path = os.path.join(to_path, file_.name) + if file_.isfile(): + files.append(file_path) + if os.path.exists(file_path): + logging.info("%s already extracted.", file_path) + if not overwrite: + continue + tar.extract(file_, to_path) + return files + except tarfile.ReadError: + pass + + try: + with zipfile.ZipFile(from_path, "r") as zfile: + logging.info("Opened zip file %s.", from_path) + files = zfile.namelist() + for file_ in files: + file_path = os.path.join(to_path, file_) + if os.path.exists(file_path): + logging.info("%s already extracted.", file_path) + if not overwrite: + continue + zfile.extract(file_, to_path) + return files + except zipfile.BadZipFile: + pass + + raise NotImplementedError(" > [!] only supports tar.gz, tgz, and zip achives.") + + +def download_kaggle_dataset(dataset_path: str, dataset_name: str, output_path: str): + """Download dataset from kaggle. + Args: + dataset_path (str): + This the kaggle link to the dataset. for example vctk is 'mfekadu/english-multispeaker-corpus-for-voice-cloning' + dataset_name (str): Name of the folder the dataset will be saved in. + output_path (str): Path of the location you want the dataset folder to be saved to. + """ + data_path = os.path.join(output_path, dataset_name) + try: + import kaggle # pylint: disable=import-outside-toplevel + + kaggle.api.authenticate() + print(f"""\nDownloading {dataset_name}...""") + kaggle.api.dataset_download_files(dataset_path, path=data_path, unzip=True) + except OSError: + print( + f"""[!] in order to download kaggle datasets, you need to have a kaggle api token stored in your {os.path.join(expanduser('~'), '.kaggle/kaggle.json')}""" + ) diff --git a/TTS/utils/downloaders.py b/TTS/utils/downloaders.py new file mode 100644 index 0000000..104dc7b --- /dev/null +++ b/TTS/utils/downloaders.py @@ -0,0 +1,126 @@ +import os +from typing import Optional + +from TTS.utils.download import download_kaggle_dataset, download_url, extract_archive + + +def download_ljspeech(path: str): + """Download and extract LJSpeech dataset + + Args: + path (str): path to the directory where the dataset will be stored. + """ + os.makedirs(path, exist_ok=True) + url = "https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2" + download_url(url, path) + basename = os.path.basename(url) + archive = os.path.join(path, basename) + print(" > Extracting archive file...") + extract_archive(archive) + + +def download_vctk(path: str, use_kaggle: Optional[bool] = False): + """Download and extract VCTK dataset. 
+ + Args: + path (str): path to the directory where the dataset will be stored. + + use_kaggle (bool, optional): Downloads vctk dataset from kaggle. Is generally faster. Defaults to False. + """ + if use_kaggle: + download_kaggle_dataset("mfekadu/english-multispeaker-corpus-for-voice-cloning", "VCTK", path) + else: + os.makedirs(path, exist_ok=True) + url = "https://datashare.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip" + download_url(url, path) + basename = os.path.basename(url) + archive = os.path.join(path, basename) + print(" > Extracting archive file...") + extract_archive(archive) + + +def download_tweb(path: str): + """Download and extract Tweb dataset + + Args: + path (str): Path to the directory where the dataset will be stored. + """ + download_kaggle_dataset("bryanpark/the-world-english-bible-speech-dataset", "TWEB", path) + + +def download_libri_tts(path: str, subset: Optional[str] = "all"): + """Download and extract libri tts dataset. + + Args: + path (str): Path to the directory where the dataset will be stored. + + subset (str, optional): Name of the subset to download. If you only want to download a certain + portion specify it here. Defaults to 'all'. + """ + + subset_dict = { + "libri-tts-clean-100": "http://www.openslr.org/resources/60/train-clean-100.tar.gz", + "libri-tts-clean-360": "http://www.openslr.org/resources/60/train-clean-360.tar.gz", + "libri-tts-other-500": "http://www.openslr.org/resources/60/train-other-500.tar.gz", + "libri-tts-dev-clean": "http://www.openslr.org/resources/60/dev-clean.tar.gz", + "libri-tts-dev-other": "http://www.openslr.org/resources/60/dev-other.tar.gz", + "libri-tts-test-clean": "http://www.openslr.org/resources/60/test-clean.tar.gz", + "libri-tts-test-other": "http://www.openslr.org/resources/60/test-other.tar.gz", + } + + os.makedirs(path, exist_ok=True) + if subset == "all": + for sub, val in subset_dict.items(): + print(f" > Downloading {sub}...") + download_url(val, path) + basename = os.path.basename(val) + archive = os.path.join(path, basename) + print(" > Extracting archive file...") + extract_archive(archive) + print(" > All subsets downloaded") + else: + url = subset_dict[subset] + download_url(url, path) + basename = os.path.basename(url) + archive = os.path.join(path, basename) + print(" > Extracting archive file...") + extract_archive(archive) + + +def download_thorsten_de(path: str): + """Download and extract Thorsten german male voice dataset. + + Args: + path (str): Path to the directory where the dataset will be stored. + """ + os.makedirs(path, exist_ok=True) + url = "https://www.openslr.org/resources/95/thorsten-de_v02.tgz" + download_url(url, path) + basename = os.path.basename(url) + archive = os.path.join(path, basename) + print(" > Extracting archive file...") + extract_archive(archive) + + +def download_mailabs(path: str, language: str = "english"): + """Download and extract Mailabs dataset. + + Args: + path (str): Path to the directory where the dataset will be stored. + + language (str): Language subset to download. Defaults to english. 
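An illustrative call of the dataset downloaders in this file; the destination directories are placeholders.

from TTS.utils.downloaders import download_libri_tts, download_ljspeech

download_ljspeech("/data/LJSpeech")                                  # placeholder target directory
download_libri_tts("/data/LibriTTS", subset="libri-tts-clean-100")   # or subset="all" for every split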
+ """ + language_dict = { + "english": "https://data.solak.de/data/Training/stt_tts/en_US.tgz", + "german": "https://data.solak.de/data/Training/stt_tts/de_DE.tgz", + "french": "https://data.solak.de/data/Training/stt_tts/fr_FR.tgz", + "italian": "https://data.solak.de/data/Training/stt_tts/it_IT.tgz", + "spanish": "https://data.solak.de/data/Training/stt_tts/es_ES.tgz", + } + os.makedirs(path, exist_ok=True) + url = language_dict[language] + download_url(url, path) + basename = os.path.basename(url) + archive = os.path.join(path, basename) + print(" > Extracting archive file...") + extract_archive(archive) diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py new file mode 100644 index 0000000..4fa4741 --- /dev/null +++ b/TTS/utils/generic_utils.py @@ -0,0 +1,239 @@ +# -*- coding: utf-8 -*- +import datetime +import importlib +import logging +import os +import re +import subprocess +import sys +from pathlib import Path +from typing import Dict + +import fsspec +import torch + + +def to_cuda(x: torch.Tensor) -> torch.Tensor: + if x is None: + return None + if torch.is_tensor(x): + x = x.contiguous() + if torch.cuda.is_available(): + x = x.cuda(non_blocking=True) + return x + + +def get_cuda(): + use_cuda = torch.cuda.is_available() + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + return use_cuda, device + + +def get_git_branch(): + try: + out = subprocess.check_output(["git", "branch"]).decode("utf8") + current = next(line for line in out.split("\n") if line.startswith("*")) + current.replace("* ", "") + except subprocess.CalledProcessError: + current = "inside_docker" + except (FileNotFoundError, StopIteration) as e: + current = "unknown" + return current + + +def get_commit_hash(): + """https://stackoverflow.com/questions/14989858/get-the-current-git-hash-in-a-python-script""" + # try: + # subprocess.check_output(['git', 'diff-index', '--quiet', + # 'HEAD']) # Verify client is clean + # except: + # raise RuntimeError( + # " !! Commit before training to get the commit hash.") + try: + commit = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode().strip() + # Not copying .git folder into docker container + except (subprocess.CalledProcessError, FileNotFoundError): + commit = "0000000" + return commit + + +def get_experiment_folder_path(root_path, model_name): + """Get an experiment folder path with the current date and time""" + date_str = datetime.datetime.now().strftime("%B-%d-%Y_%I+%M%p") + commit_hash = get_commit_hash() + output_folder = os.path.join(root_path, model_name + "-" + date_str + "-" + commit_hash) + return output_folder + + +def remove_experiment_folder(experiment_path): + """Check folder if there is a checkpoint, otherwise remove the folder""" + fs = fsspec.get_mapper(experiment_path).fs + checkpoint_files = fs.glob(experiment_path + "/*.pth") + if not checkpoint_files: + if fs.exists(experiment_path): + fs.rm(experiment_path, recursive=True) + print(" ! Run is removed from {}".format(experiment_path)) + else: + print(" ! 
Run is kept in {}".format(experiment_path)) + + +def count_parameters(model): + r"""Count number of trainable parameters in a network""" + return sum(p.numel() for p in model.parameters() if p.requires_grad) + + +def to_camel(text): + text = text.capitalize() + text = re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), text) + text = text.replace("Tts", "TTS") + text = text.replace("vc", "VC") + return text + + +def find_module(module_path: str, module_name: str) -> object: + module_name = module_name.lower() + module = importlib.import_module(module_path + "." + module_name) + class_name = to_camel(module_name) + return getattr(module, class_name) + + +def import_class(module_path: str) -> object: + """Import a class from a module path. + + Args: + module_path (str): The module path of the class. + + Returns: + object: The imported class. + """ + class_name = module_path.split(".")[-1] + module_path = ".".join(module_path.split(".")[:-1]) + module = importlib.import_module(module_path) + return getattr(module, class_name) + + +def get_import_path(obj: object) -> str: + """Get the import path of a class. + + Args: + obj (object): The class object. + + Returns: + str: The import path of the class. + """ + return ".".join([type(obj).__module__, type(obj).__name__]) + + +def get_user_data_dir(appname): + TTS_HOME = os.environ.get("TTS_HOME") + XDG_DATA_HOME = os.environ.get("XDG_DATA_HOME") + if TTS_HOME is not None: + ans = Path(TTS_HOME).expanduser().resolve(strict=False) + elif XDG_DATA_HOME is not None: + ans = Path(XDG_DATA_HOME).expanduser().resolve(strict=False) + elif sys.platform == "win32": + import winreg # pylint: disable=import-outside-toplevel + + key = winreg.OpenKey( + winreg.HKEY_CURRENT_USER, r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders" + ) + dir_, _ = winreg.QueryValueEx(key, "Local AppData") + ans = Path(dir_).resolve(strict=False) + elif sys.platform == "darwin": + ans = Path("~/Library/Application Support/").expanduser() + else: + ans = Path.home().joinpath(".local/share") + return ans.joinpath(appname) + + +def set_init_dict(model_dict, checkpoint_state, c): + # Partial initialization: if there is a mismatch with new and old layer, it is skipped. + for k, v in checkpoint_state.items(): + if k not in model_dict: + print(" | > Layer missing in the model definition: {}".format(k)) + # 1. filter out unnecessary keys + pretrained_dict = {k: v for k, v in checkpoint_state.items() if k in model_dict} + # 2. filter out different size layers + pretrained_dict = {k: v for k, v in pretrained_dict.items() if v.numel() == model_dict[k].numel()} + # 3. skip reinit layers + if c.has("reinit_layers") and c.reinit_layers is not None: + for reinit_layer_name in c.reinit_layers: + pretrained_dict = {k: v for k, v in pretrained_dict.items() if reinit_layer_name not in k} + # 4. overwrite entries in the existing state dict + model_dict.update(pretrained_dict) + print(" | > {} / {} layers are restored.".format(len(pretrained_dict), len(model_dict))) + return model_dict + + +def format_aux_input(def_args: Dict, kwargs: Dict) -> Dict: + """Format kwargs to hande auxilary inputs to models. + + Args: + def_args (Dict): A dictionary of argument names and their default values if not defined in `kwargs`. + kwargs (Dict): A `dict` or `kwargs` that includes auxilary inputs to the model. + + Returns: + Dict: arguments with formatted auxilary inputs. 
+ """ + kwargs = kwargs.copy() + for name in def_args: + if name not in kwargs or kwargs[name] is None: + kwargs[name] = def_args[name] + return kwargs + + +class KeepAverage: + def __init__(self): + self.avg_values = {} + self.iters = {} + + def __getitem__(self, key): + return self.avg_values[key] + + def items(self): + return self.avg_values.items() + + def add_value(self, name, init_val=0, init_iter=0): + self.avg_values[name] = init_val + self.iters[name] = init_iter + + def update_value(self, name, value, weighted_avg=False): + if name not in self.avg_values: + # add value if not exist before + self.add_value(name, init_val=value) + else: + # else update existing value + if weighted_avg: + self.avg_values[name] = 0.99 * self.avg_values[name] + 0.01 * value + self.iters[name] += 1 + else: + self.avg_values[name] = self.avg_values[name] * self.iters[name] + value + self.iters[name] += 1 + self.avg_values[name] /= self.iters[name] + + def add_values(self, name_dict): + for key, value in name_dict.items(): + self.add_value(key, init_val=value) + + def update_values(self, value_dict): + for key, value in value_dict.items(): + self.update_value(key, value) + + +def get_timestamp(): + return datetime.now().strftime("%y%m%d-%H%M%S") + + +def setup_logger(logger_name, root, phase, level=logging.INFO, screen=False, tofile=False): + lg = logging.getLogger(logger_name) + formatter = logging.Formatter("%(asctime)s.%(msecs)03d - %(levelname)s: %(message)s", datefmt="%y-%m-%d %H:%M:%S") + lg.setLevel(level) + if tofile: + log_file = os.path.join(root, phase + "_{}.log".format(get_timestamp())) + fh = logging.FileHandler(log_file, mode="w") + fh.setFormatter(formatter) + lg.addHandler(fh) + if screen: + sh = logging.StreamHandler() + sh.setFormatter(formatter) + lg.addHandler(sh) diff --git a/TTS/utils/io.py b/TTS/utils/io.py new file mode 100644 index 0000000..3107ba6 --- /dev/null +++ b/TTS/utils/io.py @@ -0,0 +1,70 @@ +import os +import pickle as pickle_tts +from typing import Any, Callable, Dict, Union + +import fsspec +import torch + +from TTS.utils.generic_utils import get_user_data_dir + + +class RenamingUnpickler(pickle_tts.Unpickler): + """Overload default pickler to solve module renaming problem""" + + def find_class(self, module, name): + return super().find_class(module.replace("mozilla_voice_tts", "TTS"), name) + + +class AttrDict(dict): + """A custom dict which converts dict keys + to class attributes""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.__dict__ = self + + +def load_fsspec( + path: str, + map_location: Union[str, Callable, torch.device, Dict[Union[str, torch.device], Union[str, torch.device]]] = None, + cache: bool = True, + **kwargs, +) -> Any: + """Like torch.load but can load from other locations (e.g. s3:// , gs://). + + Args: + path: Any path or url supported by fsspec. + map_location: torch.device or str. + cache: If True, cache a remote file locally for subsequent calls. It is cached under `get_user_data_dir()/tts_cache`. Defaults to True. + **kwargs: Keyword arguments forwarded to torch.load. + + Returns: + Object stored in path. 
+ """ + is_local = os.path.isdir(path) or os.path.isfile(path) + if cache and not is_local: + with fsspec.open( + f"filecache::{path}", + filecache={"cache_storage": str(get_user_data_dir("tts_cache"))}, + mode="rb", + ) as f: + return torch.load(f, map_location=map_location, **kwargs) + else: + with fsspec.open(path, "rb") as f: + return torch.load(f, map_location=map_location, **kwargs) + + +def load_checkpoint( + model, checkpoint_path, use_cuda=False, eval=False, cache=False +): # pylint: disable=redefined-builtin + try: + state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) + except ModuleNotFoundError: + pickle_tts.Unpickler = RenamingUnpickler + state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), pickle_module=pickle_tts, cache=cache) + model.load_state_dict(state["model"]) + if use_cuda: + model.cuda() + if eval: + model.eval() + return model, state diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py new file mode 100644 index 0000000..3a527f4 --- /dev/null +++ b/TTS/utils/manage.py @@ -0,0 +1,621 @@ +import json +import os +import re +import tarfile +import zipfile +from pathlib import Path +from shutil import copyfile, rmtree +from typing import Dict, List, Tuple + +import fsspec +import requests +from tqdm import tqdm + +from TTS.config import load_config, read_json_with_comments +from TTS.utils.generic_utils import get_user_data_dir + +LICENSE_URLS = { + "cc by-nc-nd 4.0": "https://creativecommons.org/licenses/by-nc-nd/4.0/", + "mpl": "https://www.mozilla.org/en-US/MPL/2.0/", + "mpl2": "https://www.mozilla.org/en-US/MPL/2.0/", + "mpl 2.0": "https://www.mozilla.org/en-US/MPL/2.0/", + "mit": "https://choosealicense.com/licenses/mit/", + "apache 2.0": "https://choosealicense.com/licenses/apache-2.0/", + "apache2": "https://choosealicense.com/licenses/apache-2.0/", + "cc-by-sa 4.0": "https://creativecommons.org/licenses/by-sa/4.0/", + "cpml": "https://coqui.ai/cpml.txt", +} + + +class ModelManager(object): + tqdm_progress = None + """Manage TTS models defined in .models.json. + It provides an interface to list and download + models defines in '.model.json' + + Models are downloaded under '.TTS' folder in the user's + home path. + + Args: + models_file (str): path to .model.json file. Defaults to None. + output_prefix (str): prefix to `tts` to download models. Defaults to None + progress_bar (bool): print a progress bar when donwloading a file. Defaults to False. + verbose (bool): print info. Defaults to True. + """ + + def __init__(self, models_file=None, output_prefix=None, progress_bar=False, verbose=True): + super().__init__() + self.progress_bar = progress_bar + self.verbose = verbose + if output_prefix is None: + self.output_prefix = get_user_data_dir("tts") + else: + self.output_prefix = os.path.join(output_prefix, "tts") + self.models_dict = None + if models_file is not None: + self.read_models_file(models_file) + else: + # try the default location + path = Path(__file__).parent / "../.models.json" + self.read_models_file(path) + + def read_models_file(self, file_path): + """Read .models.json as a dict + + Args: + file_path (str): path to .models.json. 
+ """ + self.models_dict = read_json_with_comments(file_path) + + def _list_models(self, model_type, model_count=0): + if self.verbose: + print("\n Name format: type/language/dataset/model") + model_list = [] + for lang in self.models_dict[model_type]: + for dataset in self.models_dict[model_type][lang]: + for model in self.models_dict[model_type][lang][dataset]: + model_full_name = f"{model_type}--{lang}--{dataset}--{model}" + output_path = os.path.join(self.output_prefix, model_full_name) + if self.verbose: + if os.path.exists(output_path): + print(f" {model_count}: {model_type}/{lang}/{dataset}/{model} [already downloaded]") + else: + print(f" {model_count}: {model_type}/{lang}/{dataset}/{model}") + model_list.append(f"{model_type}/{lang}/{dataset}/{model}") + model_count += 1 + return model_list + + def _list_for_model_type(self, model_type): + models_name_list = [] + model_count = 1 + models_name_list.extend(self._list_models(model_type, model_count)) + return models_name_list + + def list_models(self): + models_name_list = [] + model_count = 1 + for model_type in self.models_dict: + model_list = self._list_models(model_type, model_count) + models_name_list.extend(model_list) + return models_name_list + + def model_info_by_idx(self, model_query): + """Print the description of the model from .models.json file using model_idx + + Args: + model_query (str): / + """ + model_name_list = [] + model_type, model_query_idx = model_query.split("/") + try: + model_query_idx = int(model_query_idx) + if model_query_idx <= 0: + print("> model_query_idx should be a positive integer!") + return + except: + print("> model_query_idx should be an integer!") + return + model_count = 0 + if model_type in self.models_dict: + for lang in self.models_dict[model_type]: + for dataset in self.models_dict[model_type][lang]: + for model in self.models_dict[model_type][lang][dataset]: + model_name_list.append(f"{model_type}/{lang}/{dataset}/{model}") + model_count += 1 + else: + print(f"> model_type {model_type} does not exist in the list.") + return + if model_query_idx > model_count: + print(f"model query idx exceeds the number of available models [{model_count}] ") + else: + model_type, lang, dataset, model = model_name_list[model_query_idx - 1].split("/") + print(f"> model type : {model_type}") + print(f"> language supported : {lang}") + print(f"> dataset used : {dataset}") + print(f"> model name : {model}") + if "description" in self.models_dict[model_type][lang][dataset][model]: + print(f"> description : {self.models_dict[model_type][lang][dataset][model]['description']}") + else: + print("> description : coming soon") + if "default_vocoder" in self.models_dict[model_type][lang][dataset][model]: + print(f"> default_vocoder : {self.models_dict[model_type][lang][dataset][model]['default_vocoder']}") + + def model_info_by_full_name(self, model_query_name): + """Print the description of the model from .models.json file using model_full_name + + Args: + model_query_name (str): Format is /// + """ + model_type, lang, dataset, model = model_query_name.split("/") + if model_type in self.models_dict: + if lang in self.models_dict[model_type]: + if dataset in self.models_dict[model_type][lang]: + if model in self.models_dict[model_type][lang][dataset]: + print(f"> model type : {model_type}") + print(f"> language supported : {lang}") + print(f"> dataset used : {dataset}") + print(f"> model name : {model}") + if "description" in self.models_dict[model_type][lang][dataset][model]: + print( + f"> description : 
{self.models_dict[model_type][lang][dataset][model]['description']}" + ) + else: + print("> description : coming soon") + if "default_vocoder" in self.models_dict[model_type][lang][dataset][model]: + print( + f"> default_vocoder : {self.models_dict[model_type][lang][dataset][model]['default_vocoder']}" + ) + else: + print(f"> model {model} does not exist for {model_type}/{lang}/{dataset}.") + else: + print(f"> dataset {dataset} does not exist for {model_type}/{lang}.") + else: + print(f"> lang {lang} does not exist for {model_type}.") + else: + print(f"> model_type {model_type} does not exist in the list.") + + def list_tts_models(self): + """Print all `TTS` models and return a list of model names + + Format is `language/dataset/model` + """ + return self._list_for_model_type("tts_models") + + def list_vocoder_models(self): + """Print all the `vocoder` models and return a list of model names + + Format is `language/dataset/model` + """ + return self._list_for_model_type("vocoder_models") + + def list_vc_models(self): + """Print all the voice conversion models and return a list of model names + + Format is `language/dataset/model` + """ + return self._list_for_model_type("voice_conversion_models") + + def list_langs(self): + """Print all the available languages""" + print(" Name format: type/language") + for model_type in self.models_dict: + for lang in self.models_dict[model_type]: + print(f" >: {model_type}/{lang} ") + + def list_datasets(self): + """Print all the datasets""" + print(" Name format: type/language/dataset") + for model_type in self.models_dict: + for lang in self.models_dict[model_type]: + for dataset in self.models_dict[model_type][lang]: + print(f" >: {model_type}/{lang}/{dataset}") + + @staticmethod + def print_model_license(model_item: Dict): + """Print the license of a model + + Args: + model_item (dict): model item in the models.json + """ + if "license" in model_item and model_item["license"].strip() != "": + print(f" > Model's license - {model_item['license']}") + if model_item["license"].lower() in LICENSE_URLS: + print(f" > Check {LICENSE_URLS[model_item['license'].lower()]} for more info.") + else: + print(" > Check https://opensource.org/licenses for more info.") + else: + print(" > Model's license - No license information available") + + def _download_github_model(self, model_item: Dict, output_path: str): + if isinstance(model_item["github_rls_url"], list): + self._download_model_files(model_item["github_rls_url"], output_path, self.progress_bar) + else: + self._download_zip_file(model_item["github_rls_url"], output_path, self.progress_bar) + + def _download_hf_model(self, model_item: Dict, output_path: str): + if isinstance(model_item["hf_url"], list): + self._download_model_files(model_item["hf_url"], output_path, self.progress_bar) + else: + self._download_zip_file(model_item["hf_url"], output_path, self.progress_bar) + + def download_fairseq_model(self, model_name, output_path): + URI_PREFIX = "https://coqui.gateway.scarf.sh/fairseq/" + _, lang, _, _ = model_name.split("/") + model_download_uri = os.path.join(URI_PREFIX, f"{lang}.tar.gz") + self._download_tar_file(model_download_uri, output_path, self.progress_bar) + + @staticmethod + def set_model_url(model_item: Dict): + model_item["model_url"] = None + if "github_rls_url" in model_item: + model_item["model_url"] = model_item["github_rls_url"] + elif "hf_url" in model_item: + model_item["model_url"] = model_item["hf_url"] + elif "fairseq" in model_item["model_name"]: + model_item["model_url"] = 
"https://coqui.gateway.scarf.sh/fairseq/" + elif "xtts" in model_item["model_name"]: + model_item["model_url"] = "https://coqui.gateway.scarf.sh/xtts/" + return model_item + + def _set_model_item(self, model_name): + # fetch model info from the dict + if "fairseq" in model_name: + model_type = "tts_models" + lang = model_name.split("/")[1] + model_item = { + "model_type": "tts_models", + "license": "CC BY-NC 4.0", + "default_vocoder": None, + "author": "fairseq", + "description": "this model is released by Meta under Fairseq repo. Visit https://github.com/facebookresearch/fairseq/tree/main/examples/mms for more info.", + } + model_item["model_name"] = model_name + elif "xtts" in model_name and len(model_name.split("/")) != 4: + # loading xtts models with only model name (e.g. xtts_v2.0.2) + # check model name has the version number with regex + version_regex = r"v\d+\.\d+\.\d+" + if re.search(version_regex, model_name): + model_version = model_name.split("_")[-1] + else: + model_version = "main" + model_type = "tts_models" + lang = "multilingual" + dataset = "multi-dataset" + model = model_name + model_item = { + "default_vocoder": None, + "license": "CPML", + "contact": "info@coqui.ai", + "tos_required": True, + "hf_url": [ + f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/model.pth", + f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/config.json", + f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/vocab.json", + f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/hash.md5", + f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/speakers_xtts.pth", + ], + } + else: + # get model from models.json + model_type, lang, dataset, model = model_name.split("/") + model_item = self.models_dict[model_type][lang][dataset][model] + model_item["model_type"] = model_type + + model_full_name = f"{model_type}--{lang}--{dataset}--{model}" + md5hash = model_item["model_hash"] if "model_hash" in model_item else None + model_item = self.set_model_url(model_item) + return model_item, model_full_name, model, md5hash + + @staticmethod + def ask_tos(model_full_path): + """Ask the user to agree to the terms of service""" + tos_path = os.path.join(model_full_path, "tos_agreed.txt") + print(" > You must confirm the following:") + print(' | > "I have purchased a commercial license from Coqui: licensing@coqui.ai"') + print(' | > "Otherwise, I agree to the terms of the non-commercial CPML: https://coqui.ai/cpml" - [y/n]') + answer = input(" | | > ") + if answer.lower() == "y": + with open(tos_path, "w", encoding="utf-8") as f: + f.write("I have read, understood and agreed to the Terms and Conditions.") + return True + return False + + @staticmethod + def tos_agreed(model_item, model_full_path): + """Check if the user has agreed to the terms of service""" + if "tos_required" in model_item and model_item["tos_required"]: + tos_path = os.path.join(model_full_path, "tos_agreed.txt") + if os.path.exists(tos_path) or os.environ.get("COQUI_TOS_AGREED") == "1": + return True + return False + return True + + def create_dir_and_download_model(self, model_name, model_item, output_path): + os.makedirs(output_path, exist_ok=True) + # handle TOS + if not self.tos_agreed(model_item, output_path): + if not self.ask_tos(output_path): + os.rmdir(output_path) + raise Exception(" [!] 
You must agree to the terms of service to use this model.") + print(f" > Downloading model to {output_path}") + try: + if "fairseq" in model_name: + self.download_fairseq_model(model_name, output_path) + elif "github_rls_url" in model_item: + self._download_github_model(model_item, output_path) + elif "hf_url" in model_item: + self._download_hf_model(model_item, output_path) + + except requests.RequestException as e: + print(f" > Failed to download the model file to {output_path}") + rmtree(output_path) + raise e + self.print_model_license(model_item=model_item) + + def check_if_configs_are_equal(self, model_name, model_item, output_path): + with fsspec.open(self._find_files(output_path)[1], "r", encoding="utf-8") as f: + config_local = json.load(f) + remote_url = None + for url in model_item["hf_url"]: + if "config.json" in url: + remote_url = url + break + + with fsspec.open(remote_url, "r", encoding="utf-8") as f: + config_remote = json.load(f) + + if not config_local == config_remote: + print(f" > {model_name} is already downloaded however it has been changed. Redownloading it...") + self.create_dir_and_download_model(model_name, model_item, output_path) + + def download_model(self, model_name): + """Download model files given the full model name. + Model name is in the format + 'type/language/dataset/model' + e.g. 'tts_model/en/ljspeech/tacotron' + + Every model must have the following files: + - *.pth : pytorch model checkpoint file. + - config.json : model config file. + - scale_stats.npy (if exist): scale values for preprocessing. + + Args: + model_name (str): model name as explained above. + """ + model_item, model_full_name, model, md5sum = self._set_model_item(model_name) + # set the model specific output path + output_path = os.path.join(self.output_prefix, model_full_name) + if os.path.exists(output_path): + if md5sum is not None: + md5sum_file = os.path.join(output_path, "hash.md5") + if os.path.isfile(md5sum_file): + with open(md5sum_file, mode="r") as f: + if not f.read() == md5sum: + print(f" > {model_name} has been updated, clearing model cache...") + self.create_dir_and_download_model(model_name, model_item, output_path) + else: + print(f" > {model_name} is already downloaded.") + else: + print(f" > {model_name} has been updated, clearing model cache...") + self.create_dir_and_download_model(model_name, model_item, output_path) + # if the configs are different, redownload it + # ToDo: we need a better way to handle it + if "xtts" in model_name: + try: + self.check_if_configs_are_equal(model_name, model_item, output_path) + except: + pass + else: + print(f" > {model_name} is already downloaded.") + else: + self.create_dir_and_download_model(model_name, model_item, output_path) + + # find downloaded files + output_model_path = output_path + output_config_path = None + if ( + model not in ["tortoise-v2", "bark"] and "fairseq" not in model_name and "xtts" not in model_name + ): # TODO:This is stupid but don't care for now. 
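+            # For standard models, locate the downloaded checkpoint (*.pth) and config.json, then point the
+            # config's auxiliary file entries (scale_stats, speakers, speaker encoder) at the local folder.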
+ output_model_path, output_config_path = self._find_files(output_path) + # update paths in the config.json + self._update_paths(output_path, output_config_path) + return output_model_path, output_config_path, model_item + + @staticmethod + def _find_files(output_path: str) -> Tuple[str, str]: + """Find the model and config files in the output path + + Args: + output_path (str): path to the model files + + Returns: + Tuple[str, str]: path to the model file and config file + """ + model_file = None + config_file = None + for file_name in os.listdir(output_path): + if file_name in ["model_file.pth", "model_file.pth.tar", "model.pth"]: + model_file = os.path.join(output_path, file_name) + elif file_name == "config.json": + config_file = os.path.join(output_path, file_name) + if model_file is None: + raise ValueError(" [!] Model file not found in the output path") + if config_file is None: + raise ValueError(" [!] Config file not found in the output path") + return model_file, config_file + + @staticmethod + def _find_speaker_encoder(output_path: str) -> str: + """Find the speaker encoder file in the output path + + Args: + output_path (str): path to the model files + + Returns: + str: path to the speaker encoder file + """ + speaker_encoder_file = None + for file_name in os.listdir(output_path): + if file_name in ["model_se.pth", "model_se.pth.tar"]: + speaker_encoder_file = os.path.join(output_path, file_name) + return speaker_encoder_file + + def _update_paths(self, output_path: str, config_path: str) -> None: + """Update paths for certain files in config.json after download. + + Args: + output_path (str): local path the model is downloaded to. + config_path (str): local config.json path. + """ + output_stats_path = os.path.join(output_path, "scale_stats.npy") + output_d_vector_file_path = os.path.join(output_path, "speakers.json") + output_d_vector_file_pth_path = os.path.join(output_path, "speakers.pth") + output_speaker_ids_file_path = os.path.join(output_path, "speaker_ids.json") + output_speaker_ids_file_pth_path = os.path.join(output_path, "speaker_ids.pth") + speaker_encoder_config_path = os.path.join(output_path, "config_se.json") + speaker_encoder_model_path = self._find_speaker_encoder(output_path) + + # update the scale_path.npy file path in the model config.json + self._update_path("audio.stats_path", output_stats_path, config_path) + + # update the speakers.json file path in the model config.json to the current path + self._update_path("d_vector_file", output_d_vector_file_path, config_path) + self._update_path("d_vector_file", output_d_vector_file_pth_path, config_path) + self._update_path("model_args.d_vector_file", output_d_vector_file_path, config_path) + self._update_path("model_args.d_vector_file", output_d_vector_file_pth_path, config_path) + + # update the speaker_ids.json file path in the model config.json to the current path + self._update_path("speakers_file", output_speaker_ids_file_path, config_path) + self._update_path("speakers_file", output_speaker_ids_file_pth_path, config_path) + self._update_path("model_args.speakers_file", output_speaker_ids_file_path, config_path) + self._update_path("model_args.speakers_file", output_speaker_ids_file_pth_path, config_path) + + # update the speaker_encoder file path in the model config.json to the current path + self._update_path("speaker_encoder_model_path", speaker_encoder_model_path, config_path) + self._update_path("model_args.speaker_encoder_model_path", speaker_encoder_model_path, config_path) + 
self._update_path("speaker_encoder_config_path", speaker_encoder_config_path, config_path) + self._update_path("model_args.speaker_encoder_config_path", speaker_encoder_config_path, config_path) + + @staticmethod + def _update_path(field_name, new_path, config_path): + """Update the path in the model config.json for the current environment after download""" + if new_path and os.path.exists(new_path): + config = load_config(config_path) + field_names = field_name.split(".") + if len(field_names) > 1: + # field name points to a sub-level field + sub_conf = config + for fd in field_names[:-1]: + if fd in sub_conf: + sub_conf = sub_conf[fd] + else: + return + if isinstance(sub_conf[field_names[-1]], list): + sub_conf[field_names[-1]] = [new_path] + else: + sub_conf[field_names[-1]] = new_path + else: + # field name points to a top-level field + if not field_name in config: + return + if isinstance(config[field_name], list): + config[field_name] = [new_path] + else: + config[field_name] = new_path + config.save_json(config_path) + + @staticmethod + def _download_zip_file(file_url, output_folder, progress_bar): + """Download the github releases""" + # download the file + r = requests.get(file_url, stream=True) + # extract the file + try: + total_size_in_bytes = int(r.headers.get("content-length", 0)) + block_size = 1024 # 1 Kibibyte + if progress_bar: + ModelManager.tqdm_progress = tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) + temp_zip_name = os.path.join(output_folder, file_url.split("/")[-1]) + with open(temp_zip_name, "wb") as file: + for data in r.iter_content(block_size): + if progress_bar: + ModelManager.tqdm_progress.update(len(data)) + file.write(data) + with zipfile.ZipFile(temp_zip_name) as z: + z.extractall(output_folder) + os.remove(temp_zip_name) # delete zip after extract + except zipfile.BadZipFile: + print(f" > Error: Bad zip file - {file_url}") + raise zipfile.BadZipFile # pylint: disable=raise-missing-from + # move the files to the outer path + for file_path in z.namelist(): + src_path = os.path.join(output_folder, file_path) + if os.path.isfile(src_path): + dst_path = os.path.join(output_folder, os.path.basename(file_path)) + if src_path != dst_path: + copyfile(src_path, dst_path) + # remove redundant (hidden or not) folders + for file_path in z.namelist(): + if os.path.isdir(os.path.join(output_folder, file_path)): + rmtree(os.path.join(output_folder, file_path)) + + @staticmethod + def _download_tar_file(file_url, output_folder, progress_bar): + """Download the github releases""" + # download the file + r = requests.get(file_url, stream=True) + # extract the file + try: + total_size_in_bytes = int(r.headers.get("content-length", 0)) + block_size = 1024 # 1 Kibibyte + if progress_bar: + ModelManager.tqdm_progress = tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) + temp_tar_name = os.path.join(output_folder, file_url.split("/")[-1]) + with open(temp_tar_name, "wb") as file: + for data in r.iter_content(block_size): + if progress_bar: + ModelManager.tqdm_progress.update(len(data)) + file.write(data) + with tarfile.open(temp_tar_name) as t: + t.extractall(output_folder) + tar_names = t.getnames() + os.remove(temp_tar_name) # delete tar after extract + except tarfile.ReadError: + print(f" > Error: Bad tar file - {file_url}") + raise tarfile.ReadError # pylint: disable=raise-missing-from + # move the files to the outer path + for file_path in os.listdir(os.path.join(output_folder, tar_names[0])): + src_path = os.path.join(output_folder, tar_names[0], 
file_path) + dst_path = os.path.join(output_folder, os.path.basename(file_path)) + if src_path != dst_path: + copyfile(src_path, dst_path) + # remove the extracted folder + rmtree(os.path.join(output_folder, tar_names[0])) + + @staticmethod + def _download_model_files(file_urls, output_folder, progress_bar): + """Download the github releases""" + for file_url in file_urls: + # download the file + r = requests.get(file_url, stream=True) + # extract the file + bease_filename = file_url.split("/")[-1] + temp_zip_name = os.path.join(output_folder, bease_filename) + total_size_in_bytes = int(r.headers.get("content-length", 0)) + block_size = 1024 # 1 Kibibyte + with open(temp_zip_name, "wb") as file: + if progress_bar: + ModelManager.tqdm_progress = tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) + for data in r.iter_content(block_size): + if progress_bar: + ModelManager.tqdm_progress.update(len(data)) + file.write(data) + + @staticmethod + def _check_dict_key(my_dict, key): + if key in my_dict.keys() and my_dict[key] is not None: + if not isinstance(key, str): + return True + if isinstance(key, str) and len(my_dict[key]) > 0: + return True + return False diff --git a/TTS/utils/radam.py b/TTS/utils/radam.py new file mode 100644 index 0000000..cbd1499 --- /dev/null +++ b/TTS/utils/radam.py @@ -0,0 +1,105 @@ +# modified from https://github.com/LiyuanLucasLiu/RAdam + +import math + +import torch +from torch.optim.optimizer import Optimizer + + +class RAdam(Optimizer): + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, degenerated_to_sgd=True): + if lr < 0.0: + raise ValueError("Invalid learning rate: {}".format(lr)) + if eps < 0.0: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + if not 0.0 <= betas[1] < 1.0: + raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + + self.degenerated_to_sgd = degenerated_to_sgd + if isinstance(params, (list, tuple)) and len(params) > 0 and isinstance(params[0], dict): + for param in params: + if "betas" in param and (param["betas"][0] != betas[0] or param["betas"][1] != betas[1]): + param["buffer"] = [[None, None, None] for _ in range(10)] + defaults = dict( + lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, buffer=[[None, None, None] for _ in range(10)] + ) + super().__init__(params, defaults) + + def __setstate__(self, state): # pylint: disable=useless-super-delegation + super().__setstate__(state) + + def step(self, closure=None): + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + for p in group["params"]: + if p.grad is None: + continue + grad = p.grad.data.float() + if grad.is_sparse: + raise RuntimeError("RAdam does not support sparse gradients") + + p_data_fp32 = p.data.float() + + state = self.state[p] + + if len(state) == 0: + state["step"] = 0 + state["exp_avg"] = torch.zeros_like(p_data_fp32) + state["exp_avg_sq"] = torch.zeros_like(p_data_fp32) + else: + state["exp_avg"] = state["exp_avg"].type_as(p_data_fp32) + state["exp_avg_sq"] = state["exp_avg_sq"].type_as(p_data_fp32) + + exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"] + beta1, beta2 = group["betas"] + + exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2) + exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1) + + state["step"] += 1 + buffered = group["buffer"][int(state["step"] % 10)] + if state["step"] == buffered[0]: + N_sma, step_size = 
buffered[1], buffered[2] + else: + buffered[0] = state["step"] + beta2_t = beta2 ** state["step"] + N_sma_max = 2 / (1 - beta2) - 1 + N_sma = N_sma_max - 2 * state["step"] * beta2_t / (1 - beta2_t) + buffered[1] = N_sma + + # more conservative since it's an approximated value + if N_sma >= 5: + step_size = math.sqrt( + (1 - beta2_t) + * (N_sma - 4) + / (N_sma_max - 4) + * (N_sma - 2) + / N_sma + * N_sma_max + / (N_sma_max - 2) + ) / (1 - beta1 ** state["step"]) + elif self.degenerated_to_sgd: + step_size = 1.0 / (1 - beta1 ** state["step"]) + else: + step_size = -1 + buffered[2] = step_size + + # more conservative since it's an approximated value + if N_sma >= 5: + if group["weight_decay"] != 0: + p_data_fp32.add_(p_data_fp32, alpha=-group["weight_decay"] * group["lr"]) + denom = exp_avg_sq.sqrt().add_(group["eps"]) + p_data_fp32.addcdiv_(exp_avg, denom, value=-step_size * group["lr"]) + p.data.copy_(p_data_fp32) + elif step_size > 0: + if group["weight_decay"] != 0: + p_data_fp32.add_(p_data_fp32, alpha=-group["weight_decay"] * group["lr"]) + p_data_fp32.add_(exp_avg, alpha=-step_size * group["lr"]) + p.data.copy_(p_data_fp32) + + return loss diff --git a/TTS/utils/samplers.py b/TTS/utils/samplers.py new file mode 100644 index 0000000..b08a763 --- /dev/null +++ b/TTS/utils/samplers.py @@ -0,0 +1,201 @@ +import math +import random +from typing import Callable, List, Union + +from torch.utils.data.sampler import BatchSampler, Sampler, SubsetRandomSampler + + +class SubsetSampler(Sampler): + """ + Samples elements sequentially from a given list of indices. + + Args: + indices (list): a sequence of indices + """ + + def __init__(self, indices): + super().__init__(indices) + self.indices = indices + + def __iter__(self): + return (self.indices[i] for i in range(len(self.indices))) + + def __len__(self): + return len(self.indices) + + +class PerfectBatchSampler(Sampler): + """ + Samples a mini-batch of indices for a balanced class batching + + Args: + dataset_items(list): dataset items to sample from. + classes (list): list of classes of dataset_items to sample from. + batch_size (int): total number of samples to be sampled in a mini-batch. + num_gpus (int): number of GPU in the data parallel mode. + shuffle (bool): if True, samples randomly, otherwise samples sequentially. + drop_last (bool): if True, drops last incomplete batch. + """ + + def __init__( + self, + dataset_items, + classes, + batch_size, + num_classes_in_batch, + num_gpus=1, + shuffle=True, + drop_last=False, + label_key="class_name", + ): + super().__init__(dataset_items) + assert ( + batch_size % (num_classes_in_batch * num_gpus) == 0 + ), "Batch size must be divisible by number of classes times the number of data parallel devices (if enabled)." 
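+        # Build one index list per class label so each mini-batch can draw an equal number of samples per class.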
+ + label_indices = {} + for idx, item in enumerate(dataset_items): + label = item[label_key] + if label not in label_indices.keys(): + label_indices[label] = [idx] + else: + label_indices[label].append(idx) + + if shuffle: + self._samplers = [SubsetRandomSampler(label_indices[key]) for key in classes] + else: + self._samplers = [SubsetSampler(label_indices[key]) for key in classes] + + self._batch_size = batch_size + self._drop_last = drop_last + self._dp_devices = num_gpus + self._num_classes_in_batch = num_classes_in_batch + + def __iter__(self): + batch = [] + if self._num_classes_in_batch != len(self._samplers): + valid_samplers_idx = random.sample(range(len(self._samplers)), self._num_classes_in_batch) + else: + valid_samplers_idx = None + + iters = [iter(s) for s in self._samplers] + done = False + + while True: + b = [] + for i, it in enumerate(iters): + if valid_samplers_idx is not None and i not in valid_samplers_idx: + continue + idx = next(it, None) + if idx is None: + done = True + break + b.append(idx) + if done: + break + batch += b + if len(batch) == self._batch_size: + yield batch + batch = [] + if valid_samplers_idx is not None: + valid_samplers_idx = random.sample(range(len(self._samplers)), self._num_classes_in_batch) + + if not self._drop_last: + if len(batch) > 0: + groups = len(batch) // self._num_classes_in_batch + if groups % self._dp_devices == 0: + yield batch + else: + batch = batch[: (groups // self._dp_devices) * self._dp_devices * self._num_classes_in_batch] + if len(batch) > 0: + yield batch + + def __len__(self): + class_batch_size = self._batch_size // self._num_classes_in_batch + return min(((len(s) + class_batch_size - 1) // class_batch_size) for s in self._samplers) + + +def identity(x): + return x + + +class SortedSampler(Sampler): + """Samples elements sequentially, always in the same order. + + Taken from https://github.com/PetrochukM/PyTorch-NLP + + Args: + data (iterable): Iterable data. + sort_key (callable): Specifies a function of one argument that is used to extract a + numerical comparison key from each list element. + + Example: + >>> list(SortedSampler(range(10), sort_key=lambda i: -i)) + [9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + + """ + + def __init__(self, data, sort_key: Callable = identity): + super().__init__(data) + self.data = data + self.sort_key = sort_key + zip_ = [(i, self.sort_key(row)) for i, row in enumerate(self.data)] + zip_ = sorted(zip_, key=lambda r: r[1]) + self.sorted_indexes = [item[0] for item in zip_] + + def __iter__(self): + return iter(self.sorted_indexes) + + def __len__(self): + return len(self.data) + + +class BucketBatchSampler(BatchSampler): + """Bucket batch sampler + + Adapted from https://github.com/PetrochukM/PyTorch-NLP + + Args: + sampler (torch.data.utils.sampler.Sampler): + batch_size (int): Size of mini-batch. + drop_last (bool): If `True` the sampler will drop the last batch if its size would be less + than `batch_size`. + data (list): List of data samples. + sort_key (callable, optional): Callable to specify a comparison key for sorting. + bucket_size_multiplier (int, optional): Buckets are of size + `batch_size * bucket_size_multiplier`. 
+ + Example: + >>> sampler = WeightedRandomSampler(weights, len(weights)) + >>> sampler = BucketBatchSampler(sampler, data=data_items, batch_size=32, drop_last=True) + """ + + def __init__( + self, + sampler, + data, + batch_size, + drop_last, + sort_key: Union[Callable, List] = identity, + bucket_size_multiplier=100, + ): + super().__init__(sampler, batch_size, drop_last) + self.data = data + self.sort_key = sort_key + _bucket_size = batch_size * bucket_size_multiplier + if hasattr(sampler, "__len__"): + _bucket_size = min(_bucket_size, len(sampler)) + self.bucket_sampler = BatchSampler(sampler, _bucket_size, False) + + def __iter__(self): + for idxs in self.bucket_sampler: + bucket_data = [self.data[idx] for idx in idxs] + sorted_sampler = SortedSampler(bucket_data, self.sort_key) + for batch_idx in SubsetRandomSampler(list(BatchSampler(sorted_sampler, self.batch_size, self.drop_last))): + sorted_idxs = [idxs[i] for i in batch_idx] + yield sorted_idxs + + def __len__(self): + if self.drop_last: + return len(self.sampler) // self.batch_size + return math.ceil(len(self.sampler) / self.batch_size) diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py new file mode 100644 index 0000000..b98647c --- /dev/null +++ b/TTS/utils/synthesizer.py @@ -0,0 +1,505 @@ +import os +import time +from typing import List + +import numpy as np +import pysbd +import torch +from torch import nn + +from TTS.config import load_config +from TTS.tts.configs.vits_config import VitsConfig +from TTS.tts.models import setup_model as setup_tts_model +from TTS.tts.models.vits import Vits + +# pylint: disable=unused-wildcard-import +# pylint: disable=wildcard-import +from TTS.tts.utils.synthesis import synthesis, transfer_voice, trim_silence +from TTS.utils.audio import AudioProcessor +from TTS.utils.audio.numpy_transforms import save_wav +from TTS.vc.models import setup_model as setup_vc_model +from TTS.vocoder.models import setup_model as setup_vocoder_model +from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input + + +class Synthesizer(nn.Module): + def __init__( + self, + tts_checkpoint: str = "", + tts_config_path: str = "", + tts_speakers_file: str = "", + tts_languages_file: str = "", + vocoder_checkpoint: str = "", + vocoder_config: str = "", + encoder_checkpoint: str = "", + encoder_config: str = "", + vc_checkpoint: str = "", + vc_config: str = "", + model_dir: str = "", + voice_dir: str = None, + use_cuda: bool = False, + ) -> None: + """General 🐸 TTS interface for inference. It takes a tts and a vocoder + model and synthesize speech from the provided text. + + The text is divided into a list of sentences using `pysbd` and synthesize + speech on each sentence separately. + + If you have certain special characters in your text, you need to handle + them before providing the text to Synthesizer. + + TODO: set the segmenter based on the source language + + Args: + tts_checkpoint (str, optional): path to the tts model file. + tts_config_path (str, optional): path to the tts config file. + vocoder_checkpoint (str, optional): path to the vocoder model file. Defaults to None. + vocoder_config (str, optional): path to the vocoder config file. Defaults to None. + encoder_checkpoint (str, optional): path to the speaker encoder model file. Defaults to `""`, + encoder_config (str, optional): path to the speaker encoder config file. Defaults to `""`, + vc_checkpoint (str, optional): path to the voice conversion model file. 
Defaults to `""`, + vc_config (str, optional): path to the voice conversion config file. Defaults to `""`, + use_cuda (bool, optional): enable/disable cuda. Defaults to False. + """ + super().__init__() + self.tts_checkpoint = tts_checkpoint + self.tts_config_path = tts_config_path + self.tts_speakers_file = tts_speakers_file + self.tts_languages_file = tts_languages_file + self.vocoder_checkpoint = vocoder_checkpoint + self.vocoder_config = vocoder_config + self.encoder_checkpoint = encoder_checkpoint + self.encoder_config = encoder_config + self.vc_checkpoint = vc_checkpoint + self.vc_config = vc_config + self.use_cuda = use_cuda + + self.tts_model = None + self.vocoder_model = None + self.vc_model = None + self.speaker_manager = None + self.tts_speakers = {} + self.language_manager = None + self.num_languages = 0 + self.tts_languages = {} + self.d_vector_dim = 0 + self.seg = self._get_segmenter("en") + self.use_cuda = use_cuda + self.voice_dir = voice_dir + if self.use_cuda: + assert torch.cuda.is_available(), "CUDA is not availabe on this machine." + + if tts_checkpoint: + self._load_tts(tts_checkpoint, tts_config_path, use_cuda) + self.output_sample_rate = self.tts_config.audio["sample_rate"] + + if vocoder_checkpoint: + self._load_vocoder(vocoder_checkpoint, vocoder_config, use_cuda) + self.output_sample_rate = self.vocoder_config.audio["sample_rate"] + + if vc_checkpoint: + self._load_vc(vc_checkpoint, vc_config, use_cuda) + self.output_sample_rate = self.vc_config.audio["output_sample_rate"] + + if model_dir: + if "fairseq" in model_dir: + self._load_fairseq_from_dir(model_dir, use_cuda) + self.output_sample_rate = self.tts_config.audio["sample_rate"] + else: + self._load_tts_from_dir(model_dir, use_cuda) + self.output_sample_rate = self.tts_config.audio["output_sample_rate"] + + @staticmethod + def _get_segmenter(lang: str): + """get the sentence segmenter for the given language. + + Args: + lang (str): target language code. + + Returns: + [type]: [description] + """ + return pysbd.Segmenter(language=lang, clean=True) + + def _load_vc(self, vc_checkpoint: str, vc_config_path: str, use_cuda: bool) -> None: + """Load the voice conversion model. + + 1. Load the model config. + 2. Init the model from the config. + 3. Load the model weights. + 4. Move the model to the GPU if CUDA is enabled. + + Args: + vc_checkpoint (str): path to the model checkpoint. + tts_config_path (str): path to the model config file. + use_cuda (bool): enable/disable CUDA use. + """ + # pylint: disable=global-statement + self.vc_config = load_config(vc_config_path) + self.vc_model = setup_vc_model(config=self.vc_config) + self.vc_model.load_checkpoint(self.vc_config, vc_checkpoint) + if use_cuda: + self.vc_model.cuda() + + def _load_fairseq_from_dir(self, model_dir: str, use_cuda: bool) -> None: + """Load the fairseq model from a directory. + + We assume it is VITS and the model knows how to load itself from the directory and there is a config.json file in the directory. + """ + self.tts_config = VitsConfig() + self.tts_model = Vits.init_from_config(self.tts_config) + self.tts_model.load_fairseq_checkpoint(self.tts_config, checkpoint_dir=model_dir, eval=True) + self.tts_config = self.tts_model.config + if use_cuda: + self.tts_model.cuda() + + def _load_tts_from_dir(self, model_dir: str, use_cuda: bool) -> None: + """Load the TTS model from a directory. + + We assume the model knows how to load itself from the directory and there is a config.json file in the directory. 
+ """ + config = load_config(os.path.join(model_dir, "config.json")) + self.tts_config = config + self.tts_model = setup_tts_model(config) + self.tts_model.load_checkpoint(config, checkpoint_dir=model_dir, eval=True) + if use_cuda: + self.tts_model.cuda() + + def _load_tts(self, tts_checkpoint: str, tts_config_path: str, use_cuda: bool) -> None: + """Load the TTS model. + + 1. Load the model config. + 2. Init the model from the config. + 3. Load the model weights. + 4. Move the model to the GPU if CUDA is enabled. + 5. Init the speaker manager in the model. + + Args: + tts_checkpoint (str): path to the model checkpoint. + tts_config_path (str): path to the model config file. + use_cuda (bool): enable/disable CUDA use. + """ + # pylint: disable=global-statement + self.tts_config = load_config(tts_config_path) + if self.tts_config["use_phonemes"] and self.tts_config["phonemizer"] is None: + raise ValueError("Phonemizer is not defined in the TTS config.") + + self.tts_model = setup_tts_model(config=self.tts_config) + + if not self.encoder_checkpoint: + self._set_speaker_encoder_paths_from_tts_config() + + self.tts_model.load_checkpoint(self.tts_config, tts_checkpoint, eval=True) + if use_cuda: + self.tts_model.cuda() + + if self.encoder_checkpoint and hasattr(self.tts_model, "speaker_manager"): + self.tts_model.speaker_manager.init_encoder(self.encoder_checkpoint, self.encoder_config, use_cuda) + + def _set_speaker_encoder_paths_from_tts_config(self): + """Set the encoder paths from the tts model config for models with speaker encoders.""" + if hasattr(self.tts_config, "model_args") and hasattr( + self.tts_config.model_args, "speaker_encoder_config_path" + ): + self.encoder_checkpoint = self.tts_config.model_args.speaker_encoder_model_path + self.encoder_config = self.tts_config.model_args.speaker_encoder_config_path + + def _load_vocoder(self, model_file: str, model_config: str, use_cuda: bool) -> None: + """Load the vocoder model. + + 1. Load the vocoder config. + 2. Init the AudioProcessor for the vocoder. + 3. Init the vocoder model from the config. + 4. Move the model to the GPU if CUDA is enabled. + + Args: + model_file (str): path to the model checkpoint. + model_config (str): path to the model config file. + use_cuda (bool): enable/disable CUDA use. + """ + self.vocoder_config = load_config(model_config) + self.vocoder_ap = AudioProcessor(verbose=False, **self.vocoder_config.audio) + self.vocoder_model = setup_vocoder_model(self.vocoder_config) + self.vocoder_model.load_checkpoint(self.vocoder_config, model_file, eval=True) + if use_cuda: + self.vocoder_model.cuda() + + def split_into_sentences(self, text) -> List[str]: + """Split give text into sentences. + + Args: + text (str): input text in string format. + + Returns: + List[str]: list of sentences. + """ + return self.seg.segment(text) + + def save_wav(self, wav: List[int], path: str, pipe_out=None) -> None: + """Save the waveform as a file. + + Args: + wav (List[int]): waveform as a list of values. + path (str): output path to save the waveform. + pipe_out (BytesIO, optional): Flag to stdout the generated TTS wav file for shell pipe. 
+ """ + # if tensor convert to numpy + if torch.is_tensor(wav): + wav = wav.cpu().numpy() + if isinstance(wav, list): + wav = np.array(wav) + save_wav(wav=wav, path=path, sample_rate=self.output_sample_rate, pipe_out=pipe_out) + + def voice_conversion(self, source_wav: str, target_wav: str) -> List[int]: + output_wav = self.vc_model.voice_conversion(source_wav, target_wav) + return output_wav + + def tts( + self, + text: str = "", + speaker_name: str = "", + language_name: str = "", + speaker_wav=None, + style_wav=None, + style_text=None, + reference_wav=None, + reference_speaker_name=None, + split_sentences: bool = True, + **kwargs, + ) -> List[int]: + """🐸 TTS magic. Run all the models and generate speech. + + Args: + text (str): input text. + speaker_name (str, optional): speaker id for multi-speaker models. Defaults to "". + language_name (str, optional): language id for multi-language models. Defaults to "". + speaker_wav (Union[str, List[str]], optional): path to the speaker wav for voice cloning. Defaults to None. + style_wav ([type], optional): style waveform for GST. Defaults to None. + style_text ([type], optional): transcription of style_wav for Capacitron. Defaults to None. + reference_wav ([type], optional): reference waveform for voice conversion. Defaults to None. + reference_speaker_name ([type], optional): speaker id of reference waveform. Defaults to None. + split_sentences (bool, optional): split the input text into sentences. Defaults to True. + **kwargs: additional arguments to pass to the TTS model. + Returns: + List[int]: [description] + """ + start_time = time.time() + wavs = [] + + if not text and not reference_wav: + raise ValueError( + "You need to define either `text` (for sythesis) or a `reference_wav` (for voice conversion) to use the Coqui TTS API." + ) + + if text: + sens = [text] + if split_sentences: + print(" > Text splitted to sentences.") + sens = self.split_into_sentences(text) + print(sens) + + # handle multi-speaker + if "voice_dir" in kwargs: + self.voice_dir = kwargs["voice_dir"] + kwargs.pop("voice_dir") + speaker_embedding = None + speaker_id = None + if self.tts_speakers_file or hasattr(self.tts_model.speaker_manager, "name_to_id"): + if speaker_name and isinstance(speaker_name, str) and not self.tts_config.model == "xtts": + if self.tts_config.use_d_vector_file: + # get the average speaker embedding from the saved d_vectors. + speaker_embedding = self.tts_model.speaker_manager.get_mean_embedding( + speaker_name, num_samples=None, randomize=False + ) + speaker_embedding = np.array(speaker_embedding)[None, :] # [1 x embedding_dim] + else: + # get speaker idx from the speaker name + speaker_id = self.tts_model.speaker_manager.name_to_id[speaker_name] + # handle Neon models with single speaker. + elif len(self.tts_model.speaker_manager.name_to_id) == 1: + speaker_id = list(self.tts_model.speaker_manager.name_to_id.values())[0] + elif not speaker_name and not speaker_wav: + raise ValueError( + " [!] Looks like you are using a multi-speaker model. " + "You need to define either a `speaker_idx` or a `speaker_wav` to use a multi-speaker model." + ) + else: + speaker_embedding = None + else: + if speaker_name and self.voice_dir is None: + raise ValueError( + f" [!] Missing speakers.json file path for selecting speaker {speaker_name}." + "Define path for speaker.json if it is a multi-speaker model or remove defined speaker idx. 
" + ) + + # handle multi-lingual + language_id = None + if self.tts_languages_file or ( + hasattr(self.tts_model, "language_manager") + and self.tts_model.language_manager is not None + and not self.tts_config.model == "xtts" + ): + if len(self.tts_model.language_manager.name_to_id) == 1: + language_id = list(self.tts_model.language_manager.name_to_id.values())[0] + + elif language_name and isinstance(language_name, str): + try: + language_id = self.tts_model.language_manager.name_to_id[language_name] + except KeyError as e: + raise ValueError( + f" [!] Looks like you use a multi-lingual model. " + f"Language {language_name} is not in the available languages: " + f"{self.tts_model.language_manager.name_to_id.keys()}." + ) from e + + elif not language_name: + raise ValueError( + " [!] Look like you use a multi-lingual model. " + "You need to define either a `language_name` or a `style_wav` to use a multi-lingual model." + ) + + else: + raise ValueError( + f" [!] Missing language_ids.json file path for selecting language {language_name}." + "Define path for language_ids.json if it is a multi-lingual model or remove defined language idx. " + ) + + # compute a new d_vector from the given clip. + if ( + speaker_wav is not None + and self.tts_model.speaker_manager is not None + and hasattr(self.tts_model.speaker_manager, "encoder_ap") + and self.tts_model.speaker_manager.encoder_ap is not None + ): + speaker_embedding = self.tts_model.speaker_manager.compute_embedding_from_clip(speaker_wav) + + vocoder_device = "cpu" + use_gl = self.vocoder_model is None + if not use_gl: + vocoder_device = next(self.vocoder_model.parameters()).device + if self.use_cuda: + vocoder_device = "cuda" + + if not reference_wav: # not voice conversion + for sen in sens: + if hasattr(self.tts_model, "synthesize"): + outputs = self.tts_model.synthesize( + text=sen, + config=self.tts_config, + speaker_id=speaker_name, + voice_dirs=self.voice_dir, + d_vector=speaker_embedding, + speaker_wav=speaker_wav, + language=language_name, + **kwargs, + ) + else: + # synthesize voice + outputs = synthesis( + model=self.tts_model, + text=sen, + CONFIG=self.tts_config, + use_cuda=self.use_cuda, + speaker_id=speaker_id, + style_wav=style_wav, + style_text=style_text, + use_griffin_lim=use_gl, + d_vector=speaker_embedding, + language_id=language_id, + ) + waveform = outputs["wav"] + if not use_gl: + mel_postnet_spec = outputs["outputs"]["model_outputs"][0].detach().cpu().numpy() + # denormalize tts output based on tts audio config + mel_postnet_spec = self.tts_model.ap.denormalize(mel_postnet_spec.T).T + # renormalize spectrogram based on vocoder config + vocoder_input = self.vocoder_ap.normalize(mel_postnet_spec.T) + # compute scale factor for possible sample rate mismatch + scale_factor = [ + 1, + self.vocoder_config["audio"]["sample_rate"] / self.tts_model.ap.sample_rate, + ] + if scale_factor[1] != 1: + print(" > interpolating tts model output.") + vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input) + else: + vocoder_input = torch.tensor(vocoder_input).unsqueeze(0) # pylint: disable=not-callable + # run vocoder model + # [1, T, C] + waveform = self.vocoder_model.inference(vocoder_input.to(vocoder_device)) + if torch.is_tensor(waveform) and waveform.device != torch.device("cpu") and not use_gl: + waveform = waveform.cpu() + if not use_gl: + waveform = waveform.numpy() + waveform = waveform.squeeze() + + # trim silence + if "do_trim_silence" in self.tts_config.audio and self.tts_config.audio["do_trim_silence"]: + 
waveform = trim_silence(waveform, self.tts_model.ap) + + wavs += list(waveform) + wavs += [0] * 10000 + else: + # get the speaker embedding or speaker id for the reference wav file + reference_speaker_embedding = None + reference_speaker_id = None + if self.tts_speakers_file or hasattr(self.tts_model.speaker_manager, "name_to_id"): + if reference_speaker_name and isinstance(reference_speaker_name, str): + if self.tts_config.use_d_vector_file: + # get the speaker embedding from the saved d_vectors. + reference_speaker_embedding = self.tts_model.speaker_manager.get_embeddings_by_name( + reference_speaker_name + )[0] + reference_speaker_embedding = np.array(reference_speaker_embedding)[ + None, : + ] # [1 x embedding_dim] + else: + # get speaker idx from the speaker name + reference_speaker_id = self.tts_model.speaker_manager.name_to_id[reference_speaker_name] + else: + reference_speaker_embedding = self.tts_model.speaker_manager.compute_embedding_from_clip( + reference_wav + ) + outputs = transfer_voice( + model=self.tts_model, + CONFIG=self.tts_config, + use_cuda=self.use_cuda, + reference_wav=reference_wav, + speaker_id=speaker_id, + d_vector=speaker_embedding, + use_griffin_lim=use_gl, + reference_speaker_id=reference_speaker_id, + reference_d_vector=reference_speaker_embedding, + ) + waveform = outputs + if not use_gl: + mel_postnet_spec = outputs[0].detach().cpu().numpy() + # denormalize tts output based on tts audio config + mel_postnet_spec = self.tts_model.ap.denormalize(mel_postnet_spec.T).T + # renormalize spectrogram based on vocoder config + vocoder_input = self.vocoder_ap.normalize(mel_postnet_spec.T) + # compute scale factor for possible sample rate mismatch + scale_factor = [ + 1, + self.vocoder_config["audio"]["sample_rate"] / self.tts_model.ap.sample_rate, + ] + if scale_factor[1] != 1: + print(" > interpolating tts model output.") + vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input) + else: + vocoder_input = torch.tensor(vocoder_input).unsqueeze(0) # pylint: disable=not-callable + # run vocoder model + # [1, T, C] + waveform = self.vocoder_model.inference(vocoder_input.to(vocoder_device)) + if torch.is_tensor(waveform) and waveform.device != torch.device("cpu"): + waveform = waveform.cpu() + if not use_gl: + waveform = waveform.numpy() + wavs = waveform.squeeze() + + # compute stats + process_time = time.time() - start_time + audio_time = len(wavs) / self.tts_config.audio["sample_rate"] + print(f" > Processing time: {process_time}") + print(f" > Real-time factor: {process_time / audio_time}") + return wavs diff --git a/TTS/utils/training.py b/TTS/utils/training.py new file mode 100644 index 0000000..b51f55e --- /dev/null +++ b/TTS/utils/training.py @@ -0,0 +1,44 @@ +import numpy as np +import torch + + +def check_update(model, grad_clip, ignore_stopnet=False, amp_opt_params=None): + r"""Check model gradient against unexpected jumps and failures""" + skip_flag = False + if ignore_stopnet: + if not amp_opt_params: + grad_norm = torch.nn.utils.clip_grad_norm_( + [param for name, param in model.named_parameters() if "stopnet" not in name], grad_clip + ) + else: + grad_norm = torch.nn.utils.clip_grad_norm_(amp_opt_params, grad_clip) + else: + if not amp_opt_params: + grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip) + else: + grad_norm = torch.nn.utils.clip_grad_norm_(amp_opt_params, grad_clip) + + # compatibility with different torch versions + if isinstance(grad_norm, float): + if np.isinf(grad_norm): + print(" | > Gradient is INF 
!!") + skip_flag = True + else: + if torch.isinf(grad_norm): + print(" | > Gradient is INF !!") + skip_flag = True + return grad_norm, skip_flag + + +def gradual_training_scheduler(global_step, config): + """Setup the gradual training schedule wrt number + of active GPUs""" + num_gpus = torch.cuda.device_count() + if num_gpus == 0: + num_gpus = 1 + new_values = None + # we set the scheduling wrt num_gpus + for values in config.gradual_training: + if global_step * num_gpus >= values[0]: + new_values = values + return new_values[1], new_values[2] diff --git a/TTS/utils/vad.py b/TTS/utils/vad.py new file mode 100644 index 0000000..aefce2b --- /dev/null +++ b/TTS/utils/vad.py @@ -0,0 +1,88 @@ +import torch +import torchaudio + + +def read_audio(path): + wav, sr = torchaudio.load(path) + + if wav.size(0) > 1: + wav = wav.mean(dim=0, keepdim=True) + + return wav.squeeze(0), sr + + +def resample_wav(wav, sr, new_sr): + wav = wav.unsqueeze(0) + transform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=new_sr) + wav = transform(wav) + return wav.squeeze(0) + + +def map_timestamps_to_new_sr(vad_sr, new_sr, timestamps, just_begging_end=False): + factor = new_sr / vad_sr + new_timestamps = [] + if just_begging_end and timestamps: + # get just the start and end timestamps + new_dict = {"start": int(timestamps[0]["start"] * factor), "end": int(timestamps[-1]["end"] * factor)} + new_timestamps.append(new_dict) + else: + for ts in timestamps: + # map to the new SR + new_dict = {"start": int(ts["start"] * factor), "end": int(ts["end"] * factor)} + new_timestamps.append(new_dict) + + return new_timestamps + + +def get_vad_model_and_utils(use_cuda=False, use_onnx=False): + model, utils = torch.hub.load( + repo_or_dir="snakers4/silero-vad", model="silero_vad", force_reload=True, onnx=use_onnx, force_onnx_cpu=True + ) + if use_cuda: + model = model.cuda() + + get_speech_timestamps, save_audio, _, _, collect_chunks = utils + return model, get_speech_timestamps, save_audio, collect_chunks + + +def remove_silence( + model_and_utils, audio_path, out_path, vad_sample_rate=8000, trim_just_beginning_and_end=True, use_cuda=False +): + # get the VAD model and utils functions + model, get_speech_timestamps, _, collect_chunks = model_and_utils + + # read ground truth wav and resample the audio for the VAD + try: + wav, gt_sample_rate = read_audio(audio_path) + except: + print(f"> ❗ Failed to read {audio_path}") + return None, False + + # if needed, resample the audio for the VAD model + if gt_sample_rate != vad_sample_rate: + wav_vad = resample_wav(wav, gt_sample_rate, vad_sample_rate) + else: + wav_vad = wav + + if use_cuda: + wav_vad = wav_vad.cuda() + + # get speech timestamps from full audio file + speech_timestamps = get_speech_timestamps(wav_vad, model, sampling_rate=vad_sample_rate, window_size_samples=768) + + # map the current speech_timestamps to the sample rate of the ground truth audio + new_speech_timestamps = map_timestamps_to_new_sr( + vad_sample_rate, gt_sample_rate, speech_timestamps, trim_just_beginning_and_end + ) + + # if have speech timestamps else save the wav + if new_speech_timestamps: + wav = collect_chunks(new_speech_timestamps, wav) + is_speech = True + else: + print(f"> The file {audio_path} probably does not have speech please check it !!") + is_speech = False + + # save + torchaudio.save(out_path, wav[None, :], gt_sample_rate) + return out_path, is_speech