From 69cd493d03a93339fd72c461809de361ac9045d1 Mon Sep 17 00:00:00 2001 From: Sam Khoze <68170403+SamKhoze@users.noreply.github.com> Date: Tue, 18 Jun 2024 19:43:44 -0700 Subject: [PATCH] Add files via upload --- TTS/tts/utils/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 175 bytes .../utils/__pycache__/data.cpython-311.pyc | Bin 0 -> 7294 bytes .../utils/__pycache__/fairseq.cpython-311.pyc | Bin 0 -> 3351 bytes .../utils/__pycache__/helpers.cpython-311.pyc | Bin 0 -> 15037 bytes .../__pycache__/languages.cpython-311.pyc | Bin 0 -> 8666 bytes .../__pycache__/managers.cpython-311.pyc | Bin 0 -> 22464 bytes .../__pycache__/speakers.cpython-311.pyc | Bin 0 -> 13431 bytes .../utils/__pycache__/ssim.cpython-311.pyc | Bin 0 -> 20493 bytes .../__pycache__/synthesis.cpython-311.pyc | Bin 0 -> 12714 bytes .../utils/__pycache__/visual.cpython-311.pyc | Bin 0 -> 10594 bytes TTS/tts/utils/assets/tortoise/tokenizer.json | 1 + TTS/tts/utils/data.py | 79 +++ TTS/tts/utils/fairseq.py | 48 ++ TTS/tts/utils/helpers.py | 258 +++++++++ TTS/tts/utils/languages.py | 125 +++++ TTS/tts/utils/managers.py | 383 +++++++++++++ TTS/tts/utils/measures.py | 15 + TTS/tts/utils/monotonic_align/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 191 bytes TTS/tts/utils/monotonic_align/core.pyx | 47 ++ TTS/tts/utils/monotonic_align/setup.py | 7 + TTS/tts/utils/speakers.py | 222 ++++++++ TTS/tts/utils/ssim.py | 383 +++++++++++++ TTS/tts/utils/synthesis.py | 343 ++++++++++++ TTS/tts/utils/text/__init__.py | 1 + .../text/__pycache__/__init__.cpython-311.pyc | Bin 0 -> 259 bytes .../__pycache__/characters.cpython-311.pyc | Bin 0 -> 25065 bytes .../text/__pycache__/cleaners.cpython-311.pyc | Bin 0 -> 7133 bytes .../__pycache__/punctuation.cpython-311.pyc | Bin 0 -> 8053 bytes .../__pycache__/tokenizer.cpython-311.pyc | Bin 0 -> 11351 bytes TTS/tts/utils/text/bangla/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 187 bytes 
.../__pycache__/phonemizer.cpython-311.pyc | Bin 0 -> 5071 bytes TTS/tts/utils/text/bangla/phonemizer.py | 121 +++++ TTS/tts/utils/text/belarusian/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 191 bytes .../__pycache__/phonemizer.cpython-311.pyc | Bin 0 -> 1751 bytes TTS/tts/utils/text/belarusian/phonemizer.py | 37 ++ TTS/tts/utils/text/characters.py | 501 ++++++++++++++++++ .../utils/text/chinese_mandarin/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 197 bytes .../__pycache__/numbers.cpython-311.pyc | Bin 0 -> 6950 bytes .../__pycache__/phonemizer.cpython-311.pyc | Bin 0 -> 2183 bytes .../pinyinToPhonemes.cpython-311.pyc | Bin 0 -> 16530 bytes .../utils/text/chinese_mandarin/numbers.py | 127 +++++ .../utils/text/chinese_mandarin/phonemizer.py | 37 ++ .../text/chinese_mandarin/pinyinToPhonemes.py | 419 +++++++++++++++ TTS/tts/utils/text/cleaners.py | 171 ++++++ TTS/tts/utils/text/cmudict.py | 151 ++++++ TTS/tts/utils/text/english/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 188 bytes .../__pycache__/abbreviations.cpython-311.pyc | Bin 0 -> 875 bytes .../__pycache__/number_norm.cpython-311.pyc | Bin 0 -> 5298 bytes .../__pycache__/time_norm.cpython-311.pyc | Bin 0 -> 2494 bytes TTS/tts/utils/text/english/abbreviations.py | 26 + TTS/tts/utils/text/english/number_norm.py | 97 ++++ TTS/tts/utils/text/english/time_norm.py | 47 ++ TTS/tts/utils/text/french/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 187 bytes .../__pycache__/abbreviations.cpython-311.pyc | Bin 0 -> 1524 bytes TTS/tts/utils/text/french/abbreviations.py | 48 ++ TTS/tts/utils/text/japanese/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 189 bytes .../__pycache__/phonemizer.cpython-311.pyc | Bin 0 -> 14421 bytes TTS/tts/utils/text/japanese/phonemizer.py | 470 ++++++++++++++++ TTS/tts/utils/text/korean/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 187 bytes 
.../__pycache__/ko_dictionary.cpython-311.pyc | Bin 0 -> 1519 bytes .../korean/__pycache__/korean.cpython-311.pyc | Bin 0 -> 2615 bytes .../__pycache__/phonemizer.cpython-311.pyc | Bin 0 -> 1381 bytes TTS/tts/utils/text/korean/ko_dictionary.py | 44 ++ TTS/tts/utils/text/korean/korean.py | 32 ++ TTS/tts/utils/text/korean/phonemizer.py | 36 ++ TTS/tts/utils/text/phonemizers/__init__.py | 79 +++ .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 3954 bytes .../bangla_phonemizer.cpython-311.pyc | Bin 0 -> 3956 bytes .../__pycache__/base.cpython-311.pyc | Bin 0 -> 6894 bytes .../belarusian_phonemizer.cpython-311.pyc | Bin 0 -> 3439 bytes .../espeak_wrapper.cpython-311.pyc | Bin 0 -> 12262 bytes .../__pycache__/gruut_wrapper.cpython-311.pyc | Bin 0 -> 7261 bytes .../ja_jp_phonemizer.cpython-311.pyc | Bin 0 -> 3522 bytes .../ko_kr_phonemizer.cpython-311.pyc | Bin 0 -> 4597 bytes .../multi_phonemizer.cpython-311.pyc | Bin 0 -> 3723 bytes .../zh_cn_phonemizer.cpython-311.pyc | Bin 0 -> 3091 bytes .../text/phonemizers/bangla_phonemizer.py | 62 +++ TTS/tts/utils/text/phonemizers/base.py | 140 +++++ .../text/phonemizers/belarusian_phonemizer.py | 55 ++ .../utils/text/phonemizers/espeak_wrapper.py | 264 +++++++++ .../utils/text/phonemizers/gruut_wrapper.py | 151 ++++++ .../text/phonemizers/ja_jp_phonemizer.py | 72 +++ .../text/phonemizers/ko_kr_phonemizer.py | 65 +++ .../text/phonemizers/multi_phonemizer.py | 65 +++ .../text/phonemizers/zh_cn_phonemizer.py | 62 +++ TTS/tts/utils/text/punctuation.py | 171 ++++++ TTS/tts/utils/text/tokenizer.py | 216 ++++++++ TTS/tts/utils/visual.py | 238 +++++++++ 97 files changed, 5916 insertions(+) create mode 100644 TTS/tts/utils/__init__.py create mode 100644 TTS/tts/utils/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/tts/utils/__pycache__/data.cpython-311.pyc create mode 100644 TTS/tts/utils/__pycache__/fairseq.cpython-311.pyc create mode 100644 TTS/tts/utils/__pycache__/helpers.cpython-311.pyc create mode 100644 
TTS/tts/utils/__pycache__/languages.cpython-311.pyc create mode 100644 TTS/tts/utils/__pycache__/managers.cpython-311.pyc create mode 100644 TTS/tts/utils/__pycache__/speakers.cpython-311.pyc create mode 100644 TTS/tts/utils/__pycache__/ssim.cpython-311.pyc create mode 100644 TTS/tts/utils/__pycache__/synthesis.cpython-311.pyc create mode 100644 TTS/tts/utils/__pycache__/visual.cpython-311.pyc create mode 100644 TTS/tts/utils/assets/tortoise/tokenizer.json create mode 100644 TTS/tts/utils/data.py create mode 100644 TTS/tts/utils/fairseq.py create mode 100644 TTS/tts/utils/helpers.py create mode 100644 TTS/tts/utils/languages.py create mode 100644 TTS/tts/utils/managers.py create mode 100644 TTS/tts/utils/measures.py create mode 100644 TTS/tts/utils/monotonic_align/__init__.py create mode 100644 TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/tts/utils/monotonic_align/core.pyx create mode 100644 TTS/tts/utils/monotonic_align/setup.py create mode 100644 TTS/tts/utils/speakers.py create mode 100644 TTS/tts/utils/ssim.py create mode 100644 TTS/tts/utils/synthesis.py create mode 100644 TTS/tts/utils/text/__init__.py create mode 100644 TTS/tts/utils/text/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/tts/utils/text/__pycache__/characters.cpython-311.pyc create mode 100644 TTS/tts/utils/text/__pycache__/cleaners.cpython-311.pyc create mode 100644 TTS/tts/utils/text/__pycache__/punctuation.cpython-311.pyc create mode 100644 TTS/tts/utils/text/__pycache__/tokenizer.cpython-311.pyc create mode 100644 TTS/tts/utils/text/bangla/__init__.py create mode 100644 TTS/tts/utils/text/bangla/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/tts/utils/text/bangla/__pycache__/phonemizer.cpython-311.pyc create mode 100644 TTS/tts/utils/text/bangla/phonemizer.py create mode 100644 TTS/tts/utils/text/belarusian/__init__.py create mode 100644 TTS/tts/utils/text/belarusian/__pycache__/__init__.cpython-311.pyc create mode 100644 
TTS/tts/utils/text/belarusian/__pycache__/phonemizer.cpython-311.pyc create mode 100644 TTS/tts/utils/text/belarusian/phonemizer.py create mode 100644 TTS/tts/utils/text/characters.py create mode 100644 TTS/tts/utils/text/chinese_mandarin/__init__.py create mode 100644 TTS/tts/utils/text/chinese_mandarin/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/tts/utils/text/chinese_mandarin/__pycache__/numbers.cpython-311.pyc create mode 100644 TTS/tts/utils/text/chinese_mandarin/__pycache__/phonemizer.cpython-311.pyc create mode 100644 TTS/tts/utils/text/chinese_mandarin/__pycache__/pinyinToPhonemes.cpython-311.pyc create mode 100644 TTS/tts/utils/text/chinese_mandarin/numbers.py create mode 100644 TTS/tts/utils/text/chinese_mandarin/phonemizer.py create mode 100644 TTS/tts/utils/text/chinese_mandarin/pinyinToPhonemes.py create mode 100644 TTS/tts/utils/text/cleaners.py create mode 100644 TTS/tts/utils/text/cmudict.py create mode 100644 TTS/tts/utils/text/english/__init__.py create mode 100644 TTS/tts/utils/text/english/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/tts/utils/text/english/__pycache__/abbreviations.cpython-311.pyc create mode 100644 TTS/tts/utils/text/english/__pycache__/number_norm.cpython-311.pyc create mode 100644 TTS/tts/utils/text/english/__pycache__/time_norm.cpython-311.pyc create mode 100644 TTS/tts/utils/text/english/abbreviations.py create mode 100644 TTS/tts/utils/text/english/number_norm.py create mode 100644 TTS/tts/utils/text/english/time_norm.py create mode 100644 TTS/tts/utils/text/french/__init__.py create mode 100644 TTS/tts/utils/text/french/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/tts/utils/text/french/__pycache__/abbreviations.cpython-311.pyc create mode 100644 TTS/tts/utils/text/french/abbreviations.py create mode 100644 TTS/tts/utils/text/japanese/__init__.py create mode 100644 TTS/tts/utils/text/japanese/__pycache__/__init__.cpython-311.pyc create mode 100644 
TTS/tts/utils/text/japanese/__pycache__/phonemizer.cpython-311.pyc create mode 100644 TTS/tts/utils/text/japanese/phonemizer.py create mode 100644 TTS/tts/utils/text/korean/__init__.py create mode 100644 TTS/tts/utils/text/korean/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/tts/utils/text/korean/__pycache__/ko_dictionary.cpython-311.pyc create mode 100644 TTS/tts/utils/text/korean/__pycache__/korean.cpython-311.pyc create mode 100644 TTS/tts/utils/text/korean/__pycache__/phonemizer.cpython-311.pyc create mode 100644 TTS/tts/utils/text/korean/ko_dictionary.py create mode 100644 TTS/tts/utils/text/korean/korean.py create mode 100644 TTS/tts/utils/text/korean/phonemizer.py create mode 100644 TTS/tts/utils/text/phonemizers/__init__.py create mode 100644 TTS/tts/utils/text/phonemizers/__pycache__/__init__.cpython-311.pyc create mode 100644 TTS/tts/utils/text/phonemizers/__pycache__/bangla_phonemizer.cpython-311.pyc create mode 100644 TTS/tts/utils/text/phonemizers/__pycache__/base.cpython-311.pyc create mode 100644 TTS/tts/utils/text/phonemizers/__pycache__/belarusian_phonemizer.cpython-311.pyc create mode 100644 TTS/tts/utils/text/phonemizers/__pycache__/espeak_wrapper.cpython-311.pyc create mode 100644 TTS/tts/utils/text/phonemizers/__pycache__/gruut_wrapper.cpython-311.pyc create mode 100644 TTS/tts/utils/text/phonemizers/__pycache__/ja_jp_phonemizer.cpython-311.pyc create mode 100644 TTS/tts/utils/text/phonemizers/__pycache__/ko_kr_phonemizer.cpython-311.pyc create mode 100644 TTS/tts/utils/text/phonemizers/__pycache__/multi_phonemizer.cpython-311.pyc create mode 100644 TTS/tts/utils/text/phonemizers/__pycache__/zh_cn_phonemizer.cpython-311.pyc create mode 100644 TTS/tts/utils/text/phonemizers/bangla_phonemizer.py create mode 100644 TTS/tts/utils/text/phonemizers/base.py create mode 100644 TTS/tts/utils/text/phonemizers/belarusian_phonemizer.py create mode 100644 TTS/tts/utils/text/phonemizers/espeak_wrapper.py create mode 100644 
TTS/tts/utils/text/phonemizers/gruut_wrapper.py create mode 100644 TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py create mode 100644 TTS/tts/utils/text/phonemizers/ko_kr_phonemizer.py create mode 100644 TTS/tts/utils/text/phonemizers/multi_phonemizer.py create mode 100644 TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py create mode 100644 TTS/tts/utils/text/punctuation.py create mode 100644 TTS/tts/utils/text/tokenizer.py create mode 100644 TTS/tts/utils/visual.py diff --git a/TTS/tts/utils/__init__.py b/TTS/tts/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/tts/utils/__pycache__/__init__.cpython-311.pyc b/TTS/tts/utils/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df536e90de00b7e36772c442ee20332160ccf862 GIT binary patch literal 175 zcmZ3^%ge<81nFs6X(0MBh=2h`DC095kTIPhg&~+hlhJP_LlF~@{~09t%Sk`9IJKx) zzcR5nL*FH}IJ+djK;Jn(H?1<%Q$M-1xFkO}J}*BdwOBtSBv`+sq*%YSBr~U2KR!M) tFS8^*Uaz3?7l%!5eoARhs$CH)&`6M_#r#0x12ZEd;|B&9QN#=s0|0gFDrx`# literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/__pycache__/data.cpython-311.pyc b/TTS/tts/utils/__pycache__/data.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2f7223944bf4e3b1fcd11fcf5de39fe5c346b2b7 GIT binary patch literal 7294 zcmdT}TTB~Q8lJJo#&`??W5|V&n5GSZ5L`l+wA>mRlC)`{O$lw9&~+xv*q9jGoEhhW zjAX4MaZ$9QQlk2ymHg1n5~VAlYFEp{KICy9XQEjZjTEWUs@lyyvC8hM>eK%J8GG<> zY120Qu!r$KXU?2+=D(fq{Qu#%6%}3vN;EPNiSrEeS9~azvO?wQG*mueL`G!e%tiCg zUUblB=S8RJxW!!LL?=*}$N_bWE}&&%l~^V@COpOIM2}Q1mY23f+@klE>!MfWp+7JB zfcnI0u~MptIK&Pg6jEH+II6VJ4t%7SVB)_Ld!I zm|w99_AHbMYnBX*%CIoXdMmQKi`|%M&?pQ`uL<$AB;V%@E+NJy4M!qnI8%aXxa4smCEa%#>@B{>;Rz{lj!`SZge zMUg{kB^H-MqM!(EshdVQ&KAZeQUg<>zU1RTvdk*)%MJj!-MM&fp?i-0J?1Omv2jtz z)#V#MZ(3^nyhYpHs@5J^KBK;SUgbwLenjU-RBq%c%)yMI;KhQ9p{BS7m09)^Mk#qK zvbUHFGr-(~cMpmqn0wIUfz@ka8w1U;5f+@w^F!moAkLvpEOo%wp1uO)ZH5I8$uJ+) z%&?`bWmr2K?=zpUEOR>`GB6=K)XW-AunZX0(e^Ug0Tan+^I(wp;r3m5PgF`sH&Uc; 
zdVh*YDS=2-+igAZ#x^f08KN(e=qBDxfVaFEE zjxA9K^l<^Yxb5gN1BM0vD;xS7ut3Wbv3YLAV)b1~!(fI3q(m*qU)7B;!_; zU^9*dZfj7V;{Y?9&x9GA9W#(|iW#`XEX=5$fjy#RUt!j*SilQE-J=}THbeJWf-XvS z0V#zGvIiQb8;kyI+GY!=z-~`Vp`uOox8)lb+wUC9wcqK2hYc4s0(Fk(hZkfoZ~>5k zu5;f88MvivgbcC|MSEde(JZu}%liOyrfAWt2Jl+0zI!QOoj2U;1@D?{1St5rL5&05ug>|3iHJu6dR_Ad9%MRU9JA8Na?k6Iae7_}g#AP0&P z0ip$Zb)bBo&iAQYpD71jwg{G_3W(t*l9DEgG4!}%!Tp&ooj6o*w<_^D_L*WCTq32h zjAPwXzdx3l{`Or#3_?JW3PP|Dm6V`Bh;Xyb>qUL`lc?N9Yxr~{cqox-ONg{bb60S9 z^C;|w#f)l#hhhl@I-aF5PEd#_5($DEiLpeq&6*bd3PL5?xiy$fDY0Zih{G`840{G? z9Mu*akRn1luE;?p8Eh{w_#Rbe36JXxn&YJ{g+mriJf-kz2UXZ8*UeQ{;I0^&nIKz5 zK^C{anOF!wr!ZpxfZ>!6FEhm}v{pZO0i7Bf+@qe*~uJ9pnfQTfAk0 zXVdrdX(1>^v+|2TD1yGB2Jl)I{!;r=URc_x)wb)k?Pep;bqFOP9Jc`=Yqt0%>4J{5 za{(p4!4T_tpk#qpU~8olh!%K52g;w&`4cL4Vza+na5w-Q+2e=_D{sIAPGljBD$oqn zqYyQS&h1k+GkSUnW~CuivxCN>#0L#f$DPL9PSCfC@stk%ASDsH{g57ba}0hmDhi@d znmHYWuxTpSneWd}%7v+*PrNYDaK#j9Qnux^ z7}L=`;6L#7S~zP%;jC_G5pVaoWaVp!sa$xYBVM zXNJSQ2SPfC`0Za19T>_jc;~!kz@}hMP$(HLQl!MDv!KA6b^61#q2rm{?#V)4u{>t{9j5{s{Q$duVR zK^|txl-l(k6P6V>_RZrG+f?jYgS@BQ9+Y!1g66##WEQesFNWk|NgxhcE)Q8QYxsn; z7)xS=3g;I$R!(v1;Dto%NWN( zzT0rpyq1-mnD5i~9kp6aO2dmC)b;ArHF zt{)7Vnfgx?;0>683V=IE-MD$>A@R$-^6?F;!Ngw0IJ5B_>0_VDRQ0OcPOxhZZNGi?QQMdhDvC$l%!>V{?F?4-G`UnSvmi(P47LY?S4;(t9;n%v{_AvwNy>G zky>iniE7G2%jUB23<}h-tKb4~{^Y{^=0-sQ1{*g5=|pTQErp9(NC=b9MF(#IXY_;$ zbx?iUyq5b_|G}^Pt&jYzOXoCyNcV@Vcbi4GJR|~hS)w1YpHOX`v!kz=DUKapZ2kk@ zxPD>?YHGtnIvT5ies;n)3x`VA$0Z^aeFo1wjEd*2tuv^tUjRXE?Y(SX4~5T$f8+mr z*V2)d_D3x}T4S%?*t;U=jmNcye!ZdJ>PS@=h0##fUf&uag7D7LA>bWq$hKwx?>6BC zF{L%(tLP#>A2QAYFEYX}N_+6Ed9C#4XO_?DO~;kednTktt0sRTu zn4Cj$3dszj*}rk8ja`<ixfXw|xHq literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/__pycache__/fairseq.cpython-311.pyc b/TTS/tts/utils/__pycache__/fairseq.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..372b69a6a64fa55c670e97f8845c80d6e698809b GIT binary patch literal 3351 zcmc(iO-$Qn7{~2=@Dc1%$jdsQrF4`QwklZ&^o#GdDx=d-SK2}OQ0n?|nzT+FY(rZs 
zY#eq-B@SsdsT_9T)OG3(J8YMocHC{Y{O0}m z|K8`xW51654a){7EcfE~JPh9DxR&zYl4mE6i`5m)C>ilFR2+C=t-*E`=>3L z;OQ-;{Z);J7mwFHew$J&=I)o<*`4DhPw3$VYF?Ff-+5l(+^g+*Z?o;ASeEG|wd~2# z>$bN&P?@eFWZhq$i=-1>FoSjPSC@V=m}p(B?>g564~5$D!Sk{uNHRpijVij`Uy9;ezgflMb|Jm4!|hbSIp0q|d-< z7xZc9a-=`ryh80eOWw13BkRF?YWGCJ%V!q5=>n7DGBHu&IW;Mz3;vV@f~ZvqX+D+- zX{;(dQe)V`Q8}dfK;T1K!$WRKh)Z%RhTYAHqJANyHD)A5738EOZ$oTclpcocN(uGg zk`>*^&Tj9{?%AE)Xh>rqv$fENp_FzC7Ug2pv5YK0l2;|UJgdrFS`mx0T73m2O7|4) zLf5yCE?S{Gw~ww@Yry#IbjLPRYiHyGhcI_Y zD$_!C^TphBAGE}Fg6zCjtRB&h@6SexXQG0L9z^u)Ci)H{qPvN62NAuR7k`Hqbcns% zU-08a#U~0rQR1Lb1;SF27YhEQDx{P`wJc;rju%i?&nE=_K}JfZ@pL3b0XJ9TL7_S= zJdD{58f{6zCl#vQE^kJ|w-iBE!jCy65xycQ4^$}=zAUBUk8h2K`9(#QQn57dEG2yN z=44n^mGGjP6qRtCOUjC{5Xw9*w8=sO>Dbm#Y$>TK=&KZk+=$NXM-HVAA6! z{nQ16Tr|l=gX!0qev55dYsk&#=B?37Wl=)!8ZgM9Nd^sONN0vD_UJmTvnSB3%U8-W z>E#y&88*qV!Hnq4$p5QqU0cxEb~I~tuIw?bhdwjNlu4!xW?E;aEw*hPqv}Aj=H^Q> zy)CRqrVKJ|l4*mvtuwbRwt4MSooz$2KA9-#)LwifI_VrPn?L@PL zcqul$Euu&77=$wkXE1lsEVg4EQ__WI#qVwFdiArMLGGFaMLb$%PNt@}b(oVI2AMR; zq`}-odwaUZJeQlv%~-=DrRdfKz4M|$`c2YrFav16H@DbxPp&<=X4N&WOuWY3Lv?#( z`&Fa3oxY{ypxti3>P^i6{!=cB!jRmAUHer+IgQnZm!@fpVpfWujjA(raD&=g6&Ly! 
Dxlo2e literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/__pycache__/helpers.cpython-311.pyc b/TTS/tts/utils/__pycache__/helpers.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e801680ee38dce61e4efdd3aa434eb6aac1b7964 GIT binary patch literal 15037 zcmch8Yit`=dgu(FBhFByC{dE-heopepe;+0-*IfmiXZF8T8Uz>6U%YAW6e;eOp)@; z$gx$b%iU&M+E8y4L#>J1EG}=6O%=oewCygcE(+8wTGT%|#Izy?5D=gUF82PZ7uf=f z?T_~R&X7Y6DQ&Zld(V*HoHOS;&pGFNop1i!@ApvXg!bpTRnr=y|a4VinO+msJn@NtV@l;YsK&p94xF4UInTk#e>h0+G z)pO&Oe&gL+PJDS4B2OrZOeahwJ?0cy(J8qmE0YdWXiK?8N6aI7UTO=VE%6iads33%@lYFY;@%SyGm4Tr{PkhL`-G?Pi_LBV2wP_|6+LX z8;T?=gAWAd_TU*wxud402T!M_Vh_G?VQ_p#QBzaVWJ;8j!K+uV463R!IHSfB%HVA& zF%9JgrXLtgG#XFF)o66K!#+O)m5df>?8`O)8LAZCly%MfO6{BB#%AIA5}s4P$%thR zDvLlvvR^%6uT#5Y}}eoNVwonCjT5no=|E{UK%#B zeWTI!`HuR0d_fWCh>SZ>4gi2Vu9NgN~@=rxGg2RdmDI!%8J@lUPv z9F41S(yC{+gQCjMur3m2JI!ZTDDs{nF2j+WHoSr&$g=RDIuoBb$nXvNZ3XM&%Tv&# z%0g0sjW*Tr{;Zs>fUoQY0K?~+=PxeYeRz5Pa+WSJP1&pSV-EwB=nakOgm@sBW&U5y z04%zO8GxmU&#W1cw?JQI>@`^f<{WIpLDY`CC7!$oJ4}jNcC2K>nJ5px zW>&13h&V<@A}ns@q%b8#qlPaUol1!_35;{m=-nA1QO@y3qhe}28YQDJyhtDMMYiH_ z;Qdo_7XltZFMv(7K<>>C$4F#Z{^2Z% zJikhA^?<3AFf$;@W2_7qm%HT!WdY8LRi9yutOwGyacen4_mGE>ZSfo;NPIJGX(|u# z#_%yb`qX+nN2}HrplY@V*)UcPY#vvo$WzD1@cdINZ_bf+0~W#$%M;2m62nxbvS>m|P5=kot)}Gh+XH5a$T9v(r7<&K8L&zMyT?NO?s!t| z=U<;L3j)XZ%cdM)Rn4142lz8mOqfZi3a_U4kyKJDcj(G(VOpx`(mpbG|eOxm_2D$ia3ox-QW#xc}&QL?nX&jJA3k5C2;QLgHR<_tK0;Tpp@nwoppP|Qohq`Y3a(RRz5fIkpF!Wpj!#TUNfj8m2;*1AtyKvK zC@JOb+$9+*L-k^PtE5p!01Zr>?$E)}_RIc(S;hrN_Nb%wh6 z`*UvC@9t_VbDp#(?U;pn=e%jkmczijUQff*4fb-~*zb@XFQ57~=b3?d_KI%o$yaT0 z-PvZ+j5(t)E-%ceqdL~KEA2iG_0PYTb^*=t2yeoMx?-*~UJ}M{^NKJv4PrAPEw=;} zp4<`?N#sFzM!9)fPTh-(kS;GyKqQNEJ*soRfzDh=PS2=(g-{`7!ce!)8%kCBS8l3G zoGJyC_^d=I6p*sB_ot~G*$$%}GbJEzvPyQ2vNw9FsjmZ4~BMfL1NCo8qNP)zal*v|i zLT5k~fc_ai(nvHBzatrLLYX2>@?pT1k6;n5cECoHH3)+#XD9|8H&~b_=|0NWvT8J$ z2^f{(ydRA#NH@r&BQOP8Z@I919`bN;DBEFCn1l|vH0?e|FN`j}m3wpXM*es)JXi=1 
zu5m5%=XI`omD^tAw&#bJ=%wQ;J%79N$2*_3{B`7SBHG^5gYcvnnl7DH-lE2V=XySPdGTent(J0po}}+B+@2>!L?J(Pi|LH{VNB8MfzqZ z$V%`ch)!w8p8_jLyZ@B>p36Sc@6c+^tWH)83{=`PQK$FhnMzx=_cUd15Bfm}>@3up z$IZ|X?CsC_RMbDq0oa2N#_W3P`54CXsWsPgY?@8`VrZc73Oc;NvKR+jSWHb-Xpmwj zr6!57B(8PDwy#P+trgfnI!%IWwNcsmwRGfHBb*7;+>vlbO-7+u1avr&NWE(j$4c%m zA(Ge`i-cL#{MP6&na@V4^S({wJPlH3S>nX*?BlJetU)Uy5PCi``wD-f=O#a_g1{>% zD)2?g;+cvy{B?)T@1Au>26!KDvIAOv4VD!sc<%88)FPjR6k!cAZY07R)eZ+|T_oVlCrDS3l0D5raCiRH5CBD=Z3Zbqq@(SzGpgS(2sUBE|%m(m~< z558L1drAuq>%rmda4EP2fVZXK-Bb#rrg)H!0ycXlDG2(<#A8arR{@R-@?(H z@c6jK@TJzRc}{EX&oWu&i$JKvwJqFz^!1-Q9YNpEC;-SOCTSCX&c>f_DX?+;-JCEZ zKXqe1pqf}g-@kjP&aI2H`Jw#w55Bedt<{c!V#mPJU9DqC?-*Ks8#u{3TE}av?dOZ_ z=e72Wdi%x9>ubK2g`S0IKCu!n_|9v-^SbYR=5&c|UI-M}?t-hk(c0mG=_EYk?*VRX z?-6NGOF^!vT9flNf<4qx4t12wp;j{B{wweXhv*cYAf!7{RIe2#MYp{dbIxC{g<1n% zja>_D>3488z}_&QQ^muD-Y5q#mE`?N~fE?H0XhpS{grg4txpLgR{hJ|apsW(5@~Hl(XjDQUwH?oHSbNG%plnGYFG zF+MdSzYL>`ILI@?>Lx1Vm5*T#YpN}t@tG-QW=eh?p4Blb!oduWbblIIm1hDBJCba; zV~LcY8t!)?Aw`-Ek2yHQg}pbL5k|32=(?yF!D^Z)FpAM?;=_SQ615|OYp~^qSkQ3X z0)Cb-e90)9q=h7~H>WsGfMj7ppB2MjsY^;okaWWIC}{PpFgo*Yvb%P{e05QLXHn>u z`Of4i!2ezNE1wa*_wPZfee>cw`M#y$LU=$659r~6)$oC0_<$BZq=ye>M%FsEEZ)~U z_LRBC@2v=UJ!^f|`>b^(rVpNdc2DaV(K|*8uC}$V?hkkCUHewMhKgN7Kicub{*`;r zTYo%Pc=JkO|5a`ORek@}!nJp_uA6$-&4R0A&8F9Uw&Q94^1Ww6g*|7rJ!kbjSaWDZ z4~=9lea>~Qa@|F)TjP3kuIJllGp~U@)za}~^W)9A(GRvQZUc_(>&gw0OUw`Bl{=>U zcCPyN6@B}bZY}QtzU(`t`%VED^L6H0$aTh4>T+GWuW!}2x9Hou^xCp-Wmxl_)O{y2 zr@v_JTs)pe@uZE(V4G08)ozgMzlHpZmZYAQ1YPjMmEKuwkEJ?B+f8jfhIpe9R>65cN##49>p~5LF z+A+YUFmODPg7Z%0Bt$A`e*(po9RM;^A^3{<%k5e|V#byZF0@&XAwp)y?2MygTR>w) z3~NDZSFsngG&C;HK=vZy?EwJiiA>8vA6&03^vwq{AirVb`B4B`@D)AyO6CG6>#*7z zB_O1+9II*Nv8R`WHum70KKI?_(m0PA?K zN3GEafvdXg^=+U#(~}lwudC0S_HNuh)DC^CIa&bhS_9`~mDhSzTu$X6Iz-0=9H|pX z;uCddNPDj>X`)VgBU(JPnWk|jKtl1ta(2)D;RY-q=Da{!MuX8%e3iB$#pmv zdJk0a%C`SHyec0mb0(ks3e141AdNxHaNUbb?-J|0;ee9>TzUq5j;KszL4v)Fsq`97 z^r2@4eampF(fbG<7#uJUi>@0jwW(+fPJay-_EN#u0Gp76vLZAgfw~6AG{gx){th7g 
zPxvd(0DvOYOtp3_?)mm5(0P>X=`}(CnWY}kNA%YI%-28f>|5=O6gwkJJxk-C?9w{- z>z(@xuJ9V$S=h8^>9oeaq_Z!rvImOnf#owlyr{9S>g=nTGbJ|g{>zVEUKq_eb63n$ z1oH5Ydlq-(T>rdnaJgrBbh&+{XJvGy{aMem(P!<2m(FY3F6i4XXrYUG=;HGsa*aQi zpN~I3tcTvz*f&WP3w;k?A(70b#w}^l$Twhkpp*ct1UXo{6~Q=Cw*jwEwZ@zE^3CIC zr{Ot${p$JGM~qfDo2hXxd7$1-C5@2nVRB|_`T?E}5ne76R$NP-xLRyE3w?bG|1xD- z^ZL=E`O;dTH-BiUPYVp{fx!YZxW;bHN%^Z9yGLjD6kK~6rvdjkp-le*jUlTj?;D6! zX%rB5s;a@vIU$8zBVm0?U}q|Moa}~BhXcE{F{r4smqXP`R!5JhT8mr_eW{~^>(e<~ z3l^QMYrzE+`?o$~WSx2B3=N@PdqUcXnrf|<`WuV}4vIEtx9$-4x7@Br#WVYIfFm2U zs@7Q?!L6lwszXg$mR8^@z&We>WFE!ahJgI6N(g%-P>qA;AgF@j9G{*s+=Lpy z5wL=Wv9k9AQ8*6lmtTd_h8qs959~LbsI%bCF|`<9Ohz9nBqj2FcpCA_e~6h#O{y)) z--aaE+`w)ILV=Akb>LA_eUO$XxtW^+p@~_7P(%H>(jZuRc>#Q+xyvTPcLfR5uH1dIlIRC=TGFq z8r!Y2-K*@*BD?eZ@<;bgCP8TS{LAm3cy!|7tMjj}vV4){HMUo0doyP~Zw|eG`_XOi zySX!er+kz#n(Nb>yH}fc7Msm#nqShJU&?%~^ef=5xk3vEbK$&qsY7!O>8_!IYiNxP z>g?v+8IvuovVBFiZ|U61jsn}Kv8VLv;kM1C42cLKseb`421u$S)9iM2$$IoK+{J_)TOvq*0Z^pOIW0N(jcw{8HLF9HKggbZ(JoVZSfIu9lf$`AwCek@B-Gs)VG-vt- z%r)Cai@t>QA#VCpl#ErP(_`WI${WZctw^9YBSs+^cfgD#su` z2KBy8P4mCML2Wej!&%q6jo$r6t7yYFIM zzj@I~6rqr95%mvLtwZqr|s{i(yRF` zP=fvnqqAn$-Y!w?TGU9j&y&`1X1c|Wn{TgTRby!<9Cy|^ufrQ_0db*e_7kEkUQQ)b zYAP8Y=fTl>B8fgraO5d~hZ#JWNXg)HJw*^lkl*f$$#F3CB_3F!1%E-^rSQTn@YvuF z@*sFf3LgWGZuwZCvzmoXYATBzH*WRwz?~61xJfch1+?)wxV4)OV*M~QVzMEEhQWX_ z0}PEx6*LMLI9EzWpzWpMBmWWB{6FxC3w=t6 zx@x$IM+cEY4j+`?N0dQC@q(u?@dY`BkMNjJ{v8A#05BL?La|E{O>OrvW)LZ*ttzD$ zEJz~Jq%?uutm#r3k>?Qn9#+*P#%1t_0SV5a?-}0vroWp(Pa2MUFTsDvV6Yyd9lP%l zF~?`RjNl_v00I#T97-yFC2$HjAt`N8d07Oq-H3)P{{gm-;2+_yAR#7tG1FS`cCB@6 zTD-R~0X`I5izyy`UN0WKf8o)EhnMCrt#aFn+%}Elb&g-yB&_bQ-TwymIuyE{*B>JiH}OKYl0cLkH~h3uF24Qh4bu zUSKM~Yjo+|r`xoiBP*LLt~$_FQh!_S0az@8{d%xJbKwh+L60t+UAzR6=WK!9ud(}e zc0UN8TyUXDfWnI}DxgGhIg1=W|wwHq8TvLAU z(#*0@=slu$9@T#hq(F`458gD+I`P9j0q20V<`8i9bBV<5(2fbL z;xbs{z!eaQH+q|xSda!Cm=N?P5WG+aDF-0YO#K86?=&~@ScmU> z{L^6bk-*=5JT;R92ev6GQI0m_hcn>!9E}O%VDK~OHw^kRaejc$k^oDU?t?{KPTf-E zGgv<@$Vd$hhkVcQAwt~DMP6A;v`niWj~_in$yZNtApw~26de12EnSY^g3oJGN%_Z6 
z?%&|Ad)`po zw68K-i_F&Cq{c*aCQ=|jR1%LajA-n3o!wq=Z8xn~mRbb8g_UHkUOaKIdlsN#m97^B zooOe$LwfcAe02nlkl>K6g4ZVbgo;cIFlhM93LQeLEI%o!@22EC1HfreXVX!xL_q&c~Y3m)KmFy}vnM;p*ow2C4BEnxfy{FPn+8S0Ccp%UXKzxJPc zT_MlUC;*v@FMJf&4klDGZquabkPktTa}n2w894{NdJ0b>n@m2v1>&YDfK|rh(+>tf z2v8MsT{5P3eAz!!egLROFMMe=0AE@SkT0zUrYa&&l#G91JS9t}dEq_8ZoN!C(-=vq z=U@?wXJt7ho8MU+#B7i00ZZI>h#w6(R3t`gVofGiOVg-8I4MpinG5s42qic&XTM;M zo8N<+l#$Wm_EUZY@P(76=@JzxRDLCj%aE%?^%U&C64h6*|6aJ_qSxu zWzPM)mKJrvoo23+#8& zVQ@^U04_j-pOOXoob+DIEdwq{`dkBd?m~P!xJozCeJ?0G==9R<;L5r_P>?MB3DZse MFK@)TL`R(e1y6JxPXGV_ literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/__pycache__/languages.cpython-311.pyc b/TTS/tts/utils/__pycache__/languages.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff47018b38b3be351458472a5f247c3d533fa795 GIT binary patch literal 8666 zcmb_BTWs4_mX{LsmMuG$9mny5iMvVMI&z+!X=1x>>^60ps*j{It(rohMaovHheJ|n zEy%k@VIWQrpqWkv@$Pgm!l2EhXg`d_4Au+mV*9n|CutCfK)^tN-D0tS8e|tJ`nBg= zN)#!(=@yu4`ts`Db06n@&*2~2+JXeieEMp7eQ*S)J4UKz5AOe#m34FV< zOwN^X<=hE(j!m$PIp#@tD2zAZqi=u04{vuikP9Y)6o$=)a^XanwmsRlTzjIOAue*B z2;Nm9_{8@6ZbH6R>`W%5 z80^K2tc);*ce-$GF@vrBsRc3helk_ar!(_OJ})G51wqUvd1+pT;kI+UEY8ln%IEoc zQA*V}r7vIo$72^m$PGYVBnd_!375bm+yN3GLR4TyR`7_PG`mL93?R^!@QOY_!VgFF zud?@Hi7%}+69GCFgt5Smv5*)Rf}#iJwxwAiv}3&892ML!8U`Xn0KQEO3hi(LpV*Q1 z3XvUiI$Iz*DDgUR23=`7tdcpJ&ES+QSNr{)&C8cBag_RUjJsIi1&-$gUg6W3tjGap zxJ6!)MY>WZ4=dz&B~!=;IVhPtz<$6l!62`2DL&85iQKiKC}l)}n_IGWjs+?B3&7kn zw1tx!$4X`dIffjjCQWjx5#8GH&XFo=~kQeJjdhtU(x0n@egqob3F5d{>sQJgAR zv5hn!2q`mPG52BQOKX%NWtX(KOe!GYJ4@Ns3N4UJVE;f#TyZce;f&BcG8ib2Tm+O2 ze=?cLXOv{p=m$0>oft}mT-`{Gum&rO*|ZdfouxLYj6gD(%JQ*3RCVk!3 zJSr!3Lv6I>i#dyda%rHIf{x)sI2o%I@@-_#Z%(bVpGF?`j&AjiYP~VNH>P!s>0M)5 zneG=SA0RCavZ z8&Z>`#;-Kkim-G1X`GI0AjVyHX70ZBFnVMwdPIvptw*2MBG2fNXH@?)6jOsqIgs_k zG>R--7g~_b5GV7N89Vwjwrp`*_|9^fIyth;#`j*w+|`}w+{e<3!vRi z=-(27{hix6=Oae&tP&;ASdkDVWNV}P)=Rm?#yHn;{+c09wt;2wmi}}}R4h{p&WB5b z(WtwMx=lxS=X8G9lWMpcaB|c1B4oNr9D9?hq0I?k=j9?ehIjEsH44r}dGCzn47k}D 
zFx-oa;MJ+iM!y5XJ_i=PkhTsdH=U2+Z-_?8KVv#5&R|iBO8dD5Ugq)zYZ@mnim6N* ze0t0w5-9(ufsgy3x<@jkOy2Wf0UNN)ECY+m%&%Ry1E9%7It&0{a9y!u29uHg27bvX zOMnv9+wX(p0w9$t(eHZlSYO3H?>v4|ZzWAMoslwv;-W5n)bsKEM1Nq6tM>sRORo?i=8_x9c`)d(9% zFlN1Wz8cxL{!2CT6g<_AefQcvX^)cm6r@FA?qd%lh$`wZpUe;aQ-i7P+7UjQ@gZAT1XF z2Sh4Rn{fsj_dq-<@ZMQkHLy&Qmhq%9g{$9(7vGXhq8;UFtoFiO^OXgo2( zIS?>o*y}14F7el!u@)F)8ej0-Rk@I-SN|u9r*%FW|FlFsg`%=pRO*WWeP{upHG)YQ zHcjFd4%wYRD%fP#=%8>&Ot?VdUM0(Q)Pu%YE5vKK1_u=t-ay;Okz0}VK;_hD&ujjZ zy8oowcuf9V8VfTpJ3-0%2WUWcf>~zlcmFbYaKY`&CrD?(I@s5MnPw4KfpL;t>Q);0 z4*&tyShDw($LX(OUkBt@-q!IlGS639nuQ1;!)G%kGtkY}uilMl$TT-RxM|E^{J)$# zb?6Qc@0e<_(ddC#wdvkOaN2?Ze*Ci0**Jw2A&)RL`hXW+U`AL8Z$*ZCUQ{H^6-m?B z>oS9r4r_KjqH_ua4SCRr*29~f0?`WbOITcV{lc6&oYIC<`fy6^;I@Zi8)F0s`iH1U0U~&1i;whRh(>S!NsxF<6cr&^kTk-{RN(Bh>lqJIrcP42F2T&-a15g=ZS%Gv$3i1jI1wlf4V0r>H_7cjK zguY8c`yfqW^%7J@TRregiP>y23G9eMFTI#g(F={q4P*)ON&YtwY>3qwe>GRwhJO?b2A^?FV{7{+Z+Z9GL^ zt(Ccd$ir27Y6M>D=mdQ?D{E&MZT2=>R3a%`9amt>(#3(rPQUzP0ifN7+h8qQ27+Ui za!Z8OY7xQCS+L|sLr<{3Ef#gzWsj7uS~jzF->axrv@E;J?usoYYxHB*#%W_E2m~&X zAB1kn*#NW)ayIZk;x7HeOBqN%+Vg8@31ifw7V`oGH#jz*xi028nmdH_Cid`i1#ra~ z#k}Fo!et16VW9#yhxtq%Vl)I3GU>D^!Ci??mN_%^0f3OWlSIhWNYKU!kla!-`64bM zLCOz|bFAbr&h$v}5x?Oo$Oel?G1#kxOdbM)jD+^d@TO&qqKqKe>UyrtaHVC#D}D&c z4H-=Nn?OP%jB}Iq&5btttFgO84KSK0^c`lxqdkc?AnWoVy$hph_?K@%1@VEGM0@W3 z?B4h%9J$iQK z)$Kq;5A>~{(gK{0&+y8*sxNS3>7%7P{hDu3_YI=jCFvATqJgqaF`$IN_ExStK+-`Xt zCk85*Wz7pKc~<>p=0{cnEp%u;Qk0g8rNfOLe3x6YBzT5pw_?kNT(xw%6}EzR{vE|h zHOz;^cd(e|2rwMKN9Hj{5QGE~e6eXJaDL^w9fGNh^b`P@$pV*j1lk5mqaxc)7E%S6 z4Kz~)$5ENAYt9@G&6Fr5KM*tX3yN&Z3|Lc;SoMfZ9Xw|~8#C)l<$^vmX7%iTqoruz zZ2)uG6p_8l%uQDXdUlppD+nm|LR#-IKtd+k19naP5aJ(-B$G9|i}?(EH)$e+MnkSA zNmk4|MH1$gemct6VVaElzzsz0dc}M;uBp+HhtaXE=-39MMNjC_6V}*H?2+>ndo(Q; z_D7w1U{3|xbPufm`^djm4`yS(Q#*iCF)9o=3rK{?CBv0pG&~k}9;5SnXMv%r9kT_dh zGCb*QfmcR+G!O~>TXf*7FXico_*_a zt>@{L*S;MX`t)s!($M&N94hV5>pIi}Z|DPWsBGV(fs=op`%~th^8c7uUwl`4_KN=O z6>Z=OjBJNG^w55_|AZDgsfSKJ3_Z6MdQJXcF6j-x7dW+qsbMpqK_6j8 
zKyTrghw_LrSjto26b*7|2xSD}D^<+=xMv2NJ=g)Ctzz`ERjd(PN@yd@um|rT=7$R; zx+&B6?Sxvf@BQOu{5C0}1fs{2{|hSk4#hB45?G;sRdP`M;pY))TcLkdvPW$^RT5sI ze^s(ibv}>C3Dx;j$uZUW)V#l77`XZGUf;?6g^v7>eOz({nT{HQ2S Gx$=J!#zQXv literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/__pycache__/managers.cpython-311.pyc b/TTS/tts/utils/__pycache__/managers.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9873a6b4f560e70d13d12c53be3d6591716b7b5a GIT binary patch literal 22464 zcmch9du$X}x@T2C+-%cS!c3tX5ieS*3NWB(z9(Bi;FD*AX+T zDB3^v_nqpluIg?Rk~=%)_E)FQtIqTLzVkcZsee^fRl(u9I&yvF;x>-^dwR%S?yTeK zBRJmSL{8*Kxk2O35At|+j5@}ggU&J6pli%M=pHK@EF1F-dUzv_chJkyln++0d*xsy z?#@x)Sk+(^i{l!t9`g_SncF>DGgdoT3wN1RH{uXIcU^-4(JMLGvr8<;bG?H*%83kUXqQqt0H6BS^8|JfbrCS;PsTfc1!uK6+l1t&C_!9XK5&p56Qa+S03(s-a%}P$9 z!R^)F!A-%uAF*Tg_itDE1mh%#Kljpfh9FIQ{AuE>G zSy0}?E0)(~e;+GvWv+~`$3sK_ID|BqAH)e=&_gc2Yk?b>T7rCx)P7+y!^E* zG@iJoJLD_6b9j6LJ?a~ajE6_#!;u8W!PgBJV#>t$cwA0MqA(I2m4w9YaVhB0y;4k6 zZbp%|Y(!DUrD5F_AD3dfi)HMh!K1qqae4TfUh!IFbV52T%W*mAkg4W6KcY~N3c}Y1 zdoC!Ftn}QDDA#%hB;`gTKHhUEJ~ndu!m*y=2_+F93&$|nl%DhF&-Ek{O3y?hI;!-H zMPiYwNEjNwt(UW*N~M$Ak!v;nl>-<_cesqF@`qDDn40~b>S@Z?-TdA}#+ zRe#q0nQS$dH!*r`QDzn$h?#eY*z!@wZL{N*bWw& zCseQaX#98sI@c zb2c8_;QVZZ>)rNMN|Ih3R8$LG36aAOnGk7 zAvzzp?A(v>))Et+CipX^{z~O`+ozPg($we0g2{}j??_muKD89PCQ)wt#Q$z(PWW>< za$L!&FB5Oj`J2BbuJaMEmqlZu6idjIPIuhAA_w3IdgXfDDJX2!j__vvx8!AUY)%ln>Wks54TT=$2eiKRhEY5$ec? zi2t#f&7e0M?v={LGO>K542nr%oC>j0suX>c3yV<&CB`RKXP;f7AJ0{m=Ndd$Q#Gt1Eh6omdW4r(Ul*7*V8S11Akhk^Cq0C=4AN7!tBx!tiKB zQ9{CD4CUKGOuA`=W<`rI2I(V2W7i}(nh+A#qDrU&!&*3&5JYJt8j}=ZI6gKOkJ(d^ zER2pshlP=e*f7b^Y%-nGeNmCkgfLT*tQ?c34E;Yn0`d+Qh1wA|Y6v>X%S}*2Ox4H? 
zU}1J%WGF4f*FNG;1s!@tEHWmA6Y+3V)a#g-jEYKlM21wQIf)o9MH(HEHzTjCNH*rJ zHyn<}qKR-g*=TFVP|iO{DRzKB%C@#myJq}PDw}99Z_D^cZI8Tv~)2m5Y$Tx{zCPT@TW2N=SX{30EOIVuWy6J{i$lKW`)3S2PJ5pY!=4IC( zLMse9PRXQZ1YHF@Qf?++hun*s!3_;P$7s|FC{|Fasn*Hnf@U>CdXQAP19FG^f~hJU z^8vMLi&nLTT25G~{bQ*riVubOS^ZAuzjf{P{2jm70|5|8L{62K^|Oflt-8mNSfP6$ z!5t5re+UU4be)m~luKSiWGzT>hpi`Hr_o_MDO#}%s|Yh8RSQ4OnSf(3;Lk5ecOoF_;7j+;e7%=K;ay}}E96})a{RQ0QJwzzBU3aO)niq^S z%P%Qaac?8hQ!-L5?{L3IYpqKiQ(Hq?Ysl6_Hg;oZ??o9rZ1gw1Bs+GsXk$KAN( zW7enA3oq*u%NjdHy-;-7HAA~vVt*3(yatzdoddE%T;pr;D7Z#MkRve>TtgbWB8g%& zcC}~#n>^!HrZp5Wk=S@BCPrjAa(j<84`UExbwd3CAdK}86c6??-7|bG9tD)8yG98U zbAdf7AMtFAE5zKgi<+vdU<^hQ@Q36od*|5#&mxs_6a);RhJy&`BOZR9&j4@e`{2d* z5BzG~LgJTiFYSC~apx=Q&Xd~Clj^op+O|__=V`6;bo$I$t@Bkaa4zjT_rxEVP8gj` zh0k5)lo*H44|)5fnC3@#k-zJm~}X26rBe4ZTY|?uvxbB}^zNBc51H#`lQf z=jBPsLB=rC%Y>r7u}6UBAGsmP)C~D#w)se1MeFOQ0?1#WtFB>AzELs7Px3!<amz}~2GAY`Vx zhF2(rntgSLOSkMZe)Hnoy1Co;drXgF)?!wLFaTr4@`m?yjLRv0$}#Dfh@22GOW}eL>D~jhF#i*T`00zy<4l^J?+f+ z>!vG=lJoK=WEl+TSfH`BU0AOO6C6JQH6pD0!r?b3BBR+5PdF^bhvDsnRF}6Rq3%-> zKz4`6q{OwjC{xAc7s=m64JniQB9mG!lm5q~xO|9QBt_&CM9PRz6Bu1fN(>u`GA#%h zSqFy_1y{IZ{w+HkZtpTj#0$Wap`NL8>{dv8Tv;7O7gD{HK$zdoZh7*h6EW_-&f+Y4 zv$2Cl?BKje5rYc}cFPk>qTsT(%zc_qAqw3eI*t1ymvJvaYs%a`^M@#)XJLTd3JGF_ z<^d(+wgCJVw?a}==?3Jy%;8o@T3H<`-GLOUirWHSh1qZ(cd1@#)v-j2~i_T4A1YF83#Pi7v?{0qT+5c_uVsVMu_0+yG?Uq6hz8KtUDL z9OO#*-6{Y;<+K-Nmo0-8@U1NJt%NUjrwZcw;9FJXOMsAD^ouosod^V~mi*)cNT?X_ zsjp9X~0iH(%6REB(;@Ej1Ejc2LMQg@fwg0OnTYrq>@;cgT+vY4x+ zCb3QOAf)-anP9{#wgW6{h3_iF??C)E_6~=*8t!&6C<=JX8hR6IR3@%P><+Q>8(Q5j ztwt%=AqPQRkGN~Dug&$X?d{f{GHSK~X*#F5Tm0ZU$mmUa&EcFDm4&_{6JXWE!h|6+ z=vla!CDONt$4VqFg_1En0O*=@3lLgD$!l#`KSD=VfW;Z>k@)Cn{3gllk+?h-N$fG! 
zl6$gu68C>%>J;?U6`@T^180KF@7iMzW>JV^cI^?muZjmx(gbM5N3<-;OMxt8p@ zyw7xp-=Zxp<-i-Z_*3@ep-^5em@>*dWG~r}5LKZ0PQ;>bPMGQmEx}kc)(W{sAms#- zKSXfka3lsLhP4eLi`I3U5SN8W?6wd`F0v(Kqa;IS1=ySEO{S0V7CHf`<0CBNdp^$o=ZI78PjS)M$?`-Tx8=hXMY~kSvF)~at zDR+_dB5C#X3YZ@0SXRW)&U#jWR0-KY+K=&XT)Y9uno2i=XXzd6x=LUz2~u{6^kf2C z?8z+Mq#I$hCSw<1-Io%zxhH~o6Lt9~JDMps0sv-43i%j@s}K#~xmW|@=2*WidU4!I zr@U&?g?}zZU|-QS>6~;;x{IMWtknQ9(?5_%xWNykn1WD_V)5{n5lu`utL2?(_at_y_+dh$(F-8 zW{_p8COZBD#8>_Y2q}LxTzU16JTsmpZ}XzJIW?O0HmlxV&D)#y_C7JN^GxUZX_w{| zGR>_3KdLfKt8nvWS~~~|$utY(`J|Gzi^~W2$JnTC-SUfs>ff&Up))wE2KY=({Zh@& z#hRVzU9YG$C$yRqY4&?U{-MR1P`c-!T60LNIh3wB^rRA7;e7d7KE0N02%qH{X5YT( zYfpE)kluFq(Jj??M)RFX=YE7zp5={1R$9C;pn1XOAL*&~*#`{4oKyB0M%xw17Fzd1 z8@~ch1?G|O?UsVD3QW)ms7EbaVp8yl0{}=?;)u|`>vz( z)86jl;?h86%|8!E0pxSsw!XK3K~^N*2K1Or4w;#kor&5zvNW_KS~L`Ud@vCL=A6l6 z1JN-lreMD;G8#=lVA%$MaLoEBL}W>b#S;QebV=;aqQZ)hl;n7bAv;*t<)$Xqb!^bEY|Z&mAiwfbIzjitTYSrf>FCCjv%V&7;oTjM(-zheyfHA}CMmh?vefZ5RN zrzW6o%2`D;F@QH;7!wEHSk#&;#Qa-or2!(e0=J<|3I7d*BRN^_avpoG_1QBaW?2pP38 zP*5nBui@E%LLFHM@s@Qm$*&?jxxZ-Q$P%{NEmO=idMJT{eugZRHV}Y^tp|ZMdw|kQ!`|Od=I**9gv*FM{Z0#=2a- zL_Q?hfP_j3rU2+=k#QKTidY3Oi9?K9_huKbim`DwM#-2gybG6Q5n%M4-m>BtOKy7p z9{n}t-3ZMX0RSx;siA%?)DJ(k^`HhCIHUy* zO?xw}$A58Y;g-7PC2h+~=xep1Uu)>6ESlbRzI()Iu!q-w6aRHwJ#bn(a2h`9+A|tx z+ZnCxjM{uwYd)(6u-EkJwD(DekoK<1)UH~pUAtJjR;^v9)dJUal^?+xfar8Tl$P1X zncHchcS+c_DC|;&J({p5?WbR6TkpavzYL|f_NQw)Q~jxt_m8Pqfz*Va~empp6SH+8AVl0dHIkPBDAJ4z5h{1Kb;bj~yj0W%fxf z9o-8R3+m%3=ag&8Jym8^Be1G5`}cVSw0-2*a(XTSZ%>~@U8g*g9=XA0pUY2r{tBDk zla9aQ-gVhi{D8Nu!@Sly=iHM8M-X_7?)(BfWl^YdDbQX8?_p`owojFQPno@JEKR;O zOQ$KYk%lDspCZ(^1TAH=m$ioWHx#OGn31_eoJfj`JNb(hW+XvDw0JZvI{>K0L_v(l zx?og5z_B3Wpi7i+UfKYDwk1-2a2w`Gm$8rb26m!PWZyz?D_cnrbXqu*ExW);LAbo; z4Pn51v#FOD3ol_`(tUuS-O_5(KUTdsS|p50vIQHAX3>&NJ>!3)7jk+=>&Z0xy>~P9 z=KQ+3?WtPh8I*B`Q^86L<6=CEaWBTvevFKaWjIw$FPj(#o-OHR(yatUG85O3;|!hq zGm6S2^ks4}k!-R-y~6BFMWKP1Wt~~w>P`40d%r{FGf1d(Gm%VYT0_Dw{Y(2VFYdpbeqB=ck7)ZxRNtQIL$l3ud(vHdzG!KiJAXgC)V_7G 
zeXH8utF`y4E!(x0?Y~kU?)gpMulv-!C$zmMvSuJHC#R1?7y@pqy23wh6w+&k9vugv z--YiR4z)3&HAd1kktZ-MIlbgtyXae+>NQQ|0qf12T=EHvJ|Wet`Zj64O=;gI_-ej& zOTNvEzRl_GKGpY<=6fmadx>J_^4O#LLYgm>Wq)KkBsEUnpe+aM-m-}UVll)ro^AxO!%~wLYalCpglX+Y2!0L|_PncK~Sp(&I zp6*#>`Ic$yVaV9xq$Q0yXmVR@hJ*<1$R-^9p?c;P`3;ck z`2`38-&BC)hNAIUGTxVzT_XtOa~)SxKlAq7EzQ4iewF6mk@oH|q%W)!R%mBh%&gqb z3<3H#)V5%W*5w~KXC!CH^2{d2iwjzp76Xm(AYG0Vk(3@>LPD*YE>1y;XDV8rq zu4l0nBsP+3?>TB9YkN1^+R@x;NOnED!Oe*Omolsa$+o!W@0xe6gtMAD=>I|j(r>V4 zvLLD{Q@cU1c@=^bfDa)3t%LsgfCmO20A$I70e}x+l55RrAKnW1pujhuB4tW&vv_cj z?R7qmT`Dv9z(8xqN(>#$O#?*CoA#C*0|PKLS*iD>$ndq|jyDhhHs!Kd?vGGZLw@{o zxN>H^`IQ>g4a3H;x^F5yE|g?3*tSAU9OmVeY2?Xt<<)E0+Nj~)>ZRJQ#o8{lHmKFY z2K$e+I$E3dt~F+VCG8+&ZJ+gW!yYl3l!l=Ln-_sm)EZCUM;)Gjtu*DDblll!1>Y=3 za0_(E(jy6H_VC91ChC+cHfsg>3N_J5#|^e^s?;3E`RGX=nlZ}2G-DM1W3xigjGgkn z(j~L%>DNsd4qKRx2X6a@s=aK_U4x#|@11msm}mcN#hLTG`A;%XE^-bHM-yyJ!p6Ro z+s7d87!kmCEljqA#pDWnImbllZW%Vh#%L_jEyTy^q*7!Qs_iLr11>wV3=>I#&imoS ztr!vpq>;$PC=4`!pr4AzEFd3cZVp3Xk6hnpEJ=Rwofx+T8 z3Kpxi?@3#;<%RYswlkD;GgUm(Y_-zD zD)IvJY2+GPml|GJYE#8g@)qWGWlel^u^;R^4w-ZBkpdXf0coTDC2=Y+Kl> zw(QVac1$0iKK=#vrIJ7CyVsYxu^8BzHh#S$4|~(SBlNo@8V)tERbzn}e=}_?zsx^w zUw!}L2P5yt76u>fO>a1@ZaA%hwx7}3&n&fHSZu$Le(if|`$et&;%voi#TOJkb$y|2 zVe>+pw&8%<-lw(qEwv9UwhufytF|B2+Kl3Q!_G(fUe<>stUt}i)63ZreN zL2}XEKn-_@|CQknFdy))fhA6*?R~U^}vX#oIl}M2; zwQX*O?V|QD5b?+UhFS4me5rBMV&f*Yv0H2GR{dKvKOLMft<0ps?FzO@E!%(Cm7j{})hb!zPHthPONU#W5JrM6pGmgi%^fQxmcV zH3DnLXNsqMOXpS(C+SNfhG`q2DS38HoYT@7_V`A?O38ADVI@QmzBVBox-igh7h_o& zL$9RkJdEyL67~)ZJ0SNk)Kck`u>n%S{A6kPq5lG?WM6# z+A;)24cD@UY=<6q1m`!a9WQDfFOr?ns>e;6=C^B`52#IjT2mil_%dtOzu&gBX8+=v z{R|&iooZcJ_oz92_QJB0TYKP3&asvt;k6Z%z5|vstAgp)!}zg$w9Ny?Qvn!oGx8R* z%Cdn0CKKsENy#o*te@UAHJ4A%qDxmK+c>BT){_N47x?=)pWyGQjvICV7!B{C+)MC{z*B8P@_KhFHGP@a) z$xuy#HP0J3389`i2Sbz5%&z28jzEz;LbiOCZRGqUmPwG?L_9Sh+xz_+bUxW<{>XLR zY{}Bg=1E(eJhv-6fjY$_FLS_r6Wr4wS;=yK?=cqqOCxeb5n*P}=HKELg5hQU1PMnNyes#aB`3jl*7U*m~*? 
zMb@i{!?LgYgvpZo2-A3i)r^hlUdH<{+Vo08sfJPmmiIS!4Zbc!>>i?4j^Gfo6Y^TG z9+O5XlY|_<3SZ;(H_9t?APA%Q*k11vY4b-lf3K#Q>}}U;cc( z8aSf`&HzZL+5yYY`#npoyB1q_J#02MduKh5TUTqXo91__t-V@n@2qFm^M&EFOKsh* zwQisFKx>NiZ6V>LBbJ2ScDU%TjEtNPbz{&i{ZIz#((k4TY(ee={5kY3cYz=DfUnA#q`YGJzV zoJ_8_3bi~ZEaF~(4X>M8qR8K1`N<7{Axm<QjN!?jke4e4(^L8HC_1TrGO-fI=;YtP zl>@&2Gu$@dyRK-aC?$}E;z;8(i)fDwh9`A2KcshGy+5c{ZqzE-hmpD;*SBc(>rxjm z?6vyM(}y#aH8XqWYJc1}-Iu9vdFS*`PpA5`LmXlLnf+uMzJ`L1|Md7zPTV_zB;ZIG z_gfbITjnLZr6-unCaR#F9Knf#BSHR1aJ~FGg}y=L`$T9DSf;^d@FW_xx{EL-<7A_E zT*epZ@;gKb>M;&O-iHf^KJi5?>LmHU5P6>n z=@l}KV|ktk>EQ-P{7>W}%v>)s#*_SC>7goncwkr>9Sw)EcR?yHvs-`3(Bwr*M`Vlw znvg%FM(%IfQAY?#i@vWY(_3A%RRw$8uG(n@sVEK555p-@I%7+=KGe0j+)8 zvYUmKaZRnu9_I9N?ZR?7b5?L2YnCgS)5isxtoazZm7v?XvT8bM3F99SI(=Au_oCr} zqn9iQpQUlQw^)L<(5XxWx!kpoW$s!th&*t-lcmDB1BZKu83Z>S%M2~xR3mN=>kH3M zvNM%6?4)F^`!GL$ZkeOIl~QKxU?>aN;B{9l^e%I_J>1T2g`~d8eTsj0{!!;AmzFtl ze~x%`FG07JyI-6ihS+ewxFE7yA!#gkZ=oZgxGm6;{!PqTNE&_a&UsW0w?fiZ?`~b@ zKvH-QZgemex6c#omL9sI4P9r>Leh;hrIe=B(Nf+WnnSH{D0ZBZ_+jV6#G^x>+F2_8~f%&@?C^x5se^(3qGmnoPtIjPuEiOOUCF?lyEt zHksKZt-yCAL)rB@Tg0y>kL>C^#6B(=A}d5WIvi$CY*nxODV7mxtk^5)WkW>TiF6SO z5@GZDG`Y4BX#@G%yAOIVPG}#H|2rOe{44#SI90>*8LsjU`^#`uci3Nsdm;UNzbE-5 zb!jg9%W%Fs?C%M;Hf{f9xW2Ugm*F<0?Z4$RFTZW}5M~RHgW7h|NI|{KcMc=Y9>^XW lkMKN(QEB=m^Z%VS^i{{JE*{LFG@YILl7;+kg>cry{|{Ew7XttQ literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/__pycache__/speakers.cpython-311.pyc b/TTS/tts/utils/__pycache__/speakers.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0f22e830893528c6293013da7ddae4e01a1c225c GIT binary patch literal 13431 zcmb_DTWs9cmE^pinvpcJWIbv5rLm=v-;PyTj{KBUTW%~TwiSCP)R2-#o`+1%Se81D zl|b0ofw6FlwDz_MA}rEGL5xBux?6qp!&wxI{p1MT^(+?x4D1$Y|Hg0%1o>F(IhPy` zIU~glP>OSSc^~KAd+xdSyr|Fpeh&rL#Q5a+KW?R{|G}5i<<$}|{uL4rDURak1U0HY z=}{WqEeT7~I%-YYMr}#^s6FWzb0cpL&hfa;|x5w2pHFtmixcLtH)Q<$auww{w2J zfvbUEfUh0*aKU-|=mxHqZydL9b@R5-CXODV!l6GSs^PFru@0tY6x;E5ETh=Y#Knwa zeJ)Wg_t&O^6+afFa89H_kor?h4dS>aP+9vO_l$+5NGFY@Mqts 
zw=btS2k!vNI5-#l-0<`8&T-#eYMkc0^Zrp6@8*2-)MwE0OTG1|2Xg&Yxn7`_4X6_U z>iBp+*T4n2TCR?VT;5jh5#9pjw{c53&DC(!xPz;ow^#HRyi47pM{9xJ8FA2pQ*|if*}{n|v&j zuIeYAiY2lfAO^Yts{?UZVFqb~n8(wJM0y%Wt&&coL7z4?q~ecBZl%4TpLI4hkLxXPGO;wL^8gOw5@%;^l10A2!WSC^{XhP*`v5+kT_VN95j zSy^Txev?lT?m;fny3vR@2nR%;K9|xg#VB%abc)SfW4gqQ(9NW$$f6{|eaw$A7Z?D$ zO7a?$s|$#8yT#~Bd|W+q2)lH4mT965AsXktcRys8yp$pJ2^sh0dZW?kt(}-JpL4}J zX)A#Qi!d+iLpz`Ux=s?jz-WWR4Gjl7vvx0Jx+iY2$*BZySj~Zf0cJB{A%@Lz@ia3f zq+`4&B2&PbR51^oX4s6KVh%Hd*yu;4HeEY*sDp?QWMNbNq$)|E(TM37?W!j3;p(jJ z(lkr*3+bcRGtkW3X=!eZNM^205~rqZhw_?8 zmE;p@bw~i=Vi_SlA+X7=5-xM?(msMF8f7BdkfQpbB6b0)$OX0GutJ?wJSZ}v@eH37 zmHHA2!&X#6+LDAX<5Coh;ErL1#imyZ?JmJ5f6*m#R(d+z7*s9pY>=Z93u4pu# zif5uxrB(#Z2_%`SJt6(WF2yGDiE#lHA)y9A06|4J?r1cYU_~(+6MwPayXKzrzVoT8X~or4VD=U^_7pmHW1^w0=%w5n zpHmLE=W7H-0)mupLy@B0r|E((_|(_A;_F=KTW*(poq6Ao>>H9wchQQK)T%F_J1S;e zd}J8HG&>++W|D zDw~X!XfoSUK?`HyYuFN3evbNMpzguG50B&no8`b}$+cPNfNaySZID5RJq(HUhrKqx zn)gEG)Pzld5?IWZ03}`63@?fem`)V7K+-rhzcwwRpNO-q6>~F{;xI)d2u`m4!SN5z zJq>lOgu3#f9dc+#KCn{`?37$P$wU>0z-O{T$~0jlDVZ>`??VdK2A=9QLXjZ@@o}(K z!}Os|p$$f?S^;C>d1x+r0L)Q^K>htYlB-=Ckb3wCc)}=7^>`Iy`d)`z{p|TQ3>3^Z z`nJV*`sBcD@NTRY_LlqD*Q79>;vqV)Alv+elQXGiDRQ#q_XonIl zY!Ogs3FtKmTZIUwdjTkcQvcctinz^SHtL}qBtp~RXPE8Krg>IX|00zY-hk%f24Isp zs@O!ebleTBdiO7lO9O967q8^KS7qk_evud^MOlp;8JDd>r(PYJ}k&#p}Mh)@*Ti|zAlYi%6l)%-pfVG?yg&H@6NYJ zdSyb^wqc^f_LMw4bqg*-`jS8rG3AkvA6{L`28%w7XM4wPxp!|dGlbI2~cSZ?e z1U*>MCuYDK5d%x^TACBU>p=-N1P8Iuuc7m+~qL(CnSiw({HB^105u1v04cJ9FYKj)xz6IFLzU8?UUgs~V z84j(e8;iOPMcqXQZSQ!*VSYy?fL3)yOFI?-SZE@Tr4!3rmNUQK@r3*LeSesg`hFzu zdz}^n zM?VM>le&JJUI&{|$m>`^oL9EDWZZf&HK`j8mHnbKPQ7khbJhi;`BEwXQ^Pz~!IA?{ z>bkWpRkcB=Wzde$F3yspeo1{?QSUuEY@Pl2$v7C92Cs}5nH1A0CYu7k4arwsF2u|S zYa45_W9geb6W7Gq=~RY?%77%zjG@KK##Mx6XC5{d8C9o)ir@yGG5&2dZ2V^+2oy=RCt}<_HXSzDZx0zHr1Aa6ZDxwU#6-!!FY}lD%n@q=3 zij#{Ac&sRnaS@uv6b~wU;BrCB$uce~4*nKMR8grt36f|yojHlfP6$F;2-}3?Kt08W z=%U(i6+d|^jaN7gHAFmNK(1(lr6!7I7z#FGc?+oaPGhxS001+`L4`Jac<^EWPy2sm 
z`PBJ&;GY6Zwmh?6X7(@L$=9Ef>rc&{S%oU69-jH>nT628oqXt^96C66cGVq_-OZ2o z<=u>o_vX2y1*iM{nfGQMwC0`dva=ocb88{cBDq>hC2e_UhwLOfy{!;vm0Yc>o))R~ z)RPbZxbvPN*)t^BhN|rgWK*&&{|^#iUjQs!FWZ)tEYHyf2e#!p*-(NtN8P4VfsA1T zRJMaHx%QT#L5DPL-u)D4iY0eH40BS)nyX;?n1!?9pSe8^milIDH!S9~<(Bm_HBHmh zWr~JB&IbCSeKw4q@UrI^SqL43@o|Pt2t3QpfJ2HG!Eqh=Gn``7tevo}LJPxTmv9Vn zRBm#KQwea2+{zHX5KcjXVgp&00)~ZM2NoA}1D2NyR5`*wiW~D){f8bm>o6hW?kTG| zWJtngz&Q^;@wWiL)^fV%r=L0-SDcNHTzTgv*|})}z?MblinCX8_7*~|cing0e++dl zypj)f%c1VOZeX?hEe|d)So7{JvYW`4)F zN&Q66SvZTC5rC1bMwXn!-A%3>owIUQFwX!}IX(sqLgSwq3+ArNP*6hRV!%`*;JsUu zsqkQ%;WIi@4J-8YpK&M{D}w_Y7%gls^N=tCMZyIDijB)Ar{M5<6JHEEFkud*u5^G# z30vW5CR~D=6^B?zgn{V_yvFPT3l;wXD}MoCj(X+{NY0kkww;SZ`L_LX+kVN_yy~yN z|CY3I=i*4-zgzb2hE;X%TY7!f7rcM^L3-iFQmy3MoA>RNeS0B+ewlrw3IV`D?tQ>P z#=LTAMM`C(@2YC2T&*xb5#`|LO@lt`S3cWd(D_w84N#$DKq~N&DL(CNSC4BXn}Cj> zn7;7vf))lj3;!N+-*=k5EuTSQIq>&^)95sMU=hMO`rXER&b1Yowf%-Ui%>K&VdWWi z+Xj8;;}d?8L63!YmDP)NRErA#f_TNGC{&cv)c3+oP3q^hc>{`pOhaje6vf%irSH)| zA2i^qE|}*!7-?$VHdS*-{n|(`aqymV5)Bk`-L?ZBgXAdXQ&g@BPX;Xp?YirP@ULUu zm9rnO2>qJpCVF0ju)3K?-lJ2zaYRC=ah^iUx-%l8Ldzmiu zXFC*f#Zkh^LkSZ(5K+DfRaxRK3o829Em9>Aqs&X8R%03g-jE1YXrETm-rD16{-83tjO(x?a zcEeiDkbu65{@A(Ulc!HbOg^_+_HsH4pNs&}W9cb1)Q_f4ML#e=GiD6cgEqr5@WlZb zholKuY-@F4at)x&-3z}LD3O0g{}A+XRQ9g=Z__#Yo@Ekp)$1npCiRe}sr&GGj1~+K zx}gYEX-i}mIPBG}Asm1_O^Kc(PM>ex{S&Z=x2A-F*=>eM02qnnd!k5xA{}EB;y^^N z_dCSV58#_WQGcSq8%SAN>If(doOp2R(H*(tkQ6!uemE_=T%nvI_SWnGv1*Z~@UaZ) zeDM?nW4SEs*%UvmQxz?1vZs2;o&a_u-{(YThpU{V$k-QzEZ@zDX=Vlxr+BD71<^gg z0Coot_Hd$vxV%NwV7!2|7En8$-OUW>e%_J`98jah0r>DMEau0zOL)XSc$u|yGs019 z+lFn~o5)LVYlJTu>@bmU=}5Ri^|kHB?phF_l`QN-a16n51g`;5>{9~LWIv{T=d<7^ zNbVQ=0*I+;)3uu21 zKk;)|7z~s)Q;l2m8@9yE|#eCsQ6>nnH9J_`ipKxdH(_%G0ZY~1+a^uu?4`p!Z! 
z-?&e11l8P8dyZZWZIqfb`A}94Wu;JdwWaN&gP#oiVqkI8r8#{`t9nN4-yz*uVnvY#nZPN34l>z_u|)H9^$ z{uST;CHA-Jyziv!J1LR7;IARoZBTu&_Az)_Fjc4O3@m30mUS!-O5wwBKiTw$ zUU(3UkriKL@y6pjdEXJ)cSQ0XdFE}Fz1tTzt$24z-kqzy+CuQy)8K0>!Pg3*&2nhh z(w5(ae-mClC-=XZ-*Zvka}j7=i*u{3dv3M)Yrducz!(73P`(hx$Z@gTKRWfvnO~e) zY+5>!Z#^cr9=m%Ma7%SZmwEuGciwkQ_8pUa#|j!dSp7hO8=v06Fat~pSL@(P@ZhpP zA3P}sPfFx|hS|GTg1eU5^1&l=@Q4&VQt&oQwq`YEYA2t6m=1JQ4z2Ke@o%7tzHfWq z{=UO(OqEqsj-H33{=1dy4{S!Oziijvt5Dq+u+OT?tdxIXziS7T-ZJmHXRGWL98tRJ z49+Dzeb)vptkB{mM!;=wI^`_0VB&n>SO=2>FxkPJpey?}tDERE&KPD=HUk%QkF%i| z)L?Ua90Pbb3*-H`iijT^4b1K`9*<*e0(Dy{y{vlfxaMT;cT=b$;5lczAhm zW#{31=YZTfuzW-A9L%>LmD`W%6$!y`9pNe-S)1`0DAOqitdDf5WPQRLDubweK_;ox zsN7uQjK9Tv{~Ja_zPrKsZW~tb<<}S9Sz_hwujn~sVqv%NE>u_StRS#6ibLg7#gR$l zo>Cl%IB1U(iY*0tlVXPePVAcEAwhKbjA?2{v5$k&l?gkAn~240(1rByK71AlTAZ>0 z?LUl^XjY%Ljn!E09cV4gh`M{NY+Z8BT^#)}xglzWYy?bTv-lyI} zE8atS?+;|}43jAe!WfAlvK+RRy zjsVw1<#_yzO*On+{ptq*0|IKdC{o3X0Kq3r(*?>qNB#| zPAc66$~Q;;3e-l)d_SWGCG%aN_Dbfv2y&SQ&2DWdHEexOioP~hIB)gP_$`0|Halqu zi|8O^r$M>aL9m~0DN+C)9UzZIP~PC}$uaVfM&ZK}c$MPQ1#3mS7ddo1%)cZuwBjj&0GVDNFGwA8Ey>;v<$VACcuy6ssuF;vrX&x=c;f zjm(a0XkaDqAWYgbVS+|l*?1S8!2siK1ZXeN3wQY}Lsgo*sqn2UV-xsIYf zqBx49gH(v_r$f4aU8thJBBbxv)0nRd8u|_7*BCPSbTkz*_ghFwMbH|u_1hr6jWh6u zo0WOuoRPP4CT))2$eG_a^jBFZ{KswQY&@+kZ_1-mTqR$@Gd$zhY4Yrl=inVhc~!h_ zC8P$&#OeH2j)7XMIS0>h)x2Xxo7TOr?_a01sSt8)CB13Zwv^CX547yO9@<&lChYeH zzJ}Wff13cNh16N5r~Z15h1S*Ts8bZz`aZ>N=51e8P}EoOr#$_2WVDN6OM4a*xuK7sXZDM z+hfr{P;3{)K*&8dl`(n9xbZ>_&?AIzKxCH6R#E2ag_BFBX35l?W*l?ow8^1K-y6R> zo@kvLzcVvGBbhf~;fW`r_>E}7Rqx7JE^y|VRU%6d`KU;Gv- zpuW_RA$##Z;rUWGj9K{PBH3SMOi?o9V#WeP>Jhx*VLoH!yispHE(WG~CGOKJ$RxxO zB0EEXHTe-7+i_U2g#&g*AE%B}3%VO$&KRPa^?g%~#|_2nS8RTZ;`G`TUJCJ3>d$pw zK($|~&G-o&H{4VY#JIi?S9TYnY0&2Xl;Vt99H6ORL9=4k6E`Hay;W;0hK=jj0+nma z9_XKGZFHfP=C#pYt^MJDwYFN~71|ktl%+6#f2_G+*VdA?Qqzovb#3kZ{YC@MXwABW zl%|)tGx!v=LvymKlTDS(b`hmqKiXhcZN+scYiunPSDn4FVrnHVCXEx`ErD56`+d4e z-6+|ALS+e6k}JL*rfCZLgQc1=BjaB*Zr`hi#^O|kl>W_^z!;Lo!mJcx 
z#gvu4i8v$nV*Fft*Fa;n8G+SdD%4qs71v(U4`oELEmg%Oxwfo<;%aSK(9M5ESRTf^ zAF4Pu!uv+y_Vwar4_AKMF44)dqBj~4HTUwZSU})`1Bi;`J}tiVbLW5l>W8~L!upUb zfyK*Py%@UK?qD!70T*jH78>FOyjxY)LUgljp+MNhhP;!`04R zwCv=0aw|@+S2=}fMDUHc74FBy4o9NwbaSClxgv37hJc|6v(v^L%+?h!3g_ev@uW#P zFazribLae&ZiXolmjDd`3#u5-c%H(GB$QEXd96uiL@fkhNn83h@ zWGZ0UKmg0eD4^si;58&PW3V0rWHmB$RK)Fz>k)X+b(qC?pveK?Rd^3s{K7WKMKtlh z0@nt-)h61&WQ~-?u|UsTXSJKFZcc8z}E+!y!G_fug^(+?@FG*RG(Km?oAyUT5=3YQr0k}22h zNaaz|_WFfe^8<;Vr1!zuC3~A>Z(DXWC0g%SCh4CWJ~w=3`k6^`?1y)Wgo_6s^*%hT z#Fcyq(@gkr;3@reP-4RP_V!EH2U5K^a!KC*jK8_W-<0`aiqFAdFAu=z0XBBBe!)9` zB+&(pII?7GlWcA2nvDtX{UgclpP%^r#Al~|c510+r=+~QCZ&E(;{B=qVF}{!M#i2+ zqDzse92rkV#+6LDW@n1PYlrLRV)cWNhSIonZAkKQscXEn3-HFJQ21F$SPBVpD4GgI zl}we=f6F>?RV!;-sKu>e8|8&CAN1fR%NWGKaL8LE*CKCAxcAd=CVfPe;n1llfaRZ? zMDE4ZL;4Gdf2F2pba9Y|Io&kKHC)BC4Gy^mio#q@KMhjbOhvpRsdDLgdicwg$N**G zbj7m5G~n7mS^z$U)G!U)8`irPZX8h{BLpU;Q7%DTms}K zg_C;|#4|>(B0-w`!H75ddS}KG3yZg6JP*sTf}b1{P}X+ph3$ygMlgJlh`@;%dRW9o zsX5)%miq%ixQc|=q4)&+i61~POZ~Hhs;Yq#V&}iP{P2ckKP1}^rR;}hPp0+Ox!Z}} zJMSg8$W>cq{npu&%jT*D`i^C`J8i4Hcj)e+gi@i=m_z*u zt=;Ew&|VGeI0MMTdcW?WsaUy4gFGMA?AdsU4hwhN3{ZF_46T*uIbBr4_eSwz&pn_$ zTf>gz%$!9lnHHZ4oE2MDoL6#ES>sk1DO_lj;EtOFSJa+w0Y%+Z+XiD}eP}DerOLq4 ztH9FH>H=Lj9V?1yB{e_i_o#dqMi-4qr%2_y7{ShJL4MV+(^e`F-d53u-Gbp?>oTd}3 z$VX8#S~wrJvq#uIA;u#`LbCWe0c~zKD13<~SRvzPFA0Osz)H+eSL7}i2;t^739Oipn=Zaaw2f~f$%Vl zP8fR6#&DQnfVJY@IZ(l(a0b_HgyR&+vY@k8D}i$sZ&>(2+F=~_AjnyTOSFJfn6fVR zoOd!1iiH%ixrqgQGEE}e785~23Qn;d?%g7fly?L0N;Wcbpe+>5WAqK^RV0mpqFro{V)Y3VH>UtyM)mu0FNS(GC>x zha<30MnWtI;vfXWesZyAIzSqSfer$dFo@;hsAhOZJIi5pz61*wW1ld^dRf?)!ytX0 z=~RicZM=Kf&F*&Zfqm*{ry?;H*Q*x{AH&draoF7gd)|8Cm=%2Xldd(b?m$%v`^VlSR-yVA!iNB_;|i~MUfXp)D?BHCxu9e1;z`8YdjzZh5`fv&NtXD`J)lc8@Z#g zu&{bgoHzlq;{`T`Sk^>TnJCp<5F*eI5jL}n?ReeA?%Tb4*S-$uDNZOpBhl!X*wx<7 z`*?S30#y*fNW|^)w*S%e{@oMM>-MqOP%z*lD=}|g;ruR@!11?xz<|`T+uhNz`(S%t z&jmO1_jUJyj)MpGbqo?P!3DPm;bgztbcV=D%7BM*X9yf1io)gkck~UX?gd`5FYsPd zSY2H~Kk?#!fLZ1I3krlNC#^8G#{kZ0Pz*>1A)YZRhmUXs2^_^z2T@dH41`A(UPB;* 
z5Q&93VGF`+#tiyo#_Am(_TWbBl_aedJx z+xMsJ`=8m5F4>PhzV)l|UyeWV{$lFusi&<^d!-A#vi)Mpe({<8@{;|sbmgjSzm~FJ z1M%j_(O+Hu<>g;o{rc)t`j-Qb2mXOQD6t3s;q5_5zcJmgTWZ+-sN*sHWZTn=Qr{J+ z;i}wlH3gx*cJ}m~Kg~4Uo4Gra=#`n46w@+$I&ER@t-rf|q4&KGZ{Uqu{{ya?TjzJoQGS}MdEee|1qpC{?XZfv-tD_;uLls7boN3Ru6! zj2Uk@bPMo~0bhQMXg={g_Q6fY3urYb3U;tri!rz}h-AL%5vZ_*xg#^DVI?OH?*%@O z4-y?baxN&_D~wr=<(eE+xIKln%4JUF`MMu~RYXoJ7FKN19NQLf^KOOt?D9sVLf4>b zxF4i z*d|~y{E!?9L>J4ZaTwEj2Zx8^+BSE6BROUvpbSWA>qJ1o|f@%{Kp$+5) z&MUwmgZ+qbe=JBRrAk%}Hb^7KBD~)p@CAUIg{2$>Zjy-SzK;AE>>YyHBnB=b*r=Na zg_3f}ov%VJ2Rj|=+UFf<#B`@H{|3}RLrSgG~ppjSl>Ybh1cd(?t+W_{};!;pE&k%X$(vU#2TQA8pJL@gbhU+*#kVJuor{< z7#x70PZ4JJVVWQl0j5BK$S$Z6RfHPUc%Jv6|6u+>k*)&%zD+~$XLPXztXLPCM63OZ zrx*sb<;FPW0WGBRfKHV1Abs+Hm1;E|J-xb)9&KqJY@dQ#0V!3d8V9dBk5Mf%K&qg# zHbF}5BM088PQ&?vs4M&k(1dOXoEESM#()RNm_1-P4@5m4WuVKBBua*Wr5E|2e+}cE zv3NYbAZQXi9uXDYxV7<_C9Q#UDfPJFxB4QkJSarg_sdbK{b^ zQ8qWH%*~Ry8BDUxTjuohRS?;*eNLY;H)hS0V?8nVv}z3VeNRTEt$oU?TI16j+7P8F zy{;B$nDRf(uR)4QD?fA097E)pf{5%>ClVTmUhHJyEJCDs*}$a3)M*?Xse;KA@>MDj zFHA3WQ=ME67R}AV#kict>oV%wX z1hY4lgp8FBS7Pvt>yj&hAg2^=lt78DvUE#Kux06%z$tWvgIY5=nCHv1{tIZuS898a zJt$XCDI_?t7oGgzWLv=*i1Q7&zvp7Wq9j%7+DPfABr_(h(O!FVfxCeboY+kd&0oOm z6pXzy<}w)Q?7KRKC2r2O%Nn=jV#JV)&O9@fW$1y+5caAZ)wmUJycv5LZ2A@0HiBJM z1{+M>*tW8DR}yT7v&0z!XPkyXj@#o^v~+k^ukDhY&vlvv|7@=H<(5qFR=sE$`B zRZgL3zR)?LXufCG#T{#gb;JSMkKavIdJVv{9`go$F_ezi!V;j+eT@kJ@1W%xT!o3@ zA;%XK^90|IA1KJ~-|xaN;3*n|u2r0`G%#zoQo2C($+OoxZfLA!-@{zF8am$IKeXNQ zB-nm6tW3dHcS*I?t>VTY)WZrBjzjQW_Zf{*479x4iDfn3rt1NE?ilbJS8@G^^$Gfe zru$8^Z#}nF&L3FlPWTsl!K}1HvYnG{=Tf$Fv&YMqELS=fTJM~iJ(1Q%_S`u$d*V05 zjhaq$e5_AidDQ***3)K*>64kh6w@bN9gvv;Fgtw@nC_1aN&lnXC-l=D5_4H*E~l8w z(zP2hb7S^2@IJPx1@j$uqEohROtvjvlQ-{utdlo)$(BR2Zz{!xJ3A9P+1i|lC1W4Y zEDCZ2Qgv`8_VqQawneAS~-G+s0a}Ba(Q_2EXv$U;d z;o{t3N#CHD!r+onAC+=5!4b-J zaqHTkWI5vujz8EGYbvjCHay+HP~0dh$yZc$1JY-5?VS66P9flWkeO=a-v%i>iN zyyh}^4Q27x5xn(f@EXhF)eyW5QDANJk4Dk8A->)ZUj%~xD6j^auL?GnfnOJ|SrvX$ zd?TC!Fvf9Hyf(jL;!Uf6yH|QT#+%m$hcl(6PZ;`^_@-RVMorC*wNV5nAMc12PiYHD 
z%^LcoX84*u-hzFdfml-2{%VPFRWP!}!a{4jWzDdg&rm?bT{xWsHbJ9T6Z1&;H`SgKL}+gq0|SFhHL> zr_P;op>r$HC<_Q-42UVGiohr|evENT3AZnZkejI(i%ck*HUoH*`bLFzq(B6KEoB7& zVH|;?7{ri>aT5MV25?M+Uv%Tm7^iaSDJ5+V#X6K65+g7W>s0ePb9e|R-pUxU41Oyy zt-`4(f?tUmU(gK_6?{fNiSYU<49%0CT>T_Qpe;oBAjFlZLSMW(y90^VJDzz@V)E1YC-KGZ zkKcRno@{qx17BKNvpz8**KAvCd~{lN9+qnk&z?=MlDAw_KYMoBR(-GQZr6hM&Jp-< zao?w1pTNh9A0K&eM7Fif9#6l#l&MP?;k&8&#ciLp$jok#bynp3#!(F)o7So#m*cYR zxSVo;$n~!5cz3qv`K!unYT+x`(~DOgcR#r$^;T& zHJUiQxIt>(_xMez>J7Q-ja1bev!|YyD_E|oOVD>l6K^Ir$QyRaRl6QJpLL#C>OApe zNbc-Ob@n{%d3M&bbk-vc4#{VIsk1)0(F@5*O9sWTpVk4GBfq+wAW66HOj z%tUcS8xq@OhMhgVthiz(8|P+ZW=D$Ip*mwG8t2x_mdz>4W)MJ&MN6?@edL5;E>!O?UM{*FfYk`FX30_xKRGu@Zi#At`MXS2&XRv=szoaE zrm5{x@tZZ;=}YicErxTk`M45WtWbU**L@A9jsiqc{55K`Grg^2*|j@sYoqbgTny9p z>V>WMT`7A@wgS`3Ozpy@#Fm9?^Y3L11Ps3Du3z92mluL5$Cj*#$}l+l{C6A(gOsUsI&p&7b4S(ByUXYn%mc@+C==mMlmgB?_3bGmtB`nB!p zy7g#zYgnIl)WVys>!xY=WFEt`sq+1JwgRKeBx)dKMrvJM)w+}x&Qhy~bFpto$vinyZ|YL<}xHGlw-I+U7LhGe7tFT#VXga7~l literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/__pycache__/synthesis.cpython-311.pyc b/TTS/tts/utils/__pycache__/synthesis.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fd8e2071d0485d83c193b1b774589dea141e8b60 GIT binary patch literal 12714 zcmd^FTWlLwdLG{I*BMHpWH}?rH^!D^J8|yL)|Mg4gXyQ_mbzb2cGru-dm2#Uij~W|Gofa{_$zvFL)OHW(Ryg za4!a|UqK)IZ@lMnBbU;^YL4r%V9ooi@F@)lMqLtXBv8r!G8w+$N3cG zCd^S7|M`Bt2c9K6bNK;Jd`_8#gi{o!bXTsloO7FFhW{S; zy5RfuBt#!ms#9|EjU!Y(3~JuT+9Z_SsvCNC&Fb#l zGS6k4x`$U*OM>p@g_V3xzzn4*IXdZi(k3qCXHGH8a;+AwjJma$;a4UL9=9WqYAuDl; zpa+&k`De?5a6j5@t8GsbUJ`=g(QfXNcvPM-XAq_%4&UcxI zs7r_2@st)HYWRa&{`96ly*~VR2mkh9HUIGB7qjaJ>-`g2|3uwCsre@xu~gIJN_c-m zLG%qq|LLWC@elKN9XAWN3QZs7i8lRII8}2qQXEI(Sps7#=-#{tiI1N)2OW!+ z&Upv#{7YxrtWVi>`4lVaupxW-vQG(FO;IYWjdlx?A}Y1$HA^g7BQN_!A3W*!?W9yc ztQ0=gsd`kG>Q=qH`xEa2kG;KeKn=jU;pK+lh0ZUFEXR_W$r9R9JbrR4PgedzDji3mt`mL0m3QNb~Y*B!$0>#tw+$*i4aME(n>|*ozB7 zD_beD5_H_|#;RthmOVg5G-en&RGmhRv&I6&evnrd*xUjqaXCeR!SeE$RljW#?18+f 
z9Ab+lC0`V|g4IZ?1Xd&Oy0lzS@}p)h8MZveP6~4zJdi;&>}*jKI(i@z2xftOkhY#K zv}dKvF3SQ>dRi&Ms?4H|=EZsTS$&wc`;0F5QNz5S`VOdso%$f6I!|Unhq`drdab3M z;wQ$G$lVM{F zaLd=TM9&#NS=FQn7W2pp9Bf`r&^+0Wd<&e+DT*W=z^4JrRx&|pH{?jqVZ>sDbDwl2 z4P$BqB0WMnutw?0&Mk{M-Pd|G8K*o7$Oy(d7Xy>b68*P!D%_w3;4A+EqBZK<=c!cx zy|=49pYGb=9y&I1+R)KD^Mb~_u*JNz$-MM%<%@EiIi)eD)@HVS(aQ4Og}U!K&G%fB za(UQBJasRu#rIV_70+Wx$10vil&%O3fAn#BUt?fzBR$ke4>$VvG(NqCRzyL`8YW64t$QcV%}!09mR^AY$v8Z;S0Z@nIQ$~f zX?!*d5$?m0Tk4iCz3EHWeFK_rU|p^G4#KAqjc-NS%_v*ltqmQiM~`aJqZL=h^%(O< zHlrieS#8gY_2@AzdaU9CwXL0bhH@&<87Rj=psQ1*Zod8$pw(q&WlHG)Lv3Yro8way z;p|cxm^cwX0Wkn=oMK7$!J0v^EkR4cISCOe;!kND2Ay%~Ub6V`ZQvuLO_);ELwUZ9 zR8g|$;45RI3*~^QYrgb$g1*9G1mOFd7Dx@sX15QY`(^RH@Q5vu?s_%6j&5v@NF z)UHxxL%l8o5gQ;lAWc9$X%Zt;uyhI{-HlF??MCCxxY} z0?)&K+*bQpDEcmZd#|l8)am^iy}#n$ru#H{r21B!9@Xg4E&Avt zee{>dv==7p^eK%#Rq;3Ifh~GulOCzldo_CR7CpX6k3V$O>7yEbbc;T=NgsPSSEr9_ z^l>O0qSwwDX40V=3dK*B#z&C&n3|``?oRXQo8>;*{ZX@gbO)pjgz zfWmFRL8<^pEi-_pUI3Eaj;CLBKLzCad4Qjt+lk$f9Mqk+iPP%j{tT@BM5$C*W#^^* z9BekDh5QntErA1UY4|T}9`GHfAX-uuq~h}Y0$@M@k#l)T23VS30zU}AodHzr@YBSR z27+V((~70B6Sy=05S$V?Z~_3-LU&gMHdho^1W5suc6F6K{q`6;WpAI&LrLh?fW315 zMh6d*s-r8lPp=0*y5b|RxRk%hLpxC&mKS!hB*1+1su~{e!g9VE3pSoEc5x| zJ5n(x$mq%&1v$)QOJ=T7V-_})fB*|0icipk3&m0k3Xv@XR>4vUw)RYbpeQ}qehcXY zn3u8nxM9PWbyuFhuDfvy(|wSE%^2Gef;3n+jnMKGq1nP}wx!T4C{(%xrE>6Z#ueV%Y7u?Glzls*CK0A>LUItInP02e@`%_V?jy1nCukIMytej;NlS z<2Gl|qlC?}0Cr$Lw}}VIrFzO?u~$Jtr4_*KquPX-8hYUFqKuwCGEdjYUV@RKyhxD7 zVPvDs7tge)D}g|MNk+HI?l>#J+P6F*Wubdx$A7w?_cPB-n=A^RbQ|lQ6GoHBs%DH_xb>SY*wo zyw8}I(qYvnjVg%xT0u?NR=93tLaTl$r64Rc%hR3ZyJikXDl*NjMt##9d}o z2E2*{cOO8|g%*!}LtqW=2Dvdr0`vJ5K?FMLx&rJD!jsc24D&W%n3ZA+Ko_rBnLLfmN zBG55Od3YHEGa!xDY91?)w>cmy04yvv&1{I^1EU1A)WS=o(UKtKI$JTJ%tY~Lr96Nc zTT?Sg-?pI5G6FlqZdM84$+CEm!5+9;ECOF=J-|_%61l4dVVuv)_@AA)GR%5IcvI z{rg>GdWQoJ6Rn9*`5?-1z=s?Q2ulDXTWXVrhV>W_*dPyuj`3dQ(0Ga>d-}pfY;J-p zak)Ga5pC#*!|SFq53x%ECmOACN=G%rUS!}{3k4Ieq8Cl%Rg&`12j~DupQ~`x1O2w_ z7W=?3FNg$kvJxlqPzy@2lE4+9NQhY*T9I>7o=~_1K%0jbg!OE{ 
zY-`ld)Q~_dC@U1@>{a+8e+-y?2U9Z&vIu$e9GEL9h%+A$GZ};+g4S{{jo`Pea6pJ7 zwr}Oy$1ueeWbhU&Q|93mtb;Jyv41SUwiq>*n?s7*oV#7b^b@VOjy&wNahK)&&%`m9 zKX9>h-3J&A+S=^>n^25~P-FQN!Wbhn6SZNU$=RXdU?$0mZ&Q;VP;kxz7b@s+bIjQ#PK4t-Ne_z93lcSOkmmtzU9G`_|CfD+{cW=2}F*@IY^HVu5}&u@Hq z5^S`(1%qN2713GF_SevM;ld zd3foIcOT8w`ey2VGg{wFW%{wvW(|hm8_|5g5;#3$8(!_e36gr0uT8zT&FqD<<ddIdj8+1VnZC7|hA&z3?QWRU3?^H3uQvRO zHaxL4JiR$QT^~NH4WEVVI(uK>9hOWu9ucKJ@Z>e!48)r`-`bUiktk#wVvttLJR zRU+SdDWCW;Z!a4dOh{JmU_Jse2iBifN9)YU3j=squ;F4Z))_L75`)M0^2=9)q@Y` zYCXs6J;$}42G_;)__@p0?q+(m~gPZY#8)qI3 z)Zz#0@fj^Xv)$8o4~NoapuuVEj?RL&uZz}%9%!b zWGg+onI7HX>gmH;`f%mUmr1rd06i~0>Zv8)swdyll5ee@Z6pV`lKVH4pp<&@c`f<; z+SxCogCMVlpKkb*4SxdTM7qH|*Npk`)C;0-FoILBB$e8=eokW!feM+S>X63l(U{B@ zGrGx))|qjQ8Q)@#Z8FE|%*z_{G89Vmt?z=vrnNI*puh%AK&>q%v&m%Y%t4Jgh?c-; zzV!N!H+{R&B0~LOsez;0$)V~f+S9LIswYos$&;0d$FbdD?vbP0J;T)(Harjf^`0YI z&yl;X-#(5G01*{SHhS1b?{I_eZw5THw@E<+str@&-kQ6|prJac5OmRCqETPt>ioY- zLIqp*iGqYSzdn^_Zf@^eMBP|6qXwPE)*5egW*NISwn(JL4m6q09UueRR+htU#v!s3P( zI0r6LXF=ms0?-1|0xjmj@VSlC4hwYu@NTbp-{S}HINCCMwiwQvvSly)5qN<1sJ@eE z+JpPUxMlx$onC~qhSNvjI| zu*-H_nv{^hmJVY?t{?SV*N?J<3Cy1m@x~vL0z02CM~! 
zYcPxtnD&FV9qY+DK zIQ_`;Z$a%(ep=goeO>ruQQQ4S4L*Ec;|m&pz3HL?{lB4{f#5e7HN8~42hdguP$ER% zV6-;#Z4}U={od7)*1fBjTlcQC_+hws#iZ|^S|8o$fB0_gM;B_B=IhLY#w@JO{K2uJ zb+abpoFdL;#xsi{Tx)6=k2BkA9$$*)AV7=PsLMr)MfzFLCg(d zDon+gJB4Nh(@`qfdvBM~V4U&>n+eRtraPi{;4KdT<~Jk1#{nL%6~*muKwb!5gTdG6 zjC!Juc$2cCLsXn;M3Zp49qbQ3kwhcTG<)B2I(nPb6Y&){mWMLw!3KzfRB5K(b>L8* I6%#G~H^Zj@*#H0l literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/__pycache__/visual.cpython-311.pyc b/TTS/tts/utils/__pycache__/visual.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b751b22d2571e41261e9d141582d56e484eed5f4 GIT binary patch literal 10594 zcmeHN-ESLLb{~>M&TvSPq9mH~M=V;7WYd-{OHS-W);6(~b!^9B)V1T(u6JpgGnOb* zBy(rvNZMgQ8(xG-wTQBCuzJ`0+Bgcr#Eq#N?K>#y-|?XyJhjYMOOW{` z#Z$a1M_sW}*AfL0GvcK$x(mhC6< zmP}Rai&JMd)B6xF+mtG8XPYuDJl>`vJKMt3Jo+toO1`{Xrb<59H^tOhBwT=6nDWF;NsiRh)UgF%eyi4|#eA4!MDMd}S)$_9Zk;}<_6?!Z&C01sup*mM{ z3S$n}pLsX$;c3}RLeic(jiQ|Ek%y!A$a^1FZrNVFl3!-3kw%S1PWH>}BcF2z;m|Q9 zPIld9WsYZM*F3P#!wgECV@@6QR%FgOBNyf1>#Xzf{%7)a)%l$BaZ$48c9)~YXq{8u z@Vdyf*y-TiR_EQ&Dg&g{M~4khF((q5@(%~@$s%1%s5sa$%lFeAaZvQiGZ%;XB9kO&z5bS^uQpBD0xL1*(f3I+@7 zmcoT2`>6i^KoyH4c_69#A3N*Nv~B6w^JcXpb*2pJJyp?D*F^ZMYsqCmvK zxwJSr_=X_flnTYcvBLC?xp&VFW@bdGFrCW78jFJ$FJ2gwByn&?%I3tuTUl`?ojVL` z6*9ZFdnLor1NI_b1}H02kD16~;@9u~>Z-~d(3k`3l*iknM|ZDA`&OcTYIL6#-M1P& zxDq|6Mh|JxL#xrFE77BB^kpsj@@n)4E72dQ(bu%-YZb1-twncNIDL3j8$R`S*Z+2E zWq4c}9$(zC8tGk$^nN+~#fTa?tVIqh@Vj)aeC}~5vXHC3qJ#$2(0~>ixcz2%tkR+T z1D|gHWV_CGECiP>J$UnPjwwGDl<~s9jK8OjU)IJiEAYEnSocs|=kF;u=ld^=%BR+S zl$ZVJ{GIcQ9ZT-=d6nL&(K{7-r{306KCLqWg^B5GXt96krOytjY){4am}c&ttK3rQ zZ5q8zp|_dON_dY-_iA*nLiawQ8IA6;%XKEGFx`*Yz=D6VPi4C`wp(Gl^+0r;a(TN< z`p1EVKqa6DLgjN`!3tBEW=KcyF$Kvu0NfuXNXEZ=f}|lnN_3u)spe;yoV705)I%Sm0eX^tkiiim1$><5-q*ZC~pXpjiRXJ zb#U4OQl?SxJfiQzelFXh9bmq>U0b}g0Zi3>IpsY-E5v)y6q7W1o1f>9pVbqNgie}P_5omQ3*@@9EjBo&CH$+B&7ZBM2 zM*!b?km3XMnjr!~$xHTP)Q{1Ah!Q>{U`h#6m>}tC5C%nZ5DNz&GJ;J-DKZEo(Fc)* zdSnP$+D*`E;v*-3Y$D`;LdJrvgTJ2nm8>!Y8Z&^f)u+dLS7V8lSVD~*)M5u$V}mQP 
zK{Ym{#fDa6$5&#<)!3_A?A6uS$(7hiH8!TE9h!=o2b^LZeSW2Fd4dC+{Z9Nys`-W(zzaM`7Ba zrQ%)?)Y&q&G6N4bww!{~*n%yt=TcI~;q9?cGPGz1=V|-DR8pI;#0%KsYJQfz&y6kK zZ-p)1l2>*Ewz#*z7HOEsBdr>(C)fe`)8Kpq6wo#{e~S?WHjKdcto1b2;AS*^wi+Cq|JQL)*y6>kl$nfYX2>l;j2H58P`=`Y z8*yn;h&N%=VU9!WI!PwP5i^HRR@MFa;^8zQ>AA#6eB5m8RysE$h=*;yrgq}Z+!s*E zw%{{fuVYraFqtk2tsRZTN2b%#gLKb=-hQw4^ZNWia zhK(_3RM`zbjxKeB2=D)gwOd`U8y+I?58Z}8m(2?wWO->4G-6bs4fp9IiP*OmG~^ic zWC<9SWX97VK132n8p;`i69p+%Q`60SO;z_wA7t~h290vV5<#5dHTQtLf|Xyz$QD6l z1ltN0ay)A1md=iH$Wq2V>Dgfsl@#1ZD_ByAGo_N%>Nu3+N{QD@HTvsritSkZ&Pk*v zo!jn>D)A#)=MW05u)^%r6GK1uECdzM?%Dn)sDpo;U&vd^_Zsc{=jiTLj-VE% zY&~DX*QgexCT&!1F>*N3%lT9SSyy744NVQnCo?kJYMcPmjIC$a`^I#m?F=|=!;XWq zMMDPU-@@GETniJ=n`Yc-6Y$*2`yMfy;Q8hwV4Z6KBcPwPQ>J#@yjzXi0Ssc<9tgC9 z+OdEBAN~-YXY+7Zk7p(Ua57-e0E9MC^p68|Y(|Xl7bOC?KW4WH@*`f%r85F%P>9SP zO3!BNT|F5;w)&!+Vy;`JKW%p|A z-ttMkJz73%sp9W1CRBQtM(2Ab8+8mX>%*7czO6=1X4Zs?`9%4#7S`4KENxqy)q(F~te zlU#(d1V?^?ESE7lh!NTrh!c&-6{P(+Mq46L*i`D$fJC*{Q^5Wq{QtjzqDxESgFaNS zj%f4|g+5YOuwH4P+;b~fU|v8);whShGR54{pW_CQV!IBd^Nd%@r@%-7@PCaBiyh$e|Wax72h! 
zZf&P!UiJ<(`*$Ri@3qic-`Q8QiD3gL>`cRh+0t*g-o8^AM5Xcej~uJ%+~2lm?yqg+ z*m5>4=80#_rj_$qvvH2mvtf+A_1VHnvdt!&8zsO*WuHMfDYij`4>hhKvIiX3?T*q& zQ)zRpjt2cv=Y2SL%l6r8*^$Nx@7nCtI`*V-!lQqX*FKA_JMCz3>+`_e{%Z%PbFJOq zYOR^?*LvALn(*$iP(3;yf?#cmujF#AigvX7ll6K@zSP3OB{W$*1@ZJw>=Dcvelf##6-ehHoR` zY4|n>0>^yB>1j?+7r_P1-YDcHG;YmDoGxc5eu`%ioD`7F2`Q3>E8pu#@u67@T*2(L zkW1Z|2LE;~m!1WuhFc^CH(4mA;KnoomM`CI3NLWrFt$?KtO0QvOi>rng62LmpXj#k zV8^k2rsjsWye0@Prr($JVPO!KBUPLPKRdX~jesP~N-3$3%JT345$2HwhAje4cj(o+ zvR%Sf-opmyTo&$}d+`}BZU&RfUMB@HZ7_URgq!CaxWU1qXS%<==@~v-Fnu;Ks`M=Z zEMeA7FH90xIaDl&QXb50mLD5Tg3vd~W07y{o#g4WZyJ87a8t;mM+lY@jH56@covNi zZ#n+!RRc9wcoA%r1homv5yXNwKZWX-5i%X_srrVQ_esMe3h#@EE_Lm*mRiO*zvbe_ z&i?{<{u2Jhe}i2uQ|r4acALWN(L1{;Y=vFJWGxy4UyJFVc~9Z`RIX3s`pRSAEsz!qp51dKy3DlIp!LL&Bb|boNIU?yvoK`*tj0tp#^ur z@D}8yx3%yAJqE{NyWY7|kHqfnTAI=#iAD~_>fZxn^>Z**KL=y=b6=>k8K6aIw!0?HBCMHj>{KbK3V zLS^$$ry@%Ir&B$O^QTiCO8uu(!Ls?MQ+ 0 else max_len + return np.stack([_pad_tensor(x, pad_len) for x in inputs]) + + +def _pad_stop_target(x: np.ndarray, length: int, pad_val=1) -> np.ndarray: + """Pad stop target array. + + Args: + x (np.ndarray): Stop target array. + length (int): Length after padding. + pad_val (int, optional): Padding value. Defaults to 1. + + Returns: + np.ndarray: Padded stop target array. 
+ """ + assert x.ndim == 1 + return np.pad(x, (0, length - x.shape[0]), mode="constant", constant_values=pad_val) + + +def prepare_stop_target(inputs, out_steps): + """Pad row vectors with 1.""" + max_len = max((x.shape[0] for x in inputs)) + remainder = max_len % out_steps + pad_len = max_len + (out_steps - remainder) if remainder > 0 else max_len + return np.stack([_pad_stop_target(x, pad_len) for x in inputs]) + + +def pad_per_step(inputs, pad_len): + return np.pad(inputs, [[0, 0], [0, 0], [0, pad_len]], mode="constant", constant_values=0.0) + + +def get_length_balancer_weights(items: list, num_buckets=10): + # get all durations + audio_lengths = np.array([item["audio_length"] for item in items]) + # create the $num_buckets buckets classes based in the dataset max and min length + max_length = int(max(audio_lengths)) + min_length = int(min(audio_lengths)) + step = int((max_length - min_length) / num_buckets) + 1 + buckets_classes = [i + step for i in range(min_length, (max_length - step) + num_buckets + 1, step)] + # add each sample in their respective length bucket + buckets_names = np.array( + [buckets_classes[bisect.bisect_left(buckets_classes, item["audio_length"])] for item in items] + ) + # count and compute the weights_bucket for each sample + unique_buckets_names = np.unique(buckets_names).tolist() + bucket_ids = [unique_buckets_names.index(l) for l in buckets_names] + bucket_count = np.array([len(np.where(buckets_names == l)[0]) for l in unique_buckets_names]) + weight_bucket = 1.0 / bucket_count + dataset_samples_weight = np.array([weight_bucket[l] for l in bucket_ids]) + # normalize + dataset_samples_weight = dataset_samples_weight / np.linalg.norm(dataset_samples_weight) + return torch.from_numpy(dataset_samples_weight).float() diff --git a/TTS/tts/utils/fairseq.py b/TTS/tts/utils/fairseq.py new file mode 100644 index 0000000..3d8eec2 --- /dev/null +++ b/TTS/tts/utils/fairseq.py @@ -0,0 +1,48 @@ +import torch + + +def 
rehash_fairseq_vits_checkpoint(checkpoint_file): + chk = torch.load(checkpoint_file, map_location=torch.device("cpu"))["model"] + new_chk = {} + for k, v in chk.items(): + if "enc_p." in k: + new_chk[k.replace("enc_p.", "text_encoder.")] = v + elif "dec." in k: + new_chk[k.replace("dec.", "waveform_decoder.")] = v + elif "enc_q." in k: + new_chk[k.replace("enc_q.", "posterior_encoder.")] = v + elif "flow.flows.2." in k: + new_chk[k.replace("flow.flows.2.", "flow.flows.1.")] = v + elif "flow.flows.4." in k: + new_chk[k.replace("flow.flows.4.", "flow.flows.2.")] = v + elif "flow.flows.6." in k: + new_chk[k.replace("flow.flows.6.", "flow.flows.3.")] = v + elif "dp.flows.0.m" in k: + new_chk[k.replace("dp.flows.0.m", "duration_predictor.flows.0.translation")] = v + elif "dp.flows.0.logs" in k: + new_chk[k.replace("dp.flows.0.logs", "duration_predictor.flows.0.log_scale")] = v + elif "dp.flows.1" in k: + new_chk[k.replace("dp.flows.1", "duration_predictor.flows.1")] = v + elif "dp.flows.3" in k: + new_chk[k.replace("dp.flows.3", "duration_predictor.flows.2")] = v + elif "dp.flows.5" in k: + new_chk[k.replace("dp.flows.5", "duration_predictor.flows.3")] = v + elif "dp.flows.7" in k: + new_chk[k.replace("dp.flows.7", "duration_predictor.flows.4")] = v + elif "dp.post_flows.0.m" in k: + new_chk[k.replace("dp.post_flows.0.m", "duration_predictor.post_flows.0.translation")] = v + elif "dp.post_flows.0.logs" in k: + new_chk[k.replace("dp.post_flows.0.logs", "duration_predictor.post_flows.0.log_scale")] = v + elif "dp.post_flows.1" in k: + new_chk[k.replace("dp.post_flows.1", "duration_predictor.post_flows.1")] = v + elif "dp.post_flows.3" in k: + new_chk[k.replace("dp.post_flows.3", "duration_predictor.post_flows.2")] = v + elif "dp.post_flows.5" in k: + new_chk[k.replace("dp.post_flows.5", "duration_predictor.post_flows.3")] = v + elif "dp.post_flows.7" in k: + new_chk[k.replace("dp.post_flows.7", "duration_predictor.post_flows.4")] = v + elif "dp." 
in k: + new_chk[k.replace("dp.", "duration_predictor.")] = v + else: + new_chk[k] = v + return new_chk diff --git a/TTS/tts/utils/helpers.py b/TTS/tts/utils/helpers.py new file mode 100644 index 0000000..7b37201 --- /dev/null +++ b/TTS/tts/utils/helpers.py @@ -0,0 +1,258 @@ +import numpy as np +import torch +from scipy.stats import betabinom +from torch.nn import functional as F + +try: + from TTS.tts.utils.monotonic_align.core import maximum_path_c + + CYTHON = True +except ModuleNotFoundError: + CYTHON = False + + +class StandardScaler: + """StandardScaler for mean-scale normalization with the given mean and scale values.""" + + def __init__(self, mean: np.ndarray = None, scale: np.ndarray = None) -> None: + self.mean_ = mean + self.scale_ = scale + + def set_stats(self, mean, scale): + self.mean_ = mean + self.scale_ = scale + + def reset_stats(self): + delattr(self, "mean_") + delattr(self, "scale_") + + def transform(self, X): + X = np.asarray(X) + X -= self.mean_ + X /= self.scale_ + return X + + def inverse_transform(self, X): + X = np.asarray(X) + X *= self.scale_ + X += self.mean_ + return X + + +# from https://gist.github.com/jihunchoi/f1434a77df9db1bb337417854b398df1 +def sequence_mask(sequence_length, max_len=None): + """Create a sequence mask for filtering padding in a sequence tensor. + + Args: + sequence_length (torch.tensor): Sequence lengths. + max_len (int, Optional): Maximum sequence length. Defaults to None. + + Shapes: + - mask: :math:`[B, T_max]` + """ + if max_len is None: + max_len = sequence_length.max() + seq_range = torch.arange(max_len, dtype=sequence_length.dtype, device=sequence_length.device) + # B x T_max + return seq_range.unsqueeze(0) < sequence_length.unsqueeze(1) + + +def segment(x: torch.tensor, segment_indices: torch.tensor, segment_size=4, pad_short=False): + """Segment each sample in a batch based on the provided segment indices + + Args: + x (torch.tensor): Input tensor. + segment_indices (torch.tensor): Segment indices. 
+ segment_size (int): Expected output segment size. + pad_short (bool): Pad the end of input tensor with zeros if shorter than the segment size. + """ + # pad the input tensor if it is shorter than the segment size + if pad_short and x.shape[-1] < segment_size: + x = torch.nn.functional.pad(x, (0, segment_size - x.size(2))) + + segments = torch.zeros_like(x[:, :, :segment_size]) + + for i in range(x.size(0)): + index_start = segment_indices[i] + index_end = index_start + segment_size + x_i = x[i] + if pad_short and index_end >= x.size(2): + # pad the sample if it is shorter than the segment size + x_i = torch.nn.functional.pad(x_i, (0, (index_end + 1) - x.size(2))) + segments[i] = x_i[:, index_start:index_end] + return segments + + +def rand_segments( + x: torch.tensor, x_lengths: torch.tensor = None, segment_size=4, let_short_samples=False, pad_short=False +): + """Create random segments based on the input lengths. + + Args: + x (torch.tensor): Input tensor. + x_lengths (torch.tensor): Input lengths. + segment_size (int): Expected output segment size. + let_short_samples (bool): Allow shorter samples than the segment size. + pad_short (bool): Pad the end of input tensor with zeros if shorter than the segment size. + + Shapes: + - x: :math:`[B, C, T]` + - x_lengths: :math:`[B]` + """ + _x_lenghts = x_lengths.clone() + B, _, T = x.size() + if pad_short: + if T < segment_size: + x = torch.nn.functional.pad(x, (0, segment_size - T)) + T = segment_size + if _x_lenghts is None: + _x_lenghts = T + len_diff = _x_lenghts - segment_size + if let_short_samples: + _x_lenghts[len_diff < 0] = segment_size + len_diff = _x_lenghts - segment_size + else: + assert all( + len_diff > 0 + ), f" [!] At least one sample is shorter than the segment size ({segment_size}). 
\n {_x_lenghts}" + segment_indices = (torch.rand([B]).type_as(x) * (len_diff + 1)).long() + ret = segment(x, segment_indices, segment_size, pad_short=pad_short) + return ret, segment_indices + + +def average_over_durations(values, durs): + """Average values over durations. + + Shapes: + - values: :math:`[B, 1, T_de]` + - durs: :math:`[B, T_en]` + - avg: :math:`[B, 1, T_en]` + """ + durs_cums_ends = torch.cumsum(durs, dim=1).long() + durs_cums_starts = torch.nn.functional.pad(durs_cums_ends[:, :-1], (1, 0)) + values_nonzero_cums = torch.nn.functional.pad(torch.cumsum(values != 0.0, dim=2), (1, 0)) + values_cums = torch.nn.functional.pad(torch.cumsum(values, dim=2), (1, 0)) + + bs, l = durs_cums_ends.size() + n_formants = values.size(1) + dcs = durs_cums_starts[:, None, :].expand(bs, n_formants, l) + dce = durs_cums_ends[:, None, :].expand(bs, n_formants, l) + + values_sums = (torch.gather(values_cums, 2, dce) - torch.gather(values_cums, 2, dcs)).float() + values_nelems = (torch.gather(values_nonzero_cums, 2, dce) - torch.gather(values_nonzero_cums, 2, dcs)).float() + + avg = torch.where(values_nelems == 0.0, values_nelems, values_sums / values_nelems) + return avg + + +def convert_pad_shape(pad_shape): + l = pad_shape[::-1] + pad_shape = [item for sublist in l for item in sublist] + return pad_shape + + +def generate_path(duration, mask): + """ + Shapes: + - duration: :math:`[B, T_en]` + - mask: :math:'[B, T_en, T_de]` + - path: :math:`[B, T_en, T_de]` + """ + b, t_x, t_y = mask.shape + cum_duration = torch.cumsum(duration, 1) + + cum_duration_flat = cum_duration.view(b * t_x) + path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype) + path = path.view(b, t_x, t_y) + path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1] + path = path * mask + return path + + +def maximum_path(value, mask): + if CYTHON: + return maximum_path_cython(value, mask) + return maximum_path_numpy(value, mask) + + +def maximum_path_cython(value, mask): + """Cython 
optimised version. + Shapes: + - value: :math:`[B, T_en, T_de]` + - mask: :math:`[B, T_en, T_de]` + """ + value = value * mask + device = value.device + dtype = value.dtype + value = value.data.cpu().numpy().astype(np.float32) + path = np.zeros_like(value).astype(np.int32) + mask = mask.data.cpu().numpy() + + t_x_max = mask.sum(1)[:, 0].astype(np.int32) + t_y_max = mask.sum(2)[:, 0].astype(np.int32) + maximum_path_c(path, value, t_x_max, t_y_max) + return torch.from_numpy(path).to(device=device, dtype=dtype) + + +def maximum_path_numpy(value, mask, max_neg_val=None): + """ + Monotonic alignment search algorithm + Numpy-friendly version. It's about 4 times faster than torch version. + value: [b, t_x, t_y] + mask: [b, t_x, t_y] + """ + if max_neg_val is None: + max_neg_val = -np.inf # Patch for Sphinx complaint + value = value * mask + + device = value.device + dtype = value.dtype + value = value.cpu().detach().numpy() + mask = mask.cpu().detach().numpy().astype(bool) + + b, t_x, t_y = value.shape + direction = np.zeros(value.shape, dtype=np.int64) + v = np.zeros((b, t_x), dtype=np.float32) + x_range = np.arange(t_x, dtype=np.float32).reshape(1, -1) + for j in range(t_y): + v0 = np.pad(v, [[0, 0], [1, 0]], mode="constant", constant_values=max_neg_val)[:, :-1] + v1 = v + max_mask = v1 >= v0 + v_max = np.where(max_mask, v1, v0) + direction[:, :, j] = max_mask + + index_mask = x_range <= j + v = np.where(index_mask, v_max + value[:, :, j], max_neg_val) + direction = np.where(mask, direction, 1) + + path = np.zeros(value.shape, dtype=np.float32) + index = mask[:, :, 0].sum(1).astype(np.int64) - 1 + index_range = np.arange(b) + for j in reversed(range(t_y)): + path[index_range, index, j] = 1 + index = index + direction[index_range, index, j] - 1 + path = path * mask.astype(np.float32) + path = torch.from_numpy(path).to(device=device, dtype=dtype) + return path + + +def beta_binomial_prior_distribution(phoneme_count, mel_count, scaling_factor=1.0): + P, M = phoneme_count, 
mel_count + x = np.arange(0, P) + mel_text_probs = [] + for i in range(1, M + 1): + a, b = scaling_factor * i, scaling_factor * (M + 1 - i) + rv = betabinom(P, a, b) + mel_i_prob = rv.pmf(x) + mel_text_probs.append(mel_i_prob) + return np.array(mel_text_probs) + + +def compute_attn_prior(x_len, y_len, scaling_factor=1.0): + """Compute attention priors for the alignment network.""" + attn_prior = beta_binomial_prior_distribution( + x_len, + y_len, + scaling_factor, + ) + return attn_prior # [y_len, x_len] diff --git a/TTS/tts/utils/languages.py b/TTS/tts/utils/languages.py new file mode 100644 index 0000000..1e1836b --- /dev/null +++ b/TTS/tts/utils/languages.py @@ -0,0 +1,125 @@ +import os +from typing import Any, Dict, List + +import fsspec +import numpy as np +import torch +from coqpit import Coqpit + +from TTS.config import check_config_and_model_args +from TTS.tts.utils.managers import BaseIDManager + + +class LanguageManager(BaseIDManager): + """Manage the languages for multi-lingual 🐸TTS models. Load a datafile and parse the information + in a way that can be queried by language. + + Args: + language_ids_file_path (str, optional): Path to the metafile that maps language names to ids used by + TTS models. Defaults to "". + config (Coqpit, optional): Coqpit config that contains the language information in the datasets filed. + Defaults to None. 
+ + Examples: + >>> manager = LanguageManager(language_ids_file_path=language_ids_file_path) + >>> language_id_mapper = manager.language_ids + """ + + def __init__( + self, + language_ids_file_path: str = "", + config: Coqpit = None, + ): + super().__init__(id_file_path=language_ids_file_path) + + if config: + self.set_language_ids_from_config(config) + + @property + def num_languages(self) -> int: + return len(list(self.name_to_id.keys())) + + @property + def language_names(self) -> List: + return list(self.name_to_id.keys()) + + @staticmethod + def parse_language_ids_from_config(c: Coqpit) -> Dict: + """Set language id from config. + + Args: + c (Coqpit): Config + + Returns: + Tuple[Dict, int]: Language ID mapping and the number of languages. + """ + languages = set({}) + for dataset in c.datasets: + if "language" in dataset: + languages.add(dataset["language"]) + else: + raise ValueError(f"Dataset {dataset['name']} has no language specified.") + return {name: i for i, name in enumerate(sorted(list(languages)))} + + def set_language_ids_from_config(self, c: Coqpit) -> None: + """Set language IDs from config samples. + + Args: + c (Coqpit): Config. + """ + self.name_to_id = self.parse_language_ids_from_config(c) + + @staticmethod + def parse_ids_from_data(items: List, parse_key: str) -> Any: + raise NotImplementedError + + def set_ids_from_data(self, items: List, parse_key: str) -> Any: + raise NotImplementedError + + def save_ids_to_file(self, file_path: str) -> None: + """Save language IDs to a json file. + + Args: + file_path (str): Path to the output file. + """ + self._save_json(file_path, self.name_to_id) + + @staticmethod + def init_from_config(config: Coqpit) -> "LanguageManager": + """Initialize the language manager from a Coqpit config. + + Args: + config (Coqpit): Coqpit config. 
+ """ + language_manager = None + if check_config_and_model_args(config, "use_language_embedding", True): + if config.get("language_ids_file", None): + language_manager = LanguageManager(language_ids_file_path=config.language_ids_file) + language_manager = LanguageManager(config=config) + return language_manager + + +def _set_file_path(path): + """Find the language_ids.json under the given path or the above it. + Intended to band aid the different paths returned in restored and continued training.""" + path_restore = os.path.join(os.path.dirname(path), "language_ids.json") + path_continue = os.path.join(path, "language_ids.json") + fs = fsspec.get_mapper(path).fs + if fs.exists(path_restore): + return path_restore + if fs.exists(path_continue): + return path_continue + return None + + +def get_language_balancer_weights(items: list): + language_names = np.array([item["language"] for item in items]) + unique_language_names = np.unique(language_names).tolist() + language_ids = [unique_language_names.index(l) for l in language_names] + language_count = np.array([len(np.where(language_names == l)[0]) for l in unique_language_names]) + weight_language = 1.0 / language_count + # get weight for each sample + dataset_samples_weight = np.array([weight_language[l] for l in language_ids]) + # normalize + dataset_samples_weight = dataset_samples_weight / np.linalg.norm(dataset_samples_weight) + return torch.from_numpy(dataset_samples_weight).float() diff --git a/TTS/tts/utils/managers.py b/TTS/tts/utils/managers.py new file mode 100644 index 0000000..1f94c53 --- /dev/null +++ b/TTS/tts/utils/managers.py @@ -0,0 +1,383 @@ +import json +import random +from typing import Any, Dict, List, Tuple, Union + +import fsspec +import numpy as np +import torch + +from TTS.config import load_config +from TTS.encoder.utils.generic_utils import setup_encoder_model +from TTS.utils.audio import AudioProcessor + + +def load_file(path: str): + if path.endswith(".json"): + with fsspec.open(path, 
"r") as f: + return json.load(f) + elif path.endswith(".pth"): + with fsspec.open(path, "rb") as f: + return torch.load(f, map_location="cpu") + else: + raise ValueError("Unsupported file type") + + +def save_file(obj: Any, path: str): + if path.endswith(".json"): + with fsspec.open(path, "w") as f: + json.dump(obj, f, indent=4) + elif path.endswith(".pth"): + with fsspec.open(path, "wb") as f: + torch.save(obj, f) + else: + raise ValueError("Unsupported file type") + + +class BaseIDManager: + """Base `ID` Manager class. Every new `ID` manager must inherit this. + It defines common `ID` manager specific functions. + """ + + def __init__(self, id_file_path: str = ""): + self.name_to_id = {} + + if id_file_path: + self.load_ids_from_file(id_file_path) + + @staticmethod + def _load_json(json_file_path: str) -> Dict: + with fsspec.open(json_file_path, "r") as f: + return json.load(f) + + @staticmethod + def _save_json(json_file_path: str, data: dict) -> None: + with fsspec.open(json_file_path, "w") as f: + json.dump(data, f, indent=4) + + def set_ids_from_data(self, items: List, parse_key: str) -> None: + """Set IDs from data samples. + + Args: + items (List): Data sampled returned by `load_tts_samples()`. + """ + self.name_to_id = self.parse_ids_from_data(items, parse_key=parse_key) + + def load_ids_from_file(self, file_path: str) -> None: + """Set IDs from a file. + + Args: + file_path (str): Path to the file. + """ + self.name_to_id = load_file(file_path) + + def save_ids_to_file(self, file_path: str) -> None: + """Save IDs to a json file. + + Args: + file_path (str): Path to the output file. + """ + save_file(self.name_to_id, file_path) + + def get_random_id(self) -> Any: + """Get a random embedding. + + Args: + + Returns: + np.ndarray: embedding. 
+ """ + if self.name_to_id: + return self.name_to_id[random.choices(list(self.name_to_id.keys()))[0]] + + return None + + @staticmethod + def parse_ids_from_data(items: List, parse_key: str) -> Tuple[Dict]: + """Parse IDs from data samples retured by `load_tts_samples()`. + + Args: + items (list): Data sampled returned by `load_tts_samples()`. + parse_key (str): The key to being used to parse the data. + Returns: + Tuple[Dict]: speaker IDs. + """ + classes = sorted({item[parse_key] for item in items}) + ids = {name: i for i, name in enumerate(classes)} + return ids + + +class EmbeddingManager(BaseIDManager): + """Base `Embedding` Manager class. Every new `Embedding` manager must inherit this. + It defines common `Embedding` manager specific functions. + + It expects embeddings files in the following format: + + :: + + { + 'audio_file_key':{ + 'name': 'category_name', + 'embedding'[] + }, + ... + } + + `audio_file_key` is a unique key to the audio file in the dataset. It can be the path to the file or any other unique key. + `embedding` is the embedding vector of the audio file. + `name` can be name of the speaker of the audio file. 
+ """ + + def __init__( + self, + embedding_file_path: Union[str, List[str]] = "", + id_file_path: str = "", + encoder_model_path: str = "", + encoder_config_path: str = "", + use_cuda: bool = False, + ): + super().__init__(id_file_path=id_file_path) + + self.embeddings = {} + self.embeddings_by_names = {} + self.clip_ids = [] + self.encoder = None + self.encoder_ap = None + self.use_cuda = use_cuda + + if embedding_file_path: + if isinstance(embedding_file_path, list): + self.load_embeddings_from_list_of_files(embedding_file_path) + else: + self.load_embeddings_from_file(embedding_file_path) + + if encoder_model_path and encoder_config_path: + self.init_encoder(encoder_model_path, encoder_config_path, use_cuda) + + @property + def num_embeddings(self): + """Get number of embeddings.""" + return len(self.embeddings) + + @property + def num_names(self): + """Get number of embeddings.""" + return len(self.embeddings_by_names) + + @property + def embedding_dim(self): + """Dimensionality of embeddings. If embeddings are not loaded, returns zero.""" + if self.embeddings: + return len(self.embeddings[list(self.embeddings.keys())[0]]["embedding"]) + return 0 + + @property + def embedding_names(self): + """Get embedding names.""" + return list(self.embeddings_by_names.keys()) + + def save_embeddings_to_file(self, file_path: str) -> None: + """Save embeddings to a json file. + + Args: + file_path (str): Path to the output file. + """ + save_file(self.embeddings, file_path) + + @staticmethod + def read_embeddings_from_file(file_path: str): + """Load embeddings from a json file. + + Args: + file_path (str): Path to the file. 
+ """ + embeddings = load_file(file_path) + speakers = sorted({x["name"] for x in embeddings.values()}) + name_to_id = {name: i for i, name in enumerate(speakers)} + clip_ids = list(set(sorted(clip_name for clip_name in embeddings.keys()))) + # cache embeddings_by_names for fast inference using a bigger speakers.json + embeddings_by_names = {} + for x in embeddings.values(): + if x["name"] not in embeddings_by_names.keys(): + embeddings_by_names[x["name"]] = [x["embedding"]] + else: + embeddings_by_names[x["name"]].append(x["embedding"]) + return name_to_id, clip_ids, embeddings, embeddings_by_names + + def load_embeddings_from_file(self, file_path: str) -> None: + """Load embeddings from a json file. + + Args: + file_path (str): Path to the target json file. + """ + self.name_to_id, self.clip_ids, self.embeddings, self.embeddings_by_names = self.read_embeddings_from_file( + file_path + ) + + def load_embeddings_from_list_of_files(self, file_paths: List[str]) -> None: + """Load embeddings from a list of json files and don't allow duplicate keys. + + Args: + file_paths (List[str]): List of paths to the target json files. + """ + self.name_to_id = {} + self.clip_ids = [] + self.embeddings_by_names = {} + self.embeddings = {} + for file_path in file_paths: + ids, clip_ids, embeddings, embeddings_by_names = self.read_embeddings_from_file(file_path) + # check colliding keys + duplicates = set(self.embeddings.keys()) & set(embeddings.keys()) + if duplicates: + raise ValueError(f" [!] Duplicate embedding names <{duplicates}> in {file_path}") + # store values + self.name_to_id.update(ids) + self.clip_ids.extend(clip_ids) + self.embeddings_by_names.update(embeddings_by_names) + self.embeddings.update(embeddings) + + # reset name_to_id to get the right speaker ids + self.name_to_id = {name: i for i, name in enumerate(self.name_to_id)} + + def get_embedding_by_clip(self, clip_idx: str) -> List: + """Get embedding by clip ID. + + Args: + clip_idx (str): Target clip ID. 
+ + Returns: + List: embedding as a list. + """ + return self.embeddings[clip_idx]["embedding"] + + def get_embeddings_by_name(self, idx: str) -> List[List]: + """Get all embeddings of a speaker. + + Args: + idx (str): Target name. + + Returns: + List[List]: all the embeddings of the given speaker. + """ + return self.embeddings_by_names[idx] + + def get_embeddings_by_names(self) -> Dict: + """Get all embeddings by names. + + Returns: + Dict: all the embeddings of each speaker. + """ + embeddings_by_names = {} + for x in self.embeddings.values(): + if x["name"] not in embeddings_by_names.keys(): + embeddings_by_names[x["name"]] = [x["embedding"]] + else: + embeddings_by_names[x["name"]].append(x["embedding"]) + return embeddings_by_names + + def get_mean_embedding(self, idx: str, num_samples: int = None, randomize: bool = False) -> np.ndarray: + """Get mean embedding of a idx. + + Args: + idx (str): Target name. + num_samples (int, optional): Number of samples to be averaged. Defaults to None. + randomize (bool, optional): Pick random `num_samples` of embeddings. Defaults to False. + + Returns: + np.ndarray: Mean embedding. + """ + embeddings = self.get_embeddings_by_name(idx) + if num_samples is None: + embeddings = np.stack(embeddings).mean(0) + else: + assert len(embeddings) >= num_samples, f" [!] {idx} has number of samples < {num_samples}" + if randomize: + embeddings = np.stack(random.choices(embeddings, k=num_samples)).mean(0) + else: + embeddings = np.stack(embeddings[:num_samples]).mean(0) + return embeddings + + def get_random_embedding(self) -> Any: + """Get a random embedding. + + Args: + + Returns: + np.ndarray: embedding. + """ + if self.embeddings: + return self.embeddings[random.choices(list(self.embeddings.keys()))[0]]["embedding"] + + return None + + def get_clips(self) -> List: + return sorted(self.embeddings.keys()) + + def init_encoder(self, model_path: str, config_path: str, use_cuda=False) -> None: + """Initialize a speaker encoder model. 
+ + Args: + model_path (str): Model file path. + config_path (str): Model config file path. + use_cuda (bool, optional): Use CUDA. Defaults to False. + """ + self.use_cuda = use_cuda + self.encoder_config = load_config(config_path) + self.encoder = setup_encoder_model(self.encoder_config) + self.encoder_criterion = self.encoder.load_checkpoint( + self.encoder_config, model_path, eval=True, use_cuda=use_cuda, cache=True + ) + self.encoder_ap = AudioProcessor(**self.encoder_config.audio) + + def compute_embedding_from_clip(self, wav_file: Union[str, List[str]]) -> list: + """Compute a embedding from a given audio file. + + Args: + wav_file (Union[str, List[str]]): Target file path. + + Returns: + list: Computed embedding. + """ + + def _compute(wav_file: str): + waveform = self.encoder_ap.load_wav(wav_file, sr=self.encoder_ap.sample_rate) + if not self.encoder_config.model_params.get("use_torch_spec", False): + m_input = self.encoder_ap.melspectrogram(waveform) + m_input = torch.from_numpy(m_input) + else: + m_input = torch.from_numpy(waveform) + + if self.use_cuda: + m_input = m_input.cuda() + m_input = m_input.unsqueeze(0) + embedding = self.encoder.compute_embedding(m_input) + return embedding + + if isinstance(wav_file, list): + # compute the mean embedding + embeddings = None + for wf in wav_file: + embedding = _compute(wf) + if embeddings is None: + embeddings = embedding + else: + embeddings += embedding + return (embeddings / len(wav_file))[0].tolist() + embedding = _compute(wav_file) + return embedding[0].tolist() + + def compute_embeddings(self, feats: Union[torch.Tensor, np.ndarray]) -> List: + """Compute embedding from features. + + Args: + feats (Union[torch.Tensor, np.ndarray]): Input features. + + Returns: + List: computed embedding. 
+ """ + if isinstance(feats, np.ndarray): + feats = torch.from_numpy(feats) + if feats.ndim == 2: + feats = feats.unsqueeze(0) + if self.use_cuda: + feats = feats.cuda() + return self.encoder.compute_embedding(feats) diff --git a/TTS/tts/utils/measures.py b/TTS/tts/utils/measures.py new file mode 100644 index 0000000..90e862e --- /dev/null +++ b/TTS/tts/utils/measures.py @@ -0,0 +1,15 @@ +def alignment_diagonal_score(alignments, binary=False): + """ + Compute how diagonal alignment predictions are. It is useful + to measure the alignment consistency of a model + Args: + alignments (torch.Tensor): batch of alignments. + binary (bool): if True, ignore scores and consider attention + as a binary mask. + Shape: + - alignments : :math:`[B, T_de, T_en]` + """ + maxs = alignments.max(dim=1)[0] + if binary: + maxs[maxs > 0] = 1 + return maxs.mean(dim=1).mean(dim=0).item() diff --git a/TTS/tts/utils/monotonic_align/__init__.py b/TTS/tts/utils/monotonic_align/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-311.pyc b/TTS/tts/utils/monotonic_align/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ebada3fa0876ded4091b3164d76040250547aae6 GIT binary patch literal 191 zcmZ3^%ge<81nFs6X(0MBh=2h`DC095kTIPhg&~+hlhJP_LlF~@{~09tD@Z@IIJKx) zzcR5nL*FH}IJ+djK;Jn(H?1<%Q$M-1xFkO}J}*BdwOBtSBv`+sq*%YSBr~U2KQ})w zza&2|GdVsnCo?@yKR!M)FS8^*Uaz3?7l%!5eoARhs$CH)&] + }, + ... + } + + + 3. Computing the d-vectors by the speaker encoder. It loads the speaker encoder model and + computes the d-vectors for a given clip or speaker. + + Args: + d_vectors_file_path (str, optional): Path to the metafile including x vectors. Defaults to "". + speaker_id_file_path (str, optional): Path to the metafile that maps speaker names to ids used by + TTS models. Defaults to "". + encoder_model_path (str, optional): Path to the speaker encoder model file. Defaults to "". 
+ encoder_config_path (str, optional): Path to the speaker encoder config file. + + Examples: + >>> # load audio processor and speaker encoder + >>> ap = AudioProcessor(**config.audio) + >>> manager = SpeakerManager(encoder_model_path=encoder_model_path, encoder_config_path=encoder_config_path) + >>> # load a sample audio and compute embedding + >>> waveform = ap.load_wav(sample_wav_path) + >>> mel = ap.melspectrogram(waveform) + >>> d_vector = manager.compute_embeddings(mel.T) + """ + + def __init__( + self, + data_items: List[List[Any]] = None, + d_vectors_file_path: str = "", + speaker_id_file_path: str = "", + encoder_model_path: str = "", + encoder_config_path: str = "", + use_cuda: bool = False, + ): + super().__init__( + embedding_file_path=d_vectors_file_path, + id_file_path=speaker_id_file_path, + encoder_model_path=encoder_model_path, + encoder_config_path=encoder_config_path, + use_cuda=use_cuda, + ) + + if data_items: + self.set_ids_from_data(data_items, parse_key="speaker_name") + + @property + def num_speakers(self): + return len(self.name_to_id) + + @property + def speaker_names(self): + return list(self.name_to_id.keys()) + + def get_speakers(self) -> List: + return self.name_to_id + + @staticmethod + def init_from_config(config: "Coqpit", samples: Union[List[List], List[Dict]] = None) -> "SpeakerManager": + """Initialize a speaker manager from config + + Args: + config (Coqpit): Config object. + samples (Union[List[List], List[Dict]], optional): List of data samples to parse out the speaker names. + Defaults to None. + + Returns: + SpeakerManager: Speaker manager object. 
+ """ + speaker_manager = None + if get_from_config_or_model_args_with_default(config, "use_speaker_embedding", False): + if samples: + speaker_manager = SpeakerManager(data_items=samples) + if get_from_config_or_model_args_with_default(config, "speaker_file", None): + speaker_manager = SpeakerManager( + speaker_id_file_path=get_from_config_or_model_args_with_default(config, "speaker_file", None) + ) + if get_from_config_or_model_args_with_default(config, "speakers_file", None): + speaker_manager = SpeakerManager( + speaker_id_file_path=get_from_config_or_model_args_with_default(config, "speakers_file", None) + ) + + if get_from_config_or_model_args_with_default(config, "use_d_vector_file", False): + speaker_manager = SpeakerManager() + if get_from_config_or_model_args_with_default(config, "d_vector_file", None): + speaker_manager = SpeakerManager( + d_vectors_file_path=get_from_config_or_model_args_with_default(config, "d_vector_file", None) + ) + return speaker_manager + + +def _set_file_path(path): + """Find the speakers.json under the given path or the above it. + Intended to band aid the different paths returned in restored and continued training.""" + path_restore = os.path.join(os.path.dirname(path), "speakers.json") + path_continue = os.path.join(path, "speakers.json") + fs = fsspec.get_mapper(path).fs + if fs.exists(path_restore): + return path_restore + if fs.exists(path_continue): + return path_continue + raise FileNotFoundError(f" [!] 
`speakers.json` not found in {path}") + + +def load_speaker_mapping(out_path): + """Loads speaker mapping if already present.""" + if os.path.splitext(out_path)[1] == ".json": + json_file = out_path + else: + json_file = _set_file_path(out_path) + with fsspec.open(json_file, "r") as f: + return json.load(f) + + +def save_speaker_mapping(out_path, speaker_mapping): + """Saves speaker mapping if not yet present.""" + if out_path is not None: + speakers_json_path = _set_file_path(out_path) + with fsspec.open(speakers_json_path, "w") as f: + json.dump(speaker_mapping, f, indent=4) + + +def get_speaker_manager(c: Coqpit, data: List = None, restore_path: str = None, out_path: str = None) -> SpeakerManager: + """Initiate a `SpeakerManager` instance by the provided config. + + Args: + c (Coqpit): Model configuration. + restore_path (str): Path to a previous training folder. + data (List): Data samples used in training to infer speakers from. It must be provided if speaker embedding + layers is used. Defaults to None. + out_path (str, optional): Save the generated speaker IDs to a output path. Defaults to None. + + Returns: + SpeakerManager: initialized and ready to use instance. + """ + speaker_manager = SpeakerManager() + if c.use_speaker_embedding: + if data is not None: + speaker_manager.set_ids_from_data(data, parse_key="speaker_name") + if restore_path: + speakers_file = _set_file_path(restore_path) + # restoring speaker manager from a previous run. 
+ if c.use_d_vector_file: + # restore speaker manager with the embedding file + if not os.path.exists(speakers_file): + print("WARNING: speakers.json was not found in restore_path, trying to use CONFIG.d_vector_file") + if not os.path.exists(c.d_vector_file): + raise RuntimeError( + "You must copy the file speakers.json to restore_path, or set a valid file in CONFIG.d_vector_file" + ) + speaker_manager.load_embeddings_from_file(c.d_vector_file) + speaker_manager.load_embeddings_from_file(speakers_file) + elif not c.use_d_vector_file: # restore speaker manager with speaker ID file. + speaker_ids_from_data = speaker_manager.name_to_id + speaker_manager.load_ids_from_file(speakers_file) + assert all( + speaker in speaker_manager.name_to_id for speaker in speaker_ids_from_data + ), " [!] You cannot introduce new speakers to a pre-trained model." + elif c.use_d_vector_file and c.d_vector_file: + # new speaker manager with external speaker embeddings. + speaker_manager.load_embeddings_from_file(c.d_vector_file) + elif c.use_d_vector_file and not c.d_vector_file: + raise "use_d_vector_file is True, so you need pass a external speaker embedding file." + elif c.use_speaker_embedding and "speakers_file" in c and c.speakers_file: + # new speaker manager with speaker IDs file. 
+ speaker_manager.load_ids_from_file(c.speakers_file) + + if speaker_manager.num_speakers > 0: + print( + " > Speaker manager is loaded with {} speakers: {}".format( + speaker_manager.num_speakers, ", ".join(speaker_manager.name_to_id) + ) + ) + + # save file if path is defined + if out_path: + out_file_path = os.path.join(out_path, "speakers.json") + print(f" > Saving `speakers.json` to {out_file_path}.") + if c.use_d_vector_file and c.d_vector_file: + speaker_manager.save_embeddings_to_file(out_file_path) + else: + speaker_manager.save_ids_to_file(out_file_path) + return speaker_manager + + +def get_speaker_balancer_weights(items: list): + speaker_names = np.array([item["speaker_name"] for item in items]) + unique_speaker_names = np.unique(speaker_names).tolist() + speaker_ids = [unique_speaker_names.index(l) for l in speaker_names] + speaker_count = np.array([len(np.where(speaker_names == l)[0]) for l in unique_speaker_names]) + weight_speaker = 1.0 / speaker_count + dataset_samples_weight = np.array([weight_speaker[l] for l in speaker_ids]) + # normalize + dataset_samples_weight = dataset_samples_weight / np.linalg.norm(dataset_samples_weight) + return torch.from_numpy(dataset_samples_weight).float() diff --git a/TTS/tts/utils/ssim.py b/TTS/tts/utils/ssim.py new file mode 100644 index 0000000..4bc3bef --- /dev/null +++ b/TTS/tts/utils/ssim.py @@ -0,0 +1,383 @@ +# Adopted from https://github.com/photosynthesis-team/piq + +from typing import List, Optional, Tuple, Union + +import torch +import torch.nn.functional as F +from torch.nn.modules.loss import _Loss + + +def _reduce(x: torch.Tensor, reduction: str = "mean") -> torch.Tensor: + r"""Reduce input in batch dimension if needed. + Args: + x: Tensor with shape (N, *). + reduction: Specifies the reduction type: + ``'none'`` | ``'mean'`` | ``'sum'``. 
Default: ``'mean'`` + """ + if reduction == "none": + return x + if reduction == "mean": + return x.mean(dim=0) + if reduction == "sum": + return x.sum(dim=0) + raise ValueError("Unknown reduction. Expected one of {'none', 'mean', 'sum'}") + + +def _validate_input( + tensors: List[torch.Tensor], + dim_range: Tuple[int, int] = (0, -1), + data_range: Tuple[float, float] = (0.0, -1.0), + # size_dim_range: Tuple[float, float] = (0., -1.), + size_range: Optional[Tuple[int, int]] = None, +) -> None: + r"""Check that input(-s) satisfies the requirements + Args: + tensors: Tensors to check + dim_range: Allowed number of dimensions. (min, max) + data_range: Allowed range of values in tensors. (min, max) + size_range: Dimensions to include in size comparison. (start_dim, end_dim + 1) + """ + + if not __debug__: + return + + x = tensors[0] + + for t in tensors: + assert torch.is_tensor(t), f"Expected torch.Tensor, got {type(t)}" + assert t.device == x.device, f"Expected tensors to be on {x.device}, got {t.device}" + + if size_range is None: + assert t.size() == x.size(), f"Expected tensors with same size, got {t.size()} and {x.size()}" + else: + assert ( + t.size()[size_range[0] : size_range[1]] == x.size()[size_range[0] : size_range[1]] + ), f"Expected tensors with same size at given dimensions, got {t.size()} and {x.size()}" + + if dim_range[0] == dim_range[1]: + assert t.dim() == dim_range[0], f"Expected number of dimensions to be {dim_range[0]}, got {t.dim()}" + elif dim_range[0] < dim_range[1]: + assert ( + dim_range[0] <= t.dim() <= dim_range[1] + ), f"Expected number of dimensions to be between {dim_range[0]} and {dim_range[1]}, got {t.dim()}" + + if data_range[0] < data_range[1]: + assert data_range[0] <= t.min(), f"Expected values to be greater or equal to {data_range[0]}, got {t.min()}" + assert t.max() <= data_range[1], f"Expected values to be lower or equal to {data_range[1]}, got {t.max()}" + + +def gaussian_filter(kernel_size: int, sigma: float) -> torch.Tensor: 
+ r"""Returns 2D Gaussian kernel N(0,`sigma`^2) + Args: + size: Size of the kernel + sigma: Std of the distribution + Returns: + gaussian_kernel: Tensor with shape (1, kernel_size, kernel_size) + """ + coords = torch.arange(kernel_size, dtype=torch.float32) + coords -= (kernel_size - 1) / 2.0 + + g = coords**2 + g = (-(g.unsqueeze(0) + g.unsqueeze(1)) / (2 * sigma**2)).exp() + + g /= g.sum() + return g.unsqueeze(0) + + +def ssim( + x: torch.Tensor, + y: torch.Tensor, + kernel_size: int = 11, + kernel_sigma: float = 1.5, + data_range: Union[int, float] = 1.0, + reduction: str = "mean", + full: bool = False, + downsample: bool = True, + k1: float = 0.01, + k2: float = 0.03, +) -> List[torch.Tensor]: + r"""Interface of Structural Similarity (SSIM) index. + Inputs supposed to be in range ``[0, data_range]``. + To match performance with skimage and tensorflow set ``'downsample' = True``. + + Args: + x: An input tensor. Shape :math:`(N, C, H, W)` or :math:`(N, C, H, W, 2)`. + y: A target tensor. Shape :math:`(N, C, H, W)` or :math:`(N, C, H, W, 2)`. + kernel_size: The side-length of the sliding window used in comparison. Must be an odd value. + kernel_sigma: Sigma of normal distribution. + data_range: Maximum value range of images (usually 1.0 or 255). + reduction: Specifies the reduction type: + ``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'`` + full: Return cs map or not. + downsample: Perform average pool before SSIM computation. Default: True + k1: Algorithm parameter, K1 (small constant). + k2: Algorithm parameter, K2 (small constant). + Try a larger K2 constant (e.g. 0.4) if you get a negative or NaN results. + + Returns: + Value of Structural Similarity (SSIM) index. In case of 5D input tensors, complex value is returned + as a tensor of size 2. + + References: + Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). + Image quality assessment: From error visibility to structural similarity. + IEEE Transactions on Image Processing, 13, 600-612. 
+ https://ece.uwaterloo.ca/~z70wang/publications/ssim.pdf, + DOI: `10.1109/TIP.2003.819861` + """ + assert kernel_size % 2 == 1, f"Kernel size must be odd, got [{kernel_size}]" + _validate_input([x, y], dim_range=(4, 5), data_range=(0, data_range)) + + x = x / float(data_range) + y = y / float(data_range) + + # Averagepool image if the size is large enough + f = max(1, round(min(x.size()[-2:]) / 256)) + if (f > 1) and downsample: + x = F.avg_pool2d(x, kernel_size=f) + y = F.avg_pool2d(y, kernel_size=f) + + kernel = gaussian_filter(kernel_size, kernel_sigma).repeat(x.size(1), 1, 1, 1).to(y) + _compute_ssim_per_channel = _ssim_per_channel_complex if x.dim() == 5 else _ssim_per_channel + ssim_map, cs_map = _compute_ssim_per_channel(x=x, y=y, kernel=kernel, k1=k1, k2=k2) + ssim_val = ssim_map.mean(1) + cs = cs_map.mean(1) + + ssim_val = _reduce(ssim_val, reduction) + cs = _reduce(cs, reduction) + + if full: + return [ssim_val, cs] + + return ssim_val + + +class SSIMLoss(_Loss): + r"""Creates a criterion that measures the structural similarity index error between + each element in the input :math:`x` and target :math:`y`. + + To match performance with skimage and tensorflow set ``'downsample' = True``. + + The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as: + + .. math:: + SSIM = \{ssim_1,\dots,ssim_{N \times C}\}\\ + ssim_{l}(x, y) = \frac{(2 \mu_x \mu_y + c_1) (2 \sigma_{xy} + c_2)} + {(\mu_x^2 +\mu_y^2 + c_1)(\sigma_x^2 +\sigma_y^2 + c_2)}, + + where :math:`N` is the batch size, `C` is the channel size. If :attr:`reduction` is not ``'none'`` + (default ``'mean'``), then: + + .. math:: + SSIMLoss(x, y) = + \begin{cases} + \operatorname{mean}(1 - SSIM), & \text{if reduction} = \text{'mean';}\\ + \operatorname{sum}(1 - SSIM), & \text{if reduction} = \text{'sum'.} + \end{cases} + + :math:`x` and :math:`y` are tensors of arbitrary shapes with a total + of :math:`n` elements each. 
+ + The sum operation still operates over all the elements, and divides by :math:`n`. + The division by :math:`n` can be avoided if one sets ``reduction = 'sum'``. + In case of 5D input tensors, complex value is returned as a tensor of size 2. + + Args: + kernel_size: By default, the mean and covariance of a pixel is obtained + by convolution with given filter_size. + kernel_sigma: Standard deviation for Gaussian kernel. + k1: Coefficient related to c1 in the above equation. + k2: Coefficient related to c2 in the above equation. + downsample: Perform average pool before SSIM computation. Default: True + reduction: Specifies the reduction type: + ``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'`` + data_range: Maximum value range of images (usually 1.0 or 255). + + Examples: + >>> loss = SSIMLoss() + >>> x = torch.rand(3, 3, 256, 256, requires_grad=True) + >>> y = torch.rand(3, 3, 256, 256) + >>> output = loss(x, y) + >>> output.backward() + + References: + Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). + Image quality assessment: From error visibility to structural similarity. + IEEE Transactions on Image Processing, 13, 600-612. + https://ece.uwaterloo.ca/~z70wang/publications/ssim.pdf, + DOI:`10.1109/TIP.2003.819861` + """ + __constants__ = ["kernel_size", "k1", "k2", "sigma", "kernel", "reduction"] + + def __init__( + self, + kernel_size: int = 11, + kernel_sigma: float = 1.5, + k1: float = 0.01, + k2: float = 0.03, + downsample: bool = True, + reduction: str = "mean", + data_range: Union[int, float] = 1.0, + ) -> None: + super().__init__() + + # Generic loss parameters. + self.reduction = reduction + + # Loss-specific parameters. + self.kernel_size = kernel_size + + # This check might look redundant because kernel size is checked within the ssim function anyway. + # However, this check allows to fail fast when the loss is being initialised and training has not been started. 
+ assert kernel_size % 2 == 1, f"Kernel size must be odd, got [{kernel_size}]" + self.kernel_sigma = kernel_sigma + self.k1 = k1 + self.k2 = k2 + self.downsample = downsample + self.data_range = data_range + + def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + r"""Computation of Structural Similarity (SSIM) index as a loss function. + + Args: + x: An input tensor. Shape :math:`(N, C, H, W)` or :math:`(N, C, H, W, 2)`. + y: A target tensor. Shape :math:`(N, C, H, W)` or :math:`(N, C, H, W, 2)`. + + Returns: + Value of SSIM loss to be minimized, i.e ``1 - ssim`` in [0, 1] range. In case of 5D input tensors, + complex value is returned as a tensor of size 2. + """ + + score = ssim( + x=x, + y=y, + kernel_size=self.kernel_size, + kernel_sigma=self.kernel_sigma, + downsample=self.downsample, + data_range=self.data_range, + reduction=self.reduction, + full=False, + k1=self.k1, + k2=self.k2, + ) + return torch.ones_like(score) - score + + +def _ssim_per_channel( + x: torch.Tensor, + y: torch.Tensor, + kernel: torch.Tensor, + k1: float = 0.01, + k2: float = 0.03, +) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + r"""Calculate Structural Similarity (SSIM) index for X and Y per channel. + + Args: + x: An input tensor. Shape :math:`(N, C, H, W)`. + y: A target tensor. Shape :math:`(N, C, H, W)`. + kernel: 2D Gaussian kernel. + k1: Algorithm parameter, K1 (small constant, see [1]). + k2: Algorithm parameter, K2 (small constant, see [1]). + Try a larger K2 constant (e.g. 0.4) if you get a negative or NaN results. + + Returns: + Full Value of Structural Similarity (SSIM) index. + """ + if x.size(-1) < kernel.size(-1) or x.size(-2) < kernel.size(-2): + raise ValueError( + f"Kernel size can't be greater than actual input size. Input size: {x.size()}. 
" + f"Kernel size: {kernel.size()}" + ) + + c1 = k1**2 + c2 = k2**2 + n_channels = x.size(1) + mu_x = F.conv2d(x, weight=kernel, stride=1, padding=0, groups=n_channels) + mu_y = F.conv2d(y, weight=kernel, stride=1, padding=0, groups=n_channels) + + mu_xx = mu_x**2 + mu_yy = mu_y**2 + mu_xy = mu_x * mu_y + + sigma_xx = F.conv2d(x**2, weight=kernel, stride=1, padding=0, groups=n_channels) - mu_xx + sigma_yy = F.conv2d(y**2, weight=kernel, stride=1, padding=0, groups=n_channels) - mu_yy + sigma_xy = F.conv2d(x * y, weight=kernel, stride=1, padding=0, groups=n_channels) - mu_xy + + # Contrast sensitivity (CS) with alpha = beta = gamma = 1. + cs = (2.0 * sigma_xy + c2) / (sigma_xx + sigma_yy + c2) + + # Structural similarity (SSIM) + ss = (2.0 * mu_xy + c1) / (mu_xx + mu_yy + c1) * cs + + ssim_val = ss.mean(dim=(-1, -2)) + cs = cs.mean(dim=(-1, -2)) + return ssim_val, cs + + +def _ssim_per_channel_complex( + x: torch.Tensor, + y: torch.Tensor, + kernel: torch.Tensor, + k1: float = 0.01, + k2: float = 0.03, +) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + r"""Calculate Structural Similarity (SSIM) index for Complex X and Y per channel. + + Args: + x: An input tensor. Shape :math:`(N, C, H, W, 2)`. + y: A target tensor. Shape :math:`(N, C, H, W, 2)`. + kernel: 2-D gauss kernel. + k1: Algorithm parameter, K1 (small constant, see [1]). + k2: Algorithm parameter, K2 (small constant, see [1]). + Try a larger K2 constant (e.g. 0.4) if you get a negative or NaN results. + + Returns: + Full Value of Complex Structural Similarity (SSIM) index. + """ + n_channels = x.size(1) + if x.size(-2) < kernel.size(-1) or x.size(-3) < kernel.size(-2): + raise ValueError( + f"Kernel size can't be greater than actual input size. Input size: {x.size()}. 
" + f"Kernel size: {kernel.size()}" + ) + + c1 = k1**2 + c2 = k2**2 + + x_real = x[..., 0] + x_imag = x[..., 1] + y_real = y[..., 0] + y_imag = y[..., 1] + + mu1_real = F.conv2d(x_real, weight=kernel, stride=1, padding=0, groups=n_channels) + mu1_imag = F.conv2d(x_imag, weight=kernel, stride=1, padding=0, groups=n_channels) + mu2_real = F.conv2d(y_real, weight=kernel, stride=1, padding=0, groups=n_channels) + mu2_imag = F.conv2d(y_imag, weight=kernel, stride=1, padding=0, groups=n_channels) + + mu1_sq = mu1_real.pow(2) + mu1_imag.pow(2) + mu2_sq = mu2_real.pow(2) + mu2_imag.pow(2) + mu1_mu2_real = mu1_real * mu2_real - mu1_imag * mu2_imag + mu1_mu2_imag = mu1_real * mu2_imag + mu1_imag * mu2_real + + compensation = 1.0 + + x_sq = x_real.pow(2) + x_imag.pow(2) + y_sq = y_real.pow(2) + y_imag.pow(2) + x_y_real = x_real * y_real - x_imag * y_imag + x_y_imag = x_real * y_imag + x_imag * y_real + + sigma1_sq = F.conv2d(x_sq, weight=kernel, stride=1, padding=0, groups=n_channels) - mu1_sq + sigma2_sq = F.conv2d(y_sq, weight=kernel, stride=1, padding=0, groups=n_channels) - mu2_sq + sigma12_real = F.conv2d(x_y_real, weight=kernel, stride=1, padding=0, groups=n_channels) - mu1_mu2_real + sigma12_imag = F.conv2d(x_y_imag, weight=kernel, stride=1, padding=0, groups=n_channels) - mu1_mu2_imag + sigma12 = torch.stack((sigma12_imag, sigma12_real), dim=-1) + mu1_mu2 = torch.stack((mu1_mu2_real, mu1_mu2_imag), dim=-1) + # Set alpha = beta = gamma = 1. 
+ cs_map = (sigma12 * 2 + c2 * compensation) / (sigma1_sq.unsqueeze(-1) + sigma2_sq.unsqueeze(-1) + c2 * compensation) + ssim_map = (mu1_mu2 * 2 + c1 * compensation) / (mu1_sq.unsqueeze(-1) + mu2_sq.unsqueeze(-1) + c1 * compensation) + ssim_map = ssim_map * cs_map + + ssim_val = ssim_map.mean(dim=(-2, -3)) + cs = cs_map.mean(dim=(-2, -3)) + + return ssim_val, cs diff --git a/TTS/tts/utils/synthesis.py b/TTS/tts/utils/synthesis.py new file mode 100644 index 0000000..797151c --- /dev/null +++ b/TTS/tts/utils/synthesis.py @@ -0,0 +1,343 @@ +from typing import Dict + +import numpy as np +import torch +from torch import nn + + +def numpy_to_torch(np_array, dtype, cuda=False, device="cpu"): + if cuda: + device = "cuda" + if np_array is None: + return None + tensor = torch.as_tensor(np_array, dtype=dtype, device=device) + return tensor + + +def compute_style_mel(style_wav, ap, cuda=False, device="cpu"): + if cuda: + device = "cuda" + style_mel = torch.FloatTensor( + ap.melspectrogram(ap.load_wav(style_wav, sr=ap.sample_rate)), + device=device, + ).unsqueeze(0) + return style_mel + + +def run_model_torch( + model: nn.Module, + inputs: torch.Tensor, + speaker_id: int = None, + style_mel: torch.Tensor = None, + style_text: str = None, + d_vector: torch.Tensor = None, + language_id: torch.Tensor = None, +) -> Dict: + """Run a torch model for inference. It does not support batch inference. + + Args: + model (nn.Module): The model to run inference. + inputs (torch.Tensor): Input tensor with character ids. + speaker_id (int, optional): Input speaker ids for multi-speaker models. Defaults to None. + style_mel (torch.Tensor, optional): Spectrograms used for voice styling . Defaults to None. + d_vector (torch.Tensor, optional): d-vector for multi-speaker models . Defaults to None. + + Returns: + Dict: model outputs. 
+ """ + input_lengths = torch.tensor(inputs.shape[1:2]).to(inputs.device) + if hasattr(model, "module"): + _func = model.module.inference + else: + _func = model.inference + outputs = _func( + inputs, + aux_input={ + "x_lengths": input_lengths, + "speaker_ids": speaker_id, + "d_vectors": d_vector, + "style_mel": style_mel, + "style_text": style_text, + "language_ids": language_id, + }, + ) + return outputs + + +def trim_silence(wav, ap): + return wav[: ap.find_endpoint(wav)] + + +def inv_spectrogram(postnet_output, ap, CONFIG): + if CONFIG.model.lower() in ["tacotron"]: + wav = ap.inv_spectrogram(postnet_output.T) + else: + wav = ap.inv_melspectrogram(postnet_output.T) + return wav + + +def id_to_torch(aux_id, cuda=False, device="cpu"): + if cuda: + device = "cuda" + if aux_id is not None: + aux_id = np.asarray(aux_id) + aux_id = torch.from_numpy(aux_id).to(device) + return aux_id + + +def embedding_to_torch(d_vector, cuda=False, device="cpu"): + if cuda: + device = "cuda" + if d_vector is not None: + d_vector = np.asarray(d_vector) + d_vector = torch.from_numpy(d_vector).type(torch.FloatTensor) + d_vector = d_vector.squeeze().unsqueeze(0).to(device) + return d_vector + + +# TODO: perform GL with pytorch for batching +def apply_griffin_lim(inputs, input_lens, CONFIG, ap): + """Apply griffin-lim to each sample iterating throught the first dimension. + Args: + inputs (Tensor or np.Array): Features to be converted by GL. First dimension is the batch size. + input_lens (Tensor or np.Array): 1D array of sample lengths. + CONFIG (Dict): TTS config. + ap (AudioProcessor): TTS audio processor. + """ + wavs = [] + for idx, spec in enumerate(inputs): + wav_len = (input_lens[idx] * ap.hop_length) - ap.hop_length # inverse librosa padding + wav = inv_spectrogram(spec, ap, CONFIG) + # assert len(wav) == wav_len, f" [!] 
wav length: {len(wav)} vs expected: {wav_len}"
+        wavs.append(wav[:wav_len])
+    return wavs
+
+
+def synthesis(
+    model,
+    text,
+    CONFIG,
+    use_cuda,
+    speaker_id=None,
+    style_wav=None,
+    style_text=None,
+    use_griffin_lim=False,
+    do_trim_silence=False,
+    d_vector=None,
+    language_id=None,
+):
+    """Synthesize voice for the given text using Griffin-Lim vocoder or just compute output features to be passed to
+    the vocoder model.
+
+    Args:
+        model (TTS.tts.models):
+            The TTS model to synthesize audio with.
+
+        text (str):
+            The input text to convert to speech.
+
+        CONFIG (Coqpit):
+            Model configuration.
+
+        use_cuda (bool):
+            Enable/disable CUDA.
+
+        speaker_id (int):
+            Speaker ID passed to the speaker embedding layer in multi-speaker model. Defaults to None.
+
+        style_wav (str | Dict[str, float]):
+            Path or tensor to/of a waveform used for computing the style embedding based on GST or Capacitron.
+            Defaults to None, meaning that Capacitron models will sample from the prior distribution to
+            generate random but realistic prosody.
+
+        style_text (str):
+            Transcription of style_wav for Capacitron models. Defaults to None.
+
+        enable_eos_bos_chars (bool):
+            enable special chars for end of sentence and start of sentence. Defaults to False.
+
+        do_trim_silence (bool):
+            trim silence after synthesis. Defaults to False.
+
+        d_vector (torch.Tensor):
+            d-vector for multi-speaker models in shape :math:`[1, D]`. Defaults to None.
+
+        language_id (int):
+            Language ID passed to the language embedding layer in multilingual model. Defaults to None.
+ """ + # device + device = next(model.parameters()).device + if use_cuda: + device = "cuda" + + # GST or Capacitron processing + # TODO: need to handle the case of setting both gst and capacitron to true somewhere + style_mel = None + if CONFIG.has("gst") and CONFIG.gst and style_wav is not None: + if isinstance(style_wav, dict): + style_mel = style_wav + else: + style_mel = compute_style_mel(style_wav, model.ap, device=device) + + if CONFIG.has("capacitron_vae") and CONFIG.use_capacitron_vae and style_wav is not None: + style_mel = compute_style_mel(style_wav, model.ap, device=device) + style_mel = style_mel.transpose(1, 2) # [1, time, depth] + + language_name = None + if language_id is not None: + language = [k for k, v in model.language_manager.name_to_id.items() if v == language_id] + assert len(language) == 1, "language_id must be a valid language" + language_name = language[0] + + # convert text to sequence of token IDs + text_inputs = np.asarray( + model.tokenizer.text_to_ids(text, language=language_name), + dtype=np.int32, + ) + # pass tensors to backend + if speaker_id is not None: + speaker_id = id_to_torch(speaker_id, device=device) + + if d_vector is not None: + d_vector = embedding_to_torch(d_vector, device=device) + + if language_id is not None: + language_id = id_to_torch(language_id, device=device) + + if not isinstance(style_mel, dict): + # GST or Capacitron style mel + style_mel = numpy_to_torch(style_mel, torch.float, device=device) + if style_text is not None: + style_text = np.asarray( + model.tokenizer.text_to_ids(style_text, language=language_id), + dtype=np.int32, + ) + style_text = numpy_to_torch(style_text, torch.long, device=device) + style_text = style_text.unsqueeze(0) + + text_inputs = numpy_to_torch(text_inputs, torch.long, device=device) + text_inputs = text_inputs.unsqueeze(0) + # synthesize voice + outputs = run_model_torch( + model, + text_inputs, + speaker_id, + style_mel, + style_text, + d_vector=d_vector, + 
        language_id=language_id,
+    )
+    model_outputs = outputs["model_outputs"]
+    model_outputs = model_outputs[0].data.cpu().numpy()
+    alignments = outputs["alignments"]
+
+    # convert outputs to numpy
+    # plot results
+    wav = None
+    model_outputs = model_outputs.squeeze()
+    if model_outputs.ndim == 2:  # [T, C_spec]
+        if use_griffin_lim:
+            wav = inv_spectrogram(model_outputs, model.ap, CONFIG)
+            # trim silence
+            if do_trim_silence:
+                wav = trim_silence(wav, model.ap)
+    else:  # [T,]
+        wav = model_outputs
+    return_dict = {
+        "wav": wav,
+        "alignments": alignments,
+        "text_inputs": text_inputs,
+        "outputs": outputs,
+    }
+    return return_dict
+
+
+def transfer_voice(
+    model,
+    CONFIG,
+    use_cuda,
+    reference_wav,
+    speaker_id=None,
+    d_vector=None,
+    reference_speaker_id=None,
+    reference_d_vector=None,
+    do_trim_silence=False,
+    use_griffin_lim=False,
+):
+    """Transfer the voice of a reference waveform using Griffin-Lim vocoder or just compute output features to be
+    passed to the vocoder model.
+
+    Args:
+        model (TTS.tts.models):
+            The TTS model to synthesize audio with.
+
+        CONFIG (Coqpit):
+            Model configuration.
+
+        use_cuda (bool):
+            Enable/disable CUDA.
+
+        reference_wav (str):
+            Path of reference_wav to be used for voice conversion.
+
+        speaker_id (int):
+            Speaker ID passed to the speaker embedding layer in multi-speaker model. Defaults to None.
+
+        d_vector (torch.Tensor):
+            d-vector for multi-speaker models in shape :math:`[1, D]`. Defaults to None.
+
+        reference_speaker_id (int):
+            Reference Speaker ID passed to the speaker embedding layer in multi-speaker model. Defaults to None.
+
+        reference_d_vector (torch.Tensor):
+            Reference d-vector for multi-speaker models in shape :math:`[1, D]`. Defaults to None.
+
+        enable_eos_bos_chars (bool):
+            enable special chars for end of sentence and start of sentence. Defaults to False.
+
+        do_trim_silence (bool):
+            trim silence after synthesis. Defaults to False.
+ """ + # device + device = next(model.parameters()).device + if use_cuda: + device = "cuda" + + # pass tensors to backend + if speaker_id is not None: + speaker_id = id_to_torch(speaker_id, device=device) + + if d_vector is not None: + d_vector = embedding_to_torch(d_vector, device=device) + + if reference_d_vector is not None: + reference_d_vector = embedding_to_torch(reference_d_vector, device=device) + + # load reference_wav audio + reference_wav = embedding_to_torch( + model.ap.load_wav( + reference_wav, sr=model.args.encoder_sample_rate if model.args.encoder_sample_rate else model.ap.sample_rate + ), + device=device, + ) + + if hasattr(model, "module"): + _func = model.module.inference_voice_conversion + else: + _func = model.inference_voice_conversion + model_outputs = _func(reference_wav, speaker_id, d_vector, reference_speaker_id, reference_d_vector) + + # convert outputs to numpy + # plot results + wav = None + model_outputs = model_outputs.squeeze() + if model_outputs.ndim == 2: # [T, C_spec] + if use_griffin_lim: + wav = inv_spectrogram(model_outputs, model.ap, CONFIG) + # trim silence + if do_trim_silence: + wav = trim_silence(wav, model.ap) + else: # [T,] + wav = model_outputs + + return wav diff --git a/TTS/tts/utils/text/__init__.py b/TTS/tts/utils/text/__init__.py new file mode 100644 index 0000000..593372d --- /dev/null +++ b/TTS/tts/utils/text/__init__.py @@ -0,0 +1 @@ +from TTS.tts.utils.text.tokenizer import TTSTokenizer diff --git a/TTS/tts/utils/text/__pycache__/__init__.cpython-311.pyc b/TTS/tts/utils/text/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7a8868f680fa0075cbba420ad52a0b2fc453724 GIT binary patch literal 259 zcmZ3^%ge<81nFs6X=XtBF^B^LOi;#WAs}NqLkdF*V-74UfYS1wFxZjEy*T(WD{TcHqO~2U*&qJuJi0%QJ1@mt8T0AF)o~|l>fQ= ze1ARN(>)I?$9C4<+Mb?o|N8g-eSfd->(@W1sw(GjjSQa{KDCYG{yV+smn-YIb<)Oh zFK_}U*dpAhZOCRLw>@GXbqqO}+Yxb&x`teEJI`=KZoxHFCb)+@LfMd4@C=m;-k}Pi 
ze5g{W7^)H~hpL6DA)in^R3rF?YK5AiI>q_-oft0}H7~NJCyoBMWInNR!xj(ScO>XHzw?ux5m{6ofU4Ekf%PtORMRofDfc zBGDB+DL2##zotCDHnDZC-H2jxSmW6^v29`Tgtmhm{r&C#NV)lK=`u+iiv&X=oX&x8 zD2``M|M8#{48=uB?vF)>!y_S6$y^y;sKvMT1H8b+O<{2Ze;gNg<^tkwliOo*voWH* z3$~ZtsF!`v?@l|$fMa3{bj?fGJ^Bz>?A%(34)~ZczcxYE| zXk3oRMg!59Aj-Xm4jt@`$K~GfcsL^W#>La|-dsUFV-xABv7jW20eNEdSS%t-O~{_G zCU*f$bMN`;Z)$f_=gpSxn+=OHRb`d#45tB?TWUbON!OPB+XlAcao@g!^w>@h_m=Gs z4DQ*Q-neah|G<4acHX~h_nrs#4({9k;DLjO9(wpIk32dQJQfnf;gRFv6Hi1&qp`6k zB{@ER^3>^x$;pb&6)XK~*7j`Ld=EeVB@_~#NL-qKF7emniNBox=KR;iUtCYjCSFYZ zB=Kr2@zO89m3TSvo%yqg3%|ISI2TO(ZQ^|5`TzQppN&)X=4TV%dTToI!drhlf99=c z6LW7pGyl@Kh=8$UabbQY@n@mLcb`an@5#jX=Px9F{L4Q{{3!7^(Zml&=U zYT{qd|4HJfiJzTFT#F?BYW`2>zdKHY=$)B&&c1W*o%7?3RG)90O1w7z^8EMbznA#- z{Q1P+C4Mmfqj4H{i9dhmJBcfnAzT4(x^Asv3y{bS*#vILF4%?~LW|%Koq|(z2`>EI z_?L;U;c~%q#x>*?%LK3J8MX`MXPnk4IOH|LDhk5Njj+msunHrrsvxWqVby22yppQK zsuR^(12|6b6$$qVH3hykLaR`B#yM1rJh80h{+}7+cyk<-nVR0`Zd?y_yr z?~>XO%4#9e`k$)(?rb<#2uwCxJFiC%=tVTy4=~N8 z8rx=^XUkLdO?Y_n-Zak{&y0tyP-(X$#>b^-$Y?0CpoTHDMBo5gxD$A4m((`J&6(rs z0hU-75#x#xD_S4dsRhJFZU*A4@h~`Tlg}Pt^07D#Rw}-g zuXkM+etzPoy+7|&JiA!md~W*W=~8{d=2mWk8afT6@9ormD+mw)^0^t| zCD95(vmSQMG@-6pVjUL9AG1DNAQN- z6mQxEx3Q7HZKR#DJ+sfcUki>iT%7j&>}B4ztQ*Zh9_3^%gP7FcI~SvNL}@W<%Qd;} zpcrTSPEq7ClE#>~{Ah4&j0hToqzZ$o@jC`_S>_6%tsehdv=Y9+g@LliJV#LcQ?{q< zAY5$UvY+)HZ0qF8V%^!ZB?5vk zB5&=<#e5Vw$g~+wbIOtf+I=~EDRgb)oO3Q@cooj{+t=PUWzVm@M4PCk#?mJEEWD~i zlzugn*ssdgHZ2KR%pXy-i)c zjWz6jYWD{OSX8C519zs3t6YTq3;T#B60umfegj}#Hl8^mfHcn0geN)ap!xlTF=3^h zd6XaW(ym+_OG+A)?6UaMcq#NXXi7_5gS@|p@GGV`!{^lm6wiOe^~wU zpt6?T2Om`r3hKcaUf-)(oN8!O8@knob-Kf^J67us!^i)Ag}b)y0}dcl&DAz%YP}7Z zGTy4{-!^bHixuAr+;eX2d8tY9fn@DvLr5ZoTUII}8Q21U6b?w5jIqHu0pksQXe^#^ zo5z8<_l^D(Y=Z5o{UUIE#aQyuhmOG==n&QytQ*+Q%BU8iAk*Hk92k#=pBxvNQ;tb- zQSiH2t=T-+I?tLaf745QY3j4_FxisVPew=-#mRR8OmjDVOH$447yc+!({RCK0(DIn zR;8NSac6v7OG>S#2BjB6Jk^b z-<-EI^ z&d2x<+jR1;5SN3O9TKq|Y0n;U;yy`=NogkpwGzB(*OUkkX`5)>*Sg?2*_t=3Xrs~g zo~IEk9|6F`cRIWWY^nNYwZ3a^K&|&HmHyw|^fhNV8~n}iW?h@ozU#G(mz>|*^y(%! 
zwL4k2Tdmuz?0-lR_{G_ds=I&=7A=*(dkG^X@rwY*)EDT$l~hI z6c=84+>ev*!sFUq?I7e_?M9j8;o;mBXO3j-9v8MO01B+rJsRN7*sEOYG8}*rL>`O1 zF08y((8QW*ckxsVZz_h*cwDXnwpp1FdB8T8eQIwOSZ#N8&xR;{_nh`%?qWb&9jT{7 z_!}1u2fA*(iV61uXBKpOu~wRYLN5f~P6)i65O_NU5B^^K%ki&(Xt)ypD#1Qn2Z6LE z2(My-a9@GYngromSWQ6~3Bt9o+JZ0=gll0CQ(r`-udu2U7$QM#C#+Gh05drkdaZw7#b>cj%D=8?kDchhV@-ayi zMEv=n%n!#Rk=QAjKYH}&Vd&%5@X*Gs;h~EIcwpP$o;7^`{o4*~>pyhg0eJ0yXt4j# zL)#AR+BbOcF(k<_=Zq=Dkb%-p`RJ{C3Y)bx|MukG@+8YE}g58H`K2vN| z@tDYu%Ro;d*^=uI%E7E~EmevM^;*tx))t0)Q+iWdSk6JARE7sheH4ro%KU+?V4Oe2 z1_N3U9*I&v^T#F_8nJzgk-Sn!R96By_fa=!Dg;@K#>HsJq^Ziw2GYw?*=!d_!qF&6 zZ5EsvsiaC}w{MIo(;~V4(mLvx7(XQvCz|VYG+U01iacvK4RZ@M=>yYVSFqS4q-SVQ=Jb+mM1Y@1ycoj_yKV^I39`1M9v+O#zjp7 zCq@kUF?BSG)i|w^#U2DB#(L1yI1J2SWbAklEkacy+BECo_mL73QnCmnV^r!?C5Ih zdo;C^P%PED6pw2C;i-Jr({{ttmh^O}o(@I7Z^JnMq1BnJE2O1pYQ7AC@d}#4(1amP zVfgome~Z`%FfnV^ebLl~TATgF+DvUK7!|s1?%yL&&bGQ|!xv36r?r}N@C7|hY+yl8 zYg(ImiXSHM`B8>xEg>aoK}(3MEoh0R*85fImXL^#NeO5{vPuc`#mc5FR(awkwpwe< zzG;r+R(jGH$+SM{%)TyQDlViTNld}YUBZ^3^tx)-syc$q{8@2-In{$U2wmJy9=ECY zf1>!bD4^c$-P-E6;I(W)*kr4IJ~jzclkHXlyvIbH$^V(^Wx{8t;_1`|CN)mK$j!R* z)50gKtQntF1q(huvgZGVQe-iz?<1csM%fmck5Ss5C+qSt8Wes5|64Q&*uXqHp?F&U z0C;3oG~Lt@?DqTVVvo@t`Pd_ZrT}|b7ylRhC5ZrA6hG6k$0}yV z8CA-Hvr@HupDGwPYDv>lzZNt_i4rSVV2P3{KnWJu|4WhoKqz4qGoyqmWkE@)T7F2i z`~)Z&ru6xvgw!^gfq`icU^K)3Lx^dX6*n*V)K@E3Ni%w=f)@0YuCW6nL(a~#EJgb% z&NFF4G5nGIxd4Amh3P?Uq)AFsFwdOuWo!H#9Qk{uV1t1Kg=R0a=FX~F8OW2rfuk|f z3HDEr3)PQZ#P2}oXHhs(Ac;&DY#MP};{wyDV^}L@2jQcBC6)WVF_jxcHHfXWnrMa4dAY_(=I)1YI+c@-QBSDb2; z@NT3jSe5p`Q&WMOkZ!0z8>o?Wh(gB%>IxO0mf`9`JXX0jz^|ct&zUSMp0eT1Xo}Qf z)@@phl=AeP)_KTsp|Qp;$rmSrDtqN?NT0f`{*>k9*3&-I6{p=Id!eRAh_!sd_5>?k zsxZeHu}#@%`HAAm@};eoT?P3XA-Bobh*OX+?N`O~H9~HauMwvpU!w4e=WB%ACSN0t zHQy-<{(&!}udI0y=APpMWMIcG%Qz8dxM3SsSPy)S82PI)^yb#yGh$jx%8Ny0CcCV$ zLiW@4?766xyc9%a6-lARmTRLn8@HPi98>m}xL52K5%WqmKF2+aBONfLqgCEHDA8tK zFn8EezaJsrw!Od|Cp);M7)|yQMjms<{4u=X$RGCtr;X9S=BI4W+s}59-NZjqN^p6! zoTXi%*myMVcSv{Ao9(n_1x_=U=ck#}>qGArNZK_Uy52mlz#?EX?IC)CZdx*Ef&Cr! 
zkk&ogJ!iYN@yDCq?d!kM*Z;=0WZw?8Z^zrUH~MxfeY|6M?{|V zABG6mB(K$l{UKe1Osat%-tW_x+8xv~XCy4gq2m_gOcV;;H8YZth7qWlhA;||QO=Cz zVEh}QD7Pa3OH6`D4N8|AHIbR6!%>}0CMs&O<93K@A*@^h09I19jcRS@j5}4~JG+&P z8&=$GY<;C`cKF5BFRxBEE>|0u&+IS)m)&e^pWSi(@e7a7?6_Imcy9Q7$Au0gUfp|j zv$A^oFXgv4{)f%UonKLRenlBNd}HTfW#{2@%Py=ww`i^@S-V2P9UMV?|Jtjqiht`b z``(DZIq~ndzPVL7^w5p|hm`(@5ZiyQefDIswp+n{IQSXIZft)=nA@m!eJQ#44t4Pz z$;LY%3oXw!*y(-LmePj}2G87~SBLOOw(e0|_sr~m4`w(67j~cbUGTlu$E*sIU3aNn zcPTXw;=aM)>-}#y)qD3Rw>+qB$$DwQfGOW2Z|_!okKl$5y!L=iWl1t+RBnc2&`th` z%SIzJZLx@9%Fx+tn>z!?KZ%JCmuKSR7?k#+F47=@eFP2wlrqLK?ejEyG-Dh(d8QlV z*gq>{9O)3%?e{mn(a0E*w@Y842GT03PeC#_DUej<%xTt9`$%ku=~rk@!`9=pDqDD&vSy0kYEr&$;wS6KXTRx)FZ{gBFDU3v zXyA1x^zd+2P=xKI7}Z}MrW1yE@lnX&_$b@R-*6+ z;7+^gR2f>SZQa=;Twa!6AQBrXAnT2gKAH_U#*WzlBcr;eS=V`NDK|Y;=bDu2&A2Z$ zsQ$Y(XVSAp^=wf*Te89TDAo5|+NiGnGM>1To_kf#y^81FZ17s8dhOh@SN(Y4PI`J& zPmkj1`4mFeE7j}g243BR2kxY2gX-C!cs68nUZ+&AL+EZia3?+MRnL0GvpySomr{M# zCFj*@Ja8vHcdMSe75&!M`Rxp&{jn#UJ*1p{50dP!ekANEu)oL4r(Uirfn(TM0^_!*T;|sCD7+AyHC&(i(WOxFF=aFk}bu|Uyw#4fmvPK4W7fZ;ZlR=C}!_{MSwe`BG$YP$&c z72_RCoPF@JN5eL8Q@7XsL;Fp|eZ^zyc61d;SwJ%9^@UVH# z-i@s|8Ix*ToT;yOEy{2Jziw#Gxah^rHMeET$mQXxYcpPQm6NN2T$SXiB3Ctx)H6PE z)o=|hnObtyk&9*7z?D~L8p)3`H-S~mG}M;0n58F}5(65hqRh*kjjLJA+L*;`4w~Y^ zVpflzH_}`b-r}imQY= zeEKKf)VO+vJ8c{CVd<|=SM1uqZGZL)GvjNWnjNaiFKd$tQ?k)uG&myiNAYQx9()=G zCYWSTXgZ0m?ftvRRFDkeG+RezMM>Yi(9Q71WwHe{mjDygaCB@uOE~X}lR=-%%M)^3 z93@*(&4iB3_lWWyN9JTc3QIfs4hN(;BRF6PiZyG^XSD-0sFkAji5B}$*xs&%p`(m5 zZm{t)B{Vgp&>Yla<9F2f;*QxY)@(&k%&aj-rDSnd!ag*=C=&}|DXzHT=qEB7%`ZAA zm`f~SM0$Gz(!4B@hdIj<2BixzCe6zjIax9;WnB8H8kXkQfeJO(;`XMdpWMvUU|DZ( zT>3E#ON*PbeoUj$L5tO>K?M^XMDl5~0L03il#){8EVlz6n{mNbv~0>_rEE?1o|o-u zmpqQMNQ{`fT>h~vIMd!hfGstFfJ{{;UWHwnT$^P%*LUy+ctOPTHkfY^@DG8zul@FdMxRAT=hJz=y#5Pl!#Qda1TUdf(GPn z{ZHi21V|jt7=IsBK0LMSnU|O-NPng9PIGupYxYc|* zEaC4e;qNZtUsl53Q^Ma{!oNJ{f3?CoTC`Kn=2n3bH#jStoUEd+2;?wqV#H0ctfLl; zxWR+bd0sH$*kB7UYpSc2B^mMR$=4Yp9<=PAv12F1Q1;YEFuN;j?6`>=w;XUF4G4V! 
zNZ(1b+nSbW4G&iTR5%i$Ll97O(56HiZ+0g(?~K_GPGy;YW6{wQkBQ~4`lR#sb{tT`nLVynQtjw1HJV(rCGXNw2 zHC$!O>>Wx)C+@jnb@`pj@_u6DA66IjC_cKc-J$jksC|2tzDH9V?ov1Ozp-20u|t+uQy;cVm8w$+z* zs%;xni|*{jC#GN;D#GN;D$GN;D%GN;D&GN;D(GN;D)GN;D*GAAoWE361? zV{F4BVjFZ|unoFLE{M6b#h)*arzkL_-mMabY#u&`HUEa5xqn@srJu? zO~`QeuG>r8m(Vj zsMz-!7NxrUYS+eVN7SzEsqWQk_Z`=sR=ankI+m&(YcGjv$Ho#)jWjJ&7vGgyx>8-* zcWsZl^j?z_RBB@rsMN+LP^pbgpi&##L8Z3Tfl95d`)wQ7ut=$2k9+ncsASD~4XU=g zmCCzsHZDGY_`=~&lQ<>2jE_&8O0Q8e8ti6*lywb(za$j@Z#XOjX&Vuw1h#PP%fBG> zXe-g9Ixy%_-6I#oT-w@72%zu5jN;IIATa3fNLPTp3o_A&D6^rPcE=~^1AW?;U>0X3 z#XTBdDfh@w9gBjlv(*%dO~^Ix2(D(HV02)*45$82i4i&7f{&9&@s0DyXbkoil#h(> zDns;}ZYb(o6M)c1f?Rim1jP6}U%EOF11l@(pDo9C>x6JHB!zLDTb39y~4Gmnx9s< zhE)9`g{w)`uAHt;RW~7GWxdkWldSAf*58+`yid9Rk!0l~%HeRbGOR>SB`Z%&do%8! z%?8Ctari#-|G*r2zhj@n2CYYN_&)Rhz#J;L_t=N6q^1gF#ewFjPk;F-ZQ UgVTeSmuYxpyqvQQY1zpC|DuT6SO5S3 literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/__pycache__/cleaners.cpython-311.pyc b/TTS/tts/utils/text/__pycache__/cleaners.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f1a6ba8676aff2640ed417fc9c96afe8b8c0bf4e GIT binary patch literal 7133 zcmd5AOH3TewR=AN8kpf{KE}8h?~{!W1LYl-2-j+bdRcg zY+yk`D@Vc>l6^>IB1H;{A_wFgHo4@ILk>CQI1`N&G?FDtQ6lXnLb7tX+o0=LpD6`4C$@W%``v-Q4#b-BO{~H?5IEjmK5-)jFKE}g; zuj)y8Vjdp9`BZPp7xO{eFV(32R86dgwQJSdR9&o|wd>S|ln@hGyIyTfHN~2E&cn(4 z-R9zkQp0ne<6c0uN-Q89l^P%Nv7prSkc+iQ%>Y}a0KhgW2(TT-TVT9HYK7h*sSRMK z)DEyq>Hyd+9Rk=Rbpkvrbph;^x&a=MdgMOou-upQNxct!v7^!v_)4Tcfc=tZk_(f? 
z-bvXcdXh+TQp~6(G39xaBve_{WNLhi1Ws)6g*s7N5RHVQK>IkA)2f(|<60&)14waG zk0<6tDke;jmwZdybLhWtzAbZWy-OU3(>p;M0juCi z(y))#L6ZusPvwHw$jpTO1s@8h;8R6yw$L1R38&zv@~k{x5NrXbWX&{?2!dtzdh|;J z1RMoL%tgm!~W7%gfvl3pmZ z+md!|PCH?vHmJq~fE8}LR#-K^URqgt*&SYOUTxm4Z+hDPxOcUe^|$+mb~xUDhJjpN z&#Hg*tCxY6wfj%y_1l{Rxj-}@h_3opecM7{<%h3fMK0mkJ9JR&OkLlD2B?eMfdAFy zIedEIe4gaC_z5rxzp6iw=@w60V93B_2|Wa`P|pSuB8ce-^syR20Kkn9cyjUa#YZD+ zBUxW3?ShXkdQi%Yo@r=2cR>=qUh4;wz0{!!ZXKc1k4&Vb4T?Ctw@_K%qz+ zSiyri#gvUSo+&UCzhP1(O?zRioVm7yuBu|%kjv0%2$1>_5gRMqOQC7){LdpB$2QEI zFq9XD{w|#Rn{X~C4CjU6tZ%rQaU_~CPN3mt92f-FOKxhCqdkB`5xt7KU_#gK%hW_? zz#Ih~0JP&!4fK0RTHDjHN0-+wXMNq(WH|Ca2gq(RO7cgqTq*TsupM^r7F%vlk4#^l zJ~bVlzCC?sI&^2Sz=b^}IaiU~O68P(Uyh5J`M9x=n$cB*o`mhgNc3TwXpm>N^~9!U zb1WyE%?oE&#+XH09H12!3)%(~WFCaJzeOVt&tcvRXD)C6QRW`b<>;aPPJ-l<{O);L z#{sxCZd{bo25wMQq}n~{sk0qq=f3mmA&OP-p{c-$@D?*0t7hJWnN4pov$<;KIVL?O zO*(V{wSmf+`{D+rJ){RV)*?!v+Apl&M{J8BGvI9`qHDyQlZgs(D`qN zMHXIn`4fwN0=WXA=vx+j1X7rSCv7fziA(?}WS2j=#TRPrKniXELj{G*k2BCzh#fEj z#Z;zv{y&1j2x9#UzzVlBz%>S+j65D$_pVRog#NtH4-VjXcysF4(;L&P!|tK1(EpA3 z$E80k{dx4St+@*~@)vF}cj3l@PsPx>9cX*9^mu7~bfYsD29QE!3{nb*3B@2s2M6mNy_rb$d-picozSv z9D6gINhJ|4rabgKkaiR5YqX>l?w>;Y)2TsNf4T2+aerNgn*caZ$WixZt;160EW<1DbqUz{2~>IkK~+BIL}Lwx-z^#+ zY|!PGf_=>0yV&hwaVkUa!Ci*&Sm4Jq=z#w6`>>Z{u5*AHAF?LA>;BZC4wU*ZKNdYcX8r-@nuCwei zMKPGLjB;-`6@&nfm)w$OufPN}SYrmjL5KCFaCF0$6OQMFwXCYd{-^O*5>CfZKmgRO-_NQgltECM|Jw?r@-#p>CE1zq_a_&`V~~; za{w4zT*`2DK14H)L;DSEQ>ht=|6SFk;$7U>p7TH8tXfyH#h6P&*C;&MSY9wfATOmd zQ4ENFM3XT$)vW_2+2_v~B9^G(k%$serLrh35J|^a2+0FW{NM!uGrF+fQXO9R-*n~z zNk&CUeb->pPoWyO0NCo=9L@@7a>ALsaHhhSf(AFFtT31p2J^z;dmJ!FjbFodm1=Zf zw<}YtJ(Yy_X1na+6c1FPt)zvD)C^OSTRJr}km4JVsX<^qP+cw`2{}1S{q2{*H|Q`5 zl+(`uZ22sQ`UZ#DmUieWDR3O-KY?m|3ZTkozK_D#U7=7z*-gszO5~)11F+Ydv}e1i zo;~E7n1du39;r}!Rk>WZJsO9+ND?VLr#gx34Z!~eRHGdL8ta*F+H=DByl{Ts?De~6 zaD6VXkb*a71?~iJZC07FO)!g~=uN9c(@~CH63YbO{tCr{nNdZ4|2_ub(zzo*%(mVw zleA74y!SP*>nlAAD^&3$Xz+S&Key8#0mdd&10UK-shw|q#<_grdn(I!!#ukTheL(h 
zcwEvG@woNYg#;G{;R!ebPrwoO1RP+`IHSYPMk3`0+F)skEYkYsXda>A5Ts9>6ZX#97`PuWuCZ zF%*-2ib|%KDNzi)6dgE4CjbxZCZ%{pC>jKl<(73dV6Vfwbt8})WK{Vwy$$1d#WVf{ zaL3E@{5BWLa-p3Xj&E40$G=zHk*xdoio26_|6Xw)W!=AR?gv@-Z<{-pb^ofrz_zWu zPg{T1vwkhtaxC9+Y^8aprj`%xaBk4f^Ih=ZUIcYK|1oxL0Noz!+Mv#Z3pk*m54&z~ zmG|LC2Q>S!>wus$z)*tD07D5jAhH8`Do8!J>*a$RqdOe@+qC|%b{S}_!POnm3YcZ1 KIFWVPLHI9{3nv}` literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/__pycache__/punctuation.cpython-311.pyc b/TTS/tts/utils/text/__pycache__/punctuation.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dbaf841c3c39870e193822d26c7bd8efe8d78a8e GIT binary patch literal 8053 zcmb_BZEPFYk-Pj7MN+0D%KEmmq9jO^^-;-oYU|5(EXnal<;1q!REogR+!alQBAMM~ zV#!oX9DE1HFs^FgLITbP*ub%!kD}_qMb*0iHQfFP(EeBo3z*o#g^)vm%RdF~0f+p! zyP3C3a+iwZ^zI(Xw{PdYH*em|yqS5!dleN93QA~hey)EvMg1GTc)V4gP9D$Fu%Va?N46U9;UwmcHn!dh8d$aL34Q6IxkEz@Nt>IB7> zU87h#XTEEOKKN;UcG6c4eU6e@Mj@!1RiSXQ>rQVdi2yBVOhhTtrL+a0P+MuO;xf z|1WTrFR(S~D^u)#e>AYb`TdH+?_Y?qi3qk`e*asEKt$^)^ZVIY5Wv=##!sA_RLtX( zV~Xv2C&$LljDJWy1x(aer~0M@ju-kC1H$FLF-~|>jK%wo#}?)mr%v_-6M`69@JC}T zC-hyoaK29zg}#IsjtG4scSY>eQQI3|y9Im;Z)nmFs6i8WudPTROL)^d%y3*$b9pIWBvTsdpq z0d;VWd1rnMYunk6nl+ph?raI5+FqF|HDz&K=My)&&BtP@E`WO^SgDkl4$C zC>tpeZaB&y4|^RB2FfYmG$V+7IBrydIX<>P#xbBEYCqs&cn$=^bEq;4Oe7$3yoTo( z9})%$M#PHp5tvSx=Nn`$l&Y5)V=e(vd@RTb0>}0;W87RI5fKFjdfxDljZgSrnVLL) z{tXTH_?5syJi-;xkB*L#C72OLx8qJL6aW(!Qq^Fha@4$`Sui9CY^V+e|jZ9FPZYoJ=Cl5~>tLrX0z zb)Nc=27AZX0l;TcECLsqBQF9@WEdW&p)CnV!=m56RHKt)ZywqOV}uH*QdDktW7=}V zk%kXBw}vB&_KDC^35``sggRf~SkOzY=QuIJM;TUCGe+Po)40MvQLN>**$9Jm3Ohp8_gN>G+Gkp3^~OnH z7SPgy-2=@N7vmfn)n0{KvUa{Mbj>W4&F+JzZy#83M_33)1u+l}a*A09Ur}6WeWD>h zP7@Tz#Xuy%jq`krS1f2p6iXy5h>C>`2Svq>hWkx!QQ#TapeI}x1iuG;8tv{pB(zX`?yKoJv6!jN(!&>_X9=WAI>mHEZ z1KSi`K0!aJZkG0pWvj>K>T$_6o~x~24ZRn-&3>ySv$aR$+9Pnxolja??=(u?7k+i_!4EdXPp@VVo|X@u27vm_woJ(_Q)!EA-z$;t zPCds+kmz{|8b&>zFK8=BxuE4N4SEJMicwt#=kF*+Z~yhWZX5&pU|RaaIc_2LHpc`a zk)7ezrA6?o@kk&DLT26xid06d7;9Pbf6wv-10ZHK3k}OMEZ3Y&>z1iY-E(d4;kdReF6=tfb#)zX)gq 
z&`{hmd%USm+*?mf~ zpW3MaC_JJ7gU|r~L}lQ)vy>W_*Fhnc>17l6Gt-=jc#l78qQgUzcNa`CJOYku&U9x` zgbe=w5}d#ceoN8K%;bgY?Pk1M#N(aqW@fy-2x;-o&K8vbTuqf4-sqwjpPASL{vdEn zb=$~G*p2G9D~Z5#FP~IXsNUbDI z#<48@CNUTMP}0Pj&`ql)lfQQ60pw{k0?GW`Jt0cOkVY%uba&<3l-}tWXb#axTZmdV zCrxA2?4xCiVOD|WWoy!6q)nsvK$6ZBoL+G~ib`6(4fRGo@D+yD3yyNx8a+bRHy3%M zt-l}EFF4o#^ZG8s`UTeuyEkWaye+O{;P|)NankxtT0>Jeeh1R9iF=A`BujzM3T(};;Ry=g(Z`Q6W4yJQA%`)<;f zG!uJlTiSOlaU~oH2lz#1ffFysSdzog+n+cXWR8EE>-6z>G}I)qj4|n8WN96~`D^2R1nev7t>!%)wK@Qvmk4P>7-C4*_&(kDfO>O#Sy^i0~(fi6D@x zuKg*0^J?aR+P&Qp-@zv-@^=rtRl0xpOdEKP1-= zNp-`LXIOF#Z&g;Ot-0OJ+mx+*gjDJACv~lIU03?$oV#YNEJNRNY`VRY+nW*Z{pkH4 zttWnc^`Ec)a{0k>wqsoG7>AB0O&xMm&x(D;o~x=|>&SR-?c1zsm#W(DxBa5$qn-`x zZ=L_*{7uED71^!{xoZMC$c%k3-c|MEGuO}DZpa+GH}w9{&qnTytaoM`hUA8!tZP_y zA%#d}ePg=fNrN|YFx%jh8+_Z8qr8Vy@19H_U#S7Mte$%B)S7r}F#!DM7JqTwzj>;WJS0>UEIZs1+Ea%?6R((C0PHt7!=Ufes zUCgG7$<$_DowBP_a&>OGtA83?YyZjo&G}6KPokUdHp$)gbI<*YA2xl`wEosdFWi42 zSGnhLW!q+DTjpZ6vRkg~mMXjdGv{tz<0N;hWN%f&qk>BWiQ^H(_oo=VeN%V@PSD_r zFeC~zfx4X2GC{hqh=J5nK%ZPfZT#QHKLt&O2Cf0=q%&~MnStP9R!b`3wWstV&tPsw zje--9TOzhWjn5#s*78Y+S__AFMpZNUG)PA`)1d`V9T+@y^dh9CV^$v?7jh*nUPDM_ z&*y`DX2l$g2+xY7%Cy*V$)i7{8t@&EjZhC2#7&J&dmTOncVdDZ4u*d*@EM z7DA<)AT^MyNq|uHJfvp?hNUxrYG+|h*I?XONgmT^v_pskvYti94xxy-c#{%QpR`g8 zy)<`l2!d~2e|{VUALjW&=#-@O*q2~X2ySGUi`Tm}gaaKV&{7S3CS_<>NhwffSXEDt zM!~Zv-$B{)s6u=YUh2Yt(yIq+(WvTqCcmCX?ejVF?EUCuTgnHNLs9Yt*}&flf4gja zWlLSpb>N!*ga1Z<9V}ef;RV#1!b{@QHmFWRDh-X-EXFnTJV4ZPky@o8YH(pPM$4xW z*9n=uct(2=@+0V!=Q}80{?#{6|;pf#R(12lTIg0JThk?Z_T3NuBJxqBh)0K{_pID|7El2m$hErMY4P2|Hx94S z*GJQ%u&M5?u5 zv8`*_C|mWm$z*NZrl9)L46oN-R{?g)wrkszpO0O;uDzxLV3reyS*#c7Z3^qcOagK_ ztf!$vSJ%6cjzZPE*V?vC6{@2&WvkjYxvUU%L50YwuFa@`X3|whR>Hc_Sp;oU*j5iA z^2O%>PR1e;E=d0DRIdRR4A*&#i1;?Fh|C=LcP1+);J+vS2tx6yt1{Tg^sns0>OY~Q z{45MXdnNo1s%bbI%71fId5U~;)NaZ6-J%>R^4Y5BN;$S%-t=kN<;{$&zqm0d w4NPQRCuG-&RQa~eO1rnIoz=>LFA@6JVpruU6O9sh_Tx*n@2}r5gz)Wu0HSUpq5uE@ literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/__pycache__/tokenizer.cpython-311.pyc 
b/TTS/tts/utils/text/__pycache__/tokenizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1b19ec47a9183282a495f6ac01e86f8b91b2b0bf GIT binary patch literal 11351 zcmb6HvxU-%GPu9EOC1rc2HQTn( zM)Hn~FY90MXWJLrY05&KrP!8-6zlxZLQx;XU%eJ~v92pr#QilEBR0i(I+4jF7Bie; zn@%T1#r9%a5Ec6?xpY1kp%rH`!zFSYFGMVg>nxvGUgENxpm=6qo_cvHpTh#Qgr;ZC z#9y46I~$+B7=QWP#krZ6W*25IE1?^l7+)s+(glu>FW!&m5?K!5`d(Veh-noBHUSVH zc1~xP^E`}6h)asMUPxvVf{?7QpFCXk!nc=c#3e^9&@8oJVd({{i_(8A!`e6-YiAv7 z3ukAYoFirF(%`Tzt|jGU-4E>xPR_-89#S7d+s9h_f}3;S^fZdBcY8bA%6Zu~u9a)k zp{8uC@1bqM$NJ6n(9a8P?a=0D`^d;$JB)U)0a#Bv8{Bbp2aMjuhM>)Bn9;&^LVbYk zZ*F13@P;=Ym0!uR zaedP348qGKU;*`+ruZAeIGGdYT@koCGjakmyqM2tB6=fscx{OjmpC354}BSxaiMQI zCx~2vW%4P*E5?{hN3!7OsH2*|7SPLRGx$;N&rHD)Z#fkZH@q|p;lI&9eLW~5JP0HIqQ3Gng> zUl`X?1T{Xo)e}{!$w}4Aj~nM(^HtxBxtr%RY+t{rN17F&gElc)aa_#Dp?(-wza3s2acCR@U2z2V9)`Sm1jt(4 z(S!viU=(yh0Avkk<`gffsI!;i);Lj6+D;_|PG@Mvt*QBl36_m70y%FbTN=_8 zo~WU$@c(uRl6R=0rAWmgr(Q)GN+9d&B|DTfjaGC(2}hEvXoXTk*HSkqDA~0(85b%wW{owlH+~_NSYGL$#LRcD|=7d{fetGnCKAXD#%Is)z1w>vpo)48-5j~61kkUXGjyQrQ>$tF9ys(XgjXIFp8 z_NcA8dkBl+YWDyXeboU{?5h_`zLM`-06--*DnJ4Agl2xo0L`eI@KmoW6d#5hsCth< zL`P_T57Y`ckl}kEC0HtSnwG6I)pOVxb?^h~D_=nN^-9iOz8A_#jr>r+C6Hy*AOp)l zW`Ue~!J@P!-_%C&1IGHY#y*Wv_%!y=M}7#2S4kT4^3A>GjeD~f_pIqJm#ssbm@tLgXhE*K4;Q%{?@Crv#% zvX2rqx=_>r#lJ!WM3_|*Mt5j_)>!@#*iEaTDN!>*VU2Y2%*Ixh7F+doHowoi;99)5 z4f-|e(OY+^d)8~zUAkxisywVX zV6`Ae3jvdqQ7fmQtMDI?01F30C08{Vu2C*midNIobj{*yP0`g*x6aI!&@nl5Y|UEr zcRlqpn|`JosQ3qE|DZ(f@3#WGYm^;E8Pl%kQHtAipqSpPn+oS+2w`zIB z5azKIV$+<)JZ45go2{9nVhROPi0D>#nnihqRE*eE664ZUIy*Hw<90HTmL;<_kyfpI z8F~t+K~Qeo-c?Xkx^m)sprnbLA5lp(Ybjbjv3>|^ z`B+;I!YLzOwp}~Uh-)VoAA)fPq1C8IF~i!-aRahe&0gd29Z}qFyLOn72BBh4{4XHt zwCXMrAz~IRDr*?{h0vRb7u{wzyK~3{D3pdlMm2di*fihpV*qr+O@-`EaIt%X%6|b` zqYLpsh)AqY@uO}BS;xici)N1}%1p5=BdjcAsH5()s=iHBSmu)HR5}SB8S%zIL>m05 zLosF^rEo6KXA_xp0X(pr<{KQ2F%2VI_0@*cDYMI)7MOW{h11RbBQdfPVh#}R1bja6 zF305zu8Ic#sM`6|#Vf=Soq?xrr)j;OtuN7tX{~XNe-`)!BS`|c450$>>G|iOWlA)8Z0Z>u9w{th$~c(+Id> 
zfF#83R6NK#b;qd?HfaV!%WE)1co!0|oQEl|U-s;gJj2yM=hMKzW?-N^UkMzL0|!82 zczn=?qGg_j=C@Bg4UB9CMmFZ3T>f%c3XD_&SLDDIB3b_Km%shx`q7Gyk$ntEC6DjX zB#t{ypvE=>u?-PaLSS4DjMw3UI-y;oqJ2S-a2PTJ zvHc0sj#qs9W#4|uxnGsdt;m5pzcq?O^43=&V|Gn+k$Zy{HDS2jwkp6#qhP;@@W^$8 z#x{bXv6mCzkSBDb2w9L=JY+8?GR)39CI&i=s67X=D;Vdt3#(|EB{RtCa&2@_~tpZ{qXdAA04f*QAqIE556e z^Qwx?HgtDY-vIHtv%V_=U_hsZ_nJ_n8PqwCsb14(U|U!L-I@m3j&7DN05Mn#t{-z? ze$z!8YhkS{{nyqPptfiOl-mlwpVh^H8n4nN7zsb_AjZ%P)rn+~so;VWYF)p6oj`%7 zF4q_kp)8k5=OFaRL4c2h1<{&?UVwH}-5m^&8NZk`d|Glu!MEafO;jf^hfW{BVEH`yzik3px&=-?K zF)0-Loc%*yK69<)sqX4o>-k_`Ia}#JD)%3Ke7n*=_T*5d|Ak8XM9ER@94I+Tw@cz$ zUrGEMZ`IdP_rG|2JH?*i?r<5CoI%oB;s32fOd^&p5`wN;M2$IsgckHdfJL;rcd6|@ z$+i*bM`^6|lSV+ON8hhIjWD+JquXY*zh`Oov|t2E6>^mga-&g)EX_KQeVa#`yfW&h zwh7Gr*p^!aK0mW(*ujZ#cYa-44HK-3dpJIx$Fw|A>3;2ijHe z6`4s!i{Fhig?6k!xls?3LDjBCm8!xy0qV#&U8Rrn7XV9ylL|KMUI-aHM%}C!k2v`l z_QY@;QS)kejhF~-;uFR*`5OZNBDSNa*zg=KLSAvCb1at=`9n|;5ViVdx(ubJsFMz^ zg#CwrLmyDp_KtP?TYE~5EobXmNNO8{d&Blgo0_XQpO>A_OU~!(tuIJzFKi6R2gczE zcf~m&J0~RPM7?=ZYMb0RDnA2ln5{TZ$j%dz^F+gTu(%FydE3`sm3ED&cf}i(y-~>< z1z+XS#i!n(P47^7W#jfg-miEM$=*Yf_t2K7tt866;fg0Bdm&K7{PYTJdMXoe=fI!KO3Kp9v{kq<`KB z5%*yIYBnf~HXZMAO&~$^0iyW^Jp31`hdUJiGSs#;V6oO)MEvtJX=)YX{WpD$77%F9 zthN+eZu%QFtnC)6A&U1ytp)*32@HMEnt0SBqDHP?tldn{-_YVg) za@%F*qM|LXVI%hpgey_9)-MQG?p<}6@X=y?J5VgnW;I=O{RU3lS6#ny-K84=L=i=8 z{etltcSBr>E+U<^7l3?sqvwaPA3xUiWYLzwFr+Kd3EOxUjy2Q+V?29noDY(NJ5}EN zrRad#0}fz`M2d_vYKF-|FffBb0gzX^*@ntNS3S(I`8W$k!+9@Qf~v(?2LWcXYHTXClaG76&2>M5NSf<62hU>v0dJ{L)dP(o3nx>A0fd~g5`2bnxF zyy7QkW5_dc6~U?zfx? 
z!%tiID3(zcsUeFXm&1>18s-s+SHOWQKaV{~*qaV2ZiPYTv~%54Nu|?sB3Z}n^E-l&_#FzG8cS^AQj$I z@@{!L)>3jP^6BY+32q#(c%G9z&tX{N6uq^(|AVoOSLNu*%I+z7_f+Y{nuGF&Bu~#* zz7B|S)F^lBWxD*TJa|kVJgb(r`ePe+Q8%zrAWH6_olq}rWC%p)iWYRrz<@(a?i|~d(HjT;OOI!eCV7sFe~}{s)63} zF)0v%dm|;sPQY`k@3|)qd2Ckdi{t&$FVxK1`L**ko2#R5Yw-Bip2Lri$w$u1doI+h zmZ9z%1xfYdb-l3ehL=5b{9k?l?5iA{k`GR;yXA0H3P-=++Px3nr31p}jbv-@aLq#Z zT&F)Dl22U&K7gz-{l;qT?f#B}1Yk0KwH9kvADmQtU$X)Pc>cMKYVE4}L$ZHg`Hjcp zPYPcilP|n3y_uBXy!VaO(%SYN1qqC8?X0=%em9WbiD6<)j6Z?NDNHbw!=qErqcfs-K{$%(WEMU_%CkIf(T^x%S*yq7M5JJX z8(R1u;+#By-HE@NrOWQDQJ9QVq21NK!Rr2Jst1o&yL+p_uA0whAFNTBIH=CvS_|f! zRL5Y=g*i9X(Y-#hVUxN?;6$qC!8$Jmr(ms^Bg6EhpK`PT-R-caA^FA%#fMkb3J2xz z%l1&sVzp<16)?s2PQA|EYVTSm#GkjE4D^CSP3CfVj>(qbPjiyNokzYm;mAAWp=Jem`y#eR+6U>_g( z1fXDkht%nW*Q6kMJpCl_*(}L^DUwHX67<-?t%3xX*~lAm-IQZj&2oyi1A{lGh`n78 z-`L))as~3jTqK}4#QXRKn98EaorLWOvnsnFTGaG$6H%86Mi84CV!HBFwIb?AW$z9h zbv+{x%|~>gf*+|ggEaJvZ8KGfLd6e1>YrZS;NUycbTUppa#3|Ms&7@9-o_k$_(68V z56&0V9XrY2gF1BggbE}zD^1f?DklAZ_gB<@$$VF-lhRIimAWSFbXTbhlKHMuQ_@a% W%`s=CfxbJZ-;nz6ex#MW^8W!jrQpc` literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/bangla/__init__.py b/TTS/tts/utils/text/bangla/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/tts/utils/text/bangla/__pycache__/__init__.cpython-311.pyc b/TTS/tts/utils/text/bangla/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00321927cfc8d7dee8c583ee81618c3d8a2e1b4b GIT binary patch literal 187 zcmZ3^%ge<81nFs6X(0MBh=2h`DC095kTIPhg&~+hlhJP_LlF~@{~09t%TGVFIJKx) zzcR5nL*FH}IJ+djK;Jn(H?1<%Q$M-1xFkO}J}*BdwOBtSBv`+sq*%YSBr~U2za+Jy zL_aApFFhwwKR!M)FS8^*Uaz3?7l%!5eoARhs$CH)(0Gsoiur-W2WCb_#t#fIqKFwN F1_1f{F8%-j literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/bangla/__pycache__/phonemizer.cpython-311.pyc b/TTS/tts/utils/text/bangla/__pycache__/phonemizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..866d18a5e965c6a0c7b8bd87b31005e4fcaf9291 GIT binary patch 
literal 5071 zcmb6dYi}FJaqoD19$(graT2$(rP!b(i;mMet|Q2nEXPp{C@HM;gKR;dcqf}OMKbq@ zW8sKiV56Z722#=%X3>103x$%a)Nz9XO@N^3PY{g*LR?{hXyFg}M!+aq_)};0NSdT1 zpjjQ~_A#^b*qPZmZTNg10`1De;=-qXLjHw=dh^ygPjb+?Lj)o)2{L27nHdJgY=T{~ z&Da>6vn9AC`;46-ED`NRpxCPKL!u6W6YPRRa0)KLEqDZ8@WP)@@PEqBIE8@dT406X zEpEmw?7Kx~JVFORUg!kq#Z{pTFgT$bFnj`Yk%W7`$GxY*jOj|MOQIBC7NPH%N=Zx6 z1Pml~2G%RZuPVjARf=C$HajmEyNB_3KLUi%Rhupn$>7 zP425N(e2wQ0k;Vs+1*!%Ddq#pya>D3_&t;8nG~DIf8zu7)jA1CbDt8f!*r`2H9^}Y zQ2U!`ftN*&yS9<5M<^m}15$2Lxj9%D*AM# z2y19SoUn9jw!=-egCbeF)cgQdwx%rwZq{|ZsoDVhpZf1=@BC9HVseW3z7k_#q2BD! zLBuB-%-$hN)7oOAFE9eDks{!2Aueqhh%AjPLto$!W?E@k3kz25KnbI*#!QA09Jh$Z zOpw-S%rJ_xPDNn72>iDiGGrK}m>tcq3L3c9G`4^m>}ah!x9%i3uW?8qg)~yYq{;A2XS> zj7Qbm`}FO|JF+OrksDF@YGgu`uPLc?Ci?W&6?9e^QbCixox zI(f|dawpdM^S1nSi9clUhxCaG-?28mGMYbE;t%Q*kG=lf8{6GSHoHpRVZ%GDpQ}(c z@^%oqc^@(zA%AS|%62g>z zK|-I|;Yd~nfEBn+8Hi0%fd0O?!^S6~Z`cx|Q@Y`evKYg9r6a8oUU;Zaf#KM9?h zhle0ZG_hz;5(3%Z;0yl!9Bc~LZ?b;<3h3+eIK0P7m|#urdP)*%{!Y(a!eqL9ph2>m zbB+Rv+y?-h(|5*tHYYw3*Ctl3<*wz24F8eFJhiuk8LLG%G&t3*2Rb#~92mX#IU1Qx zh)iZm^1-Tf2*3{&@ifw)(}IXyW7d-w9&p>o&a6!XupTO0n3Y0+XmX3GcoK2|gi6wM z%u&NMvo+lfqex*O7(o_j3Y}D_6&{@Z>*SY{f4=aS3!r_;cgFCY(O)m~{?(JWPg)Wb z7T{g#8Z!X$;|4#z&5iGLNQ_A8ke@&Yt|Wl{ypVOw3YJ*DT4YUCo3FfD~SGZ`3Ds`SPI!{1?*dz2Y!lqzfYT0&K9;6`v2I&S)rW0>~ctVuW`@#Qc4))JozX}GF(~y_vB(cTyI%BCs zBAS-PCLHNF5Tk)=_r&Aw&~Ii6j#Bp#qx**)^<* z$K55{iI>6a2g{xvtu@Xwjuu;Dilt3(wAtOMISce-vW`)IE136cO>3iS+ruT;Gj7cV zC-PRt)55p@F-qVg(yn>hieyH4q0V8ql2*YKU2E-k8N}I->ubBwLcF<_;RNB*rXfI}9eQtYbfuxdr~rC2KYo+v4E zB9xAC`6^`9D{*B`NzGXZ*4T6|#FIiak+2di#v3iqXk{lI!y&!TY*5tF1VeK%&8akH zgn299*dWK4&NSp*QA(Q5rKl3SD$3G}uz|@)rgJ_?t5eet_n{&tW8%4_0M)SRu8(Dt z6QWAgTD1^SFY-*cRpRBT~wI)6N` zl>GgMzdvhx@3Cl>6pHJ5RGb2gpJ7(v6QccEV@E!B4zL799O9v4QB6tk}#%h}{(Q~w_ zG_2<6p+9wvETsfBA&yFKz#Qg#`9A=vHilu!#I4i63i0XmuS|NkTc0u+*lvB`;N6{B z&x$wa)$LUm=}0nV|4+;N#>)HNEO(tMht8CrAFQ(Eh1c0?7xDVbyszpZo>2AW{atL8 z0N5tM>d6i!xb{kwz^fe$*qF0Sl{7&whgc2Zw=qX?y#~AkOt8>bCGgs`UbMd(pm*;7 E06ocLuK)l5 literal 0 HcmV?d00001 diff --git 
a/TTS/tts/utils/text/bangla/phonemizer.py b/TTS/tts/utils/text/bangla/phonemizer.py new file mode 100644 index 0000000..e15830f --- /dev/null +++ b/TTS/tts/utils/text/bangla/phonemizer.py @@ -0,0 +1,121 @@ +import re + +import bangla +from bnnumerizer import numerize +from bnunicodenormalizer import Normalizer + +# initialize +bnorm = Normalizer() + + +attribution_dict = { + "সাঃ": "সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম", + "আঃ": "আলাইহিস সালাম", + "রাঃ": "রাদিআল্লাহু আনহু", + "রহঃ": "রহমাতুল্লাহি আলাইহি", + "রহিঃ": "রহিমাহুল্লাহ", + "হাফিঃ": "হাফিযাহুল্লাহ", + "বায়ান": "বাইআন", + "দাঃবাঃ": "দামাত বারাকাতুহুম,দামাত বারাকাতুল্লাহ", + # "আয়াত" : "আইআত",#আইআত + # "ওয়া" : "ওআ", + # "ওয়াসাল্লাম" : "ওআসাল্লাম", + # "কেন" : "কেনো", + # "কোন" : "কোনো", + # "বল" : "বলো", + # "চল" : "চলো", + # "কর" : "করো", + # "রাখ" : "রাখো", + "’": "", + "‘": "", + # "য়" : "অ", + # "সম্প্রদায়" : "সম্প্রদাই", + # "রয়েছে" : "রইছে", + # "রয়েছ" : "রইছ", + "/": " বাই ", +} + + +def tag_text(text: str): + # remove multiple spaces + text = re.sub(" +", " ", text) + # create start and end + text = "start" + text + "end" + # tag text + parts = re.split("[\u0600-\u06FF]+", text) + # remove non chars + parts = [p for p in parts if p.strip()] + # unique parts + parts = set(parts) + # tag the text + for m in parts: + if len(m.strip()) > 1: + text = text.replace(m, f"{m}") + # clean-tags + text = text.replace("start", "") + text = text.replace("end", "") + return text + + +def normalize(sen): + global bnorm # pylint: disable=global-statement + _words = [bnorm(word)["normalized"] for word in sen.split()] + return " ".join([word for word in _words if word is not None]) + + +def expand_full_attribution(text): + for word, attr in attribution_dict.items(): + if word in text: + text = text.replace(word, normalize(attr)) + return text + + +def collapse_whitespace(text): + # Regular expression matching whitespace: + _whitespace_re = re.compile(r"\s+") + return re.sub(_whitespace_re, " ", text) + + +def 
bangla_text_to_phonemes(text: str) -> str: + # english numbers to bangla conversion + res = re.search("[0-9]", text) + if res is not None: + text = bangla.convert_english_digit_to_bangla_digit(text) + + # replace ':' in between two bangla numbers with ' এর ' + pattern = r"[০, ১, ২, ৩, ৪, ৫, ৬, ৭, ৮, ৯]:[০, ১, ২, ৩, ৪, ৫, ৬, ৭, ৮, ৯]" + matches = re.findall(pattern, text) + for m in matches: + r = m.replace(":", " এর ") + text = text.replace(m, r) + + # numerize text + text = numerize(text) + + # tag sections + text = tag_text(text) + + # text blocks + # blocks = text.split("") + # blocks = [b for b in blocks if b.strip()] + + # create tuple of (lang,text) + if "" in text: + text = text.replace("", "").replace("", "") + # Split based on sentence ending Characters + bn_text = text.strip() + + sentenceEnders = re.compile("[।!?]") + sentences = sentenceEnders.split(str(bn_text)) + + data = "" + for sent in sentences: + res = re.sub("\n", "", sent) + res = normalize(res) + # expand attributes + res = expand_full_attribution(res) + + res = collapse_whitespace(res) + res += "।" + data += res + return data diff --git a/TTS/tts/utils/text/belarusian/__init__.py b/TTS/tts/utils/text/belarusian/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/tts/utils/text/belarusian/__pycache__/__init__.cpython-311.pyc b/TTS/tts/utils/text/belarusian/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bcb860fa9c8ca53ed8d01f8baebf19a032ce7491 GIT binary patch literal 191 zcmZ3^%ge<81nFs6X(0MBh=2h`DC095kTIPhg&~+hlhJP_LlF~@{~09tD@Z@IIJKx) zzcR5nL*FH}IJ+djK;Jn(H?1<%Q$M-1xFkO}J}*BdwOBtSBv`+sq*%YSBr~U2za+Jy zL_aAtC$XrsI5RO%KR!M)FS8^*Uaz3?7l%!5eoARhs$CH)&Y}TbY^$GUfZ!-lO{kyqeVayAF|sPA*8AT5)va)>H~F=5>&Yx?>O0HKg`T* z8b@|zMU^muBT{og$|+EVe*iziMwW z83zp3N*kqLL;(KahjWm6o6{M#c?wKmLL00wY(P_Z0#*c5M4~Anu_T$1C(?>!!h0Yi z|H;L2#h!4ua7|1*IK2tErWn&<0vpu2E35(c>RW)G+hdGx1AG|*8VRLCWC{mN_KvBAW?;D@aM=y 
zDad1z?Wm*F1SeJ#A*c%bLv{z!2W)%TKh`7xRK@B`J{SoTuse|siR(Ys-kIge&N-!&I8_HZD0B#8Dr3Y0sj(j_bGUNBwy z;%+eR;Wf>$(f0fAYMUM|`$Stc2*Ui0ag?DTZMhb;44d6<7JJT;fdiSv&RvOMg>xt@ ziUN0hNGiV84WMAI5}vs#hZlMMKGRL~ib2-(1w=NfSJprFoKoffr+U#R)N=~1XCk8K z^Y?U0iSAR&COSpiRA24Cw?24pty~Et?jasyR=C%M*IXTR1UV8vQm!;c_&8GDXb#P@ z`dJ9jD$r8rl3y6fVX81`Kt85Q}$2#&dRd)`^ReEqFXS_WiBq)b;l0jfT=t zn3B;Bfb7RGK2D7{U!|>--%j~W#qWrmfvb=~B>r`7H@COGw^Glwq?xuf)0Ad7eGWFI zkN!PR(v!cZXMao2w$ge#t=E@Mk`s*wd$(H2YwhHOT4f_6;BhX*se~I>ReXhWOb+s str: + # Initialize only on first run + if finder is None: + init() + + from org.alex73.fanetyka.impl import FanetykaText + + return str(FanetykaText(finder, text).ipa) diff --git a/TTS/tts/utils/text/characters.py b/TTS/tts/utils/text/characters.py new file mode 100644 index 0000000..8fa45ed --- /dev/null +++ b/TTS/tts/utils/text/characters.py @@ -0,0 +1,501 @@ +from dataclasses import replace +from typing import Dict + +from TTS.tts.configs.shared_configs import CharactersConfig + + +def parse_symbols(): + return { + "pad": _pad, + "eos": _eos, + "bos": _bos, + "characters": _characters, + "punctuations": _punctuations, + "phonemes": _phonemes, + } + + +# DEFAULT SET OF GRAPHEMES +_pad = "" +_eos = "" +_bos = "" +_blank = "" # TODO: check if we need this alongside with PAD +_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +_punctuations = "!'(),-.:;? " + + +# DEFAULT SET OF IPA PHONEMES +# Phonemes definition (All IPA characters) +_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" +_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" +_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" +_suprasegmentals = "ˈˌːˑ" +_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" +_diacrilics = "ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics + + +class BaseVocabulary: + """Base Vocabulary class. + + This class only needs a vocabulary dictionary without specifying the characters. 
+ + Args: + vocab (Dict): A dictionary of characters and their corresponding indices. + """ + + def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None): + self.vocab = vocab + self.pad = pad + self.blank = blank + self.bos = bos + self.eos = eos + + @property + def pad_id(self) -> int: + """Return the index of the padding character. If the padding character is not specified, return the length + of the vocabulary.""" + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + """Return the index of the blank character. If the blank character is not specified, return the length of + the vocabulary.""" + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def bos_id(self) -> int: + """Return the index of the bos character. If the bos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def eos_id(self) -> int: + """Return the index of the eos character. 
If the eos character is not specified, return the length of the + vocabulary.""" + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def vocab(self): + """Return the vocabulary dictionary.""" + return self._vocab + + @vocab.setter + def vocab(self, vocab): + """Set the vocabulary dictionary and character mapping dictionaries.""" + self._vocab, self._char_to_id, self._id_to_char = None, None, None + if vocab is not None: + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension + } + + @staticmethod + def init_from_config(config, **kwargs): + """Initialize from the given config.""" + if config.characters is not None and "vocab_dict" in config.characters and config.characters.vocab_dict: + return ( + BaseVocabulary( + config.characters.vocab_dict, + config.characters.pad, + config.characters.blank, + config.characters.bos, + config.characters.eos, + ), + config, + ) + return BaseVocabulary(**kwargs), config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + vocab_dict=self._vocab, + pad=self.pad, + eos=self.eos, + bos=self.bos, + blank=self.blank, + is_unique=False, + is_sorted=False, + ) + + @property + def num_chars(self): + """Return number of tokens in the vocabulary.""" + return len(self._vocab) + + def char_to_id(self, char: str) -> int: + """Map a character to an token ID.""" + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + """Map an token ID to a character.""" + return self._id_to_char[idx] + + +class BaseCharacters: + """🐸BaseCharacters class + + Every new character class should inherit from this. + + Characters are oredered as follows ```[PAD, EOS, BOS, BLANK, CHARACTERS, PUNCTUATIONS]```. 
+ + If you need a custom order, you need to define inherit from this class and override the ```_create_vocab``` method. + + Args: + characters (str): + Main set of characters to be used in the vocabulary. + + punctuations (str): + Characters to be treated as punctuation. + + pad (str): + Special padding character that would be ignored by the model. + + eos (str): + End of the sentence character. + + bos (str): + Beginning of the sentence character. + + blank (str): + Optional character used between characters by some models for better prosody. + + is_unique (bool): + Remove duplicates from the provided characters. Defaults to True. + el + is_sorted (bool): + Sort the characters in alphabetical order. Only applies to `self.characters`. Defaults to True. + """ + + def __init__( + self, + characters: str = None, + punctuations: str = None, + pad: str = None, + eos: str = None, + bos: str = None, + blank: str = None, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + self._characters = characters + self._punctuations = punctuations + self._pad = pad + self._eos = eos + self._bos = bos + self._blank = blank + self.is_unique = is_unique + self.is_sorted = is_sorted + self._create_vocab() + + @property + def pad_id(self) -> int: + return self.char_to_id(self.pad) if self.pad else len(self.vocab) + + @property + def blank_id(self) -> int: + return self.char_to_id(self.blank) if self.blank else len(self.vocab) + + @property + def eos_id(self) -> int: + return self.char_to_id(self.eos) if self.eos else len(self.vocab) + + @property + def bos_id(self) -> int: + return self.char_to_id(self.bos) if self.bos else len(self.vocab) + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, characters): + self._characters = characters + self._create_vocab() + + @property + def punctuations(self): + return self._punctuations + + @punctuations.setter + def punctuations(self, punctuations): + self._punctuations = 
punctuations + self._create_vocab() + + @property + def pad(self): + return self._pad + + @pad.setter + def pad(self, pad): + self._pad = pad + self._create_vocab() + + @property + def eos(self): + return self._eos + + @eos.setter + def eos(self, eos): + self._eos = eos + self._create_vocab() + + @property + def bos(self): + return self._bos + + @bos.setter + def bos(self, bos): + self._bos = bos + self._create_vocab() + + @property + def blank(self): + return self._blank + + @blank.setter + def blank(self, blank): + self._blank = blank + self._create_vocab() + + @property + def vocab(self): + return self._vocab + + @vocab.setter + def vocab(self, vocab): + self._vocab = vocab + self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} + self._id_to_char = { + idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension + } + + @property + def num_chars(self): + return len(self._vocab) + + def _create_vocab(self): + _vocab = self._characters + if self.is_unique: + _vocab = list(set(_vocab)) + if self.is_sorted: + _vocab = sorted(_vocab) + _vocab = list(_vocab) + _vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab + _vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab + _vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab + _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab + self.vocab = _vocab + list(self._punctuations) + if self.is_unique: + duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} + assert ( + len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) + ), f" [!] There are duplicate characters in the character set. {duplicates}" + + def char_to_id(self, char: str) -> int: + try: + return self._char_to_id[char] + except KeyError as e: + raise KeyError(f" [!] 
{repr(char)} is not in the vocabulary.") from e + + def id_to_char(self, idx: int) -> str: + return self._id_to_char[idx] + + def print_log(self, level: int = 0): + """ + Prints the vocabulary in a nice format. + """ + indent = "\t" * level + print(f"{indent}| > Characters: {self._characters}") + print(f"{indent}| > Punctuations: {self._punctuations}") + print(f"{indent}| > Pad: {self._pad}") + print(f"{indent}| > EOS: {self._eos}") + print(f"{indent}| > BOS: {self._bos}") + print(f"{indent}| > Blank: {self._blank}") + print(f"{indent}| > Vocab: {self.vocab}") + print(f"{indent}| > Num chars: {self.num_chars}") + + @staticmethod + def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument + """Init your character class from a config. + + Implement this method for your subclass. + """ + # use character set from config + if config.characters is not None: + return BaseCharacters(**config.characters), config + # return default character set + characters = BaseCharacters() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + def to_config(self) -> "CharactersConfig": + return CharactersConfig( + characters=self._characters, + punctuations=self._punctuations, + pad=self._pad, + eos=self._eos, + bos=self._bos, + blank=self._blank, + is_unique=self.is_unique, + is_sorted=self.is_sorted, + ) + + +class IPAPhonemes(BaseCharacters): + """🐸IPAPhonemes class to manage `TTS.tts` model vocabulary + + Intended to be used with models using IPAPhonemes as input. + It uses system defaults for the undefined class arguments. + + Args: + characters (str): + Main set of case-sensitive characters to be used in the vocabulary. Defaults to `_phonemes`. + + punctuations (str): + Characters to be treated as punctuation. Defaults to `_punctuations`. + + pad (str): + Special padding character that would be ignored by the model. Defaults to `_pad`. + + eos (str): + End of the sentence character. Defaults to `_eos`. 
+ + bos (str): + Beginning of the sentence character. Defaults to `_bos`. + + blank (str): + Optional character used between characters by some models for better prosody. Defaults to `_blank`. + + is_unique (bool): + Remove duplicates from the provided characters. Defaults to True. + + is_sorted (bool): + Sort the characters in alphabetical order. Defaults to True. + """ + + def __init__( + self, + characters: str = _phonemes, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a IPAPhonemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. + """ + # band-aid for compatibility with old models + if "characters" in config and config.characters is not None: + if "phonemes" in config.characters and config.characters.phonemes is not None: + config.characters["characters"] = config.characters["phonemes"] + return ( + IPAPhonemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + # use character set from config + if config.characters is not None: + return IPAPhonemes(**config.characters), config + # return default character set + characters = IPAPhonemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +class Graphemes(BaseCharacters): + """🐸Graphemes class to manage `TTS.tts` model vocabulary + + Intended to be used with models using graphemes as 
input. + It uses system defaults for the undefined class arguments. + + Args: + characters (str): + Main set of case-sensitive characters to be used in the vocabulary. Defaults to `_characters`. + + punctuations (str): + Characters to be treated as punctuation. Defaults to `_punctuations`. + + pad (str): + Special padding character that would be ignored by the model. Defaults to `_pad`. + + eos (str): + End of the sentence character. Defaults to `_eos`. + + bos (str): + Beginning of the sentence character. Defaults to `_bos`. + + is_unique (bool): + Remove duplicates from the provided characters. Defaults to True. + + is_sorted (bool): + Sort the characters in alphabetical order. Defaults to True. + """ + + def __init__( + self, + characters: str = _characters, + punctuations: str = _punctuations, + pad: str = _pad, + eos: str = _eos, + bos: str = _bos, + blank: str = _blank, + is_unique: bool = False, + is_sorted: bool = True, + ) -> None: + super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted) + + @staticmethod + def init_from_config(config: "Coqpit"): + """Init a Graphemes object from a model config + + If characters are not defined in the config, it will be set to the default characters and the config + will be updated. 
+ """ + if config.characters is not None: + # band-aid for compatibility with old models + if "phonemes" in config.characters: + return ( + Graphemes( + characters=config.characters["characters"], + punctuations=config.characters["punctuations"], + pad=config.characters["pad"], + eos=config.characters["eos"], + bos=config.characters["bos"], + blank=config.characters["blank"], + is_unique=config.characters["is_unique"], + is_sorted=config.characters["is_sorted"], + ), + config, + ) + return Graphemes(**config.characters), config + characters = Graphemes() + new_config = replace(config, characters=characters.to_config()) + return characters, new_config + + +if __name__ == "__main__": + gr = Graphemes() + ph = IPAPhonemes() + gr.print_log() + ph.print_log() diff --git a/TTS/tts/utils/text/chinese_mandarin/__init__.py b/TTS/tts/utils/text/chinese_mandarin/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/__init__.cpython-311.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b52b61cfcfcd99d0535f11bed18f24240bf1161f GIT binary patch literal 197 zcmZ3^%ge<81nFs6X(0MBh=2h`DC095kTIPhg&~+hlhJP_LlF~@{~09tD?&fCIJKx) zzcR5nL*FH}IJ+djK;Jn(H?1<%Q$M-1xFkO}J}*BdwOBtSBv`+sq*%YSBr~U2za+Jy zL_awrGcUC`H9j{nFD0=kGfzK0J~J<~BtBlRpz;@oO>TZlX-=wL5i8IlkW-5Jfy4)9 PMn=XD3^1aI87Kw-R7x`| literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/numbers.cpython-311.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/numbers.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a38005e782ebcd84b3cc347f0ca8e051697af9d0 GIT binary patch literal 6950 zcmbUme{2)y_0F+lJ5C%YcAOu?a0xBM18fpTfi&e;(m=O{u0Y$B4rLeL#W`^7@ZA|m zc1CU+T8D+zWNZm-8D?W`7$9X`H3e3>P1+>>Ja;G6x(Nws2|xbXGNDNon$*4T{PR*A zpE5rD1jB<$DPn~% z3i`5y@T3WTCGdOL0H5tM#0l&C`>h#<+7MoPsJXd5GXkwlg1xCzG1r)rq8h*9lZ 
zpkE#_1??(i1@x>iTM4szl!mauSQS9%nCgg0U1Nt4N5st3j1rNurRCu#A{NoWIO}uU zVrmcTa0fMtmV`bxT+k3p3*oi9BZ=VZMfmD6_?9h!uRc$IF+Qe2mFZg1kSNeso*ibH zc15BzKbLPY4O*xlz#dOIBjv%mC&kM&`gC}OP?^U%=G0=1>(KXN&VG_{jhX~FAlXN- zXIliZcn#xZmVX3Rc_n9=UBpKKekHdgQUO}B3N^U_Laflz?IPmj!N21@MJgk^&m=KE z1QMtcZ3#VRghV+L@*d|I5WHv`nn}zog|B zt8)@}7ssKIqg~5Yo_*~&Nag#0y*|UG2RLrdv|B_QQ*fekJ83M&foNP(=i@4!w9Z?A z&dQu-i%11DYs71wK(jZB@ibQRWKwxzO8?RkVwqT_m14x9+H>0D)fTl>hjtYS=WYB( zap)(tYrt~5m{(M}wQA`mD)LsJZ!L9Vwd=t<7xltRVIc{z9KBSGQ@9US6t~i1tC*(} z*8Cpwtym5C&!@a#Z-zFlqM z9^j!`cmcWFwU||mY2rE9KF&|I^o2r!wNz+G;6gz<0L#Kav!4snK_3U~`38VNp8%|R zs4ljj4hIAt@0BDnBWfa~X2rT`hm1$lv`87kCI35aGmUz&aah?d@d z6fPhaDnbhjr$26tqFe;Onq5t? z$A>~Z8x%llne}Pa?^T1-0$QN>cFLDlGsrt_R20o;j3}>6m71TwburO8KmWprJp&mE z3s@tF4*FSzWJjLjeH_pQ?SX}c1R5&|vkq;F4-XB6kiarwGaRJ5*M4>X(+l_Bx}NKO zl6dfkUv!6;!{mbxen0=K>-RsMnSbxi`)|E7_u1(O=ia${VdC!T@q0h{c_GLrzng#W z?JUS_9z-Sao6w>`0Dk)?OHI%K&>i0wHj+{8nhtgMAE*8`+WU61*{f6;$`(FG@%& zdZ1in67+Ocf=u@c>xTeIxb=$uP>2gEW)6I#06vsgjD8df5A_WzCd7i1=UK*0DrA7i zTBZ<=;2)W?Y>X^lnoX6bHdqoRrgRsK?;Fz`T;pWVw!fNgRsGrW7faIfqU?DQCXy?9 zWT@+U!9q)_!|sD4J~Ouqh` z!qJ|syg)KXq5&TXgg;?md4K@2^3eGP6r-=VkLEd_Mj*%rz#}V)NFr;s3;^*SsJ_aW zt*cXVbIwU}RBI<>0PVO5mzcO6WdbG4uyQw4jvb?;WKSE;=^I zD5U{_;)WCh4>!1gT7a5jNM8;m0Tzs(g|o&YxDFM=NIw?{Ae;jMI9`RSq3y0kSl3Dk z#2ShohXM*2QcBa8uc8AQCSNZ3MP->4>CN&Ac`5W zgESXpSfrR#?%*PV!x$7Y^SWSI(PMOqA&kunX!(#Ev!Vw-gV492i72S}TByJ|ee>DA zUA0f8yigP9I{=RDng2c*`d46u|5-(pRmAp<)s5FhciuLZo!%I)9@{d$<;@eLC#D=< z7+sRlb=z7e)q9dwuWa>7CU44E7VC=+M~Bt%ZAt5P*}7daZBH$4mMS;IeiZxBv?aPb zWi%}iq@gxd<&2fWk_Dp7;7UEyE>-_fs&2z&`wwHrRJ9ALMoO};iEoT=Ot3Li%yj3O zO;Yv24AgT6vrv|*1){|4N>NWqwcdC*9-e+WwmY`_PHkgscWUDn$=WAbTXEUe7u}Vz z_DI&A=&sx5^6c(X6_wHMl+he(8V$!w-V&o?%4B`7;#5U^VCu-(!%0)KY-*O&k}_Lj zJH~6`D^E4X8XpgplS0b1;==3izn&1z^?umSG1 zm}y2zkAt{lo8A;h#eWkk4b7>nKY&wz({R)Gr`6X3p9OC1xP_9N_sX01!f?{QPqy!i znNkjC%zV49DbbRwTO-%4Ss+~IE&s;V*zVZQxPHE}I{xjk;ql?A9j8vjPNZ!0U)rcI zY*eCly6RFx(zaH%t(9zRQ?|NPWmSB~siU!@=T}ZK=iDE<6Gz_nOnLGv>ZW(i?3}Io 
zZTHoel1~{i?&R&HudI@)AMo1~T;V4^y~|FS+&KP}Ac zxO!}Med}=Wye}cEqGni#H;aF6S{=&m*#{yOK8!S9UZcxLsH9~ z$FMT9Yj)?Ks;+l`3i9bpI=W;>m!y`Iy&=A7YU7!$XSc?>u$*5$bu4x)FIh*@wo10G zl5DG>aVMuD$Fy-qH}mpuORrWYn>HnFn`PT($+j69Sv&N{{Zm}h*&;hzzI1xNaC)YX z%^bNpl61Dq&i16~S=scgq?S7w^Y%~CXAdP!O|q#;GBu@)wgp0Gc%IDS(^`Lav~SWs z;V*Q*v-QwRx;ka3ot?6?GimCQOlA|;i>b~U_f+DFS$*|_`{!{CK&68+r4svhNWObB_|Z!+IRHO? z1yoUDp_*_mn|x#9jWY+&9*n+l+fqAaPFk8}OS7bJRyC#QafGVT7Y-m)eRV)X`wxOc zxv-xh1spA99thfDMgEg)1hkAJs*%t4rZ=wYVA1yiRauxi)&vNGLlk=?%pIlSepd6v z>}Q0$cHsL!E+v2LHv|=3<|6_Z9Do!b!Z*3fh>8#4&%7#pHdG%BG2sBa z6}5?MMQT1=3^mUS4wm{%I?n}RKfdkKvYTCzjW#q~! afP>fKs-cXu&6MC#+id!!wjWoiJpT{5@8~-K literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/phonemizer.cpython-311.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/phonemizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c4466629920349fdb39b33cb7efa7a7e22c2878 GIT binary patch literal 2183 zcmaJ>O>7fK6rTNc5+@FUnEW(NB2z?-LY6{XwG^3F1k#9NsA5!zge>Eo*be(+&5mgu zY{>^IrIn?IiYh*E$SF;gRN{!#3&$R9$p>qtNRfKt7AZF_?VDL|NP^PYoj32zym|BH z&G+8#Jw06rT4sJ>p2!IO!vwzt+RSbqm=&ZWT`-U-%n1VHMMInu`6$gvx}?j7Y$|gK ztSMO4RV-k2$hU;%Wj*jjo(t$5d`$(p6XXuPhieDhv#Q?xM49W<#aR>$y=J?kf-8TM zb7=gv;7P&TGx2*f@#NIZ)r2ck`~!7Wf@y(RX{O;f6&MNnt^s5P(S2(RdwdB|UmwN= z(H9ojPTq7u7dNFfh|C6buayN|eu7HE6k3Cc4SyPy#S-&diC<4dC0B81(ZFtJ7E?@Y zt7y6>EnJ`^WhAL>;pF!@noZ8<9^nISvqMfm-yH$6f-%IjfknCNCRyI%PO9VEi3HaL_poq=M>$Gv2>?=GB15 zz3i{`?@a@<3yLTzyB+zWXVO*MqcY2btR!y5uU(8vguS{O@D?4nugxUqjTB9?!~MvR zKBhSGe{PduQ0av3Tmn)-TcMLxyQcs7;pSxXZ0zsT^{i13nT?Ry44IYb7Qe;%ci?Sf zaHa{89Bu_i>hcI-&q=~S-g#(_&n&F0z%#S#`cZtFG4SXxvriU}z%U>*Bdh^-H}>$W zOZ)aoC8;DX!YP1FIsW=zXf7g5H!2bZ23N{p8kM<^fFztTu?0wCD1}(^N5>8WCO~T3 zi`?*Dgu&=5p3vo5aEMrfiL znppp~8Pb@-@#Vx%vAR6OAeerdFD9};8{rLqZ3>gHpPR%DT}(a zy5ABQ@>)q;16vz=<)JK6zXXE9JF8L#mF1EInfYZ|DJi=AHt4$Y2$2Y7@M*V_a>%w? 
zk||3Ae_cJez6jY}maCJ8v<_KmYkgG`j<}J0&?-PcjbDeFDt&$PrYnpSR%Tq~qYD>5 z{v@ip%0do5NV!tF0PnwG=PaHC{6$@v^`^0awcB?Tk(Hxj)bWbczFHL)M*PcFp_PEg< z;WNShUt&MUYG)e3NHZ9z%Mp&+s6bAEpV2N?rL1CQ2m>mKGSUykJ?4cXVc%|NS33_K zO5R4CUz!(ruY|CY^`_&pY3l_8$H=E3GNd|x0ojoRL1>|Y`oVu2byxV;LcJCKZKG)Y z;J>3v!uzYv4uWBwzPP$sJ=ceKvpmE;e|K6I>DtQ3BTNb Dup}BU literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/chinese_mandarin/__pycache__/pinyinToPhonemes.cpython-311.pyc b/TTS/tts/utils/text/chinese_mandarin/__pycache__/pinyinToPhonemes.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d5134cef381078accdd4c89262038da33664974 GIT binary patch literal 16530 zcmY+J2Vh)BwT7jYTqUO`q=)n#C8T%T>XI!>RNwZq5wrY#CyDmzcKvqd0j?)SC zK}e${^xgwlln??u(Bsh`4|tR{p@#PUbN+KDGU3ko=euX-o-;Fd=C1tss#Pni@W06T z-L zDee@tr@B+sp5|Vy_H=iK+B4l*YR`7(s6E%6r}lhzf!Yh*MQSg0m#A%Um#V$Yy+-Zj z?h3V6x~tS)?XD5~n8b3eyH45l?gq6tx|`JA>{hE?(s7yU1~SDZnZsbquO5Ar?%e>s2y~h)DF4LVl|d6ZmY6wZoArHw?plS8&x~zoZ7Gp zs~vaaY9lV9cEUx~PP&-dxJ#%_x}@5aORJr7Q))9Vqc-buYV&Sdtj03qW|S3NQEkcX zRJ+UVR=daTQ9J8q)!yQ6QG2V~tM)c`o7&sm?P_1^UZ?gBcZb@tn^SwIyHo95?k=@= zySvrC-n~Jr#&VClN7+7iuiE|YKDGC|`_(?+9#H$Bdr<8|?jf}gyNA_2;vP}^M)#=N zH@P>feT#dm+PAs4seQZq54G=b?^OGkdzaeB-Q!|4mM7d3%ARyjs(s2mrS@s}wAy#O z_o#iZd!O28+%syQbNWnxv#7JhI?M^H{ABbU#x2WA_uaKXpG-`*Zg{Vl|duxc^o5OZO|azjptp_BZagYJcZ` zueRb2s{Mm|N$nrqAJzWJ{YmYg-JjL|#r;+7-`vY;|L*>w_Mh&bYX9Z_Ep}hEtHPx_ z8f*wxjc|zxx>hdpTLQhr^it@hrk6o4Grb&IcRkn@xKt zrjLL=!t{~QN18qg`e@U~Kp$)RIOtcIJ|6mb()s7}Jucn9VQ<8x zdpPXPa+&{X=ryL-LN}Ujf^IV14BcY76DyeVggq zp>H?+TIkoAz5}{!dJcNd^qtUmnZ6tP^`_qdt>+o+J#rb}2Ys*U{m}bO-v@o4>HDGY zH~j$ggQ6{;hfsgm?mYtiM$?Z%zsdBQq2FTqtGGc=ntCy5cG#le+2rYqAj0~q5g5Z_X+4v zn*J2@r%itb`m?4#2mN`|Ux5Ci>F1!oWctg{Uom|EdfxO`p}%JO>(JjY{XF#ZroRdO zP17$xzhL@X&@Y<)Hng6ju;0a{XQ}1$J=FC~g!IsSuYz7>`Y`CjOdk$?xalLHk2HN0^wFXGW02?Ple|B8^ZY;vhz3OPKW0C8**ntpK1PQL7!#% zZ0NI1p96ia>GPn^Gkrev`KB*`zQFW_&=-ofd@e%$BD;4n^d+Vnpc_nI3VoUB*Faxx z`U>bPOkWAjvpD3gg68=fa@Rm#Yd+UOUvK&b=o?Mn1bwsV)zE89uZ3wBx{Pjs<_Q~eJ06=q 
zn!XL1Ctk?C7Mdqs$lU>579Y!J4)r^&&0WxUn|?j?8%*B=z0dT$&^+Bj?mlRqXd(9i z^n>Q}5cI>QAA#m+7IKe5^E3;&H$%V0eBKJp(=3G3EM%uy2&Y-dPP34E44S7|$UP4I zgtdJV`YF+mRYN@;k@DSIzDLXVYWco5)jn3eS$4E%daKTRHdIx$P(88`8j0dcF4T-9 zBl%kVq2<86&)o%*%GaPWGJp3%Xmn(uW)yBHk_)vcBKak_H>#e1DQ!0@zBQxaC~F5E zl?LG?YtP*$EyKR$hvXTfQLyMplD$O_JSsj>bf>*X*v%dq@)eL=SRx>jUyd%tMFSA0RDPKuO>%4^>{}rWcp#3z zLn1*x5{mD~f_dfP^Y@5P3O+%z6yilksl4wwm6rtaK7qX4&yV=~`4Q)DqeWh3dHR&^qNPAyd*0CAPPP8Ye5l$yz|c2RJSTXG_=VN^oIa z@=g|llki=zUfls*uTJ`jV%|}3oj5L9)VXLO^1t~c>$;Kg%>hPcNx>g~w5O@fyLtai<8KH1yAvBIG$0b1&OmLhDj!Q4& z_7aZknno>f(WJ&x@Q?_`aYfKkM20IOy`i9MguO*GhASe^iP&=@`W)1P z#`2unNRV_yla9zEBN&kgmR&6hB#jbtD<$8$2p1My^rk*TMkFFLJAtjMfN=2&F4PpH zz^f(-f$K{r3#q`s<%N^+E@t$p8oj<+dRwE%YlUW# z!UV1YDtIG6Bo}#u7HT0;%6p<3dGVc*ov2`WO-Nn^o#_e5s~`p55K+i*lJAC_zZc<6 zM!EAy4G%$;@1)da<6I(yJA@+H#Kns{jkc1NIT;P>8zul7>txi{F{%g@Rc;`-eLxm1 zdJXmzX`#6zEZ>DhndefTzqg1Ekgx6@@GeSNHIsS&z@j`58-fh0+{faLVbEeScQF|! z6kIYfPFu{TEvC~JBu-;-CbxbBJhKY*F9HGF_qr|{T z$@Lbqd5Ouq#B^R_G9@vYjW|M$OQ>-P6$PIbXQ*)tHLjt?8ERZYjU&|Bco1BiJ4syn zk$FVH0f}=KrOsdgbPhqdw?N1Y#%12(GFvEu7<7Y)%LWt21`~>7lZglU$2I@BTUnRs3cVUlhF=_VxIgrtjtk4iAzgr%F%bQ4TBA?YS$W)e0t37r|# zg2uYzBrqU}AnycsP}G?vgk+hJEEAF?ilDV-nUE|Kl4SxfN!KM=fZqsi!ej=^*PgrwVoE2q8J9CC9Ynh$3h$ zIfl}hptMGaD5vbCKkxvaI%T_A4mAucs@yiDuDh8K?q-T~aKX5nX)Ag3lQ5ssB$+$ox9nT-_68DhZ|w>lS2TaFwek28_6al>r{AIk8a>1nKcy{H41$#n{djjg53kaEu99~B_`RB-XP@4+^&uQQ} zS*$q(p3}vclfZMj5OWfEPV$~X;4>2Vj0BE?fzL4T8QH*QY=zC}i-TI=qVLU&bb{v; zIDmM`@o|OCN2V4PycDSMHwvn{^$6f(3DKPgh(BATiR4$n#$#Pz8z^Aku^OU8jSDr{ zbqc!c$cG=AYwCz5XuzM@*gy1VQLxY8qtG0RUf^@7h;LrRTvN1HgM*J6b7r)-%cY~C zhOkeCFdLy_bfFr_7qK&jWOszH-}|~(7co$B8Wn>{Vx&cxq+))d8rxw}mbK)`y|5_f zP!UrVk}tUu!g5lSFkQ7!cveX!4H6{^4vH&r?n<^`a9cLR64P`gS#~8^c1}u2zDcg6 z(~GL_Mt6`Bb`S~4&F{xtqQ;j>s^|lCYmPeJdZ`B=xcfP^K%81>B2!z!Ho`B|{j!gs z%H@Ikx^oD4r4I|jKUIE*m-NSi%s1M|e4`WWCf2EJCnanr7B$vCYQeYte%Vl@&R_Ou zuRDqmABAoyN`lY*{kEq7zS2&{5Wwdzy9(-jO{HT@c!^sc<^;dKOTnq)O1el(h!nFR z3!sFU9f~C=WE^2QMbarMsq<^mUT)Qt@EW5~mFhabdZDhny=$nwYq+VsvG2*MmoO?- 
zFYjpR&xSMEhT=$eqM<3Ajpb75hQ?H4yx8B~FqY5eQim(pNqz` z4Y_b3*Dy8_O@_1KVSJOikxVq%fX@yr=Dt)n#yy+}XD>+?UtWGgBIWY&@J*Q)tE;N; z|Elb{Cs$QfYO1TNE1}V)hljpbyR3X>`PfR0RDN8$ynIFZoXQfZRF+mPS)tWsT3ueX zWM#Rfd`)GAR#(d1E6e9r>aKQ@xOs$?3gmJc3tF$~PfahxU zyr9YXTD>6X|3a-^6jU$P>LpT@L1@tGr9r5dY4tTh^>VFVAw6#@cT}#_>Q!>LwR~;m zYOP))U0qc^uX3$cuM6&8uhkoZwl`|^rr_?)T3uZo++I_yV66v@1Wf?-<;n8aN;5%= zdJL62E3E_^43=%Uxw70=zOK^Fn;qV}lVF|iZ#_YmZ?b`)TRo!XEtMXEjoOFg-%HS^ zO=JZ62?o5!Ai*Zz#}L7054I3&1z1(ySx!{85p35!B(GtD9SY>JBLt%!j1f2h4trcMhGSVxb~w2iwA2`o691I@n+mNPY@)vxg?q*Nc;Y#2r?dI3332@`SJwQ>LG8` z3_-zzB0&j&Z`DqMUFsok>~4ZR09>xK1gk@mmvp*DH-kzO0Q+brXaQgxtpshpNjpJ@ zZ_-Jy&V%&?iwCRAH{ZaU-QK&0V59fwCFoO+fpS}=pJ2dy3=(VtV30!un|+fl1Y5O< z48=Br?Y@s;f*l@=5R3vWFP~LDt};g80Qi(J!MFmM_y|GVdn5>w9;67;037fsf(!r$ zJWH^6uyWoz&zsW3YodmmlAG-wF*U3A%if4Fuf)?6ikqqXL=uUV=W~M?b;h!5Y9# zW|gz?9e_WuMvV#1@gv>5jYRR1mggVBSJ9Y zJ)#7Y9>fUZ0L(l=kW>$OY>FTaz~P)C$oM|81UcU%PcRL@PG<-f4^}~M%Fq^hv*i2R zNw5omY3wH0<2_~xR@ce{q>nYVJajy0BxnNQa5fXP0B|2|C1_J1!`V*I;Xxj}ElLpIY51l<62W!dq22sQ%ntm`G{^B(;K1HO+zf=wO_5iB08&A#~--rNdMUzVe7 z8^Lzpe3)Pd0HYou81;RO5jfu@Ofc?2gkVDZkk7Fw!K8Y~V8saH9wZ2o0G#s_L0Uaz zAx#lv0JxB{1UcVFo?zNHnNe`C08lq4r(uzxq(CNqCz~u9tb@Cx`J4o~o8Ef>R?JBn zvjnS`1RiVjJM-XLfVw&9uaTh1gJyyjfciOkFIx%Pv=7~H2s#wVV>=1fd9a?K3xIKK zAn5iUJp>y)=q2a_VCMY<1L`4<9VFNUuwqV*fFXj-zK<;gTYVqf6jT-u)^=?!V>3+e z9lrSp!KiOOM&JNA6k&pK?-3!G@O?xHCOwD|#C?+lLDGAq2+{x?tSN$wHjz2X66Ac7 zJi)YYGDA@Cph!^iee5LI<@?x8u*ZX01&0jQYWyDI%38Bj_M3xiJ!m9ol3y&k)S3xe zyhkfRn+NR#9RQr-PJ(snAyd4bpbLOgyn&$G_t8VJ(KqQO=mW5ILolFy$UF}cZ1NsM z1e-nBLa-Hp*={4)?!hp@4&P)%!6Ab+>b=M4?EpA7VS;h*5h0lH9#Mix4`Kvy-y}hh z^dLo$24Ln>1R3>^;mi``063g^f@$B!3_-y+DH4MzK>ag)ysk= zYxKwK!LzKlVFzzy9xIACbI;q@kb7h&6?%hW8|+MdYm;9GgVJGWsqQ# zZ!$!%8DPbnM7@PztM6kQ!FJzdm|%woBLt%gWS++eoc19Srb1R3o^_ZxzoHjxuLPcZF0W(W$tk0L=yJ!H~$672F# zb`$LJV3uI@3cqgjuVV+-0@%7CX!4+$pap;{yp^C$JtXvYf(`(_Hk}F%8LV~Qdp*6o ze197Vy46EIiFyb&s)y`Iy##$8^b-sKaF-k;*rXmZzC#3?0angQkXs10`aZT1Z1+uu z33dRm(-DGE-^Un%^L>N~#??bYj}T0Fk0`+;K>b{#JXDDh#63tTIApMrxVdVsSdLXv 
def _num2chinese(num: str, big=False, simp=True, o=False, twoalt=False) -> str:
    """Convert an Arabic numeral string (0-9) to Chinese hanzi numerals (〇 -> 九).

    Args:
        num (str): Arabic number to convert.
        big (bool, optional): use financial (anti-fraud) characters. Defaults to False.
        simp (bool, optional): use simplified characters instead of traditional characters. Defaults to True.
        o (bool, optional): use 〇 for 'zero'. Defaults to False.
        twoalt (bool, optional): use 两/兩 for 'two' when appropriate. Defaults to False.

    Raises:
        ValueError: if the number is 1e48 or more.
        ValueError: if the number uses scientific ('e') exponent notation.

    Returns:
        str: converted number as hanzi characters.
    """
    # Validate the input before doing any work.  (Fix: the original computed
    # `nd = str(num)` twice; it is needed only once.)
    nd = str(num)
    if abs(float(nd)) >= 1e48:
        raise ValueError("number out of range")
    if "e" in nd:
        raise ValueError("scientific notation is not supported")
    c_symbol = "正负点" if simp else "正負點"
    if o:  # formal style never uses the 两/兩 alternative for two
        twoalt = False
    if big:
        c_basic = "零壹贰叁肆伍陆柒捌玖" if simp else "零壹貳參肆伍陸柒捌玖"
        c_unit1 = "拾佰仟"
        c_twoalt = "贰" if simp else "貳"
    else:
        c_basic = "〇一二三四五六七八九" if o else "零一二三四五六七八九"
        c_unit1 = "十百千"
        if twoalt:
            c_twoalt = "两" if simp else "兩"
        else:
            c_twoalt = "二"
    c_unit2 = "万亿兆京垓秭穰沟涧正载" if simp else "萬億兆京垓秭穰溝澗正載"

    def revuniq(digits):
        # Reverse the per-digit list and drop consecutive duplicates
        # (collapses repeated 零).  (Fix: was a PEP8 E731 lambda assignment.)
        return "".join(k for k, _g in itertools.groupby(reversed(digits)))

    result = []
    if nd[0] == "+":
        result.append(c_symbol[0])
    elif nd[0] == "-":
        result.append(c_symbol[1])
    if "." in nd:
        integer, remainder = nd.lstrip("+-").split(".")
    else:
        integer, remainder = nd.lstrip("+-"), None
    if int(integer):
        # Split the integer part into 4-digit groups, least significant first.
        splitted = [integer[max(i - 4, 0) : i] for i in range(len(integer), 0, -4)]
        intresult = []
        for nu, unit in enumerate(splitted):
            # special cases
            if int(unit) == 0:  # 0000
                intresult.append(c_basic[0])
                continue
            if nu > 0 and int(unit) == 2:  # 0002
                intresult.append(c_twoalt + c_unit2[nu - 1])
                continue
            ulist = []
            unit = unit.zfill(4)
            for nc, ch in enumerate(reversed(unit)):
                if ch == "0":
                    if ulist:  # ???0
                        ulist.append(c_basic[0])
                elif nc == 0:
                    ulist.append(c_basic[int(ch)])
                elif nc == 1 and ch == "1" and unit[1] == "0":
                    # special case for tens: bare 十 as in
                    # 十四, 三千零十四, 三千三百一十四
                    ulist.append(c_unit1[0])
                elif nc > 1 and ch == "2":
                    ulist.append(c_twoalt + c_unit1[nc - 1])
                else:
                    ulist.append(c_basic[int(ch)] + c_unit1[nc - 1])
            ustr = revuniq(ulist)
            if nu == 0:
                intresult.append(ustr)
            else:
                intresult.append(ustr + c_unit2[nu - 1])
        result.append(revuniq(intresult).strip(c_basic[0]))
    else:
        result.append(c_basic[0])
    if remainder:
        # Decimal digits are read out one by one after 点/點.
        result.append(c_symbol[2])
        result.append("".join(c_basic[int(ch)] for ch in remainder))
    return "".join(result)


def _number_replace(match) -> str:
    """re.sub callback: rewrite one matched digit run as hanzi characters.

    Args:
        match (re.Match): numbers regex match.

    Returns:
        str: replaced characters for the numbers.
    """
    return _num2chinese(match.group())


def replace_numbers_to_characters_in_text(text: str) -> str:
    """Replace all Arabic numbers in a text by their equivalent in Chinese characters (simplified).

    Args:
        text (str): input text to transform.

    Returns:
        str: output text.
    """
    return re.sub(r"[0-9]+", _number_replace, text)


def _chinese_character_to_pinyin(text: str) -> List[str]:
    """Transcribe *text* to pinyin (numeric tone style, neutral tone written as 5)."""
    pinyins = pypinyin.pinyin(text, style=pypinyin.Style.TONE3, heteronym=False, neutral_tone_with_five=True)
    # Flatten pypinyin's list-of-lists (one inner list per character).
    return [item for sublist in pinyins for item in sublist]


def _chinese_pinyin_to_phoneme(pinyin: str) -> str:
    """Map one tone-numbered pinyin syllable to IPA phonemes, keeping the tone digit."""
    segment = pinyin[:-1]
    tone = pinyin[-1]
    phoneme = PINYIN_DICT.get(segment, [""])[0]
    return phoneme + tone


def chinese_text_to_phonemes(text: str, seperator: str = "|") -> str:
    """Phonemize Mandarin text: segment with jieba, pinyin-ize, then map to IPA.

    NOTE(review): the parameter is spelled "seperator" in the public API;
    the typo is kept for backward compatibility with existing callers.
    """
    tokenized_text = jieba.cut(text, HMM=False)
    tokenized_text = " ".join(tokenized_text)
    pinyined_text: List[str] = _chinese_character_to_pinyin(tokenized_text)

    results: List[str] = []
    for token in pinyined_text:
        if token[-1] in "12345":  # TODO transform to is_pinyin()
            results += list(_chinese_pinyin_to_phoneme(token))
        else:  # is punctuation or other non-pinyin token
            results += list(token)
    return seperator.join(results)
"en": ["œn"], + # "ng": ["œn"], + # "eng": ["ɵŋ"], + "er": ["er"], + "fa": ["fa"], + "fan": ["fan"], + "fang": ["fɑŋ"], + "fei": ["fei"], + "fen": ["fœn"], + "feng": ["fɵŋ"], + "fo": ["fo"], + "fou": ["fou"], + "fu": ["fu"], + "ga": ["ga"], + "gai": ["gai"], + "gan": ["gan"], + "gang": ["gɑŋ"], + "gao": ["gaʌ"], + "ge": ["gø"], + "gei": ["gei"], + "gen": ["gœn"], + "geng": ["gɵŋ"], + "gong": ["goŋ"], + "gou": ["gou"], + "gu": ["gu"], + "gua": ["gua"], + "guai": ["guai"], + "guan": ["guan"], + "guang": ["guɑŋ"], + "gui": ["guei"], + "gun": ["gun"], + "guo": ["guo"], + "ha": ["xa"], + "hai": ["xai"], + "han": ["xan"], + "hang": ["xɑŋ"], + "hao": ["xaʌ"], + "he": ["xø"], + "hei": ["xei"], + "hen": ["xœn"], + "heng": ["xɵŋ"], + "hong": ["xoŋ"], + "hou": ["xou"], + "hu": ["xu"], + "hua": ["xua"], + "huai": ["xuai"], + "huan": ["xuan"], + "huang": ["xuɑŋ"], + "hui": ["xuei"], + "hun": ["xun"], + "huo": ["xuo"], + "ji": ["dʑi"], + "jia": ["dʑia"], + "jian": ["dʑiɛn"], + "jiang": ["dʑiɑŋ"], + "jiao": ["dʑiaʌ"], + "jie": ["dʑie"], + "jin": ["dʑin"], + "jing": ["dʑɨŋ"], + "jiong": ["dʑioŋ"], + "jiu": ["dʑio"], + "ju": ["dʑy"], + "juan": ["dʑyɛn"], + "jue": ["dʑye"], + "jun": ["dʑyn"], + "ka": ["ka"], + "kai": ["kai"], + "kan": ["kan"], + "kang": ["kɑŋ"], + "kao": ["kaʌ"], + "ke": ["kø"], + "kei": ["kei"], + "ken": ["kœn"], + "keng": ["kɵŋ"], + "kong": ["koŋ"], + "kou": ["kou"], + "ku": ["ku"], + "kua": ["kua"], + "kuai": ["kuai"], + "kuan": ["kuan"], + "kuang": ["kuɑŋ"], + "kui": ["kuei"], + "kun": ["kun"], + "kuo": ["kuo"], + "la": ["la"], + "lai": ["lai"], + "lan": ["lan"], + "lang": ["lɑŋ"], + "lao": ["laʌ"], + "le": ["lø"], + "lei": ["lei"], + "leng": ["lɵŋ"], + "li": ["li"], + "lia": ["lia"], + "lian": ["liɛn"], + "liang": ["liɑŋ"], + "liao": ["liaʌ"], + "lie": ["lie"], + "lin": ["lin"], + "ling": ["lɨŋ"], + "liu": ["lio"], + "lo": ["lo"], + "long": ["loŋ"], + "lou": ["lou"], + "lu": ["lu"], + "lv": ["ly"], + "luan": ["luan"], + "lve": ["lye"], + "lue": ["lue"], + 
"lun": ["lun"], + "luo": ["luo"], + "ma": ["ma"], + "mai": ["mai"], + "man": ["man"], + "mang": ["mɑŋ"], + "mao": ["maʌ"], + "me": ["mø"], + "mei": ["mei"], + "men": ["mœn"], + "meng": ["mɵŋ"], + "mi": ["mi"], + "mian": ["miɛn"], + "miao": ["miaʌ"], + "mie": ["mie"], + "min": ["min"], + "ming": ["mɨŋ"], + "miu": ["mio"], + "mo": ["mo"], + "mou": ["mou"], + "mu": ["mu"], + "na": ["na"], + "nai": ["nai"], + "nan": ["nan"], + "nang": ["nɑŋ"], + "nao": ["naʌ"], + "ne": ["nø"], + "nei": ["nei"], + "nen": ["nœn"], + "neng": ["nɵŋ"], + "ni": ["ni"], + "nia": ["nia"], + "nian": ["niɛn"], + "niang": ["niɑŋ"], + "niao": ["niaʌ"], + "nie": ["nie"], + "nin": ["nin"], + "ning": ["nɨŋ"], + "niu": ["nio"], + "nong": ["noŋ"], + "nou": ["nou"], + "nu": ["nu"], + "nv": ["ny"], + "nuan": ["nuan"], + "nve": ["nye"], + "nue": ["nye"], + "nuo": ["nuo"], + "o": ["o"], + "ou": ["ou"], + "pa": ["pa"], + "pai": ["pai"], + "pan": ["pan"], + "pang": ["pɑŋ"], + "pao": ["paʌ"], + "pe": ["pø"], + "pei": ["pei"], + "pen": ["pœn"], + "peng": ["pɵŋ"], + "pi": ["pi"], + "pian": ["piɛn"], + "piao": ["piaʌ"], + "pie": ["pie"], + "pin": ["pin"], + "ping": ["pɨŋ"], + "po": ["po"], + "pou": ["pou"], + "pu": ["pu"], + "qi": ["tɕi"], + "qia": ["tɕia"], + "qian": ["tɕiɛn"], + "qiang": ["tɕiɑŋ"], + "qiao": ["tɕiaʌ"], + "qie": ["tɕie"], + "qin": ["tɕin"], + "qing": ["tɕɨŋ"], + "qiong": ["tɕioŋ"], + "qiu": ["tɕio"], + "qu": ["tɕy"], + "quan": ["tɕyɛn"], + "que": ["tɕye"], + "qun": ["tɕyn"], + "ran": ["ʐan"], + "rang": ["ʐɑŋ"], + "rao": ["ʐaʌ"], + "re": ["ʐø"], + "ren": ["ʐœn"], + "reng": ["ʐɵŋ"], + "ri": ["ʐʏ"], + "rong": ["ʐoŋ"], + "rou": ["ʐou"], + "ru": ["ʐu"], + "rua": ["ʐua"], + "ruan": ["ʐuan"], + "rui": ["ʐuei"], + "run": ["ʐun"], + "ruo": ["ʐuo"], + "sa": ["sa"], + "sai": ["sai"], + "san": ["san"], + "sang": ["sɑŋ"], + "sao": ["saʌ"], + "se": ["sø"], + "sen": ["sœn"], + "seng": ["sɵŋ"], + "sha": ["ʂa"], + "shai": ["ʂai"], + "shan": ["ʂan"], + "shang": ["ʂɑŋ"], + "shao": ["ʂaʌ"], + "she": ["ʂø"], + 
"shei": ["ʂei"], + "shen": ["ʂœn"], + "sheng": ["ʂɵŋ"], + "shi": ["ʂʏ"], + "shou": ["ʂou"], + "shu": ["ʂu"], + "shua": ["ʂua"], + "shuai": ["ʂuai"], + "shuan": ["ʂuan"], + "shuang": ["ʂuɑŋ"], + "shui": ["ʂuei"], + "shun": ["ʂun"], + "shuo": ["ʂuo"], + "si": ["sɪ"], + "song": ["soŋ"], + "sou": ["sou"], + "su": ["su"], + "suan": ["suan"], + "sui": ["suei"], + "sun": ["sun"], + "suo": ["suo"], + "ta": ["ta"], + "tai": ["tai"], + "tan": ["tan"], + "tang": ["tɑŋ"], + "tao": ["taʌ"], + "te": ["tø"], + "tei": ["tei"], + "teng": ["tɵŋ"], + "ti": ["ti"], + "tian": ["tiɛn"], + "tiao": ["tiaʌ"], + "tie": ["tie"], + "ting": ["tɨŋ"], + "tong": ["toŋ"], + "tou": ["tou"], + "tu": ["tu"], + "tuan": ["tuan"], + "tui": ["tuei"], + "tun": ["tun"], + "tuo": ["tuo"], + "wa": ["wa"], + "wai": ["wai"], + "wan": ["wan"], + "wang": ["wɑŋ"], + "wei": ["wei"], + "wen": ["wœn"], + "weng": ["wɵŋ"], + "wo": ["wo"], + "wu": ["wu"], + "xi": ["ɕi"], + "xia": ["ɕia"], + "xian": ["ɕiɛn"], + "xiang": ["ɕiɑŋ"], + "xiao": ["ɕiaʌ"], + "xie": ["ɕie"], + "xin": ["ɕin"], + "xing": ["ɕɨŋ"], + "xiong": ["ɕioŋ"], + "xiu": ["ɕio"], + "xu": ["ɕy"], + "xuan": ["ɕyɛn"], + "xue": ["ɕye"], + "xun": ["ɕyn"], + "ya": ["ia"], + "yan": ["iɛn"], + "yang": ["iɑŋ"], + "yao": ["iaʌ"], + "ye": ["ie"], + "yi": ["i"], + "yin": ["in"], + "ying": ["ɨŋ"], + "yo": ["io"], + "yong": ["ioŋ"], + "you": ["io"], + "yu": ["y"], + "yuan": ["yɛn"], + "yue": ["ye"], + "yun": ["yn"], + "za": ["dza"], + "zai": ["dzai"], + "zan": ["dzan"], + "zang": ["dzɑŋ"], + "zao": ["dzaʌ"], + "ze": ["dzø"], + "zei": ["dzei"], + "zen": ["dzœn"], + "zeng": ["dzɵŋ"], + "zha": ["dʒa"], + "zhai": ["dʒai"], + "zhan": ["dʒan"], + "zhang": ["dʒɑŋ"], + "zhao": ["dʒaʌ"], + "zhe": ["dʒø"], + # "zhei": ["dʒei"], it doesn't exist + "zhen": ["dʒœn"], + "zheng": ["dʒɵŋ"], + "zhi": ["dʒʏ"], + "zhong": ["dʒoŋ"], + "zhou": ["dʒou"], + "zhu": ["dʒu"], + "zhua": ["dʒua"], + "zhuai": ["dʒuai"], + "zhuan": ["dʒuan"], + "zhuang": ["dʒuɑŋ"], + "zhui": ["dʒuei"], + "zhun": 
# Regular expression matching whitespace:
_whitespace_re = re.compile(r"\s+")


def expand_abbreviations(text, lang="en"):
    """Expand known abbreviations for *lang* ("en" or "fr").

    Fix: an unsupported language code now returns the text unchanged; the
    original left `_abbreviations` unbound and raised UnboundLocalError.
    """
    if lang == "en":
        _abbreviations = abbreviations_en
    elif lang == "fr":
        _abbreviations = abbreviations_fr
    else:
        return text
    for regex, replacement in _abbreviations:
        text = re.sub(regex, replacement, text)
    return text


def lowercase(text):
    """Lowercase the text."""
    return text.lower()


def collapse_whitespace(text):
    """Collapse whitespace runs to single spaces and strip the ends."""
    return re.sub(_whitespace_re, " ", text).strip()


def convert_to_ascii(text):
    """Transliterate text to its closest ASCII representation."""
    return anyascii(text)


def remove_aux_symbols(text):
    """Remove auxiliary symbols: angle/round/square brackets and double quotes."""
    text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text)
    return text


def replace_symbols(text, lang="en"):
    """Replace symbols based on the language tag.

    Args:
        text:
            Input text.
        lang:
            Language identifier. ex: "en", "fr", "pt", "ca".

    Returns:
        The modified text
        example:
            input args:
                text: "si l'avi cau, diguem-ho"
                lang: "ca"
            Output:
                text: "si lavi cau, diguemho"
    """
    text = text.replace(";", ",")
    # Catalan fuses hyphenated words; other languages split them on a space.
    text = text.replace("-", " ") if lang != "ca" else text.replace("-", "")
    text = text.replace(":", ",")
    if lang == "en":
        text = text.replace("&", " and ")
    elif lang == "fr":
        text = text.replace("&", " et ")
    elif lang == "pt":
        text = text.replace("&", " e ")
    elif lang == "ca":
        text = text.replace("&", " i ")
    text = text.replace("'", "")
    return text


def basic_cleaners(text):
    """Basic pipeline that lowercases and collapses whitespace without transliteration."""
    text = lowercase(text)
    text = collapse_whitespace(text)
    return text


def transliteration_cleaners(text):
    """Pipeline for non-English text that transliterates to ASCII."""
    # text = convert_to_ascii(text)
    text = lowercase(text)
    text = collapse_whitespace(text)
    return text


def basic_german_cleaners(text):
    """Pipeline for German text."""
    text = lowercase(text)
    text = collapse_whitespace(text)
    return text


# TODO: elaborate it
def basic_turkish_cleaners(text):
    """Pipeline for Turkish text (maps dotted capital I before lowercasing)."""
    text = text.replace("I", "ı")
    text = lowercase(text)
    text = collapse_whitespace(text)
    return text


def english_cleaners(text):
    """Pipeline for English text, including number and abbreviation expansion."""
    # text = convert_to_ascii(text)
    text = lowercase(text)
    text = expand_time_english(text)
    text = en_normalize_numbers(text)
    text = expand_abbreviations(text)
    text = replace_symbols(text)
    text = remove_aux_symbols(text)
    text = collapse_whitespace(text)
    return text


def phoneme_cleaners(text):
    """Pipeline for phonemes mode, including number and abbreviation expansion."""
    text = en_normalize_numbers(text)
    text = expand_abbreviations(text)
    text = replace_symbols(text)
    text = remove_aux_symbols(text)
    text = collapse_whitespace(text)
    return text


def french_cleaners(text):
    """Pipeline for French text. There is no need to expand numbers, phonemizer already does that."""
    text = expand_abbreviations(text, lang="fr")
    text = lowercase(text)
    text = replace_symbols(text, lang="fr")
    text = remove_aux_symbols(text)
    text = collapse_whitespace(text)
    return text


def portuguese_cleaners(text):
    """Basic pipeline for Portuguese text. There is no need to expand abbreviation and
    numbers, phonemizer already does that."""
    text = lowercase(text)
    text = replace_symbols(text, lang="pt")
    text = remove_aux_symbols(text)
    text = collapse_whitespace(text)
    return text


def chinese_mandarin_cleaners(text: str) -> str:
    """Basic pipeline for Chinese: spell out Arabic numerals as hanzi."""
    text = replace_numbers_to_characters_in_text(text)
    return text


def multilingual_cleaners(text):
    """Language-agnostic pipeline (lang=None skips the '&' expansion)."""
    text = lowercase(text)
    text = replace_symbols(text, lang=None)
    text = remove_aux_symbols(text)
    text = collapse_whitespace(text)
    return text


def no_cleaners(text):
    """Pass-through cleaner; only removes newline characters."""
    return text.replace("\n", "")
class CMUDict:
    """Thin wrapper around CMUDict data. http://www.speech.cs.cmu.edu/cgi-bin/cmudict"""

    def __init__(self, file_or_path, keep_ambiguous=True):
        # Accept either a filesystem path (opened as latin-1) or an
        # already-open file-like object of dictionary lines.
        if isinstance(file_or_path, str):
            with open(file_or_path, encoding="latin-1") as cmufile:
                parsed = _parse_cmudict(cmufile)
        else:
            parsed = _parse_cmudict(file_or_path)
        if not keep_ambiguous:
            # Keep only words with a single, unambiguous pronunciation.
            parsed = {w: prons for w, prons in parsed.items() if len(prons) == 1}
        self._entries = parsed

    def __len__(self):
        return len(self._entries)

    def lookup(self, word):
        """Return the list of ARPAbet pronunciations of *word*, or None if unknown."""
        return self._entries.get(word.upper())

    @staticmethod
    def get_arpabet(word, cmudict, punctuation_symbols):
        """Wrap *word* as "{ARPABET}" when found in *cmudict*, preserving one
        leading and one trailing punctuation symbol."""
        prefix = ""
        suffix = ""
        if word and word[0] in punctuation_symbols:
            prefix, word = word[0], word[1:]
        if word and word[-1] in punctuation_symbols:
            suffix, word = word[-1], word[:-1]
        arpabet = cmudict.lookup(word)
        if arpabet is None:
            return prefix + word + suffix
        return prefix + "{%s}" % arpabet[0] + suffix


# Matches alternate-pronunciation markers such as "(1)" appended to words.
_alt_re = re.compile(r"\([0-9]+\)")


def _parse_cmudict(file):
    """Parse dictionary lines into {WORD: [pronunciation, ...]}."""
    entries = {}
    for line in file:
        if not line:
            continue
        lead = line[0]
        # Entries start with an uppercase letter or an apostrophe;
        # comment/metadata lines are skipped.
        if not ("A" <= lead <= "Z" or lead == "'"):
            continue
        # NOTE(review): CMUdict files conventionally separate word and
        # pronunciation with two spaces — confirm this separator is intended.
        parts = line.split(" ")
        word = _alt_re.sub("", parts[0])
        pronunciation = _get_pronunciation(parts[1])
        if pronunciation:
            entries.setdefault(word, []).append(pronunciation)
    return entries


def _get_pronunciation(s):
    """Return the normalized pronunciation string, or None if any symbol is invalid."""
    parts = s.strip().split(" ")
    if any(part not in VALID_SYMBOLS for part in parts):
        return None
    return " ".join(parts)
b/TTS/tts/utils/text/english/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d29e02e0d6462560d066329b590d49e6322b6fe GIT binary patch literal 188 zcmZ3^%ge<81nFs6X(0MBh=2h`DC095kTIPhg&~+hlhJP_LlF~@{~09t%U?gVIJKx) zzcR5nL*FH}IJ+djK;Jn(H?1<%Q$M-1xFkO}J}*BdwOBtSBv`+sq*%YSBr~U2za+Jy zL_akzJtq^WFFrmqFS8^*Uaz3?7l%!5eoARhs$CH)(0q^!iur-W2WCb_#t#fIqKFwN F1^_y+FN6R9 literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/english/__pycache__/abbreviations.cpython-311.pyc b/TTS/tts/utils/text/english/__pycache__/abbreviations.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7ef772345b6b6a3cfaaaa1ec3638ffa4d62fc19 GIT binary patch literal 875 zcmY*YJx>%t7=Cx>zBt8P)R@>|p~*F@fF>45Vt^$DXq@$IP7J2qqMi z7Ft1JsTf0IDEuLiSeU6ytmw|1Hdp!1URY#y_t|-7p7(QhKaY-<0Y|gZYJBwpemQbJ zkf)=64`2I0fqEQPf%d{bvxav`F8LGK6x3U;{*#k8vc9#?G)xiwhL?ZyLxS0IpvuP zQzh0{%oZb=v}4X}dEx2e^B428PnPGazV*lNTW?D{_1M^;7OZHdbkGg;Mli?pDAt+pH(ql1o#xeg55G654!9;01YuPgRC4oxhD9&dLCDz;o+@l`3WY$MtP`!@jkVI))NMg;jcGV>+WriuoC^9M|BU7f0&Fe50#%)D%*r8B# z9c`hq)e&*3BuUt^d4wTrGGU7dOobe^gks3z(5;I}uy|WSV2TOqoN;5zJZ2rkM3_b+ z8&cR21gXM0XU<4BjV)pCn`|h_MPb`uN{E=EQ!u@0iwK=qr54;!TjX&P8|I>rNOD&K zmx8&p{D!f)hE!Oc%42(M<2G&}e;k}ZWq7A<{{ao|?*IS* literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/english/__pycache__/number_norm.cpython-311.pyc b/TTS/tts/utils/text/english/__pycache__/number_norm.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eced88c7209b676487899efff11db7df0b29ae99 GIT binary patch literal 5298 zcmcf_OKcm*b(dU{D=x{EL@AOSyQU>sHZ4n(Ke8-2mg7jN6eST5e}GnTO3<8@M43O# zE)$EPT*mN05)_z7-I!<`l=;|^VWZHY2lZwIJ@mLMtRZ3n18I9`eG?EADBMfm>@F#i zid~;N+*v-a|CuoYgO(ANELNu~O=_wj& zYt$06PFZOK$4oI6N?_(}rOEf8@u1Y9rtC0E&pAp6#M}d(58yS*6k9WHs~NAW8MoJr zJ8Q-rHRD{(I9oIBLUlK&DK~Ng^dL^)kxQ@&^#VU_X)A4#LT>2Wk>>_8)qr?ZulE_$ z0R2YP2s6FN3$O__0c=K=^OV0i-R+ykiI{Izl9J+3ATSe=W>c4Y!--hniU941ln6+n 
za6-a~xbG=hYSK?@%xEMmrJD}E-`6wv0Rfi|f6%RW{ONiF+kL55M|AsByJ6PB@4v}h z>dbt9*RKy>I((`3gF_}k+G{ipic%(yG8ko~S$~@CIb)#1<<9_o7DonS4r!pV5ak>b^y@&EP~TsX4HajE2I(eOjYq zA|dcSI~TYp3Rn!xhs4>ys32aE63M_wA~rpL@vT5OB}$1{FrGky7`SlZd_a=KKuU^4 z#egJSl>$P1CK`cx@l@=xfPoIjdXw{-Cy0eu;zJ<_Umz9|u?JSFhgWO|kfn;8>*m1n zK<>!W(8|yzw`YUflMfe;DqNS!b;(TEk3dEJ-wzBEvwBANrX9XyA`+K;m<0sp02oJw z-O$vUgTmEhD2{?i2uEU}Xiz7{jWE_kNM9u4pu+X2T#wB3V4m=@eHew+y~EmX*{Ju}1T`?it2R$2H1_Ofh) z&y~%N*fgjbNLaVMQjLU;=SpbTC3O5x32Wqsed&6JIpnN9Ti^tq71;`<__TeT(8|>C z0f)m)VnT|^SH}$io)XR)?Zc$jZR;t5);~UO`oq{?)3?r~ZD|1~e0~Qe!BMk`$!J8< ztWhDZS;6`=>x>|2_E0h@#F56#fkpaR>?OR2nQ69U2uq^I5I3YbV1h6sV9jMVf~hz# zW2bS*uqt<{r4NY2uzhARthymsGV5>(V<8@%#|MC@6<%=y93?m{SHsPBm*35IG?(`UKne`Vr zcXqV6w-b<8Wl!&?;oSZ_ee2*-Y$diBTYWD(R&;oB4c9-;eq3zXcl+9B*9tSM>GgdN z7nGLMYRltAp9 z^OQ!&?{Z@6`GX<=nyap6%zcD(4|bNjAmvYBFQ`|mx!^AxVGb$oSgE&Zrd9T-zu z$JN&HMR#ent8&v3cnUp6t*}Q`_NdGrB~l;%Wd6pyDK+1fOBeRtT9ElJ@~~Y+&U5qR z^2yxj((uZ#%(UsA54+%C(<~Nx-wjFO*(zU8G)VV$>(GFV2(lq$MGUgtVf3~gIrKJ* z>W~w0dfSEE$aBYn_&dx3{oHU`Uu~TWmJFS#$UMM@Q3Y6YXF&}$GZ~5O&@AAWb_U?a zNsS2$pvN`>Ma_;9(P#*34x<+R%#2|PH_qT)z(}_c+yX6)5mGpz*$5Ot&xLq=Ua!JJ z+LKJA;>agT0**%FGihG$i)Dmn6^Z@K3vs{2U`sgqnP+roitPAr*B}0i*yH!#7}eeF zWnn@cjOc3bx9J~9bDG8w5x~+6lPa4BF-4L1TiuQfE2`gNM1w$Zh){kFAWLo8DQ{b` zv8~9r6#3SzI-AG2MG@dB;hT&6D}b*jcmROkrPJ>%@;<=x1P`E|^0pTFrY#3UFa#WE zZM1Ju0J0NXM=6(kJHr5T^mzpN_7eL*#^al-gvjTBE>&S|)W@>9=ZZ&`bz4-b}2dMDU{oMxybFYEu%v zbnHM9iHD;2G^{=Wujm7irJiu!O|D~u>nNQ4ZtUx^)w5qsd^@3V0hJ5LOh7ltDw&BR z*Jb`Qcs`JsB#LGM=_-09ECp6tEl+iC0cnvSM({^j;v=vL&Pa8QwT(4UA5ZZ_?b zK1%qc*+fc&gf^lPl)Dn;!z33;v%XpII#@t@bk{72S=~{AraE9AQA~{?!CcqQwB@i5 zlep=p@$00r;XP0pntO`0UL$}6eQ-G>4ro@;a*-H~&u91%Pf4K-^~WPHHVLozZvddr zPWNKy=FIX;F0?eaGPe|131ml#4sJ0lJ6hq{&|z-Ws0|!Ih(x*0;UdT9 zI%IA)JR3U9Ge<02aONcYm?D|MbTG)9c|!BY%o0`^VM&P z?`-lHHuwwj#b3(r|4QM5Dj$^D;I^@jtToD9hr%6Kx#Kc(e5ak3ebdi(*y&u^N-K3n zs>tRt)ICs!2W87ysIv2l`6O8~i$uPfSzdZR`NU?C$VJdBFFl`J9L*w;Tb5Z~dOk@P z%_6;0YC#UWL;ei-1wTFMcWD+ZKqO6F*6NKLY|wPWSkSMsnk!fegK*KsB*tlt<@?|^ 
def __expand_currency(value: str, inflection) -> str:
    """Spell out a numeric currency amount (e.g. "1.50") using *inflection*.

    *inflection* maps quantities to unit names: 1/2 for singular/plural main
    units and 0.01/0.02 for singular/plural sub-units (cents); key 2 doubles
    as the generic fallback unit name.
    """
    parts = value.replace(",", "").split(".")
    if len(parts) > 2:
        return f"{value} {inflection[2]}"  # Unexpected format
    text = []
    integer = int(parts[0]) if parts[0] else 0
    if integer > 0:
        integer_unit = inflection.get(integer, inflection[2])
        text.append(f"{integer} {integer_unit}")
    fraction = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    if fraction > 0:
        # The decimal part is hundredths, so 0.01/0.02 select the sub-unit name.
        fraction_unit = inflection.get(fraction / 100, inflection[0.02])
        text.append(f"{fraction} {fraction_unit}")
    if not text:
        return f"zero {inflection[2]}"
    return " ".join(text)


def _expand_currency(m: "re.Match") -> str:
    """re.sub callback: group 1 is the currency symbol, group 2 the amount."""
    currencies = {
        "$": {
            0.01: "cent",
            0.02: "cents",
            1: "dollar",
            2: "dollars",
        },
        "€": {
            0.01: "cent",
            0.02: "cents",
            1: "euro",
            2: "euros",
        },
        "£": {
            0.01: "penny",
            0.02: "pence",
            1: "pound sterling",
            2: "pounds sterling",
        },
        "¥": {
            # TODO rin
            0.02: "sen",
            2: "yen",
        },
    }
    unit = m.group(1)
    currency = currencies[unit]
    value = m.group(2)
    return __expand_currency(value, currency)


def _expand_ordinal(m):
    """re.sub callback: spell out an ordinal like "3rd"."""
    return _inflect.number_to_words(m.group(0))


def _expand_number(m):
    """re.sub callback: spell out a plain number; 1001-2999 are read as years."""
    num = int(m.group(0))
    if 1000 < num < 3000:
        if num == 2000:
            return "two thousand"
        if 2000 < num < 2010:
            return "two thousand " + _inflect.number_to_words(num % 100)
        if num % 100 == 0:
            return _inflect.number_to_words(num // 100) + " hundred"
        return _inflect.number_to_words(num, andword="", zero="oh", group=2).replace(", ", " ")
    return _inflect.number_to_words(num, andword="")


def normalize_numbers(text):
    """Expand commas, currency, decimals, ordinals and plain numbers to words.

    NOTE(review): the regexes (_comma_number_re, _currency_re, ...) and
    _inflect are defined alongside this module's imports, outside this block.
    """
    text = re.sub(_comma_number_re, _remove_commas, text)
    text = re.sub(_currency_re, _expand_currency, text)
    text = re.sub(_decimal_number_re, _expand_decimal_point, text)
    text = re.sub(_ordinal_re, _expand_ordinal, text)
    text = re.sub(_number_re, _expand_number, text)
    return text


# Fix: the original pattern wrote the meridiem alternatives as a\\.m\\. inside
# a raw string, i.e. the regex "literal backslash + any char", so dotted forms
# like "a.m." / "p.m." could never match.  Single \. is the correct escape.
# Group numbering (6 = minutes, 7 = am/pm) is unchanged.
_time_re = re.compile(
    r"""\b
    ((0?[0-9])|(1[0-1])|(1[2-9])|(2[0-3]))  # hours
    :
    ([0-5][0-9])                            # minutes
    \s*(a\.m\.|am|pm|p\.m\.|a\.m|p\.m)?     # am/pm
    \b""",
    re.IGNORECASE | re.X,
)


def _expand_num(n: int) -> str:
    """Spell out an integer with inflect."""
    return _inflect.number_to_words(n)


def _expand_time_english(match: "re.Match") -> str:
    """re.sub callback: read a clock time aloud ("5:30 pm" -> "five thirty p m")."""
    hour = int(match.group(1))
    past_noon = hour >= 12
    time = []
    if hour > 12:
        hour -= 12
    elif hour == 0:
        hour = 12
        past_noon = True
    time.append(_expand_num(hour))

    minute = int(match.group(6))
    if minute > 0:
        if minute < 10:
            time.append("oh")  # e.g. 5:05 -> "five oh five"
        time.append(_expand_num(minute))
    am_pm = match.group(7)
    if am_pm is None:
        # No explicit meridiem: infer it from the 24h hour value.
        time.append("p m" if past_noon else "a m")
    else:
        time.extend(list(am_pm.replace(".", "")))
    return " ".join(time)


def expand_time_english(text: str) -> str:
    """Replace every clock time in *text* with its spoken-English form."""
    return re.sub(_time_re, _expand_time_english, text)
literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/french/__pycache__/abbreviations.cpython-311.pyc b/TTS/tts/utils/text/french/__pycache__/abbreviations.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac2f0d12d08012f264f223b692a54681e88e315c GIT binary patch literal 1524 zcmb7EOK%%h6u$HFt93}#NC-A{qyn{sJxyVOP^l0iStSq1cUOMUzjiF-|s%okXuZYZ5IRdk{t_I#!)3tujN_04?i`qKQi zuJq<*rHur#Yqg^#`;K5-*lSeu>>EscFQeFQML~D%&VubELPo*;Fmjo&mzS4pDTSR# z&lk32BWZUz3!R=#I~~r39+h4cie|iaRJrPVLSp%A{4CTl;T1Q*tdlXhIHRAMKTG9* z3{ogPT=}W8aeHI%WO;vj_LnP9KijwFo?COz=APaD{o_6JldI2 z(~ATW4ek!pGq80oOB=%|fjY_v=tvd8ctH|m*b4!B;RNepl;AB3lVEq7N64TqS4A*b z4zP~UjU0JgTNhGUf_kCEIVVyjJPsOq8Mn_>p`V0agl!sPRjMM~f>oBWO^3!3_%!w; zBtV7}`FNfDC}ck7qeE4oLl)w~(-IvvYqy$>R&pLXq(d6Y+O6Gfk%&gC$KfIXr%Yo$ zA(lV1Zhe^gMw##5|*@y+Pj|7vA9A8z@Cc+!C0G7=^#ay{Y0pyy#3t*z@5B`s) zw;X>nVe{PP+|!vo^Zl`LLe9c?74pF~GFyo=uBZq)Q^ z62IT&+a!z1x;iRd4I&qg=o+7g7+kVg0W&r eZfbc?riIf04{%X0(HtyjAI|7|Wcv7@)&3VHI*!o* literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/french/abbreviations.py b/TTS/tts/utils/text/french/abbreviations.py new file mode 100644 index 0000000..f580dfe --- /dev/null +++ b/TTS/tts/utils/text/french/abbreviations.py @@ -0,0 +1,48 @@ +import re + +# List of (regular expression, replacement) pairs for abbreviations in french: +abbreviations_fr = [ + (re.compile("\\b%s\\." % x[0], re.IGNORECASE), x[1]) + for x in [ + ("M", "monsieur"), + ("Mlle", "mademoiselle"), + ("Mlles", "mesdemoiselles"), + ("Mme", "Madame"), + ("Mmes", "Mesdames"), + ("N.B", "nota bene"), + ("M", "monsieur"), + ("p.c.q", "parce que"), + ("Pr", "professeur"), + ("qqch", "quelque chose"), + ("rdv", "rendez-vous"), + ("max", "maximum"), + ("min", "minimum"), + ("no", "numéro"), + ("adr", "adresse"), + ("dr", "docteur"), + ("st", "saint"), + ("co", "companie"), + ("jr", "junior"), + ("sgt", "sergent"), + ("capt", "capitain"), + ("col", "colonel"), + ("av", "avenue"), + ("av. J.-C", "avant Jésus-Christ"), + ("apr. 
J.-C", "après Jésus-Christ"), + ("art", "article"), + ("boul", "boulevard"), + ("c.-à-d", "c’est-à-dire"), + ("etc", "et cetera"), + ("ex", "exemple"), + ("excl", "exclusivement"), + ("boul", "boulevard"), + ] +] + [ + (re.compile("\\b%s" % x[0]), x[1]) + for x in [ + ("Mlle", "mademoiselle"), + ("Mlles", "mesdemoiselles"), + ("Mme", "Madame"), + ("Mmes", "Mesdames"), + ] +] diff --git a/TTS/tts/utils/text/japanese/__init__.py b/TTS/tts/utils/text/japanese/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/tts/utils/text/japanese/__pycache__/__init__.cpython-311.pyc b/TTS/tts/utils/text/japanese/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..28517f85373095691481cb3be9e0d408ab6c34e2 GIT binary patch literal 189 zcmZ3^%ge<81nFs6X(0MBh=2h`DC095kTIPhg&~+hlhJP_LlF~@{~09tD?mTAIJKx) zzcR5nL*FH}IJ+djK;Jn(H?1<%Q$M-1xFkO}J}*BdwOBtSBv`+sq*%YSBr~U2za+Jy zL_aIBATcksI8{GBJ~J<~BtBlRpz;@oO>TZlX-=wL5i8IFkQ0jefy4)9Mn=XD3^1aI H87Kw-a(6G{ literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/japanese/__pycache__/phonemizer.cpython-311.pyc b/TTS/tts/utils/text/japanese/__pycache__/phonemizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..161f4bcddcad543868f47fd88de24ddd1c5b1b4c GIT binary patch literal 14421 zcmb_iYitxpw(g!8zh-P~W1H{_jCniw!7C4*!8Q)qfC1yYoUq=pYiw-%qGueyv9rSr zfwAMn<`tY>lFTNAHGw4LwSk0eqSfAeSGrd>Gtped8VM=d(Mr3&VhKfxxPR_~P?iQu*Q|w#+qy{6avN+GP|R{sRg}y47nj@Sw=lKA_KfY2dPq5JeOfuB!kV=s zr8A`=MO9iAW_wF*NQJJ_o~CQDlw>On=~MDkr{rg_v|~zx9i{9SVdS!INO{(xC=HH= zOqTA>Vj1qN78|o4vo%a(j!<@@kcnWK#}rt}!GA8xO0I>=rX|Z*b|17aqL$0}7uOag z8AGy0!}NRBonxvyH(861@p0^%+CVs7<-W& zXD80v-%%Upp%=_EFF2)_=CeUI#9m^O0R)7|W9D(N}m-QV4{+Ue?I z&fcyz*4A9o(dKoRUQ%NjUA>*F4s~nHv#bzNsn-EZasOL!|F6rOE~iWK7vlc6%bab_ zHiQ2S@$Lj4K>iVwrKYY${^uZy905~A%H3Yx`fsW$_$I)lt_#Ma5XW;mXEV=r?Y zU4}ni;W)Ys93vbDdO>gPDUQR`xo0_!mc%FvoaH$7Cq@|wfCB9@IF9`W zj$d&c=vuwnQI4a@z;T-6XfpiqD#y_T92&(*>H1fB-A$-lqmd@1ukretjQY;=`kIXT 
zPV)MCjQY;-`g)A|&hh$s4F8|w?d~!B4*}W@f$o0@&~6CycK?#&=rM4-&T;e@INsnm zIt?6eavYroju6MuY2XNP9GwOZ2!I0u9R~!!0fCO=*BnQufdgv{W=^Mp;{wM4n|gS+)_9NB)C5CEfTiH86d zOiTP{T*b5qLI4%hB8Zg~DiFx{?MDeA(0lGEw_#coAb<_iqJYH*DiG*v07?jf?sJ5E z@@B%B!Kv!SI6K~chUh4A=Qs6W%kJUo=B?M5Q56LrJ#T4}+ zfQl*VLjV;X^~GF7=)~|b<|cTw@=#)Kf``H(-3zo~$K*Stw-U=I>9-`j0v`G;x<2It z0aj$b5NgykvG@t6Asgmfbn{dk=35fxvE_w%EVa_f;9(vst-+HE68d6`jI~p`=o7j>(yNI&A6b$j^f$#xE!MsOIn`Ay9rdKi_662Q4 z$HeFcKBbW8>ZK4J_kYGCi*DgMW$6~aAV#AP7>OY;OpL@3c$pa8z$?V)21YP-69!HZqZ>HOJ6W_mFh)7L zfnO1$8+d~ja_Pswo5bh_Ld56@(C63yNNeaaEdG+g=)jUJ8FUbqW67Y4uoO$?3Ng9? z^cU7sk>f`WUfd+}J~7gO|0*$hKYT=tG~oY)7~Q~cY31=m=gXMmK<|Lvcx%#^j-xBp6H|ibsOM+`zs?%%%X`E2mok z(?rg(0Q}r73t+y~N-|L9JSc6z_tYfX8N>rO%GSxbi4mgDV*~_!9wXG~^B7^l{iB%Z z7&AH|bT|-c(_l>)@?jui%E=#YKw2nCjM7I%fdD3Fai3+~^1f-${LJYp=_@a3 zSXW&7=)Pu{OI$eURjl~G^(>MTbm*%dQ@n|6MVWf6Iiz^0yu5-+i^?p=(wTK?x#S}t z9Ou|-n-k~5(55X@9{z9DAvveCD*ej7MFXnWRHk1InK%@a7gLgrCvzt)!;4X!dRr|k zvcyuny*(Z7Sc<0yM?>H(r7L3UVGo_!I-R%c${zQ)HBZ?QmuG+37Psesx4WmTvb(e8 z$m7*z&AlFPcV}Z)H*g)HGdA**pUT<56r_Ad<>@91T$5LfIIE8X|w)MHS(w-x? z?HfAUJl^K+&Yn$u&c;sH0k_uM;qG+xly2zgZgzEeHi6JftV07m4*-l$38$|VcPRJe zg5bfCLqmr~J%jzj{gI5h(Tur?>|5l8Bv(>@tj7PXhmf3APpI1!6)uHCrw^$?6+NUJ zOCPjM?V%@>bE>Kg=kdcfOOizzzr1rQxW#iy8(Et}$La&>fF-$;lHEUOJ;0B|KbYCC z_FGO{hUe~8E}=VO)(&@9kxip>=$LvSral)_+dMSrVU$Ud&x1?$_3gtsHZ`XB{uUlzJugYT>(%amQ%k=$>&r#~s>C)K{y~ zF?5lYW76h>j;S3U@}_gMM3k{Xudi?Z-vPu?tcCJVKmA4d3w3Bt74B=f2}mG9E0IKU z*F*u6XPZ+Fs>t$J8uWi9Ct|$X8!LC!K2i7hj;g)dbP}mrAs!wovzdD>mT3m2EkVXw zatMdwQzrPZdZ!)ZCz8H0n!eIke$$a-uJ&3BxJXV_6se;s>Zl6asyLLGjmCK_RdaiL zwXQn`2oD=?p(+|`Ayj+kbrGV=3Go1e%YfRiXs68~q<_k6W|jjq{1k1Cmqv%YLZ+aj z%M{eh@a^eE%dCVbc`y!G`YlIuP|Cur=28O+4bDEilMJZC)n1DK^15d|w!d3P%HOEt z`F@q90*A>8eL)fkAiq;rxq8}5uQ&1v(c*B&#nSrK$MXg3-Pg{s2raEt{>x$A4KfPL8k2aTN8I3d? 
zcb4y76|=Uwy#~J$WaX|MyJ{Qjo{CvLy-k=}_{6BhtT?mR3P6d;<#ft{OHfwH>a{h< zFQM-O#$h4Z|CJTPEz8zRC&vv`Hl^W#&!Cw4irz?eN+$b!by?)6JY& z=dw;^jj_2&xl~%;3g}y!*z~G0T|c%p<`q6PekgD+jj)D&jPc zFRw4JuPd+J8%xvNJsqxQ_a&9j7#`l`EXmm4rny$pRM9F>^RqNXXjUP#V~T7V+Vpx^ z#PLAX@c^a?1RI7njHQPbM;yzdj%8unGT|Tk>KzY!5%(R9`}{xLcoTcZUK(g|-*DXb za@_Yy+&A(k-`O8-1b?_O_`{7aez@`FGyCE`-=BOJlYB{$nfvlDfN?HqqZoqZ&TDFP zA5dErL}m3}{!I4#7G>X?1J-`Tc(JL_EUMD=YQOa!+SX+40b9S#9C5*=7*Tv6bs(ib zRh!Xoxkt>O@@oTZmA3Y$nAeKul)s@p<+lvbZs|{9seem{r6uEsjt@1ORsm?M!$bE( zd}o~@FP*!|i=~HX*L}y;KjqgO$tW&v%pt*VQq^td!^vuQ&aOLg`1xVuEz?zXO?G_i`Lax+Y$t!~WL(bk1+Jw#ql%tl{&W40sRooya!A$F8> ziB%?p);TeM_9Cx^=Fk6v@F_R5@<;X!?HgMY$#O=soW94zGLtjoT;{3FvHD2Pf@scy zNhK$xkS9SK=GW+hLmRO~Wab8|CUWx6*-zP_p5u(>ID;wQ7t9`8dFH9H-q4=&hl7ll zpH%D_g;+#JYKCe?y^+kh(agDFet(~xclL#F_F~*vm&}&JIZoUYxdmhM!nsRv>-i5x zatfn4g<*ap|MmId+-10d_msfZ{uw>9yoVfUEC`|#YqzQzlaAujt7Fb)g|&Q=q)Z7ysa*mhdJls0epJ69#c!? z85TafT!;to5$?KH3@=MPSi!OO$G6q>!aiBX_ZTUTsT+8?KVKb*AH|1@p_sZ+V#C)H z6au!(F?EwDbm2Fm&~0_Iv<_4Oz>KN-OZGTaVrr9o5kAeiX2DTwk;jB8{~o~w`Y{^7 zBfCWwhM}`DwN-LB^hDl%Su4K5pzu-t;vfs)LK(BaDx4o`nAx)v~Y!wxHp9G6It>ESG$u5$J<=(PyQ>tvmssf%%b1F^ZU2K?kieGwUq9AfLB)3m&>2a zmpxl5h@wZ6wl=i)=3AThLHr|I8#*b&_9kijhn+gvk=Na~L^v*pPMrWd?>=vnWk z*RNuMo^rNE5cu)UIuNwuWva372}y9t4|+pRhQ!VC#s0QhEY@^Y+YBx2QCsma?NM*1 zZ|HD!HZj+xzSa7vSYxwz-)zJ$v6^tk*68hS{f1@Rtw(&mYYT?!LPZoVv z{88~W&({@^Ra>L0w)(^#KdH0GS-gc!Af~p^3z>f1-Ia7AV0a{%OZEc2rSm;Ke>LWB zJ2;ieYOH*`uCA)K^65sr2Ww?0q#Z$G@`*;8|CBrao!qe$+CC+;Cv@;a{d+sVE8h5R z@y2Veuhkpu>+-K%f7lW!u8tO0M;2G#%{z(yUjZu^$}p&3$zy?8uH3WNW0rOoj_qt6 zuC7*%CdR+6h5tbJZmHFYo3U`GHS2n@Bh9UzVk{N$%!{AhU)0}J%s0-7Y`cB-AD696gN)UrXKU& zv75Kn-g)qlm`__^OPm3a`(D%jy5Hf;rxBxUPT#n+Te@X zvKE%jJ)_ZZ=v#kB_qaNG_PZKAM>?CjJMJDs^rQ6uqCP8v6`Z{0Rg3#-)Bofz{w}}h z+x(*O1>;ST{FTxCm5E&58ATRtCzN6-9o>iAnpRG@U`dg04@>_+MA{&9{*xdq2SM-V zStAb*JuK?Fw&kmu&ugylxUmC#B(FM()KL?4)P!w!84)%Z;0sY0MDpxyCuIC$I7dy6 zROl^!ZYuK8D=2IHA|JW*W+bn>kW8A?IuKcE?W<`6{Iaz?w%` zY*wNxsXkhd^pj8R6_ovkI?Kiq3C-!{^})YjqFRuX4#tgEW2s;s9FxP;13 
zgx5+b%Zejf(R?wUaoE)Aj#=r=IHQqHnzonMmV=~|r>&f%Z&UnCN_&*7R0u0L(Z$O( zjBO|34icu;yINZ@Iku66yZHoBK1%v2IZBD25~DlFr#uSqR`Yjp>=Qb>8NP;X(q2a{ z4Q9_}4BAPns;ZOeifZ>|(BFhIJ*?l8HmkZvov`Kjc8I^9Ceo%)Dk_BLbi!T`TppgW zEn=^Z+N=L~Fl^iX6E3p6M+Nb{GC!PrPuf~+(=3yUk^1bDyEBxm`+^n2iFb8?fuL@< zAv|+gB&#TzRfJNh7WFaIZc6RS%tfR9Cto=8LPW{;Wdzqwrsk=6qn=3xk1_G!v?(c8 zva(0E4#|_&kUQiauN<$unsTY;O3gJZ{;ICqZ`i-ybA4K5$=2u+Di_Jy7Dbx5Et|MaI9oGUq163Q4~8=3xKbozt7Y-(E3XeG6+XiPjfZA$FQ^n%g6(fZN)@a*N0 zg5qdFaU{1Snp@(VHi4XlqimFg=WLG5D38u4k4&$KPOtD~PuOP!7Y`Tnd6G7nny)UT zO7Ngc@R*vEZYr=$*wTXYCas{px21(MW>2PooJ>_x(t~p*(}+$NbOzCOqUTLIh|U!9 zETX4@Cgp5mazsWh(bI)O9?|(iUO@B=LElI8OrmE_&LVoYpyv=hSJ3wpJx`RMPqb6W z3yEGJ=!HZplq?d0#Y8U=N=u1eCgep#KOp4Gi7poM5~525T}Jc@L9Zlwm7rG>y++Uv z68(^%A13+{qUTJmC3>Bp*Au-#l-Wr1CLw>6=*_CgC|8NAkX$9_wh%||tl}(h7f&AJ z#pX?J&oSQSbopU?DWdutncXF str: + """Convert katakana text to phonemes.""" + text = text.strip() + res = "" + while text: + if len(text) >= 2: + x = _RULEMAP2.get(text[:2]) + if x is not None: + text = text[2:] + res += x + continue + x = _RULEMAP1.get(text[0]) + if x is not None: + text = text[1:] + res += x + continue + res += " " + text[0] + text = text[1:] + res = _COLON_RX.sub(":", res) + return res[1:] + + +_KATAKANA = "".join(chr(ch) for ch in range(ord("ァ"), ord("ン") + 1)) +_HIRAGANA = "".join(chr(ch) for ch in range(ord("ぁ"), ord("ん") + 1)) +_HIRA2KATATRANS = str.maketrans(_HIRAGANA, _KATAKANA) + + +def hira2kata(text: str) -> str: + text = text.translate(_HIRA2KATATRANS) + return text.replace("う゛", "ヴ") + + +_SYMBOL_TOKENS = set(list("・、。?!")) +_NO_YOMI_TOKENS = set(list("「」『』―()[][] …")) +_TAGGER = MeCab.Tagger() + + +def text2kata(text: str) -> str: + parsed = _TAGGER.parse(text) + res = [] + for line in parsed.split("\n"): + if line == "EOS": + break + parts = line.split("\t") + + word, yomi = parts[0], parts[1] + if yomi: + res.append(yomi) + else: + if word in _SYMBOL_TOKENS: + res.append(word) + elif word in ("っ", "ッ"): + res.append("ッ") + elif word in _NO_YOMI_TOKENS: + 
pass + else: + res.append(word) + return hira2kata("".join(res)) + + +_ALPHASYMBOL_YOMI = { + "#": "シャープ", + "%": "パーセント", + "&": "アンド", + "+": "プラス", + "-": "マイナス", + ":": "コロン", + ";": "セミコロン", + "<": "小なり", + "=": "イコール", + ">": "大なり", + "@": "アット", + "a": "エー", + "b": "ビー", + "c": "シー", + "d": "ディー", + "e": "イー", + "f": "エフ", + "g": "ジー", + "h": "エイチ", + "i": "アイ", + "j": "ジェー", + "k": "ケー", + "l": "エル", + "m": "エム", + "n": "エヌ", + "o": "オー", + "p": "ピー", + "q": "キュー", + "r": "アール", + "s": "エス", + "t": "ティー", + "u": "ユー", + "v": "ブイ", + "w": "ダブリュー", + "x": "エックス", + "y": "ワイ", + "z": "ゼット", + "α": "アルファ", + "β": "ベータ", + "γ": "ガンマ", + "δ": "デルタ", + "ε": "イプシロン", + "ζ": "ゼータ", + "η": "イータ", + "θ": "シータ", + "ι": "イオタ", + "κ": "カッパ", + "λ": "ラムダ", + "μ": "ミュー", + "ν": "ニュー", + "ξ": "クサイ", + "ο": "オミクロン", + "π": "パイ", + "ρ": "ロー", + "σ": "シグマ", + "τ": "タウ", + "υ": "ウプシロン", + "φ": "ファイ", + "χ": "カイ", + "ψ": "プサイ", + "ω": "オメガ", +} + + +_NUMBER_WITH_SEPARATOR_RX = re.compile("[0-9]{1,3}(,[0-9]{3})+") +_CURRENCY_MAP = {"$": "ドル", "¥": "円", "£": "ポンド", "€": "ユーロ"} +_CURRENCY_RX = re.compile(r"([$¥£€])([0-9.]*[0-9])") +_NUMBER_RX = re.compile(r"[0-9]+(\.[0-9]+)?") + + +def japanese_convert_numbers_to_words(text: str) -> str: + res = _NUMBER_WITH_SEPARATOR_RX.sub(lambda m: m[0].replace(",", ""), text) + res = _CURRENCY_RX.sub(lambda m: m[2] + _CURRENCY_MAP.get(m[1], m[1]), res) + res = _NUMBER_RX.sub(lambda m: num2words(m[0], lang="ja"), res) + return res + + +def japanese_convert_alpha_symbols_to_words(text: str) -> str: + return "".join([_ALPHASYMBOL_YOMI.get(ch, ch) for ch in text.lower()]) + + +def japanese_text_to_phonemes(text: str) -> str: + """Convert Japanese text to phonemes.""" + res = unicodedata.normalize("NFKC", text) + res = japanese_convert_numbers_to_words(res) + res = japanese_convert_alpha_symbols_to_words(res) + res = text2kata(res) + res = kata2phoneme(res) + return res.replace(" ", "") diff --git a/TTS/tts/utils/text/korean/__init__.py 
b/TTS/tts/utils/text/korean/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TTS/tts/utils/text/korean/__pycache__/__init__.cpython-311.pyc b/TTS/tts/utils/text/korean/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..110c8a0de1ceb384bcd4ac169659e1a89763e99e GIT binary patch literal 187 zcmZ3^%ge<81nFs6X(0MBh=2h`DC095kTIPhg&~+hlhJP_LlF~@{~09t%TGVFIJKx) zzcR5nL*FH}IJ+djK;Jn(H?1<%Q$M-1xFkO}J}*BdwOBtSBv`+sq*%YSBr~U2za+Jy zL_a&fC^a!pKR!M)FS8^*Uaz3?7l%!5eoARhs$CH)(0Gsoiur-W2WCb_#t#fIqKFwN F1^@tWFBt#; literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/korean/__pycache__/ko_dictionary.cpython-311.pyc b/TTS/tts/utils/text/korean/__pycache__/ko_dictionary.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..057c9c6f7ff29dd3262146fe072b03e718d7e31f GIT binary patch literal 1519 zcmYk3%}-NV6u{r@Ymv5mNENJr--47P&w}3|TG}GDOslr&46`tVQ?w&c)7K`0D<)x* zMwtu~9;rGt!vaSqVc~)WCi*|HGq+^$rEE z`_sQnzfcwBGX=YsZAZQcl!5{ZFt8LY6G}<}6|5@^Y*2&T1GPvyIFL@LL)JqBvJv(o zo4|$K2m6uD(1JVwZsb8|MYcgZ@(>(G9)Y9CV{jaK0#2?l?9&0A7@dMHqzAf@Ug$xd zhF+uwXOKSVL-xa2mf@wLl;FISAE(mxyJC+p~Du9DxV&Uq}4RiD@QSnc!rxGfU99rEYw zwDf~f`gfzey2!2JU~Ek0Ru{`JH%osmaZ5N+m8m>^i~1tygHh?}diljBx7-YvVa0e~Hr{Y+Adt9|tD|b;4c@q8>D?k%)Lcu7T z!Y%%=tj6%=CQgauY9JJoGvFSWeYVb--|RxGuz(Gl1JTYbAZNqgEGV;v!C6RVsdi*{ zWyQeq>+-?t1mM!INO)vZjbz-!W@STT5p9yXNGjKaA3r zQ7*3Stp5wwOMOmLN?j{fo@{wk&VJ(6$TPz=~&iv0pMf+rRGdkc&WW-V!>&u7~Myg1it1B^0%agR?K}70-XyI92Rk|U zJLfxpzVrS0o%7{RbF&vg85tfOb~zBLP-e3*R%hcVbY_u&1e`*Hv}IB_%?vVWc96wL zV6UJ-hu{F}6r4a^0*;}OYn^WI4>5W`ltxlX<-(AVOsGj&ij%C~BC3fxK7p;XXyb&B zg>T~zAhRfk3eavZ^Dwk%4^JVL?y<_~rtYA#z~s<8%q`mUu-3h~5`lcxQ{(1g=c2tE zR{CmlAP4kZv~``Ly18oewcb|UatJ)a_K&{}OU|q=YuxJc$E(Yqa;p!%U;PQVM>i|l z`xWhKMZ3*aw1tYc1kG|qdr-NdaclQ~TwBt(^+)eL`7Y0`-~VCl;WylR;lbL{Jh%Sv z!?QXHxpk+al4M3_h^Vv5_<6lel1VzAN=}MH6G^q+_XOgM^j)=iD?GY0B~OSXgms51 zUQ!imX8W@f{7FS53ZIQD7x+F=8B^s9|E8QC&YpaWPmC+7oF0;7K~(sGff%o<3O}wU zQwmQ-@MAI&r> 
z@hnp9SR#~1!&tyqJkz*PH%g_Lr7@;`sF80(26;!ND7MO|jXZ%VQ15Rm^bY+W=Qe6F z#pIYmogTGg^B8P(fUT$z?=dt@k?QPkxR6x+ZumCZf!xGVgpF%?)-#23c$&G0PN7+h z(M*#7n-_5Z0$^Qd!bf#>Ow5vI*rZSq?d=+Af%aKb&xk0AmolVx@`w$HtuPJuq~t_A zrSyjF)d4E$Adsg|&=bH+(aS--)t&FY(UCuHa`lw#gta=(H5fzvZy3`H9ov+>Q@9=r zqc#F^xY4JUscqC$5(E(;hEN#mE>TIuGa|4r0ZVoRQ7HP_+YO`19_ar!bE$Nt3y5WH zXSCQET?&42elB^By)Dg2z;3CA#SP|C$5TNYuLHRWke|jc_Mo=Gdb4aAigo7*k;gOe zq~l9E3$M9ppcPe5Pb!{1FT{H%55GuIt;MZyhC&s5s-Z&c?yhh5e7R?#?W?xAcQs^K z6LRTnT*~SWz>;FR+MSTonPf`T9iwtmB2BP6#2PpwuRvdSXX2_V63IZ_xpP(kXrd#O z?1E4E0LW7~bH(kghfLwsJBL0$bh~S=3r0&`t_;-8mEBwwG0x*Ff!2?@ZpA)3{mE(5 z@jtV_dw%u&;{DAFe5o^92D(35-XARmPLu;D^6tm}-PbQ)yIg#^+phNpO8!XMA1Qhw z#zVC|W_ALkF@wa5ub_W}CnPtB=hpOQrJLsi;esLb#XvwWxLWsyUPPqz`FVEhJ&B9u8R- z*Mtva!iV0rdX)&x{s!4gXAT=DR~d)1wTd>$-#gHbmYHKWxI&-+`KYVx>oW3@7B%tf zHbHw3@?#kCW7x=#VVWPqW`2aG?yr#HCL>21Dbhf!sRq3&hDh@j_4K4=VLT=Fk|Qud tA1q}ENR`DHS6v8uwI=%e3!N%HuU1_yyt9gI5^zw~CM_PyZYPEU{{XX7RE+=t literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/korean/__pycache__/phonemizer.cpython-311.pyc b/TTS/tts/utils/text/korean/__pycache__/phonemizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..959737b8408f86a199c0ca98b63666b0e1685948 GIT binary patch literal 1381 zcmbspO=}ZDbT+%mCJp`27OY@#4lOn)S(IY2w0f{0UW(B6(o0yglV+R!2(z=&XljF` zl1RWJh&k$|Djq!aF8+j6dI$?bgY+OG6pEK#^v!PCnyM#<**EX|y?JlmzV7bsLa-Jy zOPLQMLSHy>xWa8@e+-ZpNJE-nAX8Wn1dfM{kSQ*RfQzc6MYS*qYY`I8NSa&@FGMtP z7A0a|xr>D0^<-6R(KS-cPA#dX4V;)|Q&Tnc`((zGxsczy?8uvQQ|!qH!(5J&verif<$%PWf?#z%;CALAz@oy=8>yp)f%kFk?> z&=}eM3G%Kk3~Me+ux{mDhE+?$w#ztrPc>ZPV8ga^n1O<$n&c9`3lg9TSfvDOdL~0C zu^3q6c}q{*8c7B&$+BwZ4T2{El1B+5OyLXPpVz-Vsb9dOZ4*3!16JY^9t8|v#2`=j zsd2-0LSWe{KWnYMYLy?g);3#@-~aC7gowKW7PtZf4$F^xe3Qe+?-M~w znb8PWIZ}4GGQZ2g9cA7j)KS2FR+%PFj@fzTrfp^l^S70>>oD6)Sx`qunVXwc7;_Yt z>4u|lr%KMIM75NB*0zWVgF}<~f_En1rnm+luRuy1+7B_K@Hu=CO6bQRipEP1n$e*~ z=icc#RY_M#ja0-Pxo`dUCwZtL4>hABjn2K>J6Ip6uGUs7(&ygO>toeItx%Dgy=N-R 
zb*cKW_K*|(mH2LaV12H7qjsY-{W;!KVbyE3YaH!;-cx5=eZ$**!&~u@?f6J(x*6|& zI{suFd`;D+8q(lCWD5PDe9jXxloWmyL3J^dBMIe5`sGOS%O-=1w-3J`ln;w1LQDMH zzeRKa81yuNgXpAbYpy}A)3ZSGsdiET_C!Gtn&^C^bMK(5jn2Ij=_-kPk#oY?Jp|y* Otbcs$_m6)9KY?F<>1MwG literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/korean/ko_dictionary.py b/TTS/tts/utils/text/korean/ko_dictionary.py new file mode 100644 index 0000000..9b73933 --- /dev/null +++ b/TTS/tts/utils/text/korean/ko_dictionary.py @@ -0,0 +1,44 @@ +# coding: utf-8 +# Add the word you want to the dictionary. +etc_dictionary = {"1+1": "원플러스원", "2+1": "투플러스원"} + + +english_dictionary = { + "KOREA": "코리아", + "IDOL": "아이돌", + "IT": "아이티", + "IQ": "아이큐", + "UP": "업", + "DOWN": "다운", + "PC": "피씨", + "CCTV": "씨씨티비", + "SNS": "에스엔에스", + "AI": "에이아이", + "CEO": "씨이오", + "A": "에이", + "B": "비", + "C": "씨", + "D": "디", + "E": "이", + "F": "에프", + "G": "지", + "H": "에이치", + "I": "아이", + "J": "제이", + "K": "케이", + "L": "엘", + "M": "엠", + "N": "엔", + "O": "오", + "P": "피", + "Q": "큐", + "R": "알", + "S": "에스", + "T": "티", + "U": "유", + "V": "브이", + "W": "더블유", + "X": "엑스", + "Y": "와이", + "Z": "제트", +} diff --git a/TTS/tts/utils/text/korean/korean.py b/TTS/tts/utils/text/korean/korean.py new file mode 100644 index 0000000..423aeed --- /dev/null +++ b/TTS/tts/utils/text/korean/korean.py @@ -0,0 +1,32 @@ +# coding: utf-8 +# Code based on https://github.com/carpedm20/multi-speaker-tacotron-tensorflow/blob/master/text/korean.py +import re + +from TTS.tts.utils.text.korean.ko_dictionary import english_dictionary, etc_dictionary + + +def normalize(text): + text = text.strip() + text = re.sub("[⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]", "", text) + text = normalize_with_dictionary(text, etc_dictionary) + text = normalize_english(text) + text = text.lower() + return text + + +def normalize_with_dictionary(text, dic): + if any(key in text for key in dic.keys()): + pattern = re.compile("|".join(re.escape(key) for key in dic.keys())) + return pattern.sub(lambda x: dic[x.group()], text) 
+ return text + + +def normalize_english(text): + def fn(m): + word = m.group() + if word in english_dictionary: + return english_dictionary.get(word) + return word + + text = re.sub("([A-Za-z]+)", fn, text) + return text diff --git a/TTS/tts/utils/text/korean/phonemizer.py b/TTS/tts/utils/text/korean/phonemizer.py new file mode 100644 index 0000000..2c69217 --- /dev/null +++ b/TTS/tts/utils/text/korean/phonemizer.py @@ -0,0 +1,36 @@ +from jamo import hangul_to_jamo + +from TTS.tts.utils.text.korean.korean import normalize + +g2p = None + + +def korean_text_to_phonemes(text, character: str = "hangeul") -> str: + """ + + The input and output values look the same, but they are different in Unicode. + + example : + + input = '하늘' (Unicode : \ud558\ub298), (하 + 늘) + output = '하늘' (Unicode :\u1112\u1161\u1102\u1173\u11af), (ᄒ + ᅡ + ᄂ + ᅳ + ᆯ) + + """ + global g2p # pylint: disable=global-statement + if g2p is None: + from g2pkk import G2p + + g2p = G2p() + + if character == "english": + from anyascii import anyascii + + text = normalize(text) + text = g2p(text) + text = anyascii(text) + return text + + text = normalize(text) + text = g2p(text) + text = list(hangul_to_jamo(text)) # '하늘' --> ['ᄒ', 'ᅡ', 'ᄂ', 'ᅳ', 'ᆯ'] + return "".join(text) diff --git a/TTS/tts/utils/text/phonemizers/__init__.py b/TTS/tts/utils/text/phonemizers/__init__.py new file mode 100644 index 0000000..f9a0340 --- /dev/null +++ b/TTS/tts/utils/text/phonemizers/__init__.py @@ -0,0 +1,79 @@ +from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer +from TTS.tts.utils.text.phonemizers.base import BasePhonemizer +from TTS.tts.utils.text.phonemizers.belarusian_phonemizer import BEL_Phonemizer +from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak +from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut +from TTS.tts.utils.text.phonemizers.ko_kr_phonemizer import KO_KR_Phonemizer +from TTS.tts.utils.text.phonemizers.zh_cn_phonemizer import ZH_CN_Phonemizer + +try: + from 
TTS.tts.utils.text.phonemizers.ja_jp_phonemizer import JA_JP_Phonemizer +except ImportError: + JA_JP_Phonemizer = None + pass + +PHONEMIZERS = {b.name(): b for b in (ESpeak, Gruut, KO_KR_Phonemizer, BN_Phonemizer)} + + +ESPEAK_LANGS = list(ESpeak.supported_languages().keys()) +GRUUT_LANGS = list(Gruut.supported_languages()) + + +# Dict setting default phonemizers for each language +# Add Gruut languages +_ = [Gruut.name()] * len(GRUUT_LANGS) +DEF_LANG_TO_PHONEMIZER = dict(list(zip(GRUUT_LANGS, _))) + + +# Add ESpeak languages and override any existing ones +_ = [ESpeak.name()] * len(ESPEAK_LANGS) +_new_dict = dict(list(zip(list(ESPEAK_LANGS), _))) +DEF_LANG_TO_PHONEMIZER.update(_new_dict) + + +# Force default for some languages +DEF_LANG_TO_PHONEMIZER["en"] = DEF_LANG_TO_PHONEMIZER["en-us"] +DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name() +DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name() +DEF_LANG_TO_PHONEMIZER["bn"] = BN_Phonemizer.name() +DEF_LANG_TO_PHONEMIZER["be"] = BEL_Phonemizer.name() + + +# JA phonemizer has deal breaking dependencies like MeCab for some systems. +# So we only have it when we have it. +if JA_JP_Phonemizer is not None: + PHONEMIZERS[JA_JP_Phonemizer.name()] = JA_JP_Phonemizer + DEF_LANG_TO_PHONEMIZER["ja-jp"] = JA_JP_Phonemizer.name() + + +def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer: + """Initiate a phonemizer by name + + Args: + name (str): + Name of the phonemizer that should match `phonemizer.name()`. + + kwargs (dict): + Extra keyword arguments that should be passed to the phonemizer. + """ + if name == "espeak": + return ESpeak(**kwargs) + if name == "gruut": + return Gruut(**kwargs) + if name == "zh_cn_phonemizer": + return ZH_CN_Phonemizer(**kwargs) + if name == "ja_jp_phonemizer": + if JA_JP_Phonemizer is None: + raise ValueError(" ❗ You need to install JA phonemizer dependencies. 
Try `pip install TTS[ja]`.") + return JA_JP_Phonemizer(**kwargs) + if name == "ko_kr_phonemizer": + return KO_KR_Phonemizer(**kwargs) + if name == "bn_phonemizer": + return BN_Phonemizer(**kwargs) + if name == "be_phonemizer": + return BEL_Phonemizer(**kwargs) + raise ValueError(f"Phonemizer {name} not found") + + +if __name__ == "__main__": + print(DEF_LANG_TO_PHONEMIZER) diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/__init__.cpython-311.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e9dadd0a3535fd278eed6a8885d2b10441cd86c GIT binary patch literal 3954 zcmb7G%}*Oi7O%G3-!?X2oPhvtAOWl;Hc3dxOg^;Cz-02VfMSRg&qSSuF2G=Sd%D{J zyv)WKabQ-XU8Ft4(rP3QtIUjc50Un|hxrq_QA=aB#AWxiH%qxhIqj?V2X2Q1ZFN;w z)vMq8s8{b*_m6>q7eQHFSzp=lA@m-6T{ig`(aOlZ}T>Z-}~yiaBeJ_2~p`k6YvRrV?cs~_Qrm8al9DAf%sJvCitaRhh2 zw1FMas>RA-ouzt#&vlBI0Zy4deI#CPXke`1~9WViHxs_hD!8+>|MSr)UoTT(md2||+x;Z;Ox+hVnrzTW%#&rkpO^t-L^ zw@U2|@@qh$f&&W|^0HyWLcvj3yRZ!1hInB^GHgo%G+c13a~IZgQUJh)Q^HDK5-e_m zWJhqxqSy1_%opG^o~i|cUukkL?+1rcf200HB?-pXzz~<$N(;u`zz`iLW9S-YP;u4~ zu=k)+zvh9+JFTJkEv2}Qu`N1X4V`39wbFudH!#F0Cu2Mf3^|b#Fcu5FJLs>7_x)gR zwKNteRPMlGr#S^P9CI86%+fYPqRi`MpiD`t@+anq%m|$sl@%#=Oe6GJLKjyUWeqn= zQr0+yk=Mk0hG(*zvb@GTIo1;=!bW3H;#EaN%*G}M>X=aycr6-cwiJnDHt_bQDDe!y z@>wh>GH5(ju>{IFS;jo0h%KhX%`F&CERz&B9IGV3jZk5YT^875n~cyp$FAp^D6{!F za53}mA6_tz#5^P5s`RuVD_kbS+?{IPFOPFr;IXir#&VolkhWpJ=^RrBL-KvO&OLq- zH$odCyCF5HF#<~#I|56%X|>>+ZzL&G@G^p^Fe_qS;3Yp87jsJ_LM0M<5{XWU{G%A& zhg>F)XCz6KBsZawbdX5A$85EntNOcki%22D6Xfphrzzt9d zBe#EV>(F&Zb)7j1cB#QA)RME>tL|vo<=fl*_F3`SQ82vsZ136rQ}R~b{ji`02M%2W zs%zkgaK)e=3|D*A9kZK-P>HkSL;;0V+M*bC`Z_sEeqtae>Kn1ENcYTgOmJE77OQU~#wn`N_HT434O zkTDL29ETe=Zr-qQa>K@t<4cY&al&ebYx1ypEXL5OY;eJo&xkQ`8WLwXDKDqF=6EuX z-MDZHYc)C8O^M6puw(|6F?I@%nMe&55`qT#$b85Jt+8)L=c!z+scJ_0sa&n;Tulw% zoor5&6f-vr-~1PIvopWDlbU&uH0VrPR*X(LpCbUwvl%$AJhzHvgNDmTHvBWm`I)JE z?ER_P+eyQB`@!PkLUmHjDSHMB2E`gQxx);5A)PZK(=(r&5O!gXJyvFTSOIUcCeE49 
z@o*0tcDP#2R3kA(@{VNuyfxm%@oua5R zx~p1i8QoH?wT$LeYb~SSs@8fE*L@XPYS}u4)e22ho-#eJzEj_nT^$uu2M>Q*R41l?$*SiTV5+q(>TQcfU&V<+;XOqS$M-+i z!*MMZ?^f3g;Ku zS2WaH9-LGMFX@Am+Th3f;Kzi#siElm$kB_XZ}jLzEqYmxUT(;UUMl@Tk6zNE6MA%_ zP6emXVbvZj|K@t}o*Fu@h8R6`UJH%rA=uAodHA|Id{rO5t_|PRhi|HMPia!4hbqn< z>dHZ91wr@fmf5wwK0&m9uW)GZR_)zo+EbiSy^KZ===6YEuN6C@dtg~6AGCisFQb3D LZrcAv-L(G?0vjJ5 literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/bangla_phonemizer.cpython-311.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/bangla_phonemizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1791144a36800cdad098a7929a323b0c0d14506a GIT binary patch literal 3956 zcma(TTWlN0aqoDfNJ=ziy)4UZbdp_LlwC5i^JTPdW6O^7BL$HB5!S` Km*(PiRXXX7?-x zfMGA-TB&!vt&VCM*m?6{*FE^G#*wO|uZC&vUqtfKo|?q}(d@6$oBJ2hyx!g{Q5!#B zQt`k}?5*)P_tayzF$+Y+gV2tLGx1KaFVOlgR1n}yVKf%e?{en8X*oF)mnEHE)+Ie9 zrDsz~nxGo6>^UuK^bIzTWvQAO0Lg9RO7Pf^emoTT!N9ihho5a5zuY!%Y#S@v#yi`_ z>bCK21ohV%L=$T}zk%)=#1AE5P9b4l;ld7u4-3?(IHLR#iE@gt=n1=M3r3B7Pr`1% z`jr;yQQYu(s3+=Dyo=6VTwbMhk%U`;*F)Q)ywbKP?Beh-Szhri3gJC4_5;@*7`HEx z%UpQx1PQd8zSCn;lgk|trLntOPI zjer~GvD{zZ&3*MYV5W0ly`M|wP5_b#q!2xlo43%%0SyYPdp=r)gvke2&4vqTvae)R~Ly$V`4Zc`i~fX%O2^_*&uH z2?O|myG8VRNrGeqT%)+HH&K94Z8>s@l|-E8xEz0oA zcP&&cFbz~Ny`W;1&Gy)mEm*VpTR_u>0sPk>e{=hmTUJ^gyL%tGd)K-?{Pu&3f4Nj} z_ZHo6mfUX^YX2SJCbrUpyNoBa;{pH|7sqzb^i3zLB@VOrE>ZUZS?xm53BVMRa)M?% z?1mw(AE;Uj0E3kMFD?C?$(UTu6wqPQqtP@Zr=C(xmrC`FnvB%X6NJzbX3ZfO*rOx2 zt62Y(&+!xFE;ptMz#DL=ebA`~0GNDwRzodtK9;p@^-Yzgli5DI)y=enAf=&~2Kk5Y z(E4Q2H(c@!FY#Ocy-ObEiY=URWoB*=2DeC#tV93JhJl&~H}BP5ognHf~K$E%bUNm5b~NmB7qW%_J46)O>Dn}&5alA!u*N>MQmOb6VLEWFe~n5uZv z>UR)esF(uYWGYq<&}Wsk2#v=j2`uXXEOYgTEPYgbo-lx6q4mKgK*g#3D*PU&yFw6% zq`k{HUp_RlNh^eNhPmfJ7jbn|c+T06eEnZLV7hqGLRci;*-;??)=seQj@4~;vRMPv zC6V9FIgg{hDyXdi$$(g~0qRnSb#cypYoHgp-2g~kYYJkShUWlwx@@!XWD{~MDM^{5 zuw~`^gL3|l<@{!O<7zqoyYj{r2H!!5_%B4H;RtATZ1eaFkO8J&)FiHfP&xl4px!TU 
z+y>ESYWM`Op8;Dr{{hqcxy6`A6i97=mR~b?2N*wv(J|e$za{ZE4w=7`X?lzrsbUvTWN@TB#!V4T@3&LQPb|Y{-uzYmoXi?}X2|Wd&r;1-0E(+Zxp}Qb-*EkPf zKfFA&GIV8paoiZMA%oY0MQ>lp+gB70l!OBX;ebUsxXRsXeXn)xOtD=owTnfsxPGGO XJzNxyl!PON+P|`w2z_8XOQin+A}&|M literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/base.cpython-311.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44b74f3a11ecad004303112e25239231749f4e74 GIT binary patch literal 6894 zcmbtYO>7&-6`m!z6e-anEn5GUv{4$zOk^sFWv8}dJBj1i4g$xB=_F-h&@1jpT8ZSc zvr9Q*slq69Pzs|>g90%S1Q2>~WzVW%Bk7OA)S;RQO-myqpS{p*L0C zFvU!zT%akz?731Y<&BDLs9Gs+&*6_QyYc7~0p+12D<~uzP=stSKxk;OEibtXg!i49 zN?4vrS`DE(poRdu|F?xXZ(}CgkLJ94k3%D)VseNmq>>$t$zBP$1$2s%*GkKj z8InOigi5pb00eSAcoaBjy7BncENW`WorfQ77hI`sXbiU2U5Qu1SJ=E~+X`}Nz9P?4 z>9B6FlwZ;}+hY}-D$?uGF3dB zanp0i9{A2`TA_pGy9*Rh!T2uPvv;ps(wJ1zj5Ocg*YhC<;)I|lLjUuKaL-XA8=gV; z)oSO}LI56KY1?F^Cip39Be zE?$gy`;CxVVArfjdsHD{r-94WiJz_FUP)?6H{?PA^sbep6|KS~eFZG8n2MS)RnN)G zvRaU53)GCgT`3uAk)CHvV`g|xV@268LnXOL&EA|^QjJ_2V`kKS?a8^CWhlGZ!_|!L zU1O<`8PaKC&Wt#7V_uAz-R^81cL>q{-CNVw06+TlimWe8zee>XLn}|8(~5H|*Dg)x zD_|AHTuD=?KAp*2oi+@8x?-pWeH!g>+Qqd#Jqw;GU0yLGIcovA>Y(3$rP~9O@NHd( z>K6GXp4^U0TXCropKQh_TSSPyBYfRAUY~e*d*Z^@#D%Xye~!9(kzW@PB=(L)=x#^_9QbcVO< zksI;&;uWrUr-B*6&fhG0G(3l zDxELVlA%kp;EHKUNxAH*-EF)`lhu()378|8^-8&{F~fCY)rd1WX!Zl19SR4b4AUQm zasOU`K>|crA5=PSi6pUp?AQlYb*vK^KFFtW&RbC3BCQxn9N3Phw&JPH!;SdSX8b5( zWXZyax_Plaq2e#|-gf5tR_1ymlWS&jcISlJ=v!>|E!Ja;JMqEwI2#0>dB<-+1K+vs z6Yzq%UxcSsKo>&wL^#B{y_B#gQ9b7UYtMoIrIDr9~ft@dD$%t?SG8 z2pa=J&d1UMexjP}&;m|94Fiv&P=Npu5{<0~zh~EcUqA6SH7cy6OBq(7lIoKHG37YN zDd}~&09lNNLFlHc)5~1Cdx9Cv7j*Adp&ZrG4k^8k^c)bdajYJ%TAz@oEFrzSBZ2AY zkQ__0F3FOCX|r^=C@*SkN-C-)e3WG)zmT%TWDbBi>)b51a=4tFfV9r_7O(Gk(N_#YS33^Q(dzNqG? 
z2DctnvjNZD#jluEP~ajilovJvEl1sZ=VhHy@hbXIcTSs?@7XzLUYI9y0S<}Zxu+3s zqCDB3rQluNLr+IAgd>c7O3}+Q9Alj0030an0Z$U-M!mPdJedj(-N0xH?h5iXsYJo8 zqutHTQ3l}>LhFLOtU@N|u!Ss3gfv|(GfWa01`ad9J937WvyTxeJNg}lQ7=Z#epi(_ z$lUF-$}ZIINp&5Wy$uz(<^;rsUsr!w{it@kwjSE)i8p&jTO<%Y^-XfHnS63PIkT0V zd5~!&Pd1Y$>#>tNi381qw4FG#l{mCvG!j$I#8f?gWRun7&(|Z*?}mE_zJnNgGz*nK zGO{PUDA_yT0ap;X3plv%U_lYwb1|Sq@V6m)&3g`ST=`7C>BLk}i&inp732R`AVZPB zCSCP)wY6p&YPVyF3QGd9-%tm<%5l$|)u$e7w7IfNwuJmJ%7YTSNi4zjCPXg@JGSDU z-AX>Y`NqRoBYD1=JpZNKOkS+VF7CvK?>upPwH`TWnY>WD%n)2Wzw#Z<@L0eV_h)!P z#cjZIcj+|34`5eTh1JkJSru!-YvlTo)iBI;oPn1^E!?Tx)vlWO7$9`{(_DevP6J<* za962`wU8e)Ad85OK|Tb6w8I5vHaE~S-0;E!GcX*PSC?tY$=O`E_)qZQcCDL)Jx=gA zma%Uc@POUEZ1JA3k;ZZ4dtC!?0&o*}=hAlgl656OjhhriwD?jzExktPIC}w! z1;Z4|Iv#FOcq~?AiaI+`9qPciGh)Cjy#f`$dW4KmZjZgVHTL4eP-EA7@Tg0#61(W5PPz5Vl z4HzDD*8A%UbsUa!4&-}RJRD| z>5llDgX`h1Bl|ucsP~?M-=^`!s`b{0oNY$V)+1-_(Np!_Q=7^k)jz2BmcCem2K*Y4 zmzt56>g^v_5(LH>V1F5sIPQiI;&lO@J90V5fO17msTA-%mdpLLA{XqDu3S#h^0}OK z=dn+*?8Um_VgtaBpDN7=gwrzI5w7r!F=KEUW2kxSplk-^*}T~+&qAn{^M=*QPpW1Q zzeBZq@W{cQ#EK`G7_+!3F${S@ zYXyeHNQ+=KL#8hZn@i2~g+}^PGkvN4&igIWEgtKvh9crni#XK*67FpU0^&GWlQ{lu z5SnZ6Ssf7)?%NR~O>q$0F(il~!4UM|vv2K#RtN`xaR2ua?q7TVe<%DeKOGQ{qhNLg zl0><@f;a`Id~vERZ!3U3;h6Zs=Cubi4}*U?@lbhiwt3`Si$Gszb+Ahu`|`;afk$W6 zmlQ`@1gblE{&)x+p!F48DzryDZ_fn9(=7tkMvy-qTUq`GH4IMn0zAyHu~G)#&jG`s zV~)f1SsJd-(){`?jXpc=NlYzh2VZ&hoCO&_jJ}J!WSPbphQ~4VTl$AkwSs~m?2ztT o{BMVh*Z2B;Lyp(|zgGBsK!COFU4MM_x9@(-$Nv2o3cld~0Ao3XZ~y=R literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/belarusian_phonemizer.cpython-311.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/belarusian_phonemizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf5feb650cd5a95f679d5b9fb0c78a33e9d0dc5e GIT binary patch literal 3439 zcma)8O>7&-6`ox#$>oX^Ws8<%35iygV2gG@N^ws`5H$Wt8X%AjNIql{bhY9RCEClM z%q)|L5Ktx%P}y)%qd;LD3RLL9jo}`8Dtzv#N3KD@!U6>h6g~9DKsf|H^}SjCENK_^ zaQDs3dvD&ndGDLI{7WjO5op`R+r?~*kbmQ(*I+K%$v-4Q9uk|_l0(XpAxSuv9l0Da 
zB4QkIlycOF!dNk4GWmqq(S2gaX!KEpkSFj4&4^Rgj^B}S#{O=rMnWbu{s?AI0tKLk z2D~w<9VPp$&|p~i5%~24-k?dOyE4_|g!{Kyd3(IK;{RAq_vGRJZ7d&6gje*&Z(eC9 zAQGo~`r)1?hJ9>+&`uy7=0i@jb@67${)+~(lCNAV72L;?Ket64i`99_s+cbQ#x>ol zS=*^rXqj?Q8^2<4dQ;3x4aypk_ZD&mG|_8ncX0A&82knxLWv>S#E@;th}g2B(5M|L z$_Jz<*~)&xh|xF#WBAyH3f#0ErwLnyH$fA{n62$cN3=A1Y@Zlopqrq{qHHJkl@X1U zSS#D9eZ?4uaT>J7VLWj_zLAVm>m)k?N4ox*IXF`N6F^VrPY<4MY^>{BR1Xf1ht(k} zm2JIPuN2%;wZipMMc)Lioa^#j-7PshhY`ySBA6R2-qbW5+DB}g|6Gi4Rzp*DE3mfA zU6vilP`*xGy;{@@I~KDFE@fPItLT%*mc{ zYggAdbHV;-^$w*q6SwC2@>aF#jBNU8QQu&7`kv6!-5m%)*iEYlj`X7`KUW6R+* zC(h{0TXdt)zw3aAD7<6vo)|#>AUz~*|E>Z`fj7AIMWlYay$qIv-xr}e~UwE^1Bw_$zP-Vj$5teuU5;&-CLjL3w7>R%Vwo& zQ=SK`SS-Ty2+O32ICa!!*}(dv)Zv1}y zxjOSqojJ_>@twbY@z2{Wb;eUaZ>yiTdd~@Hlc8ISp~f_w#DG9qmqZBt)D{i7@|dt` z5VH)DNg%#bvC6bDIpP^I&w`l8fi%gB^t%VY7K?mo&sXr+euC2)VA`!RKgOtAXO%)f ztFQ=N5$@N+v(T|QARMjfdf;WUA7pcjHs(gW4GZ7FrC64m zKLljlQMj0c!9%h~j-Y?FAw(a6Gf(=*TPN&Y(923}9=iy` z{X(9jQNUH;x;6-ZqnWRT`#@bPOrAq)xdpF=r?pn zxR0NDOt>wVFzJysmH>{gUFm-Yf~1dReiE)vt;(PXnZ0|%8|Owme2vZ$80Yvg23$;J z9yqP^VoO~N{0rar*8{QFG|N@H z?%+6SnqSr}rz?q@rd=(VCd2<5erl+=ivr;%xeL{}P^Ruq)n*6>KLVc=0W-D$Q-%p= zm;rVHr3w~*mc?PJTi6QJaZD3DI}KcM)?Wx9GW@Iyg6QD05CteVyPKs=RB;Dr5V#Vk2@sJOSR20jFVznMhU=m^jl+bz+MW1ol<(;8#B&_s4jw+ZwfSe-yr(U+wFOT(*H+H8lyhC#@3Qx@cQ4()1m$7cmD43{pD|O?B8hK=rMEma-KHZ)@D6ruC2_ql(|5%_&|C%_WQBJ itKNj(p3pr_Kf3H`=RM^@Te;BcJ)KiTnFZGcQ2qyqUoThy literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/espeak_wrapper.cpython-311.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/espeak_wrapper.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d01db294328a7c5f471e4ad8d5fbc6f91b278a3a GIT binary patch literal 12262 zcmdTqTWlNGl{4fFIedwdNK?;PBiXS<*`gxHjvd=kVq22qIFX%LaTC=ELvuzFZHi=O zhPFjesM;C`tqN<^n}vmKfK`Dubv9nWKVA5vA6*1nwB21`MxDaM00s;!`q6$daJmTc zwddR+hY!)M`>`K89G((pdYzpK+NhJ1&~}{? 
z+C|r=K$*|sZ(cLo#E=lUY{NI{D;U8xA$WIN{TOCCAiq=W*r*~dx5h{PxawQdRJmn$ zO#>_WG%YXnRH{D#b8n@*_W3lJH=e0&QAz*RmOvlDdpGncQ0#L;HEd~8Naz3>ZLeUe zeF~lRqt8L1(1EZ%pU?|?==#6wL7>lpRGg#2J%7X@PK9j-GoFYm2J?DCR>HL5JTFRe zBAtR{%kh{jo}Evp#Dzpolpx1*Hk*no*_Z+q;uhZn9^@$eKOO`Cl1GDtQ491EjdRM= z`G%PaR2IizpOTP>fSA2HpNP-HU>x#%R!Jl!7v{s*Qn3Y*yvSIS=f8P(%r3rtY9yYOmGnY1l@>&KWP1ACh@!|Nh)*6-#B0h(#+r;gBFY&t zb~$=gie)mQ6v-?a&V(FQ(&?mx67Ge+yc@tWRb+bA`umZmwQyi9c;m?Gk##}!45-Y& zW2j2yBG}Eak{$-+VtoGa1$pr0vq!I(BptgT?70vDFi2qdn*4@p*%tlhWA?E_k^CyAV_2^W{~R zkorbDpo`YykvgHE1ODfX`i z30SO~M^=ukEoz>A-P5l!{Zb1wZch96q2%dlpQkn~EdULzt6q-?wyNqhDHU!58dv%#uBC-++RxSH6LPX-cWVc*!ECz$ngY>%6_5G3ymD;6Qy*zAmwq zRNk&sOajQdgrg-_U9Q#3UABI6JU}fm3a&=^$}^X&UM>+M1PcU5{aBXRyiJ~~t-Il= zV5oh53NKXyfTo}YGCjGZuG+V!Zf_YTq`md_lp9`em*pHj+&#E9C>z}daq;2 znRjfU^lZ8E4aN{nt#5;GD}Ee3lw zoe@(8bN1BP3Bz%2di>1W(*~0O?JVttnHroV#)M=dB^qoznTEagS}lGJY@c z*{QpyH0A}Jc|m1f_=78;ySj>;=b^uA^*aD`KVNb(ZpW7tfUgjgsBLUpiKE<}hb`@E zqaPpA13Ne3TFa2$GE}e^{ecqYaCH>DzSUQ6POVI7-d^3?t9pBj{?^rNC5ra-7hBst z=5Dv#YSCJGy_Hv6c`Un%WjvNSZ~Jcfe(3+$zveFnLZvS3k92B*FERC%^=g)OQ?2a< zrWou1M6T@w6vm6*)|;nSPOmeX_Zi*$j7shz=UU}%wyd;hT&K=;s$6H$<6S+3O`d5o za40*7WxZH-^5z>WZ`?SudZus&(A;#aIBsyOT!Aa?sT*d~5jG4MU%&qP>xQ|RuX`zP z>+)$CU4VZJA3Hc{vwtys+y#$c*_cU(^H(k!U>r)VtD=w}cLFbb1iP_Fq3;HM8$Dwe znHg4e%s2%b_!eAMA=eA2V4t-MjOd=V6{uNSV6XdTJm8Ii4_9Ar#s|61Ex9eC?~XT7=!7B1%wXp!RVPFv~h{sW^F?Ib*2Gt zh}5?U0l?iMb_&5QxVs9}HF~BS)Jdn|m^g3;fA_XWed;*JH5vA0{b$OU6=j+1Q-O z56X%ZuI6Le`B*X`RBOb+2=~>dcQVom| zDJCuQN}30Rk;;@Brx)Q*&GJ*=w(R9)QQ>1*@Cm?WL82^LGr{gNk~k|$HH0F>NrORU zp^ot5;%qFNRAlTGk=6`9Q}u@AW^*_(ACqEn(5$5KMW8U~Rs|>#llhvq^?lU1SP{YO zJBpR%!EjC2mqjsSX7hs=!Dne$3n0L>lm!A{J#%4e#~Pc3LGcxDh`4k0gTTh+-X^Za z7BWfkaCOzj#>Pwz0uL(!9#({SSj6>-RB+;)6APKNr0^zhT7<{e)mr>Z^-L zOHfysCf4t7A@L9NZ7NS)vb5N;zt*m8blbiME^)PPo{Cy_ZysFdazMr=*wIxJ80(!~ zqHC?TO%FNo?PBVW11u0R zIV&B76_6$n97Av%fZ>Wp-cj`VWnzU$C*eKkH(4}NL6(6``8xo<2It@7 z*F8O|XUD_#9<_H)YoFKK=T+}~v7<-t*j->&y zp6$hs-omSY;CwfntIh{p=O)*=-g(da>u%VpHEvqxrd4jb=xN8AJp+aD)z=H~E7>V; 
zA7q(+Dms3Z3)6(s#+Bf~0}0MJ*yiOM6+KT9g`Tu9u~h@5yoo*l;xD zBJ#pO6iq_UTf(4XF7S5haw>f_RoUGRgUL=x!mKoowQz-qw@#uN(iwOK1h`;im1<)k zO_%K>s42e<0G16{r+ZZO;7u5G^xjFW_lVYhRBu13dXFMgF<==M4;=laXY;^m^}y-h z_l#&gqk7M%+B5q5H{;ryqW-2xDgjTzO=vIm1N!C>zW;=h`Upv*a+7+6I+8z`%s@(9 zRB|CH>zA{BApt9@D{CQZUHu%%K#w4<7rTU1PRhf+=e52Adfx%^1}+QS4}HP4cQoI2-3J~8Ln`ksS02^+ zp4a=Huc`b8)OGJ^?eFXD@2lSTO>rcvR0ftFLVb$yUf4ElQ-p+{Q4;c|$wnB`kdXX2 zPkBlh&|QZN8O?muI~|2&q?^-6Z`j zoy1oSPFItA~{c>o+EGHs&g@#)x7@J=%3(RVwD;J1!^Hrkt@##+}(yF zD~Li}TXHCtdj@emE5;|dl|ZE}1yPrJiZ;fzAd+Q8@v5yLqStT;_cZadC8tu2@lrKM zgy3F6WDNMVdgul*G&b*i8mu{p;VrSNi+Q$r%uVp+ZS}JPF>J{$0Q;Xvr@)O-_(Te# z@jOZu`Y)#4Ey^{HM7@89mJzAk#J|I`=`telW#VQJ;LS8gOi(4jQ$+VtIQtCIWG)G_Pl%{Y|zoLAI#Q@+hH2CqtHY`wIH2EfH5<3pV^J6K?Gp{hC>k1 z`No)?l*kx1Ni^(mN@F-72$+n;MTpxGW8+)c;vH;Zhmo0KJg&kb5+1M^?#z7D#3nQe z;k8EEsTcNbfOw>`H!>!-BVU3%*-mE4b-WrF$L8mu;a80cQ#uY-*fcuo&I_w$qYPigxn z_5G8Zfk`zmSqygGK7H%-hD!?$>A@kD+>e@OZt1ItH>`TY zk2vp5&x&VlT;sZQu1n>*9tE})xQD^M4MyjmySMZHv>H6F1&`~&<2c_v1Qf;}dIPI( ztPlNExU>Jqmws}|Jhf82qs6Xg{;B65dp6E%U3>Mey$`zfZFcR`y7uc``wOrAIY@Q) zeiGVn-#LEIt#uvIyN(oId*ltSyB~B8Zo;TKhxE=N%{x?h72FYj#{+-groV5)zVWW+ zAJP3IDmP+o>Y$md3t0=_AMXQz&cy~iw_adRLn!X9tzN5v=CMyrg8v~x3wX@q!`{GX zGNK1bHY339sKq9^wld>Xvs9=mqd^Tmm_P8qzjM>Sb7T6>i}#;Z{W~@PE4u#`m3w8Y zX<}Ik)4*t=3DgtqW&aOyh|V>^UIMsPOwkbygTv^ zSR)oa;o6?LPAsIX$ciC89>hFAokASVdE54C)?E@vBO93N`4JbOKJA$yWo@(zjuP(t7~@ zh%}cl#4KF^5atYjbbMm6;*vrP!BmzXV2v08{H{c*SDRs{sR3CWfDDHp5b97&Ey%>v zNjTlcf+*3fW)e+VV#eZB;4-;7V9~)+-?C-UH!1ra3`PDqfMx2>EtD662m?h>9H)w{ z-Gw|H1Hf4T3x^2>7BtCE{r7_R$JO95EqF{19xG9t3l0eY!1+L+`&A?tQ5YYn>DD(%{K3rKWEJD=ccJ5a0hxw25YfQ11-)Q^f-B?2;{S zYi>ab4L11GNZHd!mB7|Wko^gAQ)hRA?FBQe%6q*fhIVt@hNq{36J&usHrTjCs|f_sXW6!VYTZ3VAJfI31%nVjn6-)7a#WiKt1@;amPp1f zf^{mQQ96sZBe53Epb$+01`tBC$CGkR#Ln=-=SWuDT%f!`)@J?>3|z(|SWq2c1$C@l z)BHRfbgKSCKRtQpw6^n*zVndE9U^mw5K})?#aPvOcuty;yB-Xoq9Th%4RDlI@uVsqAOSPc4+I0p*9;j25g0NYcMNSxK+;6ovZ+A52t zJ&RT>eh`FT21)1e#S39&IH*|=mHD(l;s(TykT6s(;aW*D0^~o#LqfXcs@E{bR}S;W 
z#bgp9!nAY>atTFWF@wEFurUJcOx^(L{dyT_XZ}jI4wfrX2=-Bdf#uW1_WtG5k0Sfj zV>7?((T~k&$1do{E~trx66Ire5U>Gf#Fcc@)OO0zQnJx350)d#e`$x;>+hHuj=xys zyZ(O3#;~(A1Zx0d%b=Cz<=7|`Y=J?_(T>PFvA0fa(|-Menc>*SK3AduY@8#Hd*p&ZLG=fgb(N?nA*cVF_0BqHE z{oB-We?xze>zs$RPxJHXtMEyMhzAki zt_iWlm#4$@WVp}V@-Jcw{IJJxD2o|z98K0@FsCg8hm@sj@ zsZ?dbqyYv|Bq9hd%dTz+#eKsaH6?sZx(T&G04VtfuqTx4G))(&mSyrQQm$q4D^hK0 z+d4MHIvXlcELPi&)dtnjpcWd^Lqk~YQK%a;0hAp5bZdzMuzrF(HWc!>KmNoOh{)!DibQ_uMTOfGp4H-*-4m%1pX8%RMU{(k}bOSL!v literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/gruut_wrapper.cpython-311.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/gruut_wrapper.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f65242a731590af1dadc4a07d7e5036693ace164 GIT binary patch literal 7261 zcmb6-TWlLiay{gb8d9Q2S)xVRx>HNKSfZ_^BuyB(E~!h{u?-9=#MDj za*xx~`g{x@zi>O+DLK-hJmey@y!qysBSYo$%l%SuWrW^{(O)MCbjY!8SylQte-YMK1Dq7>|YI=-CGXZDD5Jq5EB zl?jS?6oqJ8t(1ag(L%$2n9sB?V*!X;W&;g~idImh+oDaRfW+T4Ef7$ffM%3?>0~~q z(R5DJ6-lP~6m1AbZPPgz?Mt{SWwh2cFp0r%iuQ)NU6C8e$)+9ND0l0mSMEvKLPj~? 
zwD`=-3{9!|EL~W*nb37DQPk5JEuky-^n{~KFn3}FeN%KiGMz2tRo#}=^+jaWB%#yx z@c5XLd#k98#b1NhoMn9EeT7Q&PClIkE9de$m2$&WNv`H+M<%GI6u|fNygEH%j5L{z zEgNZrzW7Un-Z5T$ZoK$ALjzjl#ixeOx`D%spBT4|yzy_J%#N`&EL^n&XWu0KPI!PP zh6+qkuQC9B%snEyn~e5`=yJ;lAUB%986Ly?lxt6R95Hxr^wK`iID&Ar7nvzMWSQ@BZ2to)V2r#8u z0Z~k5Bux`V4Z|qze@ZvM{=>Px}>d6URJc*dcH7uDW6TPe|&v1S=97=R?I;x z(I&xVCc$MUnafO~%S<-yuT8eOOro%^hGCXcH`^cyN02!5Y3BiawMqUbLIU0QJ05hr z6k7v=@f&XP}qqWRvWM4{zUPxE5zvJPK(ATwk>O zs-hRwT#^S0r{Sb*_7O1ok35ga3fbUWU1`mA9&i}4)^rTA=DNjRC%Q~-{c^)+_~7|Z z8-ByT=FX_j8oWAe@UmyXSpg}#S%H8`WhYlmq&Tk`V1i>?X z#s35sxRlS`Q6Q$KbA_T#(a$i&I;p88aWOY-23kn5kP27Qca$7mg7mONm)9{^DiAEw zxfQBN$yJ99b@K+jo7PtW-wCQq6I4MCSZcUPnj5n9RY|9q@$|G_gfc>+8t~32RLd9J z3W)1EmGg?08-w7TN-0U5)_?Z!IRpwMNGddq0?O$-X}Kt6GV90_rPl0ax-+BbdLD`s zdi}E2B6}l$Y3JK3osm@~lgZQ7{9P)k3SG|^fm)WJ*_i+?cano%^UZa@N&4F%H2QZqHSS6UbxnN<#`v~X;pVKFF*8bU!?bg%^(t2ESX zSZ++vd;tqwDHEsX=^w0u%vh;lg$8OK`}d!ZH4Oq4mws1Ka;{yF6E-xBl7u*X2WD^9 z22hr`mTGuXf?k0d#L1>mL$QKM*~gLj$Cx;lpbM*Mt+7U`DnBfyp2^kGag302z+yf~<7Zmo%jBUJvr9oM zpsP|&%Rm)w^<4Yt1$Q2RZYOjbvMG~TJ=ftayo%CNk8TCq9` zt0Kx~3C_wa9u=peh*npt4lMz2u|u;wg;fnN0w`T_Y;7mg@E#48q6aGWU7*$G!4W`- z^<$(ry4~?#v2Xn}{DgmY`tPT|>@%-RmDqAMwrs|h>*Pq_ECbu$t;J}a420klz(c;) z+yD7=oy2<1?)HDXPJGeRyRo6N=ck=7W5;)5$Df>j`ffFL#*Ce*uwUIz2I$Ya|El{5 z|1?nk<7(`b89P;Bzgl$Q^ILTi0Z!jJ_VB~q@rkFQItfH)8Tg{BHZ*DuP1cEL-~;Yg zClcm~>F1g1iSL;wzW2okyJM$bj!o~3P5;Nte0A)GIdn_Te zg0j~XNY+$NCs`U&*YPy#%HZA~w5H*PO52k#-sYUA2$6xKGe}KvR7kk_1wu2A(Z<$p z|B3>E%rSwhFKusy-g^K}X~0(M0Yf^-tYHL&GetLv^!@k>MC5AUDYNg?9@VyHB7A46 zJpwPFT7XV#`dCwCSaw0}qCxIPfxqEa?ZHY$g-!1 zCoXU4En9MSZ$n}0UAc+(Ex7HZJ5o9$EkgyVU^$Y)ixK7$bKoEW`+6ahUbc8VTUvn> z6pbQW)+F(|FqzR>UM`W}Ky0IW`SGH>8t$DUB*w4Q`g zm)Rdf2GhfPEbgiSBu0Q|4=oP1h1BZ&)jctv0C^I?d2)F2;SbDcq8gntqf?MUYlFkJ zzJYou=!296VE0h8?sGm^&t4EC{zG*S=j(?o=Ij5B7e2TC$nNmJ?K@N_0LquxV z@+SM*3syPK_a28_TI5E5;7*IZ5x;L3hZ-P4{5^G##|O0m z0L1Ls`a!dU^)>;sJkuQ-Ju9ViqFC~=dn?#z804>lU*MD7p?u9Pm13_uax9bv+@Xm* z$B}rydIgwReto?F=O^3fc2&5-?$^?A$HgPY=o_x%mf@~k#msJf`ykBbH7;{ZbsjMT 
zS?RXIszo&c-?qd2K8|pT4pxS+^%Pe=S_F_r{ER3wU8j(S8ZA z?&UbHMj{pGS0nwE_TMh)+GKwF`>#gMR@#41%td35KK$&%@@QGF_Dz_5 z6BW`^_m?;h%ziN3p8E|O`EBcX$xHaI?b42SpyC~<388J@!@Ct>s45JZ!qDc$8Xwra zvOWAVIJ6TSss@M6;Bb{6G5L`SKhhZci})w;`zIcpsPczR{&0mq+~|LByvj#SK3d_U z4(72>j@=)BF#f0WTl1UqPG{niL^XKC3?8ZSM@{}{g+ID$FXPFXig2#Pi(twzq7k#p7Hx#y$R;B=LrG5MJaKjV->0Wnn-rc7aKAL+Sj a^HbH}OqHKC`Pqu|s}B(V2&jT>i~j>S3Mg~{ literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/ja_jp_phonemizer.cpython-311.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/ja_jp_phonemizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..51a505d5b623f20b0f436cb8e5d1d4f9c5107d69 GIT binary patch literal 3522 zcma(UOKcm*b#}SSpGc9Go_e-5Tm4M<3dv#~!IbK!gPf2;ikXm4SLFeCnIwPb3wj zBkr3w@6F76^L{h>LpU5n&{nd!tma3k&Iau!w~(VB1M(2*NEb|$7t(^jaM2X=p0tPK z9#hK8X&G=S?bT)7YY1td?%PCZzldH%x_=Yt0W3fAAhZi#v!w%ia0$hS{>6rI!IkEX zj1B#noL1Bdm}1q&cWl+R)Z(gDzdgYfTCKF~o6%rYNNozHBK*l;M0(gk*7MXQB$LDqEj{w{4 zjONjs&)yUfx+~lj9kc`3U~>dVd<2}`4)-qFI_DrbxUd-axFL1hjzxuU~Ox z!^U~)`UozXS_T7OVm5FE!xeIrsj4W-(>GGrDJC?vuF=)hJf>^5RZLyB^4az4ucb02 zYFl}=VCk5qmY0`OwoOwd+c0U0ZIx=;lG%=?a+;bewyeu2$>O>z+e9l+5&>C5@X_xA zsGxsD&wUn+eHxAJMW^=>m59i>HGNPBcKEn!*rWW_4V3c9ozNPDqQmp4(b>C=8a zs{3$2_rn*!fvi^#Zp!JPeu`5WMbbk+8G_XT9LkD%cvI@zBg|Kd`tYWd9tJ!Dvm$^; ztLTo9J_Rlsaia@!>cTbkSj7DvBBSxqgC}5HWkH)+C@Ni(DkW+ZR+Mb1kg*M`Koz5) zyaf}J;DjW1LXtTl*^cmJcj;TfAox~UzB+&P1%+C93?Z-FHtbbpVNO{qShr2AuV96) zS|w9gGOIYVrr4_nh{zXBNREQ7F_Fz(uH4ZW^@Zk$?K?X=t7M6lS5DBTs|4T8A~wkE zJvG;@y-#<(^3uS}>G&7S?VfcyR^NDB-}t!x(|h%e-G6?(Rp0oazVX-k#)tKdpa1*M zAH@<1#YK&1wnbiwIkE1J&+CrmWSmvUaEPNh1?UZD4Zb{#s8a%5b6$SFdF-chPMgUU zmoqGH+Kr_=i|eB_fL{xc^v4ptH$F}-0S?M_Qkf<9pM>73#_AzNj<^^X-B6JQLWw(u zy1_M!A=`jNA+cpzDq`aLRn;gMwyL^*Q!A{Lv=tneU5R2dOHPA|+$PU@C>H2dd0rWU>~SNkzlPW6e2xq2{q?&0jO zW_MzHqv!WW&(}id4>k98@UhF7s=oTiTSY zMHDE63v=C?vb1JE@gsPqXi*ywD}yPof7HfB3bhdH%B^*%QdqG7knV1TCOcIkE)aJ1 z0b7zYKW%*+AV)7DK>aoCslCqht81%gZw>?}A<%t1ZTAc4N 
z`Rrz{po7RW)!YBC;N_+rxW$hN3>j3_-q8$GyJh0?a|2S3vtK+edbDbD(GV1qK!im`OL%^!nt}k9*;uX>j zQFcvc^4MOrbfUoCu7}!$M<8MKk+3Kr2?kP(E8VgzlU!nGxK(^J*fdoYG&~1XZszYb zYw!qL#lR`XxB&KmYTN#6h(of!ffzW8qT`hx98O(&qO&(tTKE#^Mc$Ey*nV++?t$Uv z%_hMs$s-K}V4L#pkInAM5+C)0kSt$pAOPDL-u>+|@A|=TOWw@R7}IQ~3y#U2X?O*B ze0!8J$NK@W#C=OS(Cqd!sJd-fRb|y z{&gVi-Xz~$TqB0h+D# r{tlzxsDuxrrytO&QwcS^9wFL59S}+gV5ULv;N8y|^*<-DxZVB-Z{dM1 literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/ko_kr_phonemizer.cpython-311.pyc b/TTS/tts/utils/text/phonemizers/__pycache__/ko_kr_phonemizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ceb7205d99659f5dc13ade7f9090f10175f46b14 GIT binary patch literal 4597 zcmd^CZ)_CD72n<4JKvqpoUs8L1I^lj;KR-xP1FyCVwwUil*W!~Bb6i7a@_9Ohwoo^ z_X3=@$w77(+c?2NLRb}~CIteR5EDr1Hl)qRe!N2`#agKnBY&zek|X+|;#1$u{XZLl z)Q{b}ee-7Co0&K7{pQW=rwt8G3ZB8x(a_hx&Erb(u@;yoU(pnGff6Wzj!;qBPt%xZ zB23icw-DYEVWU>R6?oQfW2j$Hf>omgn`pgep{Qwi^N+tyvbc=s~4S9u2NG4Q>}Xq?3#u*|Ik#SXG5{2vV4)AM^_YkJY!^cQC?ZT$jDQT zl|7353-w%C)UGT=N4163HHd?wpS&LDzi>AXZL+(=K?TZ|;kYCQV!R?AS9m4PCx+rN zF)GR+TJf`hEWSnR;gl#P+o9)t`lPq}&~H0EJKN{e-@H4Y{%}5hc0N5mpZ;JzJu#pD zC|GHhqfmKr6PVM08IkhS0_A4}+HVmUKPy@VONbevLbSkY4!=#T!$_(2gWoPR33kyT zIN)`{TQ537cEP1t{Z65I8OaKe)Wb;iqASD*ZjG%OuYruf2#p%+Uje)uv{nM&I6@t# z{VV$^&nmNVe;>d9*L=n5|G-kz;vXkw2L}4N;W$4m6*2jV?iYxkI7s( z#vKMlucFA_q!NzEUhH3Q-q+r|k8#JtPA3P?K;Q1ZZQL7z!0ji|;FLHQ912K*paO$; zA}x)ylSn&-wD*ywAx%e`fwa>|8$sGA(#DW>25I9+`v7SlBJC{FCXn_K(#|3MGSWXm z`k#<~1?g9jehulLBK+@;m_^24knvY!e2t9z$oK{s ze?!LK(djfgeG;8M<;gF1&+$Ms5fQiLYuEujJw04VibuI+oL2T0_%)h{OA1#T9OA^{ zIAwiq#}ds>&ogw(LFw#RxO9(ubZd6u2RcKlI&E41e6PpN*yJKGp?$%%G{!> z4{&_gL%L*oJ&fr};Ov(SkOXKthea{L<0jc-F|BekAxfs5=fknE!tvdr@`UI^aa>8FQ$2V_x_eJ29)kZ-pr4=eG6 zZ&y4TI&om1FPM~-c$AOD1yS}vChSe`pV?Sz&kagi0~%5@CQU$0HWrAAsntu&!_D|;mhC{&)I)cB#@$Z(nUq;K z1dbMy5tGFpFde1DWI9R*jA@fZB`L*%aKe=y%o97008@<|K!OF30LYjwq2u?g%A{>7 zBQV1RfzoP~c?PD~>dQ$#rLy1yORtBP*27#!_Hj8&Jkv=epRNPo7y+i 
z^hw`T-(2(h3q9w0raH3Cn{v&YGOkT??v*1B;@{pN24?ZWJn`68U@lN9HC2>Um3SYi zmg;>3>!~R zZ5jJEvX&qylpY*bvJ|{4F-Qd_jDc$Dl`zOs?gFYh^A{RV9;!8XZH7&cPis-&i@RM; zZCqmKGFo1kL&ggRG&i$cU+OTBSO7V{> zr*0lm$kebKaQvjTKs=T*-|K0@ITa@W$f-`AkH&>$1oJMQ|6MW=DM;#gUWf;Io`A?~ z;CJu&IS*F6b)a`wKS_F~OI9Fp2cx1g6c?lxn8&opibMb@y^QC^aFFo6kvg%IJra*c zB%BOkynLPxijfG<1B&q%3R(Y8$*z>JCkery1M&{=3?$IvZ;@%V+7~HIHd2i(>Aep( z{%}@Uq-d+3eh#%!wx&gfwsv5>j_)l{)!xoG*jQ`RA_ZhpCdKu9@$D{B%lzgLDQk&~ zwr_iL;}8KNVLt3%Us5ne8J@N^vlbrmJ*O8dyl}E zq=XZsuk?C+p}y(j_t^>9R=B0&zNVl zYrHGVuF0`$GVGc{`}meDyE@0N&akVCny-w#GTJ%bd9qLIOZOEUy<^_2vn}Ur%d+co m?79rQt|;jm>zWX=&hI3bCh#Ds=SlAf2j4eV|-9srQ&i{)Bb3712tOkowd&pXd`${m!htv7J;M&wf2W zzVn^$e7-+s{?^ymgP>$H4>Cj0`WNdoi`;0u{S`Dek&bjBhw?&7$crgaV0|$s<>i!| zS5k_AM05k`(i+m`=ORKc;qyu=pevuESm58R8xvi5#zEF zNJ$1t$p*>@x=0jV(&db-D{D$BK)SpUqQFRCO@0n5UpB-blnTOp*Ba^=?bbtj&zg|x z(Is*uBkD)iyw&7Ipn z6SkH#3l~eo)GR{r{shIDqUo`has=nJLbjaMvV`{ZU?^A3ES=&l)=QVERm`i5NwtbC zkl~*(pkw=-jQbkrZrKaxwNl9_WU-OKE5$N?tQ9P56>)kA1QJk@V4Y+%P@Tr{Y=IAe zc6p|G##e%gUi>o3l}HzL5@xl@x3Kdz=nklij!d4vv~1v^Di=vG9`K( z&op*tN-cf~LOc)_Zq3}9!WpBWV-1vEVbdjz(gq%Q`LPBtu~Bw}eO)Nm**$g;FRrB< zFo{(*3(2(azu+AK_$2t=hQXYhs48qh=U{1fn;G=^&Cfu}s<xmY*Lspm10FhZ0zlCW0v-BJ^ehHsfo(f;j z!D^&7V;fb`Rx^y6O#Hu!9{4=%GGH~sgBKlv+sjE8RZiaaogU0V3#TniiNzh6`!BdJ zu8d+0?4=h8pI_1*5inlEz2dPhS1~M-rzTU#m6yp1b$jk=xiYzKnnly>X2W5lV6-L% z>JA<>WlVNuN^%)*iLmSnk0?70bJKUfPu!uzq=^-cE+u9NUABs)#I<5RvvOxP(TJu3 z_%KaC&?YR4Cd!tPqX~2Lo*luXd4vv|}KwKI=zc3^@dQ5q^QN7bu!kst6cgTDXIJf|Q2v=s8 zUnSCc3O0HYIB5_FXb$^^ynS7vYWBiln)!XRur}Yn01e1v`&lVuwOCmf+avxd*!Y1z zUzPSV(N;@_YHk9#3i4CCMZtSVKPt0CnVZBb=*h$=|C1#z( ztT$E*&O5<*J2>CqyJYuYdZGWr_`C7l^7qTofm#b*c7m7fX62TI{CWa5;=u~;#88Zd z*uBPbjjFmms+uqA3NKux@+qGg)rqgk??2?xZLFuPz3aU>z|MLB~gJdHBu6U)E8Vau;$n zqd&;r|F+&b+SVRKfx)^cE72du;qFzU+ZXuXKKW4sje*)Z;BHq(1k@jKIlvmN4h!ONaar_D(a#-fmBfz%~I5O&cC`>^^3{mwqD)&pk+ RIMYG$BOCkAArt&;{{@^bkI4W4 literal 0 HcmV?d00001 diff --git a/TTS/tts/utils/text/phonemizers/__pycache__/zh_cn_phonemizer.cpython-311.pyc 
b/TTS/tts/utils/text/phonemizers/__pycache__/zh_cn_phonemizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..847defc78c15c1197eabd5648abbb63bedd2ae50 GIT binary patch literal 3091 zcmb7GU2GIp6uvXNvpZY5-9nKT{2K~tm!Dn!6|#>3js^t%><#W~Y z`_=OKYWX80&=*j-Hdy=;I`4oZ7^#AZRM8aFkSVHRESaH)(5gcXaUjpf? zwWSEDGH?@S6vs>%o*0g0B4*p9v`nhaTs4W*RUjL~?HSQ*pA0XPi1W3g8J`TR9nepJ zR0s4sr_dQeT@B&s^gGo^rGc*u@PEVnrOp16Lms8vs^zzPmF@eL5!1WzvG>!g zx5rbw8*h6zKIA=+hH=jOu7Bd$-fm#>1rB5H^dn^j(|9gs3^I};W*$FM@PPSG+U*4P*auXAw7%=A3AUG5}5wEI#iyWOx4 z8uf$&__8Ek1<#@ia(2$uG+(xKd#tFB;iTk;DYi0%*-6@g_>u7m9l+?vG|jMd zN;Qo#M;JiL_fMpcQGi?etWLA(0ZhkTr;zS<@|m;84yTPGb)CFsJ0_-S$bvLvL7Hbl znq@({0eYG)Wi`XD6&sdGslr*(0cuL?mnUFq$?C^onr;E|PZ|9x$KQ>fi{6oYZpl5< zYcD?j?a7}{Rpg$V^6}g9@ygPF5u{N|MPhMgXS5gr0)?5;0#Vp2L^|QsaHiFfwgyy6kFM~cG1&)u&>kRVG?(bQlh|}Cv1w-Da`I+k&+WvX zio7Rq00iod1(pY&-Np2n~`La7u~(;D%KCSVvC=xL5{`F37NgL!kC*OQ}Um zn^ze60I98@gEHg;bUM2$>$g-ATPyO`z|Q8X@L=SYq#dapsh!-YD*}%pWs)@jL{CGu z!p&6bSz$|UCe8H2=Le72x~Yl(s(3hv14C@H!uA~1^wWCI(nl>^dVGb}=ILF{s=KCK zt{U*L9{9;MtX#3&f^yZSX?e#iTCCr$X|ES`t2PtWG}AFOjU-@=A8)zZcs=pkDJ#=P z9=ln`Bn%pU2rg6(I?@Y6!VaHoX5<+*8)jD@NwRLdRss!dS(*lxtpP6g_643_grz(u z3@LO21RiSB(r-cRkmLo#h=Mx1%ZKJS@0`U8NRU+FAvA&_YZgR7>Sc1h_d+n7JQ*;8 zDN$OpfPhRJ{B=2a&Gz$AgVa|td0CL2W&730l8$XNYZ-T2gVa|sZbXo}r@Po>_i_SL zYo|@d `d|ʒ|ø|4| |ʂ|ʏ|4| |,| |i|ɑ|ŋ|4|b|œ|n|3| |d|ʒ|o|ŋ|1|w|œ|n|2| |。` + + TODO: someone with Bangla knowledge should check this implementation + """ + + language = "bn" + + def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwargs): # pylint: disable=unused-argument + super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs) + + @staticmethod + def name(): + return "bn_phonemizer" + + @staticmethod + def phonemize_bn(text: str, separator: str = "|") -> str: # pylint: disable=unused-argument + ph = bangla_text_to_phonemes(text) + return ph + + def _phonemize(self, text, separator): + return self.phonemize_bn(text, separator) + + 
@staticmethod + def supported_languages() -> Dict: + return {"bn": "Bangla"} + + def version(self) -> str: + return "0.0.1" + + def is_available(self) -> bool: + return True + + +if __name__ == "__main__": + txt = "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে, কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয়, তখনও যেন বলে." + e = BN_Phonemizer() + print(e.supported_languages()) + print(e.version()) + print(e.language) + print(e.name()) + print(e.is_available()) + print("`" + e.phonemize(txt) + "`") diff --git a/TTS/tts/utils/text/phonemizers/base.py b/TTS/tts/utils/text/phonemizers/base.py new file mode 100644 index 0000000..4fc7987 --- /dev/null +++ b/TTS/tts/utils/text/phonemizers/base.py @@ -0,0 +1,140 @@ +import abc +from typing import List, Tuple + +from TTS.tts.utils.text.punctuation import Punctuation + + +class BasePhonemizer(abc.ABC): + """Base phonemizer class + + Phonemization follows the following steps: + 1. Preprocessing: + - remove empty lines + - remove punctuation + - keep track of punctuation marks + + 2. Phonemization: + - convert text to phonemes + + 3. Postprocessing: + - join phonemes + - restore punctuation marks + + Args: + language (str): + Language used by the phonemizer. + + punctuations (List[str]): + List of punctuation marks to be preserved. + + keep_puncs (bool): + Whether to preserve punctuation marks or not. 
+ """ + + def __init__(self, language, punctuations=Punctuation.default_puncs(), keep_puncs=False): + # ensure the backend is installed on the system + if not self.is_available(): + raise RuntimeError("{} not installed on your system".format(self.name())) # pragma: nocover + + # ensure the backend support the requested language + self._language = self._init_language(language) + + # setup punctuation processing + self._keep_puncs = keep_puncs + self._punctuator = Punctuation(punctuations) + + def _init_language(self, language): + """Language initialization + + This method may be overloaded in child classes (see Segments backend) + + """ + if not self.is_supported_language(language): + raise RuntimeError(f'language "{language}" is not supported by the ' f"{self.name()} backend") + return language + + @property + def language(self): + """The language code configured to be used for phonemization""" + return self._language + + @staticmethod + @abc.abstractmethod + def name(): + """The name of the backend""" + ... + + @classmethod + @abc.abstractmethod + def is_available(cls): + """Returns True if the backend is installed, False otherwise""" + ... + + @classmethod + @abc.abstractmethod + def version(cls): + """Return the backend version as a tuple (major, minor, patch)""" + ... + + @staticmethod + @abc.abstractmethod + def supported_languages(): + """Return a dict of language codes -> name supported by the backend""" + ... + + def is_supported_language(self, language): + """Returns True if `language` is supported by the backend""" + return language in self.supported_languages() + + @abc.abstractmethod + def _phonemize(self, text, separator): + """The main phonemization method""" + + def _phonemize_preprocess(self, text) -> Tuple[List[str], List]: + """Preprocess the text before phonemization + + 1. remove spaces + 2. 
remove punctuation + + Override this if you need a different behaviour + """ + text = text.strip() + if self._keep_puncs: + # a tuple (text, punctuation marks) + return self._punctuator.strip_to_restore(text) + return [self._punctuator.strip(text)], [] + + def _phonemize_postprocess(self, phonemized, punctuations) -> str: + """Postprocess the raw phonemized output + + Override this if you need a different behaviour + """ + if self._keep_puncs: + return self._punctuator.restore(phonemized, punctuations)[0] + return phonemized[0] + + def phonemize(self, text: str, separator="|", language: str = None) -> str: # pylint: disable=unused-argument + """Returns the `text` phonemized for the given language + + Args: + text (str): + Text to be phonemized. + + separator (str): + string separator used between phonemes. Default to '_'. + + Returns: + (str): Phonemized text + """ + text, punctuations = self._phonemize_preprocess(text) + phonemized = [] + for t in text: + p = self._phonemize(t, separator) + phonemized.append(p) + phonemized = self._phonemize_postprocess(phonemized, punctuations) + return phonemized + + def print_logs(self, level: int = 0): + indent = "\t" * level + print(f"{indent}| > phoneme language: {self.language}") + print(f"{indent}| > phoneme backend: {self.name()}") diff --git a/TTS/tts/utils/text/phonemizers/belarusian_phonemizer.py b/TTS/tts/utils/text/phonemizers/belarusian_phonemizer.py new file mode 100644 index 0000000..e5fcab6 --- /dev/null +++ b/TTS/tts/utils/text/phonemizers/belarusian_phonemizer.py @@ -0,0 +1,55 @@ +from typing import Dict + +from TTS.tts.utils.text.belarusian.phonemizer import belarusian_text_to_phonemes +from TTS.tts.utils.text.phonemizers.base import BasePhonemizer + +_DEF_BE_PUNCS = ",!." # TODO + + +class BEL_Phonemizer(BasePhonemizer): + """🐸TTS be phonemizer using functions in `TTS.tts.utils.text.belarusian.phonemizer` + + Args: + punctuations (str): + Set of characters to be treated as punctuation. 
Defaults to `_DEF_BE_PUNCS`. + + keep_puncs (bool): + If True, keep the punctuations after phonemization. Defaults to False. + """ + + language = "be" + + def __init__(self, punctuations=_DEF_BE_PUNCS, keep_puncs=True, **kwargs): # pylint: disable=unused-argument + super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs) + + @staticmethod + def name(): + return "be_phonemizer" + + @staticmethod + def phonemize_be(text: str, separator: str = "|") -> str: # pylint: disable=unused-argument + return belarusian_text_to_phonemes(text) + + def _phonemize(self, text, separator): + return self.phonemize_be(text, separator) + + @staticmethod + def supported_languages() -> Dict: + return {"be": "Belarusian"} + + def version(self) -> str: + return "0.0.1" + + def is_available(self) -> bool: + return True + + +if __name__ == "__main__": + txt = "тэст" + e = BEL_Phonemizer() + print(e.supported_languages()) + print(e.version()) + print(e.language) + print(e.name()) + print(e.is_available()) + print("`" + e.phonemize(txt) + "`") diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py new file mode 100644 index 0000000..328e52f --- /dev/null +++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py @@ -0,0 +1,264 @@ +import logging +import re +import subprocess +from typing import Dict, List + +from packaging.version import Version + +from TTS.tts.utils.text.phonemizers.base import BasePhonemizer +from TTS.tts.utils.text.punctuation import Punctuation + + +def is_tool(name): + from shutil import which + + return which(name) is not None + + +# Use a regex pattern to match the espeak version, because it may be +# symlinked to espeak-ng, which moves the version bits to another spot. 
+espeak_version_pattern = re.compile(r"text-to-speech:\s(?P\d+\.\d+(\.\d+)?)") + + +def get_espeak_version(): + output = subprocess.getoutput("espeak --version") + match = espeak_version_pattern.search(output) + + return match.group("version") + + +def get_espeakng_version(): + output = subprocess.getoutput("espeak-ng --version") + return output.split()[3] + + +# priority: espeakng > espeak +if is_tool("espeak-ng"): + _DEF_ESPEAK_LIB = "espeak-ng" + _DEF_ESPEAK_VER = get_espeakng_version() +elif is_tool("espeak"): + _DEF_ESPEAK_LIB = "espeak" + _DEF_ESPEAK_VER = get_espeak_version() +else: + _DEF_ESPEAK_LIB = None + _DEF_ESPEAK_VER = None + + +def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]: + """Run espeak with the given arguments.""" + cmd = [ + espeak_lib, + "-q", + "-b", + "1", # UTF8 text encoding + ] + cmd.extend(args) + logging.debug("espeakng: executing %s", repr(cmd)) + + with subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) as p: + res = iter(p.stdout.readline, b"") + if not sync: + p.stdout.close() + if p.stderr: + p.stderr.close() + if p.stdin: + p.stdin.close() + return res + res2 = [] + for line in res: + res2.append(line) + p.stdout.close() + if p.stderr: + p.stderr.close() + if p.stdin: + p.stdin.close() + p.wait() + return res2 + + +class ESpeak(BasePhonemizer): + """ESpeak wrapper calling `espeak` or `espeak-ng` from the command-line the perform G2P + + Args: + language (str): + Valid language code for the used backend. + + backend (str): + Name of the backend library to use. `espeak` or `espeak-ng`. If None, set automatically + prefering `espeak-ng` over `espeak`. Defaults to None. + + punctuations (str): + Characters to be treated as punctuation. Defaults to Punctuation.default_puncs(). + + keep_puncs (bool): + If True, keep the punctuations after phonemization. Defaults to True. 
+ + Example: + + >>> from TTS.tts.utils.text.phonemizers import ESpeak + >>> phonemizer = ESpeak("tr") + >>> phonemizer.phonemize("Bu Türkçe, bir örnektir.", separator="|") + 'b|ʊ t|ˈø|r|k|tʃ|ɛ, b|ɪ|r œ|r|n|ˈɛ|c|t|ɪ|r.' + + """ + + _ESPEAK_LIB = _DEF_ESPEAK_LIB + _ESPEAK_VER = _DEF_ESPEAK_VER + + def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True): + if self._ESPEAK_LIB is None: + raise Exception(" [!] No espeak backend found. Install espeak-ng or espeak to your system.") + self.backend = self._ESPEAK_LIB + + # band-aid for backwards compatibility + if language == "en": + language = "en-us" + if language == "zh-cn": + language = "cmn" + + super().__init__(language, punctuations=punctuations, keep_puncs=keep_puncs) + if backend is not None: + self.backend = backend + + @property + def backend(self): + return self._ESPEAK_LIB + + @property + def backend_version(self): + return self._ESPEAK_VER + + @backend.setter + def backend(self, backend): + if backend not in ["espeak", "espeak-ng"]: + raise Exception("Unknown backend: %s" % backend) + self._ESPEAK_LIB = backend + self._ESPEAK_VER = get_espeakng_version() if backend == "espeak-ng" else get_espeak_version() + + def auto_set_espeak_lib(self) -> None: + if is_tool("espeak-ng"): + self._ESPEAK_LIB = "espeak-ng" + self._ESPEAK_VER = get_espeakng_version() + elif is_tool("espeak"): + self._ESPEAK_LIB = "espeak" + self._ESPEAK_VER = get_espeak_version() + else: + raise Exception("Cannot set backend automatically. espeak-ng or espeak not found") + + @staticmethod + def name(): + return "espeak" + + def phonemize_espeak(self, text: str, separator: str = "|", tie=False) -> str: + """Convert input text to phonemes. + + Args: + text (str): + Text to be converted to phonemes. + + tie (bool, optional) : When True use a '͡' character between + consecutive characters of a single phoneme. Else separate phoneme + with '_'. This option requires espeak>=1.49. Default to False. 
+ """ + # set arguments + args = ["-v", f"{self._language}"] + # espeak and espeak-ng parses `ipa` differently + if tie: + # use '͡' between phonemes + if self.backend == "espeak": + args.append("--ipa=1") + else: + args.append("--ipa=3") + else: + # split with '_' + if self.backend == "espeak": + if Version(self.backend_version) >= Version("1.48.15"): + args.append("--ipa=1") + else: + args.append("--ipa=3") + else: + args.append("--ipa=1") + if tie: + args.append("--tie=%s" % tie) + + args.append(text) + # compute phonemes + phonemes = "" + for line in _espeak_exe(self._ESPEAK_LIB, args, sync=True): + logging.debug("line: %s", repr(line)) + ph_decoded = line.decode("utf8").strip() + # espeak: + # version 1.48.15: " p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n" + # espeak-ng: + # "p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n" + + # espeak-ng backend can add language flags that need to be removed: + # "sɛʁtˈɛ̃ mˈo kɔm (en)fˈʊtbɔːl(fr) ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ." + # phonemize needs to remove the language flags of the returned text: + # "sɛʁtˈɛ̃ mˈo kɔm fˈʊtbɔːl ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ." + ph_decoded = re.sub(r"\(.+?\)", "", ph_decoded) + + phonemes += ph_decoded.strip() + return phonemes.replace("_", separator) + + def _phonemize(self, text, separator=None): + return self.phonemize_espeak(text, separator, tie=False) + + @staticmethod + def supported_languages() -> Dict: + """Get a dictionary of supported languages. + + Returns: + Dict: Dictionary of language codes. + """ + if _DEF_ESPEAK_LIB is None: + return {} + args = ["--voices"] + langs = {} + count = 0 + for line in _espeak_exe(_DEF_ESPEAK_LIB, args, sync=True): + line = line.decode("utf8").strip() + if count > 0: + cols = line.split() + lang_code = cols[1] + lang_name = cols[3] + langs[lang_code] = lang_name + logging.debug("line: %s", repr(line)) + count += 1 + return langs + + def version(self) -> str: + """Get the version of the used backend. 
+ + Returns: + str: Version of the used backend. + """ + args = ["--version"] + for line in _espeak_exe(self.backend, args, sync=True): + version = line.decode("utf8").strip().split()[2] + logging.debug("line: %s", repr(line)) + return version + + @classmethod + def is_available(cls): + """Return true if ESpeak is available else false""" + return is_tool("espeak") or is_tool("espeak-ng") + + +if __name__ == "__main__": + e = ESpeak(language="en-us") + print(e.supported_languages()) + print(e.version()) + print(e.language) + print(e.name()) + print(e.is_available()) + + e = ESpeak(language="en-us", keep_puncs=False) + print("`" + e.phonemize("hello how are you today?") + "`") + + e = ESpeak(language="en-us", keep_puncs=True) + print("`" + e.phonemize("hello how are you today?") + "`") diff --git a/TTS/tts/utils/text/phonemizers/gruut_wrapper.py b/TTS/tts/utils/text/phonemizers/gruut_wrapper.py new file mode 100644 index 0000000..f3e9c9a --- /dev/null +++ b/TTS/tts/utils/text/phonemizers/gruut_wrapper.py @@ -0,0 +1,151 @@ +import importlib +from typing import List + +import gruut +from gruut_ipa import IPA + +from TTS.tts.utils.text.phonemizers.base import BasePhonemizer +from TTS.tts.utils.text.punctuation import Punctuation + +# Table for str.translate to fix gruut/TTS phoneme mismatch +GRUUT_TRANS_TABLE = str.maketrans("g", "ɡ") + + +class Gruut(BasePhonemizer): + """Gruut wrapper for G2P + + Args: + language (str): + Valid language code for the used backend. + + punctuations (str): + Characters to be treated as punctuation. Defaults to `Punctuation.default_puncs()`. + + keep_puncs (bool): + If true, keep the punctuations after phonemization. Defaults to True. + + use_espeak_phonemes (bool): + If true, use espeak lexicons instead of default Gruut lexicons. Defaults to False. + + keep_stress (bool): + If true, keep the stress characters after phonemization. Defaults to False. 
+ + Example: + + >>> from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut + >>> phonemizer = Gruut('en-us') + >>> phonemizer.phonemize("Be a voice, not an! echo?", separator="|") + 'b|i| ə| v|ɔ|ɪ|s, n|ɑ|t| ə|n! ɛ|k|o|ʊ?' + """ + + def __init__( + self, + language: str, + punctuations=Punctuation.default_puncs(), + keep_puncs=True, + use_espeak_phonemes=False, + keep_stress=False, + ): + super().__init__(language, punctuations=punctuations, keep_puncs=keep_puncs) + self.use_espeak_phonemes = use_espeak_phonemes + self.keep_stress = keep_stress + + @staticmethod + def name(): + return "gruut" + + def phonemize_gruut(self, text: str, separator: str = "|", tie=False) -> str: # pylint: disable=unused-argument + """Convert input text to phonemes. + + Gruut phonemizes the given `str` by seperating each phoneme character with `separator`, even for characters + that constitude a single sound. + + It doesn't affect 🐸TTS since it individually converts each character to token IDs. + + Examples:: + "hello how are you today?" -> `h|ɛ|l|o|ʊ| h|a|ʊ| ɑ|ɹ| j|u| t|ə|d|e|ɪ` + + Args: + text (str): + Text to be converted to phonemes. + + tie (bool, optional) : When True use a '͡' character between + consecutive characters of a single phoneme. Else separate phoneme + with '_'. This option requires espeak>=1.49. Default to False. 
+ """ + ph_list = [] + for sentence in gruut.sentences(text, lang=self.language, espeak=self.use_espeak_phonemes): + for word in sentence: + if word.is_break: + # Use actual character for break phoneme (e.g., comma) + if ph_list: + # Join with previous word + ph_list[-1].append(word.text) + else: + # First word is punctuation + ph_list.append([word.text]) + elif word.phonemes: + # Add phonemes for word + word_phonemes = [] + + for word_phoneme in word.phonemes: + if not self.keep_stress: + # Remove primary/secondary stress + word_phoneme = IPA.without_stress(word_phoneme) + + word_phoneme = word_phoneme.translate(GRUUT_TRANS_TABLE) + + if word_phoneme: + # Flatten phonemes + word_phonemes.extend(word_phoneme) + + if word_phonemes: + ph_list.append(word_phonemes) + + ph_words = [separator.join(word_phonemes) for word_phonemes in ph_list] + ph = f"{separator} ".join(ph_words) + return ph + + def _phonemize(self, text, separator): + return self.phonemize_gruut(text, separator, tie=False) + + def is_supported_language(self, language): + """Returns True if `language` is supported by the backend""" + return gruut.is_language_supported(language) + + @staticmethod + def supported_languages() -> List: + """Get a dictionary of supported languages. + + Returns: + List: List of language codes. + """ + return list(gruut.get_supported_languages()) + + def version(self): + """Get the version of the used backend. + + Returns: + str: Version of the used backend. 
+ """ + return gruut.__version__ + + @classmethod + def is_available(cls): + """Return true if ESpeak is available else false""" + return importlib.util.find_spec("gruut") is not None + + +if __name__ == "__main__": + e = Gruut(language="en-us") + print(e.supported_languages()) + print(e.version()) + print(e.language) + print(e.name()) + print(e.is_available()) + + e = Gruut(language="en-us", keep_puncs=False) + print("`" + e.phonemize("hello how are you today?") + "`") + + e = Gruut(language="en-us", keep_puncs=True) + print("`" + e.phonemize("hello how, are you today?") + "`") diff --git a/TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py b/TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py new file mode 100644 index 0000000..878e5e5 --- /dev/null +++ b/TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py @@ -0,0 +1,72 @@ +from typing import Dict + +from TTS.tts.utils.text.japanese.phonemizer import japanese_text_to_phonemes +from TTS.tts.utils.text.phonemizers.base import BasePhonemizer + +_DEF_JA_PUNCS = "、.,[]()?!〽~『』「」【】" + +_TRANS_TABLE = {"、": ","} + + +def trans(text): + for i, j in _TRANS_TABLE.items(): + text = text.replace(i, j) + return text + + +class JA_JP_Phonemizer(BasePhonemizer): + """🐸TTS Ja-Jp phonemizer using functions in `TTS.tts.utils.text.japanese.phonemizer` + + TODO: someone with JA knowledge should check this implementation + + Example: + + >>> from TTS.tts.utils.text.phonemizers import JA_JP_Phonemizer + >>> phonemizer = JA_JP_Phonemizer() + >>> phonemizer.phonemize("どちらに行きますか?", separator="|") + 'd|o|c|h|i|r|a|n|i|i|k|i|m|a|s|u|k|a|?' 
+ + """ + + language = "ja-jp" + + def __init__(self, punctuations=_DEF_JA_PUNCS, keep_puncs=True, **kwargs): # pylint: disable=unused-argument + super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs) + + @staticmethod + def name(): + return "ja_jp_phonemizer" + + def _phonemize(self, text: str, separator: str = "|") -> str: + ph = japanese_text_to_phonemes(text) + if separator is not None or separator != "": + return separator.join(ph) + return ph + + def phonemize(self, text: str, separator="|", language=None) -> str: + """Custom phonemize for JP_JA + + Skip pre-post processing steps used by the other phonemizers. + """ + return self._phonemize(text, separator) + + @staticmethod + def supported_languages() -> Dict: + return {"ja-jp": "Japanese (Japan)"} + + def version(self) -> str: + return "0.0.1" + + def is_available(self) -> bool: + return True + + +# if __name__ == "__main__": +# text = "これは、電話をかけるための私の日本語の例のテキストです。" +# e = JA_JP_Phonemizer() +# print(e.supported_languages()) +# print(e.version()) +# print(e.language) +# print(e.name()) +# print(e.is_available()) +# print("`" + e.phonemize(text) + "`") diff --git a/TTS/tts/utils/text/phonemizers/ko_kr_phonemizer.py b/TTS/tts/utils/text/phonemizers/ko_kr_phonemizer.py new file mode 100644 index 0000000..0bdba21 --- /dev/null +++ b/TTS/tts/utils/text/phonemizers/ko_kr_phonemizer.py @@ -0,0 +1,65 @@ +from typing import Dict + +from TTS.tts.utils.text.korean.phonemizer import korean_text_to_phonemes +from TTS.tts.utils.text.phonemizers.base import BasePhonemizer + +_DEF_KO_PUNCS = "、.,[]()?!〽~『』「」【】" + + +class KO_KR_Phonemizer(BasePhonemizer): + """🐸TTS ko_kr_phonemizer using functions in `TTS.tts.utils.text.korean.phonemizer` + + TODO: Add Korean to character (ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵᆨᆩᆪᆫᆬᆭᆮᆯᆰᆱᆲᆳᆴᆵᆶᆷᆸᆹᆺᆻᆼᆽᆾᆿᇀᇁᇂ) + + Example: + + >>> from TTS.tts.utils.text.phonemizers import KO_KR_Phonemizer + >>> phonemizer = KO_KR_Phonemizer() + >>> phonemizer.phonemize("이 문장은 음성합성 
테스트를 위한 문장입니다.", separator="|") + 'ᄋ|ᅵ| |ᄆ|ᅮ|ᆫ|ᄌ|ᅡ|ᆼ|ᄋ|ᅳ| |ᄂ|ᅳ|ᆷ|ᄉ|ᅥ|ᆼ|ᄒ|ᅡ|ᆸ|ᄊ|ᅥ|ᆼ| |ᄐ|ᅦ|ᄉ|ᅳ|ᄐ|ᅳ|ᄅ|ᅳ| |ᄅ|ᅱ|ᄒ|ᅡ|ᆫ| |ᄆ|ᅮ|ᆫ|ᄌ|ᅡ|ᆼ|ᄋ|ᅵ|ᆷ|ᄂ|ᅵ|ᄃ|ᅡ|.' + + >>> from TTS.tts.utils.text.phonemizers import KO_KR_Phonemizer + >>> phonemizer = KO_KR_Phonemizer() + >>> phonemizer.phonemize("이 문장은 음성합성 테스트를 위한 문장입니다.", separator="|", character='english') + 'I| |M|u|n|J|a|n|g|E|u| |N|e|u|m|S|e|o|n|g|H|a|b|S|s|e|o|n|g| |T|e|S|e|u|T|e|u|L|e|u| |L|w|i|H|a|n| |M|u|n|J|a|n|g|I|m|N|i|D|a|.' + + """ + + language = "ko-kr" + + def __init__(self, punctuations=_DEF_KO_PUNCS, keep_puncs=True, **kwargs): # pylint: disable=unused-argument + super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs) + + @staticmethod + def name(): + return "ko_kr_phonemizer" + + def _phonemize(self, text: str, separator: str = "", character: str = "hangeul") -> str: + ph = korean_text_to_phonemes(text, character=character) + if separator is not None or separator != "": + return separator.join(ph) + return ph + + def phonemize(self, text: str, separator: str = "", character: str = "hangeul", language=None) -> str: + return self._phonemize(text, separator, character) + + @staticmethod + def supported_languages() -> Dict: + return {"ko-kr": "hangeul(korean)"} + + def version(self) -> str: + return "0.0.2" + + def is_available(self) -> bool: + return True + + +if __name__ == "__main__": + texts = "이 문장은 음성합성 테스트를 위한 문장입니다." 
+ e = KO_KR_Phonemizer() + print(e.supported_languages()) + print(e.version()) + print(e.language) + print(e.name()) + print(e.is_available()) + print(e.phonemize(texts)) diff --git a/TTS/tts/utils/text/phonemizers/multi_phonemizer.py b/TTS/tts/utils/text/phonemizers/multi_phonemizer.py new file mode 100644 index 0000000..62a9c39 --- /dev/null +++ b/TTS/tts/utils/text/phonemizers/multi_phonemizer.py @@ -0,0 +1,65 @@ +from typing import Dict, List + +from TTS.tts.utils.text.phonemizers import DEF_LANG_TO_PHONEMIZER, get_phonemizer_by_name + + +class MultiPhonemizer: + """🐸TTS multi-phonemizer that operates phonemizers for multiple langugages + + Args: + custom_lang_to_phonemizer (Dict): + Custom phonemizer mapping if you want to change the defaults. In the format of + `{"lang_code", "phonemizer_name"}`. When it is None, `DEF_LANG_TO_PHONEMIZER` is used. Defaults to `{}`. + + TODO: find a way to pass custom kwargs to the phonemizers + """ + + lang_to_phonemizer = {} + + def __init__(self, lang_to_phonemizer_name: Dict = {}) -> None: # pylint: disable=dangerous-default-value + for k, v in lang_to_phonemizer_name.items(): + if v == "" and k in DEF_LANG_TO_PHONEMIZER.keys(): + lang_to_phonemizer_name[k] = DEF_LANG_TO_PHONEMIZER[k] + elif v == "": + raise ValueError(f"Phonemizer wasn't set for language {k} and doesn't have a default.") + self.lang_to_phonemizer_name = lang_to_phonemizer_name + self.lang_to_phonemizer = self.init_phonemizers(self.lang_to_phonemizer_name) + + @staticmethod + def init_phonemizers(lang_to_phonemizer_name: Dict) -> Dict: + lang_to_phonemizer = {} + for k, v in lang_to_phonemizer_name.items(): + lang_to_phonemizer[k] = get_phonemizer_by_name(v, language=k) + return lang_to_phonemizer + + @staticmethod + def name(): + return "multi-phonemizer" + + def phonemize(self, text, separator="|", language=""): + if language == "": + raise ValueError("Language must be set for multi-phonemizer to phonemize.") + return 
self.lang_to_phonemizer[language].phonemize(text, separator) + + def supported_languages(self) -> List: + return list(self.lang_to_phonemizer.keys()) + + def print_logs(self, level: int = 0): + indent = "\t" * level + print(f"{indent}| > phoneme language: {self.supported_languages()}") + print(f"{indent}| > phoneme backend: {self.name()}") + + +# if __name__ == "__main__": +# texts = { +# "tr": "Merhaba, bu Türkçe bit örnek!", +# "en-us": "Hello, this is English example!", +# "de": "Hallo, das ist ein Deutches Beipiel!", +# "zh-cn": "这是中国的例子", +# } +# phonemes = {} +# ph = MultiPhonemizer({"tr": "espeak", "en-us": "", "de": "gruut", "zh-cn": ""}) +# for lang, text in texts.items(): +# phoneme = ph.phonemize(text, lang) +# phonemes[lang] = phoneme +# print(phonemes) diff --git a/TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py b/TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py new file mode 100644 index 0000000..41480c4 --- /dev/null +++ b/TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py @@ -0,0 +1,62 @@ +from typing import Dict + +from TTS.tts.utils.text.chinese_mandarin.phonemizer import chinese_text_to_phonemes +from TTS.tts.utils.text.phonemizers.base import BasePhonemizer + +_DEF_ZH_PUNCS = "、.,[]()?!〽~『』「」【】" + + +class ZH_CN_Phonemizer(BasePhonemizer): + """🐸TTS Zh-Cn phonemizer using functions in `TTS.tts.utils.text.chinese_mandarin.phonemizer` + + Args: + punctuations (str): + Set of characters to be treated as punctuation. Defaults to `_DEF_ZH_PUNCS`. + + keep_puncs (bool): + If True, keep the punctuations after phonemization. Defaults to False. 
+ + Example :: + + "这是,样本中文。" -> `d|ʒ|ø|4| |ʂ|ʏ|4| |,| |i|ɑ|ŋ|4|b|œ|n|3| |d|ʒ|o|ŋ|1|w|œ|n|2| |。` + + TODO: someone with Mandarin knowledge should check this implementation + """ + + language = "zh-cn" + + def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwargs): # pylint: disable=unused-argument + super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs) + + @staticmethod + def name(): + return "zh_cn_phonemizer" + + @staticmethod + def phonemize_zh_cn(text: str, separator: str = "|") -> str: + ph = chinese_text_to_phonemes(text, separator) + return ph + + def _phonemize(self, text, separator): + return self.phonemize_zh_cn(text, separator) + + @staticmethod + def supported_languages() -> Dict: + return {"zh-cn": "Chinese (China)"} + + def version(self) -> str: + return "0.0.1" + + def is_available(self) -> bool: + return True + + +# if __name__ == "__main__": +# text = "这是,样本中文。" +# e = ZH_CN_Phonemizer() +# print(e.supported_languages()) +# print(e.version()) +# print(e.language) +# print(e.name()) +# print(e.is_available()) +# print("`" + e.phonemize(text) + "`") diff --git a/TTS/tts/utils/text/punctuation.py b/TTS/tts/utils/text/punctuation.py new file mode 100644 index 0000000..36c467d --- /dev/null +++ b/TTS/tts/utils/text/punctuation.py @@ -0,0 +1,171 @@ +import collections +import re +from enum import Enum + +import six + +_DEF_PUNCS = ';:,.!?¡¿—…"«»“”' + +_PUNC_IDX = collections.namedtuple("_punc_index", ["punc", "position"]) + + +class PuncPosition(Enum): + """Enum for the punctuations positions""" + + BEGIN = 0 + END = 1 + MIDDLE = 2 + + +class Punctuation: + """Handle punctuations in text. + + Just strip punctuations from text or strip and restore them later. + + Args: + puncs (str): The punctuations to be processed. Defaults to `_DEF_PUNCS`. + + Example: + >>> punc = Punctuation() + >>> punc.strip("This is. example !") + 'This is example' + + >>> text_striped, punc_map = punc.strip_to_restore("This is. 
example !") + >>> ' '.join(text_striped) + 'This is example' + + >>> text_restored = punc.restore(text_striped, punc_map) + >>> text_restored[0] + 'This is. example !' + """ + + def __init__(self, puncs: str = _DEF_PUNCS): + self.puncs = puncs + + @staticmethod + def default_puncs(): + """Return default set of punctuations.""" + return _DEF_PUNCS + + @property + def puncs(self): + return self._puncs + + @puncs.setter + def puncs(self, value): + if not isinstance(value, six.string_types): + raise ValueError("[!] Punctuations must be of type str.") + self._puncs = "".join(list(dict.fromkeys(list(value)))) # remove duplicates without changing the oreder + self.puncs_regular_exp = re.compile(rf"(\s*[{re.escape(self._puncs)}]+\s*)+") + + def strip(self, text): + """Remove all the punctuations by replacing with `space`. + + Args: + text (str): The text to be processed. + + Example:: + + "This is. example !" -> "This is example " + """ + return re.sub(self.puncs_regular_exp, " ", text).rstrip().lstrip() + + def strip_to_restore(self, text): + """Remove punctuations from text to restore them later. + + Args: + text (str): The text to be processed. + + Examples :: + + "This is. example !" 
-> [["This is", "example"], [".", "!"]] + + """ + text, puncs = self._strip_to_restore(text) + return text, puncs + + def _strip_to_restore(self, text): + """Auxiliary method for Punctuation.preserve()""" + matches = list(re.finditer(self.puncs_regular_exp, text)) + if not matches: + return [text], [] + # the text is only punctuations + if len(matches) == 1 and matches[0].group() == text: + return [], [_PUNC_IDX(text, PuncPosition.BEGIN)] + # build a punctuation map to be used later to restore punctuations + puncs = [] + for match in matches: + position = PuncPosition.MIDDLE + if match == matches[0] and text.startswith(match.group()): + position = PuncPosition.BEGIN + elif match == matches[-1] and text.endswith(match.group()): + position = PuncPosition.END + puncs.append(_PUNC_IDX(match.group(), position)) + # convert str text to a List[str], each item is separated by a punctuation + splitted_text = [] + for idx, punc in enumerate(puncs): + split = text.split(punc.punc) + prefix, suffix = split[0], punc.punc.join(split[1:]) + text = suffix + if prefix == "": + # We don't want to insert an empty string in case of initial punctuation + continue + splitted_text.append(prefix) + # if the text does not end with a punctuation, add it to the last item + if idx == len(puncs) - 1 and len(suffix) > 0: + splitted_text.append(suffix) + return splitted_text, puncs + + @classmethod + def restore(cls, text, puncs): + """Restore punctuation in a text. + + Args: + text (str): The text to be processed. + puncs (List[str]): The list of punctuations map to be used for restoring. + + Examples :: + + ['This is', 'example'], ['.', '!'] -> "This is. example!" 
+ + """ + return cls._restore(text, puncs) + + @classmethod + def _restore(cls, text, puncs): # pylint: disable=too-many-return-statements + """Auxiliary method for Punctuation.restore()""" + if not puncs: + return text + + # nothing have been phonemized, returns the puncs alone + if not text: + return ["".join(m.punc for m in puncs)] + + current = puncs[0] + + if current.position == PuncPosition.BEGIN: + return cls._restore([current.punc + text[0]] + text[1:], puncs[1:]) + + if current.position == PuncPosition.END: + return [text[0] + current.punc] + cls._restore(text[1:], puncs[1:]) + + # POSITION == MIDDLE + if len(text) == 1: # pragma: nocover + # a corner case where the final part of an intermediate + # mark (I) has not been phonemized + return cls._restore([text[0] + current.punc], puncs[1:]) + + return cls._restore([text[0] + current.punc + text[1]] + text[2:], puncs[1:]) + + +# if __name__ == "__main__": +# punc = Punctuation() +# text = "This is. This is, example!" + +# print(punc.strip(text)) + +# split_text, puncs = punc.strip_to_restore(text) +# print(split_text, " ---- ", puncs) + +# restored_text = punc.restore(split_text, puncs) +# print(restored_text) diff --git a/TTS/tts/utils/text/tokenizer.py b/TTS/tts/utils/text/tokenizer.py new file mode 100644 index 0000000..b7faf86 --- /dev/null +++ b/TTS/tts/utils/text/tokenizer.py @@ -0,0 +1,216 @@ +from typing import Callable, Dict, List, Union + +from TTS.tts.utils.text import cleaners +from TTS.tts.utils.text.characters import Graphemes, IPAPhonemes +from TTS.tts.utils.text.phonemizers import DEF_LANG_TO_PHONEMIZER, get_phonemizer_by_name +from TTS.tts.utils.text.phonemizers.multi_phonemizer import MultiPhonemizer +from TTS.utils.generic_utils import get_import_path, import_class + + +class TTSTokenizer: + """🐸TTS tokenizer to convert input characters to token IDs and back. + + Token IDs for OOV chars are discarded but those are stored in `self.not_found_characters` for later. 
+ + Args: + use_phonemes (bool): + Whether to use phonemes instead of characters. Defaults to False. + + characters (Characters): + A Characters object to use for character-to-ID and ID-to-character mappings. + + text_cleaner (callable): + A function to pre-process the text before tokenization and phonemization. Defaults to None. + + phonemizer (Phonemizer): + A phonemizer object or a dict that maps language codes to phonemizer objects. Defaults to None. + + Example: + + >>> from TTS.tts.utils.text.tokenizer import TTSTokenizer + >>> tokenizer = TTSTokenizer(use_phonemes=False, characters=Graphemes()) + >>> text = "Hello world!" + >>> ids = tokenizer.text_to_ids(text) + >>> text_hat = tokenizer.ids_to_text(ids) + >>> assert text == text_hat + """ + + def __init__( + self, + use_phonemes=False, + text_cleaner: Callable = None, + characters: "BaseCharacters" = None, + phonemizer: Union["Phonemizer", Dict] = None, + add_blank: bool = False, + use_eos_bos=False, + ): + self.text_cleaner = text_cleaner + self.use_phonemes = use_phonemes + self.add_blank = add_blank + self.use_eos_bos = use_eos_bos + self.characters = characters + self.not_found_characters = [] + self.phonemizer = phonemizer + + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None + self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None + + def encode(self, text: str) -> List[int]: + """Encodes a string of text as a sequence of IDs.""" + token_ids = [] + for char in text: + try: + idx = self.characters.char_to_id(char) + token_ids.append(idx) + except KeyError: + # discard but store not found characters + if char not in self.not_found_characters: + self.not_found_characters.append(char) + print(text) + print(f" [!] 
Character {repr(char)} not found in the vocabulary. Discarding it.") + return token_ids + + def decode(self, token_ids: List[int]) -> str: + """Decodes a sequence of IDs to a string of text.""" + text = "" + for token_id in token_ids: + text += self.characters.id_to_char(token_id) + return text + + def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument + """Converts a string of text to a sequence of token IDs. + + Args: + text(str): + The text to convert to token IDs. + + language(str): + The language code of the text. Defaults to None. + + TODO: + - Add support for language-specific processing. + + 1. Text normalizatin + 2. Phonemization (if use_phonemes is True) + 3. Add blank char between characters + 4. Add BOS and EOS characters + 5. Text to token IDs + """ + # TODO: text cleaner should pick the right routine based on the language + if self.text_cleaner is not None: + text = self.text_cleaner(text) + if self.use_phonemes: + text = self.phonemizer.phonemize(text, separator="", language=language) + text = self.encode(text) + if self.add_blank: + text = self.intersperse_blank_char(text, True) + if self.use_eos_bos: + text = self.pad_with_bos_eos(text) + return text + + def ids_to_text(self, id_sequence: List[int]) -> str: + """Converts a sequence of token IDs to a string of text.""" + return self.decode(id_sequence) + + def pad_with_bos_eos(self, char_sequence: List[str]): + """Pads a sequence with the special BOS and EOS characters.""" + return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id] + + def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False): + """Intersperses the blank character between characters in a sequence. + + Use the ```blank``` character if defined else use the ```pad``` character. 
+ """ + char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad + result = [char_to_use] * (len(char_sequence) * 2 + 1) + result[1::2] = char_sequence + return result + + def print_logs(self, level: int = 0): + indent = "\t" * level + print(f"{indent}| > add_blank: {self.add_blank}") + print(f"{indent}| > use_eos_bos: {self.use_eos_bos}") + print(f"{indent}| > use_phonemes: {self.use_phonemes}") + if self.use_phonemes: + print(f"{indent}| > phonemizer:") + self.phonemizer.print_logs(level + 1) + if len(self.not_found_characters) > 0: + print(f"{indent}| > {len(self.not_found_characters)} not found characters:") + for char in self.not_found_characters: + print(f"{indent}| > {char}") + + @staticmethod + def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): + """Init Tokenizer object from config + + Args: + config (Coqpit): Coqpit model config. + characters (BaseCharacters): Defines the model character set. If not set, use the default options based on + the config values. Defaults to None. 
+ """ + # init cleaners + text_cleaner = None + if isinstance(config.text_cleaner, (str, list)): + text_cleaner = getattr(cleaners, config.text_cleaner) + + # init characters + if characters is None: + # set characters based on defined characters class + if config.characters and config.characters.characters_class: + CharactersClass = import_class(config.characters.characters_class) + characters, new_config = CharactersClass.init_from_config(config) + # set characters based on config + else: + if config.use_phonemes: + # init phoneme set + characters, new_config = IPAPhonemes().init_from_config(config) + else: + # init character set + characters, new_config = Graphemes().init_from_config(config) + + else: + characters, new_config = characters.init_from_config(config) + + # set characters class + new_config.characters.characters_class = get_import_path(characters) + + # init phonemizer + phonemizer = None + if config.use_phonemes: + if "phonemizer" in config and config.phonemizer == "multi_phonemizer": + lang_to_phonemizer_name = {} + for dataset in config.datasets: + if dataset.language != "": + lang_to_phonemizer_name[dataset.language] = dataset.phonemizer + else: + raise ValueError("Multi phonemizer requires language to be set for each dataset.") + phonemizer = MultiPhonemizer(lang_to_phonemizer_name) + else: + phonemizer_kwargs = {"language": config.phoneme_language} + if "phonemizer" in config and config.phonemizer: + phonemizer = get_phonemizer_by_name(config.phonemizer, **phonemizer_kwargs) + else: + try: + phonemizer = get_phonemizer_by_name( + DEF_LANG_TO_PHONEMIZER[config.phoneme_language], **phonemizer_kwargs + ) + new_config.phonemizer = phonemizer.name() + except KeyError as e: + raise ValueError( + f"""No phonemizer found for language {config.phoneme_language}. 
+ You may need to install a third party library for this language.""" + ) from e + + return ( + TTSTokenizer( + config.use_phonemes, text_cleaner, characters, phonemizer, config.add_blank, config.enable_eos_bos_chars + ), + new_config, + ) diff --git a/TTS/tts/utils/visual.py b/TTS/tts/utils/visual.py new file mode 100644 index 0000000..fba7bc5 --- /dev/null +++ b/TTS/tts/utils/visual.py @@ -0,0 +1,238 @@ +import librosa +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import torch +from matplotlib.colors import LogNorm + +matplotlib.use("Agg") + + +def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, output_fig=False, plot_log=False): + if isinstance(alignment, torch.Tensor): + alignment_ = alignment.detach().cpu().numpy().squeeze() + else: + alignment_ = alignment + alignment_ = alignment_.astype(np.float32) if alignment_.dtype == np.float16 else alignment_ + fig, ax = plt.subplots(figsize=fig_size) + im = ax.imshow( + alignment_.T, aspect="auto", origin="lower", interpolation="none", norm=LogNorm() if plot_log else None + ) + fig.colorbar(im, ax=ax) + xlabel = "Decoder timestep" + if info is not None: + xlabel += "\n\n" + info + plt.xlabel(xlabel) + plt.ylabel("Encoder timestep") + # plt.yticks(range(len(text)), list(text)) + plt.tight_layout() + if title is not None: + plt.title(title) + if not output_fig: + plt.close() + return fig + + +def plot_spectrogram(spectrogram, ap=None, fig_size=(16, 10), output_fig=False): + if isinstance(spectrogram, torch.Tensor): + spectrogram_ = spectrogram.detach().cpu().numpy().squeeze().T + else: + spectrogram_ = spectrogram.T + spectrogram_ = spectrogram_.astype(np.float32) if spectrogram_.dtype == np.float16 else spectrogram_ + if ap is not None: + spectrogram_ = ap.denormalize(spectrogram_) # pylint: disable=protected-access + fig = plt.figure(figsize=fig_size) + plt.imshow(spectrogram_, aspect="auto", origin="lower") + plt.colorbar() + plt.tight_layout() + if not output_fig: + 
plt.close() + return fig + + +def plot_pitch(pitch, spectrogram, ap=None, fig_size=(30, 10), output_fig=False): + """Plot pitch curves on top of the spectrogram. + + Args: + pitch (np.array): Pitch values. + spectrogram (np.array): Spectrogram values. + + Shapes: + pitch: :math:`(T,)` + spec: :math:`(C, T)` + """ + + if isinstance(spectrogram, torch.Tensor): + spectrogram_ = spectrogram.detach().cpu().numpy().squeeze().T + else: + spectrogram_ = spectrogram.T + spectrogram_ = spectrogram_.astype(np.float32) if spectrogram_.dtype == np.float16 else spectrogram_ + if ap is not None: + spectrogram_ = ap.denormalize(spectrogram_) # pylint: disable=protected-access + + old_fig_size = plt.rcParams["figure.figsize"] + if fig_size is not None: + plt.rcParams["figure.figsize"] = fig_size + + fig, ax = plt.subplots() + + ax.imshow(spectrogram_, aspect="auto", origin="lower") + ax.set_xlabel("time") + ax.set_ylabel("spec_freq") + + ax2 = ax.twinx() + ax2.plot(pitch, linewidth=5.0, color="red") + ax2.set_ylabel("F0") + + plt.rcParams["figure.figsize"] = old_fig_size + if not output_fig: + plt.close() + return fig + + +def plot_avg_pitch(pitch, chars, fig_size=(30, 10), output_fig=False): + """Plot pitch curves on top of the input characters. + + Args: + pitch (np.array): Pitch values. + chars (str): Characters to place to the x-axis. + + Shapes: + pitch: :math:`(T,)` + """ + old_fig_size = plt.rcParams["figure.figsize"] + if fig_size is not None: + plt.rcParams["figure.figsize"] = fig_size + + fig, ax = plt.subplots() + + x = np.array(range(len(chars))) + my_xticks = chars + plt.xticks(x, my_xticks) + + ax.set_xlabel("characters") + ax.set_ylabel("freq") + + ax2 = ax.twinx() + ax2.plot(pitch, linewidth=5.0, color="red") + ax2.set_ylabel("F0") + + plt.rcParams["figure.figsize"] = old_fig_size + if not output_fig: + plt.close() + return fig + + +def plot_avg_energy(energy, chars, fig_size=(30, 10), output_fig=False): + """Plot energy curves on top of the input characters. 
+ + Args: + energy (np.array): energy values. + chars (str): Characters to place to the x-axis. + + Shapes: + energy: :math:`(T,)` + """ + old_fig_size = plt.rcParams["figure.figsize"] + if fig_size is not None: + plt.rcParams["figure.figsize"] = fig_size + + fig, ax = plt.subplots() + + x = np.array(range(len(chars))) + my_xticks = chars + plt.xticks(x, my_xticks) + + ax.set_xlabel("characters") + ax.set_ylabel("freq") + + ax2 = ax.twinx() + ax2.plot(energy, linewidth=5.0, color="red") + ax2.set_ylabel("energy") + + plt.rcParams["figure.figsize"] = old_fig_size + if not output_fig: + plt.close() + return fig + + +def visualize( + alignment, + postnet_output, + text, + hop_length, + CONFIG, + tokenizer, + stop_tokens=None, + decoder_output=None, + output_path=None, + figsize=(8, 24), + output_fig=False, +): + """Intended to be used in Notebooks.""" + + if decoder_output is not None: + num_plot = 4 + else: + num_plot = 3 + + label_fontsize = 16 + fig = plt.figure(figsize=figsize) + + plt.subplot(num_plot, 1, 1) + plt.imshow(alignment.T, aspect="auto", origin="lower", interpolation=None) + plt.xlabel("Decoder timestamp", fontsize=label_fontsize) + plt.ylabel("Encoder timestamp", fontsize=label_fontsize) + # compute phoneme representation and back + if CONFIG.use_phonemes: + seq = tokenizer.text_to_ids(text) + text = tokenizer.ids_to_text(seq) + print(text) + plt.yticks(range(len(text)), list(text)) + plt.colorbar() + + if stop_tokens is not None: + # plot stopnet predictions + plt.subplot(num_plot, 1, 2) + plt.plot(range(len(stop_tokens)), list(stop_tokens)) + + # plot postnet spectrogram + plt.subplot(num_plot, 1, 3) + librosa.display.specshow( + postnet_output.T, + sr=CONFIG.audio["sample_rate"], + hop_length=hop_length, + x_axis="time", + y_axis="linear", + fmin=CONFIG.audio["mel_fmin"], + fmax=CONFIG.audio["mel_fmax"], + ) + + plt.xlabel("Time", fontsize=label_fontsize) + plt.ylabel("Hz", fontsize=label_fontsize) + plt.tight_layout() + plt.colorbar() + + if 
decoder_output is not None: + plt.subplot(num_plot, 1, 4) + librosa.display.specshow( + decoder_output.T, + sr=CONFIG.audio["sample_rate"], + hop_length=hop_length, + x_axis="time", + y_axis="linear", + fmin=CONFIG.audio["mel_fmin"], + fmax=CONFIG.audio["mel_fmax"], + ) + plt.xlabel("Time", fontsize=label_fontsize) + plt.ylabel("Hz", fontsize=label_fontsize) + plt.tight_layout() + plt.colorbar() + + if output_path: + print(output_path) + fig.savefig(output_path) + plt.close() + + if not output_fig: + plt.close()