RVC-Boss · yitenghao · Oct 28, 2024 · Nov 20, 2024 · Dec 9, 2024
diff --git a/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py b/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py
@@ -117,6 +117,7 @@ def segment_and_extract_feature_for_text(self, text:str, language:str, version:s
         return self.get_phones_and_bert(text, language, version)
 
     def get_phones_and_bert(self, text:str, language:str, version:str, final:bool=False):
+        LangSegment.setKeepPinyin(True)
         if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
             language = language.replace("all_","")
             if language == "en":

diff --git a/GPT_SoVITS/text/chinese2.py b/GPT_SoVITS/text/chinese2.py
@@ -131,13 +131,32 @@ def _merge_erhua(initials: list[str],
         new_finals.append(phn)
 
     return new_initials, new_finals
-
+def remove_brackets(s):
+    if s.startswith('(') and s.endswith(')'):
+        return s[1:-1]
+    return s
+def custom_pinyin(seg:str):
+    '''
+    自定义拼音，格式：这个字的读音是角(jue2)色，而不是角(jiao3)色
+    拼音格式符合 pinyin.Style.TON3(声调风格3，即拼音声调在各个拼音之后，用数字 [1-4] 进行表示。如： 中国 -> ``zhong1 guo2``)
+    逻辑：使用正则截取原字符串中指定的拼音，符合截取拼音后的纯汉字字符串和自定义拼音列表，无自定义拼音的返回空： return:这个字的读音是角色，而不是角色 ["","","","","","","","jue2","","","","","","jiao3",""]
+    '''
+    reg=re.compile('\([a-zA-Z]+\d\)')
+    custom_pys=reg.findall(seg)
+    result=["" for s in range(len(seg))]
+    for custom_py in custom_pys:
+        index=seg.index(custom_py)
+        result[index-1]=remove_brackets(custom_py)
+        seg=seg.replace(custom_py,"",1)
+    result=result[:len(seg)]
+    return seg,result
 
 def _g2p(segments):
     phones_list = []
     word2ph = []
     for seg in segments:
         pinyins = []
+        seg, customer_pinyin = custom_pinyin(seg)
         # Replace all English words in the sentence
         seg = re.sub("[a-zA-Z]+", "", seg)
         seg_cut = psg.lcut(seg)
@@ -158,11 +177,15 @@ def _g2p(segments):
                 # assert len(sub_initials) == len(sub_finals) == len(word)
             initials = sum(initials, [])
             finals = sum(finals, [])
+            initials=[to_initials(customer_pinyin[index]) if customer_pinyin[index] != "" else item for index,item in enumerate(initials)]
+            finals=[to_finals_tone3(customer_pinyin[index]) if customer_pinyin[index] != "" else item for index,item in enumerate(finals)]
+            print("customer_pinyin:",customer_pinyin)
             print("pypinyin结果",initials,finals)
         else:
             # g2pw采用整句推理
             pinyins = g2pw.lazy_pinyin(seg, neutral_tone_with_five=True, style=Style.TONE3)
-
+            pinyins = [customer_pinyin[index] if customer_pinyin[index] != "" else item for index,item in enumerate(pinyins) ]
+            print(f"g2pw seg:{seg} pinyins:{pinyins}")
             pre_word_length = 0
             for word, pos in seg_cut:
                 sub_initials = []
@@ -176,7 +199,7 @@ def _g2p(segments):
                 word_pinyins = pinyins[pre_word_length:now_word_length]
 
                 # 多音字消歧
-                word_pinyins = correct_pronunciation(word,word_pinyins)
+                # word_pinyins = correct_pronunciation(word,word_pinyins) # 拿缓存
 
                 for pinyin in word_pinyins:
                     if pinyin[0].isalpha():
@@ -195,7 +218,7 @@ def _g2p(segments):
 
             initials = sum(initials, [])
             finals = sum(finals, [])
-            # print("g2pw结果",initials,finals)
+            print("g2pw结果",initials,finals)
 
         for c, v in zip(initials, finals):
             raw_pinyin = c + v
@@ -272,15 +295,27 @@ def replace_consecutive_punctuation(text):
 def text_normalize(text):
     # https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
     tx = TextNormalizer()
+    text, custom_py = custom_pinyin(text)
     sentences = tx.normalize(text)
     dest_text = ""
     for sentence in sentences:
         dest_text += replace_punctuation(sentence)
 
     # 避免重复标点引起的参考泄露
     dest_text = replace_consecutive_punctuation(dest_text)
+    if len(dest_text)==len(text):
+        result=""
+        for index,rune in enumerate(dest_text):
+            result=result+rune
+            if text[index]==dest_text[index] and custom_py[index]!="":
+                result=result+"("+custom_py[index]+")"
+        dest_text=result
+    else:
+        print("text_normalize 后长度不一致")
     return dest_text
-
+def clean_custom_pinyin(text):
+    text, _ = custom_pinyin(text)
+    return text
 # 不排除英文的文本格式化
 def mix_text_normalize(text):
     # https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
@@ -296,11 +331,8 @@ def mix_text_normalize(text):
 
 
 if __name__ == "__main__":
-    text = "啊——但是《原神》是由,米哈\游自主，研发的一款全.新开放世界.冒险游戏"
-    text = "呣呣呣～就是…大人的鼹鼠党吧？"
-    text = "你好"
-    text = text_normalize(text)
-    print(g2p(text))
+    text = "这个字的读音是角(jue2)色，而不是角(jiao3)色"
+    print(custom_pinyin(text))
 
 
 # # 示例用法

diff --git a/GPT_SoVITS/text/cleaner.py b/GPT_SoVITS/text/cleaner.py
@@ -40,6 +40,8 @@ def clean_text(text, language, version=None):
         norm_text=text
     if language == "zh" or language=="yue":##########
         phones, word2ph = language_module.g2p(norm_text)
+        if hasattr(language_module,"clean_custom_pinyin"):
+            norm_text=language_module.clean_custom_pinyin(norm_text)
         assert len(phones) == sum(word2ph)
         assert len(norm_text) == len(word2ph)
     elif language == "en":

diff --git a/api.py b/api.py
@@ -314,6 +314,7 @@ def get_bert_inf(phones, word2ph, norm_text, language):
 
 from text import chinese
 def get_phones_and_bert(text,language,version,final=False):
+    LangSegment.setKeepPinyin(True) #用于保留切分后的中文拼音
     if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
         language = language.replace("all_","")
         if language == "en":