Jelajahi Sumber

feat: new voice class pytts

lanvent 3 tahun lalu
induk
melakukan
e78886fb35

+ 23 - 41
channel/wechat/wechat_channel.py

@@ -68,8 +68,7 @@ class WechatChannel(Channel):
             itchat.auto_login(enableCmdQR=2, hotReload=hotReload)
             itchat.auto_login(enableCmdQR=2, hotReload=hotReload)
         except Exception as e:
         except Exception as e:
             if hotReload:
             if hotReload:
-                logger.error(
-                    "Hot reload failed, try to login without hot reload")
+                logger.error("Hot reload failed, try to login without hot reload")
                 itchat.logout()
                 itchat.logout()
                 os.remove("itchat.pkl")
                 os.remove("itchat.pkl")
                 itchat.auto_login(enableCmdQR=2, hotReload=hotReload)
                 itchat.auto_login(enableCmdQR=2, hotReload=hotReload)
@@ -112,8 +111,7 @@ class WechatChannel(Channel):
 
 
     @time_checker
     @time_checker
     def handle_text(self, msg):
     def handle_text(self, msg):
-        logger.debug("[WX]receive text msg: " +
-                     json.dumps(msg, ensure_ascii=False))
+        logger.debug("[WX]receive text msg: " + json.dumps(msg, ensure_ascii=False))
         content = msg['Text']
         content = msg['Text']
         from_user_id = msg['FromUserName']
         from_user_id = msg['FromUserName']
         to_user_id = msg['ToUserName']              # 接收人id
         to_user_id = msg['ToUserName']              # 接收人id
@@ -141,8 +139,7 @@ class WechatChannel(Channel):
         context.kwargs = {'isgroup': False, 'msg': msg,
         context.kwargs = {'isgroup': False, 'msg': msg,
                           'receiver': other_user_id, 'session_id': other_user_id}
                           'receiver': other_user_id, 'session_id': other_user_id}
 
 
-        img_match_prefix = check_prefix(
-            content, conf().get('image_create_prefix'))
+        img_match_prefix = check_prefix(content, conf().get('image_create_prefix'))
         if img_match_prefix:
         if img_match_prefix:
             content = content.replace(img_match_prefix, '', 1).strip()
             content = content.replace(img_match_prefix, '', 1).strip()
             context.type = ContextType.IMAGE_CREATE
             context.type = ContextType.IMAGE_CREATE
@@ -150,13 +147,11 @@ class WechatChannel(Channel):
             context.type = ContextType.TEXT
             context.type = ContextType.TEXT
 
 
         context.content = content
         context.content = content
-        thread_pool.submit(self.handle, context).add_done_callback(
-            thread_pool_callback)
+        thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
 
 
     @time_checker
     @time_checker
     def handle_group(self, msg):
     def handle_group(self, msg):
-        logger.debug("[WX]receive group msg: " +
-                     json.dumps(msg, ensure_ascii=False))
+        logger.debug("[WX]receive group msg: " + json.dumps(msg, ensure_ascii=False))
         group_name = msg['User'].get('NickName', None)
         group_name = msg['User'].get('NickName', None)
         group_id = msg['User'].get('UserName', None)
         group_id = msg['User'].get('UserName', None)
         create_time = msg['CreateTime']             # 消息时间
         create_time = msg['CreateTime']             # 消息时间
@@ -181,11 +176,9 @@ class WechatChannel(Channel):
             or check_contain(origin_content, config.get('group_chat_keyword'))
             or check_contain(origin_content, config.get('group_chat_keyword'))
         if ('ALL_GROUP' in config.get('group_name_white_list') or group_name in config.get('group_name_white_list') or check_contain(group_name, config.get('group_name_keyword_white_list'))) and match_prefix:
         if ('ALL_GROUP' in config.get('group_name_white_list') or group_name in config.get('group_name_white_list') or check_contain(group_name, config.get('group_name_keyword_white_list'))) and match_prefix:
             context = Context()
             context = Context()
-            context.kwargs = {'isgroup': True,
-                              'msg': msg, 'receiver': group_id}
+            context.kwargs = { 'isgroup': True, 'msg': msg, 'receiver': group_id}
 
 
-            img_match_prefix = check_prefix(
-                content, conf().get('image_create_prefix'))
+            img_match_prefix = check_prefix(content, conf().get('image_create_prefix'))
             if img_match_prefix:
             if img_match_prefix:
                 content = content.replace(img_match_prefix, '', 1).strip()
                 content = content.replace(img_match_prefix, '', 1).strip()
                 context.type = ContextType.IMAGE_CREATE
                 context.type = ContextType.IMAGE_CREATE
@@ -201,8 +194,7 @@ class WechatChannel(Channel):
             else:
             else:
                 context['session_id'] = msg['ActualUserName']
                 context['session_id'] = msg['ActualUserName']
 
 
-            thread_pool.submit(self.handle, context).add_done_callback(
-                thread_pool_callback)
+            thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
 
 
     def handle_group_voice(self, msg):
     def handle_group_voice(self, msg):
         if conf().get('group_speech_recognition', False) != True:
         if conf().get('group_speech_recognition', False) != True:
@@ -241,9 +233,8 @@ class WechatChannel(Channel):
             logger.info('[WX] sendMsg={}, receiver={}'.format(reply, receiver))
             logger.info('[WX] sendMsg={}, receiver={}'.format(reply, receiver))
         elif reply.type == ReplyType.VOICE:
         elif reply.type == ReplyType.VOICE:
             itchat.send_file(reply.content, toUserName=receiver)
             itchat.send_file(reply.content, toUserName=receiver)
-            logger.info('[WX] sendFile={}, receiver={}'.format(
-                reply.content, receiver))
-        elif reply.type == ReplyType.IMAGE_URL:  # 从网络下载图片
+            logger.info('[WX] sendFile={}, receiver={}'.format(reply.content, receiver))
+        elif reply.type == ReplyType.IMAGE_URL: # 从网络下载图片
             img_url = reply.content
             img_url = reply.content
             pic_res = requests.get(img_url, stream=True)
             pic_res = requests.get(img_url, stream=True)
             image_storage = io.BytesIO()
             image_storage = io.BytesIO()
@@ -251,9 +242,8 @@ class WechatChannel(Channel):
                 image_storage.write(block)
                 image_storage.write(block)
             image_storage.seek(0)
             image_storage.seek(0)
             itchat.send_image(image_storage, toUserName=receiver)
             itchat.send_image(image_storage, toUserName=receiver)
-            logger.info('[WX] sendImage url={}, receiver={}'.format(
-                img_url, receiver))
-        elif reply.type == ReplyType.IMAGE:  # 从文件读取图片
+            logger.info('[WX] sendImage url={}, receiver={}'.format(img_url,receiver))
+        elif reply.type == ReplyType.IMAGE: # 从文件读取图片
             image_storage = reply.content
             image_storage = reply.content
             image_storage.seek(0)
             image_storage.seek(0)
             itchat.send_image(image_storage, toUserName=receiver)
             itchat.send_image(image_storage, toUserName=receiver)
@@ -291,20 +281,16 @@ class WechatChannel(Channel):
                 if reply.type != ReplyType.ERROR and reply.type != ReplyType.INFO:
                 if reply.type != ReplyType.ERROR and reply.type != ReplyType.INFO:
                     content = reply.content  # 语音转文字后,将文字内容作为新的context
                     content = reply.content  # 语音转文字后,将文字内容作为新的context
                     context.type = ContextType.TEXT
                     context.type = ContextType.TEXT
-                    if (context["isgroup"] == True):
+                    if context["isgroup"]:
                         # 校验关键字
                         # 校验关键字
-                        match_prefix = check_prefix(content, conf().get('group_chat_prefix')) \
-                            or check_contain(content, conf().get('group_chat_keyword'))
-                        # Wechaty判断is_at为True,返回的内容是过滤掉@之后的内容;而is_at为False,则会返回完整的内容
-                        if match_prefix is not None:
-                            # 故判断如果匹配到自定义前缀,则返回过滤掉前缀+空格后的内容,用于实现类似自定义+前缀触发生成AI图片的功能
-                            prefixes = conf().get('group_chat_prefix')
-                            for prefix in prefixes:
-                                if content.startswith(prefix):
-                                    content = content.replace(prefix, '', 1).strip()
-                                    break
+                        match_prefix = check_prefix(content, conf().get('group_chat_prefix'))
+                        match_contain = check_contain(content, conf().get('group_chat_keyword'))
+                        if match_prefix is not None or match_contain is not None:
+                            # 判断如果匹配到自定义前缀,则返回过滤掉前缀+空格后的内容,用于实现类似自定义+前缀触发生成AI图片的功能
+                            if match_prefix:
+                                content = content.replace(match_prefix, '', 1).strip()
                         else:
                         else:
-                            logger.info("[WX]receive voice check prefix: " + 'False')
+                            logger.info("[WX]receive voice, checkprefix didn't match")
                             return
                             return
                        
                        
                     img_match_prefix = check_prefix(content, conf().get('image_create_prefix'))
                     img_match_prefix = check_prefix(content, conf().get('image_create_prefix'))
@@ -333,9 +319,7 @@ class WechatChannel(Channel):
                 if reply.type == ReplyType.TEXT:
                 if reply.type == ReplyType.TEXT:
                     reply_text = reply.content
                     reply_text = reply.content
                     if context['isgroup']:
                     if context['isgroup']:
-                        reply_text = '@' + \
-                            context['msg']['ActualNickName'] + \
-                            ' ' + reply_text.strip()
+                        reply_text = '@' +  context['msg']['ActualNickName'] + ' ' + reply_text.strip()
                         reply_text = conf().get("group_chat_reply_prefix", "")+reply_text
                         reply_text = conf().get("group_chat_reply_prefix", "")+reply_text
                     else:
                     else:
                         reply_text = conf().get("single_chat_reply_prefix", "")+reply_text
                         reply_text = conf().get("single_chat_reply_prefix", "")+reply_text
@@ -345,8 +329,7 @@ class WechatChannel(Channel):
                 elif reply.type == ReplyType.IMAGE_URL or reply.type == ReplyType.VOICE or reply.type == ReplyType.IMAGE:
                 elif reply.type == ReplyType.IMAGE_URL or reply.type == ReplyType.VOICE or reply.type == ReplyType.IMAGE:
                     pass
                     pass
                 else:
                 else:
-                    logger.error(
-                        '[WX] unknown reply type: {}'.format(reply.type))
+                    logger.error('[WX] unknown reply type: {}'.format(reply.type))
                     return
                     return
 
 
         # reply的发送步骤
         # reply的发送步骤
@@ -355,8 +338,7 @@ class WechatChannel(Channel):
                 'channel': self, 'context': context, 'reply': reply}))
                 'channel': self, 'context': context, 'reply': reply}))
             reply = e_context['reply']
             reply = e_context['reply']
             if not e_context.is_pass() and reply and reply.type:
             if not e_context.is_pass() and reply and reply.type:
-                logger.debug('[WX] ready to send reply: {} to {}'.format(
-                    reply, context['receiver']))
+                logger.debug('[WX] ready to send reply: {} to {}'.format(reply, context['receiver']))
                 self.send(reply, context['receiver'])
                 self.send(reply, context['receiver'])
 
 
 def check_prefix(content, prefix_list):
 def check_prefix(content, prefix_list):

+ 2 - 2
config.py

@@ -47,8 +47,8 @@ available_setting = {
     "speech_recognition": False,  # 是否开启语音识别
     "speech_recognition": False,  # 是否开启语音识别
     "group_speech_recognition": False,  # 是否开启群组语音识别
     "group_speech_recognition": False,  # 是否开启群组语音识别
     "voice_reply_voice": False,  # 是否使用语音回复语音,需要设置对应语音合成引擎的api key
     "voice_reply_voice": False,  # 是否使用语音回复语音,需要设置对应语音合成引擎的api key
-    "voice_to_text": "openai",  # 语音识别引擎,支持openaigoogle
-    "text_to_voice": "baidu",  # 语音合成引擎,支持baidu和google
+    "voice_to_text": "openai",  # 语音识别引擎,支持openai,google
+    "text_to_voice": "baidu",  # 语音合成引擎,支持baidu,google,pytts(offline)
 
 
     # baidu api的配置, 使用百度语音识别和语音合成时需要
     # baidu api的配置, 使用百度语音识别和语音合成时需要
     "baidu_app_id": "",
     "baidu_app_id": "",

+ 1 - 14
voice/google/google_voice.py

@@ -5,7 +5,6 @@ google voice service
 
 
 import time
 import time
 import speech_recognition
 import speech_recognition
-import pyttsx3
 from gtts import gTTS
 from gtts import gTTS
 from bridge.reply import Reply, ReplyType
 from bridge.reply import Reply, ReplyType
 from common.log import logger
 from common.log import logger
@@ -15,21 +14,11 @@ from voice.voice import Voice
 
 
 class GoogleVoice(Voice):
 class GoogleVoice(Voice):
     recognizer = speech_recognition.Recognizer()
     recognizer = speech_recognition.Recognizer()
-    engine = pyttsx3.init()
 
 
     def __init__(self):
     def __init__(self):
-        # 语速
-        self.engine.setProperty('rate', 125)
-        # 音量
-        self.engine.setProperty('volume', 1.0)
-        # 0为男声,1为女声
-        voices = self.engine.getProperty('voices')
-        self.engine.setProperty('voice', voices[1].id)
+        pass
 
 
     def voiceToText(self, voice_file):
     def voiceToText(self, voice_file):
-        # new_file = voice_file.replace('.mp3', '.wav')
-        # subprocess.call('ffmpeg -i ' + voice_file +
-        #                 ' -acodec pcm_s16le -ac 1 -ar 16000 ' + new_file, shell=True)
         with speech_recognition.AudioFile(voice_file) as source:
         with speech_recognition.AudioFile(voice_file) as source:
             audio = self.recognizer.record(source)
             audio = self.recognizer.record(source)
         try:
         try:
@@ -46,8 +35,6 @@ class GoogleVoice(Voice):
     def textToVoice(self, text):
     def textToVoice(self, text):
         try:
         try:
             mp3File = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
             mp3File = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
-            # self.engine.save_to_file(text, textFile)
-            # self.engine.runAndWait()
             tts = gTTS(text=text, lang='zh')
             tts = gTTS(text=text, lang='zh')
             tts.save(mp3File)            
             tts.save(mp3File)            
             logger.info(
             logger.info(

+ 0 - 3
voice/openai/openai_voice.py

@@ -28,6 +28,3 @@ class OpenaiVoice(Voice):
             reply = Reply(ReplyType.ERROR, str(e))
             reply = Reply(ReplyType.ERROR, str(e))
         finally:
         finally:
             return reply
             return reply
-
-    def textToVoice(self, text):
-        pass

+ 37 - 0
voice/pytts/pytts_voice.py

@@ -0,0 +1,37 @@
+
+"""
+pytts voice service (offline)
+"""
+
+import time
+import pyttsx3
+from bridge.reply import Reply, ReplyType
+from common.log import logger
+from common.tmp_dir import TmpDir
+from voice.voice import Voice
+
+
+class PyttsVoice(Voice):
+    engine = pyttsx3.init()
+
+    def __init__(self):
+        # 语速
+        self.engine.setProperty('rate', 125)
+        # 音量
+        self.engine.setProperty('volume', 1.0)
+        for voice in self.engine.getProperty('voices'):
+            if "Chinese" in voice.name:
+                self.engine.setProperty('voice', voice.id)
+
+    def textToVoice(self, text):
+        try:
+            mp3File = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
+            self.engine.save_to_file(text, mp3File)
+            self.engine.runAndWait()
+            logger.info(
+                '[Pytts] textToVoice text={} voice file name={}'.format(text, mp3File))
+            reply = Reply(ReplyType.VOICE, mp3File)
+        except Exception as e:
+            reply = Reply(ReplyType.ERROR, str(e))
+        finally:
+            return reply

+ 3 - 0
voice/voice_factory.py

@@ -17,4 +17,7 @@ def create_voice(voice_type):
     elif voice_type == 'openai':
     elif voice_type == 'openai':
         from voice.openai.openai_voice import OpenaiVoice
         from voice.openai.openai_voice import OpenaiVoice
         return OpenaiVoice()
         return OpenaiVoice()
+    elif voice_type == 'pytts':
+        from voice.pytts.pytts_voice import PyttsVoice
+        return PyttsVoice()
     raise RuntimeError
     raise RuntimeError