3 年前 · 3db452ef71
--- a/README.md
+++ b/README.md
@@ -72,9 +72,8 @@ cd chatgpt-on-wechat/
 
				 pip3 install itchat-uos==1.5.0.dev0
			
 
				 pip3 install --upgrade openai
			
 
				 
			
 
				-默认使用openai的whisper-1模型
			
 
				 如果使用百度的语音识别，需要安装百度的pythonSDK
			
 
				-pip3 install baidu-aip
			
 
				+pip3 install baidu-aip chardet
			
 
				 如果使用google的语音识别，需要安装speech_recognition和依赖的ffmpeg和espeak
			
 
				 pip3 install SpeechRecognition
			
 
				 --在MacOS中安装ffmpeg，brew install ffmpeg espeak
			
@@ -122,7 +121,8 @@ cp config-template.json config.json
 
				 + 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称，`group_chat_keyword`配置项则支持模糊匹配群消息内容，用法与上述两个配置项相同。（Contributed by [evolay](https://github.com/evolay))
			
 
				 
			
 
				 **3.语音识别**
			
 
				-+ 配置`speech_recognition=true`开启语音识别
			
 
				++ 配置`speech_recognition=true`开启语音识别，默认使用openai的whisper模型
			
 
				++ 配置`voice_reply_voice=true`开启语音回复（收到语音消息时以语音进行回复），同时需要配置对应语音合成平台的key
			
 
				 
			
 
				 **4.其他配置**
			
 
				 
			
--- a/channel/wechat/wechat_channel.py
+++ b/channel/wechat/wechat_channel.py
@@ -4,14 +4,13 @@
 
				 wechat channel
			
 
				 """
			
 
				 
			
 
				-import os
			
 
				-import pathlib
			
 
				 import itchat
			
 
				 import json
			
 
				 from itchat.content import *
			
 
				 from channel.channel import Channel
			
 
				 from concurrent.futures import ThreadPoolExecutor
			
 
				 from common.log import logger
			
 
				+from common.tmp_dir import TmpDir
			
 
				 from config import conf
			
 
				 import requests
			
 
				 import io
			
@@ -38,12 +37,8 @@ def handler_single_voice(msg):
 
				 
			
 
				 
			
 
				 class WechatChannel(Channel):
			
 
				-    tmpFilePath = pathlib.Path('./tmp/')
			
 
				-
			
 
				     def __init__(self):
			
 
				-        pathExists = os.path.exists(self.tmpFilePath)
			
 
				-        if not pathExists and conf().get('speech_recognition') == True: 
			
 
				-            os.makedirs(self.tmpFilePath)
			
 
				+        pass
			
 
				 
			
 
				     def startup(self):
			
 
				         # login by scan QRCode
			
@@ -59,17 +54,17 @@ class WechatChannel(Channel):
 
				         thread_pool.submit(self._do_handle_voice, msg)
			
 
				 
			
 
				     def _do_handle_voice(self, msg):
			
 
				-        fileName = self.tmpFilePath+msg['FileName']
			
 
				+        fileName = TmpDir().path() + msg['FileName']
			
 
				         msg.download(fileName)
			
 
				         content = super().build_voice_to_text(fileName)
			
 
				-        self._handle_single_msg(msg, content, False)
			
 
				+        self._handle_single_msg(msg, content, conf().get('voice_reply_voice'))
			
 
				 
			
 
				     def handle_text(self, msg):
			
 
				         logger.debug("[WX]receive text msg: " + json.dumps(msg, ensure_ascii=False))
			
 
				         content = msg['Text']
			
 
				         self._handle_single_msg(msg, content, False)
			
 
				 
			
 
				-    def _handle_single_msg(self, msg, content, is_voice):
			
 
				+    def _handle_single_msg(self, msg, content, reply_voice=False):
			
 
				         from_user_id = msg['FromUserName']
			
 
				         to_user_id = msg['ToUserName']              # 接收人id
			
 
				         other_user_id = msg['User']['UserName']     # 对手方id
			
@@ -88,7 +83,7 @@ class WechatChannel(Channel):
 
				             if img_match_prefix:
			
 
				                 content = content.split(img_match_prefix, 1)[1].strip()
			
 
				                 thread_pool.submit(self._do_send_img, content, from_user_id)
			
 
				-            elif is_voice:
			
 
				+            elif reply_voice:
			
 
				                 thread_pool.submit(self._do_send_voice, content, from_user_id)
			
 
				             else :
			
 
				                 thread_pool.submit(self._do_send_text, content, from_user_id)
			
@@ -101,7 +96,7 @@ class WechatChannel(Channel):
 
				             if img_match_prefix:
			
 
				                 content = content.split(img_match_prefix, 1)[1].strip()
			
 
				                 thread_pool.submit(self._do_send_img, content, to_user_id)
			
 
				-            elif is_voice:
			
 
				+            elif reply_voice:
			
 
				                 thread_pool.submit(self._do_send_voice, content, to_user_id)
			
 
				             else:
			
 
				                 thread_pool.submit(self._do_send_text, content, to_user_id)
			
--- a/common/tmp_dir.py
+++ b/common/tmp_dir.py
@@ -0,0 +1,20 @@
 
				+
			
 
				+import os
			
 
				+import pathlib
			
 
				+from config import conf
			
 
				+
			
 
				+
			
 
				+class TmpDir(object):
			
 
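				+    """Helper for the shared ./tmp/ directory used to hold voice files. The directory is created on instantiation if it is missing and `speech_recognition` is enabled; it is never deleted automatically.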
			
 
				+    """
			
 
				+
			
 
				+    tmpFilePath = pathlib.Path('./tmp/')
			
 
				+    
			
 
				+    def __init__(self):
			
 
				+        pathExists = os.path.exists(self.tmpFilePath)
			
 
				+        if not pathExists and conf().get('speech_recognition') == True:
			
 
				+            os.makedirs(self.tmpFilePath)
			
 
				+
			
 
				+    def path(self):
			
 
				+        return str(self.tmpFilePath) + '/'
			
 
				+    
			
--- a/config-template.json
+++ b/config-template.json
@@ -8,6 +8,7 @@
 
				   "image_create_prefix": ["画", "看", "找"],
			
 
				   "conversation_max_tokens": 1000,
			
 
				   "speech_recognition": false,
			
 
				+  "voice_reply_voice": false,
			
 
				   "baidu_app_id": "YOUR BAIDU APP ID",
			
 
				   "baidu_api_key": "YOUR BAIDU API KEY",
			
 
				   "baidu_secret_key": "YOUR BAIDU SERVICE KEY",
			
--- a/voice/baidu/baidu_voice.py
+++ b/voice/baidu/baidu_voice.py
@@ -2,7 +2,10 @@
 
				 """
			
 
				 baidu voice service
			
 
				 """
			
 
				+import time
			
 
				 from aip import AipSpeech
			
 
				+from common.log import logger
			
 
				+from common.tmp_dir import TmpDir
			
 
				 from voice.voice import Voice
			
 
				 from config import conf
			
 
				 
			
@@ -19,4 +22,15 @@ class BaiduVoice(Voice):
 
				         pass
			
 
				 
			
 
				     def textToVoice(self, text):
			
 
				-        pass
			
 
				+        result = self.client.synthesis(text, 'zh', 1, {
			
 
				+            'spd': 5, 'pit': 5, 'vol': 5, 'per': 111
			
 
				+        })
			
 
				+        if not isinstance(result, dict):
			
 
				+            fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
			
 
				+            with open(fileName, 'wb') as f:
			
 
				+                f.write(result)
			
 
				+            logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, fileName))
			
 
				+            return fileName
			
 
				+        else:
			
 
				+            logger.error('[Baidu] textToVoice error={}'.format(result))
			
 
				+            return None
			
--- a/voice/google/google_voice.py
+++ b/voice/google/google_voice.py
@@ -9,6 +9,7 @@ import time
 
				 import speech_recognition
			
 
				 import pyttsx3
			
 
				 from common.log import logger
			
 
				+from common.tmp_dir import TmpDir
			
 
				 from voice.voice import Voice
			
 
				 
			
 
				 
			
@@ -42,7 +43,7 @@ class GoogleVoice(Voice):
 
				             return "抱歉，无法连接到 Google 语音识别服务；{0}".format(e)
			
 
				 
			
 
				     def textToVoice(self, text):
			
 
				-        textFile = self.tmpFilePath + '语音回复_' + str(int(time.time())) + '.mp3'
			
 
				+        textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
			
 
				         self.engine.save_to_file(text, textFile)
			
 
				         self.engine.runAndWait()
			
 
				         logger.info(