Browse Source

Merge branch 'zhayujie:master' into master

Bachery 3 years ago
parent
commit
574f05cc6f

+ 1 - 0
.gitignore

@@ -6,3 +6,4 @@ venv*
 config.json
 config.json
 QR.png
 QR.png
 nohup.out
 nohup.out
+tmp

+ 13 - 1
README.md

@@ -71,6 +71,14 @@ cd chatgpt-on-wechat/
 ```bash
 ```bash
 pip3 install itchat-uos==1.5.0.dev0
 pip3 install itchat-uos==1.5.0.dev0
 pip3 install --upgrade openai
 pip3 install --upgrade openai
+
+如果使用百度的语音识别,需要安装百度的 Python SDK
+pip3 install baidu-aip chardet
+如果使用google的语音识别,需要安装speech_recognition和依赖的ffmpeg和espeak
+pip3 install SpeechRecognition
+--在MacOS中安装ffmpeg,brew install ffmpeg espeak
+--在Windows中安装ffmpeg,下载ffmpeg.exe
+--在Linux中安装ffmpeg,apt-get install ffmpeg espeak
 ```
 ```
 注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。
 注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。
 
 
@@ -112,7 +120,11 @@ cp config-template.json config.json
 + 默认只要被人 @ 就会触发机器人自动回复;另外群聊天中只要检测到以 "@bot" 开头的内容,同样会自动回复(方便自己触发),这对应配置项 `group_chat_prefix`
 + 默认只要被人 @ 就会触发机器人自动回复;另外群聊天中只要检测到以 "@bot" 开头的内容,同样会自动回复(方便自己触发),这对应配置项 `group_chat_prefix`
 + 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay))
 + 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay))
 
 
-**3.其他配置**
+**3.语音识别**
++ 配置`speech_recognition=true`开启语音识别,默认使用openai的whisper模型
++ 配置`voice_reply_voice=true`语音回复语音,但是需要配置对应语音合成平台的key,由于itchat协议的限制,只能发送语音mp3文件。使用wechaty则回复的是微信语音。
+
+**4.其他配置**
 
 
 + `proxy`:由于目前 `openai` 接口国内无法访问,需配置代理客户端的地址,详情参考  [#351](https://github.com/zhayujie/chatgpt-on-wechat/issues/351)
 + `proxy`:由于目前 `openai` 接口国内无法访问,需配置代理客户端的地址,详情参考  [#351](https://github.com/zhayujie/chatgpt-on-wechat/issues/351)
 + 对于图像生成,在满足个人或群组触发条件外,还需要额外的关键词前缀来触发,对应配置 `image_create_prefix `
 + 对于图像生成,在满足个人或群组触发条件外,还需要额外的关键词前缀来触发,对应配置 `image_create_prefix `

+ 7 - 0
bridge/bridge.py

@@ -1,4 +1,5 @@
 from bot import bot_factory
 from bot import bot_factory
+from voice import voice_factory
 
 
 
 
 class Bridge(object):
 class Bridge(object):
@@ -7,3 +8,9 @@ class Bridge(object):
 
 
     def fetch_reply_content(self, query, context):
     def fetch_reply_content(self, query, context):
         return bot_factory.create_bot("chatGPT").reply(query, context)
         return bot_factory.create_bot("chatGPT").reply(query, context)
+
+    def fetch_voice_to_text(self, voiceFile):
+        return voice_factory.create_voice("openai").voiceToText(voiceFile)
+
+    def fetch_text_to_voice(self, text):
+        return voice_factory.create_voice("baidu").textToVoice(text)

+ 7 - 1
channel/channel.py

@@ -11,7 +11,7 @@ class Channel(object):
         """
         """
         raise NotImplementedError
         raise NotImplementedError
 
 
-    def handle(self, msg):
+    def handle_text(self, msg):
         """
         """
         process received msg
         process received msg
         :param msg: message object
         :param msg: message object
@@ -29,3 +29,9 @@ class Channel(object):
 
 
     def build_reply_content(self, query, context=None):
     def build_reply_content(self, query, context=None):
         return Bridge().fetch_reply_content(query, context)
         return Bridge().fetch_reply_content(query, context)
+
+    def build_voice_to_text(self, voice_file):
+        return Bridge().fetch_voice_to_text(voice_file)
+    
+    def build_text_to_voice(self, text):
+        return Bridge().fetch_text_to_voice(text)

+ 51 - 11
channel/wechat/wechat_channel.py

@@ -3,12 +3,14 @@
 """
 """
 wechat channel
 wechat channel
 """
 """
+
 import itchat
 import itchat
 import json
 import json
 from itchat.content import *
 from itchat.content import *
 from channel.channel import Channel
 from channel.channel import Channel
 from concurrent.futures import ThreadPoolExecutor
 from concurrent.futures import ThreadPoolExecutor
 from common.log import logger
 from common.log import logger
+from common.tmp_dir import TmpDir
 from config import conf
 from config import conf
 import requests
 import requests
 import io
 import io
@@ -18,7 +20,7 @@ thread_pool = ThreadPoolExecutor(max_workers=8)
 
 
 @itchat.msg_register(TEXT)
 @itchat.msg_register(TEXT)
 def handler_single_msg(msg):
 def handler_single_msg(msg):
-    WechatChannel().handle(msg)
+    WechatChannel().handle_text(msg)
     return None
     return None
 
 
 
 
@@ -28,6 +30,12 @@ def handler_group_msg(msg):
     return None
     return None
 
 
 
 
+@itchat.msg_register(VOICE)
+def handler_single_voice(msg):
+    WechatChannel().handle_voice(msg)
+    return None
+
+
 class WechatChannel(Channel):
 class WechatChannel(Channel):
     def __init__(self):
     def __init__(self):
         pass
         pass
@@ -39,12 +47,27 @@ class WechatChannel(Channel):
         # start message listener
         # start message listener
         itchat.run()
         itchat.run()
 
 
-    def handle(self, msg):
-        logger.debug("[WX]receive msg: " + json.dumps(msg, ensure_ascii=False))
+    def handle_voice(self, msg):
+        if conf().get('speech_recognition') != True :
+            return
+        logger.debug("[WX]receive voice msg: " + msg['FileName'])
+        thread_pool.submit(self._do_handle_voice, msg)
+
+    def _do_handle_voice(self, msg):
+        fileName = TmpDir().path() + msg['FileName']
+        msg.download(fileName)
+        content = super().build_voice_to_text(fileName)
+        self._handle_single_msg(msg, content, conf().get('voice_reply_voice'))
+
+    def handle_text(self, msg):
+        logger.debug("[WX]receive text msg: " + json.dumps(msg, ensure_ascii=False))
+        content = msg['Text']
+        self._handle_single_msg(msg, content, False)
+
+    def _handle_single_msg(self, msg, content, reply_voice=False):
         from_user_id = msg['FromUserName']
         from_user_id = msg['FromUserName']
         to_user_id = msg['ToUserName']              # 接收人id
         to_user_id = msg['ToUserName']              # 接收人id
         other_user_id = msg['User']['UserName']     # 对手方id
         other_user_id = msg['User']['UserName']     # 对手方id
-        content = msg['Text']
         match_prefix = self.check_prefix(content, conf().get('single_chat_prefix'))
         match_prefix = self.check_prefix(content, conf().get('single_chat_prefix'))
         if "」\n- - - - - - - - - - - - - - -" in content:
         if "」\n- - - - - - - - - - - - - - -" in content:
             logger.debug("[WX]reference query skipped")
             logger.debug("[WX]reference query skipped")
@@ -60,9 +83,10 @@ class WechatChannel(Channel):
             if img_match_prefix:
             if img_match_prefix:
                 content = content.split(img_match_prefix, 1)[1].strip()
                 content = content.split(img_match_prefix, 1)[1].strip()
                 thread_pool.submit(self._do_send_img, content, from_user_id)
                 thread_pool.submit(self._do_send_img, content, from_user_id)
-            else:
-                thread_pool.submit(self._do_send, content, from_user_id)
-
+            elif reply_voice:
+                thread_pool.submit(self._do_send_voice, content, from_user_id)
+            else :
+                thread_pool.submit(self._do_send_text, content, from_user_id)
         elif to_user_id == other_user_id and match_prefix:
         elif to_user_id == other_user_id and match_prefix:
             # 自己给好友发送消息
             # 自己给好友发送消息
             str_list = content.split(match_prefix, 1)
             str_list = content.split(match_prefix, 1)
@@ -72,8 +96,10 @@ class WechatChannel(Channel):
             if img_match_prefix:
             if img_match_prefix:
                 content = content.split(img_match_prefix, 1)[1].strip()
                 content = content.split(img_match_prefix, 1)[1].strip()
                 thread_pool.submit(self._do_send_img, content, to_user_id)
                 thread_pool.submit(self._do_send_img, content, to_user_id)
+            elif reply_voice:
+                thread_pool.submit(self._do_send_voice, content, to_user_id)
             else:
             else:
-                thread_pool.submit(self._do_send, content, to_user_id)
+                thread_pool.submit(self._do_send_text, content, to_user_id)
 
 
 
 
     def handle_group(self, msg):
     def handle_group(self, msg):
@@ -105,10 +131,24 @@ class WechatChannel(Channel):
                 thread_pool.submit(self._do_send_group, content, msg)
                 thread_pool.submit(self._do_send_group, content, msg)
 
 
     def send(self, msg, receiver):
     def send(self, msg, receiver):
-        logger.info('[WX] sendMsg={}, receiver={}'.format(msg, receiver))
         itchat.send(msg, toUserName=receiver)
         itchat.send(msg, toUserName=receiver)
+        logger.info('[WX] sendMsg={}, receiver={}'.format(msg, receiver))
+
+    def _do_send_voice(self, query, reply_user_id):
+        try:
+            if not query:
+                return
+            context = dict()
+            context['from_user_id'] = reply_user_id
+            reply_text = super().build_reply_content(query, context)
+            if reply_text:
+                replyFile = super().build_text_to_voice(reply_text)
+                itchat.send_file(replyFile, toUserName=reply_user_id)
+                logger.info('[WX] sendFile={}, receiver={}'.format(replyFile, reply_user_id))
+        except Exception as e:
+            logger.exception(e)
 
 
-    def _do_send(self, query, reply_user_id):
+    def _do_send_text(self, query, reply_user_id):
         try:
         try:
             if not query:
             if not query:
                 return
                 return
@@ -138,8 +178,8 @@ class WechatChannel(Channel):
             image_storage.seek(0)
             image_storage.seek(0)
 
 
             # 图片发送
             # 图片发送
-            logger.info('[WX] sendImage, receiver={}'.format(reply_user_id))
             itchat.send_image(image_storage, reply_user_id)
             itchat.send_image(image_storage, reply_user_id)
+            logger.info('[WX] sendImage, receiver={}'.format(reply_user_id))
         except Exception as e:
         except Exception as e:
             logger.exception(e)
             logger.exception(e)
 
 

+ 20 - 0
common/tmp_dir.py

@@ -0,0 +1,20 @@
+
+import os
+import pathlib
+from config import conf
+
+
class TmpDir(object):
    """Helper for the local ./tmp/ working directory.

    NOTE: despite the original docstring, the directory is NOT deleted when
    the object is destroyed — it persists so downloaded voice files and
    generated replies can be inspected/reused.
    """

    # shared path for all instances
    tmpFilePath = pathlib.Path('./tmp/')

    def __init__(self):
        # exist_ok avoids the check-then-create race when several worker
        # threads construct TmpDir concurrently; creation stays gated on the
        # speech_recognition flag as before
        if conf().get('speech_recognition') == True:
            os.makedirs(self.tmpFilePath, exist_ok=True)

    def path(self):
        """Return the tmp directory as a string ending with '/'."""
        return str(self.tmpFilePath) + '/'
+    

+ 5 - 0
config-template.json

@@ -8,6 +8,11 @@
   "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],
   "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],
   "image_create_prefix": ["画", "看", "找"],
   "image_create_prefix": ["画", "看", "找"],
   "conversation_max_tokens": 1000,
   "conversation_max_tokens": 1000,
+  "speech_recognition": false,
+  "voice_reply_voice": false,
+  "baidu_app_id": "YOUR BAIDU APP ID",
+  "baidu_api_key": "YOUR BAIDU API KEY",
+  "baidu_secret_key": "YOUR BAIDU SECRET KEY",
   "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。",
   "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。",
   "expires_in_seconds": 3600
   "expires_in_seconds": 3600
 }
 }

+ 36 - 0
voice/baidu/baidu_voice.py

@@ -0,0 +1,36 @@
+
+"""
+baidu voice service
+"""
+import time
+from aip import AipSpeech
+from common.log import logger
+from common.tmp_dir import TmpDir
+from voice.voice import Voice
+from config import conf
+
class BaiduVoice(Voice):
    """Voice service backed by the Baidu AI speech SDK (text-to-speech only).

    Speech-to-text is not implemented here; the openai/google services
    handle transcription (see voice_factory).
    """

    def __init__(self):
        # Read credentials and build the client lazily, per instance, instead
        # of at class-definition (import) time — importing this module no
        # longer requires a valid Baidu configuration.
        app_id = conf().get('baidu_app_id')
        api_key = conf().get('baidu_api_key')
        secret_key = conf().get('baidu_secret_key')
        self.client = AipSpeech(app_id, api_key, secret_key)

    def voiceToText(self, voice_file):
        # Not implemented for Baidu; returns None.
        pass

    def textToVoice(self, text):
        """Synthesize *text* to an mp3 file.

        :return: path of the generated mp3 on success, None on failure.
        """
        # synthesis() returns raw audio bytes on success and an error dict
        # on failure, per the Baidu SDK contract
        result = self.client.synthesis(text, 'zh', 1, {
            'spd': 5, 'pit': 5, 'vol': 5, 'per': 111
        })
        if not isinstance(result, dict):
            fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
            with open(fileName, 'wb') as f:
                f.write(result)
            logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, fileName))
            return fileName
        else:
            logger.error('[Baidu] textToVoice error={}'.format(result))
            return None

+ 51 - 0
voice/google/google_voice.py

@@ -0,0 +1,51 @@
+
+"""
+google voice service
+"""
+
+import pathlib
+import subprocess
+import time
+import speech_recognition
+import pyttsx3
+from common.log import logger
+from common.tmp_dir import TmpDir
+from voice.voice import Voice
+
+
class GoogleVoice(Voice):
    """Voice service using Google speech recognition (STT) and pyttsx3 (TTS).

    Requires ffmpeg on the PATH to convert mp3 input to wav.
    """

    # shared across instances, as in the original design
    recognizer = speech_recognition.Recognizer()
    engine = pyttsx3.init()

    def __init__(self):
        # speaking rate (words per minute)
        self.engine.setProperty('rate', 125)
        # volume: 0.0 .. 1.0
        self.engine.setProperty('volume', 1.0)
        # prefer the second installed voice (index 1, typically female); fall
        # back gracefully when the platform ships only one voice instead of
        # raising IndexError
        voices = self.engine.getProperty('voices')
        if len(voices) > 1:
            self.engine.setProperty('voice', voices[1].id)

    def voiceToText(self, voice_file):
        """Transcribe an mp3: convert to 16 kHz mono wav, then query Google.

        :return: recognized text, or a Chinese error message on failure.
        """
        new_file = voice_file.replace('.mp3', '.wav')
        # argument-list form avoids shell=True, which both broke on file
        # names containing spaces and allowed shell injection via the name
        subprocess.call(['ffmpeg', '-i', voice_file, '-acodec', 'pcm_s16le',
                         '-ac', '1', '-ar', '16000', new_file])
        with speech_recognition.AudioFile(new_file) as source:
            audio = self.recognizer.record(source)
        try:
            text = self.recognizer.recognize_google(audio, language='zh-CN')
            logger.info(
                '[Google] voiceToText text={} voice file name={}'.format(text, voice_file))
            return text
        except speech_recognition.UnknownValueError:
            return "抱歉,我听不懂。"
        except speech_recognition.RequestError as e:
            return "抱歉,无法连接到 Google 语音识别服务;{0}".format(e)

    def textToVoice(self, text):
        """Synthesize *text* to an mp3 file via pyttsx3 and return its path."""
        textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
        self.engine.save_to_file(text, textFile)
        self.engine.runAndWait()
        logger.info(
            '[Google] textToVoice text={} voice file name={}'.format(text, textFile))
        return textFile

+ 27 - 0
voice/openai/openai_voice.py

@@ -0,0 +1,27 @@
+
+"""
+openai voice service
+"""
+import json
+import openai
+from config import conf
+from common.log import logger
+from voice.voice import Voice
+
+
class OpenaiVoice(Voice):
    """Voice service using OpenAI's whisper API (speech-to-text only)."""

    def __init__(self):
        openai.api_key = conf().get('open_ai_api_key')

    def voiceToText(self, voice_file):
        """Transcribe *voice_file* with the whisper-1 model.

        :return: the recognized text.
        """
        logger.debug('[Openai] voice file name={}'.format(voice_file))
        # context manager closes the handle even if the API call raises
        # (the original leaked the open file object)
        with open(voice_file, "rb") as file:
            reply = openai.Audio.transcribe("whisper-1", file)
        text = reply["text"]
        logger.info(
            '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file))
        return text

    def textToVoice(self, text):
        # Not implemented for OpenAI; returns None.
        pass

+ 16 - 0
voice/voice.py

@@ -0,0 +1,16 @@
+"""
+Voice service abstract class
+"""
+
class Voice(object):
    """Abstract base class for speech services.

    Concrete implementations translate between audio files and text in
    both directions; unimplemented directions raise NotImplementedError.
    """

    def voiceToText(self, voice_file):
        """Transcribe the given audio file and return the recognized text."""
        raise NotImplementedError

    def textToVoice(self, text):
        """Synthesize *text* into an audio file and return the file path."""
        raise NotImplementedError

+ 20 - 0
voice/voice_factory.py

@@ -0,0 +1,20 @@
+"""
+voice factory
+"""
+
def create_voice(voice_type):
    """
    Create a voice service instance.

    :param voice_type: voice type code ('baidu', 'google' or 'openai')
    :return: voice instance
    :raises RuntimeError: when voice_type is not a supported code
    """
    # imports stay local so only the selected backend's dependencies load
    if voice_type == 'baidu':
        from voice.baidu.baidu_voice import BaiduVoice
        return BaiduVoice()
    elif voice_type == 'google':
        from voice.google.google_voice import GoogleVoice
        return GoogleVoice()
    elif voice_type == 'openai':
        from voice.openai.openai_voice import OpenaiVoice
        return OpenaiVoice()
    # original raised a bare RuntimeError with no context; include the
    # offending value to make misconfiguration debuggable
    raise RuntimeError('unsupported voice_type: {}'.format(voice_type))