Просмотр исходного кода

Merge pull request #476 from Chiaki-Chan/master

 1.新增wechaty方案的语音识别、语音回复功能;2.更新README;
zhayujie 3 лет назад
Родитель
Коммит
5b0f0e8b6c
Изменено 4 файла: 131 добавление и 8 удалений
  1. 1 0
      .gitignore
  2. 43 5
      README.md
  3. 79 0
      channel/wechat/wechaty_channel.py
  4. 8 3
      config-template.json

+ 1 - 0
.gitignore

@@ -1,5 +1,6 @@
 .DS_Store
 .DS_Store
 .idea
 .idea
+.wechaty/
 __pycache__/
 __pycache__/
 venv*
 venv*
 *.pyc
 *.pyc

+ 43 - 5
README.md

@@ -62,15 +62,14 @@
 支持 Linux、MacOS、Windows 系统(可在Linux服务器上长期运行),同时需安装 `Python`。 
 支持 Linux、MacOS、Windows 系统(可在Linux服务器上长期运行),同时需安装 `Python`。 
 > 建议Python版本在 3.7.1~3.9.X 之间,3.10及以上版本在 MacOS 可用,其他系统上不确定能否正常运行。
 > 建议Python版本在 3.7.1~3.9.X 之间,3.10及以上版本在 MacOS 可用,其他系统上不确定能否正常运行。
 
 
-
-1.克隆项目代码:
+**1.克隆项目代码:**
 
 
 ```bash
 ```bash
 git clone https://github.com/zhayujie/chatgpt-on-wechat
 git clone https://github.com/zhayujie/chatgpt-on-wechat
 cd chatgpt-on-wechat/
 cd chatgpt-on-wechat/
 ```
 ```
 
 
-2.安装所需核心依赖:
+**2.安装所需核心依赖:**
 
 
 ```bash
 ```bash
 pip3 install itchat-uos==1.5.0.dev0
 pip3 install itchat-uos==1.5.0.dev0
@@ -78,13 +77,45 @@ pip3 install --upgrade openai
 ```
 ```
 注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。
 注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。
 
 
+**3.安装所需拓展依赖(接收语音,回复语音):**
+### wechaty 处理语音所需
+
+```bash
+pip3 install pysilk
+pip3 install pysilk-mod
+pip3 install pydub
+```
+
+### 百度
+
+```bash
+  pip3 install baidu-aip chardet
+```
+### Google
+
+(1) 安装 SpeechRecognition
+```bash
+  pip3 install SpeechRecognition
+```
+(2) 安装 ffmpeg 和 espeak
+
+  MacOS: 
+  ```bash
+    brew install ffmpeg espeak
+  ```
+  Windows:下载ffmpeg.exe
+
+  Linux: 
+  ```bash
+    apt-get install ffmpeg espeak
+  ```
 
 
 ## 配置
 ## 配置
 
 
 配置文件的模板在根目录的`config-template.json`中,需复制该模板创建最终生效的 `config.json` 文件:
 配置文件的模板在根目录的`config-template.json`中,需复制该模板创建最终生效的 `config.json` 文件:
 
 
 ```bash
 ```bash
-cp config-template.json config.json
+  cp config-template.json config.json
 ```
 ```
 
 
 然后在`config.json`中填入配置,以下是对默认配置的说明,可根据需要进行自定义修改:
 然后在`config.json`中填入配置,以下是对默认配置的说明,可根据需要进行自定义修改:
@@ -93,14 +124,21 @@ cp config-template.json config.json
 # config.json文件内容示例
 # config.json文件内容示例
 { 
 { 
   "open_ai_api_key": "YOUR API KEY",                          # 填入上面创建的 OpenAI API KEY
   "open_ai_api_key": "YOUR API KEY",                          # 填入上面创建的 OpenAI API KEY
+  "open_ai_api_base": "https://api.openai.com/v1",            # 自定义 OpenAI API 地址
   "proxy": "127.0.0.1:7890",                                  # 代理客户端的ip和端口
   "proxy": "127.0.0.1:7890",                                  # 代理客户端的ip和端口
+  "baidu_app_id": "",                                         # 百度AI的App Id
+  "baidu_api_key": "",                                        # 百度AI的API KEY
+  "baidu_secret_key": "",                                     # 百度AI的Secret KEY
+  "wechaty_puppet_service_token":"",                          # wechaty服务token
   "single_chat_prefix": ["bot", "@bot"],                      # 私聊时文本需要包含该前缀才能触发机器人回复
   "single_chat_prefix": ["bot", "@bot"],                      # 私聊时文本需要包含该前缀才能触发机器人回复
   "single_chat_reply_prefix": "[bot] ",                       # 私聊时自动回复的前缀,用于区分真人
   "single_chat_reply_prefix": "[bot] ",                       # 私聊时自动回复的前缀,用于区分真人
   "group_chat_prefix": ["@bot"],                              # 群聊时包含该前缀则会触发机器人回复
   "group_chat_prefix": ["@bot"],                              # 群聊时包含该前缀则会触发机器人回复
   "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"], # 开启自动回复的群名称列表
   "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"], # 开启自动回复的群名称列表
   "image_create_prefix": ["画", "看", "找"],                   # 开启图片回复的前缀
   "image_create_prefix": ["画", "看", "找"],                   # 开启图片回复的前缀
   "conversation_max_tokens": 1000,                            # 支持上下文记忆的最多字符数
   "conversation_max_tokens": 1000,                            # 支持上下文记忆的最多字符数
-  "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。"  # 人格描述
+  "speech_recognition": false,                                # 是否开启语音识别
+  "voice_reply_voice": false,                                 # 是否开启语音回复
+  "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。"  # 人格描述
 }
 }
 ```
 ```
 **配置说明:**
 **配置说明:**

+ 79 - 0
channel/wechat/wechaty_channel.py

@@ -10,12 +10,16 @@ import json
 import time
 import time
 import asyncio
 import asyncio
 import requests
 import requests
+import pysilk
+import wave
+from pydub import AudioSegment
 from typing import Optional, Union
 from typing import Optional, Union
 from wechaty_puppet import MessageType, FileBox, ScanStatus  # type: ignore
 from wechaty_puppet import MessageType, FileBox, ScanStatus  # type: ignore
 from wechaty import Wechaty, Contact
 from wechaty import Wechaty, Contact
 from wechaty.user import Message, Room, MiniProgram, UrlLink
 from wechaty.user import Message, Room, MiniProgram, UrlLink
 from channel.channel import Channel
 from channel.channel import Channel
 from common.log import logger
 from common.log import logger
+from common.tmp_dir import TmpDir
 from config import conf
 from config import conf
 
 
 
 
@@ -89,6 +93,48 @@ class WechatyChannel(Channel):
                     await self._do_send_img(content, to_user_id)
                     await self._do_send_img(content, to_user_id)
                 else:
                 else:
                     await self._do_send(content, to_user_id)
                     await self._do_send(content, to_user_id)
+        elif room is None and msg.type() == MessageType.MESSAGE_TYPE_AUDIO:
+            if not msg.is_self(): # 接收语音消息
+                # 下载语音文件
+                voice_file = await msg.to_file_box()
+                silk_file = TmpDir().path() + voice_file.name
+                await voice_file.to_file(silk_file)
+                logger.info("[WX]receive voice file: " + silk_file)
+                # 将文件转成wav格式音频
+                wav_file = silk_file.replace(".slk", ".wav")
+                with open(silk_file, 'rb') as f:
+                    silk_data = f.read()
+                pcm_data = pysilk.decode(silk_data)
+
+                with wave.open(wav_file, 'wb') as wav_data:
+                    wav_data.setnchannels(1)
+                    wav_data.setsampwidth(2)
+                    wav_data.setframerate(24000)
+                    wav_data.writeframes(pcm_data)
+                if os.path.exists(wav_file): 
+                    converter_state = "true" # 转换wav成功
+                else:
+                    converter_state = "false" # 转换wav失败
+                logger.info("[WX]receive voice converter: " + converter_state)
+                # 语音识别为文本
+                query = super().build_voice_to_text(wav_file)
+                # 校验关键字
+                match_prefix = self.check_prefix(query, conf().get('single_chat_prefix'))
+                if match_prefix is not None:
+                    if match_prefix != '':
+                        str_list = query.split(match_prefix, 1)
+                        if len(str_list) == 2:
+                            query = str_list[1].strip()
+                    # 返回消息
+                    if conf().get('voice_reply_voice'):
+                        await self._do_send_voice(query, from_user_id)
+                    else:
+                        await self._do_send(query, from_user_id)
+                else:
+                    logger.info("[WX]receive voice check prefix: " + 'False')
+                # 清除缓存文件
+                os.remove(wav_file)
+                os.remove(silk_file)
         elif room and msg.type() == MessageType.MESSAGE_TYPE_TEXT:
         elif room and msg.type() == MessageType.MESSAGE_TYPE_TEXT:
             # 群组&文本消息
             # 群组&文本消息
             room_id = room.room_id
             room_id = room.room_id
@@ -135,6 +181,39 @@ class WechatyChannel(Channel):
         except Exception as e:
         except Exception as e:
             logger.exception(e)
             logger.exception(e)
 
 
+
+    async def _do_send_voice(self, query, reply_user_id):
+        try:
+            if not query:
+                return
+            context = dict()
+            context['session_id'] = reply_user_id
+            reply_text = super().build_reply_content(query, context)
+            if reply_text:
+                # 转换 mp3 文件为 silk 格式
+                mp3_file = super().build_text_to_voice(reply_text)
+                silk_file = mp3_file.replace(".mp3", ".silk")
+                # Load the MP3 file
+                audio = AudioSegment.from_file(mp3_file, format="mp3")
+                # Convert to WAV format
+                audio = audio.set_frame_rate(24000).set_channels(1)
+                wav_data = audio.raw_data
+                sample_width = audio.sample_width
+                # Encode to SILK format
+                silk_data = pysilk.encode(wav_data, 24000)
+                # Save the silk file
+                with open(silk_file, "wb") as f:
+                    f.write(silk_data)
+                # 发送语音
+                t = int(time.time())
+                file_box = FileBox.from_file(silk_file, name=str(t) + '.silk')
+                await self.send(file_box, reply_user_id)
+                # 清除缓存文件
+                os.remove(mp3_file)
+                os.remove(silk_file)
+        except Exception as e:
+            logger.exception(e)
+            
     async def _do_send_img(self, query, reply_user_id):
     async def _do_send_img(self, query, reply_user_id):
         try:
         try:
             if not query:
             if not query:

+ 8 - 3
config-template.json

@@ -1,13 +1,18 @@
 {
 {
   "open_ai_api_key": "YOUR API KEY",
   "open_ai_api_key": "YOUR API KEY",
   "proxy": "",
   "proxy": "",
+  "wechaty_puppet_service_token":"",
+  "baidu_app_id": "",
+  "baidu_api_key": "",
+  "baidu_secret_key": "",
   "single_chat_prefix": ["bot", "@bot"],
   "single_chat_prefix": ["bot", "@bot"],
   "single_chat_reply_prefix": "[bot] ",
   "single_chat_reply_prefix": "[bot] ",
   "group_chat_prefix": ["@bot"],
   "group_chat_prefix": ["@bot"],
   "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],
   "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],
   "image_create_prefix": ["画", "看", "找"],
   "image_create_prefix": ["画", "看", "找"],
-  "conversation_max_tokens": 1000,
   "speech_recognition": false,
   "speech_recognition": false,
-  "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。",
-  "expires_in_seconds": 3600
+  "voice_reply_voice": false,
+  "conversation_max_tokens": 1000,
+  "expires_in_seconds": 3600,
+  "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。"
 }
 }