Pārlūkot izejas kodu

解决语音的识别和转换兼容性

zwssunny 3 gadi atpakaļ
vecāks
revīzija
24de670c2c

+ 18 - 10
channel/wechat/wechat_channel.py

@@ -5,6 +5,9 @@ wechat channel
 """
 
 import os
+import requests
+import io
+import time
 from lib import itchat
 import json
 from lib.itchat.content import *
@@ -17,9 +20,7 @@ from common.tmp_dir import TmpDir
 from config import conf
 from common.time_check import time_checker
 from plugins import *
-import requests
-import io
-import time
+from voice.audio_convert import mp3_to_wav
 
 
 thread_pool = ThreadPoolExecutor(max_workers=8)
@@ -28,8 +29,7 @@ thread_pool = ThreadPoolExecutor(max_workers=8)
 def thread_pool_callback(worker):
     worker_exception = worker.exception()
     if worker_exception:
-        logger.exception(
-            "Worker return exception: {}".format(worker_exception))
+        logger.exception("Worker return exception: {}".format(worker_exception))
 
 
 @itchat.msg_register(TEXT)
@@ -247,9 +247,16 @@ class WechatChannel(Channel):
                 reply = super().build_reply_content(context.content, context)
             elif context.type == ContextType.VOICE:
                 msg = context['msg']
-                file_name = TmpDir().path() + context.content
-                msg.download(file_name)
-                reply = super().build_voice_to_text(file_name)
+                mp3_path = TmpDir().path() + context.content
+                msg.download(mp3_path)
+                # mp3转wav
+                wav_path = os.path.splitext(mp3_path)[0] + '.wav'
+                mp3_to_wav(mp3_path=mp3_path, wav_path=wav_path)
+                # 语音识别
+                reply = super().build_voice_to_text(wav_path)
+                # 删除临时文件
+                os.remove(wav_path)
+                os.remove(mp3_path)
                 if reply.type != ReplyType.ERROR and reply.type != ReplyType.INFO:
                     context.content = reply.content  # 语音转文字后,将文字内容作为新的context
                     context.type = ContextType.TEXT
@@ -263,12 +270,13 @@ class WechatChannel(Channel):
                             prefixes = conf().get('group_chat_prefix')
                             for prefix in prefixes:
                                 if context.content.startswith(prefix):
-                                    context.content = context.content.replace(prefix, '', 1).strip()
+                                    context.content = context.content.replace(
+                                        prefix, '', 1).strip()
                                     break
                         else:
                             logger.info("[WX]receive voice check prefix: " + 'False')
                             return
-                            
+
                     reply = super().build_reply_content(context.content, context)
                     if reply.type == ReplyType.TEXT:
                         if conf().get('voice_reply_voice'):

+ 16 - 66
channel/wechat/wechaty_channel.py

@@ -4,25 +4,19 @@
 wechaty channel
 Python Wechaty - https://github.com/wechaty/python-wechaty
 """
-import io
 import os
-import json
 import time
 import asyncio
-import requests
-import pysilk
-import wave
-from pydub import AudioSegment
 from typing import Optional, Union
 from bridge.context import Context, ContextType
 from wechaty_puppet import MessageType, FileBox, ScanStatus  # type: ignore
 from wechaty import Wechaty, Contact
-from wechaty.user import Message, Room, MiniProgram, UrlLink
+from wechaty.user import Message, MiniProgram, UrlLink
 from channel.channel import Channel
 from common.log import logger
 from common.tmp_dir import TmpDir
 from config import conf
-
+from voice.audio_convert import sil_to_wav, mp3_to_sil
 
 class WechatyChannel(Channel):
 
@@ -50,8 +44,8 @@ class WechatyChannel(Channel):
 
     async def on_scan(self, status: ScanStatus, qr_code: Optional[str] = None,
                       data: Optional[str] = None):
-        contact = self.Contact.load(self.contact_id)
-        logger.info('[WX] scan user={}, scan status={}, scan qr_code={}'.format(contact, status.name, qr_code))
+        # contact = self.Contact.load(self.contact_id)
+        # logger.info('[WX] scan user={}, scan status={}, scan qr_code={}'.format(contact, status.name, qr_code))
         # print(f'user <{contact}> scan status: {status.name} , 'f'qr_code: {qr_code}')
 
     async def on_message(self, msg: Message):
@@ -67,7 +61,7 @@ class WechatyChannel(Channel):
         content = msg.text()
         mention_content = await msg.mention_text()  # 返回过滤掉@name后的消息
         match_prefix = self.check_prefix(content, conf().get('single_chat_prefix'))
-        conversation: Union[Room, Contact] = from_contact if room is None else room
+        # conversation: Union[Room, Contact] = from_contact if room is None else room
 
         if room is None and msg.type() == MessageType.MESSAGE_TYPE_TEXT:
             if not msg.is_self() and match_prefix is not None:
@@ -102,21 +96,8 @@ class WechatyChannel(Channel):
                 await voice_file.to_file(silk_file)
                 logger.info("[WX]receive voice file: " + silk_file)
                 # 将文件转成wav格式音频
-                wav_file = silk_file.replace(".slk", ".wav")
-                with open(silk_file, 'rb') as f:
-                    silk_data = f.read()
-                pcm_data = pysilk.decode(silk_data)
-
-                with wave.open(wav_file, 'wb') as wav_data:
-                    wav_data.setnchannels(1)
-                    wav_data.setsampwidth(2)
-                    wav_data.setframerate(24000)
-                    wav_data.writeframes(pcm_data)
-                if os.path.exists(wav_file): 
-                    converter_state = "true" # 转换wav成功
-                else:
-                    converter_state = "false" # 转换wav失败
-                logger.info("[WX]receive voice converter: " + converter_state)
+                wav_file = os.path.splitext(silk_file)[0] + '.wav'
+                sil_to_wav(silk_file, wav_file)
                 # 语音识别为文本
                 query = super().build_voice_to_text(wav_file).content
                 # 校验关键字
@@ -183,21 +164,8 @@ class WechatyChannel(Channel):
                 await voice_file.to_file(silk_file)
                 logger.info("[WX]receive voice file: " + silk_file)
                 # 将文件转成wav格式音频
-                wav_file = silk_file.replace(".slk", ".wav")
-                with open(silk_file, 'rb') as f:
-                    silk_data = f.read()
-                pcm_data = pysilk.decode(silk_data)
-
-                with wave.open(wav_file, 'wb') as wav_data:
-                    wav_data.setnchannels(1)
-                    wav_data.setsampwidth(2)
-                    wav_data.setframerate(24000)
-                    wav_data.writeframes(pcm_data)
-                if os.path.exists(wav_file): 
-                    converter_state = "true" # 转换wav成功
-                else:
-                    converter_state = "false" # 转换wav失败
-                logger.info("[WX]receive voice converter: " + converter_state)
+                wav_file = os.path.splitext(silk_file)[0] + '.wav'
+                sil_to_wav(silk_file, wav_file)
                 # 语音识别为文本
                 query = super().build_voice_to_text(wav_file).content
                 # 校验关键字
@@ -260,21 +228,12 @@ class WechatyChannel(Channel):
             if reply_text:
                 # 转换 mp3 文件为 silk 格式
                 mp3_file = super().build_text_to_voice(reply_text).content
-                silk_file = mp3_file.replace(".mp3", ".silk")
-                # Load the MP3 file
-                audio = AudioSegment.from_file(mp3_file, format="mp3")
-                # Convert to WAV format
-                audio = audio.set_frame_rate(24000).set_channels(1)
-                wav_data = audio.raw_data
-                sample_width = audio.sample_width
-                # Encode to SILK format
-                silk_data = pysilk.encode(wav_data, 24000)
-                # Save the silk file
-                with open(silk_file, "wb") as f:
-                    f.write(silk_data)
+                silk_file = os.path.splitext(mp3_file)[0] + '.sil'
+                voiceLength = mp3_to_sil(mp3_file, silk_file)
                 # 发送语音
                 t = int(time.time())
-                file_box = FileBox.from_file(silk_file, name=str(t) + '.silk')
+                file_box = FileBox.from_file(silk_file, name=str(t) + '.sil')
+                file_box.metadata = {'voiceLength': voiceLength}
                 await self.send(file_box, reply_user_id)
                 # 清除缓存文件
                 os.remove(mp3_file)
@@ -337,21 +296,12 @@ class WechatyChannel(Channel):
             reply_text = '@' + group_user_name + ' ' + reply_text.strip()
             # 转换 mp3 文件为 silk 格式
             mp3_file = super().build_text_to_voice(reply_text).content
-            silk_file = mp3_file.replace(".mp3", ".silk")
-            # Load the MP3 file
-            audio = AudioSegment.from_file(mp3_file, format="mp3")
-            # Convert to WAV format
-            audio = audio.set_frame_rate(24000).set_channels(1)
-            wav_data = audio.raw_data
-            sample_width = audio.sample_width
-            # Encode to SILK format
-            silk_data = pysilk.encode(wav_data, 24000)
-            # Save the silk file
-            with open(silk_file, "wb") as f:
-                f.write(silk_data)
+            silk_file = os.path.splitext(mp3_file)[0] + '.sil'
+            voiceLength = mp3_to_sil(mp3_file, silk_file)
             # 发送语音
             t = int(time.time())
             file_box = FileBox.from_file(silk_file, name=str(t) + '.sil')
+            file_box.metadata = {'voiceLength': voiceLength}
             await self.send_group(file_box, group_id)
             # 清除缓存文件
             os.remove(mp3_file)

+ 3 - 7
voice/google/google_voice.py

@@ -3,17 +3,14 @@
 google voice service
 """
 
-import pathlib
-import subprocess
 import time
-from bridge.reply import Reply, ReplyType
 import speech_recognition
 import pyttsx3
 from gtts import gTTS
+from bridge.reply import Reply, ReplyType
 from common.log import logger
 from common.tmp_dir import TmpDir
 from voice.voice import Voice
-from voice.audio_convert import mp3_to_wav
 
 
 class GoogleVoice(Voice):
@@ -30,11 +27,10 @@ class GoogleVoice(Voice):
         self.engine.setProperty('voice', voices[1].id)
 
     def voiceToText(self, voice_file):
-        new_file = voice_file.replace('.mp3', '.wav')
+        # new_file = voice_file.replace('.mp3', '.wav')
         # subprocess.call('ffmpeg -i ' + voice_file +
         #                 ' -acodec pcm_s16le -ac 1 -ar 16000 ' + new_file, shell=True)
-        mp3_to_wav(voice_file, new_file)
-        with speech_recognition.AudioFile(new_file) as source:
+        with speech_recognition.AudioFile(voice_file) as source:
             audio = self.recognizer.record(source)
         try:
             text = self.recognizer.recognize_google(audio, language='zh-CN')