Przeglądaj źródła

百度语音转写支持8000采样率, pcm_s16le编码, 单通道语音的组合

FMStereo 2 lat temu
rodzic
commit
977d3bc02e
2 zmienionych plików z 4 dodań i 2 usunięć
  1. 3 1
      voice/audio_convert.py
  2. 1 1
      voice/baidu/baidu_voice.py

+ 3 - 1
voice/audio_convert.py

@@ -64,7 +64,9 @@ def any_to_wav(any_path, wav_path):
     if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
         return sil_to_wav(any_path, wav_path)
     audio = AudioSegment.from_file(any_path)
-    audio.export(wav_path, format="wav")
+    audio.set_frame_rate(8000)    # 百度语音转写支持8000采样率, pcm_s16le, 单通道语音识别
+    audio.set_channels(1)
+    audio.export(wav_path, format="wav", codec='pcm_s16le')
 
 
 def any_to_sil(any_path, sil_path):

+ 1 - 1
voice/baidu/baidu_voice.py

@@ -62,7 +62,7 @@ class BaiduVoice(Voice):
         # 识别本地文件
         logger.debug("[Baidu] voice file name={}".format(voice_file))
         pcm = get_pcm_from_wav(voice_file)
-        res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.dev_id})
+        res = self.client.asr(pcm, "pcm", 8000, {"dev_pid": self.dev_id})
         if res["err_no"] == 0:
             logger.info("百度语音识别到了:{}".format(res["result"]))
             text = "".join(res["result"])