添加asr校验有效人声
This commit is contained in:
parent
cd4855f567
commit
aee1d6b797
BIN
app/src/main/assets/itn_zh_number.fst
Normal file
BIN
app/src/main/assets/itn_zh_number.fst
Normal file
Binary file not shown.
@ -0,0 +1,4 @@
|
||||
# Introduction
|
||||
|
||||
The model in this directory was converted from
|
||||
https://huggingface.co/ASLP-lab/WSYue-ASR/tree/main/u2pp_conformer_yue
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -2,12 +2,17 @@ package com.zs.smarthuman.sherpa
|
||||
|
||||
import android.content.res.AssetManager
|
||||
import android.util.Log
|
||||
import com.k2fsa.sherpa.onnx.OfflineModelConfig
|
||||
import com.k2fsa.sherpa.onnx.OfflineRecognizer
|
||||
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
|
||||
import com.k2fsa.sherpa.onnx.OfflineWenetCtcModelConfig
|
||||
import com.k2fsa.sherpa.onnx.getFeatureConfig
|
||||
import java.util.ArrayDeque
|
||||
|
||||
class VoiceController(
|
||||
assetManager: AssetManager,
|
||||
private val onWakeup: () -> Unit,
|
||||
private val onFinalAudio: (FloatArray) -> Unit,
|
||||
private val onFinalAudio: (FloatArray, String) -> Unit, // 修改:传回识别结果文本
|
||||
private val idleTimeoutSeconds: Int = 10,
|
||||
private val maxRecordingSeconds: Int = 10,
|
||||
private val onStateChanged: ((VoiceState) -> Unit)? = null,
|
||||
@ -53,6 +58,57 @@ class VoiceController(
|
||||
onSpeechEnd = { avgEnergy, peakRms -> onVadEnd(avgEnergy, peakRms) }
|
||||
)
|
||||
|
||||
|
||||
// 初始化非流式 ASR
|
||||
// Offline (non-streaming) recognizer, built once while the controller is constructed.
// Declared nullable and mutable; readers go through safe calls.
private var offlineRecognizer: OfflineRecognizer? = initOfflineRecognizer(assetManager)
|
||||
|
||||
/**
 * Builds the offline recognizer that uses the WeNet CTC (WenetSpeech-Yue) model files.
 *
 * The model and token paths are passed to the recognizer together with [assetManager];
 * presumably they are resolved relative to the app's assets (confirm against sherpa-onnx).
 *
 * @param assetManager asset manager handed to the [OfflineRecognizer] constructor.
 * @return a recognizer configured with 80-dimensional features at the controller's `sampleRate`.
 */
private fun initOfflineRecognizer(assetManager: AssetManager): OfflineRecognizer {
    val recognizerConfig = OfflineRecognizerConfig(
        // Named arguments are evaluated in call-site order: model config first, then features.
        modelConfig = OfflineModelConfig(
            wenetCtc = OfflineWenetCtcModelConfig(
                model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx"
            ),
            tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt",
            numThreads = 4,
            debug = true
        ),
        featConfig = getFeatureConfig(sampleRate = sampleRate, featureDim = 80)
    )

    return OfflineRecognizer(assetManager = assetManager, config = recognizerConfig)
}
|
||||
|
||||
// runSecondPass 里直接用 offlineRecognizer
|
||||
/**
 * Runs the offline recognizer over [samples] and returns the recognized text.
 *
 * Returns an empty string when no recognizer is available or when any step of the
 * recognition throws. The stream is released in a `finally` block so it is not leaked
 * when `acceptWaveform`, `decode` or `getResult` fails.
 *
 * @param samples audio samples fed to the recognizer at the controller's `sampleRate`.
 * @return the recognized text, or `""` on failure / missing recognizer.
 */
private fun runSecondPass(samples: FloatArray): String {
    // Snapshot the mutable property once so the same instance is used for the whole pass.
    val recognizer = offlineRecognizer ?: return ""
    return try {
        val stream = recognizer.createStream()
        try {
            stream.acceptWaveform(samples, sampleRate)
            recognizer.decode(stream)
            recognizer.getResult(stream).text
        } finally {
            // Always free the stream, including on the error path.
            stream.release()
        }
    } catch (e: Exception) {
        // Pass the throwable so the stack trace is logged, not only the message.
        Log.e(TAG, "Error during ASR processing: ${e.message}", e)
        ""
    }
}
|
||||
|
||||
|
||||
/* ================= 音频入口 ================= */
|
||||
fun acceptAudio(samples: FloatArray) {
|
||||
cachePreBuffer(samples)
|
||||
@ -167,6 +223,23 @@ class VoiceController(
|
||||
return
|
||||
}
|
||||
|
||||
// 检查音频数据是否有效
|
||||
if (audio.isEmpty()) {
|
||||
Log.d(TAG, "❌ Audio is empty")
|
||||
resetToWaitSpeech()
|
||||
return
|
||||
}
|
||||
|
||||
val asrText = runSecondPass(audio)
|
||||
if (asrText.isEmpty()) {
|
||||
resetToWaitSpeech()
|
||||
Log.d(TAG, "识别不出来asr")
|
||||
return
|
||||
} else {
|
||||
Log.d(TAG, "识别出来:${asrText}")
|
||||
}
|
||||
|
||||
|
||||
// 评分逻辑
|
||||
var score = 0
|
||||
when {
|
||||
@ -194,7 +267,7 @@ class VoiceController(
|
||||
|
||||
audioBuffer.clear()
|
||||
state = VoiceState.UPLOADING
|
||||
onFinalAudio(audio)
|
||||
onFinalAudio(audio, asrText) // 传递音频和识别文本
|
||||
}
|
||||
|
||||
/* ================= 播放回调 ================= */
|
||||
@ -242,6 +315,7 @@ class VoiceController(
|
||||
/**
 * Releases the resources held by this controller.
 *
 * Releases the wakeup manager, resets the VAD manager and releases the offline
 * recognizer. The recognizer reference is cleared afterwards so that a later
 * [runSecondPass] returns an empty result instead of touching a released recognizer,
 * and so that calling this method again does not release the recognizer twice.
 */
fun release() {
    wakeupManager.release()
    vadManager.reset()
    offlineRecognizer?.release()
    offlineRecognizer = null
}
|
||||
|
||||
/* ================= Utils ================= */
|
||||
|
||||
@ -196,8 +196,8 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
||||
}
|
||||
)
|
||||
},
|
||||
onFinalAudio = { audio ->
|
||||
Log.d("lrs", "检测到语音,长度=${audio.size}")
|
||||
onFinalAudio = { audio, asrText ->
|
||||
Log.d("lrsxx", "检测到语音,长度=${audio.size},文本:${asrText}")
|
||||
mViewModel?.uploadVoice(
|
||||
AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
|
||||
1
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user