添加asr校验有效人声
This commit is contained in:
parent
aee1d6b797
commit
3143f4007e
@ -53,7 +53,8 @@ class VoiceController(
|
|||||||
private val idleTimeoutMs = idleTimeoutSeconds * 1000L
|
private val idleTimeoutMs = idleTimeoutSeconds * 1000L
|
||||||
private val maxRecordingMs = maxRecordingSeconds * 1000L
|
private val maxRecordingMs = maxRecordingSeconds * 1000L
|
||||||
|
|
||||||
private val vadManager = VadManager(assetManager,
|
private val vadManager = VadManager(
|
||||||
|
assetManager,
|
||||||
onSpeechStart = { onVadStart() },
|
onSpeechStart = { onVadStart() },
|
||||||
onSpeechEnd = { avgEnergy, peakRms -> onVadEnd(avgEnergy, peakRms) }
|
onSpeechEnd = { avgEnergy, peakRms -> onVadEnd(avgEnergy, peakRms) }
|
||||||
)
|
)
|
||||||
@ -100,7 +101,7 @@ class VoiceController(
|
|||||||
val result = it.getResult(stream)
|
val result = it.getResult(stream)
|
||||||
stream.release()
|
stream.release()
|
||||||
result.text
|
result.text
|
||||||
}?:""
|
} ?: ""
|
||||||
|
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
Log.e(TAG, "Error during ASR processing: ${e.message}")
|
Log.e(TAG, "Error during ASR processing: ${e.message}")
|
||||||
@ -137,7 +138,8 @@ class VoiceController(
|
|||||||
|
|
||||||
VoiceState.WAIT_SPEECH -> {
|
VoiceState.WAIT_SPEECH -> {
|
||||||
if ((waitSpeechStartMs > 0 && now - waitSpeechStartMs >= idleTimeoutMs) ||
|
if ((waitSpeechStartMs > 0 && now - waitSpeechStartMs >= idleTimeoutMs) ||
|
||||||
(waitSpeechFailStartMs > 0 && now - waitSpeechFailStartMs >= idleTimeoutMs)) {
|
(waitSpeechFailStartMs > 0 && now - waitSpeechFailStartMs >= idleTimeoutMs)
|
||||||
|
) {
|
||||||
Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
|
Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
|
||||||
resetAll()
|
resetAll()
|
||||||
return
|
return
|
||||||
@ -215,7 +217,10 @@ class VoiceController(
|
|||||||
val peakAvgRatio = if (avgEnergy > 0f) peakRms / avgEnergy else 0f
|
val peakAvgRatio = if (avgEnergy > 0f) peakRms / avgEnergy else 0f
|
||||||
|
|
||||||
Log.d(TAG, "📊 Finish Sentence - duration: $duration ms, vadEnded: true")
|
Log.d(TAG, "📊 Finish Sentence - duration: $duration ms, vadEnded: true")
|
||||||
Log.d(TAG, "📊 vadRatio=$vadRatio, avgEnergy=$avgEnergy, peakRms=$peakRms, peakAvgRatio=$peakAvgRatio")
|
Log.d(
|
||||||
|
TAG,
|
||||||
|
"📊 vadRatio=$vadRatio, avgEnergy=$avgEnergy, peakRms=$peakRms, peakAvgRatio=$peakAvgRatio"
|
||||||
|
)
|
||||||
|
|
||||||
if (avgEnergy < 0.02f || peakAvgRatio < 1.2f || vadRatio < 0.4f) {
|
if (avgEnergy < 0.02f || peakAvgRatio < 1.2f || vadRatio < 0.4f) {
|
||||||
Log.d(TAG, "❌ Sentence rejected")
|
Log.d(TAG, "❌ Sentence rejected")
|
||||||
@ -230,7 +235,8 @@ class VoiceController(
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
val asrText = runSecondPass(audio)
|
var asrText = runSecondPass(audio)
|
||||||
|
|
||||||
if (asrText.isEmpty()) {
|
if (asrText.isEmpty()) {
|
||||||
resetToWaitSpeech()
|
resetToWaitSpeech()
|
||||||
Log.d(TAG, "识别不出来asr")
|
Log.d(TAG, "识别不出来asr")
|
||||||
@ -256,7 +262,10 @@ class VoiceController(
|
|||||||
vadRatio >= 0.55f -> score += 2
|
vadRatio >= 0.55f -> score += 2
|
||||||
vadRatio >= 0.40f -> score += 1
|
vadRatio >= 0.40f -> score += 1
|
||||||
}
|
}
|
||||||
Log.d(TAG, "📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score")
|
Log.d(
|
||||||
|
TAG,
|
||||||
|
"📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score"
|
||||||
|
)
|
||||||
|
|
||||||
val pass = score >= 5 || (score == 3 && avgEnergy >= 0.06f)
|
val pass = score >= 5 || (score == 3 && avgEnergy >= 0.06f)
|
||||||
if (!pass) {
|
if (!pass) {
|
||||||
@ -271,13 +280,19 @@ class VoiceController(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* ================= 播放回调 ================= */
|
/* ================= 播放回调 ================= */
|
||||||
fun onPlayStartPrompt() { state = VoiceState.PLAYING_PROMPT }
|
fun onPlayStartPrompt() {
|
||||||
|
state = VoiceState.PLAYING_PROMPT
|
||||||
|
}
|
||||||
|
|
||||||
fun onPlayEndPrompt() {
|
fun onPlayEndPrompt() {
|
||||||
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
||||||
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
||||||
}
|
}
|
||||||
|
|
||||||
fun onPlayStartBackend() { state = VoiceState.PLAYING_BACKEND }
|
fun onPlayStartBackend() {
|
||||||
|
state = VoiceState.PLAYING_BACKEND
|
||||||
|
}
|
||||||
|
|
||||||
fun onPlayEndBackend() {
|
fun onPlayEndBackend() {
|
||||||
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
||||||
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user