优化后的代码

This commit is contained in:
林若思 2026-01-03 14:33:43 +08:00
parent 0261c5ccaa
commit 102afa291d
2 changed files with 45 additions and 122 deletions

View File

@ -3,6 +3,7 @@ package com.zs.smarthuman.sherpa
import android.content.res.AssetManager import android.content.res.AssetManager
import android.util.Log import android.util.Log
import kotlin.math.sqrt import kotlin.math.sqrt
import java.util.ArrayDeque
class VoiceController( class VoiceController(
assetManager: AssetManager, assetManager: AssetManager,
@ -17,8 +18,6 @@ class VoiceController(
private val TAG = "VoiceController" private val TAG = "VoiceController"
private val sampleRate = 16000 private val sampleRate = 16000
/* ================= 状态 ================= */
private var state: VoiceState = VoiceState.WAIT_WAKEUP private var state: VoiceState = VoiceState.WAIT_WAKEUP
set(value) { set(value) {
field = value field = value
@ -26,42 +25,29 @@ class VoiceController(
onStateChanged?.invoke(value) onStateChanged?.invoke(value)
} }
/* ================= KWS ================= */
private val wakeupManager = WakeupManager(assetManager) { private val wakeupManager = WakeupManager(assetManager) {
Log.d(TAG, "🔥 WakeWord detected") Log.d(TAG, "🔥 WakeWord detected")
handleWakeupEvent() handleWakeupEvent()
} }
/* ================= VAD ================= */
private val vadManager = VadManager( private val vadManager = VadManager(
assetManager, assetManager,
onSpeechStart = { onVadStart() }, onSpeechStart = { onVadStart() },
onSpeechEnd = {} onSpeechEnd = {}
) )
/* ================= Buffer ================= */
private val audioBuffer = mutableListOf<Float>() private val audioBuffer = mutableListOf<Float>()
/** 前导音缓存(2 秒) */
private val preBuffer = ArrayDeque<Float>() private val preBuffer = ArrayDeque<Float>()
private val PRE_BUFFER_SIZE = sampleRate * 2 private val PRE_BUFFER_SIZE = sampleRate * 2
/* ================= 时间 ================= */
private var recordingStartMs = 0L private var recordingStartMs = 0L
private var silenceStartMs = 0L private var silenceStartMs = 0L
private var waitSpeechFailStartMs = 0L private var waitSpeechFailStartMs = 0L
private var waitSpeechStartMs = 0L
/* ================= 近讲统计(⭐关键新增) ================= */
private var speechEnergySum = 0f private var speechEnergySum = 0f
private var speechFrameCount = 0 private var speechFrameCount = 0
/* ================= 控制 ================= */
private var vadStarted = false private var vadStarted = false
private var inKwsObserve = false private var inKwsObserve = false
@ -71,28 +57,17 @@ class VoiceController(
private var speechEnableAtMs = 0L private var speechEnableAtMs = 0L
private val SPEECH_COOLDOWN_MS = 300L private val SPEECH_COOLDOWN_MS = 300L
/* ================= 阈值(⭐已校正) ================= */ private val RMS_SILENCE_THRESHOLD = 0.012f
private val RMS_SILENCE_THRESHOLD = 0.012f // 静音阈值(修正)
private val SILENCE_END_MS = 1200L private val SILENCE_END_MS = 1200L
private val MIN_SPEECH_MS = 1000L // 句子级 private val MIN_SPEECH_MS = 1000L
private val MIN_AVG_ENERGY = 0.02f // 近讲能量门 private val MIN_AVG_ENERGY = 0.02f
/** ⭐ 唤醒后等待人声起点 */
private var waitSpeechStartMs = 0L
/** ⭐ 唤醒后最大等待时间(没说一句话) */
private val WAIT_SPEECH_TIMEOUT_MS = 8000L private val WAIT_SPEECH_TIMEOUT_MS = 8000L
/* ================= 音频入口 ================= */ /* ================= 音频入口 ================= */
fun acceptAudio(samples: FloatArray) { fun acceptAudio(samples: FloatArray) {
cachePreBuffer(samples) cachePreBuffer(samples)
wakeupManager.acceptAudio(samples) wakeupManager.acceptAudio(samples)
if (wakeupManager.consumeWakeupFlag()) { if (wakeupManager.consumeWakeupFlag()) {
handleWakeupEvent() handleWakeupEvent()
@ -102,7 +77,6 @@ class VoiceController(
val now = System.currentTimeMillis() val now = System.currentTimeMillis()
when (state) { when (state) {
VoiceState.WAIT_WAKEUP, VoiceState.WAIT_WAKEUP,
VoiceState.PLAYING_PROMPT, VoiceState.PLAYING_PROMPT,
VoiceState.PLAYING_BACKEND, VoiceState.PLAYING_BACKEND,
@ -110,40 +84,38 @@ class VoiceController(
VoiceState.WAIT_SPEECH_COOLDOWN -> { VoiceState.WAIT_SPEECH_COOLDOWN -> {
if (now >= speechEnableAtMs) { if (now >= speechEnableAtMs) {
waitSpeechFailStartMs = 0L // ⭐ 必须清 waitSpeechFailStartMs = System.currentTimeMillis()
state = VoiceState.WAIT_SPEECH state = VoiceState.WAIT_SPEECH
waitSpeechStartMs = now // ⭐ 关键:开始等人说话 waitSpeechStartMs = now
} }
return return
} }
VoiceState.WAIT_SPEECH -> { VoiceState.WAIT_SPEECH -> {
// ⭐ 唤醒后长时间没人说话 → 自动退出 if (waitSpeechStartMs > 0 && now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS) {
if (waitSpeechStartMs > 0 && Log.d(TAG, "⏱ Wakeup but no speech → WAIT_WAKEUP")
now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS
) {
Log.d(TAG, "⏱ Wakeup but no speech, exit to WAIT_WAKEUP")
resetAll() resetAll()
return return
} }
if (inKwsObserve) { if (waitSpeechFailStartMs > 0 && now - waitSpeechFailStartMs >= idleTimeoutSeconds * 1000) {
if (now - kwsObserveStartMs < KWS_OBSERVE_MS) return Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
inKwsObserve = false resetAll()
return
} }
if (inKwsObserve && now - kwsObserveStartMs < KWS_OBSERVE_MS) return
inKwsObserve = false
vadManager.accept(samples) vadManager.accept(samples)
} }
VoiceState.RECORDING -> { VoiceState.RECORDING -> {
audioBuffer.addAll(samples.asList()) audioBuffer.addAll(samples.asList())
vadManager.accept(samples) vadManager.accept(samples)
val rms = calcRms(samples) val rms = calcRms(samples)
if (rms > RMS_SILENCE_THRESHOLD) { if (rms > RMS_SILENCE_THRESHOLD) {
speechEnergySum += rms speechEnergySum += rms
speechFrameCount++ speechFrameCount++
@ -169,22 +141,20 @@ class VoiceController(
private fun handleWakeupEvent() { private fun handleWakeupEvent() {
when (state) { when (state) {
VoiceState.UPLOADING -> return VoiceState.UPLOADING -> return
VoiceState.RECORDING, VoiceState.RECORDING,
VoiceState.PLAYING_BACKEND -> { VoiceState.PLAYING_BACKEND -> {
stopBackendAudio?.invoke() stopBackendAudio?.invoke()
enterWakeup(interrupt = true) enterWakeup(interrupt = true)
} }
else -> enterWakeup(interrupt = false) else -> enterWakeup(interrupt = false)
} }
} }
private fun enterWakeup(interrupt: Boolean) { private fun enterWakeup(interrupt: Boolean) {
waitSpeechFailStartMs = 0L // ⭐ 唤醒即新会话 waitSpeechFailStartMs = System.currentTimeMillis()
waitSpeechStartMs = 0L waitSpeechStartMs = System.currentTimeMillis()
if (interrupt) { if (interrupt) {
audioBuffer.clear() audioBuffer.clear()
vadManager.reset() vadManager.reset()
@ -200,31 +170,23 @@ class VoiceController(
onWakeup() onWakeup()
} }
/* ================= VAD START ================= */
private fun onVadStart() { private fun onVadStart() {
if (state != VoiceState.WAIT_SPEECH) return if (state != VoiceState.WAIT_SPEECH) return
Log.d(TAG, "🎤 REAL VAD START") Log.d(TAG, "🎤 REAL VAD START")
vadStarted = true vadStarted = true
recordingStartMs = System.currentTimeMillis() recordingStartMs = System.currentTimeMillis()
silenceStartMs = 0L silenceStartMs = 0L
waitSpeechFailStartMs = 0L // ⭐ 新一轮有效说话
waitSpeechStartMs = 0L // ⭐ 清掉“等待说话”超时
resetEnergyStat() resetEnergyStat()
audioBuffer.clear() audioBuffer.clear()
audioBuffer.addAll(preBuffer) audioBuffer.addAll(preBuffer)
state = VoiceState.RECORDING state = VoiceState.RECORDING
} }
/* ================= 结束录音 ================= */
/* ================= 结束录音(⭐核心) ================= */
private fun finishSentence() { private fun finishSentence() {
val now = System.currentTimeMillis() val now = System.currentTimeMillis()
val duration = now - recordingStartMs val duration = now - recordingStartMs
@ -235,46 +197,28 @@ class VoiceController(
} }
val vadRatio = vadManager.activeSpeechRatio() val vadRatio = vadManager.activeSpeechRatio()
val avgEnergy = val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
/* ================= 评分制判定 ================= */
var score = 0 var score = 0
// 1⃣ 时长评分(最重要)
when { when {
duration >= 4000 -> score += 3 duration >= 4000 -> score += 3
duration >= 2500 -> score += 2 duration >= 2500 -> score += 2
duration >= 1500 -> score += 1 duration >= 1500 -> score += 1
} }
// 2⃣ 能量评分(近讲人声强信号)
when { when {
avgEnergy >= 0.10f -> score += 3 avgEnergy >= 0.10f -> score += 3
avgEnergy >= 0.06f -> score += 2 avgEnergy >= 0.06f -> score += 2
avgEnergy >= MIN_AVG_ENERGY -> score += 1 avgEnergy >= MIN_AVG_ENERGY -> score += 1
} }
// 3⃣ VAD 评分(只作为辅助)
when { when {
vadRatio >= 0.55f -> score += 2 vadRatio >= 0.55f -> score += 2
vadRatio >= 0.40f -> score += 1 vadRatio >= 0.40f -> score += 1
} }
Log.d( Log.d(TAG, "📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score")
TAG,
"📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score"
)
/**
* 评分阈值
* - >=4 : 必然是真实人声
* - 3 : 在近讲/长句条件下允许
* - <3 : 拦截
*/
val pass = when { val pass = when {
score >= 4 -> true score >= 6 -> true
score == 3 && avgEnergy >= 0.06f -> true score == 3 && avgEnergy >= 0.06f -> true
else -> false else -> false
} }
@ -285,33 +229,22 @@ class VoiceController(
return return
} }
/* ================= 通过,进入上传 ================= */
waitSpeechFailStartMs = 0L waitSpeechFailStartMs = 0L
val finalAudio = audioBuffer.toFloatArray() val finalAudio = audioBuffer.toFloatArray()
audioBuffer.clear() audioBuffer.clear()
state = VoiceState.UPLOADING state = VoiceState.UPLOADING
onFinalAudio(finalAudio) onFinalAudio(finalAudio)
} }
/* ================= 播放回调 ================= */ /* ================= 播放回调 ================= */
fun onPlayStartPrompt() { fun onPlayStartPrompt() { state = VoiceState.PLAYING_PROMPT } // ⭐ 补全
state = VoiceState.PLAYING_PROMPT
}
fun onPlayEndPrompt() { fun onPlayEndPrompt() {
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
state = VoiceState.WAIT_SPEECH_COOLDOWN state = VoiceState.WAIT_SPEECH_COOLDOWN
} }
fun onPlayStartBackend() { fun onPlayStartBackend() { state = VoiceState.PLAYING_BACKEND } // ⭐ 补全
state = VoiceState.PLAYING_BACKEND
}
fun onPlayEndBackend() { fun onPlayEndBackend() {
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
state = VoiceState.WAIT_SPEECH_COOLDOWN state = VoiceState.WAIT_SPEECH_COOLDOWN
@ -321,10 +254,8 @@ class VoiceController(
fun onUploadFinished(success: Boolean) { fun onUploadFinished(success: Boolean) {
if (state != VoiceState.UPLOADING) return if (state != VoiceState.UPLOADING) return
state = if (success) VoiceState.PLAYING_BACKEND
state = if (success) { else {
VoiceState.PLAYING_BACKEND
} else {
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
VoiceState.WAIT_SPEECH_COOLDOWN VoiceState.WAIT_SPEECH_COOLDOWN
} }
@ -335,13 +266,9 @@ class VoiceController(
fun checkIdleTimeout() { fun checkIdleTimeout() {
if (state != VoiceState.WAIT_SPEECH) return if (state != VoiceState.WAIT_SPEECH) return
if (waitSpeechFailStartMs == 0L) return if (waitSpeechFailStartMs == 0L) return
if (System.currentTimeMillis() - waitSpeechFailStartMs > idleTimeoutSeconds * 1000) {
if (System.currentTimeMillis() - waitSpeechFailStartMs > Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
idleTimeoutSeconds * 1000
) {
Log.d(TAG, "⏱ WAIT_SPEECH timeout")
resetAll() resetAll()
waitSpeechFailStartMs = 0L
} }
} }
@ -355,9 +282,7 @@ class VoiceController(
silenceStartMs = 0L silenceStartMs = 0L
state = VoiceState.WAIT_SPEECH state = VoiceState.WAIT_SPEECH
if (waitSpeechFailStartMs == 0L) { if (waitSpeechFailStartMs == 0L) waitSpeechFailStartMs = System.currentTimeMillis()
waitSpeechFailStartMs = System.currentTimeMillis()
}
} }
private fun resetAll() { private fun resetAll() {
@ -367,11 +292,11 @@ class VoiceController(
resetEnergyStat() resetEnergyStat()
vadStarted = false vadStarted = false
silenceStartMs = 0L silenceStartMs = 0L
waitSpeechStartMs = 0L // ⭐ waitSpeechStartMs = 0L
waitSpeechFailStartMs = 0L
state = VoiceState.WAIT_WAKEUP state = VoiceState.WAIT_WAKEUP
} }
fun release() { fun release() {
wakeupManager.release() wakeupManager.release()
vadManager.reset() vadManager.reset()
@ -387,9 +312,7 @@ class VoiceController(
private fun cachePreBuffer(samples: FloatArray) { private fun cachePreBuffer(samples: FloatArray) {
for (s in samples) { for (s in samples) {
preBuffer.addLast(s) preBuffer.addLast(s)
if (preBuffer.size > PRE_BUFFER_SIZE) { if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst()
preBuffer.removeFirst()
}
} }
} }

View File

@ -161,7 +161,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
voiceInfo = mutableListOf<VoiceBeanResp>().apply { voiceInfo = mutableListOf<VoiceBeanResp>().apply {
add( add(
VoiceBeanResp( VoiceBeanResp(
audioUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" audioUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?: */"https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
) )
) )
} }
@ -169,17 +169,17 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
}, },
onFinalAudio = { audio -> onFinalAudio = { audio ->
Log.d("lrs", "检测到语音,长度=${audio.size}") Log.d("lrs", "检测到语音,长度=${audio.size}")
// mViewModel?.uploadVoice( mViewModel?.uploadVoice(
// AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)), AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
// 1 1
// )
loadLocalJsonAndPlay()
val file = File(
getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
"xxx.wav"
) )
AudioDebugUtil.saveFloatPcmAsWav(audio, file) loadLocalJsonAndPlay()
LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}") // val file = File(
// getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
// "xxx.wav"
// )
// AudioDebugUtil.saveFloatPcmAsWav(audio, file)
// LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
}, },
onStateChanged = { state -> onStateChanged = { state ->
@ -343,7 +343,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
word: String, word: String,
audioUrl: String audioUrl: String
) { ) {
val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return val wakeupUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?: */"https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return
if (audioUrl != wakeupUrl) return if (audioUrl != wakeupUrl) return