From 761e98f1b6e4bfa24927475caa401c1ed6427a72 Mon Sep 17 00:00:00 2001 From: ross <3024454314@qq.com> Date: Sun, 4 Jan 2026 15:47:31 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/zs/smarthuman/sherpa/VadManager.kt | 64 +++++++++---------- .../zs/smarthuman/sherpa/VoiceController.kt | 12 +--- 2 files changed, 34 insertions(+), 42 deletions(-) diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt index 18d22aa..4029b3c 100644 --- a/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt +++ b/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt @@ -8,85 +8,83 @@ import kotlin.math.sqrt class VadManager( assetManager: AssetManager, private val onSpeechStart: () -> Unit, - private val onSpeechEnd: (Float, Float) -> Unit // avgEnergy, peakRms + private val onSpeechEnd: (avgEnergy: Float, peakRms: Float) -> Unit ) { + private val vad: Vad private var isSpeaking = false private var lastSpeechTime = 0L - /** 有效语音统计 */ + private val END_SILENCE_MS = 800L + private var activeFrameCount = 0 private var activeSpeechFrameCount = 0 + private var speechEnergySum = 0f private var speechFrameCount = 0 private var peakRms = 0f - private val END_SILENCE_MS = 800L - init { - val config = getVadModelConfig(0) - ?: throw IllegalStateException("VAD config not found") + val config = getVadModelConfig(0) ?: throw IllegalStateException("VAD config not found") vad = Vad(assetManager, config) } - /** 外部调用的音频输入 */ fun accept(samples: FloatArray) { val now = System.currentTimeMillis() vad.acceptWaveform(samples) val hasSpeech = vad.isSpeechDetected() + // RMS & peak 统计 val rms = calcRms(samples) + if (hasSpeech) { + speechEnergySum += rms + speechFrameCount++ + peakRms = maxOf(peakRms, rms) + } + + // VAD逻辑 if (hasSpeech) { lastSpeechTime = now if (!isSpeaking) { isSpeaking = true - resetStats() onSpeechStart() } - - // 累计有效语音能量和峰值 - speechEnergySum += rms - speechFrameCount++ - if (rms > peakRms) peakRms = rms - activeFrameCount++ activeSpeechFrameCount++ } else { - if (isSpeaking) activeFrameCount++ - // 检查结束 - if (isSpeaking && now - lastSpeechTime >= END_SILENCE_MS) { - isSpeaking = false - val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f - onSpeechEnd(avgEnergy, peakRms) + if (isSpeaking) { + activeFrameCount++ + if (now - lastSpeechTime >= END_SILENCE_MS) { + isSpeaking = false + val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f + val peak = peakRms + onSpeechEnd(avgEnergy, peak) + } } } } - /** 统计有效语音比例,用于 VoiceController */ - fun activeSpeechRatio(): Float { - if (activeFrameCount == 0) return 0f - return activeSpeechFrameCount.toFloat() / activeFrameCount - } + fun activeSpeechRatio(): Float = if (activeFrameCount == 0) 0f else activeSpeechFrameCount.toFloat() / activeFrameCount fun reset() { isSpeaking = false lastSpeechTime = 0L activeFrameCount = 0 activeSpeechFrameCount = 0 - resetStats() - vad.reset() - } - - private fun resetStats() { speechEnergySum = 0f speechFrameCount = 0 peakRms = 0f + vad.reset() } - private fun calcRms(audio: FloatArray): Float { + private fun calcRms(samples: FloatArray): Float { var sum = 0f - for (v in audio) sum += v * v - return sqrt(sum / audio.size) + var peak = 0f + for (v in samples) { + sum += v * v + peak = maxOf(peak, kotlin.math.abs(v)) + } + return sqrt(sum / samples.size) } } diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt index c845d26..c0a2eb3 100644 --- a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt +++ b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt @@ -107,15 +107,9 @@ class VoiceController( /* ================= 唤醒 ================= */ private fun handleWakeupEvent() { - when (state) { - VoiceState.UPLOADING -> return - VoiceState.RECORDING, - VoiceState.PLAYING_BACKEND -> { - stopBackendAudio?.invoke() - enterWakeup(interrupt = true) - } - else -> enterWakeup(interrupt = false) - } + if (state == VoiceState.UPLOADING) return + stopBackendAudio?.invoke() + enterWakeup(interrupt = true) } private fun enterWakeup(interrupt: Boolean) {