更新vad模型

2026-01-27 11:59:40 +08:00 · 2026-01-27 11:59:40 +08:00 · 13b08c8e7a
commit 13b08c8e7a
parent 7a0bd086e7
5 changed files with 36 additions and 19 deletions
--- a/app/src/main/assets/silero_vad.onnx
+++ b/app/src/main/assets/silero_vad.onnx
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
@ -18,10 +18,13 @@ class VadManager(
    private var isSpeaking = false
    private var lastSpeechMs = 0L
    private var lastActiveMs = 0L
+    private var speechStartMs = 0L // 新增：记录语音启动时间，用于启动保护

-    private val END_SILENCE_MS = 350L
-    private val RESET_IDLE_MS = 3_000L
-    private val MIN_RMS = 0.002f
+    // 核心参数优化：适配中文说话停顿
+    private val END_SILENCE_MS = 350L // Trailing silence (ms) before speech is judged ended; value is unchanged at 350 (not raised to 600)
+    private val RESET_IDLE_MS = 3_000L // 超长空闲重置时间，保持不变
+    private val MIN_RMS = 0.001f // 能量检测阈值（0.002→0.001，降低误判静音）
+    private val SPEECH_START_PROTECT_MS = 90L // 语音启动保护期，避免开头停顿误判

    init {
        vad = Vad(
@ -29,7 +32,7 @@ class VadManager(
            VadModelConfig(
                sileroVadModelConfig = SileroVadModelConfig(
                    model = "silero_vad.onnx",
-                    threshold = 0.5F,
+                    threshold = 0.40F, // Model sensitivity threshold, lowered from 0.5 to 0.40 to reduce misjudgments
                    minSilenceDuration = 0.1F,
                    minSpeechDuration = 0.25F,
                    windowSize = 512,
@ -39,20 +42,21 @@ class VadManager(
                provider = "cpu"
            )
        )
-        LogUtils.i(TAG, "✅ VAD init")
+        LogUtils.i(TAG, "✅ VAD init（已优化抗停顿参数）")
    }

    fun accept(samples: FloatArray) {
+        if (samples.isEmpty()) return
        val now = System.currentTimeMillis()

-        // 1️⃣ 先快速 RMS 判断
+        // 1️⃣ 快速RMS能量判断，过滤纯静音
        val rms = fastRms(samples)
        if (rms < MIN_RMS) {
            handleSilence(now)
            return
        }

-        // 2️⃣ 有能量再喂 VAD
+        // 2️⃣ 有有效能量才喂给VAD模型，减少计算
        vad.acceptWaveform(samples)
        val hasSpeech = vad.isSpeechDetected()

@ -61,7 +65,9 @@ class VadManager(
            lastActiveMs = now
            if (!isSpeaking) {
                isSpeaking = true
+                speechStartMs = now
                onSpeechStart()
+                LogUtils.d(TAG, "🗣 VAD检测到语音开始")
            }
        } else {
            handleSilence(now)
@ -69,12 +75,16 @@ class VadManager(
    }

    private fun handleSilence(now: Long) {
-        if (isSpeaking && now - lastSpeechMs > END_SILENCE_MS) {
+        // End speech only once the start-protection window has elapsed AND silence has exceeded the threshold
+        if (isSpeaking
+            && now - speechStartMs > SPEECH_START_PROTECT_MS
+            && now - lastSpeechMs > END_SILENCE_MS) {
            isSpeaking = false
            onSpeechEnd()
+            LogUtils.d(TAG, "🔇 VAD检测到语音结束（静音超${END_SILENCE_MS}ms）")
        }

-        // 超长 idle 才 reset
+        // 超长空闲重置VAD，避免状态残留
        if (!isSpeaking && now - lastActiveMs > RESET_IDLE_MS) {
            vad.reset()
            lastActiveMs = now
@ -82,6 +92,7 @@ class VadManager(
        }
    }

+    // 快速RMS计算，步采样减少计算量，保持原有逻辑
    private fun fastRms(samples: FloatArray): Float {
        var sum = 0f
        var count = 0
@ -93,14 +104,17 @@ class VadManager(
            count++
            i += step
        }
-        return sqrt(sum / count)
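+        // Guard the count == 0 edge case: Float division by zero yields NaN/Infinity rather than throwing, so return 0f explicitly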
+        return if (count == 0) 0f else sqrt(sum / count)
    }

+    // 重置所有状态，包括新增的语音启动时间
    fun reset() {
        isSpeaking = false
-        lastSpeechMs = 0
-        lastActiveMs = 0
+        lastSpeechMs = 0L
+        lastActiveMs = 0L
+        speechStartMs = 0L
        vad.reset()
+        LogUtils.d(TAG, "🔄 VAD手动重置所有状态")
    }
 }
-
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
@ -36,7 +36,7 @@ class VoiceController(


        // 统一的声纹验证阈值（不再分场景）
-        private const val SPEAKER_THRESHOLD = 0.38f
+        private const val SPEAKER_THRESHOLD = 0.36f

        private const val MIN_VERIFY_MS = 600L
        private const val MAX_VERIFY_MS = 1200L
--- a/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt
@ -41,9 +41,9 @@ class WakeupManager(assetManager: AssetManager, function: () -> Unit) {
    /** ⭐ 永远喂 KWS */
    fun acceptAudio(samples: FloatArray) {
        val s = stream ?: return
-//        for (i in samples.indices) {
-//            samples[i] *= 2.5f
-//        }
+        for (i in samples.indices) {
+            samples[i] *= 2.5f
+        }
        s.acceptWaveform(samples, sampleRate)

        while (kws.isReady(s)) {
--- a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
+++ b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
@ -14,6 +14,7 @@ import android.media.audiofx.NoiseSuppressor
 import android.os.Bundle
 import android.os.Environment
 import android.os.IBinder
+import android.os.Looper
 import android.text.TextUtils
 import android.util.Base64
 import android.util.Log
@ -206,7 +207,9 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
            onWakeup = {
                cancelSSE()
                voicePlayer?.onWakeupStop()
+                if (backPlaying || promptPlaying){
                    UnityPlayerHolder.getInstance().cancelPCM()
+                }
                UnityPlayerHolder.getInstance()
                    .sendVoiceToUnity(/*"https://static.seerteach.net/aidialogue/userWakeUpAudio/344.mp3"*/UserInfoManager.userInfo?.wakeUpAudioUrl?:""
                    )