优化后的代码

2026-01-03 14:33:43 +08:00 · 2026-01-03 14:33:43 +08:00 · 102afa291d
commit 102afa291d
parent 0261c5ccaa
2 changed files with 45 additions and 122 deletions
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
@ -3,6 +3,7 @@ package com.zs.smarthuman.sherpa
 import android.content.res.AssetManager
 import android.util.Log
 import kotlin.math.sqrt
+import java.util.ArrayDeque

 class VoiceController(
    assetManager: AssetManager,
@ -17,8 +18,6 @@ class VoiceController(
    private val TAG = "VoiceController"
    private val sampleRate = 16000

-    /* ================= 状态 ================= */
-
    private var state: VoiceState = VoiceState.WAIT_WAKEUP
        set(value) {
            field = value
@ -26,42 +25,29 @@ class VoiceController(
            onStateChanged?.invoke(value)
        }

-    /* ================= KWS ================= */
-
    private val wakeupManager = WakeupManager(assetManager) {
        Log.d(TAG, "🔥 WakeWord detected")
        handleWakeupEvent()
    }

-    /* ================= VAD ================= */
-
    private val vadManager = VadManager(
        assetManager,
        onSpeechStart = { onVadStart() },
        onSpeechEnd = {}
    )

-    /* ================= Buffer ================= */
-
    private val audioBuffer = mutableListOf<Float>()
-
-    /** 前导音缓存（2 秒） */
    private val preBuffer = ArrayDeque<Float>()
    private val PRE_BUFFER_SIZE = sampleRate * 2

-    /* ================= 时间 ================= */
-
    private var recordingStartMs = 0L
    private var silenceStartMs = 0L
    private var waitSpeechFailStartMs = 0L
-
-    /* ================= 近讲统计（⭐关键新增） ================= */
+    private var waitSpeechStartMs = 0L

    private var speechEnergySum = 0f
    private var speechFrameCount = 0

-    /* ================= 控制 ================= */
-
    private var vadStarted = false

    private var inKwsObserve = false
@ -71,28 +57,17 @@ class VoiceController(
    private var speechEnableAtMs = 0L
    private val SPEECH_COOLDOWN_MS = 300L

-    /* ================= 阈值（⭐已校正） ================= */
-
-    private val RMS_SILENCE_THRESHOLD = 0.012f     // 静音阈值（修正）
+    private val RMS_SILENCE_THRESHOLD = 0.012f
    private val SILENCE_END_MS = 1200L
-    private val MIN_SPEECH_MS = 1000L              // 句子级
-    private val MIN_AVG_ENERGY = 0.02f             // 近讲能量门
+    private val MIN_SPEECH_MS = 1000L
+    private val MIN_AVG_ENERGY = 0.02f

-
-    /** ⭐ 唤醒后等待人声起点 */
-    private var waitSpeechStartMs = 0L
-
-    /** ⭐ 唤醒后最大等待时间（没说一句话） */
    private val WAIT_SPEECH_TIMEOUT_MS = 8000L

-
-
    /* ================= 音频入口 ================= */

    fun acceptAudio(samples: FloatArray) {
-
        cachePreBuffer(samples)
-
        wakeupManager.acceptAudio(samples)
        if (wakeupManager.consumeWakeupFlag()) {
            handleWakeupEvent()
@ -102,7 +77,6 @@ class VoiceController(
        val now = System.currentTimeMillis()

        when (state) {
-
            VoiceState.WAIT_WAKEUP,
            VoiceState.PLAYING_PROMPT,
            VoiceState.PLAYING_BACKEND,
@ -110,40 +84,38 @@ class VoiceController(

            VoiceState.WAIT_SPEECH_COOLDOWN -> {
                if (now >= speechEnableAtMs) {
-                    waitSpeechFailStartMs = 0L   // ⭐ 必须清
+                    waitSpeechFailStartMs = System.currentTimeMillis()
                    state = VoiceState.WAIT_SPEECH
-                    waitSpeechStartMs = now   // ⭐ 关键：开始等人说话
+                    waitSpeechStartMs = now
                }
                return
            }

            VoiceState.WAIT_SPEECH -> {

-                // ⭐ 唤醒后长时间没人说话 → 自动退出
-                if (waitSpeechStartMs > 0 &&
-                    now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS
-                ) {
-                    Log.d(TAG, "⏱ Wakeup but no speech, exit to WAIT_WAKEUP")
+                if (waitSpeechStartMs > 0 && now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS) {
+                    Log.d(TAG, "⏱ Wakeup but no speech → WAIT_WAKEUP")
                    resetAll()
                    return
                }

-                if (inKwsObserve) {
-                    if (now - kwsObserveStartMs < KWS_OBSERVE_MS) return
-                    inKwsObserve = false
+                if (waitSpeechFailStartMs > 0 && now - waitSpeechFailStartMs >= idleTimeoutSeconds * 1000) {
+                    Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
+                    resetAll()
+                    return
                }

+                if (inKwsObserve && now - kwsObserveStartMs < KWS_OBSERVE_MS) return
+                inKwsObserve = false
+
                vadManager.accept(samples)
            }

-
            VoiceState.RECORDING -> {
-
                audioBuffer.addAll(samples.asList())
                vadManager.accept(samples)

                val rms = calcRms(samples)
-
                if (rms > RMS_SILENCE_THRESHOLD) {
                    speechEnergySum += rms
                    speechFrameCount++
@ -169,22 +141,20 @@ class VoiceController(

    private fun handleWakeupEvent() {
        when (state) {
-
            VoiceState.UPLOADING -> return
-
            VoiceState.RECORDING,
            VoiceState.PLAYING_BACKEND -> {
                stopBackendAudio?.invoke()
                enterWakeup(interrupt = true)
            }
-
            else -> enterWakeup(interrupt = false)
        }
    }

    private fun enterWakeup(interrupt: Boolean) {
-        waitSpeechFailStartMs = 0L   // ⭐ 唤醒即新会话
-        waitSpeechStartMs = 0L
+        waitSpeechFailStartMs = System.currentTimeMillis()
+        waitSpeechStartMs = System.currentTimeMillis()
+
        if (interrupt) {
            audioBuffer.clear()
            vadManager.reset()
@ -200,31 +170,23 @@ class VoiceController(
        onWakeup()
    }

-    /* ================= VAD START ================= */
-
    private fun onVadStart() {
        if (state != VoiceState.WAIT_SPEECH) return

        Log.d(TAG, "🎤 REAL VAD START")
-
        vadStarted = true
        recordingStartMs = System.currentTimeMillis()
        silenceStartMs = 0L
-        waitSpeechFailStartMs = 0L   // ⭐ 新一轮有效说话
-        waitSpeechStartMs = 0L   // ⭐ 清掉“等待说话”超时
        resetEnergyStat()

        audioBuffer.clear()
        audioBuffer.addAll(preBuffer)
-
        state = VoiceState.RECORDING
    }

-
-    /* ================= 结束录音（⭐核心） ================= */
+    /* ================= 结束录音 ================= */

    private fun finishSentence() {
-
        val now = System.currentTimeMillis()
        val duration = now - recordingStartMs

@ -235,46 +197,28 @ class VoiceController(
        }

        val vadRatio = vadManager.activeSpeechRatio()
-        val avgEnergy =
-            if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
-
-        /* ================= 评分制判定 ================= */
+        val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f

        var score = 0
-
-        // 1️⃣ 时长评分（最重要）
        when {
            duration >= 4000 -> score += 3
            duration >= 2500 -> score += 2
            duration >= 1500 -> score += 1
        }
-
-        // 2️⃣ 能量评分（近讲人声强信号）
        when {
            avgEnergy >= 0.10f -> score += 3
            avgEnergy >= 0.06f -> score += 2
            avgEnergy >= MIN_AVG_ENERGY -> score += 1
        }
-
-        // 3️⃣ VAD 评分（只作为辅助）
        when {
            vadRatio >= 0.55f -> score += 2
            vadRatio >= 0.40f -> score += 1
        }

-        Log.d(
-            TAG,
-            "📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score"
-        )
+        Log.d(TAG, "📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score")

-        /**
-         * 评分阈值：
-         * - >=4 : 必然是真实人声
-         * - 3   : 在近讲/长句条件下允许
-         * - <3  : 拦截
-         */
        val pass = when {
-            score >= 4 -> true
-            score == 3 && avgEnergy >= 0.06f -> true
+            score >= 6 -> true
+            score >= 3 && avgEnergy >= 0.06f -> true // scores 3..5 pass only with near-field energy, so a higher score never fails where a lower one passes
            else -> false
        }
@ -285,33 +229,22 @@ class VoiceController(
            return
        }

-        /* ================= 通过，进入上传 ================= */
-
        waitSpeechFailStartMs = 0L
-
        val finalAudio = audioBuffer.toFloatArray()
        audioBuffer.clear()
-
        state = VoiceState.UPLOADING
        onFinalAudio(finalAudio)
    }

-
    /* ================= 播放回调 ================= */

-    fun onPlayStartPrompt() {
-        state = VoiceState.PLAYING_PROMPT
-    }
-
+    fun onPlayStartPrompt() { state = VoiceState.PLAYING_PROMPT }        // ⭐ 补全
    fun onPlayEndPrompt() {
        speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
        state = VoiceState.WAIT_SPEECH_COOLDOWN
    }

-    fun onPlayStartBackend() {
-        state = VoiceState.PLAYING_BACKEND
-    }
-
+    fun onPlayStartBackend() { state = VoiceState.PLAYING_BACKEND }   // ⭐ 补全
    fun onPlayEndBackend() {
        speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
        state = VoiceState.WAIT_SPEECH_COOLDOWN
@ -321,10 +254,8 @@ class VoiceController(

    fun onUploadFinished(success: Boolean) {
        if (state != VoiceState.UPLOADING) return
-
-        state = if (success) {
-            VoiceState.PLAYING_BACKEND
-        } else {
+        state = if (success) VoiceState.PLAYING_BACKEND
+        else {
            speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
            VoiceState.WAIT_SPEECH_COOLDOWN
        }
@ -335,13 +266,9 @@ class VoiceController(
    fun checkIdleTimeout() {
        if (state != VoiceState.WAIT_SPEECH) return
        if (waitSpeechFailStartMs == 0L) return
-
-        if (System.currentTimeMillis() - waitSpeechFailStartMs >
-            idleTimeoutSeconds * 1000
-        ) {
-            Log.d(TAG, "⏱ WAIT_SPEECH timeout")
+        if (System.currentTimeMillis() - waitSpeechFailStartMs > idleTimeoutSeconds * 1000) {
+            Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
            resetAll()
-            waitSpeechFailStartMs = 0L
        }
    }

@ -355,9 +282,7 @@ class VoiceController(
        silenceStartMs = 0L
        state = VoiceState.WAIT_SPEECH

-        if (waitSpeechFailStartMs == 0L) {
-            waitSpeechFailStartMs = System.currentTimeMillis()
-        }
+        if (waitSpeechFailStartMs == 0L) waitSpeechFailStartMs = System.currentTimeMillis()
    }

    private fun resetAll() {
@ -367,11 +292,11 @@ class VoiceController(
        resetEnergyStat()
        vadStarted = false
        silenceStartMs = 0L
-        waitSpeechStartMs = 0L   // ⭐
+        waitSpeechStartMs = 0L
+        waitSpeechFailStartMs = 0L
        state = VoiceState.WAIT_WAKEUP
    }

-
    fun release() {
        wakeupManager.release()
        vadManager.reset()
@ -387,9 +312,7 @@ class VoiceController(
    private fun cachePreBuffer(samples: FloatArray) {
        for (s in samples) {
            preBuffer.addLast(s)
-            if (preBuffer.size > PRE_BUFFER_SIZE) {
-                preBuffer.removeFirst()
-            }
+            if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst()
        }
    }

--- a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
+++ b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
@ -161,7 +161,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
                        voiceInfo = mutableListOf<VoiceBeanResp>().apply {
                            add(
                                VoiceBeanResp(
-                                    audioUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
+                                    audioUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?: */"https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
                                )
                            )
                        }
@ -169,17 +169,17 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
            },
            onFinalAudio = { audio ->
                Log.d("lrs", "检测到语音，长度=${audio.size}")
-//                mViewModel?.uploadVoice(
-//                    AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
-//                    1
-//                )
-                loadLocalJsonAndPlay()
-                val file = File(
-                    getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
-                    "xxx.wav"
+                mViewModel?.uploadVoice(
+                    AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
+                    1
                )
-                AudioDebugUtil.saveFloatPcmAsWav(audio, file)
-                LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
+                loadLocalJsonAndPlay()
+//                val file = File(
+//                    getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
+//                    "xxx.wav"
+//                )
+//                AudioDebugUtil.saveFloatPcmAsWav(audio, file)
+//                LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")

            },
            onStateChanged = { state ->
@ -343,7 +343,7 @ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
        word: String,
        audioUrl: String
    ) {
-        val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return
+        val wakeupUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?: */"https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"

        if (audioUrl != wakeupUrl) return