优化后的代码

2026-01-03 14:33:43 +08:00 · 2026-01-03 14:33:43 +08:00 · 102afa291d
commit 102afa291d
parent 0261c5ccaa
2 changed files with 45 additions and 122 deletions
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
@ -3,6 +3,7 @@ package com.zs.smarthuman.sherpa
 import android.content.res.AssetManager
 import android.util.Log
 import kotlin.math.sqrt
 import java.util.ArrayDeque
 class VoiceController(
    assetManager: AssetManager,
@ -17,8 +18,6 @@ class VoiceController(
    private val TAG = "VoiceController"
    private val sampleRate = 16000
    /* ================= 状态 ================= */
    private var state: VoiceState = VoiceState.WAIT_WAKEUP
        set(value) {
            field = value
@ -26,42 +25,29 @@ class VoiceController(
            onStateChanged?.invoke(value)
        }
    /* ================= KWS ================= */
    private val wakeupManager = WakeupManager(assetManager) {
        Log.d(TAG, "🔥 WakeWord detected")
        handleWakeupEvent()
    }
    /* ================= VAD ================= */
    private val vadManager = VadManager(
        assetManager,
        onSpeechStart = { onVadStart() },
        onSpeechEnd = {}
    )
    /* ================= Buffer ================= */
    private val audioBuffer = mutableListOf<Float>()
    /** 前导音缓存（2 秒） */
    private val preBuffer = ArrayDeque<Float>()
    private val PRE_BUFFER_SIZE = sampleRate * 2
    /* ================= 时间 ================= */
    private var recordingStartMs = 0L
    private var silenceStartMs = 0L
    private var waitSpeechFailStartMs = 0L
-
+    private var waitSpeechStartMs = 0L
    /* ================= 近讲统计（⭐关键新增） ================= */
    private var speechEnergySum = 0f
    private var speechFrameCount = 0
    /* ================= 控制 ================= */
    private var vadStarted = false
    private var inKwsObserve = false
@ -71,28 +57,17 @@ class VoiceController(
    private var speechEnableAtMs = 0L
    private val SPEECH_COOLDOWN_MS = 300L
-    /* ================= 阈值（⭐已校正） ================= */
+    private val RMS_SILENCE_THRESHOLD = 0.012f
    private val RMS_SILENCE_THRESHOLD = 0.012f     // 静音阈值（修正）
    private val SILENCE_END_MS = 1200L
-    private val MIN_SPEECH_MS = 1000L              // 句子级
+    private val MIN_SPEECH_MS = 1000L
-    private val MIN_AVG_ENERGY = 0.02f             // 近讲能量门
+    private val MIN_AVG_ENERGY = 0.02f
    /** ⭐ 唤醒后等待人声起点 */
    private var waitSpeechStartMs = 0L
    /** ⭐ 唤醒后最大等待时间（没说一句话） */
    private val WAIT_SPEECH_TIMEOUT_MS = 8000L
    /* ================= 音频入口 ================= */
    fun acceptAudio(samples: FloatArray) {
        cachePreBuffer(samples)
        wakeupManager.acceptAudio(samples)
        if (wakeupManager.consumeWakeupFlag()) {
            handleWakeupEvent()
@ -102,7 +77,6 @@ class VoiceController(
        val now = System.currentTimeMillis()
        when (state) {
            VoiceState.WAIT_WAKEUP,
            VoiceState.PLAYING_PROMPT,
            VoiceState.PLAYING_BACKEND,
@ -110,40 +84,38 @@ class VoiceController(
            VoiceState.WAIT_SPEECH_COOLDOWN -> {
                if (now >= speechEnableAtMs) {
-                    waitSpeechFailStartMs = 0L   // ⭐ 必须清
+                    waitSpeechFailStartMs = System.currentTimeMillis()
                    state = VoiceState.WAIT_SPEECH
-                    waitSpeechStartMs = now   // ⭐ 关键：开始等人说话
+                    waitSpeechStartMs = now
                }
                return
            }
            VoiceState.WAIT_SPEECH -> {
-                // ⭐ 唤醒后长时间没人说话 → 自动退出
+                if (waitSpeechStartMs > 0 && now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS) {
-                if (waitSpeechStartMs > 0 &&
+                    Log.d(TAG, "⏱ Wakeup but no speech → WAIT_WAKEUP")
                    now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS
                ) {
                    Log.d(TAG, "⏱ Wakeup but no speech, exit to WAIT_WAKEUP")
                    resetAll()
                    return
                }
-                if (inKwsObserve) {
+                if (waitSpeechFailStartMs > 0 && now - waitSpeechFailStartMs >= idleTimeoutSeconds * 1000) {
-                    if (now - kwsObserveStartMs < KWS_OBSERVE_MS) return
+                    Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
-                    inKwsObserve = false
+                    resetAll()
                    return
                }
                if (inKwsObserve && now - kwsObserveStartMs < KWS_OBSERVE_MS) return
                inKwsObserve = false
                vadManager.accept(samples)
            }
            VoiceState.RECORDING -> {
                audioBuffer.addAll(samples.asList())
                vadManager.accept(samples)
                val rms = calcRms(samples)
                if (rms > RMS_SILENCE_THRESHOLD) {
                    speechEnergySum += rms
                    speechFrameCount++
@ -169,22 +141,20 @@ class VoiceController(
    /**
     * Routes a wake-word detection according to the current [state].
     *
     * - `UPLOADING`: the event is ignored.
     * - `RECORDING` / `PLAYING_BACKEND`: the optional `stopBackendAudio` callback is
     *   invoked, then the wake-up flow is entered with `interrupt = true`.
     * - any other state: the wake-up flow is entered with `interrupt = false`.
     */
    private fun handleWakeupEvent() {
        val current = state
        if (current == VoiceState.UPLOADING) return

        val interrupting =
            current == VoiceState.RECORDING || current == VoiceState.PLAYING_BACKEND
        if (interrupting) {
            stopBackendAudio?.invoke()
        }
        enterWakeup(interrupt = interrupting)
    }
    private fun enterWakeup(interrupt: Boolean) {
-        waitSpeechFailStartMs = 0L   // ⭐ 唤醒即新会话
+        waitSpeechFailStartMs = System.currentTimeMillis()
-        waitSpeechStartMs = 0L
+        waitSpeechStartMs = System.currentTimeMillis()
        if (interrupt) {
            audioBuffer.clear()
            vadManager.reset()
@ -200,31 +170,23 @@ class VoiceController(
        onWakeup()
    }
    /* ================= VAD START ================= */
    /**
     * VAD speech-start callback; acts only while the controller is in `WAIT_SPEECH`.
     *
     * Stamps the recording start time, clears the silence and wait-for-speech timers,
     * resets the energy statistics, seeds the audio buffer with the cached lead-in
     * samples and finally switches to `RECORDING`.
     */
    private fun onVadStart() {
        if (state != VoiceState.WAIT_SPEECH) return

        Log.d(TAG, "🎤 REAL VAD START")
        vadStarted = true
        recordingStartMs = System.currentTimeMillis()

        // A real utterance has begun, so neither waiting timer applies any more.
        waitSpeechStartMs = 0L
        waitSpeechFailStartMs = 0L
        silenceStartMs = 0L

        resetEnergyStat()

        // Start the recording from the cached lead-in audio so the utterance onset is kept.
        with(audioBuffer) {
            clear()
            addAll(preBuffer)
        }

        // Assigned last: the state setter invokes onStateChanged.
        state = VoiceState.RECORDING
    }
-
+    /* ================= 结束录音 ================= */
    /* ================= 结束录音（⭐核心） ================= */
    private fun finishSentence() {
        val now = System.currentTimeMillis()
        val duration = now - recordingStartMs
@ -235,46 +197,28 @@ class VoiceController(
        }
        val vadRatio = vadManager.activeSpeechRatio()
-        val avgEnergy =
+        val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
            if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
        /* ================= 评分制判定 ================= */
        var score = 0
        // 1️⃣ 时长评分（最重要）
        when {
            duration >= 4000 -> score += 3
            duration >= 2500 -> score += 2
            duration >= 1500 -> score += 1
        }
        // 2️⃣ 能量评分（近讲人声强信号）
        when {
            avgEnergy >= 0.10f -> score += 3
            avgEnergy >= 0.06f -> score += 2
            avgEnergy >= MIN_AVG_ENERGY -> score += 1
        }
        // 3️⃣ VAD 评分（只作为辅助）
        when {
            vadRatio >= 0.55f -> score += 2
            vadRatio >= 0.40f -> score += 1
        }
-        Log.d(
+        Log.d(TAG, "📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score")
            TAG,
            "📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score"
        )
        /**
         * 评分阈值：
         * - >=4 : 必然是真实人声
         * - 3   : 在近讲/长句条件下允许
         * - <3  : 拦截
         */
        val pass = when {
-            score >= 4 -> true
+            score >= 6 -> true
            score == 3 && avgEnergy >= 0.06f -> true
            else -> false
        }
@ -285,33 +229,22 @@ class VoiceController(
            return
        }
        /* ================= 通过，进入上传 ================= */
        waitSpeechFailStartMs = 0L
        val finalAudio = audioBuffer.toFloatArray()
        audioBuffer.clear()
        state = VoiceState.UPLOADING
        onFinalAudio(finalAudio)
    }
    /* ================= 播放回调 ================= */
-    fun onPlayStartPrompt() {
+    fun onPlayStartPrompt() { state = VoiceState.PLAYING_PROMPT }        // ⭐ 补全
        state = VoiceState.PLAYING_PROMPT
    }
    /**
     * Playback-finished hook for the prompt audio (presumably called by the player —
     * confirm against the caller).
     *
     * Sets `speechEnableAtMs` to the current time plus `SPEECH_COOLDOWN_MS` and moves
     * the controller to `WAIT_SPEECH_COOLDOWN`; `acceptAudio` leaves that state once
     * the current time reaches `speechEnableAtMs`.
     */
    fun onPlayEndPrompt() {
        // Set the deadline before the state change: the state setter invokes onStateChanged.
        speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
        state = VoiceState.WAIT_SPEECH_COOLDOWN
    }
-    fun onPlayStartBackend() {
+    fun onPlayStartBackend() { state = VoiceState.PLAYING_BACKEND }   // ⭐ 补全
        state = VoiceState.PLAYING_BACKEND
    }
    fun onPlayEndBackend() {
        speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
        state = VoiceState.WAIT_SPEECH_COOLDOWN
@ -321,10 +254,8 @@ class VoiceController(
    fun onUploadFinished(success: Boolean) {
        if (state != VoiceState.UPLOADING) return
-
+        state = if (success) VoiceState.PLAYING_BACKEND
-        state = if (success) {
+        else {
            VoiceState.PLAYING_BACKEND
        } else {
            speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
            VoiceState.WAIT_SPEECH_COOLDOWN
        }
@ -335,13 +266,9 @@ class VoiceController(
    fun checkIdleTimeout() {
        if (state != VoiceState.WAIT_SPEECH) return
        if (waitSpeechFailStartMs == 0L) return
-
+        if (System.currentTimeMillis() - waitSpeechFailStartMs > idleTimeoutSeconds * 1000) {
-        if (System.currentTimeMillis() - waitSpeechFailStartMs >
+            Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
            idleTimeoutSeconds * 1000
        ) {
            Log.d(TAG, "⏱ WAIT_SPEECH timeout")
            resetAll()
            waitSpeechFailStartMs = 0L
        }
    }
@ -355,9 +282,7 @@ class VoiceController(
        silenceStartMs = 0L
        state = VoiceState.WAIT_SPEECH
-        if (waitSpeechFailStartMs == 0L) {
+        if (waitSpeechFailStartMs == 0L) waitSpeechFailStartMs = System.currentTimeMillis()
            waitSpeechFailStartMs = System.currentTimeMillis()
        }
    }
    private fun resetAll() {
@ -367,11 +292,11 @@ class VoiceController(
        resetEnergyStat()
        vadStarted = false
        silenceStartMs = 0L
-        waitSpeechStartMs = 0L   // ⭐
+        waitSpeechStartMs = 0L
        waitSpeechFailStartMs = 0L
        state = VoiceState.WAIT_WAKEUP
    }
    fun release() {
        wakeupManager.release()
        vadManager.reset()
@ -387,9 +312,7 @@ class VoiceController(
    private fun cachePreBuffer(samples: FloatArray) {
        for (s in samples) {
            preBuffer.addLast(s)
-            if (preBuffer.size > PRE_BUFFER_SIZE) {
+            if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst()
                preBuffer.removeFirst()
            }
        }
    }
--- a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
+++ b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
@ -161,7 +161,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
                        voiceInfo = mutableListOf<VoiceBeanResp>().apply {
                            add(
                                VoiceBeanResp(
-                                    audioUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
+                                    audioUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?: */"https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
                                )
                            )
                        }
@ -169,17 +169,17 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
            },
            onFinalAudio = { audio ->
                Log.d("lrs", "检测到语音，长度=${audio.size}")
-//                mViewModel?.uploadVoice(
+                mViewModel?.uploadVoice(
-//                    AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
+                    AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
-//                    1
+                    1
 //                )
                loadLocalJsonAndPlay()
                val file = File(
                    getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
                    "xxx.wav"
                )
-                AudioDebugUtil.saveFloatPcmAsWav(audio, file)
+                loadLocalJsonAndPlay()
-                LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
+//                val file = File(
 //                    getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
 //                    "xxx.wav"
 //                )
 //                AudioDebugUtil.saveFloatPcmAsWav(audio, file)
 //                LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
            },
            onStateChanged = { state ->
@ -343,7 +343,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
        word: String,
        audioUrl: String
    ) {
-        val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return
+        val wakeupUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?: */"https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return
        if (audioUrl != wakeupUrl) return