From 956dd78c1bff909ff5fbbe5fab847f0aa4701fd5 Mon Sep 17 00:00:00 2001
From: ross <3024454314@qq.com>
Date: Sat, 3 Jan 2026 10:11:28 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=90=8E=E7=9A=84=E4=BB=A3?=
 =?UTF-8?q?=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/src/main/AndroidManifest.xml              |   6 +-
 .../com/zs/smarthuman/sherpa/VadManager.kt    |  63 ++++---
 .../zs/smarthuman/sherpa/VoiceController.kt   | 163 ++++++++++++++----
 .../java/com/zs/smarthuman/ui/MainActivity.kt |  19 +-
 .../zs/smarthuman/utils/SerialNumberUtil.kt   |  16 +-
 5 files changed, 197 insertions(+), 70 deletions(-)
diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml
index d698cc6..d64e047 100644
--- a/app/src/main/AndroidManifest.xml
+++ b/app/src/main/AndroidManifest.xml
@@ -52,7 +52,7 @@
         tools:targetApi="31">
 
         <activity
-            android:name=".ui.MainActivity"
+            android:name=".ui.SplashActivity"
             android:exported="true"
             android:theme="@style/Theme.Splash"
             android:screenOrientation="portrait">
@@ -66,9 +66,9 @@
             </intent-filter>
         </activity>
 
-        <!--<activity
+        <activity
             android:name="com.zs.smarthuman.ui.MainActivity"
-            android:screenOrientation="portrait"/>-->
+            android:screenOrientation="portrait"/>
         <activity
             android:name="com.zs.smarthuman.ui.ActivateActivity"
             android:screenOrientation="portrait"/>
diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
index 7848ca6..ae254ed 100644
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
@@ -4,11 +4,6 @@ import android.content.res.AssetManager
 import com.k2fsa.sherpa.onnx.Vad
 import com.k2fsa.sherpa.onnx.getVadModelConfig
 
-/**
- * @description:
- * @author: lrs
- * @date: 2025/12/17 10:22
- */
 class VadManager(
     assetManager: AssetManager,
     private val onSpeechStart: () -> Unit,
@@ -19,9 +14,13 @@ class VadManager(
     private var isSpeaking = false
     private var lastSpeechTime = 0L
 
-    // ⭐ 统计用
-    private var speechFrameCount = 0
-    private var totalFrameCount = 0
+    /** ⭐ 仅统计“有效语音段” */
+    private var activeFrameCount = 0
+    private var activeSpeechFrameCount = 0
+
+    /** ⭐ 用于调试（可选） */
+    private var rawFrameCount = 0
+    private var rawSpeechFrameCount = 0
 
     private val END_SILENCE_MS = 600L
 
@@ -37,36 +36,58 @@ class VadManager(
         vad.acceptWaveform(samples)
         val hasSpeech = vad.isSpeechDetected()
 
-        totalFrameCount++
+        /* ===== raw 统计（仅日志） ===== */
+        rawFrameCount++
+        if (hasSpeech) rawSpeechFrameCount++
 
         if (hasSpeech) {
-            speechFrameCount++
             lastSpeechTime = now
 
             if (!isSpeaking) {
                 isSpeaking = true
                 onSpeechStart()
             }
+
+            activeFrameCount++
+            activeSpeechFrameCount++
         } else {
-            if (isSpeaking && now - lastSpeechTime >= END_SILENCE_MS) {
-                isSpeaking = false
-                onSpeechEnd()
-                vad.clear()
+            if (isSpeaking) {
+                activeFrameCount++
+
+                if (now - lastSpeechTime >= END_SILENCE_MS) {
+                    isSpeaking = false
+                    onSpeechEnd()
+                }
             }
         }
     }
 
-    /** 👉 人声占比（真正用到 VAD 的地方） */
-    fun speechRatio(): Float {
-        if (totalFrameCount == 0) return 0f
-        return speechFrameCount.toFloat() / totalFrameCount
+    /**
+     * Ratio of speech frames to all frames counted while a speech segment was active.
+     * Used to judge whether a segment is real speech; returns 0f when nothing was counted.
+     */
+    fun activeSpeechRatio(): Float {
+        if (activeFrameCount == 0) return 0f
+        return activeSpeechFrameCount.toFloat() / activeFrameCount
+    }
+
+    /**
+     * Ratio of speech frames to every frame counted since the last reset; for tuning/observation only.
+     */
+    fun rawSpeechRatio(): Float {
+        if (rawFrameCount == 0) return 0f
+        return rawSpeechFrameCount.toFloat() / rawFrameCount
     }
 
     fun reset() {
         isSpeaking = false
-        lastSpeechTime = 0
-        speechFrameCount = 0
-        totalFrameCount = 0
+        lastSpeechTime = 0L
+
+        activeFrameCount = 0
+        activeSpeechFrameCount = 0
+        rawFrameCount = 0
+        rawSpeechFrameCount = 0
+
         vad.reset()
     }
 }
diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
index a88e03f..77524ec 100644
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
@@ -8,7 +8,7 @@ class VoiceController(
     assetManager: AssetManager,
     private val onWakeup: () -> Unit,
     private val onFinalAudio: (FloatArray) -> Unit,
-    private val idleTimeoutSeconds: Int = 5,
+    private val idleTimeoutSeconds: Int = 8,
     private val maxRecordingSeconds: Int = 10,
     private val onStateChanged: ((VoiceState) -> Unit)? = null,
     private val stopBackendAudio: (() -> Unit)? = null
@@ -53,28 +53,39 @@ class VoiceController(
 
     private var recordingStartMs = 0L
     private var silenceStartMs = 0L
-
-    /** ⭐ WAIT_SPEECH 连续失败起点（关键） */
     private var waitSpeechFailStartMs = 0L
 
+    /* ================= 近讲统计（⭐关键新增） ================= */
+
+    private var speechEnergySum = 0f
+    private var speechFrameCount = 0
+
     /* ================= 控制 ================= */
 
     private var vadStarted = false
 
-    /** 唤醒观察期 */
     private var inKwsObserve = false
     private var kwsObserveStartMs = 0L
     private val KWS_OBSERVE_MS = 500L
 
-    /** 播放冷却 */
     private var speechEnableAtMs = 0L
     private val SPEECH_COOLDOWN_MS = 300L
 
-    /* ================= 阈值 ================= */
+    /* ================= 阈值（⭐已校正） ================= */
 
-    private val RMS_SILENCE_THRESHOLD = 0.005f
+    private val RMS_SILENCE_THRESHOLD = 0.012f     // 静音阈值（修正）
     private val SILENCE_END_MS = 1200L
-    private val MIN_SPEECH_MS = 300L
+    private val MIN_SPEECH_MS = 1000L              // 句子级
+    private val MIN_AVG_ENERGY = 0.02f             // 近讲能量门
+
+
+    /** ⭐ 唤醒后等待人声起点 */
+    private var waitSpeechStartMs = 0L
+
+    /** ⭐ 唤醒后最大等待时间（没说一句话） */
+    private val WAIT_SPEECH_TIMEOUT_MS = 8000L
+
+
 
     /* ================= 音频入口 ================= */
 
@@ -100,12 +111,22 @@ class VoiceController(
             VoiceState.WAIT_SPEECH_COOLDOWN -> {
                 if (now >= speechEnableAtMs) {
                     state = VoiceState.WAIT_SPEECH
+                    waitSpeechStartMs = now   // ⭐ 关键：开始等人说话
                 }
                 return
             }
 
             VoiceState.WAIT_SPEECH -> {
 
+                // ⭐ 唤醒后长时间没人说话 → 自动退出
+                if (waitSpeechStartMs > 0 &&
+                    now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS
+                ) {
+                    Log.d(TAG, "⏱ Wakeup but no speech, exit to WAIT_WAKEUP")
+                    resetAll()
+                    return
+                }
+
                 if (inKwsObserve) {
                     if (now - kwsObserveStartMs < KWS_OBSERVE_MS) return
                     inKwsObserve = false
@@ -114,26 +135,30 @@ class VoiceController(
                 vadManager.accept(samples)
             }
 
+
             VoiceState.RECORDING -> {
 
                 audioBuffer.addAll(samples.asList())
                 vadManager.accept(samples)
 
+                val rms = calcRms(samples)
+
+                if (rms > RMS_SILENCE_THRESHOLD) {
+                    speechEnergySum += rms
+                    speechFrameCount++
+                    silenceStartMs = 0L
+                } else {
+                    if (silenceStartMs == 0L) silenceStartMs = now
+                    else if (now - silenceStartMs >= SILENCE_END_MS) {
+                        Log.d(TAG, "🔇 Silence end")
+                        finishSentence()
+                        return
+                    }
+                }
+
                 if (now - recordingStartMs > maxRecordingSeconds * 1000) {
                     Log.w(TAG, "⏱ Max recording reached")
                     finishSentence()
-                    return
-                }
-
-                val rms = calcRms(samples)
-                if (rms < RMS_SILENCE_THRESHOLD) {
-                    if (silenceStartMs == 0L) silenceStartMs = now
-                    else if (now - silenceStartMs >= SILENCE_END_MS) {
-                        Log.d(TAG, "🔇 RMS silence end")
-                        finishSentence()
-                    }
-                } else {
-                    silenceStartMs = 0L
                 }
             }
         }
@@ -146,8 +171,12 @@ class VoiceController(
 
             VoiceState.UPLOADING -> return
 
+            // ⭐ 关键：只要不是纯等待唤醒，一律打断
             VoiceState.RECORDING,
+            VoiceState.WAIT_SPEECH,
+            VoiceState.WAIT_SPEECH_COOLDOWN,
             VoiceState.PLAYING_BACKEND -> {
+                Log.d(TAG, "⚠ WakeWord interrupt state=$state")
                 stopBackendAudio?.invoke()
                 enterWakeup(interrupt = true)
             }
@@ -156,13 +185,21 @@ class VoiceController(
         }
     }
 
+
     private fun enterWakeup(interrupt: Boolean) {
 
         if (interrupt) {
+            Log.d(TAG, "🛑 Interrupt current speech / recording")
+
             audioBuffer.clear()
+            preBuffer.clear()              // ⭐ 防止把旧唤醒词带进去
             vadManager.reset()
+            resetEnergyStat()
+
             vadStarted = false
             silenceStartMs = 0L
+            waitSpeechStartMs = 0L         // ⭐
+            waitSpeechFailStartMs = 0L     // ⭐
         }
 
         inKwsObserve = true
@@ -172,6 +209,7 @@ class VoiceController(
         onWakeup()
     }
 
+
     /* ================= VAD START ================= */
 
     private fun onVadStart() {
@@ -182,6 +220,8 @@ class VoiceController(
         vadStarted = true
         recordingStartMs = System.currentTimeMillis()
         silenceStartMs = 0L
+        waitSpeechStartMs = 0L   // ⭐ 清掉“等待说话”超时
+        resetEnergyStat()
 
         audioBuffer.clear()
         audioBuffer.addAll(preBuffer)
@@ -189,26 +229,73 @@ class VoiceController(
         state = VoiceState.RECORDING
     }
 
-    /* ================= 结束录音 ================= */
+
+    /* ================= 结束录音（⭐核心） ================= */
 
     private fun finishSentence() {
 
-        val duration = System.currentTimeMillis() - recordingStartMs
+        val now = System.currentTimeMillis()
+        val duration = now - recordingStartMs
+
         if (!vadStarted || duration < MIN_SPEECH_MS) {
+            Log.d(TAG, "❌ Too short or no VAD start: ${duration}ms")
             resetToWaitSpeech()
             return
         }
 
-        val vadRatio = vadManager.speechRatio()
-        Log.d(TAG, "🎙 VAD speech ratio=$vadRatio")
+        val vadRatio = vadManager.activeSpeechRatio()
+        val avgEnergy =
+            if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
 
-        if (vadRatio < 0.25f) {
-            Log.d(TAG, "❌ VAD says NOT human speech")
+        /* ================= 评分制判定 ================= */
+
+        var score = 0
+
+        // 1️⃣ 时长评分（最重要）
+        when {
+            duration >= 4000 -> score += 3
+            duration >= 2500 -> score += 2
+            duration >= 1500 -> score += 1
+        }
+
+        // 2️⃣ 能量评分（近讲人声强信号）
+        when {
+            avgEnergy >= 0.10f -> score += 3
+            avgEnergy >= 0.06f -> score += 2
+            avgEnergy >= MIN_AVG_ENERGY -> score += 1
+        }
+
+        // 3️⃣ VAD 评分（只作为辅助）
+        when {
+            vadRatio >= 0.55f -> score += 2
+            vadRatio >= 0.40f -> score += 1
+        }
+
+        Log.d(
+            TAG,
+            "📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score"
+        )
+
+        /**
+         * 评分阈值：
+         * - >=4 : 必然是真实人声
+         * - 3   : 在近讲/长句条件下允许
+         * - <3  : 拦截
+         */
+        val pass = when {
+            score >= 4 -> true
+            score == 3 && avgEnergy >= 0.06f -> true
+            else -> false
+        }
+
+        if (!pass) {
+            Log.d(TAG, "❌ Sentence rejected (score=$score)")
             resetToWaitSpeech()
             return
         }
 
-        // ✅ 成功一次，清空失败计时
+        /* ================= 通过，进入上传 ================= */
+
         waitSpeechFailStartMs = 0L
 
         val finalAudio = audioBuffer.toFloatArray()
@@ -218,6 +305,7 @@ class VoiceController(
         onFinalAudio(finalAudio)
     }
 
+
     /* ================= 播放回调 ================= */
 
     fun onPlayStartPrompt() {
@@ -238,7 +326,7 @@ class VoiceController(
         state = VoiceState.WAIT_SPEECH_COOLDOWN
     }
 
-    /* ================= 上传回调（保留 public） ================= */
+    /* ================= 上传回调 ================= */
 
     fun onUploadFinished(success: Boolean) {
         if (state != VoiceState.UPLOADING) return
@@ -251,15 +339,16 @@ class VoiceController(
         }
     }
 
-    /* ================= Idle 超时（关键修复） ================= */
+    /* ================= Idle 超时 ================= */
 
     fun checkIdleTimeout() {
         if (state != VoiceState.WAIT_SPEECH) return
         if (waitSpeechFailStartMs == 0L) return
 
-        val now = System.currentTimeMillis()
-        if (now - waitSpeechFailStartMs > idleTimeoutSeconds * 1000) {
-            Log.d(TAG, "⏱ WAIT_SPEECH continuous fail timeout")
+        if (System.currentTimeMillis() - waitSpeechFailStartMs >
+            idleTimeoutSeconds * 1000
+        ) {
+            Log.d(TAG, "⏱ WAIT_SPEECH timeout")
             resetAll()
             waitSpeechFailStartMs = 0L
         }
@@ -270,11 +359,11 @@ class VoiceController(
     private fun resetToWaitSpeech() {
         audioBuffer.clear()
         vadManager.reset()
+        resetEnergyStat()
         vadStarted = false
         silenceStartMs = 0L
         state = VoiceState.WAIT_SPEECH
 
-        // ⭐ 只在第一次失败时记录
         if (waitSpeechFailStartMs == 0L) {
             waitSpeechFailStartMs = System.currentTimeMillis()
         }
@@ -284,11 +373,14 @@ class VoiceController(
         audioBuffer.clear()
         preBuffer.clear()
         vadManager.reset()
+        resetEnergyStat()
         vadStarted = false
         silenceStartMs = 0L
+        waitSpeechStartMs = 0L   // ⭐
         state = VoiceState.WAIT_WAKEUP
     }
 
+
     fun release() {
         wakeupManager.release()
         vadManager.reset()
@@ -296,6 +388,11 @@ class VoiceController(
 
     /* ================= Utils ================= */
 
+    private fun resetEnergyStat() {
+        speechEnergySum = 0f
+        speechFrameCount = 0
+    }
+
     private fun cachePreBuffer(samples: FloatArray) {
         for (s in samples) {
             preBuffer.addLast(s)
diff --git a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
index 23d3316..74183a0 100644
--- a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
+++ b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
@@ -161,7 +161,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
                         voiceInfo = mutableListOf<VoiceBeanResp>().apply {
                             add(
                                 VoiceBeanResp(
-                                    audioUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?:*/ "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
+                                    audioUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
                                 )
                             )
                         }
@@ -187,8 +187,17 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
                     VoiceState.WAIT_WAKEUP -> {
                         Log.d("lrs", "当前状态: 等待唤醒")
                         lifecycleScope.launch(Dispatchers.Main) {
-
                             mVerticalAnimator?.hide()
+                            UnityPlayerHolder.getInstance()
+                                .sendVoiceToUnity(
+                                    voiceInfo = mutableListOf<VoiceBeanResp>().apply {
+                                        add(
+                                            VoiceBeanResp(
+                                                audioUrl = "https://static.seerteach.net/aidialogue/userWakeUpAudio/ttsmaker-file-2025-12-31-16-2-51.mp3"
+                                            )
+                                        )
+                                    }
+                                )
                         }
                     }
 
@@ -334,9 +343,9 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
         word: String,
         audioUrl: String
     ) {
-//        val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: return
-//
-//        if (audioUrl != wakeupUrl) return
+        // Use the default prompt URL when the user has no custom wake-up audio; ignore callbacks for any other audio.
+        val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
+        if (audioUrl != wakeupUrl) return
 
         when (state) {
             1 -> { // play
diff --git a/app/src/main/java/com/zs/smarthuman/utils/SerialNumberUtil.kt b/app/src/main/java/com/zs/smarthuman/utils/SerialNumberUtil.kt
index a78f6b3..f3228c2 100644
--- a/app/src/main/java/com/zs/smarthuman/utils/SerialNumberUtil.kt
+++ b/app/src/main/java/com/zs/smarthuman/utils/SerialNumberUtil.kt
@@ -25,14 +25,14 @@ object SerialNumberUtil {
      * 外部调用，获取最终序列号
      */
     fun getSerialNumber(): String {
-        for (key in snKeys) {
-            val sn = getProp(key)
-            if (!sn.isNullOrBlank()) {
-                return limitSerialDigit(sn)
-            }
-        }
-        return ""
-//        return "zd09312051870556"
+        // Probe the candidate properties in order, lazily, and stop at the
+        // first non-blank value. Do not hard-code a serial number here: the
+        // function would then return the same value on every device.
+        val serial = snKeys.asSequence()
+            .map { getProp(it) }
+            .firstOrNull { !it.isNullOrBlank() }
+            ?: return ""
+        return limitSerialDigit(serial)
     }
 
     /**