diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
index 61115c7..fc83c8b 100644
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
@@ -12,13 +12,15 @@ class VadManager(
 ) {
 
     private val TAG = "VadManager"
 
+    private val vad: Vad
     private var isSpeaking = false
     private var lastSpeechMs = 0L
+    private var lastActiveMs = 0L
 
-    /** End speech more decisively */
     private val END_SILENCE_MS = 350L
+    private val RESET_IDLE_MS = 3_000L
 
     private val MIN_RMS = 0.002f
 
     init {
@@ -43,31 +45,62 @@ class VadManager(
     fun accept(samples: FloatArray) {
         val now = System.currentTimeMillis()
 
+        // 1️⃣ Fast RMS check first
+        val rms = fastRms(samples)
+        if (rms < MIN_RMS) {
+            handleSilence(now)
+            return
+        }
+
+        // 2️⃣ Only feed the VAD when the frame has energy
         vad.acceptWaveform(samples)
         val hasSpeech = vad.isSpeechDetected()
-        val rms = calcRms(samples)
 
-        if (hasSpeech && rms >= MIN_RMS) {
+        if (hasSpeech) {
             lastSpeechMs = now
+            lastActiveMs = now
             if (!isSpeaking) {
                 isSpeaking = true
                 onSpeechStart()
             }
-        } else if (isSpeaking && now - lastSpeechMs > END_SILENCE_MS) {
-            onSpeechEnd()
-            reset()
+        } else {
+            handleSilence(now)
         }
     }
 
+    private fun handleSilence(now: Long) {
+        if (isSpeaking && now - lastSpeechMs > END_SILENCE_MS) {
+            isSpeaking = false
+            onSpeechEnd()
+        }
+
+        // Only reset after a long idle period
+        if (!isSpeaking && now - lastActiveMs > RESET_IDLE_MS) {
+            vad.reset()
+            lastActiveMs = now
+            LogUtils.d(TAG, "🔄 VAD reset (idle)")
+        }
+    }
+
+    private fun fastRms(samples: FloatArray): Float {
+        var sum = 0f
+        var count = 0
+        var i = 0
+        val step = 4
+        while (i < samples.size) {
+            val v = samples[i]
+            sum += v * v
+            count++
+            i += step
+        }
+        return sqrt(sum / count)
+    }
+
     fun reset() {
         isSpeaking = false
         lastSpeechMs = 0
+        lastActiveMs = 0
         vad.reset()
     }
-
-    private fun calcRms(samples: FloatArray): Float {
-        var sum = 0f
-        for (v in samples) sum += v * v
-        return sqrt(sum / samples.size)
-    }
 }
+
diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
index 39b3ec8..87a5ef1 100644
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
@@ -34,11 +34,12 @@
 
         private const val INVALID_RESET_DEBOUNCE_MS = 1500L
 
-        // Minimum speech duration
-        private const val MIN_SPEECH_MS = 600L
 
         // Unified speaker-verification threshold (no longer scenario-specific)
-        private const val SPEAKER_THRESHOLD = 0.45f
+        private const val SPEAKER_THRESHOLD = 0.38f
+
+        private const val MIN_VERIFY_MS = 600L
+        private const val MAX_VERIFY_MS = 1200L
     }
 
     var state: VoiceState = VoiceState.WAIT_WAKEUP
@@ -219,23 +220,59 @@
         onWakeup()
         LogUtils.d(TAG, "🔔 Wakeup succeeded")
     }
 
+    @Volatile private var speakerVerifyFinished = false
+    @Volatile private var speakerVerifyPassed = true
 
     private fun onVadStart() {
         if (state != VoiceState.WAIT_SPEECH) return
-        LogUtils.d(TAG, "🎤 REAL VAD START")
+
         vadStarted = true
         recordingStartMs = System.currentTimeMillis()
         audioBuffer.clear()
         audioBuffer.addAll(preBuffer)
+
+        startAsyncSpeakerVerify()
+
         state = VoiceState.RECORDING
     }
+
     private fun onVadEnd() {
         if (state != VoiceState.RECORDING) return
         LogUtils.d(TAG, "🧠 VAD END")
         finishSentence()
     }
 
+    private fun startAsyncSpeakerVerify() {
+        speakerVerifyFinished = false
+        speakerVerifyPassed = true // fail-open
+
+        CoroutineScope(Dispatchers.IO).launch {
+            // Wait for 600 ms of audio
+            val needSamples = SAMPLE_RATE * 600 / 1000
+            var waited = 0L
+
+            while (audioBuffer.size < needSamples && waited < 800) {
+                kotlinx.coroutines.delay(20)
+                waited += 20
+            }
+
+            if (audioBuffer.size < needSamples) {
+                speakerVerifyFinished = true
+                return@launch
+            }
+
+            val input = audioBuffer
+                .takeLast(needSamples)
+                .toFloatArray()
+
+            val pass = verifySpeaker(input)
+            speakerVerifyPassed = pass
+            speakerVerifyFinished = true
+        }
+    }
+
+    /* ================= Finish recording ================= */
     private fun finishSentence() {
         val now = System.currentTimeMillis()
 
@@ -251,17 +288,17 @@
         val audio = audioBuffer.toFloatArray()
 
         // Speaker verification (core logic kept)
-        if (ENABLE_STRICT_SPEAKER_VERIFY) {
-            val isCurrentUser = verifySpeaker(audio)
-            if (!isCurrentUser) {
-                LogUtils.w(TAG, "❌ Not the user who triggered wakeup, rejecting speech | recording duration: $duration ms")
-                hasInvalidSpeech = true
-                resetToWaitSpeech()
-                return
-            }
-            LogUtils.d(TAG, "✅ Current user's speech, continuing | recording duration: $duration ms")
+        if (ENABLE_STRICT_SPEAKER_VERIFY &&
+            speakerVerifyFinished &&
+            !speakerVerifyPassed
+        ) {
+            LogUtils.w(TAG, "❌ Speaker verification failed (finished), rejecting")
+            hasInvalidSpeech = true
+            resetToWaitSpeech()
+            return
         }
 
+        // Passed, continue
        audioBuffer.clear()
        state = VoiceState.UPLOADING
 
@@ -390,65 +427,62 @@
 
     }
 
+
+
     private fun verifySpeaker(audio: FloatArray): Boolean {
-        if (audio.isEmpty()) {
-            LogUtils.w(TAG, "❌ Audio to verify is empty, speaker verification failed")
-            return false
+        if (audio.isEmpty()) return true
+
+        val audioMs = audio.size * 1000L / SAMPLE_RATE
+        if (audioMs < MIN_VERIFY_MS) {
+            LogUtils.d(TAG, "🟡 Short audio $audioMs ms, skipping speaker verification")
+            return true
         }
 
-        // 1. Record the verification start time (key: measure processing cost)
         val verifyStartMs = System.currentTimeMillis()
 
-        // 2. Original audio-trimming logic (kept)
-        val audioDurationMs = (audio.size.toFloat() / SAMPLE_RATE * 1000).toLong()
-        val validAudio = if (audioDurationMs > 0) {
-            val validSampleCount = (audioDurationMs * SAMPLE_RATE / 1000).toInt()
-            if (validSampleCount < audio.size) {
-                audio.copyOfRange(audio.size - validSampleCount, audio.size)
-            } else {
-                audio
-            }
+        val maxSamples = (SAMPLE_RATE * MAX_VERIFY_MS / 1000).toInt()
+        val input = if (audio.size > maxSamples) {
+            audio.copyOfRange(audio.size - maxSamples, audio.size)
         } else {
             audio
         }
+
         var stream: OnlineStream? = null
 
         return runCatching {
             stream = SpeakerRecognition.extractor.createStream()
-            stream.acceptWaveform(samples = validAudio, sampleRate = SAMPLE_RATE)
+            stream.acceptWaveform(input, SAMPLE_RATE)
             stream.inputFinished()
 
             if (!SpeakerRecognition.extractor.isReady(stream)) {
-                LogUtils.w(TAG, "❌ Audio stream not ready, verification failed")
-                return@runCatching false
+                LogUtils.w(TAG, "⚠️ Stream not ready, letting it pass")
+                return@runCatching true
             }
 
             val embedding = SpeakerRecognition.extractor.compute(stream)
 
-            speakerManagerLock.withLock {
-                val verifyPass = SpeakerRecognition.manager.verify(
-                    name = CURRENT_USER_ID,
-                    embedding = embedding,
-                    threshold = SPEAKER_THRESHOLD
-                )
-                // 3. Compute the actual processing cost (end time - start time)
-                val verifyCostMs = System.currentTimeMillis() - verifyStartMs
-                // The log distinguishes audio duration from processing cost
-                LogUtils.d(
-                    TAG,
-                    "📊 Speaker verification | unified threshold: $SPEAKER_THRESHOLD | passed: $verifyPass | audio duration: $audioDurationMs ms | processing cost: $verifyCostMs ms"
+            val pass = speakerManagerLock.withLock {
+                SpeakerRecognition.manager.verify(
+                    CURRENT_USER_ID,
+                    embedding,
+                    SPEAKER_THRESHOLD
                 )
-                verifyPass
             }
-        }.onFailure { e ->
-            LogUtils.e(TAG, "❌ Speaker verification exception, rejecting", e)
+
+            val cost = System.currentTimeMillis() - verifyStartMs
+            LogUtils.d(
+                TAG,
+                "📊 Speaker | pass=$pass | audio=${audioMs}ms | input=${input.size} | cost=${cost}ms"
+            )
+
+            pass
+        }.onFailure {
+            LogUtils.e(TAG, "❌ Speaker verification exception, letting it pass", it)
         }.also {
-            runCatching {
-                stream?.release()
-            }.onFailure { e ->
-                LogUtils.w(TAG, "⚠️ Failed to release the stream", e)
-            }
-        }.getOrDefault(false)
+            runCatching { stream?.release() }
+        }.getOrDefault(true)
     }
 
+
+
 }
\ No newline at end of file
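
Reviewer note: a minimal, standalone sketch (not part of the patch) of the strided-RMS idea behind the new fastRms() above. Summing every 4th sample approximates the full-sample RMS closely enough to gate obvious silence before a frame ever reaches the VAD. The helper names (fullRms, stridedRms), the 440 Hz test tone, and the 100 ms frame below are hypothetical and purely illustrative; only the MIN_RMS = 0.002f threshold and the stride of 4 come from the patch.

    import kotlin.math.abs
    import kotlin.math.sin
    import kotlin.math.sqrt

    // Full RMS over every sample (the shape of the removed calcRms()).
    fun fullRms(samples: FloatArray): Float {
        var sum = 0f
        for (v in samples) sum += v * v
        return sqrt(sum / samples.size)
    }

    // Strided RMS over every 4th sample (the shape of the new fastRms()).
    fun stridedRms(samples: FloatArray, step: Int = 4): Float {
        var sum = 0f
        var count = 0
        var i = 0
        while (i < samples.size) {
            val v = samples[i]
            sum += v * v
            count++
            i += step
        }
        return sqrt(sum / count)
    }

    fun main() {
        // Hypothetical 100 ms frame: a 440 Hz tone at 16 kHz with peak amplitude 0.1.
        val frame = FloatArray(1600) { i ->
            (0.1 * sin(2.0 * Math.PI * 440.0 * i / 16000.0)).toFloat()
        }
        val full = fullRms(frame)
        val fast = stridedRms(frame)
        // Both values land near 0.07, far above the MIN_RMS = 0.002f silence gate,
        // while the strided version touches only a quarter of the samples.
        println("fullRms=$full stridedRms=$fast diff=${abs(full - fast)}")
    }

At 16 kHz input a stride of 4 still samples the energy envelope at an effective 4 kHz, so the silence gate stays reliable while doing roughly a quarter of the multiply-adds per frame.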