diff --git a/app/build.gradle b/app/build.gradle
index 27f3a8a..c07dfcf 100644
--- a/app/build.gradle
+++ b/app/build.gradle
@@ -181,5 +181,6 @@ dependencies {
     implementation libs.androidautosize
     implementation files('libs/sherpa-onnx-1.12.20.aar')
+    implementation 'com.github.yyued:SVGAPlayer-Android:2.6.1'
 }
\ No newline at end of file
diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml
index d64e047..560b309 100644
--- a/app/src/main/AndroidManifest.xml
+++ b/app/src/main/AndroidManifest.xml
@@ -39,7 +39,7 @@
     Unit,
     private val onSpeechEnd: () -> Unit
 ) {
+    private val TAG = "VadManager"
     private val vad: Vad
     private var isSpeaking = false
-    private var lastSpeechTime = 0L
+    private var lastSpeechMs = 0L

-    private val ACTIVE_END_SILENCE_MS = 1500L
-    private val ACTIVE_CONSECUTIVE_FRAMES = 10
-    private val FINAL_END_SILENCE_MS = 800L
-    private val FINAL_CONSECUTIVE_FRAMES = 5
-    private val FINAL_PHASE_TRIGGER_MS = 1000L
-    private val MAX_SILENCE_AFTER_SPEECH_MS = 2000L
-
-    private val MIN_EFFECTIVE_SPEECH_RMS = 0.001f
-
-    private var consecutiveSilenceFrames = 0
-    private var isInFinalPhase = false
-    private var lastEffectiveSpeechTime = 0L
+    /** End speech more decisively */
+    private val END_SILENCE_MS = 350L
+    private val MIN_RMS = 0.002f

     init {
-        val config = getVadModelConfig(0)
-            ?: throw IllegalStateException("[$TAG] VAD config not found")
-        vad = Vad(assetManager, VadModelConfig(sileroVadModelConfig = SileroVadModelConfig(model = "silero_vad.onnx", threshold = 0.2f)))
-        LogUtils.i(TAG, "✅ VAD initialized successfully")
+        vad = Vad(
+            assetManager,
+            VadModelConfig(
+                sileroVadModelConfig = SileroVadModelConfig(
+                    model = "silero_vad.onnx",
+                    threshold = 0.5F,
+                    minSilenceDuration = 0.25F,
+                    minSpeechDuration = 0.25F,
+                    windowSize = 512,
+                ),
+                sampleRate = 16000,
+                numThreads = 1,
+                provider = "cpu"
+            )
+        )
+        LogUtils.i(TAG, "✅ VAD init")
     }

     fun accept(samples: FloatArray) {
         val now = System.currentTimeMillis()
         vad.acceptWaveform(samples)
-        val vadHasSpeech = vad.isSpeechDetected()
+        val hasSpeech = vad.isSpeechDetected()
         val rms = calcRms(samples)

-        val isEffectiveSpeech = vadHasSpeech && rms >= MIN_EFFECTIVE_SPEECH_RMS
-
-        if (isEffectiveSpeech) {
-            lastEffectiveSpeechTime = now
-            isInFinalPhase = false
-            lastSpeechTime = now
-            consecutiveSilenceFrames = 0
-        } else {
-            consecutiveSilenceFrames++
-            if (now - lastEffectiveSpeechTime >= FINAL_PHASE_TRIGGER_MS) {
-                isInFinalPhase = true
-            }
-        }
-
-        val (endSilenceMs, endFrames) =
-            if (isInFinalPhase)
-                FINAL_END_SILENCE_MS to FINAL_CONSECUTIVE_FRAMES
-            else
-                ACTIVE_END_SILENCE_MS to ACTIVE_CONSECUTIVE_FRAMES
-
-        if (isEffectiveSpeech) {
+        if (hasSpeech && rms >= MIN_RMS) {
+            lastSpeechMs = now
             if (!isSpeaking) {
                 isSpeaking = true
                 onSpeechStart()
             }
-        } else if (isSpeaking) {
-            val silenceMs = now - lastSpeechTime
-            val effectiveSilenceMs = now - lastEffectiveSpeechTime
-
-            val shouldEnd =
-                (silenceMs >= endSilenceMs ||
-                    effectiveSilenceMs >= MAX_SILENCE_AFTER_SPEECH_MS) &&
-                consecutiveSilenceFrames >= endFrames
-
-            if (shouldEnd) {
-                onSpeechEnd()
-                reset()
-                isSpeaking = false
-                isInFinalPhase = false
-            }
+        } else if (isSpeaking && now - lastSpeechMs > END_SILENCE_MS) {
+            onSpeechEnd()
+            reset()
         }
     }

     fun reset() {
         isSpeaking = false
-        lastSpeechTime = 0L
-        lastEffectiveSpeechTime = 0L
-        consecutiveSilenceFrames = 0
-        isInFinalPhase = false
+        lastSpeechMs = 0
         vad.reset()
     }

-    fun calcRms(samples: FloatArray): Float {
+    private fun calcRms(samples: FloatArray): Float {
         var sum = 0f
         for (v in samples) sum += v * v
         return sqrt(sum / samples.size)
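The endpointing rule the rewritten VadManager converges on is small enough to sketch in isolation: a frame counts as speech only when the VAD flag and an RMS floor agree, and the utterance ends once no such frame has arrived for END_SILENCE_MS. The sketch below is a minimal, hypothetical stand-alone version of that state machine (the class name SimpleEndpointer and the boolean VAD input are illustrative; it does not call sherpa-onnx), with the constants mirroring the diff:

```kotlin
import kotlin.math.sqrt

// Hypothetical sketch of the RMS-gated endpointing above; not the project's VadManager.
class SimpleEndpointer(
    private val endSilenceMs: Long = 350L,   // mirrors END_SILENCE_MS
    private val minRms: Float = 0.002f,      // mirrors MIN_RMS
    private val onSpeechStart: () -> Unit,
    private val onSpeechEnd: () -> Unit
) {
    private var isSpeaking = false
    private var lastSpeechMs = 0L

    /** Feed one audio frame plus the VAD decision for that frame. */
    fun accept(samples: FloatArray, vadSaysSpeech: Boolean, nowMs: Long = System.currentTimeMillis()) {
        if (samples.isEmpty()) return
        val rms = sqrt(samples.fold(0f) { acc, v -> acc + v * v } / samples.size)
        if (vadSaysSpeech && rms >= minRms) {
            // Effective speech: remember the time and fire onSpeechStart once per utterance.
            lastSpeechMs = nowMs
            if (!isSpeaking) {
                isSpeaking = true
                onSpeechStart()
            }
        } else if (isSpeaking && nowMs - lastSpeechMs > endSilenceMs) {
            // Silence (or low-energy noise) has lasted long enough: close the utterance.
            isSpeaking = false
            onSpeechEnd()
        }
    }
}
```

With the 512-sample window configured above (32 ms at 16 kHz), roughly eleven consecutive sub-threshold frames are enough to end an utterance, which is what makes the new 350 ms cut-off feel "more decisive" than the old multi-phase logic.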
diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
index 88c4072..5b3ea1f 100644
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
@@ -30,11 +30,9 @@ class VoiceController(
         // Pre-buffer size (2 seconds)
         private const val PRE_BUFFER_SIZE = SAMPLE_RATE * 2

-        // Short-audio threshold
-        private const val SHORT_AUDIO_DURATION_MS = 1000L
         private const val INVALID_RESET_DEBOUNCE_MS = 1500L
         // Minimum speech duration
-        private const val MIN_SPEECH_MS = 800L
+        private const val MIN_SPEECH_MS = 600L

         // Unified speaker-verification threshold (no longer per-scenario)
         private const val SPEAKER_THRESHOLD = 0.45f
@@ -275,6 +273,9 @@ class VoiceController(
     fun onPlayEndPrompt() {
         speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
         LogUtils.d(TAG, "🎵 Prompt tone finished")
+        if (!preBuffer.isEmpty()) {
+            preBuffer.clear()
+        }
         state = VoiceState.WAIT_SPEECH_COOLDOWN
     }

@@ -321,7 +322,9 @@ class VoiceController(
     private fun resetAll() {
         LogUtils.d(TAG, "🔄 Resetting all state | timeout type this round: $currentTimeoutType")
         audioBuffer.clear()
-        preBuffer.clear()
+        if (!preBuffer.isEmpty()) {
+            preBuffer.clear()
+        }
         vadManager.reset()
         wakeupManager.reset()
         vadStarted = false
@@ -359,9 +362,15 @@ class VoiceController(
     }

     private fun cachePreBuffer(samples: FloatArray) {
+        // Return early for empty input to avoid a pointless loop
+        if (samples.isEmpty()) return
+
         for (s in samples) {
             preBuffer.addLast(s)
-            if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst()
+            // Key fix: check the deque is non-empty before removing
+            if (preBuffer.size > PRE_BUFFER_SIZE && !preBuffer.isEmpty()) {
+                preBuffer.removeFirst()
+            }
         }
     }

@@ -371,7 +380,10 @@ class VoiceController(
             return false
         }

-        // 1. Trim the audio: keep only the valid part of this recording
+        // 1. Record the verification start time (key: measure processing cost)
+        val verifyStartMs = System.currentTimeMillis()
+
+        // 2. Original audio-trimming logic (kept)
         val audioDurationMs = (audio.size.toFloat() / SAMPLE_RATE * 1000).toLong()
         val validAudio = if (audioDurationMs > 0) {
             val validSampleCount = (audioDurationMs * SAMPLE_RATE / 1000).toInt()
@@ -386,21 +398,16 @@
         var stream: OnlineStream? = null

-        // Handle exceptions uniformly with runCatching
         return runCatching {
             stream = SpeakerRecognition.extractor.createStream()
+            stream.acceptWaveform(samples = validAudio, sampleRate = SAMPLE_RATE)
+            stream.inputFinished()

-            // Process the audio data
-            stream?.acceptWaveform(samples = validAudio, sampleRate = SAMPLE_RATE)
-            stream?.inputFinished()
-
-            // Check whether the stream is ready
-            if (stream == null || !SpeakerRecognition.extractor.isReady(stream)) {
+            if (!SpeakerRecognition.extractor.isReady(stream)) {
                 LogUtils.w(TAG, "❌ Audio stream not ready, verification failed")
                 return@runCatching false
             }

-            // Compute the embedding and verify
             val embedding = SpeakerRecognition.extractor.compute(stream)
             speakerManagerLock.withLock {
                 val verifyPass = SpeakerRecognition.manager.verify(
@@ -409,19 +416,20 @@
                     threshold = SPEAKER_THRESHOLD
                 )

-                LogUtils.d(TAG, "📊 Speaker verification | unified threshold: $SPEAKER_THRESHOLD | passed: $verifyPass | duration: ${(validAudio.size.toFloat()/SAMPLE_RATE*1000).toLong()}ms")
+                // 3. Compute the real processing cost (end time - start time)
+                val verifyCostMs = System.currentTimeMillis() - verifyStartMs
+                // Log distinguishes audio duration vs. processing cost
+                LogUtils.d(TAG, "📊 Speaker verification | unified threshold: $SPEAKER_THRESHOLD | passed: $verifyPass | audio duration: $audioDurationMs ms | processing cost: $verifyCostMs ms")
                 verifyPass
             }
         }.onFailure { e ->
-            // Handle all exception cases
             LogUtils.e(TAG, "❌ Speaker verification error, rejecting", e)
         }.also {
-            // Make sure the stream resource is released
             runCatching { stream?.release() }.onFailure { e ->
                 LogUtils.w(TAG, "⚠️ Failed to release stream resource", e)
             }
-        }.getOrDefault(false) // Default to false on exception
+        }.getOrDefault(false)
     }
 }
\ No newline at end of file
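The cachePreBuffer change is purely defensive: preBuffer is a FIFO capped at PRE_BUFFER_SIZE (two seconds at 16 kHz), so each appended sample may evict the oldest one. A minimal stand-alone version of that ring behaviour, with hypothetical names (PreBuffer, append, drain) rather than the project's actual class, looks like this:

```kotlin
// Hypothetical sketch of the capped pre-buffer; not the project's VoiceController.
class PreBuffer(private val capacity: Int = 16_000 * 2) {  // ~2 s of 16 kHz mono
    private val deque = ArrayDeque<Float>(capacity)

    /** Append samples, evicting the oldest ones once the capacity is exceeded. */
    fun append(samples: FloatArray) {
        if (samples.isEmpty()) return
        for (s in samples) {
            deque.addLast(s)
            if (deque.size > capacity) deque.removeFirst()
        }
    }

    /** Return the buffered samples (oldest first) and clear the buffer. */
    fun drain(): FloatArray {
        val out = FloatArray(deque.size) { deque[it] }
        deque.clear()
        return out
    }
}
```

Because addLast runs before the size check, the deque can never be empty at the removeFirst call; the extra isEmpty() guard added in the diff is therefore harmless but not strictly required.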
diff --git a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
index 7168ef6..63053cc 100644
--- a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
+++ b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
@@ -102,7 +102,7 @@ class MainActivity : BaseViewModelActivity()
     private var startPlayTimeoutJob: Job? = null // Single timeout Job for all playback scenarios
     private var mEventSources: EventSource? = null
-    private var isManualCancel = false
+
     override fun getViewBinding(): ActivityMainBinding = ActivityMainBinding.inflate(layoutInflater)

     override fun initView() {
@@ -219,7 +219,7 @@
                 }
             },
             onFinalAudio = { audio ->
-                sendRecordVoiceToServer(AudioPcmUtil.floatToPcm16Base64(audio))
+                sendRecordVoiceToServer(AudioPcmUtil.floatToPcm16(audio))
 //                mViewModel?.uploadVoice(
 //                    // AudioPcmUtil.floatToPcm16Base64(audio),
@@ -231,7 +231,7 @@
                     "xxx.wav"
                 )
                 AudioDebugUtil.saveFloatPcmAsWav(audio, file)
-                LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
+//                LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
 //                lifecycleScope.launch(Dispatchers.Main) {
 //                    // mVerticalAnimator?.show()
@@ -560,16 +560,13 @@
     }

-    private fun sendRecordVoiceToServer(audio: String) {
+    private fun sendRecordVoiceToServer(audio: ByteArray) {
         cancelSSE()
-        val request: Request? = RxHttp.postJson(ApiService.UPLOAD_RECORD_VOICE_URL)
-            .add("audio", audio)
+        val request: Request? = RxHttp.postBody(ApiService.UPLOAD_RECORD_VOICE_URL)
+            .setBody(audio)
             .buildRequest()

         request?.let {
-            // Reset the manual-cancel flag
-            isManualCancel = false
-
             mEventSources = createFactory(RxHttpPlugins.getOkHttpClient())
                 .newEventSource(it, object : EventSourceListener() {
                     override fun onOpen(eventSource: EventSource, response: Response) {
@@ -601,22 +598,19 @@
                         response: Response?
                     ) {
                         super.onFailure(eventSource, t, response)
-                        // Key fix 2: ignore failures caused by manual cancellation
-                        if (isManualCancel) {
-                            LogUtils.eTag("lrsxxx", "SSE cancelled manually, ignoring failure callback")
-                            return
-                        }
-
-                        // Normal failure path
                         val errorMsg = t?.message ?: response?.message ?: "Unknown error"
-                        voiceController?.onUploadFinished(false)
+                        LogUtils.eTag("lrsxxx", "Streaming request failed: ${errorMsg}")
+                        if (backPlaying) {
+                            voiceController?.onPlayEndBackend()
+                            backPlaying = false
+                        } else {
+                            voiceController?.onUploadFinished(false)
+                        }
                     }

                     override fun onClosed(eventSource: EventSource) {
                         super.onClosed(eventSource)
-                        // Key fix 3: distinguish manual cancellation from normal close
-                        val isSuccess = !isManualCancel
-
-                        // Key fix 4: null out the reference after close to avoid memory leaks
                         mEventSources = null
                     }
                 })
@@ -625,7 +619,6 @@

     private fun cancelSSE() {
-        isManualCancel = true
         mEventSources?.cancel()
         mEventSources = null
     }
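The upload path now posts raw 16-bit PCM bytes as the request body instead of a Base64 string in JSON, which avoids the ~33% Base64 size overhead. AudioPcmUtil.floatToPcm16 itself is not shown in this diff; assuming it does the usual float-to-16-bit little-endian conversion, a minimal equivalent could be:

```kotlin
// Hypothetical equivalent of AudioPcmUtil.floatToPcm16: float samples in [-1, 1]
// to 16-bit little-endian PCM bytes (the raw body now sent to the server).
fun floatToPcm16(samples: FloatArray): ByteArray {
    val out = ByteArray(samples.size * 2)
    for (i in samples.indices) {
        // Clamp, scale to the signed 16-bit range, then write little-endian.
        val v = (samples[i].coerceIn(-1f, 1f) * 32767f).toInt()
        out[i * 2] = (v and 0xFF).toByte()
        out[i * 2 + 1] = ((v shr 8) and 0xFF).toByte()
    }
    return out
}
```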
diff --git a/app/src/main/res/layout/custom_words_panel.xml b/app/src/main/res/layout/custom_words_panel.xml
index 842aee1..e4d30c2 100644
--- a/app/src/main/res/layout/custom_words_panel.xml
+++ b/app/src/main/res/layout/custom_words_panel.xml
@@ -26,6 +26,7 @@
         android:layout_width="match_parent"
         android:layout_height="wrap_content"
         android:textColor="@color/white"
+        android:padding="20dp"
         android:textSize="14sp" />