强化代码

2026-02-08 15:21:38 +08:00 · 2026-02-08 15:21:38 +08:00 · 3c9ec050c4
commit 3c9ec050c4
parent c20172ab1f
3 changed files with 166 additions and 175 deletions
--- a/app/src/main/java/com/zs/smarthuman/sherpa/AudioRecorder.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/AudioRecorder.kt
@ -0,0 +1,99 @@
+package com.zs.smarthuman.sherpa
+
+import android.annotation.SuppressLint
+import android.media.AudioFormat
+import android.media.AudioRecord
+import android.media.MediaRecorder
+import android.media.audiofx.AcousticEchoCanceler
+import android.media.audiofx.NoiseSuppressor
+import android.util.Log
+import kotlinx.coroutines.channels.Channel
+import kotlinx.coroutines.flow.Flow
+import kotlinx.coroutines.flow.receiveAsFlow
+
+/**
+ * Captures 16-bit PCM audio from the microphone and exposes it as a [Flow] of byte frames.
+ *
+ * Every call to [startRecording] opens an independent session (its own [AudioRecord], reader
+ * thread and channel), so the recorder can be started again after [stopRecording].
+ * [startRecording] and [stopRecording] are serialized on an internal lock.
+ *
+ * @param sampleRate capture sample rate in Hz.
+ * @param channels 1 selects mono; any other value selects stereo.
+ * @param frameSizeMs duration of one emitted frame in milliseconds.
+ */
+class AudioRecorder(
+    private val sampleRate: Int,
+    private val channels: Int,
+    private val frameSizeMs: Int
+) {
+    companion object {
+        private const val TAG = "AudioRecorder"
+
+        /** Maximum number of frames buffered between the reader thread and the collector. */
+        private const val CHANNEL_CAPACITY = 50
+        private const val BYTES_PER_SAMPLE = 2 // 16-bit PCM
+
+        /** Upper bound on how long [stopRecording] waits for the reader thread to exit. */
+        private const val READER_JOIN_TIMEOUT_MS = 500L
+
+        /** Log one warning per this many dropped frames to avoid flooding logcat. */
+        private const val DROP_LOG_INTERVAL = 50
+    }
+
+    private val channelConfig =
+        if (channels == 1) AudioFormat.CHANNEL_IN_MONO else AudioFormat.CHANNEL_IN_STEREO
+    private val frameSize = (sampleRate * frameSizeMs) / 1000
+    private val frameBytes = frameSize * channels * BYTES_PER_SAMPLE
+
+    // Guards every piece of per-session state below.
+    private val lock = Any()
+    private var audioRecord: AudioRecord? = null
+    private var aec: AcousticEchoCanceler? = null
+    private var ns: NoiseSuppressor? = null
+    private var audioChannel: Channel<ByteArray>? = null
+    private var readerThread: Thread? = null
+
+    /**
+     * Starts capturing and returns a flow of PCM frames of at most `frameBytes` bytes each.
+     *
+     * A session that is still running is stopped first, which completes the flow returned
+     * for it. The returned flow completes when [stopRecording] is called or when the
+     * underlying [AudioRecord] stops delivering data. Frames are dropped (not queued) when
+     * the collector falls more than [CHANNEL_CAPACITY] frames behind.
+     *
+     * @throws IllegalStateException if the configuration is unsupported or the
+     *   [AudioRecord] cannot be initialized or started.
+     */
+    @SuppressLint("MissingPermission")
+    fun startRecording(): Flow<ByteArray> = synchronized(lock) {
+        // Never leak a previous AudioRecord / reader thread.
+        releaseSession()
+
+        check(frameBytes > 0) { "Invalid frame size: $frameBytes bytes" }
+        val minBufferSize = AudioRecord.getMinBufferSize(
+            sampleRate,
+            channelConfig,
+            AudioFormat.ENCODING_PCM_16BIT
+        )
+        // getMinBufferSize reports failures as non-positive values.
+        check(minBufferSize > 0) {
+            "Unsupported recording parameters (getMinBufferSize=$minBufferSize)"
+        }
+
+        val recorder = AudioRecord(
+            MediaRecorder.AudioSource.MIC,
+            sampleRate,
+            channelConfig,
+            AudioFormat.ENCODING_PCM_16BIT,
+            maxOf(minBufferSize * 2, frameBytes)
+        )
+        if (recorder.state != AudioRecord.STATE_INITIALIZED) {
+            recorder.release()
+            throw IllegalStateException("Failed to initialize AudioRecord")
+        }
+
+        try {
+            attachEffects(recorder.audioSessionId)
+            recorder.startRecording()
+        } catch (e: RuntimeException) {
+            // Do not leave native resources behind when start-up fails half-way.
+            releaseEffects()
+            recorder.release()
+            throw e
+        }
+        Log.i(TAG, "Started recording")
+
+        // A fresh channel per session: a closed channel cannot be reopened.
+        val channel = Channel<ByteArray>(capacity = CHANNEL_CAPACITY)
+        audioRecord = recorder
+        audioChannel = channel
+        readerThread = Thread(Runnable { pumpFrames(recorder, channel) }, "AudioRecorder-reader")
+            .also { it.start() }
+
+        channel.receiveAsFlow()
+    }
+
+    /**
+     * Stops the current session, if any, and releases the recorder and audio effects.
+     * Safe to call repeatedly or when nothing was started.
+     */
+    fun stopRecording() {
+        synchronized(lock) { releaseSession() }
+        Log.i(TAG, "Stopped recording")
+    }
+
+    /** Enables the echo canceller and noise suppressor for [sessionId] where available. */
+    private fun attachEffects(sessionId: Int) {
+        if (AcousticEchoCanceler.isAvailable()) {
+            // create() may still return null even though isAvailable() is true.
+            val effect = AcousticEchoCanceler.create(sessionId)
+            if (effect == null) {
+                Log.w(TAG, "AEC could not be created")
+            } else {
+                aec = effect
+                effect.enabled = true
+                Log.i(TAG, "AEC initialized")
+            }
+        } else {
+            Log.w(TAG, "AEC not available on this device")
+        }
+
+        if (NoiseSuppressor.isAvailable()) {
+            val effect = NoiseSuppressor.create(sessionId)
+            if (effect == null) {
+                Log.w(TAG, "NS could not be created")
+            } else {
+                ns = effect
+                effect.enabled = true
+                Log.i(TAG, "NS initialized")
+            }
+        } else {
+            Log.w(TAG, "NS not available on this device")
+        }
+    }
+
+    /** Disables and releases both audio effects; tolerates effects that are already dead. */
+    private fun releaseEffects() {
+        aec?.let { effect ->
+            try {
+                effect.enabled = false
+            } catch (e: IllegalStateException) {
+                Log.w(TAG, "Failed to disable AEC", e)
+            }
+            effect.release()
+        }
+        aec = null
+
+        ns?.let { effect ->
+            try {
+                effect.enabled = false
+            } catch (e: IllegalStateException) {
+                Log.w(TAG, "Failed to disable NS", e)
+            }
+            effect.release()
+        }
+        ns = null
+    }
+
+    /**
+     * Reader-thread body: copies frames from [recorder] into [channel] until the recorder
+     * leaves the recording state, a read fails, or the channel is closed.
+     */
+    private fun pumpFrames(recorder: AudioRecord, channel: Channel<ByteArray>) {
+        val buffer = ByteArray(frameBytes)
+        var dropped = 0
+        try {
+            while (recorder.recordingState == AudioRecord.RECORDSTATE_RECORDING) {
+                val read = recorder.read(buffer, 0, frameBytes)
+                if (read < 0) {
+                    // Negative values are error codes; retrying would only spin.
+                    Log.e(TAG, "AudioRecord.read failed: $read")
+                    break
+                }
+                if (read == 0) continue
+
+                val result = channel.trySend(buffer.copyOf(read))
+                if (result.isClosed) break
+                if (result.isFailure) {
+                    dropped++
+                    if (dropped % DROP_LOG_INTERVAL == 1) {
+                        Log.w(TAG, "Collector too slow, dropped $dropped frame(s) so far")
+                    }
+                }
+            }
+        } finally {
+            // Completes the flow handed to the collector of this session.
+            channel.close()
+        }
+    }
+
+    /** Tears down the active session. Must be called with [lock] held. */
+    private fun releaseSession() {
+        val recorder = audioRecord
+        if (recorder != null) {
+            audioChannel?.close()
+            try {
+                // stop() also unblocks a reader that is parked inside read().
+                recorder.stop()
+            } catch (e: IllegalStateException) {
+                Log.w(TAG, "AudioRecord.stop failed", e)
+            }
+            // Wait for the reader before release() so it never touches a released recorder.
+            readerThread?.let { reader ->
+                try {
+                    reader.join(READER_JOIN_TIMEOUT_MS)
+                } catch (e: InterruptedException) {
+                    Thread.currentThread().interrupt()
+                }
+            }
+            recorder.release()
+        }
+        releaseEffects()
+        audioRecord = null
+        audioChannel = null
+        readerThread = null
+    }
+}
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
@ -342,31 +342,9 @@ class VoiceController(
            return
        }
        LogUtils.d(TAG, "🧠 VAD END")
-        // 执行核心结束逻辑
-        doVadEndCoreLogic()
-    }
-
-    // 新增：VAD结束核心逻辑（抽离，便于手动调用）
-    private fun doVadEndCoreLogic() {
        finishSentence()
    }

-    // 修复：重构手动触发VAD结束逻辑，杜绝时序冲突和状态残留
-    private fun manualTriggerVadEnd() {
-        coroutineScope.launch(customIoDispatcher) {
-            // 增加日志，排查执行链路
-            LogUtils.d(TAG, "🔴 manualTriggerVadEnd 被调用，当前状态：$state，vadStarted：$vadStarted")
-            // 无论是否满足条件，都重置VAD运行状态，兜底防护
-            coroutineScope.launch { vadManager.reset() }
-            vadStarted = false
-
-            withContext(Dispatchers.Main.immediate) {
-                // 强制触发结束流程，不做状态拦截
-                doVadEndCoreLogic()
-                LogUtils.d(TAG, "🔴 声纹失败流程执行完成")
-            }
-        }
-    }

    /* ================= 声纹验证（统一Job管理 + 原生数组优化） ================= */
    private fun startSpeakerVerify() {
@ -411,15 +389,15 @@ class VoiceController(

        val audioMs = audio.size * 1000L / SAMPLE_RATE
        // 修复：严格模式下，短音频直接判定失败，不允许跳过
-        if (audioMs < MIN_VERIFY_MS) {
-            return if (ENABLE_STRICT_SPEAKER_VERIFY) {
-                LogUtils.w(TAG, "🔴 严格模式：短音频 $audioMs ms，声纹验证失败")
-                false
-            } else {
-                LogUtils.d(TAG, "🟡 非严格模式：短音频 $audioMs ms，跳过声纹")
-                true
-            }
-        }
+//        if (audioMs < MIN_VERIFY_MS) {
+//            return if (ENABLE_STRICT_SPEAKER_VERIFY) {
+//                LogUtils.w(TAG, "🔴 严格模式：短音频 $audioMs ms，声纹验证失败")
+//                false
+//            } else {
+//                LogUtils.d(TAG, "🟡 非严格模式：短音频 $audioMs ms，跳过声纹")
+//                true
+//            }
+//        }

        val verifyStartMs = System.currentTimeMillis()

@ -461,10 +439,13 @@ class VoiceController(
                // ================ 核心修复：移除状态判断，声纹失败强制触发终止流程 ================
                if (!pass) {
                    withContext(Dispatchers.Main.immediate) {
-                        // 直接标记失败并触发终止，无需校验状态，保证实时拦截
+                        // 标记失败状态
                        speakerVerifyState.failed = true
-                        manualTriggerVadEnd()
-                        LogUtils.d(TAG, "🔴 声纹验证不通过，立即触发终止流程")
+                        speakerVerifyState.finished = true
+                        LogUtils.d(TAG, "🔴 声纹验证不通过，安全终止录音流程")
+
+                        // 核心：主动结束录音，走标准finishSentence闭环，不暴力中断
+                        finishSentence()
                    }
                }
                pass
@ -479,22 +460,23 @@ class VoiceController(

    /* ================= 结束录音（优化：原生数组读取） ================= */
    private fun finishSentence() {
-        // 前置彻底重置声纹任务，防止并发干扰
+        // 取消旧任务
        speakerVerifyState.job?.cancel()
        speakerVerifyState.job = null

        val now = cachedTimeMs
        val duration = if (recordingStartMs != 0L) now - recordingStartMs else 0L

-        // ================ 修复：提升失败判断优先级，强制拦截 ================
+        // 声纹失败优先拦截，直接重置，不上传音频
        if (speakerVerifyState.failed) {
            LogUtils.w(TAG, "❌ 声纹标记为失败，强制拒绝本次语音")
            timeoutState.hasInvalidSpeech = true
+            // 重置到可交互状态，用户可以立即重新说话
            resetToWaitSpeech()
            return
        }

-        // 合并后的校验
+        // ============ 原有正常逻辑保留 ============
        val isSpeakerVerifyFailed = (ENABLE_STRICT_SPEAKER_VERIFY && speakerVerifyState.finished && !speakerVerifyState.passed)
        if (isSpeakerVerifyFailed) {
            LogUtils.w(TAG, "❌ 声纹验证未通过，拒绝本次语音")
@ -503,17 +485,15 @@ class VoiceController(
            return
        }

-        // 防护：无有效音频，直接拒绝
+        // 无音频防护
        if (audioBufferSize <= 0) {
            LogUtils.w(TAG, "❌ 无有效音频数据，拒绝上传")
            resetToWaitSpeech()
            return
        }

-        // 优化：仅拷贝有效数据，减少内存操作
+        // 正常上传逻辑
        val audio = audioBuffer.copyOfRange(0, audioBufferSize)
-
-        // 最终通过逻辑
        clearAudioBuffer()
        recordingStartMs = 0L
        state = VoiceState.UPLOADING
@ -570,15 +550,17 @@ class VoiceController(
        LogUtils.d(TAG, "🔄 重置到等待说话 | 已标记无效说话: ${timeoutState.hasInvalidSpeech}")
        val now = cachedTimeMs

-        // 基础重置：原子化清空所有核心状态
+        // 基础重置
        recordingStartMs = 0L
+        // 切换为等待说话状态，恢复监听
        state = VoiceState.WAIT_SPEECH
        coroutineScope.launch { vadManager.reset() }
        vadStarted = false
-        clearAudioBuffer() // 修复：彻底清空音频脏数据
-        synchronized(preBufferLock) { preBuffer.clear() } // 清空预缓存
+        // 清空所有音频缓存，杜绝脏数据
+        clearAudioBuffer()
+        synchronized(preBufferLock) { preBuffer.clear() }

-        // 修复：声纹失败场景，跳过防抖，执行完整重置
+        // 声纹失败场景：全量重置
        if (speakerVerifyState.failed) {
            LogUtils.d(TAG, "🛡 声纹失败，跳过防抖，强制全量重置")
            resetSpeakerVerifyState()
@ -587,13 +569,11 @@ class VoiceController(
            return
        }

-        // 普通无效语音：执行防抖逻辑
+        // 原有防抖逻辑
        if (now - timeoutState.lastInvalidResetMs < INVALID_RESET_DEBOUNCE_MS) {
-            LogUtils.d(TAG, "🛡 防抖：1.5秒内重复无效语音，跳过完整重置")
            return
        }

-        // 全量重置
        resetSpeakerVerifyState()
        timeoutState.lastInvalidResetMs = now
        timeoutState.waitSpeechFailStartMs = now
--- a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
+++ b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
@ -1,5 +1,6 @@
 package com.zs.smarthuman.ui

+import AudioPcmUtil.pcm16ToFloat
 import android.Manifest
 import android.annotation.SuppressLint
 import android.content.ComponentName
@ -51,6 +52,7 @@ import com.zs.smarthuman.http.ApiService
 import com.zs.smarthuman.im.chat.MessageContentType
 import com.zs.smarthuman.im.chat.bean.SingleMessage
 import com.zs.smarthuman.kt.releaseIM
+import com.zs.smarthuman.sherpa.AudioRecorder
 import com.zs.smarthuman.sherpa.TimeoutType
 import com.zs.smarthuman.sherpa.VoiceController
 import com.zs.smarthuman.toast.Toaster
@ -89,12 +91,11 @@ import java.io.File
 class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>() {

    private var voiceController: VoiceController? = null
-    private var audioRecord: AudioRecord? = null
-    private var isRecording = false
-    private val audioSource = MediaRecorder.AudioSource.MIC
-    private val sampleRateInHz = 16000
-    private val channelConfig = AudioFormat.CHANNEL_IN_MONO
-    private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
+
+
+    private var audioRecorder: AudioRecorder? = null
+    private var recordJob: Job? = null
+
    private var mVerticalAnimator: ViewSlideAnimator? = null

    private var versionUpdateDialog: VersionUpdateDialog? = null
@ -116,11 +117,16 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
    }

    override fun initData() {
+        audioRecorder = AudioRecorder(
+            sampleRate = 16000,
+            channels = 1,
+            frameSizeMs = 32 // 推荐 20~32ms
+        )
+
        PermissionUtils.permissionGroup(PermissionConstants.MICROPHONE)
            .callback(object : PermissionUtils.FullCallback {
                override fun onGranted(granted: List<String?>) {
                    initVoiceController()
-                    initMicrophone()
                    startRecording()
                }

@ -139,6 +145,30 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
        initObserver()
    }

+    /**
+     * Starts collecting microphone frames on [Dispatchers.IO] and forwards them, converted
+     * from PCM16 to float, to [voiceController]. Does nothing while a previous collection
+     * job is still active or when no recorder has been created yet.
+     */
+    private fun startRecording() {
+        // A finished job (e.g. an earlier failed start) must not block a new attempt.
+        if (recordJob?.isActive == true) return
+        val recorder = audioRecorder ?: run {
+            LogUtils.eTag("Audio", "audioRecorder is not initialized, skip recording")
+            return
+        }
+
+        recordJob = lifecycleScope.launch(Dispatchers.IO) {
+            // startRecording() throws synchronously (e.g. microphone unavailable or permission
+            // not granted yet), i.e. before any Flow exists for `.catch` to guard.
+            // The call does not suspend, so this catch cannot swallow coroutine cancellation.
+            val frames = try {
+                recorder.startRecording()
+            } catch (e: RuntimeException) {
+                LogUtils.eTag("Audio", "录音流异常", e)
+                return@launch
+            }
+
+            frames
+                .catch { e ->
+                    LogUtils.eTag("Audio", "录音流异常", e)
+                }
+                .collect { pcmBytes ->
+                    // PCM16 -> Float
+                    val floatAudio = pcm16ToFloat(pcmBytes)
+                    voiceController?.acceptAudio(floatAudio)
+                }
+        }
+    }
+
+    /**
+     * Cancels the frame-collection job, forgets it, and then asks the recorder to stop.
+     * Each step is skipped when the corresponding reference is null.
+     */
+    private fun stopRecording() {
+        val activeJob = recordJob
+        if (activeJob != null) {
+            activeJob.cancel()
+        }
+        recordJob = null
+
+        val recorder = audioRecorder
+        if (recorder != null) {
+            recorder.stopRecording()
+        }
+    }
+
+
    private fun requestUserInfo() {
        mViewModel?.getUserInfo()
    }
@ -309,133 +339,14 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
        super.onResume()
        UnityPlayerHolder.getInstance().attachToContainer(findViewById(R.id.fl_digital_human))
        UnityPlayerHolder.getInstance().resume()
+        startRecording()

    }

    override fun onPause() {
        super.onPause()
-//        stopRecording()
        UnityPlayerHolder.getInstance().pause()
-    }
-
-    @SuppressLint("MissingPermission")
-    private fun initMicrophone(): Boolean {
-        val minBuf = AudioRecord.getMinBufferSize(
-            sampleRateInHz, channelConfig, audioFormat
-        )
-        if (minBuf <= 0) {
-            Log.e("VoiceService", "Invalid min buffer size")
-            return false
-        }
-        val bufferSize = minBuf * 2
-        audioRecord = AudioRecord(
-            audioSource,
-            sampleRateInHz,
-            channelConfig,
-            audioFormat,
-            bufferSize
-        )
-
-        if (audioRecord?.state != AudioRecord.STATE_INITIALIZED) {
-            Log.e("VoiceService", "AudioRecord init failed")
-            audioRecord?.release()
-            audioRecord = null
-            return false
-        }
-        enableSystemAec(audioRecord!!)
-        enableSystemNs(audioRecord!!)
-        return true
-    }
-
-
-    private var aec: AcousticEchoCanceler? = null
-
-    private fun enableSystemAec(record: AudioRecord) {
-        if (!AcousticEchoCanceler.isAvailable()) {
-            Log.w("VoiceService", "System AEC not available")
-            return
-        }
-
-        aec = AcousticEchoCanceler.create(record.audioSessionId)
-        aec?.enabled = true
-
-        Log.d("VoiceService", "✅ System AEC enabled")
-    }
-
-    private var noiseSuppressor: NoiseSuppressor? = null
-
-
-    private fun enableSystemNs(record: AudioRecord) {
-        if (!NoiseSuppressor.isAvailable()) {
-            Log.w("VoiceService", "System NS not available")
-            return
-        }
-
-        noiseSuppressor = NoiseSuppressor.create(record.audioSessionId)
-        noiseSuppressor?.enabled = true
-
-        Log.d("VoiceService", "✅ System NoiseSuppressor enabled")
-    }
-
-
-    //    private val audioBuffer = mutableListOf<Float>()
-    //开始录音
-    private fun startRecording() {
-        if (isRecording) return
-        if (audioRecord == null && !initMicrophone()) return
-
-        try {
-            audioRecord?.startRecording()
-        } catch (e: IllegalStateException) {
-            recreateAudioRecord(); return
-        }
-        isRecording = true
-
-        lifecycleScope.launch(Dispatchers.IO) {
-            val buf = ShortArray(1024) // 32~64ms 够低延迟
-            val floatBuf = FloatArray(buf.size)
-
-            while (isRecording) {
-                val read = audioRecord?.read(buf, 0, buf.size) ?: break
-                if (read > 0) {
-                    // 避免每次都 new FloatArray，复用
-                    for (i in 0 until read) floatBuf[i] = buf[i] / 32768f
-                    val gg = floatBuf.copyOf(read)
-                    voiceController?.acceptAudio(gg)
-//                    audioBuffer.addAll(gg.asList())
-                }
-            }
-        }
-    }
-
-
-    private fun recreateAudioRecord() {
        stopRecording()
-        try {
-            audioRecord?.release()
-        } catch (_: Exception) {
-        }
-
-        audioRecord = null
-        initMicrophone()
-    }
-
-
-    //停止录音
-    fun stopRecording() {
-        isRecording = false
-        audioRecord?.run {
-            stop()
-            release()
-            audioRecord = null
-        }
-//        val file = File(
-//            getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
-//            "xxx.wav"
-//        )
-//        AudioDebugUtil.saveFloatPcmAsWav(audioBuffer.toFloatArray(), file)
-//        LogUtils.dTag("audioxx", "WAV saved: ${file.path}")
-//        audioBuffer.clear()
    }


@ -564,6 +475,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()


    private fun sendRecordVoiceToServer(audio: ByteArray) {
+        LogUtils.eTag("lrsxxx", "录音完成开始上传音频")
        cancelSSE()
        val request: Request? = RxHttp.postBody(ApiService.UPLOAD_RECORD_VOICE_URL)
            .setBody(audio)