From 3c9ec050c40da5269b56dfd3c1e2e23a666dbdf0 Mon Sep 17 00:00:00 2001 From: ross <3024454314@qq.com> Date: Sun, 8 Feb 2026 15:21:38 +0800 Subject: [PATCH] =?UTF-8?q?=E5=BC=BA=E5=8C=96=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/zs/smarthuman/sherpa/AudioRecorder.kt | 99 +++++++++++ .../zs/smarthuman/sherpa/VoiceController.kt | 76 +++----- .../java/com/zs/smarthuman/ui/MainActivity.kt | 166 ++++-------------- 3 files changed, 166 insertions(+), 175 deletions(-) create mode 100644 app/src/main/java/com/zs/smarthuman/sherpa/AudioRecorder.kt diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/AudioRecorder.kt b/app/src/main/java/com/zs/smarthuman/sherpa/AudioRecorder.kt new file mode 100644 index 0000000..7282f96 --- /dev/null +++ b/app/src/main/java/com/zs/smarthuman/sherpa/AudioRecorder.kt @@ -0,0 +1,99 @@ +package com.zs.smarthuman.sherpa + +import android.annotation.SuppressLint +import android.media.AudioFormat +import android.media.AudioRecord +import android.media.MediaRecorder +import android.media.audiofx.AcousticEchoCanceler +import android.media.audiofx.NoiseSuppressor +import android.util.Log +import kotlinx.coroutines.channels.Channel +import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.receiveAsFlow + +class AudioRecorder( + private val sampleRate: Int, + private val channels: Int, + private val frameSizeMs: Int +) { + companion object { + private const val TAG = "AudioRecorder" + } + + private val channelConfig = if (channels == 1) AudioFormat.CHANNEL_IN_MONO else AudioFormat.CHANNEL_IN_STEREO + private val bufferSize = AudioRecord.getMinBufferSize( + sampleRate, + channelConfig, + AudioFormat.ENCODING_PCM_16BIT + ) * 2 + private var audioRecord: AudioRecord? = null + private var aec: AcousticEchoCanceler? = null + private var ns: NoiseSuppressor? = null + private val frameSize = (sampleRate * frameSizeMs) / 1000 + private val frameBytes = frameSize * channels * 2 // 16-bit PCM + private val audioChannel = Channel(capacity = 50) + + + @SuppressLint("MissingPermission") + fun startRecording(): Flow { + audioRecord = AudioRecord( + MediaRecorder.AudioSource.MIC, + sampleRate, + channelConfig, + AudioFormat.ENCODING_PCM_16BIT, + bufferSize + ).apply { + if (state == AudioRecord.STATE_INITIALIZED) { + // 初始化 AEC + if (AcousticEchoCanceler.isAvailable()) { + aec = AcousticEchoCanceler.create(audioSessionId).apply { + enabled = true + Log.i(TAG, "AEC initialized") + } + } else { + Log.w(TAG, "AEC not available on this device") + } + + if(NoiseSuppressor.isAvailable()) { + ns = NoiseSuppressor.create(audioSessionId).apply { + enabled = true + Log.i(TAG, "NS initialized") + } + } else { + Log.w(TAG, "NS not available on this device") + } + + startRecording() + Log.i(TAG, "Started recording") + } else { + throw IllegalStateException("Failed to initialize AudioRecord") + } + } + + Thread { + val buffer = ByteArray(frameBytes) + while (audioRecord?.recordingState == AudioRecord.RECORDSTATE_RECORDING) { + val read = audioRecord?.read(buffer, 0, frameBytes) ?: 0 + if (read > 0) { + audioChannel.trySend(buffer.copyOf(read)).isSuccess + } + } + }.start() + + return audioChannel.receiveAsFlow() + } + + fun stopRecording() { + audioRecord?.stop() + audioRecord?.release() + audioRecord = null + aec?.enabled = false + aec?.release() + aec = null + ns?.enabled = false + ns?.release() + ns = null + audioChannel.close() + Log.i(TAG, "Stopped recording") + } +} \ No newline at end of file diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt index b2acde2..5c90b5e 100644 --- a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt +++ b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt @@ -342,31 +342,9 @@ class VoiceController( return } LogUtils.d(TAG, "🧠 VAD END") - // 执行核心结束逻辑 - doVadEndCoreLogic() - } - - // 新增:VAD结束核心逻辑(抽离,便于手动调用) - private fun doVadEndCoreLogic() { finishSentence() } - // 修复:重构手动触发VAD结束逻辑,杜绝时序冲突和状态残留 - private fun manualTriggerVadEnd() { - coroutineScope.launch(customIoDispatcher) { - // 增加日志,排查执行链路 - LogUtils.d(TAG, "🔴 manualTriggerVadEnd 被调用,当前状态:$state,vadStarted:$vadStarted") - // 无论是否满足条件,都重置VAD运行状态,兜底防护 - coroutineScope.launch { vadManager.reset() } - vadStarted = false - - withContext(Dispatchers.Main.immediate) { - // 强制触发结束流程,不做状态拦截 - doVadEndCoreLogic() - LogUtils.d(TAG, "🔴 声纹失败流程执行完成") - } - } - } /* ================= 声纹验证(统一Job管理 + 原生数组优化) ================= */ private fun startSpeakerVerify() { @@ -411,15 +389,15 @@ class VoiceController( val audioMs = audio.size * 1000L / SAMPLE_RATE // 修复:严格模式下,短音频直接判定失败,不允许跳过 - if (audioMs < MIN_VERIFY_MS) { - return if (ENABLE_STRICT_SPEAKER_VERIFY) { - LogUtils.w(TAG, "🔴 严格模式:短音频 $audioMs ms,声纹验证失败") - false - } else { - LogUtils.d(TAG, "🟡 非严格模式:短音频 $audioMs ms,跳过声纹") - true - } - } +// if (audioMs < MIN_VERIFY_MS) { +// return if (ENABLE_STRICT_SPEAKER_VERIFY) { +// LogUtils.w(TAG, "🔴 严格模式:短音频 $audioMs ms,声纹验证失败") +// false +// } else { +// LogUtils.d(TAG, "🟡 非严格模式:短音频 $audioMs ms,跳过声纹") +// true +// } +// } val verifyStartMs = System.currentTimeMillis() @@ -461,10 +439,13 @@ class VoiceController( // ================ 核心修复:移除状态判断,声纹失败强制触发终止流程 ================ if (!pass) { withContext(Dispatchers.Main.immediate) { - // 直接标记失败并触发终止,无需校验状态,保证实时拦截 + // 标记失败状态 speakerVerifyState.failed = true - manualTriggerVadEnd() - LogUtils.d(TAG, "🔴 声纹验证不通过,立即触发终止流程") + speakerVerifyState.finished = true + LogUtils.d(TAG, "🔴 声纹验证不通过,安全终止录音流程") + + // 核心:主动结束录音,走标准finishSentence闭环,不暴力中断 + finishSentence() } } pass @@ -479,22 +460,23 @@ class VoiceController( /* ================= 结束录音(优化:原生数组读取) ================= */ private fun finishSentence() { - // 前置彻底重置声纹任务,防止并发干扰 + // 取消旧任务 speakerVerifyState.job?.cancel() speakerVerifyState.job = null val now = cachedTimeMs val duration = if (recordingStartMs != 0L) now - recordingStartMs else 0L - // ================ 修复:提升失败判断优先级,强制拦截 ================ + // 声纹失败优先拦截,直接重置,不上传音频 if (speakerVerifyState.failed) { LogUtils.w(TAG, "❌ 声纹标记为失败,强制拒绝本次语音") timeoutState.hasInvalidSpeech = true + // 重置到可交互状态,用户可以立即重新说话 resetToWaitSpeech() return } - // 合并后的校验 + // ============ 原有正常逻辑保留 ============ val isSpeakerVerifyFailed = (ENABLE_STRICT_SPEAKER_VERIFY && speakerVerifyState.finished && !speakerVerifyState.passed) if (isSpeakerVerifyFailed) { LogUtils.w(TAG, "❌ 声纹验证未通过,拒绝本次语音") @@ -503,17 +485,15 @@ class VoiceController( return } - // 防护:无有效音频,直接拒绝 + // 无音频防护 if (audioBufferSize <= 0) { LogUtils.w(TAG, "❌ 无有效音频数据,拒绝上传") resetToWaitSpeech() return } - // 优化:仅拷贝有效数据,减少内存操作 + // 正常上传逻辑 val audio = audioBuffer.copyOfRange(0, audioBufferSize) - - // 最终通过逻辑 clearAudioBuffer() recordingStartMs = 0L state = VoiceState.UPLOADING @@ -570,15 +550,17 @@ class VoiceController( LogUtils.d(TAG, "🔄 重置到等待说话 | 已标记无效说话: ${timeoutState.hasInvalidSpeech}") val now = cachedTimeMs - // 基础重置:原子化清空所有核心状态 + // 基础重置 recordingStartMs = 0L + // 切换为等待说话状态,恢复监听 state = VoiceState.WAIT_SPEECH coroutineScope.launch { vadManager.reset() } vadStarted = false - clearAudioBuffer() // 修复:彻底清空音频脏数据 - synchronized(preBufferLock) { preBuffer.clear() } // 清空预缓存 + // 清空所有音频缓存,杜绝脏数据 + clearAudioBuffer() + synchronized(preBufferLock) { preBuffer.clear() } - // 修复:声纹失败场景,跳过防抖,执行完整重置 + // 声纹失败场景:全量重置 if (speakerVerifyState.failed) { LogUtils.d(TAG, "🛡 声纹失败,跳过防抖,强制全量重置") resetSpeakerVerifyState() @@ -587,13 +569,11 @@ class VoiceController( return } - // 普通无效语音:执行防抖逻辑 + // 原有防抖逻辑 if (now - timeoutState.lastInvalidResetMs < INVALID_RESET_DEBOUNCE_MS) { - LogUtils.d(TAG, "🛡 防抖:1.5秒内重复无效语音,跳过完整重置") return } - // 全量重置 resetSpeakerVerifyState() timeoutState.lastInvalidResetMs = now timeoutState.waitSpeechFailStartMs = now diff --git a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt index 1ba791d..9e733f1 100644 --- a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt +++ b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt @@ -1,5 +1,6 @@ package com.zs.smarthuman.ui +import AudioPcmUtil.pcm16ToFloat import android.Manifest import android.annotation.SuppressLint import android.content.ComponentName @@ -51,6 +52,7 @@ import com.zs.smarthuman.http.ApiService import com.zs.smarthuman.im.chat.MessageContentType import com.zs.smarthuman.im.chat.bean.SingleMessage import com.zs.smarthuman.kt.releaseIM +import com.zs.smarthuman.sherpa.AudioRecorder import com.zs.smarthuman.sherpa.TimeoutType import com.zs.smarthuman.sherpa.VoiceController import com.zs.smarthuman.toast.Toaster @@ -89,12 +91,11 @@ import java.io.File class MainActivity : BaseViewModelActivity() { private var voiceController: VoiceController? = null - private var audioRecord: AudioRecord? = null - private var isRecording = false - private val audioSource = MediaRecorder.AudioSource.MIC - private val sampleRateInHz = 16000 - private val channelConfig = AudioFormat.CHANNEL_IN_MONO - private val audioFormat = AudioFormat.ENCODING_PCM_16BIT + + + private var audioRecorder: AudioRecorder? = null + private var recordJob: Job? = null + private var mVerticalAnimator: ViewSlideAnimator? = null private var versionUpdateDialog: VersionUpdateDialog? = null @@ -116,11 +117,16 @@ class MainActivity : BaseViewModelActivity() } override fun initData() { + audioRecorder = AudioRecorder( + sampleRate = 16000, + channels = 1, + frameSizeMs = 32 // 推荐 20~32ms + ) + PermissionUtils.permissionGroup(PermissionConstants.MICROPHONE) .callback(object : PermissionUtils.FullCallback { override fun onGranted(granted: List) { initVoiceController() - initMicrophone() startRecording() } @@ -139,6 +145,30 @@ class MainActivity : BaseViewModelActivity() initObserver() } + private fun startRecording() { + if (recordJob != null) return + + recordJob = lifecycleScope.launch(Dispatchers.IO) { + audioRecorder!! + .startRecording() + .catch { e -> + LogUtils.eTag("Audio", "录音流异常", e) + } + .collect { pcmBytes -> + // PCM16 -> Float + val floatAudio = pcm16ToFloat(pcmBytes) + voiceController?.acceptAudio(floatAudio) + } + } + } + + private fun stopRecording() { + recordJob?.cancel() + recordJob = null + audioRecorder?.stopRecording() + } + + private fun requestUserInfo() { mViewModel?.getUserInfo() } @@ -309,133 +339,14 @@ class MainActivity : BaseViewModelActivity() super.onResume() UnityPlayerHolder.getInstance().attachToContainer(findViewById(R.id.fl_digital_human)) UnityPlayerHolder.getInstance().resume() + startRecording() } override fun onPause() { super.onPause() -// stopRecording() UnityPlayerHolder.getInstance().pause() - } - - @SuppressLint("MissingPermission") - private fun initMicrophone(): Boolean { - val minBuf = AudioRecord.getMinBufferSize( - sampleRateInHz, channelConfig, audioFormat - ) - if (minBuf <= 0) { - Log.e("VoiceService", "Invalid min buffer size") - return false - } - val bufferSize = minBuf * 2 - audioRecord = AudioRecord( - audioSource, - sampleRateInHz, - channelConfig, - audioFormat, - bufferSize - ) - - if (audioRecord?.state != AudioRecord.STATE_INITIALIZED) { - Log.e("VoiceService", "AudioRecord init failed") - audioRecord?.release() - audioRecord = null - return false - } - enableSystemAec(audioRecord!!) - enableSystemNs(audioRecord!!) - return true - } - - - private var aec: AcousticEchoCanceler? = null - - private fun enableSystemAec(record: AudioRecord) { - if (!AcousticEchoCanceler.isAvailable()) { - Log.w("VoiceService", "System AEC not available") - return - } - - aec = AcousticEchoCanceler.create(record.audioSessionId) - aec?.enabled = true - - Log.d("VoiceService", "✅ System AEC enabled") - } - - private var noiseSuppressor: NoiseSuppressor? = null - - - private fun enableSystemNs(record: AudioRecord) { - if (!NoiseSuppressor.isAvailable()) { - Log.w("VoiceService", "System NS not available") - return - } - - noiseSuppressor = NoiseSuppressor.create(record.audioSessionId) - noiseSuppressor?.enabled = true - - Log.d("VoiceService", "✅ System NoiseSuppressor enabled") - } - - - // private val audioBuffer = mutableListOf() - //开始录音 - private fun startRecording() { - if (isRecording) return - if (audioRecord == null && !initMicrophone()) return - - try { - audioRecord?.startRecording() - } catch (e: IllegalStateException) { - recreateAudioRecord(); return - } - isRecording = true - - lifecycleScope.launch(Dispatchers.IO) { - val buf = ShortArray(1024) // 32~64ms 够低延迟 - val floatBuf = FloatArray(buf.size) - - while (isRecording) { - val read = audioRecord?.read(buf, 0, buf.size) ?: break - if (read > 0) { - // 避免每次都 new FloatArray,复用 - for (i in 0 until read) floatBuf[i] = buf[i] / 32768f - val gg = floatBuf.copyOf(read) - voiceController?.acceptAudio(gg) -// audioBuffer.addAll(gg.asList()) - } - } - } - } - - - private fun recreateAudioRecord() { stopRecording() - try { - audioRecord?.release() - } catch (_: Exception) { - } - - audioRecord = null - initMicrophone() - } - - - //停止录音 - fun stopRecording() { - isRecording = false - audioRecord?.run { - stop() - release() - audioRecord = null - } -// val file = File( -// getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(), -// "xxx.wav" -// ) -// AudioDebugUtil.saveFloatPcmAsWav(audioBuffer.toFloatArray(), file) -// LogUtils.dTag("audioxx", "WAV saved: ${file.path}") -// audioBuffer.clear() } @@ -564,6 +475,7 @@ class MainActivity : BaseViewModelActivity() private fun sendRecordVoiceToServer(audio: ByteArray) { + LogUtils.eTag("lrsxxx", "录音完成开始上传音频") cancelSSE() val request: Request? = RxHttp.postBody(ApiService.UPLOAD_RECORD_VOICE_URL) .setBody(audio)