强化代码

This commit is contained in:
林若思 2026-02-08 15:21:38 +08:00
parent c20172ab1f
commit 3c9ec050c4
3 changed files with 166 additions and 175 deletions

View File

@ -0,0 +1,99 @@
package com.zs.smarthuman.sherpa
import android.annotation.SuppressLint
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.media.audiofx.AcousticEchoCanceler
import android.media.audiofx.NoiseSuppressor
import android.util.Log
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.receiveAsFlow
/**
 * Captures 16-bit PCM microphone audio and exposes it as a stream of
 * fixed-duration frames via a [Flow] of byte arrays.
 *
 * Platform acoustic echo cancellation (AEC) and noise suppression (NS) are
 * enabled when the device supports them.
 *
 * Lifecycle: a single start/stop cycle per instance — [stopRecording] closes
 * the underlying channel, so create a new instance to record again.
 *
 * @param sampleRate  capture sample rate in Hz (e.g. 16000)
 * @param channels    1 selects mono; any other value selects stereo
 * @param frameSizeMs duration of each emitted frame in milliseconds
 */
class AudioRecorder(
    private val sampleRate: Int,
    private val channels: Int,
    private val frameSizeMs: Int
) {
    private val channelConfig =
        if (channels == 1) AudioFormat.CHANNEL_IN_MONO else AudioFormat.CHANNEL_IN_STEREO

    // Double the platform minimum to reduce the chance of overrun glitches.
    private val bufferSize = AudioRecord.getMinBufferSize(
        sampleRate,
        channelConfig,
        AudioFormat.ENCODING_PCM_16BIT
    ) * 2

    private var audioRecord: AudioRecord? = null
    private var aec: AcousticEchoCanceler? = null
    private var ns: NoiseSuppressor? = null

    // Samples per emitted frame, and its size in bytes (16-bit PCM => 2 bytes/sample).
    private val frameSize = (sampleRate * frameSizeMs) / 1000
    private val frameBytes = frameSize * channels * 2

    // Bounded hand-off between the capture thread and the collector. When the
    // consumer falls behind (channel full) frames are dropped, never blocked on.
    private val audioChannel = Channel<ByteArray>(capacity = 50)

    /**
     * Starts microphone capture and returns the flow of PCM frames.
     *
     * @throws IllegalStateException if capture is already running, or if the
     *         [AudioRecord] fails to initialize (e.g. missing permission,
     *         unsupported configuration).
     */
    @SuppressLint("MissingPermission")
    fun startRecording(): Flow<ByteArray> {
        // Guard against double-start: the original field would be silently
        // overwritten, leaking a live recorder that can never be released.
        check(audioRecord == null) { "startRecording() called while already recording" }

        val record = AudioRecord(
            MediaRecorder.AudioSource.MIC,
            sampleRate,
            channelConfig,
            AudioFormat.ENCODING_PCM_16BIT,
            bufferSize
        )
        if (record.state != AudioRecord.STATE_INITIALIZED) {
            record.release() // don't leak the native session on failure
            throw IllegalStateException("Failed to initialize AudioRecord")
        }

        // Attach platform echo cancellation when available.
        if (AcousticEchoCanceler.isAvailable()) {
            aec = AcousticEchoCanceler.create(record.audioSessionId)?.apply {
                enabled = true
                Log.i(TAG, "AEC initialized")
            }
        } else {
            Log.w(TAG, "AEC not available on this device")
        }
        // Attach platform noise suppression when available.
        if (NoiseSuppressor.isAvailable()) {
            ns = NoiseSuppressor.create(record.audioSessionId)?.apply {
                enabled = true
                Log.i(TAG, "NS initialized")
            }
        } else {
            Log.w(TAG, "NS not available on this device")
        }

        audioRecord = record
        record.startRecording()
        Log.i(TAG, "Started recording")

        // Capture thread holds its own reference: re-reading the mutable
        // `audioRecord` field (as the original did) races with stopRecording()
        // nulling it and releasing the recorder between the state check and
        // read(), which can throw on a released instance.
        Thread {
            val buffer = ByteArray(frameBytes)
            try {
                while (record.recordingState == AudioRecord.RECORDSTATE_RECORDING) {
                    val read = record.read(buffer, 0, frameBytes)
                    if (read > 0) {
                        // Copy only the bytes actually read; drop (and log)
                        // when the channel is full or already closed.
                        if (!audioChannel.trySend(buffer.copyOf(read)).isSuccess) {
                            Log.w(TAG, "Audio frame dropped (consumer too slow or channel closed)")
                        }
                    } else if (read < 0) {
                        // Negative values are error codes; the recorder is
                        // unusable, so stop instead of spinning on errors.
                        Log.e(TAG, "AudioRecord.read failed with code $read")
                        break
                    }
                }
            } catch (e: IllegalStateException) {
                // stopRecording() may release the recorder mid-read; treat as
                // a normal shutdown of the capture loop.
                Log.w(TAG, "Capture loop terminated", e)
            }
        }.start()

        return audioChannel.receiveAsFlow()
    }

    /**
     * Stops capture, releases the recorder and audio effects, and closes the
     * frame stream. Safe to call when not recording. The instance cannot be
     * restarted afterwards because the channel is closed.
     */
    fun stopRecording() {
        try {
            audioRecord?.stop()
        } catch (e: IllegalStateException) {
            // stop() throws if the recorder was never successfully started.
            Log.w(TAG, "stop() called on a non-started recorder", e)
        }
        audioRecord?.release()
        audioRecord = null
        aec?.enabled = false
        aec?.release()
        aec = null
        ns?.enabled = false
        ns?.release()
        ns = null
        audioChannel.close()
        Log.i(TAG, "Stopped recording")
    }

    companion object {
        private const val TAG = "AudioRecorder"
    }
}

View File

@ -342,31 +342,9 @@ class VoiceController(
return return
} }
LogUtils.d(TAG, "🧠 VAD END") LogUtils.d(TAG, "🧠 VAD END")
// 执行核心结束逻辑
doVadEndCoreLogic()
}
// 新增VAD结束核心逻辑抽离便于手动调用
private fun doVadEndCoreLogic() {
finishSentence() finishSentence()
} }
// 修复重构手动触发VAD结束逻辑杜绝时序冲突和状态残留
private fun manualTriggerVadEnd() {
coroutineScope.launch(customIoDispatcher) {
// 增加日志,排查执行链路
LogUtils.d(TAG, "🔴 manualTriggerVadEnd 被调用,当前状态:$statevadStarted$vadStarted")
// 无论是否满足条件都重置VAD运行状态兜底防护
coroutineScope.launch { vadManager.reset() }
vadStarted = false
withContext(Dispatchers.Main.immediate) {
// 强制触发结束流程,不做状态拦截
doVadEndCoreLogic()
LogUtils.d(TAG, "🔴 声纹失败流程执行完成")
}
}
}
/* ================= 声纹验证统一Job管理 + 原生数组优化) ================= */ /* ================= 声纹验证统一Job管理 + 原生数组优化) ================= */
private fun startSpeakerVerify() { private fun startSpeakerVerify() {
@ -411,15 +389,15 @@ class VoiceController(
val audioMs = audio.size * 1000L / SAMPLE_RATE val audioMs = audio.size * 1000L / SAMPLE_RATE
// 修复:严格模式下,短音频直接判定失败,不允许跳过 // 修复:严格模式下,短音频直接判定失败,不允许跳过
if (audioMs < MIN_VERIFY_MS) { // if (audioMs < MIN_VERIFY_MS) {
return if (ENABLE_STRICT_SPEAKER_VERIFY) { // return if (ENABLE_STRICT_SPEAKER_VERIFY) {
LogUtils.w(TAG, "🔴 严格模式:短音频 $audioMs ms声纹验证失败") // LogUtils.w(TAG, "🔴 严格模式:短音频 $audioMs ms声纹验证失败")
false // false
} else { // } else {
LogUtils.d(TAG, "🟡 非严格模式:短音频 $audioMs ms跳过声纹") // LogUtils.d(TAG, "🟡 非严格模式:短音频 $audioMs ms跳过声纹")
true // true
} // }
} // }
val verifyStartMs = System.currentTimeMillis() val verifyStartMs = System.currentTimeMillis()
@ -461,10 +439,13 @@ class VoiceController(
// ================ 核心修复:移除状态判断,声纹失败强制触发终止流程 ================ // ================ 核心修复:移除状态判断,声纹失败强制触发终止流程 ================
if (!pass) { if (!pass) {
withContext(Dispatchers.Main.immediate) { withContext(Dispatchers.Main.immediate) {
// 直接标记失败并触发终止,无需校验状态,保证实时拦截 // 标记失败状态
speakerVerifyState.failed = true speakerVerifyState.failed = true
manualTriggerVadEnd() speakerVerifyState.finished = true
LogUtils.d(TAG, "🔴 声纹验证不通过,立即触发终止流程") LogUtils.d(TAG, "🔴 声纹验证不通过,安全终止录音流程")
// 核心主动结束录音走标准finishSentence闭环不暴力中断
finishSentence()
} }
} }
pass pass
@ -479,22 +460,23 @@ class VoiceController(
/* ================= 结束录音(优化:原生数组读取) ================= */ /* ================= 结束录音(优化:原生数组读取) ================= */
private fun finishSentence() { private fun finishSentence() {
// 前置彻底重置声纹任务,防止并发干扰 // 取消旧任务
speakerVerifyState.job?.cancel() speakerVerifyState.job?.cancel()
speakerVerifyState.job = null speakerVerifyState.job = null
val now = cachedTimeMs val now = cachedTimeMs
val duration = if (recordingStartMs != 0L) now - recordingStartMs else 0L val duration = if (recordingStartMs != 0L) now - recordingStartMs else 0L
// ================ 修复:提升失败判断优先级,强制拦截 ================ // 声纹失败优先拦截,直接重置,不上传音频
if (speakerVerifyState.failed) { if (speakerVerifyState.failed) {
LogUtils.w(TAG, "❌ 声纹标记为失败,强制拒绝本次语音") LogUtils.w(TAG, "❌ 声纹标记为失败,强制拒绝本次语音")
timeoutState.hasInvalidSpeech = true timeoutState.hasInvalidSpeech = true
// 重置到可交互状态,用户可以立即重新说话
resetToWaitSpeech() resetToWaitSpeech()
return return
} }
// 合并后的校验 // ============ 原有正常逻辑保留 ============
val isSpeakerVerifyFailed = (ENABLE_STRICT_SPEAKER_VERIFY && speakerVerifyState.finished && !speakerVerifyState.passed) val isSpeakerVerifyFailed = (ENABLE_STRICT_SPEAKER_VERIFY && speakerVerifyState.finished && !speakerVerifyState.passed)
if (isSpeakerVerifyFailed) { if (isSpeakerVerifyFailed) {
LogUtils.w(TAG, "❌ 声纹验证未通过,拒绝本次语音") LogUtils.w(TAG, "❌ 声纹验证未通过,拒绝本次语音")
@ -503,17 +485,15 @@ class VoiceController(
return return
} }
// 防护:无有效音频,直接拒绝 // 无音频防护
if (audioBufferSize <= 0) { if (audioBufferSize <= 0) {
LogUtils.w(TAG, "❌ 无有效音频数据,拒绝上传") LogUtils.w(TAG, "❌ 无有效音频数据,拒绝上传")
resetToWaitSpeech() resetToWaitSpeech()
return return
} }
// 优化:仅拷贝有效数据,减少内存操作 // 正常上传逻辑
val audio = audioBuffer.copyOfRange(0, audioBufferSize) val audio = audioBuffer.copyOfRange(0, audioBufferSize)
// 最终通过逻辑
clearAudioBuffer() clearAudioBuffer()
recordingStartMs = 0L recordingStartMs = 0L
state = VoiceState.UPLOADING state = VoiceState.UPLOADING
@ -570,15 +550,17 @@ class VoiceController(
LogUtils.d(TAG, "🔄 重置到等待说话 | 已标记无效说话: ${timeoutState.hasInvalidSpeech}") LogUtils.d(TAG, "🔄 重置到等待说话 | 已标记无效说话: ${timeoutState.hasInvalidSpeech}")
val now = cachedTimeMs val now = cachedTimeMs
// 基础重置:原子化清空所有核心状态 // 基础重置
recordingStartMs = 0L recordingStartMs = 0L
// 切换为等待说话状态,恢复监听
state = VoiceState.WAIT_SPEECH state = VoiceState.WAIT_SPEECH
coroutineScope.launch { vadManager.reset() } coroutineScope.launch { vadManager.reset() }
vadStarted = false vadStarted = false
clearAudioBuffer() // 修复:彻底清空音频脏数据 // 清空所有音频缓存,杜绝脏数据
synchronized(preBufferLock) { preBuffer.clear() } // 清空预缓存 clearAudioBuffer()
synchronized(preBufferLock) { preBuffer.clear() }
// 修复:声纹失败场景,跳过防抖,执行完整重置 // 声纹失败场景:全量重置
if (speakerVerifyState.failed) { if (speakerVerifyState.failed) {
LogUtils.d(TAG, "🛡 声纹失败,跳过防抖,强制全量重置") LogUtils.d(TAG, "🛡 声纹失败,跳过防抖,强制全量重置")
resetSpeakerVerifyState() resetSpeakerVerifyState()
@ -587,13 +569,11 @@ class VoiceController(
return return
} }
// 普通无效语音:执行防抖逻辑 // 原有防抖逻辑
if (now - timeoutState.lastInvalidResetMs < INVALID_RESET_DEBOUNCE_MS) { if (now - timeoutState.lastInvalidResetMs < INVALID_RESET_DEBOUNCE_MS) {
LogUtils.d(TAG, "🛡 防抖1.5秒内重复无效语音,跳过完整重置")
return return
} }
// 全量重置
resetSpeakerVerifyState() resetSpeakerVerifyState()
timeoutState.lastInvalidResetMs = now timeoutState.lastInvalidResetMs = now
timeoutState.waitSpeechFailStartMs = now timeoutState.waitSpeechFailStartMs = now

View File

@ -1,5 +1,6 @@
package com.zs.smarthuman.ui package com.zs.smarthuman.ui
import AudioPcmUtil.pcm16ToFloat
import android.Manifest import android.Manifest
import android.annotation.SuppressLint import android.annotation.SuppressLint
import android.content.ComponentName import android.content.ComponentName
@ -51,6 +52,7 @@ import com.zs.smarthuman.http.ApiService
import com.zs.smarthuman.im.chat.MessageContentType import com.zs.smarthuman.im.chat.MessageContentType
import com.zs.smarthuman.im.chat.bean.SingleMessage import com.zs.smarthuman.im.chat.bean.SingleMessage
import com.zs.smarthuman.kt.releaseIM import com.zs.smarthuman.kt.releaseIM
import com.zs.smarthuman.sherpa.AudioRecorder
import com.zs.smarthuman.sherpa.TimeoutType import com.zs.smarthuman.sherpa.TimeoutType
import com.zs.smarthuman.sherpa.VoiceController import com.zs.smarthuman.sherpa.VoiceController
import com.zs.smarthuman.toast.Toaster import com.zs.smarthuman.toast.Toaster
@ -89,12 +91,11 @@ import java.io.File
class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>() { class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>() {
private var voiceController: VoiceController? = null private var voiceController: VoiceController? = null
private var audioRecord: AudioRecord? = null
private var isRecording = false
private val audioSource = MediaRecorder.AudioSource.MIC private var audioRecorder: AudioRecorder? = null
private val sampleRateInHz = 16000 private var recordJob: Job? = null
private val channelConfig = AudioFormat.CHANNEL_IN_MONO
private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
private var mVerticalAnimator: ViewSlideAnimator? = null private var mVerticalAnimator: ViewSlideAnimator? = null
private var versionUpdateDialog: VersionUpdateDialog? = null private var versionUpdateDialog: VersionUpdateDialog? = null
@ -116,11 +117,16 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
} }
override fun initData() { override fun initData() {
audioRecorder = AudioRecorder(
sampleRate = 16000,
channels = 1,
frameSizeMs = 32 // 推荐 20~32ms
)
PermissionUtils.permissionGroup(PermissionConstants.MICROPHONE) PermissionUtils.permissionGroup(PermissionConstants.MICROPHONE)
.callback(object : PermissionUtils.FullCallback { .callback(object : PermissionUtils.FullCallback {
override fun onGranted(granted: List<String?>) { override fun onGranted(granted: List<String?>) {
initVoiceController() initVoiceController()
initMicrophone()
startRecording() startRecording()
} }
@ -139,6 +145,30 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
initObserver() initObserver()
} }
private fun startRecording() {
if (recordJob != null) return
recordJob = lifecycleScope.launch(Dispatchers.IO) {
audioRecorder!!
.startRecording()
.catch { e ->
LogUtils.eTag("Audio", "录音流异常", e)
}
.collect { pcmBytes ->
// PCM16 -> Float
val floatAudio = pcm16ToFloat(pcmBytes)
voiceController?.acceptAudio(floatAudio)
}
}
}
private fun stopRecording() {
recordJob?.cancel()
recordJob = null
audioRecorder?.stopRecording()
}
private fun requestUserInfo() { private fun requestUserInfo() {
mViewModel?.getUserInfo() mViewModel?.getUserInfo()
} }
@ -309,133 +339,14 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
super.onResume() super.onResume()
UnityPlayerHolder.getInstance().attachToContainer(findViewById(R.id.fl_digital_human)) UnityPlayerHolder.getInstance().attachToContainer(findViewById(R.id.fl_digital_human))
UnityPlayerHolder.getInstance().resume() UnityPlayerHolder.getInstance().resume()
startRecording()
} }
override fun onPause() { override fun onPause() {
super.onPause() super.onPause()
// stopRecording()
UnityPlayerHolder.getInstance().pause() UnityPlayerHolder.getInstance().pause()
}
@SuppressLint("MissingPermission")
private fun initMicrophone(): Boolean {
val minBuf = AudioRecord.getMinBufferSize(
sampleRateInHz, channelConfig, audioFormat
)
if (minBuf <= 0) {
Log.e("VoiceService", "Invalid min buffer size")
return false
}
val bufferSize = minBuf * 2
audioRecord = AudioRecord(
audioSource,
sampleRateInHz,
channelConfig,
audioFormat,
bufferSize
)
if (audioRecord?.state != AudioRecord.STATE_INITIALIZED) {
Log.e("VoiceService", "AudioRecord init failed")
audioRecord?.release()
audioRecord = null
return false
}
enableSystemAec(audioRecord!!)
enableSystemNs(audioRecord!!)
return true
}
private var aec: AcousticEchoCanceler? = null
private fun enableSystemAec(record: AudioRecord) {
if (!AcousticEchoCanceler.isAvailable()) {
Log.w("VoiceService", "System AEC not available")
return
}
aec = AcousticEchoCanceler.create(record.audioSessionId)
aec?.enabled = true
Log.d("VoiceService", "✅ System AEC enabled")
}
private var noiseSuppressor: NoiseSuppressor? = null
private fun enableSystemNs(record: AudioRecord) {
if (!NoiseSuppressor.isAvailable()) {
Log.w("VoiceService", "System NS not available")
return
}
noiseSuppressor = NoiseSuppressor.create(record.audioSessionId)
noiseSuppressor?.enabled = true
Log.d("VoiceService", "✅ System NoiseSuppressor enabled")
}
// private val audioBuffer = mutableListOf<Float>()
//开始录音
private fun startRecording() {
if (isRecording) return
if (audioRecord == null && !initMicrophone()) return
try {
audioRecord?.startRecording()
} catch (e: IllegalStateException) {
recreateAudioRecord(); return
}
isRecording = true
lifecycleScope.launch(Dispatchers.IO) {
val buf = ShortArray(1024) // 32~64ms 够低延迟
val floatBuf = FloatArray(buf.size)
while (isRecording) {
val read = audioRecord?.read(buf, 0, buf.size) ?: break
if (read > 0) {
// 避免每次都 new FloatArray复用
for (i in 0 until read) floatBuf[i] = buf[i] / 32768f
val gg = floatBuf.copyOf(read)
voiceController?.acceptAudio(gg)
// audioBuffer.addAll(gg.asList())
}
}
}
}
private fun recreateAudioRecord() {
stopRecording() stopRecording()
try {
audioRecord?.release()
} catch (_: Exception) {
}
audioRecord = null
initMicrophone()
}
//停止录音
fun stopRecording() {
isRecording = false
audioRecord?.run {
stop()
release()
audioRecord = null
}
// val file = File(
// getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
// "xxx.wav"
// )
// AudioDebugUtil.saveFloatPcmAsWav(audioBuffer.toFloatArray(), file)
// LogUtils.dTag("audioxx", "WAV saved: ${file.path}")
// audioBuffer.clear()
} }
@ -564,6 +475,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
private fun sendRecordVoiceToServer(audio: ByteArray) { private fun sendRecordVoiceToServer(audio: ByteArray) {
LogUtils.eTag("lrsxxx", "录音完成开始上传音频")
cancelSSE() cancelSSE()
val request: Request? = RxHttp.postBody(ApiService.UPLOAD_RECORD_VOICE_URL) val request: Request? = RxHttp.postBody(ApiService.UPLOAD_RECORD_VOICE_URL)
.setBody(audio) .setBody(audio)