diff --git a/app/src/main/assets/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx b/app/src/main/assets/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
index dd917a2..65ad555 100644
Binary files a/app/src/main/assets/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx and b/app/src/main/assets/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx differ
diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
index 8e589d6..1ceaace 100644
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
@@ -2,203 +2,100 @@ package com.zs.smarthuman.sherpa
 import android.content.res.AssetManager
 import com.blankj.utilcode.util.LogUtils
+import com.k2fsa.sherpa.onnx.SileroVadModelConfig
 import com.k2fsa.sherpa.onnx.Vad
+import com.k2fsa.sherpa.onnx.VadModelConfig
 import com.k2fsa.sherpa.onnx.getVadModelConfig
 import kotlin.math.sqrt

 class VadManager(
     assetManager: AssetManager,
     private val onSpeechStart: () -> Unit,
-    private val onSpeechEnd: (avgEnergy: Float, peakRms: Float) -> Unit
+    private val onSpeechEnd: () -> Unit
 ) {
-    private val TAG = "SmartHuman-VadManager"
+    private val TAG = "VadManager"
     private val vad: Vad
+
     private var isSpeaking = false
     private var lastSpeechTime = 0L

-    // ========== Core change: separate thresholds for the active phase vs. the tail phase ==========
-    // Active speech phase (tolerates pauses)
-    private val ACTIVE_END_SILENCE_MS = 1500L // base silence in the active phase (keeps pause tolerance)
-    private val ACTIVE_CONSECUTIVE_FRAMES = 10 // consecutive silent frames in the active phase
-    // Tail phase (ends quickly)
-    private val FINAL_END_SILENCE_MS = 800L // base silence in the tail phase (shortened to 800 ms)
-    private val FINAL_CONSECUTIVE_FRAMES = 5 // consecutive silent frames in the tail phase (5 frames = 100 ms)
-    // Tail-phase trigger: more than X seconds after the last effective speech means the tail phase has begun
-    private val FINAL_PHASE_TRIGGER_MS = 1000L // 1 s without effective speech enters the tail phase
+    private val ACTIVE_END_SILENCE_MS = 1500L
+    private val ACTIVE_CONSECUTIVE_FRAMES = 10
+    private val FINAL_END_SILENCE_MS = 800L
+    private val FINAL_CONSECUTIVE_FRAMES = 5
+    private val FINAL_PHASE_TRIGGER_MS = 1000L
+    private val MAX_SILENCE_AFTER_SPEECH_MS = 2000L

-    private val MAX_SILENCE_AFTER_SPEECH_MS = 2000L // fallback threshold lowered from 3 s to 2 s
-
-    // Original base configuration
     private val MIN_EFFECTIVE_SPEECH_RMS = 0.001f
-    private val ENV_BASELINE_FACTOR = 1.2f
-    private var envBaselineRms = 0.0005f
-    private var lastEffectiveSpeechTime = 0L
     private var consecutiveSilenceFrames = 0
-
-    // New: tail-phase flag
     private var isInFinalPhase = false
-
-    // Statistics
-    private var speechEnergySum = 0f
-    private var speechFrameCount = 0
-    private var peakRms = 0f
-    private var totalFrames = 0
-    private var speechFrames = 0
-    private var continuousSpeechFrames = 0
-    private var lastFrameIsSpeech = false
-    private var peakPosition = 0
-    private var frameIndex = 0
-    private var activeFrameCount = 0
-    private var activeSpeechFrameCount = 0
+    private var lastEffectiveSpeechTime = 0L

     init {
-        val config = getVadModelConfig(0) ?: throw IllegalStateException("[$TAG] VAD config not found")
-        vad = Vad(assetManager, config)
+        val config = getVadModelConfig(0)
+            ?: throw IllegalStateException("[$TAG] VAD config not found")
+        vad = Vad(assetManager, VadModelConfig(sileroVadModelConfig = SileroVadModelConfig(model = "silero_vad.onnx", threshold = 0.2f)))
         LogUtils.i(TAG, "✅ VAD initialized")
     }

     fun accept(samples: FloatArray) {
         val now = System.currentTimeMillis()
+        vad.acceptWaveform(samples)
         val vadHasSpeech = vad.isSpeechDetected()
         val rms = calcRms(samples)

-        // Update the environment baseline
-        if (!vadHasSpeech || rms < MIN_EFFECTIVE_SPEECH_RMS) {
-            envBaselineRms = (envBaselineRms * 0.9f) + (rms * 0.1f)
-        }
-        val effectiveSpeechThreshold = maxOf(MIN_EFFECTIVE_SPEECH_RMS, envBaselineRms * ENV_BASELINE_FACTOR)
-        val isEffectiveSpeech = vadHasSpeech && rms >= effectiveSpeechThreshold
+        val isEffectiveSpeech = vadHasSpeech && rms >= MIN_EFFECTIVE_SPEECH_RMS

-        // ========== Core optimization: dynamically detect the tail phase ==========
         if (isEffectiveSpeech) {
             lastEffectiveSpeechTime = now
-            isInFinalPhase = false // effective speech, leave the tail phase
+            isInFinalPhase = false
+            lastSpeechTime = now
+            consecutiveSilenceFrames = 0
         } else {
-            // more than 1 s after the last effective speech: enter the tail phase
+            consecutiveSilenceFrames++
             if (now - lastEffectiveSpeechTime >= FINAL_PHASE_TRIGGER_MS) {
                 isInFinalPhase = true
             }
         }

-        // Speech energy statistics
-        if (isEffectiveSpeech) {
-            speechEnergySum += rms
-            speechFrameCount++
-            peakRms = maxOf(peakRms, rms)
-            lastSpeechTime = now
-            consecutiveSilenceFrames = 0
-            LogUtils.v(TAG, "🔊 Effective speech frame | RMS: $rms | threshold: $effectiveSpeechThreshold | tail phase: $isInFinalPhase")
-        } else {
-            consecutiveSilenceFrames++
-            LogUtils.v(TAG, if (vadHasSpeech) "⚠ Low-energy speech frame | RMS: $rms | threshold: $effectiveSpeechThreshold"
-            else "🔇 Silent frame | consecutive silent frames: $consecutiveSilenceFrames | tail phase: $isInFinalPhase")
-        }
+        val (endSilenceMs, endFrames) =
+            if (isInFinalPhase)
+                FINAL_END_SILENCE_MS to FINAL_CONSECUTIVE_FRAMES
+            else
+                ACTIVE_END_SILENCE_MS to ACTIVE_CONSECUTIVE_FRAMES

-        // Frame statistics
-        totalFrames++
-        frameIndex++
-        if (isEffectiveSpeech) {
-            speechFrames++
-            continuousSpeechFrames = if (lastFrameIsSpeech) continuousSpeechFrames + 1 else 1
-            lastFrameIsSpeech = true
-            if (rms == peakRms) peakPosition = frameIndex
-        } else {
-            lastFrameIsSpeech = false
-        }
-
-        // ========== Core optimization: pick thresholds based on the current phase ==========
-        val (endSilenceMs, consecutiveFrames) = if (isInFinalPhase) {
-            Pair(FINAL_END_SILENCE_MS, FINAL_CONSECUTIVE_FRAMES)
-        } else {
-            Pair(ACTIVE_END_SILENCE_MS, ACTIVE_CONSECUTIVE_FRAMES)
-        }
-
-        // VAD state transitions
         if (isEffectiveSpeech) {
             if (!isSpeaking) {
                 isSpeaking = true
-                LogUtils.d(TAG, "🎤 Effective speech started | threshold: $effectiveSpeechThreshold")
                 onSpeechStart()
             }
-            activeFrameCount++
-            activeSpeechFrameCount++
-        } else {
-            if (isSpeaking) {
-                activeFrameCount++
-                val vadSilenceDuration = now - lastSpeechTime
-                val effectiveSilenceDuration = now - lastEffectiveSpeechTime
+        } else if (isSpeaking) {
+            val silenceMs = now - lastSpeechTime
+            val effectiveSilenceMs = now - lastEffectiveSpeechTime

-                // End condition, adapted to the current phase's thresholds
-                val isSilenceTimeout = (vadSilenceDuration >= endSilenceMs ||
-                        effectiveSilenceDuration >= MAX_SILENCE_AFTER_SPEECH_MS) &&
-                        consecutiveSilenceFrames >= consecutiveFrames
+            val shouldEnd =
+                (silenceMs >= endSilenceMs ||
+                        effectiveSilenceMs >= MAX_SILENCE_AFTER_SPEECH_MS) &&
+                        consecutiveSilenceFrames >= endFrames

-                if (isSilenceTimeout) {
-                    isSpeaking = false
-                    isInFinalPhase = false // reset the tail phase
-                    val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
-                    LogUtils.d(TAG, """
-                        🛑 Speech ended
-                        - effective silence: ${effectiveSilenceDuration}ms
-                        - consecutive silent frames: $consecutiveSilenceFrames
-                        - average energy: $avgEnergy | peak: $peakRms
-                        - tail phase: $isInFinalPhase | threshold used: $endSilenceMs ms
-                    """.trimIndent())
-                    onSpeechEnd(avgEnergy, peakRms)
-                    resetStats()
-                } else {
-                    LogUtils.v(TAG, "⏳ In silence | consecutive silent frames: $consecutiveSilenceFrames | silence: ${effectiveSilenceDuration}ms | threshold used: $endSilenceMs ms")
-                }
+            if (shouldEnd) {
+                onSpeechEnd()
+                reset()
+                isSpeaking = false
+                isInFinalPhase = false
             }
         }
     }

-    // Original helper methods...
-    fun isSpeechDetected(): Boolean {
-        return vad.isSpeechDetected()
-    }
-
-    fun activeSpeechRatio(): Float {
-        val ratio = if (activeFrameCount == 0) 0f else activeSpeechFrameCount.toFloat() / activeFrameCount
-        LogUtils.d(TAG, "📊 Speech ratio: $ratio | effective speech frames: $activeSpeechFrameCount | total frames: $activeFrameCount")
-        return ratio
-    }
-
-    fun getTotalFrames(): Int = totalFrames
-    fun getSpeechFrames(): Int = speechFrames
-    fun getContinuousSpeechFrames(): Int = continuousSpeechFrames
-    fun getPeakPositionRatio(): Float {
-        return if (totalFrames == 0) 0f else peakPosition.toFloat() / totalFrames
-    }
-
     fun reset() {
         isSpeaking = false
         lastSpeechTime = 0L
         lastEffectiveSpeechTime = 0L
-        envBaselineRms = 0.0005f
         consecutiveSilenceFrames = 0
-        isInFinalPhase = false // reset the tail phase
-        resetStats()
+        isInFinalPhase = false
         vad.reset()
-
-        totalFrames = 0
-        speechFrames = 0
-        continuousSpeechFrames = 0
-        lastFrameIsSpeech = false
-        peakPosition = 0
-        frameIndex = 0
-
-        LogUtils.d(TAG, "🔄 VAD state fully reset")
-    }
-
-    private fun resetStats() {
-        activeFrameCount = 0
-        activeSpeechFrameCount = 0
-        speechEnergySum = 0f
-        speechFrameCount = 0
-        peakRms = 0f
     }

     fun calcRms(samples: FloatArray): Float {
@@ -206,4 +103,4 @@ class VadManager(
         for (v in samples) sum += v * v
         return sqrt(sum / samples.size)
     }
-}
\ No newline at end of file
+}
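The endpointing above is easier to evaluate in isolation. The following is a minimal sketch of the two-phase idea VadManager implements: raw VAD hits feed one clock, energy-gated "effective" speech feeds another, and once effective speech has been absent for FINAL_PHASE_TRIGGER_MS the shorter tail-phase silence limit takes over. Class and function names here are hypothetical, the constants are copied from the diff, and the consecutive-silent-frame requirement and the 2 s hard cap that VadManager also enforces are omitted for brevity.

    // Standalone sketch of VadManager's two-phase endpointing (names are hypothetical).
    class EndpointSketch(
        private val activeSilenceMs: Long = 1500L, // ACTIVE_END_SILENCE_MS: tolerate mid-sentence pauses
        private val finalSilenceMs: Long = 800L,   // FINAL_END_SILENCE_MS: cut off quickly in the tail
        private val tailTriggerMs: Long = 1000L,   // FINAL_PHASE_TRIGGER_MS
        private val minRms: Float = 0.001f         // MIN_EFFECTIVE_SPEECH_RMS
    ) {
        private var lastVadSpeechAt = 0L       // last frame the VAD flagged as speech
        private var lastEffectiveSpeechAt = 0L // last frame that was VAD-positive AND loud enough
        private var inTailPhase = false

        /** Feed one frame; returns true when the utterance should be ended. */
        fun onFrame(vadSaysSpeech: Boolean, rms: Float, nowMs: Long): Boolean {
            if (vadSaysSpeech) lastVadSpeechAt = nowMs
            if (vadSaysSpeech && rms >= minRms) {
                lastEffectiveSpeechAt = nowMs
                inTailPhase = false // effective speech resumes the active phase
                return false
            }
            // No effective speech for a while: the speaker is trailing off, so
            // switch to the shorter tail-phase limit.
            if (lastEffectiveSpeechAt != 0L && nowMs - lastEffectiveSpeechAt >= tailTriggerMs) {
                inTailPhase = true
            }
            val limit = if (inTailPhase) finalSilenceMs else activeSilenceMs
            return lastEffectiveSpeechAt != 0L && nowMs - lastVadSpeechAt >= limit
        }
    }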
diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
index beb3fa9..88c4072 100644
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
@@ -15,7 +15,7 @@ class VoiceController(
     assetManager: AssetManager,
     private val onWakeup: () -> Unit,
     private val onFinalAudio: (FloatArray) -> Unit,
-    idleTimeoutSeconds: Int = 200,
+    idleTimeoutSeconds: Int = 10,
     maxRecordingSeconds: Int = 10,
     private val onStateChanged: ((VoiceState) -> Unit)? = null,
     private val stopBackendAudio: (() -> Unit)? = null,
@@ -30,20 +30,14 @@ class VoiceController(
         // Pre-buffer size (2 seconds)
         private const val PRE_BUFFER_SIZE = SAMPLE_RATE * 2

-        // ========== Core: per-scenario speaker thresholds (minimal version) ==========
-        private const val SPEAKER_THRESHOLD_QUIET = 0.50f // quiet environment
-        private const val SPEAKER_THRESHOLD_NOISY = 0.45f // noisy environment (matches the similarity actually observed)
-        private const val SPEAKER_THRESHOLD_SHORT = 0.43f // short utterances (< 1 s)
-
-        // Short-utterance cutoff
         private const val SHORT_AUDIO_DURATION_MS = 1000L
         private const val INVALID_RESET_DEBOUNCE_MS = 1500L
         // Minimum speech duration
         private const val MIN_SPEECH_MS = 800L
-        private const val MIN_EFFECTIVE_VOICE_DURATION = 400L
-        // Noisy-scene detection threshold
-        private const val NOISE_BASELINE_THRESHOLD = 0.01f
+        // Unified speaker-verification threshold (no longer per-scenario)
+        private const val SPEAKER_THRESHOLD = 0.45f
     }

     var state: VoiceState = VoiceState.WAIT_WAKEUP
@@ -53,26 +47,17 @@ class VoiceController(
             onStateChanged?.invoke(value)
         }

-    // Real-time energy and frame statistics
-    private var realtimeEnergySum = 0f
-    private var realtimeEnergyCount = 0
-    private var realtimePeakRms = 0f
-    private var realtimeTotalFrames = 0
-    private var realtimeSpeechFrames = 0
-    private var realtimeContinuousSpeechFrames = 0
-    private var realtimeLastFrameIsSpeech = false
-    private var isMultiPersonDialogueDetected = false
+    // Invalid-speech flag + timeout type
+    private var hasInvalidSpeech = false
+    private var currentTimeoutType: TimeoutType = TimeoutType.IDLE_TIMEOUT
     private var lastInvalidResetMs = 0L
     private val speakerManagerLock = ReentrantLock()

-    // Ambient-noise state flag
-    private var isNoisyEnvironment = false
-
     private val wakeupManager = WakeupManager(assetManager, onWakeup)
     private val vadManager = VadManager(
         assetManager,
         onSpeechStart = { onVadStart() },
-        onSpeechEnd = { avgEnergy, peakRms -> onVadEnd(avgEnergy, peakRms) }
+        onSpeechEnd = { onVadEnd() }
     )
     private val audioBuffer = mutableListOf<Float>()
@@ -92,50 +77,6 @@ class VoiceController(
     private val idleTimeoutMs = idleTimeoutSeconds * 1000L
     private val maxRecordingMs = maxRecordingSeconds * 1000L

-    // Per-scenario dynamic coefficients (original logic kept)
-    private val BASELINE_WINDOW_SIZE = 50
-    private val envNoiseBuffer = ArrayDeque<Float>(BASELINE_WINDOW_SIZE)
-    private var currentEnvBaseline = 0.001f
-
-    // Per-scenario dynamic coefficients
-    private val BASELINE_QUIET_THRESHOLD = 0.005f
-    private val SHORT_SPEECH_ENERGY_COEFF_QUIET = 1.5f
-    private val SHORT_SPEECH_ENERGY_COEFF_NOISY = 2.0f
-    private val LONG_SPEECH_ENERGY_COEFF_QUIET = 2.5f
-    private val LONG_SPEECH_ENERGY_COEFF_NOISY = 3.5f
-    private val SHORT_SPEECH_VAD_COEFF = 0.05f
-    private val LONG_SPEECH_VAD_COEFF = 0.10f
-    private val SHORT_SPEECH_MIN_SCORE = 1
-    private val LONG_SPEECH_MIN_SCORE = 1
-
-    // Other filter parameters
-    private val MAX_FAR_FIELD_ENERGY = 0.015f
-    private val MIN_VALID_PEAK_AVG_RATIO = 0.5f
-    private val MIN_CONTINUOUS_FRAME_RATIO = 0.1f
-    private val MAX_PEAK_POSITION_RATIO = 0.95f
-    private val MIN_EFFECTIVE_SPEECH_FRAMES = 3
-    private val SHORT_SPEECH_MIN = 500L
-    private val SHORT_SPEECH_MAX = 2000L
-
-    // Multi-speaker dialogue filter config
-    private val MULTI_DIALOGUE_MIN_DURATION = 2500L
-    private val MULTI_DIALOGUE_MAX_PEAK_AVG_RATIO = 2.5f
-    private val MULTI_DIALOGUE_MIN_PEAK_AVG_RATIO = 0.4f
-    private val MULTI_DIALOGUE_MAX_CONTINUOUS_RATIO = 0.3f
-    private val MULTI_DIALOGUE_MIN_VAD_RATIO = 0.55f
-
-    // Weak-voice filter config
-    private val MIN_VOICE_FRAME_RATIO = 0.08f
-    private val MIN_PEAK_ENERGY_RATIO = 1.5f
-    private val NORMAL_VOICE_ENERGY_THRESHOLD = 0.008f
-    private val MIN_CONTINUOUS_VOICE_FRAMES = 1
-    private val MIN_EFFECTIVE_SPEECH_RMS = 0.0005f
-
-    // Invalid-speech flag + timeout type
-    private var hasInvalidSpeech = false
-    private var currentTimeoutType: TimeoutType = TimeoutType.IDLE_TIMEOUT
-
     // Speaker verification
     private val CURRENT_USER_ID = "current_wakeup_user"
     private val ENABLE_STRICT_SPEAKER_VERIFY = true
@@ -200,12 +141,6 @@ class VoiceController(
         val now = System.currentTimeMillis()

-        if (state == VoiceState.WAIT_WAKEUP) {
-            calibrateEnvBaseline(samples)
-            isNoisyEnvironment = currentEnvBaseline >= NOISE_BASELINE_THRESHOLD
-            LogUtils.d(TAG, "📊 Environment | baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment")
-        }
-
         when (state) {
             VoiceState.WAIT_WAKEUP,
             VoiceState.PLAYING_PROMPT,
@@ -246,81 +181,15 @@
                 audioBuffer.addAll(samples.asList())
                 vadManager.accept(samples)

-                calibrateEnvBaseline(samples)
-                updateRealtimeEnergy(samples)
-                updateRealtimeFrameStats()
-                isNoisyEnvironment = currentEnvBaseline >= NOISE_BASELINE_THRESHOLD
-
-                if (checkMultiPersonDialogueRealtime(now)) {
-                    LogUtils.w(TAG, "🚨 Multi-speaker dialogue detected while recording, stopping early")
-                    finishSentence(realtimeEnergySum / realtimeEnergyCount, realtimePeakRms)
-                    return
-                }
-
+                // Only the max-recording-duration check is kept
                 if (System.currentTimeMillis() - recordingStartMs > maxRecordingMs) {
-                    LogUtils.w(TAG, "⏱ Max recording reached | current env baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment")
-                    finishSentence(realtimeEnergySum / realtimeEnergyCount, realtimePeakRms)
+                    LogUtils.w(TAG, "⏱ Max recording reached")
+                    finishSentence()
                 }
             }
         }
     }

-    /* ================= Real-time energy update ================= */
-    private fun updateRealtimeEnergy(samples: FloatArray) {
-        val rms = vadManager.calcRms(samples)
-        val effectiveThreshold = if (isNoisyEnvironment) currentEnvBaseline * 1.8f else MIN_EFFECTIVE_SPEECH_RMS
-        if (rms >= effectiveThreshold) {
-            realtimeEnergySum += rms
-            realtimeEnergyCount++
-            realtimePeakRms = maxOf(realtimePeakRms, rms)
-        }
-    }
-
-    /* ================= Real-time frame statistics ================= */
-    private fun updateRealtimeFrameStats() {
-        realtimeTotalFrames = vadManager.getTotalFrames()
-        realtimeSpeechFrames = vadManager.getSpeechFrames()
-        realtimeContinuousSpeechFrames = vadManager.getContinuousSpeechFrames()
-        val currentFrameIsSpeech = vadManager.isSpeechDetected()
-        if (currentFrameIsSpeech) {
-            realtimeContinuousSpeechFrames = if (realtimeLastFrameIsSpeech) realtimeContinuousSpeechFrames + 1 else 1
-        } else {
-            realtimeContinuousSpeechFrames = 0
-        }
-        realtimeLastFrameIsSpeech = currentFrameIsSpeech
-    }
-
-    /* ================= Multi-speaker dialogue detection ================= */
-    private fun checkMultiPersonDialogueRealtime(now: Long): Boolean {
-        val duration = now - recordingStartMs
-        if (duration < MULTI_DIALOGUE_MIN_DURATION) return false
-
-        val avgEnergy = if (realtimeEnergyCount > 0) realtimeEnergySum / realtimeEnergyCount else 0f
-        val peakAvgRatio = if (avgEnergy > 0) realtimePeakRms / avgEnergy else 0f
-        val continuousRatio = if (realtimeSpeechFrames > 0) realtimeContinuousSpeechFrames.toFloat() / realtimeSpeechFrames else 0f
-        val vadRatio = vadManager.activeSpeechRatio()
-
-        isMultiPersonDialogueDetected = duration >= MULTI_DIALOGUE_MIN_DURATION &&
-                peakAvgRatio in MULTI_DIALOGUE_MIN_PEAK_AVG_RATIO..MULTI_DIALOGUE_MAX_PEAK_AVG_RATIO &&
-                continuousRatio <= MULTI_DIALOGUE_MAX_CONTINUOUS_RATIO &&
-                vadRatio >= MULTI_DIALOGUE_MIN_VAD_RATIO
-
-        return isMultiPersonDialogueDetected
-    }
-
-    /* ================= Environment baseline calibration ================= */
-    private fun calibrateEnvBaseline(samples: FloatArray) {
-        val rms = vadManager.calcRms(samples)
-        val validRms = if (rms < currentEnvBaseline + 0.002f) rms else currentEnvBaseline
-        if (rms < 0.015f) {
-            if (envNoiseBuffer.size >= BASELINE_WINDOW_SIZE) {
-                envNoiseBuffer.removeFirst()
-            }
-            envNoiseBuffer.addLast(validRms)
-            currentEnvBaseline = envNoiseBuffer.maxOrNull() ?: 0.001f
-        }
-    }
-
    /* ================= Wakeup handling ================= */
    private fun handleWakeupEvent() {
        if (state == VoiceState.UPLOADING) return
@@ -339,207 +208,73 @@ class VoiceController(
            audioBuffer.clear()
            vadManager.reset()
            vadStarted = false
-            resetRealtimeStats()
        }

        inKwsObserve = true
        kwsObserveStartMs = System.currentTimeMillis()
        onWakeup()
-        LogUtils.d(TAG, "🔔 Wakeup succeeded | env baseline: $currentEnvBaseline")
+        LogUtils.d(TAG, "🔔 Wakeup succeeded")
    }

    private fun onVadStart() {
        if (state != VoiceState.WAIT_SPEECH) return
-        LogUtils.d(TAG, "🎤 REAL VAD START | env baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment")
+        LogUtils.d(TAG, "🎤 REAL VAD START")
        vadStarted = true
        recordingStartMs = System.currentTimeMillis()
        audioBuffer.clear()
        audioBuffer.addAll(preBuffer)
-        resetRealtimeStats()
        state = VoiceState.RECORDING
    }

-    private fun onVadEnd(avgEnergy: Float, peakRms: Float) {
+    private fun onVadEnd() {
        if (state != VoiceState.RECORDING) return
-        LogUtils.d(TAG, "🧠 VAD END | env baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment")
-        val realAvgEnergy = if (realtimeEnergyCount > 0) realtimeEnergySum / realtimeEnergyCount else avgEnergy
-        val realPeakRms = if (realtimePeakRms > 0) realtimePeakRms else peakRms
-        finishSentence(realAvgEnergy, realPeakRms)
-    }
-
-    /* ================= Weak-voice filtering ================= */
-    private fun filterWeakVoice(duration: Long, avgEnergy: Float, peakRms: Float): Boolean {
-        if (duration < MIN_EFFECTIVE_VOICE_DURATION) {
-            LogUtils.w(TAG, "❌ Weak-voice filter: duration ${duration}ms < ${MIN_EFFECTIVE_VOICE_DURATION}ms")
-            return true
-        }
-
-        val voiceFrameRatio = if (realtimeTotalFrames > 0) realtimeSpeechFrames.toFloat() / realtimeTotalFrames else 0f
-        if (avgEnergy < NORMAL_VOICE_ENERGY_THRESHOLD && voiceFrameRatio < MIN_VOICE_FRAME_RATIO) {
-            LogUtils.w(TAG, "❌ Weak-voice filter: frame ratio $voiceFrameRatio < $MIN_VOICE_FRAME_RATIO")
-            return true
-        }
-
-        val peakBaselineRatio = peakRms / currentEnvBaseline
-        if (avgEnergy < NORMAL_VOICE_ENERGY_THRESHOLD && peakBaselineRatio < MIN_PEAK_ENERGY_RATIO) {
-            LogUtils.w(TAG, "❌ Weak-voice filter: peak/baseline $peakBaselineRatio < $MIN_PEAK_ENERGY_RATIO")
-            return true
-        }
-
-        if (avgEnergy < NORMAL_VOICE_ENERGY_THRESHOLD && realtimeContinuousSpeechFrames < MIN_CONTINUOUS_VOICE_FRAMES) {
-            LogUtils.w(TAG, "❌ Weak-voice filter: continuous frames $realtimeContinuousSpeechFrames < $MIN_CONTINUOUS_VOICE_FRAMES")
-            return true
-        }
-
-        val energyBaselineRatio = avgEnergy / currentEnvBaseline
-        if (avgEnergy < 0.005f && energyBaselineRatio < 1.2f) {
-            LogUtils.w(TAG, "❌ Weak-voice filter: energy/baseline $energyBaselineRatio < 1.2")
-            return true
-        }
-
-        return false
+        LogUtils.d(TAG, "🧠 VAD END")
+        finishSentence()
    }

    /* ================= Finish recording ================= */
-    private fun finishSentence(avgEnergy: Float = 0f, peakRms: Float = 0f) {
+    private fun finishSentence() {
        val now = System.currentTimeMillis()
        val duration = now - recordingStartMs

        if (!vadStarted || duration < MIN_SPEECH_MS) {
-            LogUtils.d(TAG, "❌ Speech too short: $duration ms | baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment")
-            hasInvalidSpeech = true
-            resetToWaitSpeech()
-            return
-        }
-
-        if (filterWeakVoice(duration, avgEnergy, peakRms)) {
+            LogUtils.d(TAG, "❌ Speech too short: $duration ms")
            hasInvalidSpeech = true
            resetToWaitSpeech()
            return
        }

        val audio = audioBuffer.toFloatArray()
-        val vadRatio = vadManager.activeSpeechRatio()
-        val peakAvgRatio = if (avgEnergy > 0f) peakRms / avgEnergy else 0f
-        LogUtils.d(TAG, "📊 Recording info | duration: $duration ms | energy: $avgEnergy | peak/avg ratio: $peakAvgRatio | baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment")
-        LogUtils.d(TAG, "📊 Real-time frame stats | total frames: $realtimeTotalFrames | speech frames: $realtimeSpeechFrames | continuous speech frames: $realtimeContinuousSpeechFrames")
-
-        if (isMultiPersonDialogueDetected) {
-            LogUtils.w(TAG, "❌ Filtered multi-speaker garbage speech | duration: $duration ms")
-            hasInvalidSpeech = true
-            resetToWaitSpeech()
-            return
-        }
-
-        // Speaker verification (minimal core version)
+        // Speaker verification (core logic kept)
        if (ENABLE_STRICT_SPEAKER_VERIFY) {
            val isCurrentUser = verifySpeaker(audio)
            if (!isCurrentUser) {
-                LogUtils.w(TAG, "❌ Not the current wakeup user, rejecting speech | duration: $duration ms | noisy: $isNoisyEnvironment")
+                LogUtils.w(TAG, "❌ Not the current wakeup user, rejecting speech | duration: $duration ms")
                hasInvalidSpeech = true
                resetToWaitSpeech()
                return
            }
-            LogUtils.d(TAG, "✅ Current user's speech, continuing | duration: $duration ms | noisy: $isNoisyEnvironment")
-        }
-
-
-        // Far-field filtering
-        val isFarField = avgEnergy < MAX_FAR_FIELD_ENERGY
-        val isInvalidPeakRatio = peakAvgRatio < MIN_VALID_PEAK_AVG_RATIO
-        if (isFarField && isInvalidPeakRatio) {
-            LogUtils.w(TAG, "❌ Far-field/invalid speech filtered | energy: $avgEnergy < $MAX_FAR_FIELD_ENERGY")
-            hasInvalidSpeech = true
-            resetToWaitSpeech()
-            return
-        }
-
-        // Discontinuity check
-        val continuousRatio = if (realtimeSpeechFrames > 0) realtimeContinuousSpeechFrames.toFloat() / realtimeSpeechFrames else 0f
-        val peakPositionRatio = vadManager.getPeakPositionRatio()
-        val isDiscontinuous = continuousRatio < MIN_CONTINUOUS_FRAME_RATIO &&
-                realtimeSpeechFrames < MIN_EFFECTIVE_SPEECH_FRAMES &&
-                peakPositionRatio > MAX_PEAK_POSITION_RATIO
-        if (isDiscontinuous) {
-            LogUtils.w(TAG, "❌ Discontinuous noise filtered | continuous ratio: $continuousRatio < $MIN_CONTINUOUS_FRAME_RATIO")
-            hasInvalidSpeech = true
-            resetToWaitSpeech()
-            return
-        }
-
-        // Per-scenario threshold filtering
-        val isQuietEnv = currentEnvBaseline < BASELINE_QUIET_THRESHOLD
-        val thresholdConfig = when {
-            duration in SHORT_SPEECH_MIN..SHORT_SPEECH_MAX -> {
-                val coeff = if (isQuietEnv) SHORT_SPEECH_ENERGY_COEFF_QUIET else SHORT_SPEECH_ENERGY_COEFF_NOISY
-                val energyThreshold = currentEnvBaseline * coeff
-                ThresholdConfig(energyThreshold, SHORT_SPEECH_VAD_COEFF, SHORT_SPEECH_MIN_SCORE, "short speech")
-            }
-            else -> {
-                val coeff = if (isQuietEnv) LONG_SPEECH_ENERGY_COEFF_QUIET else LONG_SPEECH_ENERGY_COEFF_NOISY
-                val energyThreshold = currentEnvBaseline * coeff
-                ThresholdConfig(energyThreshold, LONG_SPEECH_VAD_COEFF, LONG_SPEECH_MIN_SCORE, "long speech")
-            }
-        }
-
-        val energyPass = avgEnergy >= thresholdConfig.energyThreshold
-        val vadRatioPass = vadRatio >= thresholdConfig.vadRatioThreshold
-        if (!energyPass || !vadRatioPass) {
-            LogUtils.w(TAG, "❌ Low-energy threshold filter | energy: $avgEnergy < ${thresholdConfig.energyThreshold} | ratio: $vadRatio < ${thresholdConfig.vadRatioThreshold} | scene: ${thresholdConfig.scene}")
-            hasInvalidSpeech = true
-            resetToWaitSpeech()
-            return
-        }
-
-        // Scoring check
-        var score = 0
-        score += when {
-            duration >= 4000 -> 3
-            duration >= 2500 -> 2
-            else -> 1
-        }
-        score += if (avgEnergy >= thresholdConfig.energyThreshold) 1 else 0
-        score += if (continuousRatio >= MIN_CONTINUOUS_FRAME_RATIO) 1 else 0
-
-        val pass = score >= thresholdConfig.minScore
-        if (!pass) {
-            LogUtils.w(TAG, "❌ Score too low | total: $score < ${thresholdConfig.minScore} | scene: ${thresholdConfig.scene}")
-            hasInvalidSpeech = true
-            resetToWaitSpeech()
-            return
+            LogUtils.d(TAG, "✅ Current user's speech, continuing | duration: $duration ms")
        }

        // Final pass
        audioBuffer.clear()
        state = VoiceState.UPLOADING
        onFinalAudio(audio)
-        resetRealtimeStats()
        hasInvalidSpeech = false
-        LogUtils.i(TAG, "✅ Speech accepted | duration: $duration ms | energy: $avgEnergy | scene: ${thresholdConfig.scene} | noisy: $isNoisyEnvironment")
-    }
-
-    /* ================= Reset real-time statistics ================= */
-    private fun resetRealtimeStats() {
-        realtimeEnergySum = 0f
-        realtimeEnergyCount = 0
-        realtimePeakRms = 0f
-        realtimeTotalFrames = 0
-        realtimeSpeechFrames = 0
-        realtimeContinuousSpeechFrames = 0
-        realtimeLastFrameIsSpeech = false
-        isMultiPersonDialogueDetected = false
+        LogUtils.i(TAG, "✅ Speech accepted | duration: $duration ms")
    }

    /* ================= Playback/upload callbacks ================= */
    fun onPlayStartPrompt() {
-        LogUtils.d(TAG, "🎵 Playing prompt tone | baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment")
+        LogUtils.d(TAG, "🎵 Playing prompt tone")
        state = VoiceState.PLAYING_PROMPT
    }

    fun onPlayEndPrompt() {
        speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
-        LogUtils.d(TAG, "🎵 Prompt tone finished | baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment")
+        LogUtils.d(TAG, "🎵 Prompt tone finished")
        state = VoiceState.WAIT_SPEECH_COOLDOWN
    }

@@ -548,19 +283,19 @@
            LogUtils.w(TAG, "🎶 Not in upload-finished state, refusing to switch to PLAYING_BACKEND | current state: $state")
            return
        }
-        LogUtils.d(TAG, "🎶 Starting backend audio playback | baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment")
+        LogUtils.d(TAG, "🎶 Starting backend audio playback")
        state = VoiceState.PLAYING_BACKEND
    }

    fun onPlayEndBackend() {
        speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
-        LogUtils.d(TAG, "🎶 Backend audio finished | baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment")
+        LogUtils.d(TAG, "🎶 Backend audio finished")
        state = VoiceState.WAIT_SPEECH_COOLDOWN
    }

    fun onUploadFinished(success: Boolean) {
        if (state != VoiceState.UPLOADING) return
-        LogUtils.d(TAG, "📤 Upload finished | success: $success | baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment")
+        LogUtils.d(TAG, "📤 Upload finished | success: $success")

        if (!success) {
            speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
@@ -569,7 +304,7 @@ class VoiceController(
    }

    private fun resetToWaitSpeech() {
-        LogUtils.d(TAG, "🔄 Reset to waiting-for-speech | baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment | invalid speech flagged: $hasInvalidSpeech")
+        LogUtils.d(TAG, "🔄 Reset to waiting-for-speech | invalid speech flagged: $hasInvalidSpeech")
        val now = System.currentTimeMillis()
        if (now - lastInvalidResetMs < INVALID_RESET_DEBOUNCE_MS) {
            LogUtils.d(TAG, "🛡 Debounce: repeated invalid speech within 1.5 s, skipping reset")
@@ -579,13 +314,12 @@
        audioBuffer.clear()
        vadManager.reset()
        vadStarted = false
-        resetRealtimeStats()
        state = VoiceState.WAIT_SPEECH
        if (waitSpeechFailStartMs == 0L) waitSpeechFailStartMs = System.currentTimeMillis()
    }

    private fun resetAll() {
-        LogUtils.d(TAG, "🔄 Resetting all state | baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment | timeout type: $currentTimeoutType")
+        LogUtils.d(TAG, "🔄 Resetting all state | timeout type: $currentTimeoutType")
        audioBuffer.clear()
        preBuffer.clear()
        vadManager.reset()
@@ -593,24 +327,17 @@
        vadStarted = false
        waitSpeechStartMs = 0L
        waitSpeechFailStartMs = 0L
-        envNoiseBuffer.clear()
-        currentEnvBaseline = 0.001f
-        isNoisyEnvironment = false
-        resetRealtimeStats()
        hasInvalidSpeech = false
        currentTimeoutType = TimeoutType.IDLE_TIMEOUT
        state = VoiceState.WAIT_WAKEUP
    }

    fun release() {
-        LogUtils.d(TAG, "🔌 Releasing resources | final baseline: $currentEnvBaseline | noisy: $isNoisyEnvironment")
+        LogUtils.d(TAG, "🔌 Releasing resources")
        wakeupManager.release()
        vadManager.reset()
-        envNoiseBuffer.clear()
-        resetRealtimeStats()
        hasInvalidSpeech = false
        currentTimeoutType = TimeoutType.IDLE_TIMEOUT
-        isNoisyEnvironment = false

        runCatching {
            SpeakerRecognition.extractor.release()
@@ -638,24 +365,14 @@
        }
    }

-    // Threshold config data class
-    private data class ThresholdConfig(
-        val energyThreshold: Float,
-        val vadRatioThreshold: Float,
-        val minScore: Int,
-        val scene: String
-    )
-
-    /* ================= Core: minimal speaker verification ================= */
    private fun verifySpeaker(audio: FloatArray): Boolean {
        if (audio.isEmpty()) {
            LogUtils.w(TAG, "❌ Audio to verify is empty, speaker verification failed")
            return false
        }

-        // 1. Trim the audio: keep only the valid part of this recording (fixes the duration mismatch)
+        // 1. Trim the audio: keep only the valid part of this recording
        val audioDurationMs = (audio.size.toFloat() / SAMPLE_RATE * 1000).toLong()
-        // Keep only the last N ms of audio (N = actual recording duration) to avoid stale buffered audio
        val validAudio = if (audioDurationMs > 0) {
            val validSampleCount = (audioDurationMs * SAMPLE_RATE / 1000).toInt()
            if (validSampleCount < audio.size) {
@@ -667,45 +384,44 @@
            audio
        }

-        // 2. Pick a threshold per scenario (no fallback, threshold only)
-        val finalThreshold = when {
-            audioDurationMs < SHORT_AUDIO_DURATION_MS -> SPEAKER_THRESHOLD_SHORT
-            isNoisyEnvironment -> SPEAKER_THRESHOLD_NOISY
-            else -> SPEAKER_THRESHOLD_QUIET
-        }
-
        var stream: OnlineStream? = null
-        return try {
-            stream = SpeakerRecognition.extractor.createStream()
-            stream.acceptWaveform(samples = validAudio, sampleRate = SAMPLE_RATE) // verify with the trimmed audio
-            stream.inputFinished()
-            if (!SpeakerRecognition.extractor.isReady(stream)) {
+        // Use runCatching for unified exception handling
+        return runCatching {
+            stream = SpeakerRecognition.extractor.createStream()
+
+            // Feed the audio data
+            stream?.acceptWaveform(samples = validAudio, sampleRate = SAMPLE_RATE)
+            stream?.inputFinished()
+
+            // Check whether the stream is ready
+            if (stream == null || !SpeakerRecognition.extractor.isReady(stream)) {
                LogUtils.w(TAG, "❌ Audio stream not ready, verification failed")
-                return false
+                return@runCatching false
            }

+            // Compute the embedding and verify
            val embedding = SpeakerRecognition.extractor.compute(stream)
-
-            // 3. Pure verification: pass or reject, nothing else
            speakerManagerLock.withLock {
                val verifyPass = SpeakerRecognition.manager.verify(
                    name = CURRENT_USER_ID,
                    embedding = embedding,
-                    threshold = finalThreshold
+                    threshold = SPEAKER_THRESHOLD
                )
-                // Log the key info (including the trimmed duration)
-                LogUtils.d(TAG, "📊 Speaker verification | threshold: $finalThreshold | pass: $verifyPass | noisy: $isNoisyEnvironment | original duration: ${audioDurationMs}ms | verified duration: ${(validAudio.size.toFloat()/SAMPLE_RATE*1000).toLong()}ms")
-
-                // No fallback of any kind: the verification result is final
-                return verifyPass
+                LogUtils.d(TAG, "📊 Speaker verification | unified threshold: $SPEAKER_THRESHOLD | pass: $verifyPass | verified duration: ${(validAudio.size.toFloat()/SAMPLE_RATE*1000).toLong()}ms")
+                verifyPass
            }
-        } catch (e: Exception) {
+        }.onFailure { e ->
+            // Handle all exception cases
            LogUtils.e(TAG, "❌ Speaker verification exception, rejecting", e)
-            return false
-        } finally {
-            stream?.release()
-        }
+        }.also {
+            // Make sure the stream is released
+            runCatching {
+                stream?.release()
+            }.onFailure { e ->
+                LogUtils.w(TAG, "⚠️ Failed to release the stream", e)
+            }
+        }.getOrDefault(false) // default to false on exception
    }
}
\ No newline at end of file
diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt b/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt
index 768f312..8f11b2a 100644
--- a/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt
@@ -26,7 +26,8 @@ class WakeupManager(assetManager: AssetManager, function: () -> Unit) {
        val config = KeywordSpotterConfig(
            featConfig = featConfig,
            modelConfig = modelConfig,
-            keywordsFile = keywordsFile
+            keywordsFile = keywordsFile,
+            keywordsThreshold = 0.2f
        )

        kws = KeywordSpotter(assetManager, config)
@@ -40,9 +41,9 @@ class WakeupManager(assetManager: AssetManager, function: () -> Unit) {
    /** ⭐ Always feed the KWS */
    fun acceptAudio(samples: FloatArray) {
        val s = stream ?: return
-        for (i in samples.indices) {
-            samples[i] *= 2.5f
-        }
+//        for (i in samples.indices) {
+//            samples[i] *= 2.5f
+//        }
        s.acceptWaveform(samples, sampleRate)

        while (kws.isReady(s)) {
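The gain loop disabled above multiplied the buffer in place, which mutates the same array the recorder and VAD consume downstream and can push samples outside the [-1, 1] float-PCM range. If input gain is ever needed again, a non-destructive, clipped copy along these lines would avoid both problems; withGain is a hypothetical helper, not part of this change.

    // Hypothetical helper: copy with gain, clamped to the valid float-PCM range.
    fun withGain(samples: FloatArray, gain: Float = 2.5f): FloatArray =
        FloatArray(samples.size) { i -> (samples[i] * gain).coerceIn(-1f, 1f) }

    // Possible call site inside acceptAudio():
    // s.acceptWaveform(withGain(samples), sampleRate)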
diff --git a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
index e6faeac..7168ef6 100644
--- a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
+++ b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
@@ -159,27 +159,27 @@ class MainActivity : BaseViewModelActivity()
            }
        }

-        mViewModel?.uploadVoiceLiveData?.observe(this) {
-            when (it) {
-                is ApiResult.Error -> {
-                    Toaster.showShort("Upload failed")
-                    voiceController?.onUploadFinished(true)
-                }
-
-                is ApiResult.Success -> {
-                    if (!TextUtils.isEmpty(it.data)) {
-                        Toaster.showShort(it.data)
-                    }
-                    Toaster.showShort(it)
-                    voiceController?.onUploadFinished(true)
-                    startPlayTimeoutJob?.cancel()
-                    startPlayTimeoutJob = lifecycleScope.launch {
-                        delay(PLAY_WAIT_TIMEOUT_MS)
-                        voiceController?.onPlayEndBackend()
-                    }
-                }
-            }
-        }
+//        mViewModel?.uploadVoiceLiveData?.observe(this) {
+//            when (it) {
+//                is ApiResult.Error -> {
+//                    Toaster.showShort("Upload failed")
+//                    voiceController?.onUploadFinished(true)
+//                }
+//
+//                is ApiResult.Success -> {
+//                    if (!TextUtils.isEmpty(it.data)) {
+//                        Toaster.showShort(it.data)
+//                    }
+//                    Toaster.showShort(it)
+//                    voiceController?.onUploadFinished(true)
+//                    startPlayTimeoutJob?.cancel()
+//                    startPlayTimeoutJob = lifecycleScope.launch {
+//                        delay(PLAY_WAIT_TIMEOUT_MS)
+//                        voiceController?.onPlayEndBackend()
+//                    }
+//                }
+//            }
+//        }
@@ -226,12 +226,12 @@ class MainActivity : BaseViewModelActivity()
//            1
//        )
//        loadLocalJsonAndPlay()
-//        val file = File(
-//            getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
-//            "xxx.wav"
-//        )
-//        AudioDebugUtil.saveFloatPcmAsWav(audio, file)
-//        LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
+        val file = File(
+            getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
+            "xxx.wav"
+        )
+        AudioDebugUtil.saveFloatPcmAsWav(audio, file)
+        LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
//        lifecycleScope.launch(Dispatchers.Main) {
//
//            mVerticalAnimator?.show()
@@ -280,9 +280,9 @@ class MainActivity : BaseViewModelActivity()
                when (msg.msgContentType) {
                    MessageContentType.RECEIVE_VOICE_STREAM.msgContentType -> {
                        lifecycleScope.launch(Dispatchers.IO) {
-//                            UnityPlayerHolder.getInstance().startTalking(msg.content)
-                            val audioDTO = GsonUtils.fromJson(msg.content, AudioDTO::class.java)
-//                            voicePlayer.onAudioDTO(audioDTO)
+////                            UnityPlayerHolder.getInstance().startTalking(msg.content)
+//                            val audioDTO = GsonUtils.fromJson(msg.content, LmChatDTO::class.java)
+//                            voicePlayer.handleSlice(audioDTO)
                        }
                    }
                }
@@ -586,9 +586,9 @@ class MainActivity : BaseViewModelActivity()
        super.onEvent(eventSource, id, type, data)
        LogUtils.eTag("lrsxxx", "onEvent:${data}")
        runCatching {
-//            val audioDTO = GsonUtils.fromJson(data, LmChatDTO::class.java)
-//            voicePlayer.handleSlice(audioDTO)
-            UnityPlayerHolder.getInstance().startTalking(data)
+            val audioDTO = GsonUtils.fromJson(data, LmChatDTO::class.java)
+            voicePlayer.handleSlice(audioDTO)
+//            UnityPlayerHolder.getInstance().startTalking(data)
        }.onFailure {
            LogUtils.eTag("lrsxxx", "Failed to parse audio data", it)
            voiceController?.onUploadFinished(false)
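If the observer commented out above is ever revived, note two defects it carried: the ApiResult.Error branch reported onUploadFinished(true) even though the upload failed, and Toaster.showShort(it) toasted the whole ApiResult object rather than a message. A corrected sketch, reusing the names from the disabled code; the surrounding Activity members (mViewModel, voiceController, startPlayTimeoutJob) are assumed to exist as in the original:

    mViewModel?.uploadVoiceLiveData?.observe(this) { result ->
        when (result) {
            is ApiResult.Error -> {
                Toaster.showShort("Upload failed")
                voiceController?.onUploadFinished(false) // report the failure, not success
            }
            is ApiResult.Success -> {
                if (!TextUtils.isEmpty(result.data)) Toaster.showShort(result.data)
                voiceController?.onUploadFinished(true)
                startPlayTimeoutJob?.cancel()
                startPlayTimeoutJob = lifecycleScope.launch {
                    delay(PLAY_WAIT_TIMEOUT_MS) // fall back if backend audio never starts
                    voiceController?.onPlayEndBackend()
                }
            }
        }
    }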
diff --git a/app/src/main/java/com/zs/smarthuman/utils/PcmStreamPlayer.kt b/app/src/main/java/com/zs/smarthuman/utils/PcmStreamPlayer.kt
index 0616216..212b2fd 100644
--- a/app/src/main/java/com/zs/smarthuman/utils/PcmStreamPlayer.kt
+++ b/app/src/main/java/com/zs/smarthuman/utils/PcmStreamPlayer.kt
@@ -4,92 +4,134 @@ import android.media.AudioAttributes
 import android.media.AudioFormat
 import android.media.AudioManager
 import android.media.AudioTrack
-import kotlinx.coroutines.CoroutineScope
-import kotlinx.coroutines.Dispatchers
-import kotlinx.coroutines.SupervisorJob
-import kotlinx.coroutines.cancel
-import kotlinx.coroutines.delay
-import kotlinx.coroutines.isActive
-import kotlinx.coroutines.launch
+import kotlinx.coroutines.*
 import java.util.ArrayDeque
 import java.util.Queue
 import java.util.concurrent.locks.ReentrantLock
+import kotlin.concurrent.withLock

-// ====================== PCM player ======================
 class PcmStreamPlayer(
     private val sampleRate: Int
 ) {
-
     var onPlayEnd: (() -> Unit)? = null
-
     private val scope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
     private val bufferQueue: Queue<ByteArray> = ArrayDeque()
-    private val queueLock = ReentrantLock()
+    private val lock = ReentrantLock()
     private var audioTrack: AudioTrack? = null

     @Volatile
     private var playing = true

+    // New: released flag, guards against use-after-release
+    @Volatile
+    private var isReleased = false
+
     init {
         scope.launch {
+            val minBufferSize = AudioTrack.getMinBufferSize(
+                sampleRate,
+                AudioFormat.CHANNEL_OUT_MONO,
+                AudioFormat.ENCODING_PCM_16BIT
+            )
+
             audioTrack = AudioTrack(
                 AudioAttributes.Builder()
                     .setUsage(AudioAttributes.USAGE_MEDIA)
                     .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
                     .build(),
                 AudioFormat.Builder()
-                    .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
                     .setSampleRate(sampleRate)
+                    .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
                     .setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
                     .build(),
-                AudioTrack.getMinBufferSize(
-                    sampleRate,
-                    AudioFormat.CHANNEL_OUT_MONO,
-                    AudioFormat.ENCODING_PCM_16BIT
-                ),
+                minBufferSize * 2,
                 AudioTrack.MODE_STREAM,
                 AudioManager.AUDIO_SESSION_ID_GENERATE
             )

-            audioTrack?.play()
+            // Null-safety check: guard against AudioTrack creation failure
+            audioTrack?.play() ?: run {
+                playing = false
+                isReleased = true
+                return@launch
+            }

-            val silent = ByteArray(2048)
+            // 🔥 AudioTrack warm-up (very important)
+            warmUp()

-            while (isActive && playing) {
-                val pcm = queueLock.run { bufferQueue.poll() }
+            val silent = ByteArray(1024)

-                if (pcm != null) {
+            while (isActive && playing && !isReleased) {
+                val pcm = lock.withLock {
+                    if (bufferQueue.isEmpty()) null else bufferQueue.poll()
+                }
+
+                if (pcm != null && !isReleased) {
                     audioTrack?.write(pcm, 0, pcm.size)
-                } else {
+                } else if (!isReleased) {
                     audioTrack?.write(silent, 0, silent.size)
                     delay(5)
                 }
             }

-            audioTrack?.stop()
-            audioTrack?.release()
+            // Null-safety checks before releasing
+            audioTrack?.takeIf { !isReleased }?.stop()
+            audioTrack?.takeIf { !isReleased }?.release()
             audioTrack = null
-
-            onPlayEnd?.invoke()
         }
     }

+    private fun warmUp() {
+        if (isReleased) return
+        val warmUpMs = 50
+        val bytes = sampleRate * 2 * warmUpMs / 1000
+        val silence = ByteArray(bytes)
+        audioTrack?.write(silence, 0, silence.size)
+        audioTrack?.write(silence, 0, silence.size)
+    }
+
     fun pushPcm(pcm: ByteArray) {
-        queueLock.run { bufferQueue.add(pcm) }
+        if (isReleased) return
+        lock.withLock {
+            bufferQueue.add(pcm)
+        }
     }

     fun clearQueue() {
-        queueLock.run { bufferQueue.clear() }
+        if (isReleased) return
+        lock.withLock {
+            bufferQueue.clear()
+        }
     }

-    fun queueEmpty(): Boolean = queueLock.run { bufferQueue.isEmpty() }
+    fun queueEmpty(): Boolean {
+        if (isReleased) return true
+        return lock.withLock { bufferQueue.isEmpty() }
+    }
+
+    // Core addition: force-stop current playback and clear all buffers
+    fun forceStop() {
+        if (isReleased) return
+        lock.withLock {
+            bufferQueue.clear()
+        }
+        // Pause first: AudioTrack.flush() is a no-op while the track is still playing
+        audioTrack?.pause()
+        audioTrack?.flush() // drop AudioTrack's internal buffer so output stops immediately
+    }
+
+    // Core addition: restart the player (to play new audio after a stop)
+    fun restart() {
+        if (isReleased) return
+        audioTrack?.play()
+    }

     fun release() {
+        isReleased = true
         playing = false
-        queueLock.run { bufferQueue.clear() }
+        lock.withLock { bufferQueue.clear() } // clearQueue() would early-return once isReleased is set
         scope.cancel()
+        audioTrack?.stop()
+        audioTrack?.release()
+        audioTrack = null
     }
-}
-
-
-
+}
\ No newline at end of file
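forceStop() and restart() above are meant to be used as a pair, e.g. for barge-in while backend audio is playing: drop everything queued, silence the track, then resume playback before pushing the new stream. A caller-side sketch; onUserBargeIn and its call site are assumptions, not part of this diff:

    // Hypothetical barge-in handler built on the new PcmStreamPlayer API.
    fun onUserBargeIn(player: PcmStreamPlayer, firstSliceOfNewAudio: ByteArray) {
        player.forceStop()               // clear the queue and flush AudioTrack's buffer
        player.restart()                 // AudioTrack.play() again so new writes are audible
        player.pushPcm(firstSliceOfNewAudio)
    }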
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 98d3e72..a1596be 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -73,6 +73,7 @@ immersionbar-components = { module = "com.geyifeng.immersionbar:immersionbar-com
 immersionbar-ktx = { module = "com.geyifeng.immersionbar:immersionbar-ktx", version.ref = "immersionbarKtx" }
 immersionbar = { module = "com.geyifeng.immersionbar:immersionbar", version.ref = "immersionbar" }
 androidx-lifecycle-runtime-android = { group = "androidx.lifecycle", name = "lifecycle-runtime-android", version.ref = "lifecycleRuntimeAndroid" }
+
 [plugins]
 android-application = { id = "com.android.application", version.ref = "agp" }
 kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" }
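A loose end worth recording: SpeakerRecognition.manager.verify(name = CURRENT_USER_ID, ...) in VoiceController can only pass if an embedding has previously been registered under that name. A minimal enrollment sketch, built from the same extractor calls the diff already uses; manager.add/remove are assumed to follow sherpa-onnx's Kotlin SpeakerEmbeddingManager API, and enrolling at wakeup time is an assumption, not something this change implements:

    // Hypothetical enrollment step, e.g. run on the audio that triggered the wakeup.
    fun enrollWakeupUser(wakeupAudio: FloatArray, sampleRate: Int) {
        val stream = SpeakerRecognition.extractor.createStream()
        try {
            stream.acceptWaveform(samples = wakeupAudio, sampleRate = sampleRate)
            stream.inputFinished()
            if (!SpeakerRecognition.extractor.isReady(stream)) return
            val embedding = SpeakerRecognition.extractor.compute(stream)
            SpeakerRecognition.manager.remove("current_wakeup_user") // drop the previous speaker, if any
            SpeakerRecognition.manager.add("current_wakeup_user", embedding)
        } finally {
            stream.release()
        }
    }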