去除多余的检测
This commit is contained in:
parent
cdc189c5f4
commit
c450d8d620
Binary file not shown.
@ -2,203 +2,100 @@ package com.zs.smarthuman.sherpa
|
|||||||
|
|
||||||
import android.content.res.AssetManager
|
import android.content.res.AssetManager
|
||||||
import com.blankj.utilcode.util.LogUtils
|
import com.blankj.utilcode.util.LogUtils
|
||||||
|
import com.k2fsa.sherpa.onnx.SileroVadModelConfig
|
||||||
import com.k2fsa.sherpa.onnx.Vad
|
import com.k2fsa.sherpa.onnx.Vad
|
||||||
|
import com.k2fsa.sherpa.onnx.VadModelConfig
|
||||||
import com.k2fsa.sherpa.onnx.getVadModelConfig
|
import com.k2fsa.sherpa.onnx.getVadModelConfig
|
||||||
import kotlin.math.sqrt
|
import kotlin.math.sqrt
|
||||||
|
|
||||||
class VadManager(
|
class VadManager(
|
||||||
assetManager: AssetManager,
|
assetManager: AssetManager,
|
||||||
private val onSpeechStart: () -> Unit,
|
private val onSpeechStart: () -> Unit,
|
||||||
private val onSpeechEnd: (avgEnergy: Float, peakRms: Float) -> Unit
|
private val onSpeechEnd: () -> Unit
|
||||||
) {
|
) {
|
||||||
private val TAG = "SmartHuman-VadManager"
|
private val TAG = "VadManager"
|
||||||
private val vad: Vad
|
private val vad: Vad
|
||||||
|
|
||||||
private var isSpeaking = false
|
private var isSpeaking = false
|
||||||
private var lastSpeechTime = 0L
|
private var lastSpeechTime = 0L
|
||||||
|
|
||||||
// ========== 核心调整:区分活跃期/收尾期阈值 ==========
|
private val ACTIVE_END_SILENCE_MS = 1500L
|
||||||
// 说话活跃期(容忍停顿)
|
private val ACTIVE_CONSECUTIVE_FRAMES = 10
|
||||||
private val ACTIVE_END_SILENCE_MS = 1500L // 活跃期基础静默(保留停顿容忍)
|
private val FINAL_END_SILENCE_MS = 800L
|
||||||
private val ACTIVE_CONSECUTIVE_FRAMES = 10 // 活跃期连续静音帧
|
private val FINAL_CONSECUTIVE_FRAMES = 5
|
||||||
// 说话收尾期(快速结束)
|
private val FINAL_PHASE_TRIGGER_MS = 1000L
|
||||||
private val FINAL_END_SILENCE_MS = 800L // 收尾期基础静默(缩短到800ms)
|
private val MAX_SILENCE_AFTER_SPEECH_MS = 2000L
|
||||||
private val FINAL_CONSECUTIVE_FRAMES = 5 // 收尾期连续静音帧(5帧=100ms)
|
|
||||||
// 收尾期触发条件:最后一次有效语音后超过X秒,判定为进入收尾期
|
|
||||||
private val FINAL_PHASE_TRIGGER_MS = 1000L // 1秒无有效语音,进入收尾期
|
|
||||||
|
|
||||||
private val MAX_SILENCE_AFTER_SPEECH_MS = 2000L // 兜底阈值从3秒降到2秒
|
|
||||||
|
|
||||||
// 原有基础配置
|
|
||||||
private val MIN_EFFECTIVE_SPEECH_RMS = 0.001f
|
private val MIN_EFFECTIVE_SPEECH_RMS = 0.001f
|
||||||
private val ENV_BASELINE_FACTOR = 1.2f
|
|
||||||
private var envBaselineRms = 0.0005f
|
|
||||||
private var lastEffectiveSpeechTime = 0L
|
|
||||||
|
|
||||||
private var consecutiveSilenceFrames = 0
|
private var consecutiveSilenceFrames = 0
|
||||||
|
|
||||||
// 新增:收尾期标记
|
|
||||||
private var isInFinalPhase = false
|
private var isInFinalPhase = false
|
||||||
|
private var lastEffectiveSpeechTime = 0L
|
||||||
// 统计变量
|
|
||||||
private var speechEnergySum = 0f
|
|
||||||
private var speechFrameCount = 0
|
|
||||||
private var peakRms = 0f
|
|
||||||
private var totalFrames = 0
|
|
||||||
private var speechFrames = 0
|
|
||||||
private var continuousSpeechFrames = 0
|
|
||||||
private var lastFrameIsSpeech = false
|
|
||||||
private var peakPosition = 0
|
|
||||||
private var frameIndex = 0
|
|
||||||
private var activeFrameCount = 0
|
|
||||||
private var activeSpeechFrameCount = 0
|
|
||||||
|
|
||||||
init {
|
init {
|
||||||
val config = getVadModelConfig(0) ?: throw IllegalStateException("[$TAG] VAD config not found")
|
val config = getVadModelConfig(0)
|
||||||
vad = Vad(assetManager, config)
|
?: throw IllegalStateException("[$TAG] VAD config not found")
|
||||||
|
vad = Vad(assetManager, VadModelConfig(sileroVadModelConfig = SileroVadModelConfig(model = "silero_vad.onnx", threshold = 0.2f)))
|
||||||
LogUtils.i(TAG, "✅ VAD 初始化成功")
|
LogUtils.i(TAG, "✅ VAD 初始化成功")
|
||||||
}
|
}
|
||||||
|
|
||||||
fun accept(samples: FloatArray) {
|
fun accept(samples: FloatArray) {
|
||||||
val now = System.currentTimeMillis()
|
val now = System.currentTimeMillis()
|
||||||
|
|
||||||
vad.acceptWaveform(samples)
|
vad.acceptWaveform(samples)
|
||||||
val vadHasSpeech = vad.isSpeechDetected()
|
val vadHasSpeech = vad.isSpeechDetected()
|
||||||
val rms = calcRms(samples)
|
val rms = calcRms(samples)
|
||||||
|
|
||||||
// 环境基线更新
|
val isEffectiveSpeech = vadHasSpeech && rms >= MIN_EFFECTIVE_SPEECH_RMS
|
||||||
if (!vadHasSpeech || rms < MIN_EFFECTIVE_SPEECH_RMS) {
|
|
||||||
envBaselineRms = (envBaselineRms * 0.9f) + (rms * 0.1f)
|
|
||||||
}
|
|
||||||
val effectiveSpeechThreshold = maxOf(MIN_EFFECTIVE_SPEECH_RMS, envBaselineRms * ENV_BASELINE_FACTOR)
|
|
||||||
val isEffectiveSpeech = vadHasSpeech && rms >= effectiveSpeechThreshold
|
|
||||||
|
|
||||||
// ========== 核心优化:动态判定收尾期 ==========
|
|
||||||
if (isEffectiveSpeech) {
|
if (isEffectiveSpeech) {
|
||||||
lastEffectiveSpeechTime = now
|
lastEffectiveSpeechTime = now
|
||||||
isInFinalPhase = false // 有有效语音,退出收尾期
|
isInFinalPhase = false
|
||||||
|
lastSpeechTime = now
|
||||||
|
consecutiveSilenceFrames = 0
|
||||||
} else {
|
} else {
|
||||||
// 最后一次有效语音后超过1秒,进入收尾期
|
consecutiveSilenceFrames++
|
||||||
if (now - lastEffectiveSpeechTime >= FINAL_PHASE_TRIGGER_MS) {
|
if (now - lastEffectiveSpeechTime >= FINAL_PHASE_TRIGGER_MS) {
|
||||||
isInFinalPhase = true
|
isInFinalPhase = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 语音能量统计
|
val (endSilenceMs, endFrames) =
|
||||||
if (isEffectiveSpeech) {
|
if (isInFinalPhase)
|
||||||
speechEnergySum += rms
|
FINAL_END_SILENCE_MS to FINAL_CONSECUTIVE_FRAMES
|
||||||
speechFrameCount++
|
else
|
||||||
peakRms = maxOf(peakRms, rms)
|
ACTIVE_END_SILENCE_MS to ACTIVE_CONSECUTIVE_FRAMES
|
||||||
lastSpeechTime = now
|
|
||||||
consecutiveSilenceFrames = 0
|
|
||||||
LogUtils.v(TAG, "🔊 有效语音帧 | RMS: $rms | 阈值: $effectiveSpeechThreshold | 收尾期: $isInFinalPhase")
|
|
||||||
} else {
|
|
||||||
consecutiveSilenceFrames++
|
|
||||||
LogUtils.v(TAG, if (vadHasSpeech) "⚠ 低能量语音帧 | RMS: $rms | 阈值: $effectiveSpeechThreshold"
|
|
||||||
else "🔇 静音帧 | 连续静音帧: $consecutiveSilenceFrames | 收尾期: $isInFinalPhase")
|
|
||||||
}
|
|
||||||
|
|
||||||
// 帧统计
|
|
||||||
totalFrames++
|
|
||||||
frameIndex++
|
|
||||||
if (isEffectiveSpeech) {
|
|
||||||
speechFrames++
|
|
||||||
continuousSpeechFrames = if (lastFrameIsSpeech) continuousSpeechFrames + 1 else 1
|
|
||||||
lastFrameIsSpeech = true
|
|
||||||
if (rms == peakRms) peakPosition = frameIndex
|
|
||||||
} else {
|
|
||||||
lastFrameIsSpeech = false
|
|
||||||
}
|
|
||||||
|
|
||||||
// ========== 核心优化:根据收尾期选择不同阈值 ==========
|
|
||||||
val (endSilenceMs, consecutiveFrames) = if (isInFinalPhase) {
|
|
||||||
Pair(FINAL_END_SILENCE_MS, FINAL_CONSECUTIVE_FRAMES)
|
|
||||||
} else {
|
|
||||||
Pair(ACTIVE_END_SILENCE_MS, ACTIVE_CONSECUTIVE_FRAMES)
|
|
||||||
}
|
|
||||||
|
|
||||||
// VAD状态流转
|
|
||||||
if (isEffectiveSpeech) {
|
if (isEffectiveSpeech) {
|
||||||
if (!isSpeaking) {
|
if (!isSpeaking) {
|
||||||
isSpeaking = true
|
isSpeaking = true
|
||||||
LogUtils.d(TAG, "🎤 有效语音开始 | 阈值: $effectiveSpeechThreshold")
|
|
||||||
onSpeechStart()
|
onSpeechStart()
|
||||||
}
|
}
|
||||||
activeFrameCount++
|
} else if (isSpeaking) {
|
||||||
activeSpeechFrameCount++
|
val silenceMs = now - lastSpeechTime
|
||||||
} else {
|
val effectiveSilenceMs = now - lastEffectiveSpeechTime
|
||||||
if (isSpeaking) {
|
|
||||||
activeFrameCount++
|
|
||||||
val vadSilenceDuration = now - lastSpeechTime
|
|
||||||
val effectiveSilenceDuration = now - lastEffectiveSpeechTime
|
|
||||||
|
|
||||||
// 触发结束条件:适配当前阶段的阈值
|
val shouldEnd =
|
||||||
val isSilenceTimeout = (vadSilenceDuration >= endSilenceMs ||
|
(silenceMs >= endSilenceMs ||
|
||||||
effectiveSilenceDuration >= MAX_SILENCE_AFTER_SPEECH_MS) &&
|
effectiveSilenceMs >= MAX_SILENCE_AFTER_SPEECH_MS) &&
|
||||||
consecutiveSilenceFrames >= consecutiveFrames
|
consecutiveSilenceFrames >= endFrames
|
||||||
|
|
||||||
if (isSilenceTimeout) {
|
if (shouldEnd) {
|
||||||
|
onSpeechEnd()
|
||||||
|
reset()
|
||||||
isSpeaking = false
|
isSpeaking = false
|
||||||
isInFinalPhase = false // 重置收尾期
|
isInFinalPhase = false
|
||||||
val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
|
|
||||||
LogUtils.d(TAG, """
|
|
||||||
🛑 语音结束
|
|
||||||
- 有效静默时长: ${effectiveSilenceDuration}ms
|
|
||||||
- 连续静音帧: $consecutiveSilenceFrames
|
|
||||||
- 平均能量: $avgEnergy | 峰值: $peakRms
|
|
||||||
- 收尾期: $isInFinalPhase | 所用阈值: $endSilenceMs ms
|
|
||||||
""".trimIndent())
|
|
||||||
onSpeechEnd(avgEnergy, peakRms)
|
|
||||||
resetStats()
|
|
||||||
} else {
|
|
||||||
LogUtils.v(TAG, "⏳ 静默中 | 连续静音帧: $consecutiveSilenceFrames | 静默时长: ${effectiveSilenceDuration}ms | 所用阈值: $endSilenceMs ms")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// 保留原有方法...
|
|
||||||
fun isSpeechDetected(): Boolean {
|
|
||||||
return vad.isSpeechDetected()
|
|
||||||
}
|
|
||||||
|
|
||||||
fun activeSpeechRatio(): Float {
|
|
||||||
val ratio = if (activeFrameCount == 0) 0f else activeSpeechFrameCount.toFloat() / activeFrameCount
|
|
||||||
LogUtils.d(TAG, "📊 语音占比: $ratio | 有效语音帧: $activeSpeechFrameCount | 总帧: $activeFrameCount")
|
|
||||||
return ratio
|
|
||||||
}
|
|
||||||
|
|
||||||
fun getTotalFrames(): Int = totalFrames
|
|
||||||
fun getSpeechFrames(): Int = speechFrames
|
|
||||||
fun getContinuousSpeechFrames(): Int = continuousSpeechFrames
|
|
||||||
fun getPeakPositionRatio(): Float {
|
|
||||||
return if (totalFrames == 0) 0f else peakPosition.toFloat() / totalFrames
|
|
||||||
}
|
|
||||||
|
|
||||||
fun reset() {
|
fun reset() {
|
||||||
isSpeaking = false
|
isSpeaking = false
|
||||||
lastSpeechTime = 0L
|
lastSpeechTime = 0L
|
||||||
lastEffectiveSpeechTime = 0L
|
lastEffectiveSpeechTime = 0L
|
||||||
envBaselineRms = 0.0005f
|
|
||||||
consecutiveSilenceFrames = 0
|
consecutiveSilenceFrames = 0
|
||||||
isInFinalPhase = false // 重置收尾期
|
isInFinalPhase = false
|
||||||
resetStats()
|
|
||||||
vad.reset()
|
vad.reset()
|
||||||
|
|
||||||
totalFrames = 0
|
|
||||||
speechFrames = 0
|
|
||||||
continuousSpeechFrames = 0
|
|
||||||
lastFrameIsSpeech = false
|
|
||||||
peakPosition = 0
|
|
||||||
frameIndex = 0
|
|
||||||
|
|
||||||
LogUtils.d(TAG, "🔄 VAD 状态已完全重置")
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun resetStats() {
|
|
||||||
activeFrameCount = 0
|
|
||||||
activeSpeechFrameCount = 0
|
|
||||||
speechEnergySum = 0f
|
|
||||||
speechFrameCount = 0
|
|
||||||
peakRms = 0f
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fun calcRms(samples: FloatArray): Float {
|
fun calcRms(samples: FloatArray): Float {
|
||||||
|
|||||||
@ -15,7 +15,7 @@ class VoiceController(
|
|||||||
assetManager: AssetManager,
|
assetManager: AssetManager,
|
||||||
private val onWakeup: () -> Unit,
|
private val onWakeup: () -> Unit,
|
||||||
private val onFinalAudio: (FloatArray) -> Unit,
|
private val onFinalAudio: (FloatArray) -> Unit,
|
||||||
idleTimeoutSeconds: Int = 200,
|
idleTimeoutSeconds: Int = 10,
|
||||||
maxRecordingSeconds: Int = 10,
|
maxRecordingSeconds: Int = 10,
|
||||||
private val onStateChanged: ((VoiceState) -> Unit)? = null,
|
private val onStateChanged: ((VoiceState) -> Unit)? = null,
|
||||||
private val stopBackendAudio: (() -> Unit)? = null,
|
private val stopBackendAudio: (() -> Unit)? = null,
|
||||||
@ -30,20 +30,14 @@ class VoiceController(
|
|||||||
// 预缓存大小(2秒)
|
// 预缓存大小(2秒)
|
||||||
private const val PRE_BUFFER_SIZE = SAMPLE_RATE * 2
|
private const val PRE_BUFFER_SIZE = SAMPLE_RATE * 2
|
||||||
|
|
||||||
// ========== 核心:分场景声纹阈值(极简版) ==========
|
|
||||||
private const val SPEAKER_THRESHOLD_QUIET = 0.50f // 安静环境
|
|
||||||
private const val SPEAKER_THRESHOLD_NOISY = 0.45f // 嘈杂环境(匹配你的真实相似度)
|
|
||||||
private const val SPEAKER_THRESHOLD_SHORT = 0.43f // 短语音(<1秒)
|
|
||||||
|
|
||||||
// 短语音判定阈值
|
// 短语音判定阈值
|
||||||
private const val SHORT_AUDIO_DURATION_MS = 1000L
|
private const val SHORT_AUDIO_DURATION_MS = 1000L
|
||||||
private const val INVALID_RESET_DEBOUNCE_MS = 1500L
|
private const val INVALID_RESET_DEBOUNCE_MS = 1500L
|
||||||
// 最小语音时长
|
// 最小语音时长
|
||||||
private const val MIN_SPEECH_MS = 800L
|
private const val MIN_SPEECH_MS = 800L
|
||||||
private const val MIN_EFFECTIVE_VOICE_DURATION = 400L
|
|
||||||
|
|
||||||
// 噪音场景判定阈值
|
// 统一的声纹验证阈值(不再分场景)
|
||||||
private const val NOISE_BASELINE_THRESHOLD = 0.01f
|
private const val SPEAKER_THRESHOLD = 0.45f
|
||||||
}
|
}
|
||||||
|
|
||||||
var state: VoiceState = VoiceState.WAIT_WAKEUP
|
var state: VoiceState = VoiceState.WAIT_WAKEUP
|
||||||
@ -53,26 +47,17 @@ class VoiceController(
|
|||||||
onStateChanged?.invoke(value)
|
onStateChanged?.invoke(value)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 实时能量与帧统计变量
|
// 无效说话标记 + 超时类型
|
||||||
private var realtimeEnergySum = 0f
|
private var hasInvalidSpeech = false
|
||||||
private var realtimeEnergyCount = 0
|
private var currentTimeoutType: TimeoutType = TimeoutType.IDLE_TIMEOUT
|
||||||
private var realtimePeakRms = 0f
|
|
||||||
private var realtimeTotalFrames = 0
|
|
||||||
private var realtimeSpeechFrames = 0
|
|
||||||
private var realtimeContinuousSpeechFrames = 0
|
|
||||||
private var realtimeLastFrameIsSpeech = false
|
|
||||||
private var isMultiPersonDialogueDetected = false
|
|
||||||
private var lastInvalidResetMs = 0L
|
private var lastInvalidResetMs = 0L
|
||||||
private val speakerManagerLock = ReentrantLock()
|
private val speakerManagerLock = ReentrantLock()
|
||||||
|
|
||||||
// 环境噪音状态标记
|
|
||||||
private var isNoisyEnvironment = false
|
|
||||||
|
|
||||||
private val wakeupManager = WakeupManager(assetManager, onWakeup)
|
private val wakeupManager = WakeupManager(assetManager, onWakeup)
|
||||||
private val vadManager = VadManager(
|
private val vadManager = VadManager(
|
||||||
assetManager,
|
assetManager,
|
||||||
onSpeechStart = { onVadStart() },
|
onSpeechStart = { onVadStart() },
|
||||||
onSpeechEnd = { avgEnergy, peakRms -> onVadEnd(avgEnergy, peakRms) }
|
onSpeechEnd = { onVadEnd() }
|
||||||
)
|
)
|
||||||
|
|
||||||
private val audioBuffer = mutableListOf<Float>()
|
private val audioBuffer = mutableListOf<Float>()
|
||||||
@ -92,50 +77,6 @@ class VoiceController(
|
|||||||
private val idleTimeoutMs = idleTimeoutSeconds * 1000L
|
private val idleTimeoutMs = idleTimeoutSeconds * 1000L
|
||||||
private val maxRecordingMs = maxRecordingSeconds * 1000L
|
private val maxRecordingMs = maxRecordingSeconds * 1000L
|
||||||
|
|
||||||
// 分场景动态系数(保留原有逻辑)
|
|
||||||
private val BASELINE_WINDOW_SIZE = 50
|
|
||||||
private val envNoiseBuffer = ArrayDeque<Float>(BASELINE_WINDOW_SIZE)
|
|
||||||
private var currentEnvBaseline = 0.001f
|
|
||||||
|
|
||||||
|
|
||||||
// 分场景动态系数
|
|
||||||
private val BASELINE_QUIET_THRESHOLD = 0.005f
|
|
||||||
private val SHORT_SPEECH_ENERGY_COEFF_QUIET = 1.5f
|
|
||||||
private val SHORT_SPEECH_ENERGY_COEFF_NOISY = 2.0f
|
|
||||||
private val LONG_SPEECH_ENERGY_COEFF_QUIET = 2.5f
|
|
||||||
private val LONG_SPEECH_ENERGY_COEFF_NOISY = 3.5f
|
|
||||||
private val SHORT_SPEECH_VAD_COEFF = 0.05f
|
|
||||||
private val LONG_SPEECH_VAD_COEFF = 0.10f
|
|
||||||
private val SHORT_SPEECH_MIN_SCORE = 1
|
|
||||||
private val LONG_SPEECH_MIN_SCORE = 1
|
|
||||||
|
|
||||||
// 其他过滤参数
|
|
||||||
private val MAX_FAR_FIELD_ENERGY = 0.015f
|
|
||||||
private val MIN_VALID_PEAK_AVG_RATIO = 0.5f
|
|
||||||
private val MIN_CONTINUOUS_FRAME_RATIO = 0.1f
|
|
||||||
private val MAX_PEAK_POSITION_RATIO = 0.95f
|
|
||||||
private val MIN_EFFECTIVE_SPEECH_FRAMES = 3
|
|
||||||
private val SHORT_SPEECH_MIN = 500L
|
|
||||||
private val SHORT_SPEECH_MAX = 2000L
|
|
||||||
|
|
||||||
// 多人对话过滤配置
|
|
||||||
private val MULTI_DIALOGUE_MIN_DURATION = 2500L
|
|
||||||
private val MULTI_DIALOGUE_MAX_PEAK_AVG_RATIO = 2.5f
|
|
||||||
private val MULTI_DIALOGUE_MIN_PEAK_AVG_RATIO = 0.4f
|
|
||||||
private val MULTI_DIALOGUE_MAX_CONTINUOUS_RATIO = 0.3f
|
|
||||||
private val MULTI_DIALOGUE_MIN_VAD_RATIO = 0.55f
|
|
||||||
|
|
||||||
// 微弱人声过滤配置
|
|
||||||
private val MIN_VOICE_FRAME_RATIO = 0.08f
|
|
||||||
private val MIN_PEAK_ENERGY_RATIO = 1.5f
|
|
||||||
private val NORMAL_VOICE_ENERGY_THRESHOLD = 0.008f
|
|
||||||
private val MIN_CONTINUOUS_VOICE_FRAMES = 1
|
|
||||||
private val MIN_EFFECTIVE_SPEECH_RMS = 0.0005f
|
|
||||||
|
|
||||||
// 无效说话标记 + 超时类型
|
|
||||||
private var hasInvalidSpeech = false
|
|
||||||
private var currentTimeoutType: TimeoutType = TimeoutType.IDLE_TIMEOUT
|
|
||||||
|
|
||||||
// 声纹验证相关
|
// 声纹验证相关
|
||||||
private val CURRENT_USER_ID = "current_wakeup_user"
|
private val CURRENT_USER_ID = "current_wakeup_user"
|
||||||
private val ENABLE_STRICT_SPEAKER_VERIFY = true
|
private val ENABLE_STRICT_SPEAKER_VERIFY = true
|
||||||
@ -200,12 +141,6 @@ class VoiceController(
|
|||||||
|
|
||||||
val now = System.currentTimeMillis()
|
val now = System.currentTimeMillis()
|
||||||
|
|
||||||
if (state == VoiceState.WAIT_WAKEUP) {
|
|
||||||
calibrateEnvBaseline(samples)
|
|
||||||
isNoisyEnvironment = currentEnvBaseline >= NOISE_BASELINE_THRESHOLD
|
|
||||||
LogUtils.d(TAG, "📊 环境状态 | 基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment")
|
|
||||||
}
|
|
||||||
|
|
||||||
when (state) {
|
when (state) {
|
||||||
VoiceState.WAIT_WAKEUP,
|
VoiceState.WAIT_WAKEUP,
|
||||||
VoiceState.PLAYING_PROMPT,
|
VoiceState.PLAYING_PROMPT,
|
||||||
@ -246,81 +181,15 @@ class VoiceController(
|
|||||||
audioBuffer.addAll(samples.asList())
|
audioBuffer.addAll(samples.asList())
|
||||||
vadManager.accept(samples)
|
vadManager.accept(samples)
|
||||||
|
|
||||||
calibrateEnvBaseline(samples)
|
// 仅保留最大录音时长判断
|
||||||
updateRealtimeEnergy(samples)
|
|
||||||
updateRealtimeFrameStats()
|
|
||||||
isNoisyEnvironment = currentEnvBaseline >= NOISE_BASELINE_THRESHOLD
|
|
||||||
|
|
||||||
if (checkMultiPersonDialogueRealtime(now)) {
|
|
||||||
LogUtils.w(TAG, "🚨 录音中识别出多人对话,提前终止")
|
|
||||||
finishSentence(realtimeEnergySum / realtimeEnergyCount, realtimePeakRms)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if (System.currentTimeMillis() - recordingStartMs > maxRecordingMs) {
|
if (System.currentTimeMillis() - recordingStartMs > maxRecordingMs) {
|
||||||
LogUtils.w(TAG, "⏱ Max recording reached | 当前环境基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment")
|
LogUtils.w(TAG, "⏱ Max recording reached")
|
||||||
finishSentence(realtimeEnergySum / realtimeEnergyCount, realtimePeakRms)
|
finishSentence()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ================= 实时能量更新 ================= */
|
|
||||||
private fun updateRealtimeEnergy(samples: FloatArray) {
|
|
||||||
val rms = vadManager.calcRms(samples)
|
|
||||||
val effectiveThreshold = if (isNoisyEnvironment) currentEnvBaseline * 1.8f else MIN_EFFECTIVE_SPEECH_RMS
|
|
||||||
if (rms >= effectiveThreshold) {
|
|
||||||
realtimeEnergySum += rms
|
|
||||||
realtimeEnergyCount++
|
|
||||||
realtimePeakRms = maxOf(realtimePeakRms, rms)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ================= 实时帧统计 ================= */
|
|
||||||
private fun updateRealtimeFrameStats() {
|
|
||||||
realtimeTotalFrames = vadManager.getTotalFrames()
|
|
||||||
realtimeSpeechFrames = vadManager.getSpeechFrames()
|
|
||||||
realtimeContinuousSpeechFrames = vadManager.getContinuousSpeechFrames()
|
|
||||||
val currentFrameIsSpeech = vadManager.isSpeechDetected()
|
|
||||||
if (currentFrameIsSpeech) {
|
|
||||||
realtimeContinuousSpeechFrames = if (realtimeLastFrameIsSpeech) realtimeContinuousSpeechFrames + 1 else 1
|
|
||||||
} else {
|
|
||||||
realtimeContinuousSpeechFrames = 0
|
|
||||||
}
|
|
||||||
realtimeLastFrameIsSpeech = currentFrameIsSpeech
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ================= 多人对话检测 ================= */
|
|
||||||
private fun checkMultiPersonDialogueRealtime(now: Long): Boolean {
|
|
||||||
val duration = now - recordingStartMs
|
|
||||||
if (duration < MULTI_DIALOGUE_MIN_DURATION) return false
|
|
||||||
|
|
||||||
val avgEnergy = if (realtimeEnergyCount > 0) realtimeEnergySum / realtimeEnergyCount else 0f
|
|
||||||
val peakAvgRatio = if (avgEnergy > 0) realtimePeakRms / avgEnergy else 0f
|
|
||||||
val continuousRatio = if (realtimeSpeechFrames > 0) realtimeContinuousSpeechFrames.toFloat() / realtimeSpeechFrames else 0f
|
|
||||||
val vadRatio = vadManager.activeSpeechRatio()
|
|
||||||
|
|
||||||
isMultiPersonDialogueDetected = duration >= MULTI_DIALOGUE_MIN_DURATION &&
|
|
||||||
peakAvgRatio in MULTI_DIALOGUE_MIN_PEAK_AVG_RATIO..MULTI_DIALOGUE_MAX_PEAK_AVG_RATIO &&
|
|
||||||
continuousRatio <= MULTI_DIALOGUE_MAX_CONTINUOUS_RATIO &&
|
|
||||||
vadRatio >= MULTI_DIALOGUE_MIN_VAD_RATIO
|
|
||||||
|
|
||||||
return isMultiPersonDialogueDetected
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ================= 环境基线校准 ================= */
|
|
||||||
private fun calibrateEnvBaseline(samples: FloatArray) {
|
|
||||||
val rms = vadManager.calcRms(samples)
|
|
||||||
val validRms = if (rms < currentEnvBaseline + 0.002f) rms else currentEnvBaseline
|
|
||||||
if (rms < 0.015f) {
|
|
||||||
if (envNoiseBuffer.size >= BASELINE_WINDOW_SIZE) {
|
|
||||||
envNoiseBuffer.removeFirst()
|
|
||||||
}
|
|
||||||
envNoiseBuffer.addLast(validRms)
|
|
||||||
currentEnvBaseline = envNoiseBuffer.maxOrNull() ?: 0.001f
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ================= 唤醒处理 ================= */
|
/* ================= 唤醒处理 ================= */
|
||||||
private fun handleWakeupEvent() {
|
private fun handleWakeupEvent() {
|
||||||
if (state == VoiceState.UPLOADING) return
|
if (state == VoiceState.UPLOADING) return
|
||||||
@ -339,207 +208,73 @@ class VoiceController(
|
|||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
vadManager.reset()
|
vadManager.reset()
|
||||||
vadStarted = false
|
vadStarted = false
|
||||||
resetRealtimeStats()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inKwsObserve = true
|
inKwsObserve = true
|
||||||
kwsObserveStartMs = System.currentTimeMillis()
|
kwsObserveStartMs = System.currentTimeMillis()
|
||||||
onWakeup()
|
onWakeup()
|
||||||
LogUtils.d(TAG, "🔔 唤醒成功 | 环境基线: $currentEnvBaseline")
|
LogUtils.d(TAG, "🔔 唤醒成功")
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun onVadStart() {
|
private fun onVadStart() {
|
||||||
if (state != VoiceState.WAIT_SPEECH) return
|
if (state != VoiceState.WAIT_SPEECH) return
|
||||||
LogUtils.d(TAG, "🎤 REAL VAD START | 环境基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment")
|
LogUtils.d(TAG, "🎤 REAL VAD START")
|
||||||
vadStarted = true
|
vadStarted = true
|
||||||
recordingStartMs = System.currentTimeMillis()
|
recordingStartMs = System.currentTimeMillis()
|
||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
audioBuffer.addAll(preBuffer)
|
audioBuffer.addAll(preBuffer)
|
||||||
resetRealtimeStats()
|
|
||||||
state = VoiceState.RECORDING
|
state = VoiceState.RECORDING
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun onVadEnd(avgEnergy: Float, peakRms: Float) {
|
private fun onVadEnd() {
|
||||||
if (state != VoiceState.RECORDING) return
|
if (state != VoiceState.RECORDING) return
|
||||||
LogUtils.d(TAG, "🧠 VAD END | 环境基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment")
|
LogUtils.d(TAG, "🧠 VAD END")
|
||||||
val realAvgEnergy = if (realtimeEnergyCount > 0) realtimeEnergySum / realtimeEnergyCount else avgEnergy
|
finishSentence()
|
||||||
val realPeakRms = if (realtimePeakRms > 0) realtimePeakRms else peakRms
|
|
||||||
finishSentence(realAvgEnergy, realPeakRms)
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ================= 微弱人声过滤 ================= */
|
|
||||||
private fun filterWeakVoice(duration: Long, avgEnergy: Float, peakRms: Float): Boolean {
|
|
||||||
if (duration < MIN_EFFECTIVE_VOICE_DURATION) {
|
|
||||||
LogUtils.w(TAG, "❌ 微弱人声过滤:时长${duration}ms < ${MIN_EFFECTIVE_VOICE_DURATION}ms")
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
val voiceFrameRatio = if (realtimeTotalFrames > 0) realtimeSpeechFrames.toFloat() / realtimeTotalFrames else 0f
|
|
||||||
if (avgEnergy < NORMAL_VOICE_ENERGY_THRESHOLD && voiceFrameRatio < MIN_VOICE_FRAME_RATIO) {
|
|
||||||
LogUtils.w(TAG, "❌ 微弱人声过滤:帧占比${voiceFrameRatio} < ${MIN_VOICE_FRAME_RATIO}")
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
val peakBaselineRatio = peakRms / currentEnvBaseline
|
|
||||||
if (avgEnergy < NORMAL_VOICE_ENERGY_THRESHOLD && peakBaselineRatio < MIN_PEAK_ENERGY_RATIO) {
|
|
||||||
LogUtils.w(TAG, "❌ 微弱人声过滤:峰值/基线${peakBaselineRatio} < ${MIN_PEAK_ENERGY_RATIO}")
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
if (avgEnergy < NORMAL_VOICE_ENERGY_THRESHOLD && realtimeContinuousSpeechFrames < MIN_CONTINUOUS_VOICE_FRAMES) {
|
|
||||||
LogUtils.w(TAG, "❌ 微弱人声过滤:连续帧${realtimeContinuousSpeechFrames} < ${MIN_CONTINUOUS_VOICE_FRAMES}")
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
val energyBaselineRatio = avgEnergy / currentEnvBaseline
|
|
||||||
if (avgEnergy < 0.005f && energyBaselineRatio < 1.2f) {
|
|
||||||
LogUtils.w(TAG, "❌ 微弱人声过滤:能量/基线${energyBaselineRatio} < 1.2")
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ================= 结束录音 ================= */
|
/* ================= 结束录音 ================= */
|
||||||
private fun finishSentence(avgEnergy: Float = 0f, peakRms: Float = 0f) {
|
private fun finishSentence() {
|
||||||
val now = System.currentTimeMillis()
|
val now = System.currentTimeMillis()
|
||||||
val duration = now - recordingStartMs
|
val duration = now - recordingStartMs
|
||||||
|
|
||||||
if (!vadStarted || duration < MIN_SPEECH_MS) {
|
if (!vadStarted || duration < MIN_SPEECH_MS) {
|
||||||
LogUtils.d(TAG, "❌ 语音过短: $duration ms | 基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment")
|
LogUtils.d(TAG, "❌ 语音过短: $duration ms")
|
||||||
hasInvalidSpeech = true
|
|
||||||
resetToWaitSpeech()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if (filterWeakVoice(duration, avgEnergy, peakRms)) {
|
|
||||||
hasInvalidSpeech = true
|
hasInvalidSpeech = true
|
||||||
resetToWaitSpeech()
|
resetToWaitSpeech()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
val audio = audioBuffer.toFloatArray()
|
val audio = audioBuffer.toFloatArray()
|
||||||
val vadRatio = vadManager.activeSpeechRatio()
|
|
||||||
val peakAvgRatio = if (avgEnergy > 0f) peakRms / avgEnergy else 0f
|
|
||||||
|
|
||||||
LogUtils.d(TAG, "📊 录音信息 | 时长: $duration ms | 能量: $avgEnergy | 峰均比: $peakAvgRatio | 基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment")
|
// 声纹验证(保留核心逻辑)
|
||||||
LogUtils.d(TAG, "📊 实时帧统计 | 总帧: $realtimeTotalFrames | 语音帧: $realtimeSpeechFrames | 连续语音帧: $realtimeContinuousSpeechFrames")
|
|
||||||
|
|
||||||
if (isMultiPersonDialogueDetected) {
|
|
||||||
LogUtils.w(TAG, "❌ 过滤多人对话垃圾语音 | 时长: $duration ms")
|
|
||||||
hasInvalidSpeech = true
|
|
||||||
resetToWaitSpeech()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// 声纹验证(核心极简版)
|
|
||||||
if (ENABLE_STRICT_SPEAKER_VERIFY) {
|
if (ENABLE_STRICT_SPEAKER_VERIFY) {
|
||||||
val isCurrentUser = verifySpeaker(audio)
|
val isCurrentUser = verifySpeaker(audio)
|
||||||
if (!isCurrentUser) {
|
if (!isCurrentUser) {
|
||||||
LogUtils.w(TAG, "❌ 非当前唤醒用户,拒绝语音 | 录音时长: $duration ms | 嘈杂环境: $isNoisyEnvironment")
|
LogUtils.w(TAG, "❌ 非当前唤醒用户,拒绝语音 | 录音时长: $duration ms")
|
||||||
hasInvalidSpeech = true
|
hasInvalidSpeech = true
|
||||||
resetToWaitSpeech()
|
resetToWaitSpeech()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
LogUtils.d(TAG, "✅ 当前用户语音,继续处理 | 录音时长: $duration ms | 嘈杂环境: $isNoisyEnvironment")
|
LogUtils.d(TAG, "✅ 当前用户语音,继续处理 | 录音时长: $duration ms")
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// 远场过滤
|
|
||||||
val isFarField = avgEnergy < MAX_FAR_FIELD_ENERGY
|
|
||||||
val isInvalidPeakRatio = peakAvgRatio < MIN_VALID_PEAK_AVG_RATIO
|
|
||||||
if (isFarField && isInvalidPeakRatio) {
|
|
||||||
LogUtils.w(TAG, "❌ 远场/无效语音过滤 | 能量: $avgEnergy < $MAX_FAR_FIELD_ENERGY")
|
|
||||||
hasInvalidSpeech = true
|
|
||||||
resetToWaitSpeech()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// 非连续判定
|
|
||||||
val continuousRatio = if (realtimeSpeechFrames > 0) realtimeContinuousSpeechFrames.toFloat() / realtimeSpeechFrames else 0f
|
|
||||||
val peakPositionRatio = vadManager.getPeakPositionRatio()
|
|
||||||
val isDiscontinuous = continuousRatio < MIN_CONTINUOUS_FRAME_RATIO &&
|
|
||||||
realtimeSpeechFrames < MIN_EFFECTIVE_SPEECH_FRAMES &&
|
|
||||||
peakPositionRatio > MAX_PEAK_POSITION_RATIO
|
|
||||||
if (isDiscontinuous) {
|
|
||||||
LogUtils.w(TAG, "❌ 非连续杂音过滤 | 连续占比: $continuousRatio < $MIN_CONTINUOUS_FRAME_RATIO")
|
|
||||||
hasInvalidSpeech = true
|
|
||||||
resetToWaitSpeech()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// 分场景阈值过滤
|
|
||||||
val isQuietEnv = currentEnvBaseline < BASELINE_QUIET_THRESHOLD
|
|
||||||
val thresholdConfig = when {
|
|
||||||
duration in SHORT_SPEECH_MIN..SHORT_SPEECH_MAX -> {
|
|
||||||
val coeff = if (isQuietEnv) SHORT_SPEECH_ENERGY_COEFF_QUIET else SHORT_SPEECH_ENERGY_COEFF_NOISY
|
|
||||||
val energyThreshold = currentEnvBaseline * coeff
|
|
||||||
ThresholdConfig(energyThreshold, SHORT_SPEECH_VAD_COEFF, SHORT_SPEECH_MIN_SCORE, "短语音")
|
|
||||||
}
|
|
||||||
else -> {
|
|
||||||
val coeff = if (isQuietEnv) LONG_SPEECH_ENERGY_COEFF_QUIET else LONG_SPEECH_ENERGY_COEFF_NOISY
|
|
||||||
val energyThreshold = currentEnvBaseline * coeff
|
|
||||||
ThresholdConfig(energyThreshold, LONG_SPEECH_VAD_COEFF, LONG_SPEECH_MIN_SCORE, "长语音")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
val energyPass = avgEnergy >= thresholdConfig.energyThreshold
|
|
||||||
val vadRatioPass = vadRatio >= thresholdConfig.vadRatioThreshold
|
|
||||||
if (!energyPass || !vadRatioPass) {
|
|
||||||
LogUtils.w(TAG, "❌ 低能量语音阈值过滤 | 能量: $avgEnergy < ${thresholdConfig.energyThreshold} | 占比: $vadRatio < ${thresholdConfig.vadRatioThreshold} | 场景: ${thresholdConfig.scene}")
|
|
||||||
hasInvalidSpeech = true
|
|
||||||
resetToWaitSpeech()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// 评分判定
|
|
||||||
var score = 0
|
|
||||||
score += when {
|
|
||||||
duration >= 4000 -> 3
|
|
||||||
duration >= 2500 -> 2
|
|
||||||
else -> 1
|
|
||||||
}
|
|
||||||
score += if (avgEnergy >= thresholdConfig.energyThreshold) 1 else 0
|
|
||||||
score += if (continuousRatio >= MIN_CONTINUOUS_FRAME_RATIO) 1 else 0
|
|
||||||
|
|
||||||
val pass = score >= thresholdConfig.minScore
|
|
||||||
if (!pass) {
|
|
||||||
LogUtils.w(TAG, "❌ 评分不足过滤 | 总分: $score < ${thresholdConfig.minScore} | 场景: ${thresholdConfig.scene}")
|
|
||||||
hasInvalidSpeech = true
|
|
||||||
resetToWaitSpeech()
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 最终通过
|
// 最终通过
|
||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
state = VoiceState.UPLOADING
|
state = VoiceState.UPLOADING
|
||||||
onFinalAudio(audio)
|
onFinalAudio(audio)
|
||||||
resetRealtimeStats()
|
|
||||||
hasInvalidSpeech = false
|
hasInvalidSpeech = false
|
||||||
LogUtils.i(TAG, "✅ 语音通过 | 时长: $duration ms | 能量: $avgEnergy | 场景: ${thresholdConfig.scene} | 嘈杂环境: $isNoisyEnvironment")
|
LogUtils.i(TAG, "✅ 语音通过 | 时长: $duration ms")
|
||||||
}
|
|
||||||
|
|
||||||
/* ================= 重置实时统计 ================= */
|
|
||||||
private fun resetRealtimeStats() {
|
|
||||||
realtimeEnergySum = 0f
|
|
||||||
realtimeEnergyCount = 0
|
|
||||||
realtimePeakRms = 0f
|
|
||||||
realtimeTotalFrames = 0
|
|
||||||
realtimeSpeechFrames = 0
|
|
||||||
realtimeContinuousSpeechFrames = 0
|
|
||||||
realtimeLastFrameIsSpeech = false
|
|
||||||
isMultiPersonDialogueDetected = false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ================= 播放/上传回调 ================= */
|
/* ================= 播放/上传回调 ================= */
|
||||||
fun onPlayStartPrompt() {
|
fun onPlayStartPrompt() {
|
||||||
LogUtils.d(TAG, "🎵 播放提示音 | 基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment")
|
LogUtils.d(TAG, "🎵 播放提示音")
|
||||||
state = VoiceState.PLAYING_PROMPT
|
state = VoiceState.PLAYING_PROMPT
|
||||||
}
|
}
|
||||||
|
|
||||||
fun onPlayEndPrompt() {
|
fun onPlayEndPrompt() {
|
||||||
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
||||||
LogUtils.d(TAG, "🎵 提示音结束 | 基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment")
|
LogUtils.d(TAG, "🎵 提示音结束")
|
||||||
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -548,19 +283,19 @@ class VoiceController(
|
|||||||
LogUtils.w(TAG, "🎶 非上传完成状态,禁止切换到 PLAYING_BACKEND | 当前状态: $state")
|
LogUtils.w(TAG, "🎶 非上传完成状态,禁止切换到 PLAYING_BACKEND | 当前状态: $state")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
LogUtils.d(TAG, "🎶 开始播放后台音频 | 基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment")
|
LogUtils.d(TAG, "🎶 开始播放后台音频")
|
||||||
state = VoiceState.PLAYING_BACKEND
|
state = VoiceState.PLAYING_BACKEND
|
||||||
}
|
}
|
||||||
|
|
||||||
fun onPlayEndBackend() {
|
fun onPlayEndBackend() {
|
||||||
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
||||||
LogUtils.d(TAG, "🎶 后台音频结束 | 基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment")
|
LogUtils.d(TAG, "🎶 后台音频结束")
|
||||||
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
||||||
}
|
}
|
||||||
|
|
||||||
fun onUploadFinished(success: Boolean) {
|
fun onUploadFinished(success: Boolean) {
|
||||||
if (state != VoiceState.UPLOADING) return
|
if (state != VoiceState.UPLOADING) return
|
||||||
LogUtils.d(TAG, "📤 上传完成 | 成功: $success | 基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment")
|
LogUtils.d(TAG, "📤 上传完成 | 成功: $success")
|
||||||
|
|
||||||
if (!success) {
|
if (!success) {
|
||||||
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
||||||
@ -569,7 +304,7 @@ class VoiceController(
|
|||||||
}
|
}
|
||||||
|
|
||||||
private fun resetToWaitSpeech() {
|
private fun resetToWaitSpeech() {
|
||||||
LogUtils.d(TAG, "🔄 重置到等待说话 | 基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment | 已标记无效说话: $hasInvalidSpeech")
|
LogUtils.d(TAG, "🔄 重置到等待说话 | 已标记无效说话: $hasInvalidSpeech")
|
||||||
val now = System.currentTimeMillis()
|
val now = System.currentTimeMillis()
|
||||||
if (now - lastInvalidResetMs < INVALID_RESET_DEBOUNCE_MS) {
|
if (now - lastInvalidResetMs < INVALID_RESET_DEBOUNCE_MS) {
|
||||||
LogUtils.d(TAG, "🛡 防抖:1.5秒内重复无效语音,跳过重置")
|
LogUtils.d(TAG, "🛡 防抖:1.5秒内重复无效语音,跳过重置")
|
||||||
@ -579,13 +314,12 @@ class VoiceController(
|
|||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
vadManager.reset()
|
vadManager.reset()
|
||||||
vadStarted = false
|
vadStarted = false
|
||||||
resetRealtimeStats()
|
|
||||||
state = VoiceState.WAIT_SPEECH
|
state = VoiceState.WAIT_SPEECH
|
||||||
if (waitSpeechFailStartMs == 0L) waitSpeechFailStartMs = System.currentTimeMillis()
|
if (waitSpeechFailStartMs == 0L) waitSpeechFailStartMs = System.currentTimeMillis()
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun resetAll() {
|
private fun resetAll() {
|
||||||
LogUtils.d(TAG, "🔄 重置所有状态 | 基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment | 本次超时类型: $currentTimeoutType")
|
LogUtils.d(TAG, "🔄 重置所有状态 | 本次超时类型: $currentTimeoutType")
|
||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
preBuffer.clear()
|
preBuffer.clear()
|
||||||
vadManager.reset()
|
vadManager.reset()
|
||||||
@ -593,24 +327,17 @@ class VoiceController(
|
|||||||
vadStarted = false
|
vadStarted = false
|
||||||
waitSpeechStartMs = 0L
|
waitSpeechStartMs = 0L
|
||||||
waitSpeechFailStartMs = 0L
|
waitSpeechFailStartMs = 0L
|
||||||
envNoiseBuffer.clear()
|
|
||||||
currentEnvBaseline = 0.001f
|
|
||||||
isNoisyEnvironment = false
|
|
||||||
resetRealtimeStats()
|
|
||||||
hasInvalidSpeech = false
|
hasInvalidSpeech = false
|
||||||
currentTimeoutType = TimeoutType.IDLE_TIMEOUT
|
currentTimeoutType = TimeoutType.IDLE_TIMEOUT
|
||||||
state = VoiceState.WAIT_WAKEUP
|
state = VoiceState.WAIT_WAKEUP
|
||||||
}
|
}
|
||||||
|
|
||||||
fun release() {
|
fun release() {
|
||||||
LogUtils.d(TAG, "🔌 释放资源 | 最终基线: $currentEnvBaseline | 嘈杂环境: $isNoisyEnvironment")
|
LogUtils.d(TAG, "🔌 释放资源")
|
||||||
wakeupManager.release()
|
wakeupManager.release()
|
||||||
vadManager.reset()
|
vadManager.reset()
|
||||||
envNoiseBuffer.clear()
|
|
||||||
resetRealtimeStats()
|
|
||||||
hasInvalidSpeech = false
|
hasInvalidSpeech = false
|
||||||
currentTimeoutType = TimeoutType.IDLE_TIMEOUT
|
currentTimeoutType = TimeoutType.IDLE_TIMEOUT
|
||||||
isNoisyEnvironment = false
|
|
||||||
|
|
||||||
runCatching {
|
runCatching {
|
||||||
SpeakerRecognition.extractor.release()
|
SpeakerRecognition.extractor.release()
|
||||||
@ -638,24 +365,14 @@ class VoiceController(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 阈值配置数据类
|
|
||||||
private data class ThresholdConfig(
|
|
||||||
val energyThreshold: Float,
|
|
||||||
val vadRatioThreshold: Float,
|
|
||||||
val minScore: Int,
|
|
||||||
val scene: String
|
|
||||||
)
|
|
||||||
|
|
||||||
/* ================= 核心:极简版声纹验证 ================= */
|
|
||||||
private fun verifySpeaker(audio: FloatArray): Boolean {
|
private fun verifySpeaker(audio: FloatArray): Boolean {
|
||||||
if (audio.isEmpty()) {
|
if (audio.isEmpty()) {
|
||||||
LogUtils.w(TAG, "❌ 待验证音频为空,声纹验证失败")
|
LogUtils.w(TAG, "❌ 待验证音频为空,声纹验证失败")
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1. 裁剪音频:只保留本次录音的有效部分(解决时长不匹配问题)
|
// 1. 裁剪音频:只保留本次录音的有效部分
|
||||||
val audioDurationMs = (audio.size.toFloat() / SAMPLE_RATE * 1000).toLong()
|
val audioDurationMs = (audio.size.toFloat() / SAMPLE_RATE * 1000).toLong()
|
||||||
// 只保留最后 N 毫秒的音频(N = 实际录音时长),避免缓存旧音频
|
|
||||||
val validAudio = if (audioDurationMs > 0) {
|
val validAudio = if (audioDurationMs > 0) {
|
||||||
val validSampleCount = (audioDurationMs * SAMPLE_RATE / 1000).toInt()
|
val validSampleCount = (audioDurationMs * SAMPLE_RATE / 1000).toInt()
|
||||||
if (validSampleCount < audio.size) {
|
if (validSampleCount < audio.size) {
|
||||||
@ -667,45 +384,44 @@ class VoiceController(
|
|||||||
audio
|
audio
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. 分场景选阈值(无容错,只调阈值)
|
|
||||||
val finalThreshold = when {
|
|
||||||
audioDurationMs < SHORT_AUDIO_DURATION_MS -> SPEAKER_THRESHOLD_SHORT
|
|
||||||
isNoisyEnvironment -> SPEAKER_THRESHOLD_NOISY
|
|
||||||
else -> SPEAKER_THRESHOLD_QUIET
|
|
||||||
}
|
|
||||||
|
|
||||||
var stream: OnlineStream? = null
|
var stream: OnlineStream? = null
|
||||||
return try {
|
|
||||||
stream = SpeakerRecognition.extractor.createStream()
|
|
||||||
stream.acceptWaveform(samples = validAudio, sampleRate = SAMPLE_RATE) // 用裁剪后的音频验证
|
|
||||||
stream.inputFinished()
|
|
||||||
|
|
||||||
if (!SpeakerRecognition.extractor.isReady(stream)) {
|
// 使用 runCatching 统一处理异常
|
||||||
|
return runCatching {
|
||||||
|
stream = SpeakerRecognition.extractor.createStream()
|
||||||
|
|
||||||
|
// 处理音频数据
|
||||||
|
stream?.acceptWaveform(samples = validAudio, sampleRate = SAMPLE_RATE)
|
||||||
|
stream?.inputFinished()
|
||||||
|
|
||||||
|
// 检查 stream 是否就绪
|
||||||
|
if (stream == null || !SpeakerRecognition.extractor.isReady(stream)) {
|
||||||
LogUtils.w(TAG, "❌ 音频Stream未就绪,验证失败")
|
LogUtils.w(TAG, "❌ 音频Stream未就绪,验证失败")
|
||||||
return false
|
return@runCatching false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 计算特征并验证
|
||||||
val embedding = SpeakerRecognition.extractor.compute(stream)
|
val embedding = SpeakerRecognition.extractor.compute(stream)
|
||||||
|
|
||||||
// 3. 纯验证逻辑:过就过,不过就拒绝
|
|
||||||
speakerManagerLock.withLock {
|
speakerManagerLock.withLock {
|
||||||
val verifyPass = SpeakerRecognition.manager.verify(
|
val verifyPass = SpeakerRecognition.manager.verify(
|
||||||
name = CURRENT_USER_ID,
|
name = CURRENT_USER_ID,
|
||||||
embedding = embedding,
|
embedding = embedding,
|
||||||
threshold = finalThreshold
|
threshold = SPEAKER_THRESHOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
// 打印关键信息(补充裁剪后时长)
|
LogUtils.d(TAG, "📊 声纹验证 | 统一阈值: $SPEAKER_THRESHOLD | 通过: $verifyPass | 验证时长: ${(validAudio.size.toFloat()/SAMPLE_RATE*1000).toLong()}ms")
|
||||||
LogUtils.d(TAG, "📊 声纹验证 | 阈值: $finalThreshold | 通过: $verifyPass | 嘈杂环境: $isNoisyEnvironment | 原始时长: ${audioDurationMs}ms | 验证时长: ${(validAudio.size.toFloat()/SAMPLE_RATE*1000).toLong()}ms")
|
verifyPass
|
||||||
|
|
||||||
// 无任何容错:验证结果就是最终结果
|
|
||||||
return verifyPass
|
|
||||||
}
|
}
|
||||||
} catch (e: Exception) {
|
}.onFailure { e ->
|
||||||
|
// 处理所有异常情况
|
||||||
LogUtils.e(TAG, "❌ 声纹验证异常,拒绝", e)
|
LogUtils.e(TAG, "❌ 声纹验证异常,拒绝", e)
|
||||||
return false
|
}.also {
|
||||||
} finally {
|
// 确保 stream 资源释放
|
||||||
|
runCatching {
|
||||||
stream?.release()
|
stream?.release()
|
||||||
|
}.onFailure { e ->
|
||||||
|
LogUtils.w(TAG, "⚠️ 释放 Stream 资源失败", e)
|
||||||
}
|
}
|
||||||
|
}.getOrDefault(false) // 异常时默认返回 false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -26,7 +26,8 @@ class WakeupManager(assetManager: AssetManager, function: () -> Unit) {
|
|||||||
val config = KeywordSpotterConfig(
|
val config = KeywordSpotterConfig(
|
||||||
featConfig = featConfig,
|
featConfig = featConfig,
|
||||||
modelConfig = modelConfig,
|
modelConfig = modelConfig,
|
||||||
keywordsFile = keywordsFile
|
keywordsFile = keywordsFile,
|
||||||
|
keywordsThreshold = 0.2f
|
||||||
)
|
)
|
||||||
|
|
||||||
kws = KeywordSpotter(assetManager, config)
|
kws = KeywordSpotter(assetManager, config)
|
||||||
@ -40,9 +41,9 @@ class WakeupManager(assetManager: AssetManager, function: () -> Unit) {
|
|||||||
/** ⭐ 永远喂 KWS */
|
/** ⭐ 永远喂 KWS */
|
||||||
fun acceptAudio(samples: FloatArray) {
|
fun acceptAudio(samples: FloatArray) {
|
||||||
val s = stream ?: return
|
val s = stream ?: return
|
||||||
for (i in samples.indices) {
|
// for (i in samples.indices) {
|
||||||
samples[i] *= 2.5f
|
// samples[i] *= 2.5f
|
||||||
}
|
// }
|
||||||
s.acceptWaveform(samples, sampleRate)
|
s.acceptWaveform(samples, sampleRate)
|
||||||
|
|
||||||
while (kws.isReady(s)) {
|
while (kws.isReady(s)) {
|
||||||
|
|||||||
@ -159,27 +159,27 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mViewModel?.uploadVoiceLiveData?.observe(this) {
|
// mViewModel?.uploadVoiceLiveData?.observe(this) {
|
||||||
when (it) {
|
// when (it) {
|
||||||
is ApiResult.Error -> {
|
// is ApiResult.Error -> {
|
||||||
Toaster.showShort("上传失败")
|
// Toaster.showShort("上传失败")
|
||||||
voiceController?.onUploadFinished(true)
|
// voiceController?.onUploadFinished(true)
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
is ApiResult.Success<String> -> {
|
// is ApiResult.Success<String> -> {
|
||||||
if (!TextUtils.isEmpty(it.data)) {
|
// if (!TextUtils.isEmpty(it.data)) {
|
||||||
Toaster.showShort(it.data)
|
// Toaster.showShort(it.data)
|
||||||
}
|
// }
|
||||||
Toaster.showShort(it)
|
// Toaster.showShort(it)
|
||||||
voiceController?.onUploadFinished(true)
|
// voiceController?.onUploadFinished(true)
|
||||||
startPlayTimeoutJob?.cancel()
|
// startPlayTimeoutJob?.cancel()
|
||||||
startPlayTimeoutJob = lifecycleScope.launch {
|
// startPlayTimeoutJob = lifecycleScope.launch {
|
||||||
delay(PLAY_WAIT_TIMEOUT_MS)
|
// delay(PLAY_WAIT_TIMEOUT_MS)
|
||||||
voiceController?.onPlayEndBackend()
|
// voiceController?.onPlayEndBackend()
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -226,12 +226,12 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
// 1
|
// 1
|
||||||
// )
|
// )
|
||||||
// loadLocalJsonAndPlay()
|
// loadLocalJsonAndPlay()
|
||||||
// val file = File(
|
val file = File(
|
||||||
// getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
|
getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
|
||||||
// "xxx.wav"
|
"xxx.wav"
|
||||||
// )
|
)
|
||||||
// AudioDebugUtil.saveFloatPcmAsWav(audio, file)
|
AudioDebugUtil.saveFloatPcmAsWav(audio, file)
|
||||||
// LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
|
LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
|
||||||
// lifecycleScope.launch(Dispatchers.Main) {
|
// lifecycleScope.launch(Dispatchers.Main) {
|
||||||
//
|
//
|
||||||
// mVerticalAnimator?.show()
|
// mVerticalAnimator?.show()
|
||||||
@ -280,9 +280,9 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
when (msg.msgContentType) {
|
when (msg.msgContentType) {
|
||||||
MessageContentType.RECEIVE_VOICE_STREAM.msgContentType -> {
|
MessageContentType.RECEIVE_VOICE_STREAM.msgContentType -> {
|
||||||
lifecycleScope.launch(Dispatchers.IO) {
|
lifecycleScope.launch(Dispatchers.IO) {
|
||||||
// UnityPlayerHolder.getInstance().startTalking(msg.content)
|
//// UnityPlayerHolder.getInstance().startTalking(msg.content)
|
||||||
val audioDTO = GsonUtils.fromJson(msg.content, AudioDTO::class.java)
|
// val audioDTO = GsonUtils.fromJson(msg.content, LmChatDTO::class.java)
|
||||||
// voicePlayer.onAudioDTO(audioDTO)
|
// voicePlayer.handleSlice(audioDTO)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -586,9 +586,9 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
super.onEvent(eventSource, id, type, data)
|
super.onEvent(eventSource, id, type, data)
|
||||||
LogUtils.eTag("lrsxxx", "onEvent:${data}")
|
LogUtils.eTag("lrsxxx", "onEvent:${data}")
|
||||||
runCatching {
|
runCatching {
|
||||||
// val audioDTO = GsonUtils.fromJson(data, LmChatDTO::class.java)
|
val audioDTO = GsonUtils.fromJson(data, LmChatDTO::class.java)
|
||||||
// voicePlayer.handleSlice(audioDTO)
|
voicePlayer.handleSlice(audioDTO)
|
||||||
UnityPlayerHolder.getInstance().startTalking(data)
|
// UnityPlayerHolder.getInstance().startTalking(data)
|
||||||
}.onFailure {
|
}.onFailure {
|
||||||
LogUtils.eTag("lrsxxx", "解析音频数据失败", it)
|
LogUtils.eTag("lrsxxx", "解析音频数据失败", it)
|
||||||
voiceController?.onUploadFinished(false)
|
voiceController?.onUploadFinished(false)
|
||||||
|
|||||||
@ -4,92 +4,134 @@ import android.media.AudioAttributes
|
|||||||
import android.media.AudioFormat
|
import android.media.AudioFormat
|
||||||
import android.media.AudioManager
|
import android.media.AudioManager
|
||||||
import android.media.AudioTrack
|
import android.media.AudioTrack
|
||||||
import kotlinx.coroutines.CoroutineScope
|
import kotlinx.coroutines.*
|
||||||
import kotlinx.coroutines.Dispatchers
|
|
||||||
import kotlinx.coroutines.SupervisorJob
|
|
||||||
import kotlinx.coroutines.cancel
|
|
||||||
import kotlinx.coroutines.delay
|
|
||||||
import kotlinx.coroutines.isActive
|
|
||||||
import kotlinx.coroutines.launch
|
|
||||||
import java.util.ArrayDeque
|
import java.util.ArrayDeque
|
||||||
import java.util.Queue
|
import java.util.Queue
|
||||||
import java.util.concurrent.locks.ReentrantLock
|
import java.util.concurrent.locks.ReentrantLock
|
||||||
|
import kotlin.concurrent.withLock
|
||||||
|
|
||||||
// ====================== PCM 播放器 ======================
|
|
||||||
class PcmStreamPlayer(
|
class PcmStreamPlayer(
|
||||||
private val sampleRate: Int
|
private val sampleRate: Int
|
||||||
) {
|
) {
|
||||||
|
|
||||||
var onPlayEnd: (() -> Unit)? = null
|
|
||||||
|
|
||||||
private val scope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
|
private val scope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
|
||||||
private val bufferQueue: Queue<ByteArray> = ArrayDeque()
|
private val bufferQueue: Queue<ByteArray> = ArrayDeque()
|
||||||
private val queueLock = ReentrantLock()
|
private val lock = ReentrantLock()
|
||||||
|
|
||||||
private var audioTrack: AudioTrack? = null
|
private var audioTrack: AudioTrack? = null
|
||||||
@Volatile
|
@Volatile
|
||||||
private var playing = true
|
private var playing = true
|
||||||
|
// 新增:标记是否已释放,防止空指针
|
||||||
|
@Volatile
|
||||||
|
private var isReleased = false
|
||||||
|
|
||||||
init {
|
init {
|
||||||
scope.launch {
|
scope.launch {
|
||||||
|
val minBufferSize = AudioTrack.getMinBufferSize(
|
||||||
|
sampleRate,
|
||||||
|
AudioFormat.CHANNEL_OUT_MONO,
|
||||||
|
AudioFormat.ENCODING_PCM_16BIT
|
||||||
|
)
|
||||||
|
|
||||||
audioTrack = AudioTrack(
|
audioTrack = AudioTrack(
|
||||||
AudioAttributes.Builder()
|
AudioAttributes.Builder()
|
||||||
.setUsage(AudioAttributes.USAGE_MEDIA)
|
.setUsage(AudioAttributes.USAGE_MEDIA)
|
||||||
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
|
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
|
||||||
.build(),
|
.build(),
|
||||||
AudioFormat.Builder()
|
AudioFormat.Builder()
|
||||||
.setEncoding(AudioFormat.ENCODING_PCM_16BIT)
|
|
||||||
.setSampleRate(sampleRate)
|
.setSampleRate(sampleRate)
|
||||||
|
.setEncoding(AudioFormat.ENCODING_PCM_16BIT)
|
||||||
.setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
|
.setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
|
||||||
.build(),
|
.build(),
|
||||||
AudioTrack.getMinBufferSize(
|
minBufferSize * 2,
|
||||||
sampleRate,
|
|
||||||
AudioFormat.CHANNEL_OUT_MONO,
|
|
||||||
AudioFormat.ENCODING_PCM_16BIT
|
|
||||||
),
|
|
||||||
AudioTrack.MODE_STREAM,
|
AudioTrack.MODE_STREAM,
|
||||||
AudioManager.AUDIO_SESSION_ID_GENERATE
|
AudioManager.AUDIO_SESSION_ID_GENERATE
|
||||||
)
|
)
|
||||||
|
|
||||||
audioTrack?.play()
|
// 空安全检查:防止AudioTrack创建失败
|
||||||
|
audioTrack?.play() ?: run {
|
||||||
|
playing = false
|
||||||
|
isReleased = true
|
||||||
|
return@launch
|
||||||
|
}
|
||||||
|
|
||||||
val silent = ByteArray(2048)
|
// 🔥 AudioTrack 预热(非常关键)
|
||||||
|
warmUp()
|
||||||
|
|
||||||
while (isActive && playing) {
|
val silent = ByteArray(1024)
|
||||||
val pcm = queueLock.run { bufferQueue.poll() }
|
|
||||||
|
|
||||||
if (pcm != null) {
|
while (isActive && playing && !isReleased) {
|
||||||
|
val pcm = lock.withLock {
|
||||||
|
if (bufferQueue.isEmpty()) null else bufferQueue.poll()
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pcm != null && !isReleased) {
|
||||||
audioTrack?.write(pcm, 0, pcm.size)
|
audioTrack?.write(pcm, 0, pcm.size)
|
||||||
} else {
|
} else if (!isReleased) {
|
||||||
audioTrack?.write(silent, 0, silent.size)
|
audioTrack?.write(silent, 0, silent.size)
|
||||||
delay(5)
|
delay(5)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
audioTrack?.stop()
|
// 释放前的空安全检查
|
||||||
audioTrack?.release()
|
audioTrack?.takeIf { !isReleased }?.stop()
|
||||||
|
audioTrack?.takeIf { !isReleased }?.release()
|
||||||
audioTrack = null
|
audioTrack = null
|
||||||
|
|
||||||
onPlayEnd?.invoke()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private fun warmUp() {
|
||||||
|
if (isReleased) return
|
||||||
|
val warmUpMs = 50
|
||||||
|
val bytes = sampleRate * 2 * warmUpMs / 1000
|
||||||
|
val silence = ByteArray(bytes)
|
||||||
|
audioTrack?.write(silence, 0, silence.size)
|
||||||
|
audioTrack?.write(silence, 0, silence.size)
|
||||||
|
}
|
||||||
|
|
||||||
fun pushPcm(pcm: ByteArray) {
|
fun pushPcm(pcm: ByteArray) {
|
||||||
queueLock.run { bufferQueue.add(pcm) }
|
if (isReleased) return
|
||||||
|
lock.withLock {
|
||||||
|
bufferQueue.add(pcm)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun clearQueue() {
|
fun clearQueue() {
|
||||||
queueLock.run { bufferQueue.clear() }
|
if (isReleased) return
|
||||||
|
lock.withLock {
|
||||||
|
bufferQueue.clear()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun queueEmpty(): Boolean = queueLock.run { bufferQueue.isEmpty() }
|
fun queueEmpty(): Boolean {
|
||||||
|
if (isReleased) return true
|
||||||
|
return lock.withLock { bufferQueue.isEmpty() }
|
||||||
|
}
|
||||||
|
|
||||||
|
// 核心新增:强制停止当前播放,清空所有缓冲区
|
||||||
|
fun forceStop() {
|
||||||
|
if (isReleased) return
|
||||||
|
lock.withLock {
|
||||||
|
bufferQueue.clear()
|
||||||
|
}
|
||||||
|
// 清空AudioTrack内部缓冲区,立即停止发声
|
||||||
|
audioTrack?.flush()
|
||||||
|
audioTrack?.pause() // 暂停硬件播放,避免残留静音数据
|
||||||
|
}
|
||||||
|
|
||||||
|
// 核心新增:重启播放器(用于停止后播放新音频)
|
||||||
|
fun restart() {
|
||||||
|
if (isReleased) return
|
||||||
|
audioTrack?.play()
|
||||||
|
}
|
||||||
|
|
||||||
fun release() {
|
fun release() {
|
||||||
|
isReleased = true
|
||||||
playing = false
|
playing = false
|
||||||
queueLock.run { bufferQueue.clear() }
|
clearQueue()
|
||||||
scope.cancel()
|
scope.cancel()
|
||||||
|
audioTrack?.stop()
|
||||||
|
audioTrack?.release()
|
||||||
|
audioTrack = null
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -73,6 +73,7 @@ immersionbar-components = { module = "com.geyifeng.immersionbar:immersionbar-com
|
|||||||
immersionbar-ktx = { module = "com.geyifeng.immersionbar:immersionbar-ktx", version.ref = "immersionbarKtx" }
|
immersionbar-ktx = { module = "com.geyifeng.immersionbar:immersionbar-ktx", version.ref = "immersionbarKtx" }
|
||||||
immersionbar = { module = "com.geyifeng.immersionbar:immersionbar", version.ref = "immersionbar" }
|
immersionbar = { module = "com.geyifeng.immersionbar:immersionbar", version.ref = "immersionbar" }
|
||||||
androidx-lifecycle-runtime-android = { group = "androidx.lifecycle", name = "lifecycle-runtime-android", version.ref = "lifecycleRuntimeAndroid" }
|
androidx-lifecycle-runtime-android = { group = "androidx.lifecycle", name = "lifecycle-runtime-android", version.ref = "lifecycleRuntimeAndroid" }
|
||||||
|
|
||||||
[plugins]
|
[plugins]
|
||||||
android-application = { id = "com.android.application", version.ref = "agp" }
|
android-application = { id = "com.android.application", version.ref = "agp" }
|
||||||
kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" }
|
kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" }
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user