优化后的代码

This commit is contained in:
林若思 2026-01-03 15:31:49 +08:00
parent 0806376687
commit a1c430bd40

View File

@ -9,7 +9,7 @@ class VoiceController(
assetManager: AssetManager, assetManager: AssetManager,
private val onWakeup: () -> Unit, private val onWakeup: () -> Unit,
private val onFinalAudio: (FloatArray) -> Unit, private val onFinalAudio: (FloatArray) -> Unit,
private val idleTimeoutSeconds: Int = 8, private val idleTimeoutSeconds: Int = 10,
private val maxRecordingSeconds: Int = 10, private val maxRecordingSeconds: Int = 10,
private val onStateChanged: ((VoiceState) -> Unit)? = null, private val onStateChanged: ((VoiceState) -> Unit)? = null,
private val stopBackendAudio: (() -> Unit)? = null private val stopBackendAudio: (() -> Unit)? = null
@ -62,7 +62,6 @@ class VoiceController(
private val MIN_SPEECH_MS = 1000L private val MIN_SPEECH_MS = 1000L
private val MIN_AVG_ENERGY = 0.02f private val MIN_AVG_ENERGY = 0.02f
private val WAIT_SPEECH_TIMEOUT_MS = 8000L
/* ================= 音频入口 ================= */ /* ================= 音频入口 ================= */
@ -93,7 +92,7 @@ class VoiceController(
VoiceState.WAIT_SPEECH -> { VoiceState.WAIT_SPEECH -> {
if (waitSpeechStartMs > 0 && now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS) { if (waitSpeechStartMs > 0 && now - waitSpeechStartMs >= idleTimeoutSeconds * 1000) {
Log.d(TAG, "⏱ Wakeup but no speech → WAIT_WAKEUP") Log.d(TAG, "⏱ Wakeup but no speech → WAIT_WAKEUP")
resetAll() resetAll()
return return
@ -196,9 +195,17 @@ class VoiceController(
return return
} }
val audio = audioBuffer.toFloatArray()
val vadRatio = vadManager.activeSpeechRatio() val vadRatio = vadManager.activeSpeechRatio()
val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
// ⭐ 多人说话检测:短时峰值方差
if (shortTermPeakVariance(audio) > 0.015f) {
Log.d(TAG, "❌ Short-term peak variance too high → likely multi-speaker")
resetToWaitSpeech()
return
}
var score = 0 var score = 0
when { when {
duration >= 4000 -> score += 3 duration >= 4000 -> score += 3
@ -230,12 +237,36 @@ class VoiceController(
} }
waitSpeechFailStartMs = 0L waitSpeechFailStartMs = 0L
val finalAudio = audioBuffer.toFloatArray()
audioBuffer.clear() audioBuffer.clear()
state = VoiceState.UPLOADING state = VoiceState.UPLOADING
onFinalAudio(finalAudio) onFinalAudio(audio)
} }
/**
 * Computes the population variance of per-frame peak amplitudes, used as a
 * multi-speaker heuristic.
 *
 * The signal is cut into consecutive, non-overlapping frames of 160 samples;
 * a trailing partial frame is ignored. For each frame the peak is the largest
 * absolute sample value, and the result is the mean squared deviation of those
 * peaks from their mean.
 *
 * @param audio mono samples to analyse.
 * @return the variance of the frame peaks, or `0f` when [audio] does not contain
 *   a single complete frame (previously this case produced `NaN`).
 */
private fun shortTermPeakVariance(audio: FloatArray): Float {
    val frameSize = 160 // 10 ms per frame, assuming 16 kHz input
    val frameCount = audio.size / frameSize
    // Averaging an empty set yields NaN; treat "no complete frame" as zero variance.
    if (frameCount == 0) return 0f

    // Scan each frame in place: no per-frame array copies and no boxed Float list.
    val peaks = FloatArray(frameCount) { frameIndex ->
        val start = frameIndex * frameSize
        var peak = 0f
        for (i in start until start + frameSize) {
            val magnitude = kotlin.math.abs(audio[i])
            if (magnitude > peak) peak = magnitude
        }
        peak
    }

    val mean = peaks.average().toFloat()
    // Squares are formed in Float and summed in Double, matching the precision
    // of the previous map { ... }.average() formulation.
    var squaredDeviationSum = 0.0
    for (peak in peaks) {
        val deviation = peak - mean
        squaredDeviationSum += (deviation * deviation).toDouble()
    }
    return (squaredDeviationSum / frameCount).toFloat()
}
/**
 * Returns the peak of an audio frame: the largest absolute sample value in
 * [audio], or `0f` when the array is empty. Despite the name, no RMS is computed.
 */
private fun calcPeakRms(audio: FloatArray): Float =
    audio.fold(0f) { loudest, sample ->
        val magnitude = kotlin.math.abs(sample)
        // Strict comparison: the running peak is kept when magnitude is not greater.
        if (magnitude > loudest) magnitude else loudest
    }
/* ================= 播放回调 ================= */ /* ================= 播放回调 ================= */
/** Playback callback that sets `state` to `VoiceState.PLAYING_PROMPT`; presumably invoked when prompt playback starts — confirm against caller. */
fun onPlayStartPrompt() { state = VoiceState.PLAYING_PROMPT } // ⭐ added for completeness fun onPlayStartPrompt() { state = VoiceState.PLAYING_PROMPT } // ⭐ added for completeness