优化后的代码
This commit is contained in:
parent
0806376687
commit
a1c430bd40
@ -9,7 +9,7 @@ class VoiceController(
|
||||
assetManager: AssetManager,
|
||||
private val onWakeup: () -> Unit,
|
||||
private val onFinalAudio: (FloatArray) -> Unit,
|
||||
private val idleTimeoutSeconds: Int = 8,
|
||||
private val idleTimeoutSeconds: Int = 10,
|
||||
private val maxRecordingSeconds: Int = 10,
|
||||
private val onStateChanged: ((VoiceState) -> Unit)? = null,
|
||||
private val stopBackendAudio: (() -> Unit)? = null
|
||||
@ -62,7 +62,6 @@ class VoiceController(
|
||||
private val MIN_SPEECH_MS = 1000L
|
||||
private val MIN_AVG_ENERGY = 0.02f
|
||||
|
||||
private val WAIT_SPEECH_TIMEOUT_MS = 8000L
|
||||
|
||||
/* ================= 音频入口 ================= */
|
||||
|
||||
@ -93,7 +92,7 @@ class VoiceController(
|
||||
|
||||
VoiceState.WAIT_SPEECH -> {
|
||||
|
||||
if (waitSpeechStartMs > 0 && now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS) {
|
||||
if (waitSpeechStartMs > 0 && now - waitSpeechStartMs >= idleTimeoutSeconds * 1000) {
|
||||
Log.d(TAG, "⏱ Wakeup but no speech → WAIT_WAKEUP")
|
||||
resetAll()
|
||||
return
|
||||
@ -196,9 +195,17 @@ class VoiceController(
|
||||
return
|
||||
}
|
||||
|
||||
val audio = audioBuffer.toFloatArray()
|
||||
val vadRatio = vadManager.activeSpeechRatio()
|
||||
val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
|
||||
|
||||
// ⭐ 多人说话检测:短时峰值方差
|
||||
if (shortTermPeakVariance(audio) > 0.015f) {
|
||||
Log.d(TAG, "❌ Short-term peak variance too high → likely multi-speaker")
|
||||
resetToWaitSpeech()
|
||||
return
|
||||
}
|
||||
|
||||
var score = 0
|
||||
when {
|
||||
duration >= 4000 -> score += 3
|
||||
@ -230,12 +237,36 @@ class VoiceController(
|
||||
}
|
||||
|
||||
waitSpeechFailStartMs = 0L
|
||||
val finalAudio = audioBuffer.toFloatArray()
|
||||
audioBuffer.clear()
|
||||
state = VoiceState.UPLOADING
|
||||
onFinalAudio(finalAudio)
|
||||
onFinalAudio(audio)
|
||||
}
|
||||
|
||||
/**
 * Computes the population variance of per-frame peak amplitudes, used by the
 * caller as a multi-speaker heuristic.
 *
 * The input is cut into consecutive, non-overlapping frames of 160 samples;
 * trailing samples that do not fill a whole frame are ignored. For each frame
 * the peak is the largest absolute sample value.
 *
 * @param audio samples to analyse.
 * @return the variance of the frame peaks, or `0f` when [audio] holds fewer
 *   samples than one frame (previously this case produced NaN).
 */
private fun shortTermPeakVariance(audio: FloatArray): Float {
    val frameSize = 160 // 10 ms per frame, assuming 16 kHz input
    val frameCount = audio.size / frameSize
    // No complete frame: there is nothing to measure, so report zero variance instead of NaN.
    if (frameCount == 0) return 0f

    // Scan each frame in place; no per-frame array copy and no boxed Float list.
    val peaks = FloatArray(frameCount) { frameIndex ->
        val start = frameIndex * frameSize
        var peak = 0f
        for (i in start until start + frameSize) {
            val magnitude = kotlin.math.abs(audio[i])
            if (magnitude > peak) peak = magnitude
        }
        peak
    }

    val mean = peaks.average().toFloat()
    // Squared deviations are formed in Float and accumulated in Double.
    var squaredDeviationSum = 0.0
    for (peak in peaks) {
        val deviation = peak - mean
        squaredDeviationSum += deviation * deviation
    }
    return (squaredDeviationSum / frameCount).toFloat()
}
|
||||
|
||||
/**
 * Returns the peak of an audio frame: the largest absolute sample value.
 *
 * Despite the name, no RMS is computed. An empty frame yields `0f`.
 *
 * @param audio samples of one frame.
 * @return the maximum of `|sample|` over [audio], never below `0f`.
 */
private fun calcPeakRms(audio: FloatArray): Float =
    audio.fold(0f) { highest, sample ->
        val magnitude = kotlin.math.abs(sample)
        // Strict comparison keeps the running peak when the sample is NaN.
        if (magnitude > highest) magnitude else highest
    }
|
||||
|
||||
|
||||
/* ================= 播放回调 ================= */
|
||||
|
||||
/** Playback callback: switches the controller state to [VoiceState.PLAYING_PROMPT]. */
fun onPlayStartPrompt() {
    state = VoiceState.PLAYING_PROMPT
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user