优化代码
This commit is contained in:
parent
c4a79eddd2
commit
761e98f1b6
@ -8,85 +8,83 @@ import kotlin.math.sqrt
|
||||
class VadManager(
|
||||
assetManager: AssetManager,
|
||||
private val onSpeechStart: () -> Unit,
|
||||
private val onSpeechEnd: (Float, Float) -> Unit // avgEnergy, peakRms
|
||||
private val onSpeechEnd: (avgEnergy: Float, peakRms: Float) -> Unit
|
||||
) {
|
||||
|
||||
private val vad: Vad
|
||||
private var isSpeaking = false
|
||||
private var lastSpeechTime = 0L
|
||||
|
||||
/** 有效语音统计 */
|
||||
private val END_SILENCE_MS = 800L
|
||||
|
||||
private var activeFrameCount = 0
|
||||
private var activeSpeechFrameCount = 0
|
||||
|
||||
private var speechEnergySum = 0f
|
||||
private var speechFrameCount = 0
|
||||
private var peakRms = 0f
|
||||
|
||||
private val END_SILENCE_MS = 800L
|
||||
|
||||
init {
|
||||
val config = getVadModelConfig(0)
|
||||
?: throw IllegalStateException("VAD config not found")
|
||||
val config = getVadModelConfig(0) ?: throw IllegalStateException("VAD config not found")
|
||||
vad = Vad(assetManager, config)
|
||||
}
|
||||
|
||||
/** 外部调用的音频输入 */
|
||||
fun accept(samples: FloatArray) {
|
||||
val now = System.currentTimeMillis()
|
||||
|
||||
vad.acceptWaveform(samples)
|
||||
val hasSpeech = vad.isSpeechDetected()
|
||||
|
||||
// RMS & peak 统计
|
||||
val rms = calcRms(samples)
|
||||
if (hasSpeech) {
|
||||
speechEnergySum += rms
|
||||
speechFrameCount++
|
||||
peakRms = maxOf(peakRms, rms)
|
||||
}
|
||||
|
||||
// VAD逻辑
|
||||
if (hasSpeech) {
|
||||
lastSpeechTime = now
|
||||
if (!isSpeaking) {
|
||||
isSpeaking = true
|
||||
resetStats()
|
||||
onSpeechStart()
|
||||
}
|
||||
|
||||
// 累计有效语音能量和峰值
|
||||
speechEnergySum += rms
|
||||
speechFrameCount++
|
||||
if (rms > peakRms) peakRms = rms
|
||||
|
||||
activeFrameCount++
|
||||
activeSpeechFrameCount++
|
||||
} else {
|
||||
if (isSpeaking) activeFrameCount++
|
||||
// 检查结束
|
||||
if (isSpeaking && now - lastSpeechTime >= END_SILENCE_MS) {
|
||||
if (isSpeaking) {
|
||||
activeFrameCount++
|
||||
if (now - lastSpeechTime >= END_SILENCE_MS) {
|
||||
isSpeaking = false
|
||||
val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
|
||||
onSpeechEnd(avgEnergy, peakRms)
|
||||
val peak = peakRms
|
||||
onSpeechEnd(avgEnergy, peak)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** 统计有效语音比例,用于 VoiceController */
|
||||
fun activeSpeechRatio(): Float {
|
||||
if (activeFrameCount == 0) return 0f
|
||||
return activeSpeechFrameCount.toFloat() / activeFrameCount
|
||||
}
|
||||
fun activeSpeechRatio(): Float = if (activeFrameCount == 0) 0f else activeSpeechFrameCount.toFloat() / activeFrameCount
|
||||
|
||||
fun reset() {
|
||||
isSpeaking = false
|
||||
lastSpeechTime = 0L
|
||||
activeFrameCount = 0
|
||||
activeSpeechFrameCount = 0
|
||||
resetStats()
|
||||
vad.reset()
|
||||
}
|
||||
|
||||
private fun resetStats() {
|
||||
speechEnergySum = 0f
|
||||
speechFrameCount = 0
|
||||
peakRms = 0f
|
||||
vad.reset()
|
||||
}
|
||||
|
||||
private fun calcRms(audio: FloatArray): Float {
|
||||
private fun calcRms(samples: FloatArray): Float {
|
||||
var sum = 0f
|
||||
for (v in audio) sum += v * v
|
||||
return sqrt(sum / audio.size)
|
||||
var peak = 0f
|
||||
for (v in samples) {
|
||||
sum += v * v
|
||||
peak = maxOf(peak, kotlin.math.abs(v))
|
||||
}
|
||||
return sqrt(sum / samples.size)
|
||||
}
|
||||
}
|
||||
|
||||
@ -107,16 +107,10 @@ class VoiceController(
|
||||
|
||||
/* ================= 唤醒 ================= */
|
||||
private fun handleWakeupEvent() {
|
||||
when (state) {
|
||||
VoiceState.UPLOADING -> return
|
||||
VoiceState.RECORDING,
|
||||
VoiceState.PLAYING_BACKEND -> {
|
||||
if (state == VoiceState.UPLOADING) return
|
||||
stopBackendAudio?.invoke()
|
||||
enterWakeup(interrupt = true)
|
||||
}
|
||||
else -> enterWakeup(interrupt = false)
|
||||
}
|
||||
}
|
||||
|
||||
private fun enterWakeup(interrupt: Boolean) {
|
||||
waitSpeechFailStartMs = System.currentTimeMillis()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user