优化代码
This commit is contained in:
parent
c4a79eddd2
commit
761e98f1b6
@@ -8,85 +8,83 @@ import kotlin.math.sqrt
|
|||||||
class VadManager(
|
class VadManager(
|
||||||
assetManager: AssetManager,
|
assetManager: AssetManager,
|
||||||
private val onSpeechStart: () -> Unit,
|
private val onSpeechStart: () -> Unit,
|
||||||
private val onSpeechEnd: (Float, Float) -> Unit // avgEnergy, peakRms
|
private val onSpeechEnd: (avgEnergy: Float, peakRms: Float) -> Unit
|
||||||
) {
|
) {
|
||||||
|
|
||||||
private val vad: Vad
|
private val vad: Vad
|
||||||
private var isSpeaking = false
|
private var isSpeaking = false
|
||||||
private var lastSpeechTime = 0L
|
private var lastSpeechTime = 0L
|
||||||
|
|
||||||
/** 有效语音统计 */
|
private val END_SILENCE_MS = 800L
|
||||||
|
|
||||||
private var activeFrameCount = 0
|
private var activeFrameCount = 0
|
||||||
private var activeSpeechFrameCount = 0
|
private var activeSpeechFrameCount = 0
|
||||||
|
|
||||||
private var speechEnergySum = 0f
|
private var speechEnergySum = 0f
|
||||||
private var speechFrameCount = 0
|
private var speechFrameCount = 0
|
||||||
private var peakRms = 0f
|
private var peakRms = 0f
|
||||||
|
|
||||||
private val END_SILENCE_MS = 800L
|
|
||||||
|
|
||||||
init {
|
init {
|
||||||
val config = getVadModelConfig(0)
|
val config = getVadModelConfig(0) ?: throw IllegalStateException("VAD config not found")
|
||||||
?: throw IllegalStateException("VAD config not found")
|
|
||||||
vad = Vad(assetManager, config)
|
vad = Vad(assetManager, config)
|
||||||
}
|
}
|
||||||
|
|
||||||
/** 外部调用的音频输入 */
|
|
||||||
fun accept(samples: FloatArray) {
|
fun accept(samples: FloatArray) {
|
||||||
val now = System.currentTimeMillis()
|
val now = System.currentTimeMillis()
|
||||||
|
|
||||||
vad.acceptWaveform(samples)
|
vad.acceptWaveform(samples)
|
||||||
val hasSpeech = vad.isSpeechDetected()
|
val hasSpeech = vad.isSpeechDetected()
|
||||||
|
|
||||||
|
// RMS & peak 统计
|
||||||
val rms = calcRms(samples)
|
val rms = calcRms(samples)
|
||||||
|
if (hasSpeech) {
|
||||||
|
speechEnergySum += rms
|
||||||
|
speechFrameCount++
|
||||||
|
peakRms = maxOf(peakRms, rms)
|
||||||
|
}
|
||||||
|
|
||||||
|
// VAD逻辑
|
||||||
if (hasSpeech) {
|
if (hasSpeech) {
|
||||||
lastSpeechTime = now
|
lastSpeechTime = now
|
||||||
if (!isSpeaking) {
|
if (!isSpeaking) {
|
||||||
isSpeaking = true
|
isSpeaking = true
|
||||||
resetStats()
|
|
||||||
onSpeechStart()
|
onSpeechStart()
|
||||||
}
|
}
|
||||||
|
|
||||||
// 累计有效语音能量和峰值
|
|
||||||
speechEnergySum += rms
|
|
||||||
speechFrameCount++
|
|
||||||
if (rms > peakRms) peakRms = rms
|
|
||||||
|
|
||||||
activeFrameCount++
|
activeFrameCount++
|
||||||
activeSpeechFrameCount++
|
activeSpeechFrameCount++
|
||||||
} else {
|
} else {
|
||||||
if (isSpeaking) activeFrameCount++
|
if (isSpeaking) {
|
||||||
// 检查结束
|
activeFrameCount++
|
||||||
if (isSpeaking && now - lastSpeechTime >= END_SILENCE_MS) {
|
if (now - lastSpeechTime >= END_SILENCE_MS) {
|
||||||
isSpeaking = false
|
isSpeaking = false
|
||||||
val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
|
val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
|
||||||
onSpeechEnd(avgEnergy, peakRms)
|
val peak = peakRms
|
||||||
|
onSpeechEnd(avgEnergy, peak)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** 统计有效语音比例,用于 VoiceController */
|
fun activeSpeechRatio(): Float = if (activeFrameCount == 0) 0f else activeSpeechFrameCount.toFloat() / activeFrameCount
|
||||||
fun activeSpeechRatio(): Float {
|
|
||||||
if (activeFrameCount == 0) return 0f
|
|
||||||
return activeSpeechFrameCount.toFloat() / activeFrameCount
|
|
||||||
}
|
|
||||||
|
|
||||||
fun reset() {
|
fun reset() {
|
||||||
isSpeaking = false
|
isSpeaking = false
|
||||||
lastSpeechTime = 0L
|
lastSpeechTime = 0L
|
||||||
activeFrameCount = 0
|
activeFrameCount = 0
|
||||||
activeSpeechFrameCount = 0
|
activeSpeechFrameCount = 0
|
||||||
resetStats()
|
|
||||||
vad.reset()
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun resetStats() {
|
|
||||||
speechEnergySum = 0f
|
speechEnergySum = 0f
|
||||||
speechFrameCount = 0
|
speechFrameCount = 0
|
||||||
peakRms = 0f
|
peakRms = 0f
|
||||||
|
vad.reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun calcRms(audio: FloatArray): Float {
|
private fun calcRms(samples: FloatArray): Float {
|
||||||
var sum = 0f
|
var sum = 0f
|
||||||
for (v in audio) sum += v * v
|
var peak = 0f
|
||||||
return sqrt(sum / audio.size)
|
for (v in samples) {
|
||||||
|
sum += v * v
|
||||||
|
peak = maxOf(peak, kotlin.math.abs(v))
|
||||||
|
}
|
||||||
|
return sqrt(sum / samples.size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -107,15 +107,9 @@ class VoiceController(
|
|||||||
|
|
||||||
/* ================= 唤醒 ================= */
|
/* ================= 唤醒 ================= */
|
||||||
private fun handleWakeupEvent() {
|
private fun handleWakeupEvent() {
|
||||||
when (state) {
|
if (state == VoiceState.UPLOADING) return
|
||||||
VoiceState.UPLOADING -> return
|
stopBackendAudio?.invoke()
|
||||||
VoiceState.RECORDING,
|
enterWakeup(interrupt = true)
|
||||||
VoiceState.PLAYING_BACKEND -> {
|
|
||||||
stopBackendAudio?.invoke()
|
|
||||||
enterWakeup(interrupt = true)
|
|
||||||
}
|
|
||||||
else -> enterWakeup(interrupt = false)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun enterWakeup(interrupt: Boolean) {
|
private fun enterWakeup(interrupt: Boolean) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user