更新vad模型

This commit is contained in:
林若思 2026-01-27 11:59:40 +08:00
parent 7a0bd086e7
commit 13b08c8e7a
5 changed files with 36 additions and 19 deletions

Binary file not shown.

View File

@ -18,10 +18,13 @@ class VadManager(
private var isSpeaking = false
private var lastSpeechMs = 0L
private var lastActiveMs = 0L
private var speechStartMs = 0L // 新增:记录语音启动时间,用于启动保护
private val END_SILENCE_MS = 350L
private val RESET_IDLE_MS = 3_000L
private val MIN_RMS = 0.002f
// 核心参数优化:适配中文说话停顿
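private val END_SILENCE_MS = 350L // Silence duration that ends an utterance. NOTE: the intended 350→600 increase (to leave room for natural pauses) is NOT applied here; the value is still 350 ms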
private val RESET_IDLE_MS = 3_000L // 超长空闲重置时间,保持不变
private val MIN_RMS = 0.001f // Energy detection threshold (0.002→0.001, so quiet input is less often misjudged as silence)
private val SPEECH_START_PROTECT_MS = 90L // 语音启动保护期,避免开头停顿误判
init {
vad = Vad(
@ -29,7 +32,7 @@ class VadManager(
VadModelConfig(
sileroVadModelConfig = SileroVadModelConfig(
model = "silero_vad.onnx",
threshold = 0.5F,
threshold = 0.40F, // Model sensitivity threshold (0.5→0.40; lowered so speech is detected more readily and misjudgments are reduced)
minSilenceDuration = 0.1F,
minSpeechDuration = 0.25F,
windowSize = 512,
@ -39,20 +42,21 @@ class VadManager(
provider = "cpu"
)
)
LogUtils.i(TAG, "✅ VAD init")
LogUtils.i(TAG, "✅ VAD init(已优化抗停顿参数)")
}
fun accept(samples: FloatArray) {
if (samples.isEmpty()) return
val now = System.currentTimeMillis()
// 1先快速 RMS 判断
// 1) Fast RMS energy check: filter out pure silence
val rms = fastRms(samples)
if (rms < MIN_RMS) {
handleSilence(now)
return
}
// 2⃣ 有能量再喂 VAD
// 2) Only feed the VAD model when there is meaningful energy, to reduce computation
vad.acceptWaveform(samples)
val hasSpeech = vad.isSpeechDetected()
@ -61,7 +65,9 @@ class VadManager(
lastActiveMs = now
if (!isSpeaking) {
isSpeaking = true
speechStartMs = now
onSpeechStart()
LogUtils.d(TAG, "🗣 VAD检测到语音开始")
}
} else {
handleSilence(now)
@ -69,12 +75,16 @@ class VadManager(
}
private fun handleSilence(now: Long) {
if (isSpeaking && now - lastSpeechMs > END_SILENCE_MS) {
// End speech only when the start-protection period has elapsed AND silence exceeds the threshold
if (isSpeaking
&& now - speechStartMs > SPEECH_START_PROTECT_MS
&& now - lastSpeechMs > END_SILENCE_MS) {
isSpeaking = false
onSpeechEnd()
LogUtils.d(TAG, "🔇 VAD检测到语音结束静音超${END_SILENCE_MS}ms")
}
// 超长 idle 才 reset
// After a very long idle period, reset the VAD to avoid stale state
if (!isSpeaking && now - lastActiveMs > RESET_IDLE_MS) {
vad.reset()
lastActiveMs = now
@ -82,6 +92,7 @@ class VadManager(
}
}
// Fast RMS computation: strided sampling to reduce the amount of work; original logic kept as-is
private fun fastRms(samples: FloatArray): Float {
var sum = 0f
var count = 0
@ -93,14 +104,17 @@ class VadManager(
count++
i += step
}
return sqrt(sum / count)
// Guard the extreme case count == 0: Float division by zero does not throw, it would produce NaN
return if (count == 0) 0f else sqrt(sum / count)
}
// 重置所有状态,包括新增的语音启动时间
fun reset() {
isSpeaking = false
lastSpeechMs = 0
lastActiveMs = 0
lastSpeechMs = 0L
lastActiveMs = 0L
speechStartMs = 0L
vad.reset()
LogUtils.d(TAG, "🔄 VAD手动重置所有状态")
}
}

View File

@ -36,7 +36,7 @@ class VoiceController(
// 统一的声纹验证阈值(不再分场景)
private const val SPEAKER_THRESHOLD = 0.38f
private const val SPEAKER_THRESHOLD = 0.36f
private const val MIN_VERIFY_MS = 600L
private const val MAX_VERIFY_MS = 1200L

View File

@ -41,9 +41,9 @@ class WakeupManager(assetManager: AssetManager, function: () -> Unit) {
/** ⭐ 永远喂 KWS */
fun acceptAudio(samples: FloatArray) {
val s = stream ?: return
// for (i in samples.indices) {
// samples[i] *= 2.5f
// }
for (i in samples.indices) {
samples[i] *= 2.5f
}
s.acceptWaveform(samples, sampleRate)
while (kws.isReady(s)) {

View File

@ -14,6 +14,7 @@ import android.media.audiofx.NoiseSuppressor
import android.os.Bundle
import android.os.Environment
import android.os.IBinder
import android.os.Looper
import android.text.TextUtils
import android.util.Base64
import android.util.Log
@ -206,7 +207,9 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
onWakeup = {
cancelSSE()
voicePlayer?.onWakeupStop()
if (backPlaying || promptPlaying){
UnityPlayerHolder.getInstance().cancelPCM()
}
UnityPlayerHolder.getInstance()
.sendVoiceToUnity(/*"https://static.seerteach.net/aidialogue/userWakeUpAudio/344.mp3"*/UserInfoManager.userInfo?.wakeUpAudioUrl?:""
)