更新vad模型
This commit is contained in:
parent
7a0bd086e7
commit
13b08c8e7a
Binary file not shown.
@ -18,10 +18,13 @@ class VadManager(
|
|||||||
private var isSpeaking = false
|
private var isSpeaking = false
|
||||||
private var lastSpeechMs = 0L
|
private var lastSpeechMs = 0L
|
||||||
private var lastActiveMs = 0L
|
private var lastActiveMs = 0L
|
||||||
|
private var speechStartMs = 0L // 新增:记录语音启动时间,用于启动保护
|
||||||
|
|
||||||
private val END_SILENCE_MS = 350L
|
// 核心参数优化:适配中文说话停顿
|
||||||
private val RESET_IDLE_MS = 3_000L
|
private val END_SILENCE_MS = 350L // 静音多久判定结束(当前仍为350ms,本次提交未改动;如需留足停顿缓冲可再调大)
|
||||||
private val MIN_RMS = 0.002f
|
private val RESET_IDLE_MS = 3_000L // 超长空闲重置时间,保持不变
|
||||||
|
private val MIN_RMS = 0.001f // 能量检测阈值(0.002→0.001,降低误判静音)
|
||||||
|
private val SPEECH_START_PROTECT_MS = 90L // 语音启动保护期,避免开头停顿误判
|
||||||
|
|
||||||
init {
|
init {
|
||||||
vad = Vad(
|
vad = Vad(
|
||||||
@ -29,7 +32,7 @@ class VadManager(
|
|||||||
VadModelConfig(
|
VadModelConfig(
|
||||||
sileroVadModelConfig = SileroVadModelConfig(
|
sileroVadModelConfig = SileroVadModelConfig(
|
||||||
model = "silero_vad.onnx",
|
model = "silero_vad.onnx",
|
||||||
threshold = 0.5F,
|
threshold = 0.40F, // 模型敏感度(0.5→0.40,降低误判)
|
||||||
minSilenceDuration = 0.1F,
|
minSilenceDuration = 0.1F,
|
||||||
minSpeechDuration = 0.25F,
|
minSpeechDuration = 0.25F,
|
||||||
windowSize = 512,
|
windowSize = 512,
|
||||||
@ -39,20 +42,21 @@ class VadManager(
|
|||||||
provider = "cpu"
|
provider = "cpu"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
LogUtils.i(TAG, "✅ VAD init")
|
LogUtils.i(TAG, "✅ VAD init(已优化抗停顿参数)")
|
||||||
}
|
}
|
||||||
|
|
||||||
fun accept(samples: FloatArray) {
|
fun accept(samples: FloatArray) {
|
||||||
|
if (samples.isEmpty()) return
|
||||||
val now = System.currentTimeMillis()
|
val now = System.currentTimeMillis()
|
||||||
|
|
||||||
// 1️⃣ 先快速 RMS 判断
|
// 1️⃣ 快速RMS能量判断,过滤纯静音
|
||||||
val rms = fastRms(samples)
|
val rms = fastRms(samples)
|
||||||
if (rms < MIN_RMS) {
|
if (rms < MIN_RMS) {
|
||||||
handleSilence(now)
|
handleSilence(now)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2️⃣ 有能量再喂 VAD
|
// 2️⃣ 有有效能量才喂给VAD模型,减少计算
|
||||||
vad.acceptWaveform(samples)
|
vad.acceptWaveform(samples)
|
||||||
val hasSpeech = vad.isSpeechDetected()
|
val hasSpeech = vad.isSpeechDetected()
|
||||||
|
|
||||||
@ -61,7 +65,9 @@ class VadManager(
|
|||||||
lastActiveMs = now
|
lastActiveMs = now
|
||||||
if (!isSpeaking) {
|
if (!isSpeaking) {
|
||||||
isSpeaking = true
|
isSpeaking = true
|
||||||
|
speechStartMs = now
|
||||||
onSpeechStart()
|
onSpeechStart()
|
||||||
|
LogUtils.d(TAG, "🗣 VAD检测到语音开始")
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
handleSilence(now)
|
handleSilence(now)
|
||||||
@ -69,12 +75,16 @@ class VadManager(
|
|||||||
}
|
}
|
||||||
|
|
||||||
private fun handleSilence(now: Long) {
|
private fun handleSilence(now: Long) {
|
||||||
if (isSpeaking && now - lastSpeechMs > END_SILENCE_MS) {
|
// 已过语音启动保护期 + 静音超阈值,才判定结束
|
||||||
|
if (isSpeaking
|
||||||
|
&& now - speechStartMs > SPEECH_START_PROTECT_MS
|
||||||
|
&& now - lastSpeechMs > END_SILENCE_MS) {
|
||||||
isSpeaking = false
|
isSpeaking = false
|
||||||
onSpeechEnd()
|
onSpeechEnd()
|
||||||
|
LogUtils.d(TAG, "🔇 VAD检测到语音结束(静音超${END_SILENCE_MS}ms)")
|
||||||
}
|
}
|
||||||
|
|
||||||
// 超长 idle 才 reset
|
// 超长空闲重置VAD,避免状态残留
|
||||||
if (!isSpeaking && now - lastActiveMs > RESET_IDLE_MS) {
|
if (!isSpeaking && now - lastActiveMs > RESET_IDLE_MS) {
|
||||||
vad.reset()
|
vad.reset()
|
||||||
lastActiveMs = now
|
lastActiveMs = now
|
||||||
@ -82,6 +92,7 @@ class VadManager(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 快速RMS计算,步采样减少计算量,保持原有逻辑
|
||||||
private fun fastRms(samples: FloatArray): Float {
|
private fun fastRms(samples: FloatArray): Float {
|
||||||
var sum = 0f
|
var sum = 0f
|
||||||
var count = 0
|
var count = 0
|
||||||
@ -93,14 +104,17 @@ class VadManager(
|
|||||||
count++
|
count++
|
||||||
i += step
|
i += step
|
||||||
}
|
}
|
||||||
return sqrt(sum / count)
|
// 避免count=0时浮点除0得到NaN(极端情况count=0)
|
||||||
|
return if (count == 0) 0f else sqrt(sum / count)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 重置所有状态,包括新增的语音启动时间
|
||||||
fun reset() {
|
fun reset() {
|
||||||
isSpeaking = false
|
isSpeaking = false
|
||||||
lastSpeechMs = 0
|
lastSpeechMs = 0L
|
||||||
lastActiveMs = 0
|
lastActiveMs = 0L
|
||||||
|
speechStartMs = 0L
|
||||||
vad.reset()
|
vad.reset()
|
||||||
|
LogUtils.d(TAG, "🔄 VAD手动重置所有状态")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -36,7 +36,7 @@ class VoiceController(
|
|||||||
|
|
||||||
|
|
||||||
// 统一的声纹验证阈值(不再分场景)
|
// 统一的声纹验证阈值(不再分场景)
|
||||||
private const val SPEAKER_THRESHOLD = 0.38f
|
private const val SPEAKER_THRESHOLD = 0.36f
|
||||||
|
|
||||||
private const val MIN_VERIFY_MS = 600L
|
private const val MIN_VERIFY_MS = 600L
|
||||||
private const val MAX_VERIFY_MS = 1200L
|
private const val MAX_VERIFY_MS = 1200L
|
||||||
|
|||||||
@ -41,9 +41,9 @@ class WakeupManager(assetManager: AssetManager, function: () -> Unit) {
|
|||||||
/** ⭐ 永远喂 KWS */
|
/** ⭐ 永远喂 KWS */
|
||||||
fun acceptAudio(samples: FloatArray) {
|
fun acceptAudio(samples: FloatArray) {
|
||||||
val s = stream ?: return
|
val s = stream ?: return
|
||||||
// for (i in samples.indices) {
|
for (i in samples.indices) {
|
||||||
// samples[i] *= 2.5f
|
samples[i] *= 2.5f
|
||||||
// }
|
}
|
||||||
s.acceptWaveform(samples, sampleRate)
|
s.acceptWaveform(samples, sampleRate)
|
||||||
|
|
||||||
while (kws.isReady(s)) {
|
while (kws.isReady(s)) {
|
||||||
|
|||||||
@ -14,6 +14,7 @@ import android.media.audiofx.NoiseSuppressor
|
|||||||
import android.os.Bundle
|
import android.os.Bundle
|
||||||
import android.os.Environment
|
import android.os.Environment
|
||||||
import android.os.IBinder
|
import android.os.IBinder
|
||||||
|
import android.os.Looper
|
||||||
import android.text.TextUtils
|
import android.text.TextUtils
|
||||||
import android.util.Base64
|
import android.util.Base64
|
||||||
import android.util.Log
|
import android.util.Log
|
||||||
@ -206,7 +207,9 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
onWakeup = {
|
onWakeup = {
|
||||||
cancelSSE()
|
cancelSSE()
|
||||||
voicePlayer?.onWakeupStop()
|
voicePlayer?.onWakeupStop()
|
||||||
|
if (backPlaying || promptPlaying){
|
||||||
UnityPlayerHolder.getInstance().cancelPCM()
|
UnityPlayerHolder.getInstance().cancelPCM()
|
||||||
|
}
|
||||||
UnityPlayerHolder.getInstance()
|
UnityPlayerHolder.getInstance()
|
||||||
.sendVoiceToUnity(/*"https://static.seerteach.net/aidialogue/userWakeUpAudio/344.mp3"*/UserInfoManager.userInfo?.wakeUpAudioUrl?:""
|
.sendVoiceToUnity(/*"https://static.seerteach.net/aidialogue/userWakeUpAudio/344.mp3"*/UserInfoManager.userInfo?.wakeUpAudioUrl?:""
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user