优化后的代码

This commit is contained in:
林若思 2026-01-03 14:33:43 +08:00
parent 0261c5ccaa
commit 102afa291d
2 changed files with 45 additions and 122 deletions

View File

@ -3,6 +3,7 @@ package com.zs.smarthuman.sherpa
import android.content.res.AssetManager
import android.util.Log
import kotlin.math.sqrt
import java.util.ArrayDeque
class VoiceController(
assetManager: AssetManager,
@ -17,8 +18,6 @@ class VoiceController(
private val TAG = "VoiceController"
private val sampleRate = 16000
/* ================= 状态 ================= */
private var state: VoiceState = VoiceState.WAIT_WAKEUP
set(value) {
field = value
@ -26,42 +25,29 @@ class VoiceController(
onStateChanged?.invoke(value)
}
/* ================= KWS ================= */
/**
 * Keyword-spotting (wake word) manager, built from [assetManager].
 *
 * The trailing lambda logs the detection and forwards it to [handleWakeupEvent].
 * NOTE(review): acceptAudio() also calls handleWakeupEvent() when
 * wakeupManager.consumeWakeupFlag() returns true — confirm that a single
 * detection cannot be delivered through both paths.
 */
private val wakeupManager = WakeupManager(assetManager) {
Log.d(TAG, "🔥 WakeWord detected")
handleWakeupEvent()
}
/* ================= VAD ================= */
/**
 * Voice-activity-detection manager, built from [assetManager].
 *
 * - `onSpeechStart` forwards to [onVadStart].
 * - `onSpeechEnd` is deliberately an empty lambda here; presumably the end of a
 *   sentence is decided elsewhere in this class — TODO confirm.
 */
private val vadManager = VadManager(
assetManager,
onSpeechStart = { onVadStart() },
onSpeechEnd = {}
)
/* ================= Buffer ================= */
private val audioBuffer = mutableListOf<Float>()
/** 前导音缓存2 秒) */
private val preBuffer = ArrayDeque<Float>()
private val PRE_BUFFER_SIZE = sampleRate * 2
/* ================= 时间 ================= */
private var recordingStartMs = 0L
private var silenceStartMs = 0L
private var waitSpeechFailStartMs = 0L
/* ================= 近讲统计(⭐关键新增) ================= */
private var waitSpeechStartMs = 0L
private var speechEnergySum = 0f
private var speechFrameCount = 0
/* ================= 控制 ================= */
private var vadStarted = false
private var inKwsObserve = false
@ -71,28 +57,17 @@ class VoiceController(
private var speechEnableAtMs = 0L
private val SPEECH_COOLDOWN_MS = 300L
/* ================= 阈值(⭐已校正) ================= */
private val RMS_SILENCE_THRESHOLD = 0.012f // 静音阈值(修正)
private val RMS_SILENCE_THRESHOLD = 0.012f
private val SILENCE_END_MS = 1200L
private val MIN_SPEECH_MS = 1000L // 句子级
private val MIN_AVG_ENERGY = 0.02f // 近讲能量门
private val MIN_SPEECH_MS = 1000L
private val MIN_AVG_ENERGY = 0.02f
/** ⭐ 唤醒后等待人声起点 */
private var waitSpeechStartMs = 0L
/** ⭐ 唤醒后最大等待时间(没说一句话) */
private val WAIT_SPEECH_TIMEOUT_MS = 8000L
/* ================= 音频入口 ================= */
fun acceptAudio(samples: FloatArray) {
cachePreBuffer(samples)
wakeupManager.acceptAudio(samples)
if (wakeupManager.consumeWakeupFlag()) {
handleWakeupEvent()
@ -102,7 +77,6 @@ class VoiceController(
val now = System.currentTimeMillis()
when (state) {
VoiceState.WAIT_WAKEUP,
VoiceState.PLAYING_PROMPT,
VoiceState.PLAYING_BACKEND,
@ -110,40 +84,38 @@ class VoiceController(
VoiceState.WAIT_SPEECH_COOLDOWN -> {
if (now >= speechEnableAtMs) {
waitSpeechFailStartMs = 0L // ⭐ 必须清
waitSpeechFailStartMs = System.currentTimeMillis()
state = VoiceState.WAIT_SPEECH
waitSpeechStartMs = now // ⭐ 关键:开始等人说话
waitSpeechStartMs = now
}
return
}
VoiceState.WAIT_SPEECH -> {
// ⭐ 唤醒后长时间没人说话 → 自动退出
if (waitSpeechStartMs > 0 &&
now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS
) {
Log.d(TAG, "⏱ Wakeup but no speech, exit to WAIT_WAKEUP")
if (waitSpeechStartMs > 0 && now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS) {
Log.d(TAG, "⏱ Wakeup but no speech → WAIT_WAKEUP")
resetAll()
return
}
if (inKwsObserve) {
if (now - kwsObserveStartMs < KWS_OBSERVE_MS) return
inKwsObserve = false
if (waitSpeechFailStartMs > 0 && now - waitSpeechFailStartMs >= idleTimeoutSeconds * 1000) {
Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
resetAll()
return
}
if (inKwsObserve && now - kwsObserveStartMs < KWS_OBSERVE_MS) return
inKwsObserve = false
vadManager.accept(samples)
}
VoiceState.RECORDING -> {
audioBuffer.addAll(samples.asList())
vadManager.accept(samples)
val rms = calcRms(samples)
if (rms > RMS_SILENCE_THRESHOLD) {
speechEnergySum += rms
speechFrameCount++
@ -169,22 +141,20 @@ class VoiceController(
/**
 * Reacts to a wake-word detection according to the current [state].
 *
 * - While uploading, the detection is ignored.
 * - While recording or playing backend audio, the optional `stopBackendAudio`
 *   callback is invoked and a wake-up is entered with `interrupt = true`.
 * - In every other state a wake-up is entered with `interrupt = false`.
 */
private fun handleWakeupEvent() {
    // Read the state once so every decision below is based on the same value.
    val current = state
    if (current == VoiceState.UPLOADING) return

    val interrupting =
        current == VoiceState.RECORDING || current == VoiceState.PLAYING_BACKEND
    if (interrupting) {
        stopBackendAudio?.invoke()
    }
    enterWakeup(interrupt = interrupting)
}
private fun enterWakeup(interrupt: Boolean) {
waitSpeechFailStartMs = 0L // ⭐ 唤醒即新会话
waitSpeechStartMs = 0L
waitSpeechFailStartMs = System.currentTimeMillis()
waitSpeechStartMs = System.currentTimeMillis()
if (interrupt) {
audioBuffer.clear()
vadManager.reset()
@ -200,31 +170,23 @@ class VoiceController(
onWakeup()
}
/* ================= VAD START ================= */
/**
 * Callback for the VAD speech-start event.
 *
 * Does nothing unless the controller is in [VoiceState.WAIT_SPEECH]. Otherwise it
 * stamps the recording start time, clears the silence and wait-for-speech timers,
 * resets the energy statistics, reseeds [audioBuffer] with the contents of
 * [preBuffer], and finally switches to [VoiceState.RECORDING].
 */
private fun onVadStart() {
    if (state == VoiceState.WAIT_SPEECH) {
        Log.d(TAG, "🎤 REAL VAD START")

        vadStarted = true
        recordingStartMs = System.currentTimeMillis()
        silenceStartMs = 0L
        // A new round of valid speech has begun: drop the idle-failure timer.
        waitSpeechFailStartMs = 0L
        // Clear the "waiting for the user to speak" timeout.
        waitSpeechStartMs = 0L
        resetEnergyStat()

        // Start the recording from the cached pre-roll audio.
        audioBuffer.apply {
            clear()
            addAll(preBuffer)
        }

        // Assigned last, after all bookkeeping above has been updated.
        state = VoiceState.RECORDING
    }
}
/* ================= 结束录音(⭐核心) ================= */
/* ================= 结束录音 ================= */
private fun finishSentence() {
val now = System.currentTimeMillis()
val duration = now - recordingStartMs
@ -235,46 +197,28 @@ class VoiceController(
}
val vadRatio = vadManager.activeSpeechRatio()
val avgEnergy =
if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
/* ================= 评分制判定 ================= */
val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
var score = 0
// 1⃣ 时长评分(最重要)
when {
duration >= 4000 -> score += 3
duration >= 2500 -> score += 2
duration >= 1500 -> score += 1
}
// 2⃣ 能量评分(近讲人声强信号)
when {
avgEnergy >= 0.10f -> score += 3
avgEnergy >= 0.06f -> score += 2
avgEnergy >= MIN_AVG_ENERGY -> score += 1
}
// 3⃣ VAD 评分(只作为辅助)
when {
vadRatio >= 0.55f -> score += 2
vadRatio >= 0.40f -> score += 1
}
Log.d(
TAG,
"📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score"
)
Log.d(TAG, "📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score")
/**
* 评分阈值
* - >=4 : 必然是真实人声
* - 3 : 在近讲/长句条件下允许
* - <3 : 拦截
*/
val pass = when {
score >= 4 -> true
score >= 6 -> true
score == 3 && avgEnergy >= 0.06f -> true
else -> false
}
@ -285,33 +229,22 @@ class VoiceController(
return
}
/* ================= 通过,进入上传 ================= */
waitSpeechFailStartMs = 0L
val finalAudio = audioBuffer.toFloatArray()
audioBuffer.clear()
state = VoiceState.UPLOADING
onFinalAudio(finalAudio)
}
/* ================= 播放回调 ================= */
fun onPlayStartPrompt() {
state = VoiceState.PLAYING_PROMPT
}
fun onPlayStartPrompt() { state = VoiceState.PLAYING_PROMPT } // ⭐ 补全
/**
 * Called when the prompt audio has finished playing.
 *
 * Sets [speechEnableAtMs] to the current time plus [SPEECH_COOLDOWN_MS] and moves
 * the controller to [VoiceState.WAIT_SPEECH_COOLDOWN].
 */
fun onPlayEndPrompt() {
    val cooldownEndsAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
    speechEnableAtMs = cooldownEndsAtMs
    state = VoiceState.WAIT_SPEECH_COOLDOWN
}
fun onPlayStartBackend() {
state = VoiceState.PLAYING_BACKEND
}
fun onPlayStartBackend() { state = VoiceState.PLAYING_BACKEND } // ⭐ 补全
fun onPlayEndBackend() {
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
state = VoiceState.WAIT_SPEECH_COOLDOWN
@ -321,10 +254,8 @@ class VoiceController(
fun onUploadFinished(success: Boolean) {
if (state != VoiceState.UPLOADING) return
state = if (success) {
VoiceState.PLAYING_BACKEND
} else {
state = if (success) VoiceState.PLAYING_BACKEND
else {
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
VoiceState.WAIT_SPEECH_COOLDOWN
}
@ -335,13 +266,9 @@ class VoiceController(
fun checkIdleTimeout() {
if (state != VoiceState.WAIT_SPEECH) return
if (waitSpeechFailStartMs == 0L) return
if (System.currentTimeMillis() - waitSpeechFailStartMs >
idleTimeoutSeconds * 1000
) {
Log.d(TAG, "⏱ WAIT_SPEECH timeout")
if (System.currentTimeMillis() - waitSpeechFailStartMs > idleTimeoutSeconds * 1000) {
Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
resetAll()
waitSpeechFailStartMs = 0L
}
}
@ -355,9 +282,7 @@ class VoiceController(
silenceStartMs = 0L
state = VoiceState.WAIT_SPEECH
if (waitSpeechFailStartMs == 0L) {
waitSpeechFailStartMs = System.currentTimeMillis()
}
if (waitSpeechFailStartMs == 0L) waitSpeechFailStartMs = System.currentTimeMillis()
}
private fun resetAll() {
@ -367,11 +292,11 @@ class VoiceController(
resetEnergyStat()
vadStarted = false
silenceStartMs = 0L
waitSpeechStartMs = 0L // ⭐
waitSpeechStartMs = 0L
waitSpeechFailStartMs = 0L
state = VoiceState.WAIT_WAKEUP
}
fun release() {
wakeupManager.release()
vadManager.reset()
@ -387,9 +312,7 @@ class VoiceController(
private fun cachePreBuffer(samples: FloatArray) {
for (s in samples) {
preBuffer.addLast(s)
if (preBuffer.size > PRE_BUFFER_SIZE) {
preBuffer.removeFirst()
}
if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst()
}
}

View File

@ -161,7 +161,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
voiceInfo = mutableListOf<VoiceBeanResp>().apply {
add(
VoiceBeanResp(
audioUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
audioUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?: */"https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
)
)
}
@ -169,17 +169,17 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
},
onFinalAudio = { audio ->
Log.d("lrs", "检测到语音,长度=${audio.size}")
// mViewModel?.uploadVoice(
// AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
// 1
// )
loadLocalJsonAndPlay()
val file = File(
getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
"xxx.wav"
mViewModel?.uploadVoice(
AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
1
)
AudioDebugUtil.saveFloatPcmAsWav(audio, file)
LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
loadLocalJsonAndPlay()
// val file = File(
// getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
// "xxx.wav"
// )
// AudioDebugUtil.saveFloatPcmAsWav(audio, file)
// LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
},
onStateChanged = { state ->
@ -343,7 +343,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
word: String,
audioUrl: String
) {
val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return
val wakeupUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?: */"https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return
if (audioUrl != wakeupUrl) return