优化后的代码
This commit is contained in:
parent
0261c5ccaa
commit
102afa291d
@ -3,6 +3,7 @@ package com.zs.smarthuman.sherpa
|
||||
import android.content.res.AssetManager
|
||||
import android.util.Log
|
||||
import kotlin.math.sqrt
|
||||
import java.util.ArrayDeque
|
||||
|
||||
class VoiceController(
|
||||
assetManager: AssetManager,
|
||||
@ -17,8 +18,6 @@ class VoiceController(
|
||||
private val TAG = "VoiceController"
|
||||
private val sampleRate = 16000
|
||||
|
||||
/* ================= 状态 ================= */
|
||||
|
||||
private var state: VoiceState = VoiceState.WAIT_WAKEUP
|
||||
set(value) {
|
||||
field = value
|
||||
@ -26,42 +25,29 @@ class VoiceController(
|
||||
onStateChanged?.invoke(value)
|
||||
}
|
||||
|
||||
/* ================= KWS ================= */
|
||||
|
||||
private val wakeupManager = WakeupManager(assetManager) {
|
||||
Log.d(TAG, "🔥 WakeWord detected")
|
||||
handleWakeupEvent()
|
||||
}
|
||||
|
||||
/* ================= VAD ================= */
|
||||
|
||||
private val vadManager = VadManager(
|
||||
assetManager,
|
||||
onSpeechStart = { onVadStart() },
|
||||
onSpeechEnd = {}
|
||||
)
|
||||
|
||||
/* ================= Buffer ================= */
|
||||
|
||||
private val audioBuffer = mutableListOf<Float>()
|
||||
|
||||
/** 前导音缓存(2 秒) */
|
||||
private val preBuffer = ArrayDeque<Float>()
|
||||
private val PRE_BUFFER_SIZE = sampleRate * 2
|
||||
|
||||
/* ================= 时间 ================= */
|
||||
|
||||
private var recordingStartMs = 0L
|
||||
private var silenceStartMs = 0L
|
||||
private var waitSpeechFailStartMs = 0L
|
||||
|
||||
/* ================= 近讲统计(⭐关键新增) ================= */
|
||||
private var waitSpeechStartMs = 0L
|
||||
|
||||
private var speechEnergySum = 0f
|
||||
private var speechFrameCount = 0
|
||||
|
||||
/* ================= 控制 ================= */
|
||||
|
||||
private var vadStarted = false
|
||||
|
||||
private var inKwsObserve = false
|
||||
@ -71,28 +57,17 @@ class VoiceController(
|
||||
private var speechEnableAtMs = 0L
|
||||
private val SPEECH_COOLDOWN_MS = 300L
|
||||
|
||||
/* ================= 阈值(⭐已校正) ================= */
|
||||
|
||||
private val RMS_SILENCE_THRESHOLD = 0.012f // 静音阈值(修正)
|
||||
private val RMS_SILENCE_THRESHOLD = 0.012f
|
||||
private val SILENCE_END_MS = 1200L
|
||||
private val MIN_SPEECH_MS = 1000L // 句子级
|
||||
private val MIN_AVG_ENERGY = 0.02f // 近讲能量门
|
||||
private val MIN_SPEECH_MS = 1000L
|
||||
private val MIN_AVG_ENERGY = 0.02f
|
||||
|
||||
|
||||
/** ⭐ 唤醒后等待人声起点 */
|
||||
private var waitSpeechStartMs = 0L
|
||||
|
||||
/** ⭐ 唤醒后最大等待时间(没说一句话) */
|
||||
private val WAIT_SPEECH_TIMEOUT_MS = 8000L
|
||||
|
||||
|
||||
|
||||
/* ================= 音频入口 ================= */
|
||||
|
||||
fun acceptAudio(samples: FloatArray) {
|
||||
|
||||
cachePreBuffer(samples)
|
||||
|
||||
wakeupManager.acceptAudio(samples)
|
||||
if (wakeupManager.consumeWakeupFlag()) {
|
||||
handleWakeupEvent()
|
||||
@ -102,7 +77,6 @@ class VoiceController(
|
||||
val now = System.currentTimeMillis()
|
||||
|
||||
when (state) {
|
||||
|
||||
VoiceState.WAIT_WAKEUP,
|
||||
VoiceState.PLAYING_PROMPT,
|
||||
VoiceState.PLAYING_BACKEND,
|
||||
@ -110,40 +84,38 @@ class VoiceController(
|
||||
|
||||
VoiceState.WAIT_SPEECH_COOLDOWN -> {
|
||||
if (now >= speechEnableAtMs) {
|
||||
waitSpeechFailStartMs = 0L // ⭐ 必须清
|
||||
waitSpeechFailStartMs = System.currentTimeMillis()
|
||||
state = VoiceState.WAIT_SPEECH
|
||||
waitSpeechStartMs = now // ⭐ 关键:开始等人说话
|
||||
waitSpeechStartMs = now
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
VoiceState.WAIT_SPEECH -> {
|
||||
|
||||
// ⭐ 唤醒后长时间没人说话 → 自动退出
|
||||
if (waitSpeechStartMs > 0 &&
|
||||
now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS
|
||||
) {
|
||||
Log.d(TAG, "⏱ Wakeup but no speech, exit to WAIT_WAKEUP")
|
||||
if (waitSpeechStartMs > 0 && now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS) {
|
||||
Log.d(TAG, "⏱ Wakeup but no speech → WAIT_WAKEUP")
|
||||
resetAll()
|
||||
return
|
||||
}
|
||||
|
||||
if (inKwsObserve) {
|
||||
if (now - kwsObserveStartMs < KWS_OBSERVE_MS) return
|
||||
inKwsObserve = false
|
||||
if (waitSpeechFailStartMs > 0 && now - waitSpeechFailStartMs >= idleTimeoutSeconds * 1000) {
|
||||
Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
|
||||
resetAll()
|
||||
return
|
||||
}
|
||||
|
||||
if (inKwsObserve && now - kwsObserveStartMs < KWS_OBSERVE_MS) return
|
||||
inKwsObserve = false
|
||||
|
||||
vadManager.accept(samples)
|
||||
}
|
||||
|
||||
|
||||
VoiceState.RECORDING -> {
|
||||
|
||||
audioBuffer.addAll(samples.asList())
|
||||
vadManager.accept(samples)
|
||||
|
||||
val rms = calcRms(samples)
|
||||
|
||||
if (rms > RMS_SILENCE_THRESHOLD) {
|
||||
speechEnergySum += rms
|
||||
speechFrameCount++
|
||||
@ -169,22 +141,20 @@ class VoiceController(
|
||||
|
||||
/**
 * Reacts to a detected wake word according to the current [state].
 *
 * - UPLOADING: the wake word is ignored and the function returns immediately.
 * - RECORDING or PLAYING_BACKEND: the optional `stopBackendAudio` callback is
 *   invoked (if set) and a new session is entered with `interrupt = true`.
 * - Any other state: a new session is entered with `interrupt = false`.
 */
private fun handleWakeupEvent() {
|
||||
when (state) {
|
||||
|
||||
// An upload is in flight: do not start a new session on top of it.
VoiceState.UPLOADING -> return
|
||||
|
||||
// Both of these states are treated as an interruption of ongoing work.
VoiceState.RECORDING,
|
||||
VoiceState.PLAYING_BACKEND -> {
|
||||
stopBackendAudio?.invoke()
|
||||
enterWakeup(interrupt = true)
|
||||
}
|
||||
|
||||
else -> enterWakeup(interrupt = false)
|
||||
}
|
||||
}
|
||||
|
||||
private fun enterWakeup(interrupt: Boolean) {
|
||||
waitSpeechFailStartMs = 0L // ⭐ 唤醒即新会话
|
||||
waitSpeechStartMs = 0L
|
||||
waitSpeechFailStartMs = System.currentTimeMillis()
|
||||
waitSpeechStartMs = System.currentTimeMillis()
|
||||
|
||||
if (interrupt) {
|
||||
audioBuffer.clear()
|
||||
vadManager.reset()
|
||||
@ -200,31 +170,23 @@ class VoiceController(
|
||||
onWakeup()
|
||||
}
|
||||
|
||||
/* ================= VAD START ================= */
|
||||
|
||||
/**
 * Callback for the start of speech reported by the VAD (passed to VadManager as
 * `onSpeechStart`). Switches the controller from WAIT_SPEECH to RECORDING.
 *
 * Does nothing unless the current state is WAIT_SPEECH. Otherwise it stamps the
 * recording start time, clears the silence timer, resets the energy statistics,
 * replaces the contents of `audioBuffer` with the current contents of
 * `preBuffer`, and finally sets the state to RECORDING.
 *
 * NOTE(review): this span comes from a rendered diff without +/- markers; the two
 * wait-timer resets below may belong to only one side of the change — confirm
 * against the repository before relying on them.
 */
private fun onVadStart() {
|
||||
if (state != VoiceState.WAIT_SPEECH) return
|
||||
|
||||
Log.d(TAG, "🎤 REAL VAD START")
|
||||
|
||||
vadStarted = true
|
||||
recordingStartMs = System.currentTimeMillis()
|
||||
silenceStartMs = 0L
|
||||
waitSpeechFailStartMs = 0L // ⭐ a new round of valid speech has started
|
||||
waitSpeechStartMs = 0L // ⭐ clear the "waiting for speech" timeout
|
||||
resetEnergyStat()
|
||||
|
||||
// Start the recording from the cached lead-in audio instead of from empty.
audioBuffer.clear()
|
||||
audioBuffer.addAll(preBuffer)
|
||||
|
||||
state = VoiceState.RECORDING
|
||||
}
|
||||
|
||||
|
||||
/* ================= 结束录音(⭐核心) ================= */
|
||||
/* ================= 结束录音 ================= */
|
||||
|
||||
private fun finishSentence() {
|
||||
|
||||
val now = System.currentTimeMillis()
|
||||
val duration = now - recordingStartMs
|
||||
|
||||
@ -235,46 +197,28 @@ class VoiceController(
|
||||
}
|
||||
|
||||
val vadRatio = vadManager.activeSpeechRatio()
|
||||
val avgEnergy =
|
||||
if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
|
||||
|
||||
/* ================= 评分制判定 ================= */
|
||||
val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
|
||||
|
||||
var score = 0
|
||||
|
||||
// 1️⃣ 时长评分(最重要)
|
||||
when {
|
||||
duration >= 4000 -> score += 3
|
||||
duration >= 2500 -> score += 2
|
||||
duration >= 1500 -> score += 1
|
||||
}
|
||||
|
||||
// 2️⃣ 能量评分(近讲人声强信号)
|
||||
when {
|
||||
avgEnergy >= 0.10f -> score += 3
|
||||
avgEnergy >= 0.06f -> score += 2
|
||||
avgEnergy >= MIN_AVG_ENERGY -> score += 1
|
||||
}
|
||||
|
||||
// 3️⃣ VAD 评分(只作为辅助)
|
||||
when {
|
||||
vadRatio >= 0.55f -> score += 2
|
||||
vadRatio >= 0.40f -> score += 1
|
||||
}
|
||||
|
||||
Log.d(
|
||||
TAG,
|
||||
"📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score"
|
||||
)
|
||||
Log.d(TAG, "📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score")
|
||||
|
||||
/**
|
||||
* 评分阈值:
|
||||
* - >=4 : 必然是真实人声
|
||||
* - 3 : 在近讲/长句条件下允许
|
||||
* - <3 : 拦截
|
||||
*/
|
||||
val pass = when {
|
||||
score >= 4 -> true
|
||||
score >= 6 -> true
|
||||
score == 3 && avgEnergy >= 0.06f -> true
|
||||
else -> false
|
||||
}
|
||||
@ -285,33 +229,22 @@ class VoiceController(
|
||||
return
|
||||
}
|
||||
|
||||
/* ================= 通过,进入上传 ================= */
|
||||
|
||||
waitSpeechFailStartMs = 0L
|
||||
|
||||
val finalAudio = audioBuffer.toFloatArray()
|
||||
audioBuffer.clear()
|
||||
|
||||
state = VoiceState.UPLOADING
|
||||
onFinalAudio(finalAudio)
|
||||
}
|
||||
|
||||
|
||||
/* ================= 播放回调 ================= */
|
||||
|
||||
fun onPlayStartPrompt() {
|
||||
state = VoiceState.PLAYING_PROMPT
|
||||
}
|
||||
|
||||
fun onPlayStartPrompt() { state = VoiceState.PLAYING_PROMPT } // ⭐ 补全
|
||||
/**
 * Called when prompt playback has finished.
 *
 * Sets `speechEnableAtMs` to the current time plus SPEECH_COOLDOWN_MS and moves
 * the controller to WAIT_SPEECH_COOLDOWN.
 */
fun onPlayEndPrompt() {
|
||||
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
||||
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
||||
}
|
||||
|
||||
fun onPlayStartBackend() {
|
||||
state = VoiceState.PLAYING_BACKEND
|
||||
}
|
||||
|
||||
fun onPlayStartBackend() { state = VoiceState.PLAYING_BACKEND } // ⭐ 补全
|
||||
fun onPlayEndBackend() {
|
||||
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
||||
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
||||
@ -321,10 +254,8 @@ class VoiceController(
|
||||
|
||||
fun onUploadFinished(success: Boolean) {
|
||||
if (state != VoiceState.UPLOADING) return
|
||||
|
||||
state = if (success) {
|
||||
VoiceState.PLAYING_BACKEND
|
||||
} else {
|
||||
state = if (success) VoiceState.PLAYING_BACKEND
|
||||
else {
|
||||
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
||||
VoiceState.WAIT_SPEECH_COOLDOWN
|
||||
}
|
||||
@ -335,13 +266,9 @@ class VoiceController(
|
||||
fun checkIdleTimeout() {
|
||||
if (state != VoiceState.WAIT_SPEECH) return
|
||||
if (waitSpeechFailStartMs == 0L) return
|
||||
|
||||
if (System.currentTimeMillis() - waitSpeechFailStartMs >
|
||||
idleTimeoutSeconds * 1000
|
||||
) {
|
||||
Log.d(TAG, "⏱ WAIT_SPEECH timeout")
|
||||
if (System.currentTimeMillis() - waitSpeechFailStartMs > idleTimeoutSeconds * 1000) {
|
||||
Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
|
||||
resetAll()
|
||||
waitSpeechFailStartMs = 0L
|
||||
}
|
||||
}
|
||||
|
||||
@ -355,9 +282,7 @@ class VoiceController(
|
||||
silenceStartMs = 0L
|
||||
state = VoiceState.WAIT_SPEECH
|
||||
|
||||
if (waitSpeechFailStartMs == 0L) {
|
||||
waitSpeechFailStartMs = System.currentTimeMillis()
|
||||
}
|
||||
if (waitSpeechFailStartMs == 0L) waitSpeechFailStartMs = System.currentTimeMillis()
|
||||
}
|
||||
|
||||
private fun resetAll() {
|
||||
@ -367,11 +292,11 @@ class VoiceController(
|
||||
resetEnergyStat()
|
||||
vadStarted = false
|
||||
silenceStartMs = 0L
|
||||
waitSpeechStartMs = 0L // ⭐
|
||||
waitSpeechStartMs = 0L
|
||||
waitSpeechFailStartMs = 0L
|
||||
state = VoiceState.WAIT_WAKEUP
|
||||
}
|
||||
|
||||
|
||||
fun release() {
|
||||
wakeupManager.release()
|
||||
vadManager.reset()
|
||||
@ -387,9 +312,7 @@ class VoiceController(
|
||||
/**
 * Appends every sample in [samples] to `preBuffer` and drops the oldest sample
 * whenever the deque grows past PRE_BUFFER_SIZE, so only the most recent
 * PRE_BUFFER_SIZE samples are kept.
 *
 * NOTE(review): the size check appears twice below, presumably because this span
 * is a rendered diff showing both the old multi-line form and the new one-line
 * form — confirm which one the file actually contains.
 */
private fun cachePreBuffer(samples: FloatArray) {
|
||||
for (s in samples) {
|
||||
preBuffer.addLast(s)
|
||||
if (preBuffer.size > PRE_BUFFER_SIZE) {
|
||||
preBuffer.removeFirst()
|
||||
}
|
||||
if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -161,7 +161,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
||||
voiceInfo = mutableListOf<VoiceBeanResp>().apply {
|
||||
add(
|
||||
VoiceBeanResp(
|
||||
audioUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
|
||||
audioUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?: */"https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
|
||||
)
|
||||
)
|
||||
}
|
||||
@ -169,17 +169,17 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
||||
},
|
||||
onFinalAudio = { audio ->
|
||||
Log.d("lrs", "检测到语音,长度=${audio.size}")
|
||||
// mViewModel?.uploadVoice(
|
||||
// AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
|
||||
// 1
|
||||
// )
|
||||
loadLocalJsonAndPlay()
|
||||
val file = File(
|
||||
getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
|
||||
"xxx.wav"
|
||||
mViewModel?.uploadVoice(
|
||||
AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
|
||||
1
|
||||
)
|
||||
AudioDebugUtil.saveFloatPcmAsWav(audio, file)
|
||||
LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
|
||||
loadLocalJsonAndPlay()
|
||||
// val file = File(
|
||||
// getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
|
||||
// "xxx.wav"
|
||||
// )
|
||||
// AudioDebugUtil.saveFloatPcmAsWav(audio, file)
|
||||
// LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
|
||||
|
||||
},
|
||||
onStateChanged = { state ->
|
||||
@ -343,7 +343,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
||||
word: String,
|
||||
audioUrl: String
|
||||
) {
|
||||
val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return
|
||||
val wakeupUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?: */"https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return
|
||||
|
||||
if (audioUrl != wakeupUrl) return
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user