优化后的代码

This commit is contained in:
林若思 2026-01-03 10:11:28 +08:00
parent f8812b6a48
commit 956dd78c1b
5 changed files with 197 additions and 70 deletions

View File

@ -52,7 +52,7 @@
tools:targetApi="31"> tools:targetApi="31">
<activity <activity
android:name=".ui.MainActivity" android:name=".ui.SplashActivity"
android:exported="true" android:exported="true"
android:theme="@style/Theme.Splash" android:theme="@style/Theme.Splash"
android:screenOrientation="portrait"> android:screenOrientation="portrait">
@ -66,9 +66,9 @@
</intent-filter> </intent-filter>
</activity> </activity>
<!--<activity <activity
android:name="com.zs.smarthuman.ui.MainActivity" android:name="com.zs.smarthuman.ui.MainActivity"
android:screenOrientation="portrait"/>--> android:screenOrientation="portrait"/>
<activity <activity
android:name="com.zs.smarthuman.ui.ActivateActivity" android:name="com.zs.smarthuman.ui.ActivateActivity"
android:screenOrientation="portrait"/> android:screenOrientation="portrait"/>

View File

@ -4,11 +4,6 @@ import android.content.res.AssetManager
import com.k2fsa.sherpa.onnx.Vad import com.k2fsa.sherpa.onnx.Vad
import com.k2fsa.sherpa.onnx.getVadModelConfig import com.k2fsa.sherpa.onnx.getVadModelConfig
/**
* @description:
* @author: lrs
* @date: 2025/12/17 10:22
*/
class VadManager( class VadManager(
assetManager: AssetManager, assetManager: AssetManager,
private val onSpeechStart: () -> Unit, private val onSpeechStart: () -> Unit,
@ -19,9 +14,13 @@ class VadManager(
private var isSpeaking = false private var isSpeaking = false
private var lastSpeechTime = 0L private var lastSpeechTime = 0L
// ⭐ 统计用 /** ⭐ 仅统计“有效语音段” */
private var speechFrameCount = 0 private var activeFrameCount = 0
private var totalFrameCount = 0 private var activeSpeechFrameCount = 0
/** ⭐ 用于调试(可选) */
private var rawFrameCount = 0
private var rawSpeechFrameCount = 0
private val END_SILENCE_MS = 600L private val END_SILENCE_MS = 600L
@ -37,36 +36,58 @@ class VadManager(
vad.acceptWaveform(samples) vad.acceptWaveform(samples)
val hasSpeech = vad.isSpeechDetected() val hasSpeech = vad.isSpeechDetected()
totalFrameCount++ /* ===== raw 统计(仅日志) ===== */
rawFrameCount++
if (hasSpeech) rawSpeechFrameCount++
if (hasSpeech) { if (hasSpeech) {
speechFrameCount++
lastSpeechTime = now lastSpeechTime = now
if (!isSpeaking) { if (!isSpeaking) {
isSpeaking = true isSpeaking = true
onSpeechStart() onSpeechStart()
} }
activeFrameCount++
activeSpeechFrameCount++
} else { } else {
if (isSpeaking && now - lastSpeechTime >= END_SILENCE_MS) { if (isSpeaking) {
activeFrameCount++
if (now - lastSpeechTime >= END_SILENCE_MS) {
isSpeaking = false isSpeaking = false
onSpeechEnd() onSpeechEnd()
vad.clear() }
} }
} }
} }
/** 👉 人声占比(真正用到 VAD 的地方) */ /**
fun speechRatio(): Float { * 真正用于判断是不是有效人声
if (totalFrameCount == 0) return 0f * 只统计 VAD 激活期间
return speechFrameCount.toFloat() / totalFrameCount */
fun activeSpeechRatio(): Float {
if (activeFrameCount == 0) return 0f
return activeSpeechFrameCount.toFloat() / activeFrameCount
}
/**
* 仅用于调参观察
*/
fun rawSpeechRatio(): Float {
if (rawFrameCount == 0) return 0f
return rawSpeechFrameCount.toFloat() / rawFrameCount
} }
fun reset() { fun reset() {
isSpeaking = false isSpeaking = false
lastSpeechTime = 0 lastSpeechTime = 0L
speechFrameCount = 0
totalFrameCount = 0 activeFrameCount = 0
activeSpeechFrameCount = 0
rawFrameCount = 0
rawSpeechFrameCount = 0
vad.reset() vad.reset()
} }
} }

View File

@ -8,7 +8,7 @@ class VoiceController(
assetManager: AssetManager, assetManager: AssetManager,
private val onWakeup: () -> Unit, private val onWakeup: () -> Unit,
private val onFinalAudio: (FloatArray) -> Unit, private val onFinalAudio: (FloatArray) -> Unit,
private val idleTimeoutSeconds: Int = 5, private val idleTimeoutSeconds: Int = 8,
private val maxRecordingSeconds: Int = 10, private val maxRecordingSeconds: Int = 10,
private val onStateChanged: ((VoiceState) -> Unit)? = null, private val onStateChanged: ((VoiceState) -> Unit)? = null,
private val stopBackendAudio: (() -> Unit)? = null private val stopBackendAudio: (() -> Unit)? = null
@ -53,28 +53,39 @@ class VoiceController(
private var recordingStartMs = 0L private var recordingStartMs = 0L
private var silenceStartMs = 0L private var silenceStartMs = 0L
/** ⭐ WAIT_SPEECH 连续失败起点(关键) */
private var waitSpeechFailStartMs = 0L private var waitSpeechFailStartMs = 0L
/* ================= 近讲统计(⭐关键新增) ================= */
private var speechEnergySum = 0f
private var speechFrameCount = 0
/* ================= 控制 ================= */ /* ================= 控制 ================= */
private var vadStarted = false private var vadStarted = false
/** 唤醒观察期 */
private var inKwsObserve = false private var inKwsObserve = false
private var kwsObserveStartMs = 0L private var kwsObserveStartMs = 0L
private val KWS_OBSERVE_MS = 500L private val KWS_OBSERVE_MS = 500L
/** 播放冷却 */
private var speechEnableAtMs = 0L private var speechEnableAtMs = 0L
private val SPEECH_COOLDOWN_MS = 300L private val SPEECH_COOLDOWN_MS = 300L
/* ================= 阈值 ================= */ /* ================= 阈值(⭐已校正) ================= */
private val RMS_SILENCE_THRESHOLD = 0.005f private val RMS_SILENCE_THRESHOLD = 0.012f // 静音阈值(修正)
private val SILENCE_END_MS = 1200L private val SILENCE_END_MS = 1200L
private val MIN_SPEECH_MS = 300L private val MIN_SPEECH_MS = 1000L // 句子级
private val MIN_AVG_ENERGY = 0.02f // 近讲能量门
/** ⭐ 唤醒后等待人声起点 */
private var waitSpeechStartMs = 0L
/** ⭐ 唤醒后最大等待时间(没说一句话) */
private val WAIT_SPEECH_TIMEOUT_MS = 8000L
/* ================= 音频入口 ================= */ /* ================= 音频入口 ================= */
@ -100,12 +111,22 @@ class VoiceController(
VoiceState.WAIT_SPEECH_COOLDOWN -> { VoiceState.WAIT_SPEECH_COOLDOWN -> {
if (now >= speechEnableAtMs) { if (now >= speechEnableAtMs) {
state = VoiceState.WAIT_SPEECH state = VoiceState.WAIT_SPEECH
waitSpeechStartMs = now // ⭐ 关键:开始等人说话
} }
return return
} }
VoiceState.WAIT_SPEECH -> { VoiceState.WAIT_SPEECH -> {
// ⭐ 唤醒后长时间没人说话 → 自动退出
if (waitSpeechStartMs > 0 &&
now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS
) {
Log.d(TAG, "⏱ Wakeup but no speech, exit to WAIT_WAKEUP")
resetAll()
return
}
if (inKwsObserve) { if (inKwsObserve) {
if (now - kwsObserveStartMs < KWS_OBSERVE_MS) return if (now - kwsObserveStartMs < KWS_OBSERVE_MS) return
inKwsObserve = false inKwsObserve = false
@ -114,26 +135,30 @@ class VoiceController(
vadManager.accept(samples) vadManager.accept(samples)
} }
VoiceState.RECORDING -> { VoiceState.RECORDING -> {
audioBuffer.addAll(samples.asList()) audioBuffer.addAll(samples.asList())
vadManager.accept(samples) vadManager.accept(samples)
if (now - recordingStartMs > maxRecordingSeconds * 1000) { val rms = calcRms(samples)
Log.w(TAG, "⏱ Max recording reached")
if (rms > RMS_SILENCE_THRESHOLD) {
speechEnergySum += rms
speechFrameCount++
silenceStartMs = 0L
} else {
if (silenceStartMs == 0L) silenceStartMs = now
else if (now - silenceStartMs >= SILENCE_END_MS) {
Log.d(TAG, "🔇 Silence end")
finishSentence() finishSentence()
return return
} }
val rms = calcRms(samples)
if (rms < RMS_SILENCE_THRESHOLD) {
if (silenceStartMs == 0L) silenceStartMs = now
else if (now - silenceStartMs >= SILENCE_END_MS) {
Log.d(TAG, "🔇 RMS silence end")
finishSentence()
} }
} else {
silenceStartMs = 0L if (now - recordingStartMs > maxRecordingSeconds * 1000) {
Log.w(TAG, "⏱ Max recording reached")
finishSentence()
} }
} }
} }
@ -146,8 +171,12 @@ class VoiceController(
VoiceState.UPLOADING -> return VoiceState.UPLOADING -> return
// ⭐ 关键:只要不是纯等待唤醒,一律打断
VoiceState.RECORDING, VoiceState.RECORDING,
VoiceState.WAIT_SPEECH,
VoiceState.WAIT_SPEECH_COOLDOWN,
VoiceState.PLAYING_BACKEND -> { VoiceState.PLAYING_BACKEND -> {
Log.d(TAG, "⚠ WakeWord interrupt state=$state")
stopBackendAudio?.invoke() stopBackendAudio?.invoke()
enterWakeup(interrupt = true) enterWakeup(interrupt = true)
} }
@ -156,13 +185,21 @@ class VoiceController(
} }
} }
private fun enterWakeup(interrupt: Boolean) { private fun enterWakeup(interrupt: Boolean) {
if (interrupt) { if (interrupt) {
Log.d(TAG, "🛑 Interrupt current speech / recording")
audioBuffer.clear() audioBuffer.clear()
preBuffer.clear() // ⭐ 防止把旧唤醒词带进去
vadManager.reset() vadManager.reset()
resetEnergyStat()
vadStarted = false vadStarted = false
silenceStartMs = 0L silenceStartMs = 0L
waitSpeechStartMs = 0L // ⭐
waitSpeechFailStartMs = 0L // ⭐
} }
inKwsObserve = true inKwsObserve = true
@ -172,6 +209,7 @@ class VoiceController(
onWakeup() onWakeup()
} }
/* ================= VAD START ================= */ /* ================= VAD START ================= */
private fun onVadStart() { private fun onVadStart() {
@ -182,6 +220,8 @@ class VoiceController(
vadStarted = true vadStarted = true
recordingStartMs = System.currentTimeMillis() recordingStartMs = System.currentTimeMillis()
silenceStartMs = 0L silenceStartMs = 0L
waitSpeechStartMs = 0L // ⭐ 清掉“等待说话”超时
resetEnergyStat()
audioBuffer.clear() audioBuffer.clear()
audioBuffer.addAll(preBuffer) audioBuffer.addAll(preBuffer)
@ -189,26 +229,73 @@ class VoiceController(
state = VoiceState.RECORDING state = VoiceState.RECORDING
} }
/* ================= 结束录音 ================= */
/* ================= 结束录音(⭐核心) ================= */
private fun finishSentence() { private fun finishSentence() {
val duration = System.currentTimeMillis() - recordingStartMs val now = System.currentTimeMillis()
val duration = now - recordingStartMs
if (!vadStarted || duration < MIN_SPEECH_MS) { if (!vadStarted || duration < MIN_SPEECH_MS) {
Log.d(TAG, "❌ Too short or no VAD start: ${duration}ms")
resetToWaitSpeech() resetToWaitSpeech()
return return
} }
val vadRatio = vadManager.speechRatio() val vadRatio = vadManager.activeSpeechRatio()
Log.d(TAG, "🎙 VAD speech ratio=$vadRatio") val avgEnergy =
if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
if (vadRatio < 0.25f) { /* ================= 评分制判定 ================= */
Log.d(TAG, "❌ VAD says NOT human speech")
var score = 0
// 1⃣ 时长评分(最重要)
when {
duration >= 4000 -> score += 3
duration >= 2500 -> score += 2
duration >= 1500 -> score += 1
}
// 2⃣ 能量评分(近讲人声强信号)
when {
avgEnergy >= 0.10f -> score += 3
avgEnergy >= 0.06f -> score += 2
avgEnergy >= MIN_AVG_ENERGY -> score += 1
}
// 3⃣ VAD 评分(只作为辅助)
when {
vadRatio >= 0.55f -> score += 2
vadRatio >= 0.40f -> score += 1
}
Log.d(
TAG,
"📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score"
)
/**
* 评分阈值
* - >=4 : 必然是真实人声
* - 3 : 在近讲/长句条件下允许
* - <3 : 拦截
*/
val pass = when {
score >= 4 -> true
score == 3 && avgEnergy >= 0.06f -> true
else -> false
}
if (!pass) {
Log.d(TAG, "❌ Sentence rejected (score=$score)")
resetToWaitSpeech() resetToWaitSpeech()
return return
} }
// ✅ 成功一次,清空失败计时 /* ================= 通过,进入上传 ================= */
waitSpeechFailStartMs = 0L waitSpeechFailStartMs = 0L
val finalAudio = audioBuffer.toFloatArray() val finalAudio = audioBuffer.toFloatArray()
@ -218,6 +305,7 @@ class VoiceController(
onFinalAudio(finalAudio) onFinalAudio(finalAudio)
} }
/* ================= 播放回调 ================= */ /* ================= 播放回调 ================= */
fun onPlayStartPrompt() { fun onPlayStartPrompt() {
@ -238,7 +326,7 @@ class VoiceController(
state = VoiceState.WAIT_SPEECH_COOLDOWN state = VoiceState.WAIT_SPEECH_COOLDOWN
} }
/* ================= 上传回调(保留 public ================= */ /* ================= 上传回调 ================= */
fun onUploadFinished(success: Boolean) { fun onUploadFinished(success: Boolean) {
if (state != VoiceState.UPLOADING) return if (state != VoiceState.UPLOADING) return
@ -251,15 +339,16 @@ class VoiceController(
} }
} }
/* ================= Idle 超时(关键修复) ================= */ /* ================= Idle 超时 ================= */
fun checkIdleTimeout() { fun checkIdleTimeout() {
if (state != VoiceState.WAIT_SPEECH) return if (state != VoiceState.WAIT_SPEECH) return
if (waitSpeechFailStartMs == 0L) return if (waitSpeechFailStartMs == 0L) return
val now = System.currentTimeMillis() if (System.currentTimeMillis() - waitSpeechFailStartMs >
if (now - waitSpeechFailStartMs > idleTimeoutSeconds * 1000) { idleTimeoutSeconds * 1000
Log.d(TAG, "⏱ WAIT_SPEECH continuous fail timeout") ) {
Log.d(TAG, "⏱ WAIT_SPEECH timeout")
resetAll() resetAll()
waitSpeechFailStartMs = 0L waitSpeechFailStartMs = 0L
} }
@ -270,11 +359,11 @@ class VoiceController(
private fun resetToWaitSpeech() { private fun resetToWaitSpeech() {
audioBuffer.clear() audioBuffer.clear()
vadManager.reset() vadManager.reset()
resetEnergyStat()
vadStarted = false vadStarted = false
silenceStartMs = 0L silenceStartMs = 0L
state = VoiceState.WAIT_SPEECH state = VoiceState.WAIT_SPEECH
// ⭐ 只在第一次失败时记录
if (waitSpeechFailStartMs == 0L) { if (waitSpeechFailStartMs == 0L) {
waitSpeechFailStartMs = System.currentTimeMillis() waitSpeechFailStartMs = System.currentTimeMillis()
} }
@ -284,11 +373,14 @@ class VoiceController(
audioBuffer.clear() audioBuffer.clear()
preBuffer.clear() preBuffer.clear()
vadManager.reset() vadManager.reset()
resetEnergyStat()
vadStarted = false vadStarted = false
silenceStartMs = 0L silenceStartMs = 0L
waitSpeechStartMs = 0L // ⭐
state = VoiceState.WAIT_WAKEUP state = VoiceState.WAIT_WAKEUP
} }
fun release() { fun release() {
wakeupManager.release() wakeupManager.release()
vadManager.reset() vadManager.reset()
@ -296,6 +388,11 @@ class VoiceController(
/* ================= Utils ================= */ /* ================= Utils ================= */
private fun resetEnergyStat() {
speechEnergySum = 0f
speechFrameCount = 0
}
private fun cachePreBuffer(samples: FloatArray) { private fun cachePreBuffer(samples: FloatArray) {
for (s in samples) { for (s in samples) {
preBuffer.addLast(s) preBuffer.addLast(s)

View File

@ -161,7 +161,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
voiceInfo = mutableListOf<VoiceBeanResp>().apply { voiceInfo = mutableListOf<VoiceBeanResp>().apply {
add( add(
VoiceBeanResp( VoiceBeanResp(
audioUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?:*/ "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" audioUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
) )
) )
} }
@ -187,8 +187,17 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
VoiceState.WAIT_WAKEUP -> { VoiceState.WAIT_WAKEUP -> {
Log.d("lrs", "当前状态: 等待唤醒") Log.d("lrs", "当前状态: 等待唤醒")
lifecycleScope.launch(Dispatchers.Main) { lifecycleScope.launch(Dispatchers.Main) {
mVerticalAnimator?.hide() mVerticalAnimator?.hide()
UnityPlayerHolder.getInstance()
.sendVoiceToUnity(
voiceInfo = mutableListOf<VoiceBeanResp>().apply {
add(
VoiceBeanResp(
audioUrl = "https://static.seerteach.net/aidialogue/userWakeUpAudio/ttsmaker-file-2025-12-31-16-2-51.mp3"
)
)
}
)
} }
} }
@ -334,9 +343,9 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
word: String, word: String,
audioUrl: String audioUrl: String
) { ) {
// val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: return val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return
//
// if (audioUrl != wakeupUrl) return if (audioUrl != wakeupUrl) return
when (state) { when (state) {
1 -> { // play 1 -> { // play

View File

@ -25,14 +25,14 @@ object SerialNumberUtil {
* 外部调用获取最终序列号 * 外部调用获取最终序列号
*/ */
fun getSerialNumber(): String { fun getSerialNumber(): String {
for (key in snKeys) { // for (key in snKeys) {
val sn = getProp(key) // val sn = getProp(key)
if (!sn.isNullOrBlank()) { // if (!sn.isNullOrBlank()) {
return limitSerialDigit(sn) // return limitSerialDigit(sn)
} // }
} // }
return "" // return ""
// return "zd09312051870556" return "zd09312051870556"
} }
/** /**