优化后的代码

This commit is contained in:
林若思 2026-01-03 10:11:28 +08:00
parent f8812b6a48
commit 956dd78c1b
5 changed files with 197 additions and 70 deletions

View File

@ -52,7 +52,7 @@
tools:targetApi="31">
<activity
android:name=".ui.MainActivity"
android:name=".ui.SplashActivity"
android:exported="true"
android:theme="@style/Theme.Splash"
android:screenOrientation="portrait">
@ -66,9 +66,9 @@
</intent-filter>
</activity>
<!--<activity
<activity
android:name="com.zs.smarthuman.ui.MainActivity"
android:screenOrientation="portrait"/>-->
android:screenOrientation="portrait"/>
<activity
android:name="com.zs.smarthuman.ui.ActivateActivity"
android:screenOrientation="portrait"/>

View File

@ -4,11 +4,6 @@ import android.content.res.AssetManager
import com.k2fsa.sherpa.onnx.Vad
import com.k2fsa.sherpa.onnx.getVadModelConfig
/**
* @description:
* @author: lrs
* @date: 2025/12/17 10:22
*/
class VadManager(
assetManager: AssetManager,
private val onSpeechStart: () -> Unit,
@ -19,9 +14,13 @@ class VadManager(
// True between the onSpeechStart() callback and the matching onSpeechEnd() callback.
private var isSpeaking = false
// Timestamp of the most recent frame in which the VAD reported speech.
private var lastSpeechTime = 0L
// Statistics counters.
private var speechFrameCount = 0
private var totalFrameCount = 0
/** Counters restricted to the active speech segment (speech frames plus frames seen while isSpeaking). */
private var activeFrameCount = 0
private var activeSpeechFrameCount = 0
/** Counters over every frame fed to the VAD; intended for debugging/tuning only (optional). */
private var rawFrameCount = 0
private var rawSpeechFrameCount = 0
// Amount of trailing silence (ms) after the last speech frame before the segment is considered ended.
private val END_SILENCE_MS = 600L
@ -37,36 +36,58 @@ class VadManager(
vad.acceptWaveform(samples)
val hasSpeech = vad.isSpeechDetected()
totalFrameCount++
/* ===== raw 统计(仅日志) ===== */
rawFrameCount++
if (hasSpeech) rawSpeechFrameCount++
if (hasSpeech) {
speechFrameCount++
lastSpeechTime = now
if (!isSpeaking) {
isSpeaking = true
onSpeechStart()
}
activeFrameCount++
activeSpeechFrameCount++
} else {
if (isSpeaking && now - lastSpeechTime >= END_SILENCE_MS) {
if (isSpeaking) {
activeFrameCount++
if (now - lastSpeechTime >= END_SILENCE_MS) {
isSpeaking = false
onSpeechEnd()
vad.clear()
}
}
}
}
/** 👉 人声占比(真正用到 VAD 的地方) */
fun speechRatio(): Float {
if (totalFrameCount == 0) return 0f
return speechFrameCount.toFloat() / totalFrameCount
/**
 * Ratio used to decide whether a segment is real human speech.
 *
 * Only frames counted while the VAD was active take part in the ratio.
 *
 * @return `activeSpeechFrameCount / activeFrameCount`, or `0f` when no
 * active frame has been counted yet.
 */
fun activeSpeechRatio(): Float =
    if (activeFrameCount == 0) 0f else activeSpeechFrameCount.toFloat() / activeFrameCount
/**
 * Speech ratio over every frame that was fed to the VAD.
 *
 * Meant only for observing values while tuning parameters.
 *
 * @return `rawSpeechFrameCount / rawFrameCount`, or `0f` when no frame has
 * been counted yet.
 */
fun rawSpeechRatio(): Float =
    if (rawFrameCount == 0) 0f else rawSpeechFrameCount.toFloat() / rawFrameCount
/**
 * Returns the manager to its initial state: clears the speaking flag and the
 * last-speech timestamp, zeroes every frame counter, and resets the
 * underlying VAD instance.
 */
fun reset() {
isSpeaking = false
lastSpeechTime = 0
speechFrameCount = 0
totalFrameCount = 0
// NOTE(review): lastSpeechTime is assigned a second time here; one of the two assignments is redundant.
lastSpeechTime = 0L
// Counters for the active speech segment.
activeFrameCount = 0
activeSpeechFrameCount = 0
// Counters over all frames (debug/tuning).
rawFrameCount = 0
rawSpeechFrameCount = 0
vad.reset()
}
}

View File

@ -8,7 +8,7 @@ class VoiceController(
assetManager: AssetManager,
private val onWakeup: () -> Unit,
private val onFinalAudio: (FloatArray) -> Unit,
private val idleTimeoutSeconds: Int = 5,
private val idleTimeoutSeconds: Int = 8,
private val maxRecordingSeconds: Int = 10,
private val onStateChanged: ((VoiceState) -> Unit)? = null,
private val stopBackendAudio: (() -> Unit)? = null
@ -53,28 +53,39 @@ class VoiceController(
// Timestamp (ms) at which the current recording started; set when the VAD reports speech start.
private var recordingStartMs = 0L
// Timestamp (ms) at which the current run of below-threshold frames began; 0 means no silence run in progress.
private var silenceStartMs = 0L
/** Start of the current streak of rejected sentences while in WAIT_SPEECH (key for the idle timeout). */
private var waitSpeechFailStartMs = 0L
/* ================= Near-field speech statistics (key addition) ================= */
private var speechEnergySum = 0f
private var speechFrameCount = 0
/* ================= Control ================= */
private var vadStarted = false
/** Observation window right after the wake word. */
private var inKwsObserve = false
private var kwsObserveStartMs = 0L
private val KWS_OBSERVE_MS = 500L
/** Cooldown after playback. */
private var speechEnableAtMs = 0L
private val SPEECH_COOLDOWN_MS = 300L
/* ================= Thresholds ================= */
/* ================= Thresholds (recalibrated) ================= */
private val RMS_SILENCE_THRESHOLD = 0.005f
private val RMS_SILENCE_THRESHOLD = 0.012f // silence threshold (corrected)
private val SILENCE_END_MS = 1200L
private val MIN_SPEECH_MS = 300L
private val MIN_SPEECH_MS = 1000L // sentence level
private val MIN_AVG_ENERGY = 0.02f // near-field energy gate
/** Moment at which we started waiting for speech after wake-up; 0 disables the wait timeout. */
private var waitSpeechStartMs = 0L
/** Maximum time to wait after wake-up when the user says nothing at all. */
private val WAIT_SPEECH_TIMEOUT_MS = 8000L
/* ================= 音频入口 ================= */
@ -100,12 +111,22 @@ class VoiceController(
VoiceState.WAIT_SPEECH_COOLDOWN -> {
if (now >= speechEnableAtMs) {
state = VoiceState.WAIT_SPEECH
waitSpeechStartMs = now // ⭐ 关键:开始等人说话
}
return
}
VoiceState.WAIT_SPEECH -> {
// ⭐ 唤醒后长时间没人说话 → 自动退出
if (waitSpeechStartMs > 0 &&
now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS
) {
Log.d(TAG, "⏱ Wakeup but no speech, exit to WAIT_WAKEUP")
resetAll()
return
}
if (inKwsObserve) {
if (now - kwsObserveStartMs < KWS_OBSERVE_MS) return
inKwsObserve = false
@ -114,26 +135,30 @@ class VoiceController(
vadManager.accept(samples)
}
VoiceState.RECORDING -> {
audioBuffer.addAll(samples.asList())
vadManager.accept(samples)
if (now - recordingStartMs > maxRecordingSeconds * 1000) {
Log.w(TAG, "⏱ Max recording reached")
val rms = calcRms(samples)
if (rms > RMS_SILENCE_THRESHOLD) {
speechEnergySum += rms
speechFrameCount++
silenceStartMs = 0L
} else {
if (silenceStartMs == 0L) silenceStartMs = now
else if (now - silenceStartMs >= SILENCE_END_MS) {
Log.d(TAG, "🔇 Silence end")
finishSentence()
return
}
val rms = calcRms(samples)
if (rms < RMS_SILENCE_THRESHOLD) {
if (silenceStartMs == 0L) silenceStartMs = now
else if (now - silenceStartMs >= SILENCE_END_MS) {
Log.d(TAG, "🔇 RMS silence end")
finishSentence()
}
} else {
silenceStartMs = 0L
if (now - recordingStartMs > maxRecordingSeconds * 1000) {
Log.w(TAG, "⏱ Max recording reached")
finishSentence()
}
}
}
@ -146,8 +171,12 @@ class VoiceController(
VoiceState.UPLOADING -> return
// ⭐ 关键:只要不是纯等待唤醒,一律打断
VoiceState.RECORDING,
VoiceState.WAIT_SPEECH,
VoiceState.WAIT_SPEECH_COOLDOWN,
VoiceState.PLAYING_BACKEND -> {
Log.d(TAG, "⚠ WakeWord interrupt state=$state")
stopBackendAudio?.invoke()
enterWakeup(interrupt = true)
}
@ -156,13 +185,21 @@ class VoiceController(
}
}
private fun enterWakeup(interrupt: Boolean) {
if (interrupt) {
Log.d(TAG, "🛑 Interrupt current speech / recording")
audioBuffer.clear()
preBuffer.clear() // ⭐ 防止把旧唤醒词带进去
vadManager.reset()
resetEnergyStat()
vadStarted = false
silenceStartMs = 0L
waitSpeechStartMs = 0L // ⭐
waitSpeechFailStartMs = 0L // ⭐
}
inKwsObserve = true
@ -172,6 +209,7 @@ class VoiceController(
onWakeup()
}
/* ================= VAD START ================= */
private fun onVadStart() {
@ -182,6 +220,8 @@ class VoiceController(
vadStarted = true
recordingStartMs = System.currentTimeMillis()
silenceStartMs = 0L
waitSpeechStartMs = 0L // ⭐ 清掉“等待说话”超时
resetEnergyStat()
audioBuffer.clear()
audioBuffer.addAll(preBuffer)
@ -189,26 +229,73 @@ class VoiceController(
state = VoiceState.RECORDING
}
/* ================= 结束录音 ================= */
/* ================= 结束录音(⭐核心) ================= */
private fun finishSentence() {
val duration = System.currentTimeMillis() - recordingStartMs
val now = System.currentTimeMillis()
val duration = now - recordingStartMs
if (!vadStarted || duration < MIN_SPEECH_MS) {
Log.d(TAG, "❌ Too short or no VAD start: ${duration}ms")
resetToWaitSpeech()
return
}
val vadRatio = vadManager.speechRatio()
Log.d(TAG, "🎙 VAD speech ratio=$vadRatio")
val vadRatio = vadManager.activeSpeechRatio()
val avgEnergy =
if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
if (vadRatio < 0.25f) {
Log.d(TAG, "❌ VAD says NOT human speech")
/* ================= 评分制判定 ================= */
var score = 0
// 1⃣ 时长评分(最重要)
when {
duration >= 4000 -> score += 3
duration >= 2500 -> score += 2
duration >= 1500 -> score += 1
}
// 2⃣ 能量评分(近讲人声强信号)
when {
avgEnergy >= 0.10f -> score += 3
avgEnergy >= 0.06f -> score += 2
avgEnergy >= MIN_AVG_ENERGY -> score += 1
}
// 3⃣ VAD 评分(只作为辅助)
when {
vadRatio >= 0.55f -> score += 2
vadRatio >= 0.40f -> score += 1
}
Log.d(
TAG,
"📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score"
)
/**
* 评分阈值
* - >=4 : 必然是真实人声
* - 3 : 在近讲/长句条件下允许
* - <3 : 拦截
*/
val pass = when {
score >= 4 -> true
score == 3 && avgEnergy >= 0.06f -> true
else -> false
}
if (!pass) {
Log.d(TAG, "❌ Sentence rejected (score=$score)")
resetToWaitSpeech()
return
}
// ✅ 成功一次,清空失败计时
/* ================= 通过,进入上传 ================= */
waitSpeechFailStartMs = 0L
val finalAudio = audioBuffer.toFloatArray()
@ -218,6 +305,7 @@ class VoiceController(
onFinalAudio(finalAudio)
}
/* ================= 播放回调 ================= */
fun onPlayStartPrompt() {
@ -238,7 +326,7 @@ class VoiceController(
state = VoiceState.WAIT_SPEECH_COOLDOWN
}
/* ================= 上传回调(保留 public ================= */
/* ================= 上传回调 ================= */
fun onUploadFinished(success: Boolean) {
if (state != VoiceState.UPLOADING) return
@ -251,15 +339,16 @@ class VoiceController(
}
}
/* ================= Idle 超时(关键修复) ================= */
/* ================= Idle 超时 ================= */
fun checkIdleTimeout() {
if (state != VoiceState.WAIT_SPEECH) return
if (waitSpeechFailStartMs == 0L) return
val now = System.currentTimeMillis()
if (now - waitSpeechFailStartMs > idleTimeoutSeconds * 1000) {
Log.d(TAG, "⏱ WAIT_SPEECH continuous fail timeout")
if (System.currentTimeMillis() - waitSpeechFailStartMs >
idleTimeoutSeconds * 1000
) {
Log.d(TAG, "⏱ WAIT_SPEECH timeout")
resetAll()
waitSpeechFailStartMs = 0L
}
@ -270,11 +359,11 @@ class VoiceController(
private fun resetToWaitSpeech() {
audioBuffer.clear()
vadManager.reset()
resetEnergyStat()
vadStarted = false
silenceStartMs = 0L
state = VoiceState.WAIT_SPEECH
// ⭐ 只在第一次失败时记录
if (waitSpeechFailStartMs == 0L) {
waitSpeechFailStartMs = System.currentTimeMillis()
}
@ -284,11 +373,14 @@ class VoiceController(
audioBuffer.clear()
preBuffer.clear()
vadManager.reset()
resetEnergyStat()
vadStarted = false
silenceStartMs = 0L
waitSpeechStartMs = 0L // ⭐
state = VoiceState.WAIT_WAKEUP
}
fun release() {
wakeupManager.release()
vadManager.reset()
@ -296,6 +388,11 @@ class VoiceController(
/* ================= Utils ================= */
/** Zeroes the frame counter and the energy accumulator that feed the average-energy computation. */
private fun resetEnergyStat() {
    speechFrameCount = 0
    speechEnergySum = 0f
}
private fun cachePreBuffer(samples: FloatArray) {
for (s in samples) {
preBuffer.addLast(s)

View File

@ -161,7 +161,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
voiceInfo = mutableListOf<VoiceBeanResp>().apply {
add(
VoiceBeanResp(
audioUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?:*/ "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
audioUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
)
)
}
@ -187,8 +187,17 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
VoiceState.WAIT_WAKEUP -> {
Log.d("lrs", "当前状态: 等待唤醒")
lifecycleScope.launch(Dispatchers.Main) {
mVerticalAnimator?.hide()
UnityPlayerHolder.getInstance()
.sendVoiceToUnity(
voiceInfo = mutableListOf<VoiceBeanResp>().apply {
add(
VoiceBeanResp(
audioUrl = "https://static.seerteach.net/aidialogue/userWakeUpAudio/ttsmaker-file-2025-12-31-16-2-51.mp3"
)
)
}
)
}
}
@ -334,9 +343,9 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
word: String,
audioUrl: String
) {
// val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: return
//
// if (audioUrl != wakeupUrl) return
val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return
if (audioUrl != wakeupUrl) return
when (state) {
1 -> { // play

View File

@ -25,14 +25,14 @@ object SerialNumberUtil {
* 外部调用获取最终序列号
*/
fun getSerialNumber(): String {
    // Probe the candidate property keys in order; the first non-blank value wins
    // and is normalised through limitSerialDigit().
    for (key in snKeys) {
        val sn = getProp(key)
        if (!sn.isNullOrBlank()) {
            return limitSerialDigit(sn)
        }
    }
    // No key yielded a usable value. The hard-coded debug serial and the
    // commented-out copy of this lookup were removed so that every device
    // reports its own serial number (or an empty string) instead of a shared one.
    return ""
}
/**