优化后的代码
This commit is contained in:
parent
f8812b6a48
commit
956dd78c1b
@ -52,7 +52,7 @@
|
||||
tools:targetApi="31">
|
||||
|
||||
<activity
|
||||
android:name=".ui.MainActivity"
|
||||
android:name=".ui.SplashActivity"
|
||||
android:exported="true"
|
||||
android:theme="@style/Theme.Splash"
|
||||
android:screenOrientation="portrait">
|
||||
@ -66,9 +66,9 @@
|
||||
</intent-filter>
|
||||
</activity>
|
||||
|
||||
<!--<activity
|
||||
<activity
|
||||
android:name="com.zs.smarthuman.ui.MainActivity"
|
||||
android:screenOrientation="portrait"/>-->
|
||||
android:screenOrientation="portrait"/>
|
||||
<activity
|
||||
android:name="com.zs.smarthuman.ui.ActivateActivity"
|
||||
android:screenOrientation="portrait"/>
|
||||
|
||||
@ -4,11 +4,6 @@ import android.content.res.AssetManager
|
||||
import com.k2fsa.sherpa.onnx.Vad
|
||||
import com.k2fsa.sherpa.onnx.getVadModelConfig
|
||||
|
||||
/**
|
||||
* @description:
|
||||
* @author: lrs
|
||||
* @date: 2025/12/17 10:22
|
||||
*/
|
||||
class VadManager(
|
||||
assetManager: AssetManager,
|
||||
private val onSpeechStart: () -> Unit,
|
||||
@ -19,9 +14,13 @@ class VadManager(
|
||||
private var isSpeaking = false
|
||||
private var lastSpeechTime = 0L
|
||||
|
||||
// ⭐ 统计用
|
||||
private var speechFrameCount = 0
|
||||
private var totalFrameCount = 0
|
||||
/** ⭐ 仅统计“有效语音段” */
|
||||
private var activeFrameCount = 0
|
||||
private var activeSpeechFrameCount = 0
|
||||
|
||||
/** ⭐ 用于调试(可选) */
|
||||
private var rawFrameCount = 0
|
||||
private var rawSpeechFrameCount = 0
|
||||
|
||||
private val END_SILENCE_MS = 600L
|
||||
|
||||
@ -37,36 +36,58 @@ class VadManager(
|
||||
vad.acceptWaveform(samples)
|
||||
val hasSpeech = vad.isSpeechDetected()
|
||||
|
||||
totalFrameCount++
|
||||
/* ===== raw 统计(仅日志) ===== */
|
||||
rawFrameCount++
|
||||
if (hasSpeech) rawSpeechFrameCount++
|
||||
|
||||
if (hasSpeech) {
|
||||
speechFrameCount++
|
||||
lastSpeechTime = now
|
||||
|
||||
if (!isSpeaking) {
|
||||
isSpeaking = true
|
||||
onSpeechStart()
|
||||
}
|
||||
|
||||
activeFrameCount++
|
||||
activeSpeechFrameCount++
|
||||
} else {
|
||||
if (isSpeaking && now - lastSpeechTime >= END_SILENCE_MS) {
|
||||
if (isSpeaking) {
|
||||
activeFrameCount++
|
||||
|
||||
if (now - lastSpeechTime >= END_SILENCE_MS) {
|
||||
isSpeaking = false
|
||||
onSpeechEnd()
|
||||
vad.clear()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** 👉 人声占比(真正用到 VAD 的地方) */
|
||||
fun speechRatio(): Float {
|
||||
if (totalFrameCount == 0) return 0f
|
||||
return speechFrameCount.toFloat() / totalFrameCount
|
||||
/**
 * Ratio used to decide whether a recorded segment is real human speech.
 *
 * Divides [activeSpeechFrameCount] by [activeFrameCount], i.e. only the frames
 * counted while the VAD segment was active contribute to the result.
 *
 * @return the ratio, or `0f` when no active frame has been counted yet.
 */
fun activeSpeechRatio(): Float {
    val countedFrames = activeFrameCount
    return if (countedFrames == 0) {
        0f
    } else {
        activeSpeechFrameCount.toFloat() / countedFrames
    }
}
|
||||
|
||||
/**
 * Diagnostic ratio intended only for observing and tuning parameters.
 *
 * Divides [rawSpeechFrameCount] by [rawFrameCount].
 *
 * @return the ratio, or `0f` when no frame has been counted yet.
 */
fun rawSpeechRatio(): Float {
    val countedFrames = rawFrameCount
    return if (countedFrames == 0) {
        0f
    } else {
        rawSpeechFrameCount.toFloat() / countedFrames
    }
}
|
||||
|
||||
fun reset() {
|
||||
isSpeaking = false
|
||||
lastSpeechTime = 0
|
||||
speechFrameCount = 0
|
||||
totalFrameCount = 0
|
||||
lastSpeechTime = 0L
|
||||
|
||||
activeFrameCount = 0
|
||||
activeSpeechFrameCount = 0
|
||||
rawFrameCount = 0
|
||||
rawSpeechFrameCount = 0
|
||||
|
||||
vad.reset()
|
||||
}
|
||||
}
|
||||
|
||||
@ -8,7 +8,7 @@ class VoiceController(
|
||||
assetManager: AssetManager,
|
||||
private val onWakeup: () -> Unit,
|
||||
private val onFinalAudio: (FloatArray) -> Unit,
|
||||
private val idleTimeoutSeconds: Int = 5,
|
||||
private val idleTimeoutSeconds: Int = 8,
|
||||
private val maxRecordingSeconds: Int = 10,
|
||||
private val onStateChanged: ((VoiceState) -> Unit)? = null,
|
||||
private val stopBackendAudio: (() -> Unit)? = null
|
||||
@ -53,28 +53,39 @@ class VoiceController(
|
||||
|
||||
private var recordingStartMs = 0L
|
||||
private var silenceStartMs = 0L
|
||||
|
||||
/** ⭐ WAIT_SPEECH 连续失败起点(关键) */
|
||||
private var waitSpeechFailStartMs = 0L
|
||||
|
||||
/* ================= 近讲统计(⭐关键新增) ================= */
|
||||
|
||||
private var speechEnergySum = 0f
|
||||
private var speechFrameCount = 0
|
||||
|
||||
/* ================= 控制 ================= */
|
||||
|
||||
private var vadStarted = false
|
||||
|
||||
/** 唤醒观察期 */
|
||||
private var inKwsObserve = false
|
||||
private var kwsObserveStartMs = 0L
|
||||
private val KWS_OBSERVE_MS = 500L
|
||||
|
||||
/** 播放冷却 */
|
||||
private var speechEnableAtMs = 0L
|
||||
private val SPEECH_COOLDOWN_MS = 300L
|
||||
|
||||
/* ================= 阈值 ================= */
|
||||
/* ================= 阈值(⭐已校正) ================= */
|
||||
|
||||
private val RMS_SILENCE_THRESHOLD = 0.005f
|
||||
private val RMS_SILENCE_THRESHOLD = 0.012f // 静音阈值(修正)
|
||||
private val SILENCE_END_MS = 1200L
|
||||
private val MIN_SPEECH_MS = 300L
|
||||
private val MIN_SPEECH_MS = 1000L // 句子级
|
||||
private val MIN_AVG_ENERGY = 0.02f // 近讲能量门
|
||||
|
||||
|
||||
/** ⭐ 唤醒后等待人声起点 */
|
||||
private var waitSpeechStartMs = 0L
|
||||
|
||||
/** ⭐ 唤醒后最大等待时间(没说一句话) */
|
||||
private val WAIT_SPEECH_TIMEOUT_MS = 8000L
|
||||
|
||||
|
||||
|
||||
/* ================= 音频入口 ================= */
|
||||
|
||||
@ -100,12 +111,22 @@ class VoiceController(
|
||||
VoiceState.WAIT_SPEECH_COOLDOWN -> {
|
||||
if (now >= speechEnableAtMs) {
|
||||
state = VoiceState.WAIT_SPEECH
|
||||
waitSpeechStartMs = now // ⭐ 关键:开始等人说话
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
VoiceState.WAIT_SPEECH -> {
|
||||
|
||||
// ⭐ 唤醒后长时间没人说话 → 自动退出
|
||||
if (waitSpeechStartMs > 0 &&
|
||||
now - waitSpeechStartMs >= WAIT_SPEECH_TIMEOUT_MS
|
||||
) {
|
||||
Log.d(TAG, "⏱ Wakeup but no speech, exit to WAIT_WAKEUP")
|
||||
resetAll()
|
||||
return
|
||||
}
|
||||
|
||||
if (inKwsObserve) {
|
||||
if (now - kwsObserveStartMs < KWS_OBSERVE_MS) return
|
||||
inKwsObserve = false
|
||||
@ -114,26 +135,30 @@ class VoiceController(
|
||||
vadManager.accept(samples)
|
||||
}
|
||||
|
||||
|
||||
VoiceState.RECORDING -> {
|
||||
|
||||
audioBuffer.addAll(samples.asList())
|
||||
vadManager.accept(samples)
|
||||
|
||||
if (now - recordingStartMs > maxRecordingSeconds * 1000) {
|
||||
Log.w(TAG, "⏱ Max recording reached")
|
||||
val rms = calcRms(samples)
|
||||
|
||||
if (rms > RMS_SILENCE_THRESHOLD) {
|
||||
speechEnergySum += rms
|
||||
speechFrameCount++
|
||||
silenceStartMs = 0L
|
||||
} else {
|
||||
if (silenceStartMs == 0L) silenceStartMs = now
|
||||
else if (now - silenceStartMs >= SILENCE_END_MS) {
|
||||
Log.d(TAG, "🔇 Silence end")
|
||||
finishSentence()
|
||||
return
|
||||
}
|
||||
|
||||
val rms = calcRms(samples)
|
||||
if (rms < RMS_SILENCE_THRESHOLD) {
|
||||
if (silenceStartMs == 0L) silenceStartMs = now
|
||||
else if (now - silenceStartMs >= SILENCE_END_MS) {
|
||||
Log.d(TAG, "🔇 RMS silence end")
|
||||
finishSentence()
|
||||
}
|
||||
} else {
|
||||
silenceStartMs = 0L
|
||||
|
||||
if (now - recordingStartMs > maxRecordingSeconds * 1000) {
|
||||
Log.w(TAG, "⏱ Max recording reached")
|
||||
finishSentence()
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -146,8 +171,12 @@ class VoiceController(
|
||||
|
||||
VoiceState.UPLOADING -> return
|
||||
|
||||
// ⭐ 关键:只要不是纯等待唤醒,一律打断
|
||||
VoiceState.RECORDING,
|
||||
VoiceState.WAIT_SPEECH,
|
||||
VoiceState.WAIT_SPEECH_COOLDOWN,
|
||||
VoiceState.PLAYING_BACKEND -> {
|
||||
Log.d(TAG, "⚠ WakeWord interrupt state=$state")
|
||||
stopBackendAudio?.invoke()
|
||||
enterWakeup(interrupt = true)
|
||||
}
|
||||
@ -156,13 +185,21 @@ class VoiceController(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private fun enterWakeup(interrupt: Boolean) {
|
||||
|
||||
if (interrupt) {
|
||||
Log.d(TAG, "🛑 Interrupt current speech / recording")
|
||||
|
||||
audioBuffer.clear()
|
||||
preBuffer.clear() // ⭐ 防止把旧唤醒词带进去
|
||||
vadManager.reset()
|
||||
resetEnergyStat()
|
||||
|
||||
vadStarted = false
|
||||
silenceStartMs = 0L
|
||||
waitSpeechStartMs = 0L // ⭐
|
||||
waitSpeechFailStartMs = 0L // ⭐
|
||||
}
|
||||
|
||||
inKwsObserve = true
|
||||
@ -172,6 +209,7 @@ class VoiceController(
|
||||
onWakeup()
|
||||
}
|
||||
|
||||
|
||||
/* ================= VAD START ================= */
|
||||
|
||||
private fun onVadStart() {
|
||||
@ -182,6 +220,8 @@ class VoiceController(
|
||||
vadStarted = true
|
||||
recordingStartMs = System.currentTimeMillis()
|
||||
silenceStartMs = 0L
|
||||
waitSpeechStartMs = 0L // ⭐ 清掉“等待说话”超时
|
||||
resetEnergyStat()
|
||||
|
||||
audioBuffer.clear()
|
||||
audioBuffer.addAll(preBuffer)
|
||||
@ -189,26 +229,73 @@ class VoiceController(
|
||||
state = VoiceState.RECORDING
|
||||
}
|
||||
|
||||
/* ================= 结束录音 ================= */
|
||||
|
||||
/* ================= 结束录音(⭐核心) ================= */
|
||||
|
||||
private fun finishSentence() {
|
||||
|
||||
val duration = System.currentTimeMillis() - recordingStartMs
|
||||
val now = System.currentTimeMillis()
|
||||
val duration = now - recordingStartMs
|
||||
|
||||
if (!vadStarted || duration < MIN_SPEECH_MS) {
|
||||
Log.d(TAG, "❌ Too short or no VAD start: ${duration}ms")
|
||||
resetToWaitSpeech()
|
||||
return
|
||||
}
|
||||
|
||||
val vadRatio = vadManager.speechRatio()
|
||||
Log.d(TAG, "🎙 VAD speech ratio=$vadRatio")
|
||||
val vadRatio = vadManager.activeSpeechRatio()
|
||||
val avgEnergy =
|
||||
if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
|
||||
|
||||
if (vadRatio < 0.25f) {
|
||||
Log.d(TAG, "❌ VAD says NOT human speech")
|
||||
/* ================= 评分制判定 ================= */
|
||||
|
||||
var score = 0
|
||||
|
||||
// 1️⃣ 时长评分(最重要)
|
||||
when {
|
||||
duration >= 4000 -> score += 3
|
||||
duration >= 2500 -> score += 2
|
||||
duration >= 1500 -> score += 1
|
||||
}
|
||||
|
||||
// 2️⃣ 能量评分(近讲人声强信号)
|
||||
when {
|
||||
avgEnergy >= 0.10f -> score += 3
|
||||
avgEnergy >= 0.06f -> score += 2
|
||||
avgEnergy >= MIN_AVG_ENERGY -> score += 1
|
||||
}
|
||||
|
||||
// 3️⃣ VAD 评分(只作为辅助)
|
||||
when {
|
||||
vadRatio >= 0.55f -> score += 2
|
||||
vadRatio >= 0.40f -> score += 1
|
||||
}
|
||||
|
||||
Log.d(
|
||||
TAG,
|
||||
"📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score"
|
||||
)
|
||||
|
||||
/**
|
||||
* 评分阈值:
|
||||
* - >=4 : 必然是真实人声
|
||||
* - 3 : 在近讲/长句条件下允许
|
||||
* - <3 : 拦截
|
||||
*/
|
||||
val pass = when {
|
||||
score >= 4 -> true
|
||||
score == 3 && avgEnergy >= 0.06f -> true
|
||||
else -> false
|
||||
}
|
||||
|
||||
if (!pass) {
|
||||
Log.d(TAG, "❌ Sentence rejected (score=$score)")
|
||||
resetToWaitSpeech()
|
||||
return
|
||||
}
|
||||
|
||||
// ✅ 成功一次,清空失败计时
|
||||
/* ================= 通过,进入上传 ================= */
|
||||
|
||||
waitSpeechFailStartMs = 0L
|
||||
|
||||
val finalAudio = audioBuffer.toFloatArray()
|
||||
@ -218,6 +305,7 @@ class VoiceController(
|
||||
onFinalAudio(finalAudio)
|
||||
}
|
||||
|
||||
|
||||
/* ================= 播放回调 ================= */
|
||||
|
||||
fun onPlayStartPrompt() {
|
||||
@ -238,7 +326,7 @@ class VoiceController(
|
||||
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
||||
}
|
||||
|
||||
/* ================= 上传回调(保留 public) ================= */
|
||||
/* ================= 上传回调 ================= */
|
||||
|
||||
fun onUploadFinished(success: Boolean) {
|
||||
if (state != VoiceState.UPLOADING) return
|
||||
@ -251,15 +339,16 @@ class VoiceController(
|
||||
}
|
||||
}
|
||||
|
||||
/* ================= Idle 超时(关键修复) ================= */
|
||||
/* ================= Idle 超时 ================= */
|
||||
|
||||
fun checkIdleTimeout() {
|
||||
if (state != VoiceState.WAIT_SPEECH) return
|
||||
if (waitSpeechFailStartMs == 0L) return
|
||||
|
||||
val now = System.currentTimeMillis()
|
||||
if (now - waitSpeechFailStartMs > idleTimeoutSeconds * 1000) {
|
||||
Log.d(TAG, "⏱ WAIT_SPEECH continuous fail timeout")
|
||||
if (System.currentTimeMillis() - waitSpeechFailStartMs >
|
||||
idleTimeoutSeconds * 1000
|
||||
) {
|
||||
Log.d(TAG, "⏱ WAIT_SPEECH timeout")
|
||||
resetAll()
|
||||
waitSpeechFailStartMs = 0L
|
||||
}
|
||||
@ -270,11 +359,11 @@ class VoiceController(
|
||||
private fun resetToWaitSpeech() {
|
||||
audioBuffer.clear()
|
||||
vadManager.reset()
|
||||
resetEnergyStat()
|
||||
vadStarted = false
|
||||
silenceStartMs = 0L
|
||||
state = VoiceState.WAIT_SPEECH
|
||||
|
||||
// ⭐ 只在第一次失败时记录
|
||||
if (waitSpeechFailStartMs == 0L) {
|
||||
waitSpeechFailStartMs = System.currentTimeMillis()
|
||||
}
|
||||
@ -284,11 +373,14 @@ class VoiceController(
|
||||
audioBuffer.clear()
|
||||
preBuffer.clear()
|
||||
vadManager.reset()
|
||||
resetEnergyStat()
|
||||
vadStarted = false
|
||||
silenceStartMs = 0L
|
||||
waitSpeechStartMs = 0L // ⭐
|
||||
state = VoiceState.WAIT_WAKEUP
|
||||
}
|
||||
|
||||
|
||||
fun release() {
|
||||
wakeupManager.release()
|
||||
vadManager.reset()
|
||||
@ -296,6 +388,11 @@ class VoiceController(
|
||||
|
||||
/* ================= Utils ================= */
|
||||
|
||||
/**
 * Clears the near-field energy statistics: the frame counter and the
 * accumulated RMS sum that the average energy is later derived from.
 */
private fun resetEnergyStat() {
    speechFrameCount = 0
    speechEnergySum = 0f
}
|
||||
|
||||
private fun cachePreBuffer(samples: FloatArray) {
|
||||
for (s in samples) {
|
||||
preBuffer.addLast(s)
|
||||
|
||||
@ -161,7 +161,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
||||
voiceInfo = mutableListOf<VoiceBeanResp>().apply {
|
||||
add(
|
||||
VoiceBeanResp(
|
||||
audioUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?:*/ "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
|
||||
audioUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
|
||||
)
|
||||
)
|
||||
}
|
||||
@ -187,8 +187,17 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
||||
VoiceState.WAIT_WAKEUP -> {
|
||||
Log.d("lrs", "当前状态: 等待唤醒")
|
||||
lifecycleScope.launch(Dispatchers.Main) {
|
||||
|
||||
mVerticalAnimator?.hide()
|
||||
UnityPlayerHolder.getInstance()
|
||||
.sendVoiceToUnity(
|
||||
voiceInfo = mutableListOf<VoiceBeanResp>().apply {
|
||||
add(
|
||||
VoiceBeanResp(
|
||||
audioUrl = "https://static.seerteach.net/aidialogue/userWakeUpAudio/ttsmaker-file-2025-12-31-16-2-51.mp3"
|
||||
)
|
||||
)
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@ -334,9 +343,9 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
||||
word: String,
|
||||
audioUrl: String
|
||||
) {
|
||||
// val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: return
|
||||
//
|
||||
// if (audioUrl != wakeupUrl) return
|
||||
val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return
|
||||
|
||||
if (audioUrl != wakeupUrl) return
|
||||
|
||||
when (state) {
|
||||
1 -> { // play
|
||||
|
||||
@ -25,14 +25,14 @@ object SerialNumberUtil {
|
||||
* 外部调用,获取最终序列号
|
||||
*/
|
||||
fun getSerialNumber(): String {
    // Probe each candidate key in declaration order and use the first one that
    // yields a non-blank value; the value is passed through limitSerialDigit
    // before being returned.
    for (key in snKeys) {
        val sn = getProp(key)
        if (!sn.isNullOrBlank()) {
            return limitSerialDigit(sn)
        }
    }
    // No key produced a usable value. A fixed debug serial must not be returned
    // here: it would make every device report the same identity. Callers get an
    // empty string and decide how to handle the missing serial.
    return ""
}
|
||||
|
||||
/**
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user