临时提交

This commit is contained in:
林若思 2025-12-20 18:47:58 +08:00
parent f7100ddc93
commit b6423678a2
3 changed files with 94 additions and 69 deletions

View File

@ -3,6 +3,7 @@ package com.zs.smarthuman.sherpa
import android.content.res.AssetManager import android.content.res.AssetManager
import android.util.Log import android.util.Log
class VoiceController( class VoiceController(
assetManager: AssetManager, assetManager: AssetManager,
private val onWakeup: () -> Unit, private val onWakeup: () -> Unit,
@ -10,23 +11,25 @@ class VoiceController(
private val idleTimeoutSeconds: Int = 15, private val idleTimeoutSeconds: Int = 15,
private val onStateChanged: ((VoiceState) -> Unit)? = null private val onStateChanged: ((VoiceState) -> Unit)? = null
) { ) {
private val TAG = "VoiceController"
private var state: VoiceState = VoiceState.WAIT_WAKEUP private var state: VoiceState = VoiceState.WAIT_WAKEUP
set(value) { set(value) {
field = value field = value
onStateChanged?.invoke(value) onStateChanged?.invoke(value)
Log.d("VoiceController", "当前状态: $value") Log.d(TAG, "当前状态: $value")
} }
/** ================= 唤醒 ================= */ /** ================= 唤醒 ================= */
private var wakeupLocked = false
private var wakeupFlag = false private var wakeupFlag = false
private var wakeupDiscardEndTime = 0L
private val WAKEUP_DISCARD_BUFFER_MS = 200L // 提示音前后缓冲
private val wakeupManager = WakeupManager(assetManager) { private val wakeupManager = WakeupManager(assetManager) {
Log.d("VoiceController", "唤醒触发! wakeupFlag=$wakeupFlag, 当前状态=$state") Log.d(TAG, "唤醒触发! 当前状态=$state")
wakeupFlag = true wakeupFlag = true
onWakeup() onWakeup()
startWaitSpeech() playLocalPrompt()
} }
/** ================= VAD ================= */ /** ================= VAD ================= */
@ -38,63 +41,83 @@ class VoiceController(
/** ================= 音频缓存 ================= */ /** ================= 音频缓存 ================= */
private val audioBuffer = mutableListOf<Float>() private val audioBuffer = mutableListOf<Float>()
private val preBuffer = ArrayDeque<Float>() // pre-roll 1 秒 private val preBuffer = ArrayDeque<Float>() // pre-roll 1 秒
private val PRE_BUFFER_SIZE = 16000 private val PRE_BUFFER_SIZE = 16000
private var idleTimer = 0L private var idleTimer = 0L
var isPlaying = false
private set
/** ================= 尾部静音控制 ================= */ /** ================= 尾部静音控制 ================= */
private var vadEndPending = false private var vadEndPending = false
private var vadEndTime = 0L private var vadEndTime = 0L
private val END_SILENCE_MS = 1000L private val END_SILENCE_MS = 1000L
/* ================= 公共接口 ================= */ var isPlaying = false
private set
/** ================= 公共接口 ================= */
/**
 * Initializes the keyword-spotting stream and reports the current state.
 *
 * @param keywords keyword definition string handed unchanged to the wake-up manager.
 */
fun initWakeup(keywords: String) {
    wakeupManager.initStream(keywords)
    // Push the current state to the optional listener, if one was supplied.
    onStateChanged?.let { notify -> notify(state) }
}
/**
 * Feeds one chunk of audio samples into the controller.
 *
 * Every chunk is appended to the pre-roll buffer and passed to the wake-up manager,
 * regardless of state. The VAD (and, while recording, the sentence buffer) only receive
 * the chunk in [VoiceState.WAIT_SPEECH] / [VoiceState.RECORDING], and only once the
 * discard window that follows the prompt has elapsed.
 *
 * @param samples audio samples of the current chunk.
 */
fun acceptAudio(samples: FloatArray) {
    cachePreBuffer(samples)
    // Keyword spotting always runs.
    wakeupManager.acceptAudio(samples)

    val now = System.currentTimeMillis()
    // True while we are still inside the discard window set when the prompt ended.
    val withinDiscardWindow = now < wakeupDiscardEndTime

    when (state) {
        // Nothing is fed to the VAD while idle or while prompt / backend audio is playing.
        VoiceState.WAIT_WAKEUP,
        VoiceState.PLAYING_PROMPT,
        VoiceState.PLAYING_BACKEND -> return

        VoiceState.WAIT_SPEECH -> {
            if (withinDiscardWindow) return
            vadManager.accept(samples)
        }

        VoiceState.RECORDING -> {
            if (withinDiscardWindow) return
            audioBuffer.addAll(samples.asList())
            vadManager.accept(samples)
            idleTimer = now
            // Close the sentence once the VAD reported an end and the trailing
            // silence has lasted at least END_SILENCE_MS.
            val silenceElapsed = now - vadEndTime
            if (vadEndPending && silenceElapsed >= END_SILENCE_MS) finishSentence()
        }
    }
}
/**
 * Signals that the local prompt sound has started playing.
 *
 * Raises [isPlaying] first, then switches to [VoiceState.PLAYING_PROMPT]; the state
 * setter notifies the state listener.
 */
fun onPlayStartPrompt() {
    isPlaying = true
    state = VoiceState.PLAYING_PROMPT
    Log.d(TAG, "播放提示音, 状态变为 PLAYING_PROMPT")
}
/**
 * Signals that the local prompt sound has finished playing.
 *
 * Clears [isPlaying], switches to [VoiceState.WAIT_SPEECH], restarts the idle timer and
 * opens a short discard window so residual prompt audio is not fed to the VAD.
 */
fun onPlayEndPrompt() {
    isPlaying = false
    state = VoiceState.WAIT_SPEECH
    idleTimer = System.currentTimeMillis()
    // Audio arriving before this deadline is dropped by acceptAudio().
    wakeupDiscardEndTime = WAKEUP_DISCARD_BUFFER_MS + System.currentTimeMillis()
    Log.d(TAG, "提示音播放结束, 状态变为 WAIT_SPEECH")
}
/**
 * Signals that backend audio has started playing.
 *
 * Raises [isPlaying] first, then switches to [VoiceState.PLAYING_BACKEND]; the state
 * setter notifies the state listener.
 */
fun onPlayStartBackend() {
    isPlaying = true
    state = VoiceState.PLAYING_BACKEND
    Log.d(TAG, "播放后台音频, 状态变为 PLAYING_BACKEND")
}
/**
 * Signals that backend audio has finished playing.
 *
 * Clears [isPlaying], returns to [VoiceState.WAIT_WAKEUP] and stamps the idle timer with
 * the current time.
 */
fun onPlayEndBackend() {
    isPlaying = false
    state = VoiceState.WAIT_WAKEUP
    idleTimer = System.currentTimeMillis()
    Log.d(TAG, "后台音频播放结束, 状态变为 WAIT_WAKEUP")
}
fun checkIdleTimeout() { fun checkIdleTimeout() {
if (state != VoiceState.WAIT_SPEECH) return if (state != VoiceState.WAIT_SPEECH) return
val now = System.currentTimeMillis() val now = System.currentTimeMillis()
if (now - idleTimer > idleTimeoutSeconds * 1000) { if (now - idleTimer > idleTimeoutSeconds * 1000) {
reset() reset()
@ -107,10 +130,11 @@ class VoiceController(
preBuffer.clear() preBuffer.clear()
vadManager.reset() vadManager.reset()
wakeupManager.reset() wakeupManager.reset()
wakeupLocked = false
wakeupFlag = false wakeupFlag = false
wakeupDiscardEndTime = 0
idleTimer = 0 idleTimer = 0
vadEndPending = false vadEndPending = false
Log.d(TAG, "已重置, 状态变为 WAIT_WAKEUP")
} }
fun release() { fun release() {
@ -121,69 +145,56 @@ class VoiceController(
idleTimer = 0 idleTimer = 0
isPlaying = false isPlaying = false
state = VoiceState.WAIT_WAKEUP state = VoiceState.WAIT_WAKEUP
wakeupLocked = false
wakeupFlag = false wakeupFlag = false
wakeupDiscardEndTime = 0
vadEndPending = false vadEndPending = false
} }
/* ================= 内部逻辑 ================= */ /* ================= 内部逻辑 ================= */
/**
 * Enters the prompt-playback state after a wake-up.
 *
 * This function only performs the state transition; it does not play any audio itself.
 */
private fun playLocalPrompt() {
    // The "我在" prompt audio is meant to be played at this point; once playback has
    // finished, onPlayEndPrompt() must be called.
    onPlayStartPrompt()
}
/**
 * Handles a speech-start event.
 *
 * Always cancels a pending end-of-sentence wait. Recording begins only from
 * [VoiceState.WAIT_SPEECH], and not while the wake-up flag is set and the discard window
 * is still open. When recording begins, the sentence buffer is seeded with the pre-roll
 * buffer.
 */
private fun onVadSpeechStart() {
    // Speech resumed, so any pending end-of-sentence wait no longer applies.
    vadEndPending = false
    val startedAt = System.currentTimeMillis()
    Log.d(TAG, "VAD开始, 当前状态=$state, wakeupFlag=$wakeupFlag")

    val waitingForSpeech = state == VoiceState.WAIT_SPEECH
    if (!waitingForSpeech) return

    when {
        // Still inside the discard window after a wake-up: ignore this onset.
        wakeupFlag && startedAt < wakeupDiscardEndTime ->
            Log.d(TAG, "丢弃唤醒词残留音频")

        else -> {
            state = VoiceState.RECORDING
            // Start the sentence from the pre-roll so its beginning is not lost.
            with(audioBuffer) {
                clear()
                addAll(preBuffer)
            }
            idleTimer = startedAt
            Log.d(TAG, "开始录音, 状态变为 RECORDING")
        }
    }
}
/**
 * Handles a speech-end event.
 *
 * While recording, this marks the end as pending and stores its timestamp; the sentence
 * is finished later by acceptAudio() once the trailing silence is long enough. In any
 * other state the event is ignored.
 */
private fun onVadSpeechEnd() {
    if (state == VoiceState.RECORDING) {
        vadEndPending = true
        vadEndTime = System.currentTimeMillis()
        Log.d(TAG, "VAD结束, 等待尾部静音")
    }
}
/**
 * Finishes the current sentence.
 *
 * Returns to [VoiceState.WAIT_WAKEUP], hands the recorded samples to `onFinalAudio` when
 * there are any, empties the sentence buffer and clears the idle timer.
 * NOTE(review): `onFinalAudio` is declared outside the visible part of the diff —
 * presumably a constructor callback; confirm.
 */
private fun finishSentence() {
    vadEndPending = false
    // The state change (and its listener notification) happens before the audio is delivered.
    state = VoiceState.WAIT_WAKEUP

    // Snapshot the samples into an array first, then empty the buffer.
    val utterance = audioBuffer.toFloatArray().also { audioBuffer.clear() }
    if (!utterance.isEmpty()) {
        onFinalAudio(utterance)
    }

    idleTimer = 0
    Log.d(TAG, "录音结束, 返回 WAIT_WAKEUP")
}
/**
 * Appends [samples] to the pre-roll buffer, dropping the oldest samples so that it never
 * holds more than PRE_BUFFER_SIZE entries.
 */
private fun cachePreBuffer(samples: FloatArray) {
    samples.forEach { sample ->
        preBuffer.addLast(sample)
        // Evict from the front as soon as the cap is exceeded.
        if (preBuffer.size > PRE_BUFFER_SIZE) {
            preBuffer.removeFirst()
        }
    }
}
} }

View File

@ -6,8 +6,9 @@ package com.zs.smarthuman.sherpa
* @date: 2025/12/17 10:20 * @date: 2025/12/17 10:20
*/ */
enum class VoiceState {
    /** Waiting for the wake word. */
    WAIT_WAKEUP,

    /** Playing the local prompt sound. */
    PLAYING_PROMPT,

    /** Waiting for the user to speak. */
    WAIT_SPEECH,

    /** The user is speaking. */
    RECORDING,

    /** Playing audio returned by the backend. */
    PLAYING_BACKEND,
}

View File

@ -183,9 +183,11 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
VoiceState.WAIT_SPEECH -> Log.d("lrs", "当前状态: 唤醒成功,等待说话") VoiceState.WAIT_SPEECH -> Log.d("lrs", "当前状态: 唤醒成功,等待说话")
VoiceState.RECORDING -> Log.d("lrs", "当前状态: 正在录音") VoiceState.RECORDING -> Log.d("lrs", "当前状态: 正在录音")
VoiceState.PLAYING -> Log.d("lrs", "当前状态: Unity 播放中") VoiceState.PLAYING_PROMPT -> Log.d("lrs", "当前状态: 播放本地音频")
VoiceState.PLAYING_BACKEND -> Log.d("lrs", "当前状态: 播放后台音频")
} }
} },
) )
// 初始化唤醒词 // 初始化唤醒词
voiceController?.initWakeup("x iǎo z ì @小智") voiceController?.initWakeup("x iǎo z ì @小智")
@ -266,10 +268,10 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
private fun processAudio() { private fun processAudio() {
val interval = 0.1 // val interval = 0.1
val bufferSize = (interval * sampleRateInHz).toInt() // in samples // val bufferSize = (interval * sampleRateInHz).toInt() // in samples
// val buffer = ShortArray(bufferSize) // val buffer = ShortArray(bufferSize)
// val bufferSize = 512 // in samples val bufferSize = 512 // in samples
val buffer = ShortArray(bufferSize) val buffer = ShortArray(bufferSize)
while (isRecording) { while (isRecording) {
val ret = audioRecord?.read(buffer, 0, buffer.size) ?: 0 val ret = audioRecord?.read(buffer, 0, buffer.size) ?: 0
@ -297,15 +299,26 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
word: String, word: String,
audioUrl: String audioUrl: String
) { ) {
if (state == 1 && audioUrl == "https://static.seerteach.net/largemodel/smart_read_audio/intensive_reading/689450143596d58606a106e5/689450143596d58606a106e5_1.mp3") { if (state == 1) {
if (!isPlayed) { if (!isPlayed) {
isPlayed = true isPlayed = true
voiceController?.onPlayStart() if (audioUrl == UserInfoManager.userInfo?.wakeUpAudioUrl){
voiceController?.onPlayStartPrompt()
}else{
voiceController?.onPlayStartBackend()
}
} }
} }
if (state == 3&& audioUrl == "https://static.seerteach.net/largemodel/smart_read_audio/intensive_reading/689450143596d58606a106e5/689450143596d58606a106e5_1.mp3") {
voiceController?.onPlayEnd() if (state == 3){
if (audioUrl == UserInfoManager.userInfo?.wakeUpAudioUrl){
voiceController?.onPlayEndPrompt()
}else{
voiceController?.onPlayEndBackend()
}
isPlayed = false isPlayed = false
} }
} }