临时提交

This commit is contained in:
林若思 2025-12-20 18:47:58 +08:00
parent f7100ddc93
commit b6423678a2
3 changed files with 94 additions and 69 deletions

View File

@ -3,6 +3,7 @@ package com.zs.smarthuman.sherpa
import android.content.res.AssetManager
import android.util.Log
class VoiceController(
assetManager: AssetManager,
private val onWakeup: () -> Unit,
@ -10,23 +11,25 @@ class VoiceController(
private val idleTimeoutSeconds: Int = 15,
private val onStateChanged: ((VoiceState) -> Unit)? = null
) {
private val TAG = "VoiceController"
/**
 * Current state of the voice pipeline, initially [VoiceState.WAIT_WAKEUP].
 *
 * Every assignment stores the new value, forwards it to the optional
 * `onStateChanged` callback and writes it to the debug log. The callback is
 * invoked on every assignment, even when the value does not change.
 */
private var state: VoiceState = VoiceState.WAIT_WAKEUP
set(value) {
field = value
onStateChanged?.invoke(value)
// NOTE(review): the same message is logged twice, once with a literal tag and once with TAG.
// This looks like the removed and the added line of the diff rendered together - confirm.
Log.d("VoiceController", "当前状态: $value")
Log.d(TAG, "当前状态: $value")
}
/** ================= 唤醒 ================= */
private var wakeupLocked = false
private var wakeupFlag = false
private var wakeupDiscardEndTime = 0L
private val WAKEUP_DISCARD_BUFFER_MS = 200L // 提示音前后缓冲
/**
 * Wake-word manager built from the constructor's `assetManager`.
 *
 * The trailing lambda is handed to `WakeupManager`; presumably it is invoked
 * when the wake word is detected (the log text says "wake-up triggered") -
 * TODO confirm against `WakeupManager`, which is not visible here.
 */
private val wakeupManager = WakeupManager(assetManager) {
// NOTE(review): two log lines and two follow-up calls (startWaitSpeech / playLocalPrompt)
// appear below; this looks like removed and added diff lines shown together - confirm.
Log.d("VoiceController", "唤醒触发! wakeupFlag=$wakeupFlag, 当前状态=$state")
Log.d(TAG, "唤醒触发! 当前状态=$state")
// Remember that a wake-up just happened; onVadSpeechStart reads this flag.
wakeupFlag = true
// Notify the owner of this controller.
onWakeup()
startWaitSpeech()
playLocalPrompt()
}
/** ================= VAD ================= */
@ -38,63 +41,83 @@ class VoiceController(
/** ================= 音频缓存 ================= */
private val audioBuffer = mutableListOf<Float>()
private val preBuffer = ArrayDeque<Float>() // pre-roll 1 秒
private val preBuffer = ArrayDeque<Float>() // pre-roll 1 秒
private val PRE_BUFFER_SIZE = 16000
private var idleTimer = 0L
var isPlaying = false
private set
/** ================= 尾部静音控制 ================= */
private var vadEndPending = false
private var vadEndTime = 0L
private val END_SILENCE_MS = 1000L
/* ================= 公共接口 ================= */
var isPlaying = false
private set
/** ================= 公共接口 ================= */
/**
 * Initialises the wake-word stream and reports the current state.
 *
 * @param keywords passed unchanged to `wakeupManager.initStream`; the expected
 * format is defined by `WakeupManager` (not visible here).
 */
fun initWakeup(keywords: String) {
wakeupManager.initStream(keywords)
// Push the current state to the listener, if one was supplied.
onStateChanged?.invoke(state)
}
/**
 * Feeds one chunk of audio samples into the pipeline.
 *
 * Every chunk is appended to the pre-roll buffer and passed to the wake-word
 * manager, whatever the current state. While the state is PLAYING_PROMPT or
 * PLAYING_BACKEND the function returns right after that. Otherwise:
 * - WAIT_SPEECH: the samples are passed to the VAD.
 * - RECORDING: the samples are appended to `audioBuffer`, passed to the VAD,
 *   `idleTimer` is refreshed, and `finishSentence()` is called once
 *   `vadEndPending` is set and at least END_SILENCE_MS has elapsed since
 *   `vadEndTime`.
 *
 * NOTE(review): several `when` branches and statements appear twice below (with
 * and without the `discardAudio` guard) and the braces do not balance as
 * rendered; this looks like removed and added diff lines shown together -
 * confirm against the real file.
 *
 * @param samples audio samples to process.
 */
fun acceptAudio(samples: FloatArray) {
cachePreBuffer(samples)
// Keyword spotting (KWS) always runs
wakeupManager.acceptAudio(samples)
val now = System.currentTimeMillis()
// While the local prompt or backend audio is playing, do not feed the VAD
if (state == VoiceState.PLAYING_PROMPT || state == VoiceState.PLAYING_BACKEND) return
// True while the current time is still before wakeupDiscardEndTime.
val discardAudio = now < wakeupDiscardEndTime
when (state) {
VoiceState.WAIT_WAKEUP -> Unit
VoiceState.WAIT_SPEECH -> vadManager.accept(samples)
VoiceState.RECORDING -> {
VoiceState.WAIT_SPEECH -> if (!discardAudio) vadManager.accept(samples)
VoiceState.RECORDING -> if (!discardAudio) {
audioBuffer.addAll(samples.asList())
vadManager.accept(samples)
idleTimer = System.currentTimeMillis()
if (vadEndPending && System.currentTimeMillis() - vadEndTime >= END_SILENCE_MS) {
idleTimer = now
// The sentence ends only after the trailing-silence window has elapsed.
if (vadEndPending && now - vadEndTime >= END_SILENCE_MS) {
finishSentence()
}
}
VoiceState.PLAYING -> Unit
VoiceState.PLAYING_PROMPT -> {
}
VoiceState.PLAYING_BACKEND -> {}
}
}
/**
 * Marks the start of playback: sets `isPlaying` and moves `state` to a
 * playing state.
 *
 * NOTE(review): two function headers (onPlayStart / onPlayStartPrompt) and two
 * state assignments (PLAYING / PLAYING_PROMPT) share a single closing brace
 * here; this looks like removed and added diff lines shown together - confirm
 * which pair is current.
 */
fun onPlayStart() {
fun onPlayStartPrompt() {
isPlaying = true
state = VoiceState.PLAYING
state = VoiceState.PLAYING_PROMPT
Log.d(TAG, "播放提示音, 状态变为 PLAYING_PROMPT")
}
/**
 * Marks the end of playback and clears `isPlaying`.
 *
 * NOTE(review): two function headers (onPlayEnd / onPlayEndPrompt) share a
 * single body here. The early return plus `reset()` and the block that switches
 * to WAIT_SPEECH look like the removed and the added version rendered
 * together - confirm against the real file.
 */
fun onPlayEnd() {
fun onPlayEndPrompt() {
isPlaying = false
if (state == VoiceState.WAIT_SPEECH) return
reset()
state = VoiceState.WAIT_SPEECH
// Restart the idle countdown that checkIdleTimeout() evaluates.
idleTimer = System.currentTimeMillis()
// Set the discard deadline so that leftover prompt audio is dropped
wakeupDiscardEndTime = System.currentTimeMillis() + WAKEUP_DISCARD_BUFFER_MS
Log.d(TAG, "提示音播放结束, 状态变为 WAIT_SPEECH")
}
/**
 * Marks the start of backend audio playback: sets `isPlaying` and moves
 * `state` to [VoiceState.PLAYING_BACKEND].
 */
fun onPlayStartBackend() {
// Set before the state assignment, whose setter notifies onStateChanged.
isPlaying = true
state = VoiceState.PLAYING_BACKEND
Log.d(TAG, "播放后台音频, 状态变为 PLAYING_BACKEND")
}
/**
 * Marks the end of backend audio playback: clears `isPlaying`, returns
 * `state` to [VoiceState.WAIT_WAKEUP] and stamps `idleTimer` with the
 * current time.
 */
fun onPlayEndBackend() {
// Cleared before the state assignment, whose setter notifies onStateChanged.
isPlaying = false
state = VoiceState.WAIT_WAKEUP
idleTimer = System.currentTimeMillis()
Log.d(TAG, "后台音频播放结束, 状态变为 WAIT_WAKEUP")
}
fun checkIdleTimeout() {
if (state != VoiceState.WAIT_SPEECH) return
val now = System.currentTimeMillis()
if (now - idleTimer > idleTimeoutSeconds * 1000) {
reset()
@ -107,10 +130,11 @@ class VoiceController(
preBuffer.clear()
vadManager.reset()
wakeupManager.reset()
wakeupLocked = false
wakeupFlag = false
wakeupDiscardEndTime = 0
idleTimer = 0
vadEndPending = false
Log.d(TAG, "已重置, 状态变为 WAIT_WAKEUP")
}
fun release() {
@ -121,69 +145,56 @@ class VoiceController(
idleTimer = 0
isPlaying = false
state = VoiceState.WAIT_WAKEUP
wakeupLocked = false
wakeupFlag = false
wakeupDiscardEndTime = 0
vadEndPending = false
}
/* ================= 内部逻辑 ================= */
// NOTE(review): two function headers (startWaitSpeech / playLocalPrompt) appear below with a
// single closing brace; this looks like a removed and an added function of the diff rendered
// together - confirm against the real file.
// startWaitSpeech: switches to WAIT_SPEECH, clears the recorded audio and restarts the idle timer.
private fun startWaitSpeech() {
state = VoiceState.WAIT_SPEECH
audioBuffer.clear()
idleTimer = System.currentTimeMillis()
// playLocalPrompt: enters the prompt-playing state through onPlayStartPrompt().
private fun playLocalPrompt() {
onPlayStartPrompt()
// Play the "我在" ("I'm here") audio here; onPlayEndPrompt() must be called once playback has finished
}
/**
 * Handles the start of a detected speech segment (presumably called by the
 * VAD manager - the wiring is not visible here).
 *
 * Clears any pending end-of-speech marker. Recording starts only from
 * WAIT_SPEECH: the state becomes RECORDING, `audioBuffer` is replaced by the
 * current pre-roll contents and `idleTimer` is refreshed.
 *
 * NOTE(review): two wake-word discard checks, duplicate log lines and two
 * `idleTimer` assignments appear below; this looks like removed and added
 * diff lines shown together - confirm against the real file.
 */
private fun onVadSpeechStart() {
vadEndPending = false
Log.d("VoiceController", "VAD开始, 当前状态=$state, wakeupFlag=$wakeupFlag")
// Discard the segment if it is the wake-word audio
if (wakeupFlag) {
wakeupFlag = false
Log.d("VoiceController", "丢弃唤醒词音频")
return
}
val now = System.currentTimeMillis()
Log.d(TAG, "VAD开始, 当前状态=$state, wakeupFlag=$wakeupFlag")
// Speech is only accepted while waiting for the user to talk.
if (state != VoiceState.WAIT_SPEECH) return
// Ignore speech that starts before the discard deadline following a wake-up.
if (wakeupFlag && now < wakeupDiscardEndTime) {
Log.d(TAG, "丢弃唤醒词残留音频")
return
}
state = VoiceState.RECORDING
audioBuffer.clear()
// Seed the recording with the pre-roll so that the start of the utterance is kept.
audioBuffer.addAll(preBuffer)
idleTimer = System.currentTimeMillis()
Log.d("VoiceController", "开始录音, 状态变为=$state")
idleTimer = now
Log.d(TAG, "开始录音, 状态变为 RECORDING")
}
/**
 * Handles the end of a detected speech segment. Ignored unless the state is
 * RECORDING.
 *
 * The sentence is not finished here: the function only sets `vadEndPending`
 * and records `vadEndTime`; `acceptAudio` calls `finishSentence()` once
 * END_SILENCE_MS has elapsed since that time.
 */
private fun onVadSpeechEnd() {
if (state != VoiceState.RECORDING) return
vadEndPending = true
vadEndTime = System.currentTimeMillis()
// NOTE(review): the same message is logged twice (literal tag and TAG); this looks like the
// removed and the added line of the diff rendered together - confirm.
Log.d("VoiceController", "VAD结束, 等待尾部静音")
Log.d(TAG, "VAD结束, 等待尾部静音")
}
/**
 * Ends the current recording: clears the pending end-of-speech marker, returns
 * to WAIT_WAKEUP, copies the recorded samples into a FloatArray, empties
 * `audioBuffer`, delivers a non-empty recording to `onFinalAudio` (declared
 * outside the visible lines) and resets `idleTimer` to 0.
 *
 * The state changes before `onFinalAudio` is invoked.
 *
 * NOTE(review): the delivery and the final log each appear twice below (block
 * form / one-line form, literal tag / TAG), next to a `wakeupLocked` reset;
 * this looks like removed and added diff lines shown together - confirm
 * against the real file.
 */
private fun finishSentence() {
vadEndPending = false
state = VoiceState.WAIT_WAKEUP
wakeupLocked = false
// Snapshot the samples before the buffer is cleared.
val finalAudio = audioBuffer.toFloatArray()
audioBuffer.clear()
if (finalAudio.isNotEmpty()) {
onFinalAudio(finalAudio)
}
if (finalAudio.isNotEmpty()) onFinalAudio(finalAudio)
idleTimer = 0
Log.d("VoiceController", "录音结束, 返回 WAIT_WAKEUP")
Log.d(TAG, "录音结束, 返回 WAIT_WAKEUP")
}
/**
 * Appends every sample to the pre-roll buffer and drops the oldest sample
 * whenever the buffer grows beyond PRE_BUFFER_SIZE, so only the most recent
 * PRE_BUFFER_SIZE samples are kept.
 *
 * @param samples audio samples to append, in order.
 */
private fun cachePreBuffer(samples: FloatArray) {
for (s in samples) {
preBuffer.addLast(s)
// NOTE(review): the size check appears twice (block form and one-line form); this looks like
// the removed and the added line of the diff rendered together - confirm.
if (preBuffer.size > PRE_BUFFER_SIZE) {
preBuffer.removeFirst()
}
if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst()
}
}
}

View File

@ -6,8 +6,9 @@ package com.zs.smarthuman.sherpa
* @date: 2025/12/17 10:20
*/
enum class VoiceState {
// NOTE(review): two constant lists follow (four uncommented names ending in PLAYING, then five
// commented names); this looks like the removed and the added version of the diff rendered
// together - confirm against the real file.
WAIT_WAKEUP,
WAIT_SPEECH,
RECORDING,
PLAYING
WAIT_WAKEUP, // waiting for the wake word
PLAYING_PROMPT, // playing the local prompt sound
WAIT_SPEECH, // waiting for the user to speak
RECORDING, // the user is speaking
PLAYING_BACKEND // playing audio returned by the backend
}

View File

@ -183,9 +183,11 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
VoiceState.WAIT_SPEECH -> Log.d("lrs", "当前状态: 唤醒成功,等待说话")
VoiceState.RECORDING -> Log.d("lrs", "当前状态: 正在录音")
VoiceState.PLAYING -> Log.d("lrs", "当前状态: Unity 播放中")
VoiceState.PLAYING_PROMPT -> Log.d("lrs", "当前状态: 播放本地音频")
VoiceState.PLAYING_BACKEND -> Log.d("lrs", "当前状态: 播放后台音频")
}
}
},
)
// 初始化唤醒词
voiceController?.initWakeup("x iǎo z ì @小智")
@ -266,10 +268,10 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
private fun processAudio() {
val interval = 0.1
val bufferSize = (interval * sampleRateInHz).toInt() // in samples
// val interval = 0.1
// val bufferSize = (interval * sampleRateInHz).toInt() // in samples
// val buffer = ShortArray(bufferSize)
// val bufferSize = 512 // in samples
val bufferSize = 512 // in samples
val buffer = ShortArray(bufferSize)
while (isRecording) {
val ret = audioRecord?.read(buffer, 0, buffer.size) ?: 0
@ -297,15 +299,26 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
word: String,
audioUrl: String
) {
if (state == 1 && audioUrl == "https://static.seerteach.net/largemodel/smart_read_audio/intensive_reading/689450143596d58606a106e5/689450143596d58606a106e5_1.mp3") {
if (state == 1) {
if (!isPlayed) {
isPlayed = true
voiceController?.onPlayStart()
if (audioUrl == UserInfoManager.userInfo?.wakeUpAudioUrl){
voiceController?.onPlayStartPrompt()
}else{
voiceController?.onPlayStartBackend()
}
}
}
if (state == 3&& audioUrl == "https://static.seerteach.net/largemodel/smart_read_audio/intensive_reading/689450143596d58606a106e5/689450143596d58606a106e5_1.mp3") {
voiceController?.onPlayEnd()
if (state == 3){
if (audioUrl == UserInfoManager.userInfo?.wakeUpAudioUrl){
voiceController?.onPlayEndPrompt()
}else{
voiceController?.onPlayEndBackend()
}
isPlayed = false
}
}