临时提交
This commit is contained in:
parent
f7100ddc93
commit
b6423678a2
@ -3,6 +3,7 @@ package com.zs.smarthuman.sherpa
|
||||
import android.content.res.AssetManager
|
||||
import android.util.Log
|
||||
|
||||
|
||||
class VoiceController(
|
||||
assetManager: AssetManager,
|
||||
private val onWakeup: () -> Unit,
|
||||
@ -10,23 +11,25 @@ class VoiceController(
|
||||
private val idleTimeoutSeconds: Int = 15,
|
||||
private val onStateChanged: ((VoiceState) -> Unit)? = null
|
||||
) {
|
||||
private val TAG = "VoiceController"
|
||||
|
||||
/**
 * Current stage of the voice pipeline; starts at [VoiceState.WAIT_WAKEUP].
 *
 * Every assignment (even one that does not change the value) stores the new
 * state, forwards it to the optional [onStateChanged] listener and writes a
 * single debug log line.
 */
private var state: VoiceState = VoiceState.WAIT_WAKEUP
    set(value) {
        field = value
        onStateChanged?.invoke(value)
        // Log exactly once per transition, through the shared TAG constant
        // rather than a second call with a hard-coded tag literal.
        Log.d(TAG, "当前状态: $value")
    }
|
||||
|
||||
/** ================= 唤醒 ================= */
|
||||
private var wakeupLocked = false
|
||||
private var wakeupFlag = false
|
||||
private var wakeupDiscardEndTime = 0L
|
||||
private val WAKEUP_DISCARD_BUFFER_MS = 200L // 提示音前后缓冲
|
||||
|
||||
/**
 * Keyword spotter built on [assetManager].
 *
 * The trailing lambda is the callback handed to `WakeupManager`; judging by
 * its log message it runs when the wake word is detected (the manager itself
 * is declared outside this view). On wake-up it:
 *  1. marks [wakeupFlag] so that wake-word residue can be recognised later,
 *  2. notifies the host through [onWakeup],
 *  3. starts the local prompt, which moves the state to `PLAYING_PROMPT`.
 */
private val wakeupManager = WakeupManager(assetManager) {
    Log.d(TAG, "唤醒触发! 当前状态=$state")
    wakeupFlag = true
    onWakeup()
    // The prompt flow alone drives the state machine now: WAIT_SPEECH is
    // entered only once the prompt has finished (see onPlayEndPrompt), so the
    // state is not switched to WAIT_SPEECH here first.
    playLocalPrompt()
}
|
||||
|
||||
/** ================= VAD ================= */
|
||||
@ -42,59 +45,79 @@ class VoiceController(
|
||||
private val PRE_BUFFER_SIZE = 16000
|
||||
private var idleTimer = 0L
|
||||
|
||||
var isPlaying = false
|
||||
private set
|
||||
|
||||
/** ================= 尾部静音控制 ================= */
|
||||
private var vadEndPending = false
|
||||
private var vadEndTime = 0L
|
||||
private val END_SILENCE_MS = 1000L
|
||||
|
||||
/* ================= 公共接口 ================= */
|
||||
var isPlaying = false
|
||||
private set
|
||||
|
||||
/** ================= 公共接口 ================= */
|
||||
/**
 * Initialises the wake-up manager's stream with [keywords] and then reports
 * the current state to the listener.
 *
 * @param keywords keyword specification, passed unchanged to
 *   `wakeupManager.initStream`.
 */
fun initWakeup(keywords: String) {
    wakeupManager.initStream(keywords)
    // Publish the current state so a registered listener has a value to show
    // right after initialisation; nothing happens when no listener is set.
    onStateChanged?.let { notify -> notify(state) }
}
|
||||
|
||||
/**
 * Feeds one chunk of microphone samples into the pipeline.
 *
 * The chunk is always cached in the pre-buffer and always handed to the
 * keyword spotter. What happens next depends on [state]:
 *  - `WAIT_WAKEUP`, `PLAYING_PROMPT`, `PLAYING_BACKEND`: nothing else; the VAD
 *    is not fed while idle or while audio is being played back.
 *  - `WAIT_SPEECH`: the chunk goes to the VAD unless the discard window that
 *    follows the prompt is still open.
 *  - `RECORDING`: the chunk is appended to the sentence buffer and fed to the
 *    VAD; once a VAD end has been pending for [END_SILENCE_MS] the sentence is
 *    finished.
 *
 * @param samples audio samples for this chunk.
 */
fun acceptAudio(samples: FloatArray) {
    cachePreBuffer(samples)

    // Keyword spotting always runs, whatever the state.
    wakeupManager.acceptAudio(samples)

    val now = System.currentTimeMillis()
    // True while we are still inside the window opened when the prompt ended;
    // audio captured then may still contain prompt residue.
    val discardAudio = now < wakeupDiscardEndTime

    when (state) {
        VoiceState.WAIT_WAKEUP,
        VoiceState.PLAYING_PROMPT,
        VoiceState.PLAYING_BACKEND -> Unit

        VoiceState.WAIT_SPEECH -> if (!discardAudio) vadManager.accept(samples)

        VoiceState.RECORDING -> if (!discardAudio) {
            audioBuffer.addAll(samples.asList())
            vadManager.accept(samples)
            idleTimer = now
            // The VAD already reported end of speech; close the sentence only
            // after the trailing-silence period has elapsed.
            if (vadEndPending && now - vadEndTime >= END_SILENCE_MS) {
                finishSentence()
            }
        }
    }
}
|
||||
|
||||
/**
 * Marks the start of local prompt playback.
 *
 * Sets [isPlaying] first and then moves the state to
 * [VoiceState.PLAYING_PROMPT], so a state listener that inspects [isPlaying]
 * already sees `true`.
 */
fun onPlayStartPrompt() {
    isPlaying = true
    state = VoiceState.PLAYING_PROMPT
    Log.d(TAG, "播放提示音, 状态变为 PLAYING_PROMPT")
}
|
||||
|
||||
fun onPlayEnd() {
|
||||
fun onPlayEndPrompt() {
|
||||
isPlaying = false
|
||||
if (state == VoiceState.WAIT_SPEECH) return
|
||||
reset()
|
||||
state = VoiceState.WAIT_SPEECH
|
||||
idleTimer = System.currentTimeMillis()
|
||||
// 设置 discardAudio 时间,丢弃提示音残留音频
|
||||
wakeupDiscardEndTime = System.currentTimeMillis() + WAKEUP_DISCARD_BUFFER_MS
|
||||
Log.d(TAG, "提示音播放结束, 状态变为 WAIT_SPEECH")
|
||||
}
|
||||
|
||||
/**
 * Marks the start of backend (server-returned) audio playback.
 *
 * [isPlaying] is raised before the state changes to
 * [VoiceState.PLAYING_BACKEND], so the state listener observes it as `true`.
 */
fun onPlayStartBackend() {
    isPlaying = true
    state = VoiceState.PLAYING_BACKEND
    Log.d(TAG, "播放后台音频, 状态变为 PLAYING_BACKEND")
}
|
||||
|
||||
/**
 * Marks the end of backend audio playback.
 *
 * Clears [isPlaying], returns the state machine to
 * [VoiceState.WAIT_WAKEUP] and stamps the idle timer with the current time.
 */
fun onPlayEndBackend() {
    isPlaying = false
    state = VoiceState.WAIT_WAKEUP
    idleTimer = System.currentTimeMillis()
    Log.d(TAG, "后台音频播放结束, 状态变为 WAIT_WAKEUP")
}
|
||||
|
||||
fun checkIdleTimeout() {
|
||||
if (state != VoiceState.WAIT_SPEECH) return
|
||||
|
||||
val now = System.currentTimeMillis()
|
||||
if (now - idleTimer > idleTimeoutSeconds * 1000) {
|
||||
reset()
|
||||
@ -107,10 +130,11 @@ class VoiceController(
|
||||
preBuffer.clear()
|
||||
vadManager.reset()
|
||||
wakeupManager.reset()
|
||||
wakeupLocked = false
|
||||
wakeupFlag = false
|
||||
wakeupDiscardEndTime = 0
|
||||
idleTimer = 0
|
||||
vadEndPending = false
|
||||
Log.d(TAG, "已重置, 状态变为 WAIT_WAKEUP")
|
||||
}
|
||||
|
||||
fun release() {
|
||||
@ -121,69 +145,56 @@ class VoiceController(
|
||||
idleTimer = 0
|
||||
isPlaying = false
|
||||
state = VoiceState.WAIT_WAKEUP
|
||||
wakeupLocked = false
|
||||
wakeupFlag = false
|
||||
wakeupDiscardEndTime = 0
|
||||
vadEndPending = false
|
||||
}
|
||||
|
||||
/* ================= 内部逻辑 ================= */
|
||||
private fun startWaitSpeech() {
|
||||
state = VoiceState.WAIT_SPEECH
|
||||
audioBuffer.clear()
|
||||
idleTimer = System.currentTimeMillis()
|
||||
/**
 * Enters the prompt-playing state.
 *
 * This function only flips the state via [onPlayStartPrompt]; it does not
 * start any playback itself.
 */
private fun playLocalPrompt() {
    onPlayStartPrompt()
    // TODO: play the "我在" ("I'm here") prompt audio here; onPlayEndPrompt()
    // must be called once that playback has finished.
}
|
||||
|
||||
/**
 * Handles a speech-start event (presumably wired to the VAD as its
 * speech-start callback; the wiring is outside this view).
 *
 * Recording begins only from [VoiceState.WAIT_SPEECH]. A start that arrives
 * while the post-prompt discard window is still open is treated as residue and
 * ignored. Otherwise the state becomes [VoiceState.RECORDING] and the sentence
 * buffer is seeded with the pre-buffer, so audio captured just before the
 * start event is not lost.
 */
private fun onVadSpeechStart() {
    // Speech (re)started: cancel any pending end-of-sentence countdown.
    vadEndPending = false

    val now = System.currentTimeMillis()
    Log.d(TAG, "VAD开始, 当前状态=$state, wakeupFlag=$wakeupFlag")

    if (state != VoiceState.WAIT_SPEECH) return

    // Discard only inside the time window. An unconditional
    // `if (wakeupFlag) return` would swallow the user's first real utterance
    // after every wake-up.
    if (wakeupFlag && now < wakeupDiscardEndTime) {
        Log.d(TAG, "丢弃唤醒词残留音频")
        return
    }

    state = VoiceState.RECORDING
    audioBuffer.clear()
    audioBuffer.addAll(preBuffer)
    idleTimer = now
    Log.d(TAG, "开始录音, 状态变为 RECORDING")
}
|
||||
|
||||
/**
 * Handles a speech-end event (presumably wired to the VAD as its speech-end
 * callback; the wiring is outside this view).
 *
 * The sentence is not closed immediately. The end is recorded as pending with
 * a timestamp, and `acceptAudio` finishes the sentence once [END_SILENCE_MS]
 * of trailing silence has elapsed. Events outside
 * [VoiceState.RECORDING] are ignored.
 */
private fun onVadSpeechEnd() {
    if (state != VoiceState.RECORDING) return
    vadEndPending = true
    vadEndTime = System.currentTimeMillis()
    Log.d(TAG, "VAD结束, 等待尾部静音")
}
|
||||
|
||||
/**
 * Closes the sentence being recorded and hands its audio to the host.
 *
 * The state returns to [VoiceState.WAIT_WAKEUP] before the audio is
 * delivered, so the `onFinalAudio` callback (declared outside this view)
 * already observes the idle state. The buffer is snapshotted and cleared
 * first, and the callback is invoked exactly once, and only when at least one
 * sample was captured.
 */
private fun finishSentence() {
    vadEndPending = false
    state = VoiceState.WAIT_WAKEUP

    // Snapshot before clearing so the callback owns an independent array.
    val finalAudio = audioBuffer.toFloatArray()
    audioBuffer.clear()

    // Deliver once; a second delivery would make the host process the same
    // utterance twice.
    if (finalAudio.isNotEmpty()) onFinalAudio(finalAudio)

    idleTimer = 0
    Log.d(TAG, "录音结束, 返回 WAIT_WAKEUP")
}
|
||||
|
||||
/**
 * Appends [samples] to the rolling pre-buffer, keeping only the most recent
 * [PRE_BUFFER_SIZE] samples.
 *
 * @param samples audio samples to append, oldest first.
 */
private fun cachePreBuffer(samples: FloatArray) {
    for (sample in samples) {
        preBuffer.addLast(sample)
        // One trim per appended sample is enough: the buffer can exceed the
        // cap by at most one element at this point.
        if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst()
    }
}
|
||||
}
|
||||
|
||||
@ -6,8 +6,9 @@ package com.zs.smarthuman.sherpa
|
||||
* @date: 2025/12/17 10:20
|
||||
*/
|
||||
enum class VoiceState {
    /** Idle; waiting for the wake word. */
    WAIT_WAKEUP,

    /** The local prompt sound is being played. */
    PLAYING_PROMPT,

    /** Woken up; waiting for the user to start speaking. */
    WAIT_SPEECH,

    /** The user is speaking and audio is being recorded. */
    RECORDING,

    /** Audio returned by the backend is being played. */
    PLAYING_BACKEND
}
|
||||
@ -183,9 +183,11 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
||||
|
||||
VoiceState.WAIT_SPEECH -> Log.d("lrs", "当前状态: 唤醒成功,等待说话")
|
||||
VoiceState.RECORDING -> Log.d("lrs", "当前状态: 正在录音")
|
||||
VoiceState.PLAYING -> Log.d("lrs", "当前状态: Unity 播放中")
|
||||
}
|
||||
VoiceState.PLAYING_PROMPT -> Log.d("lrs", "当前状态: 播放本地音频")
|
||||
VoiceState.PLAYING_BACKEND -> Log.d("lrs", "当前状态: 播放后台音频")
|
||||
}
|
||||
},
|
||||
|
||||
)
|
||||
// 初始化唤醒词
|
||||
voiceController?.initWakeup("x iǎo z ì @小智")
|
||||
@ -266,10 +268,10 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
||||
|
||||
|
||||
private fun processAudio() {
|
||||
val interval = 0.1
|
||||
val bufferSize = (interval * sampleRateInHz).toInt() // in samples
|
||||
// val interval = 0.1
|
||||
// val bufferSize = (interval * sampleRateInHz).toInt() // in samples
|
||||
// val buffer = ShortArray(bufferSize)
|
||||
// val bufferSize = 512 // in samples
|
||||
val bufferSize = 512 // in samples
|
||||
val buffer = ShortArray(bufferSize)
|
||||
while (isRecording) {
|
||||
val ret = audioRecord?.read(buffer, 0, buffer.size) ?: 0
|
||||
@ -297,15 +299,26 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
||||
word: String,
|
||||
audioUrl: String
|
||||
) {
|
||||
if (state == 1 && audioUrl == "https://static.seerteach.net/largemodel/smart_read_audio/intensive_reading/689450143596d58606a106e5/689450143596d58606a106e5_1.mp3") {
|
||||
if (state == 1) {
|
||||
if (!isPlayed) {
|
||||
isPlayed = true
|
||||
voiceController?.onPlayStart()
|
||||
if (audioUrl == UserInfoManager.userInfo?.wakeUpAudioUrl){
|
||||
voiceController?.onPlayStartPrompt()
|
||||
}else{
|
||||
voiceController?.onPlayStartBackend()
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if (state == 3&& audioUrl == "https://static.seerteach.net/largemodel/smart_read_audio/intensive_reading/689450143596d58606a106e5/689450143596d58606a106e5_1.mp3") {
|
||||
voiceController?.onPlayEnd()
|
||||
|
||||
if (state == 3){
|
||||
if (audioUrl == UserInfoManager.userInfo?.wakeUpAudioUrl){
|
||||
voiceController?.onPlayEndPrompt()
|
||||
|
||||
}else{
|
||||
voiceController?.onPlayEndBackend()
|
||||
}
|
||||
|
||||
isPlayed = false
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user