临时提交
This commit is contained in:
parent
f7100ddc93
commit
b6423678a2
@ -3,6 +3,7 @@ package com.zs.smarthuman.sherpa
|
|||||||
import android.content.res.AssetManager
|
import android.content.res.AssetManager
|
||||||
import android.util.Log
|
import android.util.Log
|
||||||
|
|
||||||
|
|
||||||
class VoiceController(
|
class VoiceController(
|
||||||
assetManager: AssetManager,
|
assetManager: AssetManager,
|
||||||
private val onWakeup: () -> Unit,
|
private val onWakeup: () -> Unit,
|
||||||
@ -10,23 +11,25 @@ class VoiceController(
|
|||||||
private val idleTimeoutSeconds: Int = 15,
|
private val idleTimeoutSeconds: Int = 15,
|
||||||
private val onStateChanged: ((VoiceState) -> Unit)? = null
|
private val onStateChanged: ((VoiceState) -> Unit)? = null
|
||||||
) {
|
) {
|
||||||
|
private val TAG = "VoiceController"
|
||||||
|
|
||||||
private var state: VoiceState = VoiceState.WAIT_WAKEUP
|
private var state: VoiceState = VoiceState.WAIT_WAKEUP
|
||||||
set(value) {
|
set(value) {
|
||||||
field = value
|
field = value
|
||||||
onStateChanged?.invoke(value)
|
onStateChanged?.invoke(value)
|
||||||
Log.d("VoiceController", "当前状态: $value")
|
Log.d(TAG, "当前状态: $value")
|
||||||
}
|
}
|
||||||
|
|
||||||
/** ================= 唤醒 ================= */
|
/** ================= 唤醒 ================= */
|
||||||
private var wakeupLocked = false
|
|
||||||
private var wakeupFlag = false
|
private var wakeupFlag = false
|
||||||
|
private var wakeupDiscardEndTime = 0L
|
||||||
|
private val WAKEUP_DISCARD_BUFFER_MS = 200L // 提示音前后缓冲
|
||||||
|
|
||||||
private val wakeupManager = WakeupManager(assetManager) {
|
private val wakeupManager = WakeupManager(assetManager) {
|
||||||
Log.d("VoiceController", "唤醒触发! wakeupFlag=$wakeupFlag, 当前状态=$state")
|
Log.d(TAG, "唤醒触发! 当前状态=$state")
|
||||||
wakeupFlag = true
|
wakeupFlag = true
|
||||||
onWakeup()
|
onWakeup()
|
||||||
startWaitSpeech()
|
playLocalPrompt()
|
||||||
}
|
}
|
||||||
|
|
||||||
/** ================= VAD ================= */
|
/** ================= VAD ================= */
|
||||||
@ -38,63 +41,83 @@ class VoiceController(
|
|||||||
|
|
||||||
/** ================= 音频缓存 ================= */
|
/** ================= 音频缓存 ================= */
|
||||||
private val audioBuffer = mutableListOf<Float>()
|
private val audioBuffer = mutableListOf<Float>()
|
||||||
private val preBuffer = ArrayDeque<Float>() // pre-roll 1 秒
|
private val preBuffer = ArrayDeque<Float>() // pre-roll 1 秒
|
||||||
private val PRE_BUFFER_SIZE = 16000
|
private val PRE_BUFFER_SIZE = 16000
|
||||||
private var idleTimer = 0L
|
private var idleTimer = 0L
|
||||||
|
|
||||||
var isPlaying = false
|
|
||||||
private set
|
|
||||||
|
|
||||||
/** ================= 尾部静音控制 ================= */
|
/** ================= 尾部静音控制 ================= */
|
||||||
private var vadEndPending = false
|
private var vadEndPending = false
|
||||||
private var vadEndTime = 0L
|
private var vadEndTime = 0L
|
||||||
private val END_SILENCE_MS = 1000L
|
private val END_SILENCE_MS = 1000L
|
||||||
|
|
||||||
/* ================= 公共接口 ================= */
|
var isPlaying = false
|
||||||
|
private set
|
||||||
|
|
||||||
|
/** ================= 公共接口 ================= */
|
||||||
fun initWakeup(keywords: String) {
|
fun initWakeup(keywords: String) {
|
||||||
wakeupManager.initStream(keywords)
|
wakeupManager.initStream(keywords)
|
||||||
onStateChanged?.invoke(state)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fun acceptAudio(samples: FloatArray) {
|
fun acceptAudio(samples: FloatArray) {
|
||||||
cachePreBuffer(samples)
|
cachePreBuffer(samples)
|
||||||
|
|
||||||
// KWS 永远运行
|
|
||||||
wakeupManager.acceptAudio(samples)
|
wakeupManager.acceptAudio(samples)
|
||||||
|
|
||||||
|
val now = System.currentTimeMillis()
|
||||||
|
|
||||||
|
// 播放本地提示音或后台音频期间,不喂 VAD
|
||||||
|
if (state == VoiceState.PLAYING_PROMPT || state == VoiceState.PLAYING_BACKEND) return
|
||||||
|
|
||||||
|
val discardAudio = now < wakeupDiscardEndTime
|
||||||
|
|
||||||
when (state) {
|
when (state) {
|
||||||
VoiceState.WAIT_WAKEUP -> Unit
|
VoiceState.WAIT_WAKEUP -> Unit
|
||||||
|
VoiceState.WAIT_SPEECH -> if (!discardAudio) vadManager.accept(samples)
|
||||||
VoiceState.WAIT_SPEECH -> vadManager.accept(samples)
|
VoiceState.RECORDING -> if (!discardAudio) {
|
||||||
|
|
||||||
VoiceState.RECORDING -> {
|
|
||||||
audioBuffer.addAll(samples.asList())
|
audioBuffer.addAll(samples.asList())
|
||||||
vadManager.accept(samples)
|
vadManager.accept(samples)
|
||||||
idleTimer = System.currentTimeMillis()
|
idleTimer = now
|
||||||
|
if (vadEndPending && now - vadEndTime >= END_SILENCE_MS) {
|
||||||
if (vadEndPending && System.currentTimeMillis() - vadEndTime >= END_SILENCE_MS) {
|
|
||||||
finishSentence()
|
finishSentence()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
VoiceState.PLAYING -> Unit
|
VoiceState.PLAYING_PROMPT -> {
|
||||||
|
|
||||||
|
}
|
||||||
|
VoiceState.PLAYING_BACKEND -> {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun onPlayStart() {
|
fun onPlayStartPrompt() {
|
||||||
isPlaying = true
|
isPlaying = true
|
||||||
state = VoiceState.PLAYING
|
state = VoiceState.PLAYING_PROMPT
|
||||||
|
Log.d(TAG, "播放提示音, 状态变为 PLAYING_PROMPT")
|
||||||
}
|
}
|
||||||
|
|
||||||
fun onPlayEnd() {
|
fun onPlayEndPrompt() {
|
||||||
isPlaying = false
|
isPlaying = false
|
||||||
if (state == VoiceState.WAIT_SPEECH) return
|
state = VoiceState.WAIT_SPEECH
|
||||||
reset()
|
idleTimer = System.currentTimeMillis()
|
||||||
|
// 设置 discardAudio 时间,丢弃提示音残留音频
|
||||||
|
wakeupDiscardEndTime = System.currentTimeMillis() + WAKEUP_DISCARD_BUFFER_MS
|
||||||
|
Log.d(TAG, "提示音播放结束, 状态变为 WAIT_SPEECH")
|
||||||
|
}
|
||||||
|
|
||||||
|
fun onPlayStartBackend() {
|
||||||
|
isPlaying = true
|
||||||
|
state = VoiceState.PLAYING_BACKEND
|
||||||
|
Log.d(TAG, "播放后台音频, 状态变为 PLAYING_BACKEND")
|
||||||
|
}
|
||||||
|
|
||||||
|
fun onPlayEndBackend() {
|
||||||
|
isPlaying = false
|
||||||
|
state = VoiceState.WAIT_WAKEUP
|
||||||
|
idleTimer = System.currentTimeMillis()
|
||||||
|
Log.d(TAG, "后台音频播放结束, 状态变为 WAIT_WAKEUP")
|
||||||
}
|
}
|
||||||
|
|
||||||
fun checkIdleTimeout() {
|
fun checkIdleTimeout() {
|
||||||
if (state != VoiceState.WAIT_SPEECH) return
|
if (state != VoiceState.WAIT_SPEECH) return
|
||||||
|
|
||||||
val now = System.currentTimeMillis()
|
val now = System.currentTimeMillis()
|
||||||
if (now - idleTimer > idleTimeoutSeconds * 1000) {
|
if (now - idleTimer > idleTimeoutSeconds * 1000) {
|
||||||
reset()
|
reset()
|
||||||
@ -107,10 +130,11 @@ class VoiceController(
|
|||||||
preBuffer.clear()
|
preBuffer.clear()
|
||||||
vadManager.reset()
|
vadManager.reset()
|
||||||
wakeupManager.reset()
|
wakeupManager.reset()
|
||||||
wakeupLocked = false
|
|
||||||
wakeupFlag = false
|
wakeupFlag = false
|
||||||
|
wakeupDiscardEndTime = 0
|
||||||
idleTimer = 0
|
idleTimer = 0
|
||||||
vadEndPending = false
|
vadEndPending = false
|
||||||
|
Log.d(TAG, "已重置, 状态变为 WAIT_WAKEUP")
|
||||||
}
|
}
|
||||||
|
|
||||||
fun release() {
|
fun release() {
|
||||||
@ -121,69 +145,56 @@ class VoiceController(
|
|||||||
idleTimer = 0
|
idleTimer = 0
|
||||||
isPlaying = false
|
isPlaying = false
|
||||||
state = VoiceState.WAIT_WAKEUP
|
state = VoiceState.WAIT_WAKEUP
|
||||||
wakeupLocked = false
|
|
||||||
wakeupFlag = false
|
wakeupFlag = false
|
||||||
|
wakeupDiscardEndTime = 0
|
||||||
vadEndPending = false
|
vadEndPending = false
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ================= 内部逻辑 ================= */
|
/* ================= 内部逻辑 ================= */
|
||||||
private fun startWaitSpeech() {
|
private fun playLocalPrompt() {
|
||||||
state = VoiceState.WAIT_SPEECH
|
onPlayStartPrompt()
|
||||||
audioBuffer.clear()
|
// 在这里播放 "我在" 音频,播放结束后必须调用 onPlayEndPrompt()
|
||||||
idleTimer = System.currentTimeMillis()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun onVadSpeechStart() {
|
private fun onVadSpeechStart() {
|
||||||
vadEndPending = false
|
vadEndPending = false
|
||||||
|
val now = System.currentTimeMillis()
|
||||||
Log.d("VoiceController", "VAD开始, 当前状态=$state, wakeupFlag=$wakeupFlag")
|
Log.d(TAG, "VAD开始, 当前状态=$state, wakeupFlag=$wakeupFlag")
|
||||||
|
|
||||||
// 如果是唤醒词音频则丢弃
|
|
||||||
if (wakeupFlag) {
|
|
||||||
wakeupFlag = false
|
|
||||||
Log.d("VoiceController", "丢弃唤醒词音频")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if (state != VoiceState.WAIT_SPEECH) return
|
if (state != VoiceState.WAIT_SPEECH) return
|
||||||
|
if (wakeupFlag && now < wakeupDiscardEndTime) {
|
||||||
|
Log.d(TAG, "丢弃唤醒词残留音频")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
state = VoiceState.RECORDING
|
state = VoiceState.RECORDING
|
||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
audioBuffer.addAll(preBuffer)
|
audioBuffer.addAll(preBuffer)
|
||||||
idleTimer = System.currentTimeMillis()
|
idleTimer = now
|
||||||
|
Log.d(TAG, "开始录音, 状态变为 RECORDING")
|
||||||
Log.d("VoiceController", "开始录音, 状态变为=$state")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun onVadSpeechEnd() {
|
private fun onVadSpeechEnd() {
|
||||||
if (state != VoiceState.RECORDING) return
|
if (state != VoiceState.RECORDING) return
|
||||||
vadEndPending = true
|
vadEndPending = true
|
||||||
vadEndTime = System.currentTimeMillis()
|
vadEndTime = System.currentTimeMillis()
|
||||||
Log.d("VoiceController", "VAD结束, 等待尾部静音")
|
Log.d(TAG, "VAD结束, 等待尾部静音")
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun finishSentence() {
|
private fun finishSentence() {
|
||||||
vadEndPending = false
|
vadEndPending = false
|
||||||
state = VoiceState.WAIT_WAKEUP
|
state = VoiceState.WAIT_WAKEUP
|
||||||
wakeupLocked = false
|
|
||||||
|
|
||||||
val finalAudio = audioBuffer.toFloatArray()
|
val finalAudio = audioBuffer.toFloatArray()
|
||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
|
if (finalAudio.isNotEmpty()) onFinalAudio(finalAudio)
|
||||||
if (finalAudio.isNotEmpty()) {
|
|
||||||
onFinalAudio(finalAudio)
|
|
||||||
}
|
|
||||||
|
|
||||||
idleTimer = 0
|
idleTimer = 0
|
||||||
Log.d("VoiceController", "录音结束, 返回 WAIT_WAKEUP")
|
Log.d(TAG, "录音结束, 返回 WAIT_WAKEUP")
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun cachePreBuffer(samples: FloatArray) {
|
private fun cachePreBuffer(samples: FloatArray) {
|
||||||
for (s in samples) {
|
for (s in samples) {
|
||||||
preBuffer.addLast(s)
|
preBuffer.addLast(s)
|
||||||
if (preBuffer.size > PRE_BUFFER_SIZE) {
|
if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst()
|
||||||
preBuffer.removeFirst()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -6,8 +6,9 @@ package com.zs.smarthuman.sherpa
|
|||||||
* @date: 2025/12/17 10:20
|
* @date: 2025/12/17 10:20
|
||||||
*/
|
*/
|
||||||
enum class VoiceState {
|
enum class VoiceState {
|
||||||
WAIT_WAKEUP,
|
WAIT_WAKEUP, // 等待唤醒
|
||||||
WAIT_SPEECH,
|
PLAYING_PROMPT, // 播放本地提示音
|
||||||
RECORDING,
|
WAIT_SPEECH, // 等待用户说话
|
||||||
PLAYING
|
RECORDING, // 用户正在说话
|
||||||
|
PLAYING_BACKEND // 播放后台返回音频
|
||||||
}
|
}
|
||||||
@ -183,9 +183,11 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
|
|
||||||
VoiceState.WAIT_SPEECH -> Log.d("lrs", "当前状态: 唤醒成功,等待说话")
|
VoiceState.WAIT_SPEECH -> Log.d("lrs", "当前状态: 唤醒成功,等待说话")
|
||||||
VoiceState.RECORDING -> Log.d("lrs", "当前状态: 正在录音")
|
VoiceState.RECORDING -> Log.d("lrs", "当前状态: 正在录音")
|
||||||
VoiceState.PLAYING -> Log.d("lrs", "当前状态: Unity 播放中")
|
VoiceState.PLAYING_PROMPT -> Log.d("lrs", "当前状态: 播放本地音频")
|
||||||
|
VoiceState.PLAYING_BACKEND -> Log.d("lrs", "当前状态: 播放后台音频")
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
// 初始化唤醒词
|
// 初始化唤醒词
|
||||||
voiceController?.initWakeup("x iǎo z ì @小智")
|
voiceController?.initWakeup("x iǎo z ì @小智")
|
||||||
@ -266,10 +268,10 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
|
|
||||||
|
|
||||||
private fun processAudio() {
|
private fun processAudio() {
|
||||||
val interval = 0.1
|
// val interval = 0.1
|
||||||
val bufferSize = (interval * sampleRateInHz).toInt() // in samples
|
// val bufferSize = (interval * sampleRateInHz).toInt() // in samples
|
||||||
// val buffer = ShortArray(bufferSize)
|
// val buffer = ShortArray(bufferSize)
|
||||||
// val bufferSize = 512 // in samples
|
val bufferSize = 512 // in samples
|
||||||
val buffer = ShortArray(bufferSize)
|
val buffer = ShortArray(bufferSize)
|
||||||
while (isRecording) {
|
while (isRecording) {
|
||||||
val ret = audioRecord?.read(buffer, 0, buffer.size) ?: 0
|
val ret = audioRecord?.read(buffer, 0, buffer.size) ?: 0
|
||||||
@ -297,15 +299,26 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
word: String,
|
word: String,
|
||||||
audioUrl: String
|
audioUrl: String
|
||||||
) {
|
) {
|
||||||
if (state == 1 && audioUrl == "https://static.seerteach.net/largemodel/smart_read_audio/intensive_reading/689450143596d58606a106e5/689450143596d58606a106e5_1.mp3") {
|
if (state == 1) {
|
||||||
if (!isPlayed) {
|
if (!isPlayed) {
|
||||||
isPlayed = true
|
isPlayed = true
|
||||||
voiceController?.onPlayStart()
|
if (audioUrl == UserInfoManager.userInfo?.wakeUpAudioUrl){
|
||||||
|
voiceController?.onPlayStartPrompt()
|
||||||
|
}else{
|
||||||
|
voiceController?.onPlayStartBackend()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
if (state == 3&& audioUrl == "https://static.seerteach.net/largemodel/smart_read_audio/intensive_reading/689450143596d58606a106e5/689450143596d58606a106e5_1.mp3") {
|
|
||||||
voiceController?.onPlayEnd()
|
if (state == 3){
|
||||||
|
if (audioUrl == UserInfoManager.userInfo?.wakeUpAudioUrl){
|
||||||
|
voiceController?.onPlayEndPrompt()
|
||||||
|
|
||||||
|
}else{
|
||||||
|
voiceController?.onPlayEndBackend()
|
||||||
|
}
|
||||||
|
|
||||||
isPlayed = false
|
isPlayed = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user