Optimized code
parent 997bfe0539
commit f8812b6a48
@@ -180,6 +180,6 @@ dependencies {

     implementation libs.androidautosize

-    implementation files('libs/sherpa19.aar')
+    implementation files('libs/sherpa-onnx-1.12.20.aar')

 }
Binary file not shown.
@@ -52,7 +52,7 @@
         tools:targetApi="31">

         <activity
-            android:name=".ui.SplashActivity"
+            android:name=".ui.MainActivity"
             android:exported="true"
             android:theme="@style/Theme.Splash"
             android:screenOrientation="portrait">
@@ -66,9 +66,9 @@
             </intent-filter>
         </activity>

-        <activity
+        <!--<activity
             android:name="com.zs.smarthuman.ui.MainActivity"
-            android:screenOrientation="portrait"/>
+            android:screenOrientation="portrait"/>-->
         <activity
             android:name="com.zs.smarthuman.ui.ActivateActivity"
             android:screenOrientation="portrait"/>
@@ -10,42 +10,63 @@ import com.k2fsa.sherpa.onnx.getVadModelConfig
  * @date: 2025/12/17 10:22
  */
 class VadManager(
-    private val assetManager: AssetManager,
+    assetManager: AssetManager,
     private val onSpeechStart: () -> Unit,
     private val onSpeechEnd: () -> Unit
 ) {
     private val vad: Vad

     private var isSpeaking = false
+    private var lastSpeechTime = 0L
+
+    // ⭐ For statistics
+    private var speechFrameCount = 0
+    private var totalFrameCount = 0
+
+    private val END_SILENCE_MS = 600L

     init {
-        val config = getVadModelConfig(0)
-        if (config == null) {
-            throw IllegalStateException("VAD config not found")
-        }
-        vad = Vad(assetManager = assetManager, config = config)
+        val config = getVadModelConfig(1)
+            ?: throw IllegalStateException("VAD config not found")
+        vad = Vad(assetManager, config)
     }

-    /** Feed one audio frame (16 kHz PCM float) */
     fun accept(samples: FloatArray) {
-        vad.acceptWaveform(samples)
-        val speechDetected = vad.isSpeechDetected()
-
-        if (speechDetected && !isSpeaking) {
-            isSpeaking = true
-            onSpeechStart()
-        } else if (!speechDetected && isSpeaking) {
-            isSpeaking = false
-            onSpeechEnd()
-            // ⭐ Only clear the VAD when a sentence ends
-            vad.clear()
+        val now = System.currentTimeMillis()
+
+        vad.acceptWaveform(samples)
+        val hasSpeech = vad.isSpeechDetected()
+
+        totalFrameCount++
+
+        if (hasSpeech) {
+            speechFrameCount++
+            lastSpeechTime = now
+
+            if (!isSpeaking) {
+                isSpeaking = true
+                onSpeechStart()
+            }
+        } else {
+            if (isSpeaking && now - lastSpeechTime >= END_SILENCE_MS) {
+                isSpeaking = false
+                onSpeechEnd()
+                vad.clear()
+            }
         }
     }

-    /** Reset internal state */
+    /** 👉 Speech ratio (this is where the VAD result is actually used) */
+    fun speechRatio(): Float {
+        if (totalFrameCount == 0) return 0f
+        return speechFrameCount.toFloat() / totalFrameCount
+    }
+
     fun reset() {
         isSpeaking = false
+        lastSpeechTime = 0
+        speechFrameCount = 0
+        totalFrameCount = 0
         vad.reset()
     }
 }
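The counters added above are what the new VoiceController gate consumes: finishSentence() rejects a recording when speechRatio() comes back below 0.25. A minimal, self-contained sketch of the same bookkeeping follows; the isSpeech lambda and the 0.01 energy threshold are stand-ins for sherpa-onnx's Vad.isSpeechDetected(), not the real API.

// Illustrative only: mirrors the speechFrameCount / totalFrameCount bookkeeping above.
// The isSpeech lambda stands in for vad.isSpeechDetected(); it is NOT the sherpa-onnx API.
class SpeechRatioCounter(private val isSpeech: (FloatArray) -> Boolean) {
    private var speechFrames = 0
    private var totalFrames = 0

    fun accept(frame: FloatArray) {
        totalFrames++
        if (isSpeech(frame)) speechFrames++
    }

    /** Fraction of frames flagged as speech; 0 when nothing has been fed yet. */
    fun ratio(): Float =
        if (totalFrames == 0) 0f else speechFrames.toFloat() / totalFrames

    fun reset() {
        speechFrames = 0
        totalFrames = 0
    }
}

fun main() {
    // Crude energy gate as the stand-in detector: a frame counts as speech if its RMS > 0.01.
    val counter = SpeechRatioCounter { frame ->
        kotlin.math.sqrt(frame.map { it * it }.average()).toFloat() > 0.01f
    }
    repeat(6) { counter.accept(FloatArray(320)) }            // silent frames
    repeat(4) { counter.accept(FloatArray(320) { 0.1f }) }   // "speech" frames
    println("speech ratio = ${counter.ratio()}")             // 0.4 -> would pass a 0.25 gate
}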
@@ -8,15 +8,17 @@ class VoiceController(
     assetManager: AssetManager,
     private val onWakeup: () -> Unit,
     private val onFinalAudio: (FloatArray) -> Unit,
-    private val idleTimeoutSeconds: Int = 15,
+    private val idleTimeoutSeconds: Int = 5,
     private val maxRecordingSeconds: Int = 10,
     private val onStateChanged: ((VoiceState) -> Unit)? = null,
     private val stopBackendAudio: (() -> Unit)? = null
 ) {

     private val TAG = "VoiceController"
     private val sampleRate = 16000

     /* ================= State ================= */

     private var state: VoiceState = VoiceState.WAIT_WAKEUP
         set(value) {
             field = value
@@ -24,160 +26,258 @@ class VoiceController(
             onStateChanged?.invoke(value)
         }

-    /* ================= Wakeup ================= */
+    /* ================= KWS ================= */

     private val wakeupManager = WakeupManager(assetManager) {
         Log.d(TAG, "🔥 WakeWord detected")
-        stopBackendAudio?.invoke()
-        if (state != VoiceState.UPLOADING) { // don't reset while uploading
-            resetAll()
-            state = VoiceState.PLAYING_PROMPT
-        }
-        onWakeup()
+        handleWakeupEvent()
     }

-    /* ================= VAD (START only) ================= */
+    /* ================= VAD ================= */

     private val vadManager = VadManager(
         assetManager,
         onSpeechStart = { onVadStart() },
-        onSpeechEnd = { /* no longer used for end detection */ }
+        onSpeechEnd = {}
     )

-    /* ================= Audio buffers ================= */
+    /* ================= Buffer ================= */

     private val audioBuffer = mutableListOf<Float>()

+    /** Pre-roll buffer (2 seconds) */
     private val preBuffer = ArrayDeque<Float>()
-    private val PRE_BUFFER_SIZE = sampleRate // 1-second pre-buffer
+    private val PRE_BUFFER_SIZE = sampleRate * 2

     /* ================= Timing ================= */
-    private var idleTimer = 0L
-    private var recordingStartTime = 0L
+    private var recordingStartMs = 0L
+    private var silenceStartMs = 0L
+
+    /** ⭐ Start of a run of consecutive WAIT_SPEECH failures (key) */
+    private var waitSpeechFailStartMs = 0L
+
+    /* ================= Control ================= */

     private var vadStarted = false

-    /* ================= RMS silence detection ================= */
-    private var silenceStartMs = 0L
-    private val SILENCE_END_MS = 1200L // how long silence must last before a sentence ends
-    private val RMS_SILENCE_THRESHOLD = 0.005f // more sensitive
-    private val MIN_SPEECH_DURATION_MS = 300L // minimum valid speech duration
-    private val MIN_SPEECH_RATIO = 0.15f // at least 15% of frames must contain speech
+    /** Post-wakeup observation window */
+    private var inKwsObserve = false
+    private var kwsObserveStartMs = 0L
+    private val KWS_OBSERVE_MS = 500L
+
+    /** Playback cooldown */
+    private var speechEnableAtMs = 0L
+    private val SPEECH_COOLDOWN_MS = 300L
+
+    /* ================= Thresholds ================= */
+
+    private val RMS_SILENCE_THRESHOLD = 0.005f
+    private val SILENCE_END_MS = 1200L
+    private val MIN_SPEECH_MS = 300L

     /* ================= Audio entry point ================= */

     fun acceptAudio(samples: FloatArray) {
-        // wakeup is handled independently; always feed it
+        cachePreBuffer(samples)
+
         wakeupManager.acceptAudio(samples)
-
-        if (state == VoiceState.UPLOADING ||
-            state == VoiceState.PLAYING_PROMPT ||
-            state == VoiceState.PLAYING_BACKEND
-        ) return
-
-        if (state == VoiceState.WAIT_SPEECH) {
-            cachePreBuffer(samples)
-            vadManager.accept(samples)
+        if (wakeupManager.consumeWakeupFlag()) {
+            handleWakeupEvent()
             return
         }

-        if (state != VoiceState.RECORDING) return
-
-        // ===== RECORDING =====
-        audioBuffer.addAll(samples.asList())
-        vadManager.accept(samples)
-
         val now = System.currentTimeMillis()

-        // 1️⃣ Max-recording safety net
-        if (now - recordingStartTime >= maxRecordingSeconds * 1000) {
-            Log.w(TAG, "⏱ Max recording reached")
-            finishSentence()
-            return
-        }
-
-        // 2️⃣ End a sentence on RMS silence
-        val rms = calcRms(samples)
-        if (rms < RMS_SILENCE_THRESHOLD) {
-            if (silenceStartMs == 0L) silenceStartMs = now
-            else if (now - silenceStartMs >= SILENCE_END_MS) {
-                Log.d(TAG, "🔇 RMS silence end")
-                finishSentence()
-            }
-        } else {
-            silenceStartMs = 0L
-        }
-    }
+        when (state) {
+
+            VoiceState.WAIT_WAKEUP,
+            VoiceState.PLAYING_PROMPT,
+            VoiceState.PLAYING_BACKEND,
+            VoiceState.UPLOADING -> return
+
+            VoiceState.WAIT_SPEECH_COOLDOWN -> {
+                if (now >= speechEnableAtMs) {
+                    state = VoiceState.WAIT_SPEECH
+                }
+                return
+            }
+
+            VoiceState.WAIT_SPEECH -> {
+                if (inKwsObserve) {
+                    if (now - kwsObserveStartMs < KWS_OBSERVE_MS) return
+                    inKwsObserve = false
+                }
+                vadManager.accept(samples)
+            }
+
+            VoiceState.RECORDING -> {
+                audioBuffer.addAll(samples.asList())
+                vadManager.accept(samples)
+
+                if (now - recordingStartMs > maxRecordingSeconds * 1000) {
+                    Log.w(TAG, "⏱ Max recording reached")
+                    finishSentence()
+                    return
+                }
+
+                val rms = calcRms(samples)
+                if (rms < RMS_SILENCE_THRESHOLD) {
+                    if (silenceStartMs == 0L) silenceStartMs = now
+                    else if (now - silenceStartMs >= SILENCE_END_MS) {
+                        Log.d(TAG, "🔇 RMS silence end")
+                        finishSentence()
+                    }
+                } else {
+                    silenceStartMs = 0L
+                }
+            }
+        }
+    }
+
+    /* ================= Wakeup ================= */
+
+    private fun handleWakeupEvent() {
+        when (state) {
+
+            VoiceState.UPLOADING -> return
+
+            VoiceState.RECORDING,
+            VoiceState.PLAYING_BACKEND -> {
+                stopBackendAudio?.invoke()
+                enterWakeup(interrupt = true)
+            }
+
+            else -> enterWakeup(interrupt = false)
+        }
+    }
+
+    private fun enterWakeup(interrupt: Boolean) {
+
+        if (interrupt) {
+            audioBuffer.clear()
+            vadManager.reset()
+            vadStarted = false
+            silenceStartMs = 0L
+        }
+
+        inKwsObserve = true
+        kwsObserveStartMs = System.currentTimeMillis()
+
+        state = VoiceState.PLAYING_PROMPT
+        onWakeup()
+    }

     /* ================= VAD START ================= */

     private fun onVadStart() {
         if (state != VoiceState.WAIT_SPEECH) return

-        Log.d(TAG, "🎤 VAD START")
+        Log.d(TAG, "🎤 REAL VAD START")

         vadStarted = true
-        state = VoiceState.RECORDING
-        recordingStartTime = System.currentTimeMillis()
+        recordingStartMs = System.currentTimeMillis()
         silenceStartMs = 0L

+        audioBuffer.clear()
         audioBuffer.addAll(preBuffer)
-        preBuffer.clear()
+
+        state = VoiceState.RECORDING
     }

     /* ================= Finish recording ================= */

     private fun finishSentence() {
-        val speakTime = System.currentTimeMillis() - recordingStartTime
-
-        if (!vadStarted || speakTime < MIN_SPEECH_DURATION_MS) {
-            Log.d(TAG, "⛔ Speech too short, ignore")
-            resetToWaitSpeech(refreshIdle = false)
+        val duration = System.currentTimeMillis() - recordingStartMs
+        if (!vadStarted || duration < MIN_SPEECH_MS) {
+            resetToWaitSpeech()
             return
         }

-        val rmsFrames = calcRmsFrames(audioBuffer.toFloatArray(), frameSize = 320)
-        val validFrames = rmsFrames.count { it >= RMS_SILENCE_THRESHOLD }
-        val ratio = if (rmsFrames.isEmpty()) 0f else validFrames.toFloat() / rmsFrames.size
-        Log.d(TAG, "RMS ratio=$ratio")
-        if (ratio < MIN_SPEECH_RATIO) {
-            Log.d(TAG, "❌ Not enough human voice (ratio=$ratio)")
-            resetToWaitSpeech(refreshIdle = false)
+        val vadRatio = vadManager.speechRatio()
+        Log.d(TAG, "🎙 VAD speech ratio=$vadRatio")
+
+        if (vadRatio < 0.25f) {
+            Log.d(TAG, "❌ VAD says NOT human speech")
+            resetToWaitSpeech()
             return
         }

+        // ✅ One success; clear the failure timer
+        waitSpeechFailStartMs = 0L
+
         val finalAudio = audioBuffer.toFloatArray()
         audioBuffer.clear()

         state = VoiceState.UPLOADING
-        Log.d(TAG, "⬆ Upload audio len=${finalAudio.size}")
         onFinalAudio(finalAudio)
     }

     /* ================= Playback callbacks ================= */
-    fun onPlayStartPrompt() { state = VoiceState.PLAYING_PROMPT }
-    fun onPlayEndPrompt() { state = VoiceState.WAIT_SPEECH; idleTimer = System.currentTimeMillis() }
-    fun onPlayStartBackend() { state = VoiceState.PLAYING_BACKEND }
-    fun onPlayEndBackend() { state = VoiceState.WAIT_SPEECH; idleTimer = System.currentTimeMillis() }
-
-    /* ================= Upload callback ================= */
-    fun onUploadFinished(success: Boolean) {
-        if (state != VoiceState.UPLOADING) return
-        state = if (success) VoiceState.PLAYING_BACKEND else VoiceState.WAIT_SPEECH
-        idleTimer = System.currentTimeMillis()
-    }
-
-    /* ================= Idle ================= */
+
+    fun onPlayStartPrompt() {
+        state = VoiceState.PLAYING_PROMPT
+    }
+
+    fun onPlayEndPrompt() {
+        speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
+        state = VoiceState.WAIT_SPEECH_COOLDOWN
+    }
+
+    fun onPlayStartBackend() {
+        state = VoiceState.PLAYING_BACKEND
+    }
+
+    fun onPlayEndBackend() {
+        speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
+        state = VoiceState.WAIT_SPEECH_COOLDOWN
+    }
+
+    /* ================= Upload callback (kept public) ================= */
+
+    fun onUploadFinished(success: Boolean) {
+        if (state != VoiceState.UPLOADING) return
+
+        state = if (success) {
+            VoiceState.PLAYING_BACKEND
+        } else {
+            speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
+            VoiceState.WAIT_SPEECH_COOLDOWN
+        }
+    }
+
+    /* ================= Idle timeout (key fix) ================= */

     fun checkIdleTimeout() {
-        // don't count while uploading
         if (state != VoiceState.WAIT_SPEECH) return
+        if (waitSpeechFailStartMs == 0L) return

         val now = System.currentTimeMillis()
-        if (now - idleTimer > idleTimeoutSeconds * 1000) {
-            Log.d(TAG, "⏱ Idle timeout reached, resetAll")
+        if (now - waitSpeechFailStartMs > idleTimeoutSeconds * 1000) {
+            Log.d(TAG, "⏱ WAIT_SPEECH continuous fail timeout")
             resetAll()
+            waitSpeechFailStartMs = 0L
         }
     }

     /* ================= Reset ================= */
-    private fun resetToWaitSpeech(refreshIdle: Boolean = true) {
+
+    private fun resetToWaitSpeech() {
         audioBuffer.clear()
-        preBuffer.clear()
         vadManager.reset()
         vadStarted = false
         silenceStartMs = 0L
         state = VoiceState.WAIT_SPEECH
-        if (refreshIdle) idleTimer = System.currentTimeMillis()
+
+        // ⭐ Record only on the first failure
+        if (waitSpeechFailStartMs == 0L) {
+            waitSpeechFailStartMs = System.currentTimeMillis()
+        }
     }

     private fun resetAll() {
@@ -190,36 +290,24 @@ class VoiceController(
     }

     fun release() {
-        vadManager.reset()
         wakeupManager.release()
+        vadManager.reset()
     }

     /* ================= Utils ================= */

     private fun cachePreBuffer(samples: FloatArray) {
         for (s in samples) {
             preBuffer.addLast(s)
-            if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst()
+            if (preBuffer.size > PRE_BUFFER_SIZE) {
+                preBuffer.removeFirst()
+            }
         }
     }

     private fun calcRms(audio: FloatArray): Float {
-        if (audio.isEmpty()) return 0f
         var sum = 0f
         for (v in audio) sum += v * v
         return sqrt(sum / audio.size)
     }
-
-    private fun calcRmsFrames(audio: FloatArray, frameSize: Int = 320): FloatArray {
-        val rmsList = mutableListOf<Float>()
-        var i = 0
-        while (i < audio.size) {
-            val end = minOf(i + frameSize, audio.size)
-            val frame = audio.sliceArray(i until end)
-            var sum = 0f
-            for (v in frame) sum += v * v
-            rmsList.add(sqrt(sum / frame.size))
-            i += frameSize
-        }
-        return rmsList.toFloatArray()
-    }
 }
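For reference, the RECORDING branch above only ends a sentence once the per-frame RMS stays below RMS_SILENCE_THRESHOLD for SILENCE_END_MS. The sketch below pulls that logic out into a plain function so it can be exercised offline; the 20 ms frame duration (320 samples at 16 kHz) and the synthetic input are assumptions made for the example, not values lifted verbatim from the app.

import kotlin.math.sqrt

// Illustrative only: the silence-based end-of-sentence check from acceptAudio, as a pure function.
fun detectSentenceEndMs(
    frames: List<FloatArray>,
    frameMs: Long = 20L,                 // 320 samples per frame at 16 kHz
    rmsSilenceThreshold: Float = 0.005f,
    silenceEndMs: Long = 1200L
): Long? {
    var silenceStart = -1L
    var now = 0L
    for (frame in frames) {
        now += frameMs
        var sum = 0f
        for (v in frame) sum += v * v
        val rms = sqrt(sum / frame.size)
        if (rms < rmsSilenceThreshold) {
            if (silenceStart < 0) silenceStart = now
            else if (now - silenceStart >= silenceEndMs) return now   // sentence finished
        } else {
            silenceStart = -1L                                        // speech resumed
        }
    }
    return null                                                       // still talking
}

fun main() {
    val speech = List(50) { FloatArray(320) { 0.05f } }   // ~1 s of "speech"
    val silence = List(70) { FloatArray(320) }            // ~1.4 s of silence
    println(detectSentenceEndMs(speech + silence))        // prints 2220: 1.2 s after the silence began
}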
@@ -11,5 +11,6 @@ enum class VoiceState {
     WAIT_SPEECH,          // waiting for the user to speak
     RECORDING,            // the user is speaking
     UPLOADING,            // audio upload in progress
+    WAIT_SPEECH_COOLDOWN, // ⭐ cooldown after wakeup
     PLAYING_BACKEND       // playing audio returned by the backend
 }
@@ -1,18 +1,18 @@
 package com.zs.smarthuman.sherpa

 import android.content.res.AssetManager
+import android.util.Log
 import com.k2fsa.sherpa.onnx.*

-class WakeupManager(
-    assetManager: AssetManager,
-    private val onWakeup: () -> Unit
-) {
+class WakeupManager(assetManager: AssetManager, function: () -> Unit) {

+    private val TAG = "WakeupManager"
     private val sampleRate = 16000

     private val kws: KeywordSpotter
     private var stream: OnlineStream? = null

-    /** ⭐ Just-woken-up flag, used to discard the wake-word audio */
+    /** ⭐ Wakeup flag (can only be consumed once) */
     private var justWokeUp = false

     init {
@@ -29,15 +29,16 @@ class WakeupManager(
         )

         kws = KeywordSpotter(assetManager, config)
+        Log.d(TAG, "✅ KeywordSpotter initialized")

         stream = kws.createStream()
-            ?: error("Failed to create KWS stream")
+        require(stream != null) { "Failed to create KWS stream" }
+        Log.d(TAG, "✅ KWS stream created")
     }

-    /** ⭐ "Xiao Ai"-style strategy: always feed, whether playing or recording */
+    /** ⭐ Always feed the KWS */
     fun acceptAudio(samples: FloatArray) {
         val s = stream ?: return
-        // ⭐ Far-field / playback gain compensation (very important)
         for (i in samples.indices) {
             samples[i] *= 2.5f
         }
@@ -47,15 +48,15 @@ class WakeupManager(
             kws.decode(s)
             val keyword = kws.getResult(s).keyword
             if (keyword.isNotBlank()) {
+                Log.d(TAG, "🔥 KWS hit: $keyword")
                 justWokeUp = true
-                onWakeup()
-                kws.reset(s) // reset immediately and start a new round
+                kws.reset(s)
                 break
             }
         }
     }

-    /** Consumed once by the VAD */
+    /** ⭐ The only wakeup exit point */
     fun consumeWakeupFlag(): Boolean {
         val r = justWokeUp
         justWokeUp = false
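The wakeup path now goes exclusively through consumeWakeupFlag(): the spotter only raises a flag, and VoiceController polls and clears it on the next audio callback. A tiny sketch of that one-shot latch pattern, assuming single-threaded use as in the audio callback here:

// Illustrative only: the read-once latch behind consumeWakeupFlag().
class OneShotFlag {
    private var raised = false

    fun raise() { raised = true }

    /** Returns true at most once per raise(). */
    fun consume(): Boolean {
        val r = raised
        raised = false
        return r
    }
}

fun main() {
    val flag = OneShotFlag()
    flag.raise()
    println(flag.consume())   // true  – the wakeup is handled here
    println(flag.consume())   // false – a second poll sees nothing
}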
@@ -77,7 +77,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
     private var voiceController: VoiceController? = null
     private var audioRecord: AudioRecord? = null
     private var isRecording = false
-    private val audioSource = MediaRecorder.AudioSource.VOICE_RECOGNITION
+    private val audioSource = MediaRecorder.AudioSource.VOICE_COMMUNICATION
     private val sampleRateInHz = 16000
     private val channelConfig = AudioFormat.CHANNEL_IN_MONO
     private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
@@ -169,17 +169,17 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
             },
             onFinalAudio = { audio ->
                 Log.d("lrs", "Speech detected, length=${audio.size}")
-                mViewModel?.uploadVoice(
-                    AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
-                    1
-                )
-                // loadLocalJsonAndPlay()
-                // val file = File(
-                //     getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
-                //     "xxx.wav"
-                // )
-                // AudioDebugUtil.saveFloatPcmAsWav(audio, file)
-                // LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
+                // mViewModel?.uploadVoice(
+                //     AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
+                //     1
+                // )
+                loadLocalJsonAndPlay()
+                val file = File(
+                    getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
+                    "xxx.wav"
+                )
+                AudioDebugUtil.saveFloatPcmAsWav(audio, file)
+                LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")

             },
             onStateChanged = { state ->
@@ -261,7 +261,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
         if (audioRecord?.state != AudioRecord.STATE_INITIALIZED) {
             Log.e("VoiceService", "Failed to initialize AudioRecord")
         }
-        enableSystemAec(audioRecord!!)
+        // enableSystemAec(audioRecord!!)
     }

     private var aec: AcousticEchoCanceler? = null
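The switch from VOICE_RECOGNITION to VOICE_COMMUNICATION, together with commenting out enableSystemAec, suggests the capture path now leans on the device's own echo-cancelling audio source. Below is a hedged sketch of a matching capture loop (16 kHz mono, 16-bit PCM, converted to the FloatArray the controllers expect); captureLoop, onFrame and isRunning are hypothetical names, and permission handling and threading are omitted.

import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder

// Illustrative only: capture with the parameters used in the diff and hand out float frames.
fun captureLoop(onFrame: (FloatArray) -> Unit, isRunning: () -> Boolean) {
    val sampleRate = 16000
    val minBuf = AudioRecord.getMinBufferSize(
        sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT
    )
    val record = AudioRecord(
        MediaRecorder.AudioSource.VOICE_COMMUNICATION,   // typically routed through platform AEC
        sampleRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, minBuf * 2
    )
    val pcm = ShortArray(320)                            // 20 ms frames at 16 kHz
    record.startRecording()
    try {
        while (isRunning()) {
            val n = record.read(pcm, 0, pcm.size)
            if (n <= 0) continue
            // Same normalisation a PCM16-to-float helper would apply: divide by 32768.
            onFrame(FloatArray(n) { i -> pcm[i] / 32768f })
        }
    } finally {
        record.stop()
        record.release()
    }
}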