优化代码
This commit is contained in:
parent
6e811b99b7
commit
14179286bb
@ -3,26 +3,25 @@ package com.zs.smarthuman.sherpa
|
|||||||
import android.content.res.AssetManager
|
import android.content.res.AssetManager
|
||||||
import com.k2fsa.sherpa.onnx.Vad
|
import com.k2fsa.sherpa.onnx.Vad
|
||||||
import com.k2fsa.sherpa.onnx.getVadModelConfig
|
import com.k2fsa.sherpa.onnx.getVadModelConfig
|
||||||
|
import kotlin.math.sqrt
|
||||||
|
|
||||||
class VadManager(
|
class VadManager(
|
||||||
assetManager: AssetManager,
|
assetManager: AssetManager,
|
||||||
private val onSpeechStart: () -> Unit,
|
private val onSpeechStart: () -> Unit,
|
||||||
private val onSpeechEnd: () -> Unit
|
private val onSpeechEnd: (Float, Float) -> Unit // avgEnergy, peakRms
|
||||||
) {
|
) {
|
||||||
private val vad: Vad
|
private val vad: Vad
|
||||||
|
|
||||||
private var isSpeaking = false
|
private var isSpeaking = false
|
||||||
private var lastSpeechTime = 0L
|
private var lastSpeechTime = 0L
|
||||||
|
|
||||||
/** ⭐ 仅统计“有效语音段” */
|
/** 有效语音统计 */
|
||||||
private var activeFrameCount = 0
|
private var activeFrameCount = 0
|
||||||
private var activeSpeechFrameCount = 0
|
private var activeSpeechFrameCount = 0
|
||||||
|
private var speechEnergySum = 0f
|
||||||
|
private var speechFrameCount = 0
|
||||||
|
private var peakRms = 0f
|
||||||
|
|
||||||
/** ⭐ 用于调试(可选) */
|
private val END_SILENCE_MS = 800L
|
||||||
private var rawFrameCount = 0
|
|
||||||
private var rawSpeechFrameCount = 0
|
|
||||||
|
|
||||||
private val END_SILENCE_MS = 600L
|
|
||||||
|
|
||||||
init {
|
init {
|
||||||
val config = getVadModelConfig(0)
|
val config = getVadModelConfig(0)
|
||||||
@ -30,64 +29,64 @@ class VadManager(
|
|||||||
vad = Vad(assetManager, config)
|
vad = Vad(assetManager, config)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** 外部调用的音频输入 */
|
||||||
fun accept(samples: FloatArray) {
|
fun accept(samples: FloatArray) {
|
||||||
val now = System.currentTimeMillis()
|
val now = System.currentTimeMillis()
|
||||||
|
|
||||||
vad.acceptWaveform(samples)
|
vad.acceptWaveform(samples)
|
||||||
val hasSpeech = vad.isSpeechDetected()
|
val hasSpeech = vad.isSpeechDetected()
|
||||||
|
|
||||||
/* ===== raw 统计(仅日志) ===== */
|
val rms = calcRms(samples)
|
||||||
rawFrameCount++
|
|
||||||
if (hasSpeech) rawSpeechFrameCount++
|
|
||||||
|
|
||||||
if (hasSpeech) {
|
if (hasSpeech) {
|
||||||
lastSpeechTime = now
|
lastSpeechTime = now
|
||||||
|
|
||||||
if (!isSpeaking) {
|
if (!isSpeaking) {
|
||||||
isSpeaking = true
|
isSpeaking = true
|
||||||
|
resetStats()
|
||||||
onSpeechStart()
|
onSpeechStart()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 累计有效语音能量和峰值
|
||||||
|
speechEnergySum += rms
|
||||||
|
speechFrameCount++
|
||||||
|
if (rms > peakRms) peakRms = rms
|
||||||
|
|
||||||
activeFrameCount++
|
activeFrameCount++
|
||||||
activeSpeechFrameCount++
|
activeSpeechFrameCount++
|
||||||
} else {
|
} else {
|
||||||
if (isSpeaking) {
|
if (isSpeaking) activeFrameCount++
|
||||||
activeFrameCount++
|
// 检查结束
|
||||||
|
if (isSpeaking && now - lastSpeechTime >= END_SILENCE_MS) {
|
||||||
if (now - lastSpeechTime >= END_SILENCE_MS) {
|
|
||||||
isSpeaking = false
|
isSpeaking = false
|
||||||
onSpeechEnd()
|
val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
|
||||||
}
|
onSpeechEnd(avgEnergy, peakRms)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** 统计有效语音比例,用于 VoiceController */
|
||||||
* ✅ 真正用于判断「是不是有效人声」
|
|
||||||
* 只统计 VAD 激活期间
|
|
||||||
*/
|
|
||||||
fun activeSpeechRatio(): Float {
|
fun activeSpeechRatio(): Float {
|
||||||
if (activeFrameCount == 0) return 0f
|
if (activeFrameCount == 0) return 0f
|
||||||
return activeSpeechFrameCount.toFloat() / activeFrameCount
|
return activeSpeechFrameCount.toFloat() / activeFrameCount
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* ⚠️ 仅用于调参观察
|
|
||||||
*/
|
|
||||||
fun rawSpeechRatio(): Float {
|
|
||||||
if (rawFrameCount == 0) return 0f
|
|
||||||
return rawSpeechFrameCount.toFloat() / rawFrameCount
|
|
||||||
}
|
|
||||||
|
|
||||||
fun reset() {
|
fun reset() {
|
||||||
isSpeaking = false
|
isSpeaking = false
|
||||||
lastSpeechTime = 0L
|
lastSpeechTime = 0L
|
||||||
|
|
||||||
activeFrameCount = 0
|
activeFrameCount = 0
|
||||||
activeSpeechFrameCount = 0
|
activeSpeechFrameCount = 0
|
||||||
rawFrameCount = 0
|
resetStats()
|
||||||
rawSpeechFrameCount = 0
|
|
||||||
|
|
||||||
vad.reset()
|
vad.reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private fun resetStats() {
|
||||||
|
speechEnergySum = 0f
|
||||||
|
speechFrameCount = 0
|
||||||
|
peakRms = 0f
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun calcRms(audio: FloatArray): Float {
|
||||||
|
var sum = 0f
|
||||||
|
for (v in audio) sum += v * v
|
||||||
|
return sqrt(sum / audio.size)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2,7 +2,6 @@ package com.zs.smarthuman.sherpa
|
|||||||
|
|
||||||
import android.content.res.AssetManager
|
import android.content.res.AssetManager
|
||||||
import android.util.Log
|
import android.util.Log
|
||||||
import kotlin.math.sqrt
|
|
||||||
import java.util.ArrayDeque
|
import java.util.ArrayDeque
|
||||||
|
|
||||||
class VoiceController(
|
class VoiceController(
|
||||||
@ -18,8 +17,8 @@ class VoiceController(
|
|||||||
private val TAG = "VoiceController"
|
private val TAG = "VoiceController"
|
||||||
private val sampleRate = 16000
|
private val sampleRate = 16000
|
||||||
|
|
||||||
private var state: VoiceState = VoiceState.WAIT_WAKEUP
|
var state: VoiceState = VoiceState.WAIT_WAKEUP
|
||||||
set(value) {
|
private set(value) {
|
||||||
field = value
|
field = value
|
||||||
Log.d(TAG, "➡ State = $value")
|
Log.d(TAG, "➡ State = $value")
|
||||||
onStateChanged?.invoke(value)
|
onStateChanged?.invoke(value)
|
||||||
@ -30,41 +29,31 @@ class VoiceController(
|
|||||||
handleWakeupEvent()
|
handleWakeupEvent()
|
||||||
}
|
}
|
||||||
|
|
||||||
private val vadManager = VadManager(
|
|
||||||
assetManager,
|
|
||||||
onSpeechStart = { onVadStart() },
|
|
||||||
onSpeechEnd = {}
|
|
||||||
)
|
|
||||||
|
|
||||||
private val audioBuffer = mutableListOf<Float>()
|
private val audioBuffer = mutableListOf<Float>()
|
||||||
private val preBuffer = ArrayDeque<Float>()
|
private val preBuffer = ArrayDeque<Float>()
|
||||||
private val PRE_BUFFER_SIZE = sampleRate * 2
|
private val PRE_BUFFER_SIZE = sampleRate * 2
|
||||||
|
|
||||||
private var recordingStartMs = 0L
|
private var recordingStartMs = 0L
|
||||||
private var silenceStartMs = 0L
|
|
||||||
private var waitSpeechFailStartMs = 0L
|
private var waitSpeechFailStartMs = 0L
|
||||||
private var waitSpeechStartMs = 0L
|
private var waitSpeechStartMs = 0L
|
||||||
|
|
||||||
private var speechEnergySum = 0f
|
|
||||||
private var speechFrameCount = 0
|
|
||||||
|
|
||||||
private var vadStarted = false
|
private var vadStarted = false
|
||||||
|
|
||||||
private var inKwsObserve = false
|
private var inKwsObserve = false
|
||||||
private var kwsObserveStartMs = 0L
|
private var kwsObserveStartMs = 0L
|
||||||
private val KWS_OBSERVE_MS = 500L
|
private val KWS_OBSERVE_MS = 500L
|
||||||
|
|
||||||
private var speechEnableAtMs = 0L
|
private var speechEnableAtMs = 0L
|
||||||
private val SPEECH_COOLDOWN_MS = 300L
|
private val SPEECH_COOLDOWN_MS = 300L
|
||||||
|
|
||||||
private val RMS_SILENCE_THRESHOLD = 0.012f
|
private val MIN_SPEECH_MS = 800L
|
||||||
private val SILENCE_END_MS = 1200L
|
private val idleTimeoutMs = idleTimeoutSeconds * 1000L
|
||||||
private val MIN_SPEECH_MS = 1000L
|
private val maxRecordingMs = maxRecordingSeconds * 1000L
|
||||||
private val MIN_AVG_ENERGY = 0.02f
|
|
||||||
|
|
||||||
|
private val vadManager = VadManager(assetManager,
|
||||||
|
onSpeechStart = { onVadStart() },
|
||||||
|
onSpeechEnd = { avgEnergy, peakRms -> onVadEnd(avgEnergy, peakRms) }
|
||||||
|
)
|
||||||
|
|
||||||
/* ================= 音频入口 ================= */
|
/* ================= 音频入口 ================= */
|
||||||
|
|
||||||
fun acceptAudio(samples: FloatArray) {
|
fun acceptAudio(samples: FloatArray) {
|
||||||
cachePreBuffer(samples)
|
cachePreBuffer(samples)
|
||||||
wakeupManager.acceptAudio(samples)
|
wakeupManager.acceptAudio(samples)
|
||||||
@ -83,7 +72,7 @@ class VoiceController(
|
|||||||
|
|
||||||
VoiceState.WAIT_SPEECH_COOLDOWN -> {
|
VoiceState.WAIT_SPEECH_COOLDOWN -> {
|
||||||
if (now >= speechEnableAtMs) {
|
if (now >= speechEnableAtMs) {
|
||||||
waitSpeechFailStartMs = System.currentTimeMillis()
|
waitSpeechFailStartMs = now
|
||||||
state = VoiceState.WAIT_SPEECH
|
state = VoiceState.WAIT_SPEECH
|
||||||
waitSpeechStartMs = now
|
waitSpeechStartMs = now
|
||||||
}
|
}
|
||||||
@ -91,14 +80,8 @@ class VoiceController(
|
|||||||
}
|
}
|
||||||
|
|
||||||
VoiceState.WAIT_SPEECH -> {
|
VoiceState.WAIT_SPEECH -> {
|
||||||
|
if ((waitSpeechStartMs > 0 && now - waitSpeechStartMs >= idleTimeoutMs) ||
|
||||||
if (waitSpeechStartMs > 0 && now - waitSpeechStartMs >= idleTimeoutSeconds * 1000) {
|
(waitSpeechFailStartMs > 0 && now - waitSpeechFailStartMs >= idleTimeoutMs)) {
|
||||||
Log.d(TAG, "⏱ Wakeup but no speech → WAIT_WAKEUP")
|
|
||||||
resetAll()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if (waitSpeechFailStartMs > 0 && now - waitSpeechFailStartMs >= idleTimeoutSeconds * 1000) {
|
|
||||||
Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
|
Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
|
||||||
resetAll()
|
resetAll()
|
||||||
return
|
return
|
||||||
@ -114,30 +97,15 @@ class VoiceController(
|
|||||||
audioBuffer.addAll(samples.asList())
|
audioBuffer.addAll(samples.asList())
|
||||||
vadManager.accept(samples)
|
vadManager.accept(samples)
|
||||||
|
|
||||||
val rms = calcRms(samples)
|
if (System.currentTimeMillis() - recordingStartMs > maxRecordingMs) {
|
||||||
if (rms > RMS_SILENCE_THRESHOLD) {
|
|
||||||
speechEnergySum += rms
|
|
||||||
speechFrameCount++
|
|
||||||
silenceStartMs = 0L
|
|
||||||
} else {
|
|
||||||
if (silenceStartMs == 0L) silenceStartMs = now
|
|
||||||
else if (now - silenceStartMs >= SILENCE_END_MS) {
|
|
||||||
Log.d(TAG, "🔇 Silence end")
|
|
||||||
finishSentence()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (now - recordingStartMs > maxRecordingSeconds * 1000) {
|
|
||||||
Log.w(TAG, "⏱ Max recording reached")
|
Log.w(TAG, "⏱ Max recording reached")
|
||||||
finishSentence()
|
finishSentence() // 超时也触发 finish
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ================= 唤醒 ================= */
|
/* ================= 唤醒 ================= */
|
||||||
|
|
||||||
private fun handleWakeupEvent() {
|
private fun handleWakeupEvent() {
|
||||||
when (state) {
|
when (state) {
|
||||||
VoiceState.UPLOADING -> return
|
VoiceState.UPLOADING -> return
|
||||||
@ -157,9 +125,7 @@ class VoiceController(
|
|||||||
if (interrupt) {
|
if (interrupt) {
|
||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
vadManager.reset()
|
vadManager.reset()
|
||||||
resetEnergyStat()
|
|
||||||
vadStarted = false
|
vadStarted = false
|
||||||
silenceStartMs = 0L
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inKwsObserve = true
|
inKwsObserve = true
|
||||||
@ -169,58 +135,45 @@ class VoiceController(
|
|||||||
|
|
||||||
private fun onVadStart() {
|
private fun onVadStart() {
|
||||||
if (state != VoiceState.WAIT_SPEECH) return
|
if (state != VoiceState.WAIT_SPEECH) return
|
||||||
|
|
||||||
Log.d(TAG, "🎤 REAL VAD START")
|
Log.d(TAG, "🎤 REAL VAD START")
|
||||||
vadStarted = true
|
vadStarted = true
|
||||||
recordingStartMs = System.currentTimeMillis()
|
recordingStartMs = System.currentTimeMillis()
|
||||||
silenceStartMs = 0L
|
|
||||||
resetEnergyStat()
|
|
||||||
|
|
||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
audioBuffer.addAll(preBuffer)
|
audioBuffer.addAll(preBuffer)
|
||||||
state = VoiceState.RECORDING
|
state = VoiceState.RECORDING
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ================= 结束录音 ================= */
|
private fun onVadEnd(avgEnergy: Float, peakRms: Float) {
|
||||||
|
if (state != VoiceState.RECORDING) return
|
||||||
|
Log.d(TAG, "🧠 VAD END")
|
||||||
|
finishSentence(avgEnergy, peakRms)
|
||||||
|
}
|
||||||
|
|
||||||
private fun finishSentence() {
|
/* ================= 结束录音 ================= */
|
||||||
|
private fun finishSentence(avgEnergy: Float = 0f, peakRms: Float = 0f) {
|
||||||
val now = System.currentTimeMillis()
|
val now = System.currentTimeMillis()
|
||||||
val duration = now - recordingStartMs
|
val duration = now - recordingStartMs
|
||||||
|
|
||||||
if (!vadStarted || duration < MIN_SPEECH_MS) {
|
if (!vadStarted || duration < MIN_SPEECH_MS) {
|
||||||
Log.d(TAG, "❌ Too short or no VAD start: ${duration}ms")
|
Log.d(TAG, "❌ Too short or no VAD start: $duration ms")
|
||||||
resetToWaitSpeech()
|
resetToWaitSpeech()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
val audio = audioBuffer.toFloatArray()
|
val audio = audioBuffer.toFloatArray()
|
||||||
val vadRatio = vadManager.activeSpeechRatio()
|
val vadRatio = vadManager.activeSpeechRatio()
|
||||||
val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
|
|
||||||
val peakRms = calcPeakRms(audio)
|
|
||||||
|
|
||||||
|
|
||||||
val peakAvgRatio = if (avgEnergy > 0f) peakRms / avgEnergy else 0f
|
val peakAvgRatio = if (avgEnergy > 0f) peakRms / avgEnergy else 0f
|
||||||
|
|
||||||
if (avgEnergy < MIN_AVG_ENERGY) {
|
Log.d(TAG, "📊 Finish Sentence - duration: $duration ms, vadEnded: true")
|
||||||
Log.d(TAG, "❌ Avg energy too low: $avgEnergy → rejected")
|
Log.d(TAG, "📊 vadRatio=$vadRatio, avgEnergy=$avgEnergy, peakRms=$peakRms, peakAvgRatio=$peakAvgRatio")
|
||||||
|
|
||||||
|
if (avgEnergy < 0.02f || peakAvgRatio < 1.2f || vadRatio < 0.4f) {
|
||||||
|
Log.d(TAG, "❌ Sentence rejected")
|
||||||
resetToWaitSpeech()
|
resetToWaitSpeech()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if (peakAvgRatio < 1.2f) {
|
// 评分逻辑
|
||||||
Log.d(TAG, "❌ Peak/Avg ratio too low: $peakAvgRatio → rejected")
|
|
||||||
resetToWaitSpeech()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (vadRatio < 0.40f) {
|
|
||||||
Log.d(TAG, "❌ VAD ratio too low: $vadRatio → rejected")
|
|
||||||
resetToWaitSpeech()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// 原评分逻辑
|
|
||||||
var score = 0
|
var score = 0
|
||||||
when {
|
when {
|
||||||
duration >= 4000 -> score += 3
|
duration >= 4000 -> score += 3
|
||||||
@ -230,64 +183,40 @@ class VoiceController(
|
|||||||
when {
|
when {
|
||||||
avgEnergy >= 0.10f -> score += 3
|
avgEnergy >= 0.10f -> score += 3
|
||||||
avgEnergy >= 0.06f -> score += 2
|
avgEnergy >= 0.06f -> score += 2
|
||||||
avgEnergy >= MIN_AVG_ENERGY -> score += 1
|
avgEnergy >= 0.02f -> score += 1
|
||||||
}
|
}
|
||||||
when {
|
when {
|
||||||
vadRatio >= 0.55f -> score += 2
|
vadRatio >= 0.55f -> score += 2
|
||||||
vadRatio >= 0.40f -> score += 1
|
vadRatio >= 0.40f -> score += 1
|
||||||
}
|
}
|
||||||
|
Log.d(TAG, "📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score")
|
||||||
|
|
||||||
Log.d(TAG, "📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, peakRms=$peakRms, score=$score")
|
val pass = score >= 5 || (score == 3 && avgEnergy >= 0.06f)
|
||||||
|
|
||||||
val pass = when {
|
|
||||||
score >= 6 -> true
|
|
||||||
score == 3 && avgEnergy >= 0.06f -> true
|
|
||||||
else -> false
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!pass) {
|
if (!pass) {
|
||||||
Log.d(TAG, "❌ Sentence rejected (score=$score)")
|
Log.d(TAG, "❌ Sentence rejected (score=$score)")
|
||||||
resetToWaitSpeech()
|
resetToWaitSpeech()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// ✅ 通过 → 上传
|
|
||||||
waitSpeechFailStartMs = 0L
|
|
||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
state = VoiceState.UPLOADING
|
state = VoiceState.UPLOADING
|
||||||
onFinalAudio(audio)
|
onFinalAudio(audio)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** 计算音频帧峰值 */
|
|
||||||
private fun calcPeakRms(audio: FloatArray): Float {
|
|
||||||
var peak = 0f
|
|
||||||
for (v in audio) {
|
|
||||||
val abs = kotlin.math.abs(v)
|
|
||||||
if (abs > peak) peak = abs
|
|
||||||
}
|
|
||||||
return peak
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* ================= 播放回调 ================= */
|
/* ================= 播放回调 ================= */
|
||||||
|
fun onPlayStartPrompt() { state = VoiceState.PLAYING_PROMPT }
|
||||||
fun onPlayStartPrompt() { state = VoiceState.PLAYING_PROMPT } // ⭐ 补全
|
|
||||||
fun onPlayEndPrompt() {
|
fun onPlayEndPrompt() {
|
||||||
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
||||||
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
||||||
}
|
}
|
||||||
|
|
||||||
fun onPlayStartBackend() { state = VoiceState.PLAYING_BACKEND } // ⭐ 补全
|
fun onPlayStartBackend() { state = VoiceState.PLAYING_BACKEND }
|
||||||
fun onPlayEndBackend() {
|
fun onPlayEndBackend() {
|
||||||
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
|
||||||
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
state = VoiceState.WAIT_SPEECH_COOLDOWN
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ================= 上传回调 ================= */
|
/* ================= 上传回调 ================= */
|
||||||
|
|
||||||
fun onUploadFinished(success: Boolean) {
|
fun onUploadFinished(success: Boolean) {
|
||||||
if (state != VoiceState.UPLOADING) return
|
if (state != VoiceState.UPLOADING) return
|
||||||
state = if (success) VoiceState.PLAYING_BACKEND
|
state = if (success) VoiceState.PLAYING_BACKEND
|
||||||
@ -297,17 +226,12 @@ class VoiceController(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* ================= Reset ================= */
|
/* ================= Reset ================= */
|
||||||
|
|
||||||
private fun resetToWaitSpeech() {
|
private fun resetToWaitSpeech() {
|
||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
vadManager.reset()
|
vadManager.reset()
|
||||||
resetEnergyStat()
|
|
||||||
vadStarted = false
|
vadStarted = false
|
||||||
silenceStartMs = 0L
|
|
||||||
state = VoiceState.WAIT_SPEECH
|
state = VoiceState.WAIT_SPEECH
|
||||||
|
|
||||||
if (waitSpeechFailStartMs == 0L) waitSpeechFailStartMs = System.currentTimeMillis()
|
if (waitSpeechFailStartMs == 0L) waitSpeechFailStartMs = System.currentTimeMillis()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -315,9 +239,7 @@ class VoiceController(
|
|||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
preBuffer.clear()
|
preBuffer.clear()
|
||||||
vadManager.reset()
|
vadManager.reset()
|
||||||
resetEnergyStat()
|
|
||||||
vadStarted = false
|
vadStarted = false
|
||||||
silenceStartMs = 0L
|
|
||||||
waitSpeechStartMs = 0L
|
waitSpeechStartMs = 0L
|
||||||
waitSpeechFailStartMs = 0L
|
waitSpeechFailStartMs = 0L
|
||||||
state = VoiceState.WAIT_WAKEUP
|
state = VoiceState.WAIT_WAKEUP
|
||||||
@ -329,22 +251,10 @@ class VoiceController(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* ================= Utils ================= */
|
/* ================= Utils ================= */
|
||||||
|
|
||||||
private fun resetEnergyStat() {
|
|
||||||
speechEnergySum = 0f
|
|
||||||
speechFrameCount = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun cachePreBuffer(samples: FloatArray) {
|
private fun cachePreBuffer(samples: FloatArray) {
|
||||||
for (s in samples) {
|
for (s in samples) {
|
||||||
preBuffer.addLast(s)
|
preBuffer.addLast(s)
|
||||||
if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst()
|
if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun calcRms(audio: FloatArray): Float {
|
|
||||||
var sum = 0f
|
|
||||||
for (v in audio) sum += v * v
|
|
||||||
return sqrt(sum / audio.size)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -136,12 +136,12 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
mViewModel?.uploadVoiceLiveData?.observe(this) {
|
mViewModel?.uploadVoiceLiveData?.observe(this) {
|
||||||
when (it) {
|
when (it) {
|
||||||
is ApiResult.Error -> {
|
is ApiResult.Error -> {
|
||||||
Toaster.showShort("上传失败")
|
// Toaster.showShort("上传失败")
|
||||||
voiceController?.onUploadFinished(false)
|
voiceController?.onUploadFinished(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
is ApiResult.Success<*> -> {
|
is ApiResult.Success<*> -> {
|
||||||
Toaster.showShort("上传成功")
|
// Toaster.showShort("上传成功")
|
||||||
voiceController?.onUploadFinished(true)
|
voiceController?.onUploadFinished(true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -185,7 +185,6 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
onStateChanged = { state ->
|
onStateChanged = { state ->
|
||||||
when (state) {
|
when (state) {
|
||||||
VoiceState.WAIT_WAKEUP -> {
|
VoiceState.WAIT_WAKEUP -> {
|
||||||
Log.d("lrs", "当前状态: 等待唤醒")
|
|
||||||
lifecycleScope.launch(Dispatchers.Main) {
|
lifecycleScope.launch(Dispatchers.Main) {
|
||||||
mVerticalAnimator?.hide()
|
mVerticalAnimator?.hide()
|
||||||
UnityPlayerHolder.getInstance()
|
UnityPlayerHolder.getInstance()
|
||||||
@ -201,10 +200,18 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
VoiceState.WAIT_SPEECH -> Log.d("lrs", "当前状态: 唤醒成功,等待说话")
|
VoiceState.WAIT_SPEECH -> {
|
||||||
VoiceState.RECORDING -> Log.d("lrs", "当前状态: 正在录音")
|
|
||||||
VoiceState.PLAYING_PROMPT -> Log.d("lrs", "当前状态: 播放本地音频")
|
}
|
||||||
VoiceState.PLAYING_BACKEND -> Log.d("lrs", "当前状态: 播放后台音频")
|
VoiceState.RECORDING -> {
|
||||||
|
startRecording()
|
||||||
|
}
|
||||||
|
VoiceState.PLAYING_PROMPT ->{}
|
||||||
|
VoiceState.PLAYING_BACKEND ->{}
|
||||||
|
VoiceState.UPLOADING -> {}
|
||||||
|
|
||||||
|
VoiceState.WAIT_SPEECH_COOLDOWN -> {}
|
||||||
|
|
||||||
else -> {}
|
else -> {}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -248,15 +255,18 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
|
|
||||||
override fun onPause() {
|
override fun onPause() {
|
||||||
super.onPause()
|
super.onPause()
|
||||||
|
stopRecording()
|
||||||
UnityPlayerHolder.getInstance().pause()
|
UnityPlayerHolder.getInstance().pause()
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressLint("MissingPermission")
|
@SuppressLint("MissingPermission")
|
||||||
private fun initMicrophone() {
|
private fun initMicrophone(): Boolean {
|
||||||
val numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
|
val minBuf = AudioRecord.getMinBufferSize(
|
||||||
if (numBytes == AudioRecord.ERROR || numBytes == AudioRecord.ERROR_BAD_VALUE) {
|
sampleRateInHz, channelConfig, audioFormat
|
||||||
Log.e("VoiceService", "Failed to initialize microphone: Invalid buffer size")
|
)
|
||||||
return
|
if (minBuf <= 0) {
|
||||||
|
Log.e("VoiceService", "Invalid min buffer size")
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
audioRecord = AudioRecord(
|
audioRecord = AudioRecord(
|
||||||
@ -264,15 +274,20 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
sampleRateInHz,
|
sampleRateInHz,
|
||||||
channelConfig,
|
channelConfig,
|
||||||
audioFormat,
|
audioFormat,
|
||||||
numBytes * 2 // 设置更大的缓冲区以防止丢失数据
|
minBuf * 2
|
||||||
)
|
)
|
||||||
|
|
||||||
if (audioRecord?.state != AudioRecord.STATE_INITIALIZED) {
|
if (audioRecord?.state != AudioRecord.STATE_INITIALIZED) {
|
||||||
Log.e("VoiceService", "Failed to initialize AudioRecord")
|
Log.e("VoiceService", "AudioRecord init failed")
|
||||||
|
audioRecord?.release()
|
||||||
|
audioRecord = null
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
// enableSystemAec(audioRecord!!)
|
enableSystemAec(audioRecord!!)
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private var aec: AcousticEchoCanceler? = null
|
private var aec: AcousticEchoCanceler? = null
|
||||||
|
|
||||||
private fun enableSystemAec(record: AudioRecord) {
|
private fun enableSystemAec(record: AudioRecord) {
|
||||||
@ -290,23 +305,50 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
|
|
||||||
//开始录音
|
//开始录音
|
||||||
fun startRecording() {
|
fun startRecording() {
|
||||||
isRecording = true
|
if (isRecording) return
|
||||||
|
|
||||||
|
if (audioRecord == null) {
|
||||||
|
if (!initMicrophone()) {
|
||||||
|
Log.e("VoiceService", "startRecording: init failed")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
audioRecord?.startRecording()
|
audioRecord?.startRecording()
|
||||||
|
} catch (e: IllegalStateException) {
|
||||||
|
Log.e("VoiceService", "startRecording failed, recreate", e)
|
||||||
|
recreateAudioRecord()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
isRecording = true
|
||||||
|
|
||||||
lifecycleScope.launch(Dispatchers.IO) {
|
lifecycleScope.launch(Dispatchers.IO) {
|
||||||
val buf = ShortArray(512)
|
val buf = ShortArray(512)
|
||||||
while (isRecording) {
|
while (isRecording) {
|
||||||
val n = audioRecord?.read(buf, 0, buf.size) ?: 0
|
val n = audioRecord?.read(buf, 0, buf.size) ?: break
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
val raw = FloatArray(n) { buf[it] / 32768f }
|
val raw = FloatArray(n) { buf[it] / 32768f }
|
||||||
|
|
||||||
|
|
||||||
voiceController?.acceptAudio(raw)
|
voiceController?.acceptAudio(raw)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private fun recreateAudioRecord() {
|
||||||
|
stopRecording()
|
||||||
|
try {
|
||||||
|
audioRecord?.release()
|
||||||
|
} catch (_: Exception) {
|
||||||
|
}
|
||||||
|
|
||||||
|
audioRecord = null
|
||||||
|
initMicrophone()
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//停止录音
|
//停止录音
|
||||||
fun stopRecording() {
|
fun stopRecording() {
|
||||||
isRecording = false
|
isRecording = false
|
||||||
@ -342,7 +384,8 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
word: String,
|
word: String,
|
||||||
audioUrl: String
|
audioUrl: String
|
||||||
) {
|
) {
|
||||||
val wakeupUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?: */"https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return
|
val wakeupUrl = /*UserInfoManager.userInfo?.wakeUpAudioUrl ?: */
|
||||||
|
"https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3" ?: return
|
||||||
|
|
||||||
if (audioUrl != wakeupUrl) return
|
if (audioUrl != wakeupUrl) return
|
||||||
|
|
||||||
|
|||||||
@ -90,7 +90,7 @@ class PcmAudioWithAecManager(
|
|||||||
// 录音数据处理
|
// 录音数据处理
|
||||||
processCapture(buffer.copyOf(read))
|
processCapture(buffer.copyOf(read))
|
||||||
}
|
}
|
||||||
voiceController.checkIdleTimeout()
|
// voiceController.checkIdleTimeout()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user