diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml
index adea95b..d64e047 100644
--- a/app/src/main/AndroidManifest.xml
+++ b/app/src/main/AndroidManifest.xml
@@ -52,7 +52,7 @@
tools:targetApi="31">
@@ -66,10 +66,9 @@
-
diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
index 504ea65..72f0021 100644
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt
@@ -27,11 +27,6 @@ class VadManager(
/** 喂入音频帧 (16kHz PCM float) */
fun accept(samples: FloatArray) {
- // 放大音量,提高灵敏度
- for (i in samples.indices) {
- samples[i] *= 2.5f
- }
-
vad.acceptWaveform(samples)
val speechDetected = vad.isSpeechDetected()
diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
index 1ae5caf..8c731f9 100644
--- a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt
@@ -13,7 +13,6 @@ class VoiceController(
private val onStateChanged: ((VoiceState) -> Unit)? = null,
private val stopBackendAudio: (() -> Unit)? = null
) {
-
private val TAG = "VoiceController"
private val sampleRate = 16000
@@ -29,8 +28,10 @@ class VoiceController(
private val wakeupManager = WakeupManager(assetManager) {
Log.d(TAG, "🔥 WakeWord detected")
stopBackendAudio?.invoke()
- resetAll()
- state = VoiceState.PLAYING_PROMPT
+ if (state != VoiceState.UPLOADING) { // do not reset while an upload is in flight
+ resetAll()
+ state = VoiceState.PLAYING_PROMPT
+ }
onWakeup()
}
@@ -38,27 +39,29 @@ class VoiceController(
private val vadManager = VadManager(
assetManager,
onSpeechStart = { onVadStart() },
- onSpeechEnd = { /* ❌ 不再用于结束 */ }
+ onSpeechEnd = { /* no longer used to end a sentence */ }
)
/* ================= 音频缓存 ================= */
private val audioBuffer = mutableListOf()
private val preBuffer = ArrayDeque()
- private val PRE_BUFFER_SIZE = sampleRate / 2 // 500ms
+ private val PRE_BUFFER_SIZE = sampleRate // 1 second of pre-roll (sampleRate samples)
/* ================= 时间 ================= */
private var idleTimer = 0L
private var recordingStartTime = 0L
private var vadStarted = false
- /* ================= RMS 结束判定 ================= */
+ /* ================= RMS silence detection ================= */
private var silenceStartMs = 0L
private val SILENCE_END_MS = 1200L // 静音多久算一句结束
- private val RMS_SILENCE_THRESHOLD = 0.01f // 静音能量阈值
- private val MIN_SPEECH_DURATION_MS = 800L // 最短有效语音
+ private val RMS_SILENCE_THRESHOLD = 0.005f // lower threshold = more sensitive; a frame whose RMS is below this counts as silence
+ private val MIN_SPEECH_DURATION_MS = 300L // shortest utterance accepted as speech
+ private val MIN_SPEECH_RATIO = 0.15f // at least 15% of frames must reach RMS_SILENCE_THRESHOLD
/* ================= 音频入口 ================= */
fun acceptAudio(samples: FloatArray) {
+ // Wake-word detection is handled independently: it is fed every frame, before any state check
wakeupManager.acceptAudio(samples)
if (state == VoiceState.UPLOADING ||
@@ -87,14 +90,11 @@ class VoiceController(
return
}
- // 2️⃣ RMS 静音结束(核心)
+ // 2️⃣ RMS-based silence detection for end of sentence
val rms = calcRms(samples)
-// Log.d(TAG, "RMS_DEBUG", "rms=${"%.4f".format(rms)}")
-
if (rms < RMS_SILENCE_THRESHOLD) {
- if (silenceStartMs == 0L) {
- silenceStartMs = now
- } else if (now - silenceStartMs >= SILENCE_END_MS) {
+ if (silenceStartMs == 0L) silenceStartMs = now
+ else if (now - silenceStartMs >= SILENCE_END_MS) {
Log.d(TAG, "🔇 RMS silence end")
finishSentence()
}
@@ -123,7 +123,17 @@ class VoiceController(
if (!vadStarted || speakTime < MIN_SPEECH_DURATION_MS) {
Log.d(TAG, "⛔ Speech too short, ignore")
- resetToWaitSpeech()
+ resetToWaitSpeech(refreshIdle = false)
+ return
+ }
+
+ val rmsFrames = calcRmsFrames(audioBuffer.toFloatArray(), frameSize = 320)
+ val validFrames = rmsFrames.count { it >= RMS_SILENCE_THRESHOLD }
+ val ratio = if (rmsFrames.isEmpty()) 0f else validFrames.toFloat() / rmsFrames.size
+ Log.d(TAG, "RMS ratio=$ratio")
+ if (ratio < MIN_SPEECH_RATIO) {
+ Log.d(TAG, "❌ Not enough human voice (ratio=$ratio)")
+ resetToWaitSpeech(refreshIdle = false)
return
}
@@ -136,47 +146,38 @@ class VoiceController(
}
/* ================= 播放回调 ================= */
- fun onPlayStartPrompt() {
- state = VoiceState.PLAYING_PROMPT
- }
-
- fun onPlayEndPrompt() {
- state = VoiceState.WAIT_SPEECH
- idleTimer = System.currentTimeMillis()
- }
-
- fun onPlayStartBackend() {
- state = VoiceState.PLAYING_BACKEND
- }
-
- fun onPlayEndBackend() {
- state = VoiceState.WAIT_SPEECH
- idleTimer = System.currentTimeMillis()
- }
+ /** Called when the prompt starts playing; switches the controller to PLAYING_PROMPT. */
+ fun onPlayStartPrompt() {
+     state = VoiceState.PLAYING_PROMPT
+ }
+ /** Called when the prompt finishes; returns to WAIT_SPEECH and restarts the idle countdown from now. */
+ fun onPlayEndPrompt() {
+     state = VoiceState.WAIT_SPEECH
+     idleTimer = System.currentTimeMillis()
+ }
+ /** Called when backend audio starts playing; switches the controller to PLAYING_BACKEND. */
+ fun onPlayStartBackend() {
+     state = VoiceState.PLAYING_BACKEND
+ }
+ /** Called when backend audio finishes; returns to WAIT_SPEECH and restarts the idle countdown from now. */
+ fun onPlayEndBackend() {
+     state = VoiceState.WAIT_SPEECH
+     idleTimer = System.currentTimeMillis()
+ }
/* ================= 上传回调 ================= */
fun onUploadFinished(success: Boolean) {
if (state != VoiceState.UPLOADING) return
state = if (success) VoiceState.PLAYING_BACKEND else VoiceState.WAIT_SPEECH
+ idleTimer = System.currentTimeMillis()
}
/* ================= Idle ================= */
fun checkIdleTimeout() {
+ // Only WAIT_SPEECH is subject to the idle timeout; any other state (e.g. UPLOADING, PLAYING_*) is never timed
if (state != VoiceState.WAIT_SPEECH) return
- if (System.currentTimeMillis() - idleTimer > idleTimeoutSeconds * 1000) {
+ val now = System.currentTimeMillis()
+ // 1000L forces Long arithmetic so a large idleTimeoutSeconds cannot overflow Int
+ if (now - idleTimer > idleTimeoutSeconds * 1000L) {
+ Log.d(TAG, "⏱ Idle timeout reached, resetAll")
resetAll()
}
}
/* ================= Reset ================= */
- private fun resetToWaitSpeech() {
+ private fun resetToWaitSpeech(refreshIdle: Boolean = true) {
audioBuffer.clear()
preBuffer.clear()
vadManager.reset()
vadStarted = false
silenceStartMs = 0L
state = VoiceState.WAIT_SPEECH
- idleTimer = System.currentTimeMillis()
+ if (refreshIdle) idleTimer = System.currentTimeMillis()
}
private fun resetAll() {
@@ -197,17 +198,28 @@ class VoiceController(
private fun cachePreBuffer(samples: FloatArray) {
for (s in samples) {
preBuffer.addLast(s)
- if (preBuffer.size > PRE_BUFFER_SIZE) {
- preBuffer.removeFirst()
- }
+ if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst()
}
}
private fun calcRms(audio: FloatArray): Float {
+ if (audio.isEmpty()) return 0f
var sum = 0f
- for (v in audio) {
- sum += v * v
- }
+ for (v in audio) sum += v * v
return sqrt(sum / audio.size)
}
+
+ /**
+  * Splits [audio] into consecutive frames of [frameSize] samples and returns the RMS of each frame.
+  *
+  * The last frame may be shorter than [frameSize]; its RMS is taken over the samples it actually holds.
+  * An empty [audio] yields an empty array.
+  *
+  * @param audio samples to analyse
+  * @param frameSize number of samples per frame; must be positive
+  * @return one RMS value per frame, in input order
+  * @throws IllegalArgumentException if [frameSize] is not positive
+  */
+ private fun calcRmsFrames(audio: FloatArray, frameSize: Int = 320): FloatArray {
+     // A non-positive frame size would never advance through the input (endless loop), so reject it up front.
+     require(frameSize > 0) { "frameSize must be positive, was $frameSize" }
+     val frameCount = if (audio.isEmpty()) 0 else (audio.size - 1) / frameSize + 1
+     // Index straight into `audio` instead of slicing: no per-frame copy and no boxed intermediate list.
+     return FloatArray(frameCount) { frame ->
+         val start = frame * frameSize
+         val end = minOf(start + frameSize, audio.size)
+         var sum = 0f
+         for (i in start until end) sum += audio[i] * audio[i]
+         sqrt(sum / (end - start))
+     }
+ }
}
diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt b/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt
index 7454059..c7fcbce 100644
--- a/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt
+++ b/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt
@@ -38,9 +38,9 @@ class WakeupManager(
fun acceptAudio(samples: FloatArray) {
val s = stream ?: return
// ⭐ 远讲 / 播放补偿(非常关键)
-// for (i in samples.indices) {
-// samples[i] *= 2.5f
-// }
- s.acceptWaveform(samples, sampleRate)
+ // Apply the gain to a private copy: VoiceController.acceptAudio keeps using `samples` after this call
+ // (RMS silence check), so boosting in place would leak the wake-word gain into that path.
+ val boosted = FloatArray(samples.size) { i -> samples[i] * 2.5f }
+ s.acceptWaveform(boosted, sampleRate)
while (kws.isReady(s)) {
diff --git a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
index 18d2a39..3c3bb02 100644
--- a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
+++ b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt
@@ -52,6 +52,7 @@ import com.zs.smarthuman.utils.ViewSlideAnimator
import com.zs.smarthuman.viewmodel.MainViewModel
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.Job
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.delay
import kotlinx.coroutines.launch
@@ -136,10 +137,12 @@ class MainActivity : BaseViewModelActivity()
when (it) {
is ApiResult.Error -> {
Toaster.showShort("上传失败")
+ voiceController?.onUploadFinished(false)
}
is ApiResult.Success<*> -> {
Toaster.showShort("上传成功")
+ voiceController?.onUploadFinished(true)
}
}
}
@@ -166,17 +169,17 @@ class MainActivity : BaseViewModelActivity()
},
onFinalAudio = { audio ->
Log.d("lrs", "检测到语音,长度=${audio.size}")
-// mViewModel?.uploadVoice(
-// AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
-// 1
-// )
- loadLocalJsonAndPlay()
- val file = File(
- getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
- "xxx.wav"
+ mViewModel?.uploadVoice(
+ AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
+ 1
)
- AudioDebugUtil.saveFloatPcmAsWav(audio, file)
- LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
+// loadLocalJsonAndPlay()
+// val file = File(
+// getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
+// "xxx.wav"
+// )
+// AudioDebugUtil.saveFloatPcmAsWav(audio, file)
+// LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
},
onStateChanged = { state ->
@@ -204,8 +207,9 @@ class MainActivity : BaseViewModelActivity()
when (msg.msgContentType) {
MessageContentType.RECEIVE_VOICE_STREAM.msgContentType -> {
lifecycleScope.launch(Dispatchers.IO) {
- UnityPlayerHolder.getInstance()
- .startTalking(msg.content)
+// UnityPlayerHolder.getInstance()
+// .startTalking(msg.content)
+ loadLocalJsonAndPlay()
}
}
}
@@ -315,6 +319,14 @@ class MainActivity : BaseViewModelActivity()
private var promptPlaying = false
private var backPlaying = false
+ private var promptTimeoutJob: Job? = null
+ private val PROMPT_PLAY_TIMEOUT_MS = 3000L // 3 seconds. NOTE(review): this comment used to say 10 s; confirm the intended value
+
+
+ private var backTimeoutJob: Job? = null
+ private val BACK_PLAY_TIMEOUT_MS = 3000L // 3 seconds. NOTE(review): this comment used to say 10 s; confirm the intended value
+
+
fun onAudioProgressUpdated( // Unity 调用此方法传递音频进度
progress: Float,
state: Int,//0stop 2pause 1play 3complete 4loading 5error
@@ -331,6 +343,13 @@ class MainActivity : BaseViewModelActivity()
if (!promptPlaying) {
promptPlaying = true
voiceController?.onPlayStartPrompt()
+
+ // Watchdog: if no "complete" event arrives within PROMPT_PLAY_TIMEOUT_MS, force the end-of-prompt transition.
+ // Cancel any earlier watchdog first so a stale one can never fire for this playback.
+ promptTimeoutJob?.cancel()
+ promptTimeoutJob = lifecycleScope.launch {
+     delay(PROMPT_PLAY_TIMEOUT_MS)
+     // Only end the prompt if it is still marked as playing; otherwise it was already ended elsewhere.
+     if (promptPlaying) {
+         promptPlaying = false
+         voiceController?.onPlayEndPrompt()
+     }
+     // The job is finishing on its own; drop the reference instead of cancelling ourselves.
+     promptTimeoutJob = null
+ }
}
}
@@ -338,6 +357,7 @@ class MainActivity : BaseViewModelActivity()
if (promptPlaying) {
promptPlaying = false
voiceController?.onPlayEndPrompt()
+ promptTimeoutJob?.cancel()
}
}
}
@@ -360,7 +380,6 @@ class MainActivity : BaseViewModelActivity()
3 -> { // complete
if (backPlaying) {
- Toaster.showShort("借宿了")
backPlaying = false
voiceController?.onPlayEndBackend()
}