临时提交
This commit is contained in:
parent
885dcbd044
commit
997bfe0539
@ -52,7 +52,7 @@
|
|||||||
tools:targetApi="31">
|
tools:targetApi="31">
|
||||||
|
|
||||||
<activity
|
<activity
|
||||||
android:name=".ui.MainActivity"
|
android:name=".ui.SplashActivity"
|
||||||
android:exported="true"
|
android:exported="true"
|
||||||
android:theme="@style/Theme.Splash"
|
android:theme="@style/Theme.Splash"
|
||||||
android:screenOrientation="portrait">
|
android:screenOrientation="portrait">
|
||||||
@ -66,10 +66,9 @@
|
|||||||
</intent-filter>
|
</intent-filter>
|
||||||
</activity>
|
</activity>
|
||||||
|
|
||||||
<!-- <activity
|
<activity
|
||||||
android:name="com.zs.smarthuman.ui.MainActivity"
|
android:name="com.zs.smarthuman.ui.MainActivity"
|
||||||
android:screenOrientation="portrait"/>
|
android:screenOrientation="portrait"/>
|
||||||
-->
|
|
||||||
<activity
|
<activity
|
||||||
android:name="com.zs.smarthuman.ui.ActivateActivity"
|
android:name="com.zs.smarthuman.ui.ActivateActivity"
|
||||||
android:screenOrientation="portrait"/>
|
android:screenOrientation="portrait"/>
|
||||||
|
|||||||
@ -27,11 +27,6 @@ class VadManager(
|
|||||||
|
|
||||||
/** 喂入音频帧 (16kHz PCM float) */
|
/** 喂入音频帧 (16kHz PCM float) */
|
||||||
fun accept(samples: FloatArray) {
|
fun accept(samples: FloatArray) {
|
||||||
// 放大音量,提高灵敏度
|
|
||||||
for (i in samples.indices) {
|
|
||||||
samples[i] *= 2.5f
|
|
||||||
}
|
|
||||||
|
|
||||||
vad.acceptWaveform(samples)
|
vad.acceptWaveform(samples)
|
||||||
val speechDetected = vad.isSpeechDetected()
|
val speechDetected = vad.isSpeechDetected()
|
||||||
|
|
||||||
|
|||||||
@ -13,7 +13,6 @@ class VoiceController(
|
|||||||
private val onStateChanged: ((VoiceState) -> Unit)? = null,
|
private val onStateChanged: ((VoiceState) -> Unit)? = null,
|
||||||
private val stopBackendAudio: (() -> Unit)? = null
|
private val stopBackendAudio: (() -> Unit)? = null
|
||||||
) {
|
) {
|
||||||
|
|
||||||
private val TAG = "VoiceController"
|
private val TAG = "VoiceController"
|
||||||
private val sampleRate = 16000
|
private val sampleRate = 16000
|
||||||
|
|
||||||
@ -29,8 +28,10 @@ class VoiceController(
|
|||||||
/**
 * Wake-word detector. The trailing lambda is the callback handed to
 * [WakeupManager] (presumably invoked when the wake word is recognised — see
 * the log line). It stops backend audio, resets the controller and switches to
 * PLAYING_PROMPT unless an upload is in flight, then calls [onWakeup].
 */
private val wakeupManager = WakeupManager(assetManager) {
    Log.d(TAG, "🔥 WakeWord detected")
    stopBackendAudio?.invoke()

    // Do not reset while uploading: the UPLOADING state is left as it is.
    val uploadInProgress = state == VoiceState.UPLOADING
    if (!uploadInProgress) {
        resetAll()
        state = VoiceState.PLAYING_PROMPT
    }

    onWakeup()
}
|
||||||
|
|
||||||
@ -38,27 +39,29 @@ class VoiceController(
|
|||||||
private val vadManager = VadManager(
|
private val vadManager = VadManager(
|
||||||
assetManager,
|
assetManager,
|
||||||
onSpeechStart = { onVadStart() },
|
onSpeechStart = { onVadStart() },
|
||||||
onSpeechEnd = { /* ❌ 不再用于结束 */ }
|
onSpeechEnd = { /* 不再用于结束 */ }
|
||||||
)
|
)
|
||||||
|
|
||||||
/* ================= 音频缓存 ================= */
|
/* ================= 音频缓存 ================= */
|
||||||
private val audioBuffer = mutableListOf<Float>()
|
private val audioBuffer = mutableListOf<Float>()
|
||||||
private val preBuffer = ArrayDeque<Float>()
|
private val preBuffer = ArrayDeque<Float>()
|
||||||
private val PRE_BUFFER_SIZE = sampleRate / 2 // 500ms
|
private val PRE_BUFFER_SIZE = sampleRate // 1 秒预缓冲
|
||||||
|
|
||||||
/* ================= 时间 ================= */
|
/* ================= 时间 ================= */
|
||||||
private var idleTimer = 0L
|
private var idleTimer = 0L
|
||||||
private var recordingStartTime = 0L
|
private var recordingStartTime = 0L
|
||||||
private var vadStarted = false
|
private var vadStarted = false
|
||||||
|
|
||||||
/* ================= RMS 结束判定 ================= */
|
/* ================= RMS 静音判定 ================= */
|
||||||
private var silenceStartMs = 0L
|
private var silenceStartMs = 0L
|
||||||
private val SILENCE_END_MS = 1200L // 静音多久算一句结束
|
private val SILENCE_END_MS = 1200L // 静音多久算一句结束
|
||||||
private val RMS_SILENCE_THRESHOLD = 0.01f // 静音能量阈值
|
private val RMS_SILENCE_THRESHOLD = 0.005f // 更灵敏
|
||||||
private val MIN_SPEECH_DURATION_MS = 800L // 最短有效语音
|
private val MIN_SPEECH_DURATION_MS = 300L // 最短有效语音
|
||||||
|
private val MIN_SPEECH_RATIO = 0.15f // 有效帧占比至少 15%
|
||||||
|
|
||||||
/* ================= 音频入口 ================= */
|
/* ================= 音频入口 ================= */
|
||||||
fun acceptAudio(samples: FloatArray) {
|
fun acceptAudio(samples: FloatArray) {
|
||||||
|
// 唤醒独立处理,始终喂
|
||||||
wakeupManager.acceptAudio(samples)
|
wakeupManager.acceptAudio(samples)
|
||||||
|
|
||||||
if (state == VoiceState.UPLOADING ||
|
if (state == VoiceState.UPLOADING ||
|
||||||
@ -87,14 +90,11 @@ class VoiceController(
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2️⃣ RMS 静音结束(核心)
|
// 2️⃣ RMS 静音结束判定
|
||||||
val rms = calcRms(samples)
|
val rms = calcRms(samples)
|
||||||
// Log.d(TAG, "RMS_DEBUG", "rms=${"%.4f".format(rms)}")
|
|
||||||
|
|
||||||
if (rms < RMS_SILENCE_THRESHOLD) {
|
if (rms < RMS_SILENCE_THRESHOLD) {
|
||||||
if (silenceStartMs == 0L) {
|
if (silenceStartMs == 0L) silenceStartMs = now
|
||||||
silenceStartMs = now
|
else if (now - silenceStartMs >= SILENCE_END_MS) {
|
||||||
} else if (now - silenceStartMs >= SILENCE_END_MS) {
|
|
||||||
Log.d(TAG, "🔇 RMS silence end")
|
Log.d(TAG, "🔇 RMS silence end")
|
||||||
finishSentence()
|
finishSentence()
|
||||||
}
|
}
|
||||||
@ -123,7 +123,17 @@ class VoiceController(
|
|||||||
|
|
||||||
if (!vadStarted || speakTime < MIN_SPEECH_DURATION_MS) {
|
if (!vadStarted || speakTime < MIN_SPEECH_DURATION_MS) {
|
||||||
Log.d(TAG, "⛔ Speech too short, ignore")
|
Log.d(TAG, "⛔ Speech too short, ignore")
|
||||||
resetToWaitSpeech()
|
resetToWaitSpeech(refreshIdle = false)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
val rmsFrames = calcRmsFrames(audioBuffer.toFloatArray(), frameSize = 320)
|
||||||
|
val validFrames = rmsFrames.count { it >= RMS_SILENCE_THRESHOLD }
|
||||||
|
val ratio = if (rmsFrames.isEmpty()) 0f else validFrames.toFloat() / rmsFrames.size
|
||||||
|
Log.d(TAG, "RMS ratio=$ratio")
|
||||||
|
if (ratio < MIN_SPEECH_RATIO) {
|
||||||
|
Log.d(TAG, "❌ Not enough human voice (ratio=$ratio)")
|
||||||
|
resetToWaitSpeech(refreshIdle = false)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -136,47 +146,38 @@ class VoiceController(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* ================= 播放回调 ================= */
|
/* ================= 播放回调 ================= */
|
||||||
/** Prompt playback started: switch to PLAYING_PROMPT. */
fun onPlayStartPrompt() {
    state = VoiceState.PLAYING_PROMPT
}

/** Prompt playback ended: return to WAIT_SPEECH and restart the idle timer. */
fun onPlayEndPrompt() {
    state = VoiceState.WAIT_SPEECH
    idleTimer = System.currentTimeMillis()
}

/** Backend audio playback started: switch to PLAYING_BACKEND. */
fun onPlayStartBackend() {
    state = VoiceState.PLAYING_BACKEND
}

/** Backend audio playback ended: return to WAIT_SPEECH and restart the idle timer. */
fun onPlayEndBackend() {
    state = VoiceState.WAIT_SPEECH
    idleTimer = System.currentTimeMillis()
}
|
|
||||||
|
|
||||||
/* ================= 上传回调 ================= */
|
/* ================= 上传回调 ================= */
|
||||||
/**
 * Upload-completion callback. Ignored unless the controller is currently
 * UPLOADING.
 *
 * @param success true moves the state to PLAYING_BACKEND, false moves it back
 *   to WAIT_SPEECH; in both cases the idle timer is restarted.
 */
fun onUploadFinished(success: Boolean) {
    if (state == VoiceState.UPLOADING) {
        state = when {
            success -> VoiceState.PLAYING_BACKEND
            else -> VoiceState.WAIT_SPEECH
        }
        idleTimer = System.currentTimeMillis()
    }
}
|
||||||
|
|
||||||
/* ================= Idle ================= */
|
/* ================= Idle ================= */
|
||||||
/**
 * Resets the whole controller once more than `idleTimeoutSeconds` have passed
 * since `idleTimer` was last refreshed. Only the WAIT_SPEECH state is timed;
 * every other state (uploading included) returns without doing anything.
 */
fun checkIdleTimeout() {
    if (state == VoiceState.WAIT_SPEECH) {
        val elapsedMs = System.currentTimeMillis() - idleTimer
        if (elapsedMs > idleTimeoutSeconds * 1000) {
            Log.d(TAG, "⏱ Idle timeout reached, resetAll")
            resetAll()
        }
    }
}
|
||||||
|
|
||||||
/* ================= Reset ================= */
|
/* ================= Reset ================= */
|
||||||
/**
 * Discards the current utterance and returns to WAIT_SPEECH.
 *
 * @param refreshIdle when true (the default) the idle timer is restarted;
 *   pass false to keep the existing idle deadline running.
 */
private fun resetToWaitSpeech(refreshIdle: Boolean = true) {
    // Drop everything captured so far.
    audioBuffer.clear()
    preBuffer.clear()

    // Clear detector state and the per-utterance flags.
    vadManager.reset()
    vadStarted = false
    silenceStartMs = 0L

    state = VoiceState.WAIT_SPEECH
    if (refreshIdle) {
        idleTimer = System.currentTimeMillis()
    }
}
|
||||||
|
|
||||||
private fun resetAll() {
|
private fun resetAll() {
|
||||||
@ -197,17 +198,28 @@ class VoiceController(
|
|||||||
/**
 * Appends [samples] to the rolling pre-buffer, evicting the oldest sample
 * whenever the buffer grows beyond PRE_BUFFER_SIZE entries.
 */
private fun cachePreBuffer(samples: FloatArray) {
    samples.forEach { sample ->
        preBuffer.addLast(sample)
        if (preBuffer.size > PRE_BUFFER_SIZE) {
            preBuffer.removeFirst()
        }
    }
}
|
||||||
|
|
||||||
/**
 * Root-mean-square level of [audio].
 *
 * @return 0 for an empty array, otherwise sqrt(mean of squared samples).
 */
private fun calcRms(audio: FloatArray): Float {
    if (audio.isEmpty()) return 0f
    // Accumulate in Float, in sample order, exactly like a plain loop would.
    val sumOfSquares = audio.fold(0f) { acc, sample -> acc + sample * sample }
    return sqrt(sumOfSquares / audio.size)
}
|
||||||
|
|
||||||
|
/**
 * Splits [audio] into consecutive frames of [frameSize] samples and returns the
 * RMS level of each frame. The last frame may be shorter than [frameSize].
 *
 * @param audio samples to analyse; an empty array yields an empty result.
 * @param frameSize number of samples per frame; must be positive.
 * @return one RMS value per frame, in order.
 * @throws IllegalArgumentException if [frameSize] is not positive (a
 *   non-positive size would otherwise never advance through the array).
 */
private fun calcRmsFrames(audio: FloatArray, frameSize: Int = 320): FloatArray {
    require(frameSize > 0) { "frameSize must be positive, was $frameSize" }

    // Ceiling division without risking Int overflow on the addition.
    val frameCount = audio.size / frameSize + if (audio.size % frameSize != 0) 1 else 0

    return FloatArray(frameCount) { frameIndex ->
        val start = frameIndex * frameSize
        val length = minOf(frameSize, audio.size - start)
        // Read the frame in place instead of copying it out first.
        var sum = 0f
        for (i in start until start + length) {
            sum += audio[i] * audio[i]
        }
        sqrt(sum / length)
    }
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -38,9 +38,9 @@ class WakeupManager(
|
|||||||
fun acceptAudio(samples: FloatArray) {
|
fun acceptAudio(samples: FloatArray) {
|
||||||
val s = stream ?: return
|
val s = stream ?: return
|
||||||
// ⭐ 远讲 / 播放补偿(非常关键)
|
// ⭐ 远讲 / 播放补偿(非常关键)
|
||||||
// for (i in samples.indices) {
|
for (i in samples.indices) {
|
||||||
// samples[i] *= 2.5f
|
samples[i] *= 2.5f
|
||||||
// }
|
}
|
||||||
s.acceptWaveform(samples, sampleRate)
|
s.acceptWaveform(samples, sampleRate)
|
||||||
|
|
||||||
while (kws.isReady(s)) {
|
while (kws.isReady(s)) {
|
||||||
|
|||||||
@ -52,6 +52,7 @@ import com.zs.smarthuman.utils.ViewSlideAnimator
|
|||||||
import com.zs.smarthuman.viewmodel.MainViewModel
|
import com.zs.smarthuman.viewmodel.MainViewModel
|
||||||
import kotlinx.coroutines.CoroutineScope
|
import kotlinx.coroutines.CoroutineScope
|
||||||
import kotlinx.coroutines.Dispatchers
|
import kotlinx.coroutines.Dispatchers
|
||||||
|
import kotlinx.coroutines.Job
|
||||||
import kotlinx.coroutines.SupervisorJob
|
import kotlinx.coroutines.SupervisorJob
|
||||||
import kotlinx.coroutines.delay
|
import kotlinx.coroutines.delay
|
||||||
import kotlinx.coroutines.launch
|
import kotlinx.coroutines.launch
|
||||||
@ -136,10 +137,12 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
when (it) {
|
when (it) {
|
||||||
is ApiResult.Error -> {
|
is ApiResult.Error -> {
|
||||||
Toaster.showShort("上传失败")
|
Toaster.showShort("上传失败")
|
||||||
|
voiceController?.onUploadFinished(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
is ApiResult.Success<*> -> {
|
is ApiResult.Success<*> -> {
|
||||||
Toaster.showShort("上传成功")
|
Toaster.showShort("上传成功")
|
||||||
|
voiceController?.onUploadFinished(true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -166,17 +169,17 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
},
|
},
|
||||||
onFinalAudio = { audio ->
|
onFinalAudio = { audio ->
|
||||||
Log.d("lrs", "检测到语音,长度=${audio.size}")
|
Log.d("lrs", "检测到语音,长度=${audio.size}")
|
||||||
// mViewModel?.uploadVoice(
|
mViewModel?.uploadVoice(
|
||||||
// AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
|
AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
|
||||||
// 1
|
1
|
||||||
// )
|
|
||||||
loadLocalJsonAndPlay()
|
|
||||||
val file = File(
|
|
||||||
getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
|
|
||||||
"xxx.wav"
|
|
||||||
)
|
)
|
||||||
AudioDebugUtil.saveFloatPcmAsWav(audio, file)
|
// loadLocalJsonAndPlay()
|
||||||
LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
|
// val file = File(
|
||||||
|
// getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
|
||||||
|
// "xxx.wav"
|
||||||
|
// )
|
||||||
|
// AudioDebugUtil.saveFloatPcmAsWav(audio, file)
|
||||||
|
// LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
|
||||||
|
|
||||||
},
|
},
|
||||||
onStateChanged = { state ->
|
onStateChanged = { state ->
|
||||||
@ -204,8 +207,9 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
when (msg.msgContentType) {
|
when (msg.msgContentType) {
|
||||||
MessageContentType.RECEIVE_VOICE_STREAM.msgContentType -> {
|
MessageContentType.RECEIVE_VOICE_STREAM.msgContentType -> {
|
||||||
lifecycleScope.launch(Dispatchers.IO) {
|
lifecycleScope.launch(Dispatchers.IO) {
|
||||||
UnityPlayerHolder.getInstance()
|
// UnityPlayerHolder.getInstance()
|
||||||
.startTalking(msg.content)
|
// .startTalking(msg.content)
|
||||||
|
loadLocalJsonAndPlay()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -315,6 +319,14 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
|
|
||||||
private var promptPlaying = false
|
private var promptPlaying = false
|
||||||
private var backPlaying = false
|
private var backPlaying = false
|
||||||
|
private var promptTimeoutJob: Job? = null
|
||||||
|
private val PROMPT_PLAY_TIMEOUT_MS = 3000L // 3 seconds (3000 ms)
|
||||||
|
|
||||||
|
|
||||||
|
private var backTimeoutJob: Job? = null
|
||||||
|
private val BACK_PLAY_TIMEOUT_MS = 3000L // 3 seconds (3000 ms)
|
||||||
|
|
||||||
|
|
||||||
fun onAudioProgressUpdated( // Unity 调用此方法传递音频进度
|
fun onAudioProgressUpdated( // Unity 调用此方法传递音频进度
|
||||||
progress: Float,
|
progress: Float,
|
||||||
state: Int,//0stop 2pause 1play 3complete 4loading 5error
|
state: Int,//0stop 2pause 1play 3complete 4loading 5error
|
||||||
@ -331,6 +343,13 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
if (!promptPlaying) {
|
if (!promptPlaying) {
|
||||||
promptPlaying = true
|
promptPlaying = true
|
||||||
voiceController?.onPlayStartPrompt()
|
voiceController?.onPlayStartPrompt()
|
||||||
|
|
||||||
|
promptTimeoutJob = lifecycleScope.launch {
|
||||||
|
delay(PROMPT_PLAY_TIMEOUT_MS)
|
||||||
|
promptPlaying = false
|
||||||
|
voiceController?.onPlayEndPrompt()
|
||||||
|
promptTimeoutJob?.cancel()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -338,6 +357,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
if (promptPlaying) {
|
if (promptPlaying) {
|
||||||
promptPlaying = false
|
promptPlaying = false
|
||||||
voiceController?.onPlayEndPrompt()
|
voiceController?.onPlayEndPrompt()
|
||||||
|
promptTimeoutJob?.cancel()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -360,7 +380,6 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
|
|||||||
|
|
||||||
3 -> { // complete
|
3 -> { // complete
|
||||||
if (backPlaying) {
|
if (backPlaying) {
|
||||||
Toaster.showShort("借宿了")
|
|
||||||
backPlaying = false
|
backPlaying = false
|
||||||
voiceController?.onPlayEndBackend()
|
voiceController?.onPlayEndBackend()
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user