添加超时类型判断

This commit is contained in:
林若思 2026-01-08 17:07:36 +08:00
parent 725f1a9019
commit e9eab6ae23
3 changed files with 181 additions and 83 deletions

View File

@ -4,14 +4,25 @@ import android.content.res.AssetManager
import com.blankj.utilcode.util.LogUtils
import java.util.ArrayDeque
// 新增:超时类型枚举(区分两种超时)
enum class TimeoutType {
    IDLE_TIMEOUT, // idle timeout: the user never spoke at all after wakeup
    INVALID_SPEECH_TIMEOUT // invalid-speech timeout: the user kept producing speech that was filtered out after wakeup
}
// Timeout-tip callback: lets the host (e.g. the Activity) choose a different prompt per timeout type.
typealias OnTimeoutTip = (TimeoutType) -> Unit
class VoiceController(
assetManager: AssetManager,
private val onWakeup: () -> Unit,
private val onFinalAudio: (FloatArray) -> Unit,
private val idleTimeoutSeconds: Int = 10,
private val maxRecordingSeconds: Int = 10,
idleTimeoutSeconds: Int = 10,
maxRecordingSeconds: Int = 10,
private val onStateChanged: ((VoiceState) -> Unit)? = null,
private val stopBackendAudio: (() -> Unit)? = null
private val stopBackendAudio: (() -> Unit)? = null,
// 新增:超时提示语回调
private val onTimeoutTip: OnTimeoutTip? = null
) {
private val TAG = "VoiceController"
@ -102,13 +113,12 @@ class VoiceController(
private var lastInvalidResetMs = 0L
private val INVALID_RESET_DEBOUNCE_MS = 1500L // 1.5秒内不重复重置
// 阈值配置数据类
private data class ThresholdConfig(
val energyThreshold: Float,
val vadRatioThreshold: Float,
val minScore: Int,
val scene: String
)
// ========== 核心新增:区分超时类型的标记 ==========
private var hasInvalidSpeech = false // 是否有过无效说话行为
private var currentTimeoutType: TimeoutType = TimeoutType.IDLE_TIMEOUT // 当前超时类型
// ========== 补充MIN_EFFECTIVE_SPEECH_RMS 常量和VadManager对齐 ==========
private val MIN_EFFECTIVE_SPEECH_RMS = 0.001f
/* ================= 音频入口 ================= */
fun acceptAudio(samples: FloatArray) {
@ -144,7 +154,15 @@ class VoiceController(
if ((waitSpeechStartMs > 0 && now - waitSpeechStartMs >= idleTimeoutMs) ||
(waitSpeechFailStartMs > 0 && now - waitSpeechFailStartMs >= idleTimeoutMs)
) {
LogUtils.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP")
// 核心修改:超时前先判定超时类型
currentTimeoutType = if (hasInvalidSpeech) {
TimeoutType.INVALID_SPEECH_TIMEOUT
} else {
TimeoutType.IDLE_TIMEOUT
}
LogUtils.d(TAG, "⏱ WAIT_SPEECH timeout → WAIT_WAKEUP | 超时类型: $currentTimeoutType")
// 触发超时提示语回调
onTimeoutTip?.invoke(currentTimeoutType)
resetAll()
return
}
@ -255,6 +273,10 @@ class VoiceController(
waitSpeechFailStartMs = System.currentTimeMillis()
waitSpeechStartMs = System.currentTimeMillis()
// 核心新增:唤醒时重置无效说话标记(每次唤醒都是新的会话)
hasInvalidSpeech = false
currentTimeoutType = TimeoutType.IDLE_TIMEOUT
if (interrupt) {
audioBuffer.clear()
vadManager.reset()
@ -297,6 +319,8 @@ class VoiceController(
if (!vadStarted || duration < MIN_SPEECH_MS) {
LogUtils.d(TAG, "❌ 语音过短: $duration ms | 基线: $currentEnvBaseline")
// 核心新增无效说话语音过短标记hasInvalidSpeech为true
hasInvalidSpeech = true
resetToWaitSpeech()
return
}
@ -312,6 +336,8 @@ class VoiceController(
// 若录音中已识别出多人对话,直接过滤
if (isMultiPersonDialogueDetected) {
LogUtils.w(TAG, "❌ 过滤多人对话垃圾语音(实时识别) | 时长: $duration ms")
// 核心新增无效说话多人对话标记hasInvalidSpeech为true
hasInvalidSpeech = true
resetToWaitSpeech()
return
}
@ -324,6 +350,8 @@ class VoiceController(
state = VoiceState.UPLOADING
onFinalAudio(audio)
resetRealtimeStats() // 重置实时统计
// 核心新增:有效语音通过后,重置无效说话标记(后续超时重新判定)
hasInvalidSpeech = false
return
}
@ -332,6 +360,8 @@ class VoiceController(
val isInvalidPeakRatio = peakAvgRatio < MIN_VALID_PEAK_AVG_RATIO
if (isFarField && isInvalidPeakRatio) {
LogUtils.w(TAG, "❌ 远场/无效语音过滤 | 能量: $avgEnergy < $MAX_FAR_FIELD_ENERGY")
// 核心新增无效说话远场标记hasInvalidSpeech为true
hasInvalidSpeech = true
resetToWaitSpeech()
return
}
@ -344,6 +374,8 @@ class VoiceController(
peakPositionRatio > MAX_PEAK_POSITION_RATIO
if (isDiscontinuous) {
LogUtils.w(TAG, "❌ 非连续杂音过滤 | 连续占比: $continuousRatio < $MIN_CONTINUOUS_FRAME_RATIO")
// 核心新增无效说话非连续杂音标记hasInvalidSpeech为true
hasInvalidSpeech = true
resetToWaitSpeech()
return
}
@ -380,6 +412,8 @@ class VoiceController(
val vadRatioPass = vadRatio >= thresholdConfig.vadRatioThreshold
if (!energyPass || !vadRatioPass) {
LogUtils.w(TAG, "❌ 低能量语音阈值过滤 | 能量: $avgEnergy < ${thresholdConfig.energyThreshold} | 占比: $vadRatio < ${thresholdConfig.vadRatioThreshold} | 场景: ${thresholdConfig.scene}")
// 核心新增无效说话低能量标记hasInvalidSpeech为true
hasInvalidSpeech = true
resetToWaitSpeech()
return
}
@ -397,6 +431,8 @@ class VoiceController(
val pass = score >= thresholdConfig.minScore
if (!pass) {
LogUtils.w(TAG, "❌ 评分不足过滤 | 总分: $score < ${thresholdConfig.minScore} | 场景: ${thresholdConfig.scene}")
// 核心新增无效说话评分不足标记hasInvalidSpeech为true
hasInvalidSpeech = true
resetToWaitSpeech()
return
}
@ -406,6 +442,8 @@ class VoiceController(
state = VoiceState.UPLOADING
onFinalAudio(audio)
resetRealtimeStats() // 重置实时统计
// 核心新增:有效语音通过后,重置无效说话标记
hasInvalidSpeech = false
LogUtils.i(TAG, "✅ 低能量语音通过 | 时长: $duration ms | 能量: $avgEnergy | 场景: ${thresholdConfig.scene}")
}
@ -455,7 +493,7 @@ class VoiceController(
}
private fun resetToWaitSpeech() {
LogUtils.d(TAG, "🔄 重置到等待说话 | 基线: $currentEnvBaseline")
LogUtils.d(TAG, "🔄 重置到等待说话 | 基线: $currentEnvBaseline | 已标记无效说话: $hasInvalidSpeech")
val now = System.currentTimeMillis()
if (now - lastInvalidResetMs < INVALID_RESET_DEBOUNCE_MS) {
LogUtils.d(TAG, "🛡 防抖1.5秒内重复无效语音,跳过重置")
@ -471,7 +509,7 @@ class VoiceController(
}
private fun resetAll() {
LogUtils.d(TAG, "🔄 重置所有状态 | 基线: $currentEnvBaseline")
LogUtils.d(TAG, "🔄 重置所有状态 | 基线: $currentEnvBaseline | 本次超时类型: $currentTimeoutType")
audioBuffer.clear()
preBuffer.clear()
vadManager.reset()
@ -482,7 +520,10 @@ class VoiceController(
envNoiseBuffer.clear()
currentEnvBaseline = 0.001f
resetRealtimeStats() // 重置实时统计
LogUtils.d(TAG, "🔄 环境基线已重置 | 新基线: $currentEnvBaseline")
// 核心新增:重置所有状态时,同时重置无效说话标记和超时类型
hasInvalidSpeech = false
currentTimeoutType = TimeoutType.IDLE_TIMEOUT
LogUtils.d(TAG, "🔄 环境基线已重置 | 新基线: $currentEnvBaseline | 无效说话标记已重置")
state = VoiceState.WAIT_WAKEUP
}
@ -492,6 +533,9 @@ class VoiceController(
vadManager.reset()
envNoiseBuffer.clear()
resetRealtimeStats() // 重置实时统计
// 核心新增:释放资源时重置标记
hasInvalidSpeech = false
currentTimeoutType = TimeoutType.IDLE_TIMEOUT
}
private fun cachePreBuffer(samples: FloatArray) {
@ -501,6 +545,11 @@ class VoiceController(
}
}
// ========== 补充MIN_EFFECTIVE_SPEECH_RMS 常量和VadManager对齐 ==========
private val MIN_EFFECTIVE_SPEECH_RMS = 0.001f
// 阈值配置数据类
private data class ThresholdConfig(
val energyThreshold: Float,
val vadRatioThreshold: Float,
val minScore: Int,
val scene: String
)
}

View File

@ -43,41 +43,30 @@ import com.zs.smarthuman.bean.VoiceBeanResp
import com.zs.smarthuman.common.UserInfoManager
import com.zs.smarthuman.databinding.ActivityMainBinding
import com.zs.smarthuman.http.ApiResult
import com.zs.smarthuman.http.ApiService
import com.zs.smarthuman.im.chat.MessageContentType
import com.zs.smarthuman.im.chat.bean.SingleMessage
import com.zs.smarthuman.kt.releaseIM
import com.zs.smarthuman.sherpa.TimeoutType
import com.zs.smarthuman.sherpa.VoiceController
import com.zs.smarthuman.sherpa.VoiceState
import com.zs.smarthuman.toast.Toaster
import com.zs.smarthuman.utils.AudioDebugUtil
import com.zs.smarthuman.utils.AudioPcmUtil
import com.zs.smarthuman.utils.DangerousUtils
import com.zs.smarthuman.utils.LogFileUtils
import com.zs.smarthuman.utils.UnityPlayerHolder
import com.zs.smarthuman.utils.ViewSlideAnimator
import com.zs.smarthuman.viewmodel.MainViewModel
import com.zs.smarthuman.widget.VersionUpdateDialog
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.delay
import kotlinx.coroutines.flow.catch
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import rxhttp.awaitResult
import rxhttp.toAwaitString
import rxhttp.toDownloadFlow
import rxhttp.wrapper.param.RxHttp
import rxhttp.wrapper.param.toAwaitResponse
import java.io.File
import java.nio.ByteBuffer
import java.nio.ByteOrder
import kotlin.compareTo
import kotlin.concurrent.thread
import kotlin.math.sqrt
import kotlin.text.compareTo
/**
* @description:
@ -89,7 +78,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
private var voiceController: VoiceController? = null
private var audioRecord: AudioRecord? = null
private var isRecording = false
private val audioSource = MediaRecorder.AudioSource.MIC
private val audioSource = MediaRecorder.AudioSource.VOICE_COMMUNICATION
private val sampleRateInHz = 16000
private val channelConfig = AudioFormat.CHANNEL_IN_MONO
private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
@ -105,7 +94,6 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
.setDirection(ViewSlideAnimator.Direction.VERTICAL)
.setAnimationDuration(300)
.build()
}
override fun initData() {
@ -156,12 +144,12 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
mViewModel?.uploadVoiceLiveData?.observe(this) {
when (it) {
is ApiResult.Error -> {
// Toaster.showShort("上传失败")
Toaster.showShort("上传失败")
voiceController?.onUploadFinished(false)
}
is ApiResult.Success<*> -> {
// Toaster.showShort("上传成功")
Toaster.showShort("上传成功")
voiceController?.onUploadFinished(true)
}
}
@ -207,67 +195,59 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
},
onFinalAudio = { audio ->
Log.d("lrsxx", "检测到语音,长度=${audio.size}")
// mViewModel?.uploadVoice(
// AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
// 1
// )
loadLocalJsonAndPlay()
val file = File(
getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
"xxx.wav"
mViewModel?.uploadVoice(
AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
1
)
AudioDebugUtil.saveFloatPcmAsWav(audio, file)
LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
lifecycleScope.launch(Dispatchers.Main) {
mVerticalAnimator?.show()
}
// loadLocalJsonAndPlay()
// val file = File(
// getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
// "xxx.wav"
// )
// AudioDebugUtil.saveFloatPcmAsWav(audio, file)
// LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
// lifecycleScope.launch(Dispatchers.Main) {
//
// mVerticalAnimator?.show()
// }
},
onStateChanged = { state ->
when (state) {
VoiceState.WAIT_WAKEUP -> {
lifecycleScope.launch(Dispatchers.Main) {
mVerticalAnimator?.hide()
UnityPlayerHolder.getInstance()
.sendVoiceToUnity(
voiceInfo = mutableListOf<VoiceBeanResp>().apply {
add(
VoiceBeanResp(
audioUrl = /*UserInfoManager.userInfo?.endAudioUrl ?: */"https://static.seerteach.net/aidialogue/userWakeUpAudio/ttsmaker-file-2025-12-31-16-2-51.mp3"
)
)
}
)
}
}
VoiceState.WAIT_SPEECH -> {
}
VoiceState.RECORDING -> {
startRecording()
}
VoiceState.PLAYING_PROMPT -> {}
VoiceState.PLAYING_BACKEND -> {}
VoiceState.UPLOADING -> {}
VoiceState.WAIT_SPEECH_COOLDOWN -> {}
else -> {}
}
},
onTimeoutTip = {timeoutType->
when(timeoutType){
TimeoutType.IDLE_TIMEOUT -> {//闲时超时
)
}
TimeoutType.INVALID_SPEECH_TIMEOUT -> {//无效语音后超时
}
}
lifecycleScope.launch(Dispatchers.Main) {
mVerticalAnimator?.hide()
UnityPlayerHolder.getInstance()
.sendVoiceToUnity(
voiceInfo = mutableListOf<VoiceBeanResp>().apply {
add(
VoiceBeanResp(
audioUrl = "https://static.seerteach.net/aidialogue/userWakeUpAudio/ttsmaker-file-2025-12-31-16-2-51.mp3"
)
)
}
)
}
}
)
}
override fun receivedIMMsg(msg: SingleMessage) {
when (msg.msgContentType) {
MessageContentType.RECEIVE_VOICE_STREAM.msgContentType -> {
lifecycleScope.launch(Dispatchers.IO) {
// UnityPlayerHolder.getInstance()
// .startTalking(msg.content)
LogFileUtils.logToFile2(this@MainActivity,msg.content)
UnityPlayerHolder.getInstance()
.startTalking(msg.content)
// loadLocalJsonAndPlay()
}
}
@ -345,6 +325,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
Log.d("VoiceService", "✅ System AEC enabled")
}
private var noiseSuppressor: NoiseSuppressor? = null
@ -361,7 +342,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
}
// private val audioBuffer = mutableListOf<Float>()
// private val audioBuffer = mutableListOf<Float>()
//开始录音
private fun startRecording() {
if (isRecording) return

View File

@ -0,0 +1,68 @@
package com.zs.smarthuman.utils;
import android.content.Context;
import android.os.Environment;
import com.blankj.utilcode.util.LogUtils;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
/**
 * Appends text lines to per-purpose log files under the app's external
 * Downloads directory ({@code <external-files>/Download/seerteach/}).
 *
 * Each call opens the file in append mode, writes the message followed by a
 * newline, and closes the file. Failures are logged via stack trace and
 * swallowed (best-effort logging must never crash the app).
 */
public class LogFileUtils {
    public static final String LOG_DIR_NAME = "seerteach";
    private static final String LOG_FILE_NAME = "app_log.txt";
    private static final String LOG_FILE_NAME2 = "unity.txt";

    /**
     * Appends {@code log} as one line to the general application log file.
     *
     * @param context used to resolve the app-private external files dir
     * @param log     the message to append (a trailing newline is added)
     */
    public static void logToFile(Context context, String log) {
        appendLine(context, LOG_FILE_NAME, log);
    }

    /**
     * Appends {@code log} as one line to the Unity-specific log file.
     *
     * @param context used to resolve the app-private external files dir
     * @param log     the message to append (a trailing newline is added)
     */
    public static void logToFile2(Context context, String log) {
        appendLine(context, LOG_FILE_NAME2, log);
    }

    /**
     * Shared implementation for both public entry points: ensures the log
     * directory and file exist, then appends {@code log} plus a newline.
     *
     * Uses try-with-resources so the stream is closed even if the write
     * throws (the original open/close pattern leaked the FileOutputStream
     * on a mid-write IOException).
     */
    private static void appendLine(Context context, String fileName, String log) {
        try {
            File logDir = new File(
                    context.getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS).getAbsolutePath(),
                    LOG_DIR_NAME);
            LogUtils.iTag("lrs", ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:" + logDir.getAbsolutePath());
            if (!logDir.exists()) {
                logDir.mkdirs();
            }
            File logFile = new File(logDir, fileName);
            if (!logFile.exists()) {
                logFile.createNewFile();
            }
            // Append mode (second ctor arg true) so prior entries are preserved.
            try (BufferedWriter bw = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(logFile, true)))) {
                bw.write(log);
                bw.newLine();
            }
        } catch (IOException e) {
            // Best-effort: never let diagnostics logging crash the caller.
            e.printStackTrace();
        }
    }
}