diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt index 517014c..2389b5e 100644 --- a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt +++ b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt @@ -4,14 +4,25 @@ import android.content.res.AssetManager import com.blankj.utilcode.util.LogUtils import java.util.ArrayDeque +// 新增:超时类型枚举(区分两种超时) +enum class TimeoutType { + IDLE_TIMEOUT, // 唤醒后全程没说话的闲时超时 + INVALID_SPEECH_TIMEOUT // 唤醒后一直无效说话的超时 +} + +// 新增:提示语回调(外部可通过此获取不同的提示语) +typealias OnTimeoutTip = (TimeoutType) -> Unit + class VoiceController( assetManager: AssetManager, private val onWakeup: () -> Unit, private val onFinalAudio: (FloatArray) -> Unit, - private val idleTimeoutSeconds: Int = 10, - private val maxRecordingSeconds: Int = 10, + idleTimeoutSeconds: Int = 10, + maxRecordingSeconds: Int = 10, private val onStateChanged: ((VoiceState) -> Unit)? = null, - private val stopBackendAudio: (() -> Unit)? = null + private val stopBackendAudio: (() -> Unit)? = null, + // 新增:超时提示语回调 + private val onTimeoutTip: OnTimeoutTip? = null ) { private val TAG = "VoiceController" @@ -102,13 +113,12 @@ class VoiceController( private var lastInvalidResetMs = 0L private val INVALID_RESET_DEBOUNCE_MS = 1500L // 1.5秒内不重复重置 - // 阈值配置数据类 - private data class ThresholdConfig( - val energyThreshold: Float, - val vadRatioThreshold: Float, - val minScore: Int, - val scene: String - ) + // ========== 核心新增:区分超时类型的标记 ========== + private var hasInvalidSpeech = false // 是否有过无效说话行为 + private var currentTimeoutType: TimeoutType = TimeoutType.IDLE_TIMEOUT // 当前超时类型 + + // ========== 补充:MIN_EFFECTIVE_SPEECH_RMS 常量(和VadManager对齐) ========== + private val MIN_EFFECTIVE_SPEECH_RMS = 0.001f /* ================= 音频入口 ================= */ fun acceptAudio(samples: FloatArray) { @@ -144,7 +154,15 @@ class VoiceController( if ((waitSpeechStartMs > 0 && now - waitSpeechStartMs >= idleTimeoutMs) || (waitSpeechFailStartMs > 0 && now - waitSpeechFailStartMs >= idleTimeoutMs) ) { - LogUtils.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP") + // 核心修改:超时前先判定超时类型 + currentTimeoutType = if (hasInvalidSpeech) { + TimeoutType.INVALID_SPEECH_TIMEOUT + } else { + TimeoutType.IDLE_TIMEOUT + } + LogUtils.d(TAG, "⏱ WAIT_SPEECH timeout → WAIT_WAKEUP | 超时类型: $currentTimeoutType") + // 触发超时提示语回调 + onTimeoutTip?.invoke(currentTimeoutType) resetAll() return } @@ -255,6 +273,10 @@ class VoiceController( waitSpeechFailStartMs = System.currentTimeMillis() waitSpeechStartMs = System.currentTimeMillis() + // 核心新增:唤醒时重置无效说话标记(每次唤醒都是新的会话) + hasInvalidSpeech = false + currentTimeoutType = TimeoutType.IDLE_TIMEOUT + if (interrupt) { audioBuffer.clear() vadManager.reset() @@ -297,6 +319,8 @@ class VoiceController( if (!vadStarted || duration < MIN_SPEECH_MS) { LogUtils.d(TAG, "❌ 语音过短: $duration ms | 基线: $currentEnvBaseline") + // 核心新增:无效说话(语音过短),标记hasInvalidSpeech为true + hasInvalidSpeech = true resetToWaitSpeech() return } @@ -312,6 +336,8 @@ class VoiceController( // 若录音中已识别出多人对话,直接过滤 if (isMultiPersonDialogueDetected) { LogUtils.w(TAG, "❌ 过滤多人对话垃圾语音(实时识别) | 时长: $duration ms") + // 核心新增:无效说话(多人对话),标记hasInvalidSpeech为true + hasInvalidSpeech = true resetToWaitSpeech() return } @@ -324,6 +350,8 @@ class VoiceController( state = VoiceState.UPLOADING onFinalAudio(audio) resetRealtimeStats() // 重置实时统计 + // 核心新增:有效语音通过后,重置无效说话标记(后续超时重新判定) + hasInvalidSpeech = false return } @@ -332,6 +360,8 @@ class VoiceController( val isInvalidPeakRatio = peakAvgRatio < MIN_VALID_PEAK_AVG_RATIO if (isFarField && isInvalidPeakRatio) { LogUtils.w(TAG, "❌ 远场/无效语音过滤 | 能量: $avgEnergy < $MAX_FAR_FIELD_ENERGY") + // 核心新增:无效说话(远场),标记hasInvalidSpeech为true + hasInvalidSpeech = true resetToWaitSpeech() return } @@ -344,6 +374,8 @@ class VoiceController( peakPositionRatio > MAX_PEAK_POSITION_RATIO if (isDiscontinuous) { LogUtils.w(TAG, "❌ 非连续杂音过滤 | 连续占比: $continuousRatio < $MIN_CONTINUOUS_FRAME_RATIO") + // 核心新增:无效说话(非连续杂音),标记hasInvalidSpeech为true + hasInvalidSpeech = true resetToWaitSpeech() return } @@ -380,6 +412,8 @@ class VoiceController( val vadRatioPass = vadRatio >= thresholdConfig.vadRatioThreshold if (!energyPass || !vadRatioPass) { LogUtils.w(TAG, "❌ 低能量语音阈值过滤 | 能量: $avgEnergy < ${thresholdConfig.energyThreshold} | 占比: $vadRatio < ${thresholdConfig.vadRatioThreshold} | 场景: ${thresholdConfig.scene}") + // 核心新增:无效说话(低能量),标记hasInvalidSpeech为true + hasInvalidSpeech = true resetToWaitSpeech() return } @@ -397,6 +431,8 @@ class VoiceController( val pass = score >= thresholdConfig.minScore if (!pass) { LogUtils.w(TAG, "❌ 评分不足过滤 | 总分: $score < ${thresholdConfig.minScore} | 场景: ${thresholdConfig.scene}") + // 核心新增:无效说话(评分不足),标记hasInvalidSpeech为true + hasInvalidSpeech = true resetToWaitSpeech() return } @@ -406,6 +442,8 @@ class VoiceController( state = VoiceState.UPLOADING onFinalAudio(audio) resetRealtimeStats() // 重置实时统计 + // 核心新增:有效语音通过后,重置无效说话标记 + hasInvalidSpeech = false LogUtils.i(TAG, "✅ 低能量语音通过 | 时长: $duration ms | 能量: $avgEnergy | 场景: ${thresholdConfig.scene}") } @@ -455,7 +493,7 @@ class VoiceController( } private fun resetToWaitSpeech() { - LogUtils.d(TAG, "🔄 重置到等待说话 | 基线: $currentEnvBaseline") + LogUtils.d(TAG, "🔄 重置到等待说话 | 基线: $currentEnvBaseline | 已标记无效说话: $hasInvalidSpeech") val now = System.currentTimeMillis() if (now - lastInvalidResetMs < INVALID_RESET_DEBOUNCE_MS) { LogUtils.d(TAG, "🛡 防抖:1.5秒内重复无效语音,跳过重置") @@ -471,7 +509,7 @@ class VoiceController( } private fun resetAll() { - LogUtils.d(TAG, "🔄 重置所有状态 | 基线: $currentEnvBaseline") + LogUtils.d(TAG, "🔄 重置所有状态 | 基线: $currentEnvBaseline | 本次超时类型: $currentTimeoutType") audioBuffer.clear() preBuffer.clear() vadManager.reset() @@ -482,7 +520,10 @@ class VoiceController( envNoiseBuffer.clear() currentEnvBaseline = 0.001f resetRealtimeStats() // 重置实时统计 - LogUtils.d(TAG, "🔄 环境基线已重置 | 新基线: $currentEnvBaseline") + // 核心新增:重置所有状态时,同时重置无效说话标记和超时类型 + hasInvalidSpeech = false + currentTimeoutType = TimeoutType.IDLE_TIMEOUT + LogUtils.d(TAG, "🔄 环境基线已重置 | 新基线: $currentEnvBaseline | 无效说话标记已重置") state = VoiceState.WAIT_WAKEUP } @@ -492,6 +533,9 @@ class VoiceController( vadManager.reset() envNoiseBuffer.clear() resetRealtimeStats() // 重置实时统计 + // 核心新增:释放资源时重置标记 + hasInvalidSpeech = false + currentTimeoutType = TimeoutType.IDLE_TIMEOUT } private fun cachePreBuffer(samples: FloatArray) { @@ -501,6 +545,11 @@ class VoiceController( } } - // ========== 补充:MIN_EFFECTIVE_SPEECH_RMS 常量(和VadManager对齐) ========== - private val MIN_EFFECTIVE_SPEECH_RMS = 0.001f + // 阈值配置数据类 + private data class ThresholdConfig( + val energyThreshold: Float, + val vadRatioThreshold: Float, + val minScore: Int, + val scene: String + ) } \ No newline at end of file diff --git a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt index 7097336..9d30b7f 100644 --- a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt +++ b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt @@ -43,41 +43,30 @@ import com.zs.smarthuman.bean.VoiceBeanResp import com.zs.smarthuman.common.UserInfoManager import com.zs.smarthuman.databinding.ActivityMainBinding import com.zs.smarthuman.http.ApiResult -import com.zs.smarthuman.http.ApiService import com.zs.smarthuman.im.chat.MessageContentType import com.zs.smarthuman.im.chat.bean.SingleMessage import com.zs.smarthuman.kt.releaseIM +import com.zs.smarthuman.sherpa.TimeoutType import com.zs.smarthuman.sherpa.VoiceController import com.zs.smarthuman.sherpa.VoiceState import com.zs.smarthuman.toast.Toaster import com.zs.smarthuman.utils.AudioDebugUtil import com.zs.smarthuman.utils.AudioPcmUtil import com.zs.smarthuman.utils.DangerousUtils +import com.zs.smarthuman.utils.LogFileUtils import com.zs.smarthuman.utils.UnityPlayerHolder import com.zs.smarthuman.utils.ViewSlideAnimator import com.zs.smarthuman.viewmodel.MainViewModel import com.zs.smarthuman.widget.VersionUpdateDialog -import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Job -import kotlinx.coroutines.SupervisorJob import kotlinx.coroutines.delay import kotlinx.coroutines.flow.catch import kotlinx.coroutines.launch -import kotlinx.coroutines.withContext -import rxhttp.awaitResult -import rxhttp.toAwaitString import rxhttp.toDownloadFlow import rxhttp.wrapper.param.RxHttp -import rxhttp.wrapper.param.toAwaitResponse import java.io.File -import java.nio.ByteBuffer -import java.nio.ByteOrder -import kotlin.compareTo -import kotlin.concurrent.thread -import kotlin.math.sqrt -import kotlin.text.compareTo /** * @description: @@ -89,7 +78,7 @@ class MainActivity : BaseViewModelActivity() private var voiceController: VoiceController? = null private var audioRecord: AudioRecord? = null private var isRecording = false - private val audioSource = MediaRecorder.AudioSource.MIC + private val audioSource = MediaRecorder.AudioSource.VOICE_COMMUNICATION private val sampleRateInHz = 16000 private val channelConfig = AudioFormat.CHANNEL_IN_MONO private val audioFormat = AudioFormat.ENCODING_PCM_16BIT @@ -105,7 +94,6 @@ class MainActivity : BaseViewModelActivity() .setDirection(ViewSlideAnimator.Direction.VERTICAL) .setAnimationDuration(300) .build() - } override fun initData() { @@ -156,12 +144,12 @@ class MainActivity : BaseViewModelActivity() mViewModel?.uploadVoiceLiveData?.observe(this) { when (it) { is ApiResult.Error -> { -// Toaster.showShort("上传失败") + Toaster.showShort("上传失败") voiceController?.onUploadFinished(false) } is ApiResult.Success<*> -> { -// Toaster.showShort("上传成功") + Toaster.showShort("上传成功") voiceController?.onUploadFinished(true) } } @@ -207,67 +195,59 @@ class MainActivity : BaseViewModelActivity() }, onFinalAudio = { audio -> Log.d("lrsxx", "检测到语音,长度=${audio.size}") -// mViewModel?.uploadVoice( -// AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)), -// 1 -// ) - loadLocalJsonAndPlay() - val file = File( - getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(), - "xxx.wav" + mViewModel?.uploadVoice( + AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)), + 1 ) - AudioDebugUtil.saveFloatPcmAsWav(audio, file) - LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}") - lifecycleScope.launch(Dispatchers.Main) { - - mVerticalAnimator?.show() - } +// loadLocalJsonAndPlay() +// val file = File( +// getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(), +// "xxx.wav" +// ) +// AudioDebugUtil.saveFloatPcmAsWav(audio, file) +// LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}") +// lifecycleScope.launch(Dispatchers.Main) { +// +// mVerticalAnimator?.show() +// } }, onStateChanged = { state -> - when (state) { - VoiceState.WAIT_WAKEUP -> { - lifecycleScope.launch(Dispatchers.Main) { - mVerticalAnimator?.hide() - UnityPlayerHolder.getInstance() - .sendVoiceToUnity( - voiceInfo = mutableListOf().apply { - add( - VoiceBeanResp( - audioUrl = /*UserInfoManager.userInfo?.endAudioUrl ?: */"https://static.seerteach.net/aidialogue/userWakeUpAudio/ttsmaker-file-2025-12-31-16-2-51.mp3" - ) - ) - } - ) - } - } - VoiceState.WAIT_SPEECH -> { - - } - - VoiceState.RECORDING -> { - startRecording() - } - - VoiceState.PLAYING_PROMPT -> {} - VoiceState.PLAYING_BACKEND -> {} - VoiceState.UPLOADING -> {} - - VoiceState.WAIT_SPEECH_COOLDOWN -> {} - - else -> {} - } }, + onTimeoutTip = {timeoutType-> + when(timeoutType){ + TimeoutType.IDLE_TIMEOUT -> {//闲时超时 - ) + } + TimeoutType.INVALID_SPEECH_TIMEOUT -> {//无效语音后超时 + + } + } + lifecycleScope.launch(Dispatchers.Main) { + mVerticalAnimator?.hide() + UnityPlayerHolder.getInstance() + .sendVoiceToUnity( + voiceInfo = mutableListOf().apply { + add( + VoiceBeanResp( + audioUrl = "https://static.seerteach.net/aidialogue/userWakeUpAudio/ttsmaker-file-2025-12-31-16-2-51.mp3" + ) + ) + } + ) + + } + } + ) } override fun receivedIMMsg(msg: SingleMessage) { when (msg.msgContentType) { MessageContentType.RECEIVE_VOICE_STREAM.msgContentType -> { lifecycleScope.launch(Dispatchers.IO) { -// UnityPlayerHolder.getInstance() -// .startTalking(msg.content) + LogFileUtils.logToFile2(this@MainActivity,msg.content) + UnityPlayerHolder.getInstance() + .startTalking(msg.content) // loadLocalJsonAndPlay() } } @@ -345,6 +325,7 @@ class MainActivity : BaseViewModelActivity() Log.d("VoiceService", "✅ System AEC enabled") } + private var noiseSuppressor: NoiseSuppressor? = null @@ -361,7 +342,7 @@ class MainActivity : BaseViewModelActivity() } -// private val audioBuffer = mutableListOf() + // private val audioBuffer = mutableListOf() //开始录音 private fun startRecording() { if (isRecording) return diff --git a/app/src/main/java/com/zs/smarthuman/utils/LogFileUtils.java b/app/src/main/java/com/zs/smarthuman/utils/LogFileUtils.java new file mode 100644 index 0000000..32f7bf7 --- /dev/null +++ b/app/src/main/java/com/zs/smarthuman/utils/LogFileUtils.java @@ -0,0 +1,68 @@ +package com.zs.smarthuman.utils; + +import android.content.Context; +import android.os.Environment; + +import com.blankj.utilcode.util.LogUtils; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; + +public class LogFileUtils { + public static final String LOG_DIR_NAME = "seerteach"; + private static final String LOG_FILE_NAME = "app_log.txt"; + private static final String LOG_FILE_NAME2 = "unity.txt"; + + public static void logToFile(Context context, String log) { + try { + File logDir = new File(context.getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS).getAbsolutePath(),LOG_DIR_NAME); +// logDir.deleteOnExit(); + LogUtils.iTag("lrs",">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:"+logDir.getAbsolutePath()); + if (!logDir.exists()) { + logDir.mkdirs(); + } + + File logFile = new File(logDir, LOG_FILE_NAME); + if (!logFile.exists()) { + logFile.createNewFile(); + } + + FileOutputStream fos = new FileOutputStream(logFile, true); + OutputStreamWriter osw = new OutputStreamWriter(fos); + BufferedWriter bw = new BufferedWriter(osw); + bw.write(log); + bw.newLine(); + bw.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + public static void logToFile2(Context context, String log) { + try { + File logDir = new File(context.getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS).getAbsolutePath(),LOG_DIR_NAME); +// logDir.deleteOnExit(); + LogUtils.iTag("lrs",">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:"+logDir.getAbsolutePath()); + if (!logDir.exists()) { + logDir.mkdirs(); + } + + File logFile = new File(logDir, LOG_FILE_NAME2); + if (!logFile.exists()) { + logFile.createNewFile(); + } + + FileOutputStream fos = new FileOutputStream(logFile, true); + OutputStreamWriter osw = new OutputStreamWriter(fos); + BufferedWriter bw = new BufferedWriter(osw); + bw.write(log); + bw.newLine(); + bw.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } +}