diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt index 4029b3c..f3c150b 100644 --- a/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt +++ b/app/src/main/java/com/zs/smarthuman/sherpa/VadManager.kt @@ -1,6 +1,7 @@ package com.zs.smarthuman.sherpa import android.content.res.AssetManager +import com.blankj.utilcode.util.LogUtils import com.k2fsa.sherpa.onnx.Vad import com.k2fsa.sherpa.onnx.getVadModelConfig import kotlin.math.sqrt @@ -10,44 +11,81 @@ class VadManager( private val onSpeechStart: () -> Unit, private val onSpeechEnd: (avgEnergy: Float, peakRms: Float) -> Unit ) { - + private val TAG = "SmartHuman-VadManager" private val vad: Vad private var isSpeaking = false private var lastSpeechTime = 0L - private val END_SILENCE_MS = 800L + // 基础统计变量 private var activeFrameCount = 0 private var activeSpeechFrameCount = 0 - private var speechEnergySum = 0f private var speechFrameCount = 0 private var peakRms = 0f + // ========== 新增:连续性检测核心变量 ========== + private var totalFrames = 0 // 总处理帧数 + private var speechFrames = 0 // 语音帧总数 + private var continuousSpeechFrames = 0 // 连续语音帧数 + private var lastFrameIsSpeech = false // 上一帧是否为语音 + private var peakPosition = 0 // 峰值所在帧位置 + private var frameIndex = 0 // 当前帧索引 + init { - val config = getVadModelConfig(0) ?: throw IllegalStateException("VAD config not found") + val config = getVadModelConfig(0) ?: throw IllegalStateException("[$TAG] VAD config not found") vad = Vad(assetManager, config) + LogUtils.i(TAG, "✅ VAD 初始化成功") } + /** + * 接收音频数据并进行VAD检测 + * @param samples 音频采样数据(float数组) + */ fun accept(samples: FloatArray) { val now = System.currentTimeMillis() - vad.acceptWaveform(samples) val hasSpeech = vad.isSpeechDetected() - - // RMS & peak 统计 val rms = calcRms(samples) + + // ========== 1. 语音能量统计 ========== if (hasSpeech) { speechEnergySum += rms speechFrameCount++ peakRms = maxOf(peakRms, rms) + LogUtils.v(TAG, "🔊 检测到语音帧 | RMS: $rms | 累计峰值: $peakRms") + } else { + LogUtils.v(TAG, "🔇 检测到静音帧 | RMS: $rms") } - // VAD逻辑 + // ========== 2. 新增:帧统计与连续性计算 ========== + totalFrames++ + frameIndex++ + + if (hasSpeech) { + speechFrames++ + // 连续语音帧计数 + continuousSpeechFrames = if (lastFrameIsSpeech) { + continuousSpeechFrames + 1 + } else { + 1 // 重置连续计数 + } + lastFrameIsSpeech = true + + // 更新峰值位置(仅当当前RMS为新峰值时) + if (rms == peakRms) { + peakPosition = frameIndex + } + } else { + lastFrameIsSpeech = false + } + + // ========== 3. VAD核心状态流转 ========== if (hasSpeech) { lastSpeechTime = now if (!isSpeaking) { isSpeaking = true + LogUtils.d(TAG, "🎤 语音开始") onSpeechStart() } activeFrameCount++ @@ -55,36 +93,85 @@ class VadManager( } else { if (isSpeaking) { activeFrameCount++ - if (now - lastSpeechTime >= END_SILENCE_MS) { + val silenceDuration = now - lastSpeechTime + if (silenceDuration >= END_SILENCE_MS) { isSpeaking = false val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f - val peak = peakRms - onSpeechEnd(avgEnergy, peak) + LogUtils.d(TAG, "🛑 语音结束 | 静音时长: ${silenceDuration}ms | 平均能量: $avgEnergy | 峰值: $peakRms") + onSpeechEnd(avgEnergy, peakRms) + resetStats() // 重置基础统计 + } else { + LogUtils.v(TAG, "⏳ 静音中,时长: ${silenceDuration}ms (阈值: ${END_SILENCE_MS}ms)") } } } } - fun activeSpeechRatio(): Float = if (activeFrameCount == 0) 0f else activeSpeechFrameCount.toFloat() / activeFrameCount + /** + * 计算语音占比(活跃语音帧 / 总活跃帧) + * @return 语音占比(0~1) + */ + fun activeSpeechRatio(): Float { + val ratio = if (activeFrameCount == 0) 0f else activeSpeechFrameCount.toFloat() / activeFrameCount + LogUtils.d(TAG, "📊 语音占比: $ratio | 语音帧: $activeSpeechFrameCount | 总帧: $activeFrameCount") + return ratio + } + // ========== 新增:帧统计获取方法(给VoiceController调用) ========== + /** 获取总处理帧数 */ + fun getTotalFrames(): Int = totalFrames + + /** 获取语音帧总数 */ + fun getSpeechFrames(): Int = speechFrames + + /** 获取连续语音帧数 */ + fun getContinuousSpeechFrames(): Int = continuousSpeechFrames + + /** 获取峰值位置占比(峰值帧索引/总帧数) */ + fun getPeakPositionRatio(): Float { + return if (totalFrames == 0) 0f else peakPosition.toFloat() / totalFrames + } + + /** + * 重置VAD状态(保留核心对象,清空统计数据) + */ fun reset() { + // 基础状态重置 isSpeaking = false lastSpeechTime = 0L + resetStats() + vad.reset() + + // 新增:连续性统计重置 + totalFrames = 0 + speechFrames = 0 + continuousSpeechFrames = 0 + lastFrameIsSpeech = false + peakPosition = 0 + frameIndex = 0 + + LogUtils.d(TAG, "🔄 VAD 状态已完全重置") + } + + /** + * 重置统计数据(内部使用) + */ + private fun resetStats() { activeFrameCount = 0 activeSpeechFrameCount = 0 speechEnergySum = 0f speechFrameCount = 0 peakRms = 0f - vad.reset() } - private fun calcRms(samples: FloatArray): Float { + /** + * 计算音频采样的RMS(均方根)能量 + * @param samples 音频采样数据 + * @return RMS值 + */ + fun calcRms(samples: FloatArray): Float { var sum = 0f - var peak = 0f - for (v in samples) { - sum += v * v - peak = maxOf(peak, kotlin.math.abs(v)) - } + for (v in samples) sum += v * v return sqrt(sum / samples.size) } -} +} \ No newline at end of file diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt index e18314b..cf420dc 100644 --- a/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt +++ b/app/src/main/java/com/zs/smarthuman/sherpa/VoiceController.kt @@ -1,13 +1,13 @@ package com.zs.smarthuman.sherpa import android.content.res.AssetManager -import android.util.Log +import com.blankj.utilcode.util.LogUtils import java.util.ArrayDeque class VoiceController( assetManager: AssetManager, private val onWakeup: () -> Unit, - private val onFinalAudio: (FloatArray) -> Unit, // 修改:传回识别结果文本 + private val onFinalAudio: (FloatArray) -> Unit, private val idleTimeoutSeconds: Int = 10, private val maxRecordingSeconds: Int = 10, private val onStateChanged: ((VoiceState) -> Unit)? = null, @@ -20,14 +20,16 @@ class VoiceController( var state: VoiceState = VoiceState.WAIT_WAKEUP private set(value) { field = value - Log.d(TAG, "➡ State = $value") + LogUtils.d(TAG, "➡ State = $value") onStateChanged?.invoke(value) } - private val wakeupManager = WakeupManager(assetManager) { - Log.d(TAG, "🔥 WakeWord detected") - handleWakeupEvent() - } + private val wakeupManager = WakeupManager(assetManager, onWakeup) + private val vadManager = VadManager( + assetManager, + onSpeechStart = { onVadStart() }, + onSpeechEnd = { avgEnergy, peakRms -> onVadEnd(avgEnergy, peakRms) } + ) private val audioBuffer = mutableListOf() private val preBuffer = ArrayDeque() @@ -48,10 +50,36 @@ class VoiceController( private val idleTimeoutMs = idleTimeoutSeconds * 1000L private val maxRecordingMs = maxRecordingSeconds * 1000L - private val vadManager = VadManager( - assetManager, - onSpeechStart = { onVadStart() }, - onSpeechEnd = { avgEnergy, peakRms -> onVadEnd(avgEnergy, peakRms) } + // ================= 动态阈值核心配置(修复+调整) ================= + private val BASELINE_WINDOW_SIZE = 50 + private val envNoiseBuffer = ArrayDeque(BASELINE_WINDOW_SIZE) + private var currentEnvBaseline = 0.001f + + // ========== 修复:远场过滤配置(关键调整) ========== + private val MAX_FAR_FIELD_ENERGY = 0.05f // 远场能量上限(正常语音>0.05,远场<0.05) + private val MIN_VALID_PEAK_AVG_RATIO = 1.5f // 有效峰均比下限(正常语音>1.5,咳嗽<1.5) + private val BASELINE_QUIET_THRESHOLD = 0.002f + private val MIN_CONTINUOUS_FRAME_RATIO = 0.4f // 连续帧占比下限(从0.6调低,兼容正常短语音) + private val MAX_PEAK_POSITION_RATIO = 0.5f // 峰值位置上限(从0.3调高,兼容正常语音) + private val MIN_EFFECTIVE_SPEECH_FRAMES = 3 // 最低有效帧数(从5调低) + + // 分场景动态系数(调整:降低安静环境系数) + private val SHORT_SPEECH_ENERGY_COEFF_QUIET = 6.0f // 从8.0调低 + private val SHORT_SPEECH_ENERGY_COEFF_NOISY = 4.0f // 从5.0调低 + private val LONG_SPEECH_ENERGY_COEFF = 15.0f // 从20.0调低 + private val SHORT_SPEECH_VAD_COEFF = 0.15f // 从0.2调低 + private val LONG_SPEECH_VAD_COEFF = 0.3f // 从0.4调低 + private val SHORT_SPEECH_MIN_SCORE = 2 // 调回2分 + private val LONG_SPEECH_MIN_SCORE = 4 // 从5调低 + private val SHORT_SPEECH_MIN = 500L + private val SHORT_SPEECH_MAX = 2000L + + // 阈值配置数据类 + private data class ThresholdConfig( + val energyThreshold: Float, + val vadRatioThreshold: Float, + val minScore: Int, + val scene: String ) /* ================= 音频入口 ================= */ @@ -65,6 +93,10 @@ class VoiceController( val now = System.currentTimeMillis() + if (state == VoiceState.WAIT_WAKEUP) { + calibrateEnvBaseline(samples) + } + when (state) { VoiceState.WAIT_WAKEUP, VoiceState.PLAYING_PROMPT, @@ -84,7 +116,7 @@ class VoiceController( if ((waitSpeechStartMs > 0 && now - waitSpeechStartMs >= idleTimeoutMs) || (waitSpeechFailStartMs > 0 && now - waitSpeechFailStartMs >= idleTimeoutMs) ) { - Log.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP") + LogUtils.d(TAG, "⏱ WAIT_SPEECH idle timeout → WAIT_WAKEUP") resetAll() return } @@ -100,13 +132,28 @@ class VoiceController( vadManager.accept(samples) if (System.currentTimeMillis() - recordingStartMs > maxRecordingMs) { - Log.w(TAG, "⏱ Max recording reached") - finishSentence() // 超时也触发 finish + LogUtils.w(TAG, "⏱ Max recording reached | 当前环境基线: $currentEnvBaseline") + finishSentence() } } } } + /* ================= 环境基线校准 ================= */ + private fun calibrateEnvBaseline(samples: FloatArray) { + val rms = vadManager.calcRms(samples) + if (rms < 0.01f) { + if (envNoiseBuffer.size >= BASELINE_WINDOW_SIZE) { + envNoiseBuffer.removeFirst() + } + envNoiseBuffer.addLast(rms) + currentEnvBaseline = envNoiseBuffer.maxOrNull() ?: 0.001f + LogUtils.d(TAG, "🌡 环境基线校准 | RMS: $rms | 基线: $currentEnvBaseline | 缓存数: ${envNoiseBuffer.size}") + } else { + LogUtils.v(TAG, "🔊 高能量音频跳过校准 | RMS: $rms | 基线: $currentEnvBaseline") + } + } + /* ================= 唤醒 ================= */ private fun handleWakeupEvent() { if (state == VoiceState.UPLOADING) return @@ -127,11 +174,12 @@ class VoiceController( inKwsObserve = true kwsObserveStartMs = System.currentTimeMillis() onWakeup() + LogUtils.d(TAG, "🔔 唤醒成功 | 环境基线: $currentEnvBaseline") } private fun onVadStart() { if (state != VoiceState.WAIT_SPEECH) return - Log.d(TAG, "🎤 REAL VAD START") + LogUtils.d(TAG, "🎤 REAL VAD START | 环境基线: $currentEnvBaseline") vadStarted = true recordingStartMs = System.currentTimeMillis() audioBuffer.clear() @@ -141,17 +189,17 @@ class VoiceController( private fun onVadEnd(avgEnergy: Float, peakRms: Float) { if (state != VoiceState.RECORDING) return - Log.d(TAG, "🧠 VAD END") + LogUtils.d(TAG, "🧠 VAD END | 环境基线: $currentEnvBaseline") finishSentence(avgEnergy, peakRms) } - /* ================= 结束录音 ================= */ + /* ================= 结束录音(修复远场过滤逻辑) ================= */ private fun finishSentence(avgEnergy: Float = 0f, peakRms: Float = 0f) { val now = System.currentTimeMillis() val duration = now - recordingStartMs if (!vadStarted || duration < MIN_SPEECH_MS) { - Log.d(TAG, "❌ Too short or no VAD start: $duration ms") + LogUtils.d(TAG, "❌ 语音过短: $duration ms | 基线: $currentEnvBaseline") resetToWaitSpeech() return } @@ -160,73 +208,148 @@ class VoiceController( val vadRatio = vadManager.activeSpeechRatio() val peakAvgRatio = if (avgEnergy > 0f) peakRms / avgEnergy else 0f - Log.d(TAG, "📊 Finish Sentence - duration: $duration ms, vadEnded: true") - Log.d( - TAG, - "📊 vadRatio=$vadRatio, avgEnergy=$avgEnergy, peakRms=$peakRms, peakAvgRatio=$peakAvgRatio" - ) + // 获取VAD帧统计 + val totalFrames = vadManager.getTotalFrames() + val speechFrames = vadManager.getSpeechFrames() + val continuousSpeechFrames = vadManager.getContinuousSpeechFrames() + val peakPositionRatio = vadManager.getPeakPositionRatio() - if (avgEnergy < 0.02f || peakAvgRatio < 1.2f || vadRatio < 0.4f) { - Log.d(TAG, "❌ Sentence rejected") + LogUtils.d(TAG, "📊 录音信息 | 时长: $duration ms | 能量: $avgEnergy | 峰均比: $peakAvgRatio | 基线: $currentEnvBaseline") + LogUtils.d(TAG, "📊 帧统计 | 总帧: $totalFrames | 语音帧: $speechFrames | 连续语音帧: $continuousSpeechFrames | 峰值位置占比: $peakPositionRatio") + + // ========== 修复:远场语音过滤逻辑(核心) ========== + // 正确逻辑:能量 < MAX_FAR_FIELD_ENERGY 才是远场;峰均比 < MIN_VALID_PEAK_AVG_RATIO 才是无效语音 + val isFarField = avgEnergy < MAX_FAR_FIELD_ENERGY // 修复:从 < MIN 改为 < MAX + val isInvalidPeakRatio = peakAvgRatio < MIN_VALID_PEAK_AVG_RATIO // 降低阈值 + + // 非连续特征(调整阈值后更宽松) + val continuousRatio = if (speechFrames > 0) continuousSpeechFrames.toFloat() / speechFrames else 0f + val isDiscontinuous = continuousRatio < MIN_CONTINUOUS_FRAME_RATIO || + speechFrames < MIN_EFFECTIVE_SPEECH_FRAMES || + peakPositionRatio < MAX_PEAK_POSITION_RATIO + + // 远场+无效语音过滤(仅过滤真正的远场/杂音) + if (isFarField && isInvalidPeakRatio) { // 修复:同时满足才过滤 + LogUtils.w(TAG, "❌ 远场/无效语音过滤 | 能量: $avgEnergy < 远场上限: $MAX_FAR_FIELD_ENERGY | 峰均比: $peakAvgRatio < 有效下限: $MIN_VALID_PEAK_AVG_RATIO") resetToWaitSpeech() return } - // 评分逻辑 - var score = 0 - when { - duration >= 4000 -> score += 3 - duration >= 2500 -> score += 2 - duration >= 1500 -> score += 1 + // 非连续语音过滤(仅过滤真正的零散杂音) + if (isDiscontinuous && isFarField) { // 修复:结合远场特征,避免过滤正常语音 + LogUtils.w(TAG, "❌ 非连续杂音过滤 | 连续占比: $continuousRatio < $MIN_CONTINUOUS_FRAME_RATIO | 语音帧: $speechFrames < $MIN_EFFECTIVE_SPEECH_FRAMES | 峰值位置: $peakPositionRatio < $MAX_PEAK_POSITION_RATIO") + resetToWaitSpeech() + return + } + + // ========== 动态阈值计算(调整后更宽松) ========== + val thresholdConfig = when { + duration in SHORT_SPEECH_MIN..SHORT_SPEECH_MAX -> { + val coeff = if (currentEnvBaseline < BASELINE_QUIET_THRESHOLD) { + SHORT_SPEECH_ENERGY_COEFF_QUIET + } else { + SHORT_SPEECH_ENERGY_COEFF_NOISY + } + val threshold = currentEnvBaseline * coeff + LogUtils.d(TAG, "📏 短语音阈值 | 场景: ${if (currentEnvBaseline < BASELINE_QUIET_THRESHOLD) "安静" else "嘈杂"} | 系数: $coeff | 阈值: $threshold") + ThresholdConfig( + energyThreshold = threshold, + vadRatioThreshold = SHORT_SPEECH_VAD_COEFF, + minScore = SHORT_SPEECH_MIN_SCORE, + scene = "短语音" + ) + } + else -> { + val threshold = currentEnvBaseline * LONG_SPEECH_ENERGY_COEFF + ThresholdConfig( + energyThreshold = threshold, + vadRatioThreshold = LONG_SPEECH_VAD_COEFF, + minScore = LONG_SPEECH_MIN_SCORE, + scene = "长语音" + ) + } + } + + LogUtils.d(TAG, "📊 动态阈值 | ${thresholdConfig.scene} | 能量阈值: ${thresholdConfig.energyThreshold} | 占比阈值: ${thresholdConfig.vadRatioThreshold} | 最低分: ${thresholdConfig.minScore}") + + // 基础阈值过滤(调整后更宽松) + if (avgEnergy < thresholdConfig.energyThreshold || vadRatio < thresholdConfig.vadRatioThreshold) { + LogUtils.w(TAG, "❌ 阈值过滤 | 能量: $avgEnergy < ${thresholdConfig.energyThreshold} | 占比: $vadRatio < ${thresholdConfig.vadRatioThreshold}") + resetToWaitSpeech() + return + } + + // 评分逻辑(恢复短语音保底分) + var score = 0 + // 1. 时长评分(恢复短语音保底1分) + score += when { + duration >= 4000 -> 3 + duration >= 2500 -> 2 + duration >= 1500 -> 1 + duration >= SHORT_SPEECH_MIN -> 1 // 恢复保底分 + else -> 0 + } + // 2. 能量评分 + score += when { + avgEnergy >= thresholdConfig.energyThreshold * 10 -> 3 + avgEnergy >= thresholdConfig.energyThreshold * 5 -> 2 + avgEnergy >= thresholdConfig.energyThreshold -> 1 + else -> 0 + } + // 3. 占比+连续性评分(调整阈值) + score += when { + continuousRatio >= 0.7 -> 2 // 从0.8调低 + continuousRatio >= MIN_CONTINUOUS_FRAME_RATIO -> 1 + else -> 0 + } + + LogUtils.d(TAG, "🏆 评分结果 | 总分: $score | 最低分: ${thresholdConfig.minScore} | 连续占比: $continuousRatio") + + // 分场景判定(调整后更宽松) + val pass = if (duration in SHORT_SPEECH_MIN..SHORT_SPEECH_MAX) { + score >= thresholdConfig.minScore && continuousRatio >= 0.5 // 从0.7调低 + } else { + score >= thresholdConfig.minScore || (score >= 2 && avgEnergy >= currentEnvBaseline * 4) // 从3→2,6→4 } - when { - avgEnergy >= 0.10f -> score += 3 - avgEnergy >= 0.06f -> score += 2 - avgEnergy >= 0.02f -> score += 1 - } - when { - vadRatio >= 0.55f -> score += 2 - vadRatio >= 0.40f -> score += 1 - } - Log.d( - TAG, - "📊 duration=$duration ms, vadRatio=$vadRatio, avgEnergy=$avgEnergy, score=$score" - ) - val pass = score >= 5 || (score == 3 && avgEnergy >= 0.06f) if (!pass) { - Log.d(TAG, "❌ Sentence rejected (score=$score)") + LogUtils.w(TAG, "❌ 评分/连续性不足过滤 | 总分: $score < ${thresholdConfig.minScore} | 连续占比: $continuousRatio") resetToWaitSpeech() return } audioBuffer.clear() state = VoiceState.UPLOADING - onFinalAudio(audio) // 传递音频和识别文本 + onFinalAudio(audio) + LogUtils.i(TAG, "✅ 录音通过 | 时长: $duration ms | 能量: $avgEnergy | 连续占比: $continuousRatio | 准备上传") } - /* ================= 播放回调 ================= */ + /* ================= 播放/上传/Reset 回调(无修改) ================= */ fun onPlayStartPrompt() { + LogUtils.d(TAG, "🎵 播放提示音 | 基线: $currentEnvBaseline") state = VoiceState.PLAYING_PROMPT } fun onPlayEndPrompt() { speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS + LogUtils.d(TAG, "🎵 提示音结束 | 基线: $currentEnvBaseline") state = VoiceState.WAIT_SPEECH_COOLDOWN } fun onPlayStartBackend() { + LogUtils.d(TAG, "🎶 播放后台音频 | 基线: $currentEnvBaseline") state = VoiceState.PLAYING_BACKEND } fun onPlayEndBackend() { speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS + LogUtils.d(TAG, "🎶 后台音频结束 | 基线: $currentEnvBaseline") state = VoiceState.WAIT_SPEECH_COOLDOWN } - /* ================= 上传回调 ================= */ fun onUploadFinished(success: Boolean) { if (state != VoiceState.UPLOADING) return + LogUtils.d(TAG, "📤 上传完成 | 成功: $success | 基线: $currentEnvBaseline") state = if (success) VoiceState.PLAYING_BACKEND else { speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS @@ -234,8 +357,8 @@ class VoiceController( } } - /* ================= Reset ================= */ private fun resetToWaitSpeech() { + LogUtils.d(TAG, "🔄 重置到等待说话 | 基线: $currentEnvBaseline") audioBuffer.clear() vadManager.reset() vadStarted = false @@ -244,25 +367,31 @@ class VoiceController( } private fun resetAll() { + LogUtils.d(TAG, "🔄 重置所有状态 | 基线: $currentEnvBaseline") audioBuffer.clear() preBuffer.clear() vadManager.reset() + wakeupManager.reset() vadStarted = false waitSpeechStartMs = 0L waitSpeechFailStartMs = 0L + envNoiseBuffer.clear() + currentEnvBaseline = 0.001f + LogUtils.d(TAG, "🔄 环境基线已重置 | 新基线: $currentEnvBaseline") state = VoiceState.WAIT_WAKEUP } fun release() { + LogUtils.d(TAG, "🔌 释放资源 | 最终基线: $currentEnvBaseline") wakeupManager.release() vadManager.reset() + envNoiseBuffer.clear() } - /* ================= Utils ================= */ private fun cachePreBuffer(samples: FloatArray) { for (s in samples) { preBuffer.addLast(s) if (preBuffer.size > PRE_BUFFER_SIZE) preBuffer.removeFirst() } } -} +} \ No newline at end of file diff --git a/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt b/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt index 0c7a72c..768f312 100644 --- a/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt +++ b/app/src/main/java/com/zs/smarthuman/sherpa/WakeupManager.kt @@ -2,6 +2,7 @@ package com.zs.smarthuman.sherpa import android.content.res.AssetManager import android.util.Log +import com.blankj.utilcode.util.LogUtils import com.k2fsa.sherpa.onnx.* class WakeupManager(assetManager: AssetManager, function: () -> Unit) { @@ -29,11 +30,11 @@ class WakeupManager(assetManager: AssetManager, function: () -> Unit) { ) kws = KeywordSpotter(assetManager, config) - Log.d(TAG, "✅ KeywordSpotter initialized") + LogUtils.d(TAG, "✅ KeywordSpotter initialized") stream = kws.createStream() require(stream != null) { "Failed to create KWS stream" } - Log.d(TAG, "✅ KWS stream created") + LogUtils.d(TAG, "✅ KWS stream created") } /** ⭐ 永远喂 KWS */ @@ -48,7 +49,7 @@ class WakeupManager(assetManager: AssetManager, function: () -> Unit) { kws.decode(s) val keyword = kws.getResult(s).keyword if (keyword.isNotBlank()) { - Log.d(TAG, "🔥 KWS hit: $keyword") + LogUtils.d(TAG, "🔥 KWS hit: $keyword") justWokeUp = true kws.reset(s) break