将 VAD 阈值调整为更合适的值(放宽静默判定、降低有效语音门槛,并新增多人对话过滤)
This commit is contained in:
parent
b15621985d
commit
58e2bc3e06
@ -15,7 +15,25 @@ class VadManager(
|
|||||||
private val vad: Vad
|
private val vad: Vad
|
||||||
private var isSpeaking = false
|
private var isSpeaking = false
|
||||||
private var lastSpeechTime = 0L
|
private var lastSpeechTime = 0L
|
||||||
private val END_SILENCE_MS = 800L
|
|
||||||
|
// ========== 核心调整:适配人类正常说话停顿 ==========
|
||||||
|
private val END_SILENCE_MS = 1500L // 基础静默阈值(1.5秒)
|
||||||
|
private val MAX_SILENCE_AFTER_SPEECH_MS = 3000L// 兜底静默阈值(3秒)
|
||||||
|
private val SPEECH_ACTIVE_DURATION = 5000L // 语音活跃期(5秒内容忍更长停顿)
|
||||||
|
|
||||||
|
// ========== 核心调整:降低有效语音阈值 ==========
|
||||||
|
private val MIN_EFFECTIVE_SPEECH_RMS = 0.001f // 有效语音最小RMS
|
||||||
|
private val ENV_BASELINE_FACTOR = 1.2f // 环境基线倍数
|
||||||
|
private var envBaselineRms = 0.0005f // 初始环境基线
|
||||||
|
private var lastEffectiveSpeechTime = 0L // 最后一次有效语音时间戳
|
||||||
|
|
||||||
|
// ========== 新增:语音活跃期变量 ==========
|
||||||
|
private var isSpeechActive = false // 语音活跃期标记
|
||||||
|
private var speechActiveStartMs = 0L // 活跃期开始时间
|
||||||
|
|
||||||
|
// ========== 连续静音帧校验(放宽阈值) ==========
|
||||||
|
private var consecutiveSilenceFrames = 0
|
||||||
|
private val CONSECUTIVE_SILENCE_FRAME_THRESHOLD = 10 // 连续10帧静音(200ms)
|
||||||
|
|
||||||
// 基础统计变量
|
// 基础统计变量
|
||||||
private var activeFrameCount = 0
|
private var activeFrameCount = 0
|
||||||
@ -24,13 +42,13 @@ class VadManager(
|
|||||||
private var speechFrameCount = 0
|
private var speechFrameCount = 0
|
||||||
private var peakRms = 0f
|
private var peakRms = 0f
|
||||||
|
|
||||||
// ========== 新增:连续性检测核心变量 ==========
|
// 连续性检测核心变量
|
||||||
private var totalFrames = 0 // 总处理帧数
|
private var totalFrames = 0
|
||||||
private var speechFrames = 0 // 语音帧总数
|
private var speechFrames = 0
|
||||||
private var continuousSpeechFrames = 0 // 连续语音帧数
|
private var continuousSpeechFrames = 0
|
||||||
private var lastFrameIsSpeech = false // 上一帧是否为语音
|
private var lastFrameIsSpeech = false
|
||||||
private var peakPosition = 0 // 峰值所在帧位置
|
private var peakPosition = 0
|
||||||
private var frameIndex = 0 // 当前帧索引
|
private var frameIndex = 0
|
||||||
|
|
||||||
init {
|
init {
|
||||||
val config = getVadModelConfig(0) ?: throw IllegalStateException("[$TAG] VAD config not found")
|
val config = getVadModelConfig(0) ?: throw IllegalStateException("[$TAG] VAD config not found")
|
||||||
@ -38,54 +56,64 @@ class VadManager(
|
|||||||
LogUtils.i(TAG, "✅ VAD 初始化成功")
|
LogUtils.i(TAG, "✅ VAD 初始化成功")
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 接收音频数据并进行VAD检测
|
|
||||||
* @param samples 音频采样数据(float数组)
|
|
||||||
*/
|
|
||||||
fun accept(samples: FloatArray) {
|
fun accept(samples: FloatArray) {
|
||||||
val now = System.currentTimeMillis()
|
val now = System.currentTimeMillis()
|
||||||
vad.acceptWaveform(samples)
|
vad.acceptWaveform(samples)
|
||||||
val hasSpeech = vad.isSpeechDetected()
|
val vadHasSpeech = vad.isSpeechDetected()
|
||||||
val rms = calcRms(samples)
|
val rms = calcRms(samples)
|
||||||
|
|
||||||
// ========== 1. 语音能量统计 ==========
|
// 环境基线更新(滑动平均,适配背景噪音)
|
||||||
if (hasSpeech) {
|
if (!vadHasSpeech || rms < MIN_EFFECTIVE_SPEECH_RMS) {
|
||||||
|
envBaselineRms = (envBaselineRms * 0.9f) + (rms * 0.1f)
|
||||||
|
}
|
||||||
|
// 有效语音判定:阈值更低,更容易触发
|
||||||
|
val effectiveSpeechThreshold = maxOf(MIN_EFFECTIVE_SPEECH_RMS, envBaselineRms * ENV_BASELINE_FACTOR)
|
||||||
|
val isEffectiveSpeech = vadHasSpeech && rms >= effectiveSpeechThreshold
|
||||||
|
|
||||||
|
// ========== 核心优化:语音活跃期逻辑 ==========
|
||||||
|
if (isEffectiveSpeech) {
|
||||||
|
isSpeechActive = true
|
||||||
|
speechActiveStartMs = now // 重置活跃期
|
||||||
|
}
|
||||||
|
// 活跃期内(距最近一次有效语音不足5秒)使用2秒静默阈值,否则使用基础阈值 END_SILENCE_MS(1.5秒)
|
||||||
|
val dynamicEndSilenceMs = if (isSpeechActive && (now - speechActiveStartMs) < SPEECH_ACTIVE_DURATION) {
|
||||||
|
2000L
|
||||||
|
} else {
|
||||||
|
END_SILENCE_MS
|
||||||
|
}
|
||||||
|
|
||||||
|
// 语音能量统计(仅有效语音)
|
||||||
|
if (isEffectiveSpeech) {
|
||||||
speechEnergySum += rms
|
speechEnergySum += rms
|
||||||
speechFrameCount++
|
speechFrameCount++
|
||||||
peakRms = maxOf(peakRms, rms)
|
peakRms = maxOf(peakRms, rms)
|
||||||
LogUtils.v(TAG, "🔊 检测到语音帧 | RMS: $rms | 累计峰值: $peakRms")
|
lastEffectiveSpeechTime = now
|
||||||
|
lastSpeechTime = now
|
||||||
|
consecutiveSilenceFrames = 0 // 重置连续静音帧
|
||||||
|
LogUtils.v(TAG, "🔊 有效语音帧 | RMS: $rms | 阈值: $effectiveSpeechThreshold")
|
||||||
} else {
|
} else {
|
||||||
LogUtils.v(TAG, "🔇 检测到静音帧 | RMS: $rms")
|
consecutiveSilenceFrames++ // 累计连续静音帧
|
||||||
|
LogUtils.v(TAG, if (vadHasSpeech) "⚠ 低能量语音帧 | RMS: $rms | 阈值: $effectiveSpeechThreshold"
|
||||||
|
else "🔇 静音帧 | 连续静音帧: $consecutiveSilenceFrames")
|
||||||
}
|
}
|
||||||
|
|
||||||
// ========== 2. 新增:帧统计与连续性计算 ==========
|
// 帧统计与连续性计算
|
||||||
totalFrames++
|
totalFrames++
|
||||||
frameIndex++
|
frameIndex++
|
||||||
|
if (isEffectiveSpeech) {
|
||||||
if (hasSpeech) {
|
|
||||||
speechFrames++
|
speechFrames++
|
||||||
// 连续语音帧计数
|
continuousSpeechFrames = if (lastFrameIsSpeech) continuousSpeechFrames + 1 else 1
|
||||||
continuousSpeechFrames = if (lastFrameIsSpeech) {
|
|
||||||
continuousSpeechFrames + 1
|
|
||||||
} else {
|
|
||||||
1 // 重置连续计数
|
|
||||||
}
|
|
||||||
lastFrameIsSpeech = true
|
lastFrameIsSpeech = true
|
||||||
|
if (rms == peakRms) peakPosition = frameIndex
|
||||||
// 更新峰值位置(仅当当前RMS为新峰值时)
|
|
||||||
if (rms == peakRms) {
|
|
||||||
peakPosition = frameIndex
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
lastFrameIsSpeech = false
|
lastFrameIsSpeech = false
|
||||||
}
|
}
|
||||||
|
|
||||||
// ========== 3. VAD核心状态流转 ==========
|
// VAD核心状态流转(使用动态静默阈值)
|
||||||
if (hasSpeech) {
|
if (isEffectiveSpeech) {
|
||||||
lastSpeechTime = now
|
|
||||||
if (!isSpeaking) {
|
if (!isSpeaking) {
|
||||||
isSpeaking = true
|
isSpeaking = true
|
||||||
LogUtils.d(TAG, "🎤 语音开始")
|
LogUtils.d(TAG, "🎤 有效语音开始 | 阈值: $effectiveSpeechThreshold")
|
||||||
onSpeechStart()
|
onSpeechStart()
|
||||||
}
|
}
|
||||||
activeFrameCount++
|
activeFrameCount++
|
||||||
@ -93,56 +121,61 @@ class VadManager(
|
|||||||
} else {
|
} else {
|
||||||
if (isSpeaking) {
|
if (isSpeaking) {
|
||||||
activeFrameCount++
|
activeFrameCount++
|
||||||
val silenceDuration = now - lastSpeechTime
|
val vadSilenceDuration = now - lastSpeechTime
|
||||||
if (silenceDuration >= END_SILENCE_MS) {
|
val effectiveSilenceDuration = now - lastEffectiveSpeechTime
|
||||||
|
|
||||||
|
// 触发条件:(静默时长 ≥ 动态阈值 或 有效静默时长 ≥ 兜底阈值)且 连续静音帧达标
|
||||||
|
val isSilenceTimeout = (vadSilenceDuration >= dynamicEndSilenceMs ||
|
||||||
|
effectiveSilenceDuration >= MAX_SILENCE_AFTER_SPEECH_MS) &&
|
||||||
|
consecutiveSilenceFrames >= CONSECUTIVE_SILENCE_FRAME_THRESHOLD
|
||||||
|
|
||||||
|
if (isSilenceTimeout) {
|
||||||
isSpeaking = false
|
isSpeaking = false
|
||||||
|
isSpeechActive = false // 结束活跃期
|
||||||
val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
|
val avgEnergy = if (speechFrameCount > 0) speechEnergySum / speechFrameCount else 0f
|
||||||
LogUtils.d(TAG, "🛑 语音结束 | 静音时长: ${silenceDuration}ms | 平均能量: $avgEnergy | 峰值: $peakRms")
|
LogUtils.d(TAG, """
|
||||||
|
🛑 语音结束
|
||||||
|
- 有效静默时长: ${effectiveSilenceDuration}ms
|
||||||
|
- 连续静音帧: $consecutiveSilenceFrames
|
||||||
|
- 平均能量: $avgEnergy | 峰值: $peakRms
|
||||||
|
- 活跃期: ${if (isSpeechActive) "是" else "否"}
|
||||||
|
""".trimIndent())
|
||||||
onSpeechEnd(avgEnergy, peakRms)
|
onSpeechEnd(avgEnergy, peakRms)
|
||||||
resetStats() // 重置基础统计
|
resetStats()
|
||||||
} else {
|
} else {
|
||||||
LogUtils.v(TAG, "⏳ 静音中,时长: ${silenceDuration}ms (阈值: ${END_SILENCE_MS}ms)")
|
LogUtils.v(TAG, "⏳ 静默中(停顿容忍) | 连续静音帧: $consecutiveSilenceFrames | 静默时长: ${effectiveSilenceDuration}ms | 动态阈值: $dynamicEndSilenceMs")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// ========== 保留原有方法 ==========
|
||||||
* 计算语音占比(活跃语音帧 / 总活跃帧)
|
|
||||||
* @return 语音占比(0~1)
|
|
||||||
*/
|
|
||||||
fun activeSpeechRatio(): Float {
|
fun activeSpeechRatio(): Float {
|
||||||
val ratio = if (activeFrameCount == 0) 0f else activeSpeechFrameCount.toFloat() / activeFrameCount
|
val ratio = if (activeFrameCount == 0) 0f else activeSpeechFrameCount.toFloat() / activeFrameCount
|
||||||
LogUtils.d(TAG, "📊 语音占比: $ratio | 语音帧: $activeSpeechFrameCount | 总帧: $activeFrameCount")
|
LogUtils.d(TAG, "📊 语音占比: $ratio | 有效语音帧: $activeSpeechFrameCount | 总帧: $activeFrameCount")
|
||||||
return ratio
|
return ratio
|
||||||
}
|
}
|
||||||
|
|
||||||
// ========== 新增:帧统计获取方法(给VoiceController调用) ==========
|
|
||||||
/** 获取总处理帧数 */
|
|
||||||
fun getTotalFrames(): Int = totalFrames
|
fun getTotalFrames(): Int = totalFrames
|
||||||
|
|
||||||
/** 获取语音帧总数 */
|
|
||||||
fun getSpeechFrames(): Int = speechFrames
|
fun getSpeechFrames(): Int = speechFrames
|
||||||
|
|
||||||
/** 获取连续语音帧数 */
|
|
||||||
fun getContinuousSpeechFrames(): Int = continuousSpeechFrames
|
fun getContinuousSpeechFrames(): Int = continuousSpeechFrames
|
||||||
|
|
||||||
/** 获取峰值位置占比(峰值帧索引/总帧数) */
|
|
||||||
fun getPeakPositionRatio(): Float {
|
fun getPeakPositionRatio(): Float {
|
||||||
return if (totalFrames == 0) 0f else peakPosition.toFloat() / totalFrames
|
return if (totalFrames == 0) 0f else peakPosition.toFloat() / totalFrames
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 重置VAD状态(保留核心对象,清空统计数据)
|
|
||||||
*/
|
|
||||||
fun reset() {
|
fun reset() {
|
||||||
// 基础状态重置
|
|
||||||
isSpeaking = false
|
isSpeaking = false
|
||||||
lastSpeechTime = 0L
|
lastSpeechTime = 0L
|
||||||
|
lastEffectiveSpeechTime = 0L
|
||||||
|
envBaselineRms = 0.0005f
|
||||||
|
consecutiveSilenceFrames = 0
|
||||||
|
// 重置活跃期
|
||||||
|
isSpeechActive = false
|
||||||
|
speechActiveStartMs = 0L
|
||||||
|
// 重置统计
|
||||||
resetStats()
|
resetStats()
|
||||||
vad.reset()
|
vad.reset()
|
||||||
|
|
||||||
// 新增:连续性统计重置
|
|
||||||
totalFrames = 0
|
totalFrames = 0
|
||||||
speechFrames = 0
|
speechFrames = 0
|
||||||
continuousSpeechFrames = 0
|
continuousSpeechFrames = 0
|
||||||
@ -153,9 +186,6 @@ class VadManager(
|
|||||||
LogUtils.d(TAG, "🔄 VAD 状态已完全重置")
|
LogUtils.d(TAG, "🔄 VAD 状态已完全重置")
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 重置统计数据(内部使用)
|
|
||||||
*/
|
|
||||||
private fun resetStats() {
|
private fun resetStats() {
|
||||||
activeFrameCount = 0
|
activeFrameCount = 0
|
||||||
activeSpeechFrameCount = 0
|
activeSpeechFrameCount = 0
|
||||||
@ -164,11 +194,6 @@ class VadManager(
|
|||||||
peakRms = 0f
|
peakRms = 0f
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 计算音频采样的RMS(均方根)能量
|
|
||||||
* @param samples 音频采样数据
|
|
||||||
* @return RMS值
|
|
||||||
*/
|
|
||||||
fun calcRms(samples: FloatArray): Float {
|
fun calcRms(samples: FloatArray): Float {
|
||||||
var sum = 0f
|
var sum = 0f
|
||||||
for (v in samples) sum += v * v
|
for (v in samples) sum += v * v
|
||||||
|
|||||||
@ -55,7 +55,7 @@ class VoiceController(
|
|||||||
private val envNoiseBuffer = ArrayDeque<Float>(BASELINE_WINDOW_SIZE)
|
private val envNoiseBuffer = ArrayDeque<Float>(BASELINE_WINDOW_SIZE)
|
||||||
private var currentEnvBaseline = 0.001f
|
private var currentEnvBaseline = 0.001f
|
||||||
|
|
||||||
// 强制兜底:正常语音最低门槛(你的0.0809≥0.06直接通过)
|
// 强制兜底:正常语音最低门槛
|
||||||
private val MIN_NORMAL_VOICE_ENERGY = 0.06f
|
private val MIN_NORMAL_VOICE_ENERGY = 0.06f
|
||||||
private val MIN_NORMAL_VOICE_VAD_RATIO = 0.3f
|
private val MIN_NORMAL_VOICE_VAD_RATIO = 0.3f
|
||||||
|
|
||||||
@ -79,6 +79,13 @@ class VoiceController(
|
|||||||
private val SHORT_SPEECH_MIN = 500L
|
private val SHORT_SPEECH_MIN = 500L
|
||||||
private val SHORT_SPEECH_MAX = 2000L
|
private val SHORT_SPEECH_MAX = 2000L
|
||||||
|
|
||||||
|
// ========== 核心修改:多人对话过滤配置(适配2人以上场景) ==========
|
||||||
|
private val MULTI_DIALOGUE_MIN_DURATION = 2500L // 多人对话最小时长(2.5秒,比两人更短也能判定)
|
||||||
|
private val MULTI_DIALOGUE_MAX_PEAK_AVG_RATIO = 2.5f // 多人对话峰均比范围更大(多人音量差异更大)
|
||||||
|
private val MULTI_DIALOGUE_MIN_PEAK_AVG_RATIO = 0.4f
|
||||||
|
private val MULTI_DIALOGUE_MAX_CONTINUOUS_RATIO = 0.3f // 多人对话连续帧占比更低(轮流说话,断层更多)
|
||||||
|
private val MULTI_DIALOGUE_MIN_VAD_RATIO = 0.55f // 多人对话有效帧占比要求稍低(避免漏过滤)
|
||||||
|
|
||||||
// 阈值配置数据类
|
// 阈值配置数据类
|
||||||
private data class ThresholdConfig(
|
private data class ThresholdConfig(
|
||||||
val energyThreshold: Float,
|
val energyThreshold: Float,
|
||||||
@ -147,15 +154,14 @@ class VoiceController(
|
|||||||
/* ================= 环境基线校准 ================= */
|
/* ================= 环境基线校准 ================= */
|
||||||
private fun calibrateEnvBaseline(samples: FloatArray) {
|
private fun calibrateEnvBaseline(samples: FloatArray) {
|
||||||
val rms = vadManager.calcRms(samples)
|
val rms = vadManager.calcRms(samples)
|
||||||
|
// 新增:只保留低于基线+阈值的有效值,过滤突发噪音(注意:validRms 目前尚未被使用,下方入队的仍是原始 rms)
|
||||||
|
val validRms = if (rms < currentEnvBaseline + 0.005f) rms else currentEnvBaseline
|
||||||
if (rms < 0.03f) {
|
if (rms < 0.03f) {
|
||||||
if (envNoiseBuffer.size >= BASELINE_WINDOW_SIZE) {
|
if (envNoiseBuffer.size >= BASELINE_WINDOW_SIZE) {
|
||||||
envNoiseBuffer.removeFirst()
|
envNoiseBuffer.removeFirst()
|
||||||
}
|
}
|
||||||
envNoiseBuffer.addLast(rms)
|
envNoiseBuffer.addLast(rms)
|
||||||
currentEnvBaseline = envNoiseBuffer.maxOrNull() ?: 0.001f
|
currentEnvBaseline = envNoiseBuffer.maxOrNull() ?: 0.001f
|
||||||
// LogUtils.d(TAG, "🌡 环境基线校准 | RMS: $rms | 基线: $currentEnvBaseline | 缓存数: ${envNoiseBuffer.size}")
|
|
||||||
} else {
|
|
||||||
// LogUtils.v(TAG, "🔊 高能量音频跳过校准 | RMS: $rms | 基线: $currentEnvBaseline")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -198,7 +204,7 @@ class VoiceController(
|
|||||||
finishSentence(avgEnergy, peakRms)
|
finishSentence(avgEnergy, peakRms)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ================= 结束录音(分场景系数+强制兜底) ================= */
|
/* ================= 结束录音(核心:多人对话过滤) ================= */
|
||||||
private fun finishSentence(avgEnergy: Float = 0f, peakRms: Float = 0f) {
|
private fun finishSentence(avgEnergy: Float = 0f, peakRms: Float = 0f) {
|
||||||
val now = System.currentTimeMillis()
|
val now = System.currentTimeMillis()
|
||||||
val duration = now - recordingStartMs
|
val duration = now - recordingStartMs
|
||||||
@ -222,6 +228,19 @@ class VoiceController(
|
|||||||
LogUtils.d(TAG, "📊 录音信息 | 时长: $duration ms | 能量: $avgEnergy | 峰均比: $peakAvgRatio | 基线: $currentEnvBaseline")
|
LogUtils.d(TAG, "📊 录音信息 | 时长: $duration ms | 能量: $avgEnergy | 峰均比: $peakAvgRatio | 基线: $currentEnvBaseline")
|
||||||
LogUtils.d(TAG, "📊 帧统计 | 总帧: $totalFrames | 语音帧: $speechFrames | 连续语音帧: $continuousSpeechFrames | 峰值位置占比: $peakPositionRatio")
|
LogUtils.d(TAG, "📊 帧统计 | 总帧: $totalFrames | 语音帧: $speechFrames | 连续语音帧: $continuousSpeechFrames | 峰值位置占比: $peakPositionRatio")
|
||||||
|
|
||||||
|
// ========== 核心修改:第一步过滤多人对话垃圾语音 ==========
|
||||||
|
val continuousRatio = if (speechFrames > 0) continuousSpeechFrames.toFloat() / speechFrames else 0f
|
||||||
|
val isMultiPersonDialogue = duration >= MULTI_DIALOGUE_MIN_DURATION && // 时长≥2.5秒
|
||||||
|
peakAvgRatio in MULTI_DIALOGUE_MIN_PEAK_AVG_RATIO..MULTI_DIALOGUE_MAX_PEAK_AVG_RATIO && // 峰均比0.4~2.5
|
||||||
|
continuousRatio <= MULTI_DIALOGUE_MAX_CONTINUOUS_RATIO && // 连续帧占比≤0.3(多人轮流说,断层多)
|
||||||
|
vadRatio >= MULTI_DIALOGUE_MIN_VAD_RATIO // 有效帧占比≥0.55(整体语音占比高)
|
||||||
|
|
||||||
|
if (isMultiPersonDialogue) {
|
||||||
|
LogUtils.w(TAG, "❌ 过滤多人对话垃圾语音 | 时长: $duration ms | 连续占比: $continuousRatio | 有效占比: $vadRatio | 峰均比: $peakAvgRatio")
|
||||||
|
resetToWaitSpeech()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// ========== 1. 强制兜底:正常语音直接通过 ==========
|
// ========== 1. 强制兜底:正常语音直接通过 ==========
|
||||||
val isNormalVoice = avgEnergy >= MIN_NORMAL_VOICE_ENERGY && vadRatio >= MIN_NORMAL_VOICE_VAD_RATIO
|
val isNormalVoice = avgEnergy >= MIN_NORMAL_VOICE_ENERGY && vadRatio >= MIN_NORMAL_VOICE_VAD_RATIO
|
||||||
if (isNormalVoice) {
|
if (isNormalVoice) {
|
||||||
@ -242,7 +261,6 @@ class VoiceController(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ========== 3. 非连续判定:极度宽松 ==========
|
// ========== 3. 非连续判定:极度宽松 ==========
|
||||||
val continuousRatio = if (speechFrames > 0) continuousSpeechFrames.toFloat() / speechFrames else 0f
|
|
||||||
val isDiscontinuous = continuousRatio < MIN_CONTINUOUS_FRAME_RATIO &&
|
val isDiscontinuous = continuousRatio < MIN_CONTINUOUS_FRAME_RATIO &&
|
||||||
speechFrames < MIN_EFFECTIVE_SPEECH_FRAMES &&
|
speechFrames < MIN_EFFECTIVE_SPEECH_FRAMES &&
|
||||||
peakPositionRatio > MAX_PEAK_POSITION_RATIO
|
peakPositionRatio > MAX_PEAK_POSITION_RATIO
|
||||||
@ -344,9 +362,16 @@ class VoiceController(
|
|||||||
VoiceState.WAIT_SPEECH_COOLDOWN
|
VoiceState.WAIT_SPEECH_COOLDOWN
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
private var lastInvalidResetMs = 0L
|
||||||
|
private val INVALID_RESET_DEBOUNCE_MS = 1500L // 1.5秒内不重复重置
|
||||||
private fun resetToWaitSpeech() {
|
private fun resetToWaitSpeech() {
|
||||||
LogUtils.d(TAG, "🔄 重置到等待说话 | 基线: $currentEnvBaseline")
|
LogUtils.d(TAG, "🔄 重置到等待说话 | 基线: $currentEnvBaseline")
|
||||||
|
val now = System.currentTimeMillis()
|
||||||
|
if (now - lastInvalidResetMs < INVALID_RESET_DEBOUNCE_MS) {
|
||||||
|
LogUtils.d(TAG, "🛡 防抖:1.5秒内重复无效语音,跳过重置")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
lastInvalidResetMs = now
|
||||||
audioBuffer.clear()
|
audioBuffer.clear()
|
||||||
vadManager.reset()
|
vadManager.reset()
|
||||||
vadStarted = false
|
vadStarted = false
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user