Temporary commit

parent 724c5d51e0
commit d01e43cd56
@@ -52,7 +52,7 @@
        tools:targetApi="31">

        <activity
            android:name=".ui.MainActivity"
            android:name=".ui.SplashActivity"
            android:exported="true"
            android:theme="@style/Theme.Splash"
            android:screenOrientation="portrait">
@@ -66,9 +66,9 @@
            </intent-filter>
        </activity>

        <!-- <activity
        <activity
            android:name="com.zs.smarthuman.ui.MainActivity"
            android:screenOrientation="portrait"/>-->
            android:screenOrientation="portrait"/>
        <activity
            android:name="com.zs.smarthuman.ui.ActivateActivity"
            android:screenOrientation="portrait"/>
@@ -0,0 +1,4 @@
# Introduction

The model in this directory is converted from
https://huggingface.co/ASLP-lab/WSYue-ASR/tree/main/u2pp_conformer_yue
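For orientation, the sketch below shows how a model converted this way can be loaded for one-shot decoding; it mirrors the config built by the SimulateStreamingAsr utility added later in this commit and uses the standard sherpa-onnx offline calls (createStream / acceptWaveform / decode / getResult). A minimal sketch, not part of the commit: the function name, the asset-relative directory name, and the 16 kHz mono FloatArray input are assumptions.

import android.content.res.AssetManager
import com.k2fsa.sherpa.onnx.OfflineModelConfig
import com.k2fsa.sherpa.onnx.OfflineRecognizer
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
import com.k2fsa.sherpa.onnx.OfflineWenetCtcModelConfig

// Hypothetical one-shot decode of a 16 kHz mono buffer with the converted model.
fun decodeOnce(assetManager: AssetManager, samples: FloatArray): String {
    val dir = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10"
    val config = OfflineRecognizerConfig(
        modelConfig = OfflineModelConfig(
            wenetCtc = OfflineWenetCtcModelConfig(model = "$dir/model.int8.onnx"),
            tokens = "$dir/tokens.txt",
        ),
    )
    val recognizer = OfflineRecognizer(assetManager = assetManager, config = config)
    val stream = recognizer.createStream()       // one stream per utterance
    stream.acceptWaveform(samples, sampleRate = 16000)
    recognizer.decode(stream)                    // blocking offline decode
    val text = recognizer.getResult(stream).text
    stream.release()
    recognizer.release()
    return text
}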
Binary file not shown.
File diff suppressed because it is too large.
@@ -2,10 +2,6 @@ package com.zs.smarthuman.sherpa

import android.content.res.AssetManager
import com.blankj.utilcode.util.LogUtils
import kotlinx.coroutines.GlobalScope
import kotlinx.coroutines.Job
import kotlinx.coroutines.delay
import kotlinx.coroutines.launch
import java.util.ArrayDeque

class VoiceController(
@@ -29,6 +25,22 @@ class VoiceController(
            onStateChanged?.invoke(value)
        }

    // ========== Added missing fields: realtime energy and frame statistics ==========
    // Realtime energy statistics
    private var realtimeEnergySum = 0f
    private var realtimeEnergyCount = 0
    private var realtimePeakRms = 0f
    // Realtime frame statistics
    private var realtimeTotalFrames = 0
    private var realtimeSpeechFrames = 0
    private var realtimeContinuousSpeechFrames = 0
    private var realtimeLastFrameIsSpeech = false
    // Multi-person dialogue detection flag
    private var isMultiPersonDialogueDetected = false
    // Debounce flag for invalid-speech resets
    private var lastInvalidResetMs = 0L
    private val INVALID_RESET_DEBOUNCE_MS = 1500L

    private val wakeupManager = WakeupManager(assetManager, onWakeup)
    private val vadManager = VadManager(
        assetManager,
@@ -55,69 +67,56 @@ class VoiceController(
    private val idleTimeoutMs = idleTimeoutSeconds * 1000L
    private val maxRecordingMs = maxRecordingSeconds * 1000L

    // ================= Keep per-scene dynamic coefficients + hard fallback config =================
    // ================= Keep per-scene dynamic coefficients + hard fallback config (close-range optimized) =================
    private val BASELINE_WINDOW_SIZE = 50
    private val envNoiseBuffer = ArrayDeque<Float>(BASELINE_WINDOW_SIZE)
    private var currentEnvBaseline = 0.001f

    // Hard fallback: minimum bar for normal speech
    private val MIN_NORMAL_VOICE_ENERGY = 0.06f
    private val MIN_NORMAL_VOICE_VAD_RATIO = 0.3f
    // Hard fallback: minimum bar for normal speech (greatly lowered for close range)
    private val MIN_NORMAL_VOICE_ENERGY = 0.03f
    private val MIN_NORMAL_VOICE_VAD_RATIO = 0.2f

    // Per-scene dynamic coefficients (very low in quiet environments)
    private val BASELINE_QUIET_THRESHOLD = 0.005f // quiet-environment baseline threshold
    private val SHORT_SPEECH_ENERGY_COEFF_QUIET = 2.0f // short-speech coefficient, quiet env
    private val SHORT_SPEECH_ENERGY_COEFF_NOISY = 3.0f // short-speech coefficient, noisy env
    private val LONG_SPEECH_ENERGY_COEFF_QUIET = 4.0f // long-speech coefficient, quiet env
    private val LONG_SPEECH_ENERGY_COEFF_NOISY = 6.0f // long-speech coefficient, noisy env
    private val SHORT_SPEECH_VAD_COEFF = 0.08f
    private val LONG_SPEECH_VAD_COEFF = 0.15f
    // Per-scene dynamic coefficients (very low in quiet environments, tuned for close-range soft speech)
    private val BASELINE_QUIET_THRESHOLD = 0.005f
    private val SHORT_SPEECH_ENERGY_COEFF_QUIET = 1.5f
    private val SHORT_SPEECH_ENERGY_COEFF_NOISY = 2.0f
    private val LONG_SPEECH_ENERGY_COEFF_QUIET = 2.5f
    private val LONG_SPEECH_ENERGY_COEFF_NOISY = 3.5f
    private val SHORT_SPEECH_VAD_COEFF = 0.05f
    private val LONG_SPEECH_VAD_COEFF = 0.10f
    private val SHORT_SPEECH_MIN_SCORE = 1
    private val LONG_SPEECH_MIN_SCORE = 2
    private val LONG_SPEECH_MIN_SCORE = 1

    // Other filter parameters
    private val MAX_FAR_FIELD_ENERGY = 0.03f
    private val MIN_VALID_PEAK_AVG_RATIO = 0.8f
    private val MIN_CONTINUOUS_FRAME_RATIO = 0.2f
    // Other filter parameters (relaxed for close range)
    private val MAX_FAR_FIELD_ENERGY = 0.015f
    private val MIN_VALID_PEAK_AVG_RATIO = 0.5f
    private val MIN_CONTINUOUS_FRAME_RATIO = 0.1f
    private val MAX_PEAK_POSITION_RATIO = 0.95f
    private val MIN_EFFECTIVE_SPEECH_FRAMES = 5
    private val MIN_EFFECTIVE_SPEECH_FRAMES = 3
    private val SHORT_SPEECH_MIN = 500L
    private val SHORT_SPEECH_MAX = 2000L

    // ========== Core change: multi-person dialogue filter config (for 2+ speakers) ==========
    private val MULTI_DIALOGUE_MIN_DURATION = 2500L // minimum multi-person dialogue duration (2.5 s)
    private val MULTI_DIALOGUE_MAX_PEAK_AVG_RATIO = 2.5f // peak-to-average ratio bound for multi-person dialogue
    // ========== Core change: multi-person dialogue filter config ==========
    private val MULTI_DIALOGUE_MIN_DURATION = 2500L
    private val MULTI_DIALOGUE_MAX_PEAK_AVG_RATIO = 2.5f
    private val MULTI_DIALOGUE_MIN_PEAK_AVG_RATIO = 0.4f
    private val MULTI_DIALOGUE_MAX_CONTINUOUS_RATIO = 0.3f // continuous-frame share in multi-person dialogue
    private val MULTI_DIALOGUE_MIN_VAD_RATIO = 0.55f // active-frame share in multi-person dialogue
    private val MULTI_DIALOGUE_MAX_CONTINUOUS_RATIO = 0.3f
    private val MULTI_DIALOGUE_MIN_VAD_RATIO = 0.55f

    // ========== New: variables tracked in real time while recording ==========
    // Energy statistics
    private var realtimeEnergySum = 0f
    private var realtimeEnergyCount = 0
    private var realtimePeakRms = 0f
    // Frame statistics (accumulated in real time)
    private var realtimeTotalFrames = 0
    private var realtimeSpeechFrames = 0
    private var realtimeContinuousSpeechFrames = 0
    private var realtimeLastFrameIsSpeech = false
    // Realtime multi-person dialogue flag
    private var isMultiPersonDialogueDetected = false
    // Debounce variables
    private var lastInvalidResetMs = 0L
    private val INVALID_RESET_DEBOUNCE_MS = 1500L // no repeated resets within 1.5 s
    // ========== Core tuning: weak-voice filter config for close range (key optimization) ==========
    private val MIN_EFFECTIVE_VOICE_DURATION = 400L
    private val MIN_VOICE_FRAME_RATIO = 0.08f
    private val MIN_PEAK_ENERGY_RATIO = 1.5f
    private val NORMAL_VOICE_ENERGY_THRESHOLD = 0.008f
    private val MIN_CONTINUOUS_VOICE_FRAMES = 1

    // ========== Core addition: flags that distinguish timeout types ==========
    private var hasInvalidSpeech = false // whether any invalid speech has occurred
    private var currentTimeoutType: TimeoutType = TimeoutType.IDLE_TIMEOUT // current timeout type

    // ========== Added: MIN_EFFECTIVE_SPEECH_RMS constant (aligned with VadManager) ==========
    private val MIN_EFFECTIVE_SPEECH_RMS = 0.001f
    // ========== Core addition: MIN_EFFECTIVE_SPEECH_RMS constant ==========
    private val MIN_EFFECTIVE_SPEECH_RMS = 0.0005f

    // Playback wait timeout
    private val PLAY_WAIT_TIMEOUT_MS = 3000L
    private var playWaitJob: Job? = null
    // ========== Core addition: invalid-speech flag + timeout type ==========
    private var hasInvalidSpeech = false
    private var currentTimeoutType: TimeoutType = TimeoutType.IDLE_TIMEOUT

    /* ================= Audio entry point ================= */
    fun acceptAudio(samples: FloatArray) {
@@ -153,14 +152,12 @@ class VoiceController(
        if ((waitSpeechStartMs > 0 && now - waitSpeechStartMs >= idleTimeoutMs) ||
            (waitSpeechFailStartMs > 0 && now - waitSpeechFailStartMs >= idleTimeoutMs)
        ) {
            // Core change: decide the timeout type before timing out
            currentTimeoutType = if (hasInvalidSpeech) {
                TimeoutType.INVALID_SPEECH_TIMEOUT
            } else {
                TimeoutType.IDLE_TIMEOUT
            }
            LogUtils.d(TAG, "⏱ WAIT_SPEECH timeout → WAIT_WAKEUP | timeout type: $currentTimeoutType")
            // Fire the timeout-tip callback
            onTimeoutTip?.invoke(currentTimeoutType)
            resetAll()
            return
@@ -177,20 +174,15 @@ class VoiceController(
            vadManager.accept(samples)

            // ========== Core optimization: compute in real time while recording ==========
            // 1. Recalibrate the environment baseline in real time (adapts to changes while recording)
            calibrateEnvBaseline(samples)
            // 2. Compute energy/peak values in real time
            updateRealtimeEnergy(samples)
            // 3. Update frame statistics in real time
            updateRealtimeFrameStats()
            // 4. Check for multi-person dialogue in real time; stop recording early if detected
            if (checkMultiPersonDialogueRealtime(now)) {
                LogUtils.w(TAG, "🚨 Multi-person dialogue detected while recording, stopping early")
                finishSentence(realtimeEnergySum / realtimeEnergyCount, realtimePeakRms)
                return
            }

            // Existing max-recording-duration check
            if (System.currentTimeMillis() - recordingStartMs > maxRecordingMs) {
                LogUtils.w(TAG, "⏱ Max recording reached | current env baseline: $currentEnvBaseline")
                finishSentence(realtimeEnergySum / realtimeEnergyCount, realtimePeakRms)
@@ -199,10 +191,10 @@ class VoiceController(
        }
    }

    /* ================= New: update energy statistics in real time while recording ================= */
    /* ================= New: update energy statistics in real time while recording (tuned for close-range soft speech) ================= */
    private fun updateRealtimeEnergy(samples: FloatArray) {
        val rms = vadManager.calcRms(samples)
        // Only count the energy of valid speech frames
        // Only count the energy of valid speech frames (lowered threshold)
        if (rms >= MIN_EFFECTIVE_SPEECH_RMS) {
            realtimeEnergySum += rms
            realtimeEnergyCount++
@@ -212,12 +204,10 @@ class VoiceController(

    /* ================= New: update frame statistics in real time while recording ================= */
    private fun updateRealtimeFrameStats() {
        // Get the latest frame state from VadManager
        realtimeTotalFrames = vadManager.getTotalFrames()
        realtimeSpeechFrames = vadManager.getSpeechFrames()
        realtimeContinuousSpeechFrames = vadManager.getContinuousSpeechFrames()
        // Update the continuous-frame flag in real time
        val currentFrameIsSpeech = vadManager.isSpeechDetected() // requires a new isSpeechDetected() method on VadManager
        val currentFrameIsSpeech = vadManager.isSpeechDetected()
        if (currentFrameIsSpeech) {
            realtimeContinuousSpeechFrames = if (realtimeLastFrameIsSpeech) realtimeContinuousSpeechFrames + 1 else 1
        } else {
@@ -228,17 +218,14 @@ class VoiceController(

    /* ================= New: realtime multi-person dialogue check while recording ================= */
    private fun checkMultiPersonDialogueRealtime(now: Long): Boolean {
        // Don't judge before the minimum multi-person dialogue duration is reached
        val duration = now - recordingStartMs
        if (duration < MULTI_DIALOGUE_MIN_DURATION) return false

        // Compute feature values in real time
        val avgEnergy = if (realtimeEnergyCount > 0) realtimeEnergySum / realtimeEnergyCount else 0f
        val peakAvgRatio = if (avgEnergy > 0) realtimePeakRms / avgEnergy else 0f
        val continuousRatio = if (realtimeSpeechFrames > 0) realtimeContinuousSpeechFrames.toFloat() / realtimeSpeechFrames else 0f
        val vadRatio = vadManager.activeSpeechRatio()

        // Multi-person dialogue decision logic (same as before, but run in real time)
        isMultiPersonDialogueDetected = duration >= MULTI_DIALOGUE_MIN_DURATION &&
            peakAvgRatio in MULTI_DIALOGUE_MIN_PEAK_AVG_RATIO..MULTI_DIALOGUE_MAX_PEAK_AVG_RATIO &&
            continuousRatio <= MULTI_DIALOGUE_MAX_CONTINUOUS_RATIO &&
@@ -247,21 +234,21 @@ class VoiceController(
        return isMultiPersonDialogueDetected
    }

    /* ================= Environment baseline calibration (kept; also called while recording) ================= */
    /* ================= Environment baseline calibration (tuned for close range, less noise-sensitive) ================= */
    private fun calibrateEnvBaseline(samples: FloatArray) {
        val rms = vadManager.calcRms(samples)
        // New: keep only values below baseline + margin, filtering out sudden noise bursts
        val validRms = if (rms < currentEnvBaseline + 0.005f) rms else currentEnvBaseline
        if (rms < 0.03f) {
        // Keep only values below baseline + margin, filtering out sudden noise bursts (lowered threshold)
        val validRms = if (rms < currentEnvBaseline + 0.002f) rms else currentEnvBaseline
        if (rms < 0.015f) {
            if (envNoiseBuffer.size >= BASELINE_WINDOW_SIZE) {
                envNoiseBuffer.removeFirst()
            }
            envNoiseBuffer.addLast(validRms) // update with the filtered value
            envNoiseBuffer.addLast(validRms)
            currentEnvBaseline = envNoiseBuffer.maxOrNull() ?: 0.001f
        }
    }

    /* ================= Wakeup ================= */
    /* ================= Wakeup-related methods ================= */
    private fun handleWakeupEvent() {
        if (state == VoiceState.UPLOADING) return
        stopBackendAudio?.invoke()
@@ -272,7 +259,6 @@ class VoiceController(
        waitSpeechFailStartMs = System.currentTimeMillis()
        waitSpeechStartMs = System.currentTimeMillis()

        // Core addition: reset the invalid-speech flag on wakeup (every wakeup starts a new session)
        hasInvalidSpeech = false
        currentTimeoutType = TimeoutType.IDLE_TIMEOUT

@@ -280,7 +266,6 @@ class VoiceController(
        audioBuffer.clear()
        vadManager.reset()
        vadStarted = false
        // Reset the realtime statistics variables
        resetRealtimeStats()
    }

@@ -297,7 +282,6 @@ class VoiceController(
        recordingStartMs = System.currentTimeMillis()
        audioBuffer.clear()
        audioBuffer.addAll(preBuffer)
        // Initialize the realtime statistics variables
        resetRealtimeStats()
        state = VoiceState.RECORDING
    }
@@ -305,20 +289,65 @@ class VoiceController(
    private fun onVadEnd(avgEnergy: Float, peakRms: Float) {
        if (state != VoiceState.RECORDING) return
        LogUtils.d(TAG, "🧠 VAD END | env baseline: $currentEnvBaseline")
        // Prefer the realtime energy statistics to avoid recomputation
        val realAvgEnergy = if (realtimeEnergyCount > 0) realtimeEnergySum / realtimeEnergyCount else avgEnergy
        val realPeakRms = if (realtimePeakRms > 0) realtimePeakRms else peakRms
        finishSentence(realAvgEnergy, realPeakRms)
    }

    /* ================= Finish recording (core: reuse the realtime results) ================= */
    /* ================= Core optimization: weak-voice filter for close-range scenes ================= */
    private fun filterWeakVoice(duration: Long, avgEnergy: Float, peakRms: Float): Boolean {
        // 1. Duration filter: only very short noise (<400 ms) is filtered
        if (duration < MIN_EFFECTIVE_VOICE_DURATION) {
            LogUtils.w(TAG, "❌ Weak-voice filter: duration ${duration}ms < ${MIN_EFFECTIVE_VOICE_DURATION}ms")
            return true
        }

        // 2. Frame-ratio filter: applies only to very-low-energy speech
        val voiceFrameRatio = if (realtimeTotalFrames > 0) realtimeSpeechFrames.toFloat() / realtimeTotalFrames else 0f
        if (avgEnergy < NORMAL_VOICE_ENERGY_THRESHOLD && voiceFrameRatio < MIN_VOICE_FRAME_RATIO) {
            LogUtils.w(TAG, "❌ Weak-voice filter: frame ratio ${voiceFrameRatio} < ${MIN_VOICE_FRAME_RATIO} (very low energy)")
            return true
        }

        // 3. Peak-energy filter: applies only to very-low-energy speech, with a much lower threshold
        val peakBaselineRatio = peakRms / currentEnvBaseline
        if (avgEnergy < NORMAL_VOICE_ENERGY_THRESHOLD && peakBaselineRatio < MIN_PEAK_ENERGY_RATIO) {
            LogUtils.w(TAG, "❌ Weak-voice filter: peak/baseline ${peakBaselineRatio} < ${MIN_PEAK_ENERGY_RATIO} (very low energy)")
            return true
        }

        // 4. Continuous-frame filter: applies only to very-low-energy speech, threshold lowered to 1
        if (avgEnergy < NORMAL_VOICE_ENERGY_THRESHOLD && realtimeContinuousSpeechFrames < MIN_CONTINUOUS_VOICE_FRAMES) {
            LogUtils.w(TAG, "❌ Weak-voice filter: continuous frames ${realtimeContinuousSpeechFrames} < ${MIN_CONTINUOUS_VOICE_FRAMES} (very low energy)")
            return true
        }

        // 5. Average-energy filter: applies only to extremely-low-energy speech
        val energyBaselineRatio = avgEnergy / currentEnvBaseline
        if (avgEnergy < 0.005f && energyBaselineRatio < 1.2f) {
            LogUtils.w(TAG, "❌ Weak-voice filter: energy/baseline ${energyBaselineRatio} < 1.2 (extremely low energy)")
            return true
        }

        // Normal speech (including close-range soft speech) passes directly
        return false
    }

    /* ================= Finish recording (core: tuned for close-range soft speech) ================= */
    private fun finishSentence(avgEnergy: Float = 0f, peakRms: Float = 0f) {
        val now = System.currentTimeMillis()
        val duration = now - recordingStartMs

        // ========== Step 1: basic filter (speech too short) ==========
        if (!vadStarted || duration < MIN_SPEECH_MS) {
            LogUtils.d(TAG, "❌ Speech too short: $duration ms | baseline: $currentEnvBaseline")
            // Core addition: invalid speech (too short), set hasInvalidSpeech to true
            hasInvalidSpeech = true
            resetToWaitSpeech()
            return
        }

        // ========== Step 2: dedicated weak-voice filter (only very weak noise is filtered) ==========
        if (filterWeakVoice(duration, avgEnergy, peakRms)) {
            hasInvalidSpeech = true
            resetToWaitSpeech()
            return
@@ -328,44 +357,40 @@ class VoiceController(
        val vadRatio = vadManager.activeSpeechRatio()
        val peakAvgRatio = if (avgEnergy > 0f) peakRms / avgEnergy else 0f

        // Reuse the realtime frame statistics directly, no need to re-fetch
        LogUtils.d(TAG, "📊 Recording info | duration: $duration ms | energy: $avgEnergy | peak/avg ratio: $peakAvgRatio | baseline: $currentEnvBaseline")
        LogUtils.d(TAG, "📊 Realtime frame stats | total frames: $realtimeTotalFrames | speech frames: $realtimeSpeechFrames | continuous speech frames: $realtimeContinuousSpeechFrames")

        // If multi-person dialogue was already detected while recording, filter it directly
        // Multi-person dialogue filter
        if (isMultiPersonDialogueDetected) {
            LogUtils.w(TAG, "❌ Filtered multi-person dialogue junk speech (realtime detection) | duration: $duration ms")
            // Core addition: invalid speech (multi-person dialogue), set hasInvalidSpeech to true
            hasInvalidSpeech = true
            resetToWaitSpeech()
            return
        }

        // ========== 1. Hard fallback: normal speech passes directly ==========
        // ========== 1. Hard fallback: normal speech passes directly (lowered thresholds) ==========
        val isNormalVoice = avgEnergy >= MIN_NORMAL_VOICE_ENERGY && vadRatio >= MIN_NORMAL_VOICE_VAD_RATIO
        if (isNormalVoice) {
            LogUtils.i(TAG, "✅ Normal speech force-passed | energy: $avgEnergy ≥ $MIN_NORMAL_VOICE_ENERGY | ratio: $vadRatio ≥ $MIN_NORMAL_VOICE_VAD_RATIO")
            audioBuffer.clear()
            state = VoiceState.UPLOADING
            onFinalAudio(audio)
            resetRealtimeStats() // reset the realtime statistics
            // Core addition: after valid speech passes, reset the invalid-speech flag (future timeouts are re-evaluated)
            resetRealtimeStats()
            hasInvalidSpeech = false
            return
        }

        // ========== 2. Far-field filter: only very low energy is filtered ==========
        // ========== 2. Far-field filter (rarely triggers at close range) ==========
        val isFarField = avgEnergy < MAX_FAR_FIELD_ENERGY
        val isInvalidPeakRatio = peakAvgRatio < MIN_VALID_PEAK_AVG_RATIO
        if (isFarField && isInvalidPeakRatio) {
            LogUtils.w(TAG, "❌ Far-field/invalid speech filtered | energy: $avgEnergy < $MAX_FAR_FIELD_ENERGY")
            // Core addition: invalid speech (far field), set hasInvalidSpeech to true
            hasInvalidSpeech = true
            resetToWaitSpeech()
            return
        }

        // ========== 3. Discontinuity check: extremely lenient ==========
        // ========== 3. Discontinuity check (greatly relaxed) ==========
        val continuousRatio = if (realtimeSpeechFrames > 0) realtimeContinuousSpeechFrames.toFloat() / realtimeSpeechFrames else 0f
        val peakPositionRatio = vadManager.getPeakPositionRatio()
        val isDiscontinuous = continuousRatio < MIN_CONTINUOUS_FRAME_RATIO &&
@@ -373,13 +398,12 @@ class VoiceController(
            peakPositionRatio > MAX_PEAK_POSITION_RATIO
        if (isDiscontinuous) {
            LogUtils.w(TAG, "❌ Discontinuous noise filtered | continuous ratio: $continuousRatio < $MIN_CONTINUOUS_FRAME_RATIO")
            // Core addition: invalid speech (discontinuous noise), set hasInvalidSpeech to true
            hasInvalidSpeech = true
            resetToWaitSpeech()
            return
        }

        // ========== 4. Per-scene dynamic threshold computation (core logic kept) ==========
        // ========== 4. Per-scene dynamic threshold computation (much lower coefficients) ==========
        val isQuietEnv = currentEnvBaseline < BASELINE_QUIET_THRESHOLD
        val thresholdConfig = when {
            duration in SHORT_SPEECH_MIN..SHORT_SPEECH_MAX -> {
@@ -406,18 +430,17 @@ class VoiceController(
            }
        }

        // ========== 5. Per-scene threshold filter ==========
        // ========== 5. Per-scene threshold filter (lowered thresholds) ==========
        val energyPass = avgEnergy >= thresholdConfig.energyThreshold
        val vadRatioPass = vadRatio >= thresholdConfig.vadRatioThreshold
        if (!energyPass || !vadRatioPass) {
            LogUtils.w(TAG, "❌ Low-energy speech threshold filter | energy: $avgEnergy < ${thresholdConfig.energyThreshold} | ratio: $vadRatio < ${thresholdConfig.vadRatioThreshold} | scene: ${thresholdConfig.scene}")
            // Core addition: invalid speech (low energy), set hasInvalidSpeech to true
            hasInvalidSpeech = true
            resetToWaitSpeech()
            return
        }

        // ========== 6. Score check: extremely lenient ==========
        // ========== 6. Score check (bar lowered to 1) ==========
        var score = 0
        score += when {
            duration >= 4000 -> 3
@@ -430,7 +453,6 @@ class VoiceController(
        val pass = score >= thresholdConfig.minScore
        if (!pass) {
            LogUtils.w(TAG, "❌ Insufficient score, filtered | total score: $score < ${thresholdConfig.minScore} | scene: ${thresholdConfig.scene}")
            // Core addition: invalid speech (insufficient score), set hasInvalidSpeech to true
            hasInvalidSpeech = true
            resetToWaitSpeech()
            return
@@ -440,13 +462,12 @@ class VoiceController(
        audioBuffer.clear()
        state = VoiceState.UPLOADING
        onFinalAudio(audio)
        resetRealtimeStats() // reset the realtime statistics
        // Core addition: after valid speech passes, reset the invalid-speech flag
        resetRealtimeStats()
        hasInvalidSpeech = false
        LogUtils.i(TAG, "✅ Low-energy speech passed | duration: $duration ms | energy: $avgEnergy | scene: ${thresholdConfig.scene}")
        LogUtils.i(TAG, "✅ Close-range soft speech passed | duration: $duration ms | energy: $avgEnergy | scene: ${thresholdConfig.scene}")
    }

    /* ================= New: reset realtime statistics variables ================= */
    /* ================= Reset realtime statistics variables ================= */
    private fun resetRealtimeStats() {
        realtimeEnergySum = 0f
        realtimeEnergyCount = 0
@@ -458,6 +479,7 @@ class VoiceController(
        isMultiPersonDialogueDetected = false
    }

    /* ================= Playback/upload/reset callbacks ================= */
    fun onPlayStartPrompt() {
        LogUtils.d(TAG, "🎵 Playing prompt tone | baseline: $currentEnvBaseline")
@@ -471,7 +493,6 @@ class VoiceController(
    }

    fun onPlayStartBackend() {
        // Only switch state when the upload has finished (successfully) and state is UPLOADING
        if (state != VoiceState.UPLOADING) {
            LogUtils.w(TAG, "🎶 Upload not finished, refusing to switch to PLAYING_BACKEND | current state: $state")
            return
@@ -490,42 +511,12 @@ class VoiceController(
        if (state != VoiceState.UPLOADING) return
        LogUtils.d(TAG, "📤 Upload finished | success: $success | baseline: $currentEnvBaseline")

        if (success) {
            // Upload succeeded: start the coroutine timeout task
            startPlayWaitTimer()
        } else {
            // Upload failed: cancel the timeout task and reset the state
            cancelPlayWaitTimer()
        if (!success) {
            speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
            state = VoiceState.WAIT_SPEECH_COOLDOWN
        }
    }

    private fun startPlayWaitTimer() {
        // Cancel any old task first to avoid duplicates
        cancelPlayWaitTimer()

        // Start the coroutine timeout task (Dispatchers.Main keeps state changes on the main thread)
        playWaitJob = GlobalScope.launch {
            delay(PLAY_WAIT_TIMEOUT_MS) // suspends for 3 s without blocking a thread
            LogUtils.w(TAG, "⏱ Playback wait timed out (${PLAY_WAIT_TIMEOUT_MS}ms), resetting state automatically")

            // Reset the state after the timeout (synchronized to avoid multi-thread conflicts)
            synchronized(this@VoiceController) {
                speechEnableAtMs = System.currentTimeMillis() + SPEECH_COOLDOWN_MS
                state = VoiceState.WAIT_SPEECH_COOLDOWN
            }
        }
    }

    // ================= Replacement: cancel the coroutine task =================
    private fun cancelPlayWaitTimer() {
        playWaitJob?.cancel() // cancel the coroutine (the suspended call stops immediately)
        playWaitJob = null
        LogUtils.d(TAG, "🔄 Playback wait coroutine cancelled")
    }

    private fun resetToWaitSpeech() {
        LogUtils.d(TAG, "🔄 Reset to wait-for-speech | baseline: $currentEnvBaseline | invalid speech flagged: $hasInvalidSpeech")
        val now = System.currentTimeMillis()
@@ -537,7 +528,7 @@ class VoiceController(
        audioBuffer.clear()
        vadManager.reset()
        vadStarted = false
        resetRealtimeStats() // reset the realtime statistics
        resetRealtimeStats()
        state = VoiceState.WAIT_SPEECH
        if (waitSpeechFailStartMs == 0L) waitSpeechFailStartMs = System.currentTimeMillis()
    }
@@ -553,13 +544,11 @@ class VoiceController(
        waitSpeechFailStartMs = 0L
        envNoiseBuffer.clear()
        currentEnvBaseline = 0.001f
        resetRealtimeStats() // reset the realtime statistics
        // Core addition: when resetting all state, also reset the invalid-speech flag and timeout type
        resetRealtimeStats()
        hasInvalidSpeech = false
        currentTimeoutType = TimeoutType.IDLE_TIMEOUT
        LogUtils.d(TAG, "🔄 Environment baseline reset | new baseline: $currentEnvBaseline | invalid-speech flag reset")
        state = VoiceState.WAIT_WAKEUP
        cancelPlayWaitTimer()
    }

    fun release() {
@@ -567,11 +556,9 @@ class VoiceController(
        wakeupManager.release()
        vadManager.reset()
        envNoiseBuffer.clear()
        resetRealtimeStats() // reset the realtime statistics
        // Core addition: reset the flags when releasing resources
        resetRealtimeStats()
        hasInvalidSpeech = false
        currentTimeoutType = TimeoutType.IDLE_TIMEOUT
        cancelPlayWaitTimer()
    }

    private fun cachePreBuffer(samples: FloatArray) {
@@ -54,6 +54,7 @@ import com.zs.smarthuman.utils.AudioDebugUtil
import com.zs.smarthuman.utils.AudioPcmUtil
import com.zs.smarthuman.utils.DangerousUtils
import com.zs.smarthuman.utils.LogFileUtils
import com.zs.smarthuman.utils.SimulateStreamingAsr

import com.zs.smarthuman.utils.UnityPlayerHolder
import com.zs.smarthuman.utils.ViewSlideAnimator
@@ -86,6 +87,9 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()

    private var versionUpdateDialog: VersionUpdateDialog? = null

    private val PLAY_WAIT_TIMEOUT_MS = 2000L // unified 2-second timeout threshold
    private var startPlayTimeoutJob: Job? = null // one timeout Job managing every playback scenario

    override fun getViewBinding(): ActivityMainBinding = ActivityMainBinding.inflate(layoutInflater)
    override fun initView() {
        UnityPlayerHolder.getInstance().initialize(this)
@@ -97,6 +101,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
    }

    override fun initData() {
        initAsrModel()
        PermissionUtils.permissionGroup(PermissionConstants.MICROPHONE)
            .callback(object : PermissionUtils.FullCallback {
                override fun onGranted(granted: List<String?>) {
@@ -154,6 +159,11 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
                }
                Toaster.showShort(it)
                voiceController?.onUploadFinished(true)
                startPlayTimeoutJob?.cancel()
                startPlayTimeoutJob = lifecycleScope.launch {
                    delay(PLAY_WAIT_TIMEOUT_MS)
                    voiceController?.onPlayEndBackend()
                }
            }
        }
    }
@@ -181,38 +191,42 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
        voiceController = VoiceController(
            assetManager = assets,
            onWakeup = {
                Log.d("lrs", "Current state: wakeup succeeded")
                // Stop any earlier audio before each wakeup
                // UnityPlayerHolder.getInstance().cancelPCM()
                UnityPlayerHolder.getInstance().cancelPCM()
                UnityPlayerHolder.getInstance()
                    .sendVoiceToUnity(
                        voiceInfo = mutableListOf<VoiceBeanResp>().apply {
                            add(
                                VoiceBeanResp(
                                    audioUrl = UserInfoManager.userInfo?.wakeUpAudioUrl
                                        ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
                                    audioUrl = "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"
                                )
                            )
                        }
                    )

                startPlayTimeoutJob?.cancel()
                startPlayTimeoutJob = lifecycleScope.launch {
                    delay(PLAY_WAIT_TIMEOUT_MS)
                    voiceController?.onPlayEndPrompt()
                }
            },
            onFinalAudio = { audio ->
                Log.d("lrsxx", "Speech detected, length=${audio.size}")
                mViewModel?.uploadVoice(
                    AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
                    1
                )
                // loadLocalJsonAndPlay()
                // mViewModel?.uploadVoice(
                //     AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
                //     1
                // )
                loadLocalJsonAndPlay()
                val file = File(
                    getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
                    "xxx.wav"
                )
                AudioDebugUtil.saveFloatPcmAsWav(audio, file)
                LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
                // lifecycleScope.launch(Dispatchers.Main) {
                //
                //     mVerticalAnimator?.show()
                // }
                lifecycleScope.launch(Dispatchers.Main) {

                    mVerticalAnimator?.show()
                }
            },
            onStateChanged = { state ->
@@ -233,7 +247,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
                        voiceInfo = mutableListOf<VoiceBeanResp>().apply {
                            add(
                                VoiceBeanResp(
                                    audioUrl = "https://static.seerteach.net/aidialogue/userWakeUpAudio/ttsmaker-file-2025-12-31-16-2-51.mp3"
                                    audioUrl = UserInfoManager.userInfo?.endAudioUrl ?: ""
                                )
                            )
                        }
@@ -244,6 +258,11 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
        )
    }

    private fun initAsrModel() {
        lifecycleScope.launch(Dispatchers.IO) {
            SimulateStreamingAsr.initOfflineRecognizer(App.getInstance())
        }
    }
    override fun receivedIMMsg(msg: SingleMessage) {
        when (msg.msgContentType) {
            MessageContentType.RECEIVE_VOICE_STREAM.msgContentType -> {
@@ -415,9 +434,6 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()

    private var promptPlaying = false
    private var backPlaying = false
    private var promptTimeoutJob: Job? = null
    private val PROMPT_PLAY_TIMEOUT_MS = 3000L // 3 seconds

    fun onAudioProgressUpdated( // Unity calls this method to report audio progress
        progress: Float,
@@ -427,7 +443,6 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
        audioUrl: String
    ) {
        val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl
            ?: "https://static.seerteach.net/aidialogue/systemVoice/aliyun-nv.mp3"

        if (audioUrl != wakeupUrl) return

@@ -436,13 +451,8 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
        if (!promptPlaying) {
            promptPlaying = true
            voiceController?.onPlayStartPrompt()

            promptTimeoutJob = lifecycleScope.launch {
                delay(PROMPT_PLAY_TIMEOUT_MS)
                promptPlaying = false
                voiceController?.onPlayEndPrompt()
                promptTimeoutJob?.cancel()
            }
            startPlayTimeoutJob?.cancel()
            LogUtils.eTag("MainActivity", "Wait timed out")
        }
    }
@@ -450,7 +460,6 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
        if (promptPlaying) {
            promptPlaying = false
            voiceController?.onPlayEndPrompt()
            promptTimeoutJob?.cancel()
        }
    }
}
@@ -464,14 +473,15 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
        isFinal: Boolean
    ) {
        when (state) {
            1 -> { // play
            1 -> {
                if (!backPlaying) {
                    backPlaying = true
                    voiceController?.onPlayStartBackend()
                    startPlayTimeoutJob?.cancel()
                }
            }

            3 -> { // complete
            3 -> {
                if (backPlaying) {
                    backPlaying = false
                    voiceController?.onPlayEndBackend()
@@ -0,0 +1,155 @@
package com.zs.smarthuman.utils

import android.content.Context
import android.content.res.AssetManager
import com.blankj.utilcode.util.LogUtils

import com.k2fsa.sherpa.onnx.OfflineModelConfig
import com.k2fsa.sherpa.onnx.OfflineRecognizer
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
import com.k2fsa.sherpa.onnx.OfflineWenetCtcModelConfig
import com.k2fsa.sherpa.onnx.Vad
import com.k2fsa.sherpa.onnx.getVadModelConfig
import java.io.File
import java.io.FileOutputStream
import java.io.InputStream
import java.io.OutputStream


fun assetExists(assetManager: AssetManager, path: String): Boolean {
    val dir = path.substringBeforeLast('/', "")
    val fileName = path.substringAfterLast('/')

    val files = assetManager.list(dir) ?: return false
    return files.contains(fileName)
}

fun copyAssetToInternalStorage(path: String, context: Context): String {
    val targetRoot = context.filesDir
    val outFile = File(targetRoot, path)

    if (!assetExists(context.assets, path = path)) {
        // for a context binary: if it does not exist, we return a path
        // that can be written to
        outFile.parentFile?.mkdirs()
        LogUtils.i("VoiceController", "$path does not exist, return ${outFile.absolutePath}")
        return outFile.absolutePath
    }

    if (outFile.exists()) {
        val assetSize = context.assets.open(path).use { it.available() }
        if (outFile.length() == assetSize.toLong()) {
            LogUtils.i("VoiceController", "$targetRoot/$path already exists, skip copying, return $targetRoot/$path")
            return "$targetRoot/$path"
        }
    }

    outFile.parentFile?.mkdirs()

    context.assets.open(path).use { input: InputStream ->
        FileOutputStream(outFile).use { output: OutputStream ->
            input.copyTo(output)
        }
    }
    LogUtils.i("VoiceController", "Copied $path to $targetRoot/$path")

    return outFile.absolutePath
}


object SimulateStreamingAsr {
    private var _recognizer: OfflineRecognizer? = null
    val recognizer: OfflineRecognizer
        get() {
            return _recognizer!!
        }

    fun initOfflineRecognizer(context: Context) {
        synchronized(this) {
            if (_recognizer != null) {
                return
            }

            val wenetConfig = OfflineWenetCtcModelConfig(
                model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx",
            )

            val modelConfig = OfflineModelConfig(
                wenetCtc = wenetConfig,
                tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt"
            )
            val config = OfflineRecognizerConfig(
                modelConfig = modelConfig,
            )

            var assetManager: AssetManager? = context.assets

            if (config.modelConfig.provider == "qnn") {
                // We assume you have copied files like libQnnHtpV81Skel.so to jniLibs/arm64-v8a
                LogUtils.i("VoiceController", "nativelibdir: ${context.applicationInfo.nativeLibraryDir}")

                // If we don't set the environment variable ADSP_LIBRARY_PATH, we will see
                // error code 1008 from qnn_interface.deviceCreate(). See also
                // https://workbench.aihub.qualcomm.com/docs/hub/faq.html#why-am-i-seeing-error-1008-when-trying-to-use-htp
                OfflineRecognizer.prependAdspLibraryPath(context.applicationInfo.nativeLibraryDir)

                // for qnn, we need to copy the *.so files from the assets folder to the sd card
                if (config.modelConfig.senseVoice.qnnConfig.backendLib.isEmpty() && config.modelConfig.zipformerCtc.qnnConfig.backendLib.isEmpty()) {
                    LogUtils.i("VoiceController", "You should provide libQnnHtp.so for qnn")
                    throw IllegalArgumentException("You should provide libQnnHtp.so for qnn")
                }
                config.modelConfig.tokens =
                    copyAssetToInternalStorage(config.modelConfig.tokens, context)

                if (config.modelConfig.senseVoice.model.isNotEmpty() || assetExists(
                        context.assets,
                        path = config.modelConfig.senseVoice.qnnConfig.contextBinary
                    )
                ) {
                    if (config.modelConfig.senseVoice.model.isNotEmpty()) {
                        config.modelConfig.senseVoice.model =
                            copyAssetToInternalStorage(config.modelConfig.senseVoice.model, context)
                    }

                    config.modelConfig.senseVoice.qnnConfig.contextBinary =
                        copyAssetToInternalStorage(
                            config.modelConfig.senseVoice.qnnConfig.contextBinary,
                            context
                        )
                } else if (config.modelConfig.zipformerCtc.model.isNotEmpty()) {
                    config.modelConfig.zipformerCtc.model =
                        copyAssetToInternalStorage(config.modelConfig.zipformerCtc.model, context)

                    config.modelConfig.zipformerCtc.qnnConfig.contextBinary =
                        copyAssetToInternalStorage(
                            config.modelConfig.zipformerCtc.qnnConfig.contextBinary,
                            context
                        )
                }

                if (config.hr.lexicon.isNotEmpty()) {
                    config.hr.lexicon = copyAssetToInternalStorage(config.hr.lexicon, context)
                }

                if (config.hr.ruleFsts.isNotEmpty()) {
                    // this assumes there is only one fst; otherwise, you need to copy each fst separately
                    config.hr.ruleFsts = copyAssetToInternalStorage(config.hr.ruleFsts, context)
                }

                assetManager = null
            }

            _recognizer = OfflineRecognizer(
                assetManager = assetManager,
                config = config,
            )

            LogUtils.i("VoiceController", "sherpa-onnx offline recognizer initialized")
        }
    }

}
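A note on the file above: the `recognizer` getter force-unwraps `_recognizer`, so it must not be touched before `initOfflineRecognizer()` has completed (MainActivity calls it from `initAsrModel()` on Dispatchers.IO). A hedged sketch of the intended call sequence, using the standard sherpa-onnx offline stream calls; the wrapper function, scope, and 16 kHz buffer are illustrative assumptions, not part of this commit:

import android.content.Context
import com.blankj.utilcode.util.LogUtils
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.launch

// Hypothetical helper: initialize off the main thread, then decode one utterance.
fun recognizeAsync(scope: CoroutineScope, context: Context, audio: FloatArray) {
    scope.launch(Dispatchers.IO) {
        // Safe to call repeatedly: initOfflineRecognizer() returns early once
        // _recognizer is set, guarded by synchronized(this).
        SimulateStreamingAsr.initOfflineRecognizer(context)
        val stream = SimulateStreamingAsr.recognizer.createStream()
        stream.acceptWaveform(audio, sampleRate = 16000)
        SimulateStreamingAsr.recognizer.decode(stream)
        val text = SimulateStreamingAsr.recognizer.getResult(stream).text
        stream.release()
        LogUtils.i("VoiceController", "ASR result: $text")
    }
}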
BIN app/src/main/jniLibs/arm64-v8a/libandroidx.graphics.path.so (new file)
Binary file not shown.