Temporarily remove the ASR model; some devices cannot handle it
parent a1d0520789
commit 8dd5e68ded
Binary file not shown.
@@ -1,4 +0,0 @@
-# Introduction
-
-Model in this directory is converted from
-https://huggingface.co/ASLP-lab/WSYue-ASR/tree/main/u2pp_conformer_yue
Binary file not shown.
File diff suppressed because it is too large
@@ -2,17 +2,12 @@ package com.zs.smarthuman.sherpa
 
 import android.content.res.AssetManager
 import android.util.Log
-import com.k2fsa.sherpa.onnx.OfflineModelConfig
-import com.k2fsa.sherpa.onnx.OfflineRecognizer
-import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
-import com.k2fsa.sherpa.onnx.OfflineWenetCtcModelConfig
-import com.k2fsa.sherpa.onnx.getFeatureConfig
 import java.util.ArrayDeque
 
 class VoiceController(
     assetManager: AssetManager,
     private val onWakeup: () -> Unit,
-    private val onFinalAudio: (FloatArray, String) -> Unit, // change: also passes back the recognized text
+    private val onFinalAudio: (FloatArray) -> Unit,
     private val idleTimeoutSeconds: Int = 10,
     private val maxRecordingSeconds: Int = 10,
     private val onStateChanged: ((VoiceState) -> Unit)? = null,
@@ -59,57 +54,6 @@ class VoiceController(
         onSpeechEnd = { avgEnergy, peakRms -> onVadEnd(avgEnergy, peakRms) }
     )
 
-    // Initialize the non-streaming (offline) ASR
-    private var offlineRecognizer: OfflineRecognizer? = null
-
-    init {
-        offlineRecognizer = initOfflineRecognizer(assetManager)
-    }
-
-    private fun initOfflineRecognizer(assetManager: AssetManager): OfflineRecognizer {
-        // WenetSpeech model configuration
-        val wenetConfig = OfflineWenetCtcModelConfig(
-            model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx"
-        )
-
-        val modelConfig = OfflineModelConfig(
-            wenetCtc = wenetConfig,
-            tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt",
-            numThreads = 4,
-            debug = true
-        )
-
-        val featConfig = getFeatureConfig(sampleRate = sampleRate, featureDim = 80)
-
-        return OfflineRecognizer(
-            assetManager = assetManager,
-            config = OfflineRecognizerConfig(
-                featConfig = featConfig,
-                modelConfig = modelConfig
-            )
-        )
-    }
-
-    // runSecondPass uses offlineRecognizer directly
-    private fun runSecondPass(samples: FloatArray): String {
-        return try {
-            offlineRecognizer?.let {
-                val stream = it.createStream()
-                stream.acceptWaveform(samples, sampleRate)
-                it.decode(stream)
-                val result = it.getResult(stream)
-                stream.release()
-                result.text
-            } ?: ""
-        } catch (e: Exception) {
-            Log.e(TAG, "Error during ASR processing: ${e.message}")
-            ""
-        }
-    }
-
 
     /* ================= Audio entry ================= */
     fun acceptAudio(samples: FloatArray) {
         cachePreBuffer(samples)
@@ -228,24 +172,6 @@ class VoiceController(
             return
         }
 
-        // Check whether the audio data is valid
-        if (audio.isEmpty()) {
-            Log.d(TAG, "❌ Audio is empty")
-            resetToWaitSpeech()
-            return
-        }
-
-        var asrText = runSecondPass(audio)
-
-        if (asrText.isEmpty()) {
-            resetToWaitSpeech()
-            Log.d(TAG, "ASR produced no result")
-            return
-        } else {
-            Log.d(TAG, "Recognized: ${asrText}")
-        }
-
-
         // Scoring logic
         var score = 0
         when {
@@ -276,7 +202,7 @@ class VoiceController(
 
         audioBuffer.clear()
         state = VoiceState.UPLOADING
-        onFinalAudio(audio, asrText) // pass the audio and the recognized text
+        onFinalAudio(audio) // pass the captured audio upstream
     }
 
     /* ================= Playback callback ================= */
@@ -330,7 +256,6 @@ class VoiceController(
     fun release() {
         wakeupManager.release()
         vadManager.reset()
-        offlineRecognizer?.release()
     }
 
     /* ================= Utils ================= */
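Note: the hunks above drop the offline recognizer entirely. If the model is only too heavy for some devices (per the commit message), an alternative, not part of this commit, would be to keep the sherpa-onnx setup but gate it behind a runtime capability check so constrained devices simply skip loading the model. A minimal Kotlin sketch, reusing the OfflineRecognizer calls from the removed hunk; the ActivityManager check, the 256 MB heap threshold, and the maybeInitOfflineRecognizer name are assumptions for illustration only:

import android.app.ActivityManager
import android.content.Context
import android.content.res.AssetManager
import android.util.Log
import com.k2fsa.sherpa.onnx.OfflineRecognizer

// Hypothetical guard (not in this commit): only build the non-streaming
// recognizer when the device looks capable of holding the int8 model.
private fun maybeInitOfflineRecognizer(
    context: Context,
    assetManager: AssetManager
): OfflineRecognizer? {
    val am = context.getSystemService(Context.ACTIVITY_SERVICE) as ActivityManager
    // Assumed threshold: skip the model on low-RAM devices or small app heaps.
    if (am.isLowRamDevice || am.memoryClass < 256) {
        Log.w(TAG, "Skipping offline ASR: device too constrained")
        return null
    }
    // initOfflineRecognizer(...) is the function removed in the hunk above.
    return initOfflineRecognizer(assetManager)
}

Callers would then treat a null recognizer as "ASR disabled" instead of deleting the code path.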
@@ -10,6 +10,7 @@ import android.media.AudioFormat
 import android.media.AudioRecord
 import android.media.MediaRecorder
 import android.media.audiofx.AcousticEchoCanceler
+import android.media.audiofx.NoiseSuppressor
 import android.os.Bundle
 import android.os.Environment
 import android.os.IBinder
@@ -88,7 +89,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
     private var voiceController: VoiceController? = null
     private var audioRecord: AudioRecord? = null
     private var isRecording = false
-    private val audioSource = MediaRecorder.AudioSource.VOICE_COMMUNICATION
+    private val audioSource = MediaRecorder.AudioSource.MIC
     private val sampleRateInHz = 16000
     private val channelConfig = AudioFormat.CHANNEL_IN_MONO
     private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
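The switch from VOICE_COMMUNICATION to MIC in the hunk above gives up the platform's built-in voice-call preprocessing, which is why the later hunks enable AcousticEchoCanceler and NoiseSuppressor by hand. A small sketch of that trade-off, assuming nothing beyond the imports already in this file; pickAudioSource is a hypothetical helper, not code from this commit:

// Hypothetical helper (not in this commit): capture raw audio from MIC when
// software AEC/NS are available, otherwise fall back to the preprocessed source.
private fun pickAudioSource(): Int {
    return if (AcousticEchoCanceler.isAvailable() && NoiseSuppressor.isAvailable()) {
        MediaRecorder.AudioSource.MIC                  // raw capture + manual effects
    } else {
        MediaRecorder.AudioSource.VOICE_COMMUNICATION  // let the platform preprocess
    }
}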
@@ -204,12 +205,12 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
                     }
                 )
             },
-            onFinalAudio = { audio, asrText ->
-                Log.d("lrsxx", "Speech detected, length=${audio.size}, text: ${asrText}")
-                mViewModel?.uploadVoice(
-                    AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
-                    1
-                )
+            onFinalAudio = { audio ->
+                Log.d("lrsxx", "Speech detected, length=${audio.size}")
+                // mViewModel?.uploadVoice(
+                //     AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
+                //     1
+                // )
                 loadLocalJsonAndPlay()
                 val file = File(
                     getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
@@ -267,7 +268,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
         lifecycleScope.launch(Dispatchers.IO) {
             // UnityPlayerHolder.getInstance()
             //     .startTalking(msg.content)
-            loadLocalJsonAndPlay()
+            // loadLocalJsonAndPlay()
         }
     }
 }
@@ -326,6 +327,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
             return false
         }
         enableSystemAec(audioRecord!!)
+        enableSystemNs(audioRecord!!)
         return true
     }
 
@@ -343,8 +345,23 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
 
         Log.d("VoiceService", "✅ System AEC enabled")
     }
+
+    private var noiseSuppressor: NoiseSuppressor? = null
+
+    private fun enableSystemNs(record: AudioRecord) {
+        if (!NoiseSuppressor.isAvailable()) {
+            Log.w("VoiceService", "System NS not available")
+            return
+        }
+        noiseSuppressor = NoiseSuppressor.create(record.audioSessionId)
+        noiseSuppressor?.enabled = true
+        Log.d("VoiceService", "✅ System NoiseSuppressor enabled")
+    }
+
+    // private val audioBuffer = mutableListOf<Float>()
     // Start recording
     private fun startRecording() {
         if (isRecording) return
@@ -366,7 +383,9 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
                 if (read > 0) {
                     // Reuse floatBuf instead of allocating a new FloatArray every read
                     for (i in 0 until read) floatBuf[i] = buf[i] / 32768f
-                    voiceController?.acceptAudio(floatBuf.copyOf(read))
+                    val gg = floatBuf.copyOf(read)
+                    voiceController?.acceptAudio(gg)
+                    // audioBuffer.addAll(gg.asList())
                 }
             }
         }
@@ -393,6 +412,13 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
             release()
             audioRecord = null
         }
+        // val file = File(
+        //     getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
+        //     "xxx.wav"
+        // )
+        // AudioDebugUtil.saveFloatPcmAsWav(audioBuffer.toFloatArray(), file)
+        // LogUtils.dTag("audioxx", "WAV saved: ${file.path}")
+        // audioBuffer.clear()
     }
 
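One detail the visible hunks leave open: NoiseSuppressor.create() returns an AudioEffect tied to the recording session, so the new noiseSuppressor field should be released when the AudioRecord is torn down. A minimal cleanup sketch, assuming it would be called from the same place that releases audioRecord; releaseAudioEffects is a hypothetical name, not part of this commit:

// Hypothetical cleanup (not shown in this diff): free the audio effect when
// recording stops so the audio session does not leak it.
private fun releaseAudioEffects() {
    noiseSuppressor?.release()
    noiseSuppressor = null
}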