临时提交可以播放pcm的类

This commit is contained in:
林若思 2026-01-15 20:01:22 +08:00
parent d47362ca38
commit cdc189c5f4
5 changed files with 365 additions and 87 deletions

View File

@ -13,7 +13,7 @@ class ApiService {
const val GET_USER_INFO_URL = "iot/info/getUserInfo"
const val UPLOAD_RECORD_VOICE_URL = "iot/chat"
const val UPLOAD_RECORD_VOICE_URL = "iot/chat/stream"
const val VERSION_UPDATE_URL = "iot/info/getLatestVersion"
}

View File

@ -38,6 +38,7 @@ import com.zs.smarthuman.R
import com.zs.smarthuman.base.BaseActivity
import com.zs.smarthuman.base.BaseViewModelActivity
import com.zs.smarthuman.bean.AudioDTO
import com.zs.smarthuman.bean.LmChatDTO
import com.zs.smarthuman.bean.NetworkStatusEventMsg
import com.zs.smarthuman.bean.UserInfoResp
import com.zs.smarthuman.bean.VersionUpdateResp
@ -45,6 +46,7 @@ import com.zs.smarthuman.bean.VoiceBeanResp
import com.zs.smarthuman.common.UserInfoManager
import com.zs.smarthuman.databinding.ActivityMainBinding
import com.zs.smarthuman.http.ApiResult
import com.zs.smarthuman.http.ApiService
import com.zs.smarthuman.im.chat.MessageContentType
import com.zs.smarthuman.im.chat.bean.SingleMessage
import com.zs.smarthuman.kt.releaseIM
@ -52,7 +54,6 @@ import com.zs.smarthuman.sherpa.TimeoutType
import com.zs.smarthuman.sherpa.VoiceController
import com.zs.smarthuman.toast.Toaster
import com.zs.smarthuman.utils.AudioDebugUtil
import com.zs.smarthuman.utils.AudioPcmUtil
import com.zs.smarthuman.utils.DangerousUtils
import com.zs.smarthuman.utils.LogFileUtils
import com.zs.smarthuman.utils.PcmStreamPlayer
@ -68,6 +69,13 @@ import kotlinx.coroutines.Job
import kotlinx.coroutines.delay
import kotlinx.coroutines.flow.catch
import kotlinx.coroutines.launch
import okhttp3.Request
import okhttp3.Response
import okhttp3.sse.EventSource
import okhttp3.sse.EventSourceListener
import okhttp3.sse.EventSources
import okhttp3.sse.EventSources.createFactory
import rxhttp.RxHttpPlugins
import rxhttp.toDownloadFlow
import rxhttp.wrapper.param.RxHttp
import java.io.File
@ -82,7 +90,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
private var voiceController: VoiceController? = null
private var audioRecord: AudioRecord? = null
private var isRecording = false
private val audioSource = MediaRecorder.AudioSource.VOICE_COMMUNICATION
private val audioSource = MediaRecorder.AudioSource.MIC
private val sampleRateInHz = 16000
private val channelConfig = AudioFormat.CHANNEL_IN_MONO
private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
@ -93,6 +101,9 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
private val PLAY_WAIT_TIMEOUT_MS = 2000L // 统一2秒超时阈值
private var startPlayTimeoutJob: Job? = null // 统一管理所有播放场景的超时Job
private var mEventSources: EventSource? = null
private var isManualCancel = false
override fun getViewBinding(): ActivityMainBinding = ActivityMainBinding.inflate(layoutInflater)
override fun initView() {
UnityPlayerHolder.getInstance().initialize(this)
@ -193,6 +204,8 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
voiceController = VoiceController(
assetManager = assets,
onWakeup = {
cancelSSE()
voicePlayer?.onWakeupStop()
UnityPlayerHolder.getInstance().cancelPCM()
UnityPlayerHolder.getInstance()
.sendVoiceToUnity(
@ -206,10 +219,12 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
}
},
onFinalAudio = { audio ->
mViewModel?.uploadVoice(
AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)),
1
)
sendRecordVoiceToServer(AudioPcmUtil.floatToPcm16Base64(audio))
// mViewModel?.uploadVoice(
//
// AudioPcmUtil.floatToPcm16Base64(audio),
// 1
// )
// loadLocalJsonAndPlay()
// val file = File(
// getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(),
@ -217,10 +232,10 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
// )
// AudioDebugUtil.saveFloatPcmAsWav(audio, file)
// LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}")
lifecycleScope.launch(Dispatchers.Main) {
mVerticalAnimator?.show()
}
// lifecycleScope.launch(Dispatchers.Main) {
//
// mVerticalAnimator?.show()
// }
},
onStateChanged = { state ->
@ -246,9 +261,10 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
}
)
}
private val voicePlayer = VoiceStreamPlayer().apply {
onPlayStart = { id ->
LogUtils.d("🎵 开始播放 audioId=$id")
LogUtils.dTag("lrsxxx", "🎵 开始播放 audioId=$id")
startPlayTimeoutJob?.cancel()
voiceController?.onPlayStartBackend()
}
@ -257,15 +273,16 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
voiceController?.onPlayEndBackend()
}
}
override fun receivedIMMsg(msg: SingleMessage) {
when (msg.msgContentType) {
MessageContentType.RECEIVE_VOICE_STREAM.msgContentType -> {
lifecycleScope.launch(Dispatchers.IO) {
// UnityPlayerHolder.getInstance().startTalking(msg.content)
val audioDTO = GsonUtils.fromJson(msg.content, AudioDTO::class.java)
voicePlayer.onAudioDTO(audioDTO)
// voicePlayer.onAudioDTO(audioDTO)
}
}
}
@ -437,7 +454,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
word: String,
audioUrl: String
) {
LogUtils.eTag("lrs","onAudioProgressUpdated:${state}")
LogUtils.eTag("lrs", "onAudioProgressUpdated:${state}")
val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl
if (audioUrl != wakeupUrl) return
@ -467,7 +484,7 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
state: Int,//0stop 2pause 1play 3complete 4loading 5error
text: String
) {
LogUtils.eTag("lrs","onStreamAudioProgressUpdated:${state}")
LogUtils.eTag("lrs", "onStreamAudioProgressUpdated:${state}")
when (state) {
1 -> {
if (!backPlaying) {
@ -543,6 +560,77 @@ class MainActivity : BaseViewModelActivity<ActivityMainBinding, MainViewModel>()
}
private fun sendRecordVoiceToServer(audio: String) {
cancelSSE()
val request: Request? = RxHttp.postJson(ApiService.UPLOAD_RECORD_VOICE_URL)
.add("audio",audio)
.buildRequest()
request?.let {
// 重置手动取消标记
isManualCancel = false
mEventSources = createFactory(RxHttpPlugins.getOkHttpClient())
.newEventSource(it, object : EventSourceListener() {
override fun onOpen(eventSource: EventSource, response: Response) {
super.onOpen(eventSource, response)
LogUtils.eTag("lrsxxx", "SSE连接成功${response.code}")
}
override fun onEvent(
eventSource: EventSource,
id: String?,
type: String?,
data: String
) {
super.onEvent(eventSource, id, type, data)
LogUtils.eTag("lrsxxx", "onEvent:${data}")
runCatching {
// val audioDTO = GsonUtils.fromJson(data, LmChatDTO::class.java)
// voicePlayer.handleSlice(audioDTO)
UnityPlayerHolder.getInstance().startTalking(data)
}.onFailure {
LogUtils.eTag("lrsxxx", "解析音频数据失败", it)
voiceController?.onUploadFinished(false)
}
}
override fun onFailure(
eventSource: EventSource,
t: Throwable?,
response: Response?
) {
super.onFailure(eventSource, t, response)
// 关键修复2忽略手动取消导致的异常
if (isManualCancel) {
LogUtils.eTag("lrsxxx", "SSE手动取消忽略失败回调")
return
}
// 正常失败逻辑
val errorMsg = t?.message ?: response?.message ?: "未知错误"
voiceController?.onUploadFinished(false)
}
override fun onClosed(eventSource: EventSource) {
super.onClosed(eventSource)
// 关键修复3区分手动取消和正常关闭
val isSuccess = !isManualCancel
// 关键修复4关闭后置空引用避免内存泄漏
mEventSources = null
}
})
}
}
private fun cancelSSE() {
isManualCancel = true
mEventSources?.cancel()
mEventSources = null
}
override fun onDestroy() {
super.onDestroy()
stopRecording()

View File

@ -1,31 +1,52 @@
package com.zs.smarthuman.utils
import java.nio.ByteBuffer
import java.nio.ByteOrder
/**
* @description:
* @author: lrs
* @date: 2025/12/19 15:42
*/
object AudioPcmUtil {
// 常量抽离,避免魔法值,提升可读性
private const val PCM16_MAX = 32767
private const val PCM16_MIN = -32768
private const val BASE64_FLAGS = android.util.Base64.NO_WRAP
/**
* Float音频转16位PCM字节数组优化版
* 优化点减少内存分配提升循环效率避免多余装箱
*/
fun floatToPcm16(floatArray: FloatArray): ByteArray {
val buffer = ByteBuffer.allocate(floatArray.size * 2)
.order(ByteOrder.LITTLE_ENDIAN)
// 空值快速返回,避免空指针
if (floatArray.isEmpty()) return ByteArray(0)
val byteArray = ByteArray(floatArray.size * 2)
var byteIndex = 0
// 直接操作字节数组避免ByteBuffer的额外开销
for (f in floatArray) {
val v = (f.coerceIn(-1f, 1f) * 32767).toInt().toShort()
buffer.putShort(v)
// 1. 范围限制(-1~1转换为16位整型值
val intValue = (f.coerceIn(-1f, 1f) * PCM16_MAX).toInt()
// 2. 确保值在16位有符号整数范围内防止溢出
val shortValue = intValue.coerceIn(PCM16_MIN, PCM16_MAX).toShort()
// 3. 小端序写入字节数组替代ByteBuffer减少内存拷贝
byteArray[byteIndex++] = shortValue.toByte() // 低字节
byteArray[byteIndex++] = (shortValue.toInt() shr 8).toByte() // 高字节
}
return buffer.array()
return byteArray
}
/**
* 16位PCM转Base64字符串优化版
* 优化点空值保护常量抽离精简逻辑
*/
fun pcm16ToBase64(pcm: ByteArray): String {
return android.util.Base64.encodeToString(
pcm,
android.util.Base64.NO_WRAP
)
// 空值快速返回,避免无效转换
if (pcm.isEmpty()) return ""
return android.util.Base64.encodeToString(pcm, BASE64_FLAGS)
}
// ========== 可选扩展:批量转换(进一步提升性能) ==========
/**
* 批量转换float数组为Base64编码的PCM减少中间变量
* 场景直接将音频数据转Base64传输跳过单独保存PCM字节数组
*/
fun floatToPcm16Base64(floatArray: FloatArray): String {
if (floatArray.isEmpty()) return ""
return pcm16ToBase64(floatToPcm16(floatArray))
}
}

View File

@ -1,101 +1,272 @@
package com.zs.smarthuman.utils
import android.util.Base64
import com.zs.smarthuman.bean.AudioDTO
import com.zs.smarthuman.bean.LmChatDTO
import kotlinx.coroutines.*
import java.util.*
class VoiceStreamPlayer {
// ====================== Voice 流播放器 ======================
class VoiceStreamPlayer(
private val sampleRate: Int = 24000
) {
var onPlayStart: ((audioId: Int) -> Unit)? = null
var onPlayEnd: ((audioId: Int) -> Unit)? = null
private val scope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
private var currentAudioId: Int? = null
private val pcmPlayer: PcmStreamPlayer by lazy { PcmStreamPlayer(sampleRate) }
// 新增:使用可取消的协程上下文,绑定生命周期
private val mainScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
private var playEndJob: Job? = null // 单独管理播放结束的协程
private var currentAudioId: Int? = null
private var pcmPlayer: PcmStreamPlayer? = null
/** sortId -> pcm */
private val sliceBuffer = TreeMap<Int, ByteArray>()
private var nextSortId = 1
private var inputFinished = false
private var firstPlayTriggered = false
private var bufferedBytes = 0
private var playEndLaunched = false
private val PREBUFFER_BYTES = (sampleRate * 2 * 250 / 1000) // 250ms
/** 预缓冲字节数(首包) */
private var prebufferBytes = 0
fun onAudioDTO(dto: AudioDTO) {
scope.launch {
dto.items.forEach { slice ->
handleSlice(slice)
/** ========= 首包合并 ========= */
private val firstChunkList = ArrayList<ByteArray>()
private var firstChunkBytes = 0
/** ========= 运行时短 slice 合并(关键) ========= */
private val runningMergeList = ArrayList<ByteArray>()
private var runningMergeBytes = 0
private var minWriteBytes = 0 // ≥60ms 才写 AudioTrack
// ========== 对外暴露:唤醒时调用此方法,彻底停止当前播放 ==========
fun onWakeupStop() {
// 1. 取消播放结束的协程,防止空指针
playEndJob?.cancel()
// 2. 强制停止PCM播放器空安全
pcmPlayer?.forceStop()
// 3. 清空所有切片缓存
synchronized(sliceBuffer) {
sliceBuffer.clear()
}
// 4. 清空合并列表
synchronized(firstChunkList) {
firstChunkList.clear()
}
synchronized(runningMergeList) {
runningMergeList.clear()
}
// 5. 重置所有状态变量
firstChunkBytes = 0
runningMergeBytes = 0
nextSortId = 1
inputFinished = false
firstPlayTriggered = false
playEndLaunched = false
// 6. 重置当前音频ID
currentAudioId = null
println("VoiceStreamPlayer: 唤醒触发,已彻底停止当前音频播放")
}
private fun handleSlice(slice: LmChatDTO) {
// fun onAudioDTO(dto: AudioDTO) {
// // 空安全检查防止dto为空
// dto ?: return
//
// // 只在第一次初始化
// if (pcmPlayer == null) {
// val sampleRate = if (dto.samplingRate > 0) dto.samplingRate else 24000
// pcmPlayer = PcmStreamPlayer(sampleRate)
//
// prebufferBytes = sampleRate * 2 * 400 / 1000 // 150ms 首包
// minWriteBytes = sampleRate * 2 * 80 / 1000 // 60ms 最小写入
// }
//
// mainScope.launch {
// dto.items?.forEach { item ->
// item ?: return@forEach
// handleSlice(item)
// }
// }
// }
fun handleSlice(slice: LmChatDTO) {
// 空安全检查
slice ?: return
if (pcmPlayer == null) {
val sampleRate = 24000
pcmPlayer = PcmStreamPlayer(sampleRate)
prebufferBytes = sampleRate * 2 * 150 / 1000 // 150ms 首包
minWriteBytes = sampleRate * 2 * 60 / 1000 // 60ms 最小写入
}
// 如果是新音频ID先停止当前播放
if (currentAudioId != slice.id) {
startNewAudio(slice.id)
}
slice.audioData?.takeIf { it.isNotBlank() }?.let {
val pcm = Base64.decode(it, Base64.DEFAULT)
sliceBuffer[slice.sortId] = pcm
synchronized(sliceBuffer) {
sliceBuffer[slice.sortId] = Base64.decode(it, Base64.DEFAULT)
}
}
if (slice.isFinal) inputFinished = true
if (slice.isFinal) {
inputFinished = true
}
flushBufferIfPossible()
}
private fun startNewAudio(audioId: Int) {
currentAudioId = audioId
// 1. 取消旧的播放结束协程
playEndJob?.cancel()
// 2. 切换新音频时,先彻底停止旧音频(空安全)
pcmPlayer?.forceStop()
// 3. 重置所有状态
synchronized(sliceBuffer) {
sliceBuffer.clear()
}
nextSortId = 1
bufferedBytes = 0
inputFinished = false
firstPlayTriggered = false
playEndLaunched = false
inputFinished = false
pcmPlayer.clearQueue()
synchronized(firstChunkList) {
firstChunkList.clear()
}
firstChunkBytes = 0
synchronized(runningMergeList) {
runningMergeList.clear()
}
runningMergeBytes = 0
// 4. 重启播放器,准备播放新音频(空安全)
pcmPlayer?.restart()
currentAudioId = audioId
}
private fun flushBufferIfPossible() {
// 空安全检查防止pcmPlayer为空时执行后续逻辑
val player = pcmPlayer ?: return
while (true) {
val pcm = sliceBuffer[nextSortId] ?: break
bufferedBytes += pcm.size
val pcm = synchronized(sliceBuffer) {
val data = sliceBuffer[nextSortId]
if (data != null) {
sliceBuffer.remove(nextSortId)
}
data
} ?: break
nextSortId++
if (!firstPlayTriggered && bufferedBytes >= PREBUFFER_BYTES) {
if (!firstPlayTriggered) {
// ===== 首包阶段 =====
synchronized(firstChunkList) {
firstChunkList.add(pcm)
firstChunkBytes += pcm.size
}
if (firstChunkBytes >= prebufferBytes) {
firstPlayTriggered = true
onPlayStart?.invoke(currentAudioId!!)
// 空安全currentAudioId不为空才回调
currentAudioId?.let { audioId ->
onPlayStart?.invoke(audioId)
}
if (firstPlayTriggered) {
pcmPlayer.pushPcm(pcm)
val merged = synchronized(firstChunkList) {
merge(firstChunkList, firstChunkBytes)
}
player.pushPcm(merged)
synchronized(firstChunkList) {
firstChunkList.clear()
}
}
} else {
// ===== 运行时动态合并(核心) =====
synchronized(runningMergeList) {
runningMergeList.add(pcm)
runningMergeBytes += pcm.size
}
if (runningMergeBytes >= minWriteBytes) {
val merged = synchronized(runningMergeList) {
merge(runningMergeList, runningMergeBytes)
}
player.pushPcm(merged)
synchronized(runningMergeList) {
runningMergeList.clear()
}
runningMergeBytes = 0
}
}
}
// 收尾,只启动一次协程监控播放完成
if (inputFinished && sliceBuffer.isEmpty() && firstPlayTriggered && !playEndLaunched) {
// ===== 结束收尾 =====
if (inputFinished && synchronized(sliceBuffer) { sliceBuffer.isEmpty() } && firstPlayTriggered && !playEndLaunched) {
playEndLaunched = true
scope.launch {
while (!pcmPlayer.queueEmpty()) {
// 把剩余的短 slice 一次性 flush
if (runningMergeBytes > 0) {
val merged = synchronized(runningMergeList) {
merge(runningMergeList, runningMergeBytes)
}
player.pushPcm(merged)
synchronized(runningMergeList) {
runningMergeList.clear()
}
runningMergeBytes = 0
}
// 关键修复:单独管理这个协程,可取消,且增加空安全检查
playEndJob = mainScope.launch {
// 循环检查前先判断player是否还存在
while (player.queueEmpty().not() && currentAudioId != null) {
delay(10)
}
onPlayEnd?.invoke(currentAudioId!!)
// 空安全currentAudioId不为空才回调
currentAudioId?.let { audioId ->
onPlayEnd?.invoke(audioId)
}
// 回调后重置状态
playEndLaunched = false
}
}
}
private fun merge(list: List<ByteArray>, totalBytes: Int): ByteArray {
val merged = ByteArray(totalBytes)
var offset = 0
list.forEach { byteArray ->
byteArray ?: return@forEach
System.arraycopy(byteArray, 0, merged, offset, byteArray.size)
offset += byteArray.size
}
return merged
}
fun release() {
pcmPlayer.release()
scope.cancel()
// 1. 取消所有协程
mainScope.cancel()
playEndJob?.cancel()
// 2. 释放播放器
pcmPlayer?.release()
pcmPlayer = null
// 3. 清空所有状态
currentAudioId = null
synchronized(sliceBuffer) {
sliceBuffer.clear()
}
synchronized(firstChunkList) {
firstChunkList.clear()
}
synchronized(runningMergeList) {
runningMergeList.clear()
}
// 4. 重置回调
onPlayStart = null
onPlayEnd = null
}
}

View File

@ -10,8 +10,6 @@ import com.zs.smarthuman.bean.VoiceBeanResp
import com.zs.smarthuman.http.ApiLiveData
import com.zs.smarthuman.http.ApiResult
import com.zs.smarthuman.http.ApiService
import com.zs.smarthuman.utils.AudioPcmUtil
import com.zs.smarthuman.utils.UnityPlayerHolder
import rxhttp.awaitResult
import rxhttp.wrapper.param.RxHttp
import rxhttp.wrapper.param.toAwaitResponse
@ -41,9 +39,9 @@ class MainViewModel: BaseViewModel() {
RxHttp.postJson(ApiService.UPLOAD_RECORD_VOICE_URL)
.add("sessionCode",sessionCode)
.add("audio", audioVoice)
.readTimeout(5000L)
.writeTimeout(5000L)
.connectTimeout(5000L)
.readTimeout(3000L)
.writeTimeout(3000L)
.connectTimeout(3000L)
.toAwaitResponse<String>()
.awaitResult()
.getOrThrow()