diff --git a/app/src/main/java/com/zs/smarthuman/http/ApiService.kt b/app/src/main/java/com/zs/smarthuman/http/ApiService.kt index de2d9e6..3af1097 100644 --- a/app/src/main/java/com/zs/smarthuman/http/ApiService.kt +++ b/app/src/main/java/com/zs/smarthuman/http/ApiService.kt @@ -13,7 +13,7 @@ class ApiService { const val GET_USER_INFO_URL = "iot/info/getUserInfo" - const val UPLOAD_RECORD_VOICE_URL = "iot/chat" + const val UPLOAD_RECORD_VOICE_URL = "iot/chat/stream" const val VERSION_UPDATE_URL = "iot/info/getLatestVersion" } diff --git a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt index e61512e..e6faeac 100644 --- a/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt +++ b/app/src/main/java/com/zs/smarthuman/ui/MainActivity.kt @@ -38,6 +38,7 @@ import com.zs.smarthuman.R import com.zs.smarthuman.base.BaseActivity import com.zs.smarthuman.base.BaseViewModelActivity import com.zs.smarthuman.bean.AudioDTO +import com.zs.smarthuman.bean.LmChatDTO import com.zs.smarthuman.bean.NetworkStatusEventMsg import com.zs.smarthuman.bean.UserInfoResp import com.zs.smarthuman.bean.VersionUpdateResp @@ -45,6 +46,7 @@ import com.zs.smarthuman.bean.VoiceBeanResp import com.zs.smarthuman.common.UserInfoManager import com.zs.smarthuman.databinding.ActivityMainBinding import com.zs.smarthuman.http.ApiResult +import com.zs.smarthuman.http.ApiService import com.zs.smarthuman.im.chat.MessageContentType import com.zs.smarthuman.im.chat.bean.SingleMessage import com.zs.smarthuman.kt.releaseIM @@ -52,7 +54,6 @@ import com.zs.smarthuman.sherpa.TimeoutType import com.zs.smarthuman.sherpa.VoiceController import com.zs.smarthuman.toast.Toaster import com.zs.smarthuman.utils.AudioDebugUtil -import com.zs.smarthuman.utils.AudioPcmUtil import com.zs.smarthuman.utils.DangerousUtils import com.zs.smarthuman.utils.LogFileUtils import com.zs.smarthuman.utils.PcmStreamPlayer @@ -68,6 +69,13 @@ import kotlinx.coroutines.Job import kotlinx.coroutines.delay import kotlinx.coroutines.flow.catch import kotlinx.coroutines.launch +import okhttp3.Request +import okhttp3.Response +import okhttp3.sse.EventSource +import okhttp3.sse.EventSourceListener +import okhttp3.sse.EventSources +import okhttp3.sse.EventSources.createFactory +import rxhttp.RxHttpPlugins import rxhttp.toDownloadFlow import rxhttp.wrapper.param.RxHttp import java.io.File @@ -82,7 +90,7 @@ class MainActivity : BaseViewModelActivity() private var voiceController: VoiceController? = null private var audioRecord: AudioRecord? = null private var isRecording = false - private val audioSource = MediaRecorder.AudioSource.VOICE_COMMUNICATION + private val audioSource = MediaRecorder.AudioSource.MIC private val sampleRateInHz = 16000 private val channelConfig = AudioFormat.CHANNEL_IN_MONO private val audioFormat = AudioFormat.ENCODING_PCM_16BIT @@ -93,6 +101,9 @@ class MainActivity : BaseViewModelActivity() private val PLAY_WAIT_TIMEOUT_MS = 2000L // 统一2秒超时阈值 private var startPlayTimeoutJob: Job? = null // 统一管理所有播放场景的超时Job + private var mEventSources: EventSource? = null + private var isManualCancel = false + override fun getViewBinding(): ActivityMainBinding = ActivityMainBinding.inflate(layoutInflater) override fun initView() { UnityPlayerHolder.getInstance().initialize(this) @@ -193,6 +204,8 @@ class MainActivity : BaseViewModelActivity() voiceController = VoiceController( assetManager = assets, onWakeup = { + cancelSSE() + voicePlayer?.onWakeupStop() UnityPlayerHolder.getInstance().cancelPCM() UnityPlayerHolder.getInstance() .sendVoiceToUnity( @@ -206,10 +219,12 @@ class MainActivity : BaseViewModelActivity() } }, onFinalAudio = { audio -> - mViewModel?.uploadVoice( - AudioPcmUtil.pcm16ToBase64(AudioPcmUtil.floatToPcm16(audio)), - 1 - ) + sendRecordVoiceToServer(AudioPcmUtil.floatToPcm16Base64(audio)) +// mViewModel?.uploadVoice( +// +// AudioPcmUtil.floatToPcm16Base64(audio), +// 1 +// ) // loadLocalJsonAndPlay() // val file = File( // getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)!!.getAbsolutePath(), @@ -217,10 +232,10 @@ class MainActivity : BaseViewModelActivity() // ) // AudioDebugUtil.saveFloatPcmAsWav(audio, file) // LogUtils.dTag("audioxx", "WAV saved: ${file.path}, samples=${audio.size}") - lifecycleScope.launch(Dispatchers.Main) { - - mVerticalAnimator?.show() - } +// lifecycleScope.launch(Dispatchers.Main) { +// +// mVerticalAnimator?.show() +// } }, onStateChanged = { state -> @@ -246,9 +261,10 @@ class MainActivity : BaseViewModelActivity() } ) } + private val voicePlayer = VoiceStreamPlayer().apply { onPlayStart = { id -> - LogUtils.d("🎵 开始播放 audioId=$id") + LogUtils.dTag("lrsxxx", "🎵 开始播放 audioId=$id") startPlayTimeoutJob?.cancel() voiceController?.onPlayStartBackend() } @@ -257,15 +273,16 @@ class MainActivity : BaseViewModelActivity() voiceController?.onPlayEndBackend() } + } + override fun receivedIMMsg(msg: SingleMessage) { when (msg.msgContentType) { MessageContentType.RECEIVE_VOICE_STREAM.msgContentType -> { - lifecycleScope.launch(Dispatchers.IO) { - +// UnityPlayerHolder.getInstance().startTalking(msg.content) val audioDTO = GsonUtils.fromJson(msg.content, AudioDTO::class.java) - voicePlayer.onAudioDTO(audioDTO) +// voicePlayer.onAudioDTO(audioDTO) } } } @@ -437,7 +454,7 @@ class MainActivity : BaseViewModelActivity() word: String, audioUrl: String ) { - LogUtils.eTag("lrs","onAudioProgressUpdated:${state}") + LogUtils.eTag("lrs", "onAudioProgressUpdated:${state}") val wakeupUrl = UserInfoManager.userInfo?.wakeUpAudioUrl if (audioUrl != wakeupUrl) return @@ -467,7 +484,7 @@ class MainActivity : BaseViewModelActivity() state: Int,//0stop 2pause 1play 3complete 4loading 5error text: String ) { - LogUtils.eTag("lrs","onStreamAudioProgressUpdated:${state}") + LogUtils.eTag("lrs", "onStreamAudioProgressUpdated:${state}") when (state) { 1 -> { if (!backPlaying) { @@ -543,6 +560,77 @@ class MainActivity : BaseViewModelActivity() } + private fun sendRecordVoiceToServer(audio: String) { + cancelSSE() + val request: Request? = RxHttp.postJson(ApiService.UPLOAD_RECORD_VOICE_URL) + .add("audio",audio) + .buildRequest() + + request?.let { + // 重置手动取消标记 + isManualCancel = false + + mEventSources = createFactory(RxHttpPlugins.getOkHttpClient()) + .newEventSource(it, object : EventSourceListener() { + override fun onOpen(eventSource: EventSource, response: Response) { + super.onOpen(eventSource, response) + LogUtils.eTag("lrsxxx", "SSE连接成功:${response.code}") + } + + override fun onEvent( + eventSource: EventSource, + id: String?, + type: String?, + data: String + ) { + super.onEvent(eventSource, id, type, data) + LogUtils.eTag("lrsxxx", "onEvent:${data}") + runCatching { +// val audioDTO = GsonUtils.fromJson(data, LmChatDTO::class.java) +// voicePlayer.handleSlice(audioDTO) + UnityPlayerHolder.getInstance().startTalking(data) + }.onFailure { + LogUtils.eTag("lrsxxx", "解析音频数据失败", it) + voiceController?.onUploadFinished(false) + } + } + + override fun onFailure( + eventSource: EventSource, + t: Throwable?, + response: Response? + ) { + super.onFailure(eventSource, t, response) + // 关键修复2:忽略手动取消导致的异常 + if (isManualCancel) { + LogUtils.eTag("lrsxxx", "SSE手动取消,忽略失败回调") + return + } + + // 正常失败逻辑 + val errorMsg = t?.message ?: response?.message ?: "未知错误" + voiceController?.onUploadFinished(false) + } + + override fun onClosed(eventSource: EventSource) { + super.onClosed(eventSource) + // 关键修复3:区分手动取消和正常关闭 + val isSuccess = !isManualCancel + // 关键修复4:关闭后置空引用,避免内存泄漏 + mEventSources = null + } + }) + } + } + + + private fun cancelSSE() { + isManualCancel = true + mEventSources?.cancel() + mEventSources = null + } + + override fun onDestroy() { super.onDestroy() stopRecording() diff --git a/app/src/main/java/com/zs/smarthuman/utils/AudioPcmUtil.kt b/app/src/main/java/com/zs/smarthuman/utils/AudioPcmUtil.kt index 1b006d2..04c03f2 100644 --- a/app/src/main/java/com/zs/smarthuman/utils/AudioPcmUtil.kt +++ b/app/src/main/java/com/zs/smarthuman/utils/AudioPcmUtil.kt @@ -1,31 +1,52 @@ -package com.zs.smarthuman.utils - -import java.nio.ByteBuffer -import java.nio.ByteOrder - -/** - * @description: - * @author: lrs - * @date: 2025/12/19 15:42 - */ object AudioPcmUtil { + // 常量抽离,避免魔法值,提升可读性 + private const val PCM16_MAX = 32767 + private const val PCM16_MIN = -32768 + private const val BASE64_FLAGS = android.util.Base64.NO_WRAP + + /** + * Float音频转16位PCM字节数组(优化版) + * 优化点:减少内存分配、提升循环效率、避免多余装箱 + */ fun floatToPcm16(floatArray: FloatArray): ByteArray { - val buffer = ByteBuffer.allocate(floatArray.size * 2) - .order(ByteOrder.LITTLE_ENDIAN) + // 空值快速返回,避免空指针 + if (floatArray.isEmpty()) return ByteArray(0) + val byteArray = ByteArray(floatArray.size * 2) + var byteIndex = 0 + + // 直接操作字节数组,避免ByteBuffer的额外开销 for (f in floatArray) { - val v = (f.coerceIn(-1f, 1f) * 32767).toInt().toShort() - buffer.putShort(v) + // 1. 范围限制(-1~1),转换为16位整型值 + val intValue = (f.coerceIn(-1f, 1f) * PCM16_MAX).toInt() + // 2. 确保值在16位有符号整数范围内(防止溢出) + val shortValue = intValue.coerceIn(PCM16_MIN, PCM16_MAX).toShort() + + // 3. 小端序写入字节数组(替代ByteBuffer,减少内存拷贝) + byteArray[byteIndex++] = shortValue.toByte() // 低字节 + byteArray[byteIndex++] = (shortValue.toInt() shr 8).toByte() // 高字节 } - return buffer.array() + return byteArray } - + /** + * 16位PCM转Base64字符串(优化版) + * 优化点:空值保护、常量抽离、精简逻辑 + */ fun pcm16ToBase64(pcm: ByteArray): String { - return android.util.Base64.encodeToString( - pcm, - android.util.Base64.NO_WRAP - ) + // 空值快速返回,避免无效转换 + if (pcm.isEmpty()) return "" + return android.util.Base64.encodeToString(pcm, BASE64_FLAGS) } -} + + // ========== 可选扩展:批量转换(进一步提升性能) ========== + /** + * 批量转换float数组为Base64编码的PCM(减少中间变量) + * 场景:直接将音频数据转Base64传输,跳过单独保存PCM字节数组 + */ + fun floatToPcm16Base64(floatArray: FloatArray): String { + if (floatArray.isEmpty()) return "" + return pcm16ToBase64(floatToPcm16(floatArray)) + } +} \ No newline at end of file diff --git a/app/src/main/java/com/zs/smarthuman/utils/VoiceStreamPlayer.kt b/app/src/main/java/com/zs/smarthuman/utils/VoiceStreamPlayer.kt index d20ee0a..f126c31 100644 --- a/app/src/main/java/com/zs/smarthuman/utils/VoiceStreamPlayer.kt +++ b/app/src/main/java/com/zs/smarthuman/utils/VoiceStreamPlayer.kt @@ -1,101 +1,272 @@ package com.zs.smarthuman.utils - import android.util.Base64 import com.zs.smarthuman.bean.AudioDTO import com.zs.smarthuman.bean.LmChatDTO import kotlinx.coroutines.* import java.util.* +class VoiceStreamPlayer { - - -// ====================== Voice 流播放器 ====================== -class VoiceStreamPlayer( - private val sampleRate: Int = 24000 -) { var onPlayStart: ((audioId: Int) -> Unit)? = null var onPlayEnd: ((audioId: Int) -> Unit)? = null - private val scope = CoroutineScope(SupervisorJob() + Dispatchers.IO) - private var currentAudioId: Int? = null - private val pcmPlayer: PcmStreamPlayer by lazy { PcmStreamPlayer(sampleRate) } + // 新增:使用可取消的协程上下文,绑定生命周期 + private val mainScope = CoroutineScope(SupervisorJob() + Dispatchers.IO) + private var playEndJob: Job? = null // 单独管理播放结束的协程 + private var currentAudioId: Int? = null + private var pcmPlayer: PcmStreamPlayer? = null + + /** sortId -> pcm */ private val sliceBuffer = TreeMap() private var nextSortId = 1 + private var inputFinished = false private var firstPlayTriggered = false - private var bufferedBytes = 0 private var playEndLaunched = false - private val PREBUFFER_BYTES = (sampleRate * 2 * 250 / 1000) // 250ms + /** 预缓冲字节数(首包) */ + private var prebufferBytes = 0 - fun onAudioDTO(dto: AudioDTO) { - scope.launch { - dto.items.forEach { slice -> - handleSlice(slice) - } + /** ========= 首包合并 ========= */ + private val firstChunkList = ArrayList() + private var firstChunkBytes = 0 + + /** ========= 运行时短 slice 合并(关键) ========= */ + private val runningMergeList = ArrayList() + private var runningMergeBytes = 0 + private var minWriteBytes = 0 // ≥60ms 才写 AudioTrack + + // ========== 对外暴露:唤醒时调用此方法,彻底停止当前播放 ========== + fun onWakeupStop() { + // 1. 取消播放结束的协程,防止空指针 + playEndJob?.cancel() + // 2. 强制停止PCM播放器(空安全) + pcmPlayer?.forceStop() + // 3. 清空所有切片缓存 + synchronized(sliceBuffer) { + sliceBuffer.clear() } + // 4. 清空合并列表 + synchronized(firstChunkList) { + firstChunkList.clear() + } + synchronized(runningMergeList) { + runningMergeList.clear() + } + // 5. 重置所有状态变量 + firstChunkBytes = 0 + runningMergeBytes = 0 + nextSortId = 1 + inputFinished = false + firstPlayTriggered = false + playEndLaunched = false + // 6. 重置当前音频ID + currentAudioId = null + println("VoiceStreamPlayer: 唤醒触发,已彻底停止当前音频播放") } - private fun handleSlice(slice: LmChatDTO) { +// fun onAudioDTO(dto: AudioDTO) { +// // 空安全检查:防止dto为空 +// dto ?: return +// +// // 只在第一次初始化 +// if (pcmPlayer == null) { +// val sampleRate = if (dto.samplingRate > 0) dto.samplingRate else 24000 +// pcmPlayer = PcmStreamPlayer(sampleRate) +// +// prebufferBytes = sampleRate * 2 * 400 / 1000 // 150ms 首包 +// minWriteBytes = sampleRate * 2 * 80 / 1000 // 60ms 最小写入 +// } +// +// mainScope.launch { +// dto.items?.forEach { item -> +// item ?: return@forEach +// handleSlice(item) +// } +// } +// } + + + + fun handleSlice(slice: LmChatDTO) { + // 空安全检查 + slice ?: return + if (pcmPlayer == null) { + val sampleRate = 24000 + pcmPlayer = PcmStreamPlayer(sampleRate) + + prebufferBytes = sampleRate * 2 * 150 / 1000 // 150ms 首包 + minWriteBytes = sampleRate * 2 * 60 / 1000 // 60ms 最小写入 + } + // 如果是新音频ID,先停止当前播放 if (currentAudioId != slice.id) { startNewAudio(slice.id) } slice.audioData?.takeIf { it.isNotBlank() }?.let { - val pcm = Base64.decode(it, Base64.DEFAULT) - sliceBuffer[slice.sortId] = pcm + synchronized(sliceBuffer) { + sliceBuffer[slice.sortId] = Base64.decode(it, Base64.DEFAULT) + } } - if (slice.isFinal) inputFinished = true + if (slice.isFinal) { + inputFinished = true + } flushBufferIfPossible() } private fun startNewAudio(audioId: Int) { - currentAudioId = audioId - sliceBuffer.clear() + // 1. 取消旧的播放结束协程 + playEndJob?.cancel() + // 2. 切换新音频时,先彻底停止旧音频(空安全) + pcmPlayer?.forceStop() + // 3. 重置所有状态 + synchronized(sliceBuffer) { + sliceBuffer.clear() + } nextSortId = 1 - bufferedBytes = 0 + inputFinished = false firstPlayTriggered = false playEndLaunched = false - inputFinished = false - pcmPlayer.clearQueue() + synchronized(firstChunkList) { + firstChunkList.clear() + } + firstChunkBytes = 0 + synchronized(runningMergeList) { + runningMergeList.clear() + } + runningMergeBytes = 0 + // 4. 重启播放器,准备播放新音频(空安全) + pcmPlayer?.restart() + + currentAudioId = audioId } private fun flushBufferIfPossible() { + // 空安全检查:防止pcmPlayer为空时执行后续逻辑 + val player = pcmPlayer ?: return + while (true) { - val pcm = sliceBuffer[nextSortId] ?: break - bufferedBytes += pcm.size - sliceBuffer.remove(nextSortId) + val pcm = synchronized(sliceBuffer) { + val data = sliceBuffer[nextSortId] + if (data != null) { + sliceBuffer.remove(nextSortId) + } + data + } ?: break nextSortId++ - if (!firstPlayTriggered && bufferedBytes >= PREBUFFER_BYTES) { - firstPlayTriggered = true - onPlayStart?.invoke(currentAudioId!!) - } + if (!firstPlayTriggered) { + // ===== 首包阶段 ===== + synchronized(firstChunkList) { + firstChunkList.add(pcm) + firstChunkBytes += pcm.size + } - if (firstPlayTriggered) { - pcmPlayer.pushPcm(pcm) + if (firstChunkBytes >= prebufferBytes) { + firstPlayTriggered = true + // 空安全:currentAudioId不为空才回调 + currentAudioId?.let { audioId -> + onPlayStart?.invoke(audioId) + } + + val merged = synchronized(firstChunkList) { + merge(firstChunkList, firstChunkBytes) + } + player.pushPcm(merged) + + synchronized(firstChunkList) { + firstChunkList.clear() + } + } + } else { + // ===== 运行时动态合并(核心) ===== + synchronized(runningMergeList) { + runningMergeList.add(pcm) + runningMergeBytes += pcm.size + } + + if (runningMergeBytes >= minWriteBytes) { + val merged = synchronized(runningMergeList) { + merge(runningMergeList, runningMergeBytes) + } + player.pushPcm(merged) + + synchronized(runningMergeList) { + runningMergeList.clear() + } + runningMergeBytes = 0 + } } } - // 收尾,只启动一次协程监控播放完成 - if (inputFinished && sliceBuffer.isEmpty() && firstPlayTriggered && !playEndLaunched) { + // ===== 结束收尾 ===== + if (inputFinished && synchronized(sliceBuffer) { sliceBuffer.isEmpty() } && firstPlayTriggered && !playEndLaunched) { playEndLaunched = true - scope.launch { - while (!pcmPlayer.queueEmpty()) { + + // 把剩余的短 slice 一次性 flush + if (runningMergeBytes > 0) { + val merged = synchronized(runningMergeList) { + merge(runningMergeList, runningMergeBytes) + } + player.pushPcm(merged) + + synchronized(runningMergeList) { + runningMergeList.clear() + } + runningMergeBytes = 0 + } + + // 关键修复:单独管理这个协程,可取消,且增加空安全检查 + playEndJob = mainScope.launch { + // 循环检查前先判断player是否还存在 + while (player.queueEmpty().not() && currentAudioId != null) { delay(10) } - onPlayEnd?.invoke(currentAudioId!!) + // 空安全:currentAudioId不为空才回调 + currentAudioId?.let { audioId -> + onPlayEnd?.invoke(audioId) + } + // 回调后重置状态 + playEndLaunched = false } } } + private fun merge(list: List, totalBytes: Int): ByteArray { + val merged = ByteArray(totalBytes) + var offset = 0 + list.forEach { byteArray -> + byteArray ?: return@forEach + System.arraycopy(byteArray, 0, merged, offset, byteArray.size) + offset += byteArray.size + } + return merged + } + fun release() { - pcmPlayer.release() - scope.cancel() + // 1. 取消所有协程 + mainScope.cancel() + playEndJob?.cancel() + // 2. 释放播放器 + pcmPlayer?.release() + pcmPlayer = null + // 3. 清空所有状态 + currentAudioId = null + synchronized(sliceBuffer) { + sliceBuffer.clear() + } + synchronized(firstChunkList) { + firstChunkList.clear() + } + synchronized(runningMergeList) { + runningMergeList.clear() + } + // 4. 重置回调 + onPlayStart = null + onPlayEnd = null } -} - +} \ No newline at end of file diff --git a/app/src/main/java/com/zs/smarthuman/viewmodel/MainViewModel.kt b/app/src/main/java/com/zs/smarthuman/viewmodel/MainViewModel.kt index fa35fe6..1acc217 100644 --- a/app/src/main/java/com/zs/smarthuman/viewmodel/MainViewModel.kt +++ b/app/src/main/java/com/zs/smarthuman/viewmodel/MainViewModel.kt @@ -10,8 +10,6 @@ import com.zs.smarthuman.bean.VoiceBeanResp import com.zs.smarthuman.http.ApiLiveData import com.zs.smarthuman.http.ApiResult import com.zs.smarthuman.http.ApiService -import com.zs.smarthuman.utils.AudioPcmUtil -import com.zs.smarthuman.utils.UnityPlayerHolder import rxhttp.awaitResult import rxhttp.wrapper.param.RxHttp import rxhttp.wrapper.param.toAwaitResponse @@ -41,9 +39,9 @@ class MainViewModel: BaseViewModel() { RxHttp.postJson(ApiService.UPLOAD_RECORD_VOICE_URL) .add("sessionCode",sessionCode) .add("audio", audioVoice) - .readTimeout(5000L) - .writeTimeout(5000L) - .connectTimeout(5000L) + .readTimeout(3000L) + .writeTimeout(3000L) + .connectTimeout(3000L) .toAwaitResponse() .awaitResult() .getOrThrow()