"Audio/video format" is an ambiguous term. The formats we are familiar with, such as FLV, MP4, and MOV, are container (packaging) formats; think of them as boxes. What sits inside the box is encoded audio and video data, and that data has a codec format of its own, such as AAC, H.264, or H.265. This article shows how to take the encoded audio extracted from a live stream, decode it, and play it back with the HTML5 (H5) audio API.
The audio formats involved are:
1. speex
2. aac
3. mp3
All of these formats have open-source decoders, but they are C libraries; to use them in H5 they have to be compiled to JavaScript with emscripten and executed there.
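As a minimal sketch (not the author's actual build), a decode entry point compiled with emscripten is usually exposed to JavaScript like this; the function name decodeAudio and its signature are assumptions made for illustration:

#include <emscripten/emscripten.h>

extern "C" {
// EMSCRIPTEN_KEEPALIVE keeps the symbol from being stripped by dead-code
// elimination, so it can be called from JavaScript via Module.ccall / Module.cwrap.
EMSCRIPTEN_KEEPALIVE
int decodeAudio(const unsigned char *input, int inputLength, unsigned char *output) {
    // dispatch to the speex / aac / mp3 decoder selected at compile time
    return 0; // number of PCM bytes written to output
}
}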
Include the headers
#ifdef USE_SPEEX
#include <speex/speex.h>
#endif
#ifdef USE_AAC
#include "aacDecoder/include/neaacdec.h"
// #include "libfdk-aac/libAACdec/include/aacdecoder_lib.h"
#endif
#ifdef USE_MP3
#include "libmad/mad.h"
// #include "libid3tag/tag.h"
#endif
Define the variables
int bufferLength;
int bufferFilled;
u8 *outputBuffer;
#ifdef USE_AAC
    faacDecHandle faacHandle;
#endif
#ifdef USE_SPEEX
    i16 *audioOutput;
    void *speexState;
    SpeexBits speexBits;
#endif
#ifdef USE_MP3
    MP3Decoder mp3Decoder;
#endif
bufferLength specifies the size of the buffer, bufferFilled tracks how much data in the buffer has not been consumed yet, and outputBuffer holds the decoded samples. MP3Decoder is a class I wrote myself; it needs the following members (a possible skeleton for the class is sketched after the member list):
mad_stream inputStream;
mad_frame frame;
mad_synth synth;
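The article does not show the rest of the class; a minimal skeleton consistent with the members above could look like this (the method names init, decode, and close are assumptions, not the author's actual interface):

struct MP3Decoder {
    mad_stream inputStream;
    mad_frame frame;
    mad_synth synth;

    void init();                                          // mad_stream_init / mad_frame_init / mad_synth_init
    int decode(const u8 *input, int length, u8 *output);  // low-level libmad calls, see the decoding section
    void close();                                         // mad_synth_finish / mad_frame_finish
};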
Initialization
outputBuffer = (u8 *)malloc(bufferLength);
#ifdef USE_SPEEX
    audioOutput = (i16 *)malloc(640);
    auto mode = speex_lib_get_mode(SPEEX_MODEID_WB);
    speexState = speex_decoder_init(mode);
    speex_bits_init(&speexBits);
#endif
#ifdef USE_AAC
    faacHandle = faacDecOpen();
#endif
MP3 initialization:
mad_stream_init(&inputStream);
mad_frame_init(&frame);
mad_synth_init(&synth);
Decoding
The input object holds the raw audio payload produced by protocol demuxing (the form it takes inside RTMP or FLV). The buffer size is up to you, but it must follow these rules:
aac: multiples of 1024 (one AAC frame plays for 1024 * 1000 / 44100 ≈ 23.22 ms)
speex: multiples of 320 (320 * 1000 / 16000 = 20 ms)
mp3: multiples of 576 (a stereo frame is 1152 samples: 1152 * 1000 / 44100 ≈ 26.12 ms)
From these numbers you can estimate the audio latency introduced by the buffer, which then has to be kept in sync with the video delay.
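As a rough illustration of that estimate (not code from the article), the latency contributed by buffered samples is just sample count divided by sample rate; the helper below and its arguments are hypothetical:

// Latency in milliseconds contributed by buffered but not yet played samples.
// Example: 4096 buffered AAC samples at 44100 Hz ≈ 92.9 ms of audio delay.
double bufferLatencyMs(int samplesPerChannel, int sampleRate) {
    return samplesPerChannel * 1000.0 / sampleRate;
}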
#ifdef USE_SPEEX
    if (input.length() <= 11) {
        memset(output, 0, 640);
    } else {
        speex_bits_read_from(&speexBits, (const char *)input, 52);
        speex_decode_int(speexState, &speexBits, audioOutput);
        memcpy(output, audioOutput, 640);
    }
    return 640;
#endif
#ifdef USE_AAC
    // 0 = AAC sequence header, 1 = AAC raw
    if (input.readB<1, u8>()) {
        faacDecFrameInfo frame_info;
        auto pcm_data = faacDecDecode(faacHandle, &frame_info, (unsigned char *)input.point(), input.length());
        if (frame_info.error > 0) {
            emscripten_log(1, "!!%s\n", NeAACDecGetErrorMessage(frame_info.error));
        } else {
            int samplesBytes = frame_info.samples << 1;
            memcpy(output, pcm_data, samplesBytes);
            return samplesBytes;
        }
    } else {
        unsigned long samplerate;
        unsigned char channels;
        auto config = faacDecGetCurrentConfiguration(faacHandle);
        config->defObjectType = LTP;
        faacDecSetConfiguration(faacHandle, config);
        faacDecInit2(faacHandle, (unsigned char *)input.point(), 4, &samplerate, &channels);
        emscripten_log(0, "aac samplerate:%d channels:%d", samplerate, channels);
    }
#endif
MP3 is more involved, so I won't post the code here. The main problem is that mad's ready-made high-level API can't be called directly, because MP3 data carried in a live stream is framed differently from an .mp3 file. If this article gets popular I'll write it up in detail.
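For orientation only, a generic low-level libmad decode pass looks roughly like the sketch below; this is standard libmad usage, not the author's streaming-specific adaptation:

// Feed a raw MP3 buffer to libmad and synthesize PCM frame by frame.
mad_stream_buffer(&inputStream, mp3Data, mp3Length);
while (mad_frame_decode(&frame, &inputStream) == 0) {
    mad_synth_frame(&synth, &frame);
    // synth.pcm.length samples per channel are now in synth.pcm.samples[ch][i]
    // as mad_fixed_t; scale to 16-bit PCM with sample >> (MAD_F_FRACBITS + 1 - 16).
}
// On failure, MAD_RECOVERABLE(inputStream.error) marks frames that can be skipped,
// while MAD_ERROR_BUFLEN simply means more input data is needed.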
Releasing resources
#ifdef USE_AAC
    faacDecClose(faacHandle);
#endif
#ifdef USE_SPEEX
    speex_decoder_destroy(speexState);
    speex_bits_destroy(&speexBits);
    free(audioOutput);
#endif
free(outputBuffer);
And for MP3:
mad_synth_finish(&synth);
mad_frame_finish(&frame);
mad_stream_finish(&inputStream); // pairs with mad_stream_init from the initialization step
Playback
Create the AudioContext object
window.AudioContext = window.AudioContext || window.webkitAudioContext;
var context = new window.AudioContext();
Create the audioBuffer
var audioBuffers = [];
var audioBuffer = context.createBuffer(channels, frameCount, samplerate);
Play the audio (with buffering)
var playNextBuffer = function() {
    isPlaying = false;
    if (audioBuffers.length) {
        playAudio(audioBuffers.shift());
    }
    if (audioBuffers.length > 1) audioBuffers.shift();
    //console.log(audioBuffers.length)
};
var copyAudioOutputArray = resampled ? function(target) {
    for (var i = 0; i < allFrameCount; i++) {
        var j = i << 1;
        target[j] = target[j + 1] = audioOutputArray[i] / 32768;
    }
} : function(target) {
    for (var i = 0; i < allFrameCount; i++) {
        target[i] = audioOutputArray[i] / 32768;
    }
};
var copyToCtxBuffer = channels > 1 ? function(fromBuffer) {
    for (var channel = 0; channel < channels; channel++) {
        var nowBuffering = audioBuffer.getChannelData(channel);
        if (fromBuffer) {
            for (var i = 0; i < frameCount; i++) {
                nowBuffering[i] = fromBuffer[i * (channel + 1)];
            }
        } else {
            for (var i = 0; i < frameCount; i++) {
                nowBuffering[i] = audioOutputArray[i * (channel + 1)] / 32768;
            }
        }
    }
} : function(fromBuffer) {
    var nowBuffering = audioBuffer.getChannelData(0);
    if (fromBuffer) nowBuffering.set(fromBuffer);
    else copyAudioOutputArray(nowBuffering);
};
var playAudio = function(fromBuffer) {
    if (isPlaying) {
        var buffer = new Float32Array(resampled ? allFrameCount * 2 : allFrameCount);
        copyAudioOutputArray(buffer);
        audioBuffers.push(buffer);
        return;
    }
    isPlaying = true;
    copyToCtxBuffer(fromBuffer);
    var source = context.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(context.destination);
    source.onended = playNextBuffer;
    //setTimeout(playNextBuffer, audioBufferTime-audioBuffers.length*200);
    source.start();
};
Here playNextBuffer takes the next chunk off the queue, copyAudioOutputArray converts the decoded samples to floating point, and copyToCtxBuffer copies the samples into the AudioBuffer that actually gets played. These functions handle both the mono and the stereo case.
var resampled = samplerate < 22050;
For data with a sample rate below 22 kHz, we duplicate every sample to simulate a 22 kHz stream, because H5 only supports sample rates above that.