Web Audio Live Streaming（网页音频直播）

总结网页音频直播的方案和遇到的问题。

代码：（github，待整理）

结果：使用opus音频编码，web audio api 播放，可以达到100ms以内延时，高质量，低流量的音频直播。

背景： VDI（虚拟桌面） h264网页版预研，继h264视频直播方案解决之后的又一个对延时有高要求的音频直播方案（交互性，音视频同步）。

前提： flexVDI开源项目对音频的支持仅限于未经编码压缩的PCM音频数据，并且效果不好：要么卡顿，要么延时，流量在2~3Mbps（取决于缓冲的大小）。

解决方案：在spice server端对音频采用opus进行编码，flexVDI playback通道拿到opus packet数据后，调用opus js解码库解码成PCM数据，喂给audioContext进行播放。

流程简介：flexVDI playback通道接收opus音频数据，调用libopus.js解码得到PCM数据，保存到buffer。创建scriptProcessorNode，在onaudioprocess函数中从buffer里面拿到PCM数据，

　　　　　按声道填充outputBuffer，把scriptProcessorNode连接到audioContext.destination进行播放。具体代码见后文或者github。

opus编解码接口介绍：

参考:http://opus-codec.org/docs/opus_api-1.2/index.html

一、下面是我用opus c库解码opus音频，再用ffplay播放PCM数据的一个demo，可以看看opus解码接口是怎么使用的：

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include "opus.h"

/*

static void int_to_char(opus_uint32 i, unsigned char ch[4])

{

    ch[0] = i>>24;

    ch[1] = (i>>16)&0xFF;

    ch[2] = (i>>8)&0xFF;

    ch[3] = i&0xFF;

}*/

/*
 * Decode a 4-byte big-endian value (most significant byte first) into a
 * 32-bit unsigned integer. Used to read the length prefix that precedes
 * each Opus packet in the input file.
 *
 * ch must point to at least 4 readable bytes.
 */
static opus_uint32 char_to_int(unsigned char ch[])
{
    return ((opus_uint32)ch[0] << 24) | ((opus_uint32)ch[1] << 16)
         | ((opus_uint32)ch[2] <<  8) |  (opus_uint32)ch[3];
}

int main(int argc, char** argv)

{

    opus_int32 sampleRate = ;

    int channels = , err = , len = ;

    int max_payload_bytes = ;

    int max_frame_size = *;

    OpusDecoder*  dec = NULL;

    sampleRate = (opus_int32)atol(argv[]);

    channels = atoi(argv[]);

    FILE*  fin = fopen(argv[], "rb");

    FILE*  fout = fopen(argv[], "wb+");

    short *out;

    unsigned char* fbytes, *data;

    //in = (short*)malloc(max_frame_size*channels*sizeof(short));

    out = (short*)malloc(max_frame_size*channels*sizeof(short));

    /* We need to allocate for 16-bit PCM data, but we store it as unsigned char. */

    fbytes = (unsigned char*)malloc(max_frame_size*channels*sizeof(short));

    data   = (unsigned char*)calloc(max_payload_bytes, sizeof(unsigned char));

    dec = opus_decoder_create(sampleRate, channels, &err);

    int nBytesRead = ;

    opus_uint64 tot_out = ;

    while(){

 　　　　unsigned char ch[] = {};

        nBytesRead = fread(ch, , , fin);

        if(nBytesRead != )

            break;

        len = char_to_int(ch);

        nBytesRead = fread(data, , len, fin);

        if(nBytesRead != len)

            break;

        opus_int32 output_samples = max_frame_size;

        output_samples = opus_decode(dec, data, len, out, output_samples, );

        int i;

        for(i=; i < output_samples*channels; i++)

        {

            short s;

            s=out[i];

            fbytes[*i]=s&0xFF;

            fbytes[*i+]=(s>>)&0xFF;

        }

        if (fwrite(fbytes, sizeof(short)*channels, output_samples, fout) != (unsigned)output_samples){

            fprintf(stderr, "Error writing.\n");

            return EXIT_FAILURE;

        }

        tot_out += output_samples;

    }

    printf("tot_out: %llu \n", tot_out);

    return ;

}

这个程序对opus packets组成的文件（简单的length+packet格式）解码后得到PCM数据，再用ffplay播放PCM数据，看能否正常播放：

ffplay -f f32le -ac 1 -ar 48000 input_audio // 播放float32型PCM数据

ffplay -f s16le -ac 1 -ar 48000 input_audio //播放short16型PCM数据

ac表示声道数， ar表示采样率， input_audio是PCM音频文件。

二、要获取PCM数据文件，首先要得到opus packet二进制文件，所以这里涉及到浏览器如何保存二进制文件到本地的问题：

参考代码：

/**
 * Triggers a browser download of in-memory binary data.
 *
 * The data is wrapped in a Blob, exposed through a temporary object URL and
 * downloaded by programmatically clicking a hidden <a download> element.
 * Nothing touches the DOM until the function is called, so the script may be
 * loaded before <body> exists.
 *
 * @param {*} data - Anything accepted as a Blob part (ArrayBuffer, typed array, Blob, string).
 * @param {string} name - File name suggested to the browser for the download.
 */
var saveFile = function (data, name) {
    var blob = new Blob([data]);
    var url = window.URL.createObjectURL(blob);
    var parent = document.body || document.documentElement;

    var a = document.createElement("a");
    a.style.display = "none";
    a.href = url;
    a.download = name;
    parent.appendChild(a);

    try {
        a.click();
    } finally {
        // The anchor is only needed for the click; do not leave it in the page.
        parent.removeChild(a);
        // Revoke on a later task: releasing the URL synchronously can race
        // with the browser starting the download.
        setTimeout(function () {
            window.URL.revokeObjectURL(url);
        }, 0);
    }
};

// Example call: offers `data` to the user as a download named 'test.pcm'.
// NOTE(review): `data` is not defined in this snippet - presumably a typed
// array holding the binary payload; confirm against the caller.
saveFile(data, 'test.pcm');

说明：首先把二进制数据写到typedArray中，然后用这个buffer构造Blob对象，生成URL，再使用a标签把这个blob下载到本地。

三、利用audioContext播放PCM音频数据的两种方案：

（1）flexVDI的实现

参考：https://github.com/flexVDI/spice-web-client

 /**
  * Plays one chunk of interleaved stereo PCM (L R L R ...) immediately.
  *
  * Samples are de-interleaved into left/right channels, scaled from the
  * signed 16-bit range to [-1.0, 1.0) as the Web Audio API requires, and
  * played through a one-shot AudioBufferSourceNode.
  *
  * Must be invoked with `this` bound to an object that provides
  * `audioContext` and `frequency` (the sample rate passed to createBuffer).
  *
  * NOTE(review): `new Float32Array(buffer)` copies values when `buffer` is an
  * array-like of numbers, but creates a float32 *view* when it is an
  * ArrayBuffer. Presumably the caller passes 16-bit sample values as an
  * array-like; confirm against the caller.
  *
  * @param {ArrayLike<number>} buffer - Interleaved stereo samples in the 16-bit range.
  * @param {*} dataTimestamp - Unused here; playback always starts immediately.
  */
 function play(buffer, dataTimestamp) {
        var audio = new Float32Array(buffer);

        // Whole stereo frames only: a trailing unpaired sample is dropped.
        var frameCount = Math.floor(audio.length / 2);

        // createBuffer() rejects a zero-length buffer, so there is nothing to play.
        if (frameCount === 0) {
          return;
        }

        // We split the audio buffer in two channels. Float32Array is the type required by Web Audio API
        var left = new Float32Array(frameCount);
        var right = new Float32Array(frameCount);

        for (var frame = 0; frame < frameCount; frame++) {
          // Scale 16-bit signed integers (full scale 32768) to floats between -1.0 and 1.0
          left[frame] = audio[2 * frame] / 32768;
          right[frame] = audio[2 * frame + 1] / 32768;
        }

        var audioContext = this.audioContext;

        // Bracket-style property access is kept from the original,
        // presumably so the names survive property renaming by a minifier.
        var source = audioContext['createBufferSource'](); // creates a sound source
        var audioBuffer = audioContext['createBuffer'](2, frameCount, this.frequency);

        audioBuffer['getChannelData'](0)['set'](left);
        audioBuffer['getChannelData'](1)['set'](right);

        source['buffer'] = audioBuffer;
        source['connect'](audioContext['destination']);
        source['start'](0); // 0 = start playing right away
}

注： buffer中保存的是short 型PCM数据，这里为了简单，去掉了对时间戳的处理，因为source.start(0)表示立即播放。如果是float型数据，不需要除以32768.

（2）ws-audio-api的实现

参考：https://github.com/Ivan-Feofanov/ws-audio-api

// Scratch buffers, one per output channel, each holding one processing block.
var bufL = new Float32Array(this.config.codec.bufferSize);
var bufR = new Float32Array(this.config.codec.bufferSize);

// ScriptProcessorNode created with 0 input channels and 2 output channels.
this.scriptNode = audioContext.createScriptProcessor(this.config.codec.bufferSize, 0, 2);

// Choose once how samples are written into an output channel: use
// AudioBuffer.copyToChannel when the browser has it, otherwise write
// through the channel's Float32Array.
var writeChannel = (typeof AudioBuffer.prototype.copyToChannel === "function")
     ? function(target, samples, channel) {
          target.copyToChannel(samples, channel);
     }
     : function(target, samples, channel) {
          target.getChannelData(channel).set(samples);
     };

this.scriptNode.onaudioprocess = function(e) {
     var target = e.outputBuffer;

     // Fetch the next block of PCM data into bufL / bufR.
     _this.process(bufL, bufR);

     writeChannel(target, bufL, 0);
     writeChannel(target, bufR, 1);
};

this.scriptNode.connect(audioContext.destination);

延时卡顿的问题：audioContext有的浏览器默认是48000采样率，有的浏览器默认是44100的采样率，如果喂给audioContext的PCM数据的采样率不匹配，就会产生延时和卡顿的问题。