如何使用Opus编码和解码音频数据?

时间:2018-08-01 16:58:10

标签: c++ qt audio opus

我正在开发一个语音聊天程序,因此需要压缩音频数据。我通过Qt框架录制和播放音频数据。如果不经压缩直接录制并播放,一切正常;但如果先压缩、再解压缩然后播放,就只能听到刺耳的噪声。

编辑:我参考了演示代码并尝试照着使用。现在可以听到一些声音,但播放得非常慢。如果把pcm_bytes的大小增加到例如40000,听起来会好一些,但声音仍然有延迟,并且伴有刺耳的噪声。

就是这一行(位于audioinput.cpp的底部):

speaker->write((const char*)pcm_bytes,3840);

codecopus.cpp:

#include "codecopus.h"

/// Constructs a codec wrapper that owns neither an encoder nor a decoder yet.
///
/// initEncoder() / initDecoder() allocate the actual Opus states later.
CodecOpus::CodecOpus()
{
    // The destructor passes both pointers to the opus *_destroy functions
    // unconditionally, so they must never hold indeterminate values when an
    // init function was not called.
    // NOTE(review): this relies on opus_*_destroy tolerating a null pointer
    // (libopus forwards to free()) — confirm for the bundled libopus version.
    decoderState = nullptr;
    encoderState = nullptr;
}

/// Creates the Opus decoder state used by decodeData().
///
/// @param samplingRate  Sample rate handed to opus_decoder_create.
/// @param channels      Channel count handed to opus_decoder_create.
///
/// On failure the reason is written to std::cerr and decoderState is left
/// null instead of silently continuing.
void CodecOpus::initDecoder(opus_int32 samplingRate, int channels) //decoder
{
    int error = OPUS_OK;
    decoderState = opus_decoder_create(samplingRate, channels, &error);
    if (error != OPUS_OK || decoderState == nullptr) {
        std::cerr << "Failed to create Opus Decoder struct: "
                  << opus_strerror(error) << std::endl;
        decoderState = nullptr;
        return;
    }
    std::cout << "Created Opus Decoder struct" << std::endl;
}

/// Creates the Opus encoder state (VOIP application) and sets its bitrate to
/// 64000 bit/s.
///
/// @param samplingRate  Sample rate handed to opus_encoder_create.
/// @param channels      Channel count handed to opus_encoder_create.
///
/// Failures of either step are written to std::cerr; the success message is
/// only printed when both the creation and the bitrate request succeeded.
void CodecOpus::initEncoder(opus_int32 samplingRate, int channels) // Encoder
{
    int error = OPUS_OK;
    encoderState = opus_encoder_create(samplingRate, channels, OPUS_APPLICATION_VOIP, &error);
    // Check the creation result before it is overwritten by the ctl call, and
    // never issue a ctl on an encoder that was not created.
    if (error != OPUS_OK || encoderState == nullptr) {
        std::cerr << "Failed to create Opus Encoder struct: "
                  << opus_strerror(error) << std::endl;
        encoderState = nullptr;
        return;
    }

    error = opus_encoder_ctl(encoderState, OPUS_SET_BITRATE(64000));
    if (error != OPUS_OK) {
        std::cerr << "Failed to set Opus encoder bitrate: "
                  << opus_strerror(error) << std::endl;
        return;
    }
    std::cout << "Created Opus Encoder struct" << std::endl;
}

/// Encodes one frame of 16-bit PCM by forwarding to opus_encode.
///
/// @param pcm           Input samples.
/// @param frameSize     Frame size, passed through to opus_encode unchanged.
/// @param data          Destination buffer for the compressed packet.
/// @param maxDataBytes  Capacity of @p data in bytes.
/// @return Whatever opus_encode returns (passed through unmodified).
opus_int32 CodecOpus::encodeData(const opus_int16 *pcm, int frameSize, unsigned char *data, opus_int32 maxDataBytes) //Encoder
{
    return opus_encode(encoderState, pcm, frameSize, data, maxDataBytes);
}

/// Decodes one Opus packet into 16-bit PCM by forwarding to opus_decode.
///
/// @param data             Compressed packet.
/// @param numberOfBytes    Length of @p data in bytes.
/// @param pcm              Destination buffer for the decoded samples.
/// @param frameSizeInSec   Passed as opus_decode's frame_size argument.
///                         NOTE(review): despite the name this is a sample
///                         capacity, not a duration — the caller in
///                         audioinput.cpp passes 6*960.
/// @return Whatever opus_decode returns (passed through unmodified).
int CodecOpus::decodeData(const unsigned char *data, opus_int32 numberOfBytes,opus_int16* pcm,int frameSizeInSec) //Decoder
{
    const int decodeFec = 0; // last opus_decode argument, always 0 here
    return opus_decode(decoderState, data, numberOfBytes, pcm, frameSizeInSec, decodeFec);
}

/// Releases the decoder state first, then the encoder state.
CodecOpus::~CodecOpus()
{
    opus_decoder_destroy(decoderState);
    opus_encoder_destroy(encoderState);
}

audioinput.h:

#ifndef AUDIOINPUT_H
#define AUDIOINPUT_H
#include <QAudioFormat>
#include <iostream>
#include <QAudioInput>
#include <QAudioOutput>
#include <thread>
#include "codecopus.h"
#include "QDebug"
/// Captures audio from the default input device, runs it through an Opus
/// encode/decode round trip and plays the result on the default output device.
class AudioInput : public QObject
{
    Q_OBJECT

public:
    AudioInput();
    ~AudioInput();

    /// Starts the input and output audio devices.
    void startRecording();

    /// Runs startRecording() on a detached std::thread.
    void CreateNewAudioThread();

private:
    CodecOpus opus;

    // Receives the compressed packet produced by the encoder.
    unsigned char cbits[4000] = {};

    // One frame of encoder input: 960 samples per channel x 2 channels.
    // (The size is an element count, so no sizeof() factor belongs here.)
    opus_int16 in[960*2] = {};

    // Decoder output: up to 5760 samples per channel x 2 channels.
    opus_int16 out[5760*2] = {};

    // Byte view onto raw PCM data; assigned by the implementation.
    unsigned char *pcm_bytes = nullptr;

    // Decoder capacity in samples per channel; set in the constructor.
    int MAX_FRAME_SIZE = 0;

    QAudioFormat audioFormat;            // capture format
    QAudioInput *audioInput = nullptr;   // capture device wrapper
    QIODevice *mic = nullptr;            // stream returned by audioInput->start()
    QByteArray data;                     // raw bytes read from mic
    int micFrameSize = 0;                // samples per channel in 20 ms of capture

    QAudioOutput *audioOutput = nullptr; // playback device wrapper
    QIODevice *speaker = nullptr;        // stream returned by audioOutput->start()
    QAudioFormat speakerAudioFormat;     // playback format

public slots:
    /// Connected to audioInput's notify() signal; processes captured audio.
    void OnAudioNotfiy();
};

#endif // AUDIOINPUT_H

audioinput.cpp:

#include "audioinput.h"

/// Configures capture and playback formats (48 kHz, stereo, signed 16-bit
/// little-endian PCM), falls back to the nearest supported format of each
/// device, creates the Qt audio objects and the Opus encoder/decoder, and
/// hooks the capture notify() signal up to OnAudioNotfiy().
AudioInput::AudioInput()
    : pcm_bytes(nullptr),   // assigned in OnAudioNotfiy(); an upfront allocation would only leak
      audioFormat(),
      audioInput(nullptr),
      mic(nullptr),
      audioOutput(nullptr),
      speaker(nullptr)
{
    audioFormat.setSampleRate(48000);
    audioFormat.setChannelCount(2);
    audioFormat.setSampleSize(16);
    audioFormat.setSampleType(QAudioFormat::SignedInt);
    audioFormat.setByteOrder(QAudioFormat::LittleEndian);
    audioFormat.setCodec("audio/pcm");

    speakerAudioFormat.setSampleRate(48000);
    speakerAudioFormat.setChannelCount(2);
    speakerAudioFormat.setSampleSize(16);
    speakerAudioFormat.setSampleType(QAudioFormat::SignedInt);
    speakerAudioFormat.setByteOrder(QAudioFormat::LittleEndian);
    speakerAudioFormat.setCodec("audio/pcm");

    QAudioDeviceInfo info = QAudioDeviceInfo::defaultInputDevice();
    if(!info.isFormatSupported(audioFormat)){
        std::cout << "Mic Format not supported!" << std::endl;
        audioFormat = info.nearestFormat(audioFormat);
    }
    QAudioDeviceInfo speakerInfo = QAudioDeviceInfo::defaultOutputDevice();
    if(!speakerInfo.isFormatSupported(speakerAudioFormat)){
        std::cout << "Speaker Format is not supported!" << std::endl;
        // Ask the OUTPUT device for its nearest format; querying the input
        // device here would pick a format the speaker may not support.
        speakerAudioFormat = speakerInfo.nearestFormat(speakerAudioFormat);
    }
    std::cout << speakerAudioFormat.sampleRate() << audioFormat.sampleRate() << speakerAudioFormat.channelCount() << audioFormat.channelCount() << std::endl;

    // Parent both objects to this QObject so they are released with it.
    audioInput = new QAudioInput(audioFormat, this);
    audioOutput = new QAudioOutput(speakerAudioFormat, this);
    audioInput->setNotifyInterval(20);

    // Samples per channel in 20 ms. Multiply before dividing so rates that
    // are not a multiple of 1000 are not truncated early.
    micFrameSize = audioFormat.sampleRate() * 20 / 1000;

    opus.initEncoder(audioFormat.sampleRate(),audioFormat.channelCount());
    opus.initDecoder(speakerAudioFormat.sampleRate(),speakerAudioFormat.channelCount());

    // Decoder output capacity in samples per channel.
    MAX_FRAME_SIZE = 6*960;

    connect(audioInput,SIGNAL(notify()),this,SLOT(OnAudioNotfiy()));
}

/// Stops and releases the capture and playback objects created in the
/// constructor so they do not leak.
AudioInput::~AudioInput()
{
    // Both pointers are assigned unconditionally in the constructor.
    // Deleting a QObject that also has a parent is safe: it unregisters
    // itself from the parent on destruction.
    audioInput->stop();
    audioOutput->stop();
    delete audioInput;
    delete audioOutput;
    audioInput = nullptr;
    audioOutput = nullptr;
    mic = nullptr;      // owned by audioInput
    speaker = nullptr;  // owned by audioOutput
}

void AudioInput::startRecording()
{

    mic = audioInput->start();
    speaker = audioOutput->start();
    std::cout << "Recording started!" << std::endl;


}


void AudioInput::CreateNewAudioThread()
{
    std::thread t1(&AudioInput::startRecording,this);
    t1.detach();
}





void AudioInput::OnAudioNotfiy()
{
    data = mic->readAll();


    std::cout << "data size" <<data.size() << std::endl;
    if(data.size() > 0){
    pcm_bytes = reinterpret_cast<unsigned char*>(data.data());

//convert

    for(int i=0;i<2*960;i++){ //TODO HARDCODED
        in[i]=pcm_bytes[2*i+1]<<8|pcm_bytes[2*i];
    }
    opus_int32 compressedBytes = opus.encodeData(in,960,cbits,4000);

    opus_int32 decompressedBytes = opus.decodeData(cbits,compressedBytes,out,MAX_FRAME_SIZE);

    for(int i = 0; i<2*decompressedBytes;i++) //TODO HARDCODED
    {
        pcm_bytes[2*i]=out[i]&0xFF;
        pcm_bytes[2*i+1]=(out[i]>>8)&0xFF;
    }


    speaker->write((const char*)pcm_bytes,3840);
}

}

2 个答案:

答案 0 :(得分:0)

我原本准备了一个很长的答案,说明您把opus.decodeData的返回值误解成了字节数,而它的正确含义是“每个通道解码出的样本数”。但看起来您在后面的字节转换例程中已经考虑到了这一点,因此我不能确定错误究竟出在哪里。

总的来说,我认为您把unsigned char <-> int16之间的转换做得比实际需要的更复杂。您应该可以直接把音频缓冲区传给opus(或从opus接收),只需就地将其指针重新解释为所需的类型,而不必手动进行位操作、在不同缓冲区之间转换和复制。音频设备给出的应该是小端(little-endian)数据;如果字节序不匹配,可以使用下面这个简单的字节交换例程:

for (int c = 0; c < numSamples; c++)
{
    unsigned char tmp = data[2 * c];
    data[2 * c] = data[2 * c + 1];
    data[2 * c + 1] = tmp;
}

我在这里没有看到相关代码,但我假设您另有代码保证每次只从麦克风中取出恰好960个样本,并把其余数据留在缓冲区中供下一帧使用,否则会丢失数据。

这一点关系不大,不过您也可以把cbits的大小从4000改为1275(这是Opus数据包的最大长度)。

答案 1 :(得分:0)

1)您仅编码960个字节,而缓冲区要大得多。您必须将缓冲区分成几个大小相等的部分,然后将它们逐个传递给编码器。每一部分的大小必须是120、240、480、960、1920或2880之一。

2)在char数组与opus_int16数组之间相互转换时,请使用qFromLittleEndian()/ qToLittleEndian()函数或类型转换。这样可以避免爆音和音质下降。

示例:

void voice::slot_read_audio_input()
{

    //    Audio settings:
    //    Sample Rate=48000
    //    Sample Size=16
    //    Channel Count=1
    //    Byte Order=Little Endian
    //    Sample Type= UnSignedInt

    //    Encoder settings:
    //    Sample Rate=48000
    //    Channel Count=1
    //    OPUS_APPLICATION_VOIP

    //    Decoder settings:
    //    Sample Rate=48000
    //    Channel Count=1

    QByteArray audio_buffer;//mic
    QByteArray output_audio_buffer;//speaker

    int const OPUS_INT_SIZE=2;//sizeof(opus_int16)
    int const FRAME_SIZE=960;
    int const MAX_FRAME_SIZE=1276;
    int FRAME_COUNT=3840/FRAME_SIZE/OPUS_INT_SIZE;// 3840 is a sample size= voice_input->bytesReady;

    opus_int16 input_frame[FRAME_SIZE] = {};
    opus_int16 output_frame[FRAME_SIZE] = {};
    unsigned char compressed_frame[MAX_FRAME_SIZE] = {};
    unsigned char decompressed_frame[FRAME_SIZE*OPUS_INT_SIZE] = {};

    audio_buffer.resize(voice_input->bytesReady());   
    output_audio_buffer.resize(FRAME_SIZE*OPUS_INT_SIZE);

    input->read(audio_buffer.data(),audio_buffer.size());

    for(int i=0;i<FRAME_COUNT;i++)
    {
        //    convert from LittleEndian
        for(int j=0;j<FRAME_SIZE;j++)
        {
            input_frame[j]=qFromLittleEndian<opus_int16>(audio_buffer.data()+j*OPUS_INT_SIZE);
            //    or use this:
            //    input_frame[j]=static_cast<short>(static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j+1))<<8|static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j)));
        }

        opus_int32 compressedBytes = opus_encode(enc, input_frame,FRAME_SIZE,compressed_frame,MAX_FRAME_SIZE);
        opus_int32 decompressedBytes = opus_decode(dec,compressed_frame,compressedBytes,output_frame,FRAME_SIZE,0);

        //    conver to LittleEndian
        for(int j = 0; j<decompressedBytes;j++)
        {
            qToLittleEndian(output_frame[j],output_audio_buffer.data()+j*OPUS_INT_SIZE);
            //    or use this:
            //    decompressed_frame[OPUS_INT_SIZE*j]=output_frame[j]&0xFF;
            //    decompressed_frame[OPUS_INT_SIZE*j+1]=(output_frame[j]>>8)&0xFF;
        }

        audio_buffer.remove(0,FRAME_SIZE*OPUS_INT_SIZE);
        output->write(output_audio_buffer,FRAME_SIZE*OPUS_INT_SIZE);
        //    or use this:
        //    output->write(reinterpret_cast<char*>(decompressed_frame),FRAME_SIZE*OPUS_INT_SIZE);
    }
}