Question

我在使用ffmpeg解码wav文件时遇到问题。我是新手，我不太习惯它。

在我的应用程序中，我必须输入音频文件并获取一系列样本。我使用ffmpeg来创建一个函数，它输入文件的路径，开始输出样本的时间位置以及以秒为单位解码的块的长度。

我的声望值不够，所以只好建了一个 Google Drive 目录，你可以在那里看到问题的截图和我使用的文件。

这是：https://goo.gl/8KnjAj

当我尝试解码文件 harp.wav 时，一切运行正常，我可以把样本绘制成图，如图像 plot-harp.png 所示。

该文件是一个WAV文件，编码为：pcm_u8,11025 Hz，1个通道，u8,88 kb / s

当我尝试解码文件 demo-unprocessed.wav 时出现问题。它输出一系列无意义的样本。它会输出一系列样本，如图像 graph1-demo.jpg 所示。

该文件是一个WAV文件，编码为：pcm_s16le，44100 Hz，1个通道，s16,705 kb / s

我不知道代码中的问题出在哪里；我已经检查了使用 FFmpeg 解码之前和之后的代码，这些部分都工作正常。

以下是dataReader.cpp的代码：

/* Start by including the necessary */
#include "dataReader.h"
#include <cstdlib>
#include <iostream>
#include <fstream>

#ifdef __cplusplus
extern "C" {
#endif
    #include <libavcodec/avcodec.h> 
    #include <libavformat/avformat.h>
    #include <libavutil/avutil.h>
#ifdef __cplusplus 
}
#endif

using namespace std;

/* Default constructor: starts with no sample buffer, zero size and zero bitrate. */
audioChunk::audioChunk()
    : data(NULL),
      size(0),
      bitrate(0)
{
}

/* Returns the chunk length in whole seconds (fractions are truncated).
 *
 * size counts bytes while bitrate is expressed in bits per second (readData
 * sizes its buffer as bit_rate * seconds / 8), so the byte count is converted
 * to bits before dividing.
 *
 * Returns 0 when bitrate is not positive, which is the state of a
 * default-constructed chunk; this avoids an integer division by zero.
 */
int audioChunk::getTimeLenght(){
    if(bitrate<=0) return 0;

    /* Widen before multiplying so that large buffers do not overflow. */
    const unsigned long long bits = static_cast<unsigned long long>(size)*8ULL;
    return static_cast<int>(bits/static_cast<unsigned long long>(bitrate));
}

/* Default constructor: starts with no sample buffer, zero size and zero bitrate. */
audioChunk_dNorm::audioChunk_dNorm()
    : data(NULL),
      size(0),
      bitrate(0)
{
}

/* Returns the chunk length in whole seconds (fractions are truncated).
 *
 * fillAudioChunk copies size and bitrate from the source audioChunk, where
 * size counts bytes and bitrate is in bits per second, so the count is
 * converted to bits before dividing.
 *
 * Returns 0 when bitrate is not positive, which is the state of a
 * default-constructed chunk; this avoids an integer division by zero.
 */
int audioChunk_dNorm::getTimeLenght(){
    if(bitrate<=0) return 0;

    /* Widen before multiplying so that large buffers do not overflow. */
    const unsigned long long bits = static_cast<unsigned long long>(size)*8ULL;
    return static_cast<int>(bits/static_cast<unsigned long long>(bitrate));
}

/* Normalizes the bytes of an audioChunk into doubles in the range [-1, 1].
 *
 * @ cnk = source chunk; its size and bitrate are copied into this object.
 *
 * Every byte of cnk->data is treated as one unsigned 8-bit value and mapped
 * linearly so that the smallest byte becomes -1 and the largest becomes +1.
 * When cnk is NULL, has no buffer or is empty, this object is left empty
 * (data == NULL, size == 0). When every byte has the same value the output
 * is all zeros, because there is no range to scale by.
 *
 * A new buffer is allocated with new[] on every call; any buffer previously
 * held in data is not released here.
 */
void audioChunk_dNorm::fillAudioChunk(audioChunk* cnk){

    data=NULL;
    size=0;
    bitrate=(cnk!=NULL)?cnk->bitrate:0;

    if(cnk!=NULL && cnk->data!=NULL && cnk->size>0){

        size=cnk->size;

        /* Find the extremes of the input. */
        double lowest=cnk->data[0];
        double highest=cnk->data[0];

        for(unsigned int i=1;i<size;i++){
            const double sample=cnk->data[i];
            if(sample>highest) highest=sample;
            else if(sample<lowest) lowest=sample;
        }

        const double range=highest-lowest;
        /* Centre on the midpoint of the observed range so the result is
         * symmetric around zero even when the smallest byte is not 0. */
        const double centre=(highest+lowest)/2;

        data=new double[size];

        for(unsigned int i=0;i<size;i++){
            /* NOTE(review): the previous code compared the uninitialised
             * output element with 255; testing the source byte is assumed to
             * be the intent. Confirm that mapping 255 to 0 is really wanted. */
            if(range==0 || cnk->data[i]==255) data[i]=0;
            else data[i]=2*(cnk->data[i]-centre)/range;
        }
    }

    cout<<"bitrate "<<bitrate<<endl;
}


audioChunk readData(const char* path_name, const double start_time, const double lenght){

    /* inizialize audioChunk */
    audioChunk output;

    /* Check input times */
    if((start_time<0)||(lenght<0)) {
        cout<<"Input times should be positive";
        return output;
    }

    /* Start FFmpeg */
    av_register_all();

    /* Initialize the frame to read the data and verify memory allocation */
    AVFrame* frame = av_frame_alloc();
    if (!frame)
    {
        cout << "Error allocating the frame" << endl;
        return output;
    }

    /* Initialization of the Context, to open the file */
    AVFormatContext* formatContext = NULL;
    /* Opening the file, and check if it has opened */
    if (avformat_open_input(&formatContext, path_name, NULL, NULL) != 0)
    {
        av_frame_free(&frame);
        cout << "Error opening the file" << endl;
        return output;
    }

    /* Find the stream info, if not found, exit */
    if (avformat_find_stream_info(formatContext, NULL) < 0)
    {
        av_frame_free(&frame);
        avformat_close_input(&formatContext);
        cout << "Error finding the stream info" << endl;
        return output;
    }

    /* Check inputs to verify time input */
    if(start_time>(formatContext->duration/1000000)){
        cout<< "Error, start_time is over file duration"<<endl;
        av_frame_free(&frame);
        avformat_close_input(&formatContext);
        return output;
    }

    /* Chunk = number of samples to output */
    long long int chunk = ((formatContext->bit_rate)*lenght/8);
    /* Start = address of sample where start to read */
    long long int start = ((formatContext->bit_rate)*start_time/8);
    /* Tot_sampl = number of the samples in the file */
    long long int tot_sampl = (formatContext->bit_rate)*(formatContext->duration)/8000000;

    /* Set the lenght of chunk to avoid segfault and to read all the file */
    if (start+chunk>tot_sampl) {chunk = tot_sampl-start;}
    if (lenght==0) {start = 0; chunk = tot_sampl;}

    /* initialize the array to output */
    output.data = new unsigned char[chunk];
    output.bitrate = formatContext->bit_rate;
    output.size=chunk;

    av_dump_format(formatContext,0,NULL,0);
    cout<<chunk<<" n of sample to read"<<endl;
    cout<<start<<" start"<<endl;
    cout<<output.bitrate<<" bitrate"<<endl;
    cout<<tot_sampl<<" total sample"<<endl;


    /* Find the audio Stream, if no audio stream are found, clean and exit */
    AVCodec* cdc = NULL;
    int streamIndex = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &cdc, 0);
    if (streamIndex < 0)
    {
        av_frame_free(&frame);
        avformat_close_input(&formatContext);
        cout << "Could not find any audio stream in the file" << endl;
        return output;
    }

    /* Open the audio stream to read data  in audioStream */
    AVStream* audioStream = formatContext->streams[streamIndex];

    /* Initialize the codec context */
    AVCodecContext* codecContext = audioStream->codec;
    codecContext->codec = cdc;
    /* Open the codec, and verify if it has opened */
    if (avcodec_open2(codecContext, codecContext->codec, NULL) != 0)
    {
        av_frame_free(&frame);
        avformat_close_input(&formatContext);
        cout << "Couldn't open the context with the decoder" << endl;
        return output;
    }

    /* Initialize buffer to store compressed packets */
    AVPacket readingPacket;
    av_init_packet(&readingPacket);


    int j=0;
    int count = 0; 

    while(av_read_frame(formatContext, &readingPacket)==0){
        if((count+readingPacket.size)>start){
            if(readingPacket.stream_index == audioStream->index){

                AVPacket decodingPacket = readingPacket;

                // Audio packets can have multiple audio frames in a single packet
                while (decodingPacket.size > 0){
                    // Try to decode the packet into a frame
                    // Some frames rely on multiple packets, so we have to make sure the frame is finished before
                    // we can use it
                    int gotFrame = 0;
                    int result = avcodec_decode_audio4(codecContext, frame, &gotFrame, &decodingPacket);

                    count += result;

                    if (result >= 0 && gotFrame)
                    {
                        decodingPacket.size -= result;
                        decodingPacket.data += result;
                        int a;

                        for(int i=0;i<result-1;i++){

                            *(output.data+j)=frame->data[0][i];

                            j++;
                            if(j>=chunk) break;
                        }

                        // We now have a fully decoded audio frame
                    }
                    else
                    {
                        decodingPacket.size = 0;
                        decodingPacket.data = NULL;
                    }
                    if(j>=chunk) break;
                }
            }              
        }else count+=readingPacket.size;

        // To prevent memory leak, must free packet.
        av_free_packet(&readingPacket);
        if(j>=chunk) break;
    }

    // Some codecs will cause frames to be buffered up in the decoding process. If the CODEC_CAP_DELAY flag
    // is set, there can be buffered up frames that need to be flushed, so we'll do that
    if (codecContext->codec->capabilities & CODEC_CAP_DELAY)
    {
        av_init_packet(&readingPacket);
        // Decode all the remaining frames in the buffer, until the end is reached
        int gotFrame = 0;
        int a;
        int result=avcodec_decode_audio4(codecContext, frame, &gotFrame, &readingPacket);
        while (result >= 0 && gotFrame)
        {
            // We now have a fully decoded audio frame
            for(int i=0;i<result-1;i++){

                *(output.data+j)=frame->data[0][i];

                j++;
                if(j>=chunk) break;
            }
            if(j>=chunk) break;
        }
    }

    // Clean up!
    av_free(frame);
    avcodec_close(codecContext);
    avformat_close_input(&formatContext);

    cout<<"Ended Reading, "<<j<<" samples read"<<endl;
    output.size=j;
    return output;
}

这是dataReader.h

/* 
 * File:   dataReader.h
 * Author: davide
 *
 * Created on 27 luglio 2015, 11.11
 */

#ifndef DATAREADER_H
#define DATAREADER_H

/* function that reads a file and outputs an array of samples
 * @ path_name = the path of the file to read
 * @ start_time = the position where the data reading starts, 0 = start of file;
 *                the time is in seconds, it can hold to 10e-6 seconds
 * @ lenght = the length of the chunk of data to extract,
 *            0 = read the whole file (do not use with big files);
 *            if lenght is greater than the file duration, it reads through to the end of the file.
 *            the time is in seconds, it can hold to 10e-6 seconds
 */

#include <stdint.h>

/* Raw block of decoded audio bytes, as returned by readData.
 * The class declares no destructor or copy operations, so copies share the
 * same data pointer and nothing releases the buffer automatically. */
class audioChunk{
public:
    /* Byte buffer; set to NULL by the constructor and allocated with new[] in readData. */
    uint8_t *data;
    /* Number of bytes in data. */
    unsigned int size;
    /* Bit rate copied from the container (AVFormatContext::bit_rate) by readData. */
    int bitrate;
    /* Chunk length in seconds, computed from size and bitrate. */
    int getTimeLenght();
    /* Creates an empty chunk: data NULL, size 0, bitrate 0. */
    audioChunk();
};

/* Normalized (double) version of an audioChunk, filled by fillAudioChunk.
 * The class declares no destructor or copy operations, so copies share the
 * same data pointer and nothing releases the buffer automatically. */
class audioChunk_dNorm{
public:
    /* Normalized values; set to NULL by the constructor and allocated with new[] in fillAudioChunk. */
    double* data;
    /* Number of elements in data; copied from the source chunk's size. */
    unsigned int size;
    /* Bit rate copied from the source chunk. */
    int bitrate;
    /* Chunk length in seconds, computed from size and bitrate. */
    int getTimeLenght();
    /* Copies size and bitrate from cnk and fills data with one normalized value per source byte. */
    void fillAudioChunk(audioChunk* cnk);
    /* Creates an empty chunk: data NULL, size 0, bitrate 0. */
    audioChunk_dNorm();
};

/* Decodes part of the audio file at path_name and returns it by value.
 * start_time and lenght are in seconds; negative values are rejected and an
 * empty chunk is returned. The returned chunk's data buffer is allocated with
 * new[] inside readData and is not released by audioChunk itself. */
audioChunk readData(const char* path_name, const double start_time, const double lenght);

#endif  /* DATAREADER_H */

最后还有应用程序的main.cpp。

/* 
 * File:   main.cpp
 * Author: davide
 *
 * Created on 28 luglio 2015, 17.04
 */

#include <cstdlib>
#include "dataReader.h"
#include "transforms.h"
#include "tognuplot.h"
#include <fstream>
#include <iostream>

using namespace std;

/*
 * 
 */
/* Decodes one second of ./audio/demo-unprocessed.wav, normalizes it, and
 * writes the raw bytes to ./file/2wave.txt as "index value" lines.
 *
 * The chunks are automatic objects and their buffers are released before
 * returning, so nothing is leaked. Always returns 0.
 */
int main(int argc, char** argv) {

    audioChunk chunk1=readData("./audio/demo-unprocessed.wav",0,1);

    audioChunk_dNorm normChunk1;

    /* readData returns an empty chunk on failure; there is nothing to
     * normalize in that case */
    if(chunk1.data!=NULL && chunk1.size>0) {
        normChunk1.fillAudioChunk(&chunk1);
    }

    ofstream file1;
    file1.open("./file/2wave.txt", std::ofstream::trunc);
    if(file1.is_open()) {
        for(unsigned int i=0;i<chunk1.size;i++) {
            /* Widen to int so the byte is printed as a number, not a character */
            const int a=chunk1.data[i];
            file1<<i<<" "<<a<<'\n';
        }
    }
    else cout<<"Error opening file";

    file1.close();

    /* Both buffers were allocated with new[]; deleting NULL is harmless */
    delete[] normChunk1.data;
    delete[] chunk1.data;

    return 0;
}

我无法理解为什么输出会是这样。是否有可能解码器无法把样本（pcm_s16le，16 位）转换后存入以 uint8_t 存储样本的 FFmpeg AVFrame.data？如果是这样，有没有办法让 FFmpeg 处理样本位宽超过 8 位的音频文件？

文件 graph1-demo_good.jpg 是这些样本的正确波形，由我用 libsndfile 编写的程序提取得到。

编辑：看起来程序只是把解码后的数据原样存入了目标缓冲区（我把它设置为 unsigned char[]），而解码器是以小端 16 位格式存储样本的，即每个样本占用两个 uint8_t。因此，把数据导入 audioChunk.data 这一步是正确的，但我不应把它当作单个 unsigned char 来读取，而应把每两个小端字节合并成一个样本来读取。

Answer 1

我使用gdb查看chunk1->data指向的内存。（x /256xh 0x18dddf0，以十六进制转储前256个半字）。它看起来像有符号的16位值，因为它开始时有很多0，0xFFFF和0x0001。

所以你的代码需要让 FFmpeg 把样本转换为某种特定的格式。抱歉，我不知道怎样做才是最好的办法。

使用FFMPEG解码pcm_s16le？

1 个答案: