从H.264 NALU获取视频的宽度/高度

时间:2012-08-18 12:25:51

标签: h.264 video-processing

我在NALU(AVC解码器配置记录)中获得了SPS,并试图从中解析视频宽度/高度。

67 64 00 15 ac c8 60 20 09 6c 04 40 00 00 03 00 40 00 00 07 a3 c5 8b 67 80 

这是我的代码解析SPS但得到错误的值。 pic_width_in_mbs_minus1为5,pic_height_in_map_units_minus1为1。 实际上视频是512 X 288px

typedef struct _SequenceParameterSet
{
private:
    const unsigned char * m_pStart;
    unsigned short m_nLength;
    int m_nCurrentBit;

    unsigned int ReadBit()
    {
        ATLASSERT(m_nCurrentBit <= m_nLength * 8);
        int nIndex = m_nCurrentBit / 8;
        int nOffset = m_nCurrentBit % 8 + 1;

        m_nCurrentBit ++;
        return (m_pStart[nIndex] >> (8-nOffset)) & 0x01;
    }

    unsigned int ReadBits(int n)
    {
        int r = 0;
        int i;
        for (i = 0; i < n; i++)
        {
            r |= ( ReadBit() << ( n - i - 1 ) );
        }
        return r;
    }

    unsigned int ReadExponentialGolombCode()
    {
        int r = 0;
        int i = 0;

        while( (ReadBit() == 0) && (i < 32) )
        {
            i++;
        }
        r = ReadBits(i);
        r += (1 << i) - 1;
        return r;
    }


    unsigned int ReadSE() 
    {
        int r = ReadExponentialGolombCode();
        if (r & 0x01)
        {
            r = (r+1)/2;
        }
        else
        {
            r = -(r/2);
        }
        return r;
    }
public:

    void Parse(const unsigned char * pStart, unsigned short nLen)
    {
        m_pStart = pStart;
        m_nLength = nLen;
        m_nCurrentBit = 0;
        int profile_idc = ReadBits(8);
        int constraint_set0_flag = ReadBit();
        int constraint_set1_flag = ReadBit();
        int constraint_set2_flag = ReadBit();
        int constraint_set3_flag = ReadBit();
        int constraint_set4_flag = ReadBit();
        int constraint_set5_flag = ReadBit();
        int reserved_zero_2bits  = ReadBits(2);
        int level_idc = ReadBits(8);
        int seq_parameter_set_id = ReadExponentialGolombCode();

        if( profile_idc == 100 || profile_idc == 110 ||
            profile_idc == 122 || profile_idc == 144 )
        {
            int chroma_format_idc = ReadExponentialGolombCode();
            if( chroma_format_idc == 3 )
            {
                int residual_colour_transform_flag = ReadBit();
            }
            int bit_depth_luma_minus8 = ReadExponentialGolombCode();
            int bit_depth_chroma_minus8 = ReadExponentialGolombCode();
            int qpprime_y_zero_transform_bypass_flag = ReadBit();
            int seq_scaling_matrix_present_flag = ReadBit();
            if( seq_scaling_matrix_present_flag )
            {
                for( int i = 0; i < 8; i++ )
                {
                    int seq_scaling_list_present_flag = ReadBit();
                    if( seq_scaling_list_present_flag )
                    {
                        /*
                        if( i < 6 )
                        {
                            read_scaling_list( b, sps->ScalingList4x4[ i ], 16,
                                sps->UseDefaultScalingMatrix4x4Flag[ i ]);
                        }
                        else
                        {
                            read_scaling_list( b, sps->ScalingList8x8[ i - 6 ], 64,
                                sps->UseDefaultScalingMatrix8x8Flag[ i - 6 ] );
                        }
                        */
                    }
                }
            }
        }
        int log2_max_frame_num_minus4 = ReadExponentialGolombCode();
        int pic_order_cnt_type = ReadExponentialGolombCode();
        if( pic_order_cnt_type == 0 )
        {
            int log2_max_pic_order_cnt_lsb_minus4 = ReadExponentialGolombCode();
        }
        else if( pic_order_cnt_type == 1 )
        {
            int delta_pic_order_always_zero_flag = ReadBit();
            int offset_for_non_ref_pic = ReadSE();
            int offset_for_top_to_bottom_field = ReadSE();
            int num_ref_frames_in_pic_order_cnt_cycle = ReadExponentialGolombCode();
            for( int i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++ )
            {
                ReadSE();
                //sps->offset_for_ref_frame[ i ] = ReadSE();
            }
        }
        int num_ref_frames = ReadExponentialGolombCode();
        int gaps_in_frame_num_value_allowed_flag = ReadBit();
        int pic_width_in_mbs_minus1 = ReadExponentialGolombCode();
        int pic_height_in_map_units_minus1 = ReadExponentialGolombCode();
        int frame_mbs_only_flag = ReadBit();
        if( !frame_mbs_only_flag )
        {
            int mb_adaptive_frame_field_flag = ReadBit();
        }
        int direct_8x8_inference_flag = ReadBit();
        int frame_cropping_flag = ReadBit();
        if( frame_cropping_flag )
        {
            int frame_crop_left_offset = ReadExponentialGolombCode();
            int frame_crop_right_offset = ReadExponentialGolombCode();
            int frame_crop_top_offset = ReadExponentialGolombCode();
            int frame_crop_bottom_offset = ReadExponentialGolombCode();
        }
        int vui_parameters_present_flag = ReadBit();

        pStart++;
    }
}SequenceParameterSet, *LPSequenceParameterSet;

这是我的代码解析SPS但得到错误的值。 pic_width_in_mbs_minus1为5,pic_height_in_map_units_minus1为1.实际上视频为512 X 288px

谢谢

4 个答案:

答案 0 :(得分:11)

#include <stdio.h>
#include <stdlib.h>    
#include <string.h>
#include <assert.h>

const unsigned char * m_pStart;
unsigned short m_nLength;
int m_nCurrentBit;

unsigned int ReadBit()
{
    assert(m_nCurrentBit <= m_nLength * 8);
    int nIndex = m_nCurrentBit / 8;
    int nOffset = m_nCurrentBit % 8 + 1;

    m_nCurrentBit ++;
    return (m_pStart[nIndex] >> (8-nOffset)) & 0x01;
}

unsigned int ReadBits(int n)
{
    int r = 0;
    int i;
    for (i = 0; i < n; i++)
    {
        r |= ( ReadBit() << ( n - i - 1 ) );
    }
    return r;
}

unsigned int ReadExponentialGolombCode()
{
    int r = 0;
    int i = 0;

    while( (ReadBit() == 0) && (i < 32) )
    {
        i++;
    }

    r = ReadBits(i);
    r += (1 << i) - 1;
    return r;
}

unsigned int ReadSE() 
{
    int r = ReadExponentialGolombCode();
    if (r & 0x01)
    {
        r = (r+1)/2;
    }
    else
    {
        r = -(r/2);
    }
    return r;
}

void Parse(const unsigned char * pStart, unsigned short nLen)
{
    m_pStart = pStart;
    m_nLength = nLen;
    m_nCurrentBit = 0;

    int frame_crop_left_offset=0;
    int frame_crop_right_offset=0;
    int frame_crop_top_offset=0;
    int frame_crop_bottom_offset=0;

    int profile_idc = ReadBits(8);          
    int constraint_set0_flag = ReadBit();   
    int constraint_set1_flag = ReadBit();   
    int constraint_set2_flag = ReadBit();   
    int constraint_set3_flag = ReadBit();   
    int constraint_set4_flag = ReadBit();   
    int constraint_set5_flag = ReadBit();   
    int reserved_zero_2bits  = ReadBits(2); 
    int level_idc = ReadBits(8);            
    int seq_parameter_set_id = ReadExponentialGolombCode();


    if( profile_idc == 100 || profile_idc == 110 ||
        profile_idc == 122 || profile_idc == 244 ||
        profile_idc == 44 || profile_idc == 83 ||
        profile_idc == 86 || profile_idc == 118 )
    {
        int chroma_format_idc = ReadExponentialGolombCode();

        if( chroma_format_idc == 3 )
        {
            int residual_colour_transform_flag = ReadBit();         
        }
        int bit_depth_luma_minus8 = ReadExponentialGolombCode();        
        int bit_depth_chroma_minus8 = ReadExponentialGolombCode();      
        int qpprime_y_zero_transform_bypass_flag = ReadBit();       
        int seq_scaling_matrix_present_flag = ReadBit();        

        if (seq_scaling_matrix_present_flag) 
        {
            int i=0;
            for ( i = 0; i < 8; i++) 
            {
                int seq_scaling_list_present_flag = ReadBit();
                if (seq_scaling_list_present_flag) 
                {
                    int sizeOfScalingList = (i < 6) ? 16 : 64;
                    int lastScale = 8;
                    int nextScale = 8;
                    int j=0;
                    for ( j = 0; j < sizeOfScalingList; j++) 
                    {
                        if (nextScale != 0) 
                        {
                            int delta_scale = ReadSE();
                            nextScale = (lastScale + delta_scale + 256) % 256;
                        }
                        lastScale = (nextScale == 0) ? lastScale : nextScale;
                    }
                }
            }
        }
    }

    int log2_max_frame_num_minus4 = ReadExponentialGolombCode();
    int pic_order_cnt_type = ReadExponentialGolombCode();
    if( pic_order_cnt_type == 0 )
    {
        int log2_max_pic_order_cnt_lsb_minus4 = ReadExponentialGolombCode();
    }
    else if( pic_order_cnt_type == 1 )
    {
        int delta_pic_order_always_zero_flag = ReadBit();
        int offset_for_non_ref_pic = ReadSE();
        int offset_for_top_to_bottom_field = ReadSE();
        int num_ref_frames_in_pic_order_cnt_cycle = ReadExponentialGolombCode();
        int i;
        for( i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++ )
        {
            ReadSE();
            //sps->offset_for_ref_frame[ i ] = ReadSE();
        }
    }
    int max_num_ref_frames = ReadExponentialGolombCode();
    int gaps_in_frame_num_value_allowed_flag = ReadBit();
    int pic_width_in_mbs_minus1 = ReadExponentialGolombCode();
    int pic_height_in_map_units_minus1 = ReadExponentialGolombCode();
    int frame_mbs_only_flag = ReadBit();
    if( !frame_mbs_only_flag )
    {
        int mb_adaptive_frame_field_flag = ReadBit();
    }
    int direct_8x8_inference_flag = ReadBit();
    int frame_cropping_flag = ReadBit();
    if( frame_cropping_flag )
    {
        frame_crop_left_offset = ReadExponentialGolombCode();
        frame_crop_right_offset = ReadExponentialGolombCode();
        frame_crop_top_offset = ReadExponentialGolombCode();
        frame_crop_bottom_offset = ReadExponentialGolombCode();
    }
    int vui_parameters_present_flag = ReadBit();
    pStart++;

    int Width = ((pic_width_in_mbs_minus1 +1)*16) - frame_crop_bottom_offset*2 - frame_crop_top_offset*2;
    int Height = ((2 - frame_mbs_only_flag)* (pic_height_in_map_units_minus1 +1) * 16) - (frame_crop_right_offset * 2) - (frame_crop_left_offset * 2);

    printf("\n\nWxH = %dx%d\n\n",Width,Height); 

}

void FindJPGFileResolution(char *cpFileName, int *ipWidth, int *ipHeight) 
{   
    int i;

    FILE *fp = fopen(cpFileName,"rb");
    fseek(fp,0,SEEK_END);
    long len = ftell(fp);
    fseek(fp,0,SEEK_SET);

    unsigned char *ucpInBuffer = (unsigned char*) malloc (len+1);
    fread(ucpInBuffer,1,len,fp);
    fclose(fp);

    printf("\n\nBuffer size %ld\n", len);   
    for(i=0;i<len;i++)
    {
        //printf(" %x", ucpInBuffer[i]);    
        if( 
            (ucpInBuffer[i]==0x00) && (ucpInBuffer[i+1]==0x00) && 
            (ucpInBuffer[i+2]==0x00) && (ucpInBuffer[i+3]==0x01) 
          )
        {
            //if(ucpInBuffer[i+4] & 0x0F ==0x07)
            if(ucpInBuffer[i+4] == 0x67 || ucpInBuffer[i+4] == 0x27)
            {
                Parse(&ucpInBuffer[i+5], len);              
                break;
            }
        }       
    }

    free(ucpInBuffer);
    return;
}


int main()               
{
    int iHeight=0, iWidth=0;
    char *cpFileName = "/home/pankaj/pankil/SameSystem_H264_1920x320.264";
    FindJPGFileResolution(cpFileName, &iWidth, &iHeight);
    return 0;
}

试试这个编码。这是成功的,我已经用gcc编译器测试过了。 只需设置文件路径编译并运行它。

答案 1 :(得分:4)

SPS是(摘录):

profile_idc 100 
constraint_set0_flag 0 
constraint_set1_flag 0 
constraint_set2_flag 0 
constraint_set3_flag 0 
level_idc 21 
seq_parameter_set_id 0 
chroma_format_idc 1 
num_ref_frames 5 
gaps_in_frame_num_value_allowed_flag 0 
pic_width_in_mbs_minus1 31 
pic_height_in_map_units_minus1 17 
frame_mbs_only_flag 1 
direct_8x8_inference_flag 1 
frame_cropping_flag 0 
vui_parameters_present_flag 1 

这是512x288视频,请参阅此处有关如何解码数据的详细信息:Fetching the dimensions of a H264Video stream

答案 2 :(得分:3)

我在上面的示例代码中遇到了错误:

int Width = ((pic_width_in_mbs_minus1 +1)*16) - frame_crop_bottom_offset*2 - frame_crop_top_offset*2;
int Height = ((2 - frame_mbs_only_flag)* (pic_height_in_map_units_minus1 +1) * 16) - (frame_crop_right_offset * 2) - (frame_crop_left_offset * 2);

应该是:

int Width = ((pic_width_in_mbs_minus1 +1)*16) - frame_crop_right_offset *2 - frame_crop_left_offset *2;
int Height = ((2 - frame_mbs_only_flag)* (pic_height_in_map_units_minus1 +1) * 16) - (frame_crop_bottom_offset* 2) - (frame_crop_top_offset* 2);

否则当frame_cropping_flag为1时,您将得到错误的分辨率。 希望这有助于其他人。

答案 3 :(得分:0)

我可以确认经过Alan Lu修正的Kaidul代码可以正常工作。 请注意,RecyclerView的h264 SPS nalu的偏移量为5(去掉前缀“ 0 0 0 1”和nal_unit_type字节)