Greetings,
I have written a DirectShow source filter that takes the AVC video frames / AAC access units from an ATSC-153 broadcast, written on a WinCE/ARM video processor. The output pins (two of them, one for video, one for audio) are connected to the appropriate decoders and renderers. Currently, I take the PTS from the appropriate RTP headers, pass them up to the source filter, and perform the conversion against the DirectShow clock. The video PTS runs at a 90 kHz rate; the audio PTS rate varies, and my current test stream's audio ticks at 55.2 kHz.
Below are the convert_to_dshow_timestamp() and FillBuffer() routines. When I print out the converted timestamps as the filter retrieves the video/audio, the times are within 100-200 ms of each other. That would not be bad; it would be workable. However, the video trails the audio by 2-3 seconds.
/* Routine to convert a source clock tick count to DirectShow's 100 ns units */
static unsigned long long convert_to_dshow_timestamp(
    unsigned long long ts,
    unsigned long rate
)
{
    long double hz;
    long double multi;
    long double tmp;
    if (rate == 0)
    {
        return 0;
    }
    hz = (long double) 1.0 / rate;
    multi = hz / 1e-7;  /* Seconds per tick, scaled to 100 ns units */
    tmp = ((long double) ts * multi) + 0.5;     /* Round to nearest */
    return (unsigned long long) tmp;
}
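As a quick sanity check of the conversion (example values only; assumes assert.h): one second of 90 kHz ticks should come out as one second of DirectShow 100 ns units.

/* Hypothetical check: 90000 ticks at 90 kHz is one second, which is
 * 10,000,000 DirectShow units of 100 ns each.
 */
assert(convert_to_dshow_timestamp(90000, 90000) == 10000000ULL);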
/* The source filter FillBuffer() routine */
HRESULT OutputPin::FillBuffer(IMediaSample *pSamp)
{
    BYTE *pData;
    DWORD dataSize;
    pipeStream stream;
    BOOL retVal;
    DWORD returnBytes;
    HRESULT hr = S_OK;  /* Initialized so every path returns a defined value */
    DWORD discont;
    REFERENCE_TIME ts;
    REFERENCE_TIME df;
    unsigned long long difPts;
    unsigned long long difTimeRef;
    pSamp->GetPointer(&pData);
    dataSize = pSamp->GetSize();
    ZeroMemory(pData, dataSize);
    stream.lBuf = pData;
    stream.dataSize = dataSize;
    /* Pin type 1 is H.264 AVC video frames */
    if (m_iPinType == 1)
    {
        retVal = DeviceIoControl(
            ghMHTune,
            IOCTL_MHTUNE_RVIDEO_STREAM,
            NULL,
            0,
            &stream,
            sizeof(pipeStream),
            &returnBytes,
            NULL
        );
        if (retVal == TRUE)
        {
            /* Get the data */
            /* Check for the first sample of the stream; if so, set the
             * start time.
             */
            pSamp->SetActualDataLength(returnBytes);
            hr = S_OK;
            if (returnBytes > 0)
            {
                /* The discontinuity is set in upper layers, when an RTP
                 * sequence number has been lost.
                 */
                discont = stream.discont;
                /* Check for another break in stream time */
                if (
                    m_PrevTimeRef &&
                    ((m_PrevTimeRef > (stream.timeRef + 90000 * 10)) ||
                     ((m_PrevTimeRef + 90000 * 10) < stream.timeRef))
                )
                {
                    dbg_log(TEXT("MY:DISC HERE\n"));
                    if (m_StartStream > 0)
                    {
                        discont = 1;
                    }
                }
                /* If the stream has not started yet, or there is a
                 * discontinuity, then reset the stream time.
                 */
                if ((m_StartStream == 0) || (discont != 0))
                {
                    sys_time = timeGetTime() - m_ClockStartTime;
                    m_OtherSide->sys_time = sys_time;
                    /* For video, the clockRate is 90 kHz. Map the elapsed
                     * system ms into clock ticks, biased by half a second.
                     */
                    m_RefGap = (sys_time * (stream.clockRate / 1000)) +
                               (stream.clockRate / 2);
                    /* timeRef is the PTS for the frame from the RTP header */
                    m_TimeGap = stream.timeRef;
                    m_StartStream = 1;
                    difTimeRef = 1;
                    m_PrevPTS = 0;
                    m_PrevSysTime = timeGetTime();
                    dbg_log(
                        TEXT("MY:StartStream %lld: %lld: %lld\n"),
                        sys_time,
                        m_RefGap,
                        m_TimeGap
                    );
                }
                else
                {
                    m_StartStream++;
                }
                difTimeRef = stream.timeRef - m_PrevTimeRef;
                m_PrevTimeRef = stream.timeRef;
                /* Difference in 90 kHz clocking */
                ts = stream.timeRef - m_TimeGap + m_RefGap;
                ts = convert_to_dshow_timestamp(ts, stream.clockRate);
                if (discont != 0)
                {
                    dbg_log(TEXT("MY:VDISC TRUE\n"));
                    pSamp->SetDiscontinuity(TRUE);
                }
                else
                {
                    pSamp->SetDiscontinuity(FALSE);
                    pSamp->SetSyncPoint(TRUE);
                }
                difPts = ts - m_PrevPTS;
                df = ts + 1;    /* Stop time is just start + one 100 ns unit */
                m_PrevPTS = ts;
                dbg_log(
                    TEXT("MY:T %lld: %lld = %lld: %d: %lld\n"),
                    ts,
                    m_OtherSide->m_PrevPTS,
                    stream.timeRef,
                    (timeGetTime() - m_PrevSysTime),
                    difPts
                );
                pSamp->SetTime(&ts, &df);
                m_PrevSysTime = timeGetTime();
            }
            else
            {
                Sleep(10);
            }
        }
        else
        {
            dbg_log(TEXT("MY: Fill FAIL\n"));
            hr = E_FAIL;
        }
    }
    else if (m_iPinType == 2)
    {
        /* Pin type 2 is audio AAC access units, with ADTS headers */
        retVal = DeviceIoControl(
            ghMHTune,
            IOCTL_MHTUNE_RAUDIO_STREAM,
            NULL,
            0,
            &stream,
            sizeof(pipeStream),
            &returnBytes,
            NULL
        );
        if (retVal == TRUE)
        {
            /* Get the data */
            /* Check for the first sample of the stream; if so, set the
             * start time.
             */
            hr = S_OK;
            if (returnBytes > 0)
            {
                discont = stream.discont;
                if ((m_StartStream == 0) || (discont != 0))
                {
                    sys_time = timeGetTime() - m_ClockStartTime;
                    m_RefGap = (sys_time * (stream.clockRate / 1000)) +
                               (stream.clockRate / 2);
                    /* Mark the first PTS from the stream. This PTS is from
                     * the RTP header, and is usually clocked differently
                     * than the video clock.
                     */
                    m_TimeGap = stream.timeRef;
                    m_StartStream = 1;
                    difTimeRef = 1;
                    m_PrevPTS = 0;
                    m_PrevSysTime = timeGetTime();
                    dbg_log(
                        TEXT("MY:AStartStream %lld: %lld: %lld\n"),
                        sys_time,
                        m_RefGap,
                        m_TimeGap
                    );
                }
                /* Let the video side stream in first before letting audio
                 * start to flow.
                 */
                if (m_OtherSide->m_StartStream < 32)
                {
                    pSamp->SetActualDataLength(0);
                    Sleep(10);
                    return hr;
                }
                else
                {
                    pSamp->SetActualDataLength(returnBytes);
                }
                difTimeRef = stream.timeRef - m_PrevTimeRef;
                m_PrevTimeRef = stream.timeRef;
                if (discont != 0)
                {
                    dbg_log(TEXT("MY:ADISC TRUE\n"));
                    pSamp->SetDiscontinuity(TRUE);
                }
                else
                {
                    pSamp->SetDiscontinuity(FALSE);
                    pSamp->SetSyncPoint(TRUE);
                }
                /* Difference in audio PTS clock, TESTING AT 55.2 kHz */
                ts = stream.timeRef - m_TimeGap + m_RefGap;
                ts = convert_to_dshow_timestamp(ts, stream.clockRate);
                difPts = ts - m_PrevPTS;
                df = ts + 1;
                m_PrevPTS = ts;
                dbg_log(
                    TEXT("MY:AT %lld = %lld: %d: %lld\n"),
                    ts,
                    stream.timeRef,
                    (timeGetTime() - m_PrevSysTime),
                    difPts
                );
                pSamp->SetTime(&ts, &df);
                m_PrevSysTime = timeGetTime();
            }
            else
            {
                pSamp->SetActualDataLength(0);
                Sleep(10);
            }
        }
    }
    return hr;
}
/* End of code */
I tried adjusting the video PTS by simply adding (90000 * 10), to see whether the video would then run well ahead of the audio; it did not. The video still trails the audio by 2 seconds or more. I really don't understand why this has no effect. Each video frame should be presented 10 seconds early. Is that not correct?
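(Checking the arithmetic: 90000 ticks is one second at the 90 kHz video clock, so adding 90000 * 10 should shift the converted timestamp by 10 * 10,000,000 = 100,000,000 DirectShow units, exactly 10 seconds.)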
The main question is: does the algorithm sound fundamentally correct? Video and audio each seem to run fine independently.
The source filter is not a push filter; I am not sure whether that makes a difference. I do not see the decoders falling behind the broadcast input.
Many thanks in advance.
Answer 0: (score: 3)
I actually figured out the problems, and there were two of them.
The first was poor handling of the H.264 SPS frames. When the decoder starts, it discards every frame until it finds an SPS frame. The stream is encoded at 15 frames per second. This throws the timing off, since the decoder consumes up to a second of video in under 10 ms. Every frame presented after that is considered late, and the decoder tries to fast-forward the frames to catch up. Being a live source, it then runs out of frames again. The workaround, placed in my code above, is to make sure there is a buffer of at least 32 frames, about 2 seconds.
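As an illustration of the start-up problem (a sketch, not my actual filter code; it assumes the video arrives as an Annex B byte stream), dropping frames until the first SPS NAL unit appears avoids feeding the decoder frames it will only discard:

/* Sketch: scan an Annex B buffer for an SPS NAL unit (nal_unit_type 7),
 * so start-up frames the decoder would discard never skew the timing.
 */
static BOOL contains_sps(const BYTE *buf, DWORD len)
{
    DWORD i;
    for (i = 0; i + 3 < len; i++)
    {
        /* 00 00 01 start code followed by the NAL header byte */
        if (buf[i] == 0x00 && buf[i + 1] == 0x00 && buf[i + 2] == 0x01 &&
            (buf[i + 3] & 0x1F) == 7)
        {
            return TRUE;
        }
    }
    return FALSE;
}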
The second problem really gets to the root of the issue. I was using the PTS from the RTP headers as the time reference. While this can work in the audio-only and/or video-only case, there is no guarantee that the video RTP PTS will match the corresponding audio RTP PTS, and it usually will not. The fix is therefore to use the RTCP NTP time, per the spec:
PTS = RTCP_SR_NTP_timestamp + (RTP_timestamp - RTCP_SR_RTP_timestamp) / media_clock_rate
This lets me match the actual video PTS to the corresponding audio PTS.
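A minimal sketch of that mapping (the names here are mine, not from the filter): sr_ntp is the NTP time from the stream's most recent RTCP Sender Report, converted to seconds; sr_rtp is the RTP timestamp carried in that same report; clock_rate is 90000 for the video pin and the audio tick rate (55.2 kHz in my test stream) for the audio pin. Running both pins' timestamps through this puts them on one NTP timeline, so matching video and audio times can be compared directly.

/* Sketch: map an RTP timestamp onto the shared NTP timeline using the
 * most recent RTCP Sender Report (SR) seen for that stream.
 */
static long double rtp_to_ntp_seconds(
    unsigned long rtp_ts,      /* RTP timestamp of this frame/access unit */
    unsigned long sr_rtp,      /* RTP timestamp from the last SR */
    long double sr_ntp,        /* NTP time of the last SR, in seconds */
    unsigned long clock_rate   /* 90000 for video, audio rate otherwise */
)
{
    /* Unsigned 32-bit subtraction also handles RTP timestamp wrap-around */
    unsigned long diff = rtp_ts - sr_rtp;
    return sr_ntp + ((long double) diff / clock_rate);
}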