我有2个立体声ADPCM波形。第一个是原始波形,第二个是从原始波形中截取的10秒片段。我需要找到第二个片段在第一个波形中开始位置的时间戳(以毫秒为单位)。
我做了一些研究,发现FFT是可行的方法。然而,我对DSP非常陌生,所以希望有人能为我指明正确的方向。
我使用NAudio 1.7.3将波形转换为PCM,并使用AForge 2.2.5执行FFT。
这是我到目前为止所做的:
// Searches the full song for the position whose spectrum best matches the
// preview clip and returns that position in milliseconds.
// ADPCM -> PCM -> Wave32 (the decoded bytes are reinterpreted as floats below)
// 44.1khz 16bits 2ch
using (var song = new WaveFileReader(songFile))
using (var songuncompressed = WaveFormatConversionStream.CreatePcmStream(song))
using (var songwav = new WaveChannel32(songuncompressed))
using (var preview = new WaveFileReader(previewFile))
using (var previewuncompressed = WaveFormatConversionStream.CreatePcmStream(preview))
using (var prevwav = new WaveChannel32(previewuncompressed))
{
    // Read files into buffer. Stream.Read is allowed to return fewer bytes
    // than requested, so keep reading until the buffer is full or the
    // stream reports end of data.
    var prebuffer = new byte[prevwav.Length];
    var songbuffer = new byte[songwav.Length];

    int preRead = 0;
    while (preRead < prebuffer.Length)
    {
        int got = prevwav.Read(prebuffer, preRead, prebuffer.Length - preRead);
        if (got <= 0)
        {
            break;
        }
        preRead += got;
    }

    int songRead = 0;
    while (songRead < songbuffer.Length)
    {
        int got = songwav.Read(songbuffer, songRead, songbuffer.Length - songRead);
        if (got <= 0)
        {
            break;
        }
        songRead += got;
    }

    // byte[] -> float[]
    float[] prefloats = new float[prebuffer.Length / sizeof(float)];
    float[] songfloats = new float[songbuffer.Length / sizeof(float)];
    Buffer.BlockCopy(prebuffer, 0, prefloats, 0, prebuffer.Length);
    Buffer.BlockCopy(songbuffer, 0, songfloats, 0, songbuffer.Length);

    int FFTSize = 1024;
    // Stride used to spread FFTSize sample points over the whole preview.
    int increment = prefloats.Length / FFTSize;
    // Floats per millisecond (bytes per millisecond / bytes per float).
    double step = songwav.Length / songwav.TotalTime.TotalMilliseconds / sizeof(float);
    // Interleaved channel count; window offsets are aligned to whole frames
    // so the song window samples the same channel pattern as the preview.
    int channels = songwav.WaveFormat.Channels;

    double smallest = double.MaxValue;
    int millisecond = 0;
    var presample = new Complex[FFTSize];
    var songsample = new Complex[FFTSize];

    // Preview data
    for (int a = 0; a < FFTSize; a++)
    {
        presample[a] = new Complex(prefloats[a * increment], 0);
    }
    FourierTransform.FFT(presample, FourierTransform.Direction.Forward);

    // The preview spectrum never changes, so compute its magnitudes once.
    var premagnitudes = new double[FFTSize];
    for (int i = 0; i < FFTSize; i++)
    {
        premagnitudes[i] = presample[i].Magnitude;
    }

    // Sliding fft (1ms per step). The step index is the millisecond position,
    // so no lossy bytes-per-millisecond division is needed afterwards.
    for (int ms = 0; ms * step < songfloats.Length; ms++)
    {
        int offset = (int)(ms * step);
        // Snap to the start of a frame (first channel).
        offset -= offset % channels;

        // Song data
        for (int b = 0; b < FFTSize; b++)
        {
            // Fill remaining samples with 0 if the window runs past the end.
            int idx = b * increment + offset;
            songsample[b] = idx < songfloats.Length
                ? new Complex(songfloats[idx], 0)
                : new Complex(0, 0);
        }
        FourierTransform.FFT(songsample, FourierTransform.Direction.Forward);

        // Simple comparison: sum of absolute magnitude differences per bin.
        double similarity = 0;
        for (int i = 0; i < FFTSize; i++)
        {
            similarity += Math.Abs(premagnitudes[i] - songsample[i].Magnitude);
        }

        if (similarity < smallest)
        {
            smallest = similarity;
            millisecond = ms;
        }
    }

    return millisecond;
}
我不太确定的3件事: