我生成了8 bar mp3,120bpm,每个四分音符上有1 / 16th音符。因此模式是
x---x---x---x---
。(x = 音符,- = 无声)
我将整个文件分成128个相等的段:filesize/(8*16)
,没有id3标签;
// FIX: the original chained a second `const` after a comma
// (`const size = ..., const noteLength = ...`), which is a SyntaxError —
// one declaration per statement.
const size = fs.statSync(src).size;
const noteLength = size / 128; // bytes per 16th note: filesize / (8 bars * 16 notes)
// NOTE(review): dividing by `size` again reduces these to 0..1 fractions of
// the file (noteLength / size === 1/128), not byte offsets — confirm this
// matches the start/end units the cutter below expects.
const start = notePosition * noteLength / size; // notePosition: 0-127
const end = (notePosition + 1) * noteLength / size;
... 拆分部分来自mp3-cutter软件包:
var offsetBuffer = Buffer.alloc(options.offset);
// FIX: read the first `options.offset` bytes (the ID3/lead-in region that
// should be carried into every slice) from position 0 — the original read
// *at* position `options.offset`, i.e. entirely past the region it was
// trying to copy.
fs.readSync(options.fd, offsetBuffer, 0, offsetBuffer.length, 0);
if (options.target) { // if option target write to file
fs.writeFileSync(options.target, offsetBuffer);
var audioBuffer = Buffer.alloc(end - start);
fs.readSync(options.fd, audioBuffer, 0, audioBuffer.length, parseInt(start + options.offset));
// FIX: a second writeFileSync() truncates the file and discards the header
// bytes written above; append the audio payload instead.
fs.appendFileSync(options.target, audioBuffer);
}
几乎可以正常工作:我可以在文件1、5、9…中听到鼓声,但在某些文件中鼓声略有偏移/不完整。然而源音频在这些位置上是同一个鼓声,因此本不应该有差异。
文件长16s(8 bars × 4 拍 ÷ 120 bpm × 60)。因此每个分割为125ms,一帧为1152/44100 × 1000 ≈ 26.1224ms(采样率为44.1kHz,而非41000)
// One MPEG-1 Layer III frame carries 1152 samples.
// FIX: the sample rate is 44100 Hz (44.1 kHz) — 41000 is not a valid MP3
// sample rate. 1152 / 44100 * 1000 ≈ 26.122 ms per frame (not 28.0976 ms),
// matching the per-frame duration computed elsewhere in this file.
const frameLength = 1152 / 44100 * 1000;
// 16 s of audio split into 128 equal 16th-note slices → 125 ms each.
const sliceLength = 16 / 128 * 1000;
const slices = [];
for (let i = 0; i < 128; i++) {
  // start of slice i expressed as a (fractional) number of frames
  slices.push(sliceLength * i / frameLength);
}
console.table(slices);
我想知道是否通过在任意点而不是帧边界分割mp3来获得此错误。
我如何才能正确分割mp3?我是否需要先从所有帧中提取音频数据,将其拆分,再重新组装成新帧?如果最后剩下的音频只有10毫秒,而一帧约26毫秒,最后一帧该如何处理?
PS: 对我来说,减少添加/删除任何信息非常重要,因为我想使用切片的信息来提供机器学习应用程序。
这是该库的更新版本,但目前只有随机噪音。大多数帧的帧大小为418(可能是字节)。我想知道随机噪声是否是因为我没有考虑填充位。
class Duration {
  /**
   * Walks an mp3 file frame by frame, summing each frame's play time and
   * collecting the raw audio payload (each frame minus its 4-byte header)
   * into a single buffer.
   *
   * @param {String} filename
   * @param {Object} [options]
   * @param {Number} [options.bqm=120] - tempo in beats per minute
   *        (name kept as "bqm" for caller compatibility; usual spelling is bpm)
   * @param {Number} [options.noteResolution=16] - note grid, 16 = 16th notes
   * @returns {{duration:Number, offset:Number, countNotes:Number,
   *            audioBuffer:Buffer, frameHeader:Buffer, frameSize:Number}}
   */
  static getDuration(filename, options) {
    options = Object.assign({
      bqm: 120,
      noteResolution: 16
    }, options);
    let audioBuffer = null;
    let frameHeader = null;
    let frameSize = null;
    // 100 bytes are enough to probe both an ID3v2 header (10 bytes, plus its
    // size/flag bytes) and a 4-byte frame header in one initial read.
    var fd = fs.openSync(filename, 'r'),
      buffer = Buffer.alloc(100),
      block = fs.readSync(fd, buffer, 0, 100, 0), // 1st block read in
      stat = fs.statSync(filename),
      duration = 0,
      countNotes = 0,
      frames = 0,
      _offset = 0;
    try {
      calculateDuration: {
        if (block < 100) {
          break calculateDuration;
        }
        var offset = _offset = this.skipID3v2Tag(buffer);
        while (offset < stat.size) {
          debugDurationVerbose("\noffset < stat.size", offset < stat.size, "offset", offset, "size", stat.size);
          // start reading at current offset
          block = fs.readSync(fd, buffer, 0, 10, offset);
          if (block < 10) {
            debugDurationVerbose('break < 10');
            break calculateDuration;
          } else if (buffer[0] == 255 && (buffer[1] & 224) == 224) {
            // 0xFF followed by three set bits: the 11-bit frame sync that
            // marks the start of an MPEG audio frame header.
            var info = this.parseFrameHeader(buffer);
            if (!info.frameSize || isNaN(info.frameSize) || !info.samples || isNaN(info.samples)) {
              offset += 1; // false sync — resync one byte further
            } else {
              // save the extracted audio info without frames
              // frame header is 32 bits, 1 byte = 8 bits => 32/8 = 4 bytes
              const mp3HeaderSize = 4;
              const audioBufferTemp = Buffer.alloc(info.frameSize - mp3HeaderSize);
              frameHeader = Buffer.alloc(mp3HeaderSize);
              frameSize = info.frameSize; // bytes
              // read the frame header
              fs.readSync(fd, frameHeader, 0, mp3HeaderSize, offset);
              // read the audio info and concat it to previous audio info
              fs.readSync(fd, audioBufferTemp, 0, frameSize - mp3HeaderSize, offset + mp3HeaderSize);
              if (audioBuffer) {
                audioBuffer = Buffer.concat([audioBuffer, audioBufferTemp]);
              } else {
                audioBuffer = audioBufferTemp;
              }
              frames++;
              offset += info.frameSize;
              duration += (info.samples / info.sampleRate);
              // e.g. (1152 samples / 44100 HZ) * 1000 = 26,122449 ms per frame
              debugDurationVerbose('duration', duration);
              debugDurationVerbose('info.samples', info.samples, 'info.sampleRate', info.sampleRate, 'frame time ms', info.samples / info.sampleRate * 1000, "frameSize byte", frameSize);
              debugDurationVerbose("audioBuffer.length", audioBuffer.length, "frame", frames);
            }
          } else if (buffer[0] === 84 && buffer[1] === 65 && buffer[2] === 71) { // 'TAG' => ID3v1, fixed 128 bytes
            debugDurationVerbose('+128');
            offset += 128;
          } else {
            // unrecognised byte between frames — resync one byte at a time
            debugDurationVerbose('+1');
            offset += 1;
          }
        }
      }
    } catch (e) {
      console.error(e);
    } finally {
      fs.closeSync(fd);
    }
    // convert duration from seconds to note length e.g. 16th
    // NOTE(review): presumably compensates two frames of encoder
    // delay/padding — confirm against the encoder that produced the files.
    duration -= 2 * 1152 / 44100; // TODO: 1) duration too long
    const beatsPerSecond = options.bqm / 60;
    const beats = duration * beatsPerSecond;
    console.warn('\n\nbeats', beats);
    countNotes = beats * options.noteResolution / 4; // bqm: 1 quarter note is one beat, in a 16th grid its 4 16th per beat
    debugDuration('countNotes before', countNotes, "duration", duration, "beatsPerSecond", beatsPerSecond);
    countNotes = Math.floor(countNotes); // TODO: account for 1), rm
    console.warn('Math.floor countNotes');
    debugDuration('countNotes %s expected %s after', countNotes, options.expected);
    return {duration: parseFloat(duration.toFixed(2)), offset: _offset, countNotes, audioBuffer, frameHeader, frameSize};
  }

  /**
   * Returns the number of bytes occupied by a leading ID3v2 tag
   * (0 when the buffer does not start with one).
   *
   * http://id3.org/ID3v2Easy
   *
   * @param {Buffer} buffer - at least the first 10 bytes of the file
   * @returns {Number}
   */
  static skipID3v2Tag(buffer) {
    if (buffer[0] == 73 && buffer[1] == 68 && buffer[2] == 51) { // 'I','D','3'
      var z0 = buffer[6],
        z1 = buffer[7],
        z2 = buffer[8],
        z3 = buffer[9];
      // The tag size is a "syncsafe" integer: four bytes of 7 bits each;
      // the high bit of every byte must be zero.
      if ((z0 & 128) == 0 && (z1 & 128) == 0 && (z2 & 128) == 0 && (z3 & 128) == 0) {
        var headerSize = 10,
          // FIX: the last two terms were `(z2 & 128) * 128 + (z3 & 128)`,
          // which are always 0 after the high-bit check above, truncating
          // the tag size to multiples of 16384 and mis-skipping most tags.
          // A syncsafe int is (z & 127) scaled by powers of 2^7:
          // 2097152 = 2^21, 16384 = 2^14, 128 = 2^7.
          tagSize = ((z0 & 127) * 2097152) + ((z1 & 127) * 16384) + ((z2 & 127) * 128) + (z3 & 127),
          footerSize = (buffer[5] & 16) ? 10 : 0; // bit 4 of the flags byte => 10-byte footer present
        return headerSize + tagSize + footerSize;
      }
    }
    return 0;
  }

  /**
   * Parses the 4-byte frame header at the start of the buffer.
   *
   * @param {Buffer} buffer
   * @returns {{sampleRate:Number, samples:Number, frameSize:Number, paddingBit:Number}}
   */
  static parseFrameHeader(buffer) {
    var b1 = buffer[1],
      b2 = buffer[2],
      versionBits = (b1 & 24) >> 3, // MPEG version id bits
      version = versions[versionBits],
      simpleVersion = (version == '2.5') ? 2 : version, // 2.5 shares the v2 tables
      layerBits = (b1 & 6) >> 1, // layer bits
      layer = layers[layerBits],
      bitRateKey = `V${simpleVersion}L${layer}`,
      bitRateIdx = (b2 & 240) >> 4, // bitrate index (upper nibble)
      bitRate = bitRates[bitRateKey][bitRateIdx] || 0,
      sampleRateIdx = (b2 & 12) >> 2, // sample-rate index
      sampleRate = sampleRates[version][sampleRateIdx] || 0,
      $samples = samples[simpleVersion][layer],
      paddingBit = (b2 & 2) >> 1, // 1 => frame is padded with one extra slot
      frameSize = this.getFrameSize(layer, bitRate, sampleRate, paddingBit);
    debugFrameHeader("b1 %s b2 %s versionBits %s version %s simpleVersion %s layerBits %s layer %s bitRateKey %s bitRateIdx %s bitRate %s sampleRateIdx %s sampleRate %s $samples %s paddingBit %s frameSize %s", b1, b2, versionBits, version, simpleVersion, layerBits, layer, bitRateKey, bitRateIdx, bitRate, sampleRateIdx, sampleRate, $samples, paddingBit, frameSize);
    return {
      sampleRate,
      samples: $samples,
      frameSize,
      paddingBit
    };
  }

  /**
   * Returns the frame size in bytes.
   *
   * Layer I frames are measured in 4-byte slots, Layer II/III in single
   * bytes — hence the two formulas.
   *
   * @param {String|Number} layer
   * @param {Number} bitRate - in kbit/s
   * @param {Number} sampleRate - in Hz
   * @param {Number} paddingBit - 1 when the frame carries one padding slot
   * @returns {Number} 0 for an invalid (zero) sample rate, so the caller's
   *          `!info.frameSize` check resyncs exactly as before
   */
  static getFrameSize(layer, bitRate, sampleRate, paddingBit) {
    if (!sampleRate) {
      return 0; // reserved/invalid sample-rate index — not a real frame
    }
    if (layer == 1) {
      return Math.floor(((12 * bitRate * 1000 / sampleRate) + paddingBit) * 4);
    } else {
      // parseInt-on-a-number replaced with the intended Math.floor
      return Math.floor(((144 * bitRate * 1000) / sampleRate) + paddingBit);
    }
  }
}
class MP3Cutter {
  /**
   * Splits the extracted audio payload of an mp3 into one output file per
   * grid note (e.g. one file per 16th note) by slicing the headerless audio
   * buffer and re-attaching a frame header to every chunk.
   *
   * @param {Object} [options]
   * @param {String} options.src - source mp3 path
   * @param {String} options.target - output path prefix; "_<n>.mp3" is appended
   * @param {Number} [options.startNote=0] - first note index to export
   */
  static cutByNotes2(options = {}) {
    // FIX: `totalFrames = 0` was an undeclared assignment — an implicit
    // global in sloppy mode and a ReferenceError under strict/ESM code.
    // The count is now local, fed by cutByNote2()'s return value.
    let totalFrames = 0;
    const src = options.src,
      {offset, countNotes, audioBuffer, frameHeader, frameSize} = Duration.getDuration(src, options),
      size = audioBuffer.length,
      valuePerNote = size / countNotes,     // payload bytes per note
      audioSize = (frameSize - frameHeader.length); // payload bytes per frame
    let notePosition = options.startNote || 0;
    debugCutter("offset", offset, "size in bytes", size, "options.valuePerNote", valuePerNote);
    debugCutter("countNotes", countNotes);
    debugCutter("size", size);
    while (notePosition < countNotes) {
      // shallow copy + per-note overrides in one step (no lodash needed)
      const optionsCut = Object.assign({}, options, {
        target: options.target + '_' + (notePosition + 1) + '.mp3',
        audioBuffer,
        offset: offset,
        startNote: notePosition,
        endNote: notePosition + 1,
        valuePerNote,
        frameHeader,
        frameSize: 418, // TODO: rm hard coded frame size varies between 417-418 in samples
        audioSize
      });
      totalFrames += this.cutByNote2(optionsCut);
      notePosition++;
    }
    debugCutter('totalFrames', totalFrames);
  }

  /**
   * Writes one note slice to disk as a pseudo-mp3: the slice of headerless
   * audio is chopped into frame-sized chunks and the single, reused frame
   * header is prepended to each chunk.
   *
   * NOTE(review): reusing one header for every frame ignores each original
   * frame's own padding bit/bitrate, so header and payload sizes can drift
   * out of step — a plausible cause of the decoded noise. Confirm by
   * parsing each frame's own header instead.
   *
   * @param {Object} optionsCut - see cutByNotes2
   * @returns {Number} number of frames written for this note
   */
  static cutByNote2(optionsCut = {}) {
    const start = optionsCut.startNote * optionsCut.valuePerNote;
    const end = (optionsCut.startNote + 1) * optionsCut.valuePerNote;
    const noteSliceAudio = optionsCut.audioBuffer.slice(start, end);
    // go through the sliced audio and divide it into single frames
    let frameNr = 0;
    const frames = [];
    while (frameNr * (optionsCut.frameSize - MP3_HEADER_SIZE) < noteSliceAudio.length) { // TODO: check if the last frame is missing!, 4 Bytes header
      const audioPart = noteSliceAudio.slice(frameNr * optionsCut.audioSize, (frameNr + 1) * optionsCut.audioSize);
      frames.push(Buffer.concat([optionsCut.frameHeader, audioPart]));
      frameNr++;
    }
    // concat all frames from the slice and save them
    fs.writeFileSync(optionsCut.target, Buffer.concat(frames));
    // FIX: returned instead of accumulating into an implicit global
    return frameNr;
  }
}