我生成了8 bar mp3,120bpm,每个四分音符上有1 / 16th音符。因此模式是
x---x---x---x---
。(x = 音符,- = 无声)
我将整个文件分成128个相等的段:filesize/(8*16)
,没有id3标签;
// FIX: the original chained a second `const` after a comma
// (`const size = ..., const noteLength = ...`), which is a SyntaxError —
// one declaration per statement.
const size = fs.statSync(src).size;
const noteLength = size / 128; // bytes per 16th note: filesize / (8 bars * 16 notes)
// NOTE(review): dividing by `size` again reduces these to 0..1 fractions of
// the file (noteLength / size === 1/128), not byte offsets — confirm this
// matches the start/end units the cutter below expects.
const start = notePosition * noteLength / size; // notePosition: 0-127
const end = (notePosition + 1) * noteLength / size;
... 拆分部分来自mp3-cutter软件包:
var offsetBuffer = Buffer.alloc(options.offset);
// FIX: read the first `options.offset` bytes (the ID3/lead-in region that
// should be carried into every slice) from position 0 — the original read
// *at* position `options.offset`, i.e. entirely past the region it was
// trying to copy.
fs.readSync(options.fd, offsetBuffer, 0, offsetBuffer.length, 0);
if (options.target) { // if option target write to file
fs.writeFileSync(options.target, offsetBuffer);
var audioBuffer = Buffer.alloc(end - start);
fs.readSync(options.fd, audioBuffer, 0, audioBuffer.length, parseInt(start + options.offset));
// FIX: a second writeFileSync() truncates the file and discards the header
// bytes written above; append the audio payload instead.
fs.appendFileSync(options.target, audioBuffer);
}
几乎可以正常工作:我可以在文件1、5、9…中听到鼓声,但在某些文件中鼓声略有偏移/不完整。然而源音频在这些位置上是同一个鼓声,因此本不应该有差异。
文件长16s(8 bars × 4 拍 ÷ 120 bpm × 60)。因此每个分割为125ms,一帧为1152/44100 × 1000 ≈ 26.1224ms(采样率为44.1kHz,而非41000)
// One MPEG-1 Layer III frame carries 1152 samples.
// FIX: the sample rate is 44100 Hz (44.1 kHz) — 41000 is not a valid MP3
// sample rate. 1152 / 44100 * 1000 ≈ 26.122 ms per frame (not 28.0976 ms),
// matching the per-frame duration computed elsewhere in this file.
const frameLength = 1152 / 44100 * 1000;
// 16 s of audio split into 128 equal 16th-note slices → 125 ms each.
const sliceLength = 16 / 128 * 1000;
const slices = [];
for (let i = 0; i < 128; i++) {
  // start of slice i expressed as a (fractional) number of frames
  slices.push(sliceLength * i / frameLength);
}
console.table(slices);
我想知道是否通过在任意点而不是帧边界分割mp3来获得此错误。
我如何才能正确分割mp3?我是否需要先从所有帧中提取音频数据,将其拆分,再重新组装成新帧?如果最后剩下的音频只有10毫秒,而一帧约26毫秒,最后一帧该如何处理?
PS: 对我来说,减少添加/删除任何信息非常重要,因为我想使用切片的信息来提供机器学习应用程序。
这是该库的更新版本,但目前只有随机噪音。大多数帧的帧大小为418(可能是字节)。我想知道随机噪声是否是因为我没有考虑填充位。
class Duration {
  /**
   * Walks an mp3 file frame by frame, summing each frame's play time and
   * collecting the raw audio payload (each frame minus its 4-byte header)
   * into a single buffer.
   *
   * @param {String} filename
   * @param {Object} [options]
   * @param {Number} [options.bqm=120] - tempo in beats per minute
   *        (name kept as "bqm" for caller compatibility; usual spelling is bpm)
   * @param {Number} [options.noteResolution=16] - note grid, 16 = 16th notes
   * @returns {{duration:Number, offset:Number, countNotes:Number,
   *            audioBuffer:Buffer, frameHeader:Buffer, frameSize:Number}}
   */
  static getDuration(filename, options) {
    options = Object.assign({
      bqm: 120,
      noteResolution: 16
    }, options);
    let audioBuffer = null;
    let frameHeader = null;
    let frameSize = null;
    // 100 bytes are enough to probe both an ID3v2 header (10 bytes, plus its
    // size/flag bytes) and a 4-byte frame header in one initial read.
    var fd = fs.openSync(filename, 'r'),
      buffer = Buffer.alloc(100),
      block = fs.readSync(fd, buffer, 0, 100, 0), // 1st block read in
      stat = fs.statSync(filename),
      duration = 0,
      countNotes = 0,
      frames = 0,
      _offset = 0;
    try {
      calculateDuration: {
        if (block < 100) {
          break calculateDuration;
        }
        var offset = _offset = this.skipID3v2Tag(buffer);
        while (offset < stat.size) {
          debugDurationVerbose("\noffset < stat.size", offset < stat.size, "offset", offset, "size", stat.size);
          // start reading at current offset
          block = fs.readSync(fd, buffer, 0, 10, offset);
          if (block < 10) {
            debugDurationVerbose('break < 10');
            break calculateDuration;
          } else if (buffer[0] == 255 && (buffer[1] & 224) == 224) {
            // 0xFF followed by three set bits: the 11-bit frame sync that
            // marks the start of an MPEG audio frame header.
            var info = this.parseFrameHeader(buffer);
            if (!info.frameSize || isNaN(info.frameSize) || !info.samples || isNaN(info.samples)) {
              offset += 1; // false sync — resync one byte further
            } else {
              // save the extracted audio info without frames
              // frame header is 32 bits, 1 byte = 8 bits => 32/8 = 4 bytes
              const mp3HeaderSize = 4;
              const audioBufferTemp = Buffer.alloc(info.frameSize - mp3HeaderSize);
              frameHeader = Buffer.alloc(mp3HeaderSize);
              frameSize = info.frameSize; // bytes
              // read the frame header
              fs.readSync(fd, frameHeader, 0, mp3HeaderSize, offset);
              // read the audio info and concat it to previous audio info
              fs.readSync(fd, audioBufferTemp, 0, frameSize - mp3HeaderSize, offset + mp3HeaderSize);
              if (audioBuffer) {
                audioBuffer = Buffer.concat([audioBuffer, audioBufferTemp]);
              } else {
                audioBuffer = audioBufferTemp;
              }
              frames++;
              offset += info.frameSize;
              duration += (info.samples / info.sampleRate);
              // e.g. (1152 samples / 44100 HZ) * 1000 = 26,122449 ms per frame
              debugDurationVerbose('duration', duration);
              debugDurationVerbose('info.samples', info.samples, 'info.sampleRate', info.sampleRate, 'frame time ms', info.samples / info.sampleRate * 1000, "frameSize byte", frameSize);
              debugDurationVerbose("audioBuffer.length", audioBuffer.length, "frame", frames);
            }
          } else if (buffer[0] === 84 && buffer[1] === 65 && buffer[2] === 71) { // 'TAG' => ID3v1, fixed 128 bytes
            debugDurationVerbose('+128');
            offset += 128;
          } else {
            // unrecognised byte between frames — resync one byte at a time
            debugDurationVerbose('+1');
            offset += 1;
          }
        }
      }
    } catch (e) {
      console.error(e);
    } finally {
      fs.closeSync(fd);
    }
    // convert duration from seconds to note length e.g. 16th
    // NOTE(review): presumably compensates two frames of encoder
    // delay/padding — confirm against the encoder that produced the files.
    duration -= 2 * 1152 / 44100; // TODO: 1) duration too long
    const beatsPerSecond = options.bqm / 60;
    const beats = duration * beatsPerSecond;
    console.warn('\n\nbeats', beats);
    countNotes = beats * options.noteResolution / 4; // bqm: 1 quarter note is one beat, in a 16th grid its 4 16th per beat
    debugDuration('countNotes before', countNotes, "duration", duration, "beatsPerSecond", beatsPerSecond);
    countNotes = Math.floor(countNotes); // TODO: account for 1), rm
    console.warn('Math.floor countNotes');
    debugDuration('countNotes %s expected %s after', countNotes, options.expected);
    return {duration: parseFloat(duration.toFixed(2)), offset: _offset, countNotes, audioBuffer, frameHeader, frameSize};
  }

  /**
   * Returns the number of bytes occupied by a leading ID3v2 tag
   * (0 when the buffer does not start with one).
   *
   * http://id3.org/ID3v2Easy
   *
   * @param {Buffer} buffer - at least the first 10 bytes of the file
   * @returns {Number}
   */
  static skipID3v2Tag(buffer) {
    if (buffer[0] == 73 && buffer[1] == 68 && buffer[2] == 51) { // 'I','D','3'
      var z0 = buffer[6],
        z1 = buffer[7],
        z2 = buffer[8],
        z3 = buffer[9];
      // The tag size is a "syncsafe" integer: four bytes of 7 bits each;
      // the high bit of every byte must be zero.
      if ((z0 & 128) == 0 && (z1 & 128) == 0 && (z2 & 128) == 0 && (z3 & 128) == 0) {
        var headerSize = 10,
          // FIX: the last two terms were `(z2 & 128) * 128 + (z3 & 128)`,
          // which are always 0 after the high-bit check above, truncating
          // the tag size to multiples of 16384 and mis-skipping most tags.
          // A syncsafe int is (z & 127) scaled by powers of 2^7:
          // 2097152 = 2^21, 16384 = 2^14, 128 = 2^7.
          tagSize = ((z0 & 127) * 2097152) + ((z1 & 127) * 16384) + ((z2 & 127) * 128) + (z3 & 127),
          footerSize = (buffer[5] & 16) ? 10 : 0; // bit 4 of the flags byte => 10-byte footer present
        return headerSize + tagSize + footerSize;
      }
    }
    return 0;
  }

  /**
   * Parses the 4-byte frame header at the start of the buffer.
   *
   * @param {Buffer} buffer
   * @returns {{sampleRate:Number, samples:Number, frameSize:Number, paddingBit:Number}}
   */
  static parseFrameHeader(buffer) {
    var b1 = buffer[1],
      b2 = buffer[2],
      versionBits = (b1 & 24) >> 3, // MPEG version id bits
      version = versions[versionBits],
      simpleVersion = (version == '2.5') ? 2 : version, // 2.5 shares the v2 tables
      layerBits = (b1 & 6) >> 1, // layer bits
      layer = layers[layerBits],
      bitRateKey = `V${simpleVersion}L${layer}`,
      bitRateIdx = (b2 & 240) >> 4, // bitrate index (upper nibble)
      bitRate = bitRates[bitRateKey][bitRateIdx] || 0,
      sampleRateIdx = (b2 & 12) >> 2, // sample-rate index
      sampleRate = sampleRates[version][sampleRateIdx] || 0,
      $samples = samples[simpleVersion][layer],
      paddingBit = (b2 & 2) >> 1, // 1 => frame is padded with one extra slot
      frameSize = this.getFrameSize(layer, bitRate, sampleRate, paddingBit);
    debugFrameHeader("b1 %s b2 %s versionBits %s version %s simpleVersion %s layerBits %s layer %s bitRateKey %s bitRateIdx %s bitRate %s sampleRateIdx %s sampleRate %s $samples %s paddingBit %s frameSize %s", b1, b2, versionBits, version, simpleVersion, layerBits, layer, bitRateKey, bitRateIdx, bitRate, sampleRateIdx, sampleRate, $samples, paddingBit, frameSize);
    return {
      sampleRate,
      samples: $samples,
      frameSize,
      paddingBit
    };
  }

  /**
   * Returns the frame size in bytes.
   *
   * Layer I frames are measured in 4-byte slots, Layer II/III in single
   * bytes — hence the two formulas.
   *
   * @param {String|Number} layer
   * @param {Number} bitRate - in kbit/s
   * @param {Number} sampleRate - in Hz
   * @param {Number} paddingBit - 1 when the frame carries one padding slot
   * @returns {Number} 0 for an invalid (zero) sample rate, so the caller's
   *          `!info.frameSize` check resyncs exactly as before
   */
  static getFrameSize(layer, bitRate, sampleRate, paddingBit) {
    if (!sampleRate) {
      return 0; // reserved/invalid sample-rate index — not a real frame
    }
    if (layer == 1) {
      return Math.floor(((12 * bitRate * 1000 / sampleRate) + paddingBit) * 4);
    } else {
      // parseInt-on-a-number replaced with the intended Math.floor
      return Math.floor(((144 * bitRate * 1000) / sampleRate) + paddingBit);
    }
  }
}
class MP3Cutter {
  /**
   * Splits the extracted audio payload of an mp3 into one output file per
   * grid note (e.g. one file per 16th note) by slicing the headerless audio
   * buffer and re-attaching a frame header to every chunk.
   *
   * @param {Object} [options]
   * @param {String} options.src - source mp3 path
   * @param {String} options.target - output path prefix; "_<n>.mp3" is appended
   * @param {Number} [options.startNote=0] - first note index to export
   */
  static cutByNotes2(options = {}) {
    // FIX: `totalFrames = 0` was an undeclared assignment — an implicit
    // global in sloppy mode and a ReferenceError under strict/ESM code.
    // The count is now local, fed by cutByNote2()'s return value.
    let totalFrames = 0;
    const src = options.src,
      {offset, countNotes, audioBuffer, frameHeader, frameSize} = Duration.getDuration(src, options),
      size = audioBuffer.length,
      valuePerNote = size / countNotes,     // payload bytes per note
      audioSize = (frameSize - frameHeader.length); // payload bytes per frame
    let notePosition = options.startNote || 0;
    debugCutter("offset", offset, "size in bytes", size, "options.valuePerNote", valuePerNote);
    debugCutter("countNotes", countNotes);
    debugCutter("size", size);
    while (notePosition < countNotes) {
      // shallow copy + per-note overrides in one step (no lodash needed)
      const optionsCut = Object.assign({}, options, {
        target: options.target + '_' + (notePosition + 1) + '.mp3',
        audioBuffer,
        offset: offset,
        startNote: notePosition,
        endNote: notePosition + 1,
        valuePerNote,
        frameHeader,
        frameSize: 418, // TODO: rm hard coded frame size varies between 417-418 in samples
        audioSize
      });
      totalFrames += this.cutByNote2(optionsCut);
      notePosition++;
    }
    debugCutter('totalFrames', totalFrames);
  }

  /**
   * Writes one note slice to disk as a pseudo-mp3: the slice of headerless
   * audio is chopped into frame-sized chunks and the single, reused frame
   * header is prepended to each chunk.
   *
   * NOTE(review): reusing one header for every frame ignores each original
   * frame's own padding bit/bitrate, so header and payload sizes can drift
   * out of step — a plausible cause of the decoded noise. Confirm by
   * parsing each frame's own header instead.
   *
   * @param {Object} optionsCut - see cutByNotes2
   * @returns {Number} number of frames written for this note
   */
  static cutByNote2(optionsCut = {}) {
    const start = optionsCut.startNote * optionsCut.valuePerNote;
    const end = (optionsCut.startNote + 1) * optionsCut.valuePerNote;
    const noteSliceAudio = optionsCut.audioBuffer.slice(start, end);
    // go through the sliced audio and divide it into single frames
    let frameNr = 0;
    const frames = [];
    while (frameNr * (optionsCut.frameSize - MP3_HEADER_SIZE) < noteSliceAudio.length) { // TODO: check if the last frame is missing!, 4 Bytes header
      const audioPart = noteSliceAudio.slice(frameNr * optionsCut.audioSize, (frameNr + 1) * optionsCut.audioSize);
      frames.push(Buffer.concat([optionsCut.frameHeader, audioPart]));
      frameNr++;
    }
    // concat all frames from the slice and save them
    fs.writeFileSync(optionsCut.target, Buffer.concat(frames));
    // FIX: returned instead of accumulating into an implicit global
    return frameNr;
  }
}