我需要从文件中提取音频电平表,以便可以在播放音频之前渲染电平。我知道AVAudioPlayer
在通过
func averagePower(forChannel channelNumber: Int) -> Float.
但是对于我来说,我想预先获得[Float]
的电表级别。
答案 0 :(得分:9)
首先,这是繁重的操作,因此要花费一些OS时间和资源来完成此操作。在下面的示例中,我将使用标准帧速率和采样,但是如果您仅想显示条形作为指示,那么您实际上应该采样得多
确定,因此您无需播放声音即可对其进行分析。因此,在此我完全不会使用AVAudioPlayer
,我假设我会以URL
的身份参加比赛:
let path = Bundle.main.path(forResource: "example3.mp3", ofType:nil)!
let url = URL(fileURLWithPath: path)
然后,我将使用AVAudioFile将曲目信息获取到AVAudioPCMBuffer中。每当您将其保存在缓冲区中时,您都可以获得有关曲目的所有信息:
func buffer(url: URL) {
do {
let track = try AVAudioFile(forReading: url)
let format = AVAudioFormat(commonFormat:.pcmFormatFloat32, sampleRate:track.fileFormat.sampleRate, channels: track.fileFormat.channelCount, interleaved: false)
let buffer = AVAudioPCMBuffer(pcmFormat: format!, frameCapacity: UInt32(track.length))!
try track.read(into : buffer, frameCount:UInt32(track.length))
self.analyze(buffer: buffer)
} catch {
print(error)
}
}
您可能会注意到有一种analyze
方法。您的缓冲区中的变量应该接近floatChannelData。这是一个纯数据,因此您需要对其进行解析。我将发布一种方法,并在下面对此进行说明:
func analyze(buffer: AVAudioPCMBuffer) {
let channelCount = Int(buffer.format.channelCount)
let frameLength = Int(buffer.frameLength)
var result = Array(repeating: [Float](repeatElement(0, count: frameLength)), count: channelCount)
for channel in 0..<channelCount {
for sampleIndex in 0..<frameLength {
let sqrtV = sqrt(buffer.floatChannelData![channel][sampleIndex*buffer.stride]/Float(buffer.frameLength))
let dbPower = 20 * log10(sqrtV)
result[channel][sampleIndex] = dbPower
}
}
}
其中涉及一些计算(较重的计算)。几个月前,当我研究类似的解决方案时,我遇到了本教程:https://www.raywenderlich.com/5154-avaudioengine-tutorial-for-ios-getting-started,那里有关于此计算的出色解释,还有我上面粘贴并在项目中使用的部分代码,所以我想在这里注明作者:Scott McAlister
答案 1 :(得分:7)
安装在iPhone上:
0.538s 来处理8MByte
持续时间和4min47s
采样率
44,100
mp3播放器
0.170s 处理一个712KByte
持续时间和22s
采样率的44,100
mp3播放器
0.089s 处理caf
文件,该文件是通过在终端中使用此命令afconvert -f caff -d LEI16 audio.mp3 audio.caf
转换上面的文件而创建的。
让我们开始吧:
A)声明将要包含有关音频资产的必要信息的此类:
/// Holds audio information used for building waveforms
final class AudioContext {
/// The audio asset URL used to load the context
public let audioURL: URL
/// Total number of samples in loaded asset
public let totalSamples: Int
/// Loaded asset
public let asset: AVAsset
// Loaded assetTrack
public let assetTrack: AVAssetTrack
private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
self.audioURL = audioURL
self.totalSamples = totalSamples
self.asset = asset
self.assetTrack = assetTrack
}
public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])
guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else {
fatalError("Couldn't load AVAssetTrack")
}
asset.loadValuesAsynchronously(forKeys: ["duration"]) {
var error: NSError?
let status = asset.statusOfValue(forKey: "duration", error: &error)
switch status {
case .loaded:
guard
let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
let audioFormatDesc = formatDescriptions.first,
let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
else { break }
let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(asset.duration.value) / Float64(asset.duration.timescale))
let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack)
completionHandler(audioContext)
return
case .failed, .cancelled, .loading, .unknown:
print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
}
completionHandler(nil)
}
}
}
我们将使用其异步函数load
,并将其结果处理给完成处理程序。
B)在视图控制器中导入AVFoundation
和Accelerate
:
import AVFoundation
import Accelerate
C)声明视图控制器中的噪声级别(以dB为单位):
let noiseFloor: Float = -80
例如,小于-80dB
的任何内容都将被视为沉默。
D)以下功能采用音频环境并产生所需的dB功率。 targetSamples
默认设置为100,您可以更改它以满足您的UI需求:
func render(audioContext: AudioContext?, targetSamples: Int = 100) -> [Float]{
guard let audioContext = audioContext else {
fatalError("Couldn't create the audioContext")
}
let sampleRange: CountableRange<Int> = 0..<audioContext.totalSamples/3
guard let reader = try? AVAssetReader(asset: audioContext.asset)
else {
fatalError("Couldn't initialize the AVAssetReader")
}
reader.timeRange = CMTimeRange(start: CMTime(value: Int64(sampleRange.lowerBound), timescale: audioContext.asset.duration.timescale),
duration: CMTime(value: Int64(sampleRange.count), timescale: audioContext.asset.duration.timescale))
let outputSettingsDict: [String : Any] = [
AVFormatIDKey: Int(kAudioFormatLinearPCM),
AVLinearPCMBitDepthKey: 16,
AVLinearPCMIsBigEndianKey: false,
AVLinearPCMIsFloatKey: false,
AVLinearPCMIsNonInterleaved: false
]
let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack,
outputSettings: outputSettingsDict)
readerOutput.alwaysCopiesSampleData = false
reader.add(readerOutput)
var channelCount = 1
let formatDescriptions = audioContext.assetTrack.formatDescriptions as! [CMAudioFormatDescription]
for item in formatDescriptions {
guard let fmtDesc = CMAudioFormatDescriptionGetStreamBasicDescription(item) else {
fatalError("Couldn't get the format description")
}
channelCount = Int(fmtDesc.pointee.mChannelsPerFrame)
}
let samplesPerPixel = max(1, channelCount * sampleRange.count / targetSamples)
let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
var outputSamples = [Float]()
var sampleBuffer = Data()
// 16-bit samples
reader.startReading()
defer { reader.cancelReading() }
while reader.status == .reading {
guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
break
}
// Append audio sample buffer into our current sample buffer
var readBufferLength = 0
var readBufferPointer: UnsafeMutablePointer<Int8>?
CMBlockBufferGetDataPointer(readBuffer, 0, &readBufferLength, nil, &readBufferPointer)
sampleBuffer.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength))
CMSampleBufferInvalidate(readSampleBuffer)
let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
let downSampledLength = totalSamples / samplesPerPixel
let samplesToProcess = downSampledLength * samplesPerPixel
guard samplesToProcess > 0 else { continue }
processSamples(fromData: &sampleBuffer,
outputSamples: &outputSamples,
samplesToProcess: samplesToProcess,
downSampledLength: downSampledLength,
samplesPerPixel: samplesPerPixel,
filter: filter)
//print("Status: \(reader.status)")
}
// Process the remaining samples at the end which didn't fit into samplesPerPixel
let samplesToProcess = sampleBuffer.count / MemoryLayout<Int16>.size
if samplesToProcess > 0 {
let downSampledLength = 1
let samplesPerPixel = samplesToProcess
let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
processSamples(fromData: &sampleBuffer,
outputSamples: &outputSamples,
samplesToProcess: samplesToProcess,
downSampledLength: downSampledLength,
samplesPerPixel: samplesPerPixel,
filter: filter)
//print("Status: \(reader.status)")
}
// if (reader.status == AVAssetReaderStatusFailed || reader.status == AVAssetReaderStatusUnknown)
guard reader.status == .completed || true else {
fatalError("Couldn't read the audio file")
}
return outputSamples
}
E) render
使用此功能对音频文件中的数据进行降采样,并转换为分贝:
func processSamples(fromData sampleBuffer: inout Data,
outputSamples: inout [Float],
samplesToProcess: Int,
downSampledLength: Int,
samplesPerPixel: Int,
filter: [Float]) {
sampleBuffer.withUnsafeBytes { (samples: UnsafePointer<Int16>) in
var processingBuffer = [Float](repeating: 0.0, count: samplesToProcess)
let sampleCount = vDSP_Length(samplesToProcess)
//Convert 16bit int samples to floats
vDSP_vflt16(samples, 1, &processingBuffer, 1, sampleCount)
//Take the absolute values to get amplitude
vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)
//get the corresponding dB, and clip the results
getdB(from: &processingBuffer)
//Downsample and average
var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
vDSP_desamp(processingBuffer,
vDSP_Stride(samplesPerPixel),
filter, &downSampledData,
vDSP_Length(downSampledLength),
vDSP_Length(samplesPerPixel))
//Remove processed samples
sampleBuffer.removeFirst(samplesToProcess * MemoryLayout<Int16>.size)
outputSamples += downSampledData
}
}
F)依次调用此函数以获取相应的dB,并将结果剪切到[noiseFloor, 0]
:
func getdB(from normalizedSamples: inout [Float]) {
// Convert samples to a log scale
var zero: Float = 32768.0
vDSP_vdbcon(normalizedSamples, 1, &zero, &normalizedSamples, 1, vDSP_Length(normalizedSamples.count), 1)
//Clip to [noiseFloor, 0]
var ceil: Float = 0.0
var noiseFloorMutable = noiseFloor
vDSP_vclip(normalizedSamples, 1, &noiseFloorMutable, &ceil, &normalizedSamples, 1, vDSP_Length(normalizedSamples.count))
}
G)最后,您可以像这样获得音频的波形:
guard let path = Bundle.main.path(forResource: "audio", ofType:"mp3") else {
fatalError("Couldn't find the file path")
}
let url = URL(fileURLWithPath: path)
var outputArray : [Float] = []
AudioContext.load(fromAudioURL: url, completionHandler: { audioContext in
guard let audioContext = audioContext else {
fatalError("Couldn't create the audioContext")
}
outputArray = self.render(audioContext: audioContext, targetSamples: 300)
})
请不要忘记AudioContext.load(fromAudioURL:)
是异步的。
此解决方案是由 William Entriken 从this repo合成的。一切归功于他。
以下是更新为Swift 5语法的相同代码:
import AVFoundation
import Accelerate
/// Holds audio information used for building waveforms
final class AudioContext {
/// The audio asset URL used to load the context
public let audioURL: URL
/// Total number of samples in loaded asset
public let totalSamples: Int
/// Loaded asset
public let asset: AVAsset
// Loaded assetTrack
public let assetTrack: AVAssetTrack
private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
self.audioURL = audioURL
self.totalSamples = totalSamples
self.asset = asset
self.assetTrack = assetTrack
}
public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])
guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else {
fatalError("Couldn't load AVAssetTrack")
}
asset.loadValuesAsynchronously(forKeys: ["duration"]) {
var error: NSError?
let status = asset.statusOfValue(forKey: "duration", error: &error)
switch status {
case .loaded:
guard
let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
let audioFormatDesc = formatDescriptions.first,
let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
else { break }
let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(asset.duration.value) / Float64(asset.duration.timescale))
let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack)
completionHandler(audioContext)
return
case .failed, .cancelled, .loading, .unknown:
print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
}
completionHandler(nil)
}
}
}
let noiseFloor: Float = -80
func render(audioContext: AudioContext?, targetSamples: Int = 100) -> [Float]{
guard let audioContext = audioContext else {
fatalError("Couldn't create the audioContext")
}
let sampleRange: CountableRange<Int> = 0..<audioContext.totalSamples/3
guard let reader = try? AVAssetReader(asset: audioContext.asset)
else {
fatalError("Couldn't initialize the AVAssetReader")
}
reader.timeRange = CMTimeRange(start: CMTime(value: Int64(sampleRange.lowerBound), timescale: audioContext.asset.duration.timescale),
duration: CMTime(value: Int64(sampleRange.count), timescale: audioContext.asset.duration.timescale))
let outputSettingsDict: [String : Any] = [
AVFormatIDKey: Int(kAudioFormatLinearPCM),
AVLinearPCMBitDepthKey: 16,
AVLinearPCMIsBigEndianKey: false,
AVLinearPCMIsFloatKey: false,
AVLinearPCMIsNonInterleaved: false
]
let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack,
outputSettings: outputSettingsDict)
readerOutput.alwaysCopiesSampleData = false
reader.add(readerOutput)
var channelCount = 1
let formatDescriptions = audioContext.assetTrack.formatDescriptions as! [CMAudioFormatDescription]
for item in formatDescriptions {
guard let fmtDesc = CMAudioFormatDescriptionGetStreamBasicDescription(item) else {
fatalError("Couldn't get the format description")
}
channelCount = Int(fmtDesc.pointee.mChannelsPerFrame)
}
let samplesPerPixel = max(1, channelCount * sampleRange.count / targetSamples)
let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
var outputSamples = [Float]()
var sampleBuffer = Data()
// 16-bit samples
reader.startReading()
defer { reader.cancelReading() }
while reader.status == .reading {
guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
break
}
// Append audio sample buffer into our current sample buffer
var readBufferLength = 0
var readBufferPointer: UnsafeMutablePointer<Int8>?
CMBlockBufferGetDataPointer(readBuffer,
atOffset: 0,
lengthAtOffsetOut: &readBufferLength,
totalLengthOut: nil,
dataPointerOut: &readBufferPointer)
sampleBuffer.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength))
CMSampleBufferInvalidate(readSampleBuffer)
let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
let downSampledLength = totalSamples / samplesPerPixel
let samplesToProcess = downSampledLength * samplesPerPixel
guard samplesToProcess > 0 else { continue }
processSamples(fromData: &sampleBuffer,
outputSamples: &outputSamples,
samplesToProcess: samplesToProcess,
downSampledLength: downSampledLength,
samplesPerPixel: samplesPerPixel,
filter: filter)
//print("Status: \(reader.status)")
}
// Process the remaining samples at the end which didn't fit into samplesPerPixel
let samplesToProcess = sampleBuffer.count / MemoryLayout<Int16>.size
if samplesToProcess > 0 {
let downSampledLength = 1
let samplesPerPixel = samplesToProcess
let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
processSamples(fromData: &sampleBuffer,
outputSamples: &outputSamples,
samplesToProcess: samplesToProcess,
downSampledLength: downSampledLength,
samplesPerPixel: samplesPerPixel,
filter: filter)
//print("Status: \(reader.status)")
}
// if (reader.status == AVAssetReaderStatusFailed || reader.status == AVAssetReaderStatusUnknown)
guard reader.status == .completed || true else {
fatalError("Couldn't read the audio file")
}
return outputSamples
}
func processSamples(fromData sampleBuffer: inout Data,
outputSamples: inout [Float],
samplesToProcess: Int,
downSampledLength: Int,
samplesPerPixel: Int,
filter: [Float]) {
sampleBuffer.withUnsafeBytes { (samples: UnsafeRawBufferPointer) in
var processingBuffer = [Float](repeating: 0.0, count: samplesToProcess)
let sampleCount = vDSP_Length(samplesToProcess)
//Create an UnsafePointer<Int16> from samples
let unsafeBufferPointer = samples.bindMemory(to: Int16.self)
let unsafePointer = unsafeBufferPointer.baseAddress!
//Convert 16bit int samples to floats
vDSP_vflt16(unsafePointer, 1, &processingBuffer, 1, sampleCount)
//Take the absolute values to get amplitude
vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)
//get the corresponding dB, and clip the results
getdB(from: &processingBuffer)
//Downsample and average
var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
vDSP_desamp(processingBuffer,
vDSP_Stride(samplesPerPixel),
filter, &downSampledData,
vDSP_Length(downSampledLength),
vDSP_Length(samplesPerPixel))
//Remove processed samples
sampleBuffer.removeFirst(samplesToProcess * MemoryLayout<Int16>.size)
outputSamples += downSampledData
}
sampleBuffer.withUnsafeBytes { (samples: UnsafeRawBufferPointer) in
var processingBuffer = [Float](repeating: 0.0, count: samplesToProcess)
let sampleCount = vDSP_Length(samplesToProcess)
//Create an UnsafePointer<Int16> from samples
let unsafeBufferPointer = samples.bindMemory(to: Int16.self)
let unsafePointer = unsafeBufferPointer.baseAddress!
//Convert 16bit int samples to floats
vDSP_vflt16(unsafePointer, 1, &processingBuffer, 1, sampleCount)
//Take the absolute values to get amplitude
vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)
//get the corresponding dB, and clip the results
getdB(from: &processingBuffer)
//Downsample and average
var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
vDSP_desamp(processingBuffer,
vDSP_Stride(samplesPerPixel),
filter, &downSampledData,
vDSP_Length(downSampledLength),
vDSP_Length(samplesPerPixel))
//Remove processed samples
sampleBuffer.removeFirst(samplesToProcess * MemoryLayout<Int16>.size)
outputSamples += downSampledData
}
}
func getdB(from normalizedSamples: inout [Float]) {
// Convert samples to a log scale
var zero: Float = 32768.0
vDSP_vdbcon(normalizedSamples, 1, &zero, &normalizedSamples, 1, vDSP_Length(normalizedSamples.count), 1)
//Clip to [noiseFloor, 0]
var ceil: Float = 0.0
var noiseFloorMutable = noiseFloor
vDSP_vclip(normalizedSamples, 1, &noiseFloorMutable, &ceil, &normalizedSamples, 1, vDSP_Length(normalizedSamples.count))
}
这是您可以用来在不播放音频文件的情况下预呈现其音频电平的功能:
func averagePowers(audioFileURL: URL, forChannel channelNumber: Int, completionHandler: @escaping(_ success: [Float]) -> ()) {
let audioFile = try! AVAudioFile(forReading: audioFileURL)
let audioFilePFormat = audioFile.processingFormat
let audioFileLength = audioFile.length
//Set the size of frames to read from the audio file, you can adjust this to your liking
let frameSizeToRead = Int(audioFilePFormat.sampleRate/20)
//This is to how many frames/portions we're going to divide the audio file
let numberOfFrames = Int(audioFileLength)/frameSizeToRead
//Create a pcm buffer the size of a frame
guard let audioBuffer = AVAudioPCMBuffer(pcmFormat: audioFilePFormat, frameCapacity: AVAudioFrameCount(frameSizeToRead)) else {
fatalError("Couldn't create the audio buffer")
}
//Do the calculations in a background thread, if you don't want to block the main thread for larger audio files
DispatchQueue.global(qos: .userInitiated).async {
//This is the array to be returned
var returnArray : [Float] = [Float]()
//We're going to read the audio file, frame by frame
for i in 0..<numberOfFrames {
//Change the position from which we are reading the audio file, since each frame starts from a different position in the audio file
audioFile.framePosition = AVAudioFramePosition(i * frameSizeToRead)
//Read the frame from the audio file
try! audioFile.read(into: audioBuffer, frameCount: AVAudioFrameCount(frameSizeToRead))
//Get the data from the chosen channel
let channelData = audioBuffer.floatChannelData![channelNumber]
//This is the array of floats
let arr = Array(UnsafeBufferPointer(start:channelData, count: frameSizeToRead))
//Calculate the mean value of the absolute values
let meanValue = arr.reduce(0, {$0 + abs($1)})/Float(arr.count)
//Calculate the dB power (You can adjust this), if average is less than 0.000_000_01 we limit it to -160.0
let dbPower: Float = meanValue > 0.000_000_01 ? 20 * log10(meanValue) : -160.0
//append the db power in the current frame to the returnArray
returnArray.append(dbPower)
}
//Return the dBPowers
completionHandler(returnArray)
}
}
您可以这样称呼它:
let path = Bundle.main.path(forResource: "audio.mp3", ofType:nil)!
let url = URL(fileURLWithPath: path)
averagePowers(audioFileURL: url, forChannel: 0, completionHandler: { array in
//Use the array
})
使用仪器,此解决方案在1.2秒内可以使用较高的CPU,使用returnArray
返回主线程大约需要5秒,而在低电量模式下则最多需要10秒。