我最初使用 TensorFlow Lite SDK 来完成所有工作,但我发现它的官方文档、示例和开源资料都远不如 Firebase SDK 完善。另外,实际项目(目前正在测试环境中测试)已经在使用 Firebase SDK。总之,我已经能够得到某种形式的输出,但由于没有对图像进行正确的归一化,输出不符合预期——不过至少已经有输出了。
从下面的代码中可以看到,我把 TensorFlowLite 列为了依赖项,但实际上并没有真正使用它。我有一个用到它的函数,但该函数从未被调用。因此,基本上可以忽略 parseOutputTensor、coordinateToIndex 以及那个常量枚举。
import UIKit
import Firebase
import AVFoundation
import TensorFlowLite
/// Camera screen that captures a still photo, scales it to the model's input
/// size, and runs it through a bundled TensorFlow Lite model using the
/// Firebase ML custom-model interpreter.
class ViewController: UIViewController {

    // MARK: - Constants

    /// Edge length, in pixels, of the square image fed to the model. The input
    /// tensor is declared as [1, side, side, 3] and the output tensor as
    /// [1, side, side, 2] in `setModelInputOutput()`.
    private static let modelInputSide = 512

    // MARK: - Capture state

    // These three are assigned in `startCamera()`; they are nil until it runs.
    var captureSesssion: AVCaptureSession!
    var cameraOutput: AVCapturePhotoOutput!
    var previewLayer: AVCaptureVideoPreviewLayer!

    /// Default video capture device. The force-unwrap traps when no video
    /// device is available.
    @objc let device = AVCaptureDevice.default(for: .video)!

    // NOTE(review): the two throttling values below are not read anywhere in
    // the visible code; they are kept so that other code in the file that may
    // rely on them keeps compiling.
    private var previousInferenceTimeMs: TimeInterval = Date.distantPast.timeIntervalSince1970 * 1000
    private let delayBetweenInferencesMs: Double = 1000

    @IBOutlet var imageView: UIImageView!

    /// Capture button pinned to the bottom of the view.
    ///
    /// Declared `lazy` so that `self` inside the closure is the view
    /// controller instance; in a non-lazy stored-property initializer `self`
    /// is not the instance yet, so the target/action pair would never fire.
    private lazy var button1: UIButton = {
        let button = UIButton()
        button.setTitle("button lol", for: .normal)
        button.translatesAutoresizingMaskIntoConstraints = false
        button.addTarget(self, action: #selector(buttonClicked), for: .touchDown)
        return button
    }()

    // MARK: - View lifecycle

    override func viewDidLoad() {
        super.viewDidLoad()

        // The button must be in the view hierarchy before constraints that
        // reference `view` are activated; otherwise Auto Layout raises an
        // exception because the two anchors share no common ancestor.
        view.addSubview(button1)
        button1.titleLabel?.font = UIFont(name: "Helvetica", size: 25)
        NSLayoutConstraint.activate([
            button1.bottomAnchor.constraint(equalTo: view.bottomAnchor),
            button1.widthAnchor.constraint(equalToConstant: view.frame.width / 3),
            button1.centerXAnchor.constraint(equalTo: view.centerXAnchor)
        ])

        // Nothing else in the file configures the capture session, and
        // `cameraPressed()` needs `cameraOutput` to be set.
        startCamera()
    }

    /// Action for `button1`: requests a still capture.
    @objc func buttonClicked() {
        cameraPressed()
    }

    // MARK: - Model setup

    /// Wraps the bundled `modelName.tflite` file in a `CustomLocalModel`.
    /// Traps if the file is missing from the main bundle.
    private func configureLocalModel() -> CustomLocalModel {
        guard let modelPath = Bundle.main.path(forResource: "modelName", ofType: "tflite") else {
            fatalError("Couldn't find the modelPath")
        }
        return CustomLocalModel(modelPath: modelPath)
    }

    /// Creates an interpreter for the given local model.
    private func createInterpreter(customLocalModel: CustomLocalModel) -> ModelInterpreter {
        return ModelInterpreter.modelInterpreter(localModel: customLocalModel)
    }

    /// Declares the tensor formats used for inference: float32
    /// [1, side, side, 3] in, float32 [1, side, side, 2] out.
    ///
    /// - Returns: The configured options, or `nil` if either format was rejected.
    private func setModelInputOutput() -> ModelInputOutputOptions? {
        let ioOptions = ModelInputOutputOptions()
        let side = NSNumber(value: ViewController.modelInputSide)
        do {
            try ioOptions.setInputFormat(index: 0, type: .float32, dimensions: [1, side, side, 3])
            try ioOptions.setOutputFormat(index: 0, type: .float32, dimensions: [1, side, side, 2])
        } catch {
            print("Failed to set input or output format with error: \(error.localizedDescription)")
            return nil
        }
        return ioOptions
    }

    // MARK: - Input preparation

    /// Converts an image into the model's input buffer.
    ///
    /// The image is drawn into a `side` x `side` bitmap (so an image of any
    /// other size is stretched to fit rather than read out of bounds), then
    /// every pixel is emitted row by row as three `Float32` values in R, G, B
    /// order, each divided by 255 to land in [0.0, 1.0].
    ///
    /// - Parameter theImage: The image to feed to the model.
    /// - Returns: The populated inputs, or `nil` if the bitmap could not be
    ///   created or the interpreter rejected the data.
    private func inputDataForInference(theImage: CGImage) -> ModelInputs? {
        let side = ViewController.modelInputSide
        guard let context = CGContext(
            data: nil,
            width: side, height: side,
            bitsPerComponent: 8, bytesPerRow: side * 4,
            space: CGColorSpaceCreateDeviceRGB(),
            bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue
        ) else {
            print("Failed to create a bitmap context for the model input")
            return nil
        }
        context.draw(theImage, in: CGRect(x: 0, y: 0, width: side, height: side))
        guard let pixels = context.data else {
            print("Bitmap context has no backing data")
            return nil
        }

        let bytesPerRow = context.bytesPerRow
        var values = [Float32]()
        values.reserveCapacity(side * side * 3)
        for row in 0 ..< side {
            // Bitmap memory is row-major: a pixel lives at row * bytesPerRow + col * 4.
            let rowStart = row * bytesPerRow
            for col in 0 ..< side {
                let offset = rowStart + col * 4
                // Byte 0 of each pixel is the skipped alpha slot; 1...3 are R, G, B.
                for channel in 1 ... 3 {
                    let byte = pixels.load(fromByteOffset: offset + channel, as: UInt8.self)
                    // Normalization to [0.0, 1.0]; this requirement varies by model.
                    values.append(Float32(byte) / 255.0)
                }
            }
        }
        let inputData = values.withUnsafeBufferPointer { Data(buffer: $0) }

        let inputs = ModelInputs()
        do {
            try inputs.addInput(inputData)
        } catch {
            print("Failed to add input: \(error)")
            return nil
        }
        return inputs
    }

    // MARK: - Inference

    /// Runs the interpreter and logs the shape and first element of output 0.
    ///
    /// The output tensor is declared as four-dimensional, so the result is
    /// cast to a four-level nested array; a two-level cast can never succeed
    /// for that shape.
    private func runInterpreter(interpreter: ModelInterpreter, inputs: ModelInputs, ioOptions: ModelInputOutputOptions) {
        interpreter.run(inputs: inputs, options: ioOptions) { outputs, error in
            if let error = error {
                print("Interpreter run failed: \(error.localizedDescription)")
                return
            }
            guard let outputs = outputs else {
                print("Interpreter run returned no outputs")
                return
            }

            let rawOutput: Any
            do {
                rawOutput = try outputs.output(index: 0)
            } catch {
                print("Failed to read output 0: \(error.localizedDescription)")
                return
            }
            guard let batch = rawOutput as? [[[[NSNumber]]]], let firstItem = batch.first else {
                print("Output 0 does not have the expected 4-D shape")
                return
            }

            let rows = firstItem.count
            let cols = firstItem.first?.count ?? 0
            let channels = firstItem.first?.first?.count ?? 0
            print("output shape: [\(batch.count), \(rows), \(cols), \(channels)]")
            if let firstPixel = firstItem.first?.first {
                print("output[0][0][0]: \(firstPixel)")
            }
        }
    }

    /// Runs the full pipeline for one captured image: load the model, declare
    /// tensor formats, build the input buffer, and start inference.
    private func gotImage(cgImage: CGImage) {
        let interpreter = createInterpreter(customLocalModel: configureLocalModel())
        guard let modelioOptions = setModelInputOutput() else {
            print("modelioOptions got image error")
            return
        }
        guard let modelInputs = inputDataForInference(theImage: cgImage) else {
            print("modelInputs got image error")
            return
        }
        runInterpreter(interpreter: interpreter, inputs: modelInputs, ioOptions: modelioOptions)
    }

    // MARK: - Image helpers

    /// Redraws `image` into a bitmap of exactly `targetSize` at scale 1.0.
    /// The aspect ratio is not preserved.
    ///
    /// - Returns: The resized image, or the unmodified `image` if the graphics
    ///   context failed to produce one.
    private func resizeImage(image: UIImage, targetSize: CGSize) -> UIImage {
        UIGraphicsBeginImageContextWithOptions(targetSize, false, 1.0)
        // Every Begin call must be balanced by an End call, on every exit path.
        defer { UIGraphicsEndImageContext() }
        image.draw(in: CGRect(origin: .zero, size: targetSize))
        return UIGraphicsGetImageFromCurrentImageContext() ?? image
    }
}
// MARK: - AVCapturePhotoCaptureDelegate

/// Camera setup, still capture, and the photo-capture delegate callback.
extension ViewController: AVCapturePhotoCaptureDelegate {

    /// Builds the capture session (photo preset, default video device,
    /// photo output), installs the preview layer, and starts the session.
    ///
    /// If the device input cannot be created or the session refuses the
    /// input/output, the failure is logged and an alert is shown instead of
    /// terminating the app.
    func startCamera() {
        let session = AVCaptureSession()
        session.sessionPreset = .photo
        captureSesssion = session
        cameraOutput = AVCapturePhotoOutput()

        previewLayer = AVCaptureVideoPreviewLayer(session: session)
        // A sublayer's frame is expressed in its parent's coordinate space.
        previewLayer.frame = view.bounds
        previewLayer.videoGravity = .resizeAspectFill

        // Focus mode may only be changed while holding the configuration lock,
        // and only to a mode the device reports as supported.
        do {
            try device.lockForConfiguration()
            if device.isFocusModeSupported(.continuousAutoFocus) {
                device.focusMode = .continuousAutoFocus
            }
            device.unlockForConfiguration()
        } catch {
            print("Could not lock the capture device for configuration: \(error.localizedDescription)")
        }

        guard let input = try? AVCaptureDeviceInput(device: device) else {
            print("TBPVC -> startCamera() : AVCaptureDeviceInput Error")
            presentCameraUnavailableAlert()
            return
        }
        guard session.canAddInput(input), session.canAddOutput(cameraOutput) else {
            print("issue here : captureSesssion.canAddInput")
            presentCameraUnavailableAlert()
            return
        }

        session.addInput(input)
        session.addOutput(cameraOutput)
        // Insert at the back so the preview does not cover existing subviews.
        view.layer.insertSublayer(previewLayer, at: 0)
        // startRunning() blocks the calling thread until the session has started.
        session.startRunning()
    }

    /// Shows the "camera not working" alert with a dismiss action.
    /// UIKit only presents it if this controller's view is currently in a window.
    private func presentCameraUnavailableAlert() {
        let alert = UIAlertController(
            title: "Your camera doesn't seem to be working :(",
            message: "Please make sure your camera works",
            preferredStyle: .alert
        )
        alert.addAction(UIAlertAction(title: "OK", style: .default))
        present(alert, animated: true)
    }

    /// Requests a single uncompressed 32-bit BGRA still from the photo output.
    /// Does nothing (beyond logging) when the output has no video connection,
    /// because requesting a capture in that state raises an exception.
    func cameraPressed() {
        guard let photoOutput = cameraOutput, photoOutput.connection(with: .video) != nil else {
            print("cameraPressed() ignored: the photo output has no video connection")
            return
        }
        let outputFormat = [kCVPixelBufferPixelFormatTypeKey as String: kCMPixelFormat_32BGRA]
        let settings = AVCapturePhotoSettings(format: outputFormat)
        photoOutput.capturePhoto(with: settings, delegate: self)
    }

    /// Delegate callback for a finished capture: decodes the photo, resizes it
    /// to 512x512, and hands the resulting `CGImage` to `gotImage(cgImage:)`.
    /// Any failure along the way is logged and the photo is dropped.
    func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) {
        if let error = error {
            print("Photo capture failed: \(error.localizedDescription)")
            return
        }
        print("got image")

        guard let imageData = photo.fileDataRepresentation() else {
            print("Error while generating image from photo capture data.")
            return
        }
        guard let uiImage = UIImage(data: imageData) else {
            print("Unable to generate UIImage from image data.")
            return
        }

        let resized = resizeImage(image: uiImage, targetSize: CGSize(width: 512, height: 512))
        guard let resizedCgImage = resized.cgImage else {
            print("Error generating CGImage")
            return
        }
        gotImage(cgImage: resizedCgImage)
    }

    /// Completion selector for saving an image to the photo library: shows an
    /// alert describing success or the error.
    @objc func image(_ image: UIImage, didFinishSavingWithError error: Error?, contextInfo: UnsafeRawPointer) {
        let title: String
        let message: String
        if let error = error {
            title = "Save error"
            message = error.localizedDescription
        } else {
            title = "Saved!"
            message = "Your altered image has been saved to your photos."
        }
        let ac = UIAlertController(title: title, message: message, preferredStyle: .alert)
        ac.addAction(UIAlertAction(title: "OK", style: .default))
        present(ac, animated: true)
    }
}