在 iOS 上使用 FFmpegKit 采集实时摄像机和音频流

在多媒体处理领域，FFmpeg 是一款功能强大、用途广泛的工具。对于希望在其应用程序中利用 FFmpeg 功能的 iOS 开发人员来说，FFmpegKit 提供了一个方便的封装器。在这篇文章中，我们将探讨如何使用 FFmpegKit 在 iOS 上采集实时摄像机和音频流，并利用输入管道来处理这两种流。

前提条件

在深入实施之前，请确保已将 FFmpegKit 集成到您的 iOS 项目中。您可以在官方文档中找到详细说明。

设置 AVCaptureSession

为了采集实时摄像机和音频流，我们将使用 AVCaptureSession（iOS 原生的音视频采集 API）。首先初始化会话，并配置音频和视频的输入与输出：

/// Capture outputs that hand raw video frames and audio samples to the sample buffer delegate.
let videoOutput = AVCaptureVideoDataOutput()
let audioOutput = AVCaptureAudioDataOutput()

/// Queues on which the sample buffer delegate callbacks run.
///
/// These are dedicated *serial* queues: AVFoundation only guarantees in-order delivery of
/// sample buffers when the delegate queue is serial, and the shared global `.background`
/// queue is both concurrent and the lowest priority, which is a poor fit for live capture.
let backgroundVideoQueue = DispatchQueue(label: "capture.video.sample-buffers", qos: .userInitiated)
let backgroundAudioQueue = DispatchQueue(label: "capture.audio.sample-buffers", qos: .userInitiated)

/// Paths of the FFmpegKit pipes the captured video and audio bytes are written to.
/// `nil` until a pipe has been registered.
private var videoPipe: String?
private var audioPipe: String?

/// Attaches the default video capture device to `session`.
///
/// - Parameter session: The capture session that should receive the camera input.
/// - Returns: The input that was added to `session`, or `nil` when no camera is available,
///   the input could not be created, or the session refused the input.
private func addCamera(session: AVCaptureSession) -> AVCaptureDeviceInput? {
    // Check if the device has a camera.
    guard let camera = AVCaptureDevice.default(for: .video) else {
        print("Camera not available")
        return nil
    }

    do {
        // Create an input from the camera.
        let input = try AVCaptureDeviceInput(device: camera)

        // Only report success when the input is really attached; otherwise the caller
        // would go on to configure and start a session that has no video source.
        guard session.canAddInput(input) else {
            print("Camera input cannot be added to the capture session")
            return nil
        }
        session.addInput(input)
        return input
    } catch {
        print(error)
        return nil
    }
}
    
    /// Attaches the default audio capture device to `session`.
    ///
    /// - Parameter session: The capture session that should receive the microphone input.
    /// - Returns: The input that was added to `session`, or `nil` when no microphone is
    ///   available, the input could not be created, or the session refused the input.
    private func addMicrophone(session: AVCaptureSession) -> AVCaptureDeviceInput? {
        // Check if the device has a microphone.
        guard let mic = AVCaptureDevice.default(for: .audio) else {
            print("Microphone not available")
            return nil
        }

        do {
            // Create an input from the microphone.
            let input = try AVCaptureDeviceInput(device: mic)

            // Only report success when the input is really attached to the session.
            guard session.canAddInput(input) else {
                print("Microphone input cannot be added to the capture session")
                return nil
            }
            session.addInput(input)
            return input
        } catch {
            print(error)
            return nil
        }
    }
    
    /// Builds a capture session with camera (and optionally microphone) input, wires the
    /// video/audio data outputs to this object as sample buffer delegate, shows the camera
    /// preview in `view`, and starts the session.
    ///
    /// The method returns early, without starting the session, when no camera input is
    /// available or the shared audio session cannot be configured.
    ///
    /// - Parameter view: The view whose layer hosts the camera preview layer.
    private func setupCaptureSession(view: UIView) {
        let session = AVCaptureSession()

        // Camera input is mandatory; without it there is nothing to stream.
        guard let camera = addCamera(session: session)?.device else {
            return
        }

        // Deliver frames as 32-bit BGRA pixel buffers.
        videoOutput.videoSettings = [
            kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA as UInt32)
        ]
        if session.canAddOutput(videoOutput) {
            session.addOutput(videoOutput)
        }

        // Microphone input/output are only attached when audio is not captured elsewhere.
        if !useAudioEngine {
            _ = addMicrophone(session: session)
            if session.canAddOutput(audioOutput) {
                session.addOutput(audioOutput)
            }
        }

        // Configure and activate the shared audio session before capture starts.
        do {
            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setCategory(.playAndRecord, mode: .videoChat)
            try audioSession.setPreferredSampleRate(48000) // Set your preferred sample rate here
            try audioSession.setActive(true)
        } catch {
            print("Failed to set audio session settings: \(error.localizedDescription)")
            return
        }

        // Preview layer changes touch the view hierarchy, so they run on the main queue.
        DispatchQueue.main.async {
            self.previewLayer.session = session
            self.previewLayer.videoGravity = .resizeAspectFill
            // Put the preview behind any other sublayers of the view.
            view.layer.insertSublayer(self.previewLayer, at: 0)
            self.previewLayer.frame = view.layer.bounds
        }

        // Pin the camera to 30 fps. Assigning a frame duration the active format does not
        // support raises an Objective-C exception, so check the supported ranges first.
        let targetFrameRate = 30
        let supportsTargetFrameRate = camera.activeFormat.videoSupportedFrameRateRanges.contains { range in
            range.minFrameRate <= Double(targetFrameRate) && Double(targetFrameRate) <= range.maxFrameRate
        }
        if supportsTargetFrameRate {
            do {
                try camera.lockForConfiguration()
                // Release the configuration lock on every exit path.
                defer { camera.unlockForConfiguration() }

                let frameDuration = CMTimeMake(value: 1, timescale: Int32(targetFrameRate))
                camera.activeVideoMinFrameDuration = frameDuration
                camera.activeVideoMaxFrameDuration = frameDuration
            } catch {
                print("Error accessing video device: \(error)")
            }
        } else {
            print("Error accessing video device: \(targetFrameRate) fps is not supported by the active format")
        }

        // Log the resolution of the camera's active format.
        let dimensions = CMVideoFormatDescriptionGetDimensions(camera.activeFormat.formatDescription)
        print("Resolution: \(dimensions.width) x \(dimensions.height)")

        // Route captured sample buffers to this object.
        videoOutput.setSampleBufferDelegate(self, queue: backgroundVideoQueue)
        audioOutput.setSampleBufferDelegate(self, queue: backgroundAudioQueue)

        // NOTE(review): startRunning() blocks until the session has started; confirm the
        // caller does not invoke this method on the main thread.
        session.startRunning()
    }

.
.
.
// Register one FFmpegKit pipe per stream. The returned values are stored so they can later
// be used as file paths when writing captured data and as inputs of the FFmpeg command.
videoPipe = FFmpegKitConfig.registerNewFFmpegPipe()
audioPipe = FFmpegKitConfig.registerNewFFmpegPipe()
.
.
.
/// Writes one chunk of raw video bytes to the registered video pipe.
///
/// - Parameter data: The bytes to append to the video pipe.
func writeToVideoPipe(data: Data) {
    write(data, toPipeAt: self.videoPipe)
}

/// Writes one chunk of raw audio bytes to the registered audio pipe.
///
/// - Parameter data: The bytes to append to the audio pipe.
func writeToAudioPipe(data: Data) {
    write(data, toPipeAt: self.audioPipe)
}

/// Opens the pipe at `pipePath` for writing, writes `data`, and closes the handle again.
///
/// Failures are logged rather than thrown so a single bad write does not interrupt capture.
///
/// - Parameters:
///   - data: The bytes to write.
///   - pipePath: File system path of the pipe, or `nil` when no pipe has been registered.
private func write(_ data: Data, toPipeAt pipePath: String?) {
    guard let pipePath = pipePath,
          let fileHandle = try? FileHandle(forWritingTo: URL(fileURLWithPath: pipePath)) else {
        print("Failed to open file handle for writing")
        return
    }

    if #available(iOS 13.4, *) {
        // Close the handle on every exit path, including a failed write.
        defer { try? fileHandle.close() }
        do {
            try fileHandle.write(contentsOf: data)
        } catch {
            // Previously swallowed by `try?`; surface it so dropped frames are visible.
            print("Failed to write to pipe \(pipePath): \(error)")
        }
    } else {
        defer { fileHandle.closeFile() }
        // The legacy API has no throwing variant.
        fileHandle.write(data)
    }
}
.
.
.
/// Sample buffer delegate callback shared by the video and audio data outputs.
///
/// The concrete type of `output` decides which pipe the sample buffer belongs to. The
/// conversion and write steps are placeholders in this snippet.
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    switch output {
    case is AVCaptureVideoDataOutput:
        // convert samplebuffer to Data
        // call: writeToVideoPipe. See: Github Repo
        break
    case is AVCaptureAudioDataOutput:
        // convert samplebuffer to Data
        // call: writeToAudioPipe. See: Github Repo
        break
    default:
        break
    }
}

// FFmpeg command line, assembled from one fragment per logical group (video input, audio
// input, encoding options, output) and joined with single spaces. A plain "..." literal
// cannot span several lines, and the quotes around the filter expression must be escaped.
// NOTE(review): the force-unwraps crash when a pipe or the URL is nil; make sure both pipes
// are registered and `url` is set before this statement runs.
let ffmpegCommand = [
    "-re -f rawvideo -pixel_format bgra -video_size 1920x1080 -framerate 30 -i \(videoPipe!)",
    "-f s16le -ar 48000 -ac 1 -itsoffset -5 -i \(audioPipe!)",
    "-framerate 30 -pixel_format yuv420p -c:v h264 -c:a aac -vf \"transpose=1,scale=360:640\" -b:v 640k -b:a 64k -vsync 1",
    "-f flv \(url!)"
].joined(separator: " ")

// Execute the FFmpeg command asynchronously; the closure is invoked once the session completes.
FFmpegKit.executeAsync(ffmpegCommand) { session in
    // The session handed to the callback is optional and exposes its result through
    // getReturnCode(); read the numeric value and fall back to "unknown" when unavailable.
    let returnCode = session?.getReturnCode()?.getValue()
    let returnCodeText = returnCode.map { String($0) } ?? "unknown"
    print("FFmpeg execution completed with return code \(returnCodeText)")
}

1. 视频输入（-i \(videoPipe!)）：

-re：以原始帧频读取输入。这在流式传输时非常有用，可确保实时性。
-f rawvideo：强制输入格式为原始视频。
-pixel_format bgra：将像素格式设置为 BGRA（32 位）。
-video_size 1920x1080：将视频尺寸设置为 1920×1080 像素。
-framerate 30：将输入帧频设置为每秒 30 帧。
-i \(videoPipe!)：指定输入视频管道。

2. 音频输入（-i \(audioPipe!)）：

-f s16le：强制输入格式为带符号的 16 位小端音频。
-ar 48000：将音频采样率设置为 48 kHz。
-ac 1：将音频通道设置为 1（单声道）。
-itsoffset -5：设置-5秒的音频偏移。这用于同步音频和视频。

3. 常用输出设置：

-framerate 30：将输出帧速率设置为每秒 30 帧。
-pixel_format yuv420p：设置像素格式为YUV420p。
-c:v h264：使用H.264视频编解码器。
-c:a aac：使用 AAC 音频编解码器。
-vf "transpose=1,scale=360:640"：应用视频滤镜链——先将视频顺时针旋转 90 度（transpose=1），再将其缩放至 360×640 像素。
-b:v 640k：将视频比特率设置为 640 kbps。
-b:a 64k：将音频比特率设置为 64 kbps。
-vsync 1：将视频同步方式设置为 1（cfr，恒定帧率）：通过复制或丢弃帧使输出保持恒定帧率。