在 iOS 上使用 FFmpegKit 采集实时摄像机和音频流

在多媒体处理领域,FFmpeg 是一款功能强大、用途广泛的工具。对于希望在其应用程序中利用 FFmpeg 功能的 iOS 开发人员来说,FFmpegKit 提供了一个方便的封装器。在这篇文章中,我们将探讨如何使用 FFmpegKit 在 iOS 上采集实时摄像机和音频流,并利用输入管道来处理这两种流。


在深入实施之前,请确保已将 FFmpegKit 集成到您的 iOS 项目中。您可以在官方文档中找到详细说明。

设置 AVCaptureSession

为了采集实时摄像机和音频流,我们将使用 AVCaptureSession(iOS 原生的音视频采集 API)。初始化会话并配置音频和视频输入:

/// Capture outputs that hand raw video frames and audio samples to the
/// sample-buffer delegate.
let videoOutput = AVCaptureVideoDataOutput()
let audioOutput = AVCaptureAudioDataOutput()

/// Dedicated *serial* queues for the sample-buffer delegate callbacks.
/// `setSampleBufferDelegate(_:queue:)` requires a serial queue so buffers are
/// delivered in order; the shared concurrent global queue does not guarantee that.
/// A higher QoS than `.background` is used because the callbacks feed a live stream.
let backgroundVideoQueue = DispatchQueue(label: "capture.video.output", qos: .userInitiated)
let backgroundAudioQueue = DispatchQueue(label: "capture.audio.output", qos: .userInitiated)

/// Pipes registered through `FFmpegKitConfig.registerNewFFmpegPipe()`;
/// `nil` until registration has happened.
private var videoPipe: String?
private var audioPipe: String?

/// Wraps the default video capture device in a capture input and attaches it
/// to `session`.
///
/// - Parameter session: The capture session that should receive the camera input.
/// - Returns: The attached camera input, or `nil` when no camera is available,
///   the input cannot be created, or the session refuses the input.
private func addCamera(session: AVCaptureSession) -> AVCaptureDeviceInput? {
    // Check if the device has a camera
    guard let camera = AVCaptureDevice.default(for: .video) else {
        print("Camera not available")
        return nil
    }

    do {
        // Create input from the camera
        let input = try AVCaptureDeviceInput(device: camera)

        // Only hand back an input that is actually attached to the session,
        // so callers never configure a device the session is not using.
        guard session.canAddInput(input) else {
            print("Camera input cannot be added to the session")
            return nil
        }
        session.addInput(input)
        return input
    } catch {
        print("Failed to create camera input: \(error)")
        return nil
    }
}
    /// Wraps the default audio capture device in a capture input and attaches
    /// it to `session`.
    ///
    /// - Parameter session: The capture session that should receive the microphone input.
    /// - Returns: The attached microphone input, or `nil` when no microphone is
    ///   available, the input cannot be created, or the session refuses the input.
    private func addMicrophone(session: AVCaptureSession) -> AVCaptureDeviceInput? {
        // Check if the device has a microphone
        guard let mic = AVCaptureDevice.default(for: .audio) else {
            print("Microphone not available")
            return nil
        }

        do {
            // Create input from the microphone
            let input = try AVCaptureDeviceInput(device: mic)

            // Only hand back an input that is actually attached to the session.
            guard session.canAddInput(input) else {
                print("Microphone input cannot be added to the session")
                return nil
            }
            session.addInput(input)
            return input
        } catch {
            print("Failed to create microphone input: \(error)")
            return nil
        }
    }
    /// Builds the capture pipeline: camera (and optionally microphone) inputs,
    /// BGRA video and audio data outputs, a preview layer inserted into `view`,
    /// a fixed 30 fps frame duration, and finally starts the session.
    ///
    /// - Parameter view: The view whose layer hosts the camera preview.
    private func setupCaptureSession(view: UIView) {
        // Create a session and add the inputs
        let session = AVCaptureSession()

        // Add camera to session input; without a camera there is nothing to stream.
        guard let camera = addCamera(session: session)?.device else {
            return
        }

        // Add video output as session output. 32-bit BGRA matches the
        // `-pixel_format bgra` raw-video input declared in the FFmpeg command.
        videoOutput.videoSettings = [
            kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA as UInt32)
        ]
        if session.canAddOutput(videoOutput) {
            session.addOutput(videoOutput)
        }

        // Add microphone as session input unless audio comes from the audio engine.
        if !useAudioEngine {
            _ = addMicrophone(session: session)
            // Add audio output as session output
            if session.canAddOutput(audioOutput) {
                session.addOutput(audioOutput)
            }
        }

        // Configure the shared audio session before capture starts.
        do {
            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setCategory(.playAndRecord, mode: .videoChat)
            try audioSession.setPreferredSampleRate(48000) // Set your preferred sample rate here
            try audioSession.setActive(true)
        } catch {
            print("Failed to set audio session settings: \(error.localizedDescription)")
        }

        // Set the preview layer to display the camera feed (UI work, main queue only).
        DispatchQueue.main.async { [weak self] in
            guard let self = self else { return }
            self.previewLayer.session = session
            self.previewLayer.videoGravity = .resizeAspectFill
            // Add the preview layer to the view's layer
            view.layer.insertSublayer(self.previewLayer, at: 0)
            // Adjust the frame of the preview layer
            self.previewLayer.frame = view.layer.bounds
        }

        // Set frame rate to 30 fps
        do {
            try camera.lockForConfiguration()
            // Every successful lock must be balanced by an unlock.
            defer { camera.unlockForConfiguration() }
            let desiredFrameDuration = CMTimeMake(value: 1, timescale: 30)
            camera.activeVideoMinFrameDuration = desiredFrameDuration
            camera.activeVideoMaxFrameDuration = desiredFrameDuration
        } catch {
            print("Error accessing video device: \(error)")
        }

        // Just print the current resolution
        let dimensions = CMVideoFormatDescriptionGetDimensions(camera.activeFormat.formatDescription)
        print("Resolution: \(dimensions.width) x \(dimensions.height)")

        // Set output delegates
        videoOutput.setSampleBufferDelegate(self, queue: backgroundVideoQueue)
        audioOutput.setSampleBufferDelegate(self, queue: backgroundAudioQueue)

        // Start the session so it begins delivering sample buffers.
        // `startRunning()` blocks until capture has started, so keep it off the caller's thread.
        DispatchQueue.global(qos: .userInitiated).async {
            session.startRunning()
        }
    }

// Register one FFmpegKit pipe per stream. The returned values are later opened
// as file paths for writing and passed to FFmpeg as `-i` inputs.
videoPipe = FFmpegKitConfig.registerNewFFmpegPipe()
audioPipe = FFmpegKitConfig.registerNewFFmpegPipe()
/// Writes one chunk of raw video bytes into the registered video pipe.
///
/// - Parameter data: The bytes to write. A missing pipe or a handle that cannot
///   be opened is logged and the chunk is dropped.
func writeToVideoPipe(data: Data) {
    guard let currentPipe = self.videoPipe,
          let fileHandle = try? FileHandle(forWritingTo: URL(fileURLWithPath: currentPipe)) else {
        print("Failed to open file handle for writing")
        return
    }

    if #available(iOS 13.4, *) {
        do {
            try fileHandle.write(contentsOf: data)
        } catch {
            // Report the failure instead of silently dropping the frame.
            print("Failed to write to video pipe: \(error)")
        }
    } else {
        // Legacy API for systems older than iOS 13.4.
        fileHandle.write(data)
    }
}
    /// Writes one chunk of raw audio bytes into the registered audio pipe.
    ///
    /// - Parameter data: The bytes to write. A missing pipe or a handle that
    ///   cannot be opened is logged and the chunk is dropped.
    func writeToAudioPipe(data: Data) {
        guard let currentPipe = self.audioPipe,
              let fileHandle = try? FileHandle(forWritingTo: URL(fileURLWithPath: currentPipe)) else {
            print("Failed to open file handle for writing")
            return
        }

        if #available(iOS 13.4, *) {
            do {
                try fileHandle.write(contentsOf: data)
            } catch {
                // Report the failure instead of silently dropping the samples.
                print("Failed to write to audio pipe: \(error)")
            }
        } else {
            // Legacy API for systems older than iOS 13.4.
            fileHandle.write(data)
        }
    }
/// Sample-buffer delegate callback shared by the video and audio outputs.
/// The concrete type of `output` decides which pipe the buffer is meant for.
///
/// - Parameters:
///   - output: The capture output that produced the buffer.
///   - sampleBuffer: The captured video frame or audio samples.
///   - connection: The connection the buffer arrived on.
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    if output is AVCaptureVideoDataOutput {
        /// convert samplebuffer to Data
        /// call: writeToVideoPipe. See: Github Repo
    } else if output is AVCaptureAudioDataOutput {
        /// convert samplebuffer to Data
        /// call: writeToAudioPipe. See: Github Repo
    }
}
// A multi-line string literal is required here: a plain "…" literal cannot span
// lines or contain the unescaped quotes around the -vf filter. The trailing
// backslashes join the lines, so FFmpeg still receives one single-line command.
let ffmpegCommand = """
-re -f rawvideo -pixel_format bgra -video_size 1920x1080 -framerate 30 -i \(videoPipe!) \
-f s16le -ar 48000 -ac 1 -itsoffset -5 -i \(audioPipe!) \
-framerate 30 -pixel_format yuv420p -c:v h264 -c:a aac -vf "transpose=1,scale=360:640" -b:v 640k -b:a 64k -vsync 1 \
-f flv \(url!)
"""

// Execute FFmpeg command
FFmpegKit.executeAsync(ffmpegCommand) { session in
    // Handle FFmpeg execution completion. The session passed to the callback is
    // optional, and FFmpegKit exposes the result through `getReturnCode()`.
    let returnCode = session?.getReturnCode()
    print("FFmpeg execution completed with return code \(String(describing: returnCode))")
}

1. 视频输入 (-i \(videoPipe!)):

  • -re:以原始帧频读取输入。这在流式传输时非常有用,可确保实时性。
  • -f rawvideo:强制输入格式为原始视频。
  • -pixel_format bgra:将像素格式设置为 BGRA(32 位)。
  • -video_size 1920x1080:将视频尺寸设置为 1920×1080 像素。
  • -framerate 30:将输入帧频设置为每秒 30 帧。
  • -i \(videoPipe!):指定输入视频管道。

2. 音频输入 (-i \(audioPipe!)):

  • -f s16le:强制输入格式为带符号的 16 位小端音频。
  • -ar 48000:将音频采样率设置为 48 kHz。
  • -ac 1:将音频通道设置为 1(单声道)。
  • -itsoffset -5:设置-5秒的音频偏移。这用于同步音频和视频。

3. 常用输出设置:

  • -framerate 30:将输出帧速率设置为每秒 30 帧。
  • -pixel_format yuv420p:设置像素格式为YUV420p。
  • -c:v h264:使用H.264视频编解码器。
  • -c:a aac:使用 AAC 音频编解码器。
  • -vf "transpose=1,scale=360:640":应用视频过滤器图表 – 将视频顺时针转置(旋转)90 度并将其缩放至 360×640 像素。
  • -b:v 640k:将视频比特率设置为 640 kbps。
  • -b:a 64k:将音频比特率设置为 64 kbps。
  • -vsync 1:将视频同步方式设置为 1(cfr,恒定帧率):通过复制或丢弃帧使输出保持恒定帧率。

4. 输出格式(-f flv):

  • -f flv:强制输出格式为 Flash 视频 (FLV)。
  • 输出 URL ( \(url!)):该 URL 指定处理和编码的视频/音频流将发送到的位置。它通常指向 RTMP 服务器。

注意:以上只是简化的示例代码。如果想深入了解完整实现,请查看 GitHub 仓库。


通过将 AVCaptureSession 的原生功能与 FFmpegKit 的灵活性相结合,我们演示了如何使用输入管道在 iOS 上采集实时摄像机和音频流。这种方法为在应用程序中进行实时多媒体处理提供了可能性。

Github 仓库:https://github.com/sxudan/ffmpeg-ios-publisher-example




