Audio visualization using Swift/SwiftUI

Karen Mirakyan
5 min read · Dec 4, 2022

Audio visualization in iOS apps is a popular feature, often required when implementing chat functionality, and I personally ran into a few issues when adding it to one of my recent projects.
Some of you might have faced the same thing while building apps with chats where some messages contain audio. Those messages need proper audio visualization inside the app: bars that reflect the voice intensity, plus the ability to play and pause the audio. This is not a trivial task and takes some work to get right.

Today I would like to share one way to visualize audio.

The implementation

Let’s jump straight into the implementation. First, create a demo project; everything below goes into it.

We need a model for our audio samples that holds a magnitude and the color we want to render it with (later I’ll explain why the color is needed).
Let’s call the model “AudioPreviewModel”:

import Foundation
import SwiftUI

struct AudioPreviewModel: Hashable {
    var magnitude: Float // loudness of this chunk, used as the bar height
    var color: Color     // bar color, updated while the audio is playing
}

To display an audio track, we first need to split it into several chunks and determine the magnitude of each chunk, since the magnitude becomes the height of each bar. The number of bars, in turn, comes from the screen width: the waveform will occupy 60% of the screen, and each bar plus its spacing takes about 4 points, which is where this expression comes from:

UIScreen.main.bounds.width * 0.6 / 4
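To make that concrete, here is a small sketch (the constant names are mine, not from the project) of how the bar count falls out of the screen width; the ContentView at the end of the article passes this same 0.6 / 4 expression as its samples count:

import UIKit

// The waveform occupies 60% of the screen width, and each bar
// (a 2 pt stick plus 2 pt of spacing) takes roughly 4 pt of it.
let waveformWidth = UIScreen.main.bounds.width * 0.6
let barsCount = Int(waveformWidth / 4) // how many samples/bars to render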

First, though, let’s create an extension for chunking an array:

extension Array {
    func chunked(into size: Int) -> [[Element]] {
        return stride(from: 0, to: count, by: size).map {
            Array(self[$0 ..< Swift.min($0 + size, count)])
        }
    }
}
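A quick sanity check of what chunked(into:) returns:

let numbers = Array(1...7)
print(numbers.chunked(into: 3))
// [[1, 2, 3], [4, 5, 6], [7]] (the last chunk keeps whatever is left over)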

Now that everything is prepared, we can start implementing our service.

import Foundation
import Combine
import AVFoundation

protocol ServiceProtocol {
    func buffer(url: URL, samplesCount: Int, completion: @escaping ([AudioPreviewModel]) -> ())
}

class Service {
    static let shared: ServiceProtocol = Service()
    private init() { }
}

extension Service: ServiceProtocol {
    func buffer(url: URL, samplesCount: Int, completion: @escaping ([AudioPreviewModel]) -> ()) {

        DispatchQueue.global(qos: .userInteractive).async {
            do {
                var cur_url = url
                // Remote files are downloaded into a temporary local file first,
                // because AVAudioFile can only read from the file system.
                if url.absoluteString.hasPrefix("https://") {
                    let data = try Data(contentsOf: url)

                    let directory = FileManager.default.temporaryDirectory
                    let fileName = "chunk.m4a"
                    cur_url = directory.appendingPathComponent(fileName)

                    try data.write(to: cur_url)
                }

                let file = try AVAudioFile(forReading: cur_url)
                if let format = AVAudioFormat(commonFormat: .pcmFormatFloat32,
                                              sampleRate: file.fileFormat.sampleRate,
                                              channels: file.fileFormat.channelCount,
                                              interleaved: false),
                   let buf = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(file.length)) {

                    try file.read(into: buf)
                    guard let floatChannelData = buf.floatChannelData else { return }
                    let frameLength = Int(buf.frameLength)

                    // Raw PCM samples of the first channel.
                    let samples = Array(UnsafeBufferPointer(start: floatChannelData[0], count: frameLength))

                    var result = [AudioPreviewModel]()

                    // Split the samples into roughly `samplesCount` chunks and compute
                    // the power (mean square) and decibel value of each chunk.
                    let chunked = samples.chunked(into: max(1, samples.count / samplesCount))
                    for row in chunked {
                        let accumulator = row.map { $0 * $0 }.reduce(0, +)
                        let power: Float = accumulator / Float(row.count)
                        let decibels = 10 * log10f(power)

                        result.append(AudioPreviewModel(magnitude: decibels, color: .gray))
                    }

                    DispatchQueue.main.async {
                        completion(result)
                    }
                }
            } catch {
                print("Audio Error: \(error)")
            }
        }
    }
}

Here we download remote files to a local temporary file, read the audio into a PCM buffer, split that buffer into chunks, and calculate a decibel value for each chunk.
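As a quick illustration of that math (the sample values below are made up), the mean of the squared samples is the chunk’s power, and 10 * log10 of the power gives the decibel value:

import Foundation

// A hypothetical chunk of PCM samples in the -1...1 range.
let chunk: [Float] = [0.1, -0.2, 0.05, 0.3]

let power = chunk.map { $0 * $0 }.reduce(0, +) / Float(chunk.count) // mean square ≈ 0.0356
let decibels = 10 * log10f(power)                                   // ≈ -14.5 dB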

Now it’s time to create our “ViewModel”, which will manage everything: fetching the visualization results, playing and pausing the audio file, getting its duration, and so on.


import Foundation
import AVKit
import SwiftUI
import AVFoundation
import Combine

class AudioPlayViewModel: ObservableObject {

    private var timer: Timer?

    @Published var isPlaying: Bool = false

    @Published public var soundSamples = [AudioPreviewModel]()
    let sample_count: Int
    var index = 0
    let url: URL

    var dataManager: ServiceProtocol

    @Published var player: AVPlayer!
    @Published var session: AVAudioSession!

    init(url: URL, samples_count: Int, dataManager: ServiceProtocol = Service.shared) {
        self.url = url
        self.sample_count = samples_count
        self.dataManager = dataManager

        visualizeAudio()

        do {
            session = AVAudioSession.sharedInstance()
            try session.setCategory(.playAndRecord)

            // Route the sound to the speaker instead of the receiver.
            try session.overrideOutputAudioPort(AVAudioSession.PortOverride.speaker)
        } catch {
            print(error.localizedDescription)
        }

        player = AVPlayer(url: self.url)
    }

    func startTimer() {

        count_duration { duration in
            let time_interval = duration / Double(self.sample_count)

            // Color one bar per tick so the waveform fills up as the audio plays.
            self.timer = Timer.scheduledTimer(withTimeInterval: time_interval, repeats: true, block: { (timer) in
                if self.index < self.soundSamples.count {
                    withAnimation(Animation.linear) {
                        self.soundSamples[self.index].color = Color.black
                    }
                    self.index += 1
                }
            })
        }
    }

    @objc func playerDidFinishPlaying(note: NSNotification) {
        self.player.pause()
        self.player.seek(to: .zero)
        self.timer?.invalidate()
        self.isPlaying = false
        self.index = 0
        // Reset all bars back to gray for the next playback.
        self.soundSamples = self.soundSamples.map { tmp -> AudioPreviewModel in
            var cur = tmp
            cur.color = Color.gray
            return cur
        }
    }

    func playAudio() {

        if isPlaying {
            pauseAudio()
        } else {
            NotificationCenter.default.addObserver(self, selector: #selector(self.playerDidFinishPlaying(note:)), name: NSNotification.Name.AVPlayerItemDidPlayToEndTime, object: player.currentItem)

            isPlaying.toggle()
            player.play()

            startTimer()
            count_duration { _ in }
        }
    }

    func pauseAudio() {
        player.pause()
        timer?.invalidate()
        self.isPlaying = false
    }

    func count_duration(completion: @escaping (Float64) -> ()) {

        DispatchQueue.global(qos: .background).async {
            if let duration = self.player.currentItem?.asset.duration {
                let seconds = CMTimeGetSeconds(duration)
                DispatchQueue.main.async {
                    completion(seconds)
                }
                return
            }

            DispatchQueue.main.async {
                completion(1)
            }
        }
    }

    func visualizeAudio() {
        dataManager.buffer(url: url, samplesCount: sample_count) { results in
            self.soundSamples = results
        }
    }

    func removeAudio() {
        do {
            try FileManager.default.removeItem(at: url)
            NotificationCenter.default.post(name: Notification.Name("hide_audio_preview"), object: nil)
        } catch {
            print(error)
        }
    }
}

In the startTimer function, you can see that we change the color of our samples as playback progresses; that is exactly why we stored a color property in AudioPreviewModel.
This covers all the functionality you are likely to need: visualizing the samples, playing and pausing the audio, counting its duration, etc. You can modify anything based on your needs.
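For example, if you also want to show the track length next to the waveform, one possible addition (hypothetical, not part of the original project) is a small extension that reuses count_duration and formats the result:

import Foundation

// Hypothetical helper: formats the duration as "m:ss", e.g. "1:07".
extension AudioPlayViewModel {
    func formattedDuration(completion: @escaping (String) -> ()) {
        count_duration { seconds in
            let total = Int(seconds.rounded())
            completion("\(total / 60):" + String(format: "%02d", total % 60))
        }
    }
}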

Now it’s time to create our view and see the resulting UI.

import SwiftUI
import AVFoundation
import AVKit

struct ContentView: View {
    @StateObject private var audioVM: AudioPlayViewModel

    private func normalizeSoundLevel(level: Float) -> CGFloat {
        let level = max(0.2, CGFloat(level) + 70) / 2 // between 0.1 and 35

        return CGFloat(level * (40 / 35))
    }

    init(audio: String) {
        _audioVM = StateObject(wrappedValue: AudioPlayViewModel(url: URL(string: audio)!, samples_count: Int(UIScreen.main.bounds.width * 0.6 / 4)))
    }

    var body: some View {
        VStack(alignment: .leading) {

            LazyHStack(alignment: .center, spacing: 10) {

                Button {
                    if audioVM.isPlaying {
                        audioVM.pauseAudio()
                    } else {
                        audioVM.playAudio()
                    }
                } label: {
                    Image(systemName: !(audioVM.isPlaying) ? "play.fill" : "pause.fill")
                        .resizable()
                        .aspectRatio(contentMode: .fit)
                        .frame(width: 20, height: 20)
                        .foregroundColor(.black)
                }

                HStack(alignment: .center, spacing: 2) {
                    if audioVM.soundSamples.isEmpty {
                        ProgressView()
                    } else {
                        ForEach(audioVM.soundSamples, id: \.self) { model in
                            BarView(value: self.normalizeSoundLevel(level: model.magnitude), color: model.color)
                        }
                    }
                }
                .frame(width: UIScreen.main.bounds.width * 0.6)
            }
        }
        .padding(.vertical, 8)
        .padding(.horizontal)
        .frame(minHeight: 0, maxHeight: 50)
        .background(Color.gray.opacity(0.3).cornerRadius(10))
    }
}

struct BarView: View {
    let value: CGFloat
    var color: Color = Color.gray

    var body: some View {
        ZStack {
            Rectangle()
                .fill(color)
                .cornerRadius(10)
                .frame(width: 2, height: value)
        }
    }
}

struct ContentView_Previews: PreviewProvider {
    static var previews: some View {
        ContentView(audio: "audio.mp3")
    }
}

To get more info about normalizeSoundLevel, check out this repository: https://github.com/bbaars/SwiftUI-Sound-Visualizer. It simply maps the raw decibel magnitudes into a displayable range of bar heights, where, for example, the quietest bar might end up around 1.5 points tall and the loudest around 50.
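As a worked example, a chunk measured at -30 dB maps to max(0.2, -30 + 70) / 2 = 20, which the 40 / 35 factor then scales to roughly 23 points of bar height.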

That’s it. I hope it was helpful.
You can view the whole source code on GitHub:
