Skip to content

Commit 7088df6

Browse files
committed
Implement spectrogram strategy
1 parent 6a3bd79 commit 7088df6

37 files changed

Lines changed: 559 additions & 10 deletions

README.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,17 +49,24 @@ class MyAudioTests: XCTestCase {
4949
}
5050
```
5151

52+
The audio snapshot strategies are themselves snapshot tested. You can find many examples in [AudioSnapshotTestingTests.swift](Tests/AudioSnapshotTestingTests/AudioSnapshotTestingTests.swift).
53+
5254
### Features
5355

5456
- [x] `AVAudioPCMBuffer` waveform snapshots
5557
- [x] `AVAudioPCMBuffer` overlaid waveform snapshots
56-
- [ ] Spectrogram
58+
- [x] Spectrogram
5759
- [x] Spectra
5860
- [x] Different waveform rendering strategies
59-
- [ ] Test against other reference implementations and with known audio files
61+
- [x] Test against other reference implementations and with known audio files
6062
- [ ] Documentation
6163
- [ ] Mention JUCE
6264
- [ ] Link blog post and talk
65+
- [ ] review stashes
66+
- [x] Add a link to swift snapshot testing
67+
- [ ] Multi level comparison (first hash, then data, then image)
68+
- [ ] Use accelerate in downsampling
69+
- [ ] Add file strategy
6370

6471
## Contributing
6572

Sources/AudioSnapshotTesting/AVAudioPCMBufferExtensions.swift

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,57 @@ extension AVAudioPCMBuffer {
8888
sampleRate: Float(mono.format.sampleRate)
8989
)
9090
}
91+
92+
/// Computes a spectrogram of the buffer as a flat array of per-frame FFT amplitudes.
///
/// The buffer is mixed down to mono, then `width` successive frames of `fftSize`
/// samples are analysed, each starting `hopSize` samples after the previous one.
/// The amplitudes of every frame are appended in order (frame 0 first) and the
/// combined array is passed through `applyAmplitudeScale`.
///
/// - Parameters:
///   - fftSize: Number of samples handed to the FFT for each frame. Must be positive.
///   - hopSize: Number of samples between the starts of successive frames. Must not be negative.
///   - width: Number of frames to analyse.
///   - window: Window function passed to the FFT.
///   - amplitudeScale: Scaling applied to the collected amplitudes.
/// - Returns: The scaled amplitudes of all frames, concatenated frame by frame;
///   empty when `width` is not positive.
/// - Precondition: The mono buffer must provide at least
///   `(width - 1) * hopSize + fftSize` sample frames.
func spectrogram(fftSize: Int, hopSize: Int, width: Int, window: [Float], amplitudeScale: AmplitudeScale) -> [Float] {
    // `0..<width` traps for a negative upper bound; with no frames there is nothing to analyse.
    guard width > 0 else { return [] }

    precondition(fftSize > 0, "fftSize must be positive")
    precondition(hopSize >= 0, "hopSize must not be negative")

    let mono = mixToMono()
    guard let channels = mono.floatChannelData else {
        preconditionFailure("spectrogram requires a buffer that exposes Float channel data")
    }

    // The last frame reads samples up to (width - 1) * hopSize + fftSize. Anything beyond
    // the buffer's allocated capacity would be an out-of-bounds memory read.
    let requiredFrames = (width - 1) * hopSize + fftSize
    precondition(
        requiredFrames <= Int(mono.frameCapacity),
        "spectrogram needs \(requiredFrames) sample frames but the buffer only holds \(mono.frameCapacity)"
    )

    let samples = channels[0]
    let sampleRate = Float(mono.format.sampleRate)

    var spectrogramData = [Float]()
    for frameIndex in 0..<width {
        let frequencyAmplitudes = fft(
            n: fftSize,
            signal: samples.advanced(by: frameIndex * hopSize),
            window: window,
            sampleRate: sampleRate
        )
        spectrogramData.append(contentsOf: frequencyAmplitudes.map(\.amplitude))
    }

    return applyAmplitudeScale(spectrogramData, scale: amplitudeScale)
}
112+
}
113+
114+
/// Maps amplitude values onto the requested amplitude scale.
/// - Parameters:
///   - data: Amplitude values to scale.
///   - scale: `.linear` leaves the values untouched; `.logarithmic(range:)` converts
///     them to decibels and normalizes them to `0...1` over `range` dB.
/// - Returns: The scaled values, one per input element.
private func applyAmplitudeScale(_ data: [Float], scale: AmplitudeScale) -> [Float] {
    switch scale {
    case .linear:
        // Identity: the values are handed back exactly as received.
        return data

    case .logarithmic(let range):
        // Floor applied before the logarithm so that log10(0) is never evaluated.
        let amplitudeFloor: Float = 1e-10
        let lowestDB = -range

        return data.map { amplitude -> Float in
            // Decibels: 20 * log10(amplitude).
            let decibels = 20.0 * log10(max(amplitude, amplitudeFloor))
            // Position of the value inside the dB range, clamped to 0...1.
            let position = (decibels - lowestDB) / range
            return max(0, min(1, position))
        }
    }
}
92143

93144
private func fft(
@@ -193,9 +244,10 @@ private func computeAmplitudes(
193244
sampleRate: Float,
194245
n: Float
195246
) -> [FrequencyAmplitude] {
247+
// Return all frequency bins without filtering
248+
// Filtering is applied at a higher level (e.g., via threshold parameter in spectrum())
196249
autospectrum
197250
.enumerated()
198-
.filter { $0.element > 1 }
199251
.map { index, element in
200252
return FrequencyAmplitude(
201253
frequency: Float(index) * sampleRate / n,

Sources/AudioSnapshotTesting/AudioSnapshotTesting.swift

Lines changed: 67 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,24 @@ import Accelerate
44

55
@_exported import SnapshotTesting
66

7+
/// Specifies the amplitude scaling method for spectrograms.
public enum AmplitudeScale {
    /// Linear amplitude scale: the FFT amplitudes are used exactly as computed,
    /// without any conversion, normalization or clamping.
    case linear
    /// Logarithmic amplitude scale: amplitudes are converted to decibels
    /// (`20 * log10(amplitude)`) and normalized to `0...1` over the given dynamic range.
    /// - Parameter range: The dynamic range in dB, measured downwards from 0 dB
    ///   (e.g. `120` maps -120 dB to `0` and 0 dB to `1`; values outside are clamped).
    case logarithmic(range: Float)
}
15+
716
// Platform abstraction: the same alias names resolve to AppKit types on macOS and to
// UIKit types on iOS. Only `PlatformImage` is public; the other aliases are internal.
// No aliases are declared for any other platform.
#if os(macOS)
817
public typealias PlatformImage = NSImage
918
typealias PlatformView = NSView
19+
// Colour type used to build the spectrogram colour lookup table.
typealias PlatformColor = NSColor
1020
typealias PlatformHostingView = NSHostingView
1121
#elseif os(iOS)
1222
public typealias PlatformImage = UIImage
1323
typealias PlatformView = UIView
24+
// Colour type used to build the spectrogram colour lookup table.
typealias PlatformColor = UIColor
1425
typealias PlatformHostingView = _UIHostingView
1526
#endif
1627

@@ -90,17 +101,66 @@ public extension Snapshotting where Format == PlatformImage, Value == AVAudioPCM
90101
) -> Snapshotting {
91102
Snapshotting<PlatformView, PlatformImage>.image(size: .init(width: width, height: height))
92103
.pullback { buffer in
93-
let fallbackWindow = vDSP.window(
94-
ofType: Float.self,
95-
usingSequence: .hanningNormalized,
96-
count: Int(buffer.frameLength),
97-
isHalfWindow: false
98-
)
104+
let effectiveWindow = window ?? createHannWindow(size: Int(buffer.frameLength))
99105
let data = buffer
100-
.spectrum(window: window ?? fallbackWindow)
106+
.spectrum(window: effectiveWindow)
101107
.filter { $0.amplitude > threshold }
102108
let spectrum = SpectrumView(data: data, height: CGFloat(height))
103109
return PlatformHostingView(rootView: spectrum.environment(\.colorScheme, .light))
104110
}
105111
}
112+
113+
/// Generates a spectrogram of the given `AVAudioPCMBuffer`.
///
/// Every spectral frame is computed from `frequencyCount * 2` consecutive audio frames.
/// Frames start at position 0 and advance by `hopSize` until no further full frame
/// fits into the buffer.
///
/// - Parameters:
///   - hopSize: The number of audio frames between successive spectral frames. Must be positive.
///   - frequencyCount: The number of frequency bins to include in each spectral frame. Must be positive.
///     It is also used as the height of the snapshot image.
///   - window: An optional array of floats representing the window function to apply before computing the FFT. If not provided, a Hann window will be used by default.
///   - amplitudeScale: The amplitude scaling method. Defaults to `.logarithmic(range: 120)` for standard 120 dB dynamic range. Use `.linear` for raw FFT magnitudes.
///   - imageWidth: The width of the resulting snapshot image in pixels. Defaults to 1000.
/// - Returns: A snapshot strategy that renders the buffer as a spectrogram image.
/// - Precondition: The snapshotted buffer must contain at least `frequencyCount * 2` frames.
@available(iOS 16, macOS 13, *)
static func spectrogram(
    hopSize: Int,
    frequencyCount: Int,
    window: [Float]? = nil,
    amplitudeScale: AmplitudeScale = .logarithmic(range: 120),
    imageWidth: Int = 1000
) -> Snapshotting {
    // `hopSize` is used as a divisor below; zero would trap with an unhelpful message.
    precondition(hopSize > 0, "hopSize must be positive")
    precondition(frequencyCount > 0, "frequencyCount must be positive")

    let height = frequencyCount
    let fftSize = frequencyCount * 2

    return Snapshotting<PlatformView, PlatformImage>.image(size: .init(width: imageWidth, height: height))
        .pullback { buffer in
            let frameLength = Int(buffer.frameLength)
            // A shorter buffer cannot hold a single FFT frame; continuing would either
            // read past the end of the samples or produce a non-positive frame count.
            precondition(
                frameLength >= fftSize,
                "Buffer holds \(frameLength) frames but a spectrogram with \(frequencyCount) frequency bins needs at least \(fftSize)"
            )

            // The last FFT frame starts at (frameLength - fftSize); frames advance by hopSize.
            // +1 accounts for the initial frame at position 0.
            let lastBucketStart = frameLength - fftSize
            let width = 1 + (lastBucketStart / hopSize)

            let effectiveWindow = window ?? createHannWindow(size: fftSize)
            let data = buffer.spectrogram(
                fftSize: fftSize,
                hopSize: hopSize,
                width: width,
                window: effectiveWindow,
                amplitudeScale: amplitudeScale
            )

            // Bin size and maximum frequency follow from the sample rate and the frequency count.
            let binSize = buffer.format.sampleRate / Double(fftSize)
            let maxFrequency = Int(binSize * Double(frequencyCount))

            // Duration of the whole buffer in seconds.
            let duration = Double(buffer.frameLength) / buffer.format.sampleRate

            let spectrogram = SpectrogramView(
                data: data,
                width: width,
                height: height,
                maxFrequency: maxFrequency,
                duration: duration
            )
            return PlatformHostingView(rootView: spectrogram.environment(\.colorScheme, .light))
        }
}
154+
}
155+
156+
/// Builds a full (not half) normalized Hann window.
/// - Parameter size: Number of samples the window should contain.
/// - Returns: The window coefficients produced by `vDSP.window` for the
///   `.hanningNormalized` sequence.
private func createHannWindow(size: Int) -> [Float] {
    let coefficients: [Float] = vDSP.window(
        ofType: Float.self,
        usingSequence: .hanningNormalized,
        count: size,
        isHalfWindow: false
    )
    return coefficients
}
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import CoreGraphics
2+
import Accelerate
3+
4+
@available(iOS 16, macOS 13, *)
extension CGImage {
    /// Renders spectrogram values as an RGB image using `multidimensionalLookupTable`.
    ///
    /// `data` is read as `width` rows of `height` values each. The resulting image is
    /// therefore `height` pixels wide and `width` pixels tall (a vertical representation
    /// of the spectrogram).
    ///
    /// - Parameters:
    ///   - data: Values to colour-map; must contain at least `width * height` elements.
    ///   - width: Number of rows in `data`. Must be positive.
    ///   - height: Number of values per row in `data`. Must be positive.
    /// - Returns: A 32-bit floating point RGB image without alpha.
    static func spectrogramImage(data: [Float], width: Int, height: Int) -> CGImage {
        precondition(width > 0 && height > 0, "spectrogramImage requires positive dimensions")
        // The planar buffer below is laid directly over `data`; fewer values than
        // width * height would make vImage read past the end of the array.
        precondition(
            data.count >= width * height,
            "spectrogramImage needs \(width * height) values but received \(data.count)"
        )

        guard let rgbImageFormat = vImage_CGImageFormat(
            bitsPerComponent: 32,
            bitsPerPixel: 32 * 3,
            colorSpace: CGColorSpaceCreateDeviceRGB(),
            bitmapInfo: CGBitmapInfo(
                rawValue: kCGBitmapByteOrder32Host.rawValue |
                CGBitmapInfo.floatComponents.rawValue |
                CGImageAlphaInfo.none.rawValue)) else {
            fatalError("Unable to create the 32-bit float RGB image format")
        }

        // Planar colour buffers plus the interleaved RGB buffer the final image is made from.
        let redBuffer = vImage.PixelBuffer<vImage.PlanarF>(width: height, height: width)
        let greenBuffer = vImage.PixelBuffer<vImage.PlanarF>(width: height, height: width)
        let blueBuffer = vImage.PixelBuffer<vImage.PlanarF>(width: height, height: width)
        let rgbImageBuffer = vImage.PixelBuffer<vImage.InterleavedFx3>(width: height, height: width)

        // A mutable copy is required to obtain a mutable pointer to the samples.
        var data = data
        data.withUnsafeMutableBufferPointer { samples in
            guard let baseAddress = samples.baseAddress else {
                fatalError("Spectrogram data has no storage")
            }

            let planarImageBuffer = vImage.PixelBuffer(
                data: baseAddress,
                width: height,
                height: width,
                byteCountPerRow: height * MemoryLayout<Float>.stride,
                pixelFormat: vImage.PlanarF.self
            )

            multidimensionalLookupTable.apply(
                sources: [planarImageBuffer],
                destinations: [redBuffer, greenBuffer, blueBuffer],
                interpolation: .half
            )

            rgbImageBuffer.interleave(
                planarSourceBuffers: [redBuffer, greenBuffer, blueBuffer]
            )
        }

        guard let image = rgbImageBuffer.makeCGImage(cgImageFormat: rgbImageFormat) else {
            fatalError("Unable to create a CGImage from the spectrogram buffer")
        }
        return image
    }
}
44+
45+
/// Lookup table that maps one source channel to three destination channels,
/// giving a blue -> red -> green colour map.
///
/// Values near zero return dark blue, `0.5` returns red, and `1.0` returns full-brightness green.
/// Each entry is generated from an HSB colour whose hue runs from `0.6666` (first entry)
/// down to `0` (last entry); its components are then stored in green, red, blue order.
@available(macOS 13, iOS 16, *)
nonisolated(unsafe) var multidimensionalLookupTable: vImage.MultidimensionalLookupTable = {
    // 32 entries per channel: enough resolution for smooth colours at a small memory cost.
    // More entries (64, 128) give smoother gradients but cost memory and set-up time;
    // fewer (16, 8) are cheaper but produce visible banding in spectrograms.
    let entriesPerChannel = UInt8(32)
    let srcChannelCount = 1
    let destChannelCount = 3

    let lookupTableElementCount = Int(pow(Float(entriesPerChannel), Float(srcChannelCount))) * Int(destChannelCount)

    let tableData = [UInt16](unsafeUninitializedCapacity: lookupTableElementCount) { storage, initializedCount in
        // Samples are supplied in the range `0...65535`; the transform function
        // interpolates these to the range `0...1`.
        let scale = CGFloat(UInt16.max)
        let lastEntry = CGFloat(entriesPerChannel - 1)

        for entry in 0 ..< Int(entriesPerChannel) {
            // Position of this entry in the range `0...1`.
            let level = CGFloat(entry) / lastEntry

            // Hue that's blue at `0.0` and red at `1.0`; brightness rises with the level.
            let color = PlatformColor(
                hue: 0.6666 - (0.6666 * level),
                saturation: 1,
                brightness: sqrt(level),
                alpha: 1
            )

            var red = CGFloat()
            var green = CGFloat()
            var blue = CGFloat()
            color.getRed(&red, green: &green, blue: &blue, alpha: nil)

            // Three consecutive slots per entry, written in green, red, blue order.
            let slot = entry * destChannelCount
            storage[slot] = UInt16(green * scale)
            storage[slot + 1] = UInt16(red * scale)
            storage[slot + 2] = UInt16(blue * scale)
        }

        initializedCount = lookupTableElementCount
    }

    return vImage.MultidimensionalLookupTable(
        entryCountPerSourceChannel: [UInt8](repeating: entriesPerChannel, count: srcChannelCount),
        destinationChannelCount: destChannelCount,
        data: tableData
    )
}()

0 commit comments

Comments
 (0)