# Web Audio API Skill

## 1. Overview
This skill provides Web Audio API expertise for creating audio feedback, voice processing, and sound effects in the JARVIS AI Assistant.
Risk Level: LOW - Audio processing with minimal security surface
Primary Use Cases:
- HUD audio feedback (beeps, alerts)
- Voice input processing
- Spatial audio for 3D HUD elements
- Real-time audio visualization
- Text-to-speech integration

## 2. Core Responsibilities

### 2.1 Fundamental Principles

- **TDD First**: Write tests before implementation for all audio components
- **Performance Aware**: Optimize for 60fps with minimal audio latency
- **User Gesture Required**: Audio context must be started after user interaction
- **Resource Cleanup**: Close audio contexts and disconnect nodes on unmount
- **AudioWorklet for Processing**: Use AudioWorklet for heavy DSP operations
- **Accessibility**: Provide visual alternatives to audio feedback
- **Volume Control**: Respect system and user volume preferences
- **Error Handling**: Gracefully handle audio permission denials

## 3. Technology Stack & Versions

### 3.1 Browser Support

| Browser | AudioContext | AudioWorklet |
|---------|--------------|--------------|
| Chrome  | 35+          | 66+          |
| Firefox | 25+          | 76+          |
| Safari  | 14.1+        | 14.1+        |

### 3.2 TypeScript Types

```ts
// types/audio.ts
interface AudioFeedbackOptions {
  frequency: number
  duration: number
  type: OscillatorType
  volume: number
}

interface SpatialAudioPosition {
  x: number
  y: number
  z: number
}
```
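Per the support table in 3.1, Safari only ships an unprefixed `AudioContext` from 14.1. If older WebKit must be supported, a feature-detect fallback is one option; a minimal sketch, not project code:

```ts
// Optional fallback for pre-14.1 Safari, which only exposes webkitAudioContext
const AudioCtx = window.AudioContext ?? (window as any).webkitAudioContext
const ctx: AudioContext = new AudioCtx()
```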
## 4. Implementation Patterns

### 4.1 Audio Context Management
```ts
// composables/useAudioContext.ts
import { ref, readonly, onUnmounted } from 'vue'

export function useAudioContext() {
  const audioContext = ref<AudioContext | null>(null)
  const isInitialized = ref(false)

  async function initialize() {
    if (audioContext.value) return
    audioContext.value = new AudioContext()
    if (audioContext.value.state === 'suspended') {
      await audioContext.value.resume()
    }
    isInitialized.value = true
  }

  onUnmounted(() => {
    audioContext.value?.close()
    audioContext.value = null
  })

  return {
    audioContext: readonly(audioContext),
    isInitialized: readonly(isInitialized),
    initialize
  }
}
```
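A minimal usage sketch (the handler name is hypothetical); the key constraint comes from this section: `initialize()` must run inside a user-gesture handler so `resume()` succeeds:

```ts
// Hypothetical component script: start audio only on the first user click
const { initialize, isInitialized } = useAudioContext()

async function onEnableAudioClick() {
  await initialize() // called from a user gesture, so the context can start
  console.log('audio ready:', isInitialized.value)
}
```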
### 4.2 HUD Beep Feedback

```ts
// composables/useHUDSounds.ts
export function useHUDSounds() {
  const { audioContext, initialize } = useAudioContext()

  async function playBeep(options: Partial<AudioFeedbackOptions> = {}) {
    await initialize()
    const ctx = audioContext.value
    if (!ctx) return

    const { frequency = 440, duration = 0.1, type = 'sine', volume = 0.3 } = options
    const safeVolume = Math.max(0, Math.min(1, volume))

    const oscillator = ctx.createOscillator()
    const gainNode = ctx.createGain()

    oscillator.type = type
    oscillator.frequency.value = frequency
    gainNode.gain.value = safeVolume
    gainNode.gain.exponentialRampToValueAtTime(0.001, ctx.currentTime + duration)

    oscillator.connect(gainNode).connect(ctx.destination)
    oscillator.start()
    oscillator.stop(ctx.currentTime + duration)
  }

  const sounds = {
    confirm: () => playBeep({ frequency: 880, duration: 0.1, volume: 0.2 }),
    alert: () => playBeep({ frequency: 440, duration: 0.3, type: 'square', volume: 0.4 }),
    error: () => playBeep({ frequency: 220, duration: 0.5, type: 'sawtooth', volume: 0.3 }),
    click: () => playBeep({ frequency: 1000, duration: 0.05, volume: 0.1 })
  }

  return { playBeep, sounds }
}
```
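Wiring the presets into HUD events might look like this; `confirmButton` and the `jarvis:alert` event name are illustrative, not project code:

```ts
// Hypothetical HUD wiring for the sound presets above
const { sounds } = useHUDSounds()

confirmButton.addEventListener('click', () => sounds.confirm()) // 880 Hz blip
window.addEventListener('jarvis:alert', () => sounds.alert())   // 440 Hz square warning
```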
### 4.3 Audio Visualization

```ts
// composables/useAudioVisualization.ts
export function useAudioVisualization() {
  const { audioContext, initialize } = useAudioContext()
  let analyser: AnalyserNode | null = null
  let dataArray: Uint8Array | null = null

  async function setupAnalyser(source: AudioNode) {
    await initialize()
    const ctx = audioContext.value
    if (!ctx) return
    analyser = ctx.createAnalyser()
    analyser.fftSize = 256
    dataArray = new Uint8Array(analyser.frequencyBinCount)
    source.connect(analyser)
  }

  function getFrequencyData(): Uint8Array | null {
    if (!analyser || !dataArray) return null
    analyser.getByteFrequencyData(dataArray)
    return dataArray
  }

  return { setupAnalyser, getFrequencyData }
}
```
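Feeding the frequency data into a render loop is the usual next step. A sketch assuming a 2D canvas; `canvasCtx`, `width`, and `height` are placeholders:

```ts
// Hypothetical render loop: draw one bar per frequency bin
function draw(canvasCtx: CanvasRenderingContext2D, width: number, height: number) {
  const data = getFrequencyData()
  if (data) {
    canvasCtx.clearRect(0, 0, width, height)
    const barWidth = width / data.length
    data.forEach((v, i) => {
      const barHeight = (v / 255) * height // byte values range 0-255
      canvasCtx.fillRect(i * barWidth, height - barHeight, barWidth, barHeight)
    })
  }
  requestAnimationFrame(() => draw(canvasCtx, width, height))
}
```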
### 4.4 Spatial Audio for 3D HUD

```ts
// composables/useSpatialAudio.ts
export function useSpatialAudio() {
  const { audioContext, initialize } = useAudioContext()
  let panner: PannerNode | null = null

  async function createSpatialSource(position: SpatialAudioPosition) {
    await initialize()
    const ctx = audioContext.value
    if (!ctx) return null
    panner = ctx.createPanner()
    panner.panningModel = 'HRTF'
    panner.distanceModel = 'inverse'
    setPosition(position)
    return panner
  }

  function setPosition(pos: SpatialAudioPosition) {
    if (!panner) return
    panner.positionX.value = pos.x
    panner.positionY.value = pos.y
    panner.positionZ.value = pos.z
  }

  return { createSpatialSource, setPosition }
}
```
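Attaching a source and moving it with a HUD element might look like this; the test-tone source and coordinates are illustrative:

```ts
// Hypothetical usage: route a test tone through the HRTF panner
const { createSpatialSource, setPosition } = useSpatialAudio()

const panner = await createSpatialSource({ x: 1, y: 0, z: -2 })
if (panner) {
  const ctx = panner.context as AudioContext
  const osc = ctx.createOscillator()
  osc.connect(panner).connect(ctx.destination)
  osc.start()
  setPosition({ x: -1, y: 0, z: -2 }) // track the HUD element as it moves
}
```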
### 4.5 Microphone Input

```ts
// composables/useMicrophone.ts
export function useMicrophone() {
  const { audioContext, initialize } = useAudioContext()
  const stream = ref<MediaStream | null>(null)
  const isListening = ref(false)
  const error = ref<string | null>(null)

  async function startListening() {
    try {
      await initialize()
      stream.value = await navigator.mediaDevices.getUserMedia({
        audio: { echoCancellation: true, noiseSuppression: true, autoGainControl: true }
      })
      isListening.value = true
      return stream.value
    } catch (err) {
      error.value = err instanceof Error ? err.message : 'Microphone access denied'
      return null
    }
  }

  function stopListening() {
    stream.value?.getTracks().forEach(track => track.stop())
    stream.value = null
    isListening.value = false
  }

  onUnmounted(() => stopListening())

  return {
    stream: readonly(stream),
    isListening: readonly(isListening),
    error: readonly(error),
    startListening,
    stopListening
  }
}
```
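The microphone stream plugs straight into the visualization composable from 4.3. A sketch; it assumes the composables share one audio context, which would require `useAudioContext` to return a singleton rather than fresh refs per call:

```ts
// Hypothetical glue: visualize live microphone input
const { audioContext } = useAudioContext() // assumes a shared/singleton context
const { startListening } = useMicrophone()
const { setupAnalyser, getFrequencyData } = useAudioVisualization()

const micStream = await startListening()
if (micStream && audioContext.value) {
  const source = audioContext.value.createMediaStreamSource(micStream)
  await setupAnalyser(source) // getFrequencyData() now reflects the mic
}
```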
## 5. Implementation Workflow (TDD)

### Step 1: Write Failing Test First

```ts
// tests/composables/useHUDSounds.test.ts
import { describe, it, expect, vi, beforeEach } from 'vitest'
import { useHUDSounds } from '~/composables/useHUDSounds'

// Mock AudioContext nodes
const mockOscillator = {
  connect: vi.fn(() => mockGainNode), // return the gain node so chained .connect() works
  start: vi.fn(),
  stop: vi.fn(),
  frequency: { value: 440 }
}
const mockGainNode = {
  connect: vi.fn(),
  gain: { value: 1, exponentialRampToValueAtTime: vi.fn() }
}
const mockAudioContext = {
  state: 'running',
  currentTime: 0,
  destination: {},
  createOscillator: vi.fn(() => mockOscillator),
  createGain: vi.fn(() => mockGainNode),
  resume: vi.fn(),
  close: vi.fn()
}
vi.stubGlobal('AudioContext', vi.fn(() => mockAudioContext))

describe('useHUDSounds', () => {
  beforeEach(() => vi.clearAllMocks())

  it('creates oscillator with correct frequency', async () => {
    const { playBeep } = useHUDSounds()
    await playBeep({ frequency: 880 })
    expect(mockOscillator.frequency.value).toBe(880)
  })

  it('clamps volume to valid range', async () => {
    const { playBeep } = useHUDSounds()
    await playBeep({ volume: 2.5 })
    expect(mockGainNode.gain.value).toBeLessThanOrEqual(1)
  })

  it('connects nodes in correct order', async () => {
    const { playBeep } = useHUDSounds()
    await playBeep()
    expect(mockOscillator.connect).toHaveBeenCalledWith(mockGainNode)
    expect(mockGainNode.connect).toHaveBeenCalledWith(mockAudioContext.destination)
  })
})
```
### Step 2: Implement Minimum to Pass

```ts
// composables/useHUDSounds.ts
export function useHUDSounds() {
  // Implementation from section 4.2
  // Only add features that tests require
}
```
### Step 3: Refactor Following Patterns

After tests pass, refactor to:

- Extract shared audio context logic
- Add proper TypeScript types
- Implement cleanup on unmount

### Step 4: Run Full Verification
```bash
# Run all audio-related tests
npm test -- --grep "audio|sound|HUD"

# Check types
npm run typecheck

# Verify no memory leaks in browser
npm run dev  # Test manually with DevTools Memory tab
```
## 6. Performance Patterns

### 6.1 AudioWorklet for Processing

```ts
// ✅ Good: Use AudioWorklet for DSP (runs on the audio thread)
class NoiseGateProcessor extends AudioWorkletProcessor {
  process(inputs: Float32Array[][], outputs: Float32Array[][]) {
    for (let ch = 0; ch < inputs[0].length; ch++) {
      for (let i = 0; i < inputs[0][ch].length; i++) {
        // Pass samples through only when they exceed the noise floor
        outputs[0][ch][i] = Math.abs(inputs[0][ch][i]) > 0.01 ? inputs[0][ch][i] : 0
      }
    }
    return true
  }
}
registerProcessor('noise-gate', NoiseGateProcessor)

// ❌ Bad: ScriptProcessorNode (deprecated, blocks main thread)
```
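To use the processor above, the main thread must first load the worklet module. A minimal sketch; the `/worklets/noise-gate.js` path is an assumption about this project's layout:

```ts
// Hypothetical setup helper: load the worklet once, then route audio through it
async function insertNoiseGate(ctx: AudioContext, source: AudioNode) {
  await ctx.audioWorklet.addModule('/worklets/noise-gate.js') // load once per context
  const gate = new AudioWorkletNode(ctx, 'noise-gate')        // name from registerProcessor()
  source.connect(gate).connect(ctx.destination)
  return gate
}
```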
### 6.2 Buffer Pooling

```ts
// ✅ Good: Reuse audio buffers
class AudioBufferPool {
  private pool: AudioBuffer[] = []

  constructor(ctx: AudioContext, size: number, length: number) {
    for (let i = 0; i < size; i++) {
      this.pool.push(ctx.createBuffer(2, length, ctx.sampleRate))
    }
  }

  acquire(): AudioBuffer | undefined {
    return this.pool.pop()
  }

  release(buffer: AudioBuffer) {
    for (let ch = 0; ch < buffer.numberOfChannels; ch++) {
      buffer.getChannelData(ch).fill(0)
    }
    this.pool.push(buffer)
  }
}

// ❌ Bad: Create new buffer each time
const buffer = ctx.createBuffer(2, 44100, 44100) // Allocates memory on every call
```
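Usage might look like this; the pool size and waveform data are illustrative, and `ctx` stands for the shared AudioContext:

```ts
// Hypothetical usage: 8 pooled stereo buffers of one second each
const pool = new AudioBufferPool(ctx, 8, ctx.sampleRate)
const samples = new Float32Array(ctx.sampleRate) // placeholder waveform data

const buf = pool.acquire()
if (buf) {
  buf.getChannelData(0).set(samples)
  // ...play via an AudioBufferSourceNode, then in its onended handler:
  pool.release(buf) // zeroes the channels and returns the buffer to the pool
}
```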
### 6.3 Offline Rendering

```ts
// ✅ Good: Pre-render complex sounds with an OfflineAudioContext
async function prerenderSound(): Promise<AudioBuffer> {
  // Minimal sketch: a single oscillator stands in for a complex sound.
  // Render 1 second of stereo audio at 44.1 kHz off the real-time thread.
  const offline = new OfflineAudioContext(2, 44100, 44100)
  const osc = offline.createOscillator()
  osc.connect(offline.destination)
  osc.start()
  return offline.startRendering()
}

// ❌ Bad: Generate complex sounds in real-time (multiple oscillators computed live)
```
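The rendered buffer can then be cached and replayed cheaply. A sketch, assuming `ctx` is the shared AudioContext:

```ts
const cachedAlert = await prerenderSound() // render once, e.g. at startup

function playCached(ctx: AudioContext) {
  const source = ctx.createBufferSource()
  source.buffer = cachedAlert
  source.connect(ctx.destination)
  source.start() // playback cost is just one buffer source
}
```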
### 6.4 Node Graph Optimization

```ts
// ✅ Good: Reuse a master gain node
const masterGain = ctx.createGain()
masterGain.connect(ctx.destination)

function playSound(buffer: AudioBuffer) {
  const source = ctx.createBufferSource()
  source.buffer = buffer
  source.connect(masterGain)
  source.start()
}

// ❌ Bad: Create a full chain for each sound (gain + compressor per play)
```
### 6.5 Memory Management

```ts
// ✅ Good: Disconnect and clean up nodes
function playOneShot(buffer: AudioBuffer) {
  const source = ctx.createBufferSource()
  source.buffer = buffer
  source.connect(masterGain)
  source.onended = () => source.disconnect()
  source.start()
}

// ✅ Good: Limit concurrent sounds (max 8)
class SoundManager {
  private activeSources = new Set<AudioBufferSourceNode>()
  // ...add sources on start, delete them in onended, refuse to play past 8
}

// ❌ Bad: Never clean up - nodes stay in memory after playback
const source = ctx.createBufferSource()
source.connect(ctx.destination)
source.start()
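```

A fuller sketch of the concurrency cap; the eviction policy (dropping new sounds once the cap is hit) is an assumption:

```ts
// Hypothetical expansion of SoundManager
class CappedSoundManager {
  private activeSources = new Set<AudioBufferSourceNode>()

  play(ctx: AudioContext, destination: AudioNode, buffer: AudioBuffer) {
    if (this.activeSources.size >= 8) return // cap reached: skip this sound
    const source = ctx.createBufferSource()
    source.buffer = buffer
    source.connect(destination)
    source.onended = () => {
      source.disconnect()
      this.activeSources.delete(source)
    }
    this.activeSources.add(source)
    source.start()
  }
}
```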
## 7. Quality Standards

```ts
// ✅ Always require a user gesture before starting audio
button.addEventListener('click', async () => {
  await audioContext.resume()
  playSound()
})

// ✅ Respect user preferences
if (usePreferencesStore().preferences.soundEnabled) playBeep()

// ✅ Handle permission denial gracefully
try {
  await navigator.mediaDevices.getUserMedia({ audio: true })
} catch (err) {
  if (err instanceof DOMException && err.name === 'NotAllowedError') {
    showVisualFeedback('Microphone access required')
  }
}
```
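One way to centralize the preference check; `usePreferencesStore` and its `soundEnabled` flag come from the snippet above, while the wrapper itself is hypothetical:

```ts
// Hypothetical wrapper: every HUD sound consults the user preference first
function useGatedSounds() {
  const { sounds } = useHUDSounds()
  const prefs = usePreferencesStore()
  const gate = (play: () => void) => () => {
    if (prefs.preferences.soundEnabled) play()
  }
  return {
    confirm: gate(sounds.confirm),
    alert: gate(sounds.alert),
    error: gate(sounds.error),
    click: gate(sounds.click)
  }
}
```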
## 8. Testing & Quality

```ts
describe('HUD Sounds', () => {
  it('validates volume bounds', async () => {
    const { playBeep } = useHUDSounds()
    await playBeep({ volume: 2 })  // Clamped to 1
    await playBeep({ volume: -1 }) // Clamped to 0
  })
})
```
## 9. Common Mistakes & Anti-Patterns

### 9.1 Critical Anti-Patterns

```ts
// ❌ Auto-play without user gesture - blocked by browsers
onMounted(() => playSound())

// ✅ After user interaction
const handleClick = async () => {
  await audioContext.resume()
  playSound()
}

// ❌ Memory leak - no cleanup
const audioContext = new AudioContext()

// ✅ Proper cleanup
onUnmounted(() => audioContext.close())

// ❌ New context per sound - performance killer
function playSound() { const ctx = new AudioContext() }

// ✅ Reuse one context
const ctx = new AudioContext()
function playSound() { /* reuse ctx */ }
```
## 10. Pre-Implementation Checklist

### Phase 1: Before Writing Code

- Tests written for audio node creation and connections
- Tests written for volume clamping and validation
- Performance requirements identified (latency, concurrent sounds)
- AudioWorklet needed for DSP? Worklet file created
- Buffer pool size calculated for expected usage

### Phase 2: During Implementation

- User gesture required for AudioContext initialization
- Audio context reused (not created per sound)
- Nodes disconnected in onended callbacks
- Volume bounds validated (0-1 range)
- Microphone permissions handled gracefully
- Error states provide visual feedback

### Phase 3: Before Committing

- All audio tests pass: `npm test -- --grep "audio"`
- Type checking passes: `npm run typecheck`
- No memory leaks (tested in DevTools Memory tab)
- Audio context closed on component unmount
- Visual alternatives provided for accessibility
- Sound can be disabled via user preferences
- Volume respects system preferences
## 11. Summary
Web Audio API for JARVIS: initialize after a user gesture, clean up on unmount, handle permission denials gracefully, and provide visual alternatives to audio feedback. See references/advanced-patterns.md for advanced patterns.