azure-ai-voicelive-java

安装量: 42
排名: #17234

安装

npx skills add https://github.com/sickn33/antigravity-awesome-skills --skill azure-ai-voicelive-java
Azure AI VoiceLive SDK for Java
Real-time, bidirectional voice conversations with AI assistants using WebSocket technology.
Installation
```xml
<dependency>
    <groupId>com.azure</groupId>
    <artifactId>azure-ai-voicelive</artifactId>
    <version>1.0.0-beta.2</version>
</dependency>
```
Environment Variables
```bash
AZURE_VOICELIVE_ENDPOINT=https://<resource>.openai.azure.com/
AZURE_VOICELIVE_API_KEY=<your-api-key>
```
Authentication
API Key
```java
import com.azure.ai.voicelive.VoiceLiveAsyncClient;
import com.azure.ai.voicelive.VoiceLiveClientBuilder;
import com.azure.core.credential.AzureKeyCredential;

VoiceLiveAsyncClient client = new VoiceLiveClientBuilder()
    .endpoint(System.getenv("AZURE_VOICELIVE_ENDPOINT"))
    .credential(new AzureKeyCredential(System.getenv("AZURE_VOICELIVE_API_KEY")))
    .buildAsyncClient();
```
DefaultAzureCredential (Recommended)
```java
import com.azure.identity.DefaultAzureCredentialBuilder;

VoiceLiveAsyncClient client = new VoiceLiveClientBuilder()
    .endpoint(System.getenv("AZURE_VOICELIVE_ENDPOINT"))
    .credential(new DefaultAzureCredentialBuilder().build())
    .buildAsyncClient();
```
Key Concepts
Concept
Description
VoiceLiveAsyncClient
Main entry point for voice sessions
VoiceLiveSessionAsyncClient
Active WebSocket connection for streaming
VoiceLiveSessionOptions
Configuration for session behavior
Audio Requirements
Sample Rate
24kHz (24000 Hz)
Bit Depth
16-bit PCM
Channels
Mono (1 channel)
Format
Signed PCM, little-endian

Core Workflow

1. Start Session

```java
import reactor.core.publisher.Mono;

client.startSession("gpt-4o-realtime-preview")
    .flatMap(session -> {
        System.out.println("Session started");

        // Subscribe to events
        session.receiveEvents()
            .subscribe(
                event -> System.out.println("Event: " + event.getType()),
                error -> System.err.println("Error: " + error.getMessage())
            );

        return Mono.just(session);
    })
    .block();
```

2. Configure Session Options

```java
import com.azure.ai.voicelive.models.*;
import java.util.Arrays;

ServerVadTurnDetection turnDetection = new ServerVadTurnDetection()
    .setThreshold(0.5)            // Sensitivity (0.0-1.0)
    .setPrefixPaddingMs(300)      // Audio before speech
    .setSilenceDurationMs(500)    // Silence to end turn
    .setInterruptResponse(true)   // Allow interruptions
    .setAutoTruncate(true)
    .setCreateResponse(true);

AudioInputTranscriptionOptions transcription =
    new AudioInputTranscriptionOptions(AudioInputTranscriptionOptionsModel.WHISPER_1);

VoiceLiveSessionOptions options = new VoiceLiveSessionOptions()
    .setInstructions("You are a helpful AI voice assistant.")
    .setVoice(BinaryData.fromObject(new OpenAIVoice(OpenAIVoiceName.ALLOY)))
    .setModalities(Arrays.asList(InteractionModality.TEXT, InteractionModality.AUDIO))
    .setInputAudioFormat(InputAudioFormat.PCM16)
    .setOutputAudioFormat(OutputAudioFormat.PCM16)
    .setInputAudioSamplingRate(24000)
    .setInputAudioNoiseReduction(new AudioNoiseReduction(AudioNoiseReductionType.NEAR_FIELD))
    .setInputAudioEchoCancellation(new AudioEchoCancellation())
    .setInputAudioTranscription(transcription)
    .setTurnDetection(turnDetection);

// Send configuration
ClientEventSessionUpdate updateEvent = new ClientEventSessionUpdate(options);
session.sendEvent(updateEvent).subscribe();
```

3. Send Audio Input

```java
byte[] audioData = readAudioChunk(); // Your PCM16 audio data
session.sendInputAudio(BinaryData.fromBytes(audioData)).subscribe();
```

4. Handle Events

```java
session.receiveEvents().subscribe(event -> {
    ServerEventType eventType = event.getType();
    if (ServerEventType.SESSION_CREATED.equals(eventType)) {
        System.out.println("Session created");
    } else if (ServerEventType.INPUT_AUDIO_BUFFER_SPEECH_STARTED.equals(eventType)) {
        System.out.println("User started speaking");
    } else if (ServerEventType.INPUT_AUDIO_BUFFER_SPEECH_STOPPED.equals(eventType)) {
        System.out.println("User stopped speaking");
    } else if (ServerEventType.RESPONSE_AUDIO_DELTA.equals(eventType)) {
        if (event instanceof SessionUpdateResponseAudioDelta) {
            SessionUpdateResponseAudioDelta audioEvent = (SessionUpdateResponseAudioDelta) event;
            playAudioChunk(audioEvent.getDelta());
        }
    } else if (ServerEventType.RESPONSE_DONE.equals(eventType)) {
        System.out.println("Response complete");
    } else if (ServerEventType.ERROR.equals(eventType)) {
        if (event instanceof SessionUpdateError) {
            SessionUpdateError errorEvent = (SessionUpdateError) event;
            System.err.println("Error: " + errorEvent.getError().getMessage());
        }
    }
});
```

Voice Configuration

OpenAI Voices

```java
// Available: ALLOY, ASH, BALLAD, CORAL, ECHO, SAGE, SHIMMER, VERSE
VoiceLiveSessionOptions options = new VoiceLiveSessionOptions()
    .setVoice(BinaryData.fromObject(new OpenAIVoice(OpenAIVoiceName.ALLOY)));
```

Azure Voices

```java
// Azure Standard Voice
options.setVoice(BinaryData.fromObject(new AzureStandardVoice("en-US-JennyNeural")));

// Azure Custom Voice
options.setVoice(BinaryData.fromObject(new AzureCustomVoice("myVoice", "endpointId")));

// Azure Personal Voice
options.setVoice(BinaryData.fromObject(
    new AzurePersonalVoice("speakerProfileId", PersonalVoiceModels.PHOENIX_LATEST_NEURAL)));
```

Function Calling

```java
VoiceLiveFunctionDefinition weatherFunction = new VoiceLiveFunctionDefinition("get_weather")
    .setDescription("Get current weather for a location")
    .setParameters(BinaryData.fromObject(parametersSchema));

VoiceLiveSessionOptions options = new VoiceLiveSessionOptions()
    .setTools(Arrays.asList(weatherFunction))
    .setInstructions("You have access to weather information.");
```

Best Practices

- Use the async client — VoiceLive requires reactive patterns
- Configure turn detection for natural conversation flow
- Enable noise reduction for better speech recognition
- Handle interruptions gracefully with setInterruptResponse(true)
- Use Whisper transcription for input audio transcription
- Close sessions properly when the conversation ends

Error Handling

```java
session.receiveEvents()
    .doOnError(error -> System.err.println("Connection error: " + error.getMessage()))
    .onErrorResume(error -> {
        // Attempt reconnection or cleanup
        return Flux.empty();
    })
    .subscribe();
```

Reference Links

Resource | URL
--- | ---
GitHub Source | https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/ai/azure-ai-voicelive
Samples | https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/ai/azure-ai-voicelive/src/samples

When to Use

Use this skill when you need real-time, bidirectional voice conversations with an AI assistant from Java — for example, building a voice agent that streams microphone audio to the service and plays back streamed audio responses over the WebSocket connection described above.
返回排行榜