Azure AI Vision Image Analysis SDK for Java Build image analysis applications using the Azure AI Vision Image Analysis SDK for Java. Installation
<dependency>
  <groupId>com.azure</groupId>
  <artifactId>azure-ai-vision-imageanalysis</artifactId>
  <version>1.1.0-beta.1</version>
</dependency>
Client Creation With API Key import com . azure . ai . vision . imageanalysis . ImageAnalysisClient ; import com . azure . ai . vision . imageanalysis . ImageAnalysisClientBuilder ; import com . azure . core . credential . KeyCredential ; String endpoint = System . getenv ( "VISION_ENDPOINT" ) ; String key = System . getenv ( "VISION_KEY" ) ; ImageAnalysisClient client = new ImageAnalysisClientBuilder ( ) . endpoint ( endpoint ) . credential ( new KeyCredential ( key ) ) . buildClient ( ) ; Async Client import com . azure . ai . vision . imageanalysis . ImageAnalysisAsyncClient ; ImageAnalysisAsyncClient asyncClient = new ImageAnalysisClientBuilder ( ) . endpoint ( endpoint ) . credential ( new KeyCredential ( key ) ) . buildAsyncClient ( ) ; With DefaultAzureCredential import com . azure . identity . DefaultAzureCredentialBuilder ; ImageAnalysisClient client = new ImageAnalysisClientBuilder ( ) . endpoint ( endpoint ) . credential ( new DefaultAzureCredentialBuilder ( ) . build ( ) ) . buildClient ( ) ; Visual Features Feature Description CAPTION Generate human-readable image description DENSE_CAPTIONS Captions for up to 10 regions READ OCR - Extract text from images TAGS Content tags for objects, scenes, actions OBJECTS Detect objects with bounding boxes SMART_CROPS Smart thumbnail regions PEOPLE Detect people with locations Core Patterns Generate Caption import com . azure . ai . vision . imageanalysis . models . * ; import com . azure . core . util . BinaryData ; import java . io . File ; import java . util . Arrays ; // From file BinaryData imageData = BinaryData . fromFile ( new File ( "image.jpg" ) . toPath ( ) ) ; ImageAnalysisResult result = client . analyze ( imageData , Arrays . asList ( VisualFeatures . CAPTION ) , new ImageAnalysisOptions ( ) . setGenderNeutralCaption ( true ) ) ; System . out . printf ( "Caption: \"%s\" (confidence: %.4f)%n" , result . getCaption ( ) . getText ( ) , result . getCaption ( ) . 
getConfidence ( ) ) ; Generate Caption from URL ImageAnalysisResult result = client . analyzeFromUrl ( "https://example.com/image.jpg" , Arrays . asList ( VisualFeatures . CAPTION ) , new ImageAnalysisOptions ( ) . setGenderNeutralCaption ( true ) ) ; System . out . printf ( "Caption: \"%s\"%n" , result . getCaption ( ) . getText ( ) ) ; Extract Text (OCR) ImageAnalysisResult result = client . analyze ( BinaryData . fromFile ( new File ( "document.jpg" ) . toPath ( ) ) , Arrays . asList ( VisualFeatures . READ ) , null ) ; for ( DetectedTextBlock block : result . getRead ( ) . getBlocks ( ) ) { for ( DetectedTextLine line : block . getLines ( ) ) { System . out . printf ( "Line: '%s'%n" , line . getText ( ) ) ; System . out . printf ( " Bounding polygon: %s%n" , line . getBoundingPolygon ( ) ) ; for ( DetectedTextWord word : line . getWords ( ) ) { System . out . printf ( " Word: '%s' (confidence: %.4f)%n" , word . getText ( ) , word . getConfidence ( ) ) ; } } } Detect Objects ImageAnalysisResult result = client . analyzeFromUrl ( imageUrl , Arrays . asList ( VisualFeatures . OBJECTS ) , null ) ; for ( DetectedObject obj : result . getObjects ( ) ) { System . out . printf ( "Object: %s (confidence: %.4f)%n" , obj . getTags ( ) . get ( 0 ) . getName ( ) , obj . getTags ( ) . get ( 0 ) . getConfidence ( ) ) ; ImageBoundingBox box = obj . getBoundingBox ( ) ; System . out . printf ( " Location: x=%d, y=%d, w=%d, h=%d%n" , box . getX ( ) , box . getY ( ) , box . getWidth ( ) , box . getHeight ( ) ) ; } Get Tags ImageAnalysisResult result = client . analyzeFromUrl ( imageUrl , Arrays . asList ( VisualFeatures . TAGS ) , null ) ; for ( DetectedTag tag : result . getTags ( ) ) { System . out . printf ( "Tag: %s (confidence: %.4f)%n" , tag . getName ( ) , tag . getConfidence ( ) ) ; } Detect People ImageAnalysisResult result = client . analyzeFromUrl ( imageUrl , Arrays . asList ( VisualFeatures . PEOPLE ) , null ) ; for ( DetectedPerson person : result . 
getPeople ( ) ) { ImageBoundingBox box = person . getBoundingBox ( ) ; System . out . printf ( "Person at x=%d, y=%d (confidence: %.4f)%n" , box . getX ( ) , box . getY ( ) , person . getConfidence ( ) ) ; } Smart Cropping ImageAnalysisResult result = client . analyzeFromUrl ( imageUrl , Arrays . asList ( VisualFeatures . SMART_CROPS ) , new ImageAnalysisOptions ( ) . setSmartCropsAspectRatios ( Arrays . asList ( 1.0 , 1.5 ) ) ) ; for ( CropRegion crop : result . getSmartCrops ( ) ) { System . out . printf ( "Crop region: aspect=%.2f, x=%d, y=%d, w=%d, h=%d%n" , crop . getAspectRatio ( ) , crop . getBoundingBox ( ) . getX ( ) , crop . getBoundingBox ( ) . getY ( ) , crop . getBoundingBox ( ) . getWidth ( ) , crop . getBoundingBox ( ) . getHeight ( ) ) ; } Dense Captions ImageAnalysisResult result = client . analyzeFromUrl ( imageUrl , Arrays . asList ( VisualFeatures . DENSE_CAPTIONS ) , new ImageAnalysisOptions ( ) . setGenderNeutralCaption ( true ) ) ; for ( DenseCaption caption : result . getDenseCaptions ( ) ) { System . out . printf ( "Caption: \"%s\" (confidence: %.4f)%n" , caption . getText ( ) , caption . getConfidence ( ) ) ; System . out . printf ( " Region: x=%d, y=%d, w=%d, h=%d%n" , caption . getBoundingBox ( ) . getX ( ) , caption . getBoundingBox ( ) . getY ( ) , caption . getBoundingBox ( ) . getWidth ( ) , caption . getBoundingBox ( ) . getHeight ( ) ) ; } Multiple Features ImageAnalysisResult result = client . analyzeFromUrl ( imageUrl , Arrays . asList ( VisualFeatures . CAPTION , VisualFeatures . TAGS , VisualFeatures . OBJECTS , VisualFeatures . READ ) , new ImageAnalysisOptions ( ) . setGenderNeutralCaption ( true ) . setLanguage ( "en" ) ) ; // Access all results System . out . println ( "Caption: " + result . getCaption ( ) . getText ( ) ) ; System . out . println ( "Tags: " + result . getTags ( ) . size ( ) ) ; System . out . println ( "Objects: " + result . getObjects ( ) . size ( ) ) ; System . out . println ( "Text blocks: " + result . 
getRead ( ) . getBlocks ( ) . size ( ) ) ; Async Analysis asyncClient . analyzeFromUrl ( imageUrl , Arrays . asList ( VisualFeatures . CAPTION ) , null ) . subscribe ( result -> System . out . println ( "Caption: " + result . getCaption ( ) . getText ( ) ) , error -> System . err . println ( "Error: " + error . getMessage ( ) ) , ( ) -> System . out . println ( "Complete" ) ) ; Error Handling import com . azure . core . exception . HttpResponseException ; try { client . analyzeFromUrl ( imageUrl , Arrays . asList ( VisualFeatures . CAPTION ) , null ) ; } catch ( HttpResponseException e ) { System . out . println ( "Status: " + e . getResponse ( ) . getStatusCode ( ) ) ; System . out . println ( "Error: " + e . getMessage ( ) ) ; } Environment Variables VISION_ENDPOINT = https:// < resource
>.cognitiveservices.azure.com/ VISION_KEY = <your-api-key>
Image Requirements: Formats: JPEG, PNG, GIF, BMP, WEBP, ICO, TIFF, MPO. Size: less than 20 MB. Dimensions: 50x50 to 16000x16000 pixels. Regional Availability: Caption and Dense Captions require GPU-supported regions. Check supported regions before deployment. Trigger Phrases: "image analysis Java", "Azure Vision SDK", "image captioning", "OCR image text extraction", "object detection image", "smart crop thumbnail", "detect people image". When to Use: Use this skill when building Java applications that analyze images with the Azure AI Vision Image Analysis SDK, for example captioning, OCR text extraction, tagging, object or people detection, and smart cropping.