azure-ai-document-intelligence-ts

安装量: 50
排名: #14933

安装

npx skills add https://github.com/sickn33/antigravity-awesome-skills --skill azure-ai-document-intelligence-ts
Azure Document Intelligence REST SDK for TypeScript
Extract text, tables, and structured data from documents using prebuilt and custom models.
Installation
npm install @azure-rest/ai-document-intelligence @azure/identity
Environment Variables
DOCUMENT_INTELLIGENCE_ENDPOINT=https://<resource>.cognitiveservices.azure.com
DOCUMENT_INTELLIGENCE_API_KEY=<api-key>
Authentication
Important
This is a REST client. DocumentIntelligence is a function , not a class. DefaultAzureCredential import DocumentIntelligence from "@azure-rest/ai-document-intelligence" ; import { DefaultAzureCredential } from "@azure/identity" ; const client = DocumentIntelligence ( process . env . DOCUMENT_INTELLIGENCE_ENDPOINT ! , new DefaultAzureCredential ( ) ) ; API Key import DocumentIntelligence from "@azure-rest/ai-document-intelligence" ; const client = DocumentIntelligence ( process . env . DOCUMENT_INTELLIGENCE_ENDPOINT ! , { key : process . env . DOCUMENT_INTELLIGENCE_API_KEY ! } ) ; Analyze Document (URL) import DocumentIntelligence , { isUnexpected , getLongRunningPoller , AnalyzeOperationOutput } from "@azure-rest/ai-document-intelligence" ; const initialResponse = await client . path ( "/documentModels/{modelId}:analyze" , "prebuilt-layout" ) . post ( { contentType : "application/json" , body : { urlSource : "https://example.com/document.pdf" } , queryParameters : { locale : "en-US" } } ) ; if ( isUnexpected ( initialResponse ) ) { throw initialResponse . body . error ; } const poller = getLongRunningPoller ( client , initialResponse ) ; const result = ( await poller . pollUntilDone ( ) ) . body as AnalyzeOperationOutput ; console . log ( "Pages:" , result . analyzeResult ?. pages ?. length ) ; console . log ( "Tables:" , result . analyzeResult ?. tables ?. length ) ; Analyze Document (Local File) import { readFile } from "node:fs/promises" ; const fileBuffer = await readFile ( "./document.pdf" ) ; const base64Source = fileBuffer . toString ( "base64" ) ; const initialResponse = await client . path ( "/documentModels/{modelId}:analyze" , "prebuilt-invoice" ) . post ( { contentType : "application/json" , body : { base64Source } } ) ; if ( isUnexpected ( initialResponse ) ) { throw initialResponse . body . error ; } const poller = getLongRunningPoller ( client , initialResponse ) ; const result = ( await poller . pollUntilDone ( ) ) . 
body as AnalyzeOperationOutput ; Prebuilt Models Model ID Description prebuilt-read OCR - text and language extraction prebuilt-layout Text, tables, selection marks, structure prebuilt-invoice Invoice fields prebuilt-receipt Receipt fields prebuilt-idDocument ID document fields prebuilt-tax.us.w2 W-2 tax form fields prebuilt-healthInsuranceCard.us Health insurance card fields prebuilt-contract Contract fields prebuilt-bankStatement.us Bank statement fields Extract Invoice Fields const initialResponse = await client . path ( "/documentModels/{modelId}:analyze" , "prebuilt-invoice" ) . post ( { contentType : "application/json" , body : { urlSource : invoiceUrl } } ) ; if ( isUnexpected ( initialResponse ) ) { throw initialResponse . body . error ; } const poller = getLongRunningPoller ( client , initialResponse ) ; const result = ( await poller . pollUntilDone ( ) ) . body as AnalyzeOperationOutput ; const invoice = result . analyzeResult ?. documents ?. [ 0 ] ; if ( invoice ) { console . log ( "Vendor:" , invoice . fields ?. VendorName ?. content ) ; console . log ( "Total:" , invoice . fields ?. InvoiceTotal ?. content ) ; console . log ( "Due Date:" , invoice . fields ?. DueDate ?. content ) ; } Extract Receipt Fields const initialResponse = await client . path ( "/documentModels/{modelId}:analyze" , "prebuilt-receipt" ) . post ( { contentType : "application/json" , body : { urlSource : receiptUrl } } ) ; const poller = getLongRunningPoller ( client , initialResponse ) ; const result = ( await poller . pollUntilDone ( ) ) . body as AnalyzeOperationOutput ; const receipt = result . analyzeResult ?. documents ?. [ 0 ] ; if ( receipt ) { console . log ( "Merchant:" , receipt . fields ?. MerchantName ?. content ) ; console . log ( "Total:" , receipt . fields ?. Total ?. content ) ; for ( const item of receipt . fields ?. Items ?. values || [ ] ) { console . log ( "Item:" , item . properties ?. Description ?. content ) ; console . log ( "Price:" , item . properties ?. 
TotalPrice ?. content ) ; } } List Document Models import DocumentIntelligence , { isUnexpected , paginate } from "@azure-rest/ai-document-intelligence" ; const response = await client . path ( "/documentModels" ) . get ( ) ; if ( isUnexpected ( response ) ) { throw response . body . error ; } for await ( const model of paginate ( client , response ) ) { console . log ( model . modelId ) ; } Build Custom Model const initialResponse = await client . path ( "/documentModels:build" ) . post ( { body : { modelId : "my-custom-model" , description : "Custom model for purchase orders" , buildMode : "template" , // or "neural" azureBlobSource : { containerUrl : process . env . TRAINING_CONTAINER_SAS_URL ! , prefix : "training-data/" } } } ) ; if ( isUnexpected ( initialResponse ) ) { throw initialResponse . body . error ; } const poller = getLongRunningPoller ( client , initialResponse ) ; const result = await poller . pollUntilDone ( ) ; console . log ( "Model built:" , result . body ) ; Build Document Classifier import { DocumentClassifierBuildOperationDetailsOutput } from "@azure-rest/ai-document-intelligence" ; const containerSasUrl = process . env . TRAINING_CONTAINER_SAS_URL ! ; const initialResponse = await client . path ( "/documentClassifiers:build" ) . post ( { body : { classifierId : "my-classifier" , description : "Invoice vs Receipt classifier" , docTypes : { invoices : { azureBlobSource : { containerUrl : containerSasUrl , prefix : "invoices/" } } , receipts : { azureBlobSource : { containerUrl : containerSasUrl , prefix : "receipts/" } } } } } ) ; if ( isUnexpected ( initialResponse ) ) { throw initialResponse . body . error ; } const poller = getLongRunningPoller ( client , initialResponse ) ; const result = ( await poller . pollUntilDone ( ) ) . body as DocumentClassifierBuildOperationDetailsOutput ; console . log ( "Classifier:" , result . result ?. classifierId ) ; Classify Document const initialResponse = await client . 
path ( "/documentClassifiers/{classifierId}:analyze" , "my-classifier" ) . post ( { contentType : "application/json" , body : { urlSource : documentUrl } , queryParameters : { split : "auto" } } ) ; if ( isUnexpected ( initialResponse ) ) { throw initialResponse . body . error ; } const poller = getLongRunningPoller ( client , initialResponse ) ; const result = await poller . pollUntilDone ( ) ; console . log ( "Classification:" , result . body . analyzeResult ?. documents ) ; Get Service Info const response = await client . path ( "/info" ) . get ( ) ; if ( isUnexpected ( response ) ) { throw response . body . error ; } console . log ( "Custom model limit:" , response . body . customDocumentModels . limit ) ; console . log ( "Custom model count:" , response . body . customDocumentModels . count ) ; Polling Pattern import DocumentIntelligence , { isUnexpected , getLongRunningPoller , AnalyzeOperationOutput } from "@azure-rest/ai-document-intelligence" ; // 1. Start operation const initialResponse = await client . path ( "/documentModels/{modelId}:analyze" , "prebuilt-layout" ) . post ( { contentType : "application/json" , body : { urlSource } } ) ; // 2. Check for errors if ( isUnexpected ( initialResponse ) ) { throw initialResponse . body . error ; } // 3. Create poller const poller = getLongRunningPoller ( client , initialResponse ) ; // 4. Optional: Monitor progress poller . onProgress ( ( state ) => { console . log ( "Status:" , state . status ) ; } ) ; // 5. Wait for completion const result = ( await poller . pollUntilDone ( ) ) . 
body as AnalyzeOperationOutput ; Key Types import DocumentIntelligence , { isUnexpected , getLongRunningPoller , paginate , parseResultIdFromResponse , AnalyzeOperationOutput , DocumentClassifierBuildOperationDetailsOutput } from "@azure-rest/ai-document-intelligence" ;
Best Practices
- Use getLongRunningPoller() - Document analysis is asynchronous; always poll for results.
- Check isUnexpected() - It is a type guard that enables proper error handling.
- Choose the right model - Use prebuilt models when possible, and custom models for specialized documents.
- Handle confidence scores - Fields have confidence values; set thresholds for your use case.
- Use pagination - Use the paginate() helper when listing models.
- Prefer neural mode - For custom models, neural handles more variation than template.
When to Use
Use this skill when you need to carry out the workflows described above with the Azure Document Intelligence REST SDK for TypeScript: analyzing documents with prebuilt or custom models, building custom models and classifiers, and classifying documents.
返回排行榜