安装
npx skills add https://github.com/sickn33/antigravity-awesome-skills --skill azure-ai-document-intelligence-ts
- Azure Document Intelligence REST SDK for TypeScript
- Extract text, tables, and structured data from documents using prebuilt and custom models.
- Installation
- npm install @azure-rest/ai-document-intelligence @azure/identity
- Environment Variables
- DOCUMENT_INTELLIGENCE_ENDPOINT=https://<resource>.cognitiveservices.azure.com
- DOCUMENT_INTELLIGENCE_API_KEY=<api-key>
- Authentication
- Important: This is a REST client. DocumentIntelligence is a function, not a class.
DefaultAzureCredential
// Create the client using a DefaultAzureCredential from @azure/identity.
import DocumentIntelligence from "@azure-rest/ai-document-intelligence";
import { DefaultAzureCredential } from "@azure/identity";

// The endpoint is read from the environment; `!` asserts that it is set.
const endpoint = process.env.DOCUMENT_INTELLIGENCE_ENDPOINT!;
const credential = new DefaultAzureCredential();

// DocumentIntelligence is a function, not a class, so it is called without `new`.
const client = DocumentIntelligence(endpoint, credential);
API Key
// Create the client using an API key instead of a token credential.
import DocumentIntelligence from "@azure-rest/ai-document-intelligence";

// Both the endpoint and the key are read from the environment.
const endpoint = process.env.DOCUMENT_INTELLIGENCE_ENDPOINT!;
const keyCredential = { key: process.env.DOCUMENT_INTELLIGENCE_API_KEY! };

const client = DocumentIntelligence(endpoint, keyCredential);
Analyze Document (URL)
import DocumentIntelligence, {
  isUnexpected,
  getLongRunningPoller,
  AnalyzeOperationOutput,
} from "@azure-rest/ai-document-intelligence";

// Start an analysis of a document by URL with the prebuilt layout model.
const analyzeRoute = client.path(
  "/documentModels/{modelId}:analyze",
  "prebuilt-layout",
);
const initialResponse = await analyzeRoute.post({
  contentType: "application/json",
  body: { urlSource: "https://example.com/document.pdf" },
  queryParameters: { locale: "en-US" },
});

// Surface the service error before attempting to poll.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// Poll until the operation finishes, then read the final response body.
const poller = getLongRunningPoller(client, initialResponse);
const finalResponse = await poller.pollUntilDone();
const result = finalResponse.body as AnalyzeOperationOutput;

const analyzeResult = result.analyzeResult;
console.log("Pages:", analyzeResult?.pages?.length);
console.log("Tables:", analyzeResult?.tables?.length);
Analyze Document (Local File)
import { readFile } from "node:fs/promises";

// Read the local file and encode it as base64 for the request body.
const fileBuffer = await readFile("./document.pdf");
const base64Source = fileBuffer.toString("base64");

// Start an analysis with the prebuilt invoice model.
const analyzeRoute = client.path(
  "/documentModels/{modelId}:analyze",
  "prebuilt-invoice",
);
const initialResponse = await analyzeRoute.post({
  contentType: "application/json",
  body: { base64Source },
});

// Surface the service error before attempting to poll.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// Poll until the operation finishes, then read the final response body.
const poller = getLongRunningPoller(client, initialResponse);
const finalResponse = await poller.pollUntilDone();
const result = finalResponse.body as AnalyzeOperationOutput;
Prebuilt Models
| Model ID | Description |
| --- | --- |
| prebuilt-read | OCR - text and language extraction |
| prebuilt-layout | Text, tables, selection marks, structure |
| prebuilt-invoice | Invoice fields |
| prebuilt-receipt | Receipt fields |
| prebuilt-idDocument | ID document fields |
| prebuilt-tax.us.w2 | W-2 tax form fields |
| prebuilt-healthInsuranceCard.us | Health insurance card fields |
| prebuilt-contract | Contract fields |
| prebuilt-bankStatement.us | Bank statement fields |
Extract Invoice Fields
// Analyze an invoice by URL with the prebuilt invoice model.
const invoiceRoute = client.path(
  "/documentModels/{modelId}:analyze",
  "prebuilt-invoice",
);
const initialResponse = await invoiceRoute.post({
  contentType: "application/json",
  body: { urlSource: invoiceUrl },
});

// Surface the service error before attempting to poll.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

const poller = getLongRunningPoller(client, initialResponse);
const finalResponse = await poller.pollUntilDone();
const result = finalResponse.body as AnalyzeOperationOutput;

// Only the first extracted document is inspected.
const invoice = result.analyzeResult?.documents?.[0];
if (invoice) {
  const invoiceFields = invoice.fields;
  console.log("Vendor:", invoiceFields?.VendorName?.content);
  console.log("Total:", invoiceFields?.InvoiceTotal?.content);
  console.log("Due Date:", invoiceFields?.DueDate?.content);
}
Extract Receipt Fields
// Analyze a receipt by URL with the prebuilt receipt model and print its
// merchant, total, and line items.
const initialResponse = await client
  .path("/documentModels/{modelId}:analyze", "prebuilt-receipt")
  .post({
    contentType: "application/json",
    body: { urlSource: receiptUrl },
  });

// Check the initial response before polling, as every other long-running
// sample on this page does; otherwise a failed request reaches the poller.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

const poller = getLongRunningPoller(client, initialResponse);
const result = (await poller.pollUntilDone()).body as AnalyzeOperationOutput;

// Only the first extracted document is inspected.
const receipt = result.analyzeResult?.documents?.[0];
if (receipt) {
  console.log("Merchant:", receipt.fields?.MerchantName?.content);
  console.log("Total:", receipt.fields?.Total?.content);

  // In the REST SDK's field output, array elements are under `valueArray`
  // and an object field's sub-fields are under `valueObject`. The previous
  // `values` / `properties` names do not exist on this type, so the loop
  // silently iterated over an empty list.
  for (const item of receipt.fields?.Items?.valueArray ?? []) {
    console.log("Item:", item.valueObject?.Description?.content);
    console.log("Price:", item.valueObject?.TotalPrice?.content);
  }
}
List Document Models
import DocumentIntelligence, {
  isUnexpected,
  paginate,
} from "@azure-rest/ai-document-intelligence";

// Request the first page of document models.
const response = await client.path("/documentModels").get();

if (isUnexpected(response)) {
  throw response.body.error;
}

// paginate() yields models one at a time, starting from the first response.
const models = paginate(client, response);
for await (const { modelId } of models) {
  console.log(modelId);
}
Build Custom Model
// Training data location: a container SAS URL from the environment plus a prefix.
const trainingSource = {
  containerUrl: process.env.TRAINING_CONTAINER_SAS_URL!,
  prefix: "training-data/",
};

// Start building the custom model.
const initialResponse = await client.path("/documentModels:build").post({
  body: {
    modelId: "my-custom-model",
    description: "Custom model for purchase orders",
    buildMode: "template", // or "neural"
    azureBlobSource: trainingSource,
  },
});

// Surface the service error before attempting to poll.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// Wait for the build operation to finish and print the final response body.
const poller = getLongRunningPoller(client, initialResponse);
const result = await poller.pollUntilDone();
console.log("Model built:", result.body);
Build Document Classifier
import { DocumentClassifierBuildOperationDetailsOutput } from "@azure-rest/ai-document-intelligence";

const containerSasUrl = process.env.TRAINING_CONTAINER_SAS_URL!;

// Both document types read from the same container and differ only by prefix.
const blobSourceFor = (prefix: string) => ({
  azureBlobSource: { containerUrl: containerSasUrl, prefix },
});

// Start building a classifier with one entry per document type.
const initialResponse = await client.path("/documentClassifiers:build").post({
  body: {
    classifierId: "my-classifier",
    description: "Invoice vs Receipt classifier",
    docTypes: {
      invoices: blobSourceFor("invoices/"),
      receipts: blobSourceFor("receipts/"),
    },
  },
});

// Surface the service error before attempting to poll.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// Wait for the build operation to finish and read the operation details.
const poller = getLongRunningPoller(client, initialResponse);
const finalResponse = await poller.pollUntilDone();
const result = finalResponse.body as DocumentClassifierBuildOperationDetailsOutput;
console.log("Classifier:", result.result?.classifierId);
Classify Document
// Classify a document by URL with the "my-classifier" classifier.
const classifierRoute = client.path(
  "/documentClassifiers/{classifierId}:analyze",
  "my-classifier",
);
const initialResponse = await classifierRoute.post({
  contentType: "application/json",
  body: { urlSource: documentUrl },
  queryParameters: { split: "auto" },
});

// Surface the service error before attempting to poll.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// Wait for classification to finish and print the resulting documents.
const poller = getLongRunningPoller(client, initialResponse);
const result = await poller.pollUntilDone();
const classifiedDocuments = result.body.analyzeResult?.documents;
console.log("Classification:", classifiedDocuments);
Get Service Info
// Fetch service information for the resource.
const response = await client.path("/info").get();

if (isUnexpected(response)) {
  throw response.body.error;
}

// Print the limit and current count of custom document models.
const { customDocumentModels } = response.body;
console.log("Custom model limit:", customDocumentModels.limit);
console.log("Custom model count:", customDocumentModels.count);
Polling Pattern
import DocumentIntelligence, {
  isUnexpected,
  getLongRunningPoller,
  AnalyzeOperationOutput,
} from "@azure-rest/ai-document-intelligence";

// 1. Start operation
const analyzeRoute = client.path(
  "/documentModels/{modelId}:analyze",
  "prebuilt-layout",
);
const initialResponse = await analyzeRoute.post({
  contentType: "application/json",
  body: { urlSource },
});

// 2. Check for errors
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// 3. Create poller
const poller = getLongRunningPoller(client, initialResponse);

// 4. Optional: Monitor progress
poller.onProgress((state) => {
  console.log("Status:", state.status);
});

// 5. Wait for completion
const finalResponse = await poller.pollUntilDone();
const result = finalResponse.body as AnalyzeOperationOutput;
Key Types
// DocumentIntelligence (the default export) is a function, not a class.
import DocumentIntelligence, {
  // Type guard used for error handling on responses.
  isUnexpected,
  // Creates the poller used to wait for asynchronous operations.
  getLongRunningPoller,
  // Helper for iterating listed models.
  paginate,
  parseResultIdFromResponse,
  // Types used to cast the final response body of long-running operations.
  AnalyzeOperationOutput,
  DocumentClassifierBuildOperationDetailsOutput,
} from "@azure-rest/ai-document-intelligence";
Best Practices
- Use getLongRunningPoller() - Document analysis is asynchronous; always poll for results.
- Check isUnexpected() - It is a type guard that enables proper error handling.
- Choose the right model - Use prebuilt models when possible and custom models for specialized documents.
- Handle confidence scores - Fields have confidence values; set thresholds for your use case.
- Use pagination - Use the paginate() helper when listing models.
- Prefer neural mode - For custom models, neural handles more variation than template.
When to Use
Use this skill when you need to carry out the workflows or actions described in the overview above.
← 返回排行榜