# Azure Data Lake Storage Gen2 SDK for Python

Hierarchical file system for big data analytics workloads.

## Installation

```
pip install azure-storage-file-datalake azure-identity
```

## Environment Variables

```
AZURE_STORAGE_ACCOUNT_URL=https://<account>.dfs.core.windows.net
```

## Authentication

```python
from azure.identity import DefaultAzureCredential
from azure.storage.filedatalake import DataLakeServiceClient

credential = DefaultAzureCredential()
account_url = "https://<account>.dfs.core.windows.net"
service_client = DataLakeServiceClient(account_url=account_url, credential=credential)
```

## Client Hierarchy

| Client | Purpose |
|---|---|
| `DataLakeServiceClient` | Account-level operations |
| `FileSystemClient` | Container (file system) operations |
| `DataLakeDirectoryClient` | Directory operations |
| `DataLakeFileClient` | File operations |

## File System Operations
```python
# Create file system (container)
file_system_client = service_client.create_file_system("myfilesystem")

# Get existing
file_system_client = service_client.get_file_system_client("myfilesystem")

# Delete
service_client.delete_file_system("myfilesystem")

# List file systems
for fs in service_client.list_file_systems():
    print(fs.name)
```

## Directory Operations

```python
file_system_client = service_client.get_file_system_client("myfilesystem")
```
```python
# Create directory
directory_client = file_system_client.create_directory("mydir")

# Create nested directories
directory_client = file_system_client.create_directory("path/to/nested/dir")

# Get directory client
directory_client = file_system_client.get_directory_client("mydir")

# Delete directory
directory_client.delete_directory()

# Rename/move directory
directory_client.rename_directory(new_name="myfilesystem/newname")
```

## File Operations

### Upload File
```python
# Get file client
file_client = file_system_client.get_file_client("path/to/file.txt")

# Upload from local file
with open("local-file.txt", "rb") as data:
    file_client.upload_data(data, overwrite=True)

# Upload bytes
file_client.upload_data(b"Hello, Data Lake!", overwrite=True)

# Append data (for large files)
file_client.append_data(data=b"chunk1", offset=0, length=6)
file_client.append_data(data=b"chunk2", offset=6, length=6)

# Commit the data
file_client.flush_data(12)
```
### Download File

```python
file_client = file_system_client.get_file_client("path/to/file.txt")

# Download all content
download = file_client.download_file()
content = download.readall()

# Download to file
with open("downloaded.txt", "wb") as f:
    download = file_client.download_file()
    download.readinto(f)

# Download range
download = file_client.download_file(offset=0, length=100)
```

### Delete File

```python
file_client.delete_file()
```

## List Contents
```python
# List paths (files and directories)
for path in file_system_client.get_paths():
    print(f"{'DIR' if path.is_directory else 'FILE'}: {path.name}")

# List paths in directory
for path in file_system_client.get_paths(path="mydir"):
    print(path.name)

# Recursive listing
for path in file_system_client.get_paths(path="mydir", recursive=True):
    print(path.name)
```

## File/Directory Properties
```python
# Get properties
properties = file_client.get_file_properties()
print(f"Size: {properties.size}")
print(f"Last modified: {properties.last_modified}")

# Set metadata
file_client.set_metadata(metadata={"processed": "true"})
```

## Access Control (ACL)
```python
# Get ACL
acl = directory_client.get_access_control()
print(f"Owner: {acl['owner']}")
print(f"Permissions: {acl['permissions']}")

# Set ACL
directory_client.set_access_control(owner="user-id", permissions="rwxr-x---")
```
```python
# Update ACL entries recursively
from azure.storage.filedatalake import AccessControlChangeResult

directory_client.update_access_control_recursive(acl="user:user-id:rwx")
```
## Async Client

```python
from azure.storage.filedatalake.aio import DataLakeServiceClient
from azure.identity.aio import DefaultAzureCredential

async def datalake_operations():
    credential = DefaultAzureCredential()
    async with DataLakeServiceClient(account_url="https://