Openpyxl is a Python library for reading and writing Excel 2010+ xlsx/xlsm files. This skill covers comprehensive patterns for spreadsheet automation including:
-
Workbook creation with multiple worksheets
-
Cell operations including formatting, merging, and data validation
-
Formula support for calculations and dynamic content
-
Chart generation for data visualization within Excel
-
Conditional formatting for visual data analysis
-
Large dataset handling with optimized read/write modes
-
Pivot table creation for data summarization
-
Style management for professional appearances
When to Use This Skill
USE when:
-
Creating Excel reports with formulas and calculations
-
Generating spreadsheets from database queries
-
Automating financial reports and dashboards
-
Building Excel templates with formatting
-
Processing and transforming existing Excel files
-
Creating charts and visualizations in Excel
-
Applying conditional formatting rules
-
Building data entry forms with validation
-
Handling large datasets (100k+ rows)
-
Creating pivot tables programmatically
DON'T USE when:
-
Only need to read data into pandas (use pandas.read_excel directly)
-
Need real-time Excel manipulation (use xlwings on Windows)
-
Working with .xls format (use xlrd/xlwt)
-
Creating complex macros (requires VBA)
-
Need Excel-specific features like Power Query
Prerequisites
Installation
# Basic installation
pip install openpyxl
# Using uv (recommended)
uv pip install openpyxl
# With image support
pip install openpyxl Pillow
# With pandas integration
pip install openpyxl pandas
# Full installation
pip install openpyxl Pillow pandas numpy
Verify Installation
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border
from openpyxl.chart import BarChart, LineChart, PieChart
from openpyxl.utils.dataframe import dataframe_to_rows
print("openpyxl installed successfully!")
Core Capabilities
1. Basic Workbook Creation
"""
Create a basic Excel workbook with data and formatting.
"""
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from datetime import datetime
def create_basic_workbook(output_path: str) -> None:
"""Create a basic workbook with common elements."""
# Create workbook and select active sheet
wb = Workbook()
ws = wb.active
ws.title = "Sales Report"
# Set document properties
wb.properties.creator = "Excel Generator"
wb.properties.title = "Monthly Sales Report"
wb.properties.created = datetime.now()
# Define styles
header_font = Font(bold=True, color="FFFFFF", size=12)
header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
header_alignment = Alignment(horizontal="center", vertical="center")
thin_border = Border(
left=Side(style='thin'),
right=Side(style='thin'),
top=Side(style='thin'),
bottom=Side(style='thin')
)
# Headers
headers = ["Product", "Q1", "Q2", "Q3", "Q4", "Total"]
for col, header in enumerate(headers, start=1):
cell = ws.cell(row=1, column=col, value=header)
cell.font = header_font
cell.fill = header_fill
cell.alignment = header_alignment
cell.border = thin_border
# Data
data = [
["Widget A", 1500, 1800, 2200, 2500],
["Widget B", 800, 950, 1100, 1300],
["Widget C", 2000, 2300, 2600, 2900],
["Widget D", 500, 600, 750, 900],
]
for row_idx, row_data in enumerate(data, start=2):
# Product name
ws.cell(row=row_idx, column=1, value=row_data[0]).border = thin_border
# Quarterly values
for col_idx, value in enumerate(row_data[1:], start=2):
cell = ws.cell(row=row_idx, column=col_idx, value=value)
cell.border = thin_border
cell.number_format = '#,##0'
# Total formula
total_cell = ws.cell(
row=row_idx,
column=6,
value=f"=SUM(B{row_idx}:E{row_idx})"
)
total_cell.border = thin_border
total_cell.font = Font(bold=True)
total_cell.number_format = '#,##0'
# Add totals row
total_row = len(data) + 2
ws.cell(row=total_row, column=1, value="TOTAL").font = Font(bold=True)
for col in range(2, 7):
col_letter = get_column_letter(col)
cell = ws.cell(
row=total_row,
column=col,
value=f"=SUM({col_letter}2:{col_letter}{total_row-1})"
)
cell.font = Font(bold=True)
cell.number_format = '#,##0'
cell.border = thin_border
# Adjust column widths
column_widths = [15, 12, 12, 12, 12, 14]
for i, width in enumerate(column_widths, start=1):
ws.column_dimensions[get_column_letter(i)].width = width
# Freeze header row
ws.freeze_panes = "A2"
# Save workbook
wb.save(output_path)
print(f"Workbook saved to {output_path}")
create_basic_workbook("sales_report.xlsx")
2. Advanced Cell Formatting
"""
Advanced cell formatting with styles, merging, and data validation.
"""
from openpyxl import Workbook
from openpyxl.styles import (
Font, PatternFill, Alignment, Border, Side,
GradientFill, NamedStyle, Color
)
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.formatting.rule import Rule, CellIsRule, FormulaRule
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.datavalidation import DataValidation
def create_formatted_workbook(output_path: str) -> None:
"""Create workbook with advanced formatting."""
wb = Workbook()
ws = wb.active
ws.title = "Formatted Data"
# Create named styles for reuse
header_style = NamedStyle(name="header_style")
header_style.font = Font(bold=True, color="FFFFFF", size=11)
header_style.fill = PatternFill(start_color="2F5496", fill_type="solid")
header_style.alignment = Alignment(horizontal="center", vertical="center")
header_style.border = Border(
bottom=Side(style='medium', color="1F4E79")
)
wb.add_named_style(header_style)
currency_style = NamedStyle(name="currency_style")
currency_style.number_format = '"$"#,##0.00'
currency_style.alignment = Alignment(horizontal="right")
wb.add_named_style(currency_style)
percentage_style = NamedStyle(name="percentage_style")
percentage_style.number_format = '0.0%'
percentage_style.alignment = Alignment(horizontal="center")
wb.add_named_style(percentage_style)
# Title with merged cells
ws.merge_cells('A1:F1')
title_cell = ws['A1']
title_cell.value = "Financial Summary Report"
title_cell.font = Font(bold=True, size=16, color="1F4E79")
title_cell.alignment = Alignment(horizontal="center", vertical="center")
ws.row_dimensions[1].height = 30
# Subtitle
ws.merge_cells('A2:F2')
subtitle_cell = ws['A2']
subtitle_cell.value = "Fiscal Year 2026"
subtitle_cell.font = Font(italic=True, size=12, color="5B9BD5")
subtitle_cell.alignment = Alignment(horizontal="center")
ws.row_dimensions[2].height = 20
# Headers row
headers = ["Category", "Budget", "Actual", "Variance", "% of Budget", "Status"]
for col, header in enumerate(headers, start=1):
cell = ws.cell(row=4, column=col, value=header)
cell.style = "header_style"
# Data with various formats
data = [
["Revenue", 1000000, 1150000],
["Personnel", 500000, 485000],
["Operations", 200000, 215000],
["Marketing", 150000, 142000],
["Technology", 100000, 108000],
]
for row_idx, (category, budget, actual) in enumerate(data, start=5):
# Category
ws.cell(row=row_idx, column=1, value=category)
# Budget
ws.cell(row=row_idx, column=2, value=budget).style = "currency_style"
# Actual
ws.cell(row=row_idx, column=3, value=actual).style = "currency_style"
# Variance formula
variance_cell = ws.cell(row=row_idx, column=4)
variance_cell.value = f"=C{row_idx}-B{row_idx}"
variance_cell.style = "currency_style"
# Percentage formula
pct_cell = ws.cell(row=row_idx, column=5)
pct_cell.value = f"=C{row_idx}/B{row_idx}"
pct_cell.style = "percentage_style"
# Status (will be filled by conditional formatting)
ws.cell(row=row_idx, column=6, value="")
# Add conditional formatting for variance column
# Green for positive, red for negative
green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
ws.conditional_formatting.add(
'D5:D9',
CellIsRule(
operator='greaterThan',
formula=['0'],
fill=green_fill,
font=Font(color="006100")
)
)
ws.conditional_formatting.add(
'D5:D9',
CellIsRule(
operator='lessThan',
formula=['0'],
fill=red_fill,
font=Font(color="9C0006")
)
)
# Data validation for status column
status_validation = DataValidation(
type="list",
formula1='"On Track,At Risk,Over Budget,Under Budget"',
allow_blank=True
)
status_validation.error = "Please select from the dropdown"
status_validation.errorTitle = "Invalid Status"
ws.add_data_validation(status_validation)
status_validation.add('F5:F9')
# Gradient fill example
ws['A12'] = "Gradient Fill Example"
ws['A12'].fill = GradientFill(
stop=["4472C4", "70AD47"],
degree=90
)
ws['A12'].font = Font(color="FFFFFF", bold=True)
ws.merge_cells('A12:C12')
# Column widths
widths = {'A': 15, 'B': 15, 'C': 15, 'D': 15, 'E': 15, 'F': 15}
for col, width in widths.items():
ws.column_dimensions[col].width = width
wb.save(output_path)
print(f"Formatted workbook saved to {output_path}")
create_formatted_workbook("formatted_report.xlsx")
3. Chart Generation
"""
Create various chart types in Excel.
"""
from openpyxl import Workbook
from openpyxl.chart import (
BarChart, LineChart, PieChart, AreaChart, ScatterChart,
Reference, Series
)
from openpyxl.chart.label import DataLabelList
from openpyxl.chart.layout import Layout, ManualLayout
def create_charts_workbook(output_path: str) -> None:
"""Create workbook with various chart examples."""
wb = Workbook()
ws = wb.active
ws.title = "Chart Data"
# Sample data for charts
data = [
["Month", "Sales", "Expenses", "Profit"],
["Jan", 15000, 12000, 3000],
["Feb", 18000, 13000, 5000],
["Mar", 22000, 14500, 7500],
["Apr", 20000, 14000, 6000],
["May", 25000, 15000, 10000],
["Jun", 28000, 16000, 12000],
]
for row in data:
ws.append(row)
# Bar Chart
bar_chart = BarChart()
bar_chart.type = "col"
bar_chart.grouping = "clustered"
bar_chart.title = "Monthly Financial Overview"
bar_chart.y_axis.title = "Amount ($)"
bar_chart.x_axis.title = "Month"
# Data references
data_ref = Reference(ws, min_col=2, max_col=4, min_row=1, max_row=7)
cats_ref = Reference(ws, min_col=1, min_row=2, max_row=7)
bar_chart.add_data(data_ref, titles_from_data=True)
bar_chart.set_categories(cats_ref)
bar_chart.shape = 4 # Rounded corners
# Style the chart
bar_chart.style = 10
bar_chart.width = 15
bar_chart.height = 10
ws.add_chart(bar_chart, "F2")
# Line Chart
line_chart = LineChart()
line_chart.title = "Profit Trend"
line_chart.y_axis.title = "Profit ($)"
line_chart.x_axis.title = "Month"
line_chart.style = 12
profit_data = Reference(ws, min_col=4, min_row=1, max_row=7)
line_chart.add_data(profit_data, titles_from_data=True)
line_chart.set_categories(cats_ref)
# Add markers
line_chart.series[0].marker.symbol = "circle"
line_chart.series[0].marker.size = 7
line_chart.series[0].graphicalProperties.line.width = 25000 # in EMUs
ws.add_chart(line_chart, "F18")
# Pie Chart on new sheet
pie_ws = wb.create_sheet("Pie Chart")
pie_data = [
["Category", "Value"],
["Product A", 35],
["Product B", 25],
["Product C", 20],
["Product D", 15],
["Other", 5],
]
for row in pie_data:
pie_ws.append(row)
pie_chart = PieChart()
pie_chart.title = "Sales by Product Category"
pie_data_ref = Reference(pie_ws, min_col=2, min_row=2, max_row=6)
pie_labels_ref = Reference(pie_ws, min_col=1, min_row=2, max_row=6)
pie_chart.add_data(pie_data_ref)
pie_chart.set_categories(pie_labels_ref)
# Add data labels with percentages
pie_chart.dataLabels = DataLabelList()
pie_chart.dataLabels.showPercent = True
pie_chart.dataLabels.showVal = False
pie_chart.dataLabels.showCatName = True
pie_ws.add_chart(pie_chart, "D2")
# Stacked Area Chart
area_ws = wb.create_sheet("Area Chart")
area_data = [
["Quarter", "Region A", "Region B", "Region C"],
["Q1", 5000, 4000, 3000],
["Q2", 6000, 4500, 3500],
["Q3", 7000, 5000, 4000],
["Q4", 8000, 5500, 4500],
]
for row in area_data:
area_ws.append(row)
area_chart = AreaChart()
area_chart.title = "Regional Sales Growth"
area_chart.style = 13
area_chart.grouping = "stacked"
area_data_ref = Reference(area_ws, min_col=2, max_col=4, min_row=1, max_row=5)
area_cats_ref = Reference(area_ws, min_col=1, min_row=2, max_row=5)
area_chart.add_data(area_data_ref, titles_from_data=True)
area_chart.set_categories(area_cats_ref)
area_ws.add_chart(area_chart, "F2")
# Scatter Chart
scatter_ws = wb.create_sheet("Scatter Chart")
scatter_data = [
["X", "Y"],
[1, 2.5],
[2, 4.1],
[3, 5.8],
[4, 8.2],
[5, 10.1],
[6, 12.5],
[7, 14.8],
]
for row in scatter_data:
scatter_ws.append(row)
scatter_chart = ScatterChart()
scatter_chart.title = "Correlation Analysis"
scatter_chart.x_axis.title = "X Values"
scatter_chart.y_axis.title = "Y Values"
scatter_chart.style = 13
x_values = Reference(scatter_ws, min_col=1, min_row=2, max_row=8)
y_values = Reference(scatter_ws, min_col=2, min_row=2, max_row=8)
series = Series(y_values, x_values, title="Data Points")
scatter_chart.series.append(series)
# Add trendline
from openpyxl.chart.trendline import Trendline
series.trendline = Trendline(trendlineType='linear')
scatter_ws.add_chart(scatter_chart, "D2")
wb.save(output_path)
print(f"Charts workbook saved to {output_path}")
create_charts_workbook("charts_example.xlsx")
4. Conditional Formatting
""" Apply conditional formatting rules for visual data analysis. """ from openpyxl import Workbook from openpyxl.styles import PatternFill, Font, Border, Side from openpyxl.formatting.rule import ( ColorScaleRule, DataBarRule, IconSetRule, CellIsRule, FormulaRule, Rule ) from openpyxl.styles.differential import DifferentialStyle from openpyxl.utils import get_column_letter
def create_conditional_formatting_workbook(output_path: str) -> None: """Create workbook demonstrating conditional formatting.""" wb = Workbook()
# Sheet 1: Color Scales
ws1 = wb.active
ws1.title = "Color Scales"
# Header
ws1['A1'] = "Performance Scores"
ws1['A1'].font = Font(bold=True, size=14)
# Data
scores = [85, 72, 91, 68, 95, 78, 82, 60, 88, 75, 93, 71, 86, 79, 64]
for i, score in enumerate(scores, start=3):
ws1.cell(row=i, column=1, value=f"Employee {i-2}")
ws1.cell(row=i, column=2, value=score)
# Apply 3-color scale (red-yellow-green)
color_scale_rule = ColorScaleRule(
start_type='min',
start_color='F8696B', # Red
mid_type=<span class="token s