openpyxl是Python中最流行的Excel操作库之一,支持读写.xlsx格式文件。本指南将全面介绍openpyxl的使用方法及常见坑点。
pip install openpyxl
from openpyxl import Workbook, load_workbook
from openpyxl.utils import get_column_letter, column_index_from_string
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
# 创建工作簿
wb = Workbook()
# 获取活动工作表
ws = wb.active
ws.title = "我的工作表"
# 创建新工作表
ws1 = wb.create_sheet("第二个工作表") # 插入到最后
ws2 = wb.create_sheet("第三个工作表", 0) # 插入到第一个位置
# 保存工作簿
wb.save("示例.xlsx")
# 加载工作簿
wb = load_workbook("示例.xlsx")
# 查看所有工作表名称
print(wb.sheetnames) # ['第三个工作表', '我的工作表', '第二个工作表']
# 选择工作表
ws = wb["我的工作表"] # 按名称选择
ws = wb.worksheets[1] # 按索引选择
# 写入数据
ws["A1"] = "姓名"
ws["B1"] = "年龄"
ws["C1"] = "部门"
# 使用行列号写入
ws.cell(row=2, column=1, value="张三")
ws.cell(row=2, column=2, value=25)
ws.cell(row=2, column=3, value="技术部")
# 批量写入
data = [
["李四", 28, "市场部"],
["王五", 32, "销售部"],
["赵六", 26, "人力资源部"]
]
for row in data:
ws.append(row)
# 读取数据
print(ws["A2"].value) # 张三
print(ws.cell(row=2, column=2).value) # 25
# 读取范围数据
for row in ws.iter_rows(min_row=1, max_row=5, min_col=1, max_col=3, values_only=True):
print(row)
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
# 创建样式对象
header_font = Font(name="微软雅黑", size=12, bold=True, color="FFFFFF")
header_fill = PatternFill(fill_type="solid", fgColor="366092")
center_alignment = Alignment(horizontal="center", vertical="center")
thin_border = Border(
left=Side(style="thin"),
right=Side(style="thin"),
top=Side(style="thin"),
bottom=Side(style="thin")
)
# 应用样式
for cell in ws[1]: # 第一行
cell.font = header_font
cell.fill = header_fill
cell.alignment = center_alignment
cell.border = thin_border
# 设置列宽和行高
ws.column_dimensions["A"].width = 20
ws.column_dimensions["B"].width = 10
ws.column_dimensions["C"].width = 15
ws.row_dimensions[1].height = 25
# 设置数字格式
ws["B2"].number_format = "#,##0"
ws["B3"].number_format = "0.00%"
# 写入公式
ws["D1"] = "合计"
ws["D2"] = "=SUM(B2:B5)"
# 注意:openpyxl默认不计算公式,需要设置data_only=True读取计算值
ws["D2"].value = "=SUM(B2:B5)"
# 保存后重新加载
wb.save("示例.xlsx")
wb_calc = load_workbook("示例.xlsx", data_only=True)
ws_calc = wb_calc.active
print(ws_calc["D2"].value) # 如果Excel已经计算过,这里会显示计算值
# 合并单元格
ws.merge_cells("A1:C1")
ws["A1"] = "员工信息表"
ws["A1"].alignment = Alignment(horizontal="center", vertical="center")
# 取消合并
# ws.unmerge_cells("A1:C1")
from openpyxl.drawing.image import Image
# 插入图片
img = Image("logo.png")
img.width = 100
img.height = 100
ws.add_image(img, "E1")
# 添加筛选器
ws.auto_filter.ref = "A1:C5"
# 注意:openpyxl只设置筛选区域,不执行实际筛选
from openpyxl.chart import BarChart, Reference, Series
# 创建柱状图
chart = BarChart()
chart.title = "年龄分布"
chart.x_axis.title = "姓名"
chart.y_axis.title = "年龄"
# 指定数据范围
data = Reference(ws, min_col=2, min_row=1, max_row=5, max_col=2)
categories = Reference(ws, min_col=1, min_row=2, max_row=5)
# 添加数据到图表
chart.add_data(data, titles_from_data=True)
chart.set_categories(categories)
# 将图表插入工作表
ws.add_chart(chart, "E10")
# 低效方式(不推荐)
for i in range(1, 1001):
for j in range(1, 51):
ws.cell(row=i, column=j, value=f"数据{i}-{j}")
# 高效方式(推荐)
data = []
for i in range(1, 1001):
row_data = [f"数据{i}-{j}" for j in range(1, 51)]
data.append(row_data)
for row in data:
ws.append(row)
from openpyxl import load_workbook
# 只读模式,适用于大文件
wb = load_workbook("大数据文件.xlsx", read_only=True)
ws = wb.active
# 逐行读取,节省内存
for row in ws.iter_rows(values_only=True):
print(row)
# 注意:在只读模式下,不能修改工作簿
from openpyxl import Workbook
from openpyxl.writer.excel import save_workbook
# 只写模式,适用于生成大文件
wb = Workbook(write_only=True)
ws = wb.create_sheet()
# 使用append添加行(必须是可迭代对象)
for row in range(1, 10001):
ws.append([f"数据{row}-{col}" for col in range(1, 51)])
wb.save("大文件.xlsx")
问题:openpyxl保存的公式不会自动计算 解决方案:
# 方案1:使用data_only=True读取已计算的值(需要Excel先计算过)
wb = load_workbook("文件.xlsx", data_only=True)
# 方案2:使用Python计算并写入结果
import pandas as pd
def calculate_formula(ws, cell_address):
"""简单公式计算示例"""
formula = ws[cell_address].value
if formula and formula.startswith("="):
if "SUM" in formula:
# 提取SUM范围,如SUM(B2:B10)
range_str = formula.split("(")[1].split(")")[0]
# 实际应用中需要更完整的公式解析
return None
问题:Excel和Python的日期格式不同 解决方案:
from datetime import datetime
from openpyxl.utils import datetime
# 正确写入日期
ws["A1"] = datetime.datetime.now()
# 设置日期格式
ws["A1"].number_format = "YYYY-MM-DD HH:MM:SS"
# 读取日期
excel_date = ws["A1"].value
if isinstance(excel_date, datetime.datetime):
print(excel_date.strftime("%Y-%m-%d"))
问题:修改单元格样式可能影响其他单元格 解决方案:
# 错误方式:直接修改样式会影响其他单元格
bad_font = ws["A1"].font
bad_font.bold = True # 这会修改所有使用此字体的单元格
# 正确方式:创建新样式对象
from copy import copy
# 复制样式并修改
new_font = copy(ws["A1"].font)
new_font.bold = True
ws["A1"].font = new_font
问题:处理大文件时内存占用高 解决方案:
# 使用iter_rows分批处理
batch_size = 1000
rows_processed = 0
for row in ws.iter_rows(min_row=2, values_only=True):
# 处理数据
process_row(row)
rows_processed += 1
if rows_processed % batch_size == 0:
print(f"已处理 {rows_processed} 行")
# 及时关闭工作簿
del wb
问题:特殊字符和路径错误 解决方案:
import os
from pathlib import Path
def safe_save(wb, filename):
"""安全保存函数"""
# 处理路径
path = Path(filename)
# 确保目录存在
path.parent.mkdir(parents=True, exist_ok=True)
# 清理文件名中的特殊字符
safe_name = "".join(c for c in path.name if c.isalnum() or c in "._- ")
# 构建安全路径
safe_path = path.parent / safe_name
try:
wb.save(safe_path)
return safe_path
except Exception as e:
print(f"保存失败: {e}")
return None
问题:中文字符显示异常 解决方案:
# 设置中文字体
from openpyxl.styles import Font
chinese_font = Font(name="微软雅黑", size=11)
ws["A1"].font = chinese_font
# 或者使用系统字体
import platform
system = platform.system()
if system == "Windows":
font_name = "微软雅黑"
elif system == "Darwin": # macOS
font_name = "PingFang SC"
else: # Linux
font_name = "WenQuanYi Micro Hei"
chinese_font = Font(name=font_name, size=11)
def apply_style_to_range(ws, cell_range, style_dict):
"""为指定范围应用样式"""
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
for row in ws[cell_range]:
for cell in row:
if "font" in style_dict:
cell.font = style_dict["font"]
if "fill" in style_dict:
cell.fill = style_dict["fill"]
if "alignment" in style_dict:
cell.alignment = style_dict["alignment"]
if "border" in style_dict:
cell.border = style_dict["border"]
from openpyxl.worksheet.datavalidation import DataValidation
def add_data_validation(ws, cell, options):
"""添加数据验证"""
dv = DataValidation(
type="list",
formula1=f'"{",".join(options)}"',
allow_blank=True
)
dv.add(cell)
ws.add_data_validation(dv)
class ExcelReport:
"""Excel报告生成器"""
def __init__(self, filename):
self.wb = Workbook()
self.ws = self.wb.active
self.filename = filename
def add_title(self, title, cell="A1", merge_range=None):
"""添加标题"""
if merge_range:
self.ws.merge_cells(merge_range)
self.ws[cell] = title
title_cell = self.ws[cell]
else:
title_cell = self.ws[cell]
title_cell.value = title
title_cell.font = Font(size=16, bold=True)
title_cell.alignment = Alignment(horizontal="center")
def add_table(self, headers, data, start_cell="A3"):
"""添加表格"""
# 添加表头
start_col = column_index_from_string(start_cell[0])
start_row = int(start_cell[1:])
for i, header in enumerate(headers):
cell = self.ws.cell(row=start_row, column=start_col+i)
cell.value = header
cell.font = Font(bold=True)
cell.fill = PatternFill(fill_type="solid", fgColor="E0E0E0")
# 添加数据
for i, row_data in enumerate(data):
for j, value in enumerate(row_data):
cell = self.ws.cell(row=start_row+i+1, column=start_col+j)
cell.value = value
def save(self):
"""保存文件"""
self.wb.save(self.filename)
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from datetime import datetime
def create_employee_report():
# 创建Workbook
wb = Workbook()
ws = wb.active
ws.title = "员工信息"
# 数据
headers = ["工号", "姓名", "部门", "入职日期", "薪资", "绩效评级"]
employees = [
["001", "张三", "技术部", datetime(2020, 3, 15), 15000, "A"],
["002", "李四", "市场部", datetime(2019, 7, 22), 12000, "B"],
["003", "王五", "销售部", datetime(2021, 1, 10), 13000, "A"],
["004", "赵六", "人力资源部", datetime(2018, 11, 5), 11000, "C"],
["005", "钱七", "技术部", datetime(2022, 5, 30), 14000, "B"]
]
# 添加标题
ws.merge_cells("A1:F1")
title_cell = ws["A1"]
title_cell.value = "员工信息表"
title_cell.font = Font(name="微软雅黑", size=18, bold=True, color="366092")
title_cell.alignment = Alignment(horizontal="center", vertical="center")
# 添加表头
header_font = Font(bold=True, color="FFFFFF")
header_fill = PatternFill(fill_type="solid", fgColor="366092")
center_alignment = Alignment(horizontal="center", vertical="center")
for col_idx, header in enumerate(headers, start=1):
cell = ws.cell(row=2, column=col_idx)
cell.value = header
cell.font = header_font
cell.fill = header_fill
cell.alignment = center_alignment
# 添加数据
for row_idx, employee in enumerate(employees, start=3):
for col_idx, value in enumerate(employee, start=1):
cell = ws.cell(row=row_idx, column=col_idx)
cell.value = value
# 设置日期格式
if isinstance(value, datetime):
cell.number_format = "YYYY-MM-DD"
# 设置数字格式
if col_idx == 5: # 薪资列
cell.number_format = "#,##0"
# 设置对齐
cell.alignment = Alignment(horizontal="center")
# 设置列宽
column_widths = [10, 15, 15, 15, 12, 12]
for i, width in enumerate(column_widths, start=1):
column_letter = get_column_letter(i)
ws.column_dimensions[column_letter].width = width
# 添加边框
thin_border = Border(
left=Side(style="thin"),
right=Side(style="thin"),
top=Side(style="thin"),
bottom=Side(style="thin")
)
for row in ws.iter_rows(min_row=2, max_row=len(employees)+2,
min_col=1, max_col=len(headers)):
for cell in row:
cell.border = thin_border
# 保存文件
filename = f"员工信息表_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
wb.save(filename)
print(f"文件已保存: {filename}")
return filename
# 执行生成
create_employee_report()
with语句或确保正确关闭文件
性能优化:对于大数据量使用read_only和write_only模式
样式管理:创建样式对象并复用,避免重复创建
错误处理:添加适当的异常处理机制
代码组织:将常用功能封装为函数或类
兼容性:注意不同Excel版本的兼容性问题
资源清理:及时删除不需要的对象释放内存
通过掌握以上内容,您将能够高效地使用openpyxl进行Excel自动化操作,并避免常见的陷阱。