Skip to content

Commit 5c1e5d1

Browse files
authored
Merge pull request #6 from dataiku/feature/dss12-sc-181423-multi-sheet-excel-export-apply-a-formatting
Feature/dss12 sc 181423 multi sheet excel export apply a formatting
2 parents 93c02d1 + fdc0cdb commit 5c1e5d1

3 files changed

Lines changed: 110 additions & 1 deletion

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## [Version 1.1.2](https://github.com/dataiku/dss-plugin-multisheet-excel-export/releases/tag/v1.1.2) - Feature release - 2024-05
4+
- Style exported worksheet header
5+
- Auto-size columns to fit header
6+
37
## [Version 1.1.1](https://github.com/dataiku/dss-plugin-multisheet-excel-export/releases/tag/v1.1.0) - Chore release - 2023-08
48
- Use python library to create temp file instead of a custom cache
59

plugin.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"id" : "multisheet-excel-export",
3-
"version" : "1.1.1",
3+
"version" : "1.1.2",
44

55

66
"meta" : {

python-lib/xlsx_writer.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,102 @@
77
"""
88

99
import logging
10+
import math
11+
from typing import Tuple
12+
13+
from openpyxl.styles import Alignment, Font, PatternFill, Side
14+
from openpyxl.styles.borders import Border
15+
from openpyxl.styles.colors import WHITE
16+
from openpyxl.utils import get_column_letter
17+
from openpyxl.worksheet.dimensions import ColumnDimension, DimensionHolder
18+
from openpyxl.worksheet.worksheet import Worksheet
1019
import pandas as pd
1120

21+
DATAIKU_TEAL = "FF2AB1AC"
22+
LETTER_WIDTH = 1.20 # Approximate width of one letter, used to scale a character count into a column width
23+
MAX_LENGTH_TO_SHOW = 45 # Limit copied from DSS native excel exporter
24+
1225
logger = logging.getLogger(__name__)
1326
logging.basicConfig(level=logging.INFO, format='Multi-Sheet Excel Exporter | %(levelname)s - %(message)s')
1427

28+
def style_header(worksheet: Worksheet,
                 font_name: str = "Calibri",
                 font_size: int = 11,
                 font_color: str = WHITE,
                 background_color: str = DATAIKU_TEAL,
                 bold: bool = True
                 ) -> None:
    """
    Style the header (first row) of the worksheet.

    Every cell of row 1 receives the requested font, a solid background fill,
    an explicit "no border" border and a bottom / centered alignment.
    The worksheet is modified in place.

    :param worksheet: openpyxl worksheet whose first row is styled
    :param font_name: name of the header font
    :param font_size: size of the header font
    :param font_color: color of the header font
    :param background_color: color of the solid fill applied to header cells
    :param bold: whether the header font is bold
    """

    # NOTE(review): openpyxl documents min_column as a 1-based index, so this guard
    # may never trigger, even for an empty sheet - confirm and tighten if needed.
    if worksheet.min_column < 1:
        # Logger.warn is a deprecated alias of Logger.warning
        logger.warning("No header row for worksheet %s. Styling skipped.", worksheet)
        return

    font = Font(name=font_name, size=font_size, color=font_color, bold=bold)
    fill = PatternFill("solid", fgColor=background_color)

    # The same "no border" side is applied on all four edges of each header cell
    no_border_side = Side(border_style=None)
    border = Border(left=no_border_side, right=no_border_side, top=no_border_side, bottom=no_border_side)

    alignment = Alignment(vertical='bottom', horizontal='center')

    # worksheet[1] yields the cells of the first row
    for header_cell in worksheet[1]:
        header_cell.font = font
        header_cell.fill = fill
        header_cell.border = border
        header_cell.alignment = alignment
56+
57+
def get_column_width(column: Tuple) -> float:
    """
    Find optimum column width based on content and header length
    Based on the computations of DSS native excel output formatter

    :param column: cells of one worksheet column, header cell first; only the
        ``value`` attribute of each cell is read. Must contain at least one cell.
    :return: number of characters to show, scaled by LETTER_WIDTH
    """

    def text_length(cell) -> int:
        # An empty cell holds None: count it as zero characters instead of
        # len("None") == 4, which would inflate the width of sparse columns
        value = cell.value
        return 0 if value is None else len(str(value))

    length_header = text_length(column[0])

    # The header cell is part of `column`, so it contributes to both sum and max
    cell_lengths = [text_length(cell) for cell in column]
    sum_length_cells = sum(cell_lengths)
    max_length_cells = max(cell_lengths)

    # Computations from ExcelOutputFormatter.java ExcelOutputFormatter.footer
    average_length_cell = math.ceil(sum_length_cells / (len(column) + 1))
    max_length_cells = min(max_length_cells, MAX_LENGTH_TO_SHOW)

    if max_length_cells > 2 * average_length_cell:  # if max length much bigger than average
        # Compromise between the (capped) longest cell and the average cell
        length_to_show = int((max_length_cells + average_length_cell) / 2)
    else:
        length_to_show = max_length_cells

    # The header is always shown in full, even beyond MAX_LENGTH_TO_SHOW
    length_to_show = max(length_to_show, length_header)

    return length_to_show * LETTER_WIDTH
85+
86+
def auto_size_column_width(worksheet: Worksheet) -> None:
    """
    Resize the columns of the worksheet to fit their header and content.

    The width of each column is computed by get_column_width, and the
    worksheet's column dimensions are replaced by the newly computed ones.

    :param worksheet: openpyxl worksheet to resize in place
    """
    # NOTE(review): openpyxl documents min_column as a 1-based index, so this guard
    # may never trigger, even for an empty sheet - confirm and tighten if needed.
    if worksheet.min_column < 1:
        # Logger.warn is a deprecated alias of Logger.warning
        logger.warning("No header row for worksheet %s. Column auto-size skipped.", worksheet)
        return

    dimension_holder = DimensionHolder(worksheet=worksheet)

    # iter_cols() without bounds starts at column 1, not at worksheet.min_column,
    # so columns are numbered from 1 to keep each width on the column it was computed from
    for index_column, column in enumerate(worksheet.iter_cols(), start=1):
        column_width = get_column_width(column)
        dimension_holder[get_column_letter(index_column)] = ColumnDimension(worksheet,
                                                                            min=index_column,
                                                                            max=index_column,
                                                                            width=column_width)
    worksheet.column_dimensions = dimension_holder
105+
15106

16107
def dataframes_to_xlsx(input_dataframes_names, xlsx_abs_path, dataframe_provider):
17108
"""
@@ -22,10 +113,24 @@ def dataframes_to_xlsx(input_dataframes_names, xlsx_abs_path, dataframe_provider
22113
"""
23114
logger.info("Writing output xlsx file ...")
24115
writer = pd.ExcelWriter(xlsx_abs_path, engine='openpyxl')
116+
25117
for name in input_dataframes_names:
26118
df = dataframe_provider(name)
119+
27120
logger.info("Writing dataset into excel sheet...")
28121
df.to_excel(writer, sheet_name=name, index=False, encoding='utf-8')
122+
123+
worksheet = writer.sheets.get(name)
124+
125+
if worksheet is None:
126+
logger.warn(f"No worksheet for dataset {name}. Written but styling skipped.")
127+
continue
128+
129+
logger.info(f"Styling excel sheet...")
130+
style_header(worksheet)
131+
auto_size_column_width(worksheet)
132+
29133
logger.info("Finished writing dataset {} into excel sheet.".format(name))
134+
30135
writer.save()
31136
logger.info("Done writing output xlsx file")

0 commit comments

Comments
 (0)