77"""
88
99import logging
10+ import math
11+ from typing import Tuple
12+
13+ from openpyxl .styles import Alignment , Font , PatternFill , Side
14+ from openpyxl .styles .borders import Border
15+ from openpyxl .styles .colors import WHITE
16+ from openpyxl .utils import get_column_letter
17+ from openpyxl .worksheet .dimensions import ColumnDimension , DimensionHolder
18+ from openpyxl .worksheet .worksheet import Worksheet
1019import pandas as pd
1120
# Default header background color used by style_header, given as 8 hex digits.
# NOTE(review): presumably aRGB (alpha "FF" + RGB "2AB1AC") as openpyxl colors expect - confirm.
DATAIKU_TEAL = "FF2AB1AC"
LETTER_WIDTH = 1.20  # Approximate letter width used to scale column width
MAX_LENGTH_TO_SHOW = 45  # Cap on the cell text length used for sizing; limit copied from DSS native excel exporter

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format='Multi-Sheet Excel Exporter | %(levelname)s - %(message)s')
1427
def style_header(worksheet: Worksheet,
                 font_name: str = "Calibri",
                 font_size: int = 11,
                 font_color: str = WHITE,
                 background_color: str = DATAIKU_TEAL,
                 bold: bool = True
                 ) -> None:
    """
    Style the header (first row) of the worksheet in place.

    Every cell of row 1 receives the requested font and a solid background
    fill, has its four borders cleared, and is aligned bottom / center.

    :param worksheet: openpyxl worksheet whose first row is styled
    :param font_name: font family of the header text
    :param font_size: font size of the header text
    :param font_color: font color, forwarded to ``Font(color=...)``
    :param background_color: fill color, forwarded to ``PatternFill(fgColor=...)``
    :param bold: whether the header text is bold
    """
    # NOTE(review): openpyxl appears to report min_column >= 1 even for an
    # empty sheet, so this guard may never trigger - confirm before relying on it.
    if worksheet.min_column < 1:
        logger.warning(f"No header row for worksheet {worksheet}. Styling skipped.")
        return

    font = Font(name=font_name, size=font_size, color=font_color, bold=bold)
    fill = PatternFill("solid", fgColor=background_color)

    # A Side without border_style removes the border on that edge.
    no_border_side = Side(border_style=None)
    border = Border(left=no_border_side, right=no_border_side, top=no_border_side, bottom=no_border_side)

    alignment = Alignment(vertical='bottom', horizontal='center')

    # worksheet[1] is the first row, i.e. the header cells.
    for header_cell in worksheet[1]:
        header_cell.font = font
        header_cell.fill = fill
        header_cell.border = border
        header_cell.alignment = alignment
56+
def get_column_width(column: Tuple) -> float:
    """
    Find optimum column width based on content and header length
    Based on the computations of DSS native excel output formatter

    :param column: cells of one column, header cell first; each cell exposes
        its content through a ``value`` attribute
    :return: column width, i.e. the number of characters to show scaled by
        LETTER_WIDTH; 0.0 for a column without any cell
    """
    if not column:
        # Nothing to measure; avoids an IndexError on the header lookup.
        return 0.0

    # An empty cell holds None: count it as zero characters rather than as
    # the four characters of the text "None".
    lengths = [0 if cell.value is None else len(str(cell.value)) for cell in column]
    length_header = lengths[0]

    # Computations from ExcelOutputFormatter.java ExcelOutputFormatter.footer
    # (the header is part of the sum and the divisor is the cell count + 1).
    average_length_cell = math.ceil(sum(lengths) / (len(lengths) + 1))
    max_length_cells = min(max(lengths), MAX_LENGTH_TO_SHOW)

    if max_length_cells > 2 * average_length_cell:  # if max length much bigger than average
        # Settle halfway between average and max so one outlier does not
        # stretch the whole column.
        length_to_show = int((max_length_cells + average_length_cell) / 2)
    else:
        length_to_show = max_length_cells

    # The header must always be fully visible.
    length_to_show = max(length_to_show, length_header)

    return length_to_show * LETTER_WIDTH
85+
def auto_size_column_width(worksheet: Worksheet) -> None:
    """
    Resize the columns of the worksheet based on their header and content.

    The width of each column between ``worksheet.min_column`` and
    ``worksheet.max_column`` is computed by ``get_column_width`` and the
    worksheet's ``column_dimensions`` is replaced by the resulting holder.

    :param worksheet: openpyxl worksheet to resize in place
    """
    # NOTE(review): openpyxl appears to report min_column >= 1 even for an
    # empty sheet, so this guard may never trigger - confirm before relying on it.
    if worksheet.min_column < 1:
        logger.warning(f"No header row for worksheet {worksheet}. Column auto-size skipped.")
        return

    first_column = worksheet.min_column
    last_column = worksheet.max_column

    dimension_holder = DimensionHolder(worksheet=worksheet)

    # Iterate over exactly the columns that are being indexed: without explicit
    # bounds iter_cols() starts at column 1, which would pair widths with the
    # wrong column letters whenever the data does not start in column A.
    columns = worksheet.iter_cols(min_col=first_column, max_col=last_column)
    for index_column, column in enumerate(columns, start=first_column):
        dimension_holder[get_column_letter(index_column)] = ColumnDimension(
            worksheet,
            min=index_column,
            max=index_column,
            width=get_column_width(column),
        )

    worksheet.column_dimensions = dimension_holder
105+
15106
def dataframes_to_xlsx(input_dataframes_names, xlsx_abs_path, dataframe_provider):
    """
    Write several datasets into a single xlsx file, one styled sheet per dataset.

    :param input_dataframes_names: iterable of dataset names; each name is
        passed to ``dataframe_provider`` and used as the sheet name
    :param xlsx_abs_path: path of the xlsx file to write
    :param dataframe_provider: callable taking a dataset name and returning
        the dataframe to write (its ``to_excel`` method is called)
    """
    logger.info("Writing output xlsx file ...")

    # The context manager closes the writer (which saves the workbook) when
    # the block exits, including on error; ExcelWriter.save() no longer
    # exists in pandas >= 2.0.
    with pd.ExcelWriter(xlsx_abs_path, engine='openpyxl') as writer:
        for name in input_dataframes_names:
            df = dataframe_provider(name)

            logger.info("Writing dataset into excel sheet...")
            # No `encoding` argument: it was removed from to_excel in
            # pandas 2.0 and was only needed by the xlwt engine.
            df.to_excel(writer, sheet_name=name, index=False)

            worksheet = writer.sheets.get(name)

            if worksheet is None:
                logger.warning(f"No worksheet for dataset {name}. Written but styling skipped.")
                continue

            logger.info("Styling excel sheet...")
            style_header(worksheet)
            auto_size_column_width(worksheet)

            logger.info("Finished writing dataset {} into excel sheet.".format(name))

    logger.info("Done writing output xlsx file")
0 commit comments