Skip to content

Commit e5d00bd

Browse files
authored
Merge pull request #14 from dataiku/feature/sc-230874-customize-tab-names
feat: [sc-230874] [Multisheet Excel Export] Customize tab names
2 parents 34ca92a + e75c529 commit e5d00bd

5 files changed

Lines changed: 68 additions & 9 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## [Version 2.2.0](https://github.com/dataiku/dss-plugin-multisheet-excel-export/releases/tag/v2.2.0) - Major release - 2025-02
4+
5+
- Add customization of tab names
6+
37
## [Version 2.1.0](https://github.com/dataiku/dss-plugin-multisheet-excel-export/releases/tag/v2.1.0) - Major release - 2024-09
48
- Bug fix: one temporary workbook is used per dataset to avoid out of memory issues while exporting large datasets. All these temporary workbooks are merged at the end to generate the final excel file
59
- Optimizations: use of a style cache to avoid useless copies, and openpyxl write-only mode with lxml

custom-recipes/to-excel/recipe.json

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,34 @@
4545
"type": "BOOLEAN",
4646
"defaultValue": false,
4747
"mandatory": true
48+
},
49+
{
50+
"name": "renaming_sheets",
51+
"label": "Renaming the sheets",
52+
"description": "Name the sheets manually instead of using the dataset names",
53+
"type": "BOOLEAN",
54+
"defaultValue": false
55+
},
56+
{
57+
"name": "dataset_to_sheet_mapping",
58+
"label": "Sheet name",
59+
"description": "",
60+
"type": "OBJECT_LIST",
61+
"visibilityCondition": "model.renaming_sheets == true",
62+
"subParams": [
63+
{
64+
"name": "dataset_name",
65+
"type": "DATASET",
66+
"label": "Dataset",
67+
"description": "To be valid, the dataset has to be selected in the recipe's input"
68+
},
69+
{
70+
"name": "sheet_name",
71+
"type": "STRING",
72+
"label": "Sheet name",
73+
"description": "Name of the targeted sheet for this dataset"
74+
}
75+
]
4876
}
4977
],
5078
"resourceKeys" : []

custom-recipes/to-excel/recipe.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from dataiku.customrecipe import get_output_names_for_role
1515
from dataiku.customrecipe import get_recipe_config
1616
from openpyxl import load_workbook, Workbook
17-
from xlsx_writer import datasets_to_xlsx
17+
from xlsx_writer import datasets_to_xlsx, assert_valid_sheet_name
1818
from typing import Union
1919

2020
DEFAULT_DATAIKU_SHEET_NAME = "Sheet1"
@@ -50,6 +50,20 @@ def get_excel_worksheet(dataset: dataiku.Dataset, apply_conditional_formatting:
5050
return None
5151

5252

53+
def get_dataset_to_sheet_mapping(config):
    """
    Build the mapping between dataset names and custom sheet names from the recipe configuration

    The mapping is only read when the "renaming_sheets" option is enabled. Rows with no dataset
    or no sheet name are skipped, so they never put a None key or a None sheet name in the mapping.

    :param config: the recipe configuration (dict-like, read with .get())
    :return: a dict {dataset name: sheet name}, empty when the renaming is disabled
    :raises Exception: propagated from assert_valid_sheet_name when a sheet name is rejected
    """
    dataset_to_sheet_mapping = {}
    if not config.get("renaming_sheets", False):
        return dataset_to_sheet_mapping

    # "dataset_to_sheet_mapping" is a list of rows; tolerate a missing or null value
    for mapping in config.get("dataset_to_sheet_mapping") or []:
        if not mapping:
            continue
        dataset_name = mapping.get("dataset_name")
        sheet_name = mapping.get("sheet_name")
        if not dataset_name or not sheet_name:
            # An incomplete row cannot be applied: ignore it instead of storing None in the mapping
            logger.warning("Ignoring incomplete sheet renaming (dataset: '{}', sheet: '{}')".format(dataset_name, sheet_name))
            continue
        assert_valid_sheet_name(sheet_name)
        dataset_to_sheet_mapping[dataset_name] = sheet_name
        logger.info("Renaming dataset '{}' into sheet '{}'".format(dataset_name, sheet_name))
    return dataset_to_sheet_mapping
65+
66+
5367
logger = logging.getLogger(__name__)
5468
logging.basicConfig(level=logging.INFO, format='Multi-Sheet Excel Exporter | %(levelname)s - %(message)s')
5569

@@ -72,6 +86,7 @@ def get_excel_worksheet(dataset: dataiku.Dataset, apply_conditional_formatting:
7286
input_config = get_recipe_config()
7387
workbook_name = input_config.get('output_workbook_name', None)
7488
apply_conditional_formatting = input_config.get('export_conditional_formatting', False)
89+
dataset_to_sheet_mapping = get_dataset_to_sheet_mapping(input_config)
7590

7691
if workbook_name is None:
7792
logger.warning("Received input received recipe config: {}".format(input_config))
@@ -89,7 +104,12 @@ def get_excel_worksheet(dataset: dataiku.Dataset, apply_conditional_formatting:
89104
tmp_file_path = tmp_file.name
90105
logger.info("Intend to write the output xls file to the following location: {}".format(tmp_file_path))
91106

92-
datasets_to_xlsx(input_datasets_names, tmp_file_path, lambda name: get_excel_worksheet(dataiku.Dataset(name), apply_conditional_formatting))
107+
datasets_to_xlsx(
108+
input_datasets_names,
109+
tmp_file_path,
110+
lambda name: get_excel_worksheet(dataiku.Dataset(name), apply_conditional_formatting),
111+
dataset_to_sheet_mapping=dataset_to_sheet_mapping
112+
)
93113

94114
with open(tmp_file_path, 'rb', encoding=None) as f:
95115
output_folder.upload_stream(output_file_name, f)

plugin.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"id" : "multisheet-excel-export",
3-
"version" : "2.1.0",
3+
"version" : "2.2.0",
44

55

66
"meta" : {

python-lib/xlsx_writer.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def copy_sheet_to_workbook(source_sheet: Worksheet, target_workbook: Workbook) -
171171
return target_sheet
172172

173173

174-
def rename_too_long_dataset_names(input_dataset_names: List[str]) -> Dict[str, str]:
174+
def rename_too_long_dataset_names(input_dataset_names: List[str], dataset_to_sheet_mapping={}) -> Dict[str, str]:
175175
"""
176176
Excel allows for only maximum 30 chars in the sheet names, so if some DS have more than 30 chars :
177177
- truncate the name to 28 chars
@@ -184,7 +184,9 @@ def rename_too_long_dataset_names(input_dataset_names: List[str]) -> Dict[str, s
184184
index_rename = -1
185185
renaming_length = EXCEL_MAX_LEN_SHEET_NAME - 2
186186
for name in input_dataset_names:
187-
if len(name) > EXCEL_MAX_LEN_SHEET_NAME:
187+
if name in dataset_to_sheet_mapping:
188+
return_map[name] = dataset_to_sheet_mapping.get(name)
189+
elif len(name) > EXCEL_MAX_LEN_SHEET_NAME:
188190
index_rename += 1
189191
rename = f"{name[0:renaming_length]}{index_rename:02d}"
190192
# Almost impossible case : a DS already has this name
@@ -200,7 +202,7 @@ def rename_too_long_dataset_names(input_dataset_names: List[str]) -> Dict[str, s
200202
return return_map
201203

202204

203-
def datasets_to_xlsx(input_dataset_names, xlsx_abs_path, worksheet_provider):
205+
def datasets_to_xlsx(input_dataset_names, xlsx_abs_path, worksheet_provider, dataset_to_sheet_mapping={}):
204206
"""
205207
Write each input dataset into one temporary excel file and merge all these excel files into the final excel file
206208
:param input_dataset_names: the list of dataset, using one temporary workbook per dataset
@@ -210,7 +212,7 @@ def datasets_to_xlsx(input_dataset_names, xlsx_abs_path, worksheet_provider):
210212

211213
logger.info(f"Building output excel file '{xlsx_abs_path}'...")
212214

213-
template_workbook, workbook_tmp_files = get_temporary_workbooks(input_dataset_names, worksheet_provider)
215+
template_workbook, workbook_tmp_files = get_temporary_workbooks(input_dataset_names, worksheet_provider, dataset_to_sheet_mapping=dataset_to_sheet_mapping)
214216

215217
# Save template workbook with styles and unzip it
216218
template_workbook_extract_dir = get_template_workbook_directory(template_workbook)
@@ -227,7 +229,7 @@ def datasets_to_xlsx(input_dataset_names, xlsx_abs_path, worksheet_provider):
227229
logger.info("Done writing output xlsx file.")
228230

229231

230-
def get_temporary_workbooks(input_dataset_names, worksheet_provider):
232+
def get_temporary_workbooks(input_dataset_names, worksheet_provider, dataset_to_sheet_mapping={}):
231233
"""
232234
Create a template workbook and one temporary workbook per dataset stored on disk
233235
:param input_dataset_names: the list of dataset, using one temporary workbook per dataset
@@ -243,7 +245,7 @@ def get_temporary_workbooks(input_dataset_names, worksheet_provider):
243245
# List containing all temporary workbooks generated from dataset
244246
workbook_tmp_files = []
245247

246-
renaming_map = rename_too_long_dataset_names(input_dataset_names)
248+
renaming_map = rename_too_long_dataset_names(input_dataset_names, dataset_to_sheet_mapping=dataset_to_sheet_mapping)
247249

248250
for name in input_dataset_names:
249251
dataset_worksheet = worksheet_provider(name)
@@ -368,3 +370,8 @@ def add_style_if_not_exist(style, list):
368370
len(fills),
369371
len(alignments),
370372
len(number_formats)))
373+
374+
375+
def assert_valid_sheet_name(sheet_name):
    """
    Check that a custom sheet name can be used as an Excel sheet name

    None is accepted and means that no custom name has been provided.

    :param sheet_name: the sheet name to check, or None
    :raises ValueError: if the name contains a character forbidden in Excel sheet names
        or is longer than EXCEL_MAX_LEN_SHEET_NAME characters
    """
    if sheet_name is None:
        return

    # Excel rejects these characters in sheet names: fail fast with a clear message
    # rather than later, in the middle of the export
    forbidden_chars = sorted(set(sheet_name) & set("\\/?*[]:"))
    if forbidden_chars:
        raise ValueError("The sheet name '{}' contains invalid characters: {}".format(sheet_name, " ".join(forbidden_chars)))

    if len(sheet_name) > EXCEL_MAX_LEN_SHEET_NAME:
        raise ValueError("The sheet name '{}' is too long. Maximum is {} characters".format(sheet_name, EXCEL_MAX_LEN_SHEET_NAME))

0 commit comments

Comments
 (0)