Skip to content

Commit 0381acf

Browse files
committed
fix: fix pptx2md with numpy conflict version
1 parent 8fa963f commit 0381acf

11 files changed

Lines changed: 1474 additions & 3 deletions

File tree

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright 2024 Liu Siyao
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from .entry import convert
16+
from .types import ConversionConfig
17+
18+
__all__ = ['convert', 'ConversionConfig']
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Copyright 2024 Liu Siyao
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import argparse
16+
import logging
17+
from pathlib import Path
18+
19+
from bisheng.pptx2md.entry import convert
20+
from bisheng.pptx2md.log import setup_logging
21+
from bisheng.pptx2md.types import ConversionConfig
22+
23+
setup_logging(compat_tqdm=True)
24+
logger = logging.getLogger(__name__)
25+
26+
27+
def parse_args(argv=None) -> ConversionConfig:
    """Parse command-line options into a ``ConversionConfig``.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads the arguments from ``sys.argv[1:]``, which is the
            behavior callers had before this parameter existed.

    Returns:
        A ``ConversionConfig`` populated from the parsed options. If no
        output path is given, ``out.tid`` (``--wiki``), ``out.qmd``
        (``--qmd``) or ``out.md`` is used; if no image directory is given,
        an ``img`` directory next to the output file is used.
    """
    arg_parser = argparse.ArgumentParser(description='Convert pptx to markdown')
    arg_parser.add_argument('pptx_path', type=Path, help='path to the pptx file to be converted')
    arg_parser.add_argument('-t', '--title', type=Path, help='path to the custom title list file')
    arg_parser.add_argument('-o', '--output', type=Path, help='path of the output file')
    arg_parser.add_argument('-i', '--image-dir', type=Path, help='where to put images extracted')
    arg_parser.add_argument('--image-width', type=int, help='maximum image width in px')
    arg_parser.add_argument('--disable-image', action="store_true", help='disable image extraction')
    arg_parser.add_argument('--disable-wmf',
                            action="store_true",
                            help='keep wmf formatted image untouched(avoid exceptions under linux)')
    arg_parser.add_argument('--disable-color', action="store_true", help='do not add color HTML tags')
    arg_parser.add_argument('--disable-escaping',
                            action="store_true",
                            help='do not attempt to escape special characters')
    arg_parser.add_argument('--disable-notes', action="store_true", help='do not add presenter notes')
    # The backslashes are escaped so the help shows the literal separator;
    # a real newline here would be collapsed by argparse's help formatter.
    arg_parser.add_argument('--enable-slides', action="store_true", help='delineate slides with `\\n---\\n`')
    arg_parser.add_argument('--try-multi-column', action="store_true", help='try to detect multi-column slides')
    arg_parser.add_argument('--wiki', action="store_true", help='generate output as wikitext(TiddlyWiki)')
    arg_parser.add_argument('--mdk', action="store_true", help='generate output as madoko markdown')
    arg_parser.add_argument('--qmd', action="store_true", help='generate output as quarto markdown presentation')
    arg_parser.add_argument('--min-block-size',
                            type=int,
                            default=15,
                            help='the minimum character number of a text block to be converted')
    arg_parser.add_argument("--page", type=int, default=None, help="only convert the specified page")
    arg_parser.add_argument(
        "--keep-similar-titles",
        action="store_true",
        help="keep similar titles (allow for repeated slide titles - One or more - Add (cont.) to the title)")

    args = arg_parser.parse_args(argv)

    # Determine output path if not specified
    output_path = args.output
    if output_path is None:
        extension = '.tid' if args.wiki else '.qmd' if args.qmd else '.md'
        output_path = Path(f'out{extension}')

    return ConversionConfig(
        pptx_path=args.pptx_path,
        output_path=output_path,
        image_dir=args.image_dir or output_path.parent / 'img',
        title_path=args.title,
        image_width=args.image_width,
        disable_image=args.disable_image,
        disable_wmf=args.disable_wmf,
        disable_color=args.disable_color,
        disable_escaping=args.disable_escaping,
        disable_notes=args.disable_notes,
        enable_slides=args.enable_slides,
        try_multi_column=args.try_multi_column,
        is_wiki=args.wiki,
        is_mdk=args.mdk,
        is_qmd=args.qmd,
        min_block_size=args.min_block_size,
        page=args.page,
        keep_similar_titles=args.keep_similar_titles,
    )
85+
86+
87+
def main():
    """Command-line entry point: parse the arguments and run the conversion."""
    convert(parse_args())
90+
91+
92+
if __name__ == '__main__':
93+
main()
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Copyright 2024 Liu Siyao
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import logging
16+
17+
import bisheng.pptx2md.outputter as outputter
18+
from bisheng.pptx2md.parser import parse
19+
from bisheng.pptx2md.types import ConversionConfig
20+
from bisheng.pptx2md.utils import load_pptx, prepare_titles
21+
22+
logger = logging.getLogger(__name__)
23+
24+
25+
def convert(config: ConversionConfig):
    """Convert the presentation described by *config* and write the result.

    The presentation at ``config.pptx_path`` is loaded and parsed. If
    ``config.output_path`` ends with ``.json`` the parsed data is dumped as
    JSON and the function returns; otherwise it is handed to the formatter
    selected by the ``is_wiki`` / ``is_mdk`` / ``is_qmd`` flags (plain
    Markdown when none is set).

    Args:
        config: Conversion settings. When ``config.title_path`` is set,
            ``config.custom_titles`` is filled in from that file as a side
            effect.
    """
    if config.title_path:
        config.custom_titles = prepare_titles(config.title_path)

    prs = load_pptx(config.pptx_path)

    logger.info("conversion started")

    ast = parse(config, prs)

    if str(config.output_path).endswith('.json'):
        # Write UTF-8 explicitly: the platform default encoding may be unable
        # to represent non-ASCII slide text present in the JSON dump.
        with open(config.output_path, 'w', encoding='utf-8') as f:
            f.write(ast.model_dump_json(indent=2))
        logger.info(f'presentation data saved to {config.output_path}')
        return

    if config.is_wiki:
        out = outputter.WikiFormatter(config)
    elif config.is_mdk:
        out = outputter.MadokoFormatter(config)
    elif config.is_qmd:
        out = outputter.QuartoFormatter(config)
    else:
        out = outputter.MarkdownFormatter(config)

    out.output(ast)
    logger.info(f'converted document saved to {config.output_path}')
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Copyright 2024 Liu Siyao
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# --coding:utf-8--
16+
# author = ''
17+
18+
19+
def convert_wmf_to_png(input_file, output_png_path):
    """Convert the image stored at *input_file* into a PNG file.

    Args:
        input_file: Path of the source image, passed to Wand as ``filename``.
        output_png_path: Path the PNG result is saved to.

    ``wand.image`` is imported inside the function, so Wand is only needed
    when a conversion is actually requested.
    """
    from wand.image import Image

    with Image(filename=input_file) as source_image:
        source_image.format = 'png'
        source_image.save(filename=output_png_path)

src/backend/bisheng/pptx2md/log.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Copyright 2024 Liu Siyao
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import logging
16+
import sys
17+
18+
from tqdm import tqdm
19+
20+
21+
class TqdmStreamHandler(logging.StreamHandler):
    """Stream handler that emits records through ``tqdm.write``.

    Records are formatted with the handler's formatter and printed via
    ``tqdm.write`` instead of writing to the stream directly.
    """

    def emit(self, record):
        """Format *record* and print it via ``tqdm.write``.

        Any exception raised while formatting or writing is passed to
        ``handleError``, matching the standard logging handler convention.
        """
        try:
            msg = self.format(record)
            # Write to the stream this handler was constructed with; without
            # ``file=`` tqdm always targets the current ``sys.stdout`` and the
            # configured stream (flushed below) would be ignored.
            tqdm.write(msg, file=self.stream)
            self.flush()
        except Exception:
            self.handleError(record)
30+
31+
32+
def setup_logging(compat_tqdm=True):
    """Reset the root logger to a single INFO-level handler on stdout.

    All handlers currently attached to the root logger are removed, then one
    stream handler writing to ``sys.stdout`` is installed with a
    ``timestamp | level | logger name | message`` format.

    Args:
        compat_tqdm: When true, install a ``TqdmStreamHandler``; otherwise a
            plain ``logging.StreamHandler``.
    """
    root = logging.getLogger()

    # Iterate over a snapshot because removeHandler mutates root.handlers.
    for existing in list(root.handlers):
        root.removeHandler(existing)

    handler_cls = TqdmStreamHandler if compat_tqdm else logging.StreamHandler
    console = handler_cls(sys.stdout)
    console.setLevel(logging.INFO)
    console.setFormatter(
        logging.Formatter(
            '%(asctime)s | %(levelname)s | %(name)s | %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S',
        ))

    root.setLevel(logging.INFO)
    root.addHandler(console)

0 commit comments

Comments
 (0)