Skip to content

Commit 793e8e4

Browse files
committed
fix: preserve no-wrap text extraction
Signed-off-by: Yonghye Kwon <developer.0hye@gmail.com>
1 parent f66fffe commit 793e8e4

9 files changed

Lines changed: 141 additions & 72 deletions

File tree

crates/office2pdf/src/lib_render_tests.rs

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -380,9 +380,18 @@ fn test_render_document_fixed_textbox_ordered_list_keeps_all_numbers() {
380380
text.contains("3."),
381381
"Expected third marker in PDF text, got:\n{text}",
382382
);
383-
assert!(text.contains("Alpha"), "Expected first item text, got:\n{text}");
384-
assert!(text.contains("Beta"), "Expected second item text, got:\n{text}");
385-
assert!(text.contains("Gamma"), "Expected third item text, got:\n{text}");
383+
assert!(
384+
text.contains("Alpha"),
385+
"Expected first item text, got:\n{text}"
386+
);
387+
assert!(
388+
text.contains("Beta"),
389+
"Expected second item text, got:\n{text}"
390+
);
391+
assert!(
392+
text.contains("Gamma"),
393+
"Expected third item text, got:\n{text}"
394+
);
386395
}
387396

388397
#[test]

crates/office2pdf/src/parser/pptx.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,10 +40,10 @@ use self::theme::{
4040
resolve_effective_color_map, resolve_theme_font,
4141
};
4242

43-
#[path = "pptx_package.rs"]
44-
mod package;
4543
#[path = "pptx_emf.rs"]
4644
mod emf;
45+
#[path = "pptx_package.rs"]
46+
mod package;
4747
#[path = "pptx_shapes.rs"]
4848
mod shapes;
4949
#[path = "pptx_slides.rs"]

crates/office2pdf/src/parser/pptx_emf.rs

Lines changed: 21 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -50,9 +50,10 @@ enum PenStyle {
5050
Null,
5151
}
5252

53-
#[derive(Clone, Copy)]
53+
#[derive(Clone, Copy, Default)]
5454
enum FillRule {
5555
EvenOdd,
56+
#[default]
5657
NonZero,
5758
}
5859

@@ -147,12 +148,6 @@ struct EmfSvgConverter {
147148
elements: Vec<SvgPathElement>,
148149
}
149150

150-
impl Default for FillRule {
151-
fn default() -> Self {
152-
Self::NonZero
153-
}
154-
}
155-
156151
impl EmfSvgConverter {
157152
fn convert(data: &[u8]) -> Option<Vec<u8>> {
158153
let mut converter = Self {
@@ -196,7 +191,8 @@ impl EmfSvgConverter {
196191

197192
fn handle_record(&mut self, record_type: u32, body: &[u8]) -> Option<()> {
198193
match record_type {
199-
EMR_SETWINDOWEXTEX | EMR_SETWINDOWORGEX | EMR_SETVIEWPORTEXTEX | EMR_SETVIEWPORTORGEX => {
194+
EMR_SETWINDOWEXTEX | EMR_SETWINDOWORGEX | EMR_SETVIEWPORTEXTEX
195+
| EMR_SETVIEWPORTORGEX => {
200196
// The converter derives its SVG viewBox from actual drawn geometry instead
201197
// of these logical extents because many Office EMFs use a wider drawing space.
202198
}
@@ -319,7 +315,8 @@ impl EmfSvgConverter {
319315
" C {} {} {} {} {} {}",
320316
control1.x, control1.y, control2.x, control2.y, end_point.x, end_point.y
321317
);
322-
self.current_path_points.extend_from_slice(&[control1, control2, end_point]);
318+
self.current_path_points
319+
.extend_from_slice(&[control1, control2, end_point]);
323320
self.current_point = Some(end_point);
324321
chunk_start += 3;
325322
}
@@ -367,7 +364,11 @@ impl EmfSvgConverter {
367364
return;
368365
}
369366

370-
let fill: Option<RgbColor> = if close_path { self.current_fill() } else { None };
367+
let fill: Option<RgbColor> = if close_path {
368+
self.current_fill()
369+
} else {
370+
None
371+
};
371372
let stroke: Option<RgbColor> = self.current_stroke_color();
372373
let stroke_width: Option<i32> = self.current_stroke_width();
373374
if fill.is_none() && stroke.is_none() {
@@ -403,7 +404,11 @@ impl EmfSvgConverter {
403404
return;
404405
}
405406

406-
let fill: Option<RgbColor> = if stroke_only { None } else { self.current_fill() };
407+
let fill: Option<RgbColor> = if stroke_only {
408+
None
409+
} else {
410+
self.current_fill()
411+
};
407412
let stroke: Option<RgbColor> = if stroke_only {
408413
self.current_stroke_color()
409414
} else {
@@ -481,7 +486,11 @@ impl EmfSvgConverter {
481486
}
482487
if let Some(stroke) = element.stroke {
483488
let _ = write!(svg, " stroke=\"{}\"", stroke.as_svg_hex());
484-
let _ = write!(svg, " stroke-width=\"{}\"", element.stroke_width.unwrap_or(1));
489+
let _ = write!(
490+
svg,
491+
" stroke-width=\"{}\"",
492+
element.stroke_width.unwrap_or(1)
493+
);
485494
}
486495
svg.push_str("/>\n");
487496
}

crates/office2pdf/src/parser/pptx_package.rs

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -415,10 +415,7 @@ fn image_format_from_ext(path: &str) -> Option<ImageFormat> {
415415
}
416416
}
417417

418-
fn normalize_slide_image_asset(
419-
target: &str,
420-
data: Vec<u8>,
421-
) -> (Vec<u8>, SlideImageSource) {
418+
fn normalize_slide_image_asset(target: &str, data: Vec<u8>) -> (Vec<u8>, SlideImageSource) {
422419
if let Some(format) = image_format_from_ext(target) {
423420
return (data, SlideImageSource::Supported(format));
424421
}

crates/office2pdf/src/render/typst_gen.rs

Lines changed: 4 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -643,11 +643,7 @@ fn generate_fixed_text_box(
643643
out,
644644
" let text_box_scale_{text_box_id} = calc.min(100%, calc.min(text_box_scale_width_{text_box_id}, text_box_scale_height_{text_box_id}))",
645645
);
646-
let _ = writeln!(
647-
out,
648-
" box(width: {}pt)[",
649-
format_f64(inner_width_pt),
650-
);
646+
let _ = writeln!(out, " box(width: {}pt)[", format_f64(inner_width_pt),);
651647
if let Some(align_str) = fixed_text_box_alignment_name(paragraph.style.alignment) {
652648
let _ = writeln!(out, " #align({align_str})[");
653649
}
@@ -678,11 +674,7 @@ fn generate_fixed_text_box(
678674
" let text_box_scale_{text_box_id} = calc.min(100%, ({}pt / calc.max(measure(text_box_raw_{text_box_id}).height, 1pt)) * 100%)",
679675
format_f64(inner_height_pt),
680676
);
681-
let _ = writeln!(
682-
out,
683-
" box(width: {}pt)[",
684-
format_f64(inner_width_pt),
685-
);
677+
let _ = writeln!(out, " box(width: {}pt)[", format_f64(inner_width_pt),);
686678
let _ = writeln!(
687679
out,
688680
" #scale(x: text_box_scale_{text_box_id}, y: text_box_scale_{text_box_id}, origin: top + left, reflow: true)["
@@ -1132,10 +1124,7 @@ fn generate_floating_text_box_content(
11321124
Ok(())
11331125
}
11341126

1135-
fn single_line_fit_paragraph<'a>(
1136-
text_box: &'a TextBoxData,
1137-
inner_height_pt: f64,
1138-
) -> Option<&'a Paragraph> {
1127+
fn single_line_fit_paragraph(text_box: &TextBoxData, inner_height_pt: f64) -> Option<&Paragraph> {
11391128
if text_box.no_wrap {
11401129
return None;
11411130
}
@@ -1172,7 +1161,7 @@ fn single_line_fit_paragraph<'a>(
11721161
needs_single_line_fit.then_some(paragraph)
11731162
}
11741163

1175-
fn wrapped_fit_paragraph<'a>(text_box: &'a TextBoxData) -> Option<&'a Paragraph> {
1164+
fn wrapped_fit_paragraph(text_box: &TextBoxData) -> Option<&Paragraph> {
11761165
if text_box.no_wrap || matches!(text_box.vertical_align, TextBoxVerticalAlign::Top) {
11771166
return None;
11781167
}

crates/office2pdf/src/render/typst_gen_fixed_page_textbox_tests.rs

Lines changed: 68 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -47,11 +47,7 @@ fn test_fixed_page_text_box_uses_padding_and_center_vertical_align() {
4747
.source
4848
.contains("inset: (top: 3.6pt, right: 7.2pt, bottom: 3.6pt, left: 7.2pt)")
4949
);
50-
assert!(
51-
output
52-
.source
53-
.contains("width: 285.6pt")
54-
);
50+
assert!(output.source.contains("width: 285.6pt"));
5551
assert!(output.source.contains(
5652
"#context {\n let text_box_slack_0 = calc.max(42.8pt - measure(text_box_content_0).height, 0pt)"
5753
));
@@ -337,7 +333,9 @@ fn test_fixed_page_text_box_compact_list_preserves_hanging_indent() {
337333
let output = generate_typst(&doc).unwrap();
338334

339335
assert!(
340-
output.source.contains("#grid(columns: (36pt, 1fr), gutter: 0pt,"),
336+
output
337+
.source
338+
.contains("#grid(columns: (36pt, 1fr), gutter: 0pt,"),
341339
"Expected ordered hanging-indent list to use a marker/body grid, got:\n{}",
342340
output.source,
343341
);
@@ -410,7 +408,11 @@ fn test_fixed_page_text_box_compact_list_preserves_marker_origin_offset() {
410408
.source
411409
.contains("inset: (top: 0pt, right: 0pt, bottom: 0pt, left: 18pt)")
412410
);
413-
assert!(output.source.contains("#grid(columns: (36pt, 1fr), gutter: 0pt,"));
411+
assert!(
412+
output
413+
.source
414+
.contains("#grid(columns: (36pt, 1fr), gutter: 0pt,")
415+
);
414416
}
415417

416418
#[test]
@@ -987,6 +989,56 @@ fn test_fixed_page_text_box_no_wrap_inserts_word_joiners_for_cjk_titles() {
987989
);
988990
}
989991

992+
#[test]
993+
fn test_fixed_page_text_box_no_wrap_keeps_latin_text_extractable() {
994+
let doc = make_doc(vec![make_fixed_page(
995+
960.0,
996+
540.0,
997+
vec![FixedElement {
998+
x: 100.0,
999+
y: 120.0,
1000+
width: 180.0,
1001+
height: 40.0,
1002+
kind: FixedElementKind::TextBox(crate::ir::TextBoxData {
1003+
content: vec![Block::Paragraph(Paragraph {
1004+
style: ParagraphStyle {
1005+
alignment: Some(Alignment::Center),
1006+
..ParagraphStyle::default()
1007+
},
1008+
runs: vec![Run {
1009+
text: "Test text".to_string(),
1010+
style: TextStyle {
1011+
font_size: Some(28.0),
1012+
..TextStyle::default()
1013+
},
1014+
href: None,
1015+
footnote: None,
1016+
}],
1017+
})],
1018+
padding: Insets::default(),
1019+
vertical_align: crate::ir::TextBoxVerticalAlign::Top,
1020+
fill: None,
1021+
opacity: None,
1022+
stroke: None,
1023+
shape_kind: None,
1024+
no_wrap: true,
1025+
auto_fit: false,
1026+
}),
1027+
}],
1028+
)]);
1029+
let output = generate_typst(&doc).unwrap();
1030+
assert!(
1031+
output.source.contains("Test text"),
1032+
"Expected plain Latin no-wrap text to remain extractable, got:\n{}",
1033+
output.source,
1034+
);
1035+
assert!(
1036+
!output.source.contains('\u{2060}') && !output.source.contains('\u{00A0}'),
1037+
"Expected no invisible joiners or non-breaking spaces for Latin no-wrap text, got:\n{}",
1038+
output.source,
1039+
);
1040+
}
1041+
9901042
#[test]
9911043
fn test_fixed_page_text_box_auto_fit_short_text_uses_scale_to_fit() {
9921044
let doc = make_doc(vec![make_fixed_page(
@@ -1031,7 +1083,9 @@ fn test_fixed_page_text_box_auto_fit_short_text_uses_scale_to_fit() {
10311083
output.source,
10321084
);
10331085
assert!(
1034-
output.source.contains("let text_box_scale_height_0 = (12pt / 21.599999999999998pt) * 100%"),
1086+
output
1087+
.source
1088+
.contains("let text_box_scale_height_0 = (12pt / 21.599999999999998pt) * 100%"),
10351089
"Expected estimated line-height scale calculation, got:\n{}",
10361090
output.source,
10371091
);
@@ -1041,9 +1095,9 @@ fn test_fixed_page_text_box_auto_fit_short_text_uses_scale_to_fit() {
10411095
output.source,
10421096
);
10431097
assert!(
1044-
output
1045-
.source
1046-
.contains("#scale(x: text_box_scale_0, y: text_box_scale_0, origin: top + left, reflow: true)["),
1098+
output.source.contains(
1099+
"#scale(x: text_box_scale_0, y: text_box_scale_0, origin: top + left, reflow: true)["
1100+
),
10471101
"Expected scale-to-fit wrapper, got:\n{}",
10481102
output.source,
10491103
);
@@ -1106,9 +1160,9 @@ fn test_fixed_page_text_box_mixed_font_header_uses_scale_to_fit() {
11061160
output.source,
11071161
);
11081162
assert!(
1109-
output
1110-
.source
1111-
.contains("#scale(x: text_box_scale_0, y: text_box_scale_0, origin: top + left, reflow: true)["),
1163+
output.source.contains(
1164+
"#scale(x: text_box_scale_0, y: text_box_scale_0, origin: top + left, reflow: true)["
1165+
),
11121166
"Expected mixed-font header to use scale-to-fit, got:\n{}",
11131167
output.source,
11141168
);

crates/office2pdf/src/render/typst_gen_lists.rs

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -310,8 +310,7 @@ fn write_fixed_text_list_item(
310310
let inset: Insets = fixed_text_list_item_inset(&paragraph.style);
311311
let has_inset: bool = inset.left > 0.0 || inset.right > 0.0;
312312
let hanging_indent_pt: Option<f64> = fixed_text_list_hanging_indent_pt(&paragraph.style);
313-
let use_marker_grid: bool =
314-
list_style.kind == ListKind::Ordered && hanging_indent_pt.is_some();
313+
let use_marker_grid: bool = list_style.kind == ListKind::Ordered && hanging_indent_pt.is_some();
315314

316315
out.push_str("#block(width: ");
317316
if let Some(width_pt) = available_width_pt {

crates/office2pdf/src/render/typst_gen_shapes.rs

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,10 @@ pub(super) fn generate_shape(out: &mut String, shape: &Shape, width: f64, height
99
}
1010

1111
let use_typst_rotation = shape.rotation_deg.is_some()
12-
&& !matches!(shape.kind, ShapeKind::Line { .. } | ShapeKind::Polyline { .. });
12+
&& !matches!(
13+
shape.kind,
14+
ShapeKind::Line { .. } | ShapeKind::Polyline { .. }
15+
);
1316
if let Some(deg) = shape.rotation_deg.filter(|_| use_typst_rotation) {
1417
let _ = write!(out, "#rotate({}deg)[", format_f64(deg));
1518
}
@@ -33,15 +36,8 @@ pub(super) fn generate_shape(out: &mut String, shape: &Shape, width: f64, height
3336
head_end,
3437
tail_end,
3538
} => {
36-
let ((start_x, start_y), (end_x, end_y)) = rotated_line_points(
37-
*x1,
38-
*y1,
39-
*x2,
40-
*y2,
41-
width,
42-
height,
43-
shape.rotation_deg,
44-
);
39+
let ((start_x, start_y), (end_x, end_y)) =
40+
rotated_line_points(*x1, *y1, *x2, *y2, width, height, shape.rotation_deg);
4541
let has_arrowheads: bool = *tail_end != ArrowHead::None || *head_end != ArrowHead::None;
4642
// When arrowheads follow the line, wrap everything in #place()
4743
// so that Typst overlays them at the same origin instead of

0 commit comments

Comments
 (0)