Skip to content

Commit 4973174

Browse files
Copilot and Shane32 authored
Fix PDF spec compliance, git line-ending corruption, and add xref validation test (#691)
* Initial plan
* Fix PDF Kids array to use indirect page references per PDF spec
  Co-authored-by: Shane32 <6377684+Shane32@users.noreply.github.com>
* Apply suggestion from @Shane32
* Fix tests
* plan: fix git line ending corruption of PDF files
  Co-authored-by: Shane32 <6377684+Shane32@users.noreply.github.com>
* Fix git line-ending corruption of PDF files; add xref validation test
  Co-authored-by: Shane32 <6377684+Shane32@users.noreply.github.com>
* Remove usings
* Update new test
* Update
---------
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Shane32 <6377684+Shane32@users.noreply.github.com>
Co-authored-by: Shane Krueger <shane@acdmail.com>
1 parent ac4aac0 commit 4973174

9 files changed

Lines changed: 125 additions & 24 deletions

.gitattributes

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
*.DOCX diff=astextplain
1717
*.dot diff=astextplain
1818
*.DOT diff=astextplain
19-
*.pdf diff=astextplain
20-
*.PDF diff=astextplain
19+
*.pdf binary
20+
*.PDF binary
2121
*.rtf diff=astextplain
2222
*.RTF diff=astextplain

QRCoder/PdfByteQRCode.cs

Lines changed: 29 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ public byte[] GetGraphic(int pixelsPerModule, string darkColorHtmlHex, string li
7272

7373
// Binary comment - ensures PDF is treated as binary file (prevents text mode corruption)
7474
stream.Write(_pdfBinaryComment, 0, _pdfBinaryComment.Length);
75-
writer.WriteLine();
75+
writer.Write("\r\n");
7676

7777
writer.Flush();
7878
xrefs.Add(stream.Position);
@@ -92,18 +92,12 @@ public byte[] GetGraphic(int pixelsPerModule, string darkColorHtmlHex, string li
9292

9393
// Object 2: Pages - defines page tree structure
9494
writer.Write(
95-
ToStr(xrefs.Count) + " 0 obj\r\n" + // Object number and generation number (0)
96-
"<<\r\n" + // Begin dictionary
97-
"/Count 1\r\n" + // Number of pages in document
98-
"/Kids [ <<\r\n" + // Array of page objects - begin inline page dictionary
99-
"/Type /Page\r\n" + // Declares this as a page
100-
"/Parent 2 0 R\r\n" + // References parent Pages object
101-
"/MediaBox [0 0 " + pdfMediaSize + " " + pdfMediaSize + "]\r\n" + // Page dimensions [x1 y1 x2 y2]
102-
"/Resources << /ProcSet [ /PDF ] >>\r\n" + // Required resources: PDF operations only (no images)
103-
"/Contents 3 0 R\r\n" + // References content stream (object 3)
104-
">> ]\r\n" + // End inline page dictionary and Kids array
105-
">>\r\n" + // End dictionary
106-
"endobj\r\n" // End object
95+
ToStr(xrefs.Count) + " 0 obj\r\n" + // Object number and generation number (0)
96+
"<<\r\n" + // Begin dictionary
97+
"/Count 1\r\n" + // Number of pages in document
98+
"/Kids [ 3 0 R ]\r\n" + // Kids must contain indirect references to Page objects
99+
">>\r\n" + // End dictionary
100+
"endobj\r\n" // End object
107101
);
108102

109103
// Content stream - PDF drawing instructions
@@ -122,13 +116,29 @@ public byte[] GetGraphic(int pixelsPerModule, string darkColorHtmlHex, string li
122116
writer.Flush();
123117
xrefs.Add(stream.Position);
124118

125-
// Object 3: Content stream - contains the drawing instructions
119+
// Object 3: Page - indirect page object (Kids array must reference pages indirectly)
120+
writer.Write(
121+
ToStr(xrefs.Count) + " 0 obj\r\n" + // Object number and generation number (0)
122+
"<<\r\n" + // Begin dictionary
123+
"/Type /Page\r\n" + // Declares this as a page
124+
"/Parent 2 0 R\r\n" + // References parent Pages object
125+
"/MediaBox [0 0 " + pdfMediaSize + " " + pdfMediaSize + "]\r\n" + // Page dimensions [x1 y1 x2 y2]
126+
"/Resources << /ProcSet [ /PDF ] >>\r\n" + // Required resources: PDF operations only (no images)
127+
"/Contents 4 0 R\r\n" + // References content stream (object 4)
128+
">>\r\n" + // End dictionary
129+
"endobj\r\n" // End object
130+
);
131+
132+
writer.Flush();
133+
xrefs.Add(stream.Position);
134+
135+
// Object 4: Content stream - contains the drawing instructions
126136
writer.Write(
127-
ToStr(xrefs.Count) + " 0 obj\r\n" + // Object number and generation number (0)
128-
"<< /Length " + ToStr(content.Length) + " >>\r\n" + // Dictionary with stream length in bytes
129-
"stream\r\n" + // Begin stream data
130-
content + "endstream\r\n" + // Stream content followed by end stream marker
131-
"endobj\r\n" // End object
137+
ToStr(xrefs.Count) + " 0 obj\r\n" + // Object number and generation number (0)
138+
"<< /Length " + ToStr(System.Text.Encoding.ASCII.GetByteCount(content)) + " >>\r\n" + // Dictionary with stream length in bytes
139+
"stream\r\n" + // Begin stream data
140+
content + "endstream\r\n" + // Stream content followed by end stream marker
141+
"endobj\r\n" // End object
132142
);
133143

134144
writer.Flush();
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

QRCoderTests/PdfByteQRCodeRendererTests.cs

Lines changed: 94 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
using QRCoder;
2-
using Shouldly;
3-
using Xunit;
1+
using System.IO;
42

53
namespace QRCoderTests;
64

@@ -54,4 +52,97 @@ public void can_render_pdfbyte_qrcode_from_helper_2()
5452
var pdfCodeGfx = PdfByteQRCodeHelper.GetQRCode("This is a quick test! 123#?", 5, "#FF0000", "#0000FF", QRCodeGenerator.ECCLevel.L);
5553
pdfCodeGfx.ShouldMatchApproved("pdf");
5654
}
55+
56+
private static readonly char[] _lineEndChars = { '\r', '\n' };
57+
58+
// Renders a PDF for a fixed test string and validates its cross-reference data:
// the text uses CRLF line endings only, the offset after "startxref" points at the
// "xref" keyword, every xref entry has the expected shape, and every in-use entry's
// byte offset lands exactly on that object's "N 0 obj" header.
[Fact]
59+
public void pdf_xref_table_is_valid()
60+
{
61+
var gen = new QRCodeGenerator();
62+
var data = gen.CreateQrCode("This is a quick test! 123#?", QRCodeGenerator.ECCLevel.L);
63+
var pdfBytes = new PdfByteQRCode(data).GetGraphic(5);
64+
65+
// Decode the raw PDF bytes as ASCII text so the file can be searched for the %%EOF and startxref markers
66+
var pdfText = Encoding.ASCII.GetString(pdfBytes);
67+
68+
// Substitute a placeholder for every \r\n pair, then verify no stray \n or \r remains; only \r\n should be used (this test file has no binary image data)
69+
pdfText.Replace("\r\n", "CRLF").ShouldNotContain('\n', "PDF should not contain LF line breaks; only CRLF should be used");
70+
pdfText.Replace("\r\n", "CRLF").ShouldNotContain('\r', "PDF should not contain CR line breaks; only CRLF should be used");
71+
72+
// Find the last %%EOF in the file, then search backward from it to find startxref
73+
var eofIndex = pdfText.LastIndexOf("%%EOF", StringComparison.Ordinal);
74+
eofIndex.ShouldBeGreaterThan(0, "%%EOF not found");
75+
76+
var startxrefIndex = pdfText.LastIndexOf("startxref\r\n", eofIndex, StringComparison.Ordinal);
77+
startxrefIndex.ShouldBeGreaterThan(0, "startxref not found");
78+
79+
// Read the xref byte offset (the number on the line after "startxref")
80+
var afterStartxref = startxrefIndex + "startxref\r\n".Length;
81+
var endOfOffset = pdfText.IndexOf("\r\n", afterStartxref, StringComparison.Ordinal);
82+
var xrefOffsetStr = pdfText.Substring(afterStartxref, endOfOffset - afterStartxref);
83+
var xrefOffset = int.Parse(xrefOffsetStr, NumberStyles.None, CultureInfo.InvariantCulture);
84+
xrefOffset.ShouldBeGreaterThan(0, "xref byte offset should be positive");
85+
86+
// Seek to the xref table in the original byte array (not the decoded text) and parse it line by line
87+
using var stream = new MemoryStream(pdfBytes);
88+
stream.Position = xrefOffset;
89+
var reader = new StreamReader(stream, Encoding.ASCII, detectEncodingFromByteOrderMarks: false, bufferSize: 1024, leaveOpen: true);
90+
91+
// First line must be "xref"
92+
var xrefLine = reader.ReadLine();
93+
xrefLine.ShouldBe("xref", "xref keyword not found at expected offset");
94+
95+
// Parse subsections, each headed by "firstObjNum count", until the "trailer" keyword (or end of data) is reached
96+
var objectOffsets = new Dictionary<int, long>();
97+
string? subsectionLine;
98+
while ((subsectionLine = reader.ReadLine()) != null && subsectionLine != "trailer")
99+
{
100+
var parts = subsectionLine.Split(' ');
101+
parts.Length.ShouldBe(2, $"Expected 'firstObj count' but got: {subsectionLine}");
102+
var firstObj = int.Parse(parts[0], NumberStyles.None, CultureInfo.InvariantCulture);
103+
firstObj.ShouldBe(0);
104+
var count = int.Parse(parts[1], NumberStyles.None, CultureInfo.InvariantCulture);
105+
106+
for (int i = 0; i < count; i++)
107+
{
108+
// Each entry: "NNNNNNNNNN GGGGG f\r\n" or "NNNNNNNNNN GGGGG n\r\n" (18 characters once ReadLine has removed the line ending)
109+
var entry = reader.ReadLine();
110+
entry.ShouldNotBeNull();
111+
entry.Length.ShouldBe(18);
112+
var entryParts = entry.Split(' ');
113+
entryParts.Length.ShouldBe(3, $"Expected 'offset gen type' but got: {entry}");
114+
var offset = long.Parse(entryParts[0], NumberStyles.None, CultureInfo.InvariantCulture);
115+
var generation = int.Parse(entryParts[1], NumberStyles.None, CultureInfo.InvariantCulture);
116+
var type = entryParts[2];
117+
type.ShouldBeOneOf("n", "f");
118+
119+
if (type == "n")
120+
{
121+
generation.ShouldBe(0, $"Expected generation 0 for in-use object but got {generation}");
122+
// The loop index doubles as the object number because firstObj was asserted to be 0 above
objectOffsets[i] = offset;
123+
}
124+
else
125+
{
126+
// Free objects should only be listed for the first object in the subsection
127+
i.ShouldBe(0);
128+
offset.ShouldBe(0);
129+
generation.ShouldBe(65535, $"Expected generation 65535 for free object but got {generation}");
130+
}
131+
}
132+
}
133+
134+
objectOffsets.Count.ShouldBeGreaterThan(0, "No in-use objects found in xref table");
135+
136+
// Verify each object: seek to its offset and confirm "N 0 obj" is present
137+
foreach (var kvp in objectOffsets)
138+
{
139+
stream.Position = kvp.Value;
140+
var objNum = kvp.Key;
141+
var offset = kvp.Value;
// A new reader is created after each seek; presumably so no data buffered from a previous position is returned
142+
var objReader = new StreamReader(stream, Encoding.ASCII, detectEncodingFromByteOrderMarks: false, bufferSize: 1024, leaveOpen: true);
143+
var objLine = objReader.ReadLine();
144+
objLine.ShouldNotBeNull($"No content at offset {offset} for object {objNum}");
145+
objLine.ShouldBe($"{objNum} 0 obj", $"Object {objNum} at offset {offset} did not start with '{objNum} 0 obj'");
146+
}
147+
}
57148
}
208 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)