From 6d4e3ba8a43d8a084a94f9cea4669cb35812fb67 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Fri, 20 May 2022 09:14:52 -0400 Subject: Test (and fix) handling of dangling references --- libqpdf/QPDF_json.cc | 10 ++++-- qpdf/qpdf.testcov | 1 + qpdf/qtest/qpdf/good13.out | 3 +- qpdf/qtest/qpdf/good13.pdf | 11 +++++-- qpdf/qtest/qpdf/good13.qdf | 53 ++++++++++++++++++------------- qpdf/qtest/qpdf/json-changed-good13.pdf | 53 ++++++++++++++++++------------- qpdf/qtest/qpdf/manual-qpdf-json-out.json | 12 +++++-- qpdf/qtest/qpdf/manual-qpdf-json-pdf.json | 26 +++++++++------ qpdf/qtest/qpdf/manual-qpdf-json.json | 5 ++- qpdf/qtest/qpdf/manual-qpdf-json.pdf | 45 ++++++++++++++++---------- 10 files changed, 140 insertions(+), 79 deletions(-) diff --git a/libqpdf/QPDF_json.cc b/libqpdf/QPDF_json.cc index b7fe4368..18631114 100644 --- a/libqpdf/QPDF_json.cc +++ b/libqpdf/QPDF_json.cc @@ -147,6 +147,7 @@ QPDF::JSONReactor::arrayStart() void QPDF::JSONReactor::containerEnd(JSON const& value) { + auto from_state = state; state = state_stack.back(); state_stack.pop_back(); if (state == st_initial) { @@ -215,10 +216,13 @@ QPDF::JSONReactor::containerEnd(JSON const& value) if (!parse_error) { object_stack.pop_back(); } - } else if (state == st_qpdf) { + } else if ((state == st_top) && (from_state == st_qpdf)) { for (auto const& og: this->reserved) { - // QXXXQ - // QTC::TC("qpdf", "QPDF_json non-trivial null reserved"); + // Handle dangling indirect object references which the + // PDF spec says to treat as nulls. It's tempting to make + // this an error, but that would be wrong since valid + // input files may have these. + QTC::TC("qpdf", "QPDF_json non-trivial null reserved"); this->pdf.replaceObject(og, QPDFObjectHandle::newNull()); } this->reserved.clear(); diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 9b1910fb..65aa421a 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -675,3 +675,4 @@ QPDF_json ignore second-level key 0 QPDF_json ignore unknown key in object_top 0 QPDF_json ignore unknown key in trailer 0 QPDF_json ignore unknown key in stream 0 +QPDF_json non-trivial null reserved 0 diff --git a/qpdf/qtest/qpdf/good13.out b/qpdf/qtest/qpdf/good13.out index 6df4bd7d..e6cb1d32 100644 --- a/qpdf/qtest/qpdf/good13.out +++ b/qpdf/qtest/qpdf/good13.out @@ -1,10 +1,11 @@ /QTest is indirect and has type dictionary (9) /QTest is a dictionary + /dangling-ref-for-json-test is direct /hex strings is direct /indirect is indirect /names is direct /nesting is direct /strings is direct unparse: 7 0 R -unparseResolved: << /hex#20strings [ (Potato) <01020300040560> (AB) ] /indirect 8 0 R /names [ /nesting /hex#20strings /text#2fplain ] /nesting << /a [ 1 2 << /x (y) >> [ (z) ] ] /b << / (legal) /a [ 1 2 ] >> >> /strings [ (one) <24a2> () (\(\)) (\() (\)) (a\f\b\t\r\nb) (") ("") ("\("\)") <410042> (a\nb) (a b) ] >> +unparseResolved: << /dangling-ref-for-json-test [ 9 0 R ] /hex#20strings [ (Potato) <01020300040560> (AB) ] /indirect 8 0 R /names [ /nesting /hex#20strings /text#2fplain ] /nesting << /a [ 1 2 << /x (y) >> [ (z) ] ] /b << / (legal) /a [ 1 2 ] >> >> /strings [ (one) <24a2> () (\(\)) (\() (\)) (a\f\b\t\r\nb) (") ("") ("\("\)") <410042> (a\nb) (a b) ] >> test 1 done diff --git a/qpdf/qtest/qpdf/good13.pdf b/qpdf/qtest/qpdf/good13.pdf index 89953222..0c97b89f 100644 --- a/qpdf/qtest/qpdf/good13.pdf +++ b/qpdf/qtest/qpdf/good13.pdf @@ -74,6 +74,7 @@ b) (π) (🥔)] /b <> >> /names [/n#65sting /hex#20strings /text#2fplain] + /dangling-ref-for-json-test [9 0 R] >> endobj @@ -81,6 +82,12 @@ endobj (hello) endobj +% NOTE: +% +% If adding a new object, update /dangling-ref-for-json-test to point +% to something that's not here. +% + xref 0 9 0000000000 65535 f @@ -91,12 +98,12 @@ xref 0000000403 00000 n 0000000438 00000 n 0000000556 00000 n -0000000908 00000 n +0000000946 00000 n trailer << /Size 9 /Root 1 0 R /QTest 7 0 R >> startxref -932 +1085 %%EOF diff --git a/qpdf/qtest/qpdf/good13.qdf b/qpdf/qtest/qpdf/good13.qdf index 297c35f3..236ee585 100644 --- a/qpdf/qtest/qpdf/good13.qdf +++ b/qpdf/qtest/qpdf/good13.qdf @@ -13,12 +13,15 @@ endobj %% Original object ID: 7 0 2 0 obj << + /dangling-ref-for-json-test [ + 4 0 R + ] /hex#20strings [ (Potato) <01020300040560> (AB) ] - /indirect 4 0 R + /indirect 5 0 R /names [ /nesting /hex#20strings @@ -68,22 +71,27 @@ endobj << /Count 1 /Kids [ - 5 0 R + 6 0 R ] /Type /Pages >> endobj -%% Original object ID: 8 0 +%% Original object ID: 9 0 4 0 obj +null +endobj + +%% Original object ID: 8 0 +5 0 obj (hello) endobj %% Page 1 %% Original object ID: 3 0 -5 0 obj +6 0 obj << - /Contents 6 0 R + /Contents 7 0 R /MediaBox [ 0 0 @@ -93,9 +101,9 @@ endobj /Parent 3 0 R /Resources << /Font << - /F1 8 0 R + /F1 9 0 R >> - /ProcSet 9 0 R + /ProcSet 10 0 R >> /Type /Page >> @@ -103,9 +111,9 @@ endobj %% Contents for page 1 %% Original object ID: 4 0 -6 0 obj +7 0 obj << - /Length 7 0 R + /Length 8 0 R >> stream BT @@ -116,12 +124,12 @@ ET endstream endobj -7 0 obj +8 0 obj 44 endobj %% Original object ID: 6 0 -8 0 obj +9 0 obj << /BaseFont /Helvetica /Encoding /WinAnsiEncoding @@ -132,7 +140,7 @@ endobj endobj %% Original object ID: 5 0 -9 0 obj +10 0 obj [ /PDF /Text @@ -140,23 +148,24 @@ endobj endobj xref -0 10 +0 11 0000000000 65535 f 0000000052 00000 n 0000000133 00000 n -0000000710 00000 n -0000000809 00000 n -0000000870 00000 n -0000001112 00000 n -0000001211 00000 n -0000001257 00000 n -0000001402 00000 n +0000000756 00000 n +0000000855 00000 n +0000000903 00000 n +0000000964 00000 n +0000001207 00000 n +0000001306 00000 n +0000001352 00000 n +0000001497 00000 n trailer << /QTest 2 0 R /Root 1 0 R - /Size 10 + /Size 11 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> startxref -1437 +1533 %%EOF diff --git a/qpdf/qtest/qpdf/json-changed-good13.pdf b/qpdf/qtest/qpdf/json-changed-good13.pdf index b8240b18..2c25334c 100644 --- a/qpdf/qtest/qpdf/json-changed-good13.pdf +++ b/qpdf/qtest/qpdf/json-changed-good13.pdf @@ -13,12 +13,15 @@ endobj %% Original object ID: 7 0 2 0 obj << + /dangling-ref-for-json-test [ + 4 0 R + ] /hex#20strings [ (Potato) <01020300040560> (AB) ] - /indirect 4 0 R + /indirect 5 0 R /names [ /nesting /hex#20strings @@ -68,22 +71,27 @@ endobj << /Count 1 /Kids [ - 5 0 R + 6 0 R ] /Type /Pages >> endobj -%% Original object ID: 8 0 +%% Original object ID: 9 0 4 0 obj +null +endobj + +%% Original object ID: 8 0 +5 0 obj (hello) endobj %% Page 1 %% Original object ID: 3 0 -5 0 obj +6 0 obj << - /Contents 6 0 R + /Contents 7 0 R /MediaBox [ 0 0 @@ -93,9 +101,9 @@ endobj /Parent 3 0 R /Resources << /Font << - /F1 8 0 R + /F1 9 0 R >> - /ProcSet 9 0 R + /ProcSet 10 0 R >> /Type /Page >> @@ -103,9 +111,9 @@ endobj %% Contents for page 1 %% Original object ID: 4 0 -6 0 obj +7 0 obj << - /Length 7 0 R + /Length 8 0 R >> stream BT @@ -116,12 +124,12 @@ ET endstream endobj -7 0 obj +8 0 obj 44 endobj %% Original object ID: 6 0 -8 0 obj +9 0 obj << /BaseFont /Helvetica /Encoding /WinAnsiEncoding @@ -132,7 +140,7 @@ endobj endobj %% Original object ID: 5 0 -9 0 obj +10 0 obj [ /PDF /Text @@ -140,23 +148,24 @@ endobj endobj xref -0 10 +0 11 0000000000 65535 f 0000000052 00000 n 0000000133 00000 n -0000000706 00000 n -0000000805 00000 n -0000000866 00000 n -0000001108 00000 n -0000001207 00000 n -0000001253 00000 n -0000001398 00000 n +0000000752 00000 n +0000000851 00000 n +0000000899 00000 n +0000000960 00000 n +0000001203 00000 n +0000001302 00000 n +0000001348 00000 n +0000001493 00000 n trailer << /QTest 2 0 R /Root 1 0 R - /Size 10 + /Size 11 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> startxref -1433 +1529 %%EOF diff --git a/qpdf/qtest/qpdf/manual-qpdf-json-out.json b/qpdf/qtest/qpdf/manual-qpdf-json-out.json index a04c98ae..877351dd 100644 --- a/qpdf/qtest/qpdf/manual-qpdf-json-out.json +++ b/qpdf/qtest/qpdf/manual-qpdf-json-out.json @@ -1,7 +1,7 @@ { "qpdf-v2": { "pdfversion": "2.0", - "maxobjectid": 8, + "maxobjectid": 100, "objects": { "obj:1 0 R": { "value": { @@ -119,7 +119,15 @@ } }, "obj:8 0 R": { - "value": "u:hello" + "value": { + "/dangling": [ + "100 0 R" + ], + "/k1": "u:hello" + } + }, + "obj:100 0 R": { + "value": null }, "trailer": { "value": { diff --git a/qpdf/qtest/qpdf/manual-qpdf-json-pdf.json b/qpdf/qtest/qpdf/manual-qpdf-json-pdf.json index a27f551f..6e9520d4 100644 --- a/qpdf/qtest/qpdf/manual-qpdf-json-pdf.json +++ b/qpdf/qtest/qpdf/manual-qpdf-json-pdf.json @@ -1,7 +1,7 @@ { "qpdf-v2": { "pdfversion": "2.0", - "maxobjectid": 9, + "maxobjectid": 10, "objects": { "obj:1 0 R": { "value": { @@ -79,11 +79,16 @@ } }, "obj:4 0 R": { - "value": "u:hello" + "value": { + "/dangling": [ + "6 0 R" + ], + "/k1": "u:hello" + } }, "obj:5 0 R": { "value": { - "/Contents": "6 0 R", + "/Contents": "7 0 R", "/MediaBox": [ 0, 0, @@ -93,23 +98,26 @@ "/Parent": "3 0 R", "/Resources": { "/Font": { - "/F1": "8 0 R" + "/F1": "9 0 R" }, - "/ProcSet": "9 0 R" + "/ProcSet": "10 0 R" }, "/Type": "/Page" } }, "obj:6 0 R": { + "value": null + }, + "obj:7 0 R": { "stream": { "data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=", "dict": {} } }, - "obj:7 0 R": { + "obj:8 0 R": { "value": 44 }, - "obj:8 0 R": { + "obj:9 0 R": { "value": { "/BaseFont": "/Helvetica", "/Encoding": "/WinAnsiEncoding", @@ -118,7 +126,7 @@ "/Type": "/Font" } }, - "obj:9 0 R": { + "obj:10 0 R": { "value": [ "/PDF", "/Text" @@ -132,7 +140,7 @@ ], "/QTest": "2 0 R", "/Root": "1 0 R", - "/Size": 10 + "/Size": 11 } } } diff --git a/qpdf/qtest/qpdf/manual-qpdf-json.json b/qpdf/qtest/qpdf/manual-qpdf-json.json index 579c88f9..4a7a3f4e 100644 --- a/qpdf/qtest/qpdf/manual-qpdf-json.json +++ b/qpdf/qtest/qpdf/manual-qpdf-json.json @@ -134,7 +134,10 @@ } }, "obj:8 0 R": { - "value": "u:hello" + "value": { + "/k1": "u:hello", + "/dangling": ["100 0 R"] + } } } } diff --git a/qpdf/qtest/qpdf/manual-qpdf-json.pdf b/qpdf/qtest/qpdf/manual-qpdf-json.pdf index d8a40eed..0611bae0 100644 --- a/qpdf/qtest/qpdf/manual-qpdf-json.pdf +++ b/qpdf/qtest/qpdf/manual-qpdf-json.pdf @@ -85,14 +85,19 @@ endobj %% Original object ID: 8 0 4 0 obj -(hello) +<< + /dangling [ + 6 0 R + ] + /k1 (hello) +>> endobj %% Page 1 %% Original object ID: 3 0 5 0 obj << - /Contents 6 0 R + /Contents 7 0 R /MediaBox [ 0 0 @@ -102,19 +107,24 @@ endobj /Parent 3 0 R /Resources << /Font << - /F1 8 0 R + /F1 9 0 R >> - /ProcSet 9 0 R + /ProcSet 10 0 R >> /Type /Page >> endobj +%% Original object ID: 100 0 +6 0 obj +null +endobj + %% Contents for page 1 %% Original object ID: 4 0 -6 0 obj +7 0 obj << - /Length 7 0 R + /Length 8 0 R >> stream BT @@ -125,12 +135,12 @@ ET endstream endobj -7 0 obj +8 0 obj 44 endobj %% Original object ID: 6 0 -8 0 obj +9 0 obj << /BaseFont /Helvetica /Encoding /WinAnsiEncoding @@ -141,7 +151,7 @@ endobj endobj %% Original object ID: 5 0 -9 0 obj +10 0 obj [ /PDF /Text @@ -149,23 +159,24 @@ endobj endobj xref -0 10 +0 11 0000000000 65535 f 0000000052 00000 n 0000000133 00000 n 0000000829 00000 n 0000000928 00000 n -0000000989 00000 n -0000001231 00000 n -0000001330 00000 n -0000001376 00000 n -0000001521 00000 n +0000001029 00000 n +0000001251 00000 n +0000001322 00000 n +0000001421 00000 n +0000001467 00000 n +0000001612 00000 n trailer << /QTest 2 0 R /Root 1 0 R - /Size 10 + /Size 11 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> startxref -1556 +1648 %%EOF -- cgit v1.2.3-70-g09d2