From 02b4e29608216d045f3927cb8cb976f57bd034ed Mon Sep 17 00:00:00 2001 From: petervwyatt <26521615+petervwyatt@users.noreply.github.com> Date: Tue, 2 Jul 2024 01:03:24 +1000 Subject: [PATCH 1/9] Initial PDF COS rouge lexer --- lib/rouge/lexers/pdf.rb | 92 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 lib/rouge/lexers/pdf.rb diff --git a/lib/rouge/lexers/pdf.rb b/lib/rouge/lexers/pdf.rb new file mode 100644 index 0000000000..08220ced2f --- /dev/null +++ b/lib/rouge/lexers/pdf.rb @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +# Adapted from Rouge lib/rouge/lexers/PostScript.rb +module Rouge + module Lexers + class Pdf < RegexLexer + title "PDF" + desc "PDF - Portable Document Format (ISO 32000)" + tag "pdf" + aliases "fdf", "cos" + filenames "*.pdf", "*.fdf" + mimetypes "application/pdf", "application/fdf" # IANA registered media types + + # PDF and FDF files must start with "%PDF-x.y" or "%FDF-x.y" + # where x is the emajor version (1-9) and y is the minor version (0-9) + # Supports invalid PDF versions. + def self.detect?(text) + return true if /^%[PF]DF-[1-9]\.\d/ =~ text + end + + # PDF Delimiters (ISO 32000-2:2020, Table 2) including Ruby whitespace + delimiter = %s"()<>\[\]/%\s" + + delimiter_end = Regexp.new("(?=[#{delimiter}])") + valid_name_chars = Regexp.new("[^#{delimiter}]") + + state :root do + # PDF only has single-line comments: from "%"" to EOL + rule %r'%.*?$', Comment::Single + + # PDF Boolean object + rule %r'(false|true)#{delimiter_end}', Keyword::Constant + + # PDF Null object + rule %r'(null)#{delimiter_end}', Keyword::Constant + + # PDF Hex string - can contain whitespace and span multiple lines + rule %r/<[0-9A-Fa-f\s]+>/m, String::Other + + # PDF Dictionary + rule %r/<>/, Variable::Instance + + # PDF Arrays + rule %r/\[/, Variable::Instance + rule %r/\]/, Variable::Instance + + # PDF literal strings are complex (multi-line, escapes, etc.); enter separate state. + rule %r'\(', String, :stringliteral + + # PDF Name objects - can be empty (nothing after "/") + # No special processing needed for 2-digit hex codes starting with "#" + rule %r'/\/#{valid_name_chars}*#{delimiter_end}', Name::Variable + + # PDF objects and stream (no checking of object number) + rule %r/\d+\s\d+obj#{delimiter_end}/, Keyword::Declaration + rule %r/stream/, Keyword::Declaration + rule %r/endstream/, Keyword::Declaration + rule %r/endobj/, Keyword::Declaration + + # PDF file layout + rule %r/xref/, Keyword::Constant + rule %r/trailer/, Keyword::Constant + rule %r/startxref/, Keyword::Constant + + # PDF cross reference section entries (supposedly 20 bytes including EOL) + rule %r/^\d{10} \d{5} [nf]\s?/, Keyword::Namespace + + # PDF Indirect reference (lax, allows zero as the object number) + rule %r/\d+\s\d+R#{delimiter_end}/. Keyword::Pseudo + + # PDF Real object + rule %r/(\-|\+)?([0-9]+\.?|[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)#{delimiter_end}/, Number::Float + + # PDF Integer object + rule %r/(\-|\+)?[0-9]+#{delimiter_end}/, Number::Integer + + # most likely PDF content stream operators + rule valid_name_chars, Operator::Word + end + + # PDF literal string - see ISO 32000-2:2020 clause 7.3.4.2 and Table 3 + state :stringliteral do + rule %r/\(/, String, :stringliteral # recursive for internal balanced literal strings + rule %r/\)/, String, :pop! + rule %r/\\([0-7]{3}|n|r|t|b|f|\\)/, String::Escape + rule %r/[^\(\)\\]+/, String + end + end + end +end From 82785e3e76463c8e0faf2a75052eb558030f115b Mon Sep 17 00:00:00 2001 From: petervwyatt <26521615+petervwyatt@users.noreply.github.com> Date: Tue, 2 Jul 2024 15:09:55 +1000 Subject: [PATCH 2/9] Update pdf.rb --- lib/rouge/lexers/pdf.rb | 114 ++++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/lib/rouge/lexers/pdf.rb b/lib/rouge/lexers/pdf.rb index 08220ced2f..7c0d48d541 100644 --- a/lib/rouge/lexers/pdf.rb +++ b/lib/rouge/lexers/pdf.rb @@ -1,7 +1,20 @@ # -*- coding: utf-8 -*- # # frozen_string_literal: true -# Adapted from Rouge lib/rouge/lexers/PostScript.rb +# +# PDF = Portable Document Format page description language +# As defined by ISO 32000-2:2020 including resolved errata from https://pdf-issues.pdfa.org/ +# +# The PDF syntax is also know as "COS" and can also be used with FDF (Forms Data Field) files. +# +# This is a token-based parser ONLY! It is intended to syntax highlight full or partial fragments +# of nicely written hand-writteen PDF syntax in documentation such as ISO specifications. It is NOT +# intended to cope with real-world PDFs that will contain arbitrary binary data (that form invalid +# UTF-8 sequences and generate "ArgumentError: invalid byte sequence in UTF-8" Ruby errors) and +# other types of malformation/syntax errors. +# +# Author: Peter Wyatt, CTO, PDF Association. 2024 +# module Rouge module Lexers class Pdf < RegexLexer @@ -13,79 +26,80 @@ class Pdf < RegexLexer mimetypes "application/pdf", "application/fdf" # IANA registered media types # PDF and FDF files must start with "%PDF-x.y" or "%FDF-x.y" - # where x is the emajor version (1-9) and y is the minor version (0-9) - # Supports invalid PDF versions. + # where x is the single digit major version and y is the single digit minor version. def self.detect?(text) - return true if /^%[PF]DF-[1-9]\.\d/ =~ text + return true if /^%(P|F)DF-\d.\d/ =~ text end - # PDF Delimiters (ISO 32000-2:2020, Table 2) including Ruby whitespace - delimiter = %s"()<>\[\]/%\s" - - delimiter_end = Regexp.new("(?=[#{delimiter}])") - valid_name_chars = Regexp.new("[^#{delimiter}]") + # PDF Delimiters (ISO 32000-2:2020, Table 1 and Table 2). + # Ruby whitespace "\s" is /[ \t\r\n\f\v]/ which does not include NUL (ISO 32000-2:2020, Table 1). + # PDF also support 2 character EOL sequences. + delimiter = %r/\(\)<>\[\]\/%\s/ state :root do - # PDF only has single-line comments: from "%"" to EOL - rule %r'%.*?$', Comment::Single + # Start-of-file header comment is special (comment is up to EOL) + rule %r/^%(P|F)DF-\d\.\d.*$/, Comment::Special - # PDF Boolean object - rule %r'(false|true)#{delimiter_end}', Keyword::Constant + # End-of-file marker comment is special (comment is up to EOL) + rule %r/^%%EOF.*$/, Comment::Special - # PDF Null object - rule %r'(null)#{delimiter_end}', Keyword::Constant + # PDF only has single-line comments: from "%" to EOL + rule %r/%.*$/, Comment::Single - # PDF Hex string - can contain whitespace and span multiple lines - rule %r/<[0-9A-Fa-f\s]+>/m, String::Other + # PDF Boolean and null object keywords + rule %r/(false|true|null)/, Keyword::Constant - # PDF Dictionary - rule %r/<>/, Variable::Instance + # PDF Dictionary and array object start and end tokens + rule %r/(<<|>>|\[|\])/, Punctuation - # PDF Arrays - rule %r/\[/, Variable::Instance - rule %r/\]/, Variable::Instance + # PDF Hex string - can contain whitespace and span multiple lines. + # This rule must be after "<<"/">>" + rule %r/<[0-9A-Fa-f\s]*>/m, Str::Other - # PDF literal strings are complex (multi-line, escapes, etc.); enter separate state. - rule %r'\(', String, :stringliteral + # PDF literal strings are complex (multi-line, escapes, etc.). Use separate state machine. + rule %r/\(/, Str, :stringliteral - # PDF Name objects - can be empty (nothing after "/") - # No special processing needed for 2-digit hex codes starting with "#" - rule %r'/\/#{valid_name_chars}*#{delimiter_end}', Name::Variable + # PDF Name objects - can be empty (i.e., nothing after "/"). + # No special processing required for 2-digit hex codes that start with "#". + rule %r/\/[^\(\)<>\[\]\/%\s]*/, Name::Entity - # PDF objects and stream (no checking of object number) - rule %r/\d+\s\d+obj#{delimiter_end}/, Keyword::Declaration - rule %r/stream/, Keyword::Declaration - rule %r/endstream/, Keyword::Declaration - rule %r/endobj/, Keyword::Declaration + # PDF objects and stream (no checking of object ID) + # Note that object number and generation numbers do not have sign. + rule %r/\d+\s\d+\sobj/, Keyword::Declaration + rule %r/(endstream|endobj|stream)/, Keyword::Declaration - # PDF file layout - rule %r/xref/, Keyword::Constant - rule %r/trailer/, Keyword::Constant - rule %r/startxref/, Keyword::Constant + # PDF conventional file layout keywords + rule %r/(startxref|trailer|xref)/, Keyword::Constant - # PDF cross reference section entries (supposedly 20 bytes including EOL) - rule %r/^\d{10} \d{5} [nf]\s?/, Keyword::Namespace + # PDF cross reference section entries (20 bytes including EOL). + # Explicit single SPACE separators. + rule %r/^\d{10} \d{5} (n|f)\s*$/, Keyword::Namespace - # PDF Indirect reference (lax, allows zero as the object number) - rule %r/\d+\s\d+R#{delimiter_end}/. Keyword::Pseudo + # PDF Indirect reference (lax, allows zero as the object number). + # Requires terminating delimiter lookahead to disambiguate from "RG" operator + rule %r/\d+\s\d+\sR(?=[\(\)<>\[\]\/%\s])/, Keyword::Variable # PDF Real object - rule %r/(\-|\+)?([0-9]+\.?|[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)#{delimiter_end}/, Number::Float + rule %r/(\-|\+)?([0-9]+\.?|[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)/, Num::Float # PDF Integer object - rule %r/(\-|\+)?[0-9]+#{delimiter_end}/, Number::Integer + rule %r/(\-|\+)?[0-9]+/, Num::Integer + + # A run of non-delimiters is most likely a PDF content stream + # operator (ISO 32000-2:2020, Annex A). + rule %r/[^\(\)<>\[\]\/%\s]+/, Operator::Word - # most likely PDF content stream operators - rule valid_name_chars, Operator::Word + # Whitespace (except inside strings and comments) is ignored = /[ \t\r\n\f\v]/. + # Ruby doesn't include NUL as whitespace (vs ISO 32000-2:2020 Table 1) + rule %r/\s+/, Text::Whitespace end - # PDF literal string - see ISO 32000-2:2020 clause 7.3.4.2 and Table 3 + # PDF literal string. See ISO 32000-2:2020 clause 7.3.4.2 and Table 3 state :stringliteral do - rule %r/\(/, String, :stringliteral # recursive for internal balanced literal strings - rule %r/\)/, String, :pop! - rule %r/\\([0-7]{3}|n|r|t|b|f|\\)/, String::Escape - rule %r/[^\(\)\\]+/, String + rule %r/\(/, Str, :stringliteral # recursive for internal balanced(!) literal strings + rule %r/\)/, Str, :pop! + rule %r/\\([0-7]{3}|n|r|t|b|f|\\)/, Str::Escape + rule %r/[^\(\)\\]+/, Str end end end From e54e1d31ff76f36c25f702c0ce5de8016554c319 Mon Sep 17 00:00:00 2001 From: petervwyatt <26521615+petervwyatt@users.noreply.github.com> Date: Tue, 2 Jul 2024 15:13:09 +1000 Subject: [PATCH 3/9] Create demo PDF (functional) Needs to be treated as binary for xref to remain valid --- lib/rouge/demos/pdf | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 lib/rouge/demos/pdf diff --git a/lib/rouge/demos/pdf b/lib/rouge/demos/pdf new file mode 100644 index 0000000000..849cf59d52 --- /dev/null +++ b/lib/rouge/demos/pdf @@ -0,0 +1,29 @@ +%PDF-1.6 +%©©©© + +1 0 obj<>>> +endobj +2 0 obj<> +endobj +3 0 obj<>>> +endobj +4 0 obj<> +stream + +8 w 1 j + 1.0 0 0 rg + 0 0 1 RG + 10 10 180 180 re B +endstream +endobj +xref +0 5 +0000000000 65535 f +0000000021 00000 n +0000000113 00000 n +0000000165 00000 n +0000000261 00000 n +trailer +<<18D6B641245C03F28E67D93AD879D6EC>]>> +startxref +371 +%%EOF \ No newline at end of file From 91d499cfda718f1ba6a68e44f37fb14edfae8370 Mon Sep 17 00:00:00 2001 From: petervwyatt <26521615+petervwyatt@users.noreply.github.com> Date: Tue, 2 Jul 2024 15:22:14 +1000 Subject: [PATCH 4/9] Update pdf.rb --- lib/rouge/lexers/pdf.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/rouge/lexers/pdf.rb b/lib/rouge/lexers/pdf.rb index 7c0d48d541..c80dfcd95c 100644 --- a/lib/rouge/lexers/pdf.rb +++ b/lib/rouge/lexers/pdf.rb @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # # frozen_string_literal: true -# # PDF = Portable Document Format page description language # As defined by ISO 32000-2:2020 including resolved errata from https://pdf-issues.pdfa.org/ # @@ -20,7 +19,7 @@ module Lexers class Pdf < RegexLexer title "PDF" desc "PDF - Portable Document Format (ISO 32000)" - tag "pdf" + tag "Pdf" aliases "fdf", "cos" filenames "*.pdf", "*.fdf" mimetypes "application/pdf", "application/fdf" # IANA registered media types From 062647e54a9473f080250ce6056659669cf49427 Mon Sep 17 00:00:00 2001 From: petervwyatt <26521615+petervwyatt@users.noreply.github.com> Date: Tue, 2 Jul 2024 15:22:31 +1000 Subject: [PATCH 5/9] Add basic spec checker --- spec/lexers/pdf_spec.rb | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 spec/lexers/pdf_spec.rb diff --git a/spec/lexers/pdf_spec.rb b/spec/lexers/pdf_spec.rb new file mode 100644 index 0000000000..9fbb001327 --- /dev/null +++ b/spec/lexers/pdf_spec.rb @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +describe Rouge::Lexers::Pdf do + let(:subject) { Rouge::Lexers::Pdf.new } + + describe 'guessing' do + include Support::Guessing + + it 'guesses by filename' do + assert_guess :filename => 'foo.pdf' + assert_guess :filename => 'foo.fdf' + end + + it 'guesses by mimetype' do + assert_guess :mimetype => 'application/pdf' + assert_guess :mimetype => 'application/fdf' + end + + it 'guesses by source' do + assert_guess :source => '%PDF-1.6' + assert_guess :source => '%PDF-2.0' + assert_guess :source => '%PDF-0.3' # Fake PDF version + assert_guess :source => '%PDF-6.8' # Fake PDF version + assert_guess :source => '%FDF-1.2' + end + end + +end From 9cf372f293ea7f617330ce410b964f7440bf0fe4 Mon Sep 17 00:00:00 2001 From: petervwyatt <26521615+petervwyatt@users.noreply.github.com> Date: Tue, 2 Jul 2024 15:47:46 +1000 Subject: [PATCH 6/9] Fixups --- lib/rouge/lexers/pdf.rb | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/rouge/lexers/pdf.rb b/lib/rouge/lexers/pdf.rb index c80dfcd95c..37f7240240 100644 --- a/lib/rouge/lexers/pdf.rb +++ b/lib/rouge/lexers/pdf.rb @@ -1,16 +1,18 @@ # -*- coding: utf-8 -*- # # frozen_string_literal: true +# vim: set ts=2 sw=2 et: # PDF = Portable Document Format page description language # As defined by ISO 32000-2:2020 including resolved errata from https://pdf-issues.pdfa.org/ # -# The PDF syntax is also know as "COS" and can also be used with FDF (Forms Data Field) files. +# The PDF syntax is also known as "COS" and can be used with FDF (Forms Data Field) files as +# per ISO 32000-2:2020 clause 12.7.8. # # This is a token-based parser ONLY! It is intended to syntax highlight full or partial fragments # of nicely written hand-writteen PDF syntax in documentation such as ISO specifications. It is NOT # intended to cope with real-world PDFs that will contain arbitrary binary data (that form invalid # UTF-8 sequences and generate "ArgumentError: invalid byte sequence in UTF-8" Ruby errors) and -# other types of malformation/syntax errors. +# other types of malformations or syntax errors. # # Author: Peter Wyatt, CTO, PDF Association. 2024 # @@ -19,10 +21,10 @@ module Lexers class Pdf < RegexLexer title "PDF" desc "PDF - Portable Document Format (ISO 32000)" - tag "Pdf" - aliases "fdf", "cos" - filenames "*.pdf", "*.fdf" - mimetypes "application/pdf", "application/fdf" # IANA registered media types + tag 'pdf' + aliases "fdf", 'cos' + filenames '*.pdf', '*.fdf' + mimetypes 'application/pdf', 'application/fdf' # IANA registered media types # PDF and FDF files must start with "%PDF-x.y" or "%FDF-x.y" # where x is the single digit major version and y is the single digit minor version. @@ -33,7 +35,7 @@ def self.detect?(text) # PDF Delimiters (ISO 32000-2:2020, Table 1 and Table 2). # Ruby whitespace "\s" is /[ \t\r\n\f\v]/ which does not include NUL (ISO 32000-2:2020, Table 1). # PDF also support 2 character EOL sequences. - delimiter = %r/\(\)<>\[\]\/%\s/ + # NOT USED: delimiter = %r/\(\)<>\[\]\/%\s/ state :root do # Start-of-file header comment is special (comment is up to EOL) From 24889094faa55875dd118b83fde49bf0fc28c2e3 Mon Sep 17 00:00:00 2001 From: petervwyatt <26521615+petervwyatt@users.noreply.github.com> Date: Tue, 2 Jul 2024 16:05:22 +1000 Subject: [PATCH 7/9] Altered tokens for better color --- lib/rouge/lexers/pdf.rb | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/rouge/lexers/pdf.rb b/lib/rouge/lexers/pdf.rb index 37f7240240..8b38cc89e0 100644 --- a/lib/rouge/lexers/pdf.rb +++ b/lib/rouge/lexers/pdf.rb @@ -35,14 +35,13 @@ def self.detect?(text) # PDF Delimiters (ISO 32000-2:2020, Table 1 and Table 2). # Ruby whitespace "\s" is /[ \t\r\n\f\v]/ which does not include NUL (ISO 32000-2:2020, Table 1). # PDF also support 2 character EOL sequences. - # NOT USED: delimiter = %r/\(\)<>\[\]\/%\s/ state :root do # Start-of-file header comment is special (comment is up to EOL) - rule %r/^%(P|F)DF-\d\.\d.*$/, Comment::Special + rule %r/^%(P|F)DF-\d\.\d.*$/, Comment::Preproc # End-of-file marker comment is special (comment is up to EOL) - rule %r/^%%EOF.*$/, Comment::Special + rule %r/^%%EOF.*$/, Comment::Preproc # PDF only has single-line comments: from "%" to EOL rule %r/%.*$/, Comment::Single @@ -62,7 +61,7 @@ def self.detect?(text) # PDF Name objects - can be empty (i.e., nothing after "/"). # No special processing required for 2-digit hex codes that start with "#". - rule %r/\/[^\(\)<>\[\]\/%\s]*/, Name::Entity + rule %r/\/[^\(\)<>\[\]\/%\s]*/, Name::Other # PDF objects and stream (no checking of object ID) # Note that object number and generation numbers do not have sign. @@ -70,7 +69,7 @@ def self.detect?(text) rule %r/(endstream|endobj|stream)/, Keyword::Declaration # PDF conventional file layout keywords - rule %r/(startxref|trailer|xref)/, Keyword::Constant + rule %r/(startxref|trailer|xref)/, Keyword::Declaration # PDF cross reference section entries (20 bytes including EOL). # Explicit single SPACE separators. @@ -78,7 +77,7 @@ def self.detect?(text) # PDF Indirect reference (lax, allows zero as the object number). # Requires terminating delimiter lookahead to disambiguate from "RG" operator - rule %r/\d+\s\d+\sR(?=[\(\)<>\[\]\/%\s])/, Keyword::Variable + rule %r/\d+\s\d+\sR(?=[\(\)<>\[\]\/%\s])/, Name::Decorator # PDF Real object rule %r/(\-|\+)?([0-9]+\.?|[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)/, Num::Float @@ -97,8 +96,10 @@ def self.detect?(text) # PDF literal string. See ISO 32000-2:2020 clause 7.3.4.2 and Table 3 state :stringliteral do - rule %r/\(/, Str, :stringliteral # recursive for internal balanced(!) literal strings + rule %r/\(/, Str, :stringliteral # recursive for internal bracketed strings + rule %r/\\\(/, Str::Escape, :stringliteral # recursive for internal escaped bracketed strings rule %r/\)/, Str, :pop! + rule %r/\\\)/, Str::Escape, :pop! rule %r/\\([0-7]{3}|n|r|t|b|f|\\)/, Str::Escape rule %r/[^\(\)\\]+/, Str end From a8e8c8b0fbadcd2262eae8376fa3566273a1cc77 Mon Sep 17 00:00:00 2001 From: petervwyatt <26521615+petervwyatt@users.noreply.github.com> Date: Tue, 2 Jul 2024 16:05:41 +1000 Subject: [PATCH 8/9] More complex PDF for visual test --- spec/visual/samples/pdf | 58 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 spec/visual/samples/pdf diff --git a/spec/visual/samples/pdf b/spec/visual/samples/pdf new file mode 100644 index 0000000000..40bdca1e75 --- /dev/null +++ b/spec/visual/samples/pdf @@ -0,0 +1,58 @@ +%PDF-1.7 +%©© +1 0 obj +<>/StructTreeRoot null/AA<>>>/Pages 3 0 R>>%comment after dictionary close +endobj +2 0 obj +null%comment after null +endobj +3 0 obj +<null<686932>null[/Dummy](hi3)[(hi4)(hi5)true(hi6)null(hi7)12(hi8)]-1.<>[](hi99)[]null[]<>true<>[<>]<686933>1 0 R[.1 -2 +.3]6 0 R<686934>4 0 R(hi9)2 0 R<>[true]<><686935><>3 0 R<>(hi10)<>null<686936>true(hi11)<686937>(hi12)+.0<686938>] +/Type/Pages/Count 1/Kids[4 0 R%comment after indirect ref +]>>endobj +4 0 obj +<>/ProcSet[null]/ExtGState<>/Font<>>>>>>> +endobj +5 0 obj +<> +stream +BX /BreakMyParser <null<686932>null[/Dummy](hi3)[(hi4)(hi5)true(hi6)null(hi7)12(hi8)]-1.<>[](hi99)[]null[]<>true<>[<>]<686933>[1 2 3]<686934>(hi9)<>[true]<><686935><><>(hi10)<>null<686936>true(hi11)<686937>(hi12)+.0<686938>]>> DP EX +BT/F1 30 Tf 0 Tr 1 0 0 1 10 950 Tm(PDF Ruby Rouge test file)Tj 1 0 0 1 10 900 Tm +(This file must NOT be resaved or modified by any tool!!)Tj ET% 3 colored vector graphic squares that are clipped +/ gs q 40 w 75 75 400 400 re W S % stroke then clip a path with a wide black border +1 0. .0 rg 75 75 200 200 re f 0 1 0 rg 275 75 200 200 re f .0 0 1 rg 275 275 200 200 re f Q +endstream +endobj +6 0 obj<> +endobj +7 0 obj +<%comment after hex string end +/Keywords(PDF,Compacted,Syntax,ISO 32000-2:2020)/CreationDate(D:20200317)/Author(Peter Wyatt)/Creator< 48616e +642d65646974>/Producer<48616e 6 4 2 d 6 5646974>>> +endobj +xref +0 8 +0000000000 65535 f +0000000017 00000 n +0000000332 00000 n +0000000374 00000 n +0000000837 00000 n +0000001198 00000 n +0000002009 00000 n +0000002084 00000 n +trailer +<<18D 6B 641245C033A6E67D93AD879D6EC>]/Size 8>> +startxref + 2403 +%%EOF \ No newline at end of file From 643179c07907953272dfdb493bed06436da2047f Mon Sep 17 00:00:00 2001 From: petervwyatt <26521615+petervwyatt@users.noreply.github.com> Date: Fri, 5 Jul 2024 10:58:09 +1000 Subject: [PATCH 9/9] Added EOL to last line of PDF Added EOL to last line of PDF to pass linelint CI check used by Rouge. This is not required by real PDF files. --- lib/rouge/demos/pdf | 2 +- spec/visual/samples/pdf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rouge/demos/pdf b/lib/rouge/demos/pdf index 849cf59d52..9c8c326987 100644 --- a/lib/rouge/demos/pdf +++ b/lib/rouge/demos/pdf @@ -26,4 +26,4 @@ trailer <<18D6B641245C03F28E67D93AD879D6EC>]>> startxref 371 -%%EOF \ No newline at end of file +%%EOF diff --git a/spec/visual/samples/pdf b/spec/visual/samples/pdf index 40bdca1e75..10da023287 100644 --- a/spec/visual/samples/pdf +++ b/spec/visual/samples/pdf @@ -55,4 +55,4 @@ trailer 45C033A6E67D93AD879D6EC><18D 6B 641245C033A6E67D93AD879D6EC>]/Size 8>> startxref 2403 -%%EOF \ No newline at end of file +%%EOF