Skip to content

Commit

Permalink
Start of PDF merging
Browse files Browse the repository at this point in the history
  • Loading branch information
tatjam committed Sep 28, 2022
1 parent 9858b95 commit afc219a
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 6 deletions.
1 change: 0 additions & 1 deletion src/document/document.nim
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ proc generate*(path: string, preset: Preset): Document =
# PDF files may not actually have the files if the page is empty, we generate
# it anyway as we will superimpose the pdf files later
if strm.isNil:
echo "What in the world?"
discard
else:
npage = load_page(strm).get()
Expand Down
8 changes: 4 additions & 4 deletions src/gui/base.nim
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ def.save_preset()

load_presets()

let x = download("fa906f70-e0e7-4492-ac6a-1b735b2f251c", all_presets[parseUUID("66d6d990-2fd8-4e31-8260-a53c41a71429")])
#[let x = download("fa906f70-e0e7-4492-ac6a-1b735b2f251c", all_presets[parseUUID("66d6d990-2fd8-4e31-8260-a53c41a71429")])
let doc = sync(x)
doc.generate_pdf("output.pdf")
doc.generate_pdf("output.pdf")]#


#[let y = download("d4bd814c-dc0c-4352-b3bd-e37e8b6576d1")
let y = download("d4bd814c-dc0c-4352-b3bd-e37e8b6576d1", all_presets[parseUUID("66d6d990-2fd8-4e31-8260-a53c41a71429")])
let doc2 = sync(y)
doc2.generate_pdf("output-pdf.pdf")]#
doc2.generate_pdf("output-pdf.pdf")


let MARGINS* = 8
Expand Down
114 changes: 114 additions & 0 deletions src/pdf/pdfcombine.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
# Allows adding pages to PDF files / adding content on top of existing pages
# very barebones implementation, it would be a better idea to wrap a PDF library that
# can read files, but it works
import std/tables
import std/streams
import std/parseutils
import std/strutils
import json

# Discriminator for PDFObj: array, table, or anything else
type PDFObjType = enum
  ARRAY, TABLE, OTHER

# Only array and table objects carry a payload; every other kind of
# object is kept as an empty OTHER node
type PDFObj = ref object
  case kind: PDFObjType
  of ARRAY:
    # items of the array, in order
    elems: seq[PDFObj]
  of TABLE:
    # entries of the table, indexed by key
    children: Table[string, PDFObj]
  of OTHER:
    discard


# Container for the objects read from a PDF file
type
  PDFMap = ref object
    objects: seq[PDFObj]

# Ignores comments
proc get_next_line(f: FileStream): string =
  ## Returns the next line of `f` that is not a comment, including its
  ## trailing '\n'.
  ##
  ## A line is a comment when its very first character is '%'; such lines
  ## are dropped entirely. When the end of the stream is reached the
  ## pending, unterminated line is returned (without a '\n'), or "" if
  ## there is nothing left to read.
  var in_comment = false
  var line_start = true
  # readChar() yields '\0' forever once the stream is exhausted, so the loop
  # has to be bounded by atEnd() or it would never terminate
  while not f.atEnd():
    let c = f.readChar()
    result.add(c)
    if line_start:
      # only a '%' in the first column starts a comment
      in_comment = c == '%'
      line_start = false

    if c == '\n':
      if not in_comment:
        return
      # comment line: throw it away and start collecting the next one
      result.setLen(0)
      in_comment = false
      line_start = true

  # End of stream without a final newline: an unfinished comment is dropped
  if in_comment:
    result.setLen(0)

# We convert the weird syntax into JSON and parse it
proc parse_table(f: FileStream): JsonNode =
  ## Reads a `<<` ... `>>` dictionary from `f`, one entry per line, and
  ## returns it as a JSON object.
  ##
  ## Every entry line is expected to look like `/Key value`. The key is stored
  ## without its first character and the value is kept verbatim as a JSON
  ## string, EXCEPT when the value starts with `<<`, which opens a nested
  ## object that is filled by the following lines until its own `>>`.
  ##
  ## The tree is built directly instead of assembling JSON text, so values
  ## containing quotes or backslashes and empty dictionaries cannot produce
  ## invalid JSON.
  ##
  ## Fails with an assertion error if the first line does not start with
  ## `<<`, if an entry line has no space separator, or if an empty line is
  ## returned by get_next_line before the dictionary is closed.
  # The read is kept outside of the assertion so that it still happens when
  # assertions are compiled out
  let opening = f.get_next_line()
  doAssert opening.startsWith("<<"), "PDF dictionary must start with '<<'"

  result = newJObject()
  # Stack of the dictionaries currently open, innermost last
  var open_tables = @[result]
  while true:
    let raw = f.get_next_line()
    doAssert raw.len > 0, "unexpected end of data inside a PDF dictionary"
    let line = raw.unindent()
    if line.startsWith(">>"):
      discard open_tables.pop()
      if open_tables.len == 0:
        # the outermost dictionary has been closed
        return
      continue

    let separator_loc = line.find(" ")
    doAssert separator_loc >= 0, "PDF dictionary entry without a value"
    # substr bounds are inclusive: stop right before the separator so the key
    # does not end with a space. The first character is skipped.
    let key = line.substr(1, separator_loc - 1)
    # Value is a bit more complicated as it may be many things, but for
    # parsing we lump everything into a string EXCEPT sub dictionaries
    var value = line.substr(separator_loc + 1)
    value.removeSuffix({'\n', '\r', ' '})
    if value.startsWith("<<"):
      let child = newJObject()
      open_tables[^1][key] = child
      open_tables.add(child)
    else:
      open_tables[^1][key] = newJString(value)



# TODO: This could break on HUGE pdf files?
func get_uid(first_num: int, second_num: int): uint64 =
  ## Packs both numbers into a single 64 bit id: `second_num` is moved
  ## 32 bits up and `first_num` is added to it.
  let low_part = uint64(first_num)
  let high_part = uint64(second_num) shl 32
  low_part + high_part

# We only parse obj streams. TODO: This could fail on some very weird PDFs
proc parse_pdf*(path: string): PDFMap =
  ## Scans the file at `path` line by line and, for every line ending in
  ## ` obj`, reads the two numbers of the object header and parses the
  ## dictionary that follows it.
  ##
  ## Returns an empty PDFMap for now: the parsed objects are not stored yet.
  ## Raises IOError if the file cannot be opened.
  let file = newFileStream(path, fmRead)
  # newFileStream reports failure by returning nil instead of raising
  if file.isNil:
    raise newException(IOError, "could not open PDF file: " & path)
  # Close the file on every exit path, including errors raised while parsing
  defer: file.close()

  result = PDFMap()
  while not file.atEnd():
    let line = file.get_next_line()
    # Parse an object
    if line.endsWith(" obj\n") or line.endsWith(" obj\r\n"):
      # First line defines object ID as two numbers separated by blanks
      var first_num, second_num: int
      var pos = line.parseInt(first_num)
      # parseutils.parseInt does not skip leading whitespace, so step over the
      # separator before reading the second number
      pos += line.skipWhitespace(pos)
      discard line.parseInt(second_num, pos)
      # TODO: the uid and the table are computed but not yet stored in result
      discard get_uid(first_num, second_num)

      # next lines should be a table
      discard file.parse_table()

# page_map indicates whether a page in `over` goes into a new page (true)
# or goes on top of the existing page (false, i.e. an overlay).
# Afterwards we COULD linearize or optimize the PDF for performance, but for relatively
# small updates (which these are, a few lines on top of a long pdf) it should be good
proc overlap_pdf*(base, over: string, page_map: seq[bool]) =
  ## Not implemented yet: currently does nothing with its arguments.
  discard
6 changes: 5 additions & 1 deletion src/remarkablenim.nim
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
import gui/base
#import gui/base

import pdf/pdfcombine

discard parse_pdf("./retmp/data/d4bd814c-dc0c-4352-b3bd-e37e8b6576d1.pdf")

0 comments on commit afc219a

Please sign in to comment.