#!/usr/bin/env python3
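"""Mirror PDFs from a local folder to the reMarkable cloud.

Files matching --glob are copied to a temporary folder and uploaded to the
configured reMarkable folder. A small JSON database under --config-folder
records what has been uploaded, how many pages each document has, and when it
was added, so that already-read or long-unread articles can be cleaned up on
later runs.

Example invocation (paths are illustrative):
    ./main.py --glob '/path/to/pdfs/*.pdf' --remarkable-folder Articles \
        --max-save-count 10 --delete-already-read
"""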
import argparse
import re
import tempfile
import os
import json
import pypdf
import time
import subprocess
import glob
import shutil
from remarkable import Remarkable
from datetime import datetime


def parse_args():
    a = argparse.ArgumentParser(description="Write PDFs from a local folder to the reMarkable cloud")
    a.add_argument('--max-save-count', type=int, default=None, help='Maximum number of articles to keep on the device')
    a.add_argument('--delete-already-read', action='store_true', help='Delete articles in the reMarkable cloud that have already been read')
    a.add_argument('--delete-unread-after-hours', type=int, default=None, help='If an article has not been opened on the device for this many hours and there are new articles to add, it will be deleted. Set to -1 to disable, or 0 to always replace old articles.')
    a.add_argument('--remarkable-folder', default=None, help='Folder title to write to on the reMarkable')
    a.add_argument('--glob', default=None, help='Local glob for files to upload')
    a.add_argument('--remarkable-auth-token', help='For initial authentication with reMarkable: device token')
    a.add_argument('--config-folder', help='Configuration folder for remarkable-mirror')
    a.add_argument('--tmp-folder', help='Temporary storage folder for remarkable-mirror')
    a.add_argument('--remarkable-relogin-command', help='Command to run when a relogin to reMarkable is required (e.g. send a notification)', default=None)
    return a.parse_args()
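

# Device filenames are expected to carry an ID in the final pair of square
# brackets; e.g. a name like "Some Article [abc123].pdf" (illustrative example)
# would yield the ID "abc123", which is what parse_filename() extracts below.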
def parse_filename(fn):
    # Find the ID in the final pair of square brackets
    pattern = r"\[([^\[\]]*)\][^\[\]]*$"
    match = re.search(pattern, fn)
    if match:
        return match.group(1)
    return None


def main(args):
    try:
        rm = Remarkable()
        rm.auth_if_needed(args.remarkable_auth_token)
    except Exception as e:
        if args.remarkable_relogin_command:
            subprocess.run(['/bin/bash', '-c', args.remarkable_relogin_command])
        raise e
    if not rm.is_auth():
        if args.remarkable_relogin_command:
            subprocess.run(['/bin/bash', '-c', args.remarkable_relogin_command])
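
    # List what is currently in the target folder on the device, creating the
    # folder on first run, and make sure a local config folder exists for the
    # JSON tracking database.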
    ls = []
    try:
        ls = rm.ls(args.remarkable_folder)
    except FileNotFoundError:
        rm.mkdir(args.remarkable_folder)
        ls = []

    if not args.config_folder:
        args.config_folder = os.path.join(os.path.expanduser('~'), '.config', 'remarkable-mirror')
        if not os.path.exists(args.config_folder):
            os.makedirs(args.config_folder)
        print(f'Set --config-folder to {args.config_folder}')

    print(f'Existing files in {args.remarkable_folder}: {ls}')

    db_file = os.path.join(args.config_folder, 'db_file.json')
    already_downloaded_ids = set()
    article_data = {}
    if os.path.exists(db_file):
        with open(db_file, 'r') as f:
            article_data = json.load(f)
        already_downloaded_ids = set(article_data.keys())

    existing_ids = set()
    files_to_delete = set()
    delete_if_needed = {}
    now_ts = time.time()
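
    # Walk the documents already on the device: remember their IDs, flag fully
    # read ones for deletion (when --delete-already-read is set), and mark
    # still-unopened ones as deletable if they have sat unread longer than
    # --delete-unread-after-hours.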
    for file in ls:
        id = parse_filename(file)
        if id:
            existing_ids.add(id)
            if id in article_data:
                added_ts = article_data.get(id)['added']
                num_pages = article_data.get(id)['num_pages']
                stat = rm.stat(f'{args.remarkable_folder}/{file}')
                print(f"Check: {file} is on page {1 + stat['CurrentPage']} of {num_pages} total")
                if args.delete_already_read:
                    if 1 + stat['CurrentPage'] == num_pages:
                        print(f"Will delete {file} since already read")
                        files_to_delete.add(f'{args.remarkable_folder}/{file}')
                if stat['CurrentPage'] == 0:
                    unread_hrs = (now_ts - added_ts) / 60 / 60
                    if args.delete_unread_after_hours is not None and args.delete_unread_after_hours >= 0 and unread_hrs >= args.delete_unread_after_hours:
                        print(f"Article not opened after {unread_hrs} hrs, will delete if needed: {file}")
                        delete_if_needed[id] = f'{args.remarkable_folder}/{file}'

    print(f'{existing_ids=}')
    print(f'{delete_if_needed.keys()=}')
    if args.delete_already_read:
        print(f'{files_to_delete=}')

    def to_filename(post):
        return os.path.basename(post)

    new_ids = set()
    fetched_ids = set()
    fetched_old_ids = set()
    all_posts = []
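
    # Scan the local glob for candidate files. Anything not already on the
    # device and not previously mirrored is queued as a new upload, subject to
    # --max-save-count; when the device is full, a long-unread article from
    # delete_if_needed is dropped to make room.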
    files = glob.glob(args.glob)
    for post in files:
        id = str(post)
        fetched_ids.add(id)
        if id not in existing_ids:
            if id not in already_downloaded_ids:
                if args.max_save_count is None or (len(new_ids) + len(existing_ids) < args.max_save_count) or args.max_save_count < 0:
                    print(f'Found new file: {id}: {to_filename(post)}')
                    new_ids.add(id)
                elif len(delete_if_needed) > 0 and args.delete_unread_after_hours is not None and args.delete_unread_after_hours >= 0:
                    delete_id = sorted(delete_if_needed.keys())[0]
                    print(f'Article in delete_if_needed dropped: {delete_id} {delete_if_needed[delete_id]}')
                    files_to_delete.add(delete_if_needed[delete_id])
                    del delete_if_needed[delete_id]
                    print(f'Found new file: {id}: {to_filename(post)}')
                    new_ids.add(id)
                else:
                    print(f'Found but not uploading new file (no space): {id}: {to_filename(post)}')
            else:
                print(f'File already read: {id}: {to_filename(post)}')
        else:
            fetched_old_ids.add(id)
            print(f'File already on remarkable: {id}: {to_filename(post)}')
        all_posts.append(post)

    print(f'{fetched_ids=}')
    print(f'{fetched_old_ids=}')
    print(f'{new_ids=}')

    tmp_dir = tempfile.gettempdir()
    if args.tmp_folder:
        tmp_dir = args.tmp_folder

    to_upload = []
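
    # Copy each new file into the temporary folder, count its pages with pypdf,
    # and record its metadata so read progress can be checked on later runs.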
    for post in all_posts:
        id = str(post)
        if id in new_ids:
            output_file = os.path.join(tmp_dir, to_filename(post))
            print(f"Copying {post} to {output_file}")
            shutil.copy2(post, output_file)
            if not os.path.exists(output_file):
                print(f"Unable to copy {post} to {output_file}. Skipping")
                continue
            num_pages = get_num_pages(output_file)
            article_data[id] = {
                'id': id,
                'num_pages': num_pages,
                'canonical_url': post,
                'filename': to_filename(post),
                'added': now_ts,
            }
            to_upload.append(output_file)
            print(f"Copy complete: {article_data[id]}")

    print(f'Uploading: {to_upload}')
    for f in to_upload:
        print(f'Uploading {f} to {args.remarkable_folder}')
        rm.put(f, args.remarkable_folder)
    print('Upload complete')
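
    # Remove anything flagged for deletion. The assertions are a safety net to
    # make sure we only ever delete paths inside the configured folder.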
    if args.delete_already_read and len(files_to_delete) > 0:
        print('Deleting old files')
        for path in files_to_delete:
            print(f'Deleting {path}')
            assert path.startswith(f'{args.remarkable_folder}/')
            assert '../' not in path
            assert '/..' not in path
            assert len(path) > 2 + len(args.remarkable_folder)
            rm.rm(path)
            id = parse_filename(path)
            if id and id in article_data:
                article_data[id]['deleted'] = now_ts

    with open(db_file, 'w') as f:
        f.write(json.dumps(article_data))
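

# get_num_pages() records the page count at copy time; later runs compare the
# device's CurrentPage against it to decide whether an article has been read.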
def get_num_pages(path):
    with open(path, 'rb') as f:
        r = pypdf.PdfReader(f)
        return len(r.pages)


if __name__ == '__main__':
    args = parse_args()
    main(args)