forked from zpaq/zpaq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathzpaq.cpp
3881 lines (3571 loc) · 122 KB
/
zpaq.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// zpaq.cpp - Journaling incremental deduplicating archiver
#define ZPAQ_VERSION "7.05"
/*
This software is provided as-is, with no warranty.
I, Matt Mahoney, release this software into
the public domain. This applies worldwide.
In some countries this may not be legally possible; if so:
I grant anyone the right to use this software for any purpose,
without any conditions, unless such conditions are required by law.
zpaq is a journaling (append-only) archiver for incremental backups.
Files are added only when the last-modified date has changed. Both the old
and new versions are saved. You can extract from old versions of the
archive by specifying a date or version number. zpaq supports 5
compression levels, deduplication, AES-256 encryption, and multi-threading
using an open, self-describing format for backward and forward
compatibility in Windows and Linux. See zpaq.pod for usage.
Undocumented options:
-method x... Advanced compression modes, see libzpaq.h
-method s... same as x but in streaming mode.
-detailed show fragment IDs in list.
TO COMPILE:
This program needs libzpaq from http://mattmahoney.net/zpaq/
Recommended compile for Windows with MinGW:
g++ -O3 zpaq.cpp libzpaq.cpp -o zpaq
With Visual C++:
cl /O2 /EHsc zpaq.cpp libzpaq.cpp advapi32.lib
For Linux:
g++ -O3 -Dunix zpaq.cpp libzpaq.cpp -pthread -o zpaq
For BSD or OS/X
g++ -O3 -Dunix -DBSD zpaq.cpp libzpaq.cpp -pthread -o zpaq
Possible options:
-o Name of output executable.
-O3 or /O2 Optimize (faster).
/EHsc Enable exception handling in VC++ (required).
-s Strip debugging symbols. Smaller executable.
/arch:SSE2 Assume x86 processor with SSE2. Otherwise use -DNOJIT.
-msse2 Same. Implied by -m64 for a x86-64 target.
-DNOJIT Don't assume x86 with SSE2 for libzpaq. Slower (disables JIT).
-static Don't assume C++ runtime on target. Bigger executable but safer.
-Dunix Not Windows. Sometimes automatic in Linux. Needed for Mac OS/X.
-fopenmp Parallel divsufsort (faster, implies -pthread, broken in MinGW).
-pthread Required in Linux, implied by -fopenmp.
-DDEBUG Enable run time checks and help screen for undocumented options.
-DPTHREAD Use Pthreads instead of Windows threads. Requires pthreadGC2.dll
or pthreadVC2.dll from http://sourceware.org/pthreads-win32/
-Dunixtest To make -Dunix work in Windows with MinGW.
-Wl,--large-address-aware To make 3 GB available in 32 bit Windows.
-DXP Support Windows XP (disables alternate data stream handling).
*/
#define _FILE_OFFSET_BITS 64 // In Linux make sizeof(off_t) == 8
#define UNICODE // For Windows
#include "libzpaq.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#include <stdint.h>
#include <string>
#include <vector>
#include <map>
#include <algorithm>
#include <stdexcept>
#include <fcntl.h>
#ifndef DEBUG
#define NDEBUG 1
#endif
#include <assert.h>
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
#ifndef unix
#define unix 1
#endif
#endif
#ifdef unix
#define PTHREAD 1
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
#include <dirent.h>
#include <utime.h>
#include <errno.h>
#ifdef BSD
#include <sys/sysctl.h>
#endif
#ifdef unixtest
struct termios {
int c_lflag;
};
#define ECHO 1
#define ECHONL 2
#define TCSANOW 4
int tcgetattr(int, termios*) {return 0;}
int tcsetattr(int, int, termios*) {return 0;}
#else
#include <termios.h>
#endif
#else // Assume Windows
#include <windows.h>
#include <io.h>
#endif
using std::string;
using std::vector;
using std::map;
using std::min;
using std::max;
using libzpaq::StringBuffer;
// Error hook used by libzpaq and by this program. Never returns: a message
// containing "ut of memory" is reported as std::bad_alloc, anything else
// as std::runtime_error carrying msg.
void libzpaq::error(const char* msg) {
  const bool out_of_memory = strstr(msg, "ut of memory") != 0;
  if (out_of_memory)
    throw std::bad_alloc();
  throw std::runtime_error(msg);
}
using libzpaq::error;
// Portable thread types and functions for Windows and Linux. Use like this:
//
// // Create mutex for locking thread-unsafe code
// Mutex mutex; // shared by all threads
// init_mutex(mutex); // initialize in unlocked state
// Semaphore sem; // shared by all threads
// sem.init(n); // n >= 0 is initial state
//
// // Declare a thread function
// ThreadReturn thread(void *arg) { // arg points to in/out parameters
// lock(mutex); // wait if another thread has it first
// release(mutex); // allow another waiting thread to continue
// sem.wait(); // wait until n>0, then --n
// sem.signal(); // ++n to allow waiting threads to continue
// return 0; // must return 0 to exit thread
// }
//
// // Start a thread
// ThreadID tid;
// run(tid, thread, &arg); // runs in parallel
// join(tid); // wait for thread to return
// destroy_mutex(mutex); // deallocate resources used by mutex
// sem.destroy(); // deallocate resources used by semaphore
#ifdef PTHREAD
#include <pthread.h>
typedef void* ThreadReturn;  // value returned by a thread function
typedef pthread_t ThreadID;  // handle identifying a started thread

// Start f(arg) in a new thread and store its handle in tid.
// Throws std::runtime_error if the thread cannot be created. Without this
// check tid would be left indeterminate and a later join(tid) would be
// undefined behavior. The Windows implementation reports the same failure.
void run(ThreadID& tid, ThreadReturn(*f)(void*), void* arg) {
  if (pthread_create(&tid, NULL, f, arg) != 0)
    throw std::runtime_error("pthread_create failed");
}

// Block until the thread identified by tid has returned.
void join(ThreadID tid) {
  pthread_join(tid, NULL);
}
typedef pthread_mutex_t Mutex;  // mutex type

// Initialize m with default attributes.
void init_mutex(Mutex& m) {
  pthread_mutex_init(&m, 0);
}

// Acquire m, blocking while another thread holds it.
void lock(Mutex& m) {
  pthread_mutex_lock(&m);
}

// Release m so that a waiting thread may acquire it.
void release(Mutex& m) {
  pthread_mutex_unlock(&m);
}

// Free the resources held by m.
void destroy_mutex(Mutex& m) {
  pthread_mutex_destroy(&m);
}
// Counting semaphore built from a pthread mutex and condition variable.
// Usage: default-construct, init(n) once, wait()/signal() from any thread,
// then destroy(). sem==-1 marks an object that has not been initialized.
class Semaphore {
public:
  Semaphore() {sem=-1;}

  // Set the initial count to n (n >= 0). Must be called exactly once
  // before any wait() or signal().
  void init(int n) {
    assert(n>=0);
    assert(sem==-1);
    pthread_cond_init(&cv, 0);
    pthread_mutex_init(&mutex, 0);
    sem=n;
  }

  // Release the mutex and condition variable.
  void destroy() {
    assert(sem>=0);
    pthread_mutex_destroy(&mutex);
    pthread_cond_destroy(&cv);
  }

  // Block until the count is positive, then decrement it.
  // Returns 0 on success, or the pthread_cond_wait error code (in which
  // case the count is left unchanged).
  int wait() {
    assert(sem>=0);
    pthread_mutex_lock(&mutex);
    // pthread_cond_wait may return spuriously, and another waiter may take
    // the count between the signal and our wakeup, so the predicate has to
    // be re-checked in a loop rather than tested once.
    while (sem==0) {
      const int r=pthread_cond_wait(&cv, &mutex);
      if (r!=0) {
        pthread_mutex_unlock(&mutex);
        return r;
      }
    }
    assert(sem>0);
    --sem;
    pthread_mutex_unlock(&mutex);
    return 0;
  }

  // Increment the count and wake one waiting thread, if any.
  void signal() {
    assert(sem>=0);
    pthread_mutex_lock(&mutex);
    ++sem;
    pthread_cond_signal(&cv);
    pthread_mutex_unlock(&mutex);
  }

private:
  pthread_cond_t cv;      // signalled whenever the count is incremented
  pthread_mutex_t mutex;  // protects sem and is paired with cv
  int sem;                // semaphore count, -1 before init()
};
#else // Windows
typedef DWORD ThreadReturn;  // value returned by a thread function
typedef HANDLE ThreadID;     // handle of a started thread

// Start f(arg) in a new thread and store its handle in tid.
// Calls error() if CreateThread fails.
void run(ThreadID& tid, ThreadReturn(*f)(void*), void* arg) {
  const LPTHREAD_START_ROUTINE entry=(LPTHREAD_START_ROUTINE)f;
  tid=CreateThread(NULL, 0, entry, arg, 0, NULL);
  if (!tid)
    error("CreateThread failed");
}

// Block until the thread identified by tid has returned.
void join(ThreadID& tid) {
  WaitForSingleObject(tid, INFINITE);
}
typedef HANDLE Mutex;  // mutex type

// Create an unnamed mutex that is initially not owned.
void init_mutex(Mutex& m) {
  m=CreateMutex(NULL, FALSE, NULL);
}

// Acquire m, blocking while another thread owns it.
void lock(Mutex& m) {
  WaitForSingleObject(m, INFINITE);
}

// Release ownership of m.
void release(Mutex& m) {
  ReleaseMutex(m);
}

// Close the handle of m.
void destroy_mutex(Mutex& m) {
  CloseHandle(m);
}
// Counting semaphore wrapping a Windows semaphore handle.
// Usage: default-construct, init(n) once, wait()/signal(), then destroy().
class Semaphore {
public:
  enum {MAXCOUNT=2000000000};  // maximum count passed to CreateSemaphore

  Semaphore(): h(NULL) {}

  // Create the semaphore with initial count n.
  void init(int n) {
    assert(!h);
    h=CreateSemaphore(NULL, n, MAXCOUNT, NULL);
  }

  // Close the semaphore handle.
  void destroy() {
    assert(h);
    CloseHandle(h);
  }

  // Wait without timeout; returns the WaitForSingleObject result.
  int wait() {
    assert(h);
    return WaitForSingleObject(h, INFINITE);
  }

  // Increase the count by 1.
  void signal() {
    assert(h);
    ReleaseSemaphore(h, 1, NULL);
  }

private:
  HANDLE h;  // Windows semaphore, NULL before init()
};
#endif
#ifdef _MSC_VER // Microsoft C++
#define fseeko(a,b,c) _fseeki64(a,b,c)
#define ftello(a) _ftelli64(a)
#else
#ifndef unix
#ifndef fseeko
#define fseeko(a,b,c) fseeko64(a,b,c)
#endif
#ifndef ftello
#define ftello(a) ftello64(a)
#endif
#endif
#endif
// For testing -Dunix in Windows
#ifdef unixtest
#define lstat(a,b) stat(a,b)
#define mkdir(a,b) mkdir(a)
#ifndef fseeko
#define fseeko(a,b,c) fseeko64(a,b,c)
#endif
#ifndef ftello
#define ftello(a) ftello64(a)
#endif
#endif
// Global variables
int64_t global_start=0; // set to mtime() at start of main()
// Signed size of a string, vector, or any other object with a size() member.
//
// The std::basic_string and std::vector overloads exist because C++17 added
// std::size(const C&), which argument-dependent lookup finds for standard
// containers. An unqualified size(s) would then be ambiguous between
// std::size and the generic template below. The overloads are more
// specialized than both, so overload resolution picks them.
template <typename T> int size(const T& x) {
  return static_cast<int>(x.size());
}
template <typename C, typename Tr, typename A>
int size(const std::basic_string<C, Tr, A>& x) {
  return static_cast<int>(x.size());
}
template <typename T, typename A>
int size(const std::vector<T, A>& x) {
  return static_cast<int>(x.size());
}
// In Windows, convert 16-bit wide string to UTF-8 and \ to /
#ifndef unix
// Convert the NUL-terminated wide string s to UTF-8, writing '/' for every
// '\\' found. Returns an empty string when s is NULL.
// Each 16-bit unit is encoded on its own as 1, 2 or 3 bytes.
// NOTE(review): surrogate pairs do not appear to be combined here, so a
// character above U+FFFF would come out as two 3-byte sequences - confirm
// whether callers care.
string wtou(const wchar_t* s) {
assert(sizeof(wchar_t)==2); // Not true in Linux
assert((wchar_t)(-1)==65535);
string r;
if (!s) return r;
for (; *s; ++s) {
// path separator: backslash becomes forward slash
if (*s=='\\') r+='/';
// 1 byte: values below 128 are copied unchanged
else if (*s<128) r+=*s;
// 2 bytes: 110xxxxx 10xxxxxx
else if (*s<2048) r+=192+*s/64, r+=128+*s%64;
// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
else r+=224+*s/4096, r+=128+*s/64%64, r+=128+*s%64;
}
return r;
}
// In Windows, convert the NUL-terminated UTF-8 string ss to a wide string.
// Bytes that do not start a well-formed 1, 2 or 3 byte sequence are skipped,
// so invalid UTF-8 and code points needing 4 bytes (>64K) are dropped.
// If doslash then convert "/" to "\". Returns an empty string if ss is NULL.
std::wstring utow(const char* ss, bool doslash=false) {
assert(sizeof(wchar_t)==2);
assert((wchar_t)(-1)==65535);
std::wstring r;
if (!ss) return r;
// work on unsigned bytes so the range comparisons below behave
const unsigned char* s=(const unsigned char*)ss;
for (; s && *s; ++s) {
if (s[0]=='/' && doslash) r+='\\';
// 1 byte sequence
else if (s[0]<128) r+=s[0];
// 2 byte sequence: lead 192..223 followed by one continuation byte.
// s[1] is at worst the terminating NUL, which fails the range test.
else if (s[0]>=192 && s[0]<224 && s[1]>=128 && s[1]<192)
r+=(s[0]-192)*64+s[1]-128, ++s;
// 3 byte sequence: lead 224..239 followed by two continuation bytes.
// s[2] is only read after s[1] was accepted as a continuation byte.
else if (s[0]>=224 && s[0]<240 && s[1]>=128 && s[1]<192
&& s[2]>=128 && s[2]<192)
r+=(s[0]-224)*4096+(s[1]-128)*64+s[2]-128, s+=2;
// any other byte is ignored
}
return r;
}
#endif
// Print a UTF-8 string to f (stdout, stderr) so it displays properly
void printUTF8(const char* s, FILE* f=stdout) {
assert(f);
assert(s);
#ifdef unix
fprintf(f, "%s", s);
#else
const HANDLE h=(HANDLE)_get_osfhandle(_fileno(f));
DWORD ft=GetFileType(h);
if (ft==FILE_TYPE_CHAR) {
fflush(f);
std::wstring w=utow(s); // Windows console: convert to UTF-16
DWORD n=0;
WriteConsole(h, w.c_str(), w.size(), &n, 0);
}
else // stdout redirected to file
fprintf(f, "%s", s);
#endif
}
// Return a time in milliseconds, meaningful only as a difference between
// two calls. On unix it is derived from gettimeofday(). On Windows it is
// GetTickCount(), advanced by 2^32 when the 32-bit tick counter reads lower
// than global_start.
int64_t mtime() {
#ifdef unix
  timeval now;
  gettimeofday(&now, 0);
  const int64_t whole_ms=now.tv_sec*1000LL;
  return whole_ms+now.tv_usec/1000;
#else
  int64_t ticks=GetTickCount();
  if (ticks<global_start)
    ticks+=0x100000000LL;
  return ticks;
#endif
}
// Convert 64 bit decimal YYYYMMDDHHMMSS to "YYYY-MM-DD HH:MM:SS"
// where -1 = unknown date, 0 = deleted.
// Unknown and deleted dates yield a blank string of the same width
// (19 characters) as a formatted date, so listings stay column-aligned.
string dateToString(int64_t date) {
  static const int WIDTH=19;  // strlen("YYYY-MM-DD HH:MM:SS")
  if (date<=0) return string(WIDTH, ' ');
  string s="0000-00-00 00:00:00";
  // Positions in s of the 14 digits, least significant (seconds) first.
  static const int t[]={18,17,15,14,12,11,9,8,6,5,3,2,1,0};
  for (int i=0; i<14; ++i) {
    s[t[i]]+=int(date%10);
    date/=10;
  }
  return s;
}
// Convert attributes to a readable format of at least 5 characters.
// The low byte of attrib selects the platform, the remaining bits hold the
// platform's attribute word:
//   'u' (unix):    a file type character followed by 4 octal mode digits,
//                  e.g. "d0755".
//   'w' (Windows): one letter per set attribute bit, padded with spaces
//                  to 5 characters and extended if more than 5 are set.
// Any other value yields 5 spaces.
string attrToString(int64_t attrib) {
  // r must start out 5 characters long: both branches assign r[0..4] by
  // index, which would be out of bounds on a shorter string.
  string r(5, ' ');
  if ((attrib&255)=='u') {
    // file type: bits 12..15 of the mode word
    r[0]="0pc3d5b7 9lBsDEF"[(attrib>>20)&15];
    // permission digits, least significant octal digit last
    for (int i=0; i<4; ++i)
      r[4-i]=(attrib>>(8+3*i))%8+'0';
  }
  else if ((attrib&255)=='w') {
    for (int i=0, j=0; i<32; ++i) {
      if ((attrib>>(i+8))&1) {
        const char c="RHS DAdFTprCoIEivs89012345678901"[i];
        if (j<5) r[j]=c;
        else r+=c;
        ++j;
      }
    }
  }
  return r;
}
// Convert seconds since 0000 1/1/1970 to 64 bit decimal YYYYMMDDHHMMSS.
// Valid from 1970 to 2099. An input of -1 is treated as 0.
int64_t decimal_time(time_t tt) {
  if (tt==-1) tt=0;
  int64_t t=(sizeof(tt)==4) ? unsigned(tt) : tt;

  // time of day
  const int second=t%60;
  const int minute=t/60%60;
  const int hour=t/3600%24;

  // calendar date: work in 4 year terms of 1461 days
  t/=86400;  // days since Jan 1 1970
  const int term=t/1461;
  t%=1461;

  // Pad the three non leap years of the term with a Feb 29 so that every
  // year is 366 days long. Each test sees the padding already applied.
  static const int feb29_pad[3]={59, 425, 1157};
  for (int i=0; i<3; ++i)
    if (t>=feb29_pad[i]) ++t;
  const int year=term*4+t/366+1970;
  t%=366;

  // Pad the short months so that every month is 31 days long:
  // Feb gets 2 days, then Apr, June, Sept and Nov get 1 day each.
  if (t>=60) t+=2;
  static const int day31_pad[4]={123, 185, 278, 340};
  for (int i=0; i<4; ++i)
    if (t>=day31_pad[i]) ++t;
  const int month=t/31+1;
  const int day=t%31+1;

  return year*10000000000LL+month*100000000+day*1000000
         +hour*10000+minute*100+second;
}
// Convert decimal date YYYYMMDDHHMMSS to time_t - inverse of decimal_time().
// Returns -1 if date <= 0 (unknown or deleted).
// The arithmetic is carried out in 64 bits: the number of seconds exceeds
// the range of a 32 bit int for dates after 2038-01-19, which decimal_time()
// (valid to 2099) can produce.
time_t unix_time(int64_t date) {
  if (date<=0) return -1;
  // days before the first of each month in a non leap year
  static const int days[12]={0,31,59,90,120,151,181,212,243,273,304,334};
  const int year=date/10000000000LL%10000;
  int month=(date/100000000%100-1)%12;
  if (month<0) month=0;  // month field 00: avoid indexing days[-1]
  const int day=date/1000000%100;
  const int hour=date/10000%100;
  const int minute=date/100%100;
  const int sec=date%100;
  // days since 1970-01-01, counting Feb 29 of the current year only once
  // March has been reached
  const int64_t day_count=day-1+days[month]+(year%4==0 && month>1)
                          +((year-1970)*1461+1)/4;
  return time_t(day_count*86400+hour*3600+minute*60+sec);
}
/////////////////////////////// File //////////////////////////////////
// Return the decimal representation of non-negative x, left-padded with
// zeros to at least n digits. With x==0 and n==0 the result is empty.
string itos(int64_t x, int n=1) {
  assert(x>=0);
  assert(n>=0);
  string reversed;  // digits collected least significant first
  while (x!=0 || n>0) {
    reversed+=char('0'+x%10);
    x/=10;
    --n;
  }
  return string(reversed.rbegin(), reversed.rend());
}
// Substitute the part number into the wildcards of fn, scanning from the
// end of the name: each '?' takes the next least significant digit of part,
// and a '*' takes all of the digits that remain (after which part is 0).
string subpart(string fn, int part) {
  int j=int(fn.size())-1;
  while (j>=0) {
    const char c=fn[j];
    if (c=='?') {
      fn[j]='0'+part%10;
      part/=10;
    }
    else if (c=='*') {
      fn.replace(j, 1, itos(part));
      part=0;
    }
    --j;
  }
  return fn;
}
// Return true if the named file or directory exists. filename is UTF-8;
// one trailing '/' is ignored. If part>0 then * and ? in filename are first
// replaced with part or its digits. An empty name never exists.
bool exists(string filename, int part=0) {
  if (part>0)
    filename=subpart(filename, part);
  if (filename.empty())
    return false;
  const size_t last=filename.size()-1;
  if (filename[last]=='/')
    filename.erase(last);
#ifdef unix
  struct stat info;
  return lstat(filename.c_str(), &info)==0;
#else
  const std::wstring wide=utow(filename.c_str(), true);
  return GetFileAttributes(wide.c_str())!=INVALID_FILE_ATTRIBUTES;
#endif
}
// Remove the named file (UTF-8 name). Returns true on success.
bool delete_file(const char* filename) {
#ifdef unix
  const int rc=remove(filename);
  return rc==0;
#else
  const std::wstring wide=utow(filename, true);
  return DeleteFile(wide.c_str());
#endif
}
#ifdef unix
// Report the most recent system error on stderr, prefixed with filename
// (delegates to perror).
void printerr(const char* filename)
{
  perror(filename);
}
#else
// Print filename followed by a description of GetLastError() to stderr.
// A handful of common error codes get a readable message; any other code
// is printed numerically.
void printerr(const char* filename) {
  fflush(stdout);
  const int err=GetLastError();
  printUTF8(filename, stderr);
  const char* text=0;
  switch (err) {
    case ERROR_FILE_NOT_FOUND:    text=": file not found\n"; break;
    case ERROR_PATH_NOT_FOUND:    text=": path not found\n"; break;
    case ERROR_ACCESS_DENIED:     text=": access denied\n"; break;
    case ERROR_SHARING_VIOLATION: text=": sharing violation\n"; break;
    case ERROR_BAD_PATHNAME:      text=": bad pathname\n"; break;
    case ERROR_INVALID_NAME:      text=": invalid name\n"; break;
    case ERROR_NETNAME_DELETED:
      text=": network name no longer available\n";
      break;
    default: break;
  }
  if (text)
    fprintf(stderr, "%s", text);
  else
    fprintf(stderr, ": Windows error %d\n", err);
}
// Set the last-modified time of the open handle out from the decimal date
// YYYYMMDDHHMMSS. Does nothing if date <= 0. A SetFileTime failure is
// reported on stderr but is otherwise ignored.
void setDate(HANDLE out, int64_t date) {
  if (date<=0) return;

  // Split the decimal date into calendar fields.
  SYSTEMTIME st;
  st.wYear=date/10000000000LL%10000;
  st.wMonth=date/100000000%100;
  st.wDayOfWeek=0;  // ignored
  st.wDay=date/1000000%100;
  st.wHour=date/10000%100;
  st.wMinute=date/100%100;
  st.wSecond=date%100;
  st.wMilliseconds=0;

  FILETIME ft;
  SystemTimeToFileTime(&st, &ft);
  if (SetFileTime(out, NULL, NULL, &ft)) return;
  fflush(stdout);
  fprintf(stderr, "SetFileTime error %d\n", int(GetLastError()));
}
#endif
// Report an I/O failure: print the last system error prefixed with msg,
// then throw std::runtime_error(msg). Never returns.
void ioerr(const char* msg)
{
  printerr(msg);
  throw std::runtime_error(msg);
}
// Create the directories leading to path as needed. For example, if
// path="/tmp/foo/bar" then directories /, /tmp and /tmp/foo are created
// unless they exist. Afterwards the date (if > 0) and the attributes (if
// their low byte matches the platform: 'u' on unix, 'w' on Windows) are
// applied to path itself, with one trailing slash removed.
void makepath(string path, int64_t date=0, int64_t attr=0) {
  // Cut the path at each separator in turn and create that prefix.
  // Failures (typically "already exists") are ignored.
  const int len=static_cast<int>(path.size());
  for (int i=0; i<len; ++i) {
    const char c=path[i];
    if (c!='\\' && c!='/') continue;
    path[i]=0;
#ifdef unix
    mkdir(path.c_str(), 0777);
#else
    CreateDirectory(utow(path.c_str(), true).c_str(), 0);
#endif
    path[i]='/';
  }

  // Set date and attributes
  string filename=path;
  if (!filename.empty() && filename[filename.size()-1]=='/')
    filename.erase(filename.size()-1);  // remove trailing slash
#ifdef unix
  if (date>0) {
    struct utimbuf times;
    times.actime=time(NULL);
    times.modtime=unix_time(date);
    utime(filename.c_str(), &times);
  }
  if ((attr&255)=='u')
    chmod(filename.c_str(), attr>>8);
#else
  for (size_t i=0; i<filename.size(); ++i)  // change to backslashes
    if (filename[i]=='/') filename[i]='\\';
  const std::wstring wide=utow(filename.c_str(), true);
  if (date>0) {
    HANDLE out=CreateFile(wide.c_str(),
                          FILE_WRITE_ATTRIBUTES, 0, NULL, OPEN_EXISTING,
                          FILE_FLAG_BACKUP_SEMANTICS, NULL);
    if (out==INVALID_HANDLE_VALUE)
      printerr(filename.c_str());
    else {
      setDate(out, date);
      CloseHandle(out);
    }
  }
  if ((attr&255)=='w')
    SetFileAttributes(wide.c_str(), attr>>8);
#endif
}
// Abstract libzpaq::Writer that can also report its position via tell().
class CounterBase: public libzpaq::Writer {
public:
  // Position reported by the concrete writer.
  virtual int64_t tell() const = 0;
};
// Writer that discards its input and only counts how many bytes it was
// given.
struct Counter: public CounterBase {
  int64_t pos;  // count of written bytes

  Counter(): pos(0) {}

  // Count one byte. The value is ignored.
  void put(int c) {
    pos+=1;
  }

  // Count size bytes. The data is ignored.
  void write(const char* bufp, int size) {
    pos=pos+size;
  }

  // Number of bytes counted so far.
  int64_t tell() const {
    return pos;
  }
};
// State shared by InputFile and OutputFile on every platform: a 64 KiB
// I/O buffer with a cursor, plus the optional encryption settings.
class File {
protected:
  enum {BUFSIZE=1<<16};      // buffer size in bytes
  int ptr;                   // next byte to read or write in buf
  libzpaq::Array<char> buf;  // I/O buffer
  libzpaq::AES_CTR *aes;     // if not NULL then encrypt
  int64_t eoff;              // extra offset for multi-file encryption

  File():
    ptr(0),
    buf(BUFSIZE),
    aes(0),
    eoff(0) {}
};
// File types accepting UTF-8 filenames
#ifdef unix
// Buffered file reader for the unix build, implemented with stdio.
// If an AES_CTR object is passed to open(), each buffer read from the file
// is passed through aes->encrypt() at offset tell()+eoff.
class InputFile: public File, public libzpaq::Reader {
FILE* in;
int n; // number of bytes in buf
public:
InputFile(): in(0), n(0) {}
// Open file for reading. Return true if successful.
// If aes then encrypt with aes+eoff.
// The buffer is reset. NOTE(review): a previously opened stream is not
// closed here - presumably callers close() first; confirm.
bool open(const char* filename, libzpaq::AES_CTR* a=0, int64_t e=0) {
in=fopen(filename, "rb");
aes=a;
eoff=e;
n=ptr=0;
return in!=0;
}
// True if open
bool isopen() const {return in!=0;}
// Read into bufp[0..sz-1]. Defined after the class.
int read(char* bufp, int sz);
// Read and return 1 byte (0..255) or EOF
int get() {
assert(in);
if (ptr>=n) {
// buffer exhausted: refill it from the file
assert(ptr==n);
n=fread(&buf[0], 1, BUFSIZE, in);
ptr=0;
if (aes) {
// tell() here is the file offset of buf[0], because ptr is 0 and the
// stdio position has already advanced by n
int64_t off=tell()+eoff;
// offsets below 32 are rejected as the salt
if (off<32) error("attempt to read salt");
aes->encrypt(&buf[0], n, off);
}
if (!n) return EOF;
}
assert(ptr<n);
// mask so that the result is 0..255 even if char is signed
return buf[ptr++]&255;
}
// Return file position of the next byte get() would return: the stdio
// position minus the bytes still unread in buf
int64_t tell() const {
return ftello(in)-n+ptr;
}
// Set file position and discard the buffer.
// SEEK_CUR is converted to an absolute seek based on tell(), since the
// stdio position is ahead of the logical position by the buffered bytes.
void seek(int64_t pos, int whence) {
if (whence==SEEK_CUR) {
whence=SEEK_SET;
pos+=tell();
}
fseeko(in, pos, whence);
n=ptr=0;
}
// Close file if open
void close() {if (in) fclose(in), in=0;}
~InputFile() {close();}
};
// Buffered file writer for the unix build, implemented with stdio.
// Bytes accumulate in buf and reach the file in flush(). If an AES_CTR
// object is passed to open(), each buffer is passed through aes->encrypt()
// at offset (file position + eoff) just before it is written.
class OutputFile: public File, public CounterBase {
FILE* out;
string filename;
public:
OutputFile(): out(0) {}
// Return true if file is open
bool isopen() const {return out!=0;}
// Open for append/update or create if needed.
// If aes then encrypt with aes+eoff.
// An existing file is opened without truncation ("rb+"); otherwise it is
// created ("wb+"). The file position is left at the end of the file.
bool open(const char* filename, libzpaq::AES_CTR* a=0, int64_t e=0) {
assert(!isopen());
ptr=0;
this->filename=filename;
out=fopen(filename, "rb+");
if (!out) out=fopen(filename, "wb+");
aes=a;
eoff=e;
if (out) fseeko(out, 0, SEEK_END);
return isopen();
}
// Flush pending output: encrypt (if aes) and write buf[0..ptr-1].
// A short write prints the system error and calls error().
void flush() {
if (ptr) {
assert(isopen());
assert(ptr>0 && ptr<=BUFSIZE);
if (aes) {
// the stdio position is where buf[0] will land in the file
int64_t off=ftello(out)+eoff;
// offsets below 32 are rejected as the salt
if (off<32) error("attempt to overwrite salt");
aes->encrypt(&buf[0], ptr, off);
}
int n=fwrite(&buf[0], 1, ptr, out);
if (n!=ptr) {
perror(filename.c_str());
error("write failed");
}
ptr=0;
}
}
// Write 1 byte
void put(int c) {
assert(isopen());
if (ptr>=BUFSIZE) {
assert(ptr==BUFSIZE);
flush();
}
assert(ptr>=0 && ptr<BUFSIZE);
buf[ptr++]=c;
}
// Write bufp[0..size-1]. Defined after the class.
void write(const char* bufp, int size);
// Write size bytes at offset pos. Pending output is flushed at the old
// position first; the new data is buffered like any other write.
void write(const char* bufp, int64_t pos, int size) {
assert(isopen());
flush();
fseeko(out, pos, SEEK_SET);
write(bufp, size);
}
// Seek to pos. whence is SEEK_SET, SEEK_CUR, or SEEK_END.
// Pending output is flushed before the position changes.
void seek(int64_t pos, int whence) {
assert(isopen());
flush();
fseeko(out, pos, whence);
}
// return position, counting bytes still waiting in buf
int64_t tell() const {
assert(isopen());
return ftello(out)+ptr;
}
// Truncate file to newsize bytes and leave the file position there.
// An ftruncate failure is reported with perror only.
void truncate(int64_t newsize=0) {
assert(isopen());
seek(newsize, SEEK_SET);
if (ftruncate(fileno(out), newsize)) perror("ftruncate");
}
// Close file and set date if not 0. Set permissions if attr low byte is 'u'
// The date and permissions are applied by name after the stream is closed,
// and also when the file was not open.
void close(int64_t date=0, int64_t attr=0) {
if (out) {
flush();
fclose(out);
}
out=0;
if (date>0) {
struct utimbuf ub;
ub.actime=time(NULL);
ub.modtime=unix_time(date);
utime(filename.c_str(), &ub);
}
if ((attr&255)=='u')
chmod(filename.c_str(), attr>>8);
}
~OutputFile() {close();}
};
#else // Windows
// Buffered file reader for the Windows build, implemented with Win32
// handles. If an AES_CTR object is passed to open(), each buffer read from
// the file is passed through aes->encrypt() at offset tell()+eoff.
class InputFile: public File, public libzpaq::Reader {
HANDLE in; // input file handle
DWORD n; // number of bytes in buf
public:
InputFile():
in(INVALID_HANDLE_VALUE), n(0) {}
// Open for reading. Return true if successful.
// Encrypt with aes+e if aes.
// filename is UTF-8; forward slashes are converted to backslashes. The
// file is opened with read, write and delete sharing allowed.
bool open(const char* filename, libzpaq::AES_CTR* a=0, int64_t e=0) {
assert(in==INVALID_HANDLE_VALUE);
n=ptr=0;
std::wstring w=utow(filename, true);
in=CreateFile(w.c_str(), GENERIC_READ,
FILE_SHARE_READ|FILE_SHARE_WRITE|FILE_SHARE_DELETE, NULL,
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
aes=a;
eoff=e;
return in!=INVALID_HANDLE_VALUE;
}
// True if open
bool isopen() const {return in!=INVALID_HANDLE_VALUE;}
// Read and return 1 byte (0..255) or EOF
int get() {
if (ptr>=int(n)) {
// buffer exhausted: refill it from the file
assert(ptr==int(n));
ptr=0;
ReadFile(in, &buf[0], BUFSIZE, &n, NULL);
if (n==0) return EOF;
if (aes) {
// tell() here is the file offset of buf[0], because ptr is 0 and the
// file pointer has already advanced by n
int64_t off=tell()+eoff;
// offsets below 32 are rejected as the salt
if (off<32) error("attempt to read salt");
aes->encrypt(&buf[0], n, off);
}
}
assert(ptr<int(n));
// mask so that the result is 0..255 even if char is signed
return buf[ptr++]&255;
}
// Read into bufp[0..sz-1]. Defined after the class.
int read(char* bufp, int sz);
// set file pointer and discard the buffer. whence is SEEK_SET, SEEK_CUR
// or SEEK_END. SEEK_CUR is converted to an absolute seek based on tell(),
// since the file pointer is ahead of the logical position by the
// buffered bytes.
void seek(int64_t pos, int whence) {
if (whence==SEEK_SET) whence=FILE_BEGIN;
else if (whence==SEEK_END) whence=FILE_END;
else if (whence==SEEK_CUR) {
whence=FILE_BEGIN;
pos+=tell();
}
// SetFilePointer takes the 64 bit offset as separate low and high halves
LONG offhigh=pos>>32;
SetFilePointer(in, pos, &offhigh, whence);
n=ptr=0;
}
// get file pointer of the next byte get() would return: the handle's
// position minus the bytes still unread in buf
int64_t tell() const {
LONG offhigh=0;
DWORD r=SetFilePointer(in, 0, &offhigh, FILE_CURRENT);
return (int64_t(offhigh)<<32)+r+ptr-n;
}
// Close handle if open
void close() {
if (in!=INVALID_HANDLE_VALUE) {
CloseHandle(in);
in=INVALID_HANDLE_VALUE;
}
}
~InputFile() {close();}
};
// Buffered file writer for the Windows build, implemented with Win32
// handles. Bytes accumulate in buf and reach the file in flush(). If an
// AES_CTR object is passed to open(), each buffer is passed through
// aes->encrypt() at offset (file position + eoff) just before it is written.
class OutputFile: public File, public CounterBase {
HANDLE out; // output file handle
std::wstring filename; // name as passed to CreateFile, with backslashes
public:
OutputFile(): out(INVALID_HANDLE_VALUE) {}
// Return true if file is open
bool isopen() const {
return out!=INVALID_HANDLE_VALUE;
}
// Open file ready to update or append, create if needed.
// If aes then encrypt with aes+e.
// filename_ is UTF-8. If the open fails with ERROR_INVALID_NAME, the
// characters Windows rejects are replaced with '_' and the open is retried
// once. The file pointer is left at the end of the file.
bool open(const char* filename_, libzpaq::AES_CTR* a=0, int64_t e=0) {
assert(!isopen());
ptr=0;
filename=utow(filename_, true); // replace slashes with backslashes
out=CreateFile(filename.c_str(), GENERIC_READ | GENERIC_WRITE,
0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
if (!isopen() && GetLastError()==ERROR_INVALID_NAME) {
for (int i=0; i<size(filename); ++i) { // replace invalid chars in name
wchar_t& c=filename[i];
// control characters and < > * " are always replaced
if (c<32 || c=='<' || c=='>' || c=='*' || c=='"') c='_';
// '?' is kept only at index 2 directly after two leading backslashes
if (c=='?' && (i!=2 || filename[0]!='\\' || filename[1]!='\\')) c='_';
// ':' is kept only at index 1, or at index 5 when the name starts with
// two backslashes, '?' and a backslash
if (c==':' && i!=1 && (i!=5 || filename.substr(0, 4)!=L"\\\\?\\"))
c='_';
}
out=CreateFile(filename.c_str(), GENERIC_READ | GENERIC_WRITE,
0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
}
LONG hi=0;
aes=a;
eoff=e;
if (isopen()) SetFilePointer(out, 0, &hi, FILE_END);
return isopen();
}
// Write pending output: encrypt (if aes) and write buf[0..ptr-1].
// A short write calls error().
void flush() {
assert(isopen());
if (ptr) {
DWORD n=0;
if (aes) {
// tell() includes the ptr buffered bytes, so subtracting ptr gives the
// file offset where buf[0] will land
int64_t off=tell()-ptr+eoff;
// offsets below 32 are rejected as the salt
if (off<32) error("attempt to overwrite salt");
aes->encrypt(&buf[0], ptr, off);
}
WriteFile(out, &buf[0], ptr, &n, NULL);
if (ptr!=int(n)) error("write failed");
ptr=0;
}
}
// Write 1 byte
void put(int c) {
assert(isopen());
if (ptr>=BUFSIZE) {
assert(ptr==BUFSIZE);
flush();
}
buf[ptr++]=c;
}
// Write bufp[0..size-1]. Defined after the class.
void write(const char* bufp, int size);
// Write size bytes at offset pos. Pending output is flushed at the old
// position first, and the seek is skipped when already at pos.
void write(const char* bufp, int64_t pos, int size) {
assert(isopen());
flush();
if (pos!=tell()) seek(pos, SEEK_SET);
write(bufp, size);
}
// set file pointer. whence is SEEK_SET, SEEK_CUR or SEEK_END.
// Pending output is flushed before the position changes.
void seek(int64_t pos, int whence) {
if (whence==SEEK_SET) whence=FILE_BEGIN;
else if (whence==SEEK_CUR) whence=FILE_CURRENT;
else if (whence==SEEK_END) whence=FILE_END;
flush();
// SetFilePointer takes the 64 bit offset as separate low and high halves
LONG offhigh=pos>>32;
SetFilePointer(out, pos, &offhigh, whence);
}
// get file pointer, counting bytes still waiting in buf
int64_t tell() const {
LONG offhigh=0;
DWORD r=SetFilePointer(out, 0, &offhigh, FILE_CURRENT);
return (int64_t(offhigh)<<32)+r+ptr;
}
// Truncate file to newsize bytes and leave the file pointer there
void truncate(int64_t newsize=0) {
seek(newsize, SEEK_SET);
SetEndOfFile(out);
}
// Close file and set date if not 0. Set attr if low byte is 'w'.
// Does nothing if the file is not open. The date is set through the handle
// before it is closed, the attributes by name afterwards.
void close(int64_t date=0, int64_t attr=0) {
if (isopen()) {
flush();
setDate(out, date);
CloseHandle(out);
out=INVALID_HANDLE_VALUE;
if ((attr&255)=='w')
SetFileAttributes(filename.c_str(), attr>>8);
filename=L"";
}
}
~OutputFile() {close();}
};
#endif
// Read up to sz bytes into bufp[0..sz-1] and return the number of bytes
// actually read, which is less than sz only at end of file.
int InputFile::read(char* bufp, int sz) {
  int total=0;  // bytes stored in bufp so far
  while (sz>0) {
    const int avail=int(n)-ptr;  // unread bytes left in buf
    if (avail>0) {
      // bulk copy of whatever is buffered, limited to the request
      const int chunk=avail<sz ? avail : sz;
      memcpy(bufp+total, &buf[ptr], chunk);
      ptr+=chunk;
      sz-=chunk;
      total+=chunk;
      continue;
    }
    // buffer empty: get() refills it and hands back its first byte
    assert(ptr==int(n));
    const int c=get();
    if (c<0) return total;
    bufp[total++]=c;
    --sz;
  }
  return total;
}
// Append bufp[0..sz-1] to the output buffer, flushing to the file every
// time the buffer becomes full.
void OutputFile::write(const char* bufp, int sz) {
  if (ptr==BUFSIZE) flush();
  while (sz>0) {
    assert(ptr>=0 && ptr<BUFSIZE);
    const int room=BUFSIZE-ptr;  // free space left in buf
    const int chunk=sz<room ? sz : room;
    memcpy(&buf[ptr], bufp, chunk);
    ptr+=chunk;
    bufp+=chunk;
    sz-=chunk;
    if (ptr==BUFSIZE) flush();
  }
}
/////////////////////////////// Archive ///////////////////////////////
// An Archive is a multi-part file that supports encrypted input
class Archive: public libzpaq::Reader, public CounterBase {
libzpaq::AES_CTR* aes; // NULL if not encrypted
struct FE { // File element for multi-part archives
string fn; // file name
int64_t end; // size of previous and current files
FE(): end(0) {}
FE(const string& s, int64_t e): fn(s), end(e) {}
};