-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathpxar.go
483 lines (402 loc) · 11.3 KB
/
pxar.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
package main
import (
"bytes"
"encoding/binary"
"fmt"
"math/bits"
"os"
"sort"
// "io/ioutil"
"path/filepath"
"github.com/dchest/siphash"
)
// Type tags of pxar items. A tag is written as the leading uint64 (the hdr
// field) of an item header. The code visible in this file emits PXAR_ENTRY,
// PXAR_FILENAME, PXAR_PAYLOAD and PXAR_GOODBYE; the remaining tags are
// declared but not referenced by it.
const (
PXAR_ENTRY uint64 = 0xd5956474e588acef
PXAR_ENTRY_V1 uint64 = 0x11da850a1c1cceff
PXAR_FILENAME uint64 = 0x16701121063917b3
PXAR_SYMLINK uint64 = 0x27f971e7dbf5dc5f
PXAR_DEVICE uint64 = 0x9fc9e906586d5ce9
PXAR_XATTR uint64 = 0x0dab0229b57dcd03
PXAR_ACL_USER uint64 = 0x2ce8540a457d55b8
PXAR_ACL_GROUP uint64 = 0x136e3eceb04c03ab
PXAR_ACL_GROUP_OBJ uint64 = 0x10868031e9582876
PXAR_ACL_DEFAULT uint64 = 0xbbbb13415a6896f5
PXAR_ACL_DEFAULT_USER uint64 = 0xc89357b40532cd1f
PXAR_ACL_DEFAULT_GROUP uint64 = 0xf90a8a5816038ffe
PXAR_FCAPS uint64 = 0x2da9dd9db5f7fb67
PXAR_QUOTA_PROJID uint64 = 0xe07540e82f7d1cbb
PXAR_HARDLINK uint64 = 0x51269c8422bd7275
PXAR_PAYLOAD uint64 = 0x28147a1b0b7c1a25
PXAR_GOODBYE uint64 = 0x2fec4fa642d5731d
// Same value as the fixed hash WriteDir stores in the last item of every
// goodbye table.
PXAR_GOODBYE_TAIL_MARKER uint64 = 0xef5eed5b753e1555
)
// catalog_magic is the 8-byte header that WriteDir passes to catalogWriteCB
// before any catalog table, when it starts the top-level directory.
var catalog_magic = []byte{145, 253, 96, 249, 196, 103, 88, 213}
// File-type and special-permission bits for the mode field of PXARFileEntry.
// The octal values are the conventional POSIX st_mode ones (IFMT is the
// file-type mask). In this file WriteDir uses IFDIR and WriteFile uses IFREG,
// each OR-ed with 0o777.
const (
IFMT uint64 = 0o0170000
IFSOCK uint64 = 0o0140000
IFLNK uint64 = 0o0120000
IFREG uint64 = 0o0100000
IFBLK uint64 = 0o0060000
IFDIR uint64 = 0o0040000
IFCHR uint64 = 0o0020000
IFIFO uint64 = 0o0010000
ISUID uint64 = 0o0004000
ISGID uint64 = 0o0002000
ISVTX uint64 = 0o0001000
)
// MTime is the modification timestamp embedded in PXARFileEntry. Its fields
// add up to 16 bytes when serialized with encoding/binary.
type MTime struct {
// Seconds; the writers in this file fill it from ModTime().Unix().
secs uint64
// Sub-second part; the writers in this file always store 0.
nanos uint32
// Always written as 0 by the writers in this file.
padding uint32
}
// PXARFileEntry is the metadata item written for every directory and file.
// It is serialized field by field, little-endian, via binary.Write, so the
// field order and widths below are the on-disk layout (56 bytes in total,
// which is the value the writers put into len).
type PXARFileEntry struct {
// Item type tag; the writers use PXAR_ENTRY.
hdr uint64
// Total item size in bytes including hdr and len; the writers use 56.
len uint64
// File type bits (IFDIR / IFREG) OR-ed with permission bits.
mode uint64
// Written as 0 by the writers in this file.
flags uint64
// Owner ids; the writers hard-code 1000 for both.
uid uint32
gid uint32
// Modification time of the entry.
mtime MTime
}
// PXARFilenameEntry is the fixed 16-byte header of a filename item. The
// writers follow it with the name bytes and a terminating 0x00 byte.
type PXARFilenameEntry struct {
// Item type tag; the writers use PXAR_FILENAME.
hdr uint64
// Total item size: 16 header bytes + name length + 1 for the terminator.
len uint64
}
// GoodByeItem is one 24-byte record of a goodbye table, serialized in field
// order (hash, offset, len) by binary.Write.
type GoodByeItem struct {
// siphash of the child's name, or the fixed tail marker value for the
// last record of a table.
hash uint64
// While children are collected this holds the child's absolute start
// position; WriteDir turns it into a distance measured back from the
// start of the goodbye table just before writing it.
offset uint64
// Number of stream bytes written for the child; for the tail record, the
// size of the goodbye table itself.
len uint64
}
// GoodByeBST is a node of a pointer-based binary search tree of goodbye items
// keyed by hash. WriteDir does not use it; it builds the array layout with
// ca_make_bst instead.
type GoodByeBST struct {
// Item stored at this node; AddNode dereferences it, so it must be set.
self *GoodByeItem
// Subtree of items with a smaller hash.
left *GoodByeBST
// Subtree of items with a larger hash.
right *GoodByeBST
}
// AddNode inserts i into the tree rooted at B, ordered by hash: smaller hashes
// go to the left, larger ones to the right. An item whose hash equals that of
// an existing node is dropped. Every visited node must have self set.
func (B *GoodByeBST) AddNode(i *GoodByeItem) {
	node := B
	for {
		switch {
		case i.hash < node.self.hash:
			if node.left == nil {
				node.left = &GoodByeBST{self: i}
				return
			}
			node = node.left
		case i.hash > node.self.hash:
			if node.right == nil {
				node.right = &GoodByeBST{self: i}
				return
			}
			node = node.right
		default:
			// Equal hash: nothing is inserted.
			return
		}
	}
}
// pow_of_2 returns 2 raised to the power e as a uint64. Exponents of 64 or
// more shift the bit out and yield 0.
func pow_of_2(e uint64) uint64 {
	var one uint64 = 1
	return one << e
}
// log_of_2 returns the index of the highest set bit of k, i.e. floor(log2(k))
// for k > 0. For k == 0 the subtraction wraps around and the result is the
// maximum uint64 value.
func log_of_2(k uint64) uint64 {
	width := uint64(bits.Len64(k)) // number of bits needed to represent k
	return width - 1
}
// make_bst_inner places the first n elements of the sorted slice input into
// *output as an implicit binary search tree: the subtree root goes to index
// i, its left child tree to i*2+1 and its right child tree to i*2+2.
//
// e is the number of levels the subtree may occupy; ca_make_bst passes
// log_of_2(n)+1 for the whole tree. *output must already be long enough to
// hold every index that gets assigned.
func make_bst_inner(input []GoodByeItem, n uint64, e uint64, output *[]GoodByeItem, i uint64) {
	if n == 0 {
		return
	}

	half := uint64(1) << (e - 1)
	full := uint64(1) << e

	// Root index when the left subtree is completely filled.
	root := (full - 2) / 2
	// With fewer than threshold elements the bottom level of the left
	// subtree is only partly filled, so the root moves left by the shortfall.
	threshold := half - 1 + half/2
	if n < threshold {
		root -= threshold - n
	}

	(*output)[i] = input[root]
	make_bst_inner(input, root, e-1, output, 2*i+1)
	make_bst_inner(input[root+1:], n-root-1, e-1, output, 2*i+2)
}
// ca_make_bst rearranges the hash-sorted slice input into *output as an
// implicit binary search tree (children of index i at 2*i+1 and 2*i+2).
// *output must have at least len(input) elements. For an empty input the
// level count wraps around to 0 and make_bst_inner returns without touching
// *output.
func ca_make_bst(input []GoodByeItem, output *[]GoodByeItem) {
	count := uint64(len(input))
	levels := log_of_2(count) + 1
	make_bst_inner(input, count, levels, output, 0)
}
// PXAROutCB receives one chunk of output bytes. PXARArchive holds one callback
// for the pxar stream (writeCB) and one for the catalog (catalogWriteCB).
// Flush hands the same scratch slice to successive invocations within one
// call, so an implementation that needs to keep the data should copy it.
type PXAROutCB func([]byte)
// PXARArchive holds the state of one archive being written: the output
// callbacks, the staging buffer and the running offsets of both outputs.
type PXARArchive struct {
//Create(filename string, writeCB PXAROutCB)
//AddFile(filename string)
//AddDirectory(dirname string)
// Receives the pxar stream; called by Flush and must be non-nil.
writeCB PXAROutCB
// Receives the catalog; may be nil, in which case no catalog is emitted.
catalogWriteCB PXAROutCB
// Staging area for pxar bytes that Flush has not yet handed to writeCB.
buffer bytes.Buffer
// Number of pxar bytes handed to writeCB so far; only Flush advances it.
pos uint64
// Name recorded in the root pointer entry of the catalog.
archivename string
// Running offset in the catalog output; starts at 8, after the magic.
catalog_pos uint64
}
// Flush drains the internal buffer through writeCB in chunks of at most
// 64 KiB and advances pos by the number of bytes handed out.
// It is used while building a structure whose offset has to be recorded:
// after a Flush, pos is the exact stream position of the next byte written.
func (a *PXARArchive) Flush() {
	chunk := make([]byte, 64*1024)
	// The error from Read is ignored on purpose: a bytes.Buffer only reports
	// io.EOF, and that case shows up as n == 0.
	for n, _ := a.buffer.Read(chunk); n > 0; n, _ = a.buffer.Read(chunk) {
		a.writeCB(chunk[:n])
		a.pos += uint64(n)
	}
}
// Create resets the offsets for a new archive: the pxar stream starts at 0
// and the catalog at 8, the length of the catalog magic that WriteDir emits
// first.
func (a *PXARArchive) Create() {
	a.pos, a.catalog_pos = 0, 8
}
// CatalogDir is what WriteDir reports to its caller so that the parent can
// reference the directory's table from its own catalog table.
type CatalogDir struct {
Pos uint64 //Catalog offset at which this directory's table starts; children are written first, and the parent stores its own table offset minus this value
// Directory name as passed to WriteDir.
Name string
}
// CatalogFile is what WriteFile reports to its caller for the file's entry in
// the parent directory's catalog table.
type CatalogFile struct {
// Base name of the file.
Name string
// Modification time as Unix seconds.
MTime uint64
// File size in bytes as reported by Stat (payload header not included).
Size uint64
}
// append_u64_7bit appends v to a as a variable-length integer and returns the
// extended slice. The value is emitted 7 bits at a time, least significant
// group first; every byte except the last has its top bit set.
func append_u64_7bit(a []byte, v uint64) []byte {
	out := a
	for ; v >= 0x80; v >>= 7 {
		out = append(out, byte(v&0x7f)|0x80)
	}
	return append(out, byte(v))
}
//PXAR format notes. The documentation leaves out many details, which I had to figure out myself.
/*
Suppose we have this tree:
abc
    file.txt
    ced
        file2.txt
        file3.txt
The first entry (the top-level directory) is always written without a filename.
The resulting stream is:
PXAR_ENTRY(dir, for abc)
PXAR_FILENAME(file.txt)
PXAR_ENTRY(file, attributes etc)
PXAR_PAYLOAD(file.txt)
PXAR_FILENAME(ced)
PXAR_ENTRY(dir, for ced)
PXAR_FILENAME(file2.txt)
PXAR_ENTRY(file, attributes etc)
PXAR_PAYLOAD(file2.txt)
PXAR_FILENAME(file3.txt)
PXAR_ENTRY(file, attributes etc)
PXAR_PAYLOAD(file3.txt)
PXAR_GOODBYE(relative to ced
    entries for the sip hashes of "file2.txt" and "file3.txt", ordered with the casync algorithm below;
    each offset is relative to the offset of the PXAR_GOODBYE header
    the last entry is a special one with a fixed hash; it is not part of the sorted entries
)
PXAR_GOODBYE(relative to abc, the top dir
    entries for the sip hashes of "file.txt" and "ced", ordered with the casync algorithm below;
    each offset is relative to the offset of the PXAR_GOODBYE header
    the last entry is a special one with a fixed hash; it is not part of the sorted entries
)
*/
// frameCatalogTable prefixes an encoded catalog table with its length as a
// 7-bit variable-length integer, the form in which tables are handed to
// catalogWriteCB.
func frameCatalogTable(table []byte) []byte {
	framed := append_u64_7bit(make([]byte, 0, len(table)+10), uint64(len(table)))
	return append(framed, table...)
}

// WriteDir writes the directory at path, and recursively everything in it, to
// the pxar stream and emits the directory's catalog table through
// catalogWriteCB (when that callback is set).
//
// Stream layout produced for one directory:
//   - PXAR_FILENAME carrying dirname (omitted when toplevel is true),
//   - PXAR_ENTRY describing the directory,
//   - every child, written by WriteDir or WriteFile,
//   - PXAR_GOODBYE with one item per child in the order produced by
//     ca_make_bst, followed by the fixed tail item.
//
// Catalog tables are written after the recursion, so a child's table always
// precedes its parent's. For the top-level directory the catalog magic is
// written first and a root pointer table plus an 8-byte pointer last.
//
// Parameters:
//   - path: filesystem path of the directory to read.
//   - dirname: name stored in the archive for this directory.
//   - toplevel: true only for the archive root.
//
// It returns the directory name together with the catalog offset of its
// table. If the directory cannot be listed or stat'ed, a message is printed,
// nothing is written and the zero CatalogDir is returned. Children that
// could not be written are left out of the goodbye table and the catalog.
func (a *PXARArchive) WriteDir(path string, dirname string, toplevel bool) CatalogDir {
	files, err := os.ReadDir(path)
	if err != nil {
		fmt.Printf("Failed to read dir %s\n", path)
		return CatalogDir{}
	}
	fileInfo, err := os.Stat(path)
	if err != nil {
		fmt.Printf("Failed to stat %s\n", path)
		return CatalogDir{}
	}

	// The root has no filename item; it opens the catalog instead.
	if !toplevel {
		nameHeader := &PXARFilenameEntry{
			hdr: PXAR_FILENAME,
			len: uint64(16) + uint64(len(dirname)) + 1,
		}
		binary.Write(&a.buffer, binary.LittleEndian, nameHeader)
		a.buffer.WriteString(dirname)
		a.buffer.WriteByte(0x00)
	} else if a.catalogWriteCB != nil {
		a.catalogWriteCB(catalog_magic)
		a.catalog_pos = 8
	}
	a.Flush()

	// The goodbye tail item refers back to the start of the ENTRY item.
	dirStartPos := a.pos
	entry := &PXARFileEntry{
		hdr:   PXAR_ENTRY,
		len:   56,
		mode:  IFDIR | 0o777,
		flags: 0,
		uid:   1000, // Fixed: this project targets Windows for now, where execute/traverse permissions don't exist
		gid:   1000,
		mtime: MTime{
			secs:    uint64(fileInfo.ModTime().Unix()),
			nanos:   0,
			padding: 0,
		},
	}
	binary.Write(&a.buffer, binary.LittleEndian, entry)
	a.Flush()

	goodbyeItems := make([]GoodByeItem, 0, len(files))
	var catalogFiles []CatalogFile
	var catalogDirs []CatalogDir
	for _, file := range files {
		name := file.Name()
		childPath := filepath.Join(path, name)
		startPos := a.pos
		if file.IsDir() {
			d := a.WriteDir(childPath, name, false)
			if a.pos == startPos {
				// Nothing reached the stream: the child failed. Recording it
				// would create a zero-length goodbye item and an unnamed
				// catalog entry.
				continue
			}
			catalogDirs = append(catalogDirs, d)
		} else {
			f := a.WriteFile(childPath, name)
			if a.pos == startPos {
				continue // same as above: the file could not be written
			}
			catalogFiles = append(catalogFiles, f)
		}
		goodbyeItems = append(goodbyeItems, GoodByeItem{
			hash:   siphash.Hash(0x83ac3f1cfbb450db, 0xaa4f1b6879369fbd, []byte(name)),
			offset: startPos, // absolute for now; made relative when written
			len:    a.pos - startPos,
		})
	}

	// The table is written AFTER the recursion, so the tables of all
	// children are already out and their offsets are known.
	tablePos := a.catalog_pos
	table := append_u64_7bit(nil, uint64(len(catalogFiles)+len(catalogDirs)))
	for _, d := range catalogDirs {
		table = append(table, 'd')
		table = append_u64_7bit(table, uint64(len(d.Name)))
		table = append(table, d.Name...)
		table = append_u64_7bit(table, tablePos-d.Pos)
	}
	for _, f := range catalogFiles {
		table = append(table, 'f')
		table = append_u64_7bit(table, uint64(len(f.Name)))
		table = append(table, f.Name...)
		table = append_u64_7bit(table, f.Size)
		table = append_u64_7bit(table, f.MTime)
	}
	framedTable := frameCatalogTable(table)
	if a.catalogWriteCB != nil {
		a.catalogWriteCB(framedTable)
	}
	a.catalog_pos += uint64(len(framedTable))

	// Sort by sip hash, then lay the items out as the casync implicit
	// binary search tree.
	sort.Slice(goodbyeItems, func(i, j int) bool {
		return goodbyeItems[i].hash < goodbyeItems[j].hash
	})
	bst := make([]GoodByeItem, len(goodbyeItems))
	ca_make_bst(goodbyeItems, &bst)

	a.Flush()
	goodbyeStart := a.pos
	goodbyeLen := uint64(16 + 24*(len(bst)+1))
	binary.Write(&a.buffer, binary.LittleEndian, PXAR_GOODBYE)
	binary.Write(&a.buffer, binary.LittleEndian, goodbyeLen)
	for _, gi := range bst {
		// Stored offsets count backwards from the goodbye header.
		gi.offset = goodbyeStart - gi.offset
		binary.Write(&a.buffer, binary.LittleEndian, gi)
	}
	tail := &GoodByeItem{
		offset: goodbyeStart - dirStartPos,
		len:    goodbyeLen,
		hash:   0xef5eed5b753e1555,
	}
	binary.Write(&a.buffer, binary.LittleEndian, tail)
	a.Flush()

	if toplevel {
		// Root pointer: a one-entry table naming the archive and pointing
		// back at the root directory's table, followed by the offset of that
		// pointer table as a little-endian uint64.
		root := append_u64_7bit(nil, uint64(1))
		root = append(root, 'd')
		root = append_u64_7bit(root, uint64(len(a.archivename)))
		root = append(root, a.archivename...)
		root = append_u64_7bit(root, a.catalog_pos-tablePos)
		framedRoot := frameCatalogTable(root)
		ptr := binary.LittleEndian.AppendUint64(nil, a.catalog_pos)
		if a.catalogWriteCB != nil {
			a.catalogWriteCB(framedRoot)
			a.catalogWriteCB(ptr)
		}
	}

	return CatalogDir{
		Name: dirname,
		Pos:  tablePos,
	}
}
// WriteFile writes the file at path to the pxar stream as PXAR_FILENAME
// (basename), PXAR_ENTRY and PXAR_PAYLOAD, and returns the name, mtime and
// size for the parent's catalog table.
//
// The payload header announces the size reported by Stat, so exactly that
// many bytes are written: reading stops once the announced size is reached,
// and if the file turns out shorter (it shrank, or a read failed) the
// remainder is filled with zero bytes and a message is printed. This keeps
// the items that follow at the offsets the header promises.
//
// If the file cannot be stat'ed or opened, a message is printed, nothing is
// written and the zero CatalogFile is returned.
//
// Note: the first item of a pxar archive, and therefore the entry point, must
// always be WriteDir, because the top level is always a directory. Backing up
// a single file on its own is not possible.
func (a *PXARArchive) WriteFile(path string, basename string) CatalogFile {
	fileInfo, err := os.Stat(path)
	if err != nil {
		fmt.Printf("Failed to stat %s\n", path)
		return CatalogFile{}
	}
	file, err := os.Open(path)
	if err != nil {
		fmt.Printf("Failed to open %s\n", path)
		return CatalogFile{}
	}
	defer file.Close()

	size := uint64(fileInfo.Size())
	mtime := uint64(fileInfo.ModTime().Unix())

	fname_entry := &PXARFilenameEntry{
		hdr: PXAR_FILENAME,
		len: uint64(16) + uint64(len(basename)) + 1,
	}
	binary.Write(&a.buffer, binary.LittleEndian, fname_entry)
	a.buffer.WriteString(basename)
	a.buffer.WriteByte(0x00)

	entry := &PXARFileEntry{
		hdr:   PXAR_ENTRY,
		len:   56,
		mode:  IFREG | 0o777,
		flags: 0,
		uid:   1000,
		gid:   1000,
		mtime: MTime{
			secs:    mtime,
			nanos:   0,
			padding: 0,
		},
	}
	binary.Write(&a.buffer, binary.LittleEndian, entry)

	binary.Write(&a.buffer, binary.LittleEndian, PXAR_PAYLOAD)
	binary.Write(&a.buffer, binary.LittleEndian, size+16) // file size + header size
	a.Flush()

	readbuffer := make([]byte, 1024*64)
	remaining := size
	var readErr error
	for remaining > 0 {
		// Never read past the announced size, even if the file has grown.
		chunk := readbuffer
		if uint64(len(chunk)) > remaining {
			chunk = chunk[:remaining]
		}
		nread, err := file.Read(chunk)
		if nread > 0 {
			a.buffer.Write(chunk[:nread])
			a.Flush()
			remaining -= uint64(nread)
		}
		if err != nil {
			readErr = err
			break
		}
		if nread <= 0 {
			break
		}
	}

	if remaining > 0 {
		// The header already promised these bytes; pad instead of leaving a
		// truncated payload that would shift every later item.
		fmt.Printf("Short read on %s (%v), padding %d bytes with zeros\n", path, readErr, remaining)
		for i := range readbuffer {
			readbuffer[i] = 0
		}
		for remaining > 0 {
			n := uint64(len(readbuffer))
			if n > remaining {
				n = remaining
			}
			a.buffer.Write(readbuffer[:n])
			a.Flush()
			remaining -= n
		}
	}
	a.Flush()

	return CatalogFile{
		Name:  basename,
		MTime: mtime,
		Size:  size,
	}
}