-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdfreorder.py
115 lines (91 loc) · 3.73 KB
/
pdfreorder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""
Reorder specified pages of a PDF file.
Usage:
python pdfreorder.py --pages "page-spec" --inpath "path/file"
Command line options:
--pages Comma separated ordered list of pages and page ranges
Pages not in the list will not be written to the output
Thus pdfreorder can be used to extract pages
Note: this option must be quoted
--inpath Path and file name of input PDF file
The output file name is derived from the input file name by appending the
string "_reorder" to the input file name before the extension. The output
file is placed in the same directory as the input file.
Examples:
Swap the first two pages of 10 page document doc.pdf
python pdfreorder.py --pages "2, 1, 3-10" --inpath doc.pdf
Reverse the order of pages in 10 page document doc.pdf
python pdfreorder.py --pages "10-1" --inpath doc.pdf
Extract page 5 of doc.pdf, which has at least 5 pages
python pdfreorder.py --pages "5" --inpath doc.pdf
"""
import argparse
import PyPDF2
import os
import pdftools_utils as pu
def parse_args(argv=None):
    """Parse the pdfreorder command line options.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which
            is what the script entry point relies on.

    Returns:
        argparse.Namespace with two string attributes:
        ``pages`` (page specification, default ``'1'``) and
        ``inpath`` (input path/file, default ``''``).
    """
    parser = argparse.ArgumentParser(
        description='Reorder specified pages of a PDF file.')
    parser.add_argument(
        '-p', '--pages',
        help='Comma separated ordered list of pages and page ranges '
             'to reorder (must be quoted)',
        type=str,
        default='1',
    )
    parser.add_argument(
        '-i', '--inpath',
        help='Input path/file',
        type=str,
        default='',
    )
    return parser.parse_args(argv)
class PdfReorderer:
    """Write selected pages of a PDF, in a caller-specified order, to a new file.

    Intended call sequence: ``validate_inputs(...)`` and, if that returns
    True, ``process()``.  ``status()`` returns the most recent message and
    ``get_ofile()`` the output path chosen by ``process()``.
    """

    def __init__(self):
        # Path of the output file; stays None until process() picks a name.
        self.ofile = None
        # Most recent human-readable status message.
        self.msg = ''
        # Keyword arguments captured by validate_inputs().  Created here so
        # process() can report a missing validation step through the normal
        # ok/msg mechanism instead of failing with AttributeError.
        self.args_d = {}

    def validate_inputs(self, **kwargs):
        """Store the inputs and check that they can be processed.

        Keyword Args:
            inpath: Path and file name of the input PDF.
            pages: Page specification string; only consulted once the file
                checks have passed.

        Returns:
            True when the input file exists, is accepted by ``pu.ispdf``,
            is not reported as restricted by ``pu.isRestricted`` and the
            page specification yields a non-empty result from ``pu.pages``.
            False otherwise; ``status()`` then describes the first check
            that failed.
        """
        self.args_d = kwargs
        inpath = self.args_d['inpath']

        if not os.path.isfile(inpath):
            self.msg = 'Cannot find input file {0}'.format(inpath)
            return False

        if not pu.ispdf(inpath):
            self.msg = '{0} does not look like a valid PDF.'.format(inpath)
            return False

        if pu.isRestricted(inpath):
            self.msg = 'File is restricted:\n {0}'.format(inpath)
            return False

        if not pu.pages(self.args_d['pages'], pu.getNumPages(inpath)):
            self.msg = 'No pages to process. Check pages specification.'
            return False

        self.msg = 'Inputs validated'
        return True

    def status(self):
        """Return the most recent status message."""
        return self.msg

    def get_ofile(self):
        """Return the output file path, or None if process() has not set one."""
        return self.ofile

    def process(self):
        """Read the input PDF and write the requested pages to the output file.

        The output file is placed in the input file's directory and named
        after the input file's base name with ``_reorder.pdf`` appended.

        Returns:
            True when the output file was written.  False when no input
            path has been supplied yet (validate_inputs() not called) or
            when the page specification selects no pages; ``status()``
            then holds the reason.
        """
        inpath = self.args_d.get('inpath')
        if not inpath:
            # Guard against process() being called before validate_inputs().
            self.msg = 'No input file specified. Call validate_inputs first.'
            return False

        # The input stays open until the output has been written: page
        # objects are fetched from the reader while the writer is filled.
        with open(inpath, 'rb') as fr:
            reader = PyPDF2.PdfFileReader(fr)
            writer = PyPDF2.PdfFileWriter()
            pages_to_reorder = pu.pages(self.args_d['pages'], reader.numPages)

            if not pages_to_reorder:
                self.msg = 'No pages to process'
                return False

            indir, infile = os.path.split(inpath)
            out_name = os.path.splitext(infile)[0] + '_reorder.pdf'
            self.ofile = os.path.join(indir, out_name)

            # NOTE(review): the values from pu.pages are passed straight to
            # getPage(), so they are presumably zero-based indices -- confirm
            # against pdftools_utils.
            for page_num in pages_to_reorder:
                writer.addPage(reader.getPage(page_num))

            with open(self.ofile, 'wb') as fw:
                writer.write(fw)

        return True
if __name__ == "__main__":
    # Script entry point: validate the command line inputs, then reorder.
    args = parse_args()
    R = PdfReorderer()
    if not (R.validate_inputs(**vars(args)) and R.process()):
        print(R.status())
        # Exit non-zero so shells and calling scripts can detect the failure;
        # previously the script reported success (status 0) even on error.
        raise SystemExit(1)