-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpdfreader.py
More file actions
32 lines (23 loc) · 773 Bytes
/
pdfreader.py
File metadata and controls
32 lines (23 loc) · 773 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import PyPDF2
def pdfread(book):
    """Extract the text of every page of a PDF file.

    Args:
        book: Path of the PDF file to read; it is opened in binary mode.

    Returns:
        A list with one entry per page, in page order, holding whatever
        ``extract_text()`` returned for that page.

    Raises:
        OSError: If ``book`` cannot be opened (e.g. it does not exist).

    Side effects:
        Prints the page count, followed by one progress banner per page,
        to standard output.
    """
    txtlist = []
    # The context manager closes the file even if parsing or text
    # extraction raises part-way through the document.
    with open(book, 'rb') as pdf_file:
        pdfr = PyPDF2.PdfReader(pdf_file)
        print(len(pdfr.pages))
        # PdfReader belongs to the snake_case API; the legacy getPage() /
        # extractText() calls raise in PyPDF2 3.x, so use .pages and
        # extract_text(), which exist wherever PdfReader does.
        for i, page in enumerate(pdfr.pages):
            print("************** page reading "+str(i)+" done *******************")
            txtlist.append(page.extract_text())
    return txtlist
# article = " ".join(txtlist)
# print(article)