# From Catholicpenguin
import os, sys, tempfile
import Image
from subprocess import Popen,PIPE
import getopt
'''
Simple script which takes a two-column PDF in standard research-paper format,
and converts it into a format suitable for reading on the OLPC. Requires PIL,
and xpdf with pdftoppm to be in the PATH.
(Get a Windows version from http://www.foolabs.com/xpdf/download.html)
'''
'''
-v: debug (default off)
-s: single column (default off, i.e., two column)
-o: offset (in pixels, at whatever DPI rendered) (default 0; 115 in single-column mode when -d is not given)
-d: DPI to render (default 240 for two-column, 155 for single-column)
-f: PDF file (required)
Usage:
For a typical two-column pdf that is properly aligned,
./pdf2olpc.py -f file.pdf
Maybe it is shifted a bit? Bigger numbers shift the middle to the left
./pdf2olpc.py -f file.pdf -o 50
Grumble. It's a single-column article
./pdf2olpc.py -s -f file.pdf
Grumble. It's a single-column article that needs shifting (bigger == higher up)
and also has tiny margins (reduce DPI to get more of the page)
./pdf2olpc.py -s -f file.pdf -o 75 -d 130
'''
# Size of one OLPC screen in portrait orientation, in pixels.
PAGE_WIDTH = 900
PAGE_HEIGHT = 1200

# Defaults which worked for the original author.  DPI controls the amount of
# border removed - 240 seems about right for most research papers.
# Bigger == more border removed.
DEFAULT_DPI_DOUBLE = 240
DEFAULT_DPI_SINGLE = 155
# Vertical offset (pixels) that goes with DEFAULT_DPI_SINGLE.
DEFAULT_OFFSET_SINGLE = 115


def parse_options(args):
    """Parse the command-line arguments and fill in the defaults.

    args is the argument list without the program name (sys.argv[1:]).

    Returns a dict with the keys 'debug' (bool), 'single' (bool),
    'offset' (int, pixels, for off-centered papers), 'dpi' (int) and
    'file' (str, empty when -f was not given).

    Raises getopt.GetoptError for an unknown option and ValueError when the
    value of -o or -d is not an integer.
    """
    optlist, _extra = getopt.getopt(args, 'vso:d:f:')
    opts = {'debug': False, 'single': False, 'offset': None, 'dpi': None,
            'file': ''}
    for key, value in optlist:
        if key == '-v':
            opts['debug'] = True
        elif key == '-s':
            opts['single'] = True
        elif key == '-o':
            opts['offset'] = int(value)
        elif key == '-d':
            opts['dpi'] = int(value)
        elif key == '-f':
            opts['file'] = value

    # A negative DPI has always meant "not specified, use the default".
    dpi_defaulted = opts['dpi'] is None or opts['dpi'] < 0
    if dpi_defaulted:
        if opts['single']:
            opts['dpi'] = DEFAULT_DPI_SINGLE
        else:
            opts['dpi'] = DEFAULT_DPI_DOUBLE

    # Only fall back to a default offset when the user gave none; an explicit
    # -o must never be overwritten just because -d was omitted.
    if opts['offset'] is None:
        if opts['single'] and dpi_defaulted:
            opts['offset'] = DEFAULT_OFFSET_SINGLE
        else:
            opts['offset'] = 0
    return opts


def page_boxes(size, single, offset):
    """Return the crop boxes, in reading order, for one rendered PDF page.

    size is the (width, height) of the rendered page image, single selects
    the single-column layout and offset is the shift in pixels.  Each box is
    a (left, upper, right, lower) tuple of PAGE_WIDTH x PAGE_HEIGHT pixels,
    measured from the (shifted) centre of the page; boxes may extend past
    the image edges.
    """
    xsize, ysize = size
    if single:
        # Single column: one screen-sized page around the centre, moved up
        # by the offset.
        midx = xsize // 2
        midy = ysize // 2 - offset
        return [(midx - PAGE_WIDTH // 2, midy - PAGE_HEIGHT // 2,
                 midx + PAGE_WIDTH // 2, midy + PAGE_HEIGHT // 2)]

    # Double column: tile into 4 around the centre, with the column gutter
    # moved right by the offset.  Left column first, top before bottom.
    midx = xsize // 2 + offset
    midy = ysize // 2
    return [
        (midx - PAGE_WIDTH, midy - PAGE_HEIGHT, midx, midy),
        (midx - PAGE_WIDTH, midy, midx, midy + PAGE_HEIGHT),
        (midx, midy - PAGE_HEIGHT, midx + PAGE_WIDTH, midy),
        (midx, midy, midx + PAGE_WIDTH, midy + PAGE_HEIGHT),
    ]


def render_pdf(pdf, dpi, outdir, prefix, debug=False):
    """Convert the PDF into one image per page inside outdir using pdftoppm.

    Returns the sorted list of file names found in outdir afterwards.
    Raises RuntimeError when pdftoppm exits with a non-zero status or leaves
    no page images behind.
    """
    args = ['pdftoppm', '-r', str(dpi), pdf, os.path.join(outdir, prefix)]
    if debug:
        print('Running pdftoppm with args:' + str(args))
    proc = Popen(args, stdout=PIPE)
    output = proc.communicate()[0]
    if debug:
        print('Done. Got result:' + str(output))
    if proc.returncode != 0:
        raise RuntimeError('pdftoppm failed with exit status %d'
                           % proc.returncode)

    pdf_pages = sorted(os.listdir(outdir))
    if debug:
        print('Got pages:' + str(pdf_pages))
    if not pdf_pages:
        raise RuntimeError('pdftoppm produced no page images')
    return pdf_pages


def slice_pages(pdir, pdf_pages, bookdir, prefix, single, offset,
                debug=False):
    """Chop every rendered page into screen-sized PNG files inside bookdir.

    Output files are named prefix plus a running four-digit counter.
    Returns the number of book pages written.
    """
    bookpagecnt = 0
    for pdf_page in pdf_pages:
        im = Image.open(os.path.join(pdir, pdf_page))
        if debug:
            print('Opened image: %s %s %s %s'
                  % (pdf_page, im.format, '%dx%d' % im.size, im.mode))
        for box in page_boxes(im.size, single, offset):
            bookpage = os.path.join(bookdir,
                                    '%s%.4d.png' % (prefix, bookpagecnt))
            if debug:
                print('Saving page: ' + bookpage)
            im.crop(box).save(bookpage, 'PNG')
            bookpagecnt += 1
    return bookpagecnt


def main(argv=None):
    """Run the whole conversion for the options in argv (default sys.argv).

    Exits with an error message on bad options, a missing -f, an already
    existing output directory, or a pdftoppm failure.
    """
    # Imported here because the file's import block does not provide it.
    import shutil

    if argv is None:
        argv = sys.argv[1:]
    try:
        opts = parse_options(argv)
    except (getopt.GetoptError, ValueError) as err:
        sys.exit('pdf2olpc: %s' % err)
    if opts['debug']:
        print('Options: ' + str(opts))
    if not opts['file']:
        sys.exit('pdf2olpc: a PDF file is required (-f file.pdf)')

    pdf = opts['file']
    pdf_fn = os.path.splitext(os.path.split(pdf)[1])[0]

    # The book goes into a directory named after the PDF.  Refuse to reuse
    # an existing one, and say so before spending time on rendering.
    bookdir = pdf_fn
    if os.path.exists(bookdir):
        sys.exit('pdf2olpc: output directory already exists: ' + bookdir)

    # A fresh private temp directory: a leftover from a crashed run can
    # neither block this run nor leak stale page images into the book.
    pdir = tempfile.mkdtemp()
    try:
        pdf_pages = render_pdf(pdf, opts['dpi'], pdir, pdf_fn, opts['debug'])
        os.mkdir(bookdir)
        slice_pages(pdir, pdf_pages, bookdir, pdf_fn, opts['single'],
                    opts['offset'], opts['debug'])
    except RuntimeError as err:
        sys.exit('pdf2olpc: %s' % err)
    finally:
        # Remove the temp directory no matter how the conversion ended.
        shutil.rmtree(pdir, ignore_errors=True)


if __name__ == '__main__':
    main()