感谢 meuh 的帮助,我编写了一个简短的 Python 程序:它把我用现有程序生成的 PDF 转换为 PostScript 文件,并在其中插入特定于设备的 `setpagedevice` 命令。
该程序完全满足我自己的需求,不过我想它也可以作为一个起点,对其他人有所帮助。代码如下:
#! /usr/bin/env python3
import argparse
import re
import subprocess
import sys
import PyPDF2
def make_postscript(f):
    """Convert a PDF file to PostScript with pdf2ps and yield it line by line.

    Args:
        f: File object passed to ``subprocess.Popen`` as the standard input
            of the ``pdf2ps`` child process.

    Yields:
        bytes: Each line read from the standard output of ``pdf2ps``.

    Raises:
        subprocess.CalledProcessError: If ``pdf2ps`` exits with a non-zero
            status after all of its output has been consumed.
    """
    command = ['pdf2ps', '/dev/stdin', '/dev/stdout']
    with subprocess.Popen(command, stdin=f, stdout=subprocess.PIPE, stderr=None) as proc:
        yield from proc.stdout
        # stdout reached EOF, so the child is finishing. Check its status so
        # that a failed conversion is reported instead of silently producing
        # truncated output. This is skipped when the consumer closes the
        # generator early, because GeneratorExit is raised at the yield.
        returncode = proc.wait()
        if returncode != 0:
            raise subprocess.CalledProcessError(returncode, command)
def add_device_control(postscript, separator_pages):
    """Add device control commands to a PostScript file with DSC comments.

    A ``setpagedevice`` command is emitted right after the
    ``%%BeginPageSetup`` comment of every page. Pages whose zero-based number
    is in ``separator_pages`` get ``/PageSize [1191 842]`` with media type
    ``Plain``; every other page gets ``/PageSize [595 842]`` with media type
    ``Labels`` and ``/MediaPosition 0``.

    Args:
        postscript: Iterable of ``bytes`` lines of a DSC-conforming PostScript
            document.
        separator_pages: Container of zero-based page numbers (the ordinal of
            the ``%%Page:`` comment minus one) to treat as separator pages.

    Yields:
        bytes: Every input line unchanged and in order, plus one inserted
        device control line per page.

    Raises:
        ValueError: If a ``%%Page:`` comment is not followed by a
            ``%%BeginPageSetup`` comment before the next page or the end of
            the input, or if ``%%BeginPageSetup`` appears without a pending
            ``%%Page:`` comment.
    """
    DSC_page_re = re.compile(b'%%Page: (?P<page_name>.+) (?P<page_number>[1-9][0-9]*)$')
    DSC_begin_page_setup_re = re.compile(b'%%BeginPageSetup$')
    separator_setup = b'mark { << /PageSize [1191 842] /ImagingBBox null /MediaType (Plain) /MediaPosition null >> setpagedevice } stopped cleartomark\n'
    regular_setup = b'mark { << /PageSize [595 842] /ImagingBBox null /MediaType (Labels) /MediaPosition 0 >> setpagedevice } stopped cleartomark\n'

    # Zero-based number of the page whose setup section has not been seen yet;
    # None while no page is waiting for its %%BeginPageSetup comment.
    page_number = None
    for line in postscript:
        m = DSC_page_re.match(line)
        if m:
            # Explicit raises rather than asserts: the checks validate input
            # and must survive running under ``python -O``.
            if page_number is not None:
                raise ValueError(
                    'page {} has no %%BeginPageSetup comment'.format(page_number + 1))
            page_number = int(m.group('page_number').decode('ASCII')) - 1
            yield line
            continue
        if DSC_begin_page_setup_re.match(line):
            if page_number is None:
                raise ValueError(
                    '%%BeginPageSetup comment without a preceding %%Page: comment')
            yield line
            yield separator_setup if page_number in separator_pages else regular_setup
            page_number = None
            continue
        yield line
    if page_number is not None:
        raise ValueError(
            'page {} has no %%BeginPageSetup comment'.format(page_number + 1))
def walk_outline(outline, depth=0):
    """Walk a PDF outline depth-first and yield each entry with its depth.

    Args:
        outline: Iterable whose items are either ``PyPDF2.pdf.Destination``
            objects or nested lists of the same structure.
        depth: Zero-based depth assigned to the entries found directly in
            ``outline``; nested lists are walked with ``depth + 1``.

    Yields:
        tuple: ``(depth, destination)`` for every ``Destination`` found, in
        depth-first order.

    Raises:
        TypeError: If an item is neither a list nor a ``Destination``.
    """
    for obj in outline:
        if isinstance(obj, list):
            yield from walk_outline(obj, depth + 1)
        elif isinstance(obj, PyPDF2.pdf.Destination):
            yield depth, obj
        else:
            # An explicit raise, not ``assert False``: under ``python -O`` an
            # assert is removed and the unexpected entry would be silently
            # skipped.
            raise TypeError(
                'unexpected outline entry of type {}'.format(type(obj).__name__))
def find_separator_pages(f):
    """Collect the page numbers targeted by the top-level outline entries.

    Args:
        f: File object handed to ``PyPDF2.PdfFileReader``.

    Returns:
        set: Page numbers, as reported by the reader, of every outline entry
        found at depth 0 (presumably zero-based -- the caller compares them
        with ``%%Page:`` ordinals minus one).
    """
    pdf = PyPDF2.PdfFileReader(f)
    pages = set()
    for level, entry in walk_outline(pdf.outlines):
        number = pdf._getPageNumberByIndirect(entry.page)
        if level != 0:
            # Only top-level outline entries mark separator pages.
            continue
        assert number >= 0
        pages.add(number)
    return pages
def main():
    """Command-line entry point: convert a PDF to device-controlled PostScript.

    Reads the input PDF twice -- once to locate the separator pages from its
    outline, once (after rewinding) to feed it to the PostScript converter --
    and writes the annotated PostScript to the output file, or to standard
    output when no output file is given.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("input_file", metavar="input.pdf", type=argparse.FileType('rb'))
    cli.add_argument("output_file", metavar="output.ps", nargs="?", type=argparse.FileType('wb'), default=sys.stdout.buffer)
    options = cli.parse_args()

    source = options.input_file
    separators = find_separator_pages(source)
    # The outline scan consumed the file; rewind before converting it.
    source.seek(0)

    annotated = add_device_control(make_postscript(source), separators)
    options.output_file.writelines(annotated)


if __name__ == "__main__":
    main()
感谢 meuh 的帮助,我编写了一个简短的 Python 程序:它把我用现有程序生成的 PDF 转换为 PostScript 文件,并在其中插入特定于设备的 `setpagedevice` 命令。该程序完全满足我自己的需求,不过我想它也可以作为一个起点,对其他人有所帮助。代码见上文。
再次感谢 meuh 在评论中的帮助。