Source code for PDF4Cat.splitter

import os
import io
import zipfile

from .cat import PDF4Cat

class Splitter(PDF4Cat):
    """Split a document into single-page PDFs.

    Subclass of the PDF4Cat parent class; the constructor forwards all of
    its arguments to the parent unchanged.

    Args:
        doc_file (None, optional): Document file (for multiple operations
            use ``input_doc_list``).
        input_doc_list (list, optional): List of input docs.
        passwd (str, optional): Document password (for crypt/decrypt).
        progress_callback (None, optional): Progress callback.
            ``split_pages2zip`` invokes it as
            ``progress_callback(pages_done, pages_total)``.

    Raises:
        TypeError: If you use doc_file with input_doc_list (you can use
            only one).
    """

    def __init__(self, *args, **kwargs):
        """Forward every positional and keyword argument to PDF4Cat."""
        super().__init__(*args, **kwargs)

    # (it is faster)
    # NOTE(review): orphaned remark kept from the original source; it
    # presumably refers to handing pages around as in-memory BytesIO
    # buffers -- confirm with the author.
    def gen_split(
            self,
            from_pdf=None,
            pages: list = [],
            fpages: str = '{name}_{num}.pdf',
            start_from: int = 0) -> tuple:  # each item: pdfname & pdfbytes
        """Generator, yield a file name and a BytesIO object for each page.

        Every selected page is copied into a fresh document (obtained from
        ``self.pdf_open()`` with no arguments), saved into a new
        ``io.BytesIO`` and that temporary document is closed before the
        pair is yielded.

        Args:
            from_pdf (None, optional): Opened pdf document to split
                (by default the main doc from the class param is opened).
                A document passed in by the caller is never closed here.
            pages (list, optional): List of pages to select like
                [1, 3, 5, 15]. The numbers are passed to ``insert_pdf``
                as-is. The default list is only read, never mutated.
            fpages (str, optional): Format of the pdf file names; it is
                formatted with ``name`` (``self.doc_filename``) and ``num``
                (page number plus ``start_from``).
            start_from (int, optional): Offset added to the page number
                used in the file name.

        Yields:
            tuple: filename, BytesIO. The buffer is not rewound after the
            save; read it with ``getvalue()``.
        """
        # Only a document opened here is ours to close; one supplied by the
        # caller stays under the caller's control.
        owns_source = not from_pdf
        if owns_source:
            from_pdf = self.pdf_open(self.doc_file, passwd=self.passwd)

        try:
            page_numbers = pages if pages else range(from_pdf.page_count)
            for page_no in page_numbers:
                dst = self.pdf_open()
                try:
                    # need load page  (TODO note carried over from the original)
                    dst.insert_pdf(from_pdf, from_page=page_no, to_page=page_no)
                    io_data = io.BytesIO()
                    dst.save(io_data)
                finally:
                    # Release the one-page document even when copying or
                    # saving fails.
                    dst.close()

                pdf_name = fpages.format(
                    name=self.doc_filename, num=page_no + start_from)
                yield pdf_name, io_data
        finally:
            # Also runs when the consumer abandons the generator early.
            if owns_source:
                from_pdf.close()

    @PDF4Cat.run_in_subprocess  # TODO: need add range
    def split_pages2zip(
            self,
            out_zip_file: str,
            pages: list = [],
            fpages: str = '{name}_{num}.pdf',
            start_from: int = 0) -> None:
        """Split pages to different pdfs and compress to zip.

        After each page is written ``self.counter`` is incremented and the
        progress callback (if one is set) is called with the counter and
        the total number of pages to write. The counter is reset to 0 and
        the source document is closed when the method finishes, whether it
        succeeded or raised.

        Args:
            out_zip_file (str): Output zip file
            pages (list, optional): List of pages to select like
                [1, 3, 5, 15]. The default list is only read, never
                mutated.
            fpages (str, optional): Format pdf filenames
            start_from (int, optional): Enumerate from n

        Raises:
            zipfile.LargeZipFile: If the archive would need the ZIP64
                extensions, which are disabled here.
        """
        pdf = self.pdf_open(self.doc_file, passwd=self.passwd)
        try:
            pcount = len(pages) if pages else pdf.page_count

            # Compression method ZIP_DEFLATED (constant 8); the trailing
            # False is allowZip64, i.e. the ZIP64 extensions are disabled.
            with zipfile.ZipFile(
                    out_zip_file, 'w', zipfile.ZIP_DEFLATED, False) as zf:
                for file_name, io_data in self.gen_split(
                        pdf, pages, fpages, start_from):
                    zf.writestr(file_name, io_data.getvalue())
                    self.counter += 1  # TODO: need enumerate
                    # The class docstring documents None as an accepted
                    # callback value, so only call it when it is callable.
                    if callable(self.progress_callback):
                        self.progress_callback(self.counter, pcount)
        finally:
            self.counter = 0
            pdf.close()