# Source code for PDF4Cat.converter.images

import os
import io
import zipfile

from ..cat import PDF4Cat

class Img2Pdf(PDF4Cat):

	"""Convert image files to pdf (subclass of PDF4Cat parent class)
	
	Args:
		doc_file (None, optional): Document file (for multiple operations, 'use input_doc_list')
		input_doc_list (list, optional): List of input docs
		passwd (str, optional): Document password (for crypt/decrypt)
		progress_callback (None, optional): Progress callback; the methods of
			this class call it as ``progress_callback(done, total)``
	
	Raises:
		TypeError: If you use doc_file with input_doc_list (you can use only one)
	"""
	
	def __init__(self, *args, **kwargs):
		super().__init__(*args, **kwargs)

	@PDF4Cat.run_in_subprocess
	def img2pdf(self,
		output_pdf = None) -> None:
		"""Convert the single image ``doc_file`` to a pdf
		
		Args:
			output_pdf (None, optional): Output pdf file; when omitted,
				``<doc_name>_out.pdf`` inside ``doc_path`` is used
		"""
		if not output_pdf:
			output_pdf = os.path.join(self.doc_path, self.doc_name+"_out.pdf")
		# Anchors a relative path at the working directory (absolute paths pass through).
		output_pdf = os.path.join(os.getcwd(), output_pdf)

		pic = self.pdf_open(self.doc_file, passwd=self.passwd)
		try:
			pdfbytes = pic.convert_to_pdf()
		finally:
			# Release the source image even when the conversion fails.
			pic.close()
		with open(output_pdf, 'wb') as pdf:
			pdf.write(pdfbytes)

	@PDF4Cat.run_in_subprocess
	def imgs2pdf(self,
		output_pdf = None) -> None:
		"""Merge every image of ``input_doc_list`` into one pdf
		
		Each image is converted to an in-memory pdf and appended to the
		result in list order; ``progress_callback`` is called after each
		image.
		
		Args:
			output_pdf (None, optional): Output pdf file; when omitted,
				``<doc_name>_out.pdf`` inside ``doc_path`` is used
		"""
		if not output_pdf:
			output_pdf = os.path.join(self.doc_path, self.doc_name+"_out.pdf")
		# Anchors a relative path at the working directory (absolute paths pass through).
		output_pdf = os.path.join(os.getcwd(), output_pdf)

		len_docs = len(self.input_doc_list)

		result = self.pdf_open()
		try:
			for img_path in self.input_doc_list:
				# Open the image of this iteration (not self.doc_file).
				pic = self.pdf_open(img_path, passwd=self.passwd)
				try:
					pdfbytes = pic.convert_to_pdf()
				finally:
					pic.close()
				pdf_tmp = self.pdf_open("pdf", pdfbytes)
				del pdfbytes
				try:
					result.insert_pdf(pdf_tmp)
				finally:
					pdf_tmp.close()
				self.counter += 1
				self.progress_callback(self.counter, len_docs)
			result.save(output_pdf)
		finally:
			result.close()
			# Same as the zip methods: leave the counter ready for the next run.
			self.counter = 0

	def gen_imagesi2p(self,
		fimages: str = '{name}_{num}.pdf',
		start_from: int = 0) -> tuple:
		"""Generator, converts the images of ``input_doc_list`` one at a time
		
		Args:
			fimages (str, optional): Format of the generated filenames;
				``{name}`` is the image file name (extension included) and
				``{num}`` its position in the list plus ``start_from``
			start_from (int, optional): Enumerate from n
		
		Yields:
			tuple: filename, pdf content (bytes)
		"""
		for num, img in enumerate(self.input_doc_list, start_from):
			pic = self.pdf_open(img)
			try:
				pdfbytes = pic.convert_to_pdf()
			finally:
				pic.close()

			imfn = fimages.format(name=os.path.basename(img), num=num)
			yield imfn, bytes(pdfbytes)

	@PDF4Cat.run_in_subprocess
	def imgs2pdfs_zip(self,
		out_zip_file: str,
		fimages: str = '{name}_{num}.pdf',
		start_from: int = 0) -> None:
		"""Multiple images to multiple pdfs and compress to zip
		(using gen_imagesi2p generator)
		
		Args:
			out_zip_file (str): Output zip file
			fimages (str, optional): Format of the pdf filenames inside the zip
			start_from (int, optional): Enumerate from n
		"""
		len_docs = len(self.input_doc_list)
		try:
			# Compression: zipfile.ZIP_DEFLATED (8); ZIP64 extensions are disabled,
			# so zipfile raises if the archive would need them.
			with zipfile.ZipFile(out_zip_file, 'w', zipfile.ZIP_DEFLATED, False) as zf:
				for file_name, pdf_data in self.gen_imagesi2p(fimages, start_from):
					zf.writestr(file_name, pdf_data)
					self.counter += 1
					self.progress_callback(self.counter, len_docs)
		finally:
			# Reset even on failure so a later run starts counting from zero.
			self.counter = 0

#

class Pdf2Img(PDF4Cat):

	"""Render pdf pages to images (subclass of PDF4Cat parent class)
	
	Args:
		doc_file (None, optional): Document file (for multiple operations, 'use input_doc_list')
		input_doc_list (list, optional): List of input docs
		passwd (str, optional): Document password (for crypt/decrypt)
		progress_callback (None, optional): Progress callback; the methods of
			this class call it as ``progress_callback(done, total)``
	
	Raises:
		TypeError: If you use doc_file with input_doc_list (you can use only one)
	"""

	def __init__(self, *args, **kwargs):
		super().__init__(*args, **kwargs)

	def gen_imagesp2i(self,
		pages: list = None,
		fimages: str = '{name}_{num}.png',
		start_from: int = 0,
		zoom: float = 1.5) -> tuple:
		"""Generator, renders the pages of ``doc_file`` one at a time
		
		Args:
			pages (list, optional): Page numbers to render, in the given
				order; every page when empty or None. The numbers are passed
				unchanged to ``load_page`` (presumably 0-based, as in
				PyMuPDF - confirm against ``pdf_open``)
			fimages (str, optional): Format image filenames; ``{name}`` is the
				document file name, ``{num}`` the page number plus
				``start_from``. Its extension selects the image format
			start_from (int, optional): Enumerate from n
			zoom (float, optional): Zoom image (look fitz.Matrix docs)
		
		Yields:
			tuple: filename, BytesIO holding the encoded image
		"""
		pdf = self.pdf_open(self.doc_file, passwd=self.passwd)
		try:
			ext_from_fimages = os.path.splitext(fimages)[1][1:]
			mat = self.fitz_Matrix(zoom, zoom)
			doc_name = os.path.basename(self.doc_file)
			for pageNo in (pages or range(pdf.page_count)):
				page = pdf.load_page(pageNo)
				pix = page.get_pixmap(matrix = mat)
				io_data = io.BytesIO()
				io_data.write(pix.tobytes(output=ext_from_fimages))

				imfn = fimages.format(name=doc_name, num=pageNo+start_from)
				yield imfn, io_data
		finally:
			# Runs when the generator is exhausted, closed or fails.
			pdf.close()

	@PDF4Cat.run_in_subprocess
	def pdf2imgs_zip(self,
		out_zip_file: str,
		pages: list = None,
		fimages: str = '{name}_{num}.png',
		start_from: int = 0,
		zoom: float = 1.5) -> None:
		"""Pdf pages to multiple images and compress to zip
		(using gen_imagesp2i generator)
		
		Args:
			out_zip_file (str): Output zip file
			pages (list, optional): Page numbers to render; every page when
				empty or None (see gen_imagesp2i)
			fimages (str, optional): Format image filenames
			start_from (int, optional): Enumerate from n
			zoom (float, optional): Zoom image (look fitz.Matrix docs)
		"""
		# Opened only to learn the progress total; closed again right away.
		pdf = self.pdf_open(self.doc_file, passwd=self.passwd)
		try:
			pcount = len(pages) if pages else pdf.page_count
		finally:
			pdf.close()

		try:
			# Compression: zipfile.ZIP_DEFLATED (8); ZIP64 extensions are disabled,
			# so zipfile raises if the archive would need them.
			with zipfile.ZipFile(out_zip_file, 'w', zipfile.ZIP_DEFLATED, False) as zf:
				for file_name, io_data in self.gen_imagesp2i(pages, fimages, start_from, zoom):
					zf.writestr(file_name, io_data.getvalue())
					self.counter += 1
					self.progress_callback(self.counter, pcount)
		finally:
			# Reset even on failure so a later run starts counting from zero.
			self.counter = 0