- Python 第三方庫 PyPDF2 或 PyPDF4 可以針對 PDF 進行相關操作。
下面使用 PyPDF2 庫去除最上層的水印:
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.pdf import ContentStream
from PyPDF2.generic import TextStringObject, NameObject
from PyPDF2.utils import b_
def remove_watermark(input_file, output_file):
    """
    Strip text watermarks from a PDF by blanking every ``Tj`` text operand.

    Each page's content stream is parsed, and the operand of every ``Tj``
    (show-text) operation is replaced with an empty string. Note that this
    blanks *all* text drawn with ``Tj`` on every page, not only watermark
    text; operations using any other operator are left untouched.

    :param input_file: path of the PDF to read.
    :param output_file: path the cleaned PDF is written to.
    :return: None
    """
    with open(input_file, "rb") as f:
        # The second positional parameter of PdfFileReader is `strict`, not a
        # file mode, so only the stream is passed (strict keeps its default).
        source = PdfFileReader(f)
        output = PdfFileWriter()
        for page_number in range(source.getNumPages()):
            page = source.getPage(page_number)
            content_object = page.getContents()
            # A page without a /Contents entry has nothing to rewrite;
            # ContentStream would fail on None, so copy such pages as-is.
            if content_object is not None:
                content = ContentStream(content_object, source)
                for operands, operator in content.operations:
                    # The watermark targeted here is drawn with the "Tj"
                    # text operator; replace its text with an empty string.
                    if operator == b_("Tj"):
                        operands[0] = TextStringObject('')
                # Swap the page's original contents for the edited stream.
                page[NameObject('/Contents')] = content
            output.addPage(page)
        # Write while the input file is still open: the pages added to the
        # writer still refer back to objects read from the source stream.
        with open(output_file, "wb") as outputStream:
            output.write(outputStream)
最后編輯于 :
©著作權歸作者所有,轉載或內容合作請聯繫作者