iTextSharp獲取PDF總頁數
// Open the i-th input PDF and read its total page count.
PdfReader reader = new PdfReader(fileList[i]);
int iPageNum = reader.NumberOfPages;
提取PDF中的文本和圖片(參考):https://www.e-iceblue.cn/spirepdf/extract-text-and-image-from-pdf.html
Spire獲取PDF文件圖片:
/// <summary>
/// Loads a PDF with Spire.PDF and returns the first image found on the requested page.
/// </summary>
/// <param name="filePath">Path of the PDF file to load.</param>
/// <param name="imgSequence">Zero-based index into <c>doc.Pages</c> of the page to scan.</param>
/// <returns>
/// The first image extracted from the page, or <c>null</c> when the page has no
/// images or when any step fails (the failure is shown in a message box).
/// </returns>
public Image GetPDFImg(string filePath, int imgSequence)
{
    Image image = null;
    Spire.Pdf.PdfDocument doc = null;
    try
    {
        // Load the PDF document.
        doc = new Spire.Pdf.PdfDocument();
        doc.LoadFromFile(filePath);

        // Pages are indexed from 0; an out-of-range index throws and is reported below.
        Spire.Pdf.PdfPageBase page = doc.Pages[imgSequence];

        // Extract every image on the selected page and keep only the first one.
        Image[] images = page.ExtractImages();
        if (images != null && images.Length > 0)
        {
            image = images[0];
        }
    }
    catch (Exception e)
    {
        MessageBox.Show(e.ToString(), "錯誤提示");
    }
    finally
    {
        // Release the document on the failure path as well as on success.
        if (doc != null)
        {
            doc.Dispose();
        }
    }
    return image;
}
itextsharp獲取PDF文件圖片:
/// <summary>
/// Extracts the image XObjects of the first page of a PDF with iTextSharp and
/// saves each one as a PNG-named file.
/// </summary>
/// <remarks>
/// Only page 1 is examined. Every image is written to the same fixed path, so
/// when the page holds several images only the last one saved remains on disk.
/// Failures during extraction are shown in a message box.
/// </remarks>
/// <param name="pdfFile">Path of the PDF file to read.</param>
public void ExtractImage(string pdfFile)
{
    PdfReader pdfReader = new PdfReader(pdfFile);
    try
    {
        if (pdfReader.NumberOfPages < 1)
        {
            return;
        }

        PdfDictionary pg = pdfReader.GetPageN(1);
        PdfDictionary res = PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES)) as PdfDictionary;
        if (res == null)
        {
            return; // page has no resource dictionary, hence no images
        }

        PdfDictionary xobj = PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT)) as PdfDictionary;
        if (xobj == null)
        {
            return; // page references no XObjects
        }

        foreach (PdfName name in xobj.Keys)
        {
            PdfObject obj = xobj.Get(name);
            if (obj == null || !obj.IsIndirect())
            {
                continue;
            }

            PdfDictionary tg = PdfReader.GetPdfObject(obj) as PdfDictionary;

            // Skip form XObjects and anything else that is not an image stream.
            if (tg == null || !PdfName.IMAGE.Equals(tg.GetAsName(PdfName.SUBTYPE)))
            {
                continue;
            }

            ImageRenderInfo imgRI = ImageRenderInfo.CreateForXObject(new Matrix(), (PRIndirectReference)obj, tg);

            // Decode the image and save it.
            PdfImageObject image = imgRI.GetImage();
            using (Image dotnetImg = image.GetDrawingImage())
            {
                dotnetImg.Save(@"E:\\000.png");
            }
        }
    }
    catch (Exception e)
    {
        MessageBox.Show(e.ToString(), "錯誤提示");
    }
    finally
    {
        // Always release the file handle held by the reader.
        pdfReader.Close();
    }
}
合并PDF文件
/// <summary>
/// Merges the given PDF files, in order, into a single output PDF using iTextSharp.
/// </summary>
/// <remarks>
/// Each source page is imported as a template and re-drawn with a transform that
/// compensates for the page's /Rotate entry, so rotated pages keep their visual
/// orientation. The merge is best-effort: a file that cannot be read is skipped
/// (the error is written to the trace output) and the remaining files are still
/// merged. Pages already copied from a file before it failed stay in the output.
/// </remarks>
/// <param name="fileList">Paths of the PDFs to merge; <c>null</c> entries are skipped.</param>
/// <param name="outMergeFile">Path of the merged PDF to create (overwritten if it exists).</param>
private void mergePDFFiles(string[] fileList,string outMergeFile)
{
    // Readers must stay open until the document is closed, because imported
    // pages are read from them while the writer flushes its content.
    List<PdfReader> readerList = new List<PdfReader>();

    using (FileStream output = new FileStream(outMergeFile, FileMode.Create))
    {
        try
        {
            Document document = new Document();
            PdfWriter writer = PdfWriter.GetInstance(document, output);
            document.Open();
            PdfContentByte cb = writer.DirectContent;

            for (int i = 0; i < fileList.Length; i++)
            {
                if (fileList[i] == null)
                {
                    continue;
                }

                try
                {
                    PdfReader reader = new PdfReader(fileList[i]);
                    // Track the reader immediately so it is disposed even if a page fails.
                    readerList.Add(reader);

                    int iPageNum = reader.NumberOfPages;
                    for (int j = 1; j <= iPageNum; j++)
                    {
                        PdfImportedPage newPage = writer.GetImportedPage(reader, j);
                        PdfDictionary page = reader.GetPageN(j);
                        PdfNumber rotate = page.GetAsNumber(PdfName.ROTATE);
                        iTextSharp.text.Rectangle r = reader.GetPageSizeWithRotation(j);
                        document.SetPageSize(r);
                        document.NewPage();

                        // Compensate for page rotation so merged pages are not flipped.
                        // r is the already-rotated page size.
                        int angle = rotate == null ? 0 : rotate.IntValue;
                        switch (angle)
                        {
                            case 0:
                                cb.AddTemplate(newPage, 1f, 0, 0, 1f, 0, 0);
                                break;
                            case 90:
                                cb.AddTemplate(newPage, 0, -1f, 1f, 0, 0, r.Height);
                                break;
                            case 180:
                                cb.AddTemplate(newPage, -1f, 0, 0, -1f, r.Width, r.Height);
                                break;
                            case 270:
                                // x' = -y + tx, so tx must be the rotated page WIDTH.
                                cb.AddTemplate(newPage, 0, 1f, -1f, 0, r.Width, 0);
                                break;
                            // Any other angle: the page is left blank.
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Best-effort merge: skip the unreadable file but leave a trace of why.
                    System.Diagnostics.Trace.WriteLine("mergePDFFiles: skipped '" + fileList[i] + "': " + ex);
                }
            }

            document.Close();
        }
        finally
        {
            for (int i = 0; i < readerList.Count; i++)
            {
                readerList[i].Dispose();
            }
        }
    }
}
獲取PDF文字信息
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
using System.Diagnostics;
// For each listed PDF, read the text inside a fixed rectangle on page 1,
// strip spaces and the "發文編號:" label line, and keep only the first remaining line.
List<string> lst1 = new List<string> { @"C:\Users\253\Desktop\20211118\aaa.pdf" };
foreach (string pdfPath in lst1)
{
    PdfReader pdfReader = new PdfReader(pdfPath);
    try
    {
        // Only text inside this rectangle is extracted
        // (arguments presumably llx, lly, urx, ury in PDF user space; confirm against the target layout).
        iTextSharp.text.Rectangle rect = new iTextSharp.text.Rectangle(400, 400, 800, 800);
        RenderFilter[] filter = { new RegionTextRenderFilter(rect) };
        ITextExtractionStrategy strategy = new FilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);

        string atext = PdfTextExtractor.GetTextFromPage(pdfReader, 1, strategy);
        atext = atext.Replace(" ", "").Replace("發文編號:\n", "");

        // Char overload = ordinal search; keep the whole text when there is no line break
        // instead of throwing from Substring(0, -1).
        int lineEnd = atext.IndexOf('\n');
        if (lineEnd >= 0)
        {
            atext = atext.Substring(0, lineEnd);
        }
    }
    finally
    {
        // Release the file handle even when extraction fails.
        pdfReader.Close();
    }
}
//PdfReader pdfReader = new PdfReader(@"C:\Users\253\Desktop\20211118 \aaa.pdf");
//int numberOfPages = pdfReader.NumberOfPages;
//StringBuilder text = new StringBuilder();
//for (int i = 1; i <= numberOfPages; ++i)
//{
// text.Append(iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(pdfReader, i));//獲取指定頁全部文字信息
// break;
//}
//pdfReader.Close();