有一段代碼,是為了遍歷每一頁中的圖片:
// Look up the page's /Resources dictionary and, inside it, the /XObject sub-dictionary.
// NOTE(review): resources is dereferenced without a null check — confirm every page dictionary carries /Resources.
PdfDictionary resources = pageDict.GetAsDictionary(PdfName.Resources);
PdfDictionary xObjects = resources.GetAsDictionary(PdfName.XObject);
// No /XObject entry: nothing to inspect on this page.
if (xObjects == null)
return;
var keySet = xObjects.KeySet();
// Iterate over a snapshot (ToList) of the key set rather than the live set.
foreach (var pdfName in keySet.ToList())
{
PdfStream stream = xObjects.GetAsStream(pdfName);
// NOTE(review): per the surrounding analysis, this call fails when the stream was already flushed
// (e.g. an image shared with an earlier page that was written out by page.Flush(true)), because a
// flushed dictionary's internal map is null. Presumably a stream == null / stream.IsFlushed() guard
// belongs here — confirm.
PdfObject subtype = stream.Get(PdfName.Subtype);
結果對某個pdf進行處理的時候,在第三頁出現了運行時的報錯。原因是stream.Get的時候,stream對象的內部成員map是null,因此 map.Get(key) 必然不可運行!但是為什麼會出現這種情況呢?先跟蹤一下 xObjects.GetAsStream(pdfName) 是怎麼處理的:
/// <summary>Returns the value stored under the given key.</summary>
/// <param name="key">the key to look up in the dictionary's map</param>
/// <param name="asDirect">
/// when true and the stored value is an indirect reference, the object that
/// reference refers to (resolved recursively) is returned instead of the reference itself
/// </param>
/// <returns>the stored value, the resolved object, or whatever the map returns for an absent key</returns>
public virtual PdfObject Get(PdfName key, bool asDirect) {
    PdfObject value = map.Get(key);
    // Only dereference when the caller asked for a direct object and the entry really is a reference.
    if (asDirect && value != null && value.GetObjectType() == INDIRECT_REFERENCE) {
        return ((PdfIndirectReference)value).GetRefersTo(true);
    }
    return value;
}
每一頁的xobject都是Indirect的object,因此還需要從整個pdf文檔中找到這個object的真正定義,通過 PdfIndirectReference::GetRefersTo:
/// <summary>Returns the object this indirect reference refers to.</summary>
/// <remarks>
/// In the non-recursive case the target is read through the reader on first use, but only when
/// no target is cached yet, the reference is not in the FLUSHED, MODIFIED or FREE state, and a
/// reader is available. In the recursive case, targets that are themselves indirect references
/// are followed for at most <c>LENGTH_OF_INDIRECTS_CHAIN</c> further steps.
/// </remarks>
/// <param name="recursively">whether to keep following targets that are indirect references</param>
/// <returns>the cached or freshly read target; may be null if nothing was cached or read</returns>
public virtual PdfObject GetRefersTo(bool recursively) {
    if (recursively) {
        PdfObject target = GetRefersTo(false);
        int hops = 0;
        // Bounded walk along the chain of references.
        while (hops < LENGTH_OF_INDIRECTS_CHAIN && target is iText.Kernel.Pdf.PdfIndirectReference) {
            target = ((iText.Kernel.Pdf.PdfIndirectReference)target).GetRefersTo(false);
            hops++;
        }
        return target;
    }
    bool notLoadedYet = refersTo == null;
    if (notLoadedYet && !(CheckState(FLUSHED) || CheckState(MODIFIED) || CheckState(FREE)) && GetReader() != null) {
        refersTo = GetReader().ReadObject(this);
    }
    return refersTo;
}
一般每一頁的新對象,其 refersTo == null,需要通過 GetReader().ReadObject(this) 從整個文檔中找到真正定義。但是在出錯的一頁,refersTo 已經不為空,指向一個 {10 0 R Flushed;}。我們再跟蹤一下,對第一頁的對象,GetReader().ReadObject(this) 是怎麼處理的:
PdfReader::ReadObject
private PdfObject ReadObject(PdfIndirectReference reference, bool fixXref) {
if (reference == null) {
return null;
}
if (reference.refersTo != null) {
return reference.refersTo;
}
try {
currentIndirectReference = reference;
if (reference.GetObjStreamNumber() > 0) {
PdfStream objectStream = (PdfStream)pdfDocument.GetXref().Get(reference.GetObjStreamNumber()).GetRefersTo(
false);
ReadObjectStream(objectStream);
return reference.refersTo;
}
else {
if (reference.GetOffset() > 0) {
PdfObject @object;
try {
tokens.Seek(reference.GetOffset());
tokens.NextValidToken();
if (tokens.GetTokenType() != PdfTokenizer.TokenType.Obj || tokens.GetObjNr() != reference.GetObjNumber() ||
tokens.GetGenNr() != reference.GetGenNumber()) {
tokens.ThrowError(KernelExceptionMessageConstant.INVALID_OFFSET_FOR_THIS_OBJECT, reference.ToString());
}
@object = ReadObject(false);
}
會根據對象的offset,從原始文檔中讀出對象。正好第一頁中要讀的一個對象就是10,它讀出來的狀態是{10 0 R},是沒有Flushed在後面的!那什麼時候變成flushed的呢?繼續跟蹤程序,發現是在每一頁處理結束後,程序都會調用一次 page.Flush(true);,對象10就變成了 {10 0 R Flushed;}。而 Flush 函數的說明是:
Flushes page dictionary, its content streams, annotations and thumb image. If <c>flushResourcesContentStreams</c> is true, all content streams that are rendered on this page (like FormXObjects, annotation appearance streams, patterns) and also all images associated with this page will also be flushed.
這說明了,由於這個圖片同時被第1頁和第3頁使用了,而因為調用Flush的時候,後面的參數加了true,因此處理完第一頁的時候被寫出去了。繼續跟蹤一下,為什麼Flush之後,stream對象的map變空了:
PdfPage類
/// <summary>
/// Flushes the XObject, Pattern and Shading sub-dictionaries of the given resources dictionary,
/// in that order. Does nothing when the dictionary is null or already flushed.
/// </summary>
/// <param name="resources">the resources dictionary to process; may be null</param>
private void FlushResourcesContentStreams(PdfDictionary resources) {
    if (resources == null || resources.IsFlushed()) {
        return;
    }
    PdfName[] categories = { PdfName.XObject, PdfName.Pattern, PdfName.Shading };
    foreach (PdfName category in categories) {
        FlushWithResources(resources.GetAsDictionary(category));
    }
}
/// <summary>
/// Flushes every not-yet-flushed value of the given collection, first descending into the
/// value's own /Resources dictionary. Does nothing when the collection is null or already flushed.
/// </summary>
/// <param name="objsCollection">dictionary whose values are to be flushed; may be null</param>
private void FlushWithResources(PdfDictionary objsCollection) {
    bool nothingToDo = objsCollection == null || objsCollection.IsFlushed();
    if (nothingToDo) {
        return;
    }
    foreach (PdfObject entry in objsCollection.Values()) {
        if (!entry.IsFlushed()) {
            // Nested resources are handled before the entry itself is flushed.
            PdfDictionary nestedResources = ((PdfDictionary)entry).GetAsDictionary(PdfName.Resources);
            FlushResourcesContentStreams(nestedResources);
            FlushMustBeIndirectObject(entry);
        }
    }
}
/// <summary>Makes the given object indirect within this page's document and then flushes it.</summary>
/// <param name="obj">the object to make indirect and flush</param>
private void FlushMustBeIndirectObject(PdfObject obj) {
    // TODO DEVSIX-744
    var indirectObj = obj.MakeIndirect(GetDocument());
    indirectObj.Flush();
}
PdfObject類
public void Flush(bool canBeInObjStm) {
try {
PdfDocument document = GetIndirectReference().GetDocument();
if (document != null) {
if (document.IsAppendMode() && !IsModified()) {
ILogger logger = ITextLogManager.GetLogger(typeof(PdfObject));
logger.LogInformation(iText.IO.Logs.IoLogMessageConstant.PDF_OBJECT_FLUSHING_NOT_PERFORMED);
return;
}
document.CheckIsoConformance(this, IsoKey.PDF_OBJECT);
document.FlushObject(this, canBeInObjStm && GetObjectType() != STREAM && GetObjectType() != INDIRECT_REFERENCE
&& GetIndirectReference().GetGenNumber() == 0);
}
}
PdfWriter類
protected internal virtual void FlushObject(PdfObject pdfObject, bool canBeInObjStm) {
switch (pdfObject.GetObjectType()) {
case PdfObject.STREAM:
case PdfObject.DICTIONARY: {
PdfDictionary dictionary = ((PdfDictionary)pdfObject);
MarkDictionaryContentToFlush(dictionary);
dictionary.ReleaseContent();
break;
}
PdfDictionary類
/// <summary>
/// Releases the dictionary's content by discarding its entry map. After this call the map is
/// null, so members that dereference it (such as <c>Get</c>) can no longer be used on this instance.
/// </summary>
protected internal virtual void ReleaseContent() {
    this.map = null;
}