218 lines
6.4 KiB
C#
218 lines
6.4 KiB
C#
using iTextSharp.text.pdf;
|
|
using iTextSharp.text.pdf.parser;
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Threading;
|
|
using System.Threading.Tasks;
|
|
|
|
|
|
namespace PDFGenerator.Verifier
{
    /// <summary>
    /// Scans a folder for PDF files and reports the ones that iTextSharp cannot
    /// open or whose page content it cannot parse.
    /// </summary>
    public class Verifier : IDisposable
    {
        /// <summary>
        /// Gets the list of invalid PDF files found directly inside <paramref name="folder"/>.
        /// </summary>
        /// <param name="folder">Folder to verify; only files matching "*.pdf" are examined.</param>
        /// <param name="threads">
        /// Number of worker threads to use. Values below 1 are treated as 1 so
        /// that the files are always checked.
        /// </param>
        /// <returns>
        /// Paths of the files that failed validation. Entries are appended as
        /// workers finish, so their order depends on thread scheduling.
        /// </returns>
        /// <remarks>
        /// Exceptions raised by <see cref="Directory.GetFiles(string, string)"/>
        /// (for example when the folder does not exist) are not caught here.
        /// All per-call state is local, so separate calls do not share work
        /// queues or result lists.
        /// </remarks>
        public List<String> GetInvalidFiles(string folder, int threads)
        {
            Queue<string> pending = new Queue<string>(Directory.GetFiles(folder, "*.pdf"));
            List<String> invalid = new List<String>();

            // Queue<T> and List<T> are not safe for concurrent mutation, so every
            // access made by the workers goes through this single monitor.
            object gate = new object();

            int workerCount = Math.Max(1, threads);
            Thread[] workers = new Thread[workerCount];
            for (int i = 0; i < workerCount; i++)
            {
                workers[i] = new Thread(() => CheckFiles(pending, invalid, gate));
            }

            // Start the threads.
            foreach (Thread worker in workers)
            {
                worker.Start();
            }

            // Wait until every worker has drained the queue and finished.
            foreach (Thread worker in workers)
            {
                worker.Join();
            }

            return invalid;
        }

        /// <summary>
        /// Worker loop: repeatedly takes one file from <paramref name="pending"/>,
        /// validates it and records it in <paramref name="invalid"/> when it fails.
        /// Returns once the queue is empty.
        /// </summary>
        /// <param name="pending">Shared queue of file paths still to be checked.</param>
        /// <param name="invalid">Shared list receiving the paths that failed validation.</param>
        /// <param name="gate">Lock object guarding both shared collections.</param>
        private void CheckFiles(Queue<string> pending, List<String> invalid, object gate)
        {
            while (true)
            {
                string file;

                // The emptiness test and the removal happen under one lock so two
                // workers can never claim the same item or dequeue from an empty queue.
                lock (gate)
                {
                    if (pending.Count == 0)
                    {
                        return;
                    }

                    file = pending.Dequeue();
                }

                // Validation runs outside the lock so files are processed in parallel.
                // A file that disappeared since the directory listing is skipped.
                if (File.Exists(file) && !IsFileValid(file))
                {
                    lock (gate)
                    {
                        invalid.Add(file);
                    }
                }
            }
        }

        /// <summary>
        /// Checks whether a PDF can be opened and every page's content stream parsed.
        /// </summary>
        /// <param name="file">Path of the PDF file to check.</param>
        /// <returns>
        /// <c>true</c> when the reader, the content parser and every page were
        /// processed without an exception; otherwise <c>false</c>. Each failure
        /// stage writes a numbered message to the console.
        /// </returns>
        private bool IsFileValid(string file)
        {
            PdfReader reader;
            try
            {
                reader = new PdfReader(file);
            }
            catch
            {
                // Any failure to open the document counts as "invalid".
                Console.WriteLine(String.Concat("1 Error on: ", file));
                return false;
            }

            try
            {
                PdfReaderContentParser parser;
                MyImageRenderListener listener;
                try
                {
                    parser = new PdfReaderContentParser(reader);
                    listener = new MyImageRenderListener();
                }
                catch
                {
                    Console.WriteLine(String.Concat("2 Error on: ", file));
                    return false;
                }

                // PDF page numbers are 1-based.
                for (int page = 1; page <= reader.NumberOfPages; page++)
                {
                    try
                    {
                        parser.ProcessContent(page, listener);
                    }
                    catch
                    {
                        // The first unparsable page is enough to reject the file.
                        Console.WriteLine(String.Concat("3 Error on: ", file));
                        return false;
                    }
                }

                return true;
            }
            finally
            {
                // Release whatever the reader still holds, on every exit path.
                reader.Close();
            }
        }

        /// <summary>
        /// Searches the XObject resources of a page (or form) dictionary for the
        /// first image, descending into Form and Group XObjects.
        /// </summary>
        /// <param name="pg">Dictionary whose /Resources entry is inspected.</param>
        /// <returns>
        /// The indirect reference of the first image XObject found, or
        /// <c>null</c> when there are no resources, no XObjects, or no image.
        /// </returns>
        /// <remarks>Not currently called from within this class.</remarks>
        private static PdfObject FindImageInPDFDictionary(PdfDictionary pg)
        {
            if (pg == null)
            {
                return null;
            }

            // A dictionary without /Resources or /XObject simply contains no image.
            PdfDictionary res = PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES)) as PdfDictionary;
            if (res == null)
            {
                return null;
            }

            PdfDictionary xobj = PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT)) as PdfDictionary;
            if (xobj == null)
            {
                return null;
            }

            foreach (PdfName name in xobj.Keys)
            {
                PdfObject obj = xobj.Get(name);
                if (obj == null || !obj.IsIndirect())
                {
                    continue;
                }

                PdfDictionary tg = PdfReader.GetPdfObject(obj) as PdfDictionary;
                if (tg == null)
                {
                    continue;
                }

                PdfName type = PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE)) as PdfName;

                // Image at the root of the dictionary.
                if (PdfName.IMAGE.Equals(type))
                {
                    return obj;
                }

                // Image nested inside a form or a group: keep scanning the remaining
                // XObjects when the nested search comes back empty.
                if (PdfName.FORM.Equals(type) || PdfName.GROUP.Equals(type))
                {
                    PdfObject nested = FindImageInPDFDictionary(tg);
                    if (nested != null)
                    {
                        return nested;
                    }
                }
            }

            return null;
        }

        /// <summary>
        /// Present to satisfy <see cref="IDisposable"/>; this class keeps no
        /// state between calls, so there is nothing to release.
        /// </summary>
        public void Dispose()
        {
        }
    }
}
|