2025-04-15 12:10:19 +02:00

218 lines
6.4 KiB
C#

using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace PDFGenerator.Verifier
{
/// <summary>
/// Scans a folder for PDF files and reports the ones that cannot be opened or
/// whose page content cannot be parsed by iTextSharp.
/// </summary>
public class Verifier : IDisposable
{
    /// <summary>
    /// get list of invalid pdf files
    /// </summary>
    /// <param name="folder">folder to verify (only its top level, "*.pdf" files)</param>
    /// <param name="threads">
    /// number of threads to use; values below 1 are treated as 1 and the count
    /// is capped at the number of files found
    /// </param>
    /// <returns>
    /// Paths of the files that failed verification. The order is not
    /// deterministic because files are checked concurrently.
    /// </returns>
    public List<String> GetInvalidFiles(string folder, int threads)
    {
        // Work state is local to the call, so concurrent calls on the same
        // instance cannot overwrite each other's lists.
        List<String> pending = new List<String>(Directory.GetFiles(folder, "*.pdf"));
        List<String> invalid = new List<String>();
        if (pending.Count == 0)
        {
            return invalid;
        }

        // Single monitor guarding both lists; List<T> is not safe for
        // concurrent mutation.
        object gate = new object();

        // At least one worker (otherwise nothing would be verified), and never
        // more workers than there are files to check.
        int workerCount = Math.Max(1, Math.Min(threads, pending.Count));
        ThreadStart work = delegate () { CheckFiles(pending, invalid, gate); };

        Thread[] workers = new Thread[workerCount];
        for (int i = 0; i < workerCount; i++)
        {
            workers[i] = new Thread(work);
        }

        //Start the threads.
        for (int i = 0; i < workerCount; i++)
        {
            workers[i].Start();
        }

        //Wait until all the spawned threads finish.
        for (int i = 0; i < workerCount; i++)
        {
            workers[i].Join();
        }

        return invalid;
    }

    /// <summary>
    /// Worker loop: repeatedly takes one file from <paramref name="pending"/>,
    /// verifies it and records it in <paramref name="invalid"/> when it fails.
    /// Returns once <paramref name="pending"/> is empty.
    /// </summary>
    /// <param name="pending">shared list of files still to be checked</param>
    /// <param name="invalid">shared list collecting the files that failed</param>
    /// <param name="gate">lock object guarding both lists</param>
    private void CheckFiles(List<String> pending, List<String> invalid, object gate)
    {
        while (true)
        {
            string file;

            // The emptiness test and the removal must happen atomically,
            // otherwise two workers can both see one remaining item.
            lock (gate)
            {
                if (pending.Count == 0)
                {
                    return;
                }

                // Extensions.RemoveAndGet is declared elsewhere in the project;
                // it is expected to remove one entry from the list and return it.
                file = Extensions.RemoveAndGet(pending);
            }

            // The (slow) PDF parsing runs outside the lock so workers overlap.
            if (String.IsNullOrEmpty(file) || !File.Exists(file) || IsFileValid(file))
            {
                continue;
            }

            lock (gate)
            {
                invalid.Add(file);
            }
        }
    }

    /// <summary>
    /// Opens <paramref name="file"/> with iTextSharp and runs the content
    /// parser over every page.
    /// </summary>
    /// <param name="file">path of the PDF file to verify</param>
    /// <returns>
    /// true when the file opens and every page is processed without an
    /// exception; false otherwise (the failing stage, numbered 1 to 3, is
    /// written to the console).
    /// </returns>
    private bool IsFileValid(string file)
    {
        PdfReader reader;
        try
        {
            reader = new PdfReader(file);
        }
        catch
        {
            Console.WriteLine(String.Concat("1 Error on: ", file));
            return false;
        }

        try
        {
            PdfReaderContentParser parser;
            MyImageRenderListener listener;
            try
            {
                parser = new PdfReaderContentParser(reader);
                listener = new MyImageRenderListener();
            }
            catch
            {
                Console.WriteLine(String.Concat("2 Error on: ", file));
                return false;
            }

            // PDF page numbers are 1-based.
            for (int page = 1; page <= reader.NumberOfPages; page++)
            {
                try
                {
                    parser.ProcessContent(page, listener);
                }
                catch
                {
                    Console.WriteLine(String.Concat("3 Error on: ", file));
                    return false;
                }
            }

            return true;
        }
        finally
        {
            // Always release the reader; it was previously leaked on every path.
            reader.Close();
        }
    }

    /// <summary>
    /// Searches the XObject resources of a PDF dictionary for an image,
    /// descending into Form and Group XObjects.
    /// </summary>
    /// <param name="pg">page (or form/group) dictionary to search; may be null</param>
    /// <returns>
    /// The indirect reference of the first image found, or null when the
    /// dictionary has no resources, no XObjects, or no image.
    /// </returns>
    private static PdfObject FindImageInPDFDictionary(PdfDictionary pg)
    {
        if (pg == null)
        {
            return null;
        }

        // A dictionary without /Resources or /XObject simply has no images.
        PdfDictionary res = PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES)) as PdfDictionary;
        if (res == null)
        {
            return null;
        }

        PdfDictionary xobj = PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT)) as PdfDictionary;
        if (xobj == null)
        {
            return null;
        }

        foreach (PdfName name in xobj.Keys)
        {
            PdfObject obj = xobj.Get(name);
            if (obj == null || !obj.IsIndirect())
            {
                continue;
            }

            PdfDictionary tg = PdfReader.GetPdfObject(obj) as PdfDictionary;
            if (tg == null)
            {
                continue;
            }

            PdfName type = PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE)) as PdfName;

            //image at the root of the pdf
            if (PdfName.IMAGE.Equals(type))
            {
                return obj;
            }

            //image inside a form or a group
            if (PdfName.FORM.Equals(type) || PdfName.GROUP.Equals(type))
            {
                PdfObject nested = FindImageInPDFDictionary(tg);

                // Keep scanning the remaining XObjects when this container
                // holds no image instead of giving up with null.
                if (nested != null)
                {
                    return nested;
                }
            }
        }

        return null;
    }

    /// <summary>
    /// No-op: the verifier keeps no resources between calls. Each PdfReader is
    /// closed inside IsFileValid.
    /// </summary>
    public void Dispose()
    {
    }
}
}