using System; using System.Collections.Generic; using System.Linq; using System.Text; using iTextSharp.text.pdf; using System.IO; using System.Drawing; using Disco.BI.DocumentTemplateBI.Importer; using Disco.BI.DocumentTemplateBI; using System.Drawing.Drawing2D; using com.google.zxing; using com.google.zxing.multi.qrcode; using Disco.Data.Repository; using System.Web.Caching; using Disco.BI.Extensions; using Disco.Models.Repository; using System.Collections; using com.google.zxing.common; using BitMiracle.LibTiff.Classic; namespace Disco.BI.Interop.Pdf { public static class PdfImporter { private class DetectImageResult : IDisposable { public Result Result { get; set; } public Point ResultOffset { get; set; } public double ResultScale { get; set; } public void Dispose() { // Do Nothing; yet... } } private class DetectPageResult : IDisposable { public int PageNumber { get; set; } public DocumentUniqueIdentifier DetectedIdentifier { get; set; } public Disco.BI.Extensions.UtilityExtensions.ImageMontage ThumbnailImage { get; set; } public MemoryStream AttachmentThumbnailImage { get; set; } public Disco.BI.Extensions.UtilityExtensions.ImageMontage UndetectedPageImage { get; set; } public void DrawThumbnailImageResult(DetectImageResult Result, Image DetectedImage) { if (Result.Result.ResultPoints.Length == 4) { // Draw Square on Thumbnail using (Graphics thumbnailGraphics = Graphics.FromImage(ThumbnailImage.Montage)) { var thumbnailOffset = ThumbnailImage.MontageSourceImageOffsets[DetectedImage]; var linePoints = Result.Result.ResultPoints.Select(p => new Point((int)(thumbnailOffset + ((Result.ResultOffset.X + p.X) * Result.ResultScale * ThumbnailImage.MontageScale)), (int)((p.Y + Result.ResultOffset.Y) * Result.ResultScale * ThumbnailImage.MontageScale))).ToArray(); using (GraphicsPath graphicsPath = new GraphicsPath()) { for (int linePointIndex = 0; linePointIndex < (linePoints.Length - 1); linePointIndex++) graphicsPath.AddLine(linePoints[linePointIndex], linePoints[linePointIndex + 1]); graphicsPath.AddLine(linePoints[linePoints.Length - 1], linePoints[0]); using (SolidBrush graphicsBrush = new SolidBrush(Color.FromArgb(128, 255, 0, 0))) thumbnailGraphics.FillPath(graphicsBrush, graphicsPath); using (Pen graphicsPen = new Pen(Color.FromArgb(200, 255, 0, 0), 2)) thumbnailGraphics.DrawPath(graphicsPen, graphicsPath); } } } } public void Dispose() { if (ThumbnailImage != null) { ThumbnailImage.Dispose(); ThumbnailImage = null; } if (AttachmentThumbnailImage != null) { AttachmentThumbnailImage.Dispose(); AttachmentThumbnailImage = null; } if (UndetectedPageImage != null) { UndetectedPageImage.Dispose(); UndetectedPageImage = null; } } } private static DetectImageResult DetectImage(DiscoDataContext dbContext, Bitmap pageImageOriginal, string SessionId, IEnumerable detectDocumentTemplates) { Bitmap pageImage = pageImageOriginal; double pageImageModifiedScale = 1; try { // Resize if Resolution > 80; Set to 72 Dpi if (pageImage.HorizontalResolution > 80 || pageImage.VerticalResolution > 80) { pageImageModifiedScale = pageImage.HorizontalResolution / 72; int newWidth = (int)((72 / pageImage.HorizontalResolution) * pageImage.Width); int newHeight = (int)((72 / pageImage.VerticalResolution) * pageImage.Height); pageImage = pageImage.ResizeImage(newWidth, newHeight); } Result zxingResult = default(Result); Point zxingResultOffset = Point.Empty; QRCodeMultiReader zxingMfr = new QRCodeMultiReader(); Hashtable zxingMfrHints = new Hashtable(); zxingMfrHints.Add(DecodeHintType.TRY_HARDER, true); // Look in 'Known' locations foreach (DocumentTemplate dt in detectDocumentTemplates) { var locationBag = dt.QRCodeLocations(dbContext); foreach (var location in locationBag) { System.Drawing.Rectangle region = new Rectangle( (int)(pageImage.Width * location.Left), (int)(pageImage.Width * location.Top), (int)(pageImage.Width * location.Width), (int)(pageImage.Height * location.Height)); RGBLuminanceSource zxingSource; using (Bitmap pageImageRegion = new Bitmap(region.Width, region.Height)) { using (Graphics pageImageRegionGraphics = Graphics.FromImage(pageImageRegion)) { pageImageRegionGraphics.DrawImage(pageImage, 0, 0, region, GraphicsUnit.Pixel); } zxingSource = new RGBLuminanceSource(pageImageRegion, region.Width, region.Height); } var zxingHB = new HybridBinarizer(zxingSource); var zxingBB = new BinaryBitmap(zxingHB); try { zxingResult = zxingMfr.decode(zxingBB, zxingMfrHints); zxingResultOffset = region.Location; break; } catch (ReaderException) { // Ignore Location Errors } } if (zxingResult != null) break; } if (zxingResult == null) { // Not found with 'known' locations // Try whole image var zxingSource = new RGBLuminanceSource(pageImage, pageImage.Width, pageImage.Height); var zxingHB = new HybridBinarizer(zxingSource); var zxingBB = new BinaryBitmap(zxingHB); try { zxingResult = zxingMfr.decode(zxingBB, zxingMfrHints); } catch (ReaderException) { // Ignore Errors } } if (zxingResult != null) return new DetectImageResult() { Result = zxingResult, ResultOffset = zxingResultOffset, ResultScale = pageImageModifiedScale }; else return null; } catch (Exception ex) { throw ex; } finally { if (pageImageOriginal != pageImage) pageImage.Dispose(); } } private static DetectPageResult DetectPage(DiscoDataContext dbContext, PdfReader pdfReader, int PageNumber, string SessionId, string DataStoreSessionCacheLocation, IEnumerable detectDocumentTemplates) { DetectPageResult result = new DetectPageResult() { PageNumber = PageNumber }; DocumentImporterLog.LogImportPageProgress(SessionId, PageNumber, 10, "Loading Page Images"); using (DisposableImageCollection pageImages = pdfReader.PdfPageImages(PageNumber)) { if (pageImages.Count > 0) { result.ThumbnailImage = pageImages.BuildImageMontage(256, 256); var pageThumbnailFilename = Path.Combine(DataStoreSessionCacheLocation, string.Format("{0}-{1}", SessionId, PageNumber)); result.ThumbnailImage.Montage.SavePng(pageThumbnailFilename); DocumentImporterLog.LogImportPageImageUpdate(SessionId, PageNumber); double pageProgressInterval = 90 / pageImages.Count; foreach (var pageImageOriginal in pageImages) { DocumentImporterLog.LogImportPageProgress(SessionId, PageNumber, (int)(10 + (pageProgressInterval * pageImages.IndexOf(pageImageOriginal))), String.Format("Processing Page Image {0} of {1}", pageImages.IndexOf(pageImageOriginal) + 1, pageImages.Count)); using (var zxingResult = DetectImage(dbContext, pageImageOriginal, SessionId, detectDocumentTemplates)) { if (zxingResult != null) { if (DocumentUniqueIdentifier.IsDocumentUniqueIdentifier(zxingResult.Result.Text)) { result.DrawThumbnailImageResult(zxingResult, pageImageOriginal); result.ThumbnailImage.Montage.SavePng(pageThumbnailFilename); DocumentImporterLog.LogImportPageImageUpdate(SessionId, PageNumber); result.AttachmentThumbnailImage = new MemoryStream(); using (var attachmentThumbImage = pageImages.BuildImageMontage(48, 48, true)) { using (Image mimeTypeIcon = Disco.Properties.Resources.MimeType_pdf16) attachmentThumbImage.Montage.EmbedIconOverlay(mimeTypeIcon); attachmentThumbImage.Montage.SaveJpg(95, result.AttachmentThumbnailImage); } result.DetectedIdentifier = new DocumentUniqueIdentifier(zxingResult.Result.Text, PageNumber.ToString()); return result; } } } } // Page Unassigned result.UndetectedPageImage = pageImages.BuildImageMontage(700, 700); } return result; } } public static bool ProcessPdfAttachment(string Filename, DiscoDataContext dbContext, string SessionId, Cache HttpCache) { var dataStoreUnassignedLocation = DataStore.CreateLocation(dbContext, "DocumentDropBox_Unassigned"); DocumentImporterLog.LogImportProgress(SessionId, 0, "Reading File"); using (FileStream fs = new FileStream(Filename, FileMode.Open, FileAccess.ReadWrite, FileShare.None)) { var pdfReader = new PdfReader(fs); var pdfPagesAssigned = new Dictionary>(); var dataStoreSessionPagesCacheLocation = DataStore.CreateLocation(dbContext, "Cache\\DocumentDropBox_SessionPages"); var detectDocumentTemplates = dbContext.DocumentTemplates.ToArray(); double progressInterval = 70 / pdfReader.NumberOfPages; for (int PageNumber = 1; PageNumber <= pdfReader.NumberOfPages; PageNumber++) { DocumentImporterLog.LogImportProgress(SessionId, (int)(PageNumber * progressInterval), string.Format("Processing Page {0} of {1}", PageNumber, pdfReader.NumberOfPages)); DocumentImporterLog.LogImportPageStarting(SessionId, PageNumber); using (var pageResult = DetectPage(dbContext, pdfReader, PageNumber, SessionId, dataStoreSessionPagesCacheLocation, detectDocumentTemplates)) { if (pageResult.DetectedIdentifier != null) { var docId = pageResult.DetectedIdentifier; pdfPagesAssigned.Add(PageNumber, new Tuple(docId, pageResult.AttachmentThumbnailImage.ToArray())); docId.LoadComponents(dbContext); DocumentImporterLog.LogImportPageDetected(SessionId, PageNumber, docId.DocumentUniqueId, docId.DocumentTemplate.Description, docId.DocumentTemplate.Scope, docId.DataId, docId.DataDescription); } else { // Undetected Page - Write Preview-Images while still in Memory DocumentImporterLog.LogImportPageUndetected(SessionId, PageNumber); // Thumbnail: string unassignedImageThumbnailFilename = Path.Combine(dataStoreUnassignedLocation, string.Format("{0}_{1}_thumbnail.png", SessionId, PageNumber)); pageResult.ThumbnailImage.Montage.SavePng(unassignedImageThumbnailFilename); // Large Preview string unassignedImageFilename = Path.Combine(dataStoreUnassignedLocation, string.Format("{0}_{1}.jpg", SessionId, PageNumber)); pageResult.UndetectedPageImage.Montage.SaveJpg(90, unassignedImageFilename); } } } // Write out Assigned Documents var assignedDocuments = pdfPagesAssigned.GroupBy(u => u.Value.Item1.DocumentUniqueId).ToList(); if (assignedDocuments.Count > 0) { progressInterval = 20 / assignedDocuments.Count; foreach (var documentPortion in assignedDocuments) { DocumentImporterLog.LogImportProgress(SessionId, (int)(70 + (assignedDocuments.IndexOf(documentPortion) * progressInterval)), string.Format("Importing Documents {0} of {1}", assignedDocuments.IndexOf(documentPortion) + 1, assignedDocuments.Count)); var documentPortionInfo = documentPortion.First().Value; var documentPortionIdentifier = documentPortionInfo.Item1; var documentPortionThumbnail = documentPortionInfo.Item2; if (!documentPortionIdentifier.LoadComponents(dbContext)) { // Unknown Document Unique Id foreach (var dp in documentPortion) { var tag = int.Parse(dp.Value.Item1.Tag); if (pdfPagesAssigned.ContainsKey(tag)) pdfPagesAssigned.Remove(tag); } } else { using (MemoryStream msBuilder = new MemoryStream()) { var pdfDoc = new iTextSharp.text.Document(); var pdfCopy = new PdfCopy(pdfDoc, msBuilder); pdfDoc.Open(); pdfCopy.CloseStream = false; foreach (var dp in documentPortion.OrderBy(dg => dg.Value.Item1.Page)) { var pageSize = pdfReader.GetPageSizeWithRotation(dp.Key); var page = pdfCopy.GetImportedPage(pdfReader, dp.Key); pdfDoc.SetPageSize(pageSize); pdfDoc.NewPage(); pdfCopy.AddPage(page); } pdfDoc.Close(); pdfCopy.Close(); msBuilder.Position = 0; var attachmentSuccess = documentPortionIdentifier.ImportPdfAttachment(dbContext, msBuilder, documentPortionThumbnail); if (!attachmentSuccess) { // Unable to add Attachment foreach (var dp in documentPortion) { var tag = int.Parse(dp.Value.Item1.Tag); if (pdfPagesAssigned.ContainsKey(tag)) pdfPagesAssigned.Remove(tag); } } } } } } // Write out Unassigned Pages List pdfPagesUnassigned = new List(); for (int PageNumber = 1; PageNumber <= pdfReader.NumberOfPages; PageNumber++) if (!pdfPagesAssigned.ContainsKey(PageNumber)) pdfPagesUnassigned.Add(PageNumber); if (pdfPagesUnassigned.Count > 0) { progressInterval = 10 / pdfPagesUnassigned.Count; //dataStoreUnassignedLocation foreach (var PageNumber in pdfPagesUnassigned) { DocumentImporterLog.LogImportProgress(SessionId, (int)(90 + (pdfPagesUnassigned.IndexOf(PageNumber) * progressInterval)), string.Format("Processing Undetected Documents {0} of {1}", pdfPagesUnassigned.IndexOf(PageNumber) + 1, pdfPagesUnassigned.Count)); using (MemoryStream msBuilder = new MemoryStream()) { var pdfDoc = new iTextSharp.text.Document(); var pdfCopy = new PdfCopy(pdfDoc, msBuilder); pdfDoc.Open(); pdfCopy.CloseStream = false; var pageSize = pdfReader.GetPageSizeWithRotation(PageNumber); var page = pdfCopy.GetImportedPage(pdfReader, PageNumber); pdfDoc.SetPageSize(pageSize); pdfDoc.NewPage(); pdfCopy.AddPage(page); pdfDoc.Close(); pdfCopy.Close(); File.WriteAllBytes(Path.Combine(dataStoreUnassignedLocation, string.Format("{0}_{1}.pdf", SessionId, PageNumber)), msBuilder.ToArray()); DocumentImporterLog.LogImportPageUndetectedStored(SessionId, PageNumber); } } } } DocumentImporterLog.LogImportProgress(SessionId, 100, "Finished Importing Document"); return true; } public static bool ProcessPdfAttachment(string Filename, DiscoDataContext dbContext, string DocumentTemplateId, string DataId, string UserId, DateTime Timestamp) { using (FileStream fs = new FileStream(Filename, FileMode.Open, FileAccess.ReadWrite, FileShare.None)) { DocumentUniqueIdentifier identifier = new DocumentUniqueIdentifier(DocumentTemplateId, DataId, UserId, Timestamp); identifier.LoadComponents(dbContext); return identifier.ImportPdfAttachment(dbContext, fs, null); } } public static DisposableImageCollection GetPageImages(PdfReader pdfReader, int PageNumber) { var pageImages = new DisposableImageCollection(); var pdfPage = pdfReader.GetPageN(PageNumber); PdfDictionary pdfPageResouces = (PdfDictionary)((PdfDictionary)pdfPage.GetDirectObject(PdfName.RESOURCES)).GetDirectObject(PdfName.XOBJECT); foreach (var pdfResKey in pdfPageResouces.Keys) { var pdfRes = pdfPageResouces.GetDirectObject(pdfResKey); if (pdfRes.IsStream()) { var pdfResStream = (PdfStream)pdfRes; var pdfResSubType = pdfResStream.Get(PdfName.SUBTYPE); if (pdfResSubType != null && pdfResSubType == PdfName.IMAGE) { if (pdfResStream.Get(PdfName.FILTER) == PdfName.CCITTFAXDECODE) { // TIFF // Try Using GDI+ for TIFF... var width = ((PdfNumber)(pdfResStream.Get(PdfName.WIDTH))).IntValue; var height = ((PdfNumber)(pdfResStream.Get(PdfName.HEIGHT))).IntValue; var bpc = ((PdfNumber)(pdfResStream.Get(PdfName.BITSPERCOMPONENT))).IntValue; var compressionMethod = Compression.CCITTFAX3; var decodeParams = pdfResStream.GetAsDict(PdfName.DECODEPARMS); if (decodeParams != null && decodeParams.Contains(PdfName.K) && decodeParams.GetAsNumber(PdfName.K).IntValue < 0) compressionMethod = Compression.CCITTFAX4; using (MemoryStream tiffStream = PdfToTiffStream(PdfReader.GetStreamBytesRaw((PRStream)pdfResStream), width, height, bpc, compressionMethod)) { pageImages.Add((Bitmap)Bitmap.FromStream(tiffStream)); } continue; } if (pdfResStream.Get(PdfName.FILTER) == PdfName.DCTDECODE) { // JPG using (MemoryStream jpgStream = new MemoryStream(PdfReader.GetStreamBytesRaw((PRStream)pdfResStream))) { pageImages.Add((Bitmap)Bitmap.FromStream(jpgStream, true, true)); } continue; } } } } return pageImages; } private static MemoryStream PdfToTiffStream(byte[] PdfStream, int Width, int Height, int BitsPerComponent, Compression CompressionMethod) { var ms = new MemoryStream(); Tiff tif = Tiff.ClientOpen("in-memory", "w", ms, new TiffStream()); tif.SetField(TiffTag.IMAGEWIDTH, Width); tif.SetField(TiffTag.IMAGELENGTH, Height); tif.SetField(TiffTag.COMPRESSION, CompressionMethod); tif.SetField(TiffTag.BITSPERSAMPLE, BitsPerComponent); tif.SetField(TiffTag.SAMPLESPERPIXEL, 1); tif.WriteRawStrip(0, PdfStream, PdfStream.Length); tif.Flush(); return ms; } } }