How to copy highlighted text from a PDF file
I am using the iTextSharp library to develop a C# application that merges all annotation comments from two different PDF files into another PDF file. With the code below I am able to find the highlighted text, but not with proper formatting. Please help me; thanks in advance.
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace PdfFileApp
{
    /// <summary>
    /// Demo helper that prints the text lying under the highlight annotations of a PDF,
    /// using iTextSharp.
    /// </summary>
    public class pdftotext
    {
        /// <summary>
        /// Opens the hard-coded PDF, walks the /Annots array of every page and, for each
        /// /Highlight annotation, writes the text found inside the annotation's /Rect to the
        /// console, then waits for the user to press Enter.
        /// </summary>
        /// <remarks>
        /// Failures are reported on standard error instead of being thrown to the caller.
        /// NOTE(review): /Rect is the bounding box of the whole annotation; for a highlight that
        /// spans several lines it can also cover text that is not highlighted. Extracting per
        /// /QuadPoints quad would likely be more precise - confirm against the PDF specification.
        /// </remarks>
        public static void ReadAnnotation()
        {
            try
            {
                using (iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader("D:\\DEMO_Supp_First Proof.pdf"))
                {
                    for (int i = 1; i <= reader.NumberOfPages; i++)
                    {
                        PdfDictionary page = reader.GetPageN(i);
                        PdfArray annots = page.GetAsArray(iTextSharp.text.pdf.PdfName.ANNOTS);
                        if (annots == null)
                        {
                            // Page carries no annotations at all.
                            continue;
                        }

                        foreach (PdfObject annot in annots.ArrayList)
                        {
                            // Resolve the (possibly indirect) entry; skip anything that is not a dictionary.
                            PdfDictionary annotationDic = iTextSharp.text.pdf.PdfReader.GetPdfObject(annot) as PdfDictionary;
                            if (annotationDic == null)
                            {
                                continue;
                            }

                            // Comparing from the constant side tolerates a missing /Subtype entry.
                            if (!PdfName.HIGHLIGHT.Equals(annotationDic.GetAsName(PdfName.SUBTYPE)))
                            {
                                continue;
                            }

                            iTextSharp.text.Rectangle rect;
                            if (!TryGetRect(annotationDic, out rect))
                            {
                                continue;
                            }

                            RenderFilter[] filter = { new RegionTextRenderFilter(rect) };
                            ITextExtractionStrategy strategy = new FilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);
                            string text = PdfTextExtractor.GetTextFromPage(reader, i, strategy);

                            // Extracted text followed by one blank line, then pause for Enter.
                            Console.WriteLine(text);
                            Console.WriteLine();
                            Console.ReadLine();
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Report the failure instead of silently discarding it.
                Console.Error.WriteLine("Failed to read annotations: " + ex);
            }
        }

        // Reads the four numbers of the annotation's /Rect entry into a Rectangle;
        // returns false when the entry is missing, too short, or holds a non-numeric value.
        private static bool TryGetRect(PdfDictionary annotationDic, out iTextSharp.text.Rectangle rect)
        {
            rect = null;
            PdfArray coordinates = annotationDic.GetAsArray(PdfName.RECT);
            if (coordinates == null || coordinates.Size < 4)
            {
                return false;
            }

            float[] values = new float[4];
            for (int k = 0; k < 4; k++)
            {
                // GetAsNumber resolves indirect references, unlike parsing ToString().
                PdfNumber number = coordinates.GetAsNumber(k);
                if (number == null)
                {
                    return false;
                }
                values[k] = number.FloatValue;
            }

            rect = new iTextSharp.text.Rectangle(values[0], values[1], values[2], values[3]);
            return true;
        }
    }
}
Solution 1:[1]
Install the Aspose.PDF library from the NuGet package manager and use the code below.
// Collects the text marked by every highlight annotation in the document.
string highlightedText = "";
var document = new Aspose.Pdf.Document(@"Path");
Aspose.Pdf.Facades.PdfAnnotationEditor annotationEditor = new Aspose.Pdf.Facades.PdfAnnotationEditor();
annotationEditor.BindPdf(document);
// Extract annotations from the first page through the last page of the document.
var annotationTypes = new[] { Aspose.Pdf.Annotations.AnnotationType.FreeText, Aspose.Pdf.Annotations.AnnotationType.Highlight };
var annotations = annotationEditor.ExtractAnnotations(1, document.Pages.Count, annotationTypes);
foreach (var annotation in annotations)
{
    // FreeText annotations are requested above as well; a hard cast to
    // HighlightAnnotation would throw InvalidCastException on them, so skip them here.
    var highlight = annotation as Aspose.Pdf.Annotations.HighlightAnnotation;
    if (highlight == null)
    {
        continue;
    }
    highlightedText += highlight.GetMarkedText();
}
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Ali Khalid |
