How to copy highlighted text from a PDF file

I am using the iTextSharp library in a C# application to merge all annotations and comments from two different PDF files into another PDF file. With the code below I am able to find the highlighted text, but it is not extracted with proper formatting. Please help; thanks in advance.

using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace PdfFileApp
{
    /// <summary>
    /// Extracts the text lying under highlight annotations of a PDF using iTextSharp.
    /// </summary>
    public class pdftotext
    {
        /// <summary>
        /// Walks every page of the hard-coded PDF, and for each annotation whose
        /// subtype is <c>/Highlight</c> prints the page text that falls inside the
        /// annotation's <c>/Rect</c> to the console, then waits for the user to press Enter.
        /// </summary>
        /// <remarks>
        /// Any exception is caught and reported on standard error; the method itself
        /// does not throw.
        /// NOTE(review): <c>/Rect</c> is the annotation's bounding box, so for a highlight
        /// spanning several lines it can include text that is not actually highlighted.
        /// Reading <c>/QuadPoints</c> instead would likely give tighter results - confirm
        /// against the PDF specification before changing.
        /// </remarks>
        public static void ReadAnnotation()
        {
            try
            {
                using (iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader("D:\\DEMO_Supp_First Proof.pdf"))
                {
                    // PDF page numbers are 1-based.
                    for (int i = 1; i <= reader.NumberOfPages; i++)
                    {
                        PdfDictionary page = reader.GetPageN(i);
                        PdfArray annots = page.GetAsArray(PdfName.ANNOTS);
                        if (annots == null)
                        {
                            continue; // page has no annotations
                        }

                        foreach (PdfObject annot in annots.ArrayList)
                        {
                            // Entries may be indirect references; resolve them first.
                            // The previous code additionally looked up annots.GetAsDict(i),
                            // i.e. used the page number as an annotation index, which
                            // returned an unrelated annotation or null. That lookup is gone.
                            PdfDictionary annotationDic =
                                iTextSharp.text.pdf.PdfReader.GetPdfObject(annot) as PdfDictionary;
                            if (annotationDic == null)
                            {
                                continue;
                            }

                            // Comparing from the constant side keeps this safe when
                            // the annotation has no /Subtype entry.
                            if (!PdfName.HIGHLIGHT.Equals(annotationDic.GetAsName(PdfName.SUBTYPE)))
                            {
                                continue;
                            }

                            iTextSharp.text.Rectangle rect;
                            if (!TryReadRect(annotationDic.GetAsArray(PdfName.RECT), out rect))
                            {
                                continue; // missing or malformed /Rect
                            }

                            // Keep only text chunks that fall inside the rectangle.
                            RenderFilter[] filter = { new RegionTextRenderFilter(rect) };
                            ITextExtractionStrategy strategy =
                                new FilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);

                            Console.WriteLine(PdfTextExtractor.GetTextFromPage(reader, i, strategy));
                            Console.ReadLine(); // pause after each highlight, as before
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Report instead of silently discarding, while still not propagating.
                Console.Error.WriteLine("Failed to read PDF annotations: " + ex.Message);
            }
        }

        /// <summary>
        /// Builds a rectangle from the first four numeric entries of <paramref name="coordinates"/>.
        /// </summary>
        /// <param name="coordinates">The annotation's <c>/Rect</c> array; may be null.</param>
        /// <param name="rect">The resulting rectangle, or null when the method returns false.</param>
        /// <returns>
        /// True when the array has at least four entries and the first four are numbers;
        /// otherwise false.
        /// </returns>
        private static bool TryReadRect(PdfArray coordinates, out iTextSharp.text.Rectangle rect)
        {
            rect = null;
            if (coordinates == null || coordinates.Size < 4)
            {
                return false;
            }

            float[] values = new float[4];
            for (int k = 0; k < values.Length; k++)
            {
                PdfNumber number = coordinates.GetAsNumber(k);
                if (number == null)
                {
                    return false;
                }

                values[k] = number.FloatValue;
            }

            rect = new iTextSharp.text.Rectangle(values[0], values[1], values[2], values[3]);
            return true;
        }
    }
}


Solution 1:[1]

Install the Aspose.PDF library from the NuGet package manager and use the code below.

      // Accumulates the marked text of every highlight annotation in the document.
      string highlightedText = "";

        var document = new Aspose.Pdf.Document(@"Path");

        Aspose.Pdf.Facades.PdfAnnotationEditor annotationEditor = new Aspose.Pdf.Facades.PdfAnnotationEditor();
        annotationEditor.BindPdf(document);

        // Request highlight annotations only: the loop below works with
        // HighlightAnnotation, and casting any other kind (e.g. FreeText) to it
        // would throw InvalidCastException.
        var annotationTypes = new[] { Aspose.Pdf.Annotations.AnnotationType.Highlight };

        // Cover every page instead of a hard-coded 1..2 range.
        var annotations = annotationEditor.ExtractAnnotations(1, document.Pages.Count, annotationTypes);
        foreach (var annotation in annotations)
        {
            // Defensive: skip anything that is not actually a highlight.
            var highlight = annotation as Aspose.Pdf.Annotations.HighlightAnnotation;
            if (highlight == null)
            {
                continue;
            }

            highlightedText += highlight.GetMarkedText();
        }

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Ali Khalid