OCR PDF in C#

Beginning with GdPicture V10, you don’t need to render the pages of your PDF documents to images, run OCR on those images, and then create new PDF documents containing the text data. Instead, you can run OCR directly on any PDF document, and the recognized text is added to each page as invisible text.

The following example shows how it works, first in C# and then in VB.NET:

// Runs OCR on every page of "input.pdf" and saves the result to "output.pdf".
// We assume GdPicture has been correctly installed and unlocked.
// The using block guarantees Dispose() is called even if an exception is thrown part-way through.
using (GdPicturePDF oGdPicturePDF = new GdPicturePDF())
{
    // Loading an input document.
    GdPictureStatus status = oGdPicturePDF.LoadFromFile("input.pdf", false);
    // Checking if loading has been successful.
    if (status == GdPictureStatus.OK)
    {
        int pageCount = oGdPicturePDF.GetPageCount();
        // Loop through pages, numbered from 1 to pageCount.
        for (int i = 1; i <= pageCount; i++)
        {
            // Selecting a page. OCR is attempted only when the selection succeeded,
            // so a failed selection never causes a different page to be processed.
            GdPictureStatus pageStatus = oGdPicturePDF.SelectPage(i);
            if (pageStatus == GdPictureStatus.OK)
            {
                pageStatus = oGdPicturePDF.OcrPage("eng", "C:\\GdPicture.NET 14\\Redist\\OCR", "", 200);
            }
            // Report the failure for this page and carry on with the remaining pages.
            if (pageStatus != GdPictureStatus.OK)
            {
                MessageBox.Show("Error occurred on the page " + i.ToString() + ". Error: " + pageStatus.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
        }
        // Saving to a different file.
        status = oGdPicturePDF.SaveToFile("output.pdf", true);
        if (status == GdPictureStatus.OK)
        {
            MessageBox.Show("Done!", "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }
        else
        {
            MessageBox.Show("The document can't be saved. Status: " + status.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
        // Closing the document that was opened above.
        oGdPicturePDF.CloseDocument();
    }
    else
    {
        MessageBox.Show("The document can't be opened. Status: " + status.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
'Runs OCR on every page of "input.pdf" and saves the result to "output.pdf".
'We assume GdPicture has been correctly installed and unlocked.
'The Using block guarantees Dispose() is called even if an exception is thrown part-way through.
Using oGdPicturePDF As New GdPicturePDF()
    'Loading an input document.
    Dim status As GdPictureStatus = oGdPicturePDF.LoadFromFile("input.pdf", False)
    'Checking if loading has been successful.
    If status = GdPictureStatus.OK Then
        Dim pageCount As Integer = oGdPicturePDF.GetPageCount()
        'Loop through pages, numbered from 1 to pageCount.
        For i As Integer = 1 To pageCount
            'Selecting a page. OCR is attempted only when the selection succeeded,
            'so a failed selection never causes a different page to be processed.
            Dim pageStatus As GdPictureStatus = oGdPicturePDF.SelectPage(i)
            If pageStatus = GdPictureStatus.OK Then
                pageStatus = oGdPicturePDF.OcrPage("eng", "C:\GdPicture.NET 14\Redist\OCR", "", 200)
            End If
            'Report the failure for this page and carry on with the remaining pages.
            If pageStatus <> GdPictureStatus.OK Then
                MessageBox.Show("Error occurred on the page " & i.ToString() & ". Error: " & pageStatus.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
            End If
        Next
        'Saving to a different file.
        status = oGdPicturePDF.SaveToFile("output.pdf", True)
        If status = GdPictureStatus.OK Then
            MessageBox.Show("Done!", "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
        Else
            MessageBox.Show("The document can't be saved. Status: " & status.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
        'Closing the document that was opened above.
        oGdPicturePDF.CloseDocument()
    Else
        MessageBox.Show("The document can't be opened. Status: " & status.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
    End If
End Using