http://www.codeproject.com/KB/string/pdf2text.aspx
http://www.codeproject.com/KB/cpp/ExtractPDFText.aspx?fid=47947&df=90&mpp=25&noise=3&sort=Position&view=Quick&fr=51&select=1192500
http://www.tallcomponents.com/
http://www.codeproject.com/showcase/TallComponents.asp
This approach requires that you have the full version of Adobe Acrobat installed on your PC so that you can gain access to the Adobe APIs (so it doesn't technically qualify as a free way to do it). Here is the code I used to read the contents of a PDF. You will have to add a reference to the Adobe APIs in your project:
' Reads the text of every page of the PDF named by tbdocdisplaypath.Text and
' appends it to doctext (one vbCrLf after each page), then sets doctype to "PDF".
' Requires a project reference to the Adobe Acrobat type library.
Dim objPDFPage As AcroPDPage
Dim objPDFDoc As New AcroPDDoc
Dim objPDFRectTemp As Object
Dim objPDFRect As New AcroRect
Dim objPDFTextSelection As AcroPDTextSelect
Dim lngTextRangeCount As Long
Dim lngPageCount As Long
Dim Fora As Long

' Open reports failure through its return value; skip extraction in that case
' rather than calling into a document that was never opened.
If objPDFDoc.Open(tbdocdisplaypath.Text) Then
    Try
        lngPageCount = objPDFDoc.GetNumPages
        For Fora = 0 To lngPageCount - 1
            objPDFPage = objPDFDoc.AcquirePage(Fora)

            ' Build a rectangle covering the whole page so the selection
            ' picks up all of its text.
            objPDFRectTemp = objPDFPage.GetSize
            objPDFRect.Left = 0
            objPDFRect.right = objPDFRectTemp.x
            objPDFRect.Top = objPDFRectTemp.y
            objPDFRect.bottom = 0

            ' The selection is created per page index (Fora), not from the page count.
            objPDFTextSelection = objPDFDoc.CreateTextSelect(Fora, objPDFRect)

            ' Append each text element of the selection to the accumulated text.
            For lngTextRangeCount = 0 To objPDFTextSelection.GetNumText - 1
                doctext = doctext & objPDFTextSelection.GetText(lngTextRangeCount)
            Next

            ' Separate pages with a line break.
            doctext = doctext & vbCrLf
        Next
        doctype = "PDF"
    Finally
        ' Always release the document, even if a page failed to read.
        objPDFDoc.Close()
    End Try
End If
No comments:
Post a Comment