http://www.codeproject.com/KB/string/pdf2text.aspx
http://www.codeproject.com/KB/cpp/ExtractPDFText.aspx?fid=47947&df=90&mpp=25&noise=3&sort=Position&view=Quick&fr=51&select=1192500
http://www.tallcomponents.com/
http://www.codeproject.com/showcase/TallComponents.asp
It requires that you have the full version of Adobe Acrobat installed on your PC so that you can gain access to the Acrobat APIs (so it doesn't technically qualify as a free way to do it). Here is the code I used to read the contents of a PDF. You will have to add a reference to the Adobe Acrobat APIs in your project:
| ' Reads the text of every page of a PDF through the Adobe Acrobat COM API |
| ' and appends it to doctext, one line break after each page. |
| ' The PDF path is taken from tbdocdisplaypath.Text; doctext and doctype |
| ' are expected to be declared by the surrounding code. |
| Dim objPDFPage As AcroPDPage |
| Dim objPDFDoc As New AcroPDDoc |
| Dim objPDFAVDoc As AcroAVDoc |
| Dim objAcroApp As AcroApp |
| Dim objPDFRectTemp As Object |
| Dim objPDFRect As New AcroRect |
| Dim lngTextRangeCount As Long |
| Dim objPDFTextSelection As AcroPDTextSelect |
| Dim temptextcount As Long |
| Dim strText As String |
| Dim lngPageCount As Long |
| Dim Fora As Long |
| objPDFDoc.Open(tbdocdisplaypath.Text) |
| lngPageCount = objPDFDoc.GetNumPages |
| ' Pages are addressed by zero-based index |
| For Fora = 0 To lngPageCount - 1 |
| objPDFPage = objPDFDoc.AcquirePage(Fora) |
| ' Build a rectangle covering the whole page from the size GetSize reports |
| objPDFRectTemp = objPDFPage.GetSize |
| objPDFRect.Left = 0 |
| objPDFRect.right = objPDFRectTemp.x |
| objPDFRect.Top = objPDFRectTemp.y |
| objPDFRect.bottom = 0 |
| ' Select the text inside that rectangle on the current page (Fora), not lngPageCount |
| objPDFTextSelection = objPDFDoc.CreateTextSelect(Fora, objPDFRect) |
| ' Skip the page if no selection could be created (e.g. no selectable text) |
| If objPDFTextSelection IsNot Nothing Then |
| ' Get The Text Of The Range |
| temptextcount = objPDFTextSelection.GetNumText |
| For lngTextRangeCount = 1 To temptextcount |
| ' GetText takes a zero-based index, hence the - 1 |
| doctext = doctext & objPDFTextSelection.GetText(lngTextRangeCount - 1) |
| Next |
| End If |
| ' Separate the text of consecutive pages with a line break |
| doctext = doctext & vbCrLf |
| Next |
| doctype = "PDF" |
| objPDFDoc.Close() |

No comments:
Post a Comment