http://www.codeproject.com/KB/string/pdf2text.aspx
http://www.codeproject.com/KB/cpp/ExtractPDFText.aspx?fid=47947&df=90&mpp=25&noise=3&sort=Position&view=Quick&fr=51&select=1192500
http://www.tallcomponents.com/
http://www.codeproject.com/showcase/TallComponents.asp
Similar thread: this approach requires that you have the full version of Adobe Acrobat installed on your PC so that you can gain access to the Adobe APIs (which doesn't technically qualify as a free way to do it). Here is the code I used to read the contents of a PDF. You will have to add a reference to the Adobe Acrobat type library in your project:
| ' Reads the text of every page of the PDF whose path is in tbdocdisplaypath.Text |
| ' and appends it to doctext, one vbCrLf-terminated chunk per page. |
| ' Requires a project reference to the Adobe Acrobat type library (full Acrobat, not Reader). |
| ' doctext, doctype and tbdocdisplaypath are declared outside this snippet. |
| Dim objPDFDoc As New AcroPDDoc |
| Dim objPDFRect As New AcroRect |
| Dim objPDFPage As AcroPDPage |
| Dim objPDFRectTemp As Object |
| Dim objPDFTextSelection As AcroPDTextSelect |
| Dim lngPageCount As Long |
| Dim lngTextRangeCount As Long |
| Dim temptextcount As Long |
| Dim Fora As Long |
| ' Open reports failure through its return value; only walk the pages when it succeeded. |
| If objPDFDoc.Open(tbdocdisplaypath.Text) Then |
|     Try |
|         lngPageCount = objPDFDoc.GetNumPages |
|         ' Page numbers are zero-based. |
|         For Fora = 0 To lngPageCount - 1 |
|             objPDFPage = objPDFDoc.AcquirePage(Fora) |
|             If Not objPDFPage Is Nothing Then |
|                 ' Build a rectangle covering the whole page: (0,0) to (width,height). |
|                 objPDFRectTemp = objPDFPage.GetSize |
|                 objPDFRect.Left = 0 |
|                 objPDFRect.right = objPDFRectTemp.x |
|                 objPDFRect.Top = objPDFRectTemp.y |
|                 objPDFRect.bottom = 0 |
|                 objPDFTextSelection = objPDFDoc.CreateTextSelect(Fora, objPDFRect) |
|                 ' The selection can come back as Nothing (e.g. no selectable text); skip such pages. |
|                 If Not objPDFTextSelection Is Nothing Then |
|                     ' Query the element count once and reuse it as the loop bound. |
|                     temptextcount = objPDFTextSelection.GetNumText |
|                     For lngTextRangeCount = 0 To temptextcount - 1 |
|                         doctext = doctext & objPDFTextSelection.GetText(lngTextRangeCount) |
|                     Next |
|                 End If |
|             End If |
|             ' Separate pages with a line break, even when a page yielded no text. |
|             doctext = doctext & vbCrLf |
|         Next |
|     Finally |
|         ' Always release the document, even if a COM call above throws. |
|         objPDFDoc.Close() |
|     End Try |
| End If |
| doctype = "PDF" |


No comments:
Post a Comment