import java.io.File;
import java.io.IOException;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.PDFTextStripperByArea;

/**
 * Command-line example that extracts the text of a PDF file with Apache PDFBox
 * and prints it to standard output.
 *
 * <p>Usage: {@code java mytext [path-to-pdf]}. When no argument is supplied the
 * file at {@code H:\test.pdf} is read.
 */
class mytext {

    /** Path of the PDF that is read when no command-line argument is given. */
    private static final String DEFAULT_PDF_PATH = "H:\\test.pdf";

    /**
     * Loads the PDF, extracts its text and prints it prefixed with {@code "Text:"}.
     * Encrypted documents are skipped; a note is written to standard error.
     *
     * @param args optional; {@code args[0]} is the path of the PDF to read
     */
    public static void main(String[] args) {
        String pdfPath = args.length > 0 ? args[0] : DEFAULT_PDF_PATH;

        // try-with-resources guarantees the document (and its underlying file
        // handle) is closed even when text extraction throws.
        try (PDDocument document = PDDocument.load(new File(pdfPath))) {
            if (document.isEncrypted()) {
                System.err.println("Skipping encrypted PDF: " + pdfPath);
                return;
            }

            PDFTextStripper stripper = new PDFTextStripper();
            String text = stripper.getText(document);
            System.out.println("Text:" + text);
        } catch (IOException e) {
            // Loading and text extraction both report failures as IOException.
            e.printStackTrace();
        }
    }
}
The PDFBox API can be downloaded from:
https://pdfbox.apache.org/index.html
It is open source.
Version 2.0.8 is also available on my Google Drive: https://drive.google.com/open?id=1S_7Jq-D2FG1ebe04pOywdYy7YxxbTUFr