<!-- Optional: For advanced diff visualization -->
<dependency>
    <groupId>com.github.difflib</groupId>
    <artifactId>difflib</artifactId>
    <version>1.3.0</version>
</dependency>
</dependencies>

name: PDF Comparison
on:
  pull_request:
    paths:
      - '**/*.pdf'
  workflow_dispatch:
    inputs:
      pdf1:
        description: 'First PDF file path'
        required: true
      pdf2:
        description: 'Second PDF file path'
        required: true
// Method 3: Image-based comparison (requires PDF to image conversion) public static ComparisonResult compareByImages(String pdfPath1, String pdfPath2) throws IOException // Convert PDF pages to images first List<BufferedImage> images1 = convertPDFToImages(pdfPath1); List<BufferedImage> images2 = convertPDFToImages(pdfPath2); ComparisonResult result = new ComparisonResult(); result.setImagesIdentical(compareImages(images1, images2)); return result; java by comparison pdf github
private static String extractTextFromPDF(String pdfPath) throws IOException try (PDDocument document = PDDocument.load(new File(pdfPath))) PDFTextStripper stripper = new PDFTextStripper(); return stripper.getText(document); images1 = convertPDFToImages(pdfPath1)
public static class PageDifference private int pageNumber; private String text1; private String text2; public PageDifference(int pageNumber, String text1, String text2) this.pageNumber = pageNumber; this.text1 = text1; this.text2 = text2; // Getters public int getPageNumber() return pageNumber; public String getText1() return text1; public String getText2() return text2; images2 = convertPDFToImages(pdfPath2)