diff --git a/src/main/java/com/mindee/image/ExtractedImage.java b/src/main/java/com/mindee/image/ExtractedImage.java index 7b3d0fef9..cae0e9d79 100644 --- a/src/main/java/com/mindee/image/ExtractedImage.java +++ b/src/main/java/com/mindee/image/ExtractedImage.java @@ -4,6 +4,7 @@ import java.awt.image.BufferedImage; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import javax.imageio.ImageIO; @@ -37,7 +38,7 @@ public ExtractedImage(BufferedImage image, String filename, String saveFormat, i * Write the image to a file. * Uses the default image format and filename. * - * @param outputPath the output directory (must exist). + * @param outputPath the output path, it may be a file or a directory. * @throws IOException Throws if the file can't be accessed. */ public void writeToFile(String outputPath) throws IOException { @@ -48,12 +49,14 @@ public void writeToFile(String outputPath) throws IOException { * Write the image to a file. * Uses the default image format and filename. * - * @param outputPath the output directory (must exist). + * @param outputPath the output path, it may be a file or a directory. * @throws IOException Throws if the file can't be accessed. 
*/ public void writeToFile(Path outputPath) throws IOException { - var imagePath = outputPath.resolve(this.filename); - var outputfile = imagePath.toFile(); + if (Files.isDirectory(outputPath)) { + outputPath = outputPath.resolve(this.filename); + } + var outputfile = outputPath.toFile(); ImageIO.write(this.image, this.saveFormat, outputfile); } diff --git a/src/main/java/com/mindee/image/ExtractedImages.java b/src/main/java/com/mindee/image/ExtractedImages.java index 2da54231f..af0ea272b 100644 --- a/src/main/java/com/mindee/image/ExtractedImages.java +++ b/src/main/java/com/mindee/image/ExtractedImages.java @@ -10,8 +10,8 @@ public void saveAllToDisk(String outputPath) throws IOException { } public void saveAllToDisk(Path outputPath) throws IOException { - for (ExtractedImage image : this) { - image.writeToFile(outputPath); + for (ExtractedImage extractedImage : this) { + extractedImage.writeToFile(outputPath); } } } diff --git a/src/main/java/com/mindee/pdf/ExtractedPDF.java b/src/main/java/com/mindee/pdf/ExtractedPDF.java index 8b9c7c256..5280a8f68 100644 --- a/src/main/java/com/mindee/pdf/ExtractedPDF.java +++ b/src/main/java/com/mindee/pdf/ExtractedPDF.java @@ -3,6 +3,7 @@ import com.mindee.input.LocalInputSource; import java.io.IOException; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import lombok.Getter; @@ -26,14 +27,26 @@ public ExtractedPDF(byte[] fileBytes, String filename) { } /** - * Write the PDF to a file. + * Write the extracted PDF to a file. * - * @param outputPath the output directory (must exist). + * @param outputPath the output path, it may be a file or a directory. + * @throws IOException Throws if the file can't be accessed. + */ + public void writeToFile(Path outputPath) throws IOException { + if (Files.isDirectory(outputPath)) { + outputPath = outputPath.resolve(this.filename); + } + Files.write(outputPath, this.fileBytes); + } + + /** + * Write the extracted PDF to a file. 
+ * + * @param outputPath the output path, it may be a file or a directory. * @throws IOException Throws if the file can't be accessed. */ public void writeToFile(String outputPath) throws IOException { - var pdfPath = Paths.get(outputPath, this.filename); - Files.write(pdfPath, this.fileBytes); + writeToFile(Paths.get(outputPath)); } /** diff --git a/src/main/java/com/mindee/pdf/ExtractedPDFs.java b/src/main/java/com/mindee/pdf/ExtractedPDFs.java index fb36467d1..743c94fed 100644 --- a/src/main/java/com/mindee/pdf/ExtractedPDFs.java +++ b/src/main/java/com/mindee/pdf/ExtractedPDFs.java @@ -1,6 +1,17 @@ package com.mindee.pdf; +import java.io.IOException; +import java.nio.file.Path; import java.util.ArrayList; public class ExtractedPDFs extends ArrayList { + public void saveAllToDisk(String outputPath) throws IOException { + saveAllToDisk(Path.of(outputPath)); + } + + public void saveAllToDisk(Path outputPath) throws IOException { + for (ExtractedPDF extractedPDF : this) { + extractedPDF.writeToFile(outputPath); + } + } } diff --git a/src/main/java/com/mindee/v2/fileoperations/Crop.java b/src/main/java/com/mindee/v2/fileoperations/Crop.java index ffe4db44a..11885e743 100644 --- a/src/main/java/com/mindee/v2/fileoperations/Crop.java +++ b/src/main/java/com/mindee/v2/fileoperations/Crop.java @@ -15,12 +15,12 @@ public Crop(LocalInputSource inputSource) throws IOException { this.imageExtractor = new ImageExtractor(inputSource); } - public ExtractedImage extractSingle(CropItem cropItem) throws IOException { + public ExtractedImage extractSingleCrop(CropItem cropItem) throws IOException { return this.imageExtractor .extractImage(cropItem.getLocation(), cropItem.getLocation().getPage(), 0); } - public ExtractedImages extractMultiple(List cropItems) { + public ExtractedImages extractMultipleCrops(List cropItems) { var extractedImages = new ExtractedImages(); for (int i = 0; i < cropItems.size(); i++) { var cropItem = cropItems.get(i); diff --git 
a/src/main/java/com/mindee/v2/fileoperations/Split.java b/src/main/java/com/mindee/v2/fileoperations/Split.java index 367717ff1..321e9f09f 100644 --- a/src/main/java/com/mindee/v2/fileoperations/Split.java +++ b/src/main/java/com/mindee/v2/fileoperations/Split.java @@ -16,11 +16,11 @@ public Split(LocalInputSource inputSource) throws IOException { this.pdfSplitter = new BasePDFExtractor(inputSource); } - public ExtractedPDF extractSingle(SplitRange splitRange) throws IOException { + public ExtractedPDF extractSingleSplit(SplitRange splitRange) throws IOException { return this.pdfSplitter.extractSinglePage(splitRange.getPageRangeDistinct(), true); } - public ExtractedPDFs extractMultiple(ArrayList splitRanges) throws IOException { + public ExtractedPDFs extractMultipleSplits(ArrayList splitRanges) throws IOException { return this.pdfSplitter .extractSubDocuments( splitRanges.stream().map(SplitRange::getPageRangeDistinct).collect(Collectors.toList()) diff --git a/src/main/java/com/mindee/v2/product/crop/CropItem.java b/src/main/java/com/mindee/v2/product/crop/CropItem.java index fa9e100a4..6c1aab815 100644 --- a/src/main/java/com/mindee/v2/product/crop/CropItem.java +++ b/src/main/java/com/mindee/v2/product/crop/CropItem.java @@ -2,8 +2,12 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; +import com.mindee.image.ExtractedImage; +import com.mindee.input.LocalInputSource; +import com.mindee.v2.fileoperations.Crop; import com.mindee.v2.parsing.inference.field.FieldLocation; import com.mindee.v2.product.extraction.ExtractionResponse; +import java.io.IOException; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -40,4 +44,9 @@ public class CropItem { public String toString() { return "* :Location: " + location + "\n :Object Type: " + objectType; } + + public ExtractedImage extractFromInputSource(LocalInputSource inputSource) throws IOException { + var cropper = 
new Crop(inputSource); + return cropper.extractSingleCrop(this); + } } diff --git a/src/main/java/com/mindee/v2/product/crop/CropResult.java b/src/main/java/com/mindee/v2/product/crop/CropResult.java index 121e3f882..594b60062 100644 --- a/src/main/java/com/mindee/v2/product/crop/CropResult.java +++ b/src/main/java/com/mindee/v2/product/crop/CropResult.java @@ -2,6 +2,10 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; +import com.mindee.image.ExtractedImages; +import com.mindee.input.LocalInputSource; +import com.mindee.v2.fileoperations.Crop; +import java.io.IOException; import java.util.ArrayList; import java.util.StringJoiner; import lombok.AllArgsConstructor; @@ -24,6 +28,15 @@ public final class CropResult { @JsonProperty("crops") private ArrayList crops; + /** + * Based on the crop results, extract the documents into individual files as an + * {@link ExtractedImages} instance. + */ + public ExtractedImages extractFromInputSource(LocalInputSource inputSource) throws IOException { + var cropper = new Crop(inputSource); + return cropper.extractMultipleCrops(this.crops); + } + @Override public String toString() { var joiner = new StringJoiner("\n"); diff --git a/src/main/java/com/mindee/v2/product/split/SplitRange.java b/src/main/java/com/mindee/v2/product/split/SplitRange.java index 5b7775a9d..8685c79d0 100644 --- a/src/main/java/com/mindee/v2/product/split/SplitRange.java +++ b/src/main/java/com/mindee/v2/product/split/SplitRange.java @@ -2,7 +2,12 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; +import com.mindee.image.ExtractedImages; +import com.mindee.input.LocalInputSource; +import com.mindee.pdf.ExtractedPDF; +import com.mindee.v2.fileoperations.Split; import com.mindee.v2.product.extraction.ExtractionResponse; +import java.io.IOException; import java.util.ArrayList; import java.util.LinkedHashSet; import 
java.util.List; @@ -45,4 +50,13 @@ public class SplitRange { public List getPageRangeDistinct() { return new ArrayList<>(new LinkedHashSet<>(this.pageRange)); } + + /** + * Based on this split range, extract the corresponding pages from the input source as an + * {@link ExtractedPDF} instance. + */ + public ExtractedPDF extractFromInputSource(LocalInputSource inputSource) throws IOException { + var splitter = new Split(inputSource); + return splitter.extractSingleSplit(this); + } } diff --git a/src/main/java/com/mindee/v2/product/split/SplitResult.java b/src/main/java/com/mindee/v2/product/split/SplitResult.java index 97c1e65c2..10543a93e 100644 --- a/src/main/java/com/mindee/v2/product/split/SplitResult.java +++ b/src/main/java/com/mindee/v2/product/split/SplitResult.java @@ -2,6 +2,10 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; +import com.mindee.input.LocalInputSource; +import com.mindee.pdf.ExtractedPDFs; +import com.mindee.v2.fileoperations.Split; +import java.io.IOException; import java.util.ArrayList; import java.util.StringJoiner; import lombok.AllArgsConstructor; @@ -24,6 +28,15 @@ public final class SplitResult { @JsonProperty("splits") private ArrayList splits; + /** + * Based on the split results, extract the documents into individual files as an + * {@link ExtractedPDFs} instance. 
+ */ + public ExtractedPDFs extractFromInputSource(LocalInputSource inputSource) throws IOException { + var splitter = new Split(inputSource); + return splitter.extractMultipleSplits(this.splits); + } + @Override public String toString() { var joiner = new StringJoiner("\n"); diff --git a/src/test/java/com/mindee/v2/fileoperations/CropTest.java b/src/test/java/com/mindee/v2/fileoperations/CropTest.java index 11f788406..3a515f191 100644 --- a/src/test/java/com/mindee/v2/fileoperations/CropTest.java +++ b/src/test/java/com/mindee/v2/fileoperations/CropTest.java @@ -19,7 +19,7 @@ void singlePageSingleCrop_cropsCorrectly() throws Exception { var doc = localResponse.deserializeResponse(CropResponse.class); var extractedCrop = new Crop(inputSample) - .extractSingle(doc.getInference().getResult().getCrops().get(0)); + .extractSingleCrop(doc.getInference().getResult().getCrops().get(0)); assertEquals(0, extractedCrop.getPageId()); assertEquals("default_sample_000.jpg", extractedCrop.getFilename()); @@ -35,7 +35,7 @@ void singlePageMultiCrop_cropsCorrectly() throws Exception { var doc = localResponse.deserializeResponse(CropResponse.class); var extractedCrops = new Crop(inputSample) - .extractMultiple(doc.getInference().getResult().getCrops()); + .extractMultipleCrops(doc.getInference().getResult().getCrops()); assertEquals(2, extractedCrops.size()); @@ -59,7 +59,7 @@ void multiPageMultiCrop_cropsCorrectly() throws Exception { var doc = localResponse.deserializeResponse(CropResponse.class); var extractedCrops = new Crop(inputSample) - .extractMultiple(doc.getInference().getResult().getCrops()); + .extractMultipleCrops(doc.getInference().getResult().getCrops()); assertEquals(5, extractedCrops.size()); diff --git a/src/test/java/com/mindee/v2/fileoperations/SplitTest.java b/src/test/java/com/mindee/v2/fileoperations/SplitTest.java index abb43f432..b229c0955 100644 --- a/src/test/java/com/mindee/v2/fileoperations/SplitTest.java +++ 
b/src/test/java/com/mindee/v2/fileoperations/SplitTest.java @@ -18,7 +18,7 @@ void singlePage_splitsCorrectly() throws IOException { var doc = localResponse.deserializeResponse(SplitResponse.class); var extractedSplit = new Split(inputSample) - .extractSingle(doc.getInference().getResult().getSplits().get(0)); + .extractSingleSplit(doc.getInference().getResult().getSplits().get(0)); assertEquals("default_sample_000-000.pdf", extractedSplit.getFilename()); var asInputSource = extractedSplit.asInputSource(); @@ -33,7 +33,7 @@ void multiplePages_splitsCorrectly() throws IOException { var doc = localResponse.deserializeResponse(SplitResponse.class); var extractedSplits = new Split(inputSample) - .extractMultiple(doc.getInference().getResult().getSplits()); + .extractMultipleSplits(doc.getInference().getResult().getSplits()); assertEquals(2, extractedSplits.size()); diff --git a/src/test/java/com/mindee/v2/product/CropTest.java b/src/test/java/com/mindee/v2/product/CropTest.java index 1a93448a4..8b022afa4 100644 --- a/src/test/java/com/mindee/v2/product/CropTest.java +++ b/src/test/java/com/mindee/v2/product/CropTest.java @@ -1,14 +1,18 @@ package com.mindee.v2.product; import static com.mindee.TestingUtilities.assertStringEqualsFile; +import static com.mindee.TestingUtilities.getResourcePath; import static com.mindee.TestingUtilities.getV2ResourcePath; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import com.mindee.input.LocalInputSource; +import com.mindee.v2.fileoperations.Crop; import com.mindee.v2.parsing.LocalResponse; import com.mindee.v2.product.crop.CropResponse; import com.mindee.v2.product.extraction.ExtractionResponse; import java.io.IOException; +import java.nio.file.Files; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @@ -123,5 +127,56 @@ void extractionMustHaveValidProperties() throws IOException { .getValue() 
); } + + @Test + @DisplayName("extract all crops works") + void extractMultipleCrops() throws IOException { + var inputSource = new LocalInputSource(getV2ResourcePath("products/crop/default_sample.jpg")); + + CropResponse response = loadResponse("products/crop/default_sample_extraction.json"); + assertNotNull(response.getInference()); + + var crops = response.getInference().getResult().getCrops(); + + var cropper = new Crop(inputSource); + var classExtract = cropper.extractMultipleCrops(crops); + + assertNotNull(classExtract); + assertEquals(crops.size(), classExtract.size()); + + var methodExtract = response.getInference().getResult().extractFromInputSource(inputSource); + assertEquals(classExtract.size(), methodExtract.size()); + + var outputPath = getResourcePath("output"); + classExtract.saveAllToDisk(outputPath.toString()); + + assert Files.exists(outputPath.resolve("default_sample_001.jpg")); + assert Files.size(outputPath.resolve("default_sample_001.jpg")) >= 1500; + + assert Files.exists(outputPath.resolve("default_sample_002.jpg")); + assert Files.size(outputPath.resolve("default_sample_002.jpg")) >= 1500; + } + + @Test + @DisplayName("extract single crop works") + void extractSingleCrop() throws IOException { + var inputSource = new LocalInputSource(getV2ResourcePath("products/crop/default_sample.jpg")); + + CropResponse response = loadResponse("products/crop/default_sample_extraction.json"); + assertNotNull(response.getInference()); + + var extractedCrop = response + .getInference() + .getResult() + .getCrops() + .get(0) + .extractFromInputSource(inputSource); + + var outputPath = getResourcePath("output"); + extractedCrop.writeToFile(outputPath.resolve("default_sample_999.jpg")); + + assert Files.exists(outputPath.resolve("default_sample_999.jpg")); + assert Files.size(outputPath.resolve("default_sample_999.jpg")) >= 1500; + } } } diff --git a/src/test/java/com/mindee/v2/product/SplitTest.java b/src/test/java/com/mindee/v2/product/SplitTest.java index 
eae296def..73b9c121e 100644 --- a/src/test/java/com/mindee/v2/product/SplitTest.java +++ b/src/test/java/com/mindee/v2/product/SplitTest.java @@ -1,14 +1,18 @@ package com.mindee.v2.product; +import static com.mindee.TestingUtilities.getResourcePath; import static com.mindee.TestingUtilities.getV2ResourcePath; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import com.mindee.input.LocalInputSource; +import com.mindee.v2.fileoperations.Split; import com.mindee.v2.parsing.LocalResponse; import com.mindee.v2.product.extraction.ExtractionResponse; import com.mindee.v2.product.split.SplitRange; import com.mindee.v2.product.split.SplitResponse; import java.io.IOException; +import java.nio.file.Files; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @@ -101,5 +105,60 @@ void extractionMustHaveValidProperties() throws IOException { .getValue() ); } + + @Test + @DisplayName("extract all splits works") + void extractMultipleSplits() throws IOException { + var inputSource = new LocalInputSource( + getV2ResourcePath("products/split/default_sample.pdf") + ); + + SplitResponse response = loadResponse("products/split/default_sample_extraction.json"); + assertNotNull(response.getInference()); + + var splits = response.getInference().getResult().getSplits(); + + var splitter = new Split(inputSource); + var classExtract = splitter.extractMultipleSplits(splits); + + assertNotNull(classExtract); + assertEquals(splits.size(), classExtract.size()); + + var methodExtract = response.getInference().getResult().extractFromInputSource(inputSource); + assertEquals(classExtract.size(), methodExtract.size()); + + var outputPath = getResourcePath("output"); + classExtract.saveAllToDisk(outputPath.toString()); + + assert Files.exists(outputPath.resolve("default_sample_000-000.pdf")); + assert Files.size(outputPath.resolve("default_sample_000-000.pdf")) >= 
1500; + + assert Files.exists(outputPath.resolve("default_sample_001-001.pdf")); + assert Files.size(outputPath.resolve("default_sample_001-001.pdf")) >= 1500; + } + + @Test + @DisplayName("extract single split works") + void extractSingleSplit() throws IOException { + var inputSource = new LocalInputSource( + getV2ResourcePath("products/split/default_sample.pdf") + ); + + SplitResponse response = loadResponse("products/split/default_sample_extraction.json"); + assertNotNull(response.getInference()); + + var extractedSplit = response + .getInference() + .getResult() + .getSplits() + .get(0) + .extractFromInputSource(inputSource); + + var outputPath = getResourcePath("output"); + extractedSplit.writeToFile(outputPath.resolve("default_sample_999.pdf")); + + assert Files.exists(outputPath.resolve("default_sample_999.pdf")); + assert Files.size(outputPath.resolve("default_sample_999.pdf")) >= 1500; + } } }