Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions src/main/java/com/mindee/image/ExtractedImage.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import javax.imageio.ImageIO;
Expand Down Expand Up @@ -37,7 +38,7 @@ public ExtractedImage(BufferedImage image, String filename, String saveFormat, i
* Write the image to a file.
* Uses the default image format and filename.
*
* @param outputPath the output directory (must exist).
* @param outputPath the output path, it may be a file or a directory.
* @throws IOException Throws if the file can't be accessed.
*/
public void writeToFile(String outputPath) throws IOException {
Expand All @@ -48,12 +49,14 @@ public void writeToFile(String outputPath) throws IOException {
* Write the image to a file.
* Uses the default image format and filename.
*
* @param outputPath the output directory (must exist).
* @param outputPath the output path, it may be a file or a directory.
* @throws IOException Throws if the file can't be accessed.
*/
public void writeToFile(Path outputPath) throws IOException {
  // An existing directory means "use the default filename inside it";
  // any other path is taken as the full path of the file to write.
  var targetPath = Files.isDirectory(outputPath)
    ? outputPath.resolve(this.filename)
    : outputPath;
  // ImageIO.write() signals "no writer for this format" by returning false
  // instead of throwing, so report it rather than silently writing nothing.
  if (!ImageIO.write(this.image, this.saveFormat, targetPath.toFile())) {
    throw new IOException(
      "No image writer found for format '" + this.saveFormat + "', could not write " + targetPath
    );
  }
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/mindee/image/ExtractedImages.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ public void saveAllToDisk(String outputPath) throws IOException {
}

/**
 * Write every image of this collection to disk.
 * Each image is written by calling {@link ExtractedImage#writeToFile(Path)} with the same path.
 *
 * @param outputPath the output path, passed unchanged to every image. It should be an existing
 *     directory: a path that is not a directory is used as-is by each image, so all images
 *     would be written to that same file.
 * @throws IOException Throws if one of the files can't be written.
 */
public void saveAllToDisk(Path outputPath) throws IOException {
for (ExtractedImage image : this) {
image.writeToFile(outputPath);
for (ExtractedImage extractedImage : this) {
extractedImage.writeToFile(outputPath);
}
}
}
21 changes: 17 additions & 4 deletions src/main/java/com/mindee/pdf/ExtractedPDF.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.mindee.input.LocalInputSource;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import lombok.Getter;

Expand All @@ -26,14 +27,26 @@ public ExtractedPDF(byte[] fileBytes, String filename) {
}

/**
* Write the PDF to a file.
* Write the extracted PDF to a file.
*
* @param outputPath the output directory (must exist).
* @param outputPath the output path, it may be a file or a directory.
* @throws IOException Throws if the file can't be accessed.
*/
public void writeToFile(Path outputPath) throws IOException {
  // An existing directory gets this PDF's own filename appended;
  // any other path is used exactly as given.
  var destination = Files.isDirectory(outputPath)
    ? outputPath.resolve(this.filename)
    : outputPath;
  Files.write(destination, this.fileBytes);
}

/**
 * Write the extracted PDF to a file.
 * Convenience overload: the string is converted to a {@link Path} and handed to
 * {@link #writeToFile(Path)}.
 *
 * @param outputPath the output path, it may be a file or a directory.
 * @throws IOException Throws if the file can't be accessed.
 */
public void writeToFile(String outputPath) throws IOException {
var pdfPath = Paths.get(outputPath, this.filename);
Files.write(pdfPath, this.fileBytes);
writeToFile(Paths.get(outputPath));
}

/**
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/com/mindee/pdf/ExtractedPDFs.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
package com.mindee.pdf;

import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;

public class ExtractedPDFs extends ArrayList<ExtractedPDF> {
public void saveAllToDisk(String outputPath) throws IOException {
saveAllToDisk(Path.of(outputPath));
}

public void saveAllToDisk(Path outputPath) throws IOException {
for (ExtractedPDF extractedPDF : this) {
extractedPDF.writeToFile(outputPath);
}
}
}
4 changes: 2 additions & 2 deletions src/main/java/com/mindee/v2/fileoperations/Crop.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ public Crop(LocalInputSource inputSource) throws IOException {
this.imageExtractor = new ImageExtractor(inputSource);
}

public ExtractedImage extractSingle(CropItem cropItem) throws IOException {
public ExtractedImage extractSingleCrop(CropItem cropItem) throws IOException {
  // Read the location once; it is handed to the extractor together with its own page.
  var location = cropItem.getLocation();
  return this.imageExtractor.extractImage(location, location.getPage(), 0);
}

public ExtractedImages extractMultiple(List<CropItem> cropItems) {
public ExtractedImages extractMultipleCrops(List<CropItem> cropItems) {
var extractedImages = new ExtractedImages();
for (int i = 0; i < cropItems.size(); i++) {
var cropItem = cropItems.get(i);
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/mindee/v2/fileoperations/Split.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ public Split(LocalInputSource inputSource) throws IOException {
this.pdfSplitter = new BasePDFExtractor(inputSource);
}

public ExtractedPDF extractSingle(SplitRange splitRange) throws IOException {
public ExtractedPDF extractSingleSplit(SplitRange splitRange) throws IOException {
  // Pages of the range with duplicates removed, first-seen order kept.
  var distinctPages = splitRange.getPageRangeDistinct();
  return this.pdfSplitter.extractSinglePage(distinctPages, true);
}

public ExtractedPDFs extractMultiple(ArrayList<SplitRange> splitRanges) throws IOException {
public ExtractedPDFs extractMultipleSplits(ArrayList<SplitRange> splitRanges) throws IOException {
return this.pdfSplitter
.extractSubDocuments(
splitRanges.stream().map(SplitRange::getPageRangeDistinct).collect(Collectors.toList())
Expand Down
9 changes: 9 additions & 0 deletions src/main/java/com/mindee/v2/product/crop/CropItem.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.mindee.image.ExtractedImage;
import com.mindee.input.LocalInputSource;
import com.mindee.v2.fileoperations.Crop;
import com.mindee.v2.parsing.inference.field.FieldLocation;
import com.mindee.v2.product.extraction.ExtractionResponse;
import java.io.IOException;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;
Expand Down Expand Up @@ -40,4 +44,9 @@ public class CropItem {
public String toString() {
return "* :Location: " + location + "\n :Object Type: " + objectType;
}

/**
 * Extract this crop from the given input source.
 *
 * @param inputSource the local input source used to build the {@link Crop} helper.
 * @return the result of {@link Crop#extractSingleCrop(CropItem)} for this item.
 * @throws IOException Throws if the {@link Crop} helper can't be created or the extraction fails.
 */
public ExtractedImage extractFromInputSource(LocalInputSource inputSource) throws IOException {
  return new Crop(inputSource).extractSingleCrop(this);
}
}
13 changes: 13 additions & 0 deletions src/main/java/com/mindee/v2/product/crop/CropResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.mindee.image.ExtractedImages;
import com.mindee.input.LocalInputSource;
import com.mindee.v2.fileoperations.Crop;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringJoiner;
import lombok.AllArgsConstructor;
Expand All @@ -24,6 +28,15 @@ public final class CropResult {
@JsonProperty("crops")
private ArrayList<CropItem> crops;

/**
 * Extract all the crops of this result from the given input source.
 *
 * @param inputSource the local input source used to build the {@link Crop} helper.
 * @return the {@link ExtractedImages} produced from this result's crops.
 * @throws IOException Throws if the {@link Crop} helper can't be created.
 */
public ExtractedImages extractFromInputSource(LocalInputSource inputSource) throws IOException {
  return new Crop(inputSource).extractMultipleCrops(this.crops);
}

@Override
public String toString() {
var joiner = new StringJoiner("\n");
Expand Down
14 changes: 14 additions & 0 deletions src/main/java/com/mindee/v2/product/split/SplitRange.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.mindee.image.ExtractedImages;
import com.mindee.input.LocalInputSource;
import com.mindee.pdf.ExtractedPDF;
import com.mindee.v2.fileoperations.Split;
import com.mindee.v2.product.extraction.ExtractionResponse;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
Expand Down Expand Up @@ -45,4 +50,13 @@ public class SplitRange {
public List<Integer> getPageRangeDistinct() {
return new ArrayList<>(new LinkedHashSet<>(this.pageRange));
}

/**
 * Based on this split range, extract the matching pages from the input source as a single
 * {@link ExtractedPDF} instance.
 *
 * @param inputSource the local input source used to build the {@link Split} helper.
 * @return the result of {@link Split#extractSingleSplit(SplitRange)} for this range.
 * @throws IOException Throws if the {@link Split} helper can't be created or the extraction fails.
 */
public ExtractedPDF extractFromInputSource(LocalInputSource inputSource) throws IOException {
  var splitter = new Split(inputSource);
  return splitter.extractSingleSplit(this);
}
}
13 changes: 13 additions & 0 deletions src/main/java/com/mindee/v2/product/split/SplitResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.mindee.input.LocalInputSource;
import com.mindee.pdf.ExtractedPDFs;
import com.mindee.v2.fileoperations.Split;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringJoiner;
import lombok.AllArgsConstructor;
Expand All @@ -24,6 +28,15 @@ public final class SplitResult {
@JsonProperty("splits")
private ArrayList<SplitRange> splits;

/**
 * Extract all the splits of this result from the given input source.
 *
 * @param inputSource the local input source used to build the {@link Split} helper.
 * @return the {@link ExtractedPDFs} produced from this result's splits.
 * @throws IOException Throws if the {@link Split} helper can't be created or the extraction fails.
 */
public ExtractedPDFs extractFromInputSource(LocalInputSource inputSource) throws IOException {
  return new Split(inputSource).extractMultipleSplits(this.splits);
}

@Override
public String toString() {
var joiner = new StringJoiner("\n");
Expand Down
6 changes: 3 additions & 3 deletions src/test/java/com/mindee/v2/fileoperations/CropTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ void singlePageSingleCrop_cropsCorrectly() throws Exception {
var doc = localResponse.deserializeResponse(CropResponse.class);

var extractedCrop = new Crop(inputSample)
.extractSingle(doc.getInference().getResult().getCrops().get(0));
.extractSingleCrop(doc.getInference().getResult().getCrops().get(0));

assertEquals(0, extractedCrop.getPageId());
assertEquals("default_sample_000.jpg", extractedCrop.getFilename());
Expand All @@ -35,7 +35,7 @@ void singlePageMultiCrop_cropsCorrectly() throws Exception {
var doc = localResponse.deserializeResponse(CropResponse.class);

var extractedCrops = new Crop(inputSample)
.extractMultiple(doc.getInference().getResult().getCrops());
.extractMultipleCrops(doc.getInference().getResult().getCrops());

assertEquals(2, extractedCrops.size());

Expand All @@ -59,7 +59,7 @@ void multiPageMultiCrop_cropsCorrectly() throws Exception {
var doc = localResponse.deserializeResponse(CropResponse.class);

var extractedCrops = new Crop(inputSample)
.extractMultiple(doc.getInference().getResult().getCrops());
.extractMultipleCrops(doc.getInference().getResult().getCrops());

assertEquals(5, extractedCrops.size());

Expand Down
4 changes: 2 additions & 2 deletions src/test/java/com/mindee/v2/fileoperations/SplitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ void singlePage_splitsCorrectly() throws IOException {
var doc = localResponse.deserializeResponse(SplitResponse.class);

var extractedSplit = new Split(inputSample)
.extractSingle(doc.getInference().getResult().getSplits().get(0));
.extractSingleSplit(doc.getInference().getResult().getSplits().get(0));

assertEquals("default_sample_000-000.pdf", extractedSplit.getFilename());
var asInputSource = extractedSplit.asInputSource();
Expand All @@ -33,7 +33,7 @@ void multiplePages_splitsCorrectly() throws IOException {
var doc = localResponse.deserializeResponse(SplitResponse.class);

var extractedSplits = new Split(inputSample)
.extractMultiple(doc.getInference().getResult().getSplits());
.extractMultipleSplits(doc.getInference().getResult().getSplits());

assertEquals(2, extractedSplits.size());

Expand Down
55 changes: 55 additions & 0 deletions src/test/java/com/mindee/v2/product/CropTest.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
package com.mindee.v2.product;

import static com.mindee.TestingUtilities.assertStringEqualsFile;
import static com.mindee.TestingUtilities.getResourcePath;
import static com.mindee.TestingUtilities.getV2ResourcePath;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.mindee.input.LocalInputSource;
import com.mindee.v2.fileoperations.Crop;
import com.mindee.v2.parsing.LocalResponse;
import com.mindee.v2.product.crop.CropResponse;
import com.mindee.v2.product.extraction.ExtractionResponse;
import java.io.IOException;
import java.nio.file.Files;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -123,5 +127,56 @@ void extractionMustHaveValidProperties() throws IOException {
.getValue()
);
}

@Test
@DisplayName("extract all crops works")
void extractMultipleCrops() throws IOException {
  var inputSource = new LocalInputSource(getV2ResourcePath("products/crop/default_sample.jpg"));

  CropResponse response = loadResponse("products/crop/default_sample_extraction.json");
  assertNotNull(response.getInference());

  var crops = response.getInference().getResult().getCrops();

  // Extraction through the Crop helper class.
  var cropper = new Crop(inputSource);
  var classExtract = cropper.extractMultipleCrops(crops);

  assertNotNull(classExtract);
  assertEquals(crops.size(), classExtract.size());

  // Extraction through the result's convenience method must yield as many images.
  var methodExtract = response.getInference().getResult().extractFromInputSource(inputSource);
  assertEquals(classExtract.size(), methodExtract.size());

  var outputPath = getResourcePath("output");
  classExtract.saveAllToDisk(outputPath.toString());

  // JUnit assertions instead of the `assert` keyword: `assert` is skipped when the JVM
  // runs without -ea, which would let these checks pass without being evaluated.
  for (var savedName : new String[] { "default_sample_001.jpg", "default_sample_002.jpg" }) {
    var savedFile = outputPath.resolve(savedName);
    assertTrue(Files.exists(savedFile), "Missing output file: " + savedFile);
    assertTrue(Files.size(savedFile) >= 1500, "Output file is too small: " + savedFile);
  }
}

@Test
@DisplayName("extract single crop works")
void extractSingleCrop() throws IOException {
  var inputSource = new LocalInputSource(getV2ResourcePath("products/crop/default_sample.jpg"));

  CropResponse response = loadResponse("products/crop/default_sample_extraction.json");
  assertNotNull(response.getInference());

  var extractedCrop = response
    .getInference()
    .getResult()
    .getCrops()
    .get(0)
    .extractFromInputSource(inputSource);

  // Write to an explicit file path (not a directory) to exercise that branch of writeToFile.
  var savedFile = getResourcePath("output").resolve("default_sample_999.jpg");
  extractedCrop.writeToFile(savedFile);

  // JUnit assertions instead of the `assert` keyword: `assert` is skipped when the JVM
  // runs without -ea, which would let these checks pass without being evaluated.
  assertTrue(Files.exists(savedFile), "Missing output file: " + savedFile);
  assertTrue(Files.size(savedFile) >= 1500, "Output file is too small: " + savedFile);
}
}
}
Loading