diff --git a/README.md b/README.md index 91e9c0d..f308e5f 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Create/delete tables, insert data with `ice insert -p ns1.table1 file://example. ## Installation -Pre-built binaries\* (+ links to Docker images for [ice](https://hub.docker.com/r/altinity/ice) and [ice-rest-catalog](https://hub.docker.com/r/altinity/ice-rest-catalog)) are available form [GitHub Releases](https://github.com/Altinity/ice/releases) page. +Pre-built binaries\* (+ links to Docker images for [ice](https://hub.docker.com/r/altinity/ice) and [ice-rest-catalog](https://hub.docker.com/r/altinity/ice-rest-catalog)) are available from [GitHub Releases](https://github.com/Altinity/ice/releases) page. > \* currently require `java` 21+ to run (available [here](https://adoptium.net/installation/)). ## Usage diff --git a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/ontime-2010-01-01.parquet b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/ontime-2010-01-01.parquet new file mode 100644 index 0000000..29493f8 Binary files /dev/null and b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/ontime-2010-01-01.parquet differ diff --git a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl index 0e6b07f..ca7a076 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl +++ b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl @@ -9,31 +9,64 @@ echo "OK Created namespace: ${NAMESPACE_NAME}" # Get the full path to the input file SCENARIO_DIR="{{SCENARIO_DIR}}" -INPUT_PATH="${SCENARIO_DIR}/${INPUT_FILE}" +INPUT_PATH_IRIS="${SCENARIO_DIR}/${INPUT_FILE_IRIS}" +INPUT_PATH_ONTIME="${SCENARIO_DIR}/${INPUT_FILE_ONTIME}" # Create table with partitioning and insert data -{{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table ${TABLE_NAME} ${INPUT_PATH} --partition="${PARTITION_SPEC}" -echo "OK Inserted data with partitioning into table ${TABLE_NAME}" +{{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table ${TABLE_NAME_IRIS} ${INPUT_PATH_IRIS} --partition="${PARTITION_SPEC_IRIS}" +echo "OK Inserted data with partitioning into table ${TABLE_NAME_IRIS}" # List partitions and validate output -LIST_PARTITIONS_OUT=$(mktemp) -trap "rm -f '${LIST_PARTITIONS_OUT}'" EXIT -{{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions ${TABLE_NAME} > "${LIST_PARTITIONS_OUT}" -if ! grep -q "partitions:" "${LIST_PARTITIONS_OUT}"; then +LIST_PARTITIONS_OUT_IRIS=$(mktemp) +trap "rm -f '${LIST_PARTITIONS_OUT_IRIS}'" EXIT +{{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions ${TABLE_NAME_IRIS} > "${LIST_PARTITIONS_OUT_IRIS}" +if ! grep -q "partitions:" "${LIST_PARTITIONS_OUT_IRIS}"; then echo "FAIL: list-partitions output missing 'partitions:' section" - cat "${LIST_PARTITIONS_OUT}" + cat "${LIST_PARTITIONS_OUT_IRIS}" exit 1 fi -if ! grep -qE -- "- *[^=]+=" "${LIST_PARTITIONS_OUT}"; then +if ! grep -qE -- "- *[^=]+=" "${LIST_PARTITIONS_OUT_IRIS}"; then echo "FAIL: list-partitions output has no partition entries (expected at least one key=value)" - cat "${LIST_PARTITIONS_OUT}" + cat "${LIST_PARTITIONS_OUT_IRIS}" exit 1 fi -echo "OK Listed and validated partitions for ${TABLE_NAME}" +echo "OK Listed and validated partitions for ${TABLE_NAME_IRIS}" + +# Create table using file with single partition +{{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table ${TABLE_NAME_ONTIME} ${INPUT_PATH_ONTIME} --partition="${PARTITION_SPEC_ONTIME}" +echo "OK Inserted data with partitioning into table ${TABLE_NAME_ONTIME}" + +# List partitions and validate output +LIST_PARTITIONS_OUT_ONTIME=$(mktemp) +trap "rm -f '${LIST_PARTITIONS_OUT_ONTIME}'" EXIT +{{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions ${TABLE_NAME_ONTIME} > "${LIST_PARTITIONS_OUT_ONTIME}" +if ! grep -q "partitions:" "${LIST_PARTITIONS_OUT_ONTIME}"; then + echo "FAIL: list-partitions output missing 'partitions:' section" + cat "${LIST_PARTITIONS_OUT_ONTIME}" + exit 1 +fi +if ! grep -qE -- "- *[^=]+=" "${LIST_PARTITIONS_OUT_ONTIME}"; then + echo "FAIL: list-partitions output has no partition entries (expected at least one key=value)" + cat "${LIST_PARTITIONS_OUT_ONTIME}" + exit 1 +fi +echo "OK Listed and validated partitions for ${TABLE_NAME_ONTIME}" + +# Validate data file was inserted to correct partitioned path + +FILES_OUT_ONTIME=$({{ICE_CLI}} --config {{CLI_CONFIG}} files ${TABLE_NAME_ONTIME}) + +if [[ "${FILES_OUT_ONTIME}" != *${EXPECTED_DATA_PATH_ONTIME}* ]]; then + echo "FAIL: expected substring '${EXPECTED_DATA_PATH_ONTIME}' not found in files command output: ${FILES_OUT_ONTIME}" + exit 1 +fi +echo "OK Validated correct partitioned data file path for ${TABLE_NAME_ONTIME}" # Cleanup -{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME} -echo "OK Deleted table: ${TABLE_NAME}" +{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME_IRIS} +echo "OK Deleted table: ${TABLE_NAME_IRIS}" +{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME_ONTIME} +echo "OK Deleted table: ${TABLE_NAME_ONTIME}" {{ICE_CLI}} --config {{CLI_CONFIG}} delete-namespace ${NAMESPACE_NAME} echo "OK Deleted namespace: ${NAMESPACE_NAME}" diff --git a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/scenario.yaml b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/scenario.yaml index 74af637..ed4fe7a 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/scenario.yaml +++ b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/scenario.yaml @@ -6,7 +6,11 @@ catalogConfig: env: NAMESPACE_NAME: "test_insert_partitioned" - TABLE_NAME: "test_insert_partitioned.iris_partitioned" - INPUT_FILE: "input.parquet" - PARTITION_SPEC: '[{"column":"variety","transform":"identity"}]' + TABLE_NAME_IRIS: "test_insert_partitioned.iris_partitioned" + TABLE_NAME_ONTIME: "test_insert_partitioned.ontime_partitioned" + INPUT_FILE_IRIS: "input.parquet" + INPUT_FILE_ONTIME: "ontime-2010-01-01.parquet" + PARTITION_SPEC_IRIS: '[{"column":"variety","transform":"identity"}]' + PARTITION_SPEC_ONTIME: '[{"column":"Year"}]' + EXPECTED_DATA_PATH_ONTIME: "s3://test-bucket/warehouse/test_insert_partitioned/ontime_partitioned/data/Year=2010/*.parquet" diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java index 1e51068..65cec97 100644 --- a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java +++ b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java @@ -526,7 +526,13 @@ private static List processFile( partitionKey)); } else { // Table isn't partitioned or sorted. Copy as is. - String dstDataFile = dstDataFileSource.get(file); + String dstDataFile; + if (partitionSpec.isPartitioned() && partitionKey != null) { + // File has inferred partition, use partition path + dstDataFile = dstDataFileSource.get(partitionSpec, partitionKey, file); + } else { + dstDataFile = dstDataFileSource.get(file); + } if (checkNotExists.apply(dstDataFile)) { return Collections.emptyList(); }