diff --git a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl index ca7a076..eee5a06 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl +++ b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl @@ -17,17 +17,15 @@ INPUT_PATH_ONTIME="${SCENARIO_DIR}/${INPUT_FILE_ONTIME}" echo "OK Inserted data with partitioning into table ${TABLE_NAME_IRIS}" # List partitions and validate output -LIST_PARTITIONS_OUT_IRIS=$(mktemp) -trap "rm -f '${LIST_PARTITIONS_OUT_IRIS}'" EXIT -{{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions ${TABLE_NAME_IRIS} > "${LIST_PARTITIONS_OUT_IRIS}" -if ! grep -q "partitions:" "${LIST_PARTITIONS_OUT_IRIS}"; then +LIST_PARTITIONS_OUT_IRIS=$({{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions "${TABLE_NAME_IRIS}") +if ! grep -q "partitions:" <<<"${LIST_PARTITIONS_OUT_IRIS}"; then echo "FAIL: list-partitions output missing 'partitions:' section" - cat "${LIST_PARTITIONS_OUT_IRIS}" + printf '%s\n' "${LIST_PARTITIONS_OUT_IRIS}" exit 1 fi -if ! grep -qE -- "- *[^=]+=" "${LIST_PARTITIONS_OUT_IRIS}"; then +if ! grep -qE -- "- *[^=]+=" <<<"${LIST_PARTITIONS_OUT_IRIS}"; then echo "FAIL: list-partitions output has no partition entries (expected at least one key=value)" - cat "${LIST_PARTITIONS_OUT_IRIS}" + printf '%s\n' "${LIST_PARTITIONS_OUT_IRIS}" exit 1 fi echo "OK Listed and validated partitions for ${TABLE_NAME_IRIS}" @@ -37,17 +35,15 @@ echo "OK Listed and validated partitions for ${TABLE_NAME_IRIS}" echo "OK Inserted data with partitioning into table ${TABLE_NAME_ONTIME}" # List partitions and validate output -LIST_PARTITIONS_OUT_ONTIME=$(mktemp) -trap "rm -f '${LIST_PARTITIONS_OUT_ONTIME}'" EXIT -{{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions ${TABLE_NAME_ONTIME} > "${LIST_PARTITIONS_OUT_ONTIME}" -if ! grep -q "partitions:" "${LIST_PARTITIONS_OUT_ONTIME}"; then +LIST_PARTITIONS_OUT_ONTIME=$({{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions "${TABLE_NAME_ONTIME}") +if ! grep -q "partitions:" <<<"${LIST_PARTITIONS_OUT_ONTIME}"; then echo "FAIL: list-partitions output missing 'partitions:' section" - cat "${LIST_PARTITIONS_OUT_ONTIME}" + printf '%s\n' "${LIST_PARTITIONS_OUT_ONTIME}" exit 1 fi -if ! grep -qE -- "- *[^=]+=" "${LIST_PARTITIONS_OUT_ONTIME}"; then +if ! grep -qE -- "- *[^=]+=" <<<"${LIST_PARTITIONS_OUT_ONTIME}"; then echo "FAIL: list-partitions output has no partition entries (expected at least one key=value)" - cat "${LIST_PARTITIONS_OUT_ONTIME}" + printf '%s\n' "${LIST_PARTITIONS_OUT_ONTIME}" exit 1 fi echo "OK Listed and validated partitions for ${TABLE_NAME_ONTIME}" @@ -62,11 +58,40 @@ if [[ "${FILES_OUT_ONTIME}" != *${EXPECTED_DATA_PATH_ONTIME}* ]]; then fi echo "OK Validated correct partitioned data file path for ${TABLE_NAME_ONTIME}" +# Create table with bucket partition and insert data +{{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table ${TABLE_NAME_BUCKET} ${INPUT_PATH_IRIS} --partition="${PARTITION_SPEC_BUCKET}" +echo "OK Inserted data with bucket partitioning into table ${TABLE_NAME_BUCKET}" + +# List partitions and validate output +LIST_PARTITIONS_OUT_BUCKET=$({{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions "${TABLE_NAME_BUCKET}") +if ! grep -q "partitions:" <<<"${LIST_PARTITIONS_OUT_BUCKET}"; then + echo "FAIL: list-partitions output missing 'partitions:' section" + printf '%s\n' "${LIST_PARTITIONS_OUT_BUCKET}" + exit 1 +fi +if ! grep -q "variety_bucket=" <<<"${LIST_PARTITIONS_OUT_BUCKET}"; then + echo "FAIL: list-partitions output missing bucket partition entries (expected 'variety_bucket=')" + printf '%s\n' "${LIST_PARTITIONS_OUT_BUCKET}" + exit 1 +fi +echo "OK Listed and validated bucket partitions for ${TABLE_NAME_BUCKET}" + +# Validate data file was inserted to correct bucket-partitioned path +FILES_OUT_BUCKET=$({{ICE_CLI}} --config {{CLI_CONFIG}} files ${TABLE_NAME_BUCKET}) + +if [[ "${FILES_OUT_BUCKET}" != *${EXPECTED_DATA_PATH_BUCKET}* ]]; then + echo "FAIL: expected substring '${EXPECTED_DATA_PATH_BUCKET}' not found in files command output: ${FILES_OUT_BUCKET}" + exit 1 +fi +echo "OK Validated correct bucket-partitioned data file path for ${TABLE_NAME_BUCKET}" + # Cleanup {{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME_IRIS} echo "OK Deleted table: ${TABLE_NAME_IRIS}" {{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME_ONTIME} echo "OK Deleted table: ${TABLE_NAME_ONTIME}" +{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME_BUCKET} +echo "OK Deleted table: ${TABLE_NAME_BUCKET}" {{ICE_CLI}} --config {{CLI_CONFIG}} delete-namespace ${NAMESPACE_NAME} echo "OK Deleted namespace: ${NAMESPACE_NAME}" diff --git a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/scenario.yaml b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/scenario.yaml index ed4fe7a..16e2382 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/scenario.yaml +++ b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/scenario.yaml @@ -13,4 +13,8 @@ env: PARTITION_SPEC_IRIS: '[{"column":"variety","transform":"identity"}]' PARTITION_SPEC_ONTIME: '[{"column":"Year"}]' EXPECTED_DATA_PATH_ONTIME: "s3://test-bucket/warehouse/test_insert_partitioned/ontime_partitioned/data/Year=2010/*.parquet" + TABLE_NAME_BUCKET: "test_insert_partitioned.iris_bucket_partitioned" + PARTITION_SPEC_BUCKET: '[{"column":"variety","transform":"bucket[3]"}]' + # Scheme-agnostic: files output may use s3:// or s3a:// depending on Hadoop FS + EXPECTED_DATA_PATH_BUCKET: "iris_bucket_partitioned/data/variety_bucket=" diff --git a/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/run.sh.tmpl b/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/run.sh.tmpl index 0fdc69a..e949224 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/run.sh.tmpl +++ b/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/run.sh.tmpl @@ -13,14 +13,14 @@ echo "OK Created namespace" --partition="${PARTITION_SPEC}" echo "OK Inserted data with custom partition name" -{{ICE_CLI}} --config {{CLI_CONFIG}} describe -s ${TABLE_NAME} > /tmp/custom_part_describe.txt +DESCRIBE_OUT=$({{ICE_CLI}} --config {{CLI_CONFIG}} describe -s "${TABLE_NAME}") -if ! grep -q "var_trunc" /tmp/custom_part_describe.txt; then - echo "FAIL describe -s output missing custom partition name 'var_trunc'" - cat /tmp/custom_part_describe.txt +if ! grep -q "var_bucket" <<<"${DESCRIBE_OUT}"; then + echo "FAIL describe -s output missing custom partition name 'var_bucket'" + printf '%s\n' "${DESCRIBE_OUT}" exit 1 fi -echo "OK Custom partition name 'var_trunc' found in describe output" +echo "OK Custom partition name 'var_bucket' found in describe output" {{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME} {{ICE_CLI}} --config {{CLI_CONFIG}} delete-namespace ${NAMESPACE_NAME} diff --git a/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/scenario.yaml b/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/scenario.yaml index 47982de..8713963 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/scenario.yaml +++ b/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/scenario.yaml @@ -8,4 +8,4 @@ env: NAMESPACE_NAME: "test_custom_part" TABLE_NAME: "test_custom_part.iris_custom" INPUT_FILE: "input.parquet" - PARTITION_SPEC: '[{"column":"variety","transform":"truncate[3]","name":"var_trunc"}]' + PARTITION_SPEC: '[{"column":"variety","transform":"bucket[3]","name":"var_bucket"}]' diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java b/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java index 9a935a7..25b89b7 100644 --- a/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java +++ b/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java @@ -343,6 +343,7 @@ public static Map> partition( continue; } String transformName = transform.toString(); + switch (transformName) { case "hour", "day", "month", "year": if (fieldSpec.type().typeId() != Type.TypeID.DATE) { @@ -352,7 +353,7 @@ public static Map> partition( sourceFieldName, toGenericRecordFieldValue(value, fieldSpec.type())); break; default: - if (transformName.startsWith("truncate[") || transformName.startsWith("bucket[")) { + if (transformName.startsWith("bucket[")) { partitionRecord.setField( sourceFieldName, toGenericRecordFieldValue(value, fieldSpec.type())); } else {