#!/bin/bash
#-------------------------------------------------------------
# Standalone launcher for SystemDS.
#
# Runs org.apache.sysds.api.DMLScript from the locally built JAR
# in ../target, forwarding all command-line arguments unchanged.
#-------------------------------------------------------------

# Resolve the directory of this script so the launcher works from any CWD.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)

# Locate the built SystemDS JAR. Prefer the exact snapshot name, but fall
# back to any systemds-*.jar so a version bump does not break the launcher.
JAR_FILE="$SCRIPT_DIR/../target/systemds-3.4.0-SNAPSHOT.jar"
if [ ! -f "$JAR_FILE" ]; then
  JAR_FILE=$(ls "$SCRIPT_DIR"/../target/systemds-*.jar 2>/dev/null | head -n 1)
fi

if [ -z "$JAR_FILE" ] || [ ! -f "$JAR_FILE" ]; then
  # Error goes to stderr so scripted callers can separate it from output.
  echo "ERROR: standalone JAR not found in $SCRIPT_DIR/../target (run 'mvn package' first)" >&2
  exit 1
fi

# exec replaces the shell process so signals (Ctrl-C, SIGTERM) reach the JVM.
exec java -cp "$JAR_FILE" org.apache.sysds.api.DMLScript "$@"
*/ public boolean transposed = false; @@ -135,6 +136,19 @@ public class CompressionSettings { public final boolean preferDeltaEncoding; + // Handling Targetloss for piecewise linear Kompression + + private double piecewiseTargetLoss = Double.NaN; + + public void setPiecewiseTargetLoss(double piecewiseTargetLoss) { + this.piecewiseTargetLoss = piecewiseTargetLoss; + + } + + public double getPiecewiseTargetLoss() { + return piecewiseTargetLoss; + } + protected CompressionSettings(double samplingRatio, double samplePower, boolean allowSharedDictionary, String transposeInput, int seed, boolean lossy, EnumSet validCompressions, boolean sortValuesByLength, PartitionerType columnPartitioner, int maxColGroupCoCode, double coCodePercentage, @@ -161,7 +175,7 @@ protected CompressionSettings(double samplingRatio, double samplePower, boolean this.sdcSortType = sdcSortType; this.scaleFactors = scaleFactors; this.preferDeltaEncoding = preferDeltaEncoding; - + if(!printedStatus && LOG.isDebugEnabled()) { printedStatus = true; LOG.debug(this.toString()); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java index 003703f86a4..e2bf69f5c15 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java @@ -65,7 +65,7 @@ public abstract class AColGroup implements Serializable { /** Public super types of compression ColGroups supported */ public static enum CompressionType { - UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCFOR, DDCFOR, DeltaDDC, LinearFunctional; + UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCFOR, DDCFOR, DeltaDDC, LinearFunctional, PiecewiseLinear; public boolean isDense() { return this == DDC || this == CONST || this == DDCFOR || this == DDCFOR; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java index 273df9ff26f..b51111a4aba 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java @@ -43,6 +43,7 @@ import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.functional.LinearRegression; +import org.apache.sysds.runtime.compress.colgroup.functional.PiecewiseLinearUtils; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.compress.colgroup.insertionsort.AInsertionSorter; @@ -106,7 +107,7 @@ private ColGroupFactory(MatrixBlock in, CompressedSizeInfo csi, CompressionSetti /** * The actual compression method, that handles the logic of compressing multiple columns together. - * + * * @param in The input matrix, that could have been transposed. If it is transposed the compSettings should specify * this. * @param csi The compression information extracted from the estimation, this contains which groups of columns to @@ -120,7 +121,7 @@ public static List compressColGroups(MatrixBlock in, CompressedSizeIn /** * The actual compression method, that handles the logic of compressing multiple columns together. - * + * * @param in The input matrix, that could have been transposed. If it is transposed the compSettings should specify * this. * @param csi The compression information extracted from the estimation, this contains which groups of columns to @@ -135,7 +136,7 @@ public static List compressColGroups(MatrixBlock in, CompressedSizeIn } /** - * + * * @param in The input matrix, that could have been transposed. If it is transposed the compSettings should specify * this. 
* @param csi The compression information extracted from the estimation, this contains which groups of columns to @@ -232,8 +233,9 @@ private void logEstVsActual(double time, AColGroup act, CompressedSizeInfoColGro time, retType, estC, actC, act.getNumValues(), cols, wanted, warning)); } else { - LOG.debug(String.format("time[ms]: %10.2f %25s est %10.0f -- act %10.0f distinct:%5d cols:%s wanted:%s", - time, retType, estC, actC, act.getNumValues(), cols, wanted)); + LOG.debug( + String.format("time[ms]: %10.2f %25s est %10.0f -- act %10.0f distinct:%5d cols:%s wanted:%s", time, + retType, estC, actC, act.getNumValues(), cols, wanted)); } } @@ -303,6 +305,9 @@ else if(ct == CompressionType.LinearFunctional) { return compressLinearFunctional(colIndexes, in, cs); } } + else if(ct == CompressionType.PiecewiseLinear) { + return compressPiecewiseLinearFunctional(colIndexes, in, cs); + } else if(ct == CompressionType.DDCFOR) { AColGroup g = directCompressDDC(colIndexes, cg); if(g instanceof ColGroupDDC) @@ -698,7 +703,7 @@ private AColGroup directCompressDeltaDDC(IColIndex colIndexes, CompressedSizeInf if(cs.scaleFactors != null) { throw new NotImplementedException("Delta encoding with quantization not yet implemented"); } - + if(colIndexes.size() > 1) { return directCompressDeltaDDCMultiCol(colIndexes, cg); } @@ -730,7 +735,7 @@ private AColGroup directCompressDeltaDDCSingleCol(IColIndex colIndexes, Compress if(map.size() == 0) return new ColGroupEmpty(colIndexes); - + final double[] dictValues = map.getDictionary(); IDictionary dict = new DeltaDictionary(dictValues, 1); @@ -739,7 +744,8 @@ private AColGroup directCompressDeltaDDCSingleCol(IColIndex colIndexes, Compress return ColGroupDeltaDDC.create(colIndexes, dict, resData, null); } - private AColGroup directCompressDeltaDDCMultiCol(IColIndex colIndexes, CompressedSizeInfoColGroup cg) throws Exception { + private AColGroup directCompressDeltaDDCMultiCol(IColIndex colIndexes, CompressedSizeInfoColGroup cg) + throws 
Exception { final AMapToData d = MapToFactory.create(nRow, Math.max(Math.min(cg.getNumOffs() + 1, nRow), 126)); final int fill = d.getUpperBoundValue(); d.fill(fill); @@ -818,8 +824,8 @@ private boolean readToMapDDC(IColIndex colIndexes, DblArrayCountHashMap map, AMa int fill) { ReaderColumnSelection reader = (cs.scaleFactors == null) ? ReaderColumnSelection.createReader(in, colIndexes, - cs.transposed, rl, - ru) : ReaderColumnSelection.createQuantizedReader(in, colIndexes, cs.transposed, rl, ru, cs.scaleFactors); + cs.transposed, rl, ru) : ReaderColumnSelection.createQuantizedReader(in, colIndexes, cs.transposed, rl, ru, + cs.scaleFactors); DblArray cellVals = reader.nextRow(); boolean extra = false; @@ -1066,6 +1072,44 @@ private static AColGroup compressLinearFunctional(IColIndex colIndexes, MatrixBl return ColGroupLinearFunctional.create(colIndexes, coefficients, numRows); } + public static AColGroup compressPiecewiseLinearFunctional( + IColIndex colIndexes, MatrixBlock in, CompressionSettings cs) { + + final int numRows = in.getNumRows(); + AColGroup result = null; + + //Compress every column + for (int col = 0; col < colIndexes.size(); col++) { + // get Column Index + IColIndex.SliceResult sliceResult = colIndexes.slice(col, col + 1); + IColIndex singleColIndex = sliceResult.ret; // ← .ret nötig! 
+ + // Get Column from Matrix + final int colIdx = colIndexes.get(col); + double[] column = PiecewiseLinearUtils.getColumn(in, colIdx); + + //Compress column + PiecewiseLinearUtils.SegmentedRegression fit = + PiecewiseLinearUtils.compressSegmentedLeastSquares(column, cs); + + AColGroup singleGroup = ColGroupPiecewiseLinearCompressed.create( + singleColIndex, + fit.getBreakpoints(), + fit.getSlopes(), + fit.getIntercepts(), + numRows); + + // Combine multiple columns + if (result == null) { + result = singleGroup; + } else { + result = result.combineWithSameIndex(numRows, col, singleGroup); + } + } + + return result; + } + private AColGroup compressSDCFromSparseTransposedBlock(IColIndex cols, int nrUniqueEstimate, double tupleSparsity) { if(cols.size() > 1) return compressMultiColSDCFromSparseTransposedBlock(cols, nrUniqueEstimate, tupleSparsity); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupPiecewiseLinearCompressed.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupPiecewiseLinearCompressed.java new file mode 100644 index 00000000000..35891eb8c53 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupPiecewiseLinearCompressed.java @@ -0,0 +1,414 @@ +package org.apache.sysds.runtime.compress.colgroup; + +import org.apache.commons.lang3.NotImplementedException; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.colgroup.scheme.ICLAScheme; +import org.apache.sysds.runtime.compress.cost.ComputationCostEstimator; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.data.SparseBlock; +import org.apache.sysds.runtime.data.SparseBlockMCSR; +import org.apache.sysds.runtime.functionobjects.Builtin; +import org.apache.sysds.runtime.instructions.cp.CmCovObject; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysds.runtime.compress.colgroup;

import java.util.Arrays;

import org.apache.commons.lang3.NotImplementedException;
import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
import org.apache.sysds.runtime.compress.colgroup.scheme.ICLAScheme;
import org.apache.sysds.runtime.compress.cost.ComputationCostEstimator;
import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
import org.apache.sysds.runtime.data.DenseBlock;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.data.SparseBlockMCSR;
import org.apache.sysds.runtime.functionobjects.Builtin;
import org.apache.sysds.runtime.instructions.cp.CmCovObject;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
import org.apache.sysds.runtime.matrix.operators.CMOperator;
import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
import org.apache.sysds.runtime.matrix.operators.UnaryOperator;

/**
 * Column group storing a single column as a piecewise-linear function of the row index.
 *
 * Rows are partitioned into segments by {@code breakpoints}; segment {@code i} covers rows
 * [breakpoints[i], breakpoints[i+1]) and approximates the value at row r as
 * {@code slopes[i] * r + intercepts[i]}.
 *
 * Invariants enforced by {@link #create}: {@code breakpoints.length == numSegments + 1} and
 * {@code slopes.length == intercepts.length == numSegments}.
 *
 * Most aggregate and linear-algebra operations are not implemented yet and throw
 * {@link NotImplementedException}.
 */
public class ColGroupPiecewiseLinearCompressed extends AColGroupCompressed {

	private static final long serialVersionUID = 5283746190283746101L;

	// Segment boundaries: segment i covers rows [breakpoints[i], breakpoints[i+1]).
	int[] breakpoints;
	// Regression line per segment: value(r) = slopes[i] * r + intercepts[i].
	double[] slopes;
	double[] intercepts;
	// Total number of rows represented by this group.
	int numRows;

	protected ColGroupPiecewiseLinearCompressed(IColIndex colIndices) {
		super(colIndices);
	}

	public ColGroupPiecewiseLinearCompressed(IColIndex colIndexes, int[] breakpoints, double[] slopes,
		double[] intercepts, int numRows) {
		// the column indexes are stored once, in the parent's _colIndexes field
		super(colIndexes);
		this.breakpoints = breakpoints;
		this.slopes = slopes;
		this.intercepts = intercepts;
		this.numRows = numRows;
	}

	/**
	 * Factory method validating the segment invariants and defensively copying the input arrays.
	 *
	 * @param colIndexes  The column indexes this group covers.
	 * @param breakpoints Segment boundaries, length numSegments + 1, first entry 0, last entry numRows.
	 * @param slopes      Slope per segment, length numSegments.
	 * @param intercepts  Intercept per segment, length numSegments.
	 * @param numRows     Total number of rows.
	 * @return A new piecewise-linear column group.
	 * @throws IllegalArgumentException If the arrays are null or their lengths are inconsistent.
	 */
	public static AColGroup create(IColIndex colIndexes, int[] breakpoints, double[] slopes, double[] intercepts,
		int numRows) {
		if(breakpoints == null || breakpoints.length < 2)
			throw new IllegalArgumentException("Need at least one segment");

		final int numSeg = breakpoints.length - 1;
		if(slopes == null || intercepts == null || slopes.length != numSeg || intercepts.length != numSeg)
			throw new IllegalArgumentException("Inconsistent segment arrays");

		// defensive copies so later mutation of the caller's arrays cannot corrupt the group
		return new ColGroupPiecewiseLinearCompressed(colIndexes, Arrays.copyOf(breakpoints, breakpoints.length),
			Arrays.copyOf(slopes, slopes.length), Arrays.copyOf(intercepts, intercepts.length), numRows);
	}

	@Override
	public void decompressToDenseBlock(DenseBlock db, int rl, int ru, int offR, int offC) {
		// Defensive checks: silently skip on missing state or an empty row range.
		if(db == null || _colIndexes == null || _colIndexes.size() == 0 || breakpoints == null || slopes == null ||
			intercepts == null)
			return;
		final int numSegments = breakpoints.length - 1;
		if(numSegments <= 0 || rl >= ru)
			return;

		// Only a single column is stored per group (created per column in the factory).
		final int column = _colIndexes.get(0);
		for(int seg = 0; seg < numSegments; seg++) {
			final int segStart = breakpoints[seg];
			final int segEnd = breakpoints[seg + 1];
			if(segStart >= segEnd)
				continue;

			final double slope = slopes[seg];
			final double intercept = intercepts[seg];

			// clamp the segment to the requested row range [rl, ru)
			final int rowStart = Math.max(segStart, rl);
			final int rowEnd = Math.min(segEnd, ru);
			for(int r = rowStart; r < rowEnd; r++) {
				final double yhat = slope * r + intercept;
				final int dbRow = offR + r;
				final int dbCol = offC + column;
				// NOTE(review): out-of-range writes are silently dropped -- confirm
				// this matches the decompress contract of AColGroup.
				if(dbRow >= 0 && dbRow < db.numRows() && dbCol >= 0 && dbCol < db.numCols())
					db.set(dbRow, dbCol, yhat);
			}
		}
	}

	// The returned arrays are the internal state (no copy, for performance);
	// callers must treat them as read-only.
	public int[] getBreakpoints() {
		return breakpoints;
	}

	public double[] getSlopes() {
		return slopes;
	}

	public double[] getIntercepts() {
		return intercepts;
	}

	@Override
	public double getIdx(int r, int colIdx) {
		// Out-of-range requests answer 0.0 rather than throwing (defensive default).
		if(r < 0 || r >= numRows || colIdx < 0 || colIdx >= _colIndexes.size())
			return 0.0;

		// Binary search for the segment s with breakpoints[s] <= r < breakpoints[s+1].
		// breakpoints.length - 2 is the last valid segment index.
		int lo = 0;
		int hi = breakpoints.length - 2;
		while(lo <= hi) {
			final int mid = (lo + hi) >>> 1; // overflow-safe midpoint
			// FIX: compare against the segment's upper bound breakpoints[mid + 1]
			// (previously 'breakpoints[mid] + 1', which resolved rows to the wrong segment)
			if(r < breakpoints[mid + 1])
				hi = mid - 1;
			else
				lo = mid + 1;
		}
		final int segment = Math.min(lo, breakpoints.length - 2);

		return slopes[segment] * (double) r + intercepts[segment];
	}

	@Override
	public int getNumValues() {
		// NOTE(review): returns the total number of stored model parameters, not the
		// number of distinct value tuples -- confirm against the AColGroup contract.
		return breakpoints.length + slopes.length + intercepts.length;
	}

	@Override
	public CompressionType getCompType() {
		// report the dedicated enum value instead of throwing, so logging and
		// size-info code paths can identify this group
		return CompressionType.PiecewiseLinear;
	}

	// ------------------------------------------------------------------
	// Everything below is not implemented yet for piecewise-linear groups.
	// ------------------------------------------------------------------

	@Override
	protected double computeMxx(double c, Builtin builtin) {
		throw new NotImplementedException();
	}

	@Override
	protected void computeColMxx(double[] c, Builtin builtin) {
		throw new NotImplementedException();
	}

	@Override
	protected void computeSum(double[] c, int nRows) {
		throw new NotImplementedException();
	}

	@Override
	protected void computeSumSq(double[] c, int nRows) {
		throw new NotImplementedException();
	}

	@Override
	protected void computeColSumsSq(double[] c, int nRows) {
		throw new NotImplementedException();
	}

	@Override
	protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) {
		throw new NotImplementedException();
	}

	@Override
	protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) {
		throw new NotImplementedException();
	}

	@Override
	protected void computeProduct(double[] c, int nRows) {
		throw new NotImplementedException();
	}

	@Override
	protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) {
		throw new NotImplementedException();
	}

	@Override
	protected void computeColProduct(double[] c, int nRows) {
		throw new NotImplementedException();
	}

	@Override
	protected double[] preAggSumRows() {
		throw new NotImplementedException();
	}

	@Override
	protected double[] preAggSumSqRows() {
		throw new NotImplementedException();
	}

	@Override
	protected double[] preAggProductRows() {
		throw new NotImplementedException();
	}

	@Override
	protected double[] preAggBuiltinRows(Builtin builtin) {
		throw new NotImplementedException();
	}

	@Override
	public boolean sameIndexStructure(AColGroupCompressed that) {
		throw new NotImplementedException();
	}

	@Override
	protected void tsmm(double[] result, int numColumns, int nRows) {
		throw new NotImplementedException();
	}

	@Override
	public AColGroup copyAndSet(IColIndex colIndexes) {
		throw new NotImplementedException();
	}

	@Override
	public void decompressToDenseBlockTransposed(DenseBlock db, int rl, int ru) {
		throw new NotImplementedException();
	}

	@Override
	public void decompressToSparseBlockTransposed(SparseBlockMCSR sb, int nColOut) {
		throw new NotImplementedException();
	}

	@Override
	protected ColGroupType getColGroupType() {
		throw new NotImplementedException();
	}

	@Override
	public void decompressToSparseBlock(SparseBlock sb, int rl, int ru, int offR, int offC) {
		throw new NotImplementedException();
	}

	@Override
	public AColGroup rightMultByMatrix(MatrixBlock right, IColIndex allCols, int k) {
		throw new NotImplementedException();
	}

	@Override
	public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) {
		throw new NotImplementedException();
	}

	@Override
	public void leftMultByAColGroup(AColGroup lhs, MatrixBlock result, int nRows) {
		throw new NotImplementedException();
	}

	@Override
	public void tsmmAColGroup(AColGroup other, MatrixBlock result) {
		throw new NotImplementedException();
	}

	@Override
	public AColGroup scalarOperation(ScalarOperator op) {
		throw new NotImplementedException();
	}

	@Override
	public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) {
		throw new NotImplementedException();
	}

	@Override
	public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) {
		throw new NotImplementedException();
	}

	@Override
	protected AColGroup sliceSingleColumn(int idx) {
		throw new NotImplementedException();
	}

	@Override
	protected AColGroup sliceMultiColumns(int idStart, int idEnd, IColIndex outputCols) {
		throw new NotImplementedException();
	}

	@Override
	public AColGroup sliceRows(int rl, int ru) {
		throw new NotImplementedException();
	}

	@Override
	public boolean containsValue(double pattern) {
		throw new NotImplementedException();
	}

	@Override
	public long getNumberNonZeros(int nRows) {
		throw new NotImplementedException();
	}

	@Override
	public AColGroup replace(double pattern, double replace) {
		throw new NotImplementedException();
	}

	@Override
	public void computeColSums(double[] c, int nRows) {
		throw new NotImplementedException();
	}

	@Override
	public CmCovObject centralMoment(CMOperator op, int nRows) {
		throw new NotImplementedException();
	}

	@Override
	public AColGroup rexpandCols(int max, boolean ignore, boolean cast, int nRows) {
		throw new NotImplementedException();
	}

	@Override
	public double getCost(ComputationCostEstimator e, int nRows) {
		throw new NotImplementedException();
	}

	@Override
	public AColGroup unaryOperation(UnaryOperator op) {
		throw new NotImplementedException();
	}

	@Override
	public AColGroup append(AColGroup g) {
		throw new NotImplementedException();
	}

	@Override
	protected AColGroup appendNInternal(AColGroup[] groups, int blen, int rlen) {
		throw new NotImplementedException();
	}

	@Override
	public ICLAScheme getCompressionScheme() {
		throw new NotImplementedException();
	}

	@Override
	public AColGroup recompress() {
		throw new NotImplementedException();
	}

	@Override
	public CompressedSizeInfoColGroup getCompressionInfo(int nRow) {
		throw new NotImplementedException();
	}

	@Override
	protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) {
		throw new NotImplementedException();
	}

	@Override
	public AColGroup reduceCols() {
		throw new NotImplementedException();
	}

	@Override
	public double getSparsity() {
		throw new NotImplementedException();
	}

	@Override
	protected void sparseSelection(MatrixBlock selection, ColGroupUtils.P[] points, MatrixBlock ret, int rl, int ru) {
		throw new NotImplementedException();
	}

	@Override
	protected void denseSelection(MatrixBlock selection, ColGroupUtils.P[] points, MatrixBlock ret, int rl, int ru) {
		throw new NotImplementedException();
	}

	@Override
	public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) {
		throw new NotImplementedException();
	}

}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysds.runtime.compress.colgroup.functional;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.apache.sysds.runtime.compress.CompressionSettings;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;

/**
 * Helper routines for piecewise-linear compression: segmented least-squares fitting of a single column, where each
 * segment is approximated by one ordinary-least-squares regression line over the row index.
 */
public final class PiecewiseLinearUtils {

	private PiecewiseLinearUtils() {
		// utility class, no instances
	}

	/** Immutable result of a segmented regression: breakpoints plus one (slope, intercept) pair per segment. */
	public static final class SegmentedRegression {
		private final int[] breakpoints;
		private final double[] slopes;
		private final double[] intercepts;

		public SegmentedRegression(int[] breakpoints, double[] slopes, double[] intercepts) {
			this.breakpoints = breakpoints;
			this.slopes = slopes;
			this.intercepts = intercepts;
		}

		public int[] getBreakpoints() {
			return breakpoints;
		}

		public double[] getSlopes() {
			return slopes;
		}

		public double[] getIntercepts() {
			return intercepts;
		}
	}

	/**
	 * Fit a segmented least-squares model using the dynamic-programming breakpoint search.
	 *
	 * @param column The column values, indexed by row.
	 * @param cs     Compression settings supplying the target loss (MSE per element).
	 * @return Breakpoints and the per-segment regression lines.
	 */
	public static SegmentedRegression compressSegmentedLeastSquares(double[] column, CompressionSettings cs) {
		final List<Integer> breakpointsList = computeBreakpoints(cs, column);
		return fitSegments(column, breakpointsList);
	}

	/**
	 * Fit a segmented least-squares model using the greedy breakpoint search.
	 *
	 * @param column The column values, indexed by row.
	 * @param cs     Compression settings supplying the target loss (MSE per element).
	 * @return Breakpoints and the per-segment regression lines.
	 */
	public static SegmentedRegression compressSegmentedLeastSquaresV2(double[] column, CompressionSettings cs) {
		final List<Integer> breakpointsList = computeBreakpointsGreedy(column, cs);
		return fitSegments(column, breakpointsList);
	}

	/** Shared fit step: regress one line per segment defined by the given breakpoints. */
	private static SegmentedRegression fitSegments(double[] column, List<Integer> breakpointsList) {
		final int[] breakpoints = breakpointsList.stream().mapToInt(Integer::intValue).toArray();

		final int numSeg = breakpoints.length - 1;
		final double[] slopes = new double[numSeg];
		final double[] intercepts = new double[numSeg];

		for(int seg = 0; seg < numSeg; seg++) {
			final double[] line = regressSegment(column, breakpoints[seg], breakpoints[seg + 1]);
			slopes[seg] = line[0]; // slope of the regression line
			intercepts[seg] = line[1]; // intercept of the regression line
		}

		return new SegmentedRegression(breakpoints, slopes, intercepts);
	}

	/**
	 * Materialize one column of the matrix as a dense double array.
	 *
	 * @param in       The input matrix.
	 * @param colIndex The column to read.
	 * @return Dense copy of the column values.
	 */
	public static double[] getColumn(MatrixBlock in, int colIndex) {
		final int numRows = in.getNumRows();
		final double[] column = new double[numRows];
		for(int row = 0; row < numRows; row++)
			column[row] = in.get(row, colIndex);
		return column;
	}

	/**
	 * Compute optimal breakpoints via binary search over the segment penalty lambda, accepting the largest lambda
	 * whose total SSE stays within numElements * targetMSE.
	 *
	 * If no valid target loss is configured (NaN or non-positive), fall back to a fixed penalty of 2*ln(n) per
	 * segment -- otherwise the bisection below can never accept (comparisons against NaN are always false) and
	 * degenerates to one segment per row.
	 *
	 * @param cs     Compression settings supplying the target loss.
	 * @param column The column values.
	 * @return Sorted breakpoint list, always starting at 0 and ending at column.length.
	 */
	public static List<Integer> computeBreakpoints(CompressionSettings cs, double[] column) {
		final int numElements = column.length;
		final double targetMSE = cs.getPiecewiseTargetLoss();

		// one empty segment keeps the downstream >=2 breakpoints invariant valid
		if(numElements == 0)
			return Arrays.asList(0, 0);

		if(Double.isNaN(targetMSE) || targetMSE <= 0) {
			final double segmentPenalty = 2.0 * Math.log(numElements);
			return computeBreakpointsLambda(column, segmentPenalty);
		}

		// maximum admissible total squared error
		final double sseMax = numElements * targetMSE;
		double minLoss = 0.0;
		// NOTE(review): upper bound of n*100 for lambda is heuristic -- confirm it
		// dominates realistic per-segment penalties.
		double maxLoss = numElements * 100.0;
		List<Integer> bestBreaks = null;

		// Bisection on lambda; each probe runs the O(n^3) DP below, so this is
		// expensive for long columns.
		while(maxLoss - minLoss > 1e-8) {
			final double currentLoss = 0.5 * (minLoss + maxLoss);
			final List<Integer> breaks = computeBreakpointsLambda(column, currentLoss);
			final double totalSSE = computeTotalSSE(column, breaks);
			if(totalSSE <= sseMax) {
				// feasible: remember it and try a larger penalty (fewer segments)
				bestBreaks = breaks;
				minLoss = currentLoss;
			}
			else {
				maxLoss = currentLoss;
			}
		}

		if(bestBreaks == null)
			bestBreaks = computeBreakpointsLambda(column, minLoss);

		return bestBreaks;
	}

	/**
	 * Classic segmented least-squares dynamic program: minimize total SSE plus lambda per segment.
	 * Runtime is O(n^2) segment candidates times O(n) cost evaluation, i.e. O(n^3).
	 *
	 * @param column The column values.
	 * @param lambda Penalty added per segment.
	 * @return Sorted breakpoint list from 0 to column.length.
	 */
	public static List<Integer> computeBreakpointsLambda(double[] column, double lambda) {
		final int numRows = column.length;
		final double[] costs = new double[numRows + 1]; // minimal cost for the prefix [0, rowEnd)
		final int[] prevStart = new int[numRows + 1]; // start of the last segment in the optimal prefix
		costs[0] = 0.0;

		for(int rowEnd = 1; rowEnd <= numRows; rowEnd++) {
			costs[rowEnd] = Double.POSITIVE_INFINITY;
			// try every possible start of the last segment ending at rowEnd
			for(int rowStart = 0; rowStart < rowEnd; rowStart++) {
				// total = optimal prefix cost + segment SSE + per-segment penalty
				final double costCurrentSegment = computeSegmentCost(column, rowStart, rowEnd);
				final double totalCost = costs[rowStart] + costCurrentSegment + lambda;
				if(totalCost < costs[rowEnd]) {
					costs[rowEnd] = totalCost;
					prevStart[rowEnd] = rowStart;
				}
			}
		}

		// backtrack the optimal segment boundaries
		final List<Integer> segmentLimits = new ArrayList<>();
		int breakpointIndex = numRows;
		while(breakpointIndex > 0) {
			segmentLimits.add(breakpointIndex);
			breakpointIndex = prevStart[breakpointIndex];
		}
		segmentLimits.add(0);
		Collections.sort(segmentLimits);
		return segmentLimits;
	}

	/**
	 * Sum of squared residuals of the best-fit line over rows [start, end).
	 * Segments of size 0 or 1 fit exactly and cost 0.
	 */
	public static double computeSegmentCost(double[] column, int start, int end) {
		final int segSize = end - start;
		if(segSize <= 1)
			return 0.0;

		final double[] ab = regressSegment(column, start, end);
		final double slope = ab[0];
		final double intercept = ab[1];

		double sumSquaredError = 0.0;
		for(int i = start; i < end; i++) {
			final double difference = column[i] - (slope * i + intercept);
			sumSquaredError += difference * difference;
		}
		return sumSquaredError;
	}

	/** Total SSE of a segmentation given by a sorted breakpoint list. */
	public static double computeTotalSSE(double[] column, List<Integer> breaks) {
		double total = 0.0;
		for(int s = 0; s < breaks.size() - 1; s++)
			total += computeSegmentCost(column, breaks.get(s), breaks.get(s + 1));
		return total;
	}

	/**
	 * Ordinary least squares over rows [start, end): x = row index, y = column[i].
	 *
	 * @return {slope, intercept}; for a degenerate denominator (<=1 distinct x) the
	 *         slope is 0 and the intercept is the segment mean.
	 */
	public static double[] regressSegment(double[] column, int start, int end) {
		final int numElements = end - start;
		if(numElements <= 0)
			return new double[] {0.0, 0.0};

		double sumX = 0, sumY = 0, sumXX = 0, sumXY = 0;
		for(int i = start; i < end; i++) {
			final double x = i;
			final double y = column[i];
			sumX += x;
			sumY += y;
			sumXX += x * x;
			sumXY += x * y;
		}

		final double n = numElements;
		// closed-form OLS estimators
		final double denominator = n * sumXX - sumX * sumX;
		final double slope;
		final double intercept;
		if(denominator == 0) {
			slope = 0.0;
			intercept = sumY / n;
		}
		else {
			slope = (n * sumXY - sumX * sumY) / denominator;
			intercept = (sumY - slope * sumX) / n;
		}
		return new double[] {slope, intercept};
	}

	/**
	 * Greedy breakpoint search: grow each segment until its SSE exceeds (segmentLength * targetMSE), then cut at the
	 * last valid end. Falls back to a single segment when no valid target loss is configured.
	 *
	 * @param column The column values.
	 * @param cs     Compression settings supplying the target loss.
	 * @return Sorted breakpoint list from 0 to column.length.
	 */
	public static List<Integer> computeBreakpointsGreedy(double[] column, CompressionSettings cs) {
		final int numElements = column.length;
		final double targetMSE = cs.getPiecewiseTargetLoss();

		// one empty segment keeps the downstream >=2 breakpoints invariant valid
		if(numElements == 0)
			return Arrays.asList(0, 0);

		// fallback: one segment covering everything
		if(Double.isNaN(targetMSE) || targetMSE <= 0)
			return Arrays.asList(0, numElements);

		final List<Integer> breakpoints = new ArrayList<>();
		breakpoints.add(0);
		int currentStart = 0;

		while(currentStart < numElements) {
			int bestEnd = numElements; // default: the rest forms one segment
			for(int end = currentStart + 1; end <= numElements; end++) {
				final double sse = computeSegmentCost(column, currentStart, end);
				final double sseMax = (end - currentStart) * targetMSE;
				if(sse > sseMax) {
					bestEnd = end - 1; // last end that still met the budget
					break;
				}
			}
			breakpoints.add(bestEnd);
			currentStart = bestEnd;
		}

		if(breakpoints.get(breakpoints.size() - 1) != numElements)
			breakpoints.add(numElements);
		return breakpoints;
	}
}
org.apache.sysds.runtime.compress.estim.EstimationFactors; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.util.DataConverter; +import org.apache.sysds.test.AutomatedTestBase; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.apache.sysds.runtime.compress.colgroup.functional.PiecewiseLinearUtils.*; +import static org.apache.sysds.test.functions.io.binary.BlocksizeTest.sparsity; +import static org.junit.Assert.*; + +public class ColGroupPiecewiseLinearCompressedTest extends AutomatedTestBase { + @Override + public void setUp() { + + } + + @Test + public void testComputeBreakpointsUniformColumn() { + CompressionSettings cs = new CompressionSettingsBuilder().create(); + cs.setPiecewiseTargetLoss(1e-3); + double[] column = {1.0, 1.0, 1.0, 1.0, 1.0}; // ← Test-spezifisch + List breaks = computeBreakpoints(cs, column); + assertEquals(Arrays.asList(0, 5), breaks); // Erwartet: keine Breaks + } + + @Test + public void testComputeBreakpointsLinearIncreasing() { + CompressionSettings cs = new CompressionSettingsBuilder().create(); + cs.setPiecewiseTargetLoss(1e-3); + double[] column = {0.0, 1.0, 2.0, 3.0, 4.0}; // ← andere column + List breaks = computeBreakpoints(cs, column); + assertEquals(Arrays.asList(0, 5), breaks); // Erwartet + + } + + + + @Test + public void testComputeBreakpointsTwoSegments() { + CompressionSettings cs = new CompressionSettingsBuilder().create(); + cs.setPiecewiseTargetLoss(1e-3); + // {1,1,1, 2,2,2} → 2 Segmente → [0,3,6] + double[] column = {1.0, 1.0, 1.0, 2.0, 2.0, 2.0}; + var breaks = computeBreakpoints(cs, column); + assertEquals(Arrays.asList(0, 3, 6), breaks); + } + + + + @Test + public void testComputeBreakpointsLambdaConst() { + double[] column = {1.0, 1.0, 1.0, 1.0, 1.0}; + List breaks = computeBreakpointsLambda(column, 5.0); + assertEquals(Arrays.asList(0, 5), breaks); + + breaks = 
computeBreakpointsLambda(column, 0.01); + assertEquals(Arrays.asList(0, 5), breaks); + } + + @Test + public void testComputeBreakpointsLambdaTwoSegments() { + double[] column = {1.0, 1.0, 1.0, 2.0, 2.0, 2.0}; // 6 Werte + + // mit kleinem lambda -> viele Segmente (kostenlos fast) + List breaks = computeBreakpointsLambda(column, 0.01); + assertTrue(breaks.contains(3)); + assertEquals(3, breaks.size()); + assertEquals(Arrays.asList(0, 3, 6), breaks); + + // mit großem lambda entspricht nur ein Segment + breaks = computeBreakpointsLambda(column, 1000.0); + assertEquals(Arrays.asList(0, 6), breaks); + } + + @Test + public void testComputeBreakpointsLambdaJumpWithTrend() { + double[] column = {0.0, 1.0, 2.0, 10.0, 11.0, 12.0}; + + // grobe Segmentanpassung: ein Segment pro „Abschnitt“ + List breaks = computeBreakpointsLambda(column, 0.5); + assertEquals(Arrays.asList(0, 3, 6), breaks); + + // nur ein Segment, wenn lambda sehr groß + breaks = computeBreakpointsLambda(column, 100.0); + assertEquals(Arrays.asList(0, 6), breaks); + } + + @Test + public void testComputeBreakpointsLambdaLinear() { + double[] column = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0}; + + List breaks = computeBreakpointsLambda(column, 1.0); + assertEquals(Arrays.asList(0, 6), breaks); + + // mit sehr kleinem lambda: wir prüfen nur, dass die Grenzen vernünftig sind + breaks = computeBreakpointsLambda(column, 0.001); + assertTrue(breaks.size() >= 2); + assertTrue(breaks.get(0) == 0); + assertTrue(breaks.get(breaks.size() - 1) == column.length); + } + + @Test + public void testComputeBreakpointsLambdaEdgeLambdaVerySmall() { + double[] column = {1.0, 1.1, 1.0, 1.1, 1.0}; + + List breaks = computeBreakpointsLambda(column, 0.001); + assertNotNull(breaks); + assertFalse(breaks.isEmpty()); + assertEquals(0, (int) breaks.get(0)); + assertEquals(column.length, (int) breaks.get(breaks.size() - 1)); + + // Prüfe, dass die Liste sortiert ist + for(int i = 1; i < breaks.size(); i++) { + assertTrue(breaks.get(i) >= 
breaks.get(i - 1)); + } + } + + @Test + public void testComputeBreakpointsLambdaEdgeLambdaVeryLarge() { + double[] column = {1.0, 2.0, 1.5, 2.5, 1.8}; + + List breaks = computeBreakpointsLambda(column, 1000.0); + assertEquals(Arrays.asList(0, 5), breaks); + } + + @Test + public void testComputeSegmentCostEmptyOrSingle() { + double[] column = {10.0, 20.0, 30.0}; + + // 0 Elemente (leer) + assertEquals(0.0, computeSegmentCost(column, 0, 0), 1e-10); + assertEquals(0.0, computeSegmentCost(column, 1, 1), 1e-10); + + // 1 Element → Regressionsgerade ist nicht eindeutig definiert, aber SSE=0 + assertEquals(0.0, computeSegmentCost(column, 0, 1), 1e-10); + assertEquals(0.0, computeSegmentCost(column, 1, 2), 1e-10); + assertEquals(0.0, computeSegmentCost(column, 2, 3), 1e-10); + } + + @Test + public void testComputeSegmentCostTwoConstantPoints() { + double[] column = {5.0, 5.0, 1.0, 1.0}; + + // Zwei identische Punkte (konstant) → SSE = 0 + double sse = computeSegmentCost(column, 0, 2); + assertEquals(0.0, sse, 1e-10); + } + + @Test + public void testComputeSegmentCostTwoDifferentPoints() { + double[] column = {0.0, 2.0, 1.0, 3.0}; + + // Zwei Punkte: (0,0) und (1,2) → Gerade y = 2*x, Fehler = 0 + double sse = computeSegmentCost(column, 0, 2); + assertEquals(0.0, sse, 1e-10); + + // Zwei Punkte: (2,1) und (3,3) → Gerade y = 2*x - 3, Fehler = 0 + sse = computeSegmentCost(column, 2, 4); + assertEquals(0.0, sse, 1e-10); + } + + @Test + public void testComputeSegmentCostConstantThree() { + double[] column = {0.0, 0.0, 0.0}; + double sse = computeSegmentCost(column, 0, 3); + assertEquals(0.0, sse, 1e-10); + } + + @Test + public void testComputeSegmentCostConsistentWithRegression() { + double[] column = {0.0, 2.0, 0.0, 4.0, 0.0, 6.0}; + + int start = 0, end = 3; + double[] ab = regressSegment(column, start, end); + double slope = ab[0], intercept = ab[1]; + double sse_hand = 0.0; + for(int i = start; i < end; i++) { + double yhat = slope * i + intercept; + double diff = column[i] 
- yhat; + sse_hand += diff * diff; + } + + double sse = computeSegmentCost(column, start, end); + assertEquals(sse_hand, sse, 1e-10); + } + + + @Test + public void testComputeTotalSSETwoSegments() { + // Beispiel: [0,0,0] und [1,1,1] (jeweils konstant) + double[] column = {0.0, 0.0, 0.0, 1.0, 1.0, 1.0}; + List breaks = Arrays.asList(0, 3, 6); // zwei Segmente + + double total = computeTotalSSE(column, breaks); + double sse1 = computeSegmentCost(column, 0, 3); // [0,0,0] → SSE = 0 + double sse2 = computeSegmentCost(column, 3, 6); // [1,1,1] → SSE = 0 + + // da beide Segmente konstant sind, muss totalSSE = 0 sein + assertEquals(0.0, total, 1e-10); + assertEquals(sse1 + sse2, total, 1e-10); + } + + @Test + public void testComputeTotalSSEThreeSegments() { + // Ein Segment mit drei identischen Werten, zwei Segmente mit jeweils zwei Werten + double[] column = {1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0}; + List breaks = Arrays.asList(0, 3, 5, 7); + + // Segment [0,3): konstant 1.0 → SSE = 0 + double sse1 = computeSegmentCost(column, 0, 3); // 0 + + // Segment [3,5): [2,2] → SSE = 0 + double sse2 = computeSegmentCost(column, 3, 5); // 0 + + // Segment [5,7): [3,3] → SSE = 0 + double sse3 = computeSegmentCost(column, 5, 7); // 0 + + double total = computeTotalSSE(column, breaks); + assertEquals(0.0, total, 1e-10); + assertEquals(sse1 + sse2 + sse3, total, 1e-10); + } + + @Test + public void testComputeTotalSSEGapStartEnd() { + double[] column = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; + List breaks = Arrays.asList(2, 5, 8); + + double total = computeTotalSSE(column, breaks); + double sse1 = computeSegmentCost(column, 2, 5); + double sse2 = computeSegmentCost(column, 5, 8); + + assertEquals(sse1 + sse2, total, 1e-10); + + } + + @Test + public void testComputeTotalSSEOneSegmentIdentical() { + double[] column = {1.0, 2.0, 3.0, 4.0, 5.0}; + double sseTotal = computeSegmentCost(column, 0, 5); + + List breaks = Arrays.asList(0, 5); + double total = computeTotalSSE(column, 
breaks); + + assertEquals(sseTotal, total, 1e-10); + } + + @Test + public void testComputeTotalSSENonConstant() { + double[] column = {0.0, 1.0, 2.0, 3.0, 4.0}; + List breaks = Arrays.asList(0, 2, 5); + + double total = computeTotalSSE(column, breaks); + double sse1 = computeSegmentCost(column, 0, 2); + double sse2 = computeSegmentCost(column, 2, 5); + + assertTrue(total >= 0.0); + assertEquals(sse1 + sse2, total, 1e-10); + } + + @Test + public void testComputeTotalSSEEdgeCases() { + double[] columnEmpty = {}; + List breaksEmpty = Arrays.asList(0, 0); + assertEquals(0.0, computeTotalSSE(columnEmpty, breaksEmpty), 1e-10); + + double[] columnOne = {42.0}; + List breaksOne = Arrays.asList(0, 1); + double total = computeTotalSSE(columnOne, breaksOne); + assertEquals(0.0, total, 1e-10); + } + + @Test + public void testRegressSegmentEmpty() { + double[] column = {1.0, 2.0, 3.0}; + double[] result = regressSegment(column, 0, 0); + assertEquals(0.0, result[0], 1e-10); + assertEquals(0.0, result[1], 1e-10); + } + + @Test + public void testRegressSegmentSinglePoint() { + double[] column = {1.0, 2.0, 3.0}; + double[] result = regressSegment(column, 1, 2); + + assertEquals(0.0, result[0], 1e-10); + assertEquals(2.0, result[1], 1e-10); + } + + @Test + public void testRegressSegmentTwoIdentical() { + double[] column = {5.0, 5.0, 1.0, 1.0}; + double[] result = regressSegment(column, 0, 2); + + assertEquals(0.0, result[0], 1e-10); + assertEquals(5.0, result[1], 1e-10); + } + + @Test + public void testRegressSegmentTwoPoints() { + double[] column = {0.0, 2.0}; + double[] result = regressSegment(column, 0, 2); + + assertEquals(2.0, result[0], 1e-10); + assertEquals(0.0, result[1], 1e-10); + } + + @Test + public void testRegressSegmentTwoPointsOffset() { + + double[] column = {1.0, 3.0, 5.0, 7.0}; + double[] result = regressSegment(column, 2, 4); + + assertEquals(2.0, result[0], 1e-10); + assertEquals(1.0, result[1], 1e-10); + } + + @Test + public void testRegressSegmentConstant() { 
+ double[] column = {3.0, 3.0, 3.0, 3.0}; + double[] result = regressSegment(column, 0, 4); + + assertEquals(0.0, result[0], 1e-10); + assertEquals(3.0, result[1], 1e-10); + } + + @Test + public void testRegressSegmentLinear() { + double[] column = new double[4]; + double a = 1.5, b = 2.0; + for(int i = 0; i < 4; i++) { + column[i] = a * i + b; + } + + double[] result = regressSegment(column, 0, 4); + + assertEquals(a, result[0], 1e-10); + assertEquals(b, result[1], 1e-10); + } + + + + @Test + public void testCompressPiecewiseLinearFunctionalConst() { + // 1. MatrixBlock mit einer konstanten Spalte erzeugen + int nrows = 20, ncols = 1; + MatrixBlock in = new MatrixBlock(nrows, ncols, false); + for(int r = 0; r < nrows; r++) + in.set(r, 0, 1.0); + // 2. colIndexes für Spalte 0 + IColIndex colIndexes = ColIndexFactory.create(new int[] {0}); + // 3. CompressionSettings mit TargetLoss + CompressionSettings cs = new CompressionSettingsBuilder().create(); + cs.setPiecewiseTargetLoss(1e-6); + // 4. Aufruf der Kompressionsfunktion + AColGroup result = ColGroupFactory.compressPiecewiseLinearFunctional(colIndexes, in, cs); + + // 5. Ergebnis ist eine ColGroupPiecewiseLinearCompressed? + assertTrue(result instanceof ColGroupPiecewiseLinearCompressed); + ColGroupPiecewiseLinearCompressed plGroup = (ColGroupPiecewiseLinearCompressed) result; + + // 6. Breakpoints per Getter, nicht per create() + int[] breakpoints = plGroup.getBreakpoints(); + assertArrayEquals(new int[] {0, 20}, breakpoints); + + // 7. Pro Segment: 1 Segment → ein slope, ein intercept + double[] slopes = plGroup.getSlopes(); + double[] intercepts = plGroup.getIntercepts(); + assertEquals(1, slopes.length); + assertEquals(1, intercepts.length); + + // 8. Für konstante Daten: Steigung ~0, intercept ~1.0 + assertEquals(0.0, slopes[0], 1e-10); + assertEquals(1.0, intercepts[0], 1e-10); + + // 9. 
Check: colIndexes stimmt + IColIndex idx = plGroup.getColIndices(); + assertEquals(1, idx.size()); + assertEquals(0, idx.get(0)); + } + + @Test(expected = IllegalArgumentException.class) + public void testCreateNullBreakpoints() { + int[] nullBp = null; + ColGroupPiecewiseLinearCompressed.create(ColIndexFactory.create(new int[] {0}), nullBp, new double[] {1.0}, + new double[] {0.0}, 10); + } + + @Test(expected = IllegalArgumentException.class) + public void testCreateTooFewBreakpoints() { + int[] singleBp = {0}; + ColGroupPiecewiseLinearCompressed.create(ColIndexFactory.create(new int[] {0}), singleBp, new double[] {1.0}, + new double[] {0.0}, 10); + } + + @Test(expected = IllegalArgumentException.class) + public void testCreateInconsistentSlopes() { + int[] bp = {0, 5, 10}; + ColGroupPiecewiseLinearCompressed.create(ColIndexFactory.create(new int[] {0}), bp, + new double[] {1.0, 2.0, 3.0}, new double[] {0.0, 1.0}, 10); + } + + @Test(expected = IllegalArgumentException.class) + public void testCreateInconsistentIntercepts() { + int[] bp = {0, 5, 10}; + ColGroupPiecewiseLinearCompressed.create(ColIndexFactory.create(new int[] {0}), bp, new double[] {1.0, 2.0}, + new double[] {0.0}, 10); + } + + @Test + public void testCreateValidMultiSegment() { + int[] bp = {0, 3, 7, 10}; + double[] slopes = {1.0, -2.0, 0.5}; + double[] intercepts = {0.0, 5.0, -1.0}; + IColIndex cols = ColIndexFactory.create(new int[] {0, 1}); + + AColGroup cg = ColGroupPiecewiseLinearCompressed.create(cols, bp, slopes, intercepts, 10); + + assertTrue(cg instanceof ColGroupPiecewiseLinearCompressed); + assertNotSame(bp, ((ColGroupPiecewiseLinearCompressed) cg).getBreakpoints()); + } + + @Test + public void testCreateMultiColumn() { + IColIndex cols = ColIndexFactory.create(new int[] {5, 10, 15}); + int[] bp = {0, 5}; + double[] slopes = {3.0}; + double[] intercepts = {2.0}; + + AColGroup cg = ColGroupPiecewiseLinearCompressed.create(cols, bp, slopes, intercepts, 100); + assertTrue(cg instanceof 
ColGroupPiecewiseLinearCompressed); + assertTrue(cg instanceof ColGroupPiecewiseLinearCompressed); + + // + assertTrue(cg.getNumValues() > 0); + + for(int r = 0; r < 5; r++) { + double expected = 3.0 * r + 2.0; + // colIdx=0 → globale Spalte 5 + assertEquals(expected, cg.getIdx(r, 0), 1e-9); + // colIdx=1 → globale Spalte 10 + assertEquals(expected, cg.getIdx(r, 1), 1e-9); + // colIdx=2 → globale Spalte 15 + assertEquals(expected, cg.getIdx(r, 2), 1e-9); + } + + for(int r = 5; r < 10; r++) { + double expected = 3.0 * r + 2.0; + assertEquals(expected, cg.getIdx(r, 0), 1e-9); // Alle Columns gleich + } + assertEquals(cols.size(), 3); + } + + @Test + public void testCreateSingleColumn() { + IColIndex cols = ColIndexFactory.create(new int[] {5}); + int[] bp = {0, 5}; + double[] slopes = {3.0}; + double[] intercepts = {2.0}; + int numRows = 10; + + AColGroup cg = ColGroupPiecewiseLinearCompressed.create(cols, bp, slopes, intercepts, numRows); + + assertTrue(cg instanceof ColGroupPiecewiseLinearCompressed); + + assertEquals(2.0, cg.getIdx(0, 0), 1e-9); // 3*0 + 2 + assertEquals(5.0, cg.getIdx(1, 0), 1e-9); // 3*1 + 2 + } + + @Test + public void testCreateValidMinimal() { + + // 1 Segment: [0,10] → y = 2.0 * r + 1.0 + int[] bp = {0, 10}; + double[] slopes = {2.0}; + double[] intercepts = {1.0}; + IColIndex cols = ColIndexFactory.create(new int[] {0}); + int numRows = 10; + + AColGroup cg = ColGroupPiecewiseLinearCompressed.create(cols, bp, slopes, intercepts, numRows); + + // Korrekte Instanz + assertTrue(cg instanceof ColGroupPiecewiseLinearCompressed); + + // getNumValues() > 0 + assertTrue(cg.getNumValues() > 0); + + // r < numRows + for(int r = 0; r < numRows; r++) { + double expected = 2.0 * r + 1.0; + assertEquals("Row " + r, expected, cg.getIdx(r, 0), 1e-9); + } + + // Letzte gültige Row + assertEquals(19.0, cg.getIdx(9, 0), 1e-9); + + //Out-of-Bounds korrekt 0.0 + assertEquals(0.0, cg.getIdx(10, 0), 1e-9); + assertEquals(0.0, cg.getIdx(9, 1), 1e-9); + } + + @Test 
+ public void testDecompressToDenseBlock() { + int[] bp = {0, 5, 10}; + double[] slopes = {1.0, 2.0}; + double[] intercepts = {0.0, 1.0}; + int numRows = 10; + + AColGroup cg = ColGroupPiecewiseLinearCompressed.create(ColIndexFactory.create(new int[] {0}), bp, slopes, + intercepts, numRows); + + // 1. MatrixBlock mit korrekten Dimensionen + MatrixBlock target = new MatrixBlock(numRows, 1, false); + + // 2. DenseBlock ZUERST alloziieren! + target.allocateDenseBlock(); // Oder target.allocateDenseBlock(true); + + // 3. Jetzt DenseBlock verfügbar + DenseBlock db = target.getDenseBlock(); + assertNotNull(db); // Sicherstellen! + + // 4. Dekomprimieren + cg.decompressToDenseBlock(db, 0, numRows, 0, 0); + + // 5. Prüfen + for(int r = 0; r < numRows; r++) { + double expected = (r < 5) ? 1.0 * r : 2.0 * r + 1.0; + assertEquals("Row " + r, expected, db.get(r, 0), 1e-9); + } + } + + private ColGroupPiecewiseLinearCompressed createTestGroup(int numRows) { + int[] bp = {0, 5, numRows}; + double[] slopes = {1.0, 3.0}; + double[] intercepts = {0.0, 2.0}; + return (ColGroupPiecewiseLinearCompressed) ColGroupPiecewiseLinearCompressed.create( + ColIndexFactory.create(new int[] {0}), bp, slopes, intercepts, numRows); + } + + @Test + public void testDecompressToDenseBlockFullRange() { + ColGroupPiecewiseLinearCompressed cg = createTestGroup(12); + + MatrixBlock target = new MatrixBlock(12, 1, false); + target.allocateDenseBlock(); + DenseBlock db = target.getDenseBlock(); + + cg.decompressToDenseBlock(db, 0, 12, 0, 0); + + // Segment 0 [0,5): y = r + assertEquals(0.0, db.get(0, 0), 1e-9); + assertEquals(4.0, db.get(4, 0), 1e-9); + + assertEquals(17.0, db.get(5, 0), 1e-9); + assertEquals(29.0, db.get(9, 0), 1e-9); + assertEquals(32.0, db.get(10, 0), 1e-9); + assertEquals(35.0, db.get(11, 0), 1e-9); + } + + @Test + public void testDecompressToDenseBlockPartialRange() { + ColGroupPiecewiseLinearCompressed cg = createTestGroup(12); + + MatrixBlock target = new MatrixBlock(12, 1, false); 
+ target.allocateDenseBlock(); + DenseBlock db = target.getDenseBlock(); + + // rl=6, ru=9 → r=6,7,8 dekomprimieren + // offR=0 → schreibt in Target-Rows 6,7,8 + cg.decompressToDenseBlock(db, 6, 9, 0, 0); + + assertEquals(0.0, db.get(0, 0), 1e-9); // Unberührt (vor rl=6) + assertEquals(20.0, db.get(6, 0), 1e-9); + assertEquals(23.0, db.get(7, 0), 1e-9); + assertEquals(26.0, db.get(8, 0), 1e-9); + assertEquals(0.0, db.get(9, 0), 1e-9); // Unberührt (nach ru=9) + } + + @Test + public void testDecompressToDenseBlockEmptyRange() { + ColGroupPiecewiseLinearCompressed cg = createTestGroup(12); + + MatrixBlock target = new MatrixBlock(5, 1, false); + target.allocateDenseBlock(); + DenseBlock db = target.getDenseBlock(); + + // Leerer Bereich + cg.decompressToDenseBlock(db, 12, 12, 0, 0); // rl=ru + cg.decompressToDenseBlock(db, 3, 2, 0, 0); // rl>ru + + // Alles bleibt 0.0 + for(int r = 0; r < 5; r++) { + assertEquals(0.0, db.get(r, 0), 1e-9); + } + } + + @Test + public void testDecompressToDenseBlockNullSafety() { + ColGroupPiecewiseLinearCompressed cg = createTestGroup(10); + + // Null DenseBlock + cg.decompressToDenseBlock(null, 0, 10, 0, 0); + + // Ungültige Parameter (leerer Bereich) + MatrixBlock target = new MatrixBlock(10, 1, false); + target.allocateDenseBlock(); + DenseBlock db = target.getDenseBlock(); + + cg.decompressToDenseBlock(db, 12, 12, 0, 0); // rl == ru + cg.decompressToDenseBlock(db, 5, 2, 0, 0); // rl > ru + + // Target unverändert + for(int r = 0; r < 10; r++) { + assertEquals(0.0, db.get(r, 0), 1e-9); + } + } + + private CompressedSizeInfo createTestCompressedSizeInfo() { + IColIndex cols = ColIndexFactory.create(new int[] {0}); + EstimationFactors facts = new EstimationFactors(2, 10); + + CompressedSizeInfoColGroup info = new CompressedSizeInfoColGroup(cols, facts, + AColGroup.CompressionType.PiecewiseLinear); + + List infos = Arrays.asList(info); + CompressedSizeInfo csi = new CompressedSizeInfo(infos); + + return csi; + } + + @Test + public void 
testCompressPiecewiseLinearViaRealAPI() { + + MatrixBlock in = new MatrixBlock(10, 1, false); + in.allocateDenseBlock(); + for(int r = 0; r < 10; r++) { + in.set(r, 0, r * 0.5); + } + + CompressionSettings cs = new CompressionSettingsBuilder().addValidCompression( + AColGroup.CompressionType.PiecewiseLinear).create(); + + CompressedSizeInfo csi = createTestCompressedSizeInfo(); + + List colGroups = ColGroupFactory.compressColGroups(in, csi, cs); + + boolean hasPiecewise = colGroups.stream().anyMatch(cg -> cg instanceof ColGroupPiecewiseLinearCompressed); + assertTrue(hasPiecewise); + } + @Test + + public void testGreedy_linearColumn_singleSegment() { + // 2. Perfekte Gerade → 1 Segment + double[] linearCol = {1.0, 2.0, 3.0, 4.0, 5.0}; // y=x+1 + CompressionSettings cs = new CompressionSettingsBuilder().create(); + cs.setPiecewiseTargetLoss(1e-6); + + List breaks = PiecewiseLinearUtils.computeBreakpointsGreedy(linearCol, cs); + assertEquals("[0, 5]", breaks.toString()); // SSE=0 ✓ + } + + @Test + public void testGreedy_noisyColumn_multipleSegments() { + // 3. Mit Sprung → 2 Segmente + double[] noisyCol = {1.1, 1.9, 2.2, 10.1, 10.8, 11.3}; // Sprung bei 3 + CompressionSettings cs = new CompressionSettingsBuilder().create(); + cs.setPiecewiseTargetLoss(1.0); // Erlaubt MSE=1 + + List breaks = PiecewiseLinearUtils.computeBreakpointsGreedy(noisyCol, cs); + // Erwartet mind. 2 Segmente (Sprung erkennen) + assertTrue(breaks.size() >= 3); // [0, ?, 6] + } + + @Test + public void testGreedy_targetLossIncreasesSegments() { + // 4. 
Höherer Target-Loss → weniger Segmente + double[] colWithJumps = {1,2,3, 10,11,12, 20,21,22}; + CompressionSettings csStrict = new CompressionSettingsBuilder().create(); + csStrict.setPiecewiseTargetLoss(0.01); // Streng → viele Segmente + + CompressionSettings csLoose = new CompressionSettingsBuilder().create(); + csLoose.setPiecewiseTargetLoss(10.0); + + List strictBreaks = PiecewiseLinearUtils.computeBreakpointsGreedy(colWithJumps, csStrict); + List looseBreaks = PiecewiseLinearUtils.computeBreakpointsGreedy(colWithJumps, csLoose); + + // Strenger Target → mehr Segmente + assertTrue(strictBreaks.size() > looseBreaks.size()); + } + + + @Test + public void testMultiColumnTargetLossRespected() { + final int rows = 50, cols = 2; + double[][] data = getRandomMatrix(rows, cols, 0, 10, 1.0, 42L); + MatrixBlock orig = DataConverter.convertToMatrixBlock(data); + orig.allocateDenseBlock(); + + IColIndex colIdx = ColIndexFactory.create(0, cols-1); + CompressionSettings cs = new CompressionSettingsBuilder().create(); + cs.setPiecewiseTargetLoss(1.0); + + AColGroup cg = ColGroupFactory.compressPiecewiseLinearFunctional(colIdx, orig, cs); + + MatrixBlock target = new MatrixBlock(rows, cols, false); + target.allocateDenseBlock(); + cg.decompressToDenseBlock(target.getDenseBlock(), 0, rows-1, 0, cols-1); + + // Test MSE für jede Spalte + for (int c = 0; c < cols; c++) { + double mse = computeColumnMSE(orig, target, c); + assertTrue("Col " + c + " MSE=" + mse + " > target=1.0", mse <= 1.0); + } + } + + + private double computeColumnMSE(MatrixBlock orig, MatrixBlock reconstructed, int colIdx) { + double mse = 0.0; + final int numRows = orig.getNumRows(); + + DenseBlock origDb = orig.getDenseBlock(); + DenseBlock reconDb = reconstructed.getDenseBlock(); + + for (int row = 0; row < numRows; row++) { + final double origValue = origDb.get(row, colIdx); // ← DENSEBLOCK.GET! 
+		final double reconValue = reconDb.get(row, colIdx);
+		final double squaredError = (origValue - reconValue) * (origValue - reconValue);
+		mse += squaredError;
+	}
+
+	return mse / numRows;
+}
+
+}
diff --git a/use-java17-systemds.sh b/use-java17-systemds.sh
new file mode 100755
index 00000000000..0c1a2fda871
--- /dev/null
+++ b/use-java17-systemds.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# ------------------------------------------------------------------
+# SystemDS macOS build script
+# Sets JAVA_HOME and PATH, runs the Maven build, and generates
+# bin/systemds-standalone.sh
+# ------------------------------------------------------------------
+
+# 1️⃣ Select Java 17
+export JAVA_HOME=$(/usr/libexec/java_home -v 17)
+export PATH="$JAVA_HOME/bin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/opt/homebrew/bin:/opt/homebrew/sbin:$PATH"
+
+# 2️⃣ Optional: Python, ghcup, uix/Deno, Coursier, JetBrains Toolbox
+# NOTE(review): the absolute /Users/mori/... paths below are machine-specific
+# and should not be committed to the repository.
+export PATH="/Library/Frameworks/Python.framework/Versions/3.11/bin:$HOME/.ghcup/bin:$HOME/.uix/bin:$PATH"
+export DENO_INSTALL="$HOME/.uix"
+export PATH="$DENO_INSTALL/bin:$PATH"
+export PATH="$PATH:/Users/mori/Library/Application Support/Coursier/bin"
+export PATH="$PATH:/Users/mori/Library/Application Support/JetBrains/Toolbox/scripts"
+
+# 3️⃣ Check that Maven is available
+if ! command -v mvn >/dev/null 2>&1; then
+    echo "ERROR: Maven (mvn) nicht gefunden. Bitte installieren!"
+    exit 1
+fi
+
+# 4️⃣ Check that we are in the project root (pom.xml present)
+if [ ! -f "pom.xml" ]; then
+    echo "ERROR: pom.xml nicht gefunden. Bitte ins SystemDS-Projekt-Root wechseln."
+    exit 1
+fi
+
+# 5️⃣ Run the Maven build; abort on failure so we never wrap a stale or
+# missing JAR (previously the exit status of mvn was ignored).
+echo "📦 Starte Maven Build..."
+if ! mvn clean package -DskipTests; then
+    echo "ERROR: Maven Build fehlgeschlagen."
+    exit 1
+fi
+
+# 6️⃣ Generate the standalone launcher script
+echo "🔧 Erzeuge bin/systemds-standalone.sh..."
+
+mkdir -p bin
+cat > bin/systemds-standalone.sh << 'EOF'
+#!/bin/bash
+# Standalone-Launcher für SystemDS
+
+SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
+JAR_FILE="$SCRIPT_DIR/../target/systemds-3.4.0-SNAPSHOT.jar"
+
+if [ ! -f "$JAR_FILE" ]; then
+    echo "ERROR: Standalone JAR nicht gefunden: $JAR_FILE"
+    exit 1
+fi
+
+java -cp "$JAR_FILE" org.apache.sysds.api.DMLScript "$@"
+EOF
+
+# 7️⃣ Make it executable
+chmod +x bin/systemds-standalone.sh
+
+echo "✅ Fertig! Standalone-Skript erstellt: bin/systemds-standalone.sh"