diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/DenseDfaMatcher.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/DenseDfaMatcher.java index c020ec4f8..e368036f8 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/DenseDfaMatcher.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/DenseDfaMatcher.java @@ -23,6 +23,8 @@ import org.apache.tsfile.common.regexp.pattern.Literal; import org.apache.tsfile.common.regexp.pattern.Pattern; import org.apache.tsfile.common.regexp.pattern.ZeroOrMore; +import org.apache.tsfile.utils.Accountable; +import org.apache.tsfile.utils.RamUsageEstimator; import java.util.Arrays; import java.util.List; @@ -32,8 +34,10 @@ import static org.apache.tsfile.utils.Preconditions.checkArgument; public class DenseDfaMatcher implements Matcher { - public static final int FAIL_STATE = -1; + private static final long INSTANCE_SIZE = + RamUsageEstimator.shallowSizeOfInstance(DenseDfaMatcher.class); + public static final int FAIL_STATE = -1; private List pattern; private int start; private int end; @@ -63,7 +67,17 @@ public boolean match(byte[] input, int offset, int length) { return matcher.prefixMatch(input, offset, length); } - private static class DenseDfa { + @Override + public long ramBytesUsed() { + return INSTANCE_SIZE + + RamUsageEstimator.sizeOfObject(pattern) + + RamUsageEstimator.sizeOfObject(matcher); + } + + private static class DenseDfa implements Accountable { + private static final long INSTANCE_SIZE = + RamUsageEstimator.shallowSizeOfInstance(DenseDfa.class); + // The DFA is encoded as a sequence of transitions for each possible byte value for each state. // I.e., 256 transitions per state. // The content of the transitions array is the base offset into @@ -141,6 +155,13 @@ public boolean prefixMatch(byte[] input, int offset, int length) { return accept[state >>> 8]; } + @Override + public long ramBytesUsed() { + return INSTANCE_SIZE + + RamUsageEstimator.sizeOf(transitions) + + RamUsageEstimator.sizeOf(accept); + } + private static NFA makeNfa(List pattern, int start, int end) { checkArgument(!pattern.isEmpty(), "pattern is empty"); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/FjsMatcher.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/FjsMatcher.java index 0ef73177c..22826c333 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/FjsMatcher.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/FjsMatcher.java @@ -22,6 +22,8 @@ import org.apache.tsfile.common.regexp.pattern.Any; import org.apache.tsfile.common.regexp.pattern.Literal; import org.apache.tsfile.common.regexp.pattern.Pattern; +import org.apache.tsfile.utils.Accountable; +import org.apache.tsfile.utils.RamUsageEstimator; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -31,6 +33,9 @@ import static org.apache.tsfile.utils.Preconditions.checkArgument; public class FjsMatcher implements Matcher { + private static final long INSTANCE_SIZE = + RamUsageEstimator.shallowSizeOfInstance(FjsMatcher.class); + private final List pattern; private final int start; private final int end; @@ -56,7 +61,16 @@ public boolean match(byte[] input, int offset, int length) { return matcher.match(input, offset, length); } - private static class Fjs { + @Override + public long ramBytesUsed() { + return INSTANCE_SIZE + + RamUsageEstimator.sizeOfObject(pattern) + + RamUsageEstimator.sizeOfObject(matcher); + } + + private static class Fjs implements Accountable { + private static final long INSTANCE_SIZE = RamUsageEstimator.shallowSizeOfInstance(Fjs.class); + private final boolean exact; private final List patterns = new ArrayList<>(); private final List bmsShifts = new ArrayList<>(); @@ -219,5 +233,13 @@ public boolean match(byte[] input, int offset, int length) { return !exact || remaining == 0; } + + @Override + public long ramBytesUsed() { + return INSTANCE_SIZE + + RamUsageEstimator.sizeOfObject(patterns) + + RamUsageEstimator.sizeOfObject(bmsShifts) + + RamUsageEstimator.sizeOfObject(kmpShifts); + } } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikeMatcher.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikeMatcher.java index 49347c3d2..5ad3d4f36 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikeMatcher.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikeMatcher.java @@ -23,6 +23,8 @@ import org.apache.tsfile.common.regexp.pattern.Literal; import org.apache.tsfile.common.regexp.pattern.Pattern; import org.apache.tsfile.common.regexp.pattern.ZeroOrMore; +import org.apache.tsfile.utils.Accountable; +import org.apache.tsfile.utils.RamUsageEstimator; import java.util.ArrayList; import java.util.List; @@ -31,7 +33,10 @@ import static java.nio.charset.StandardCharsets.UTF_8; -public class LikeMatcher { +public class LikeMatcher implements Accountable { + private static final long INSTANCE_SIZE = + RamUsageEstimator.shallowSizeOfInstance(LikeMatcher.class); + private final int minSize; private final OptionalInt maxSize; private final byte[] prefix; @@ -173,6 +178,21 @@ public boolean match(byte[] input, int offset, int length) { return true; } + @Override + public long ramBytesUsed() { + long size = + INSTANCE_SIZE + + RamUsageEstimator.shallowSizeOf(maxSize) + + RamUsageEstimator.sizeOf(prefix) + + RamUsageEstimator.sizeOf(suffix) + + RamUsageEstimator.shallowSizeOf(matcher); + + if (matcher.isPresent()) { + size += matcher.get().ramBytesUsed(); + } + return size; + } + private boolean startsWith(byte[] pattern, byte[] input, int offset) { for (int i = 0; i < pattern.length; i++) { if (pattern[i] != input[offset + i]) { diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikePattern.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikePattern.java index bda6ef3a1..762b4ff6f 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikePattern.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikePattern.java @@ -19,12 +19,18 @@ package org.apache.tsfile.common.regexp; +import org.apache.tsfile.utils.Accountable; +import org.apache.tsfile.utils.RamUsageEstimator; + import java.util.Objects; import java.util.Optional; import static java.util.Objects.requireNonNull; -public class LikePattern { +public class LikePattern implements Accountable { + private static final long INSTANCE_SIZE = + RamUsageEstimator.shallowSizeOfInstance(LikePattern.class); + private final String pattern; private final Optional escape; private final LikeMatcher matcher; @@ -81,4 +87,11 @@ public String toString() { + (escape.map(character -> ", escape=" + character).orElse("")) + '}'; } + + @Override + public long ramBytesUsed() { + long size = + INSTANCE_SIZE + RamUsageEstimator.sizeOf(pattern) + RamUsageEstimator.shallowSizeOf(escape); + return size + matcher.ramBytesUsed(); + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/Matcher.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/Matcher.java index 49536933d..f6676496d 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/Matcher.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/Matcher.java @@ -19,6 +19,8 @@ package org.apache.tsfile.common.regexp; -public interface Matcher { +import org.apache.tsfile.utils.Accountable; + +public interface Matcher extends Accountable { boolean match(byte[] input, int offset, int length); } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/NfaMatcher.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/NfaMatcher.java index c6d6c4777..d0e8308d1 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/NfaMatcher.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/NfaMatcher.java @@ -23,11 +23,15 @@ import org.apache.tsfile.common.regexp.pattern.Literal; import org.apache.tsfile.common.regexp.pattern.Pattern; import org.apache.tsfile.common.regexp.pattern.ZeroOrMore; +import org.apache.tsfile.utils.RamUsageEstimator; import java.util.Arrays; import java.util.List; public class NfaMatcher implements Matcher { + private static final long INSTANCE_SIZE = + RamUsageEstimator.shallowSizeOfInstance(NfaMatcher.class); + private static final int ANY = -1; private static final int NONE = -2; private static final int INVALID_CODEPOINT = -1; @@ -173,4 +177,9 @@ public boolean match(byte[] input, int offset, int length) { return accept; } + + @Override + public long ramBytesUsed() { + return INSTANCE_SIZE + RamUsageEstimator.sizeOf(loopback) + RamUsageEstimator.sizeOf(match); + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Any.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Any.java index 8c1e650a8..ccf84b171 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Any.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Any.java @@ -19,7 +19,11 @@ package org.apache.tsfile.common.regexp.pattern; +import org.apache.tsfile.utils.RamUsageEstimator; + public class Any implements Pattern { + private static final long INSTANCE_SIZE = RamUsageEstimator.shallowSizeOfInstance(Any.class); + private final int length; public Any(int length) { @@ -41,4 +45,9 @@ public String toString() { } return sb.toString(); } + + @Override + public long ramBytesUsed() { + return INSTANCE_SIZE; + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Literal.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Literal.java index 8ea9cf05e..b41422ba9 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Literal.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Literal.java @@ -19,7 +19,11 @@ package org.apache.tsfile.common.regexp.pattern; +import org.apache.tsfile.utils.RamUsageEstimator; + public class Literal implements Pattern { + private static final long INSTANCE_SIZE = RamUsageEstimator.shallowSizeOfInstance(Literal.class); + private final String value; public Literal(String value) { @@ -34,4 +38,9 @@ public String getValue() { public String toString() { return value; } + + @Override + public long ramBytesUsed() { + return INSTANCE_SIZE + RamUsageEstimator.sizeOf(value); + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Pattern.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Pattern.java index 959e5a711..2ed108473 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Pattern.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Pattern.java @@ -19,4 +19,6 @@ package org.apache.tsfile.common.regexp.pattern; -public interface Pattern {} +import org.apache.tsfile.utils.Accountable; + +public interface Pattern extends Accountable {} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/ZeroOrMore.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/ZeroOrMore.java index ac0a04433..17454671a 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/ZeroOrMore.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/ZeroOrMore.java @@ -19,9 +19,19 @@ package org.apache.tsfile.common.regexp.pattern; +import org.apache.tsfile.utils.RamUsageEstimator; + public class ZeroOrMore implements Pattern { + private static final long INSTANCE_SIZE = + RamUsageEstimator.shallowSizeOfInstance(ZeroOrMore.class); + @Override public String toString() { return "%"; } + + @Override + public long ramBytesUsed() { + return INSTANCE_SIZE; + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/utils/TimeDuration.java b/java/tsfile/src/main/java/org/apache/tsfile/utils/TimeDuration.java index 042c1d07c..9e835423d 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/utils/TimeDuration.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/utils/TimeDuration.java @@ -30,6 +30,9 @@ import java.util.concurrent.TimeUnit; public class TimeDuration implements Serializable { + public static final long INSTANCE_SIZE = + RamUsageEstimator.shallowSizeOfInstance(TimeDuration.class); + // month part of time duration public final int monthDuration; // non-month part of time duration, its precision is same as current time_precision