Skip to content

Commit e10d5d5

Browse files
jnthntatum authored and copybara-github committed
Add RegexPatternValidator.
PiperOrigin-RevId: 895515443
1 parent 0820886 commit e10d5d5

29 files changed

Lines changed: 1795 additions & 74 deletions

common/ast/BUILD

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -98,10 +98,13 @@ cc_library(
9898
deps = [
9999
"//common:constant",
100100
"//common:expr",
101+
"@com_google_absl//absl/base:core_headers",
101102
"@com_google_absl//absl/base:no_destructor",
102103
"@com_google_absl//absl/base:nullability",
103104
"@com_google_absl//absl/container:flat_hash_map",
104105
"@com_google_absl//absl/functional:overload",
106+
"@com_google_absl//absl/log:absl_check",
107+
"@com_google_absl//absl/strings",
105108
"@com_google_absl//absl/types:optional",
106109
"@com_google_absl//absl/types:variant",
107110
],

common/ast/metadata.cc

Lines changed: 117 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,11 +14,18 @@
1414

1515
#include "common/ast/metadata.h"
1616

17+
#include <cstddef>
1718
#include <memory>
19+
#include <string>
20+
#include <utility>
21+
#include <variant>
1822
#include <vector>
1923

2024
#include "absl/base/no_destructor.h"
25+
#include "absl/base/nullability.h"
2126
#include "absl/functional/overload.h"
27+
#include "absl/log/absl_check.h"
28+
#include "absl/strings/str_cat.h"
2229
#include "absl/types/variant.h"
2330

2431
namespace cel {
@@ -30,6 +37,96 @@ const TypeSpec& DefaultTypeSpec() {
3037
return *type;
3138
}
3239

40+
// Returns the display name used when formatting a primitive type:
// "bool", "int", "uint", "double", "string" or "bytes".
// Any enumerator not listed below yields the "*unspecified primitive*"
// placeholder instead of failing.
std::string FormatPrimitive(PrimitiveType t) {
41+
switch (t) {
42+
case PrimitiveType::kBool:
43+
return "bool";
44+
// Note: the 64-bit integer kinds are printed without a width suffix.
case PrimitiveType::kInt64:
45+
return "int";
46+
case PrimitiveType::kUint64:
47+
return "uint";
48+
case PrimitiveType::kDouble:
49+
return "double";
50+
case PrimitiveType::kString:
51+
return "string";
52+
case PrimitiveType::kBytes:
53+
return "bytes";
54+
// Fallback for any value not handled above.
default:
55+
return "*unspecified primitive*";
56+
}
57+
}
58+
59+
// Returns the fully qualified protobuf message name for a well-known type
// (Any, Duration, Timestamp). Any enumerator not listed below yields the
// "*unspecified well known*" placeholder instead of failing.
std::string FormatWellKnown(WellKnownTypeSpec t) {
60+
switch (t) {
61+
case WellKnownTypeSpec::kAny:
62+
return "google.protobuf.Any";
63+
case WellKnownTypeSpec::kDuration:
64+
return "google.protobuf.Duration";
65+
case WellKnownTypeSpec::kTimestamp:
66+
return "google.protobuf.Timestamp";
67+
// Fallback for any value not handled above.
default:
68+
return "*unspecified well known*";
69+
}
70+
}
71+
72+
// One pending formatting instruction: either a TypeSpec that still has to be
// formatted, or a literal piece of text (such as ")" or ", ") to append.
using FormatIns = std::variant<const TypeSpec* absl_nonnull, std::string>;
73+
// Work list of pending instructions. FormatTypeSpec consumes it from the
// back, so entries must be pushed in reverse of the desired output order.
using FormatStack = std::vector<FormatIns>;
74+
75+
// Formats a single TypeSpec node without recursing.
//
// Text that can be produced immediately is appended to `*out`. For kinds that
// contain nested TypeSpecs (abstract types with parameters, type(...),
// list(...), map(...)), only the opening text is appended; the nested
// TypeSpecs, separators and the closing ")" are pushed onto `stack` for the
// caller's loop to process. Because the caller pops from the back of `stack`,
// everything is pushed in reverse of the order it should appear in the output.
//
// A TypeSpec that matches none of the known kinds is rendered as "*error*".
void HandleFormatTypeSpec(const TypeSpec& t, FormatStack& stack,
76+
std::string* out) {
77+
if (t.has_dyn()) {
78+
absl::StrAppend(out, "dyn");
79+
} else if (t.has_null()) {
80+
absl::StrAppend(out, "null");
81+
} else if (t.has_primitive()) {
82+
absl::StrAppend(out, FormatPrimitive(t.primitive()));
83+
} else if (t.has_wrapper()) {
84+
// Wrapper types reuse the primitive name, e.g. "wrapper(int)".
absl::StrAppend(out, "wrapper(", FormatPrimitive(t.wrapper()), ")");
85+
} else if (t.has_well_known()) {
86+
absl::StrAppend(out, FormatWellKnown(t.well_known()));
87+
return;
88+
} else if (t.has_abstract_type()) {
89+
const auto& abs_type = t.abstract_type();
90+
// An abstract type without parameters is printed as its bare name.
if (abs_type.parameter_types().empty()) {
91+
absl::StrAppend(out, abs_type.name());
92+
return;
93+
}
94+
absl::StrAppend(out, abs_type.name(), "(");
95+
// The closing paren goes on first so that it is popped last.
stack.push_back(")");
96+
// Walk the parameters from last to first (i is a 1-based position) so that
// they are popped first to last, with ", " between adjacent parameters.
for (size_t i = abs_type.parameter_types().size(); i > 0; --i) {
97+
stack.push_back(&abs_type.parameter_types()[i - 1]);
98+
if (i > 1) {
99+
stack.push_back(", ");
100+
}
101+
}
102+
103+
} else if (t.has_type()) {
104+
// A nested TypeSpec equal to a default-constructed one is printed as the
// bare "type" rather than "type(...)".
if (t.type() == TypeSpec()) {
105+
absl::StrAppend(out, "type");
106+
return;
107+
}
108+
absl::StrAppend(out, "type(");
109+
stack.push_back(")");
110+
stack.push_back(&t.type());
111+
} else if (t.has_message_type()) {
112+
absl::StrAppend(out, t.message_type().type());
113+
} else if (t.has_type_param()) {
114+
absl::StrAppend(out, t.type_param().type());
115+
} else if (t.has_list_type()) {
116+
absl::StrAppend(out, "list(");
117+
stack.push_back(")");
118+
stack.push_back(&t.list_type().elem_type());
119+
} else if (t.has_map_type()) {
120+
absl::StrAppend(out, "map(");
121+
// Pushed as ")", value, ", ", key so the output reads "key, value)".
stack.push_back(")");
122+
stack.push_back(&t.map_type().value_type());
123+
stack.push_back(", ");
124+
stack.push_back(&t.map_type().key_type());
125+
} else {
126+
// None of the known kinds matched.
absl::StrAppend(out, "*error*");
127+
}
128+
}
129+
33130
TypeSpecKind CopyImpl(const TypeSpecKind& other) {
34131
return absl::visit(
35132
absl::Overload(
@@ -142,4 +239,24 @@ FunctionTypeSpec& FunctionTypeSpec::operator=(const FunctionTypeSpec& other) {
142239
return *this;
143240
}
144241

242+
// Returns a string representation of `t`, for example "list(int)" or
// "map(string, dyn)".
//
// The work list starts with `t` itself. Each iteration pops the most recently
// pushed instruction: literal text is appended directly, while a TypeSpec is
// handed to HandleFormatTypeSpec, which appends what it can and pushes any
// nested TypeSpecs and punctuation back onto the work list.
std::string FormatTypeSpec(const TypeSpec& t) {
243+
// Use a stack to avoid recursion.
244+
// Probably overly defensive, but fuzzers will often notice the recursion
245+
// and try to trigger it.
246+
std::string out;
247+
FormatStack seq;
248+
seq.push_back(&t);
249+
while (!seq.empty()) {
250+
// Take the instruction out of the stack before popping it; handling it may
// push further entries onto `seq`.
FormatIns ins = std::move(seq.back());
251+
seq.pop_back();
252+
// Literal text (")", ", ") is appended as-is.
if (std::holds_alternative<std::string>(ins)) {
253+
absl::StrAppend(&out, std::get<std::string>(ins));
254+
continue;
255+
}
256+
// The only other alternative of FormatIns is a TypeSpec pointer.
ABSL_DCHECK(std::holds_alternative<const TypeSpec*>(ins));
257+
HandleFormatTypeSpec(*std::get<const TypeSpec*>(ins), seq, &out);
258+
}
259+
return out;
260+
}
261+
145262
} // namespace cel

common/ast/metadata.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -740,6 +740,9 @@ class TypeSpec {
740740
TypeSpecKind type_kind_;
741741
};
742742

743+
// Returns a string representation of the given TypeSpec.
744+
std::string FormatTypeSpec(const TypeSpec& t);
745+
743746
// Describes a resolved reference to a declaration.
744747
class Reference {
745748
public:

compiler/BUILD

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ cc_library(
2727
"//checker:validation_result",
2828
"//parser:options",
2929
"//parser:parser_interface",
30+
"//validator",
3031
"@com_google_absl//absl/status",
3132
"@com_google_absl//absl/status:statusor",
3233
"@com_google_absl//absl/strings:string_view",
@@ -48,6 +49,7 @@ cc_library(
4849
"//internal:status_macros",
4950
"//parser",
5051
"//parser:parser_interface",
52+
"//validator",
5153
"@com_google_absl//absl/base:nullability",
5254
"@com_google_absl//absl/container:flat_hash_set",
5355
"@com_google_absl//absl/status",
@@ -78,6 +80,7 @@ cc_test(
7880
"//parser:macro",
7981
"//parser:parser_interface",
8082
"//testutil:baseline_tests",
83+
"//validator:timestamp_literal_validator",
8184
"@com_google_absl//absl/status",
8285
"@com_google_absl//absl/status:status_matchers",
8386
"@com_google_absl//absl/strings",

compiler/compiler.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
#include "checker/validation_result.h"
2929
#include "parser/options.h"
3030
#include "parser/parser_interface.h"
31+
#include "validator/validator.h"
3132

3233
namespace cel {
3334

@@ -109,6 +110,7 @@ class CompilerBuilder {
109110

110111
virtual TypeCheckerBuilder& GetCheckerBuilder() = 0;
111112
virtual ParserBuilder& GetParserBuilder() = 0;
113+
virtual Validator& GetValidator() = 0;
112114

113115
virtual absl::StatusOr<std::unique_ptr<Compiler>> Build() = 0;
114116
};
@@ -135,6 +137,9 @@ class Compiler {
135137

136138
// Accessor for the underlying parser.
137139
virtual const Parser& GetParser() const = 0;
140+
141+
// Accessor for the underlying validator.
142+
virtual const Validator& GetValidator() const = 0;
138143
};
139144

140145
} // namespace cel

compiler/compiler_factory.cc

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
3232
#include "internal/status_macros.h"
3333
#include "parser/parser.h"
3434
#include "parser/parser_interface.h"
35+
#include "validator/validator.h"
3536
#include "google/protobuf/descriptor.h"
3637

3738
namespace cel {
@@ -41,8 +42,12 @@ namespace {
4142
class CompilerImpl : public Compiler {
4243
public:
4344
CompilerImpl(std::unique_ptr<TypeChecker> type_checker,
44-
std::unique_ptr<Parser> parser)
45-
: type_checker_(std::move(type_checker)), parser_(std::move(parser)) {}
45+
std::unique_ptr<Parser> parser,
46+
// Copy the validator in case builder is reused.
47+
Validator validator)
48+
: type_checker_(std::move(type_checker)),
49+
parser_(std::move(parser)),
50+
validator_(std::move(validator)) {}
4651

4752
absl::StatusOr<ValidationResult> Compile(
4853
absl::string_view expression,
@@ -54,15 +59,20 @@ class CompilerImpl : public Compiler {
5459
type_checker_->Check(std::move(ast)));
5560

5661
result.SetSource(std::move(source));
62+
if (!validator_.validations().empty()) {
63+
validator_.UpdateValidationResult(result);
64+
}
5765
return result;
5866
}
5967

6068
const TypeChecker& GetTypeChecker() const override { return *type_checker_; }
6169
const Parser& GetParser() const override { return *parser_; }
70+
const Validator& GetValidator() const override { return validator_; }
6271

6372
private:
6473
std::unique_ptr<TypeChecker> type_checker_;
6574
std::unique_ptr<Parser> parser_;
75+
Validator validator_;
6676
};
6777

6878
class CompilerBuilderImpl : public CompilerBuilder {
@@ -126,17 +136,19 @@ class CompilerBuilderImpl : public CompilerBuilder {
126136
TypeCheckerBuilder& GetCheckerBuilder() override {
127137
return *type_checker_builder_;
128138
}
139+
Validator& GetValidator() override { return validator_; }
129140

130141
absl::StatusOr<std::unique_ptr<Compiler>> Build() override {
131142
CEL_ASSIGN_OR_RETURN(auto parser, parser_builder_->Build());
132143
CEL_ASSIGN_OR_RETURN(auto type_checker, type_checker_builder_->Build());
133144
return std::make_unique<CompilerImpl>(std::move(type_checker),
134-
std::move(parser));
145+
std::move(parser), validator_);
135146
}
136147

137148
private:
138149
std::unique_ptr<TypeCheckerBuilder> type_checker_builder_;
139150
std::unique_ptr<ParserBuilder> parser_builder_;
151+
Validator validator_;
140152

141153
absl::flat_hash_set<std::string> library_ids_;
142154
absl::flat_hash_set<std::string> subsets_;

compiler/compiler_factory_test.cc

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
#include "parser/macro.h"
3636
#include "parser/parser_interface.h"
3737
#include "testutil/baseline_tests.h"
38+
#include "validator/timestamp_literal_validator.h"
3839
#include "google/protobuf/descriptor.h"
3940

4041
namespace cel {
@@ -287,6 +288,23 @@ TEST(CompilerFactoryTest, DisableStandardMacrosWithStdlib) {
287288
EXPECT_TRUE(result.IsValid());
288289
}
289290

291+
// Verifies that a validation registered through the builder's validator is
// applied by the built compiler: an unparsable timestamp literal makes the
// result invalid, while a well-formed one stays valid.
TEST(CompilerFactoryTest, AddValidator) {
292+
ASSERT_OK_AND_ASSIGN(
293+
auto builder,
294+
NewCompilerBuilder(cel::internal::GetSharedTestingDescriptorPool()));
295+
296+
ASSERT_THAT(builder->AddLibrary(StandardCompilerLibrary()), IsOk());
297+
// Register the validation before Build() so the compiler picks it up.
builder->GetValidator().AddValidation(TimestampLiteralValidator());
298+
299+
ASSERT_OK_AND_ASSIGN(auto compiler, builder->Build());
300+
// Compile() itself succeeds in both cases; the validation outcome is
// reported through ValidationResult::IsValid().
ASSERT_OK_AND_ASSIGN(ValidationResult result,
301+
compiler->Compile("timestamp('invalid')"));
302+
EXPECT_FALSE(result.IsValid());
303+
ASSERT_OK_AND_ASSIGN(result,
304+
compiler->Compile("timestamp('2024-01-01T00:00:00Z')"));
305+
EXPECT_TRUE(result.IsValid());
306+
}
307+
290308
TEST(CompilerFactoryTest, FailsIfLibraryAddedTwice) {
291309
ASSERT_OK_AND_ASSIGN(
292310
auto builder,

extensions/BUILD

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -774,6 +774,8 @@ cc_library(
774774
"//runtime:runtime_builder",
775775
"//runtime/internal:runtime_friend_access",
776776
"//runtime/internal:runtime_impl",
777+
"//validator",
778+
"//validator:regex_validator",
777779
"@com_google_absl//absl/base:no_destructor",
778780
"@com_google_absl//absl/base:nullability",
779781
"@com_google_absl//absl/functional:bind_front",
@@ -814,6 +816,7 @@ cc_test(
814816
"//runtime:reference_resolver",
815817
"//runtime:runtime_options",
816818
"//runtime:standard_runtime_builder_factory",
819+
"//validator",
817820
"@com_google_absl//absl/status",
818821
"@com_google_absl//absl/status:status_matchers",
819822
"@com_google_absl//absl/status:statusor",

extensions/regex_ext.cc

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,8 @@
4242
#include "runtime/internal/runtime_friend_access.h"
4343
#include "runtime/internal/runtime_impl.h"
4444
#include "runtime/runtime_builder.h"
45+
#include "validator/regex_validator.h"
46+
#include "validator/validator.h"
4547
#include "google/protobuf/arena.h"
4648
#include "google/protobuf/descriptor.h"
4749
#include "google/protobuf/message.h"
@@ -341,4 +343,10 @@ CompilerLibrary RegexExtCompilerLibrary() {
341343
return CompilerLibrary::FromCheckerLibrary(RegexExtCheckerLibrary());
342344
}
343345

346+
// Builds the Validation for the regex extension functions by delegating to
// RegexPatternValidator with an empty id and one entry per function:
// regex.extract, regex.extractAll and regex.replace.
// NOTE(review): the `1` paired with each name is presumably the zero-based
// index of the pattern argument -- confirm against RegexPatternValidator.
Validation RegexExtValidator() {
347+
return RegexPatternValidator(
348+
/*id=*/"",
349+
{{"regex.extract", 1}, {"regex.extractAll", 1}, {"regex.replace", 1}});
350+
}
351+
344352
} // namespace cel::extensions

extensions/regex_ext.h

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -81,6 +81,7 @@
8181
#include "eval/public/cel_function_registry.h"
8282
#include "eval/public/cel_options.h"
8383
#include "runtime/runtime_builder.h"
84+
#include "validator/validator.h"
8485

8586
namespace cel::extensions {
8687

@@ -119,5 +120,12 @@ CheckerLibrary RegexExtCheckerLibrary();
119120
// regex.extractAll(target: str, pattern: str) -> list<str>
120121
CompilerLibrary RegexExtCompilerLibrary();
121122

123+
// Returns a `Validation` that checks all calls to the CEL regex extension
124+
// functions.
125+
//
126+
// It validates that if the pattern is a literal string, it is a valid regular
127+
// expression.
128+
Validation RegexExtValidator();
129+
122130
} // namespace cel::extensions
123131
#endif // THIRD_PARTY_CEL_CPP_EXTENSIONS_REGEX_EXT_H_

0 commit comments

Comments
 (0)