From 651b07551b7d7759a761682644da75860491b1e4 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Fri, 9 Jan 2026 12:47:54 +0000 Subject: [PATCH 01/16] fix: Align vdu_rejects counter with actual VDU behaviour This counter is incremented whenever a VDU returns a value other than `1`, whereas `ok` and `true` are also treated as acceptable success values. This fixes the counter to only increment on actual failure responses. --- src/couch/src/couch_query_servers.erl | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl index 1652fc09a2a..7ab662f850c 100644 --- a/src/couch/src/couch_query_servers.erl +++ b/src/couch/src/couch_query_servers.erl @@ -477,18 +477,18 @@ builtin_cmp_last(A, B) -> validate_doc_update(Db, DDoc, EditDoc, DiskDoc, Ctx, SecObj) -> JsonEditDoc = couch_doc:to_json_obj(EditDoc, [revs]), JsonDiskDoc = json_doc(DiskDoc), - Resp = ddoc_prompt( - Db, - DDoc, - [<<"validate_doc_update">>], - [JsonEditDoc, JsonDiskDoc, Ctx, SecObj] - ), - if - Resp == 1 -> ok; - true -> couch_stats:increment_counter([couchdb, query_server, vdu_rejects], 1) - end, + Args = [JsonEditDoc, JsonDiskDoc, Ctx, SecObj], + + Resp = + case ddoc_prompt(Db, DDoc, [<<"validate_doc_update">>], Args) of + Code when Code =:= 1; Code =:= ok; Code =:= true -> + ok; + Other -> + couch_stats:increment_counter([couchdb, query_server, vdu_rejects], 1), + Other + end, case Resp of - RespCode when RespCode =:= 1; RespCode =:= ok; RespCode =:= true -> + ok -> ok; {[{<<"forbidden">>, Message}]} -> throw({forbidden, Message}); From 85c33978072daea69ac97928d2e39a6d6fe16988 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Thu, 8 Jan 2026 15:19:47 +0000 Subject: [PATCH 02/16] chore: Add some basic testing for the JS-based VDU interface --- test/elixir/test/config/suite.elixir | 6 ++ test/elixir/test/validate_doc_update_test.exs | 79 +++++++++++++++++++ 2 files changed, 85 insertions(+) 
create mode 100644 test/elixir/test/validate_doc_update_test.exs diff --git a/test/elixir/test/config/suite.elixir b/test/elixir/test/config/suite.elixir index 81ed1e63d7e..05fd4a7a520 100644 --- a/test/elixir/test/config/suite.elixir +++ b/test/elixir/test/config/suite.elixir @@ -519,6 +519,12 @@ "serial execution is not spuriously counted as loop on test_rewrite_suite_db", "serial execution is not spuriously counted as loop on test_rewrite_suite_db%2Fwith_slashes" ], + "ValidateDocUpdateTest": [ + "JavaScript VDU accepts a valid document", + "JavaScript VDU rejects an invalid document", + "JavaScript VDU accepts a valid change", + "JavaScript VDU rejects an invalid change", + ], "SecurityValidationTest": [ "Author presence and user security", "Author presence and user security when replicated", diff --git a/test/elixir/test/validate_doc_update_test.exs b/test/elixir/test/validate_doc_update_test.exs new file mode 100644 index 00000000000..5d15db10165 --- /dev/null +++ b/test/elixir/test/validate_doc_update_test.exs @@ -0,0 +1,79 @@ +defmodule ValidateDocUpdateTest do + use CouchTestCase + + @moduledoc """ + Test validate_doc_update behaviour + """ + + @js_type_check %{ + language: "javascript", + + validate_doc_update: ~s""" + function (newDoc) { + if (!newDoc.type) { + throw {forbidden: 'Documents must have a type field'}; + } + } + """ + } + + @tag :with_db + test "JavaScript VDU accepts a valid document", context do + db = context[:db_name] + Couch.put("/#{db}/_design/js-test", body: @js_type_check) + + resp = Couch.put("/#{db}/doc", body: %{"type" => "movie"}) + assert resp.status_code == 201 + assert resp.body["ok"] == true + end + + @tag :with_db + test "JavaScript VDU rejects an invalid document", context do + db = context[:db_name] + Couch.put("/#{db}/_design/js-test", body: @js_type_check) + + resp = Couch.put("/#{db}/doc", body: %{"not" => "valid"}) + assert resp.status_code == 403 + assert resp.body["error"] == "forbidden" + end + + @js_change_check %{ 
+ language: "javascript", + + validate_doc_update: ~s""" + function (newDoc, oldDoc) { + if (oldDoc && newDoc.type !== oldDoc.type) { + throw {forbidden: 'Documents cannot change their type field'}; + } + } + """ + } + + @tag :with_db + test "JavaScript VDU accepts a valid change", context do + db = context[:db_name] + Couch.put("/#{db}/_design/js-test", body: @js_change_check) + + Couch.put("/#{db}/doc", body: %{"type" => "movie"}) + + doc = Couch.get("/#{db}/doc").body + updated = doc |> Map.merge(%{"type" => "movie", "title" => "Duck Soup"}) + resp = Couch.put("/#{db}/doc", body: updated) + + assert resp.status_code == 201 + end + + @tag :with_db + test "JavaScript VDU rejects an invalid change", context do + db = context[:db_name] + Couch.put("/#{db}/_design/js-test", body: @js_change_check) + + Couch.put("/#{db}/doc", body: %{"type" => "movie"}) + + doc = Couch.get("/#{db}/doc").body + updated = doc |> Map.put("type", "director") + resp = Couch.put("/#{db}/doc", body: updated) + + assert resp.status_code == 403 + end +end From 53926117767d02622c609b36eb4321402dbed966 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Fri, 9 Jan 2026 14:05:43 +0000 Subject: [PATCH 03/16] feat: Add the ability for VDUs to be written as Mango selectors --- src/couch_mrview/src/couch_mrview.erl | 2 +- src/mango/src/mango_native_proc.erl | 27 ++++ test/elixir/test/config/suite.elixir | 6 + test/elixir/test/validate_doc_update_test.exs | 133 ++++++++++++++++++ 4 files changed, 167 insertions(+), 1 deletion(-) diff --git a/src/couch_mrview/src/couch_mrview.erl b/src/couch_mrview/src/couch_mrview.erl index bc7b1f8abf3..244f668af03 100644 --- a/src/couch_mrview/src/couch_mrview.erl +++ b/src/couch_mrview/src/couch_mrview.erl @@ -62,7 +62,7 @@ validate_ddoc_fields(DDoc) -> [{<<"rewrites">>, [string, array]}], [{<<"shows">>, object}, {any, [object, string]}], [{<<"updates">>, object}, {any, [object, string]}], - [{<<"validate_doc_update">>, string}], + [{<<"validate_doc_update">>, 
[string, object]}], [{<<"views">>, object}, {<<"lib">>, object}], [{<<"views">>, object}, {any, object}, {<<"map">>, MapFuncType}], [{<<"views">>, object}, {any, object}, {<<"reduce">>, string}] diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl index 511a987199e..edcecd4b6fb 100644 --- a/src/mango/src/mango_native_proc.erl +++ b/src/mango/src/mango_native_proc.erl @@ -29,6 +29,7 @@ -record(st, { indexes = [], + validators = [], timeout = 5000 }). @@ -94,6 +95,32 @@ handle_call({prompt, [<<"nouveau_index_doc">>, Doc]}, _From, St) -> Else end, {reply, Vals, St}; +handle_call({prompt, [<<"ddoc">>, <<"new">>, DDocId, {DDoc}]}, _From, St) -> + NewSt = + case couch_util:get_value(<<"validate_doc_update">>, DDoc) of + undefined -> + St; + Selector0 -> + Selector = mango_selector:normalize(Selector0), + Validators = couch_util:set_value(DDocId, St#st.validators, Selector), + St#st{validators = Validators} + end, + {reply, true, NewSt}; +handle_call({prompt, [<<"ddoc">>, DDocId, [<<"validate_doc_update">>], Args]}, _From, St) -> + case couch_util:get_value(DDocId, St#st.validators) of + undefined -> + Msg = [<<"validate_doc_update">>, DDocId], + {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}; + Selector -> + [NewDoc, OldDoc, _Ctx, _SecObj] = Args, + Struct = {[{<<"newDoc">>, NewDoc}, {<<"oldDoc">>, OldDoc}]}, + Reply = + case mango_selector:match(Selector, Struct) of + true -> true; + _ -> {[{<<"forbidden">>, <<"document is not valid">>}]} + end, + {reply, Reply, St} + end; handle_call(Msg, _From, St) -> {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}. 
diff --git a/test/elixir/test/config/suite.elixir b/test/elixir/test/config/suite.elixir index 05fd4a7a520..4df5991bdc0 100644 --- a/test/elixir/test/config/suite.elixir +++ b/test/elixir/test/config/suite.elixir @@ -524,6 +524,12 @@ "JavaScript VDU rejects an invalid document", "JavaScript VDU accepts a valid change", "JavaScript VDU rejects an invalid change", + "Mango VDU accepts a valid document", + "Mango VDU rejects an invalid document", + "updating a Mango VDU updates its effects", + "converting a Mango VDU to JavaScript updates its effects", + "deleting a Mango VDU removes its effects", + "Mango VDU rejects a doc if any existing ddoc fails to match", ], "SecurityValidationTest": [ "Author presence and user security", diff --git a/test/elixir/test/validate_doc_update_test.exs b/test/elixir/test/validate_doc_update_test.exs index 5d15db10165..93ed8f177cf 100644 --- a/test/elixir/test/validate_doc_update_test.exs +++ b/test/elixir/test/validate_doc_update_test.exs @@ -76,4 +76,137 @@ defmodule ValidateDocUpdateTest do assert resp.status_code == 403 end + + @mango_type_check %{ + language: "query", + + validate_doc_update: %{ + "newDoc" => %{"type" => %{"$exists" => true}} + } + } + + @tag :with_db + test "Mango VDU accepts a valid document", context do + db = context[:db_name] + resp = Couch.put("/#{db}/_design/mango-test", body: @mango_type_check) + assert resp.status_code == 201 + + resp = Couch.put("/#{db}/doc", body: %{"type" => "movie"}) + assert resp.status_code == 201 + assert resp.body["ok"] == true + end + + @tag :with_db + test "Mango VDU rejects an invalid document", context do + db = context[:db_name] + resp = Couch.put("/#{db}/_design/mango-test", body: @mango_type_check) + assert resp.status_code == 201 + + resp = Couch.put("/#{db}/doc", body: %{"no" => "type"}) + assert resp.status_code == 403 + assert resp.body["error"] == "forbidden" + end + + @tag :with_db + test "updating a Mango VDU updates its effects", context do + db = context[:db_name] 
+ + resp = Couch.put("/#{db}/_design/mango-test", body: @mango_type_check) + assert resp.status_code == 201 + + ddoc = %{ + language: "query", + + validate_doc_update: %{ + "newDoc" => %{ + "type" => %{"$type" => "string"}, + "year" => %{"$lt" => 2026} + } + } + } + resp = Couch.put("/#{db}/_design/mango-test", body: ddoc, query: %{rev: resp.body["rev"]}) + assert resp.status_code == 201 + + resp = Couch.put("/#{db}/doc1", body: %{"type" => "movie", "year" => 1994}) + assert resp.status_code == 201 + + resp = Couch.put("/#{db}/doc2", body: %{"type" => 42, "year" => 1994}) + assert resp.status_code == 403 + assert resp.body["error"] == "forbidden" + + resp = Couch.put("/#{db}/doc3", body: %{"type" => "movie", "year" => 2094}) + assert resp.status_code == 403 + assert resp.body["error"] == "forbidden" + end + + @tag :with_db + test "converting a Mango VDU to JavaScript updates its effects", context do + db = context[:db_name] + + resp = Couch.put("/#{db}/_design/mango-test", body: @mango_type_check) + assert resp.status_code == 201 + + ddoc = %{ + language: "javascript", + + validate_doc_update: ~s""" + function (newDoc) { + if (typeof newDoc.year !== 'number') { + throw {forbidden: 'Documents must have a valid year field'}; + } + } + """ + } + resp = Couch.put("/#{db}/_design/mango-test", body: ddoc, query: %{rev: resp.body["rev"]}) + assert resp.status_code == 201 + + resp = Couch.put("/#{db}/doc1", body: %{"year" => 1994}) + assert resp.status_code == 201 + + resp = Couch.put("/#{db}/doc2", body: %{"year" => "1994"}) + assert resp.status_code == 403 + assert resp.body["error"] == "forbidden" + end + + @tag :with_db + test "deleting a Mango VDU removes its effects", context do + db = context[:db_name] + + resp = Couch.put("/#{db}/_design/mango-test", body: @mango_type_check) + assert resp.status_code == 201 + + resp = Couch.delete("/#{db}/_design/mango-test", query: %{rev: resp.body["rev"]}) + assert resp.status_code == 200 + + resp = Couch.put("/#{db}/doc", body: 
%{"no" => "type"}) + assert resp.status_code == 201 + end + + @tag :with_db + test "Mango VDU rejects a doc if any existing ddoc fails to match", context do + db = context[:db_name] + resp = Couch.put("/#{db}/_design/mango-test", body: @mango_type_check) + assert resp.status_code == 201 + + ddoc = %{ + language: "query", + + validate_doc_update: %{ + "newDoc" => %{"year" => %{"$lt" => 2026}} + } + } + resp = Couch.put("/#{db}/_design/mango-test-2", body: ddoc) + assert resp.status_code == 201 + + resp = Couch.put("/#{db}/doc1", body: %{"type" => "movie", "year" => 1994}) + assert resp.status_code == 201 + + resp = Couch.put("/#{db}/doc2", body: %{"year" => 1994}) + assert resp.status_code == 403 + assert resp.body["error"] == "forbidden" + + resp = Couch.put("/#{db}/doc3", body: %{"type" => "movie", "year" => 2094}) + assert resp.status_code == 403 + assert resp.body["error"] == "forbidden" + end end From c44c2581a677f6b34d3b4fe48d3d3b70b2c1c282 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Tue, 13 Jan 2026 14:27:48 +0000 Subject: [PATCH 04/16] [wip] mango unit tests --- src/mango/src/mango_selector.erl | 618 ++++++++++++++++++++++++++++++- 1 file changed, 617 insertions(+), 1 deletion(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index 9c5b7a96f7f..d8e39d89248 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -1070,7 +1070,7 @@ check_beginswith(Field, Prefix) -> % in the middle of test output. match_int(mango_selector:normalize(Selector), ?TEST_DOC). -match_beginswith_test() -> +match_beginswith_errors_test() -> % matching ?assertEqual(true, check_beginswith(<<"_id">>, <<"f">>)), % no match (user_id field in the test doc contains an integer) @@ -1087,4 +1087,620 @@ match_beginswith_test() -> check_beginswith(<<"user_id">>, InvalidArg) ). 
+check_selector(Selector, Results) -> + SelPos = normalize({[{<<"x">>, Selector}]}), + SelNeg = normalize({[{<<"x">>, {[{<<"$not">>, Selector}]}}]}), + + Check = fun({Result, Value}) -> + Doc = {[{<<"x">>, Value}]}, + ?assertEqual(Result, match_int(SelPos, Doc)), + ?assertEqual(not Result, match_int(SelNeg, Doc)) + end, + + lists:foreach(Check, Results). + +match_lt_test() -> + check_selector({[{<<"$lt">>, 5}]}, [{true, 4}, {false, 5}, {false, 6}]), + + check_selector({[{<<"$lt">>, <<"hello">>}]}, [ + {true, <<"held">>}, + {false, <<"hello">>}, + {false, <<"help">>} + ]), + + check_selector({[{<<"$lt">>, [1, 2, 3]}]}, [ + {true, [1, 2, 2]}, + {true, [1, 2]}, + {false, [1, 2, 3]}, + {false, [1, 2, 4]}, + {false, [1, 3]} + ]). + +match_lte_test() -> + check_selector({[{<<"$lte">>, 5}]}, [{true, 4}, {true, 5}, {false, 6}]), + + check_selector({[{<<"$lte">>, <<"hello">>}]}, [ + {true, <<"held">>}, + {true, <<"hello">>}, + {false, <<"help">>} + ]), + + check_selector({[{<<"$lte">>, [1, 2, 3]}]}, [ + {true, [1, 2, 2]}, + {true, [1, 2]}, + {true, [1, 2, 3]}, + {false, [1, 2, 4]}, + {false, [1, 3]} + ]). + +match_gt_test() -> + check_selector({[{<<"$gt">>, 5}]}, [{false, 4}, {false, 5}, {true, 6}]), + + check_selector({[{<<"$gt">>, <<"hello">>}]}, [ + {false, <<"held">>}, + {false, <<"hello">>}, + {true, <<"help">>} + ]), + + check_selector({[{<<"$gt">>, [1, 2, 3]}]}, [ + {false, [1, 2, 2]}, + {false, [1, 2]}, + {false, [1, 2, 3]}, + {true, [1, 2, 4]}, + {true, [1, 3]} + ]). + +match_gte_test() -> + check_selector({[{<<"$gte">>, 5}]}, [{false, 4}, {true, 5}, {true, 6}]), + + check_selector({[{<<"$gte">>, <<"hello">>}]}, [ + {false, <<"held">>}, + {true, <<"hello">>}, + {true, <<"help">>} + ]), + + check_selector({[{<<"$gte">>, [1, 2, 3]}]}, [ + {false, [1, 2, 2]}, + {false, [1, 2]}, + {true, [1, 2, 3]}, + {true, [1, 2, 4]}, + {true, [1, 3]} + ]). 
+ +match_eq_test() -> + check_selector({[{<<"$eq">>, 5}]}, [{true, 5}, {false, 6}]), + check_selector({[{<<"$eq">>, <<"hello">>}]}, [{true, <<"hello">>}, {false, <<"help">>}]), + + check_selector({[{<<"$eq">>, [1, [2, 3, 4], 5]}]}, [ + {true, [1, [2, 3, 4], 5]}, + {false, [1, [2, 3, 4]]}, + {false, [1, [2, 3, 4], 5, 6]}, + {false, [1, [2, 7, 4], 5]} + ]), + + check_selector({[{<<"$eq">>, {[{<<"a">>, {[{<<"b">>, {[{<<"c">>, 7}]}}]}}]}}]}, [ + {true, {[{<<"a">>, {[{<<"b">>, {[{<<"c">>, 7}]}}]}}]}}, + {false, {[{<<"a">>, {[{<<"b">>, {[{<<"c">>, 8}]}}]}}]}}, + {false, {[{<<"a">>, {[{<<"b">>, {[{<<"d">>, 7}]}}]}}]}}, + {false, {[{<<"a">>, {[{<<"d">>, {[{<<"c">>, 7}]}}]}}]}} + ]). + +match_ne_test() -> + check_selector({[{<<"$ne">>, 5}]}, [{false, 5}, {true, 6}]), + + % the $ne operator still requires a value to be present... + SelInt = normalize({[{<<"x">>, {[{<<"$ne">>, 5}]}}]}), + ?assertEqual(false, match_int(SelInt, {[]})), + + % ... which, due to normalization, means that using $not with $eq does not + % match the empty doc + SelNotEq = normalize({[{<<"$not">>, {[{<<"x">>, 5}]}}]}), + ?assertEqual(false, match_int(SelNotEq, {[]})), + + check_selector({[{<<"$ne">>, <<"hello">>}]}, [{false, <<"hello">>}, {true, <<"help">>}]), + + check_selector({[{<<"$ne">>, [1, [2, 3, 4], 5]}]}, [ + {false, [1, [2, 3, 4], 5]}, + {true, [1, [2, 3, 4]]}, + {true, [1, [2, 3, 4], 5, 6]}, + {true, [1, [2, 7, 4], 5]} + ]), + + check_selector({[{<<"$ne">>, {[{<<"a">>, {[{<<"b">>, {[{<<"c">>, 7}]}}]}}]}}]}, [ + {false, {[{<<"a">>, {[{<<"b">>, {[{<<"c">>, 7}]}}]}}]}}, + {true, {[{<<"a">>, {[{<<"b">>, {[{<<"c">>, 8}]}}]}}]}}, + {true, {[{<<"a">>, {[{<<"b">>, {[{<<"d">>, 7}]}}]}}]}}, + {true, {[{<<"a">>, {[{<<"d">>, {[{<<"c">>, 7}]}}]}}]}} + ]). 
+ +match_in_test() -> + check_selector({[{<<"$in">>, []}]}, [ + {false, 0}, + {false, true}, + {false, <<"foo">>} + ]), + + check_selector( + {[ + {<<"$in">>, [ + 42, + false, + <<"bar">>, + [[<<"nested">>], <<"list">>], + {[{<<"b">>, 2}]} + ]} + ]}, + [ + {true, 42}, + {true, false}, + {true, <<"bar">>}, + {true, {[{<<"b">>, 2}]}}, + + {false, 43}, + {false, true}, + {false, <<"bars">>}, + {false, {[{<<"b">>, 2}, {<<"c">>, 3}]}}, + + % when the input is an array, $in matches if any of the array items + % match... + {true, [0, 42]}, + {true, [0, false]}, + {true, [0, <<"bar">>]}, + {true, [0, {[{<<"b">>, 2}]}]}, + + % ... which means it doesn't directly match when one of the + % candidate values is itself an array + {false, [[<<"nested">>], <<"list">>]}, + {true, [0, [[<<"nested">>], <<"list">>]]} + ] + ). + +match_nin_test() -> + check_selector({[{<<"$nin">>, []}]}, [ + {true, 0}, + {true, true}, + {true, <<"foo">>} + ]), + + check_selector( + {[ + {<<"$nin">>, [ + 42, + false, + <<"bar">>, + [[<<"nested">>], <<"list">>], + {[{<<"b">>, 2}]} + ]} + ]}, + [ + {false, 42}, + {false, false}, + {false, <<"bar">>}, + {false, {[{<<"b">>, 2}]}}, + + {true, 43}, + {true, true}, + {true, <<"bars">>}, + {true, {[{<<"b">>, 2}, {<<"c">>, 3}]}}, + + % when the input is an array, $nin matches if none of the array items + % match... + {false, [0, 42]}, + {false, [0, false]}, + {false, [0, <<"bar">>]}, + {false, [0, {[{<<"b">>, 2}]}]}, + + % ... which means it doesn't directly match when one of the + % candidate values is itself an array + {true, [[<<"nested">>], <<"list">>]}, + {false, [0, [[<<"nested">>], <<"list">>]]} + ] + ). 
+ +match_all_test() -> + % { "$all": [] } matches nothing, not even arrays + check_selector({[{<<"$all">>, []}]}, [ + {false, []}, + {false, [42]}, + {false, {[]}}, + {false, <<"foo">>} + ]), + + % normally, input lists can contain the required items in any order + check_selector({[{<<"$all">>, [1, 2, 3, 4]}]}, [ + {true, [3, 2, 4, 1]}, + {true, [0, 4, 3, 5, 2, 1, 6]}, + {false, [3, 2, 4]}, + {false, []} + ]), + + % negation means the input must lack at least one of the items + check_selector({[{<<"$not">>, {[{<<"$all">>, [1, 2, 3, 4]}]}}]}, [ + {true, [2, 4, 1]}, + {false, [2, 4, 1, 3]}, + {true, []} + ]), + + % the special $all: [List] form allows the input to exactly match List... + check_selector({[{<<"$all">>, [[1, 2, 3, 4]]}]}, [ + {true, [1, 2, 3, 4]}, + {false, [4, 3, 2, 1]}, + {false, [1, 3, 4]}, + {false, []}, + % ... or to contain List + {true, [5, [1, 2, 3, 4], 6]}, + {false, [5, [1, 3, 4], 6]}, + {false, [5, [1, 3, 2, 4], 6]} + ]), + + % the special behaviour of $all: [X] only applies when X is a list + check_selector({[{<<"$all">>, [<<"hello">>]}]}, [ + {false, <<"hello">>}, + {true, [<<"hello">>]}, + {true, [0, <<"hello">>, 1]}, + {false, []} + ]), + + % values must match exactly and not contain extra fields + check_selector({[{<<"$all">>, [{[{<<"a">>, 1}]}]}]}, [ + {true, [{[{<<"a">>, 1}]}]}, + {false, [{[{<<"a">>, 1}, {<<"b">>, 2}]}]} + ]). 
+ +match_exists_test() -> + check_selector({[{<<"x">>, {[{<<"$exists">>, true}]}}]}, [ + {true, {[{<<"x">>, 0}]}}, + {false, {[{<<"y">>, 0}]}}, + {false, {[]}} + ]), + + check_selector({[{<<"x">>, {[{<<"$exists">>, false}]}}]}, [ + {false, {[{<<"x">>, 0}]}}, + {true, {[{<<"y">>, 0}]}}, + {true, {[]}} + ]), + + % due to normalizing to { "x": { "$ne": 0 } }, this does not match the empty doc + SelNeg = normalize({[{<<"x">>, {[{<<"$not">>, {[{<<"$eq">>, 0}]}}]}}]}), + SelPos = normalize({[{<<"x">>, 0}]}), + ?assertEqual(false, match_int(SelNeg, {[]})), + ?assertEqual(false, match_int(SelPos, {[]})), + + % including { "$exists": true } in the negated part *does* match the empty doc + check_selector( + {[ + {<<"x">>, + {[ + {<<"$not">>, + {[ + {<<"$exists">>, true}, + {<<"$eq">>, 0} + ]}} + ]}} + ]}, + [ + {true, {[{<<"x">>, 1}]}}, + {false, {[{<<"x">>, 0}]}}, + {true, {[]}} + ] + ). + +match_type_test() -> + check_selector({[{<<"$type">>, <<"null">>}]}, [ + {true, null}, + {false, false}, + {false, {[]}} + ]), + + check_selector({[{<<"$type">>, <<"boolean">>}]}, [ + {true, true}, + {true, false}, + {false, 0} + ]), + + check_selector({[{<<"$type">>, <<"number">>}]}, [ + {true, 42}, + {true, 3.14}, + {true, 0}, + {false, true}, + {false, [1]}, + {false, <<"1">>} + ]), + + check_selector({[{<<"$type">>, <<"string">>}]}, [ + {true, <<"">>}, + {true, <<"hello">>}, + {false, []} + ]), + + check_selector({[{<<"$type">>, <<"array">>}]}, [ + {true, []}, + {true, [1, 2]}, + {false, {[]}}, + {false, <<"hi">>} + ]), + + check_selector({[{<<"$type">>, <<"object">>}]}, [ + {true, {[]}}, + {true, {[{<<"a">>, 1}]}}, + {false, [{<<"a">>, 1}]}, + {false, null} + ]). + +match_regex_test() -> + check_selector({[{<<"$regex">>, <<"^[0-9a-f]+$">>}]}, [ + {false, <<"">>}, + {true, <<"3a0df5e">>}, + {false, <<"3a0gf5e">>}, + {false, 42} + ]). 
+ +match_beginswith_test() -> + check_selector({[{<<"$beginsWith">>, <<"foo">>}]}, [ + {true, <<"foo">>}, + {true, <<"food">>}, + {true, <<"fool me once">>}, + {false, <<"more food">>}, + {false, <<"fo">>}, + {false, 42} + ]). + +match_mod_test() -> + check_selector({[{<<"$mod">>, [28, 1]}]}, [ + {true, 1}, + {true, 29}, + {true, 57}, + {false, 58}, + {false, <<"57">>} + ]). + +match_size_test() -> + check_selector({[{<<"$size">>, 3}]}, [ + {false, 3}, + {false, <<"fun">>}, + {true, [0, 0, 0]}, + {false, [0, 0]}, + {false, [0, 0, 0, 0]} + ]). + +match_allmatch_test() -> + % $allMatch is defined to return false for empty lists + check_selector({[{<<"$allMatch">>, {[{<<"$eq">>, 0}]}}]}, [ + {false, []}, + {true, [0]}, + {false, [1]}, + {false, [0, 1]} + ]), + + % because of their behaviour on empty lists, { "$not": { "$allMatch": S } } + % is not equivalent to { "$elemMatch": { "$not": S } } + check_selector({[{<<"$elemMatch">>, {[{<<"$ne">>, 0}]}}]}, [ + {false, []}, + {false, [0]}, + {true, [1]}, + {true, [0, 1]} + ]). + +match_elemmatch_test() -> + check_selector({[{<<"$elemMatch">>, {[{<<"$eq">>, 0}]}}]}, [ + {false, []}, + {true, [0]}, + {false, [1]}, + {true, [0, 1]} + ]). + +match_keymapmatch_test() -> + check_selector({[{<<"$keyMapMatch">>, {[{<<"$regex">>, <<"^[a-z]+$">>}]}}]}, [ + {true, {[{<<"hello">>, 0}]}}, + {true, {[{<<"a">>, 1}, {<<"b">>, 2}]}}, + {true, {[{<<"a">>, 1}, {<<"b4">>, 2}]}}, + {false, {[{<<"b4">>, 2}]}}, + {false, {[]}} + ]). 
+ +match_object_test() -> + Doc1 = {[]}, + Doc2 = {[{<<"x">>, {[]}}]}, + Doc3 = {[{<<"x">>, {[{<<"a">>, 1}]}}]}, + Doc4 = {[{<<"x">>, {[{<<"a">>, 1}, {<<"b">>, 2}]}}]}, + Doc5 = {[{<<"x">>, []}]}, + + % the empty selector matches any document + SelEmpty = normalize({[]}), + ?assertEqual({[]}, SelEmpty), + ?assertEqual(true, match_int(SelEmpty, Doc1)), + ?assertEqual(true, match_int(SelEmpty, Doc2)), + ?assertEqual(true, match_int(SelEmpty, Doc3)), + ?assertEqual(true, match_int(SelEmpty, Doc4)), + ?assertEqual(true, match_int(SelEmpty, Doc5)), + + % an inner empty object selector matches only empty objects + SelEmptyField = normalize({[{<<"x">>, {[]}}]}), + ?assertEqual({[{<<"x">>, {[{<<"$eq">>, {[]}}]}}]}, SelEmptyField), + ?assertEqual(false, match_int(SelEmptyField, Doc1)), + ?assertEqual(true, match_int(SelEmptyField, Doc2)), + ?assertEqual(false, match_int(SelEmptyField, Doc3)), + ?assertEqual(false, match_int(SelEmptyField, Doc4)), + ?assertEqual(false, match_int(SelEmptyField, Doc5)), + + % negated empty object selector matches a value which is present and is not the empty object + SelNotEmptyField = normalize({[{<<"$not">>, {[{<<"x">>, {[]}}]}}]}), + ?assertEqual({[{<<"x">>, {[{<<"$ne">>, {[]}}]}}]}, SelNotEmptyField), + ?assertEqual(false, match_int(SelNotEmptyField, Doc1)), + ?assertEqual(false, match_int(SelNotEmptyField, Doc2)), + ?assertEqual(true, match_int(SelNotEmptyField, Doc3)), + ?assertEqual(true, match_int(SelNotEmptyField, Doc4)), + ?assertEqual(true, match_int(SelNotEmptyField, Doc5)), + + % inner object selectors with fields match objects with at least those fields + Sel1Field = normalize({[{<<"x">>, {[{<<"a">>, 1}]}}]}), + ?assertEqual({[{<<"x.a">>, {[{<<"$eq">>, 1}]}}]}, Sel1Field), + ?assertEqual(false, match_int(Sel1Field, Doc1)), + ?assertEqual(false, match_int(Sel1Field, Doc2)), + ?assertEqual(true, match_int(Sel1Field, Doc3)), + ?assertEqual(true, match_int(Sel1Field, Doc4)), + ?assertEqual(false, match_int(Sel1Field, Doc5)), + + % 
inner object selectors with multiple fields are normalized with $and + Sel2Field = normalize({[{<<"x">>, {[{<<"a">>, 1}, {<<"b">>, 2}]}}]}), + ?assertEqual( + {[ + {<<"$and">>, [ + {[{<<"x.a">>, {[{<<"$eq">>, 1}]}}]}, + {[{<<"x.b">>, {[{<<"$eq">>, 2}]}}]} + ]} + ]}, + Sel2Field + ), + ?assertEqual(false, match_int(Sel2Field, Doc1)), + ?assertEqual(false, match_int(Sel2Field, Doc2)), + ?assertEqual(false, match_int(Sel2Field, Doc3)), + ?assertEqual(true, match_int(Sel2Field, Doc4)), + ?assertEqual(false, match_int(Sel2Field, Doc5)), + + % check shorthand syntax + SelShort = normalize({[{<<"x.b">>, 2}]}), + ?assertEqual({[{<<"x.b">>, {[{<<"$eq">>, 2}]}}]}, SelShort), + ?assertEqual(false, match_int(SelShort, Doc1)), + ?assertEqual(false, match_int(SelShort, Doc2)), + ?assertEqual(false, match_int(SelShort, Doc3)), + ?assertEqual(true, match_int(SelShort, Doc4)), + ?assertEqual(false, match_int(SelShort, Doc5)). + +match_and_test() -> + % $and with an empty array matches anything + SelEmpty = normalize({[{<<"x">>, {[{<<"$and">>, []}]}}]}), + ?assertEqual(true, match_int(SelEmpty, {[{<<"x">>, 0}]})), + ?assertEqual(true, match_int(SelEmpty, {[{<<"x">>, false}]})), + ?assertEqual(true, match_int(SelEmpty, {[{<<"x">>, []}]})), + ?assertEqual(true, match_int(SelEmpty, {[]})), + + % due to { "$or": [] } matching anything, negating { "$and": [] } also + % matches anything + SelNotEmpty = normalize({[{<<"x">>, {[{<<"$not">>, {[{<<"$and">>, []}]}}]}}]}), + ?assertEqual(true, match_int(SelNotEmpty, {[{<<"x">>, 0}]})), + ?assertEqual(true, match_int(SelNotEmpty, {[{<<"x">>, false}]})), + ?assertEqual(true, match_int(SelNotEmpty, {[{<<"x">>, []}]})), + + % and, because { "x": { "$and": [A, B] } } normalizes to + % { "$and": [{ "x": A }, { "x": B }] }, that means + % { "x": { "$not": { "$and": [] } } } normalizes to { "$or": [] }, + % so it matches docs where "x" is not present + ?assertEqual(true, match_int(SelNotEmpty, {[]})), + + % $and with multiple selectors matches if all 
selectors match + SelMulti = normalize( + {[ + {<<"x">>, + {[ + {<<"$and">>, [ + {[{<<"$gt">>, 3}]}, + {[{<<"$lt">>, 7}]} + ]} + ]}} + ]} + ), + ?assertEqual(true, match_int(SelMulti, {[{<<"x">>, 6}]})), + ?assertEqual(false, match_int(SelMulti, {[{<<"x">>, 2}]})), + ?assertEqual(false, match_int(SelMulti, {[{<<"x">>, 9}]})), + ?assertEqual(false, match_int(SelMulti, {[]})), + + % $not -> $and with multiple selectors matches if any selector does not match + SelNotMulti = normalize( + {[ + {<<"x">>, + {[ + {<<"$not">>, + {[ + {<<"$and">>, [ + {[{<<"$gt">>, 3}]}, + {[{<<"$lt">>, 7}]} + ]} + ]}} + ]}} + ]} + ), + ?assertEqual(false, match_int(SelNotMulti, {[{<<"x">>, 6}]})), + ?assertEqual(true, match_int(SelNotMulti, {[{<<"x">>, 2}]})), + ?assertEqual(true, match_int(SelNotMulti, {[{<<"x">>, 9}]})), + ?assertEqual(false, match_int(SelNotMulti, {[]})). + +match_or_test() -> + % $or with an empty array matches anything + SelEmpty = normalize({[{<<"x">>, {[{<<"$or">>, []}]}}]}), + ?assertEqual(true, match_int(SelEmpty, {[{<<"x">>, 0}]})), + ?assertEqual(true, match_int(SelEmpty, {[{<<"x">>, false}]})), + ?assertEqual(true, match_int(SelEmpty, {[{<<"x">>, []}]})), + ?assertEqual(true, match_int(SelEmpty, {[]})), + + % similar to $and, due to { "$or": [] } matching anything and our + % normalization rules, negating $or also matches anything + SelNotEmpty = normalize({[{<<"x">>, {[{<<"$not">>, {[{<<"$or">>, []}]}}]}}]}), + ?assertEqual(true, match_int(SelNotEmpty, {[{<<"x">>, 0}]})), + ?assertEqual(true, match_int(SelNotEmpty, {[{<<"x">>, false}]})), + ?assertEqual(true, match_int(SelNotEmpty, {[{<<"x">>, []}]})), + ?assertEqual(true, match_int(SelNotEmpty, {[]})), + + % $or with multiple selectors matches if any selector matches + SelMulti = normalize( + {[ + {<<"x">>, + {[ + {<<"$or">>, [ + {[{<<"$lt">>, 3}]}, + {[{<<"$gt">>, 7}]} + ]} + ]}} + ]} + ), + ?assertEqual(false, match_int(SelMulti, {[{<<"x">>, 6}]})), + ?assertEqual(true, match_int(SelMulti, {[{<<"x">>, 
2}]})), + ?assertEqual(true, match_int(SelMulti, {[{<<"x">>, 9}]})), + ?assertEqual(false, match_int(SelMulti, {[]})), + + % $not -> $or with multiple selectors matches if no selector matches + SelNotMulti = normalize( + {[ + {<<"x">>, + {[ + {<<"$not">>, + {[ + {<<"$or">>, [ + {[{<<"$lt">>, 3}]}, + {[{<<"$gt">>, 7}]} + ]} + ]}} + ]}} + ]} + ), + ?assertEqual(true, match_int(SelNotMulti, {[{<<"x">>, 6}]})), + ?assertEqual(false, match_int(SelNotMulti, {[{<<"x">>, 2}]})), + ?assertEqual(false, match_int(SelNotMulti, {[{<<"x">>, 9}]})), + ?assertEqual(false, match_int(SelNotMulti, {[]})). + +match_nor_test() -> + % $nor with an empty array matches anything + SelEmpty = normalize({[{<<"x">>, {[{<<"$nor">>, []}]}}]}), + ?assertEqual(true, match_int(SelEmpty, {[{<<"x">>, 0}]})), + ?assertEqual(true, match_int(SelEmpty, {[{<<"x">>, false}]})), + ?assertEqual(true, match_int(SelEmpty, {[{<<"x">>, []}]})), + ?assertEqual(true, match_int(SelEmpty, {[]})), + + % $nor with multiple selectors matches if no selector matches + SelMulti = normalize( + {[ + {<<"x">>, + {[ + {<<"$nor">>, [ + {[{<<"$lt">>, 3}]}, + {[{<<"$gt">>, 7}]} + ]} + ]}} + ]} + ), + ?assertEqual(true, match_int(SelMulti, {[{<<"x">>, 6}]})), + ?assertEqual(false, match_int(SelMulti, {[{<<"x">>, 2}]})), + ?assertEqual(false, match_int(SelMulti, {[{<<"x">>, 9}]})), + ?assertEqual(false, match_int(SelMulti, {[]})). + -endif. From 6d23e9e18249ff84dffdef4590127492a4ea584c Mon Sep 17 00:00:00 2001 From: James Coglan Date: Tue, 13 Jan 2026 14:28:28 +0000 Subject: [PATCH 05/16] [wip] mango match with failures --- src/mango/src/mango_selector.erl | 256 +++++++++++++++++-------------- 1 file changed, 144 insertions(+), 112 deletions(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index d8e39d89248..ad3bc1a7fe8 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -23,6 +23,12 @@ -include_lib("couch/include/couch_db.hrl"). -include("mango.hrl"). 
+-record(failure, { + op, + type = mismatch, + params = [] +}). + % Validate and normalize each operator. This translates % every selector operator into a consistent version that % we can then rely on for all other selector functions. @@ -53,12 +59,19 @@ match(Selector, D) -> couch_stats:increment_counter([mango, evaluate_selector]), match_int(Selector, D). +match_int(Selector, D) -> + case match_failures(Selector, D) of + [] -> true; + [_ | _] -> false; + Other -> Other + end. + % An empty selector matches any value. -match_int({[]}, _) -> - true; -match_int(Selector, #doc{body = Body}) -> +match_failures({[]}, _) -> + []; +match_failures(Selector, #doc{body = Body}) -> match(Selector, Body, fun mango_json:cmp/2); -match_int(Selector, {Props}) -> +match_failures(Selector, {Props}) -> match(Selector, {Props}, fun mango_json:cmp/2). % Convert each operator into a normalized version as well @@ -365,35 +378,45 @@ negate({[{Field, Cond}]}) -> % We need to treat an empty array as always true. This will be applied % for $or, $in, $all, $nin as well. match({[{<<"$and">>, []}]}, _, _) -> - true; + []; match({[{<<"$and">>, Args}]}, Value, Cmp) -> - Pred = fun(SubSel) -> match(SubSel, Value, Cmp) end, - lists:all(Pred, Args); + MatchSubSel = fun(SubSel) -> match(SubSel, Value, Cmp) end, + lists:flatmap(MatchSubSel, Args); match({[{<<"$or">>, []}]}, _, _) -> - true; + []; match({[{<<"$or">>, Args}]}, Value, Cmp) -> - Pred = fun(SubSel) -> match(SubSel, Value, Cmp) end, - lists:any(Pred, Args); + SubSelFailures = [match(A, Value, Cmp) || A <- Args], + case lists:member([], SubSelFailures) of + true -> []; + _ -> lists:flatten(SubSelFailures) + end; +% TODO: producing good failure messages requires that normalize/1 fully removes +% $not from the tree by pushing it to the leaves. 
match({[{<<"$not">>, Arg}]}, Value, Cmp) -> - not match(Arg, Value, Cmp); -match({[{<<"$all">>, []}]}, _, _) -> - false; + case match(Arg, Value, Cmp) of + [] -> [#failure{op = 'not'}]; + _ -> [] + end; % All of the values in Args must exist in Values or % Values == hd(Args) if Args is a single element list % that contains a list. +match({[{<<"$all">>, []}]}, _Values, _Cmp) -> + % { "$all": [] } is defined to eval to false, so return a failure + [#failure{op = all, params = [[]]}]; +match({[{<<"$all">>, [A]}]}, Values, _Cmp) when is_list(A), A == Values -> + []; match({[{<<"$all">>, Args}]}, Values, _Cmp) when is_list(Values) -> - Pred = fun(A) -> lists:member(A, Values) end, - HasArgs = lists:all(Pred, Args), - IsArgs = - case Args of - [A] when is_list(A) -> - A == Values; - _ -> - false + lists:flatmap( + fun(Arg) -> + case lists:member(Arg, Values) of + true -> []; + _ -> [#failure{op = all, params = [Arg]}] + end end, - HasArgs orelse IsArgs; -match({[{<<"$all">>, _Args}]}, _Values, _Cmp) -> - false; + Args + ); +match({[{<<"$all">>, _}]}, Value, _Cmp) -> + [#failure{op = all, type = bad_value, params = [Value]}]; %% This is for $elemMatch, $allMatch, and possibly $in because of our normalizer. %% A selector such as {"field_name": {"$elemMatch": {"$gte": 80, "$lt": 85}}} %% gets normalized to: @@ -410,83 +433,48 @@ match({[{<<>>, Arg}]}, Values, Cmp) -> match(Arg, Values, Cmp); % Matches when any element in values matches the % sub-selector Arg. 
+match({[{<<"$elemMatch">>, _Arg}]}, [], _Cmp) -> + [#failure{op = elemMatch, type = empty_list}]; match({[{<<"$elemMatch">>, Arg}]}, Values, Cmp) when is_list(Values) -> - try - lists:foreach( - fun(V) -> - case match(Arg, V, Cmp) of - true -> throw(matched); - _ -> ok - end - end, - Values - ), - false - catch - throw:matched -> - true; - _:_ -> - false + ValueFailures = [match(Arg, V, Cmp) || V <- Values], + case lists:member([], ValueFailures) of + true -> []; + _ -> lists:flatten(ValueFailures) end; -match({[{<<"$elemMatch">>, _Arg}]}, _Value, _Cmp) -> - false; +match({[{<<"$elemMatch">>, _}]}, Value, _Cmp) -> + [#failure{op = elemMatch, type = bad_value, params = [Value]}]; % Matches when all elements in values match the % sub-selector Arg. match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Cmp) -> - try - lists:foreach( - fun(V) -> - case match(Arg, V, Cmp) of - false -> throw(unmatched); - _ -> ok - end - end, - Values - ), - true - catch - _:_ -> - false - end; -match({[{<<"$allMatch">>, _Arg}]}, _Value, _Cmp) -> - false; + MatchValue = fun(Value) -> match(Arg, Value, Cmp) end, + lists:flatmap(MatchValue, Values); +match({[{<<"$allMatch">>, _}]}, Value, _Cmp) -> + [#failure{op = allMatch, type = bad_value, params = [Value]}]; % Matches when any key in the map value matches the % sub-selector Arg. 
-match({[{<<"$keyMapMatch">>, Arg}]}, Value, Cmp) when is_tuple(Value) -> - try - lists:foreach( - fun(V) -> - case match(Arg, V, Cmp) of - true -> throw(matched); - _ -> ok - end - end, - [Key || {Key, _} <- element(1, Value)] - ), - false - catch - throw:matched -> - true; - _:_ -> - false +match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, _Cmp) -> + [#failure{op = keyMapMatch, type = empty_list}]; +match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Cmp) when is_list(Value) -> + KeyFailures = [match(Arg, K, Cmp) || {K, _} <- Value], + case lists:member([], KeyFailures) of + true -> []; + _ -> lists:flatten(KeyFailures) end; -match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, _Cmp) -> - false; +match({[{<<"$keyMapMatch">>, _}]}, Value, _Cmp) -> + [#failure{op = keyMapMatch, type = bad_value, params = [Value]}]; % Our comparison operators are fairly straight forward match({[{<<"$lt">>, Arg}]}, Value, Cmp) -> - Cmp(Value, Arg) < 0; + compare(lt, Arg, Cmp(Value, Arg) < 0); match({[{<<"$lte">>, Arg}]}, Value, Cmp) -> - Cmp(Value, Arg) =< 0; + compare(lte, Arg, Cmp(Value, Arg) =< 0); match({[{<<"$eq">>, Arg}]}, Value, Cmp) -> - Cmp(Value, Arg) == 0; + compare(eq, Arg, Cmp(Value, Arg) == 0); match({[{<<"$ne">>, Arg}]}, Value, Cmp) -> - Cmp(Value, Arg) /= 0; + compare(ne, Arg, Cmp(Value, Arg) /= 0); match({[{<<"$gte">>, Arg}]}, Value, Cmp) -> - Cmp(Value, Arg) >= 0; + compare(gte, Arg, Cmp(Value, Arg) >= 0); match({[{<<"$gt">>, Arg}]}, Value, Cmp) -> - Cmp(Value, Arg) > 0; -match({[{<<"$in">>, []}]}, _, _) -> - false; + compare(gt, Arg, Cmp(Value, Arg) > 0); match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values) -> Pred = fun(Arg) -> lists:foldl( @@ -497,50 +485,88 @@ match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values) -> Values ) end, - lists:any(Pred, Args); + case lists:any(Pred, Args) of + true -> []; + _ -> [#failure{op = in, params = [Args]}] + end; match({[{<<"$in">>, Args}]}, Value, Cmp) -> Pred = fun(Arg) -> Cmp(Value, Arg) == 0 end, - lists:any(Pred, Args); 
-match({[{<<"$nin">>, []}]}, _, _) -> - true; + case lists:any(Pred, Args) of + true -> []; + _ -> [#failure{op = in, params = [Args]}] + end; match({[{<<"$nin">>, Args}]}, Values, Cmp) when is_list(Values) -> - not match({[{<<"$in">>, Args}]}, Values, Cmp); + Pred = fun(Arg) -> + lists:foldl( + fun(Value, Match) -> + (Cmp(Value, Arg) /= 0) and Match + end, + true, + Values + ) + end, + case lists:all(Pred, Args) of + true -> []; + _ -> [#failure{op = nin, params = [Args]}] + end; match({[{<<"$nin">>, Args}]}, Value, Cmp) -> Pred = fun(Arg) -> Cmp(Value, Arg) /= 0 end, - lists:all(Pred, Args); + case lists:all(Pred, Args) of + true -> []; + _ -> [#failure{op = nin, params = [Args]}] + end; % This logic is a bit subtle. Basically, if value is % not undefined, then it exists. match({[{<<"$exists">>, ShouldExist}]}, Value, _Cmp) -> - Exists = Value /= undefined, - ShouldExist andalso Exists; + case {ShouldExist, Value} of + {true, undefined} -> [#failure{op = exists, params = [ShouldExist]}]; + {true, _} -> []; + {false, undefined} -> []; + {false, _} -> [#failure{op = exists, params = [ShouldExist]}] + end; match({[{<<"$type">>, Arg}]}, Value, _Cmp) when is_binary(Arg) -> - Arg == mango_json:type(Value); + case mango_json:type(Value) of + Arg -> []; + _ -> [#failure{op = type, params = [Arg]}] + end; match({[{<<"$mod">>, [D, R]}]}, Value, _Cmp) when is_integer(Value) -> - Value rem D == R; -match({[{<<"$mod">>, _}]}, _Value, _Cmp) -> - false; + case Value rem D of + R -> []; + _ -> [#failure{op = mod, params = [D, R]}] + end; +match({[{<<"$mod">>, _}]}, Value, _Cmp) -> + [#failure{op = mod, type = bad_value, params = [Value]}]; match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix), is_binary(Value) -> - string:prefix(Value, Prefix) /= nomatch; + case string:prefix(Value, Prefix) of + nomatch -> [#failure{op = beginsWith, params = [Prefix]}]; + _ -> [] + end; % When Value is not a string, do not match -match({[{<<"$beginsWith">>, Prefix}]}, _, 
_Cmp) when is_binary(Prefix) -> - false; +match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix) -> + [#failure{op = beginsWith, type = bad_value, params = [Value]}]; match({[{<<"$regex">>, Regex}]}, Value, _Cmp) when is_binary(Value) -> try - match == re:run(Value, Regex, [{capture, none}]) + case re:run(Value, Regex, [{capture, none}]) of + match -> []; + _ -> [#failure{op = regex, params = [Regex]}] + end catch _:_ -> - false + [#failure{op = regex, params = [Regex]}] end; -match({[{<<"$regex">>, _}]}, _Value, _Cmp) -> - false; +match({[{<<"$regex">>, _}]}, Value, _Cmp) -> + [#failure{op = regex, type = bad_value, params = [Value]}]; match({[{<<"$size">>, Arg}]}, Values, _Cmp) when is_list(Values) -> - length(Values) == Arg; -match({[{<<"$size">>, _}]}, _Value, _Cmp) -> - false; + case length(Values) of + Arg -> []; + _ -> [#failure{op = size, params = [Arg]}] + end; +match({[{<<"$size">>, _}]}, Value, _Cmp) -> + [#failure{op = size, type = bad_value, params = [Value]}]; % We don't have any choice but to believe that the text % index returned valid matches match({[{<<"$default">>, _}]}, _Value, _Cmp) -> - true; + []; % All other operators are internal assertion errors for % matching because we either should've removed them during % normalization or something else broke. @@ -552,11 +578,11 @@ match({[{<<"$", _/binary>> = Op, _}]}, _, _) -> match({[{Field, Cond}]}, Value, Cmp) -> case mango_doc:get_field(Value, Field) of not_found when Cond == {[{<<"$exists">>, false}]} -> - true; + []; not_found -> - false; + [#failure{op = '$'}]; bad_path -> - false; + [#failure{op = '$'}]; SubValue when Field == <<"_id">> -> match(Cond, SubValue, fun mango_json:cmp_raw/2); SubValue -> @@ -565,6 +591,12 @@ match({[{Field, Cond}]}, Value, Cmp) -> match({[_, _ | _] = _Props} = Sel, _Value, _Cmp) -> error({unnormalized_selector, Sel}). +compare(Op, Arg, Cond) -> + case Cond of + true -> []; + _ -> [#failure{op = Op, params = [Arg]}] + end. 
+ % Returns true if Selector requires all % fields in RequiredFields to exist in any matching documents. From b3c6b7483cf39cf3257516a6c9074e311ad09105 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Thu, 15 Jan 2026 15:13:30 +0000 Subject: [PATCH 06/16] [wip] replace mango_selector:match/3 Cmp arg with a context record --- src/mango/src/mango_selector.erl | 106 ++++++++++++++++--------------- 1 file changed, 55 insertions(+), 51 deletions(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index ad3bc1a7fe8..ac885ba7604 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -23,6 +23,10 @@ -include_lib("couch/include/couch_db.hrl"). -include("mango.hrl"). +-record(ctx, { + cmp +}). + -record(failure, { op, type = mismatch, @@ -70,9 +74,9 @@ match_int(Selector, D) -> match_failures({[]}, _) -> []; match_failures(Selector, #doc{body = Body}) -> - match(Selector, Body, fun mango_json:cmp/2); + match_failures(Selector, Body); match_failures(Selector, {Props}) -> - match(Selector, {Props}, fun mango_json:cmp/2). + match(Selector, {Props}, #ctx{cmp = fun mango_json:cmp/2}). % Convert each operator into a normalized version as well % as convert an implicit operators into their explicit @@ -379,33 +383,33 @@ negate({[{Field, Cond}]}) -> % for $or, $in, $all, $nin as well. 
match({[{<<"$and">>, []}]}, _, _) -> []; -match({[{<<"$and">>, Args}]}, Value, Cmp) -> - MatchSubSel = fun(SubSel) -> match(SubSel, Value, Cmp) end, +match({[{<<"$and">>, Args}]}, Value, Ctx) -> + MatchSubSel = fun(SubSel) -> match(SubSel, Value, Ctx) end, lists:flatmap(MatchSubSel, Args); match({[{<<"$or">>, []}]}, _, _) -> []; -match({[{<<"$or">>, Args}]}, Value, Cmp) -> - SubSelFailures = [match(A, Value, Cmp) || A <- Args], - case lists:member([], SubSelFailures) of +match({[{<<"$or">>, Args}]}, Value, Ctx) -> + SubSelFailures = [match(A, Value, Ctx) || A <- Args], + case lists:any(fun(Res) -> Res == [] end, SubSelFailures) of true -> []; _ -> lists:flatten(SubSelFailures) end; % TODO: producing good failure messages requires that normalize/1 fully removes % $not from the tree by pushing it to the leaves. -match({[{<<"$not">>, Arg}]}, Value, Cmp) -> - case match(Arg, Value, Cmp) of +match({[{<<"$not">>, Arg}]}, Value, Ctx) -> + case match(Arg, Value, Ctx) of [] -> [#failure{op = 'not'}]; _ -> [] end; % All of the values in Args must exist in Values or % Values == hd(Args) if Args is a single element list % that contains a list. 
-match({[{<<"$all">>, []}]}, _Values, _Cmp) -> +match({[{<<"$all">>, []}]}, _Values, _Ctx) -> % { "$all": [] } is defined to eval to false, so return a failure [#failure{op = all, params = [[]]}]; -match({[{<<"$all">>, [A]}]}, Values, _Cmp) when is_list(A), A == Values -> +match({[{<<"$all">>, [A]}]}, Values, _Ctx) when is_list(A), A == Values -> []; -match({[{<<"$all">>, Args}]}, Values, _Cmp) when is_list(Values) -> +match({[{<<"$all">>, Args}]}, Values, _Ctx) when is_list(Values) -> lists:flatmap( fun(Arg) -> case lists:member(Arg, Values) of @@ -415,7 +419,7 @@ match({[{<<"$all">>, Args}]}, Values, _Cmp) when is_list(Values) -> end, Args ); -match({[{<<"$all">>, _}]}, Value, _Cmp) -> +match({[{<<"$all">>, _}]}, Value, _Ctx) -> [#failure{op = all, type = bad_value, params = [Value]}]; %% This is for $elemMatch, $allMatch, and possibly $in because of our normalizer. %% A selector such as {"field_name": {"$elemMatch": {"$gte": 80, "$lt": 85}}} @@ -429,53 +433,53 @@ match({[{<<"$all">>, _}]}, Value, _Cmp) -> %% }]} %% }]}. %% So we filter out the <<>>. -match({[{<<>>, Arg}]}, Values, Cmp) -> - match(Arg, Values, Cmp); +match({[{<<>>, Arg}]}, Values, Ctx) -> + match(Arg, Values, Ctx); % Matches when any element in values matches the % sub-selector Arg. -match({[{<<"$elemMatch">>, _Arg}]}, [], _Cmp) -> +match({[{<<"$elemMatch">>, _Arg}]}, [], _Ctx) -> [#failure{op = elemMatch, type = empty_list}]; -match({[{<<"$elemMatch">>, Arg}]}, Values, Cmp) when is_list(Values) -> - ValueFailures = [match(Arg, V, Cmp) || V <- Values], +match({[{<<"$elemMatch">>, Arg}]}, Values, Ctx) when is_list(Values) -> + ValueFailures = [match(Arg, V, Ctx) || V <- Values], case lists:member([], ValueFailures) of true -> []; _ -> lists:flatten(ValueFailures) end; -match({[{<<"$elemMatch">>, _}]}, Value, _Cmp) -> +match({[{<<"$elemMatch">>, _}]}, Value, _Ctx) -> [#failure{op = elemMatch, type = bad_value, params = [Value]}]; % Matches when all elements in values match the % sub-selector Arg. 
-match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Cmp) -> - MatchValue = fun(Value) -> match(Arg, Value, Cmp) end, +match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Ctx) -> + MatchValue = fun(Value) -> match(Arg, Value, Ctx) end, lists:flatmap(MatchValue, Values); -match({[{<<"$allMatch">>, _}]}, Value, _Cmp) -> +match({[{<<"$allMatch">>, _}]}, Value, _Ctx) -> [#failure{op = allMatch, type = bad_value, params = [Value]}]; % Matches when any key in the map value matches the % sub-selector Arg. -match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, _Cmp) -> +match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, _Ctx) -> [#failure{op = keyMapMatch, type = empty_list}]; -match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Cmp) when is_list(Value) -> - KeyFailures = [match(Arg, K, Cmp) || {K, _} <- Value], +match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Ctx) when is_list(Value) -> + KeyFailures = [match(Arg, K, Ctx) || {K, _} <- Value], case lists:member([], KeyFailures) of true -> []; _ -> lists:flatten(KeyFailures) end; -match({[{<<"$keyMapMatch">>, _}]}, Value, _Cmp) -> +match({[{<<"$keyMapMatch">>, _}]}, Value, _Ctx) -> [#failure{op = keyMapMatch, type = bad_value, params = [Value]}]; % Our comparison operators are fairly straight forward -match({[{<<"$lt">>, Arg}]}, Value, Cmp) -> +match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> compare(lt, Arg, Cmp(Value, Arg) < 0); -match({[{<<"$lte">>, Arg}]}, Value, Cmp) -> +match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> compare(lte, Arg, Cmp(Value, Arg) =< 0); -match({[{<<"$eq">>, Arg}]}, Value, Cmp) -> +match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> compare(eq, Arg, Cmp(Value, Arg) == 0); -match({[{<<"$ne">>, Arg}]}, Value, Cmp) -> +match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> compare(ne, Arg, Cmp(Value, Arg) /= 0); -match({[{<<"$gte">>, Arg}]}, Value, Cmp) -> +match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> compare(gte, Arg, Cmp(Value, Arg) >= 0); -match({[{<<"$gt">>, Arg}]}, Value, Cmp) -> 
+match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> compare(gt, Arg, Cmp(Value, Arg) > 0); -match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values) -> +match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) -> Pred = fun(Arg) -> lists:foldl( fun(Value, Match) -> @@ -489,13 +493,13 @@ match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values) -> true -> []; _ -> [#failure{op = in, params = [Args]}] end; -match({[{<<"$in">>, Args}]}, Value, Cmp) -> +match({[{<<"$in">>, Args}]}, Value, #ctx{cmp = Cmp}) -> Pred = fun(Arg) -> Cmp(Value, Arg) == 0 end, case lists:any(Pred, Args) of true -> []; _ -> [#failure{op = in, params = [Args]}] end; -match({[{<<"$nin">>, Args}]}, Values, Cmp) when is_list(Values) -> +match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) -> Pred = fun(Arg) -> lists:foldl( fun(Value, Match) -> @@ -509,7 +513,7 @@ match({[{<<"$nin">>, Args}]}, Values, Cmp) when is_list(Values) -> true -> []; _ -> [#failure{op = nin, params = [Args]}] end; -match({[{<<"$nin">>, Args}]}, Value, Cmp) -> +match({[{<<"$nin">>, Args}]}, Value, #ctx{cmp = Cmp}) -> Pred = fun(Arg) -> Cmp(Value, Arg) /= 0 end, case lists:all(Pred, Args) of true -> []; @@ -517,34 +521,34 @@ match({[{<<"$nin">>, Args}]}, Value, Cmp) -> end; % This logic is a bit subtle. Basically, if value is % not undefined, then it exists. 
-match({[{<<"$exists">>, ShouldExist}]}, Value, _Cmp) -> +match({[{<<"$exists">>, ShouldExist}]}, Value, _Ctx) -> case {ShouldExist, Value} of {true, undefined} -> [#failure{op = exists, params = [ShouldExist]}]; {true, _} -> []; {false, undefined} -> []; {false, _} -> [#failure{op = exists, params = [ShouldExist]}] end; -match({[{<<"$type">>, Arg}]}, Value, _Cmp) when is_binary(Arg) -> +match({[{<<"$type">>, Arg}]}, Value, _Ctx) when is_binary(Arg) -> case mango_json:type(Value) of Arg -> []; _ -> [#failure{op = type, params = [Arg]}] end; -match({[{<<"$mod">>, [D, R]}]}, Value, _Cmp) when is_integer(Value) -> +match({[{<<"$mod">>, [D, R]}]}, Value, _Ctx) when is_integer(Value) -> case Value rem D of R -> []; _ -> [#failure{op = mod, params = [D, R]}] end; -match({[{<<"$mod">>, _}]}, Value, _Cmp) -> +match({[{<<"$mod">>, _}]}, Value, _Ctx) -> [#failure{op = mod, type = bad_value, params = [Value]}]; -match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix), is_binary(Value) -> +match({[{<<"$beginsWith">>, Prefix}]}, Value, _Ctx) when is_binary(Prefix), is_binary(Value) -> case string:prefix(Value, Prefix) of nomatch -> [#failure{op = beginsWith, params = [Prefix]}]; _ -> [] end; % When Value is not a string, do not match -match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix) -> +match({[{<<"$beginsWith">>, Prefix}]}, Value, _Ctx) when is_binary(Prefix) -> [#failure{op = beginsWith, type = bad_value, params = [Value]}]; -match({[{<<"$regex">>, Regex}]}, Value, _Cmp) when is_binary(Value) -> +match({[{<<"$regex">>, Regex}]}, Value, _Ctx) when is_binary(Value) -> try case re:run(Value, Regex, [{capture, none}]) of match -> []; @@ -554,18 +558,18 @@ match({[{<<"$regex">>, Regex}]}, Value, _Cmp) when is_binary(Value) -> _:_ -> [#failure{op = regex, params = [Regex]}] end; -match({[{<<"$regex">>, _}]}, Value, _Cmp) -> +match({[{<<"$regex">>, _}]}, Value, _Ctx) -> [#failure{op = regex, type = bad_value, params = [Value]}]; 
-match({[{<<"$size">>, Arg}]}, Values, _Cmp) when is_list(Values) -> +match({[{<<"$size">>, Arg}]}, Values, _Ctx) when is_list(Values) -> case length(Values) of Arg -> []; _ -> [#failure{op = size, params = [Arg]}] end; -match({[{<<"$size">>, _}]}, Value, _Cmp) -> +match({[{<<"$size">>, _}]}, Value, _Ctx) -> [#failure{op = size, type = bad_value, params = [Value]}]; % We don't have any choice but to believe that the text % index returned valid matches -match({[{<<"$default">>, _}]}, _Value, _Cmp) -> +match({[{<<"$default">>, _}]}, _Value, _Ctx) -> []; % All other operators are internal assertion errors for % matching because we either should've removed them during @@ -575,7 +579,7 @@ match({[{<<"$", _/binary>> = Op, _}]}, _, _) -> % We need to traverse value to find field. The call to % mango_doc:get_field/2 may return either not_found or % bad_path in which case matching fails. -match({[{Field, Cond}]}, Value, Cmp) -> +match({[{Field, Cond}]}, Value, Ctx) -> case mango_doc:get_field(Value, Field) of not_found when Cond == {[{<<"$exists">>, false}]} -> []; @@ -584,11 +588,11 @@ match({[{Field, Cond}]}, Value, Cmp) -> bad_path -> [#failure{op = '$'}]; SubValue when Field == <<"_id">> -> - match(Cond, SubValue, fun mango_json:cmp_raw/2); + match(Cond, SubValue, Ctx#ctx{cmp = fun mango_json:cmp_raw/2}); SubValue -> - match(Cond, SubValue, Cmp) + match(Cond, SubValue, Ctx) end; -match({[_, _ | _] = _Props} = Sel, _Value, _Cmp) -> +match({[_, _ | _] = _Props} = Sel, _Value, _Ctx) -> error({unnormalized_selector, Sel}). 
compare(Op, Arg, Cond) -> From 0a69b792e986cccedf48bddfd97c13def8039268 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Thu, 15 Jan 2026 16:57:29 +0000 Subject: [PATCH 07/16] [wip] add paths to mango failures --- src/mango/src/mango_selector.erl | 306 +++++++++++++++++++++++-------- 1 file changed, 233 insertions(+), 73 deletions(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index ac885ba7604..e50be4a7a55 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -24,13 +24,15 @@ -include("mango.hrl"). -record(ctx, { - cmp + cmp, + path = [] }). -record(failure, { op, type = mismatch, - params = [] + params = [], + path = [] }). % Validate and normalize each operator. This translates @@ -398,29 +400,29 @@ match({[{<<"$or">>, Args}]}, Value, Ctx) -> % $not from the tree by pushing it to the leaves. match({[{<<"$not">>, Arg}]}, Value, Ctx) -> case match(Arg, Value, Ctx) of - [] -> [#failure{op = 'not'}]; + [] -> [#failure{op = 'not', path = Ctx#ctx.path}]; _ -> [] end; % All of the values in Args must exist in Values or % Values == hd(Args) if Args is a single element list % that contains a list. 
-match({[{<<"$all">>, []}]}, _Values, _Ctx) -> +match({[{<<"$all">>, []}]}, _Values, Ctx) -> % { "$all": [] } is defined to eval to false, so return a failure - [#failure{op = all, params = [[]]}]; + [#failure{op = all, params = [[]], path = Ctx#ctx.path}]; match({[{<<"$all">>, [A]}]}, Values, _Ctx) when is_list(A), A == Values -> []; -match({[{<<"$all">>, Args}]}, Values, _Ctx) when is_list(Values) -> +match({[{<<"$all">>, Args}]}, Values, Ctx) when is_list(Values) -> lists:flatmap( fun(Arg) -> case lists:member(Arg, Values) of true -> []; - _ -> [#failure{op = all, params = [Arg]}] + _ -> [#failure{op = all, params = [Arg], path = Ctx#ctx.path}] end end, Args ); -match({[{<<"$all">>, _}]}, Value, _Ctx) -> - [#failure{op = all, type = bad_value, params = [Value]}]; +match({[{<<"$all">>, _}]}, Value, Ctx) -> + [#failure{op = all, type = bad_value, params = [Value], path = Ctx#ctx.path}]; %% This is for $elemMatch, $allMatch, and possibly $in because of our normalizer. %% A selector such as {"field_name": {"$elemMatch": {"$gte": 80, "$lt": 85}}} %% gets normalized to: @@ -437,49 +439,53 @@ match({[{<<>>, Arg}]}, Values, Ctx) -> match(Arg, Values, Ctx); % Matches when any element in values matches the % sub-selector Arg. 
-match({[{<<"$elemMatch">>, _Arg}]}, [], _Ctx) -> - [#failure{op = elemMatch, type = empty_list}]; -match({[{<<"$elemMatch">>, Arg}]}, Values, Ctx) when is_list(Values) -> - ValueFailures = [match(Arg, V, Ctx) || V <- Values], +match({[{<<"$elemMatch">>, _Arg}]}, [], Ctx) -> + [#failure{op = elemMatch, type = empty_list, path = Ctx#ctx.path}]; +match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) when is_list(Values) -> + ValueFailures = [ + match(Arg, V, Ctx#ctx{path = [Idx | Path]}) + || {Idx, V} <- lists:enumerate(0, Values) + ], case lists:member([], ValueFailures) of true -> []; _ -> lists:flatten(ValueFailures) end; -match({[{<<"$elemMatch">>, _}]}, Value, _Ctx) -> - [#failure{op = elemMatch, type = bad_value, params = [Value]}]; +match({[{<<"$elemMatch">>, _}]}, Value, Ctx) -> + [#failure{op = elemMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % Matches when all elements in values match the % sub-selector Arg. -match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Ctx) -> - MatchValue = fun(Value) -> match(Arg, Value, Ctx) end, - lists:flatmap(MatchValue, Values); -match({[{<<"$allMatch">>, _}]}, Value, _Ctx) -> - [#failure{op = allMatch, type = bad_value, params = [Value]}]; +match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, #ctx{path = Path} = Ctx) -> + EnumValues = lists:enumerate(0, Values), + MatchValue = fun({Idx, Value}) -> match(Arg, Value, Ctx#ctx{path = [Idx | Path]}) end, + lists:flatmap(MatchValue, EnumValues); +match({[{<<"$allMatch">>, _}]}, Value, Ctx) -> + [#failure{op = allMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % Matches when any key in the map value matches the % sub-selector Arg. 
-match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, _Ctx) -> - [#failure{op = keyMapMatch, type = empty_list}]; +match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, Ctx) -> + [#failure{op = keyMapMatch, type = empty_list, path = Ctx#ctx.path}]; match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Ctx) when is_list(Value) -> KeyFailures = [match(Arg, K, Ctx) || {K, _} <- Value], case lists:member([], KeyFailures) of true -> []; _ -> lists:flatten(KeyFailures) end; -match({[{<<"$keyMapMatch">>, _}]}, Value, _Ctx) -> - [#failure{op = keyMapMatch, type = bad_value, params = [Value]}]; +match({[{<<"$keyMapMatch">>, _}]}, Value, Ctx) -> + [#failure{op = keyMapMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % Our comparison operators are fairly straight forward -match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> - compare(lt, Arg, Cmp(Value, Arg) < 0); -match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> - compare(lte, Arg, Cmp(Value, Arg) =< 0); -match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> - compare(eq, Arg, Cmp(Value, Arg) == 0); -match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> - compare(ne, Arg, Cmp(Value, Arg) /= 0); -match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> - compare(gte, Arg, Cmp(Value, Arg) >= 0); -match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> - compare(gt, Arg, Cmp(Value, Arg) > 0); -match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) -> +match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + compare(lt, Arg, Path, Cmp(Value, Arg) < 0); +match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + compare(lte, Arg, Path, Cmp(Value, Arg) =< 0); +match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + compare(eq, Arg, Path, Cmp(Value, Arg) == 0); +match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + compare(ne, Arg, Path, Cmp(Value, Arg) /= 0); +match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + compare(gte, Arg, Path, 
Cmp(Value, Arg) >= 0); +match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + compare(gt, Arg, Path, Cmp(Value, Arg) > 0); +match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list(Values) -> Pred = fun(Arg) -> lists:foldl( fun(Value, Match) -> @@ -491,15 +497,15 @@ match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) -> end, case lists:any(Pred, Args) of true -> []; - _ -> [#failure{op = in, params = [Args]}] + _ -> [#failure{op = in, params = [Args], path = Path}] end; -match({[{<<"$in">>, Args}]}, Value, #ctx{cmp = Cmp}) -> +match({[{<<"$in">>, Args}]}, Value, #ctx{cmp = Cmp, path = Path}) -> Pred = fun(Arg) -> Cmp(Value, Arg) == 0 end, case lists:any(Pred, Args) of true -> []; - _ -> [#failure{op = in, params = [Args]}] + _ -> [#failure{op = in, params = [Args], path = Path}] end; -match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) -> +match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list(Values) -> Pred = fun(Arg) -> lists:foldl( fun(Value, Match) -> @@ -511,62 +517,62 @@ match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) -> end, case lists:all(Pred, Args) of true -> []; - _ -> [#failure{op = nin, params = [Args]}] + _ -> [#failure{op = nin, params = [Args], path = Path}] end; -match({[{<<"$nin">>, Args}]}, Value, #ctx{cmp = Cmp}) -> +match({[{<<"$nin">>, Args}]}, Value, #ctx{cmp = Cmp, path = Path}) -> Pred = fun(Arg) -> Cmp(Value, Arg) /= 0 end, case lists:all(Pred, Args) of true -> []; - _ -> [#failure{op = nin, params = [Args]}] + _ -> [#failure{op = nin, params = [Args], path = Path}] end; % This logic is a bit subtle. Basically, if value is % not undefined, then it exists. 
-match({[{<<"$exists">>, ShouldExist}]}, Value, _Ctx) -> +match({[{<<"$exists">>, ShouldExist}]}, Value, Ctx) -> case {ShouldExist, Value} of - {true, undefined} -> [#failure{op = exists, params = [ShouldExist]}]; + {true, undefined} -> [#failure{op = exists, params = [ShouldExist], path = Ctx#ctx.path}]; {true, _} -> []; {false, undefined} -> []; - {false, _} -> [#failure{op = exists, params = [ShouldExist]}] + {false, _} -> [#failure{op = exists, params = [ShouldExist], path = Ctx#ctx.path}] end; -match({[{<<"$type">>, Arg}]}, Value, _Ctx) when is_binary(Arg) -> +match({[{<<"$type">>, Arg}]}, Value, Ctx) when is_binary(Arg) -> case mango_json:type(Value) of Arg -> []; - _ -> [#failure{op = type, params = [Arg]}] + _ -> [#failure{op = type, params = [Arg], path = Ctx#ctx.path}] end; -match({[{<<"$mod">>, [D, R]}]}, Value, _Ctx) when is_integer(Value) -> +match({[{<<"$mod">>, [D, R]}]}, Value, Ctx) when is_integer(Value) -> case Value rem D of R -> []; - _ -> [#failure{op = mod, params = [D, R]}] + _ -> [#failure{op = mod, params = [D, R], path = Ctx#ctx.path}] end; -match({[{<<"$mod">>, _}]}, Value, _Ctx) -> - [#failure{op = mod, type = bad_value, params = [Value]}]; -match({[{<<"$beginsWith">>, Prefix}]}, Value, _Ctx) when is_binary(Prefix), is_binary(Value) -> +match({[{<<"$mod">>, _}]}, Value, Ctx) -> + [#failure{op = mod, type = bad_value, params = [Value], path = Ctx#ctx.path}]; +match({[{<<"$beginsWith">>, Prefix}]}, Value, Ctx) when is_binary(Prefix), is_binary(Value) -> case string:prefix(Value, Prefix) of - nomatch -> [#failure{op = beginsWith, params = [Prefix]}]; + nomatch -> [#failure{op = beginsWith, params = [Prefix], path = Ctx#ctx.path}]; _ -> [] end; % When Value is not a string, do not match -match({[{<<"$beginsWith">>, Prefix}]}, Value, _Ctx) when is_binary(Prefix) -> - [#failure{op = beginsWith, type = bad_value, params = [Value]}]; -match({[{<<"$regex">>, Regex}]}, Value, _Ctx) when is_binary(Value) -> +match({[{<<"$beginsWith">>, Prefix}]}, 
Value, Ctx) when is_binary(Prefix) -> + [#failure{op = beginsWith, type = bad_value, params = [Value], path = Ctx#ctx.path}]; +match({[{<<"$regex">>, Regex}]}, Value, Ctx) when is_binary(Value) -> try case re:run(Value, Regex, [{capture, none}]) of match -> []; - _ -> [#failure{op = regex, params = [Regex]}] + _ -> [#failure{op = regex, params = [Regex], path = Ctx#ctx.path}] end catch _:_ -> - [#failure{op = regex, params = [Regex]}] + [#failure{op = regex, params = [Regex], path = Ctx#ctx.path}] end; -match({[{<<"$regex">>, _}]}, Value, _Ctx) -> - [#failure{op = regex, type = bad_value, params = [Value]}]; -match({[{<<"$size">>, Arg}]}, Values, _Ctx) when is_list(Values) -> +match({[{<<"$regex">>, _}]}, Value, Ctx) -> + [#failure{op = regex, type = bad_value, params = [Value], path = Ctx#ctx.path}]; +match({[{<<"$size">>, Arg}]}, Values, Ctx) when is_list(Values) -> case length(Values) of Arg -> []; - _ -> [#failure{op = size, params = [Arg]}] + _ -> [#failure{op = size, params = [Arg], path = Ctx#ctx.path}] end; -match({[{<<"$size">>, _}]}, Value, _Ctx) -> - [#failure{op = size, type = bad_value, params = [Value]}]; +match({[{<<"$size">>, _}]}, Value, Ctx) -> + [#failure{op = size, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % We don't have any choice but to believe that the text % index returned valid matches match({[{<<"$default">>, _}]}, _Value, _Ctx) -> @@ -579,26 +585,27 @@ match({[{<<"$", _/binary>> = Op, _}]}, _, _) -> % We need to traverse value to find field. The call to % mango_doc:get_field/2 may return either not_found or % bad_path in which case matching fails. 
-match({[{Field, Cond}]}, Value, Ctx) -> +match({[{Field, Cond}]}, Value, #ctx{path = Path} = Ctx) -> + InnerCtx = Ctx#ctx{path = [Field | Path]}, case mango_doc:get_field(Value, Field) of not_found when Cond == {[{<<"$exists">>, false}]} -> []; not_found -> - [#failure{op = '$'}]; + [#failure{op = field, type = not_found, path = InnerCtx#ctx.path}]; bad_path -> - [#failure{op = '$'}]; + [#failure{op = field, type = bad_path, path = InnerCtx#ctx.path}]; SubValue when Field == <<"_id">> -> - match(Cond, SubValue, Ctx#ctx{cmp = fun mango_json:cmp_raw/2}); + match(Cond, SubValue, InnerCtx#ctx{cmp = fun mango_json:cmp_raw/2}); SubValue -> - match(Cond, SubValue, Ctx) + match(Cond, SubValue, InnerCtx) end; match({[_, _ | _] = _Props} = Sel, _Value, _Ctx) -> error({unnormalized_selector, Sel}). -compare(Op, Arg, Cond) -> +compare(Op, Arg, Path, Cond) -> case Cond of true -> []; - _ -> [#failure{op = Op, params = [Arg]}] + _ -> [#failure{op = Op, params = [Arg], path = Path}] end. % Returns true if Selector requires all @@ -1739,4 +1746,157 @@ match_nor_test() -> ?assertEqual(false, match_int(SelMulti, {[{<<"x">>, 9}]})), ?assertEqual(false, match_int(SelMulti, {[]})). +match_failures_object_test() -> + Selector = normalize( + {[ + {<<"a">>, 1}, + {<<"b">>, {[{<<"c">>, 3}]}} + ]} + ), + + Fails0 = match_failures( + Selector, + {[ + {<<"a">>, 1}, + {<<"b">>, {[{<<"c">>, 3}]}} + ]} + ), + ?assertEqual([], Fails0), + + Fails1 = match_failures( + Selector, + {[ + {<<"a">>, 0}, + {<<"b">>, {[{<<"c">>, 3}]}} + ]} + ), + ?assertEqual( + [#failure{op = eq, type = mismatch, params = [1], path = [<<"a">>]}], + Fails1 + ), + + Fails2 = match_failures( + Selector, + {[ + {<<"a">>, 1}, + {<<"b">>, {[{<<"c">>, 4}]}} + ]} + ), + ?assertEqual( + [#failure{op = eq, type = mismatch, params = [3], path = [<<"b.c">>]}], + Fails2 + ). 
+ +match_failures_elemmatch_test() -> + SelElemMatch = normalize( + {[ + {<<"a">>, + {[ + {<<"$elemMatch">>, {[{<<"$gt">>, 4}]}} + ]}} + ]} + ), + + Fails0 = match_failures( + SelElemMatch, {[{<<"a">>, [5, 3, 2]}]} + ), + ?assertEqual([], Fails0), + + Fails1 = match_failures( + SelElemMatch, {[{<<"a">>, []}]} + ), + ?assertEqual( + [#failure{op = elemMatch, type = empty_list, params = [], path = [<<"a">>]}], + Fails1 + ), + + Fails2 = match_failures( + SelElemMatch, {[{<<"a">>, [3, 2]}]} + ), + ?assertEqual( + [ + #failure{op = gt, type = mismatch, params = [4], path = [0, <<"a">>]}, + #failure{op = gt, type = mismatch, params = [4], path = [1, <<"a">>]} + ], + Fails2 + ). + +match_failures_allmatch_test() -> + SelAllMatch = normalize( + {[ + {<<"a">>, + {[ + {<<"$allMatch">>, {[{<<"$gt">>, 4}]}} + ]}} + ]} + ), + + Fails0 = match_failures( + SelAllMatch, {[{<<"a">>, [5]}]} + ), + ?assertEqual([], Fails0), + + Fails1 = match_failures( + SelAllMatch, {[{<<"a">>, [4]}]} + ), + ?assertEqual( + [#failure{op = gt, type = mismatch, params = [4], path = [0, <<"a">>]}], + Fails1 + ), + + Fails2 = match_failures( + SelAllMatch, {[{<<"a">>, [5, 6, 3, 7, 0]}]} + ), + ?assertEqual( + [ + #failure{op = gt, type = mismatch, params = [4], path = [2, <<"a">>]}, + #failure{op = gt, type = mismatch, params = [4], path = [4, <<"a">>]} + ], + Fails2 + ). 
+ +match_failures_allmatch_object_test() -> + SelAllMatch = normalize( + {[ + {<<"a.b">>, + {[ + {<<"$allMatch">>, {[{<<"c">>, {[{<<"$gt">>, 4}]}}]}} + ]}} + ]} + ), + + Fails0 = match_failures( + SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}]}]}}]} + ), + ?assertEqual([], Fails0), + + Fails1 = match_failures( + SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 4}]}]}]}}]} + ), + ?assertEqual( + [#failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 0, <<"a.b">>]}], + Fails1 + ), + + Fails2 = match_failures( + SelAllMatch, + {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}, {[{<<"c">>, 6}]}, {[{<<"c">>, 3}]}]}]}}]} + ), + ?assertEqual( + [#failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 2, <<"a.b">>]}], + Fails2 + ), + + Fails3 = match_failures( + SelAllMatch, + {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 1}]}, {[]}]}]}}]} + ), + ?assertEqual( + [ + #failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 0, <<"a.b">>]}, + #failure{op = field, type = not_found, params = [], path = [<<"c">>, 1, <<"a.b">>]} + ], + Fails3 + ). + -endif. 
From 1a12577158d84339dff4c75a4be9c0888dfd3e3c Mon Sep 17 00:00:00 2001 From: James Coglan Date: Fri, 16 Jan 2026 16:01:27 +0000 Subject: [PATCH 08/16] [wip] make $allMatch return true for empty lists and apply DeMorgan to $elemMatch and $allMatch --- src/mango/src/mango_selector.erl | 11 ++++++++--- src/mango/test/03-operator-test.py | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index e50be4a7a55..18d05ed253a 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -350,6 +350,10 @@ negate({[{<<"$and">>, Args}]}) -> {[{<<"$or">>, [negate(A) || A <- Args]}]}; negate({[{<<"$or">>, Args}]}) -> {[{<<"$and">>, [negate(A) || A <- Args]}]}; +negate({[{<<"$elemMatch">>, Arg}]}) -> + {[{<<"$allMatch">>, negate(Arg)}]}; +negate({[{<<"$allMatch">>, Arg}]}) -> + {[{<<"$elemMatch">>, negate(Arg)}]}; negate({[{<<"$default">>, _}]} = Arg) -> ?MANGO_ERROR({bad_arg, '$not', Arg}); % Negating comparison operators is straight forward @@ -454,7 +458,7 @@ match({[{<<"$elemMatch">>, _}]}, Value, Ctx) -> [#failure{op = elemMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % Matches when all elements in values match the % sub-selector Arg. -match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, #ctx{path = Path} = Ctx) -> +match({[{<<"$allMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) when is_list(Values) -> EnumValues = lists:enumerate(0, Values), MatchValue = fun({Idx, Value}) -> match(Arg, Value, Ctx#ctx{path = [Idx | Path]}) end, lists:flatmap(MatchValue, EnumValues); @@ -1507,9 +1511,10 @@ match_size_test() -> ]). 
match_allmatch_test() -> - % $allMatch is defined to return false for empty lists + % TODO: we have made a breaking change and made $allMatch return true for + % empty lists, since this makes negation consistent check_selector({[{<<"$allMatch">>, {[{<<"$eq">>, 0}]}}]}, [ - {false, []}, + {true, []}, {true, [0]}, {false, [1]}, {false, [0, 1]} diff --git a/src/mango/test/03-operator-test.py b/src/mango/test/03-operator-test.py index 1dfd1a72510..81f5470819c 100644 --- a/src/mango/test/03-operator-test.py +++ b/src/mango/test/03-operator-test.py @@ -69,7 +69,7 @@ def test_empty_all_match(self): amdocs = [{"bad_doc": "a", "emptybang": []}] self.db.save_docs(amdocs, w=3) docs = self.db.find({"emptybang": {"$allMatch": {"foo": {"$eq": 2}}}}) - self.assertEqual(len(docs), 0) + self.assertEqual(len(docs), 1) def test_in_operator_array(self): docs = self.db.find( From 49c299b9152c561684bdb04c1dbf49cc30f02b92 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Fri, 16 Jan 2026 15:41:43 +0000 Subject: [PATCH 09/16] [wip] proper negation handling --- src/mango/src/mango_selector.erl | 121 ++++++++++++++++++++----------- 1 file changed, 77 insertions(+), 44 deletions(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index 18d05ed253a..ecbadf57c21 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -25,6 +25,7 @@ -record(ctx, { cmp, + negate = false, path = [] }). @@ -32,6 +33,7 @@ op, type = mismatch, params = [], + negate = false, path = [] }). @@ -400,31 +402,42 @@ match({[{<<"$or">>, Args}]}, Value, Ctx) -> true -> []; _ -> lists:flatten(SubSelFailures) end; -% TODO: producing good failure messages requires that normalize/1 fully removes -% $not from the tree by pushing it to the leaves. 
-match({[{<<"$not">>, Arg}]}, Value, Ctx) -> - case match(Arg, Value, Ctx) of - [] -> [#failure{op = 'not', path = Ctx#ctx.path}]; - _ -> [] - end; +match({[{<<"$not">>, Arg}]}, Value, #ctx{negate = Neg} = Ctx) -> + match(Arg, Value, Ctx#ctx{negate = not Neg}); % All of the values in Args must exist in Values or % Values == hd(Args) if Args is a single element list % that contains a list. -match({[{<<"$all">>, []}]}, _Values, Ctx) -> +match({[{<<"$all">>, []}]}, _Values, #ctx{negate = Neg, path = Path}) -> % { "$all": [] } is defined to eval to false, so return a failure - [#failure{op = all, params = [[]], path = Ctx#ctx.path}]; -match({[{<<"$all">>, [A]}]}, Values, _Ctx) when is_list(A), A == Values -> - []; -match({[{<<"$all">>, Args}]}, Values, Ctx) when is_list(Values) -> + case Neg of + true -> []; + false -> [#failure{op = all, params = [[]], negate = Neg, path = Path}] + end; +match({[{<<"$all">>, [A]}]}, Values, #ctx{negate = Neg, path = Path}) when + is_list(A), A == Values +-> + case Neg of + true -> [#failure{op = all, params = [[A]], negate = Neg, path = Path}]; + false -> [] + end; +match({[{<<"$all">>, Args}]}, Values, #ctx{negate = true, path = Path}) when is_list(Values) -> + ArgResults = [lists:member(Arg, Values) || Arg <- Args], + case lists:member(false, ArgResults) of + true -> []; + false -> [#failure{op = app, params = [Args], negate = true, path = Path}] + end; +match({[{<<"$all">>, Args}]}, Values, #ctx{path = Path}) when is_list(Values) -> lists:flatmap( fun(Arg) -> case lists:member(Arg, Values) of true -> []; - _ -> [#failure{op = all, params = [Arg], path = Ctx#ctx.path}] + _ -> [#failure{op = all, params = [Arg], path = Path}] end end, Args ); +match({[{<<"$all">>, _}]}, _Value, #ctx{negate = true}) -> + []; match({[{<<"$all">>, _}]}, Value, Ctx) -> [#failure{op = all, type = bad_value, params = [Value], path = Ctx#ctx.path}]; %% This is for $elemMatch, $allMatch, and possibly $in because of our normalizer. 
@@ -466,8 +479,13 @@ match({[{<<"$allMatch">>, _}]}, Value, Ctx) -> [#failure{op = allMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % Matches when any key in the map value matches the % sub-selector Arg. -match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, Ctx) -> - [#failure{op = keyMapMatch, type = empty_list, path = Ctx#ctx.path}]; +match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, #ctx{negate = true}) -> + []; +match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, #ctx{path = Path}) -> + [#failure{op = keyMapMatch, type = empty_list, path = Path}]; +match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, #ctx{negate = true} = Ctx) when is_list(Value) -> + MatchKey = fun(K) -> match(Arg, K, Ctx) end, + lists:flatmap(MatchKey, [K || {K, _} <- Value]); match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Ctx) when is_list(Value) -> KeyFailures = [match(Arg, K, Ctx) || {K, _} <- Value], case lists:member([], KeyFailures) of @@ -538,45 +556,60 @@ match({[{<<"$exists">>, ShouldExist}]}, Value, Ctx) -> {false, undefined} -> []; {false, _} -> [#failure{op = exists, params = [ShouldExist], path = Ctx#ctx.path}] end; -match({[{<<"$type">>, Arg}]}, Value, Ctx) when is_binary(Arg) -> - case mango_json:type(Value) of - Arg -> []; - _ -> [#failure{op = type, params = [Arg], path = Ctx#ctx.path}] +match({[{<<"$type">>, Arg}]}, Value, #ctx{negate = Neg, path = Path}) when is_binary(Arg) -> + case {Neg, mango_json:type(Value)} of + {false, Arg} -> []; + {true, Type} when Type /= Arg -> []; + _ -> [#failure{op = type, params = [Arg], negate = Neg, path = Path}] end; -match({[{<<"$mod">>, [D, R]}]}, Value, Ctx) when is_integer(Value) -> - case Value rem D of - R -> []; - _ -> [#failure{op = mod, params = [D, R], path = Ctx#ctx.path}] +match({[{<<"$mod">>, [D, R]}]}, Value, #ctx{negate = Neg, path = Path}) when is_integer(Value) -> + case {Neg, Value rem D} of + {false, R} -> []; + {true, Rem} when Rem /= R -> []; + _ -> [#failure{op = mod, params = [D, R], negate = Neg, path = Path}] end; 
-match({[{<<"$mod">>, _}]}, Value, Ctx) -> - [#failure{op = mod, type = bad_value, params = [Value], path = Ctx#ctx.path}]; -match({[{<<"$beginsWith">>, Prefix}]}, Value, Ctx) when is_binary(Prefix), is_binary(Value) -> - case string:prefix(Value, Prefix) of - nomatch -> [#failure{op = beginsWith, params = [Prefix], path = Ctx#ctx.path}]; - _ -> [] +match({[{<<"$mod">>, _}]}, _Value, #ctx{negate = true}) -> + []; +match({[{<<"$mod">>, _}]}, Value, #ctx{path = Path}) -> + [#failure{op = mod, type = bad_value, params = [Value], path = Path}]; +match({[{<<"$beginsWith">>, Prefix}]}, Value, #ctx{negate = Neg, path = Path}) when + is_binary(Prefix), is_binary(Value) +-> + case {Neg, string:prefix(Value, Prefix)} of + {true, nomatch} -> []; + {false, M} when M /= nomatch -> []; + _ -> [#failure{op = beginsWith, params = [Prefix], negate = Neg, path = Path}] end; % When Value is not a string, do not match -match({[{<<"$beginsWith">>, Prefix}]}, Value, Ctx) when is_binary(Prefix) -> - [#failure{op = beginsWith, type = bad_value, params = [Value], path = Ctx#ctx.path}]; -match({[{<<"$regex">>, Regex}]}, Value, Ctx) when is_binary(Value) -> +match({[{<<"$beginsWith">>, _Prefix}]}, _Value, #ctx{negate = true}) -> + []; +match({[{<<"$beginsWith">>, _Prefix}]}, Value, #ctx{path = Path}) -> + [#failure{op = beginsWith, type = bad_value, params = [Value], path = Path}]; +match({[{<<"$regex">>, Regex}]}, Value, #ctx{negate = Neg, path = Path}) when is_binary(Value) -> try - case re:run(Value, Regex, [{capture, none}]) of - match -> []; - _ -> [#failure{op = regex, params = [Regex], path = Ctx#ctx.path}] + case {Neg, re:run(Value, Regex, [{capture, none}])} of + {false, match} -> []; + {true, M} when M /= match -> []; + _ -> [#failure{op = regex, params = [Regex], negate = Neg, path = Path}] end catch _:_ -> - [#failure{op = regex, params = [Regex], path = Ctx#ctx.path}] + [#failure{op = regex, params = [Regex], negate = Neg, path = Path}] end; -match({[{<<"$regex">>, _}]}, Value, 
Ctx) -> - [#failure{op = regex, type = bad_value, params = [Value], path = Ctx#ctx.path}]; -match({[{<<"$size">>, Arg}]}, Values, Ctx) when is_list(Values) -> - case length(Values) of - Arg -> []; - _ -> [#failure{op = size, params = [Arg], path = Ctx#ctx.path}] +match({[{<<"$regex">>, _}]}, _Value, #ctx{negate = true}) -> + []; +match({[{<<"$regex">>, _}]}, Value, #ctx{path = Path}) -> + [#failure{op = regex, type = bad_value, params = [Value], path = Path}]; +match({[{<<"$size">>, Arg}]}, Values, #ctx{negate = Neg, path = Path}) when is_list(Values) -> + case {Neg, length(Values)} of + {false, Arg} -> []; + {true, Len} when Len /= Arg -> []; + _ -> [#failure{op = size, params = [Arg], negate = Neg, path = Path}] end; -match({[{<<"$size">>, _}]}, Value, Ctx) -> - [#failure{op = size, type = bad_value, params = [Value], path = Ctx#ctx.path}]; +match({[{<<"$size">>, _}]}, _Value, #ctx{negate = true}) -> + []; +match({[{<<"$size">>, _}]}, Value, #ctx{path = Path}) -> + [#failure{op = size, type = bad_value, params = [Value], path = Path}]; % We don't have any choice but to believe that the text % index returned valid matches match({[{<<"$default">>, _}]}, _Value, _Ctx) -> From 7799f642d9767d8ef5e04a4789d782b9c49eea83 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Tue, 20 Jan 2026 16:43:46 +0000 Subject: [PATCH 10/16] [wip] verbose mode --- src/mango/src/mango_selector.erl | 502 +++++++++++++++++------------ src/mango/test/03-operator-test.py | 2 +- 2 files changed, 298 insertions(+), 206 deletions(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index ecbadf57c21..32d0426a5d5 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -15,6 +15,7 @@ -export([ normalize/1, match/2, + match_failures/2, has_required_fields/2, is_constant_field/2, fields/1 @@ -25,6 +26,7 @@ -record(ctx, { cmp, + verbose = false, negate = false, path = [] }). 
@@ -33,8 +35,7 @@ op, type = mismatch, params = [], - negate = false, - path = [] + ctx }). % Validate and normalize each operator. This translates @@ -67,20 +68,19 @@ match(Selector, D) -> couch_stats:increment_counter([mango, evaluate_selector]), match_int(Selector, D). +match_failures(Selector, D) -> + couch_stats:increment_counter([mango, evaluate_selector]), + match_int(Selector, D, true). + match_int(Selector, D) -> - case match_failures(Selector, D) of - [] -> true; - [_ | _] -> false; - Other -> Other - end. + match_int(Selector, D, false). -% An empty selector matches any value. -match_failures({[]}, _) -> - []; -match_failures(Selector, #doc{body = Body}) -> - match_failures(Selector, Body); -match_failures(Selector, {Props}) -> - match(Selector, {Props}, #ctx{cmp = fun mango_json:cmp/2}). +match_int(Selector, D, Verbose) -> + Ctx = #ctx{cmp = fun mango_json:cmp/2, verbose = Verbose}, + case D of + #doc{body = Body} -> match(Selector, Body, Ctx); + Other -> match(Selector, Other, Ctx) + end. % Convert each operator into a normalized version as well % as convert an implicit operators into their explicit @@ -352,10 +352,6 @@ negate({[{<<"$and">>, Args}]}) -> {[{<<"$or">>, [negate(A) || A <- Args]}]}; negate({[{<<"$or">>, Args}]}) -> {[{<<"$and">>, [negate(A) || A <- Args]}]}; -negate({[{<<"$elemMatch">>, Arg}]}) -> - {[{<<"$allMatch">>, negate(Arg)}]}; -negate({[{<<"$allMatch">>, Arg}]}) -> - {[{<<"$elemMatch">>, negate(Arg)}]}; negate({[{<<"$default">>, _}]} = Arg) -> ?MANGO_ERROR({bad_arg, '$not', Arg}); % Negating comparison operators is straight forward @@ -387,59 +383,76 @@ negate({[{<<"$", _/binary>>, _}]} = Cond) -> negate({[{Field, Cond}]}) -> {[{Field, negate(Cond)}]}. +% An empty selector matches any value. +match({[]}, _, #ctx{verbose = false}) -> + true; +match({[]}, _, #ctx{verbose = true}) -> + []; % We need to treat an empty array as always true. This will be applied % for $or, $in, $all, $nin as well. 
-match({[{<<"$and">>, []}]}, _, _) -> +match({[{<<"$and">>, []}]}, _, #ctx{verbose = false}) -> + true; +match({[{<<"$and">>, []}]}, _, #ctx{negate = false}) -> []; +match({[{<<"$and">>, []}]}, _, Ctx) -> + [#failure{op = 'and', type = empty_list, params = [[]], ctx = Ctx}]; +match({[{<<"$and">>, Args}]}, Value, #ctx{verbose = false} = Ctx) -> + Pred = fun(SubSel) -> match(SubSel, Value, Ctx) end, + lists:all(Pred, Args); +match({[{<<"$and">>, Args}]}, Value, #ctx{negate = true} = Ctx) -> + NotArgs = [{[{<<"$not">>, A}]} || A <- Args], + PosCtx = Ctx#ctx{negate = false}, + match({[{<<"$or">>, NotArgs}]}, Value, PosCtx); match({[{<<"$and">>, Args}]}, Value, Ctx) -> MatchSubSel = fun(SubSel) -> match(SubSel, Value, Ctx) end, lists:flatmap(MatchSubSel, Args); -match({[{<<"$or">>, []}]}, _, _) -> +match({[{<<"$or">>, []}]}, _, #ctx{verbose = false}) -> + true; +match({[{<<"$or">>, []}]}, _, #ctx{negate = false}) -> []; +match({[{<<"$or">>, []}]}, _, Ctx) -> + [#failure{op = 'or', type = empty_list, params = [[]], ctx = Ctx}]; +match({[{<<"$or">>, Args}]}, Value, #ctx{verbose = false} = Ctx) -> + Pred = fun(SubSel) -> match(SubSel, Value, Ctx) end, + lists:any(Pred, Args); +match({[{<<"$or">>, Args}]}, Value, #ctx{negate = true} = Ctx) -> + NotArgs = [{[{<<"$not">>, A}]} || A <- Args], + PosCtx = Ctx#ctx{negate = false}, + match({[{<<"$and">>, NotArgs}]}, Value, PosCtx); match({[{<<"$or">>, Args}]}, Value, Ctx) -> SubSelFailures = [match(A, Value, Ctx) || A <- Args], - case lists:any(fun(Res) -> Res == [] end, SubSelFailures) of + case lists:member([], SubSelFailures) of true -> []; - _ -> lists:flatten(SubSelFailures) + false -> lists:flatten(SubSelFailures) end; +match({[{<<"$not">>, Arg}]}, Value, #ctx{verbose = false} = Ctx) -> + not match(Arg, Value, Ctx); match({[{<<"$not">>, Arg}]}, Value, #ctx{negate = Neg} = Ctx) -> match(Arg, Value, Ctx#ctx{negate = not Neg}); +match({[{<<"$all">>, []}]}, _, #ctx{verbose = false}) -> + false; +match({[{<<"$all">>, []}]}, _, 
#ctx{negate = false} = Ctx) -> + [#failure{op = all, type = empty_list, params = [[]], ctx = Ctx}]; +match({[{<<"$all">>, []}]}, _, #ctx{negate = true}) -> + []; % All of the values in Args must exist in Values or % Values == hd(Args) if Args is a single element list % that contains a list. -match({[{<<"$all">>, []}]}, _Values, #ctx{negate = Neg, path = Path}) -> - % { "$all": [] } is defined to eval to false, so return a failure - case Neg of - true -> []; - false -> [#failure{op = all, params = [[]], negate = Neg, path = Path}] - end; -match({[{<<"$all">>, [A]}]}, Values, #ctx{negate = Neg, path = Path}) when - is_list(A), A == Values --> - case Neg of - true -> [#failure{op = all, params = [[A]], negate = Neg, path = Path}]; - false -> [] - end; -match({[{<<"$all">>, Args}]}, Values, #ctx{negate = true, path = Path}) when is_list(Values) -> - ArgResults = [lists:member(Arg, Values) || Arg <- Args], - case lists:member(false, ArgResults) of - true -> []; - false -> [#failure{op = app, params = [Args], negate = true, path = Path}] - end; -match({[{<<"$all">>, Args}]}, Values, #ctx{path = Path}) when is_list(Values) -> - lists:flatmap( - fun(Arg) -> - case lists:member(Arg, Values) of - true -> []; - _ -> [#failure{op = all, params = [Arg], path = Path}] - end +match({[{<<"$all">>, Args}]}, Values, #ctx{verbose = false}) when is_list(Values) -> + Pred = fun(A) -> lists:member(A, Values) end, + HasArgs = lists:all(Pred, Args), + IsArgs = + case Args of + [A] when is_list(A) -> + A == Values; + _ -> + false end, - Args - ); -match({[{<<"$all">>, _}]}, _Value, #ctx{negate = true}) -> - []; -match({[{<<"$all">>, _}]}, Value, Ctx) -> - [#failure{op = all, type = bad_value, params = [Value], path = Ctx#ctx.path}]; + HasArgs orelse IsArgs; +match({[{<<"$all">>, _Args}]}, _Values, #ctx{verbose = false}) -> + false; +match({[{<<"$all">>, Args}]} = Expr, Values, Ctx) -> + match_with_failure(Expr, Values, all, [Args], Ctx); %% This is for $elemMatch, $allMatch, and possibly 
$in because of our normalizer. %% A selector such as {"field_name": {"$elemMatch": {"$gte": 80, "$lt": 85}}} %% gets normalized to: @@ -456,58 +469,130 @@ match({[{<<>>, Arg}]}, Values, Ctx) -> match(Arg, Values, Ctx); % Matches when any element in values matches the % sub-selector Arg. -match({[{<<"$elemMatch">>, _Arg}]}, [], Ctx) -> - [#failure{op = elemMatch, type = empty_list, path = Ctx#ctx.path}]; -match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) when is_list(Values) -> +match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{verbose = false} = Ctx) when is_list(Values) -> + try + lists:foreach( + fun(V) -> + case match(Arg, V, Ctx) of + true -> throw(matched); + _ -> ok + end + end, + Values + ), + false + catch + throw:matched -> + true; + _:_ -> + false + end; +match({[{<<"$elemMatch">>, _Arg}]}, _Value, #ctx{verbose = false}) -> + false; +match({[{<<"$elemMatch">>, _Arg}]}, [], #ctx{negate = false} = Ctx) -> + [#failure{op = elemMatch, type = empty_list, ctx = Ctx}]; +match({[{<<"$elemMatch">>, _Arg}]}, [], #ctx{negate = true}) -> + []; +match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{negate = true} = Ctx) -> + PosCtx = Ctx#ctx{negate = false}, + match({[{<<"$allMatch">>, {[{<<"$not">>, Arg}]}}]}, Values, PosCtx); +match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) -> ValueFailures = [ match(Arg, V, Ctx#ctx{path = [Idx | Path]}) || {Idx, V} <- lists:enumerate(0, Values) ], case lists:member([], ValueFailures) of true -> []; - _ -> lists:flatten(ValueFailures) + false -> lists:flatten(ValueFailures) end; -match({[{<<"$elemMatch">>, _}]}, Value, Ctx) -> - [#failure{op = elemMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % Matches when all elements in values match the % sub-selector Arg. 
-match({[{<<"$allMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) when is_list(Values) -> +match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, #ctx{verbose = false} = Ctx) -> + try + lists:foreach( + fun(V) -> + case match(Arg, V, Ctx) of + false -> throw(unmatched); + _ -> ok + end + end, + Values + ), + true + catch + _:_ -> + false + end; +match({[{<<"$allMatch">>, _Arg}]}, _Value, #ctx{verbose = false}) -> + false; +match({[{<<"$allMatch">>, _Arg}]}, [], #ctx{negate = false} = Ctx) -> + [#failure{op = allMatch, type = empty_list, ctx = Ctx}]; +match({[{<<"$allMatch">>, _Arg}]}, [], #ctx{negate = true}) -> + []; +match({[{<<"$allMatch">>, Arg}]}, Values, #ctx{negate = true} = Ctx) -> + PosCtx = Ctx#ctx{negate = false}, + match({[{<<"$elemMatch">>, {[{<<"$not">>, Arg}]}}]}, Values, PosCtx); +match({[{<<"$allMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) -> + MatchValue = fun({Idx, V}) -> match(Arg, V, Ctx#ctx{path = [Idx | Path]}) end, EnumValues = lists:enumerate(0, Values), - MatchValue = fun({Idx, Value}) -> match(Arg, Value, Ctx#ctx{path = [Idx | Path]}) end, lists:flatmap(MatchValue, EnumValues); -match({[{<<"$allMatch">>, _}]}, Value, Ctx) -> - [#failure{op = allMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % Matches when any key in the map value matches the % sub-selector Arg. 
+match({[{<<"$keyMapMatch">>, Arg}]}, Value, #ctx{verbose = false} = Ctx) when is_tuple(Value) -> + try + lists:foreach( + fun(V) -> + case match(Arg, V, Ctx) of + true -> throw(matched); + _ -> ok + end + end, + [Key || {Key, _} <- element(1, Value)] + ), + false + catch + throw:matched -> + true; + _:_ -> + false + end; +match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, #ctx{verbose = false}) -> + false; +match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, #ctx{negate = false} = Ctx) -> + [#failure{op = keyMapMatch, type = empty_list, ctx = Ctx}]; match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, #ctx{negate = true}) -> []; -match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, #ctx{path = Path}) -> - [#failure{op = keyMapMatch, type = empty_list, path = Path}]; -match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, #ctx{negate = true} = Ctx) when is_list(Value) -> - MatchKey = fun(K) -> match(Arg, K, Ctx) end, - lists:flatmap(MatchKey, [K || {K, _} <- Value]); -match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Ctx) when is_list(Value) -> - KeyFailures = [match(Arg, K, Ctx) || {K, _} <- Value], +match({[{<<"$keyMapMatch">>, Arg}]}, Value, #ctx{negate = true, path = Path} = Ctx) when + is_tuple(Value) +-> + Keys = [Key || {Key, _} <- element(1, Value)], + MatchKey = fun(K) -> match(Arg, K, Ctx#ctx{path = [K | Path]}) end, + lists:flatmap(MatchKey, Keys); +match({[{<<"$keyMapMatch">>, Arg}]}, Value, #ctx{path = Path} = Ctx) when is_tuple(Value) -> + Keys = [Key || {Key, _} <- element(1, Value)], + KeyFailures = [match(Arg, K, Ctx#ctx{path = [K | Path]}) || K <- Keys], case lists:member([], KeyFailures) of true -> []; - _ -> lists:flatten(KeyFailures) + false -> lists:flatten(KeyFailures) end; -match({[{<<"$keyMapMatch">>, _}]}, Value, Ctx) -> - [#failure{op = keyMapMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; +match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, Ctx) -> + [#failure{op = keyMapMatch, type = bad_value, ctx = Ctx}]; % Our comparison operators are fairly straight forward 
-match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> - compare(lt, Arg, Path, Cmp(Value, Arg) < 0); -match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> - compare(lte, Arg, Path, Cmp(Value, Arg) =< 0); -match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> - compare(eq, Arg, Path, Cmp(Value, Arg) == 0); -match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> - compare(ne, Arg, Path, Cmp(Value, Arg) /= 0); -match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> - compare(gte, Arg, Path, Cmp(Value, Arg) >= 0); -match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> - compare(gt, Arg, Path, Cmp(Value, Arg) > 0); -match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list(Values) -> +match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> + compare(lt, Arg, Ctx, Cmp(Value, Arg) < 0); +match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> + compare(lte, Arg, Ctx, Cmp(Value, Arg) =< 0); +match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> + compare(eq, Arg, Ctx, Cmp(Value, Arg) == 0); +match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> + compare(ne, Arg, Ctx, Cmp(Value, Arg) /= 0); +match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> + compare(gte, Arg, Ctx, Cmp(Value, Arg) >= 0); +match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> + compare(gt, Arg, Ctx, Cmp(Value, Arg) > 0); +match({[{<<"$in">>, []}]}, _, #ctx{verbose = false}) -> + false; +match({[{<<"$in">>, Args}]}, Values, #ctx{verbose = false, cmp = Cmp}) when is_list(Values) -> Pred = fun(Arg) -> lists:foldl( fun(Value, Match) -> @@ -517,103 +602,68 @@ match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list( Values ) end, - case lists:any(Pred, Args) of - true -> []; - _ -> [#failure{op = in, params = [Args], path = Path}] - end; -match({[{<<"$in">>, Args}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + lists:any(Pred, Args); 
+match({[{<<"$in">>, Args}]}, Value, #ctx{verbose = false, cmp = Cmp}) -> Pred = fun(Arg) -> Cmp(Value, Arg) == 0 end, - case lists:any(Pred, Args) of - true -> []; - _ -> [#failure{op = in, params = [Args], path = Path}] - end; -match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list(Values) -> - Pred = fun(Arg) -> - lists:foldl( - fun(Value, Match) -> - (Cmp(Value, Arg) /= 0) and Match - end, - true, - Values - ) - end, - case lists:all(Pred, Args) of - true -> []; - _ -> [#failure{op = nin, params = [Args], path = Path}] - end; -match({[{<<"$nin">>, Args}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + lists:any(Pred, Args); +match({[{<<"$in">>, Args}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, in, [Args], Ctx); +match({[{<<"$nin">>, []}]}, _, #ctx{verbose = false}) -> + true; +match({[{<<"$nin">>, Args}]}, Values, #ctx{verbose = false} = Ctx) when is_list(Values) -> + not match({[{<<"$in">>, Args}]}, Values, Ctx); +match({[{<<"$nin">>, Args}]}, Value, #ctx{verbose = false, cmp = Cmp}) -> Pred = fun(Arg) -> Cmp(Value, Arg) /= 0 end, - case lists:all(Pred, Args) of - true -> []; - _ -> [#failure{op = nin, params = [Args], path = Path}] - end; + lists:all(Pred, Args); +match({[{<<"$nin">>, Args}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, nin, [Args], Ctx); % This logic is a bit subtle. Basically, if value is % not undefined, then it exists. 
-match({[{<<"$exists">>, ShouldExist}]}, Value, Ctx) -> - case {ShouldExist, Value} of - {true, undefined} -> [#failure{op = exists, params = [ShouldExist], path = Ctx#ctx.path}]; - {true, _} -> []; - {false, undefined} -> []; - {false, _} -> [#failure{op = exists, params = [ShouldExist], path = Ctx#ctx.path}] - end; -match({[{<<"$type">>, Arg}]}, Value, #ctx{negate = Neg, path = Path}) when is_binary(Arg) -> - case {Neg, mango_json:type(Value)} of - {false, Arg} -> []; - {true, Type} when Type /= Arg -> []; - _ -> [#failure{op = type, params = [Arg], negate = Neg, path = Path}] - end; -match({[{<<"$mod">>, [D, R]}]}, Value, #ctx{negate = Neg, path = Path}) when is_integer(Value) -> - case {Neg, Value rem D} of - {false, R} -> []; - {true, Rem} when Rem /= R -> []; - _ -> [#failure{op = mod, params = [D, R], negate = Neg, path = Path}] - end; -match({[{<<"$mod">>, _}]}, _Value, #ctx{negate = true}) -> - []; -match({[{<<"$mod">>, _}]}, Value, #ctx{path = Path}) -> - [#failure{op = mod, type = bad_value, params = [Value], path = Path}]; -match({[{<<"$beginsWith">>, Prefix}]}, Value, #ctx{negate = Neg, path = Path}) when +match({[{<<"$exists">>, ShouldExist}]}, Value, #ctx{verbose = false}) -> + Exists = Value /= undefined, + ShouldExist andalso Exists; +match({[{<<"$exists">>, ShouldExist}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, exists, [ShouldExist], Ctx); +match({[{<<"$type">>, Arg}]}, Value, #ctx{verbose = false}) when is_binary(Arg) -> + Arg == mango_json:type(Value); +match({[{<<"$type">>, Arg}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, type, [Arg], Ctx); +match({[{<<"$mod">>, [D, R]}]}, Value, #ctx{verbose = false}) when is_integer(Value) -> + Value rem D == R; +match({[{<<"$mod">>, _}]}, _Value, #ctx{verbose = false}) -> + false; +match({[{<<"$mod">>, [D, R]}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, mod, [D, R], Ctx); +match({[{<<"$beginsWith">>, Prefix}]}, Value, #ctx{verbose = false}) when 
is_binary(Prefix), is_binary(Value) -> - case {Neg, string:prefix(Value, Prefix)} of - {true, nomatch} -> []; - {false, M} when M /= nomatch -> []; - _ -> [#failure{op = beginsWith, params = [Prefix], negate = Neg, path = Path}] - end; + string:prefix(Value, Prefix) /= nomatch; % When Value is not a string, do not match -match({[{<<"$beginsWith">>, _Prefix}]}, _Value, #ctx{negate = true}) -> - []; -match({[{<<"$beginsWith">>, _Prefix}]}, Value, #ctx{path = Path}) -> - [#failure{op = beginsWith, type = bad_value, params = [Value], path = Path}]; -match({[{<<"$regex">>, Regex}]}, Value, #ctx{negate = Neg, path = Path}) when is_binary(Value) -> +match({[{<<"$beginsWith">>, Prefix}]}, _, #ctx{verbose = false}) when is_binary(Prefix) -> + false; +match({[{<<"$beginsWith">>, Prefix}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, beginsWith, [Prefix], Ctx); +match({[{<<"$regex">>, Regex}]}, Value, #ctx{verbose = false}) when is_binary(Value) -> try - case {Neg, re:run(Value, Regex, [{capture, none}])} of - {false, match} -> []; - {true, M} when M /= match -> []; - _ -> [#failure{op = regex, params = [Regex], negate = Neg, path = Path}] - end + match == re:run(Value, Regex, [{capture, none}]) catch _:_ -> - [#failure{op = regex, params = [Regex], negate = Neg, path = Path}] + false end; -match({[{<<"$regex">>, _}]}, _Value, #ctx{negate = true}) -> - []; -match({[{<<"$regex">>, _}]}, Value, #ctx{path = Path}) -> - [#failure{op = regex, type = bad_value, params = [Value], path = Path}]; -match({[{<<"$size">>, Arg}]}, Values, #ctx{negate = Neg, path = Path}) when is_list(Values) -> - case {Neg, length(Values)} of - {false, Arg} -> []; - {true, Len} when Len /= Arg -> []; - _ -> [#failure{op = size, params = [Arg], negate = Neg, path = Path}] - end; -match({[{<<"$size">>, _}]}, _Value, #ctx{negate = true}) -> - []; -match({[{<<"$size">>, _}]}, Value, #ctx{path = Path}) -> - [#failure{op = size, type = bad_value, params = [Value], path = Path}]; 
+match({[{<<"$regex">>, _}]}, _Value, #ctx{verbose = false}) ->
+    false;
+match({[{<<"$regex">>, Regex}]} = Expr, Value, Ctx) ->
+    match_with_failure(Expr, Value, regex, [Regex], Ctx);
+match({[{<<"$size">>, Arg}]}, Values, #ctx{verbose = false}) when is_list(Values) ->
+    length(Values) == Arg;
+match({[{<<"$size">>, _}]}, _Value, #ctx{verbose = false}) ->
+    false;
+match({[{<<"$size">>, Arg}]} = Expr, Value, Ctx) ->
+    match_with_failure(Expr, Value, size, [Arg], Ctx);
 % We don't have any choice but to believe that the text
 % index returned valid matches
-match({[{<<"$default">>, _}]}, _Value, _Ctx) ->
-    [];
+match({[{<<"$default">>, _}]}, _Value, #ctx{verbose = Verbose}) -> % always a match, in both modes
+    case Verbose of true -> []; false -> true end; % verbose callers expect a failure list
 % All other operators are internal assertion errors for
 % matching because we either should've removed them during
 % normalization or something else broke.
@@ -622,15 +672,24 @@ match({[{<<"$", _/binary>> = Op, _}]}, _, _) ->
 % We need to traverse value to find field. The call to
 % mango_doc:get_field/2 may return either not_found or
 % bad_path in which case matching fails.
-match({[{Field, Cond}]}, Value, #ctx{path = Path} = Ctx) -> +match({[{Field, Cond}]}, Value, #ctx{verbose = Verb, path = Path} = Ctx) -> InnerCtx = Ctx#ctx{path = [Field | Path]}, case mango_doc:get_field(Value, Field) of not_found when Cond == {[{<<"$exists">>, false}]} -> - []; + case Verb of + true -> []; + false -> true + end; not_found -> - [#failure{op = field, type = not_found, path = InnerCtx#ctx.path}]; + case Verb of + true -> [#failure{op = field, type = not_found, ctx = InnerCtx}]; + false -> false + end; bad_path -> - [#failure{op = field, type = bad_path, path = InnerCtx#ctx.path}]; + case Verb of + true -> [#failure{op = field, type = bad_path, ctx = InnerCtx}]; + false -> false + end; SubValue when Field == <<"_id">> -> match(Cond, SubValue, InnerCtx#ctx{cmp = fun mango_json:cmp_raw/2}); SubValue -> @@ -639,10 +698,18 @@ match({[{Field, Cond}]}, Value, #ctx{path = Path} = Ctx) -> match({[_, _ | _] = _Props} = Sel, _Value, _Ctx) -> error({unnormalized_selector, Sel}). -compare(Op, Arg, Path, Cond) -> - case Cond of - true -> []; - _ -> [#failure{op = Op, params = [Arg], path = Path}] +match_with_failure(Expr, Value, Op, Params, #ctx{negate = Neg} = Ctx) -> + case not match(Expr, Value, Ctx#ctx{verbose = false}) of + Neg -> []; + _ -> [#failure{op = Op, params = Params, ctx = Ctx}] + end. + +compare(_, _, #ctx{verbose = false}, Cond) -> + Cond; +compare(Op, Arg, #ctx{negate = Neg} = Ctx, Cond) -> + case not Cond of + Neg -> []; + _ -> [#failure{op = Op, params = [Arg], ctx = Ctx}] end. 
% Returns true if Selector requires all @@ -1171,10 +1238,21 @@ check_selector(Selector, Results) -> SelPos = normalize({[{<<"x">>, Selector}]}), SelNeg = normalize({[{<<"x">>, {[{<<"$not">>, Selector}]}}]}), + ListToBool = fun(List) -> + case List of + [] -> true; + [_ | _] -> false + end + end, + Check = fun({Result, Value}) -> Doc = {[{<<"x">>, Value}]}, - ?assertEqual(Result, match_int(SelPos, Doc)), - ?assertEqual(not Result, match_int(SelNeg, Doc)) + + ?assertEqual(Result, match_int(SelPos, Doc, false)), + ?assertEqual(Result, ListToBool(match_int(SelPos, Doc, true))), + + ?assertEqual(not Result, match_int(SelNeg, Doc, false)), + ?assertEqual(not Result, ListToBool(match_int(SelNeg, Doc, true))) end, lists:foreach(Check, Results). @@ -1544,10 +1622,9 @@ match_size_test() -> ]). match_allmatch_test() -> - % TODO: we have made a breaking change and made $allMatch return true for - % empty lists, since this makes negation consistent + % $allMatch is defined to return false for empty lists check_selector({[{<<"$allMatch">>, {[{<<"$eq">>, 0}]}}]}, [ - {true, []}, + {false, []}, {true, [0]}, {false, [1]}, {false, [0, 1]} @@ -1808,8 +1885,8 @@ match_failures_object_test() -> {<<"b">>, {[{<<"c">>, 3}]}} ]} ), - ?assertEqual( - [#failure{op = eq, type = mismatch, params = [1], path = [<<"a">>]}], + ?assertMatch( + [#failure{op = eq, type = mismatch, params = [1], ctx = #ctx{path = [<<"a">>]}}], Fails1 ), @@ -1820,8 +1897,8 @@ match_failures_object_test() -> {<<"b">>, {[{<<"c">>, 4}]}} ]} ), - ?assertEqual( - [#failure{op = eq, type = mismatch, params = [3], path = [<<"b.c">>]}], + ?assertMatch( + [#failure{op = eq, type = mismatch, params = [3], ctx = #ctx{path = [<<"b.c">>]}}], Fails2 ). 
@@ -1843,18 +1920,18 @@ match_failures_elemmatch_test() -> Fails1 = match_failures( SelElemMatch, {[{<<"a">>, []}]} ), - ?assertEqual( - [#failure{op = elemMatch, type = empty_list, params = [], path = [<<"a">>]}], + ?assertMatch( + [#failure{op = elemMatch, type = empty_list, params = [], ctx = #ctx{path = [<<"a">>]}}], Fails1 ), Fails2 = match_failures( SelElemMatch, {[{<<"a">>, [3, 2]}]} ), - ?assertEqual( + ?assertMatch( [ - #failure{op = gt, type = mismatch, params = [4], path = [0, <<"a">>]}, - #failure{op = gt, type = mismatch, params = [4], path = [1, <<"a">>]} + #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [0, <<"a">>]}}, + #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [1, <<"a">>]}} ], Fails2 ). @@ -1877,18 +1954,18 @@ match_failures_allmatch_test() -> Fails1 = match_failures( SelAllMatch, {[{<<"a">>, [4]}]} ), - ?assertEqual( - [#failure{op = gt, type = mismatch, params = [4], path = [0, <<"a">>]}], + ?assertMatch( + [#failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [0, <<"a">>]}}], Fails1 ), Fails2 = match_failures( SelAllMatch, {[{<<"a">>, [5, 6, 3, 7, 0]}]} ), - ?assertEqual( + ?assertMatch( [ - #failure{op = gt, type = mismatch, params = [4], path = [2, <<"a">>]}, - #failure{op = gt, type = mismatch, params = [4], path = [4, <<"a">>]} + #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [2, <<"a">>]}}, + #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [4, <<"a">>]}} ], Fails2 ). 
@@ -1911,8 +1988,12 @@ match_failures_allmatch_object_test() -> Fails1 = match_failures( SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 4}]}]}]}}]} ), - ?assertEqual( - [#failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 0, <<"a.b">>]}], + ?assertMatch( + [ + #failure{ + op = gt, type = mismatch, params = [4], ctx = #ctx{path = [<<"c">>, 0, <<"a.b">>]} + } + ], Fails1 ), @@ -1920,8 +2001,12 @@ match_failures_allmatch_object_test() -> SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}, {[{<<"c">>, 6}]}, {[{<<"c">>, 3}]}]}]}}]} ), - ?assertEqual( - [#failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 2, <<"a.b">>]}], + ?assertMatch( + [ + #failure{ + op = gt, type = mismatch, params = [4], ctx = #ctx{path = [<<"c">>, 2, <<"a.b">>]} + } + ], Fails2 ), @@ -1929,10 +2014,17 @@ match_failures_allmatch_object_test() -> SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 1}]}, {[]}]}]}}]} ), - ?assertEqual( + ?assertMatch( [ - #failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 0, <<"a.b">>]}, - #failure{op = field, type = not_found, params = [], path = [<<"c">>, 1, <<"a.b">>]} + #failure{ + op = gt, type = mismatch, params = [4], ctx = #ctx{path = [<<"c">>, 0, <<"a.b">>]} + }, + #failure{ + op = field, + type = not_found, + params = [], + ctx = #ctx{path = [<<"c">>, 1, <<"a.b">>]} + } ], Fails3 ). 
diff --git a/src/mango/test/03-operator-test.py b/src/mango/test/03-operator-test.py index 81f5470819c..1dfd1a72510 100644 --- a/src/mango/test/03-operator-test.py +++ b/src/mango/test/03-operator-test.py @@ -69,7 +69,7 @@ def test_empty_all_match(self): amdocs = [{"bad_doc": "a", "emptybang": []}] self.db.save_docs(amdocs, w=3) docs = self.db.find({"emptybang": {"$allMatch": {"foo": {"$eq": 2}}}}) - self.assertEqual(len(docs), 1) + self.assertEqual(len(docs), 0) def test_in_operator_array(self): docs = self.db.find( From d0e7ac6e3a2c8241039908af91877a8dc549a1a9 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Mon, 26 Jan 2026 11:10:58 +0000 Subject: [PATCH 11/16] [wip] install erlperf --- .gitignore | 2 ++ rebar.config.script | 4 +++- test/fixtures/allowed-xref.txt | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 28dcadf05f7..cf6e1ccc4f0 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ share/server/main-coffee.js share/server/main.js share/server/main-ast-bypass.js share/www +src/argparse/ src/bear/ src/certifi/ src/couch/priv/couch_js/**/config.h @@ -55,6 +56,7 @@ src/couch/priv/couch_js/**/*.d src/couch/priv/icu_driver/couch_icu_driver.d src/cowlib/ src/mango/src/mango_cursor_text.nocompile +src/erlperf/ src/excoveralls/ src/fauxton/ src/folsom/ diff --git a/rebar.config.script b/rebar.config.script index efc03a35e64..91c91f4a1dd 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -164,7 +164,9 @@ DepDescs = [ {jiffy, "jiffy", {tag, "1.1.2"}}, {mochiweb, "mochiweb", {tag, "v3.3.0"}}, {meck, "meck", {tag, "v1.1.0"}}, -{recon, "recon", {tag, "2.5.6"}} +{recon, "recon", {tag, "2.5.6"}}, +{argparse, {url, "https://github.com/max-au/argparse"}, "1.2.4"}, +{erlperf, {url, "https://github.com/max-au/erlperf"}, "2.3.0"} ]. WithProper = lists:keyfind(with_proper, 1, CouchConfig) == {with_proper, true}. 
diff --git a/test/fixtures/allowed-xref.txt b/test/fixtures/allowed-xref.txt index c630fc109e1..8292d692b0f 100644 --- a/test/fixtures/allowed-xref.txt +++ b/test/fixtures/allowed-xref.txt @@ -1,2 +1,6 @@ +src/erlperf_cli.erl:{102,1}: Warning: erlperf_cli:main/1 calls undefined function args:format_error/3 (Xref) +src/erlperf_cli.erl:{102,1}: Warning: erlperf_cli:main/1 calls undefined function args:parse/3 (Xref) src/ioq.erl: Warning: ioq:get_disk_queues/0 is undefined function (Xref) src/weatherreport_check_ioq.erl:{95,1}: Warning: weatherreport_check_ioq:check_legacy_int/1 calls undefined function ioq:get_disk_queues/0 (Xref) +Warning: args:format_error/3 is undefined function (Xref) +Warning: args:parse/3 is undefined function (Xref) From 711fddb27a4e1a3b410f13366e2ebf600d37daac Mon Sep 17 00:00:00 2001 From: James Coglan Date: Mon, 26 Jan 2026 11:12:37 +0000 Subject: [PATCH 12/16] [wip] some benches --- src/mango/src/mango_selector.erl | 37 ++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index 32d0426a5d5..66cddb93ce1 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -2029,4 +2029,41 @@ match_failures_allmatch_object_test() -> Fails3 ). +bench(Name, Selector, Doc) -> + Sel1 = normalize(Selector), + [Normal, Verbose] = erlperf:compare( + [ + #{runner => fun() -> match_int(Sel1, Doc, V) end} + || V <- [false, true] + ], + #{} + ), + ?debugFmt("~nbench[~s: normal ] = ~p~n", [Name, Normal]), + ?debugFmt("~nbench[~s: verbose] = ~p~n", [Name, Verbose]). + +bench_and_test() -> + Sel = + {[ + {<<"x">>, + {[ + {<<"$and">>, [{[{<<"$gt">>, V}]} || V <- [100, 200, 300, 400, 500]]} + ]}} + ]}, + Doc = {[{<<"x">>, 25}]}, + bench("$and", Sel, Doc). 
+ +bench_allmatch_test() -> + Sel = + {[ + {<<"x">>, + {[ + {<<"$allMatch">>, {[{<<"$gt">>, 10}]}} + ]}} + ]}, + Doc = + {[ + {<<"x">>, [0, 23, 45, 67, 89, 12, 34, 56, 78]} + ]}, + bench("$allMatch", Sel, Doc). + -endif. From 1c23788a9cc5acb081069f48cc8bf84d36e07d7e Mon Sep 17 00:00:00 2001 From: James Coglan Date: Fri, 30 Jan 2026 17:44:25 +0000 Subject: [PATCH 13/16] [wip] return failure report from mango VDU on PUT /db/doc --- src/couch/src/couch_query_servers.erl | 2 + src/mango/src/mango_native_proc.erl | 11 +- src/mango/src/mango_selector.erl | 152 ++++++++++++++---- test/elixir/test/validate_doc_update_test.exs | 3 + 4 files changed, 136 insertions(+), 32 deletions(-) diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl index 7ab662f850c..5d7e9ac1fab 100644 --- a/src/couch/src/couch_query_servers.erl +++ b/src/couch/src/couch_query_servers.erl @@ -490,6 +490,8 @@ validate_doc_update(Db, DDoc, EditDoc, DiskDoc, Ctx, SecObj) -> case Resp of ok -> ok; + {[{<<"forbidden">>, Message}, {<<"failures">>, Failures}]} -> + throw({forbidden, Message, Failures}); {[{<<"forbidden">>, Message}]} -> throw({forbidden, Message}); {[{<<"unauthorized">>, Message}]} -> diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl index edcecd4b6fb..b540047e509 100644 --- a/src/mango/src/mango_native_proc.erl +++ b/src/mango/src/mango_native_proc.erl @@ -115,9 +115,14 @@ handle_call({prompt, [<<"ddoc">>, DDocId, [<<"validate_doc_update">>], Args]}, _ [NewDoc, OldDoc, _Ctx, _SecObj] = Args, Struct = {[{<<"newDoc">>, NewDoc}, {<<"oldDoc">>, OldDoc}]}, Reply = - case mango_selector:match(Selector, Struct) of - true -> true; - _ -> {[{<<"forbidden">>, <<"document is not valid">>}]} + case mango_selector:match_failures(Selector, Struct) of + [] -> + true; + Failures -> + {[ + {<<"forbidden">>, <<"forbidden">>}, + {<<"failures">>, Failures} + ]} end, {reply, Reply, St} end; diff --git a/src/mango/src/mango_selector.erl 
b/src/mango/src/mango_selector.erl index 66cddb93ce1..b752059ef39 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -70,7 +70,7 @@ match(Selector, D) -> match_failures(Selector, D) -> couch_stats:increment_counter([mango, evaluate_selector]), - match_int(Selector, D, true). + [format_failure(F) || F <- match_int(Selector, D, true)]. match_int(Selector, D) -> match_int(Selector, D, false). @@ -575,8 +575,8 @@ match({[{<<"$keyMapMatch">>, Arg}]}, Value, #ctx{path = Path} = Ctx) when is_tup true -> []; false -> lists:flatten(KeyFailures) end; -match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, Ctx) -> - [#failure{op = keyMapMatch, type = bad_value, ctx = Ctx}]; +match({[{<<"$keyMapMatch">>, _Arg}]}, Value, Ctx) -> + [#failure{op = keyMapMatch, type = bad_value, params = [Value], ctx = Ctx}]; % Our comparison operators are fairly straight forward match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> compare(lt, Arg, Ctx, Cmp(Value, Arg) < 0); @@ -712,6 +712,88 @@ compare(Op, Arg, #ctx{negate = Neg} = Ctx, Cond) -> _ -> [#failure{op = Op, params = [Arg], ctx = Ctx}] end. +format_failure(#failure{op = Op, type = Type, params = Params, ctx = Ctx}) -> + Path = format_path(Ctx#ctx.path), + Msg = format_op(Op, Ctx#ctx.negate, Type, Params), + {[{<<"path">>, Path}, {<<"message">>, list_to_binary(Msg)}]}. 
+% format_op(Op, Negated, FailureType, Params): io_lib text describing one match failure.
+format_op(Op, _, empty_list, _) ->
+    io_lib:format("operator $~p was invoked with an empty list", [Op]);
+format_op(Op, _, bad_value, [Value]) ->
+    io_lib:format("operator $~p was invoked with a bad value: ~s", [Op, jiffy:encode(Value)]);
+format_op(_, _, not_found, []) ->
+    io_lib:format("must be present", []);
+format_op(_, _, bad_path, []) ->
+    io_lib:format("used an invalid path", []);
+format_op(eq, false, mismatch, [X]) ->
+    io_lib:format("must be equal to ~s", [jiffy:encode(X)]);
+format_op(ne, false, mismatch, [X]) ->
+    io_lib:format("must not be equal to ~s", [jiffy:encode(X)]);
+format_op(lt, false, mismatch, [X]) ->
+    io_lib:format("must be less than ~s", [jiffy:encode(X)]);
+format_op(lte, false, mismatch, [X]) ->
+    io_lib:format("must be less than or equal to ~s", [jiffy:encode(X)]);
+format_op(gt, false, mismatch, [X]) ->
+    io_lib:format("must be greater than ~s", [jiffy:encode(X)]);
+format_op(gte, false, mismatch, [X]) ->
+    io_lib:format("must be greater than or equal to ~s", [jiffy:encode(X)]);
+format_op(in, false, mismatch, [X]) ->
+    io_lib:format("must be one of ~s", [jiffy:encode(X)]);
+format_op(nin, false, mismatch, [X]) ->
+    io_lib:format("must not be one of ~s", [jiffy:encode(X)]);
+format_op(all, false, mismatch, [X]) ->
+    io_lib:format("must contain all the values in ~s", [jiffy:encode(X)]);
+format_op(exists, false, mismatch, [true]) ->
+    io_lib:format("must be present", []);
+format_op(exists, false, mismatch, [false]) ->
+    io_lib:format("must not be present", []);
+format_op(type, false, mismatch, [Type]) ->
+    io_lib:format("must be of type '~s'", [Type]);
+format_op(type, true, mismatch, [Type]) ->
+    io_lib:format("must not be of type '~s'", [Type]);
+format_op(mod, false, mismatch, [D, R]) ->
+    io_lib:format("must leave a remainder of ~p when divided by ~p", [R, D]);
+format_op(mod, true, mismatch, [D, R]) ->
+    io_lib:format("must leave a remainder other than ~p when divided by ~p", [R, D]);
+format_op(regex, false, mismatch, [P]) ->
+    io_lib:format("must match the pattern '~s'", [P]);
+format_op(regex, true, mismatch, [P]) ->
+    io_lib:format("must not match the pattern '~s'", [P]);
+format_op(beginsWith, false, mismatch, [P]) ->
+    io_lib:format("must begin with '~s'", [P]);
+format_op(beginsWith, true, mismatch, [P]) ->
+    io_lib:format("must not begin with '~s'", [P]);
+format_op(size, false, mismatch, [N]) ->
+    io_lib:format("must contain ~p items", [N]);
+format_op(size, true, mismatch, [N]) ->
+    io_lib:format("must not contain ~p items", [N]);
+format_op(eq, true, Type, Params) -> % negated operators reuse the text of their opposite
+    format_op(ne, false, Type, Params);
+format_op(ne, true, Type, Params) ->
+    format_op(eq, false, Type, Params);
+format_op(lt, true, Type, Params) ->
+    format_op(gte, false, Type, Params);
+format_op(lte, true, Type, Params) ->
+    format_op(gt, false, Type, Params);
+format_op(gt, true, Type, Params) ->
+    format_op(lte, false, Type, Params);
+format_op(gte, true, Type, Params) ->
+    format_op(lt, false, Type, Params); % not (>=) is (<); the clause above is keyed on 'lt'
+format_op(in, true, Type, Params) ->
+    format_op(nin, false, Type, Params);
+format_op(nin, true, Type, Params) ->
+    format_op(in, false, Type, Params);
+format_op(exists, true, Type, [Exist]) ->
+    format_op(exists, false, Type, [not Exist]).
+% format_path/1: ctx paths are innermost-first; emit outermost-first, splitting dotted field names.
+format_path([]) ->
+    [];
+format_path([Item | Rest]) when is_binary(Item) ->
+    {ok, Path} = mango_util:parse_field(Item),
+    format_path(Rest) ++ Path;
+format_path([Item | Rest]) when is_integer(Item) ->
+    format_path(Rest) ++ [list_to_binary(integer_to_list(Item))]. % list indexes become strings
+
 % Returns true if Selector requires all
 % fields in RequiredFields to exist in any matching documents.
@@ -1869,33 +1951,36 @@ match_failures_object_test() -> ]} ), - Fails0 = match_failures( + Fails0 = match_int( Selector, {[ {<<"a">>, 1}, {<<"b">>, {[{<<"c">>, 3}]}} - ]} + ]}, + true ), ?assertEqual([], Fails0), - Fails1 = match_failures( + Fails1 = match_int( Selector, {[ {<<"a">>, 0}, {<<"b">>, {[{<<"c">>, 3}]}} - ]} + ]}, + true ), ?assertMatch( [#failure{op = eq, type = mismatch, params = [1], ctx = #ctx{path = [<<"a">>]}}], Fails1 ), - Fails2 = match_failures( + Fails2 = match_int( Selector, {[ {<<"a">>, 1}, {<<"b">>, {[{<<"c">>, 4}]}} - ]} + ]}, + true ), ?assertMatch( [#failure{op = eq, type = mismatch, params = [3], ctx = #ctx{path = [<<"b.c">>]}}], @@ -1912,21 +1997,21 @@ match_failures_elemmatch_test() -> ]} ), - Fails0 = match_failures( - SelElemMatch, {[{<<"a">>, [5, 3, 2]}]} + Fails0 = match_int( + SelElemMatch, {[{<<"a">>, [5, 3, 2]}]}, true ), ?assertEqual([], Fails0), - Fails1 = match_failures( - SelElemMatch, {[{<<"a">>, []}]} + Fails1 = match_int( + SelElemMatch, {[{<<"a">>, []}]}, true ), ?assertMatch( [#failure{op = elemMatch, type = empty_list, params = [], ctx = #ctx{path = [<<"a">>]}}], Fails1 ), - Fails2 = match_failures( - SelElemMatch, {[{<<"a">>, [3, 2]}]} + Fails2 = match_int( + SelElemMatch, {[{<<"a">>, [3, 2]}]}, true ), ?assertMatch( [ @@ -1946,21 +2031,21 @@ match_failures_allmatch_test() -> ]} ), - Fails0 = match_failures( - SelAllMatch, {[{<<"a">>, [5]}]} + Fails0 = match_int( + SelAllMatch, {[{<<"a">>, [5]}]}, true ), ?assertEqual([], Fails0), - Fails1 = match_failures( - SelAllMatch, {[{<<"a">>, [4]}]} + Fails1 = match_int( + SelAllMatch, {[{<<"a">>, [4]}]}, true ), ?assertMatch( [#failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [0, <<"a">>]}}], Fails1 ), - Fails2 = match_failures( - SelAllMatch, {[{<<"a">>, [5, 6, 3, 7, 0]}]} + Fails2 = match_int( + SelAllMatch, {[{<<"a">>, [5, 6, 3, 7, 0]}]}, true ), ?assertMatch( [ @@ -1980,13 +2065,13 @@ match_failures_allmatch_object_test() -> ]} ), - Fails0 = 
match_failures( - SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}]}]}}]} + Fails0 = match_int( + SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}]}]}}]}, true ), ?assertEqual([], Fails0), - Fails1 = match_failures( - SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 4}]}]}]}}]} + Fails1 = match_int( + SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 4}]}]}]}}]}, true ), ?assertMatch( [ @@ -1997,9 +2082,10 @@ match_failures_allmatch_object_test() -> Fails1 ), - Fails2 = match_failures( + Fails2 = match_int( SelAllMatch, - {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}, {[{<<"c">>, 6}]}, {[{<<"c">>, 3}]}]}]}}]} + {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}, {[{<<"c">>, 6}]}, {[{<<"c">>, 3}]}]}]}}]}, + true ), ?assertMatch( [ @@ -2010,9 +2096,10 @@ match_failures_allmatch_object_test() -> Fails2 ), - Fails3 = match_failures( + Fails3 = match_int( SelAllMatch, - {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 1}]}, {[]}]}]}}]} + {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 1}]}, {[]}]}]}}]}, + true ), ?assertMatch( [ @@ -2029,6 +2116,13 @@ match_failures_allmatch_object_test() -> Fails3 ). +format_path_test() -> + ?assertEqual([], format_path([])), + ?assertEqual([<<"a">>], format_path([<<"a">>])), + ?assertEqual([<<"a">>, <<"b">>], format_path([<<"b">>, <<"a">>])), + ?assertEqual([<<"a">>, <<"b">>, <<"c">>], format_path([<<"b.c">>, <<"a">>])), + ?assertEqual([<<"a">>, <<"42">>, <<"b">>, <<"c">>], format_path([<<"b.c">>, 42, <<"a">>])). 
+ bench(Name, Selector, Doc) -> Sel1 = normalize(Selector), [Normal, Verbose] = erlperf:compare( diff --git a/test/elixir/test/validate_doc_update_test.exs b/test/elixir/test/validate_doc_update_test.exs index 93ed8f177cf..9279e1da0e9 100644 --- a/test/elixir/test/validate_doc_update_test.exs +++ b/test/elixir/test/validate_doc_update_test.exs @@ -105,6 +105,9 @@ defmodule ValidateDocUpdateTest do resp = Couch.put("/#{db}/doc", body: %{"no" => "type"}) assert resp.status_code == 403 assert resp.body["error"] == "forbidden" + assert resp.body["reason"] == [ + %{"path" => ["newDoc", "type"], "message" => "must be present"} + ] end @tag :with_db From 8c667b8bcab4bff37bde3e14fb008b4c054472d6 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Wed, 11 Feb 2026 10:45:47 +0000 Subject: [PATCH 14/16] [wip] normalise $data operators --- src/mango/src/mango_selector.erl | 134 ++++++++++++++++++++++++++++++- src/mango/src/mango_util.erl | 40 ++++++--- 2 files changed, 160 insertions(+), 14 deletions(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index b752059ef39..25e22371033 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -85,6 +85,13 @@ match_int(Selector, D, Verbose) -> % Convert each operator into a normalized version as well % as convert an implicit operators into their explicit % versions. 
+% {$Op: {$data: Path}} +norm_ops({[{<<"$", _/binary>>, {[{<<"$data">>, Path}]}}]} = Cond) when is_binary(Path) -> + norm_data(Cond); +% {Field: {$data: Path}} +norm_ops({[{Field, {[{<<"$data">>, Path}]}}]}) when is_binary(Path) -> + Eq = norm_data({[{<<"$eq">>, {[{<<"$data">>, Path}]}}]}), + {[{Field, Eq}]}; norm_ops({[{<<"$and">>, Args}]}) when is_list(Args) -> {[{<<"$and">>, [norm_ops(A) || A <- Args]}]}; norm_ops({[{<<"$and">>, Arg}]}) -> @@ -128,8 +135,8 @@ norm_ops({[{<<"$regex">>, Regex}]} = Cond) when is_binary(Regex) -> _ -> ?MANGO_ERROR({bad_arg, '$regex', Regex}) end; -norm_ops({[{<<"$all">>, Args}]}) when is_list(Args) -> - {[{<<"$all">>, Args}]}; +norm_ops({[{<<"$all">>, Args}]} = Cond) when is_list(Args) -> + Cond; norm_ops({[{<<"$all">>, Arg}]}) -> ?MANGO_ERROR({bad_arg, '$all', Arg}); norm_ops({[{<<"$elemMatch">>, {_} = Arg}]}) -> @@ -144,8 +151,8 @@ norm_ops({[{<<"$keyMapMatch">>, {_} = Arg}]}) -> {[{<<"$keyMapMatch">>, norm_ops(Arg)}]}; norm_ops({[{<<"$keyMapMatch">>, Arg}]}) -> ?MANGO_ERROR({bad_arg, '$keyMapMatch', Arg}); -norm_ops({[{<<"$size">>, Arg}]}) when is_integer(Arg), Arg >= 0 -> - {[{<<"$size">>, Arg}]}; +norm_ops({[{<<"$size">>, Arg}]} = Cond) when is_integer(Arg), Arg >= 0 -> + Cond; norm_ops({[{<<"$size">>, Arg}]}) -> ?MANGO_ERROR({bad_arg, '$size', Arg}); norm_ops({[{<<"$text">>, Arg}]}) when @@ -205,6 +212,37 @@ norm_ops({[_, _ | _] = Props}) -> norm_ops(Value) -> {[{<<"$eq">>, Value}]}. +% {$data: Path} may only be used as an argument to "leaf" operators that expect +% a literal value as input. If it were combined with combinators like $and or +% $allMatch it would allow the input document to inject its own selectors. +-define(DATA_OPS, [ + <<"$eq">>, + <<"$ne">>, + <<"$lt">>, + <<"$lte">>, + <<"$gt">>, + <<"$gte">>, + <<"$in">>, + <<"$nin">>, + <<"$all">>, + <<"$type">>, + <<"$size">>, + <<"$mod">>, + <<"$regex">>, + <<"$beginsWith">> +]). 
+ +norm_data({[{Op, {[{<<"$data">>, Field}]}}]}) when is_binary(Field) -> + case lists:member(Op, ?DATA_OPS) of + true -> + {ok, Path} = mango_util:parse_field(Field, relative), + {[{Op, {[{<<"$data">>, Path}]}}]}; + false -> + ?MANGO_ERROR({bad_arg, '$data', Op}) + end; +norm_data({[{_, _}]} = Cond) -> + Cond. + % This takes a selector and normalizes all of the % field names as far as possible. For instance: % @@ -899,6 +937,94 @@ fields({[]}) -> ]} ). +normalize_data_basic_test() -> + Selector = normalize({[{<<"a">>, {[{<<"$data">>, <<"b">>}]}}]}), + ?assertEqual( + {[ + { + <<"a">>, {[{<<"$eq">>, {[{<<"$data">>, [<<"b">>]}]}}]} + } + ]}, + Selector + ). + +normalize_data_path_test() -> + Selector = normalize({[{<<"a">>, {[{<<"$data">>, <<"b.c.42.d">>}]}}]}), + ?assertEqual( + {[ + { + <<"a">>, {[{<<"$eq">>, {[{<<"$data">>, [<<"b">>, <<"c">>, <<"42">>, <<"d">>]}]}}]} + } + ]}, + Selector + ). + +normalize_data_sibling_test() -> + Selector = normalize({[{<<"a">>, {[{<<"$data">>, <<".b">>}]}}]}), + ?assertEqual( + {[ + { + <<"a">>, {[{<<"$eq">>, {[{<<"$data">>, [{[{<<"parent">>, 1}]}, <<"b">>]}]}}]} + } + ]}, + Selector + ). + +normalize_data_parent_test() -> + Selector = normalize({[{<<"a">>, {[{<<"$data">>, <<"..b">>}]}}]}), + ?assertEqual( + {[ + { + <<"a">>, {[{<<"$eq">>, {[{<<"$data">>, [{[{<<"parent">>, 2}]}, <<"b">>]}]}}]} + } + ]}, + Selector + ). + +normalize_data_allowed_operator_test() -> + Selector = normalize({[{<<"a">>, {[{<<"$lt">>, {[{<<"$data">>, <<"b">>}]}}]}}]}), + ?assertEqual( + {[ + { + <<"a">>, {[{<<"$lt">>, {[{<<"$data">>, [<<"b">>]}]}}]} + } + ]}, + Selector + ). + +normalize_data_allowed_operator_all_test() -> + Selector = normalize({[{<<"a">>, {[{<<"$all">>, {[{<<"$data">>, <<"b">>}]}}]}}]}), + ?assertEqual( + {[ + { + <<"a">>, {[{<<"$all">>, {[{<<"$data">>, [<<"b">>]}]}}]} + } + ]}, + Selector + ). 
+ +normalize_data_disallowed_operator_test() -> + Selector = {[{<<"a">>, {[{<<"$allMatch">>, {[{<<"$data">>, <<"b">>}]}}]}}]}, + Error = {mango_error, mango_selector, {bad_arg, '$data', <<"$allMatch">>}}, + ?assertException(throw, Error, normalize(Selector)). + +normalize_data_multi_field_test() -> + Selector = normalize( + {[ + {<<"a">>, {[{<<"$data">>, <<"c">>}]}}, + {<<"b">>, {[{<<"$data">>, <<"c">>}]}} + ]} + ), + ?assertEqual( + {[ + {<<"$and">>, [ + {[{<<"a">>, {[{<<"$eq">>, {[{<<"$data">>, [<<"c">>]}]}}]}}]}, + {[{<<"b">>, {[{<<"$eq">>, {[{<<"$data">>, [<<"c">>]}]}}]}}]} + ]} + ]}, + Selector + ). + is_constant_field_basic_test() -> Selector = normalize({[{<<"A">>, <<"foo">>}]}), Field = <<"A">>, diff --git a/src/mango/src/mango_util.erl b/src/mango/src/mango_util.erl index 837cbf3dbe8..5bd8dc15cc8 100644 --- a/src/mango/src/mango_util.erl +++ b/src/mango/src/mango_util.erl @@ -41,6 +41,7 @@ join/2, parse_field/1, + parse_field/2, cached_re/2 ]). @@ -350,16 +351,36 @@ cached_re(Name, RE) -> end. parse_field(Field) -> - case binary:match(Field, <<"\\">>, []) of - nomatch -> - % Fast path, no regex required - {ok, check_non_empty(Field, binary:split(Field, <<".">>, [global]))}; - _ -> - parse_field_slow(Field) - end. + parse_field(Field, absolute). + +parse_field(Field, Mode) -> + Parts = + case binary:match(Field, <<"\\">>, []) of + nomatch -> + % Fast path, no regex required + binary:split(Field, <<".">>, [global]); + _ -> + parse_field_slow(Field) + end, + {Prefix, Path} = + case Mode of + absolute -> + {[], Parts}; + relative -> + lists:foldl( + fun + (<<>>, {[], []}) -> {[{[{<<"parent">>, 1}]}], []}; + (<<>>, {[{[{<<"parent">>, N}]}], []}) -> {[{[{<<"parent">>, N + 1}]}], []}; + (Pt, {Pre, Pts}) -> {Pre, Pts ++ [Pt]} + end, + {[], []}, + Parts + ) + end, + {ok, check_non_empty(Field, Prefix ++ Path)}. 
parse_field_slow(Field) -> - Path = lists:map( + lists:map( fun (P) when P =:= <<>> -> ?MANGO_ERROR({invalid_field_name, Field}); @@ -367,8 +388,7 @@ parse_field_slow(Field) -> re:replace(P, <<"\\\\">>, <<>>, [global, {return, binary}]) end, re:split(Field, <<"(?>) - ), - {ok, Path}. + ). check_non_empty(Field, Parts) -> case lists:member(<<>>, Parts) of From 7e8056066eb473da3e0c0984f953f084af2c5492 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Wed, 11 Feb 2026 10:47:09 +0000 Subject: [PATCH 15/16] [wip] make mango_doc:get_field functions trace the stack of objects --- src/mango/src/mango_doc.erl | 51 ++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/src/mango/src/mango_doc.erl b/src/mango/src/mango_doc.erl index 255debf9e6d..8093c6f7292 100644 --- a/src/mango/src/mango_doc.erl +++ b/src/mango/src/mango_doc.erl @@ -21,6 +21,8 @@ get_field/2, get_field/3, + get_field_with_stack/3, + get_field_from_stack/2, rem_field/2, set_field/3 ]). @@ -377,26 +379,31 @@ do_update_to_insert([{_, _} | Rest], Doc) -> get_field(Props, Field) -> get_field(Props, Field, no_validation). -get_field(Props, Field, Validator) when is_binary(Field) -> - {ok, Path} = mango_util:parse_field(Field), - get_field(Props, Path, Validator); -get_field(Props, [], no_validation) -> - Props; -get_field(Props, [], Validator) -> - case (catch Validator(Props)) of +get_field(Props, Field, no_validation) -> + {Value, _} = get_field_with_stack(Props, Field, []), + Value; +get_field(Props, Field, Validator) -> + {Value, _} = get_field_with_stack(Props, Field, []), + case (catch Validator(Value)) of true -> - Props; + Value; _ -> invalid_value - end; -get_field({Props}, [Name | Rest], Validator) -> + end. 
+
+get_field_with_stack(Props, Field, Stack) when is_binary(Field) ->
+    {ok, Path} = mango_util:parse_field(Field),
+    get_field_with_stack(Props, Path, Stack);
+get_field_with_stack(Props, [], Stack) ->
+    {Props, Stack};
+get_field_with_stack({Props}, [Name | Rest], Stack) ->
     case lists:keyfind(Name, 1, Props) of
         {Name, Value} ->
-            get_field(Value, Rest, Validator);
+            get_field_with_stack(Value, Rest, [{Props} | Stack]);
         false ->
-            not_found
+            {not_found, [{Props} | Stack]}
     end;
-get_field(Values, [Name | Rest], Validator) when is_list(Values) ->
+get_field_with_stack(Values, [Name | Rest], Stack) when is_list(Values) ->
     % Name might be an integer index into an array
     try
         Pos = binary_to_integer(Name),
@@ -404,16 +411,31 @@ get_field(Values, [Name | Rest], Validator) when is_list(Values) ->
             true ->
                 % +1 because Erlang uses 1 based list indices
                 Value = lists:nth(Pos + 1, Values),
-                get_field(Value, Rest, Validator);
+                get_field_with_stack(Value, Rest, Stack);
             false ->
-                bad_path
+                {bad_path, Stack}
         end
     catch
         error:badarg ->
-            bad_path
+            {bad_path, Stack}
     end;
-get_field(_, [_ | _], _) ->
-    bad_path.
+get_field_with_stack(_, [_ | _], Stack) ->
+    {bad_path, Stack}.
+
+% Resolves a parsed $data path against the stack of enclosing objects
+% (innermost first, as built by get_field_with_stack/3). A leading
+% {"parent": N} marker starts the lookup at the Nth enclosing object; any
+% other path starts at the outermost one. An empty stack has nothing to
+% resolve against, so it yields not_found instead of crashing in lists:last/1.
+get_field_from_stack(_Path, []) ->
+    not_found;
+get_field_from_stack([{[{<<"parent">>, N}]} | Rest], Stack) ->
+    case length(Stack) >= N of
+        true -> get_field(lists:nth(N, Stack), Rest);
+        false -> not_found
+    end;
+get_field_from_stack(Path, Stack) ->
+    get_field(lists:last(Stack), Path).
rem_field(Props, Field) when is_binary(Field) -> {ok, Path} = mango_util:parse_field(Field), From c829346ad26336c060048dde8b66c25bceb3c3b7 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Wed, 11 Feb 2026 10:48:04 +0000 Subject: [PATCH 16/16] [wip] implement $data operator in mango_selector:match --- src/mango/src/mango_selector.erl | 99 +++++++++++++++++-- test/elixir/test/config/suite.elixir | 1 + test/elixir/test/validate_doc_update_test.exs | 41 ++++++++ 3 files changed, 131 insertions(+), 10 deletions(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index 25e22371033..b8b78f69177 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -28,7 +28,8 @@ cmp, verbose = false, negate = false, - path = [] + path = [], + stack = [] }). -record(failure, { @@ -426,6 +427,22 @@ match({[]}, _, #ctx{verbose = false}) -> true; match({[]}, _, #ctx{verbose = true}) -> []; +% Resolve $data lookups before evaluating the surrounding operator +match({[{Op, {[{<<"$data">>, Path}]}}]}, Value, #ctx{stack = Stack, verbose = Verbose} = Ctx) -> + case mango_doc:get_field_from_stack(Path, Stack) of + not_found -> + case Verbose of + true -> [#failure{op = data, type = not_found, ctx = Ctx#ctx{path = Path}}]; + false -> false + end; + bad_path -> + case Verbose of + true -> [#failure{op = data, type = bad_path, ctx = Ctx#ctx{path = Path}}]; + false -> false + end; + Found -> + match({[{Op, Found}]}, Value, Ctx) + end; % We need to treat an empty array as always true. This will be applied % for $or, $in, $all, $nin as well. match({[{<<"$and">>, []}]}, _, #ctx{verbose = false}) -> @@ -710,28 +727,28 @@ match({[{<<"$", _/binary>> = Op, _}]}, _, _) -> % We need to traverse value to find field. The call to % mango_doc:get_field/2 may return either not_found or % bad_path in which case matching fails. 
-match({[{Field, Cond}]}, Value, #ctx{verbose = Verb, path = Path} = Ctx) -> +match({[{Field, Cond}]}, Value, #ctx{verbose = Verb, path = Path, stack = Stack} = Ctx) -> InnerCtx = Ctx#ctx{path = [Field | Path]}, - case mango_doc:get_field(Value, Field) of - not_found when Cond == {[{<<"$exists">>, false}]} -> + case mango_doc:get_field_with_stack(Value, Field, Stack) of + {not_found, _} when Cond == {[{<<"$exists">>, false}]} -> case Verb of true -> []; false -> true end; - not_found -> + {not_found, _} -> case Verb of true -> [#failure{op = field, type = not_found, ctx = InnerCtx}]; false -> false end; - bad_path -> + {bad_path, _} -> case Verb of true -> [#failure{op = field, type = bad_path, ctx = InnerCtx}]; false -> false end; - SubValue when Field == <<"_id">> -> - match(Cond, SubValue, InnerCtx#ctx{cmp = fun mango_json:cmp_raw/2}); - SubValue -> - match(Cond, SubValue, InnerCtx) + {SubValue, NewStack} when Field == <<"_id">> -> + match(Cond, SubValue, InnerCtx#ctx{cmp = fun mango_json:cmp_raw/2, stack = NewStack}); + {SubValue, NewStack} -> + match(Cond, SubValue, InnerCtx#ctx{stack = NewStack}) end; match({[_, _ | _] = _Props} = Sel, _Value, _Ctx) -> error({unnormalized_selector, Sel}). @@ -1933,6 +1950,68 @@ match_object_test() -> ?assertEqual(true, match_int(SelShort, Doc4)), ?assertEqual(false, match_int(SelShort, Doc5)). 
+match_data_test() -> + SelAbs = normalize({[{<<"a">>, {[{<<"$gt">>, {[{<<"$data">>, <<"b">>}]}}]}}]}), + ?assertEqual(true, match_int(SelAbs, {[{<<"a">>, 2}, {<<"b">>, 1}]})), + ?assertEqual(false, match_int(SelAbs, {[{<<"a">>, 2}, {<<"b">>, 2}]})), + + ?assertEqual(false, match_int(SelAbs, {[{<<"a">>, 2}]})), + ?assertMatch( + [#failure{op = data, type = not_found, params = [], ctx = #ctx{path = [<<"b">>]}}], + match_int(SelAbs, {[{<<"a">>, 2}]}, true) + ), + + ?assertEqual(false, match_int(SelAbs, {[{<<"b">>, 2}]})), + ?assertMatch( + [#failure{op = field, type = not_found, params = [], ctx = #ctx{path = [<<"a">>]}}], + match_int(SelAbs, {[{<<"b">>, 2}]}, true) + ), + + SelRel = normalize({[{<<"a.b">>, {[{<<"$gt">>, {[{<<"$data">>, <<".c">>}]}}]}}]}), + ?assertEqual(true, match_int(SelRel, {[{<<"a">>, {[{<<"b">>, 2}, {<<"c">>, 1}]}}]})), + ?assertEqual(false, match_int(SelRel, {[{<<"a">>, {[{<<"b">>, 2}, {<<"c">>, 2}]}}]})), + + SelRelOutOfBounds = normalize({[{<<"a">>, {[{<<"$gt">>, {[{<<"$data">>, <<"..b">>}]}}]}}]}), + ?assertEqual(false, match_int(SelRelOutOfBounds, {[{<<"a">>, 1}]})), + + SelRelAllMatch = normalize( + {[ + {<<"a">>, + {[ + {<<"$allMatch">>, + {[ + {<<"b">>, {[{<<"$gt">>, {[{<<"$data">>, <<".c">>}]}}]}} + ]}} + ]}} + ]} + ), + ?assertEqual( + true, + match_int( + SelRelAllMatch, + {[ + {<<"a">>, [ + {[{<<"b">>, 2}, {<<"c">>, 1}]}, + {[{<<"b">>, 5}, {<<"c">>, 3}]}, + {[{<<"b">>, 7}, {<<"c">>, 6}]} + ]} + ]} + ) + ), + ?assertEqual( + false, + match_int( + SelRelAllMatch, + {[ + {<<"a">>, [ + {[{<<"b">>, 2}, {<<"c">>, 1}]}, + {[{<<"b">>, 2}, {<<"c">>, 3}]}, + {[{<<"b">>, 7}, {<<"c">>, 6}]} + ]} + ]} + ) + ). 
+ match_and_test() -> % $and with an empty array matches anything SelEmpty = normalize({[{<<"x">>, {[{<<"$and">>, []}]}}]}), diff --git a/test/elixir/test/config/suite.elixir b/test/elixir/test/config/suite.elixir index 4df5991bdc0..e5046b6597d 100644 --- a/test/elixir/test/config/suite.elixir +++ b/test/elixir/test/config/suite.elixir @@ -530,6 +530,7 @@ "converting a Mango VDU to JavaScript updates its effects", "deleting a Mango VDU removes its effects", "Mango VDU rejects a doc if any existing ddoc fails to match", + "Mango VDU allows comparisons via $data", ], "SecurityValidationTest": [ "Author presence and user security", diff --git a/test/elixir/test/validate_doc_update_test.exs b/test/elixir/test/validate_doc_update_test.exs index 9279e1da0e9..97f7ad82bd6 100644 --- a/test/elixir/test/validate_doc_update_test.exs +++ b/test/elixir/test/validate_doc_update_test.exs @@ -212,4 +212,45 @@ defmodule ValidateDocUpdateTest do assert resp.status_code == 403 assert resp.body["error"] == "forbidden" end + + @tag :with_db + test "Mango VDU allows comparisons via $data", context do + db = context[:db_name] + + resp = Couch.put("/#{db}/_design/mango-test", body: %{ + language: "query", + + validate_doc_update: %{ + "$or" => [ + %{ "oldDoc" => :null }, + %{ "oldDoc.tags" => %{ "$size" => 0 } }, + %{ "newDoc.tags" => %{ "$all" => %{ "$data" => "oldDoc.tags" } } } + ] + } + }) + assert resp.status_code == 201 + + resp = Couch.put("/#{db}/doc", body: %{ + "tags" => ["a"] + }) + assert resp.status_code == 201 + rev = resp.body["rev"] + + resp = Couch.put("/#{db}/doc", query: %{rev: rev}, body: %{ + "tags" => ["a", "b"] + }) + assert resp.status_code == 201 + rev = resp.body["rev"] + + resp = Couch.put("/#{db}/doc", query: %{rev: rev}, body: %{ + "tags" => ["b", "a", "c"] + }) + assert resp.status_code == 201 + rev = resp.body["rev"] + + resp = Couch.put("/#{db}/doc", query: %{rev: rev}, body: %{ + "tags" => ["b", "c"] + }) + assert resp.status_code == 403 + end end