-
Notifications
You must be signed in to change notification settings - Fork 1k
Couch stats resource tracker v2 #5213
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
1fa358d
e67690a
e10d82e
b3a38c2
a4fc931
d2c7eba
7ad90ad
03fa0cb
bb00b81
edd5d0e
f06d4a4
c02d57b
5864ba3
e1fead4
4535f1a
48db790
4db08c9
1d34432
46fba8e
5ac8a91
ce91a40
de9a430
1a9e4d1
ba1baaa
e213714
8d0e9fd
1702115
4c091bc
ad84bf8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
handle_replicate_req/1, | ||
handle_reload_query_servers_req/1, | ||
handle_task_status_req/1, | ||
handle_resource_status_req/1, | ||
handle_up_req/1, | ||
handle_utils_dir_req/1, | ||
handle_utils_dir_req/2, | ||
|
@@ -224,6 +225,110 @@ handle_task_status_req(#httpd{method = 'GET'} = Req) -> | |
handle_task_status_req(Req) -> | ||
send_method_not_allowed(Req, "GET,HEAD"). | ||
|
||
handle_resource_status_req(#httpd{method = 'POST'} = Req) -> | ||
ok = chttpd:verify_is_server_admin(Req), | ||
chttpd:validate_ctype(Req, "application/json"), | ||
{Props} = chttpd:json_body_obj(Req), | ||
Action = proplists:get_value(<<"action">>, Props), | ||
Key = proplists:get_value(<<"key">>, Props), | ||
Val = proplists:get_value(<<"val">>, Props), | ||
|
||
CountBy = fun couch_stats_resource_tracker:count_by/1, | ||
GroupBy = fun couch_stats_resource_tracker:group_by/2, | ||
SortedBy1 = fun couch_stats_resource_tracker:sorted_by/1, | ||
SortedBy2 = fun couch_stats_resource_tracker:sorted_by/2, | ||
ConvertEle = fun erlang:binary_to_existing_atom/1, | ||
ConvertList = fun(L) -> [ConvertEle(E) || E <- L] end, | ||
ToJson = fun couch_stats_resource_tracker:term_to_flat_json/1, | ||
JsonKeys = fun(PL) -> [[ToJson(K), V] || {K, V} <- PL] end, | ||
|
||
Fun = case {Action, Key, Val} of | ||
{<<"count_by">>, Keys, undefined} when is_list(Keys) -> | ||
Keys1 = [ConvertEle(K) || K <- Keys], | ||
fun() -> CountBy(Keys1) end; | ||
{<<"count_by">>, Key, undefined} -> | ||
Key1 = ConvertEle(Key), | ||
fun() -> CountBy(Key1) end; | ||
{<<"group_by">>, Keys, Vals} when is_list(Keys) andalso is_list(Vals) -> | ||
Keys1 = ConvertList(Keys), | ||
Vals1 = ConvertList(Vals), | ||
fun() -> GroupBy(Keys1, Vals1) end; | ||
{<<"group_by">>, Key, Vals} when is_list(Vals) -> | ||
Key1 = ConvertEle(Key), | ||
Vals1 = ConvertList(Vals), | ||
fun() -> GroupBy(Key1, Vals1) end; | ||
{<<"group_by">>, Keys, Val} when is_list(Keys) -> | ||
Keys1 = ConvertList(Keys), | ||
Val1 = ConvertEle(Val), | ||
fun() -> GroupBy(Keys1, Val1) end; | ||
{<<"group_by">>, Key, Val} -> | ||
Key1 = ConvertEle(Key), | ||
Val1 = ConvertList(Val), | ||
fun() -> GroupBy(Key1, Val1) end; | ||
|
||
{<<"sorted_by">>, Key, undefined} -> | ||
Key1 = ConvertEle(Key), | ||
fun() -> JsonKeys(SortedBy1(Key1)) end; | ||
{<<"sorted_by">>, Keys, undefined} when is_list(Keys) -> | ||
Keys1 = [ConvertEle(K) || K <- Keys], | ||
fun() -> JsonKeys(SortedBy1(Keys1)) end; | ||
{<<"sorted_by">>, Keys, Vals} when is_list(Keys) andalso is_list(Vals) -> | ||
Keys1 = ConvertList(Keys), | ||
Vals1 = ConvertList(Vals), | ||
fun() -> JsonKeys(SortedBy2(Keys1, Vals1)) end; | ||
{<<"sorted_by">>, Key, Vals} when is_list(Vals) -> | ||
Key1 = ConvertEle(Key), | ||
Vals1 = ConvertList(Vals), | ||
fun() -> JsonKeys(SortedBy2(Key1, Vals1)) end; | ||
{<<"sorted_by">>, Keys, Val} when is_list(Keys) -> | ||
Keys1 = ConvertList(Keys), | ||
Val1 = ConvertEle(Val), | ||
fun() -> JsonKeys(SortedBy2(Keys1, Val1)) end; | ||
{<<"sorted_by">>, Key, Val} -> | ||
Key1 = ConvertEle(Key), | ||
Val1 = ConvertList(Val), | ||
fun() -> JsonKeys(SortedBy2(Key1, Val1)) end; | ||
_ -> | ||
throw({badrequest, invalid_resource_request}) | ||
end, | ||
|
||
Fun1 = fun() -> | ||
case Fun() of | ||
Map when is_map(Map) -> | ||
{maps:fold( | ||
fun | ||
(_K,0,A) -> A; %% TODO: Skip 0 value entries? | ||
(K,V,A) -> [{ToJson(K), V} | A] | ||
end, | ||
[], Map)}; | ||
List when is_list(List) -> | ||
List | ||
end | ||
end, | ||
|
||
{Resp, _Bad} = rpc:multicall(erlang, apply, [ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. erpc might be a better option https://www.erlang.org/doc/apps/kernel/erpc.html#multicall/3 |
||
fun() -> | ||
{node(), Fun1()} | ||
end, | ||
[] | ||
]), | ||
%%io:format("{CSRT}***** GOT RESP: ~p~n", [Resp]), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Left-over debug comment |
||
send_json(Req, {Resp}); | ||
handle_resource_status_req(#httpd{method = 'GET'} = Req) -> | ||
ok = chttpd:verify_is_server_admin(Req), | ||
{Resp, Bad} = rpc:multicall(erlang, apply, [ | ||
fun() -> | ||
{node(), couch_stats_resource_tracker:active()} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of having to run in a closure just to get node() in the response, consider using an explicit list of nodes |
||
end, | ||
[] | ||
]), | ||
%% TODO: incorporate Bad responses | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. consider using erpc, the handling of bad responses is a bit cleaner there, I think |
||
send_json(Req, {Resp}); | ||
handle_resource_status_req(Req) -> | ||
ok = chttpd:verify_is_server_admin(Req), | ||
send_method_not_allowed(Req, "GET,HEAD,POST"). | ||
|
||
|
||
handle_replicate_req(#httpd{method = 'POST', user_ctx = Ctx, req_body = PostBody} = Req) -> | ||
chttpd:validate_ctype(Req, "application/json"), | ||
%% see HACK in chttpd.erl about replication | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,6 +53,8 @@ | |
-define(INTERACTIVE_EDIT, interactive_edit). | ||
-define(REPLICATED_CHANGES, replicated_changes). | ||
|
||
-define(LOG_UNEXPECTED_MSG(Msg), couch_log:warning("[~p:~p:~p/~p]{~p[~p]} Unexpected message: ~w", [?MODULE, ?LINE, ?FUNCTION_NAME, ?FUNCTION_ARITY, self(), element(2, process_info(self(), message_queue_len)), Msg])). | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am surprised linter doesn't complain about such long line. |
||
|
||
-type branch() :: {Key::term(), Value::term(), Tree::term()}. | ||
-type path() :: {Start::pos_integer(), branch()}. | ||
-type update_type() :: replicated_changes | interactive_edit. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -306,6 +306,10 @@ | |
{type, counter}, | ||
{desc, <<"number of couch_server LRU operations skipped">>} | ||
]}. | ||
{[couchdb, couch_server, open], [ | ||
{type, counter}, | ||
{desc, <<"number of couch_server open operations invoked">>} | ||
]}. | ||
{[couchdb, query_server, vdu_rejects], [ | ||
{type, counter}, | ||
{desc, <<"number of rejections by validate_doc_update function">>} | ||
|
@@ -418,10 +422,39 @@ | |
{type, counter}, | ||
{desc, <<"number of other requests">>} | ||
]}. | ||
{[couchdb, query_server, volume, ddoc_filter], [ | ||
{type, counter}, | ||
{desc, <<"number of docs filtered by ddoc filters">>} | ||
]}. | ||
{[couchdb, legacy_checksums], [ | ||
{type, counter}, | ||
{desc, <<"number of legacy checksums found in couch_file instances">>} | ||
]}. | ||
{[couchdb, btree, folds], [ | ||
{type, counter}, | ||
{desc, <<"number of couch btree kv fold callback invocations">>} | ||
]}. | ||
{[couchdb, btree, get_node, kp_node], [ | ||
{type, counter}, | ||
{desc, <<"number of couch btree kp_nodes read">>} | ||
]}. | ||
{[couchdb, btree, get_node, kv_node], [ | ||
{type, counter}, | ||
{desc, <<"number of couch btree kv_nodes read">>} | ||
]}. | ||
Comment on lines
+437
to
+444
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wonder if this is a bit too low level? Or rather, btrees are used for view and db files. It's not clear which btrees the metrics refer to. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
So that's the thing, we don't have current visibility into the volume of The idea here is to start tracking the amount of core btree operations by the workers, allowing us to track it at a process level like we're doing with the rest of the stats in this PR, and then we'll at least be able to see the total volume of couch_btree operations induced, and then use the reports to quantify the requests/dbs/etc that performed those operations. A key aspect of the tracking here is that As for tracking of I thought about trying to funnel down into |
||
{[couchdb, btree, write_node, kp_node], [ | ||
{type, counter}, | ||
{desc, <<"number of couch btree kp_nodes written">>} | ||
]}. | ||
{[couchdb, btree, write_node, kv_node], [ | ||
{type, counter}, | ||
{desc, <<"number of couch btree kv_nodes written">>} | ||
]}. | ||
%% CSRT (couch_stats_resource_tracker) stats | ||
{[couchdb, csrt, delta_missing_t0], [ | ||
{type, counter}, | ||
{desc, <<"number of csrt contexts without a proper startime">>} | ||
]}. | ||
{[pread, exceed_eof], [ | ||
{type, counter}, | ||
{desc, <<"number of the attempts to read beyond end of db file">>} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -542,6 +542,8 @@ filter_docs(Req, Db, DDoc, FName, Docs) -> | |
{ok, filter_docs_int(Db, DDoc, FName, JsonReq, JsonDocs)} | ||
catch | ||
throw:{os_process_error, {exit_status, 1}} -> | ||
%% TODO: wire in csrt tracking | ||
couch_stats:increment_counter([couchdb, query_server, js_filter_error]), | ||
Comment on lines
+545
to
+546
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We already have a general counter for os process errors, normal exits and error exits. Probably don't need an explicit one just for filters? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in 4db08c9. |
||
%% batch used too much memory, retry sequentially. | ||
Fun = fun(JsonDoc) -> | ||
filter_docs_int(Db, DDoc, FName, JsonReq, [JsonDoc]) | ||
|
@@ -550,6 +552,12 @@ filter_docs(Req, Db, DDoc, FName, Docs) -> | |
end. | ||
|
||
filter_docs_int(Db, DDoc, FName, JsonReq, JsonDocs) -> | ||
%% Count usage in _int version as this can be repeated for OS error | ||
%% Pros & cons... might not have actually processed `length(JsonDocs)` docs | ||
%% but it certainly undercounts if we count in `filter_docs/5` above | ||
%% TODO: wire in csrt tracking | ||
couch_stats:increment_counter([couchdb, query_server, js_filter]), | ||
couch_stats:increment_counter([couchdb, query_server, js_filtered_docs], length(JsonDocs)), | ||
[true, Passes] = ddoc_prompt( | ||
Db, | ||
DDoc, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since we're introducing a new API add some docs or some description (examples) how it works, and the intent behind it. What would CouchDB users use it for, how it is different than metrics, and active tasks, etc...