未验证 提交 ea490d3b 编写于 作者: Q QinZuoyan 提交者: GitHub

shell: improve subcommand app_stat and nodes (#277)

上级 7314ec10
......@@ -102,7 +102,6 @@ void info_collector::on_app_stat()
all.storage_count += row.storage_count;
all.rdb_block_cache_hit_count += row.rdb_block_cache_hit_count;
all.rdb_block_cache_total_count += row.rdb_block_cache_total_count;
all.rdb_block_cache_mem_usage += row.rdb_block_cache_mem_usage;
all.rdb_index_and_filter_blocks_mem_usage += row.rdb_index_and_filter_blocks_mem_usage;
all.rdb_memtable_mem_usage += row.rdb_memtable_mem_usage;
read_qps[i] = row.get_qps + row.multi_get_qps + row.scan_qps;
......@@ -139,7 +138,6 @@ void info_collector::on_app_stat()
std::abs(row.rdb_block_cache_total_count) < 1e-6
? 0
: row.rdb_block_cache_hit_count / row.rdb_block_cache_total_count * 1000000);
counters->rdb_block_cache_mem_usage->set(row.rdb_block_cache_mem_usage);
counters->rdb_index_and_filter_blocks_mem_usage->set(
row.rdb_index_and_filter_blocks_mem_usage);
counters->rdb_memtable_mem_usage->set(row.rdb_memtable_mem_usage);
......@@ -192,7 +190,6 @@ info_collector::AppStatCounters *info_collector::get_app_counters(const std::str
INIT_COUNTER(storage_mb);
INIT_COUNTER(storage_count);
INIT_COUNTER(rdb_block_cache_hit_rate);
INIT_COUNTER(rdb_block_cache_mem_usage);
INIT_COUNTER(rdb_index_and_filter_blocks_mem_usage);
INIT_COUNTER(rdb_memtable_mem_usage);
INIT_COUNTER(read_qps);
......
......@@ -485,7 +485,6 @@ struct row_data
double storage_count = 0;
double rdb_block_cache_hit_count = 0;
double rdb_block_cache_total_count = 0;
double rdb_block_cache_mem_usage = 0;
double rdb_index_and_filter_blocks_mem_usage = 0;
double rdb_memtable_mem_usage = 0;
};
......@@ -531,8 +530,6 @@ update_app_pegasus_perf_counter(row_data &row, const std::string &counter_name,
row.rdb_block_cache_hit_count += value;
else if (counter_name == "rdb.block_cache.total_count")
row.rdb_block_cache_total_count += value;
else if (counter_name == "rdb.block_cache.memory_usage")
row.rdb_block_cache_mem_usage += value;
else if (counter_name == "rdb.index_and_filter_blocks.memory_usage")
row.rdb_index_and_filter_blocks_mem_usage += value;
else if (counter_name == "rdb.memtable.memory_usage")
......
......@@ -164,9 +164,37 @@ inline bool ls_apps(command_executor *e, shell_context *sc, arguments args)
return true;
}
struct list_nodes_helper
{
std::string node_name;
std::string node_status;
int primary_count;
int secondary_count;
int64_t memused_res_mb;
int64_t block_cache_bytes;
int64_t mem_tbl_bytes;
int64_t mem_idx_bytes;
int64_t disk_available_total_ratio;
int64_t disk_available_min_ratio;
list_nodes_helper(const std::string &n, const std::string &s)
: node_name(n),
node_status(s),
primary_count(0),
secondary_count(0),
memused_res_mb(0),
block_cache_bytes(0),
mem_tbl_bytes(0),
mem_idx_bytes(0),
disk_available_total_ratio(0),
disk_available_min_ratio(0)
{
}
};
inline bool ls_nodes(command_executor *e, shell_context *sc, arguments args)
{
static struct option long_options[] = {{"detailed", no_argument, 0, 'd'},
{"resolve_ip", no_argument, 0, 'r'},
{"resource_usage", no_argument, 0, 'u'},
{"status", required_argument, 0, 's'},
{"output", required_argument, 0, 'o'},
{0, 0, 0, 0}};
......@@ -174,17 +202,25 @@ inline bool ls_nodes(command_executor *e, shell_context *sc, arguments args)
std::string status;
std::string output_file;
bool detailed = false;
bool resolve_ip = false;
bool resource_usage = false;
optind = 0;
while (true) {
int option_index = 0;
int c;
c = getopt_long(args.argc, args.argv, "ds:o:", long_options, &option_index);
c = getopt_long(args.argc, args.argv, "drus:o:", long_options, &option_index);
if (c == -1)
break;
switch (c) {
case 'd':
detailed = true;
break;
case 'r':
resolve_ip = true;
break;
case 'u':
resource_usage = true;
break;
case 's':
status = optarg;
break;
......@@ -217,9 +253,184 @@ inline bool ls_nodes(command_executor *e, shell_context *sc, arguments args)
status.c_str());
}
::dsn::error_code err = sc->ddl_client->list_nodes(s, detailed, output_file);
if (err != ::dsn::ERR_OK)
std::cout << "list nodes failed, error=" << err.to_string() << std::endl;
std::map<dsn::rpc_address, dsn::replication::node_status::type> nodes;
auto r = sc->ddl_client->list_nodes(s, nodes);
if (r != dsn::ERR_OK) {
std::cout << "list nodes failed, error=" << r.to_string() << std::endl;
return true;
}
std::map<dsn::rpc_address, list_nodes_helper> tmp_map;
int alive_node_count = 0;
for (auto &kv : nodes) {
if (kv.second == dsn::replication::node_status::NS_ALIVE)
alive_node_count++;
std::string status_str = dsn::enum_to_string(kv.second);
status_str = status_str.substr(status_str.find("NS_") + 3);
std::string node_name = kv.first.to_std_string();
if (resolve_ip) {
// TODO: put hostname_from_ip_port into common utils
node_name = sc->ddl_client->hostname_from_ip_port(node_name.c_str());
}
tmp_map.emplace(kv.first, list_nodes_helper(node_name, status_str));
}
if (detailed) {
std::vector<::dsn::app_info> apps;
r = sc->ddl_client->list_apps(dsn::app_status::AS_AVAILABLE, apps);
if (r != dsn::ERR_OK) {
std::cout << "list apps failed, error=" << r.to_string() << std::endl;
return true;
}
for (auto &app : apps) {
int32_t app_id;
int32_t partition_count;
std::vector<dsn::partition_configuration> partitions;
r = sc->ddl_client->list_app(app.app_name, app_id, partition_count, partitions);
if (r != dsn::ERR_OK) {
std::cout << "list app " << app.app_name << " failed, error=" << r.to_string()
<< std::endl;
return true;
}
for (const dsn::partition_configuration &p : partitions) {
if (!p.primary.is_invalid()) {
auto find = tmp_map.find(p.primary);
if (find != tmp_map.end()) {
find->second.primary_count++;
}
}
for (const dsn::rpc_address &addr : p.secondaries) {
auto find = tmp_map.find(addr);
if (find != tmp_map.end()) {
find->second.secondary_count++;
}
}
}
}
}
if (resource_usage) {
std::vector<node_desc> nodes;
if (!fill_nodes(sc, "replica-server", nodes)) {
derror("get replica server node list failed");
return true;
}
::dsn::command command;
command.cmd = "perf-counters";
command.arguments.push_back(".*memused.res(MB)");
command.arguments.push_back(".*rdb.block_cache.memory_usage");
command.arguments.push_back(".*disk.available.total.ratio");
command.arguments.push_back(".*disk.available.min.ratio");
command.arguments.push_back(".*@.*");
std::vector<std::pair<bool, std::string>> results;
call_remote_command(sc, nodes, command, results);
for (int i = 0; i < nodes.size(); ++i) {
dsn::rpc_address node_addr = nodes[i].address;
auto tmp_it = tmp_map.find(node_addr);
if (tmp_it == tmp_map.end())
continue;
if (!results[i].first) {
derror("query perf counter info from node %s failed", node_addr.to_string());
return true;
}
dsn::perf_counter_info info;
dsn::blob bb(results[i].second.data(), 0, results[i].second.size());
if (!dsn::json::json_forwarder<dsn::perf_counter_info>::decode(bb, info)) {
derror("decode perf counter info from node %s failed, result = %s",
node_addr.to_string(),
results[i].second.c_str());
return true;
}
if (info.result != "OK") {
derror("query perf counter info from node %s returns error, error = %s",
node_addr.to_string(),
info.result.c_str());
return true;
}
list_nodes_helper &h = tmp_it->second;
for (dsn::perf_counter_metric &m : info.counters) {
if (m.name == "replica*server*memused.res(MB)")
h.memused_res_mb = m.value;
else if (m.name == "replica*app.pegasus*rdb.block_cache.memory_usage")
h.block_cache_bytes = m.value;
else if (m.name == "replica*eon.replica_stub*disk.available.total.ratio")
h.disk_available_total_ratio = m.value;
else if (m.name == "replica*eon.replica_stub*disk.available.min.ratio")
h.disk_available_min_ratio = m.value;
else {
int32_t app_id_x, partition_index_x;
std::string counter_name;
bool parse_ret = parse_app_pegasus_perf_counter_name(
m.name, app_id_x, partition_index_x, counter_name);
dassert(parse_ret, "name = %s", m.name.c_str());
if (counter_name == "rdb.memtable.memory_usage")
h.mem_tbl_bytes += m.value;
else if (counter_name == "rdb.index_and_filter_blocks.memory_usage")
h.mem_idx_bytes += m.value;
}
}
}
}
// print configuration_list_nodes_response
std::streambuf *buf;
std::ofstream of;
if (!output_file.empty()) {
of.open(output_file);
buf = of.rdbuf();
} else {
buf = std::cout.rdbuf();
}
std::ostream out(buf);
dsn::utils::table_printer tp;
tp.add_title("address");
tp.add_column("status");
if (detailed) {
tp.add_column("replica_count", tp_alignment::kRight);
tp.add_column("primary_count", tp_alignment::kRight);
tp.add_column("secondary_count", tp_alignment::kRight);
}
if (resource_usage) {
tp.add_column("memused_res_mb", tp_alignment::kRight);
tp.add_column("block_cache_mb", tp_alignment::kRight);
tp.add_column("mem_tbl_mb", tp_alignment::kRight);
tp.add_column("mem_idx_mb", tp_alignment::kRight);
tp.add_column("disk_avl_total_ratio", tp_alignment::kRight);
tp.add_column("disk_avl_min_ratio", tp_alignment::kRight);
}
for (auto &kv : tmp_map) {
tp.add_row(kv.second.node_name);
tp.append_data(kv.second.node_status);
if (detailed) {
tp.append_data(kv.second.primary_count + kv.second.secondary_count);
tp.append_data(kv.second.primary_count);
tp.append_data(kv.second.secondary_count);
}
if (resource_usage) {
tp.append_data(kv.second.memused_res_mb);
tp.append_data(kv.second.block_cache_bytes / (1 << 20U));
tp.append_data(kv.second.mem_tbl_bytes / (1 << 20U));
tp.append_data(kv.second.mem_idx_bytes / (1 << 20U));
tp.append_data(kv.second.disk_available_total_ratio);
tp.append_data(kv.second.disk_available_min_ratio);
}
}
tp.output(out);
out << std::endl;
dsn::utils::table_printer tp_count;
tp_count.add_row_name_and_data("total_node_count", nodes.size());
tp_count.add_row_name_and_data("alive_node_count", alive_node_count);
tp_count.add_row_name_and_data("unalive_node_count", nodes.size() - alive_node_count);
tp_count.output(out, ": ");
out << std::endl;
return true;
}
......@@ -3772,7 +3983,6 @@ inline bool app_stat(command_executor *e, shell_context *sc, arguments args)
sum.storage_count += row.storage_count;
sum.rdb_block_cache_hit_count += row.rdb_block_cache_hit_count;
sum.rdb_block_cache_total_count += row.rdb_block_cache_total_count;
sum.rdb_block_cache_mem_usage += row.rdb_block_cache_mem_usage;
sum.rdb_index_and_filter_blocks_mem_usage += row.rdb_index_and_filter_blocks_mem_usage;
sum.rdb_memtable_mem_usage += row.rdb_memtable_mem_usage;
}
......@@ -3808,8 +4018,9 @@ inline bool app_stat(command_executor *e, shell_context *sc, arguments args)
tp.add_column("rejected", tp_alignment::kRight);
tp.add_column("file_mb", tp_alignment::kRight);
tp.add_column("file_num", tp_alignment::kRight);
tp.add_column("mem_tbl_mb", tp_alignment::kRight);
tp.add_column("mem_idx_mb", tp_alignment::kRight);
tp.add_column("hit_rate", tp_alignment::kRight);
tp.add_column("rdb_mem_mb", tp_alignment::kRight);
}
for (row_data &row : rows) {
......@@ -3832,15 +4043,13 @@ inline bool app_stat(command_executor *e, shell_context *sc, arguments args)
tp.append_data(row.recent_write_throttling_reject_count);
tp.append_data(row.storage_mb);
tp.append_data((uint64_t)row.storage_count);
tp.append_data(row.rdb_memtable_mem_usage / (1 << 20U));
tp.append_data(row.rdb_index_and_filter_blocks_mem_usage / (1 << 20U));
double block_cache_hit_rate =
std::abs(row.rdb_block_cache_total_count) < 1e-6
? 0.0
: row.rdb_block_cache_hit_count / row.rdb_block_cache_total_count;
tp.append_data(block_cache_hit_rate);
tp.append_data((row.rdb_block_cache_mem_usage +
row.rdb_index_and_filter_blocks_mem_usage +
row.rdb_memtable_mem_usage) /
(1 << 20U));
}
}
tp.output(out);
......
......@@ -56,7 +56,8 @@ static command_executor commands[] = {
{
"nodes",
"get the node status for this cluster",
"[-d|--detailed] [-o|--output file_name] [-s|--status all|alive|unalive]",
"[-d|--detailed] [-r|--resolve_ip] [-u|--resource_usage] "
"[-o|--output file_name] [-s|--status all|alive|unalive]",
ls_nodes,
},
{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册