# monitor_ob.yaml
# Input definition anchored as obInputBasic: the mysqlTableInput plugin runs
# the SQL statements listed under collectConfig against the local OceanBase.
obInputBasic: &obInputBasic
  plugin: mysqlTableInput
  config:
    timeout: 10s
    pluginConfig:
      collect_interval: ${monagent.second.metric.cache.update.interval}
      connection:
        url: ${monagent.ob.monitor.user}:${monagent.ob.monitor.password}@tcp(127.0.0.1:${monagent.ob.sql.port})/oceanbase?interpolateParams=true
        maxOpen: 32
        maxIdle: 2
      # Initial values for the names referenced by `params` and `condition`
      # in the entries below.
      defaultConditionValues:
        ob_is_rootservice: true
        ob_svr_port: ${monagent.ob.rpc.port}
        ob_svr_ip: ${monagent.host.ip}
      collectConfig:
        # ob_role (no minObVersion, maxObVersion 4.0.0.0): reads with_rootserver
        # for this server and publishes it as the ob_is_rootservice condition value.
        - name: ob_role
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ with_rootserver from __all_server where svr_ip = ? and svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          conditionValues:
            ob_is_rootservice: with_rootserver
          enableCache: true
          cacheExpire: 1m
        # ob_role (minObVersion 4.0.0.0): same condition value, read from
        # DBA_OB_SERVERS and converted from 'YES' to 1 / anything else to 0.
        - name: ob_role
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select (case when with_rootserver='YES' then 1 else 0 end) as with_rootserver from DBA_OB_SERVERS where svr_ip = ? and svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          conditionValues:
            ob_is_rootservice: with_rootserver
          enableCache: true
          cacheExpire: 1m

        # ob_cache (no minObVersion, maxObVersion 4.0.0.0): cache_size per
        # tenant and cache name on this server.
        # NOTE(review): the tenant_name tag maps a column that is not in the
        # select list — confirm the plugin tolerates or fills it elsewhere.
        - name: ob_cache
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ tenant_id, cache_name, cache_size from __all_virtual_kvcache_info where svr_ip = ? and svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
            tenant_name: tenant_name
            cache_name: cache_name
          metrics:
            size_bytes: cache_size
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}
        # ob_cache (minObVersion 4.0.0.0): cache_size per tenant and cache
        # name on this server, read from GV$OB_KVCACHE.
        # NOTE(review): the tenant_name tag maps a column that is not in the
        # select list — confirm the plugin tolerates or fills it elsewhere.
        - name: ob_cache
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ tenant_id, cache_name, cache_size from GV$OB_KVCACHE where svr_ip = ? and svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
            tenant_name: tenant_name
            cache_name: cache_name
          metrics:
            size_bytes: cache_size
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}

        # ob_sysstat (no minObVersion, maxObVersion 4.0.0.0): selected
        # v$sysstat rows, tagged by tenant (con_id) and stat_id.
        - name: ob_sysstat
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ con_id tenant_id, stat_id, value from v$sysstat where stat_id IN (10000, 10001, 10002, 10003, 10004, 10005, 10006, 140002, 140003, 140005, 140006, 40030, 80040, 80041, 130000, 130001, 130002, 130004, 20000, 20001, 20002, 30000, 30001, 30002, 30005, 30006, 30007, 30008, 30009, 30010, 30011, 30012, 30013, 30080, 30081, 40000, 40001, 40002, 40003, 40004, 40005, 40006, 40007, 40008, 40009, 40010, 40011, 40012, 40018, 40019, 40116, 40117, 40118, 50000, 50001, 50002, 50004, 50005, 50008, 50009, 50010, 50011, 50037, 50038, 60000, 60001, 60002, 60003, 60004, 60005, 60019, 60020, 60021, 60022, 60023, 60024, 80057, 120000, 120001, 120009, 120008) and (con_id > 1000 or con_id = 1) and class < 1000
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
            stat_id: stat_id
          metrics:
            value: value
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}
        # ob_sysstat (minObVersion 4.0.0.0): selected v$sysstat rows, tagged
        # by tenant (con_id) and stat_id. The stat_id list differs from the
        # lower-version entry.
        - name: ob_sysstat
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ con_id tenant_id, stat_id, value from v$sysstat where stat_id IN (10000, 10001, 10002, 10003, 10004, 10005, 10006, 140002, 140003, 140005, 140006, 40030, 80040, 80041, 130000, 130001, 130002, 130004, 20000, 20001, 20002, 30000, 30001, 30002, 30005, 30006, 30007, 30008, 30009, 30010, 30011, 30012, 30013, 40000, 40001, 40002, 40003, 40004, 40005, 40006, 40007, 40008, 40009, 40010, 40011, 40012, 40018, 40019, 50000, 50001, 60087, 50004, 50005, 50008, 50009, 50010, 50011, 50037, 50038, 60000, 60001, 60002, 60003, 60004, 60005, 60019, 60020, 60021, 60022, 60023, 60024, 80057, 120000, 120001, 120009, 120008) and (con_id > 1000 or con_id = 1) and class < 1000
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
            stat_id: stat_id
          metrics:
            value: value
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}

        # ob_server (no minObVersion, maxObVersion 4.0.0.0): number of servers
        # per status, with the comma-joined server IPs as a tag.
        - name: ob_server
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ group_concat(svr_ip SEPARATOR ',') as servers, status, count(1) as cnt from __all_server group by status
          sqlSlowThreshold: 100ms
          tags:
            server_ips: servers
            status: status
          metrics:
            num: cnt
          enableCache: true
          cacheExpire: 60s
        # ob_server (minObVersion 4.0.0.0): number of servers per status from
        # DBA_OB_SERVERS, with the comma-joined server IPs as a tag.
        - name: ob_server
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ group_concat(svr_ip SEPARATOR ',') as servers, status, count(1) as cnt from DBA_OB_SERVERS group by status
          sqlSlowThreshold: 100ms
          tags:
            server_ips: servers
            status: status
          metrics:
            num: cnt
          enableCache: true
          cacheExpire: 60s

        # ob_table (no minObVersion, maxObVersion 4.0.0.0): table count per
        # tenant; gated by the ob_is_rootservice condition.
        - name: ob_table
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) QUERY_TIMEOUT(100000000) */ tenant_name, tenant_id, count(*)  as cnt from gv$table group by tenant_id
          sqlSlowThreshold: 100ms
          condition: ob_is_rootservice
          tags:
            ob_tenant_id: tenant_id
          metrics:
            num: cnt
          enableCache: true
          cacheExpire: 1h
        # ob_table (minObVersion 4.0.0.0): table count per con_id from
        # CDB_TABLES; gated by the ob_is_rootservice condition.
        - name: ob_table
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /*+ MONITOR_AGENT QUERY_TIMEOUT(100000000) */ con_id tenant_id, count(*) as cnt from CDB_TABLES group by con_id
          sqlSlowThreshold: 100ms
          condition: ob_is_rootservice
          tags:
            ob_tenant_id: tenant_id
          metrics:
            num: cnt
          enableCache: true
          cacheExpire: 1h

        # ob_active_session (no minObVersion, maxObVersion 4.0.0.0): per-tenant
        # count of processlist rows in state ACTIVE on this server; tenants
        # with no rows report 0 through the left join.
        - name: ob_active_session
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select  /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ case when cnt is null then 0 else cnt end as cnt, tenant_name, tenant_id from (select __all_tenant.tenant_name, __all_tenant.tenant_id, cnt from __all_tenant left join (select count(`state`='ACTIVE' OR NULL) as cnt, tenant as tenant_name from __all_virtual_processlist where svr_ip = ? and svr_port = ? group by tenant) t1 on __all_tenant.tenant_name = t1.tenant_name) t2
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
            tenant_name: tenant_name
          metrics:
            num: cnt
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}
        # ob_active_session (minObVersion 4.0.0.0): per-tenant count of
        # GV$OB_PROCESSLIST rows in state ACTIVE on this server; tenants of
        # type META are excluded.
        - name: ob_active_session
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ case when cnt is null then 0 else cnt end as cnt, tenant_name, tenant_id from (select DBA_OB_TENANTS.tenant_name, DBA_OB_TENANTS.tenant_id, cnt from DBA_OB_TENANTS left join (select count(`state`='ACTIVE' OR NULL) as cnt, tenant as tenant_name from GV$OB_PROCESSLIST where svr_ip = ? and svr_port = ? group by tenant) t1 on DBA_OB_TENANTS.tenant_name = t1.tenant_name where DBA_OB_TENANTS.tenant_type<>'META') t2
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
            tenant_name: tenant_name
          metrics:
            num: cnt
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}

        # ob_all_session (no minObVersion, maxObVersion 4.0.0.0): per-tenant
        # count of all processlist rows on this server; tenants with no rows
        # report 0 through the left join.
        - name: ob_all_session
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select  /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ case when cnt is null then 0 else cnt end as cnt, tenant_name, tenant_id from (select __all_tenant.tenant_name, __all_tenant.tenant_id, cnt from __all_tenant left join (select count(1) as cnt, tenant as tenant_name from __all_virtual_processlist where svr_ip = ? and svr_port = ? group by tenant) t1 on __all_tenant.tenant_name = t1.tenant_name) t2
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
            tenant_name: tenant_name
          metrics:
            num: cnt
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}
        # ob_all_session (minObVersion 4.0.0.0): per-tenant count of all
        # GV$OB_PROCESSLIST rows on this server; tenants of type META are
        # excluded.
        - name: ob_all_session
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ case when cnt is null then 0 else cnt end as cnt, tenant_name, tenant_id from (select DBA_OB_TENANTS.tenant_name, DBA_OB_TENANTS.tenant_id, cnt from DBA_OB_TENANTS left join (select count(1) as cnt, tenant as tenant_name from GV$OB_PROCESSLIST where svr_ip = ? and svr_port = ? group by tenant) t1 on DBA_OB_TENANTS.tenant_name = t1.tenant_name where DBA_OB_TENANTS.tenant_type<>'META') t2
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
            tenant_name: tenant_name
          metrics:
            num: cnt
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}

        # ob_plan_cache (no minObVersion, maxObVersion 4.0.0.0): mem_used,
        # access_count and hit_count per tenant from v$plan_cache_stat.
        - name: ob_plan_cache
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ tenant_id, mem_used, access_count, hit_count from v$plan_cache_stat
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
          metrics:
            memory_bytes: mem_used
            access_total: access_count
            hit_total: hit_count
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}
        # ob_plan_cache (minObVersion 4.0.0.0): mem_used, access_count and
        # hit_count per tenant from V$OB_PLAN_CACHE_STAT.
        - name: ob_plan_cache
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ tenant_id, mem_used, access_count, hit_count from V$OB_PLAN_CACHE_STAT
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
          metrics:
            memory_bytes: mem_used
            access_total: access_count
            hit_total: hit_count
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}

        # ob_waitevent (no minObVersion, maxObVersion 4.0.0.0): per-tenant sums
        # of total_waits and time_waited_micro / 1000000 over wait events whose
        # wait_class is not IDLE.
        - name: ob_waitevent
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ con_id tenant_id, sum(total_waits) as total_waits, sum(time_waited_micro) / 1000000 as time_waited from v$system_event where v$system_event.wait_class <> 'IDLE' and (con_id > 1000 or con_id = 1) group by tenant_id
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
          metrics:
            wait_total: total_waits
            wait_seconds_total: time_waited
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}
        # ob_waitevent (minObVersion 4.0.0.0): per-tenant sums of total_waits
        # and time_waited_micro / 1000000 over wait events whose wait_class is
        # not IDLE.
        - name: ob_waitevent
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ con_id tenant_id, sum(total_waits) as total_waits, sum(time_waited_micro) / 1000000 as time_waited from v$system_event where v$system_event.wait_class <> 'IDLE' and (con_id > 1000 or con_id = 1) group by tenant_id
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
          metrics:
            wait_total: total_waits
            wait_seconds_total: time_waited
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}

        # ob_system_event (no minObVersion, maxObVersion 4.0.0.0): non-IDLE
        # wait events bucketed into event_group by the CASE expression, summed
        # per tenant and group.
        - name: ob_system_event
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ con_id tenant_id, case when event_id = 10000 then 'INTERNAL' when event_id = 13000 then 'SYNC_RPC' when event_id = 14003 then 'ROW_LOCK_WAIT' when (event_id >= 10001 and event_id <= 11006) or (event_id >= 11008 and event_id <= 11011) then 'IO' when event like 'latch:%' then 'LATCH' else 'OTHER' END event_group, sum(total_waits) as total_waits, sum(time_waited_micro / 1000000) as time_waited from v$system_event where v$system_event.wait_class <> 'IDLE' and (con_id > 1000 or con_id = 1) group by tenant_id, event_group
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
            event_group: event_group
          metrics:
            total_waits: total_waits
            time_waited: time_waited
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}
        # ob_system_event (minObVersion 4.0.0.0): non-IDLE wait events bucketed
        # into event_group by the CASE expression, summed per tenant and group.
        - name: ob_system_event
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ con_id tenant_id, case when event_id = 10000 then 'INTERNAL' when event_id = 13000 then 'SYNC_RPC' when event_id = 14003 then 'ROW_LOCK_WAIT' when (event_id >= 10001 and event_id <= 11006) or (event_id >= 11008 and event_id <= 11011) then 'IO' when event like 'latch:%' then 'LATCH' else 'OTHER' END event_group, sum(total_waits) as total_waits, sum(time_waited_micro / 1000000) as time_waited from v$system_event where v$system_event.wait_class <> 'IDLE' and (con_id > 1000 or con_id = 1) group by tenant_id, event_group
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
            event_group: event_group
          metrics:
            total_waits: total_waits
            time_waited: time_waited
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}

        # ob_server_resource (no minObVersion, maxObVersion 2.0.0): CPU, memory
        # and disk totals/assignments for this server from
        # __all_virtual_server_stat.
        - name: ob_server_resource
          minObVersion: null
          maxObVersion: '2.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ cpu_total, cpu_assigned, mem_total, mem_assigned,disk_total, cpu_assigned_percent, mem_assigned_percent from __all_virtual_server_stat where svr_ip = ? and svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          metrics:
            cpu: cpu_total
            cpu_assigned: cpu_assigned
            memory_bytes: mem_total
            memory_assigned_bytes: mem_assigned
            disk_bytes: disk_total
            cpu_assigned_percent: cpu_assigned_percent
            memory_assigned_percent: mem_assigned_percent
          enableCache: true
          cacheExpire: 60s
        # ob_server_resource (minObVersion 2.0.0, maxObVersion 4.0.0.0): same
        # metrics, with cpu_max_assigned / mem_max_assigned aliased to the
        # assigned columns.
        - name: ob_server_resource
          minObVersion: '2.0.0'
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ cpu_total,cpu_max_assigned as cpu_assigned,mem_total,mem_max_assigned as mem_assigned,disk_total, cpu_assigned_percent, mem_assigned_percent from __all_virtual_server_stat where svr_ip = ? and svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          metrics:
            cpu: cpu_total
            cpu_assigned: cpu_assigned
            memory_bytes: mem_total
            memory_assigned_bytes: mem_assigned
            disk_bytes: disk_total
            cpu_assigned_percent: cpu_assigned_percent
            memory_assigned_percent: mem_assigned_percent
          enableCache: true
          cacheExpire: 60s
        # ob_server_resource (minObVersion 4.0.0.0): same metrics, read from
        # GV$OB_SERVERS; the two *_percent values are computed in the query
        # as assigned / capacity.
        - name: ob_server_resource
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ cpu_capacity_max as cpu_total,cpu_assigned_max as cpu_assigned,mem_capacity as mem_total,mem_assigned as mem_assigned,data_disk_capacity as disk_total, (cpu_assigned_max / cpu_capacity_max) as cpu_assigned_percent, (mem_assigned / mem_capacity) as mem_assigned_percent from GV$OB_SERVERS where svr_ip = ? and svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          metrics:
            cpu: cpu_total
            cpu_assigned: cpu_assigned
            memory_bytes: mem_total
            memory_assigned_bytes: mem_assigned
            disk_bytes: disk_total
            cpu_assigned_percent: cpu_assigned_percent
            memory_assigned_percent: mem_assigned_percent
          enableCache: true
          cacheExpire: 60s

        # ob_tenant_resource (no minObVersion, maxObVersion 4.0.0.0): per-tenant
        # sums of min/max CPU and memory over v$unit; a NULL tenant_id is
        # reported as -1.
        - name: ob_tenant_resource
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: SELECT /*+read_consistency(weak) */ COALESCE(tenant_id, -1) as tenant_id, tenant_name, SUM(max_cpu) AS max_cpu, SUM(min_cpu) AS min_cpu, SUM(max_memory) AS max_memory, SUM(min_memory) AS min_memory FROM v$unit GROUP BY tenant_id
          sqlSlowThreshold: 100ms
          tags:
            tenant_name: tenant_name
            ob_tenant_id: tenant_id
          metrics:
            max_cpu: max_cpu
            min_cpu: min_cpu
            max_memory: max_memory
            min_memory: min_memory
          enableCache: true
          cacheExpire: 60s
        # ob_tenant_resource (minObVersion 4.0.0.0): per-tenant sums of min/max
        # CPU and memory_size over v$ob_units joined to dba_ob_units and
        # dba_ob_tenants. The tenant_type literal is spelled 'META', as in the
        # other queries of this file, so the exclusion does not depend on a
        # case-insensitive collation.
        - name: ob_tenant_resource
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select coalesce(t1.tenant_id, -1) as tenant_id, tenant_name, sum(max_cpu) as max_cpu, sum(min_cpu) as min_cpu, sum(max_memory) as max_memory, sum(min_memory) as min_memory from (select t1.unit_id, t1.svr_ip, t1.svr_port, t2.tenant_id, t1.min_cpu, t1.max_cpu, t1.min_memory, t1.max_memory from (select  unit_id, svr_ip, svr_port, sum(min_cpu) as min_cpu, sum(max_cpu) as max_cpu, sum(memory_size) as min_memory, sum(memory_size) as max_memory from v$ob_units  group by unit_id ) t1 join dba_ob_units t2 on t1.unit_id = t2.unit_id) t1 join dba_ob_tenants t2 on t1.tenant_id = t2.tenant_id where tenant_type <>'META' group by tenant_id
          sqlSlowThreshold: 100ms
          tags:
            tenant_name: tenant_name
            ob_tenant_id: tenant_id
          metrics:
            max_cpu: max_cpu
            min_cpu: min_cpu
            max_memory: max_memory
            min_memory: min_memory
          enableCache: true
          cacheExpire: 60s

        # ob_tenant_assigned (no minObVersion, maxObVersion 2.0.0): this
        # server's CPU/memory totals and assignments, emitted once per tenant
        # unit placed on it (join with gv$unit).
        # NOTE(review): the next ob_tenant_assigned entry starts at 4.0.0.0, so
        # nothing covers 2.0.0–4.0.0.0 — confirm this gap is intended.
        - name: ob_tenant_assigned
          minObVersion: null
          maxObVersion: '2.0.0'
          sql: SELECT t2.tenant_id, t2.tenant_name, t1.cpu_total, t1.cpu_assigned, t1.mem_total, t1.mem_assigned FROM __all_virtual_server_stat t1 JOIN (SELECT tenant_id, tenant_name, svr_ip, svr_port FROM `gv$unit`) t2 ON t1.svr_ip=t2.svr_ip AND t1.svr_port=t2.svr_port WHERE t1.svr_ip = ? AND t1.svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          tags:
            tenant_name: tenant_name
            ob_tenant_id: tenant_id
          metrics:
            cpu_total: cpu_total
            cpu_assigned: cpu_assigned
            mem_total: mem_total
            mem_assigned: mem_assigned
          enableCache: true
          cacheExpire: 60s
        # ob_tenant_assigned (minObVersion 4.0.0.0): this server's CPU/memory
        # capacity and assignments from GV$OB_SERVERS, emitted once per
        # non-META tenant unit placed on it.
        - name: ob_tenant_assigned
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ t4.tenant_id, t4.tenant_name, cpu_capacity as cpu_total,cpu_assigned,mem_capacity as mem_total,mem_assigned as mem_assigned from GV$OB_SERVERS t1 JOIN (SELECT t2.tenant_id, t3.tenant_name, t2.svr_ip, t2.svr_port FROM gv$ob_units t2 left join DBA_OB_TENANTS t3 on t2.tenant_id=t3.tenant_id WHERE T3.tenant_type<>'META') t4 ON t1.svr_ip=t4.svr_ip AND t1.svr_port=t4.svr_port WHERE t1.svr_ip = ? AND t1.svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          tags:
            tenant_name: tenant_name
            ob_tenant_id: tenant_id
          metrics:
            cpu_total: cpu_total
            cpu_assigned: cpu_assigned
            mem_total: mem_total
            mem_assigned: mem_assigned
          enableCache: true
          cacheExpire: 60s

        # ob_tenant_disk (no minObVersion, maxObVersion 4.0.0.0): this server's
        # total_size from __all_virtual_disk_stat, emitted once per tenant unit
        # placed on it.
        - name: ob_tenant_disk
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: SELECT t2.tenant_id, t2.tenant_name, t1.total_size FROM __all_virtual_disk_stat t1 JOIN (SELECT tenant_id, tenant_name, svr_ip, svr_port FROM `gv$unit`) t2 ON t1.svr_ip=t2.svr_ip AND t1.svr_port=t2.svr_port WHERE t1.svr_ip = ? AND t1.svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          tags:
            tenant_name: tenant_name
            ob_tenant_id: tenant_id
          metrics:
            total_size: total_size
          enableCache: true
          cacheExpire: 60s
        # ob_tenant_disk (minObVersion 4.0.0.0): per-tenant sums of
        # data_disk_in_use and log_disk_in_use over v$ob_units joined to
        # dba_ob_units and dba_ob_tenants. The tenant_type literal is spelled
        # 'META', as in the other queries of this file, so the exclusion does
        # not depend on a case-insensitive collation.
        - name: ob_tenant_disk
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select coalesce(t1.tenant_id, -1) as tenant_id, tenant_name, sum(data_disk_in_use) as data_disk_in_use, sum(log_disk_in_use) as log_disk_in_use from (select t1.unit_id, t1.svr_ip, t1.svr_port, t2.tenant_id, t1.data_disk_in_use, t1.log_disk_in_use from (select  unit_id, svr_ip, svr_port, sum(data_disk_in_use) as data_disk_in_use, sum(log_disk_in_use) as log_disk_in_use from v$ob_units  group by unit_id ) t1 join dba_ob_units t2 on t1.unit_id = t2.unit_id) t1 join dba_ob_tenants t2 on t1.tenant_id = t2.tenant_id where tenant_type <>'META' group by tenant_id
          sqlSlowThreshold: 100ms
          tags:
            tenant_name: tenant_name
            ob_tenant_id: tenant_id
          metrics:
            data_size: data_disk_in_use
            log_size: log_disk_in_use
          enableCache: true
          cacheExpire: 60s

        # ob_disk (no minObVersion, maxObVersion 4.0.0.0): total_size and
        # free_size for this server from __all_virtual_disk_stat.
        - name: ob_disk
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ total_size, free_size from __all_virtual_disk_stat where svr_ip = ? and svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          metrics:
            total_bytes: total_size
            free_bytes: free_size
          enableCache: true
          cacheExpire: 60s
        # ob_disk (minObVersion 4.0.0.0): data_disk_capacity as the total and
        # capacity minus data_disk_in_use as the free size, from GV$OB_SERVERS.
        - name: ob_disk
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ data_disk_capacity as total_size, (data_disk_capacity - data_disk_in_use) as free_size from GV$OB_SERVERS where svr_ip = ? and svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          metrics:
            total_bytes: total_size
            free_bytes: free_size
          enableCache: true
          cacheExpire: ${monagent.second.metric.cache.update.interval}

# Input definition anchored as obInputExtra: same plugin and connection
# settings as obInputBasic, with its own collect_interval property.
obInputExtra: &obInputExtra
  plugin: mysqlTableInput
  config:
    timeout: 10s
    pluginConfig:
      collect_interval: ${monagent.collector.ob.extra.interval}
      connection:
        url: ${monagent.ob.monitor.user}:${monagent.ob.monitor.password}@tcp(127.0.0.1:${monagent.ob.sql.port})/oceanbase?interpolateParams=true
        maxOpen: 32
        maxIdle: 2
      # Initial values for the names referenced by `params` and `condition`
      # in the entries below.
      defaultConditionValues:
        ob_is_rootservice: true
        ob_svr_port: ${monagent.ob.rpc.port}
        ob_svr_ip: ${monagent.host.ip}
      collectConfig:
        # ob_role (no minObVersion, maxObVersion 4.0.0.0): reads with_rootserver
        # for this server and publishes it as the ob_is_rootservice condition value.
        - name: ob_role
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ with_rootserver from __all_server where svr_ip = ? and svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          conditionValues:
            ob_is_rootservice: with_rootserver
          enableCache: true
          cacheExpire: 1m
        # ob_role (minObVersion 4.0.0.0): same condition value, read from
        # DBA_OB_SERVERS and converted from 'YES' to 1 / anything else to 0.
        - name: ob_role
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select (case when with_rootserver='YES' then 1 else 0 end) as with_rootserver from DBA_OB_SERVERS where svr_ip = ? and svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          conditionValues:
            ob_is_rootservice: with_rootserver
          enableCache: true
          cacheExpire: 1m

        # ob_unit (no minObVersion, maxObVersion 4.0.0.0): row count of v$unit.
        - name: ob_unit
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ count(*) as cnt from v$unit
          sqlSlowThreshold: 100ms
          metrics:
            num: cnt
          enableCache: true
          cacheExpire: 60s
        # ob_unit (minObVersion 4.0.0.0): row count of V$OB_UNITS.
        - name: ob_unit
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ count(*) as cnt from V$OB_UNITS
          sqlSlowThreshold: 100ms
          metrics:
            num: cnt
          enableCache: true
          cacheExpire: 60s

        # ob_index (no minObVersion, maxObVersion 4.0.0.0): count of gv$table
        # rows with table_type 5 and index_status 5 or 6, reported as
        # error_num; gated by the ob_is_rootservice condition.
        - name: ob_index
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) QUERY_TIMEOUT(100000000) */ count(*) as cnt from gv$table where table_type in (5) and index_status in (5, 6)
          sqlSlowThreshold: 100ms
          condition: ob_is_rootservice
          metrics:
            error_num: cnt
          enableCache: true
          cacheExpire: 60s
        # ob_index (minObVersion 4.0.0.0): count of CDB_INDEXES rows whose
        # status is ERROR or UNUSABLE, reported as error_num; gated by the
        # ob_is_rootservice condition.
        - name: ob_index
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /*+ MONITOR_AGENT QUERY_TIMEOUT(100000000) */ count(*) as cnt from CDB_INDEXES where status in ('ERROR','UNUSABLE')
          sqlSlowThreshold: 100ms
          condition: ob_is_rootservice
          metrics:
            error_num: cnt
          enableCache: true
          cacheExpire: 60s

        # ob_memstore (no minObVersion, maxObVersion 4.0.0.0): per-tenant
        # memstore figures for this server from gv$memstore.
        - name: ob_memstore
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ tenant_id,  active, total, freeze_trigger, freeze_cnt from gv$memstore where ip = ? and port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
          metrics:
            active_bytes: active
            total_bytes: total
            freeze_trigger_bytes: freeze_trigger
            freeze_times: freeze_cnt
          enableCache: true
          cacheExpire: 60s
        # ob_memstore (minObVersion 4.0.0.0): per-tenant memstore figures for
        # this server from GV$OB_MEMSTORE; active_span and memstore_used are
        # aliased to the column names used by the lower-version entry.
        - name: ob_memstore
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ tenant_id, active_span as active, memstore_used as total, freeze_trigger, freeze_cnt from GV$OB_MEMSTORE  where svr_ip = ? and svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          tags:
            ob_tenant_id: tenant_id
          metrics:
            active_bytes: active
            total_bytes: total
            freeze_trigger_bytes: freeze_trigger
            freeze_times: freeze_cnt
          enableCache: true
          cacheExpire: 60s

        # ob_tenant500_memory (no minObVersion, maxObVersion 4.0.0.0): summed
        # hold/used for tenant_id 500 on this server, excluding the
        # OB_KVSTORE_CACHE_MB module.
        - name: ob_tenant500_memory
          minObVersion: null
          maxObVersion: '4.0.0.0'
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ sum(hold) as hold, sum(used) as used from __all_virtual_memory_info where tenant_id = 500 and svr_ip = ? and svr_port = ? and mod_name <> 'OB_KVSTORE_CACHE_MB'
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          metrics:
            hold_bytes: hold
            used_bytes: used
          enableCache: true
          cacheExpire: 60s
        # ob_tenant500_memory (minObVersion 4.0.0.0): summed hold/used for
        # tenant_id 500 on this server from GV$OB_MEMORY, excluding the
        # KvstorCacheMb module.
        - name: ob_tenant500_memory
          minObVersion: '4.0.0.0'
          maxObVersion: null
          sql: select /* MONITOR_AGENT */ sum(hold) as hold, sum(used) as used from GV$OB_MEMORY where tenant_id = 500 and svr_ip = ? and svr_port = ? and MOD_NAME <> 'KvstorCacheMb'
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          metrics:
            hold_bytes: hold
            used_bytes: used
          enableCache: true
          cacheExpire: 60s

        # Unit configuration (CPU, memory, IOPS limits) of the units placed on
        # this server, joined from __all_resource_pool, __all_unit_config and
        # __all_unit. Version-gated: no lower bound, upper bound 4.0.0.0.
        - name: ob_unit_config
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ tenant_id, __all_unit_config.name, max_cpu, min_cpu, max_memory, min_memory, max_iops, min_iops from __all_resource_pool, __all_unit_config, __all_unit  where __all_resource_pool.unit_config_id = __all_unit_config.unit_config_id and __all_unit.resource_pool_id = __all_resource_pool.resource_pool_id and __all_unit.svr_ip = ?
          params:
            - ob_svr_ip
          sqlSlowThreshold: 100ms
          minObVersion: null
          maxObVersion: 4.0.0.0
          tags:
            ob_tenant_id: tenant_id
            unit_config_name: name
          # Metric name -> result-set column. Every column referenced here must
          # appear in the select list above: min_memory is selected and is
          # exported as min_memory_bytes; max_session_num is not selected and
          # therefore is not mapped.
          metrics:
            max_cpu: max_cpu
            min_cpu: min_cpu
            max_memory_bytes: max_memory
            min_memory_bytes: min_memory
            max_iops: max_iops
            min_iops: min_iops
          enableCache: true
          cacheExpire: 60s
        # Unit configuration (CPU, memory, IOPS limits) of the units placed on
        # this server, joined from DBA_OB_RESOURCE_POOLS, DBA_OB_UNIT_CONFIGS
        # and DBA_OB_UNITS. Version-gated: lower bound 4.0.0.0, no upper bound.
        #
        # memory_size is aliased twice (max_memory and min_memory) so that both
        # memory metrics below resolve to a selected column.
        # NOTE(review): confirm that reporting memory_size as the minimum is the
        # intended value for min_memory_bytes on these versions.
        - name: ob_unit_config
          sql: select /* MONITOR_AGENT */ DBA_OB_RESOURCE_POOLS.tenant_id, DBA_OB_UNIT_CONFIGS.name, DBA_OB_UNIT_CONFIGS.max_cpu, DBA_OB_UNIT_CONFIGS.min_cpu, DBA_OB_UNIT_CONFIGS.memory_size as max_memory, DBA_OB_UNIT_CONFIGS.memory_size as min_memory, DBA_OB_UNIT_CONFIGS.max_iops, DBA_OB_UNIT_CONFIGS.min_iops from DBA_OB_RESOURCE_POOLS, DBA_OB_UNIT_CONFIGS, DBA_OB_UNITS  where DBA_OB_RESOURCE_POOLS.unit_config_id = DBA_OB_UNIT_CONFIGS.unit_config_id and DBA_OB_UNITS.resource_pool_id = DBA_OB_RESOURCE_POOLS.resource_pool_id and DBA_OB_UNITS.svr_ip = ?
          params:
            - ob_svr_ip
          sqlSlowThreshold: 100ms
          minObVersion: 4.0.0.0
          maxObVersion: null
          tags:
            ob_tenant_id: tenant_id
            unit_config_name: name
          # Metric name -> result-set column alias.
          metrics:
            max_cpu: max_cpu
            min_cpu: min_cpu
            max_memory_bytes: max_memory
            min_memory_bytes: min_memory
            max_iops: max_iops
            min_iops: min_iops
          enableCache: true
          cacheExpire: 60s
        # Row count per tenant in __all_virtual_partition_info for the given
        # server. The role tag is the constant 1 emitted by the query.
        # Version-gated: no lower bound, upper bound 4.0.0.0.
        - name: ob_partition
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) QUERY_TIMEOUT(100000000) */ tenant_id, 1 as role, case when cnt is null then 0 else cnt end as cnt from (select tenant_id, count(*) as cnt from __all_virtual_partition_info where svr_ip = ? and svr_port = ? group by tenant_id)
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          minObVersion: null
          maxObVersion: 4.0.0.0
          tags:
            ob_tenant_id: tenant_id
            role: role
          metrics:
            num: cnt
          enableCache: true
          # Cached far longer than the 60s used by the neighbouring entries.
          cacheExpire: 1h

        # Zone attributes from __all_zone: one row per (zone, name) pair with
        # its value and the current time in microseconds.
        # Gated by the ob_is_rootservice condition.
        # Version-gated: no lower bound, upper bound 4.0.0.0.
        - name: ob_zone
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ zone, name, value, time_to_usec(now()) as current from __all_zone
          sqlSlowThreshold: 100ms
          condition: ob_is_rootservice
          minObVersion: null
          maxObVersion: 4.0.0.0
          tags:
            zone: zone
            name: name
          # Metric name -> result-set column alias.
          metrics:
            stat: value
            current_timestamp: current
          enableCache: true
          cacheExpire: 60s

        # Cumulative query response-time histogram per tenant, built by
        # self-joining __all_virtual_query_response_time: for every bucket b,
        # rows a with a.response_time <= b.response_time are summed.
        # The join matches tenant_id as well as svr_ip/svr_port so that a
        # tenant's cumulative buckets only accumulate that tenant's own rows.
        # count and sum are populated only on the row whose response_time is
        # the table-wide maximum; other rows carry null for them.
        # Version-gated: no lower bound, upper bound 4.0.0.0.
        - name: ob_query_response_time_seconds
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ __all_tenant.tenant_name as tenant_name, cast(v_acc_response_time.response_time / 1000000 as float) as response_time_seconds,  v_acc_response_time.count as bucket, case when  v_acc_response_time.response_time = (select max(response_time) from __all_virtual_query_response_time) then v_acc_response_time.count else null end as count, case when  v_acc_response_time.response_time = (select max(response_time) from __all_virtual_query_response_time) then cast(v_acc_response_time.sum / 1000000 as float) else null end as sum from (select b.tenant_id, b.response_time  as response_time, sum(a.count) as count, sum(a.total) as sum from  __all_virtual_query_response_time a,  __all_virtual_query_response_time b where a.response_time <= b.response_time and a.tenant_id = b.tenant_id and a.svr_ip = b.svr_ip and a.svr_port = b.svr_port and b.svr_ip = ? and b.svr_port = ? group by b.tenant_id, b.response_time) v_acc_response_time,  __all_tenant where v_acc_response_time.tenant_id = __all_tenant.tenant_id;
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          minObVersion: null
          maxObVersion: 4.0.0.0
          tags:
            tenant_name: tenant_name
            le: response_time_seconds
          # Metric name -> result-set column alias.
          metrics:
            bucket: bucket
            count: count
            sum: sum
          enableCache: true
          cacheExpire: 60s
        # Number of rows in __all_virtual_trans_stat on the given server with
        # part_trans_action > 2 whose ctx_create_time is more than 600 seconds
        # in the past. Gated by the ob_is_rootservice condition.
        # Version-gated: no lower bound, upper bound 4.0.0.0.
        - name: ob_trans
          sql: select /*+ MONITOR_AGENT READ_CONSISTENCY(WEAK) */ count(1) as cnt from __all_virtual_trans_stat where part_trans_action > 2 and ctx_create_time < date_sub(now(), interval 600 second) and svr_ip = ? and svr_port = ?
          params:
            - ob_svr_ip
            - ob_svr_port
          sqlSlowThreshold: 100ms
          condition: ob_is_rootservice
          minObVersion: null
          maxObVersion: 4.0.0.0
          metrics:
            expire_num: cnt
          enableCache: true
          cacheExpire: 60s

# Configuration for the jointable plugin. Each joinTableConfigs entry pairs a
# tenant lookup query (ob_tenant_id, tenant_name) with the metric names and tag
# it applies to; entries come in pairs, one per OceanBase version range.
jointableProcessor: &jointableProcessor
  plugin: jointable
  config:
    timeout: 10s
    pluginConfig:
      connection:
        url: ${monagent.ob.monitor.user}:${monagent.ob.monitor.password}@tcp(127.0.0.1:${monagent.ob.sql.port})/oceanbase?interpolateParams=true
        maxIdle: 2
        maxOpen: 32
      joinTableConfigs:
        # Tenant-tagged metrics, upper bound 4.0.0.0: lookup via __all_tenant.
        # NOTE(review): this list names ob_tenant, ob_tenant_server and
        # ob_tenant_server_available, which the 4.0.0.0+ entry does not;
        # confirm the difference is intended.
        - cacheExpire: 3m
          queryDataConfigs:
            - querySql: SELECT tenant_id ob_tenant_id, tenant_name FROM __all_tenant
              queryArgs: []
              minOBVersion: null
              maxOBVersion: 4.0.0.0
          conditions:
            - tagNames:
                - ob_tenant_id
              metrics:
                - ob_cache
                - ob_partition
                - ob_table
                - ob_plan_cache
                - ob_waitevent
                - ob_system_event
                - ob_tenant_task
                - ob_clog
                - ob_memstore
                - ob_compaction
                - ob_unit_config
                - ob_tenant
                - ob_tenant_server
                - ob_tenant_server_available
              removeNotMatchedTagValueMessage: true
        # Tenant-tagged metrics, lower bound 4.0.0.0: lookup via DBA_OB_TENANTS,
        # skipping tenants whose tenant_type is META.
        - cacheExpire: 3m
          queryDataConfigs:
            - querySql: SELECT tenant_id ob_tenant_id, tenant_name FROM DBA_OB_TENANTS WHERE tenant_type<>'META'
              queryArgs: []
              minOBVersion: 4.0.0.0
              maxOBVersion: null
          conditions:
            - tagNames:
                - ob_tenant_id
              metrics:
                - ob_cache
                - ob_partition
                - ob_table
                - ob_plan_cache
                - ob_waitevent
                - ob_system_event
                - ob_tenant_task
                - ob_clog
                - ob_memstore
                - ob_compaction
                - ob_unit_config
              removeNotMatchedTagValueMessage: true
        # ob_sysstat, upper bound 4.0.0.0.
        - cacheExpire: 3m
          queryDataConfigs:
            - querySql: SELECT tenant_id ob_tenant_id, tenant_name FROM __all_tenant
              queryArgs: []
              minOBVersion: null
              maxOBVersion: 4.0.0.0
          conditions:
            - tagNames:
                - ob_tenant_id
              metrics:
                - ob_sysstat
              # ob_sysstat includes internal tenants, so messages whose tenant
              # id has no match must be kept rather than removed.
              removeNotMatchedTagValueMessage: false
        # ob_sysstat, lower bound 4.0.0.0.
        - cacheExpire: 3m
          queryDataConfigs:
            - querySql: SELECT tenant_id ob_tenant_id, tenant_name FROM DBA_OB_TENANTS WHERE tenant_type<>'META'
              queryArgs: []
              minOBVersion: 4.0.0.0
              maxOBVersion: null
          conditions:
            - tagNames:
                - ob_tenant_id
              metrics:
                - ob_sysstat
              # ob_sysstat includes internal tenants, so messages whose tenant
              # id has no match must be kept rather than removed.
              removeNotMatchedTagValueMessage: false

# Configuration for the retagProcessor plugin: the newTags map lists static
# tags whose values are filled from monagent.* template properties.
obRetagProcessor: &obRetagProcessor
  plugin: retagProcessor
  config:
    timeout: 10s
    pluginConfig:
      newTags:
        app: OB
        obzone: ${monagent.ob.zone.name}
        svr_ip: ${monagent.host.ip}
        svr_port: ${monagent.ob.rpc.port}
        ob_cluster_name: ${monagent.ob.cluster.name}
        ob_cluster_id: ${monagent.ob.cluster.id}
# prometheusExporter settings used by the ob_basic pipeline; metrics are
# exposed in fmtText format at /metrics/ob/basic.
basicExporter: &basicExporter
  plugin: prometheusExporter
  config:
    timeout: 10s
    pluginConfig:
      formatType: fmtText
      exposeUrl: /metrics/ob/basic

# prometheusExporter settings used by the ob_extra pipeline; metrics are
# exposed in fmtText format at /metrics/ob/extra.
extraExporter: &extraExporter
  plugin: prometheusExporter
  config:
    timeout: 10s
    pluginConfig:
      formatType: fmtText
      exposeUrl: /metrics/ob/extra
# Module definition for monitor.ob. It declares two pipelines, ob_basic and
# ob_extra, each assembled from the anchors defined earlier in this file via
# merge keys: one input, the retag and jointable processors (in that order),
# and one exporter.
modules:
  - module: monitor.ob
    moduleType: monagent.pipeline
    process: ob_monagent
    config:
      name: monitor.ob
      status: ${monagent.pipeline.ob.status}
      pipelines:
        # Basic metrics, served by basicExporter.
        - name: ob_basic
          config:
            scheduleStrategy: bySource
          structure:
            inputs:
              - <<: *obInputBasic
            processors:
              - <<: *obRetagProcessor
              - <<: *jointableProcessor
            exporter:
              <<: *basicExporter
        # Extra metrics, served by extraExporter.
        - name: ob_extra
          config:
            scheduleStrategy: bySource
          structure:
            inputs:
              - <<: *obInputExtra
            processors:
              - <<: *obRetagProcessor
              - <<: *jointableProcessor
            exporter:
              <<: *extraExporter