diff --git a/changelogs/unreleased/pawel-reduce_cardinality_of_prometheus_metrics.yml b/changelogs/unreleased/pawel-reduce_cardinality_of_prometheus_metrics.yml new file mode 100644 index 0000000000000000000000000000000000000000..0cee0b634d63f1065777c76ca86e56d068d733ef --- /dev/null +++ b/changelogs/unreleased/pawel-reduce_cardinality_of_prometheus_metrics.yml @@ -0,0 +1,5 @@ +--- +title: Reduce the number of buckets in gitlab_cache_operation_duration_seconds metric +merge_request: 15881 +author: +type: changed diff --git a/lib/gitlab/metrics/influx_db.rb b/lib/gitlab/metrics/influx_db.rb index 6ea132fc5bf80605b429fee92a72f44b59af9cce..877cebf6786a6b73fefb2839e0229fc7ca1ce99b 100644 --- a/lib/gitlab/metrics/influx_db.rb +++ b/lib/gitlab/metrics/influx_db.rb @@ -38,6 +38,7 @@ module Gitlab # This is memoized since this method is called for every instrumented # method. Loading data from an external cache on every method call slows # things down too much. + # in milliseconds @method_call_threshold ||= settings[:method_call_threshold] end diff --git a/lib/gitlab/metrics/method_call.rb b/lib/gitlab/metrics/method_call.rb index 9112164f22ec93004a2def1bbc4e2f0d0112857b..329b07af5db384d8a0e293b851ccdba7a3550002 100644 --- a/lib/gitlab/metrics/method_call.rb +++ b/lib/gitlab/metrics/method_call.rb @@ -35,8 +35,8 @@ module Gitlab @transaction = transaction @name = name @labels = { module: @module_name, method: @method_name } - @real_time = 0 - @cpu_time = 0 + @real_time = 0.0 + @cpu_time = 0.0 @call_count = 0 end @@ -54,7 +54,7 @@ module Gitlab @call_count += 1 if call_measurement_enabled? && above_threshold? - self.class.call_duration_histogram.observe(@transaction.labels.merge(labels), real_time / 1000.0) + self.class.call_duration_histogram.observe(@transaction.labels.merge(labels), real_time) end retval @@ -65,8 +65,8 @@ module Gitlab Metric.new( Instrumentation.series, { - duration: real_time, - cpu_duration: cpu_time, + duration: real_time.in_milliseconds.to_i, + cpu_duration: cpu_time.in_milliseconds.to_i, call_count: call_count }, method: @name @@ -76,7 +76,7 @@ module Gitlab # Returns true if the total runtime of this method exceeds the method call # threshold. def above_threshold? - real_time >= Metrics.method_call_threshold + real_time.in_milliseconds >= Metrics.method_call_threshold end def call_measurement_enabled? diff --git a/lib/gitlab/metrics/samplers/ruby_sampler.rb b/lib/gitlab/metrics/samplers/ruby_sampler.rb index b68800417a251e1416839b1946b002384b13bfab..4e1ea62351f21a0df970a7196f4341e7ab87f96c 100644 --- a/lib/gitlab/metrics/samplers/ruby_sampler.rb +++ b/lib/gitlab/metrics/samplers/ruby_sampler.rb @@ -52,7 +52,7 @@ module Gitlab metrics[:memory_usage].set(labels, System.memory_usage) metrics[:file_descriptors].set(labels, System.file_descriptor_count) - metrics[:sampler_duration].observe(labels.merge(worker_label), (System.monotonic_time - start_time) / 1000.0) + metrics[:sampler_duration].observe(labels.merge(worker_label), System.monotonic_time - start_time) ensure GC::Profiler.clear end diff --git a/lib/gitlab/metrics/subscribers/rails_cache.rb b/lib/gitlab/metrics/subscribers/rails_cache.rb index efd3c9daf7934fa175fea0ad7874fb0c47965938..250897a79c2984e6f78bda5ebd6f18d071d60115 100644 --- a/lib/gitlab/metrics/subscribers/rails_cache.rb +++ b/lib/gitlab/metrics/subscribers/rails_cache.rb @@ -66,7 +66,7 @@ module Gitlab :gitlab_cache_operation_duration_seconds, 'Cache access time', Transaction::BASE_LABELS.merge({ action: nil }), - [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.500, 2.0, 10.0] + [0.001, 0.01, 0.1, 1, 10] ) end diff --git a/lib/gitlab/metrics/system.rb b/lib/gitlab/metrics/system.rb index c2cbd3c16a162a4128636e83a7f515f7682a6b80..e60e245cf89709354ec6bf5d530acbfd5f37c03e 100644 --- a/lib/gitlab/metrics/system.rb +++ b/lib/gitlab/metrics/system.rb @@ -35,27 +35,27 @@ module Gitlab if Process.const_defined?(:CLOCK_THREAD_CPUTIME_ID) def self.cpu_time Process - .clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :millisecond) + .clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :float_second) end else def self.cpu_time Process - .clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :millisecond) + .clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :float_second) end end # Returns the current real time in a given precision. # - # Returns the time as a Fixnum. - def self.real_time(precision = :millisecond) + # Returns the time as a Float for precision = :float_second. + def self.real_time(precision = :float_second) Process.clock_gettime(Process::CLOCK_REALTIME, precision) end - # Returns the current monotonic clock time in a given precision. + # Returns the current monotonic clock time as seconds with microseconds precision. # - # Returns the time as a Fixnum. - def self.monotonic_time(precision = :millisecond) - Process.clock_gettime(Process::CLOCK_MONOTONIC, precision) + # Returns the time as a Float. + def self.monotonic_time + Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second) end end end diff --git a/lib/gitlab/metrics/transaction.rb b/lib/gitlab/metrics/transaction.rb index ee3afc5ffdb1f9f1c4d54670bfdd1bac1099457e..e7975c023a9ace0b1b8303dfdf5bae172b287c9d 100644 --- a/lib/gitlab/metrics/transaction.rb +++ b/lib/gitlab/metrics/transaction.rb @@ -35,6 +35,10 @@ module Gitlab @finished_at ? (@finished_at - @started_at) : 0.0 end + def duration_milliseconds + duration.in_milliseconds.to_i + end + def allocated_memory @memory_after - @memory_before end @@ -50,7 +54,7 @@ module Gitlab @memory_after = System.memory_usage @finished_at = System.monotonic_time - self.class.metric_transaction_duration_seconds.observe(labels, duration * 1000) + self.class.metric_transaction_duration_seconds.observe(labels, duration) self.class.metric_transaction_allocated_memory_bytes.observe(labels, allocated_memory * 1024.0) Thread.current[THREAD_KEY] = nil @@ -97,7 +101,7 @@ module Gitlab end def track_self - values = { duration: duration, allocated_memory: allocated_memory } + values = { duration: duration_milliseconds, allocated_memory: allocated_memory } @values.each do |name, value| values[name] = value diff --git a/spec/lib/gitlab/metrics/method_call_spec.rb b/spec/lib/gitlab/metrics/method_call_spec.rb index 78767d06462a74e3be0e41b23a96e24b3890e46a..41a9d1d9c907ce94b77788cf35d5ef211127701b 100644 --- a/spec/lib/gitlab/metrics/method_call_spec.rb +++ b/spec/lib/gitlab/metrics/method_call_spec.rb @@ -96,14 +96,17 @@ describe Gitlab::Metrics::MethodCall do describe '#to_metric' do it 'returns a Metric instance' do + expect(method_call).to receive(:real_time).and_return(4.0001) + expect(method_call).to receive(:cpu_time).and_return(3.0001) + method_call.measure { 'foo' } metric = method_call.to_metric expect(metric).to be_an_instance_of(Gitlab::Metrics::Metric) expect(metric.series).to eq('rails_method_calls') - expect(metric.values[:duration]).to be_a_kind_of(Numeric) - expect(metric.values[:cpu_duration]).to be_a_kind_of(Numeric) + expect(metric.values[:duration]).to eq(4000) + expect(metric.values[:cpu_duration]).to eq(3000) expect(metric.values[:call_count]).to be_an(Integer) expect(metric.tags).to eq({ method: 'Foo#bar' }) @@ -116,13 +119,13 @@ describe Gitlab::Metrics::MethodCall do end it 'returns false when the total call time is not above the threshold' do - expect(method_call).to receive(:real_time).and_return(9) + expect(method_call).to receive(:real_time).and_return(0.009) expect(method_call.above_threshold?).to eq(false) end it 'returns true when the total call time is above the threshold' do - expect(method_call).to receive(:real_time).and_return(9000) + expect(method_call).to receive(:real_time).and_return(9) expect(method_call.above_threshold?).to eq(true) end diff --git a/spec/lib/gitlab/metrics/system_spec.rb b/spec/lib/gitlab/metrics/system_spec.rb index 4d94d8705fbe749301d13b299a6e1ef50cbe50dd..14afcdf5daaa89b48304638177bdbafe5809ec25 100644 --- a/spec/lib/gitlab/metrics/system_spec.rb +++ b/spec/lib/gitlab/metrics/system_spec.rb @@ -29,19 +29,19 @@ describe Gitlab::Metrics::System do describe '.cpu_time' do it 'returns a Fixnum' do - expect(described_class.cpu_time).to be_an(Integer) + expect(described_class.cpu_time).to be_an(Float) end end describe '.real_time' do it 'returns a Fixnum' do - expect(described_class.real_time).to be_an(Integer) + expect(described_class.real_time).to be_an(Float) end end describe '.monotonic_time' do - it 'returns a Fixnum' do - expect(described_class.monotonic_time).to be_an(Integer) + it 'returns a Float' do + expect(described_class.monotonic_time).to be_an(Float) end end end