diff --git a/logstash-core/lib/logstash/instrument/periodic_poller/cgroup.rb b/logstash-core/lib/logstash/instrument/periodic_poller/cgroup.rb index 5213f83ccfb..c40db2e2bfd 100644 --- a/logstash-core/lib/logstash/instrument/periodic_poller/cgroup.rb +++ b/logstash-core/lib/logstash/instrument/periodic_poller/cgroup.rb @@ -38,21 +38,26 @@ def override(other) end ## `/proc/self/cgroup` contents look like this - # 5:cpu,cpuacct:/ - # 4:cpuset:/ - # 2:net_cls,net_prio:/ - # 0::/user.slice/user-1000.slice/session-932.scope - ## e.g. N:controller:/path-to-info - # we find the controller and path - # we skip the line without a controller e.g. 0::/path - # we assume there are these symlinks: - # `/sys/fs/cgroup/cpu` -> `/sys/fs/cgroup/cpu,cpuacct - # `/sys/fs/cgroup/cpuacct` -> `/sys/fs/cgroup/cpu,cpuacct + # cgroupv1 (per-controller hierarchies): + # 5:cpu,cpuacct:/ + # 4:cpuset:/ + # 2:net_cls,net_prio:/ + # 0::/user.slice/user-1000.slice/session-932.scope + # cgroupv2 (unified hierarchy): + # 0::/path + # + # For v1, we find the controller and path from lines matching N:controller:/path. + # We assume these symlinks exist: + # `/sys/fs/cgroup/cpu` -> `/sys/fs/cgroup/cpu,cpuacct` + # `/sys/fs/cgroup/cpuacct` -> `/sys/fs/cgroup/cpu,cpuacct` + # For v2, the 0::/path line identifies the cgroup path under the unified + # hierarchy at /sys/fs/cgroup. Data files (cpu.stat, cpu.max) live there. CGROUP_FILE = "/proc/self/cgroup" CPUACCT_DIR = "/sys/fs/cgroup/cpuacct" CPU_DIR = "/sys/fs/cgroup/cpu" CRITICAL_PATHS = [CGROUP_FILE, CPUACCT_DIR, CPU_DIR] + CGROUP_V2_DIR = "/sys/fs/cgroup" CONTROLLER_CPUACCT_LABEL = "cpuacct" CONTROLLER_CPU_LABEL = "cpu" @@ -88,6 +93,42 @@ def controller_groups end end + class CGroupV2Resources + CGROUP_V2_RE = Regexp.compile("^0::(/.*)") + + def cgroup_available? + path = resolve_v2_path + return false if path.nil? + cpu_override = Override.new("ls.cgroup.cpu.path.override") + resolved = cpu_override.override(path) + ::File.exist?(::File.join(CGROUP_V2_DIR, resolved, "cpu.stat")) + end + + def controller_groups + path = resolve_v2_path + return {} if path.nil? + { + CONTROLLER_CPU_LABEL => CpuResourceV2.new(path), + CONTROLLER_CPUACCT_LABEL => CpuAcctResourceV2.new(path) + } + end + + private + + def resolve_v2_path + @v2_path ||= read_v2_path + end + + def read_v2_path + return nil unless ::File.exist?(CGROUP_FILE) + IO.readlines(CGROUP_FILE).each do |line| + match = CGROUP_V2_RE.match(line.strip) + return match[1] if match + end + nil + end + end + module ControllerResource attr_reader :base_path, :override, :offset_path @@ -138,6 +179,27 @@ def cpuacct_usage end end + class CpuAcctResourceV2 + include LogStash::Util::Loggable + include ControllerResource + def initialize(original_path) + common_initialize(CGROUP_V2_DIR, "ls.cgroup.cpuacct.path.override", original_path) + end + + def to_hash + {:control_group => offset_path, :usage_nanos => cpuacct_usage} + end + private + def cpuacct_usage + lines = call_if_file_exists(:read_lines, "cpu.stat", []) + lines.each do |line| + fields = line.split(/\s+/) + return fields[1].to_i * 1000 if fields.first == "usage_usec" + end + -1 + end + end + class CpuResource include LogStash::Util::Loggable include ControllerResource @@ -170,6 +232,52 @@ def build_cpu_stats_hash end end + class CpuResourceV2 + include LogStash::Util::Loggable + include ControllerResource + def initialize(original_path) + common_initialize(CGROUP_V2_DIR, "ls.cgroup.cpu.path.override", original_path) + end + + def to_hash + { + :control_group => offset_path, + :cfs_period_micros => cfs_period_us, + :cfs_quota_micros => cfs_quota_us, + :stat => build_cpu_stats_hash + } + end + private + def cfs_period_us + read_cpu_max[1] + end + + def cfs_quota_us + read_cpu_max[0] + end + + def read_cpu_max + @cpu_max ||= begin + line = call_if_file_exists(:read_lines, "cpu.max", []).first + if line.nil? + [-1, -1] + else + parts = line.split(/\s+/) + quota = parts[0] == "max" ? -1 : parts[0].to_i + period = parts[1].to_i + [quota, period] + end + end + end + + def build_cpu_stats_hash + stats = CpuStatsV2.new + lines = call_if_file_exists(:read_lines, "cpu.stat", []) + stats.update(lines) + stats.to_hash + end + end + class UnimplementedResource attr_reader :controller, :original_path @@ -210,19 +318,47 @@ def to_hash end end + class CpuStatsV2 + def initialize + @number_of_elapsed_periods = -1 + @number_of_times_throttled = -1 + @time_throttled_nanos = -1 + end + + def update(lines) + lines.each do |line| + fields = line.split(/\s+/) + next unless fields.size > 1 + case fields.first + when "nr_periods" then @number_of_elapsed_periods = fields[1].to_i + when "nr_throttled" then @number_of_times_throttled = fields[1].to_i + when "throttled_usec" then @time_throttled_nanos = fields[1].to_i * 1000 + end + end + end + + def to_hash + { + :number_of_elapsed_periods => @number_of_elapsed_periods, + :number_of_times_throttled => @number_of_times_throttled, + :time_throttled_nanos => @time_throttled_nanos + } + end + end + CGROUP_RESOURCES = CGroupResources.new + CGROUP_V2_RESOURCES = CGroupV2Resources.new class << self def get_all - unless CGROUP_RESOURCES.cgroup_available? - logger.debug("One or more required cgroup files or directories not found: #{CRITICAL_PATHS.join(', ')}") - return - end + resolve_resources unless @resolved + return nil if @active_resources.nil? - groups = CGROUP_RESOURCES.controller_groups + groups = @active_resources.controller_groups if groups.empty? - logger.debug("The main cgroup file did not have any controllers: #{CGROUP_FILE}") + logger.debug("#{@active_label}: no controllers found") unless @logged_empty + @logged_empty = true return end @@ -231,15 +367,33 @@ def get_all next unless controller.implemented? cgroups_stats[name.to_sym] = controller.to_hash end - cgroups_stats + cgroups_stats.empty? ? nil : cgroups_stats rescue => e - logger.debug("Error, cannot retrieve cgroups information", :exception => e.class.name, :message => e.message, :backtrace => e.backtrace.take(4)) if logger.debug? + logger.debug("Error, cannot retrieve #{@active_label} cgroups information", :exception => e.class.name, :message => e.message, :backtrace => e.backtrace.take(4)) if logger.debug? nil end def get get_all end + + private + + def resolve_resources + @resolved = true + if CGROUP_RESOURCES.cgroup_available? + @active_resources = CGROUP_RESOURCES + @active_label = "cgroupv1" + logger.debug("using cgroupv1") + elsif CGROUP_V2_RESOURCES.cgroup_available? + @active_resources = CGROUP_V2_RESOURCES + @active_label = "cgroupv2" + logger.debug("using cgroupv2") + else + @active_resources = nil + logger.debug("no cgroup support detected (neither v1 nor v2)") + end + end end end end end end diff --git a/logstash-core/spec/logstash/instrument/periodic_poller/cgroup_spec.rb b/logstash-core/spec/logstash/instrument/periodic_poller/cgroup_spec.rb index 2873b3a9b29..aed90ad4a2e 100644 --- a/logstash-core/spec/logstash/instrument/periodic_poller/cgroup_spec.rb +++ b/logstash-core/spec/logstash/instrument/periodic_poller/cgroup_spec.rb @@ -184,6 +184,112 @@ module LogStash module Instrument module PeriodicPoller end end + describe Cgroup::CGroupV2Resources do + subject(:cgroup_v2_resources) { described_class.new } + + context "method: cgroup_available?" do + context "cgroupv2 files exist" do + before do + allow(::File).to receive(:exist?).with("/proc/self/cgroup").and_return(true) + allow(IO).to receive(:readlines).with("/proc/self/cgroup").and_return(["0::#{relative_path}\n"]) + allow(::File).to receive(:exist?).with("/sys/fs/cgroup#{relative_path}/cpu.stat").and_return(true) + end + it "returns true" do + expect(cgroup_v2_resources.cgroup_available?).to be_truthy + end + end + + context "proc cgroup file does not exist" do + before do + allow(::File).to receive(:exist?).with("/proc/self/cgroup").and_return(false) + end + it "returns false" do + expect(cgroup_v2_resources.cgroup_available?).to be_falsey + end + end + + context "no v2 entry in proc cgroup" do + before do + allow(::File).to receive(:exist?).with("/proc/self/cgroup").and_return(true) + allow(IO).to receive(:readlines).with("/proc/self/cgroup").and_return(["3:cpu:/docker/abc\n"]) + end + it "returns false" do + expect(cgroup_v2_resources.cgroup_available?).to be_falsey + end + end + + context "cpu.stat file does not exist" do + before do + allow(::File).to receive(:exist?).with("/proc/self/cgroup").and_return(true) + allow(IO).to receive(:readlines).with("/proc/self/cgroup").and_return(["0::#{relative_path}\n"]) + allow(::File).to receive(:exist?).with("/sys/fs/cgroup#{relative_path}/cpu.stat").and_return(false) + end + it "returns false" do + expect(cgroup_v2_resources.cgroup_available?).to be_falsey + end + end + + context "with override" do + before do + java.lang.System.setProperty("ls.cgroup.cpu.path.override", "/custom") + allow(::File).to receive(:exist?).with("/proc/self/cgroup").and_return(true) + allow(IO).to receive(:readlines).with("/proc/self/cgroup").and_return(["0::#{relative_path}\n"]) + allow(::File).to receive(:exist?).with("/sys/fs/cgroup/custom/cpu.stat").and_return(true) + end + after do + java.lang.System.clearProperty("ls.cgroup.cpu.path.override") + end + it "uses the override path for availability check" do + expect(cgroup_v2_resources.cgroup_available?).to be_truthy + end + end + end + + context "method: controller_groups" do + before do + allow(::File).to receive(:exist?).with("/proc/self/cgroup").and_return(true) + allow(IO).to receive(:readlines).with("/proc/self/cgroup").and_return(["0::#{relative_path}\n"]) + end + + it "returns cpu and cpuacct controllers" do + controllers = cgroup_v2_resources.controller_groups + + expect(controllers["cpu"]).to be_a(Cgroup::CpuResourceV2) + expect(controllers["cpu"].base_path).to eq("/sys/fs/cgroup") + expect(controllers["cpu"].offset_path).to eq(relative_path) + + expect(controllers["cpuacct"]).to be_a(Cgroup::CpuAcctResourceV2) + expect(controllers["cpuacct"].base_path).to eq("/sys/fs/cgroup") + expect(controllers["cpuacct"].offset_path).to eq(relative_path) + end + end + + context "method: controller_groups with override" do + before do + java.lang.System.setProperty("ls.cgroup.cpu.path.override", "/foo") + java.lang.System.setProperty("ls.cgroup.cpuacct.path.override", "/bar") + allow(::File).to receive(:exist?).with("/proc/self/cgroup").and_return(true) + allow(IO).to receive(:readlines).with("/proc/self/cgroup").and_return(["0::#{relative_path}\n"]) + end + after do + java.lang.System.clearProperty("ls.cgroup.cpu.path.override") + java.lang.System.clearProperty("ls.cgroup.cpuacct.path.override") + end + it "returns overridden control groups" do + controllers = cgroup_v2_resources.controller_groups + controller = controllers["cpu"] + expect(controller).to be_a(Cgroup::CpuResourceV2) + expect(controller.base_path).to eq("/sys/fs/cgroup") + expect(controller.offset_path).to eq("/foo") + + controller = controllers["cpuacct"] + expect(controller).to be_a(Cgroup::CpuAcctResourceV2) + expect(controller.base_path).to eq("/sys/fs/cgroup") + expect(controller.offset_path).to eq("/bar") + end + end + end + describe Cgroup::CpuAcctResource do subject(:cpuacct_resource) { described_class.new("/bar") } describe "method: to_hash, without override" do @@ -212,6 +318,55 @@ module LogStash module Instrument module PeriodicPoller end end + describe Cgroup::CpuAcctResourceV2 do + subject(:cpuacct_resource) { described_class.new("/bar") } + describe "method: to_hash, without override" do + context "when the files cannot be found" do + it "fills in the hash with minus one" do + expect(cpuacct_resource.base_path).to eq("/sys/fs/cgroup") + expect(cpuacct_resource.offset_path).to eq("/bar") + expect(cpuacct_resource.to_hash).to eq({:control_group => "/bar", :usage_nanos => -1}) + end + end + + context "when cpu.stat exists but has no usage_usec" do + before do + allow(::File).to receive(:exist?).and_return(true) + allow(IO).to receive(:readlines).with("/sys/fs/cgroup/bar/cpu.stat").and_return(["nr_periods 10\n"]) + end + it "fills in the hash with minus one for usage" do + expect(cpuacct_resource.to_hash).to eq({:control_group => "/bar", :usage_nanos => -1}) + end + end + + context "when cpu.stat exists with usage_usec" do + before do + allow(::File).to receive(:exist?).and_return(true) + allow(IO).to receive(:readlines).with("/sys/fs/cgroup/bar/cpu.stat").and_return(["usage_usec 5000\n", "user_usec 3000\n"]) + end + it "converts microseconds to nanoseconds" do + expect(cpuacct_resource.to_hash).to eq({:control_group => "/bar", :usage_nanos => 5000000}) + end + end + end + + describe "method: to_hash, with override" do + before do + java.lang.System.setProperty("ls.cgroup.cpuacct.path.override", "/quux") + end + after do + java.lang.System.clearProperty("ls.cgroup.cpuacct.path.override") + end + context "when the files cannot be found" do + it "fills in the hash with minus one" do + expect(cpuacct_resource.base_path).to eq("/sys/fs/cgroup") + expect(cpuacct_resource.offset_path).to eq("/quux") + expect(cpuacct_resource.to_hash).to eq({:control_group => "/quux", :usage_nanos => -1}) + end + end + end + end + describe Cgroup::CpuResource do subject(:cpu_resource) { described_class.new("/bar") } describe "method: fill, without override" do @@ -241,7 +396,82 @@ module LogStash module Instrument module PeriodicPoller end end + describe Cgroup::CpuResourceV2 do + subject(:cpu_resource) { described_class.new("/bar") } + describe "method: to_hash, without override" do + context "when the files cannot be found" do + it "fills in the hash with minus one" do + expect(cpu_resource.base_path).to eq("/sys/fs/cgroup") + expect(cpu_resource.offset_path).to eq("/bar") + expect(cpu_resource.to_hash).to eq({:cfs_period_micros => -1, :cfs_quota_micros => -1, :control_group => "/bar", :stat => {:number_of_elapsed_periods => -1, :number_of_times_throttled => -1, :time_throttled_nanos => -1}}) + end + end + + context "when cpu.max and cpu.stat exist" do + before do + allow(::File).to receive(:exist?).and_return(true) + allow(IO).to receive(:readlines).with("/sys/fs/cgroup/bar/cpu.max").and_return(["50000 100000\n"]) + allow(IO).to receive(:readlines).with("/sys/fs/cgroup/bar/cpu.stat").and_return([ + "usage_usec 5000", + "nr_periods 10", + "nr_throttled 2", + "throttled_usec 3000" + ]) + end + it "returns correct values" do + expect(cpu_resource.to_hash).to eq({ + :control_group => "/bar", + :cfs_period_micros => 100000, + :cfs_quota_micros => 50000, + :stat => { + :number_of_elapsed_periods => 10, + :number_of_times_throttled => 2, + :time_throttled_nanos => 3000000 + } + }) + end + end + + context "when cpu.max has 'max' for unlimited quota" do + before do + allow(::File).to receive(:exist?).and_return(true) + allow(IO).to receive(:readlines).with("/sys/fs/cgroup/bar/cpu.max").and_return(["max 100000\n"]) + allow(IO).to receive(:readlines).with("/sys/fs/cgroup/bar/cpu.stat").and_return([]) + end + it "returns -1 for quota" do + hash = cpu_resource.to_hash + expect(hash[:cfs_quota_micros]).to eq(-1) + expect(hash[:cfs_period_micros]).to eq(100000) + end + end + end + + describe "method: to_hash, with override" do + before do + java.lang.System.setProperty("ls.cgroup.cpu.path.override", "/quux") + end + after do + java.lang.System.clearProperty("ls.cgroup.cpu.path.override") + end + context "when the files cannot be found" do + it "fills in the hash with minus one" do + expect(cpu_resource.base_path).to eq("/sys/fs/cgroup") + expect(cpu_resource.offset_path).to eq("/quux") + expect(cpu_resource.to_hash).to eq({:cfs_period_micros => -1, :cfs_quota_micros => -1, :control_group => "/quux", :stat => {:number_of_elapsed_periods => -1, :number_of_times_throttled => -1, :time_throttled_nanos => -1}}) + end + end + end + end + describe Cgroup do + before(:each) do + # Reset cached resolution state so each test can set up its own mocks + described_class.instance_variable_set(:@resolved, false) + described_class.instance_variable_set(:@active_resources, nil) + described_class.instance_variable_set(:@active_label, nil) + described_class.instance_variable_set(:@logged_empty, false) + end + describe "class method: get_all" do let(:cpuacct_usage) { 1982 } let(:cfs_period_micros) { 500 } @@ -280,9 +510,104 @@ module LogStash module Instrument module PeriodicPoller end end - context "when an exception is raised" do + describe "class method: get_all with cgroupv2" do + let(:cpuacct_usage_usec) { 1982 } + let(:cfs_period_micros) { 100000 } + let(:cfs_quota_micros) { 50000 } + let(:cpu_stats_number_of_periods) { 1 } + let(:cpu_stats_number_of_time_throttled) { 2 } + let(:cpu_stats_time_throttled_usec) { 3000 } + let(:cpu_stat_v2_content) do + [ + "usage_usec #{cpuacct_usage_usec}", + "user_usec 1000", + "system_usec 982", + "nr_periods #{cpu_stats_number_of_periods}", + "nr_throttled #{cpu_stats_number_of_time_throttled}", + "throttled_usec #{cpu_stats_time_throttled_usec}" + ] + end + let(:cpu_max_content) { ["#{cfs_quota_micros} #{cfs_period_micros}"] } + before do + Cgroup::CGROUP_V2_RESOURCES.instance_variable_set(:@v2_path, nil) + Cgroup.instance_variable_set(:@resolved, false) + + allow(Cgroup::CGROUP_RESOURCES).to receive(:cgroup_available?).and_return(false) allow(::File).to receive(:exist?).and_return(true) + allow(IO).to receive(:readlines).with("/proc/self/cgroup").and_return(["0::#{relative_path}\n"]) + allow(IO).to receive(:readlines).with("/sys/fs/cgroup#{relative_path}/cpu.stat").and_return(cpu_stat_v2_content) + allow(IO).to receive(:readlines).with("/sys/fs/cgroup#{relative_path}/cpu.max").and_return(cpu_max_content) + end + + it "returns all the stats with values converted from v2 format" do + expect(described_class.get_all).to match( + :cpuacct => { + :control_group => relative_path, + :usage_nanos => cpuacct_usage_usec * 1000, + }, + :cpu => { + :control_group => relative_path, + :cfs_period_micros => cfs_period_micros, + :cfs_quota_micros => cfs_quota_micros, + :stat => { + :number_of_elapsed_periods => cpu_stats_number_of_periods, + :number_of_times_throttled => cpu_stats_number_of_time_throttled, + :time_throttled_nanos => cpu_stats_time_throttled_usec * 1000 + } + } + ) + end + end + + describe "class method: get_all fallback order" do + context "when v1 is available" do + before do + allow(Cgroup::CGROUP_RESOURCES).to receive(:cgroup_available?).and_return(true) + allow(Cgroup::CGROUP_RESOURCES).to receive(:controller_groups).and_return( + "cpu" => double("cpu", implemented?: true, to_hash: {:control_group => "/v1", :cfs_period_micros => 100}), + "cpuacct" => double("cpuacct", implemented?: true, to_hash: {:control_group => "/v1", :usage_nanos => 200}) + ) + end + + it "uses v1 and does not consult v2" do + expect(Cgroup::CGROUP_V2_RESOURCES).not_to receive(:cgroup_available?) + result = described_class.get_all + expect(result[:cpu][:control_group]).to eq("/v1") + end + end + + context "when v1 is not available but v2 is" do + before do + allow(Cgroup::CGROUP_RESOURCES).to receive(:cgroup_available?).and_return(false) + allow(Cgroup::CGROUP_V2_RESOURCES).to receive(:cgroup_available?).and_return(true) + allow(Cgroup::CGROUP_V2_RESOURCES).to receive(:controller_groups).and_return( + "cpu" => double("cpu", implemented?: true, to_hash: {:control_group => "/v2", :cfs_period_micros => 100}), + "cpuacct" => double("cpuacct", implemented?: true, to_hash: {:control_group => "/v2", :usage_nanos => 200}) + ) + end + + it "falls back to v2" do + result = described_class.get_all + expect(result[:cpu][:control_group]).to eq("/v2") + end + end + + context "when neither v1 nor v2 is available" do + before do + allow(Cgroup::CGROUP_RESOURCES).to receive(:cgroup_available?).and_return(false) + allow(Cgroup::CGROUP_V2_RESOURCES).to receive(:cgroup_available?).and_return(false) + end + + it "returns nil" do + expect(described_class.get_all).to be_nil + end + end + end + + context "when an exception is raised in v1" do + before do + allow(Cgroup::CGROUP_RESOURCES).to receive(:cgroup_available?).and_return(true) allow(Cgroup::CGROUP_RESOURCES).to receive(:controller_groups).and_raise("Something went wrong") end @@ -290,6 +615,32 @@ module LogStash module Instrument module PeriodicPoller expect(described_class.get_all).to be_nil end end + + context "when an exception is raised in v2" do + before do + allow(Cgroup::CGROUP_RESOURCES).to receive(:cgroup_available?).and_return(false) + allow(Cgroup::CGROUP_V2_RESOURCES).to receive(:cgroup_available?).and_return(true) + allow(Cgroup::CGROUP_V2_RESOURCES).to receive(:controller_groups).and_raise("Something went wrong") + end + + it "method: get_all returns nil" do + expect(described_class.get_all).to be_nil + end + end + + context "when resolution is cached" do + it "does not re-resolve on subsequent calls" do + allow(Cgroup::CGROUP_RESOURCES).to receive(:cgroup_available?).and_return(true) + allow(Cgroup::CGROUP_RESOURCES).to receive(:controller_groups).and_return( + "cpu" => double("cpu", implemented?: true, to_hash: {:control_group => "/v1"}), + "cpuacct" => double("cpuacct", implemented?: true, to_hash: {:control_group => "/v1"}) + ) + described_class.get_all + expect(Cgroup::CGROUP_RESOURCES).to have_received(:cgroup_available?).once + described_class.get_all + expect(Cgroup::CGROUP_RESOURCES).to have_received(:cgroup_available?).once + end + end end end end end end diff --git a/logstash-core/spec/logstash/instrument/periodic_poller/os_spec.rb b/logstash-core/spec/logstash/instrument/periodic_poller/os_spec.rb index 2edd9bc1930..48fa34eec90 100644 --- a/logstash-core/spec/logstash/instrument/periodic_poller/os_spec.rb +++ b/logstash-core/spec/logstash/instrument/periodic_poller/os_spec.rb @@ -21,6 +21,14 @@ describe LogStash::Instrument::PeriodicPoller::Os do let(:metric) { LogStash::Instrument::Metric.new(LogStash::Instrument::Collector.new) } + before(:each) do + # Reset cached cgroup resolution so each test context can mock independently + LogStash::Instrument::PeriodicPoller::Cgroup.instance_variable_set(:@resolved, false) + LogStash::Instrument::PeriodicPoller::Cgroup.instance_variable_set(:@active_resources, nil) + LogStash::Instrument::PeriodicPoller::Cgroup.instance_variable_set(:@active_label, nil) + LogStash::Instrument::PeriodicPoller::Cgroup.instance_variable_set(:@logged_empty, false) + end + context "recorded cgroup metrics (mocked cgroup env)" do subject { described_class.new(metric, {})} @@ -94,4 +102,76 @@ def mval(*metric_path) end end end + + context "recorded cgroup v2 metrics (mocked cgroup env)" do + subject { described_class.new(metric, {})} + + let(:snapshot_store) { metric.collector.snapshot_metric.metric_store } + let(:os_metrics) { snapshot_store.get_shallow(:os) } + + let(:v2_relative_path) { "/system.slice/docker-abc123def456.scope" } + let(:proc_self_cgroup_v2) { ["0::#{v2_relative_path}"] } + + let(:usage_usec) { 378477588 } + let(:cpu_period_micros) { 100000 } + let(:cpu_quota_micros) { 150000 } + let(:cpu_stats_number_of_periods) { 4157 } + let(:cpu_stats_number_of_time_throttled) { 460 } + let(:cpu_stats_throttled_usec) { 581617440 } + + let(:cpu_stat_file_content) do + [ + "usage_usec #{usage_usec}", + "user_usec 340000000", + "system_usec 38477588", + "nr_periods #{cpu_stats_number_of_periods}", + "nr_throttled #{cpu_stats_number_of_time_throttled}", + "throttled_usec #{cpu_stats_throttled_usec}" + ] + end + + let(:cpu_max_content) { ["#{cpu_quota_micros} #{cpu_period_micros}"] } + + before do + # Reset memoized state on the singleton so mocks take effect + LogStash::Instrument::PeriodicPoller::Cgroup::CGROUP_V2_RESOURCES.instance_variable_set(:@v2_path, nil) + LogStash::Instrument::PeriodicPoller::Cgroup.instance_variable_set(:@resolved, false) + + allow(::File).to receive(:exist?).and_return(false) + allow(::File).to receive(:exist?).with("/proc/self/cgroup").and_return(true) + allow(::File).to receive(:exist?).with("/sys/fs/cgroup#{v2_relative_path}/cpu.stat").and_return(true) + allow(::File).to receive(:exist?).with("/sys/fs/cgroup#{v2_relative_path}/cpu.max").and_return(true) + allow(IO).to receive(:readlines).with("/proc/self/cgroup").and_return(proc_self_cgroup_v2) + allow(IO).to receive(:readlines).with("/sys/fs/cgroup#{v2_relative_path}/cpu.stat").and_return(cpu_stat_file_content) + allow(IO).to receive(:readlines).with("/sys/fs/cgroup#{v2_relative_path}/cpu.max").and_return(cpu_max_content) + + subject.collect + end + + def mval(*metric_path) + metric_path.reduce(os_metrics) {|acc, k| acc[k]}.value + end + + it "should have a value for #{[:cgroup, :cpuacct, :control_group]} that is a String" do + expect(mval(:cgroup, :cpuacct, :control_group)).to be_a(String) + end + + it "should have a value for #{[:cgroup, :cpu, :control_group]} that is a String" do + expect(mval(:cgroup, :cpu, :control_group)).to be_a(String) + end + + [ + [:cgroup, :cpuacct, :usage_nanos], + [:cgroup, :cpu, :cfs_period_micros], + [:cgroup, :cpu, :cfs_quota_micros], + [:cgroup, :cpu, :stat, :number_of_elapsed_periods], + [:cgroup, :cpu, :stat, :number_of_times_throttled], + [:cgroup, :cpu, :stat, :time_throttled_nanos] + ].each do |path| + path = Array(path) + it "should have a value for #{path} that is Numeric" do + expect(mval(*path)).to be_a(Numeric) + end + end + end end diff --git a/qa/docker/shared_examples/container.rb b/qa/docker/shared_examples/container.rb index a6290e6ec01..a5d37e58c3b 100644 --- a/qa/docker/shared_examples/container.rb +++ b/qa/docker/shared_examples/container.rb @@ -87,6 +87,21 @@ expect(java_process(@container)["args"]).to match /-Dls.cgroup.cpuacct.path.override=/ end + it 'should report cgroup cpu and cpuacct stats via the monitoring API' do + node_stats = get_node_stats(@container) + cgroup = node_stats.dig('os', 'cgroup') + expect(cgroup).not_to be_nil, "Expected cgroup stats in /_node/stats response" + + expect(cgroup.dig('cpuacct', 'control_group')).to be_a(String) + expect(cgroup.dig('cpuacct', 'usage_nanos')).to be >= 0 + + expect(cgroup.dig('cpu', 'control_group')).to be_a(String) + expect(cgroup.dig('cpu', 'cfs_period_micros')).to be > 0 + expect(cgroup.dig('cpu', 'stat', 'number_of_elapsed_periods')).to be >= -1 + expect(cgroup.dig('cpu', 'stat', 'number_of_times_throttled')).to be >= -1 + expect(cgroup.dig('cpu', 'stat', 'time_throttled_nanos')).to be >= -1 + end + it 'should have a pid of 1' do expect(java_process(@container)["pid"]).to eql '1' end