From d75870152ac87ea2df0b2aafd7908da8094628a2 Mon Sep 17 00:00:00 2001 From: Vince Foley Date: Fri, 15 Aug 2025 14:09:49 -0700 Subject: [PATCH] Improve Span error reporting --- lib/new_relic/error/reporter/crash_report.ex | 79 +++++++++++--------- lib/new_relic/transaction/complete.ex | 57 +++++++++----- lib/new_relic/transaction/reporter.ex | 3 +- lib/new_relic/transaction/sidecar.ex | 5 +- test/other_transaction_test.exs | 14 +++- 5 files changed, 94 insertions(+), 64 deletions(-) diff --git a/lib/new_relic/error/reporter/crash_report.ex b/lib/new_relic/error/reporter/crash_report.ex index b359540a..c5dfdf25 100644 --- a/lib/new_relic/error/reporter/crash_report.ex +++ b/lib/new_relic/error/reporter/crash_report.ex @@ -42,43 +42,48 @@ defmodule NewRelic.Error.Reporter.CrashReport do def report_error(:process, report) do {kind, exception, stacktrace} = parse_error_info(report[:error_info]) - - {exception_type, exception_reason, exception_stacktrace} = - Util.Error.normalize(kind, exception, stacktrace, report[:initial_call]) - - process_name = parse_process_name(report[:registered_name], stacktrace) - expected = parse_error_expected(exception) - automatic_attributes = NewRelic.Config.automatic_attributes() - - Collector.ErrorTrace.Harvester.report_error(%NewRelic.Error.Trace{ - timestamp: System.system_time(:millisecond) / 1_000, - error_type: exception_type, - message: exception_reason, - expected: expected, - stack_trace: exception_stacktrace, - transaction_name: "OtherTransaction/Elixir/ElixirProcess//#{process_name}", - user_attributes: - Map.merge(automatic_attributes, %{ - process: process_name - }) - }) - - Collector.TransactionErrorEvent.Harvester.report_error(%NewRelic.Error.Event{ - timestamp: System.system_time(:millisecond) / 1_000, - error_class: exception_type, - error_message: exception_reason, - expected: expected, - transaction_name: "OtherTransaction/Elixir/ElixirProcess//#{process_name}", - user_attributes: - Map.merge(automatic_attributes, %{ - process: process_name, - stacktrace: Enum.join(exception_stacktrace, "\n") - }) - }) - - unless expected do - NewRelic.report_metric({:supportability, :error_event}, error_count: 1) - NewRelic.report_metric(:error, error_count: 1) + already_reported? = {kind, exception, List.first(stacktrace)} == report[:dictionary][:nr_error_explicitly_reported] + + if already_reported? do + :ignore + else + {exception_type, exception_reason, exception_stacktrace} = + Util.Error.normalize(kind, exception, stacktrace, report[:initial_call]) + + process_name = parse_process_name(report[:registered_name], stacktrace) + expected = parse_error_expected(exception) + automatic_attributes = NewRelic.Config.automatic_attributes() + + Collector.ErrorTrace.Harvester.report_error(%NewRelic.Error.Trace{ + timestamp: System.system_time(:millisecond) / 1_000, + error_type: exception_type, + message: exception_reason, + expected: expected, + stack_trace: exception_stacktrace, + transaction_name: "OtherTransaction/Elixir/ElixirProcess//#{process_name}", + user_attributes: + Map.merge(automatic_attributes, %{ + process: process_name + }) + }) + + Collector.TransactionErrorEvent.Harvester.report_error(%NewRelic.Error.Event{ + timestamp: System.system_time(:millisecond) / 1_000, + error_class: exception_type, + error_message: exception_reason, + expected: expected, + transaction_name: "OtherTransaction/Elixir/ElixirProcess//#{process_name}", + user_attributes: + Map.merge(automatic_attributes, %{ + process: process_name, + stacktrace: Enum.join(exception_stacktrace, "\n") + }) + }) + + unless expected do + NewRelic.report_metric({:supportability, :error_event}, error_count: 1) + NewRelic.report_metric(:error, error_count: 1) + end end end diff --git a/lib/new_relic/transaction/complete.ex b/lib/new_relic/transaction/complete.ex index f52b2239..8d110778 100644 --- a/lib/new_relic/transaction/complete.ex +++ b/lib/new_relic/transaction/complete.ex @@ -7,7 +7,7 @@ defmodule NewRelic.Transaction.Complete do alias NewRelic.Transaction def run(tx_attrs, pid) do - {tx_segments, tx_attrs, tx_error, span_events, apdex, tx_metrics} = + {tx_segments, tx_attrs, tx_error_info, span_events, apdex, tx_metrics} = tx_attrs |> transform_name_attrs |> transform_time_attrs @@ -18,7 +18,7 @@ defmodule NewRelic.Transaction.Complete do report_transaction_event(tx_attrs) report_transaction_trace(tx_attrs, tx_segments) - report_transaction_error_event(tx_attrs, tx_error) + report_transaction_error_event(tx_attrs, tx_error_info) report_http_dispatcher_metric(tx_attrs) report_transaction_metric(tx_attrs) report_queue_time_metric(tx_attrs) @@ -144,13 +144,26 @@ defmodule NewRelic.Transaction.Complete do segment_tree = Map.update!(segment_tree, :children, &(&1 ++ top_children ++ stray_children)) + tx_error_info = + case tx_error do + nil -> + nil + + {:error, error} -> + expected = parse_error_expected(error.reason) + {type, reason, stacktrace} = Util.Error.normalize(error.kind, error.reason, error.stack) + + {:error, error, type, reason, stacktrace, expected} + end + span_events = extract_span_events( NewRelic.Config.feature(:infinite_tracing), tx_attrs, pid, process_spawns, - process_exits + process_exits, + tx_error_info ) apdex = calculate_apdex(tx_attrs, tx_error) @@ -167,7 +180,7 @@ defmodule NewRelic.Transaction.Complete do |> Map.put(:total_time_s, total_time_s(tx_attrs, concurrent_process_time_ms)) |> Map.put(:process_spawns, length(process_spawns)) - {[segment_tree], tx_attrs, tx_error, span_events, apdex, tx_metrics} + {[segment_tree], tx_attrs, tx_error_info, span_events, apdex, tx_metrics} end defp total_time_s(%{transactionType: :Web, "http.server": "cowboy"}, concurrent_process_time_ms) do @@ -179,17 +192,17 @@ defmodule NewRelic.Transaction.Complete do (tx_attrs.duration_ms + concurrent_process_time_ms) / 1000 end - defp extract_span_events(:infinite, tx_attrs, pid, spawns, exits) do + defp extract_span_events(:infinite, tx_attrs, pid, spawns, exits, tx_error_info) do spawned_process_span_events(tx_attrs, spawns, exits) - |> add_spansactions(tx_attrs, pid) + |> add_spansactions(tx_attrs, pid, tx_error_info) end - defp extract_span_events(:sampling, %{sampled: true} = tx_attrs, pid, spawns, exits) do + defp extract_span_events(:sampling, %{sampled: true} = tx_attrs, pid, spawns, exits, tx_error_info) do spawned_process_span_events(tx_attrs, spawns, exits) - |> add_spansactions(tx_attrs, pid) + |> add_spansactions(tx_attrs, pid, tx_error_info) end - defp extract_span_events(_trace_mode, _tx_attrs, _pid, _spawns, _exits) do + defp extract_span_events(_trace_mode, _tx_attrs, _pid, _spawns, _exits, _tx_error_info) do [] end @@ -216,11 +229,20 @@ defmodule NewRelic.Transaction.Complete do :priority, :tracingVendors, :trustedParentId, - :root_process_error, :error ] - defp add_spansactions(spans, tx_attrs, pid) do - root_process_error_attribute = (tx_attrs[:root_process_error] && %{error: true}) || %{} + defp add_spansactions(spans, tx_attrs, pid, tx_error_info) do + error_attrs = + case tx_error_info do + {:error, _error, type, reason, _stack, false = _expected} -> + %{"error.class": type, "error.message": reason} + + {:error, _error, type, reason, _stack, true = _expected} -> + %{"error.class": type, "error.message": reason, "error.expected": true} + + _ -> + %{} + end [ %NewRelic.Span.Event{ @@ -238,7 +260,7 @@ defmodule NewRelic.Transaction.Complete do category_attributes: tx_attrs |> Map.drop(@spansaction_exclude_attrs) - |> Map.merge(root_process_error_attribute) + |> Map.merge(error_attrs) |> Map.merge(NewRelic.Config.automatic_attributes()) |> Map.merge(%{ "transaction.name": Util.metric_join(["#{tx_attrs[:transactionType]}Transaction", tx_attrs.name]), @@ -480,12 +502,11 @@ defmodule NewRelic.Transaction.Complete do defp report_transaction_error_event(_tx_attrs, nil), do: :ignore - defp report_transaction_error_event(%{name: tx_name, transactionType: type} = tx_attrs, {:error, error}) do + defp report_transaction_error_event( + %{name: tx_name, transactionType: type} = tx_attrs, + {:error, error, exception_type, exception_reason, exception_stacktrace, expected} + ) do attributes = Map.drop(tx_attrs, [:error, :"error.kind", :"error.reason", :"error.stack"]) - expected = parse_error_expected(error.reason) - - {exception_type, exception_reason, exception_stacktrace} = - Util.Error.normalize(error.kind, error.reason, error.stack) report_error_trace( tx_attrs, diff --git a/lib/new_relic/transaction/reporter.ex b/lib/new_relic/transaction/reporter.ex index 9c83d112..6b2c049a 100644 --- a/lib/new_relic/transaction/reporter.ex +++ b/lib/new_relic/transaction/reporter.ex @@ -101,7 +101,8 @@ defmodule NewRelic.Transaction.Reporter do :ok end - def error(%{kind: _kind, reason: _reason, stack: _stack} = error) do + def error(%{kind: kind, reason: reason, stack: stack} = error) do + Process.put(:nr_error_explicitly_reported, {kind, reason, List.first(stack)}) Transaction.Sidecar.add(error: true, transaction_error: {:error, error}) end diff --git a/lib/new_relic/transaction/sidecar.ex b/lib/new_relic/transaction/sidecar.ex index 9b00867f..99c492d5 100644 --- a/lib/new_relic/transaction/sidecar.ex +++ b/lib/new_relic/transaction/sidecar.ex @@ -196,10 +196,7 @@ defmodule NewRelic.Transaction.Sidecar do attributes = with {reason, stack} when reason != :shutdown <- down_reason, false <- match?(%{expected: true}, reason) do - Map.merge(state.attributes, %{ - root_process_error: true, - transaction_error: {:error, %{kind: :exit, reason: reason, stack: stack}} - }) + Map.put(state.attributes, :transaction_error, {:error, %{kind: :exit, reason: reason, stack: stack}}) else _ -> state.attributes end diff --git a/test/other_transaction_test.exs b/test/other_transaction_test.exs index 3ae7a836..c75f41c5 100644 --- a/test/other_transaction_test.exs +++ b/test/other_transaction_test.exs @@ -179,8 +179,7 @@ defmodule OtherTransactionTest do [%{spans: spans}] = TestHelper.gather_harvest(TelemetrySdk.Spans.Harvester, 0) spansaction = TestHelper.find_event(spans, %{"nr.entryPoint": true, name: "Test/Error"}) - assert spansaction.attributes[:error] - refute spansaction.attributes[:root_process_error] + assert spansaction.attributes[:"error.class"] end defmodule ExpectedError do @@ -188,9 +187,9 @@ defmodule OtherTransactionTest do end @tag :capture_log - test "don't fail transaction on expected error exit" do + test "Mark expected error on Transaction event" do TestHelper.run_with(:nr_config, trace_mode: :infinite) - + TestHelper.restart_harvest_cycle(Collector.TransactionEvent.HarvestCycle) TestHelper.restart_harvest_cycle(TelemetrySdk.Spans.HarvestCycle) {:ok, _sup} = Task.Supervisor.start_link(name: TestTaskSup) test = self() @@ -211,6 +210,13 @@ defmodule OtherTransactionTest do spansaction = TestHelper.find_event(spans, %{"nr.entryPoint": true, name: "Test/ExpectedError"}) refute spansaction.attributes[:error] + assert spansaction.attributes[:"error.class"] + assert spansaction.attributes[:"error.expected"] + + events = TestHelper.gather_harvest(Collector.TransactionEvent.Harvester) + event = TestHelper.find_event(events, "OtherTransaction/Test/ExpectedError") + + assert event[:error] end test "Report a raise that is rescued inside a Transaction" do