Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions dbms/src/Common/FailPoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ namespace DB
M(exception_when_fetch_disagg_pages) \
M(cop_send_failure) \
M(file_cache_fg_download_fail) \
M(file_cache_bg_download_fail) \
M(file_cache_bg_download_schedule_fail) \
M(force_set_parallel_prehandle_threshold) \
M(force_raise_prehandle_exception) \
M(force_agg_on_partial_block) \
Expand All @@ -135,6 +137,7 @@ namespace DB
M(force_join_v2_probe_disable_lm) \
M(force_s3_random_access_file_init_fail) \
M(force_s3_random_access_file_read_fail) \
M(force_s3_random_access_file_seek_chunked) \
M(force_release_snap_meet_null_storage)

#define APPLY_FOR_PAUSEABLE_FAILPOINTS_ONCE(M) \
Expand Down
134 changes: 134 additions & 0 deletions dbms/src/Common/TiFlashMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,27 @@

namespace DB
{
namespace
{
constexpr std::array remote_cache_file_type_labels = {"merged", "coldata", "other"};
constexpr std::array remote_cache_wait_result_labels = {"hit", "timeout", "failed"};
constexpr std::array remote_cache_reject_reason_labels = {"too_many_download"};
constexpr std::array remote_cache_download_stage_labels = {"queue_wait", "download"};
constexpr auto remote_cache_wait_on_downloading_buckets = ExpBuckets{0.0001, 2, 20};
constexpr auto remote_cache_bg_download_stage_buckets = ExpBuckets{0.0001, 2, 20};

static_assert(
remote_cache_file_type_labels.size() == static_cast<size_t>(TiFlashMetrics::RemoteCacheFileTypeMetric::Count));
static_assert(
remote_cache_wait_result_labels.size() == static_cast<size_t>(TiFlashMetrics::RemoteCacheWaitResultMetric::Count));
static_assert(
remote_cache_reject_reason_labels.size()
== static_cast<size_t>(TiFlashMetrics::RemoteCacheRejectReasonMetric::Count));
static_assert(
remote_cache_download_stage_labels.size()
== static_cast<size_t>(TiFlashMetrics::RemoteCacheDownloadStageMetric::Count));
} // namespace

TiFlashMetrics & TiFlashMetrics::instance()
{
static TiFlashMetrics inst; // Instantiated on first use.
Expand Down Expand Up @@ -78,6 +99,82 @@ TiFlashMetrics::TiFlashMetrics()
.Name("tiflash_storage_s3_store_summary_bytes")
.Help("S3 storage summary bytes by store and file type")
.Register(*registry);

registered_remote_cache_wait_on_downloading_result_family
= &prometheus::BuildCounter()
.Name("tiflash_storage_remote_cache_wait_on_downloading_result")
.Help("Bounded wait result of remote cache downloading")
.Register(*registry);
registered_remote_cache_wait_on_downloading_bytes_family
= &prometheus::BuildCounter()
.Name("tiflash_storage_remote_cache_wait_on_downloading_bytes")
.Help("Bytes covered by remote cache bounded wait")
.Register(*registry);
// Timeline for one cache miss with possible follower requests:
//
// req A: miss -> create Empty -> enqueue bg task ---- queue_wait ---- download ---- Complete/Failed
// req B: sees Empty -> -------- wait_on_downloading_seconds --------> hit/timeout/failed
// req C: sees Empty -> --- wait_on_downloading_seconds ---> hit/timeout/failed
//
// `tiflash_storage_remote_cache_bg_download_stage_seconds`
// - downloader-task view
// - measures how long the background download itself spent in `queue_wait` and `download`
registered_remote_cache_bg_download_stage_seconds_family
= &prometheus::BuildHistogram()
.Name("tiflash_storage_remote_cache_bg_download_stage_seconds")
.Help("Remote cache background download stage duration")
.Register(*registry);
// `tiflash_storage_remote_cache_wait_on_downloading_seconds`
// - follower-request view
// - measures how long a request waited on an existing `Empty` segment before ending as hit/timeout/failed
registered_remote_cache_wait_on_downloading_seconds_family
= &prometheus::BuildHistogram()
.Name("tiflash_storage_remote_cache_wait_on_downloading_seconds")
.Help("Bounded wait duration of remote cache downloading")
.Register(*registry);
registered_remote_cache_reject_family = &prometheus::BuildCounter()
.Name("tiflash_storage_remote_cache_reject")
.Help("Remote cache admission rejection by reason and file type")
.Register(*registry);

for (size_t file_type_idx = 0; file_type_idx < remote_cache_file_type_labels.size(); ++file_type_idx)
{
for (size_t result_idx = 0; result_idx < remote_cache_wait_result_labels.size(); ++result_idx)
{
auto labels = prometheus::Labels{
{"result", std::string(remote_cache_wait_result_labels[result_idx])},
{"file_type", std::string(remote_cache_file_type_labels[file_type_idx])},
};
remote_cache_wait_on_downloading_result_metrics[file_type_idx][result_idx]
= &registered_remote_cache_wait_on_downloading_result_family->Add(labels);
remote_cache_wait_on_downloading_bytes_metrics[file_type_idx][result_idx]
= &registered_remote_cache_wait_on_downloading_bytes_family->Add(labels);
prometheus::Histogram::BucketBoundaries wait_buckets = ExpBuckets{
remote_cache_wait_on_downloading_buckets.start,
remote_cache_wait_on_downloading_buckets.base,
remote_cache_wait_on_downloading_buckets.size};
remote_cache_wait_on_downloading_seconds_metrics[file_type_idx][result_idx]
= &registered_remote_cache_wait_on_downloading_seconds_family->Add(labels, wait_buckets);
}
for (size_t reason_idx = 0; reason_idx < remote_cache_reject_reason_labels.size(); ++reason_idx)
{
remote_cache_reject_metrics[file_type_idx][reason_idx] = &registered_remote_cache_reject_family->Add(
{{"reason", std::string(remote_cache_reject_reason_labels[reason_idx])},
{"file_type", std::string(remote_cache_file_type_labels[file_type_idx])}});
}
for (size_t stage_idx = 0; stage_idx < remote_cache_download_stage_labels.size(); ++stage_idx)
{
prometheus::Histogram::BucketBoundaries buckets = ExpBuckets{
remote_cache_bg_download_stage_buckets.start,
remote_cache_bg_download_stage_buckets.base,
remote_cache_bg_download_stage_buckets.size};
remote_cache_bg_download_stage_seconds_metrics[file_type_idx][stage_idx]
= &registered_remote_cache_bg_download_stage_seconds_family->Add(
{{"stage", std::string(remote_cache_download_stage_labels[stage_idx])},
{"file_type", std::string(remote_cache_file_type_labels[file_type_idx])}},
buckets);
}
}
}

void TiFlashMetrics::addReplicaSyncRU(UInt32 keyspace_id, UInt64 ru)
Expand Down Expand Up @@ -287,4 +384,41 @@ void TiFlashMetrics::setS3StoreSummaryBytes(UInt64 store_id, UInt64 data_file_by
it->second.data_file_bytes->Set(data_file_bytes);
it->second.dt_file_bytes->Set(dt_file_bytes);
}

prometheus::Counter & TiFlashMetrics::getRemoteCacheWaitOnDownloadingResultCounter(
TiFlashMetrics::RemoteCacheFileTypeMetric file_type,
TiFlashMetrics::RemoteCacheWaitResultMetric result)
{
return *remote_cache_wait_on_downloading_result_metrics[static_cast<size_t>(file_type)]
[static_cast<size_t>(result)];
}

prometheus::Counter & TiFlashMetrics::getRemoteCacheWaitOnDownloadingBytesCounter(
TiFlashMetrics::RemoteCacheFileTypeMetric file_type,
TiFlashMetrics::RemoteCacheWaitResultMetric result)
{
return *remote_cache_wait_on_downloading_bytes_metrics[static_cast<size_t>(file_type)][static_cast<size_t>(result)];
}

prometheus::Histogram & TiFlashMetrics::getRemoteCacheWaitOnDownloadingSecondsHistogram(
TiFlashMetrics::RemoteCacheFileTypeMetric file_type,
TiFlashMetrics::RemoteCacheWaitResultMetric result)
{
return *remote_cache_wait_on_downloading_seconds_metrics[static_cast<size_t>(file_type)]
[static_cast<size_t>(result)];
}

prometheus::Histogram & TiFlashMetrics::getRemoteCacheBgDownloadStageSecondsHistogram(
TiFlashMetrics::RemoteCacheFileTypeMetric file_type,
TiFlashMetrics::RemoteCacheDownloadStageMetric stage)
{
return *remote_cache_bg_download_stage_seconds_metrics[static_cast<size_t>(file_type)][static_cast<size_t>(stage)];
}

prometheus::Counter & TiFlashMetrics::getRemoteCacheRejectCounter(
TiFlashMetrics::RemoteCacheFileTypeMetric file_type,
TiFlashMetrics::RemoteCacheRejectReasonMetric reason)
{
return *remote_cache_reject_metrics[static_cast<size_t>(file_type)][static_cast<size_t>(reason)];
}
} // namespace DB
Loading