Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support time based retention #17413

Merged
merged 33 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
12ee96f
Add charts
stelfrag Mar 26, 2024
2cde1a5
Add option to specify time retention
stelfrag Apr 16, 2024
5134d9d
Allow disk space to be 0 (unlimited)
stelfrag Apr 16, 2024
7eb0afd
Test configured disk space to be total free space - 10%
stelfrag Apr 16, 2024
01356e6
Further fix on human readable retention
stelfrag Apr 16, 2024
1f5874c
Remove dbengine parallel initialization option
stelfrag Apr 17, 2024
688cdf9
Allow tier disk space to be 0 in which case the current disk space av…
stelfrag Apr 22, 2024
90de153
Proper calculation of iterations
stelfrag Apr 22, 2024
c7b72f2
Function to return sqlite database space
stelfrag Apr 23, 2024
a44d3db
Do not account for metadata size if old settings are detected
stelfrag Apr 26, 2024
4588cec
Consider legacy tier multihost disk space MB settings as well
stelfrag Apr 30, 2024
358fe53
Adjust time retention calculation
stelfrag May 8, 2024
73c4299
Cleanup / allow 5% free disk space when all disk is to be used just t…
stelfrag May 13, 2024
d9f339c
Update some defaults
stelfrag May 13, 2024
10245f1
Use default 1024 MB for each tier
stelfrag May 17, 2024
b257cb9
Update src/database/engine/rrdengine.c
stelfrag Jun 3, 2024
61ac802
Switch retention days to integer
stelfrag Jun 3, 2024
6478cef
Respect "dbengine multihost disk space MB" setting for tier 0 if "dbe…
stelfrag Jun 3, 2024
fd5856f
Add dbengine_tier label
stelfrag Jun 3, 2024
aa4463a
Fix retention_percentage check
stelfrag Jun 3, 2024
6ff8431
Change to tier
stelfrag Jun 3, 2024
ba62e60
Time % calculation includes first datafile
stelfrag Jun 4, 2024
5afba82
Add disk space used by metadata in node_instances
stelfrag Jun 4, 2024
f209cd7
Revert changes to node_instances API
stelfrag Jun 5, 2024
71fabbd
Do not take into account metadata diskspace usage for now
stelfrag Jun 5, 2024
11b44d3
Add note for the disk space usage calculation in dbengine_tier_retent…
stelfrag Jun 5, 2024
178143f
Restore update every checks
stelfrag Jun 5, 2024
3387c00
Check backfill option before tier disk space config to reorder it in …
stelfrag Jun 5, 2024
aaee15c
Check tier iteration option before the disk space config per tier
stelfrag Jun 5, 2024
044669e
Check if legacy dbengine multihost disk space MB option is set before…
stelfrag Jun 6, 2024
8c07067
Fix warning
stelfrag Jun 6, 2024
7164004
Allow dbengine tier 0 disk space MB to be zero and not default to 256…
stelfrag Jun 6, 2024
c2a00d7
Check for options in global section
stelfrag Jun 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
56 changes: 42 additions & 14 deletions src/daemon/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,24 @@ static void backwards_compatible_config() {
config_move(CONFIG_SECTION_GLOBAL, "dbengine multihost disk space",
CONFIG_SECTION_DB, "dbengine multihost disk space MB");

config_move(CONFIG_SECTION_DB, "dbengine disk space MB",
CONFIG_SECTION_DB, "dbengine multihost disk space MB");

config_move(CONFIG_SECTION_DB, "dbengine multihost disk space MB",
CONFIG_SECTION_DB, "dbengine tier 0 disk space MB");

config_move(CONFIG_SECTION_DB, "dbengine tier 1 multihost disk space MB",
CONFIG_SECTION_DB, "dbengine tier 1 disk space MB");

config_move(CONFIG_SECTION_DB, "dbengine tier 2 multihost disk space MB",
CONFIG_SECTION_DB, "dbengine tier 2 disk space MB");

config_move(CONFIG_SECTION_DB, "dbengine tier 3 multihost disk space MB",
CONFIG_SECTION_DB, "dbengine tier 3 disk space MB");

config_move(CONFIG_SECTION_DB, "dbengine tier 4 multihost disk space MB",
CONFIG_SECTION_DB, "dbengine tier 4 disk space MB");

config_move(CONFIG_SECTION_GLOBAL, "memory deduplication (ksm)",
CONFIG_SECTION_DB, "memory deduplication (ksm)");

Expand Down Expand Up @@ -1099,7 +1117,14 @@ static int get_hostname(char *buf, size_t buf_size) {
return gethostname(buf, buf_size);
}

static void get_netdata_configured_variables() {
static void get_netdata_configured_variables()
{
legacy_multihost_db_space = config_exists(CONFIG_SECTION_DB, "dbengine multihost disk space MB");
if (!legacy_multihost_db_space)
legacy_multihost_db_space = config_exists(CONFIG_SECTION_GLOBAL, "dbengine multihost disk space");
if (!legacy_multihost_db_space)
legacy_multihost_db_space = config_exists(CONFIG_SECTION_GLOBAL, "dbengine disk space");

backwards_compatible_config();

// ------------------------------------------------------------------------
Expand Down Expand Up @@ -1201,20 +1226,23 @@ static void get_netdata_configured_variables() {

// ------------------------------------------------------------------------
// get default Database Engine disk space quota in MiB
//
// // if (!config_exists(CONFIG_SECTION_DB, "dbengine disk space MB") && !config_exists(CONFIG_SECTION_DB, "dbengine multihost disk space MB"))
//
// default_rrdeng_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb);
// if(default_rrdeng_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) {
// netdata_log_error("Invalid dbengine disk space %d given. Defaulting to %d.", default_rrdeng_disk_quota_mb, RRDENG_MIN_DISK_SPACE_MB);
// default_rrdeng_disk_quota_mb = RRDENG_MIN_DISK_SPACE_MB;
// config_set_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb);
// }
//
// default_multidb_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", compute_multidb_diskspace());
// if(default_multidb_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) {
// netdata_log_error("Invalid multidb disk space %d given. Defaulting to %d.", default_multidb_disk_quota_mb, default_rrdeng_disk_quota_mb);
// default_multidb_disk_quota_mb = default_rrdeng_disk_quota_mb;
// config_set_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", default_multidb_disk_quota_mb);
// }

default_rrdeng_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb);
if(default_rrdeng_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) {
netdata_log_error("Invalid dbengine disk space %d given. Defaulting to %d.", default_rrdeng_disk_quota_mb, RRDENG_MIN_DISK_SPACE_MB);
default_rrdeng_disk_quota_mb = RRDENG_MIN_DISK_SPACE_MB;
config_set_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb);
}

default_multidb_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", compute_multidb_diskspace());
if(default_multidb_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) {
netdata_log_error("Invalid multidb disk space %d given. Defaulting to %d.", default_multidb_disk_quota_mb, default_rrdeng_disk_quota_mb);
default_multidb_disk_quota_mb = default_rrdeng_disk_quota_mb;
config_set_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", default_multidb_disk_quota_mb);
}
#else
if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
error_report("RRD_MEMORY_MODE_DBENGINE is not supported in this platform. The agent will use db mode 'save' instead.");
Expand Down
2 changes: 2 additions & 0 deletions src/daemon/service.c
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,8 @@ void *service_main(void *ptr)
}
real_step = USEC_PER_SEC;

dbengine_retention_statistics();

svc_rrd_cleanup_obsolete_charts_from_all_hosts();

if (service_running(SERVICE_MAINTENANCE))
Expand Down
57 changes: 53 additions & 4 deletions src/database/contexts/api_v2.c
Original file line number Diff line number Diff line change
Expand Up @@ -1124,6 +1124,30 @@ void buffer_json_query_timings(BUFFER *wb, const char *key, struct query_timings

void build_info_to_json_object(BUFFER *b);

/*
 * Render a duration, given in seconds, as human readable text made of the
 * non-zero components among days, hours, minutes and seconds, joined with
 * ", " (for example "1 day, 2 hours, 5 seconds").
 *
 * seconds     - the duration to format.
 * result      - destination buffer that receives the text.
 * result_size - limit forwarded unchanged to strncpyz(); the callers visible
 *               next to this function pass sizeof(buffer) - 1.
 *
 * A separator is written before every component that follows an earlier one,
 * so components are never glued together when an intermediate unit is zero
 * (previously 86405 seconds produced "1 day5 seconds"). A zero duration is
 * rendered as "0 seconds" instead of an empty string.
 */
static void convert_seconds_to_dhms(time_t seconds, char *result, int result_size) {
    int days, hours, minutes;

    days = (int) (seconds / (24 * 3600));
    seconds = (int) (seconds % (24 * 3600));
    hours = (int) (seconds / 3600);
    seconds %= 3600;
    minutes = (int) (seconds / 60);
    seconds %= 60;

    // Assemble the text in a temporary buffer and copy it out at the end
    BUFFER *buf = buffer_create(128, NULL);

    // Empty until the first component is written, ", " afterwards
    const char *separator = "";

    if (days) {
        buffer_sprintf(buf, "%s%d day%s", separator, days, days == 1 ? "" : "s");
        separator = ", ";
    }

    if (hours) {
        buffer_sprintf(buf, "%s%d hour%s", separator, hours, hours == 1 ? "" : "s");
        separator = ", ";
    }

    if (minutes) {
        buffer_sprintf(buf, "%s%d minute%s", separator, minutes, minutes == 1 ? "" : "s");
        separator = ", ";
    }

    // Always emit the seconds when nothing else was written, so that a zero
    // duration still produces a meaningful value ("0 seconds")
    if (seconds || separator[0] == '\0')
        buffer_sprintf(buf, "%s%d second%s", separator, (int) seconds, seconds == 1 ? "" : "s");

    strncpyz(result, buffer_tostring(buf), result_size);
    buffer_free(buf);
}

void buffer_json_agents_v2(BUFFER *wb, struct query_timings *timings, time_t now_s, bool info, bool array) {
if(!now_s)
now_s = now_realtime_sec();
Expand Down Expand Up @@ -1151,11 +1175,15 @@ void buffer_json_agents_v2(BUFFER *wb, struct query_timings *timings, time_t now
buffer_json_cloud_status(wb, now_s);

buffer_json_member_add_array(wb, "db_size");
size_t group_seconds = localhost->rrd_update_every;
for (size_t tier = 0; tier < storage_tiers; tier++) {
STORAGE_ENGINE *eng = localhost->db[tier].eng;
if (!eng) continue;

group_seconds *= storage_tiers_grouping_iterations[tier];
uint64_t max = storage_engine_disk_space_max(eng->seb, localhost->db[tier].si);
if (!max)
max = get_directory_free_bytes_space(multidb_ctx[tier]);
uint64_t used = storage_engine_disk_space_used(eng->seb, localhost->db[tier].si);
time_t first_time_s = storage_engine_global_first_time_s(eng->seb, localhost->db[tier].si);
size_t currently_collected_metrics = storage_engine_collected_metrics(eng->seb, localhost->db[tier].si);
Expand All @@ -1168,6 +1196,10 @@ void buffer_json_agents_v2(BUFFER *wb, struct query_timings *timings, time_t now

buffer_json_add_array_item_object(wb);
buffer_json_member_add_uint64(wb, "tier", tier);
char human_retention[128];
convert_seconds_to_dhms((time_t) group_seconds, human_retention, sizeof(human_retention) - 1);
buffer_json_member_add_string(wb, "point_every", human_retention);

buffer_json_member_add_uint64(wb, "metrics", storage_engine_metrics(eng->seb, localhost->db[tier].si));
buffer_json_member_add_uint64(wb, "samples", storage_engine_samples(eng->seb, localhost->db[tier].si));

Expand All @@ -1178,13 +1210,30 @@ void buffer_json_agents_v2(BUFFER *wb, struct query_timings *timings, time_t now
}

if(first_time_s) {
time_t retention = now_s - first_time_s;

buffer_json_member_add_time_t(wb, "from", first_time_s);
buffer_json_member_add_time_t(wb, "to", now_s);
buffer_json_member_add_time_t(wb, "retention", now_s - first_time_s);
buffer_json_member_add_time_t(wb, "retention", retention);

convert_seconds_to_dhms(retention, human_retention, sizeof(human_retention) - 1);
buffer_json_member_add_string(wb, "retention_human", human_retention);

if(used || max) // we have disk space information
buffer_json_member_add_time_t(wb, "expected_retention",
(time_t) ((NETDATA_DOUBLE) (now_s - first_time_s) * 100.0 / percent));
if(used || max) { // we have disk space information
time_t time_retention = multidb_ctx[tier]->config.max_retention_s;
time_t space_retention = (time_t)((NETDATA_DOUBLE)(now_s - first_time_s) * 100.0 / percent);
time_t actual_retention = MIN(space_retention, time_retention ? time_retention : space_retention);

if (time_retention) {
convert_seconds_to_dhms(time_retention, human_retention, sizeof(human_retention) - 1);
buffer_json_member_add_time_t(wb, "requested_retention", time_retention);
buffer_json_member_add_string(wb, "requested_retention_human", human_retention);
}

convert_seconds_to_dhms(actual_retention, human_retention, sizeof(human_retention) - 1);
buffer_json_member_add_time_t(wb, "expected_retention", actual_retention);
buffer_json_member_add_string(wb, "expected_retention_human", human_retention);
}
}

if(currently_collected_metrics)
Expand Down
2 changes: 1 addition & 1 deletion src/database/engine/datafile.c
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ int init_data_files(struct rrdengine_instance *ctx)
if (ctx->loading.create_new_datafile_pair)
create_new_datafile_pair(ctx, false);

stelfrag marked this conversation as resolved.
Show resolved Hide resolved
while(rrdeng_ctx_exceeded_disk_quota(ctx))
while(rrdeng_ctx_tier_cap_exceeded(ctx))
datafile_delete(ctx, ctx->datafiles.first, false, false);
}

Expand Down
11 changes: 11 additions & 0 deletions src/database/engine/journalfile.c
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,7 @@ static void journalfile_restore_extent_metadata(struct rrdengine_instance *ctx,
}

time_t now_s = max_acceptable_collected_time();
stelfrag marked this conversation as resolved.
Show resolved Hide resolved
time_t extent_first_time_s = journalfile->v2.first_time_s ? journalfile->v2.first_time_s : LONG_MAX;
for (i = 0; i < count ; ++i) {
nd_uuid_t *temp_id;
uint8_t page_type = jf_metric_data->descr[i].type;
Expand Down Expand Up @@ -728,8 +729,18 @@ static void journalfile_restore_extent_metadata(struct rrdengine_instance *ctx,
journalfile->datafile,
jf_metric_data->extent_offset, jf_metric_data->extent_size, jf_metric_data->descr[i].page_length);

extent_first_time_s = MIN(extent_first_time_s, vd.start_time_s);

mrg_metric_release(main_mrg, metric);
}

journalfile->v2.first_time_s = extent_first_time_s;

time_t old = __atomic_load_n(&ctx->atomic.first_time_s, __ATOMIC_RELAXED);;
do {
if(old <= extent_first_time_s)
break;
} while(!__atomic_compare_exchange_n(&ctx->atomic.first_time_s, &old, extent_first_time_s, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}

/*
Expand Down
9 changes: 0 additions & 9 deletions src/database/engine/pagecache.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,6 @@ struct page_descr_with_data {
} link;
};

// Number of bytes in the scratch area embedded in struct rrdeng_page_info
#define PAGE_INFO_SCRATCH_SZ (8)
// Describes a single page: a caller-owned scratch area, the page's time range
// and its length.
struct rrdeng_page_info {
uint8_t scratch[PAGE_INFO_SCRATCH_SZ]; /* scratch area to be used by page-cache users */

// NOTE(review): the usec_t type and the _ut suffix suggest microsecond timestamps - confirm
usec_t start_time_ut;
usec_t end_time_ut;
// NOTE(review): unit of the length (presumably bytes) is not visible here - confirm
uint32_t page_length;
};

// Holds a single 32-bit reference counter.
// NOTE(review): what the references point to is not visible in this excerpt - confirm against the users of this struct
struct pg_alignment {
uint32_t refcount;
};
Expand Down