about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yunchih Chen <yunchih.cat@gmail.com> 2018-12-10 22:49:37 +0800
committer Yunchih Chen <yunchih.cat@gmail.com> 2019-03-07 15:02:17 +0800
commit afbf0023413af9f249ff5033029d242515354913 (patch)
tree 31522cbb2a8b147b3e9835f913681bf7a159f654
parent fa55b263ffe043ea38f5f62b504630fba44b9469 (diff)
download nfcollect-afbf0023413af9f249ff5033029d242515354913.tar
nfcollect-afbf0023413af9f249ff5033029d242515354913.tar.gz
nfcollect-afbf0023413af9f249ff5033029d242515354913.tar.bz2
nfcollect-afbf0023413af9f249ff5033029d242515354913.tar.lz
nfcollect-afbf0023413af9f249ff5033029d242515354913.tar.xz
nfcollect-afbf0023413af9f249ff5033029d242515354913.tar.zst
nfcollect-afbf0023413af9f249ff5033029d242515354913.zip
Improve garbage collection
-rw-r--r-- bin/nfcollect.c 14
-rw-r--r-- include/main.h 4
-rw-r--r-- lib/commit.c 43
-rw-r--r-- lib/sql.c 26
4 files changed, 65 insertions, 22 deletions
diff --git a/bin/nfcollect.c b/bin/nfcollect.c
index 3ffd3fa..52791f8 100644
--- a/bin/nfcollect.c
+++ b/bin/nfcollect.c
@@ -101,6 +101,9 @@ int main(int argc, char *argv[]) {
case 's':
storage_size = atoi(optarg);
break;
+ case 'V':
+ do_vacuum = true;
+ break;
case '?':
fprintf(stderr, "Unknown argument, see --help\n");
exit(1);
@@ -122,9 +125,18 @@ int main(int argc, char *argv[]) {
if (signal(SIGHUP, sig_handler) == SIG_ERR)
ERROR("Could not set SIGHUP handler");
+ // Vacuum and get current space consumption
+ if (do_vacuum && check_file_exist(storage)) {
+ INFO(PACKAGE ": vacuum database on startup");
+ sqlite3 *db = NULL;
+ db_open(&db, storage);
+ db_vacuum(db);
+ db_close(db);
+ }
+
pthread_mutex_init(&g.storage_consumed_lock, NULL);
g.storage_budget = storage_size * 1024 * 1024; // MB
- g.storage_consumed = 0;
+ g.storage_consumed = check_file_size(storage);
g.storage_file = (const char *)storage;
g.max_nr_entries = g_max_nr_entries_default;
diff --git a/include/main.h b/include/main.h
index f64fe46..efcfc70 100644
--- a/include/main.h
+++ b/include/main.h
@@ -42,9 +42,11 @@
#define g_sqlite_table_header "nfcollect_v1_header"
#define g_sqlite_table_data "nfcollect_v1_data"
#define g_sqlite_nr_fail_retry 8
+// Number of blocks recycled at each GC when space is depleted
#define g_gc_rate 16
+#define g_gc_cap 0.85
// Default number of packets stored in a block
-#define g_max_nr_entries_default (256*1024/24)
+#define g_max_nr_entries_default (256 * 1024 / 24)
#ifdef DEBUG_OUTPUT
#define DEBUG_ON 1
#else
diff --git a/lib/commit.c b/lib/commit.c
index 45d4d73..b61b05b 100644
--- a/lib/commit.c
+++ b/lib/commit.c
@@ -1,21 +1,50 @@
#include "collect.h"
#include "main.h"
#include "sql.h"
+#include "util.h"
#include <zstd.h>
static void do_gc(sqlite3 *db, State *s) {
- uint32_t cur_size = s->header->raw_size;
+ int64_t cur_size = (int64_t)s->header->raw_size;
pthread_mutex_lock(&s->global->storage_consumed_lock);
- uint32_t remain_size =
+ int64_t remain_size =
s->global->storage_budget - s->global->storage_consumed - cur_size;
- uint32_t gc_size = -remain_size + cur_size * g_gc_rate;
- if (gc_size >= s->global->storage_consumed)
- gc_size = s->global->storage_consumed;
+ int64_t gc_size = 0;
+ if (remain_size <= 0) {
+ gc_size = -remain_size + cur_size * g_gc_rate;
+ if (gc_size >= s->global->storage_consumed)
+ gc_size = s->global->storage_consumed * g_gc_cap;
+ else if (gc_size >= s->global->storage_budget * g_gc_cap)
+ gc_size = s->global->storage_budget * g_gc_cap;
+ }
+ DEBUG("do_gc: gc_size %.2f KB, remain %.2f KB, cur_size, %.2f KB\n",
+ gc_size / 1024.0, remain_size / 1024.0, cur_size / 1024.0);
pthread_mutex_unlock(&s->global->storage_consumed_lock);
- if (remain_size <= 0)
- db_delete_oldest_bytes(db, gc_size);
+ uint32_t gc_count = 0;
+ if (gc_size > 0) {
+ gc_count = db_delete_oldest_bytes(db, gc_size);
+ db_vacuum(db);
+ }
+
+ int64_t dbsize = check_file_size(s->global->storage_file);
+ pthread_mutex_lock(&s->global->storage_consumed_lock);
+ s->global->storage_consumed = dbsize;
+ pthread_mutex_unlock(&s->global->storage_consumed_lock);
+
+ if (gc_count) {
+ INFO("gc: storage budget: %.2f MB, storage consumed: %.2f MB, (%.2f "
+ "MB/%d entries) vacuumed",
+ s->global->storage_budget / 1024.0 / 1024.0,
+ s->global->storage_consumed / 1024.0 / 1024.0,
+ gc_size / 1024.0 / 1024.0, gc_count);
+ } else {
+ DEBUG("gc: storage budget: %.2f MB, storage consumed: %.2f MB, skip "
+ "vacuuming",
+ s->global->storage_budget / 1024.0 / 1024.0,
+ s->global->storage_consumed / 1024.0 / 1024.0);
+ }
}
static int commit_lz4(State *s, void **buf) {
diff --git a/lib/sql.c b/lib/sql.c
index 1b4df89..b7ebcaf 100644
--- a/lib/sql.c
+++ b/lib/sql.c
@@ -220,7 +220,8 @@ int db_delete_oldest_bytes(sqlite3 *db, int64_t bytes) {
if (!bytes)
return 0;
- db_exec_fatal(db, "BEGIN TRANSACTION", "db_delete: Can't begin txn");
+ db_exec_fatal(db, "BEGIN TRANSACTION",
+ "db_delete_oldest_byte: Can't begin txn");
rc = sqlite3_prepare_v2(db, select_sql, -1, &stmt, 0);
if (rc != SQLITE_OK) {
ERROR("Can't select (%i): %s\n", rc, sqlite3_errmsg(db));
@@ -255,24 +256,23 @@ int db_delete_oldest_bytes(sqlite3 *db, int64_t bytes) {
}
strcat(buf, _buf);
- bytes -= size;
count++;
}
- if (!*buf)
- return 0;
+ if (count) {
+ const char *_delete_sql =
+ "DELETE FROM " g_sqlite_table_data " WHERE id in (%s);";
+ char *delete_sql = malloc(strlen(_delete_sql) + strlen(buf) + 1);
+ sprintf(delete_sql, _delete_sql, buf);
+ db_exec_fatal(db, delete_sql, "Can't delete");
+ DEBUG("Deleted old data, SQL: %s", delete_sql);
+ free(delete_sql);
+ }
- const char *_delete_sql =
- "DELETE FROM " g_sqlite_table_data " WHERE id in (%s);";
- char *delete_sql = malloc(strlen(_delete_sql) + strlen(buf) + 1);
- sprintf(delete_sql, _delete_sql, buf);
- db_exec_fatal(db, delete_sql, "Can't delete");
- DEBUG("Deleted old data, SQL: %s", delete_sql);
- free(delete_sql);
free(buf);
-
assert(SQLITE_SCHEMA != sqlite3_finalize(stmt));
- db_exec_fatal(db, "END TRANSACTION", "db_begin: Can't end txn");
+ db_exec_fatal(db, "END TRANSACTION",
+ "db_delete_oldest_byte: Can't end txn");
return count;
}