From 716599d9d7eb17b999c33e0668fc8dbf3a69839f Mon Sep 17 00:00:00 2001 From: Not Zed Date: Fri, 18 Jun 2021 11:59:05 +0930 Subject: [PATCH] Indexing improved to use per-directory change resolution. --- TODO | 3 -- dbindex.c | 116 +++++++++++++++++++++++++++++++++++++++++++------ dbindex.h | 12 +++++ disk-indexer.c | 110 +++++++++++++++++++++------------------------- 4 files changed, 164 insertions(+), 77 deletions(-) diff --git a/TODO b/TODO index 23e767f..98db9f7 100644 --- a/TODO +++ b/TODO @@ -34,9 +34,6 @@ o web frontend + can use the shuffle code again o internals - - kill ez_bitset and find an alternative merge algorithm for indexer - - incremental indexing add/remove - - can simply be done on a per-directory basis - incremental all-playlist and all-shuffle? - add items as scanned? - when you add an item to a playlist, randomly swap it's order with an existing item diff --git a/dbindex.c b/dbindex.c index 7cc872e..c2876a0 100644 --- a/dbindex.c +++ b/dbindex.c @@ -88,7 +88,8 @@ struct dbindex { MDB_dbi list_by_name; MDB_dbi file; - MDB_dbi file_by_path; // key is "diskid{hex}/path" UNIQUE TODO: limited to 511 bytes length + MDB_dbi file_by_dir; // key is "diskid{hex}/path" UNIQUE TODO: limited to 511 bytes length + MDB_dbi file_by_path; // key is "diskid{hex}/path/name" UNIQUE TODO: limited to 511 bytes length MDB_dbi file_by_disk; // key is diskid FOREIGN MDB_dbi file_by_title; // key is title (maybe all lower case?) MDB_dbi file_by_artist; // key is artist @@ -189,6 +190,7 @@ dbindex *dbindex_open(const char *ipath) { res |= mdb_dbi_open(tx, "list#name", MDB_CREATE, &db->list_by_name); res |= mdb_dbi_open(tx, "file", MDB_CREATE | MDB_INTEGERKEY, &db->file); + res |= mdb_dbi_open(tx, "file#dir", MDB_CREATE | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &db->file_by_dir); res |= mdb_dbi_open(tx, "file#path", MDB_CREATE, &db->file_by_path); res |= mdb_dbi_open(tx, "file#disk", MDB_CREATE | MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP , &db->file_by_disk); res |= mdb_dbi_open(tx, "file#title", MDB_CREATE | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &db->file_by_title); @@ -621,6 +623,72 @@ fail1: return is; } +static int dbfile_node_cmp(const void *ap, const void *bp) { + const struct dbfile_node *a = ap; + const struct dbfile_node *b = bp; + + return strcmp(a->path, b->path); +} + +void dbfile_node_free(struct dbfile_node *n) { + if (n) { + dbfile_free(n->file); + free(n); + } +} + +// function name/syntax? +// path must be of the form "/foo/bar" +int dbfile_node_scan_path(dbtxn *tx, dbindex *db, dbid_t diskid, const char *path, ez_tree *tree) { + MDB_val key, dat; + MDB_cursor *cursor; + size_t len = strlen(path) + 8; + int res; + char start[len + 1]; + + ez_tree_init(tree, dbfile_node_cmp); + + if ((res = mdb_cursor_open(tx, db->file_by_dir, &cursor))) + goto fail; + + sprintf(start, "%08x%s", diskid, path); + + // path must end in / + // scan all items >= path where there is only 1 / more + + printf("@ dir '%s'\n", start); + + key.mv_data = start; + key.mv_size = len; + + if ((res = mdb_cursor_get(cursor, &key, &dat, MDB_SET)) == 0) + res = mdb_cursor_get(cursor, &key, &dat, MDB_GET_MULTIPLE); + while (res == 0) { + size_t count = dat.mv_size / sizeof(dbid_t); + dbid_t *files = dat.mv_data; + + for (int i=0;ifile = dbfile_get(tx, db, files[i]); + node->path = node->file->path; + ez_tree_put(tree, node); + + printf("%4d: path '%.*s'\n", *(dbid_t *)dat.mv_data, (int)key.mv_size, (char *)key.mv_data); + } + res = mdb_cursor_get(cursor, &key, &dat, MDB_NEXT_MULTIPLE); + } + printf("res=%d '%s'\n", res, mdb_strerror(res)); + + mdb_cursor_close(cursor); +fail: + res = res == MDB_NOTFOUND ? 0 : res; + + if (res != 0) + ez_tree_clear(tree, (void(*)(void *))dbfile_node_free); + return res; +} + void dbfile_free(dbfile *f) { if (f) { ez_blob_free(DBFILE_DESC, f); @@ -636,7 +704,7 @@ dbfile *dbfile_get(dbtxn *tx, dbindex *db, int fileid) { db->res = mdb_get(tx, db->file, &key, &data); - printf("dbfile_get(%d) = %d\n", fileid, db->res); + //printf("dbfile_get(%d) = %d\n", fileid, db->res); if (db->res == 0) { dbfile *p = calloc(1, sizeof(*p)); @@ -689,19 +757,28 @@ static int hist_equals(const void *p, const void *q) { int dbfile_del(dbtxn *tx, dbindex *db, dbfile *f) { MDB_val key, dat; int res; + char *dpath = NULL; // Remove secondary keys / constraints dat.mv_data = &f->id; dat.mv_size = sizeof(f->id); - // - by disk+path (and unique constraint) - char *dpath = dbfile_path(f); + // - by disk+dir+name (and unique constraint) + dpath = dbfile_path(f); key.mv_data = dpath; key.mv_size = strlen(dpath); - res = mdb_del(tx, db->file_by_path, &key, NULL); - free(dpath); - if (res) + if (res = mdb_del(tx, db->file_by_path, &key, NULL)) + goto fail; + + // - by disk+dir + char *tmp = strrchr(dpath, '/'); + if (!tmp) { + res = EINVAL; + goto fail; + } + key.mv_size = tmp - dpath; + if (res = mdb_del(tx, db->file_by_dir, &key, NULL)) goto fail; // - by diskid @@ -814,6 +891,7 @@ int dbfile_del(dbtxn *tx, dbindex *db, dbfile *f) { ez_array_clear(&array); } fail: + free(dpath); return res; } @@ -889,6 +967,7 @@ fail: int dbfile_add(MDB_txn *tx, dbindex *db, dbfile *f) { MDB_val key, data; + char *dpath = NULL; int res; // Check foreign constraints @@ -916,18 +995,28 @@ int dbfile_add(MDB_txn *tx, dbindex *db, dbfile *f) { data.mv_data = &f->id; data.mv_size = sizeof(f->id); - // - by disk+path (and unique constraint) - char *dpath = dbfile_path(f); - + // - by disk+dir+name (and unique constraint) + dpath = dbfile_path(f); key.mv_data = dpath; key.mv_size = strlen(dpath); - res = mdb_put(tx, db->file_by_path, &key, &data, MDB_NOOVERWRITE); - free(dpath); - if (res) { + if (res = mdb_put(tx, db->file_by_path, &key, &data, MDB_NOOVERWRITE)) { fprintf(stderr, "UNIQUE: path on this disk exists\n"); goto fail; } + // - by disk+dir + char *tmp = strrchr(dpath, '/'); + if (!tmp) { + fprintf(stderr, "INTERNAL: no directory for file\n"); + res = EINVAL; + goto fail; + } + key.mv_size = tmp - dpath; + if (res = mdb_put(tx, db->file_by_dir, &key, &data, MDB_NODUPDATA)) { + fprintf(stderr, "UNIQUE: file on this path exists\n"); + goto fail; + } + // - by diskid key.mv_data = &f->diskid; key.mv_size = sizeof(f->diskid); @@ -951,6 +1040,7 @@ int dbfile_add(MDB_txn *tx, dbindex *db, dbfile *f) { } fail: + free(dpath); return res; } diff --git a/dbindex.h b/dbindex.h index f668ff5..835c7f0 100644 --- a/dbindex.h +++ b/dbindex.h @@ -17,6 +17,7 @@ . */ +#include "ez-tree.h" #include "ez-blob.h" #include @@ -101,6 +102,13 @@ typedef struct dbindex dbindex; typedef struct MDB_txn dbtxn; typedef struct dbscan dbscan; +// for storing files in a tree +struct dbfile_node { + ez_node tn; + const char *path; // copy of file->path for key lookup + dbfile *file; +}; + // database location, default is ~/.local/lib/playerz/db. free after use. char *dbindex_home(void); @@ -133,6 +141,10 @@ int dbfile_update(dbtxn *txn, dbindex *db, dbfile *o, dbfile *f); int dbfile_inlist(dbtxn *tx, dbindex *db, dbid_t fileid, dbid_t listid); +// struct dbfile_node things +void dbfile_node_free(struct dbfile_node *p); +int dbfile_node_scan_path(dbtxn *tx, dbindex *db, dbid_t diskid, const char *path, ez_tree *tree); + // TBD? seems not dbscan *dbfile_scan_disk(dbtxn *tx, dbindex *db, int diskid); uint32_t dbfile_scan_next(dbscan *scan); diff --git a/disk-indexer.c b/disk-indexer.c index 354916d..ff0f804 100644 --- a/disk-indexer.c +++ b/disk-indexer.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -34,12 +35,13 @@ #include "ez-list.h" #include "ez-set.h" -#include "ez-bitset.h" #include "dbindex.h" #include "notify.h" +#define HAVE_FSTATAT 1 + struct indexer { char *root; @@ -54,8 +56,6 @@ struct indexer { ez_list queue; struct dirent *entry; - ez_bitset *existing; - // files to care about regex_t match; @@ -229,27 +229,7 @@ int indexer_init(struct indexer *ix, dbindex *db, const char *path, const char * printf("%8d : add new disk %s\n", disk->id, uuid); ix->disk = disk; } else { - int count = 0; - dbtxn *tx = dbindex_begin(db, NULL, 1); - - if (tx) { - dbscan *scan = dbfile_scan_disk(tx, db, ix->disk->id); - uint32_t fid; - - if (scan) { - ix->existing = ez_bitset_new(); - - while ((fid = dbfile_scan_next(scan)) != ~0) { - count++; - ez_bitset_set(ix->existing, fid, 1); - } - dbfile_scan_close(scan); - } - dbindex_commit(tx); - printf("bitset count %d actual count %d\n", ez_bitset_card(ix->existing), count); - } - - printf("%8d : add old disk %s (%d existing files)\n", ix->disk->id, uuid, count); + printf("%8d : add old disk %s\n", ix->disk->id, uuid); } // FIXME: error handling @@ -265,9 +245,6 @@ int indexer_init(struct indexer *ix, dbindex *db, const char *path, const char * } void indexer_destroy(struct indexer *ix) { - if (ix->existing) - ez_bitset_free(ix->existing); - dbdisk_free(ix->disk); struct dir_node *scan; @@ -315,21 +292,31 @@ in-place: */ // Add or update file -int indexer_add_file(struct indexer *ix, struct stat *st, const char *filepath, const char *diskpath) { +int indexer_add_file(struct indexer *ix, struct stat *st, const char *filepath, const char *diskpath, ez_tree *files) { dbfile *o = NULL, *f; + int res; - // If already there, and unchanged, do nothing - if (ix->existing) { - o = dbfile_get_path(ix->tx, ix->db, ix->disk->id, diskpath); + struct dbfile_node key = { .path = diskpath }; + struct dbfile_node *n; + printf(" add '%s'\n", key.path); + n = ez_tree_get(files, &key); + if (n) { + o = n->file; if (o) { - ez_bitset_set(ix->existing, o->id, 0); + printf(" exists %zd == %zd? %ld == %ld?\n", + o->size, st->st_size, + o->mtime, st->st_mtime); + // exists + ez_tree_remove(files, &key); if (o->size == st->st_size && o->mtime == st->st_mtime) { - dbfile_free(o); ix->unchanged += 1; + dbfile_node_free(n); return 0; } } + } else { + printf(" new file\n"); } // Get or update metadata @@ -346,17 +333,17 @@ int indexer_add_file(struct indexer *ix, struct stat *st, const char *filepath, f->mtime = st->st_mtime; if (o) { - dbfile_update(ix->tx, ix->db, o, f); + res = dbfile_update(ix->tx, ix->db, o, f); ix->updated += 1; } else { - dbfile_add(ix->tx, ix->db, f); + res = dbfile_add(ix->tx, ix->db, f); ix->added += 1; } dbfile_free(f); - dbfile_free(o); + dbfile_node_free(n); - return 0; + return res; } static void indexer_info(struct indexer *ix) { @@ -376,11 +363,17 @@ static void indexer_info(struct indexer *ix) { int indexer_scan(struct indexer *ix) { struct dir_node *scan = NULL; int count = 0, res = 0; + ez_tree files = { 0 }; gettimeofday(&ix->start, NULL); while ((scan = ez_list_remhead(&ix->queue))) { - //printf("scan %s\n", scan->path); + + printf("\nscan %s\n", scan->path); + res = dbfile_node_scan_path(ix->tx, ix->db, ix->disk->id, scan->path + strlen(ix->root), &files); + if (res != 0) + goto fail; + printf("existing: %d\n", ez_tree_size(&files)); DIR *d = opendir(scan->path); if (d) { @@ -404,13 +397,18 @@ int indexer_scan(struct indexer *ix) { sprintf(name, "%s/%s", scan->path, ep->d_name); - //printf(" %s\n", name); + printf(" %s\n", name); - // FIXME: use fstatat? - if ((res = lstat(name, &st)) == 0) { +#ifdef HAVE_FSTATAT + res = fstatat(dirfd(d), ep->d_name, &st, AT_SYMLINK_NOFOLLOW); +#else + res = lstat(name, &st); +#endif + if (res == 0) { if (S_ISREG(st.st_mode)) { if (regexec(&ix->match, ep->d_name, 0, NULL, 0) == 0) { - indexer_add_file(ix, &st, name, name + strlen(ix->root)); + if (res = indexer_add_file(ix, &st, name, name + strlen(ix->root), &files)) + goto fail; count++; if ((count % 1000) == 0) indexer_info(ix); @@ -435,27 +433,16 @@ int indexer_scan(struct indexer *ix) { goto fail; } dir_free(scan); - } - if (ix->existing) { - printf("stale count: %d\n", ez_bitset_card(ix->existing)); - ez_bitset_scan scan = { 0 }; - uint32_t bit; - - for (bit = ez_bitset_scan_init(ix->existing, &scan); bit != ~0; bit = ez_bitset_scan_next(&scan)) { - dbfile *f = dbfile_get(ix->tx, ix->db, bit); - - if (f) { - printf(" stale %s\n", f->path); - - dbfile_del(ix->tx, ix->db, f); - ix->removed += 1; - - dbfile_free(f); - } else { - printf(" ** db corrupt missing file: %d\n", bit); - } + // check for stale files @ path + printf("stale count: %d\n", ez_tree_size(&files)); + ez_tree_scan tscan; + for (struct dbfile_node *n = ez_tree_scan_init(&files, &tscan, EZ_LINK_RIGHT); n ; n = ez_tree_scan_next(&tscan)) { + printf(" stale %s\n", n->file->path); + dbfile_del(ix->tx, ix->db, n->file); + ix->removed += 1; } + ez_tree_clear(&files, (void(*)(void *))dbfile_node_free); } dbindex_commit(ix->tx); @@ -467,6 +454,7 @@ int indexer_scan(struct indexer *ix) { fail: if (scan) dir_free(scan); + ez_tree_clear(&files, (void(*)(void *))dbfile_node_free); dbindex_abort(ix->tx); -- 2.39.5