From 6a9bfc2e49194b9d6ce3f09dcee287e3c2d55f8b Mon Sep 17 00:00:00 2001 From: Not Zed Date: Thu, 10 Jun 2021 18:08:16 +0930 Subject: [PATCH] Work on internal playlist details and utilities. Split indexer-cmd out of indexer and removed the proto code too. --- .gitignore | 3 +- Makefile | 14 +- README | 58 +++++++- dbindex.c | 183 +++++++++++++++++--------- disk-indexer.c | 260 +----------------------------------- disk-util.c | 201 ---------------------------- index-util.c | 349 +++++++++++++++++++++++++++++++++++++++++++++++++ indexer-cmd.c | 34 +++++ 8 files changed, 571 insertions(+), 531 deletions(-) delete mode 100644 disk-util.c create mode 100644 index-util.c create mode 100644 indexer-cmd.c diff --git a/.gitignore b/.gitignore index c8ca3b7..ee71b93 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,8 @@ dbmarshal.[ch] audio-cmd disk-indexer disk-monitor -disk-util +index-util +indexer-cmd http-monitor input-monitor music-player diff --git a/Makefile b/Makefile index 45fe3da..bed492b 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,7 @@ CFLAGS+=$(foreach x,$(pkgs),$(CFLAGS_$(x))) LDFLAGS=$(foreach x,$(pkgs),$(LDFLAGS_$(x))) LDLIBS=$(foreach x,$(pkgs),$(LDLIBS_$(x))) -lrt -lpthread ../libeze/libeze.a -PROGS=disk-indexer disk-monitor audio-cmd music-player input-monitor http-monitor disk-util +PROGS=disk-indexer disk-monitor audio-cmd music-player input-monitor http-monitor index-util GENERATED=dbmarshal.c dbmarshal.h player.h @@ -46,7 +46,7 @@ all: $(PROGS) #input-monitor: input-monitor.o notify.o blobs.o dump: dump.o dbindex.o -#disk-util: disk-util.o dbindex.o dbmarshal.o +#index-util: index-util.o dbindex.o dbmarshal.o dbmarshal.h: blobs.o $(EZE)/ez-blob-compiler @@ -87,7 +87,7 @@ http-monitor.o: http-monitor.c player.h engine: engine.o #ez-blob-io.o http: http.o -bin_COMMANDS = disk-monitor disk-indexer audio-cmd music-player input-monitor http-monitor disk-util +bin_COMMANDS = disk-monitor disk-indexer indexer-cmd audio-cmd music-player input-monitor 
http-monitor index-util SOURCES= \ analyse.c \ @@ -97,17 +97,21 @@ SOURCES= \ dbmarshal.c \ disk-indexer.c \ disk-monitor.c \ - disk-util.c \ + index-util.c \ + indexer-cmd.c \ input-monitor.c \ music-player.c \ notify.c +all: $(bin_COMMANDS) + disk-monitor: disk-monitor.o dbindex.o dbmarshal.o blobs.o notify.o disk-indexer: disk-indexer.o dbindex.o dbmarshal.o blobs.o notify.o analyse.o +indexer-cmd: indexer-cmd.o notify.o blobs.o audio-cmd: audio-cmd.o notify.o blobs.o music-player: music-player.o notify.o dbindex.o dbmarshal.o blobs.o input-monitor: input-monitor.o notify.o blobs.o -disk-util: disk-util.o dbindex.o dbmarshal.o blobs.o +index-util: index-util.o dbindex.o dbmarshal.o blobs.o http-monitor: http-monitor.o dbindex.o notify.o dbmarshal.o blobs.o ../libeze/libeze.a dbindex.o: dbmarshal.h diff --git a/README b/README index b68d051..b7572c6 100644 --- a/README +++ b/README @@ -1,17 +1,67 @@ +disk-indexer +------------ + +Server to index disks. + +Environment + + -d path Path to database. + +Start Server + + All known disks will be (re)scanned and updated to reflect their current + collection of music files. + +indexer-cmd +----------- + +Send commands to disk indexer. + + check Perform some consistency checks on the database. + + shuffle (re)create the shuffle playlist. + + add uuid path Add a new disk or rescan an existing disk. + + quit Tell the server to shut down. + +index-util +---------- + +Environment & Object Selectors + + -d path Path to database. + + -f fid File ID + -d diskid Disk ID + -s seq Sequence Number + +Commands + + shuffle Shuffle all files to the 'shuffle' playlist + file-dump noop + files Dump files + lists Show list names + disks Show disk names + Design Thoughts -------------- Overall playlist behaviour? - - Need a "current playlist" - - When a playlist finishes, go back to the all playlist. 
- - Also need a "scratch playlist" + - priority is + - play now file + - jukebox playlist (add to playing) + - requested playlist + - default playlist + + - junk playlist - or junk flag? System playlists * default / all:shuffle - All tracks, shuffled. The default playlist. + All tracks, (shuffled?). The default playlist. * queue User selected tracks, in order. diff --git a/dbindex.c b/dbindex.c index 283f696..a74d559 100644 --- a/dbindex.c +++ b/dbindex.c @@ -71,11 +71,11 @@ TODO: playlist should be linked list Alternative: - forward: list_by_file [list.id] -> [seq][file.id] with custom dupsort compare - reverse: file_by_list [file.id] -> [list.id][seq] + forward: file_by_list [list.id] -> [seq][file.id] with custom dupsort compare on [seq] only + reverse: list_by_file [file.id] -> [list.id][seq] reverse is required to navigate the playlist properly if the sequence order changes. - alternative idea: just use the shuffle list as the playlist always? + alternative idea: just use the shuffle list as the playlist always and copy it in when necessary */ /* Value stored in file-by-list */ @@ -114,7 +114,7 @@ struct dbindex { MDB_dbi file_by_suffix; - // ? maybe it should be a playlist ? + // LEGACY - TBD ? maybe it should be a playlist ? 
MDB_dbi shuffle; // seq to file MDB_dbi shuffle_by_file;// file to seq FOREIGN @@ -144,16 +144,12 @@ static uint32_t find_next_id(MDB_txn *tx, MDB_dbi db) { int r; uint32_t id = 1; - printf("find last value of db\n"); mdb_cursor_open(tx, db, &cursor); r = mdb_cursor_get(cursor, &key, &data, MDB_LAST); if (r == 0) { assert(key.mv_size == sizeof(id)); memcpy(&id, key.mv_data, sizeof(id)); - printf("found, was %d\n", id); id += 1; - } else { - printf("not found (%d), using %d\n", r, id); } mdb_cursor_close(cursor); @@ -234,7 +230,7 @@ dbindex *dbindex_open(const char *ipath) { db->listid = find_next_id(tx, db->list); db->fileid = find_next_id(tx, db->file); - { + if (0) { MDB_cursor *cursor; MDB_val key = { 0 }, data = { 0 }; int r; @@ -265,7 +261,7 @@ dbindex *dbindex_open(const char *ipath) { db = NULL; } - printf("dbindex open, disk.id=%d list.id=%d file.id=%d\n", db->diskid, db->listid, db->fileid); + printf("index open: disk.id=%d list.id=%d file.id=%d\n", db->diskid, db->listid, db->fileid); free(dpath); return db; @@ -1456,6 +1452,8 @@ int dblist_del(dbtxn *txn, dbindex *db, int listid) { MDB_cursor *cursor; int res; + // TODO: deleting the reverse list can perform GET_BOTH_RANGE i think + mdb_txn_begin(db->env, txn, 0, &tx); dblist_dump(tx, db); @@ -1526,9 +1524,9 @@ int dblist_add_file(MDB_txn *txn, dbindex *db, dblist *d, int fileid) { data.mv_data = &fvalue; data.mv_size = sizeof(fvalue); - printf("put file by list: listid = %d { seq = %d fileid = %d }\n", d->id, fvalue.seq, fvalue.fileid); + printf("put file by list: { listid = %d } <- { seq = %d fileid = %d }\n", d->id, fvalue.seq, fvalue.fileid); - if ((res = mdb_put(tx, db->file_by_list, &key, &data, MDB_NOOVERWRITE | MDB_NODUPDATA))) + if ((res = mdb_put(tx, db->file_by_list, &key, &data, MDB_NODUPDATA))) goto fail; key.mv_data = &fileid; @@ -1538,7 +1536,7 @@ int dblist_add_file(MDB_txn *txn, dbindex *db, dblist *d, int fileid) { printf("put list by file: fileid = %d { listid = %d .seq = %d }\n", 
fileid, rvalue.listid, rvalue.seq); - if ((res = mdb_put(tx, db->list_by_file, &key, &data, MDB_NOOVERWRITE | MDB_NODUPDATA))) + if ((res = mdb_put(tx, db->list_by_file, &key, &data, MDB_NODUPDATA))) goto fail; // update list record with changed size @@ -1619,63 +1617,93 @@ int dblist_del_file(MDB_txn *txn, dbindex *db, struct dblistcursor *list) { struct dbfilelist fvalue = { .seq = list->seq, .fileid = list->fileid }; struct dblistfile rvalue = { .listid = list->listid, .seq = list->seq }; - dblist_dump(txn, db); + printf("list_del_file: lid=%4d seq=%4d fid=%4d\n", list->listid, list->seq, list->fileid); - key.mv_data = &list->listid; - key.mv_size = sizeof(list->listid); - if (res = mdb_get(txn, db->list, &key, &data)) + if (res = mdb_txn_begin(db->env, txn, 0, &tx)) goto fail0; - // find list:seq from list_by_file - // ... + int delcursor = 0; + int delfile = 0; + int dellist = 0; - printf("delete @ %d from list %d\n", list->seq, list->listid); + if (list->seq == 0) { + // No sequence, lookup (first) fileid for the list + if (res = mdb_cursor_open(tx, db->list_by_file, &cursor)) + goto fail; - if (res = mdb_txn_begin(db->env, txn, 0, &tx)) - goto fail0; + key.mv_data = &list->fileid; + key.mv_size = sizeof(list->fileid); + data.mv_data = &rvalue; + data.mv_size = sizeof(rvalue); - // Delete forward and reverse based on all parameters + if (res = mdb_cursor_get(cursor, &key, &data, MDB_GET_BOTH_RANGE)) + goto fail; - key.mv_data = &list->listid; - key.mv_size = sizeof(list->listid); - data.mv_data = &fvalue; - data.mv_size = sizeof(fvalue); + fvalue.seq = list->seq = ((struct dblistfile *)data.mv_data)->seq; - if (res = mdb_cursor_open(tx, db->file_by_list, &cursor)) - goto fail1; + printf("list_del_file: found seq=%4d\n", list->seq); - printf(" lookup listid %d seq %d\n", list->listid, fvalue.seq); - if (res = mdb_cursor_get(cursor, &key, &data, MDB_GET_BOTH)) - goto fail; + delcursor = 1; + delfile = 1; + } else if (list->fileid == 0) { + // Lookup fileid for 
list[seq] + if (res = mdb_cursor_open(tx, db->file_by_list, &cursor)) + goto fail; - printf("file found %d list %d seq %d\n", ((struct dbfilelist *)data.mv_data)->fileid, *(int*)key.mv_data, ((struct dbfilelist *)data.mv_data)->seq); + key.mv_data = &list->listid; + key.mv_size = sizeof(list->listid); + data.mv_data = &fvalue; + data.mv_size = sizeof(fvalue); - printf(" delete file by list\n"); + if (res = mdb_cursor_get(cursor, &key, &data, MDB_GET_BOTH)) + goto fail; - uint32_t fid = ((struct dbfilelist *)data.mv_data)->fileid; + list->fileid = ((struct dbfilelist *)data.mv_data)->fileid; - if (res = mdb_del(tx, db->file_by_list, &key, &data)) - goto fail; + printf("list_del_file: found fid=%4d\n", list->fileid); - key.mv_data = &fid; - key.mv_size = sizeof(fid); - data.mv_data = &rvalue; - data.mv_size = sizeof(rvalue); + delcursor = 1; + dellist = 1; + } else { + // use supplied values + delfile = 1; + dellist = 1; + } - printf(" delete list by file file=%d list=%d seq=%d\n", fid, rvalue.listid, rvalue.seq); - if (res = mdb_del(tx, db->list_by_file, &key, &data)) + if (delcursor && (res = mdb_cursor_del(cursor, 0))) goto fail; - mdb_cursor_close(cursor); + if (delfile) { + key.mv_data = &list->listid; + key.mv_size = sizeof(list->listid); + data.mv_data = &fvalue; + data.mv_size = sizeof(fvalue); + + if (res = mdb_del(tx, db->file_by_list, &key, &data)) + goto fail; + } + + if (dellist) { + key.mv_data = &list->fileid; + key.mv_size = sizeof(list->fileid); + data.mv_data = &rvalue; + data.mv_size = sizeof(rvalue); + + if (res = mdb_del(tx, db->list_by_file, &key, &data)) + goto fail; + } + + if (delcursor) + mdb_cursor_close(cursor); - dblist_dump(tx, db); mdb_txn_commit(tx); return 0; fail: printf("fail: %s\n", mdb_strerror(res)); - mdb_cursor_close(cursor); -fail1: + if (delcursor) + mdb_cursor_close(cursor); + mdb_txn_abort(tx); fail0: return res; @@ -2372,29 +2400,44 @@ dbfile *dbscan_list_entry(dbtxn *tx, dbscan *scan, dbindex *db, dbid_t listid, i if 
(listid == 0) { if (dbscan_init(tx, scan, db, db->file_by_path, DBFILE_DESC, dbfile_decode_raw) == 0) { - MDB_cursor *cursor; + if (fileid != 0) { + // If starting on a given file, first look it up to find the path start + MDB_cursor *cursor; + + dbscan_init_key(scan, fileid); - dbscan_init_key(scan, fileid); + // Get file or next file + scan->res = mdb_cursor_open(tx, db->file, &cursor); + scan->res = mdb_cursor_get(cursor, &scan->key, &scan->data, MDB_SET_RANGE); + mdb_cursor_close(cursor); + if (scan->res == 0) { + dbfile *file = dbscan_decode(scan); + + if (file) { + // position by-path cursor for scanning + char path[strlen(file->path) + 10]; + MDB_val key; - // Get file or next file - scan->res = mdb_cursor_open(tx, db->file, &cursor); - scan->res = mdb_cursor_get(cursor, &scan->key, &scan->data, MDB_SET_RANGE); - mdb_cursor_close(cursor); - if (scan->res == 0) { - dbfile *file = dbscan_decode(scan); + sprintf(path, "%08x%s", file->diskid, file->path); - if (file) { - // position by-path cursor for scanning - char path[strlen(file->path) + 10]; - MDB_val key; + printf("scan path from path %s\n", path); - sprintf(path, "%08x%s", file->diskid, file->path); - key.mv_data = path; - key.mv_size = strlen(path); - scan->res = mdb_cursor_get(scan->cursor, &key, &scan->key, MDB_SET); + key.mv_data = path; + key.mv_size = strlen(path); + scan->res = mdb_cursor_get(scan->cursor, &key, &scan->key, MDB_SET); - return file; + return file; + } } + } else { + // Just start at the start + MDB_val key; + + printf("scan path from start\n"); + + if ((scan->res = mdb_cursor_get(scan->cursor, &key, &scan->key, MDB_FIRST)) == 0 + && (scan->res = mdb_get(scan->tx, scan->db->file, &scan->key, &scan->data)) == 0) + return dbscan_decode(scan); } } } else { @@ -2444,9 +2487,21 @@ static dbfile *scan_list_entry_next(dbscan *scan, MDB_cursor_op next0, MDB_curso MDB_val key; if (scan->list_entry.listid == 0) { +#if 0 if ((scan->res = mdb_cursor_get(scan->cursor, &key, &scan->key, next0)) 
== 0 && (scan->res = mdb_get(scan->tx, scan->db->file, &scan->key, &scan->data)) == 0) return dbscan_decode(scan); +#else + if ((scan->res = mdb_cursor_get(scan->cursor, &key, &scan->key, next0)) != 0) { + printf("cursor get: %s\n", mdb_strerror(scan->res)); + return NULL; + } + if ((scan->res = mdb_get(scan->tx, scan->db->file, &scan->key, &scan->data)) != 0) { + printf("data get: %s\n", mdb_strerror(scan->res)); + return NULL; + } + return dbscan_decode(scan); +#endif } else { MDB_val data; diff --git a/disk-indexer.c b/disk-indexer.c index 5977f8b..8092c08 100644 --- a/disk-indexer.c +++ b/disk-indexer.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -38,7 +39,6 @@ #include "dbindex.h" #include "notify.h" -#include "analyse.h" struct indexer { @@ -538,272 +538,20 @@ static void indexer(const char *path) { dbindex_close(db); } -void check(const char *path) { - dbindex *db = dbindex_open(path); - - // Check indices - printf("Check file-by-diskid index\n"); - dbtxn *tx = dbindex_begin(db, NULL, 1); - dbscan *scan = dbfile_scan_disk(tx, db, -1); - uint32_t fid; - int count =0; - - while ((fid = dbfile_scan_next(scan)) != ~0) { - dbfile *f = dbfile_get(tx, db, fid); - if (f == NULL) { - printf(" %d missing\n", fid); - } else { - printf(" in %d\n", fid); - } - dbfile_free(f); - count++; - } - printf("total %d\n", count); - dbfile_scan_close(scan); - - dbindex_close(db); -} - -#if 1 - - -/* - (naive) idea for full text sub-string search inside lmdb - just build full suffix tables - - [suffix] [all members] - - sub-string search is: - suffix >= query and suffix[0 .. query.length] == query - - */ -#if 0 -void build_suffix(dbtxn *tx, dbindex *db, uint32_t fileid, const char *words) { - int state = 0; - size_t len = strlen(words); - char word[len+1]; // + ?? 
- wchar_t lwords[len+1]; - - /* convert to wide char astring */ - size_t res; - - len = mbrtowcs(lwords, words, len, NULL); - if (len == (size_t)-1) - return; - - - - //printf("words: %s\n", words); - - /* - Basic idea: - Break string into words. - Strip puncutation. - lower-scase - Build suffix tables (in-memory db?) - - we want ' included though, translate other 's into '? or remove all? - */ - - int high = 0; - - wchar_t c; - wchar_t *p = lwords; - wchar_t *s = p; - do { - c = *p; - - if (c == 0 || !iswgraph(c) || iswpunct(c)) { - *p = 0; - while (p - s >= 3) { - wchar_t *t = s++; - - len = wcstombs(word, &t, sizeof(word), NULL); - if (len < sizeof(word)) { - dbfile_put_suffix(tx, db, word, fileid); - } else { - fprintf(stderr, "overflow %s\n", words); - } - } - s = ++p; - } else { - *p++ = towlower(c); - } - } while (c); - -} -#endif - -int dbfile_clear_suffix(dbtxn *tx, dbindex *db); - -void suffix(const char *path) { - dbindex *db = dbindex_open(path); - dbtxn *tx = dbindex_begin(db, NULL, 0); - - dbfile_clear_suffix(tx, db); - - dbscan *scan = dbfile_scan_disk(tx, db, -1); - uint32_t fid; - ez_list list = EZ_INIT_LIST(list); - - while ((fid = dbfile_scan_next(scan)) != ~0) { - dbfile *f = dbfile_get(tx, db, fid); - struct string_node *w; - - //printf("%s\n", f->title); - analyse_words(&list, 1, f->title); - while ((w = ez_list_remhead(&list))) { - //printf(" %s\n", w->value); - dbfile_put_suffix(tx, db, w->value, f->id); - free(w); - } - - dbfile_free(f); - } - - dbfile_scan_close(scan); - dbindex_commit(tx); - dbindex_close(db); -} - -void search_suffix(const char *path) { - dbindex *db = dbindex_open(path); - if (1) { - dbtxn *tx = dbindex_begin(db, NULL, 0); - - dbfile_search_substring(tx, db, "union"); - dbindex_abort(tx); - } else { - dbfile *matches[50]; - int len = dbfile_search(db, "cnic", matches, 50); - printf("matches: %d\n", len); - } - dbindex_close(db); -} -#endif - -#include - int main(int argc, char **argv) { - av_log_set_level(AV_LOG_ERROR); 
- - //avcodec_register_all(); - //av_register_all(); - //avformat_network_init(); const char *dbdir = MAIN_INDEX; setlocale(LC_ALL, "en_AU.UTF-8"); + av_log_set_level(AV_LOG_ERROR); + if (argc > 2 && strcmp(argv[1], "-d") == 0) { dbdir = argv[2]; argv += 2; argc -= 2; } - if (1) { - //suffix(dbdir); - search_suffix(dbdir); - return 0; - } - - -#if 0 - { - int dbfile_searchx(dbindex *db, const char *pattern, dbfile **results, int maxlen); - dbindex *db = dbindex_open(MAIN_INDEX); - dbfile *list[150]; - int len = dbfile_searchx(db, "deep sessions", list, 150); - - for (int i=0;iid, list[i]->diskid, list[i]->title, list[i]->path); - } - - dbindex_close(db); - return 0; - } -#endif - - if (argc > 1) { - if (strcmp(argv[1], "check") == 0) - check(dbdir); - else { - notify_t q = notify_writer_new(NOTIFY_INDEXER); - - if (q) { - if (strcmp(argv[1], "quit") == 0) { - notify_msg_send(q, NOTIFY_QUIT, 0, 0); - } else if (strcmp(argv[1], "shuffle") == 0) { - notify_msg_send(q, NOTIFY_SHUFFLE, 0, 0); - } else if (strcmp(argv[1], "add") == 0 && argc == 4) { - dbdisk disk = { - .uuid = argv[2], - .label = "system", - .type = "system", - .mount = argv[3] - }; - notify_msg_send(q, NOTIFY_DISK_ADD, 0, &disk); - } - notify_close(q); - } - } - } else - indexer(dbdir); - - return 0; - - - char *uuid = argc > 1 ? argv[1] : "some-disk"; - - - dbindex *db = dbindex_open("/home/notzed/playerz.db"); - int res; - - if (!db) { - return 1; - } - - if (0) { - dbtxn *tx = dbindex_begin(db, NULL, 1); - int diskid = 4; - dbscan *scan = dbfile_scan_disk(tx, db, diskid); - int count = 0; - uint32_t fid; - - while ((fid = dbfile_scan_next(scan)) != ~0) { - count++; - } - printf(" %d files on disk %d\n", count, diskid); - - dbindex_commit(tx); - dbindex_close(db); - return 0; - } - - struct indexer ix; - - res = indexer_init(&ix, db, "/data/hd4/Music", uuid); - if (res == 0) { - - //for (int i=1;i. 
-*/ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#include - -#include "ez-list.h" -#include "ez-set.h" -#include "ez-bitset.h" - -#include "dbindex.h" - -void dbshuffle_init2(dbindex *db); -int dbdisk_del_id(dbtxn *txn, dbindex *db, int diskid); -void dbindex_validate(dbindex *db); - -int dblist_reset(dbtxn *tx, dbindex *db); -int dblist_del_file(dbtxn *txn, dbindex *db, struct dblistcursor *list); - -int main(int argc, char **argv) { - const char *dbdir = MAIN_INDEX; - int fileid = 0, diskid = 0, listid = 0; - int seq = 0; - - setlocale(LC_ALL, "en_AU.UTF-8"); - - if (argc > 2 && strcmp(argv[1], "-d") == 0) { - dbdir = argv[2]; - argv += 2; - argc -= 2; - } - - dbindex *db = dbindex_open(dbdir); - - for (int i=1;iid, file->title, file->path); - dbfile_free(file); - } - dbscan_free(&scan); - dbindex_abort(tx); - } else if (strcmp(cmd, "--file-del") == 0) { - dbtxn *tx = dbindex_begin(db, NULL, 0); - int fid = atoi(argv[++i]); - - if (dbfile_del_id(tx, db, fid) == 0) - dbindex_commit(tx); - else - dbindex_abort(tx); - } else if (strcmp(cmd, "--lists") == 0) { - dbtxn *tx = dbindex_begin(db, NULL, 1); - dbscan scan; - - for (dblist *list = dbscan_list(tx, &scan, db, 0); list; list = dbscan_list_next(&scan)) { - printf("id: %d\n", list->id); - printf("list: %s\n", list->name); - printf("size: %d\n", list->size); - printf("\n"); - dblist_free(list); - } - dbscan_free(&scan); - dbindex_abort(tx); - } else if (strcmp(cmd, "--lists-reset") == 0) { - dbtxn *tx = dbindex_begin(db, NULL, 0); - - dblist_reset(tx, db); - dbindex_commit(tx); - } else if (strcmp(cmd, "--list-add") == 0) { - dblist list = { - .name = argv[++i] - }; - - dblist_add(NULL, db, &list); - } else if (strcmp(cmd, "--list-del") == 0) { - int lid = atoi(argv[++i]); - - dblist_del(NULL, db, lid); - } else if (strcmp(cmd, "--list-add-file") == 0) { - int lid = atoi(argv[++i]); - int fid = atoi(argv[++i]); - dbtxn *tx = dbindex_begin(db, NULL, 
0); - dblist *list = dblist_get(tx, db, lid); - - if (dblist_add_file(tx, db, list, fid) == 0) - dbindex_commit(tx); - else - dbindex_abort(tx); - } else if (strcmp(cmd, "--list-del-file") == 0) { - int lid = atoi(argv[++i]); - int seq = atoi(argv[++i]); - //int fid = atoi(argv[++i]); - dbtxn *tx = dbindex_begin(db, NULL, 0); - struct dblistcursor list = { - .listid = lid, - .seq = seq, - //.fileid = fid - }; - - if (dblist_del_file(tx, db, &list) == 0) - dbindex_commit(tx); - else - dbindex_abort(tx); - } else if (strcmp(cmd, "--list-dump") == 0) { - int lid = atoi(argv[++i]); - dbtxn *tx = dbindex_begin(db, NULL, 1); - dbscan scan; - - for (dbfile *file = dbscan_list_entry(tx, &scan, db, lid, seq, fileid); file; file = dbscan_list_entry_next(&scan)) { - printf("%4d seq=%4d title: %-60s %s\n", file->id, dbscan_list_entry_seq(&scan), file->title, file->path); - dbfile_free(file); - } - dbscan_free(&scan); - dbindex_abort(tx); - } else if (strcmp(cmd, "--disks") == 0) { - dbtxn *tx = dbindex_begin(db, NULL, 1); - dbscan scan; - - for (dbdisk *disk = dbscan_disk(tx, &scan, db, 0); disk; disk = dbscan_disk_next(&scan)) { - printf("id: %d\n", disk->id); - printf("uuid: %s\n", disk->uuid); - printf("label: %s\n", disk->label); - printf("type: %s\n", disk->type); - printf("mount: %s\n", disk->mount); - printf("\n"); - dbdisk_free(disk); - } - dbscan_free(&scan); - dbindex_abort(tx); - } else if (strcmp(cmd, "--disk-del") == 0) { - dbtxn *tx = dbindex_begin(db, NULL, 0); - int diskid = atoi(argv[++i]); - - dbdisk_del_id(tx, db, diskid); - dbindex_commit(tx); - } else if (strcmp(cmd, "--validate") == 0) { - dbindex_validate(db); - } else if (strcmp(cmd, "--search") == 0) { - const char *match = argv[++i]; - dbfile *results[100]; - int res = dbfile_search(db, match, results, 100); - - if (res >= 0) { - for (int i=0;iid, results[i]->title); - dbfile_free(results[i]); - } - } else { - printf("search failed\n"); - } - } - } - - dbindex_close(db); - - return 0; -} diff --git 
a/index-util.c b/index-util.c new file mode 100644 index 0000000..48f4406 --- /dev/null +++ b/index-util.c @@ -0,0 +1,349 @@ +/* index-util.c: utilities for managing indices. + + Copyright (C) 2021 Michael Zucchi + + This program is free software: you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include + +#include "ez-list.h" +#include "ez-set.h" +#include "ez-bitset.h" + +#include "dbindex.h" + +void dbshuffle_init2(dbindex *db); +int dbdisk_del_id(dbtxn *txn, dbindex *db, int diskid); +void dbindex_validate(dbindex *db); + +int dblist_reset(dbtxn *tx, dbindex *db); +int dblist_del_file(dbtxn *txn, dbindex *db, struct dblistcursor *list); + +/* ********************************************************************** */ + +#if 0 + +#include "analyse.h" + +// WORK IN PROGRESS, not hooked up yet + +/* + (naive) idea for full text sub-string search inside lmdb + just build full suffix tables + + [suffix] [all members] + + sub-string search is: + suffix >= query and suffix[0 .. query.length] == query + + */ +#if 0 +void build_suffix(dbtxn *tx, dbindex *db, uint32_t fileid, const char *words) { + int state = 0; + size_t len = strlen(words); + char word[len+1]; // + ?? 
+ wchar_t lwords[len+1]; + + /* convert to wide char astring */ + size_t res; + + len = mbrtowcs(lwords, words, len, NULL); + if (len == (size_t)-1) + return; + + + + //printf("words: %s\n", words); + + /* + Basic idea: + Break string into words. + Strip puncutation. + lower-scase + Build suffix tables (in-memory db?) + + we want ' included though, translate other 's into '? or remove all? + */ + + int high = 0; + + wchar_t c; + wchar_t *p = lwords; + wchar_t *s = p; + do { + c = *p; + + if (c == 0 || !iswgraph(c) || iswpunct(c)) { + *p = 0; + while (p - s >= 3) { + wchar_t *t = s++; + + len = wcstombs(word, &t, sizeof(word), NULL); + if (len < sizeof(word)) { + dbfile_put_suffix(tx, db, word, fileid); + } else { + fprintf(stderr, "overflow %s\n", words); + } + } + s = ++p; + } else { + *p++ = towlower(c); + } + } while (c); + +} +#endif + +int dbfile_clear_suffix(dbtxn *tx, dbindex *db); + +void suffix(const char *path) { + dbindex *db = dbindex_open(path); + dbtxn *tx = dbindex_begin(db, NULL, 0); + + dbfile_clear_suffix(tx, db); + + dbscan *scan = dbfile_scan_disk(tx, db, -1); + uint32_t fid; + ez_list list = EZ_INIT_LIST(list); + + while ((fid = dbfile_scan_next(scan)) != ~0) { + dbfile *f = dbfile_get(tx, db, fid); + struct string_node *w; + + //printf("%s\n", f->title); + analyse_words(&list, 1, f->title); + while ((w = ez_list_remhead(&list))) { + //printf(" %s\n", w->value); + dbfile_put_suffix(tx, db, w->value, f->id); + free(w); + } + + dbfile_free(f); + } + + dbfile_scan_close(scan); + dbindex_commit(tx); + dbindex_close(db); +} + +void search_suffix(const char *path) { + dbindex *db = dbindex_open(path); + if (1) { + dbtxn *tx = dbindex_begin(db, NULL, 0); + + dbfile_search_substring(tx, db, "union"); + dbindex_abort(tx); + } else { + dbfile *matches[50]; + int len = dbfile_search(db, "cnic", matches, 50); + printf("matches: %d\n", len); + } + dbindex_close(db); +} +#endif + +/* ********************************************************************** */ + 
+// some basic consistency checking +// see also dbindex_validate, need to be merged +static void check(dbindex *db) { + printf("Check file-by-diskid index\n"); + dbtxn *tx = dbindex_begin(db, NULL, 1); + dbscan *scan = dbfile_scan_disk(tx, db, -1); + uint32_t fid; + int count =0; + + while ((fid = dbfile_scan_next(scan)) != ~0) { + dbfile *f = dbfile_get(tx, db, fid); + if (f == NULL) { + printf(" %d missing\n", fid); + } else { + printf(" in %d\n", fid); + } + dbfile_free(f); + count++; + } + printf("total %d\n", count); + dbfile_scan_close(scan); +} + +/* ********************************************************************** */ + +int main(int argc, char **argv) { + const char *dbdir = MAIN_INDEX; + int fileid = 0, diskid = 0, listid = 0; + int seq = 0; + + setlocale(LC_ALL, "en_AU.UTF-8"); + + if (argc > 2 && strcmp(argv[1], "-d") == 0) { + dbdir = argv[2]; + argv += 2; + argc -= 2; + } + + dbindex *db = dbindex_open(dbdir); + + for (int i=1;iid, file->title, file->path); + dbfile_free(file); + } + dbscan_free(&scan); + dbindex_abort(tx); + } else if (strcmp(cmd, "file-del") == 0) { + dbtxn *tx = dbindex_begin(db, NULL, 0); + + if (dbfile_del_id(tx, db, fileid) == 0) + dbindex_commit(tx); + else + dbindex_abort(tx); + } else if (strcmp(cmd, "lists") == 0) { + dbtxn *tx = dbindex_begin(db, NULL, 1); + dbscan scan; + + for (dblist *list = dbscan_list(tx, &scan, db, 0); list; list = dbscan_list_next(&scan)) { + printf("lid=%4d size=%5d name=%s\n", list->id, list->size, list->name); + dblist_free(list); + } + dbscan_free(&scan); + dbindex_abort(tx); + } else if (strcmp(cmd, "lists-reset") == 0) { + dbtxn *tx = dbindex_begin(db, NULL, 0); + + dblist_reset(tx, db); + dbindex_commit(tx); + } else if (strcmp(cmd, "list-add") == 0) { + dblist list = { + .name = argv[++i] + }; + + dblist_add(NULL, db, &list); + } else if (strcmp(cmd, "list-del") == 0) { + dblist_del(NULL, db, listid); + } else if (strcmp(cmd, "list-add-file") == 0) { + if (listid != 0 && fileid != 0) { + 
dbtxn *tx = dbindex_begin(db, NULL, 0); + dblist *list = dblist_get(tx, db, listid); + + if (list) { + if (dblist_add_file(tx, db, list, fileid) == 0) + dbindex_commit(tx); + else + dbindex_abort(tx); + } else { + printf("%s: unknown list %d\n", cmd, listid); + } + } else { + printf("%s: Must supply fileid and listid\n", cmd); + } + } else if (strcmp(cmd, "list-del-file") == 0) { + dbtxn *tx = dbindex_begin(db, NULL, 0); + struct dblistcursor list = { + .listid = listid, + .seq = seq, + .fileid = fileid + }; + + if (dblist_del_file(tx, db, &list) == 0) + dbindex_commit(tx); + else + dbindex_abort(tx); + } else if (strcmp(cmd, "list-dump") == 0) { + dbtxn *tx = dbindex_begin(db, NULL, 1); + dbscan scan; + + for (dbfile *file = dbscan_list_entry(tx, &scan, db, listid, seq, fileid); file; file = dbscan_list_entry_next(&scan)) { + printf("fid=%4d seq=%4d title: %-60s %s\n", file->id, dbscan_list_entry_seq(&scan), file->title, file->path); + dbfile_free(file); + } + dbscan_free(&scan); + dbindex_abort(tx); + } else if (strcmp(cmd, "disks") == 0) { + dbtxn *tx = dbindex_begin(db, NULL, 1); + dbscan scan; + + for (dbdisk *disk = dbscan_disk(tx, &scan, db, 0); disk; disk = dbscan_disk_next(&scan)) { + printf("did=%4d uuid='%s' type='%s' label='%s' mount='%s'\n", disk->id, disk->uuid, disk->type, disk->label, disk->mount); + dbdisk_free(disk); + } + dbscan_free(&scan); + dbindex_abort(tx); + } else if (strcmp(cmd, "disk-del") == 0) { + if (diskid != 0) { + dbtxn *tx = dbindex_begin(db, NULL, 0); + + dbdisk_del_id(tx, db, diskid); + dbindex_commit(tx); + } else { + printf("%s: Must supply diskid\n", cmd); + } + } else if (strcmp(cmd, "validate") == 0) { + dbindex_validate(db); + } else if (strcmp(cmd, "search") == 0) { + const char *match = argv[++i]; + dbfile *results[100]; + int res = dbfile_search(db, match, results, 100); + + if (res >= 0) { + for (int i=0;iid, results[i]->title); + dbfile_free(results[i]); + } + } else { + printf("search failed\n"); + } + } + } + + 
dbindex_close(db); + + return 0; +} diff --git a/indexer-cmd.c b/indexer-cmd.c new file mode 100644 index 0000000..99b524b --- /dev/null +++ b/indexer-cmd.c @@ -0,0 +1,34 @@ + +#include +#include +#include + +#include "notify.h" +#include "dbindex.h" + +int main(int argc, char **argv) { + if (argc > 1) { + notify_t q = notify_writer_new(NOTIFY_INDEXER); + + if (q) { + if (strcmp(argv[1], "quit") == 0) { + notify_msg_send(q, NOTIFY_QUIT, 0, 0); + } else if (strcmp(argv[1], "shuffle") == 0) { + notify_msg_send(q, NOTIFY_SHUFFLE, 0, 0); + } else if (strcmp(argv[1], "add") == 0 && argc == 4) { + dbdisk disk = { + .uuid = argv[2], + .label = "system", + .type = "system", + .mount = argv[3] + }; + notify_msg_send(q, NOTIFY_DISK_ADD, 0, &disk); + } + notify_close(q); + } else { + fprintf(stderr, "error: Unable to open IPC channel\n"); + } + } + + return 0; +} -- 2.39.5