Tabelle categories, Keyspace themes_<x>

(Präfixe von Labeln werden nicht berücksichtigt (zB vocnet:hasAsLabel"München"))

SpaltennameDatentypBeschreibung
idVARCHAR (PRIMKEY)Primärschlüssel
parent_idVARCHARId der Oberkategorie
xtree_uriVARCHARxTree-URI
labelVARCHARlabel
notationVARCHARnotation
CREATE TABLE IF NOT EXISTS categories (id TEXT PRIMARY KEY, parent_id TEXT, xtree_uri TEXT, label TEXT, notation TEXT);

CREATE MATERIALIZED VIEW IF NOT EXISTS categories_parents AS SELECT parent_id, id, xtree_uri, label, notation FROM categories WHERE parent_id IS NOT NULL AND id IS NOT NULL PRIMARY KEY (parent_id, id) WITH CLUSTERING ORDER BY (parent_id ASC, id ASC);


Tabelle keywords, Keyspace themes_<x>

(Präfixe von Labeln werden nicht berücksichtigt (zB vocnet:hasAsLabel"München"))

SpaltennameDatentypBeschreibung
idVARCHAR (PRIMKEY)Primärschlüssel
created_atVARCHARZeitpunkt des Anlegens
primary_gnd_uriVARCHARPrimäre GND-URI
related_gnd-urisList<VARCHAR>zusätzliche GND-URIs
gnd_entity_idVARCHARPrimärschlüssel des Gnd-typ (eg Historisches Ereignis) aus der Tabelle gnd_entities
hierarchical_category_idsList<VARCHAR>Ids aller darüberliegenden Kategorien (komplette Hierarchie, zB wg Liste aller Schlagwörter eines Themenportals)
category_idsList<VARCHAR>Ids der direkt darüberliegenden Kategorien.
xtree_uriVARCHARxTree-URI
labelVARCHARlabel
synonymsList<VARCHAR>alternative labels
noteVARCHARBemerkung
CREATE TABLE IF NOT EXISTS keywords (id TEXT PRIMARY KEY, created_at TEXT, primary_gnd_uri TEXT, related_gnd_uris LIST<TEXT>, gnd_entity_id TEXT, hierarchical_category_ids LIST<TEXT>, category_id LIST<TEXT>, "xtree_uri TEXT, label TEXT, synonyms LIST<TEXT>, note List<TEXT>);


Tabelle keywords_search, Keyspace themes_<x>

SpaltennameDatentypBeschreibung
category_idVARCHAR (PRIMKEY)Top-Level-Category-Id (Themenportal-ID)
searchVARCHAR (PRIMKEY)keyword-label, analyzed
keyword_idVARCHAR (PRIMKEY)keywordId
CREATE TABLE IF NOT EXISTS keywords_search (category_id TEXT, keyword_id TEXT, search TEXT, PRIMARY KEY (category_id, search, keyword_id));

CREATE CUSTOM INDEX IF NOT EXISTS idx_search ON keywords_search (search) USING 'org.apache.cassandra.index.sasi.SASIIndex' WITH OPTIONS ={ 'mode': 'CONTAINS','analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer','case_sensitive': 'false'};


Tabelle keywords_hierarchies, Keyspace themes_<x>

SpaltennameDatentypBeschreibung
category_idVARCHAR (PRIMKEY)category-id
keyword_idsList<VARCHAR>keyword-ids
CREATE TABLE IF NOT EXISTS keywords_hierarchies (category_id TEXT, keyword_ids List<TEXT>, PRIMARY KEY (category_id));


Tabelle objects_keywords, Keyspace target_<x>

 

DIe Tabelle wird von dem Zuordnungs-Frontend und der Ingest/ReindexApp verwendet. Sie speichert die Zuordnung einer ddbId zu einem Schlagwort.

Ein eventueller Performancegewinn lässt sich erzielen, wenn hier auch das Label des Schlagworts gespeichert wird. Dann sind aber auf ein Schlagwort mit einer keywordId keine Updates möglich (Tabelle keywords)

SpaltennameDatentypBeschreibung
ddb_idVARCHAR (PRIMKEY)ddbId
keyword_idVARCHAR (PRIMKEY)Referenz zur Tabelle keywords
top_level_category_idVARCHAR (PRIMKEY)Referenz zur Tabelle categories, iE Themenportal (oberste Ebene)
child_category_idVARCHARDie Ebene unterhalb der obersten Ebene
creatorVARCHARCreator
assignment_typeVARCHARWie wurde die referenz angelegt (MACHINE, ARCHIVE, ANYBODY)
created_atDATEZeitpunkt des Anlegens
modified_atDATEZeitpunkt der letzten Änderung (nur status kann geändert werden)
confidence_valueDOUBLEKonfidenzwert
statusVARCHARStatus (AUTOSUGGESTED, SUGGESTED, DELETESUGGESTED, ACCEPTED, REJECTED, PUBLISHED, DELETED)
CREATE TABLE IF NOT EXISTS objects_keywords (top_level_category_id TEXT, child_category_id TEXT, ddb_id TEXT, keyword_id TEXT, creator TEXT, status TEXT, assignment_type TEXT, created_at TEXT, modified_at TEXT, confidence_value DOUBLE, PRIMARY KEY (top_level_category_id, ddb_id, keyword_id, creator));

CREATE MATERIALIZED VIEW IF NOT EXISTS objects_keywords_creator AS SELECT top_level_category_id, creator, ddb_id, keyword_id, child_category_id, status, assignment_type, created_at, modified_at, confidence_value FROM objects_keywords WHERE top_level_category_id IS NOT NULL AND creator IS NOT NULL AND ddb_id IS NOT NULL AND keyword_id IS NOT NULL PRIMARY KEY (top_level_category_id, creator, ddb_id, keyword_id) WITH CLUSTERING ORDER BY (top_level_category_id ASC, creator ASC, ddb_id ASC, keyword_id ASC);

CREATE MATERIALIZED VIEW IF NOT EXISTS objects_keywords_status AS SELECT top_level_category_id, status, ddb_id, keyword_id, child_category_id, creator, assignment_type, created_at, modified_at, confidence_value FROM objects_keywords WHERE top_level_category_id IS NOT NULL AND status IS NOT NULL AND ddb_id IS NOT NULL AND keyword_id IS NOT NULL AND creator IS NOT NULL PRIMARY KEY (top_level_category_id, status, ddb_id, keyword_id, creator) WITH CLUSTERING ORDER BY (top_level_category_id ASC, status ASC, ddb_id ASC, keyword_id ASC, creator ASC);

CREATE MATERIALIZED VIEW IF NOT EXISTS objects_keywords_ddbid AS SELECT ddb_id, top_level_category_id, keyword_id, child_category_id,  status, assignment_type, creator, created_at, modified_at, confidence_value FROM objects_keywords WHERE ddb_id IS NOT NULL AND top_level_category_id IS NOT NULL AND keyword_id IS NOT NULL AND creator IS NOT NULL PRIMARY KEY (ddb_id, top_level_category_id, keyword_id, creator) WITH CLUSTERING ORDER BY (ddb_id ASC, top_level_category_id ASC, keyword_id ASC, creator ASC);

CREATE MATERIALIZED VIEW IF NOT EXISTS objects_keywords_keyword AS SELECT keyword_id, top_level_category_id, ddb_id, child_category_id, status, assignment_type, creator, created_at, modified_at, confidence_value FROM objects_keywords WHERE keyword_id IS NOT NULL AND top_level_category_id IS NOT NULL AND ddb_id IS NOT NULL AND creator IS NOT NULL PRIMARY KEY (keyword_id, top_level_category_id, ddb_id, creator) WITH CLUSTERING ORDER BY (keyword_id ASC, top_level_category_id ASC, ddb_id ASC, creator ASC);

CREATE MATERIALIZED VIEW IF NOT EXISTS objects_keywords_creators_with_status AS SELECT top_level_category_id, status, creator, ddb_id, keyword_id, child_category_id FROM objects_keywords WHERE top_level_category_id IS NOT NULL AND status IS NOT NULL AND creator IS NOT NULL AND ddb_id IS NOT NULL AND keyword_id IS NOT NULL PRIMARY KEY (top_level_category_id, status, creator, ddb_id, keyword_id) WITH CLUSTERING ORDER BY (status ASC, creator ASC, ddb_id ASC, keyword_id ASC);


Tabelle gnd_entities, Keyspace themes_<x>

Diese Tabelle wird nur benötigt falls Keywords nach gnd-typ (z.B. "Historisches Ereignis") gruppierbar oder suchbar sein sollen.

SpaltennameDatentypBeschreibung
idVARCHAR (PRIMKEY)Primärschlüssel
nameVARCHARName zB gndo:HistoricSingleEventOrEra
descriptionVARCHARBeschreibung (zB Historisches Einzelereignis (sih))
CREATE TABLE IF NOT EXISTS gnd_entities (id TEXT PRIMARY KEY, name TEXT, description TEXT);


Tabelle release_ddbids, Keyspace themes_<x>

Die Schlagwort-App schreibt in diese Tabelle die zu reindexierenden ddbIds mit neu zugeordneten oder gelöschten Schlagworten.

Die Tabelle wird von der Reindex-App gelesen

Spaltenname

Datentyp

Beschreibung

idVARCHAR (PRIMKEY)Primärschlüssel
CREATE TABLE IF NOT EXISTS release_ddbids (id TEXT PRIMARY KEY);



Tabelle release_runs, Keyspace themes_<x>

Die Tabelle hält pro Themenportal den Zeitpunkt der letzten Prozessierung von neuen Zuordnungen durch die Schlagwort-App.

Spaltenname

Datentyp

Beschreibung

top_level_category_idVARCHAR (PRIMKEY)Primärschlüssel (ThemenportalId)
timeVARCHARZeitpunkt der letzten Prozessierung
successfulINT (PRIMKEY)Ob die Prozessierung erfolgreich war
CREATE TABLE IF NOT EXISTS release_runs (top_level_category_id TEXT, successful INT, time TEXT, PRIMARY KEY(top_level_category_id, successful));


  • No labels