AbstractDb.java

package com.renomad.minum.database;

import com.renomad.minum.logging.ILogger;
import com.renomad.minum.state.Context;
import com.renomad.minum.utils.FileUtils;

import java.io.IOException;
import java.nio.file.Path;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;

/**
 * The abstract database class is a representation of the essential capabilities of
 * a Minum database.
 * <p>
 *     There are two kinds of database provided, which only differ in how they
 *     store data on disk.  The "classic" kind, {@link Db}, stores each piece of
 *     data in its own file.  This is the simplest approach.
 * </p>
 * <p>
 *     However, for significant speed gains, the newer {@link DbEngine2}
 *     stores each change as an append to a file, and consolidates the on-disk
 *     data occasionally, and on start.  That approach is thousands of times faster
 *     at writing to disk and at reading from disk at startup.
 * </p>
 * @param <T> This is the type of data, which is always an implementation of
 *           the {@link DbData} class.  See the code of {@link com.renomad.minum.security.Inmate}
 *           for an example of how this should look.
 */
public abstract class AbstractDb<T extends DbData<?>> {

    /**
     * The directory of the database on disk, as handed to the constructor.
     */
    protected final Path dbDirectory;

    /**
     * An empty instance of the type of data stored by this
     * database, used for better handling of generics.  Within this
     * class it is also used to obtain the simple class name of the
     * stored type when building error messages.
     */
    protected final T emptyInstance;

    /**
     * Used for handling some file utilities in the database like creating directories.
     * It is built in the constructor from the logger and the constants of the {@link Context}.
     */
    protected final FileUtils fileUtils;

    /**
     * Holds some system-wide information that is beneficial for components of the database
     */
    protected final Context context;

    /**
     * Used for providing logging throughout the database.  Obtained from the {@link Context}.
     */
    protected final ILogger logger;

    /**
     * The internal data structure of the database that resides in memory.  The beating heart
     * of the database while it runs.
     * <br>
     * It is a {@link ConcurrentHashMap}; this class stores each item under the
     * value of its own index.
     */
    protected final Map<Long, T> data;

    /**
     * The current index, used when creating new data items.  Each item has its own
     * index value, this is where it is tracked.
     * <br>
     * Note that the constructor of this class does not assign this field, so it
     * must be given a value elsewhere before data is written
     * (presumably by the concrete database classes - confirm there).  This class
     * resets it to 1 when the last item is deleted from memory.
     */
    protected AtomicLong index;

    // components for registered indexes (for faster read performance).  Both of
    // the following maps are keyed by the name of the index and are filled in
    // together when an index is registered.

    /**
     * This data structure is a nested map used for providing indexed data search.
     * <br>
     * The outer map is between the name of the index and the inner map.
     * <br>
     * The inner map is between strings and sets of items related to that string.
     * <br>
     * These are plain (non-concurrent) hash maps and hash sets.  This class
     * synchronizes on the database instance when it creates or removes entries
     * of the inner maps.
     */
    protected final Map<String, Map<String, Set<T>>> registeredIndexes;

    /**
     * This map holds the functions that are registered to indexes, which are used
     * to construct the mappings between string values and items in the database.
     */
    protected final Map<String, Function<T, String>> partitioningMap;

    /**
     * Prepares the state that is common to every kind of database: the
     * collaborators taken from the {@link Context} and the empty in-memory
     * structures for the data and its registered indexes.
     * <br>
     * The {@link #index} field is not assigned here.
     *
     * @param dbDirectory the directory of the database on disk
     * @param context system-wide information; the logger and constants are taken from it
     * @param instance an empty instance of the type of data stored by this database
     */
    protected AbstractDb(Path dbDirectory, Context context, T instance) {
        // collaborators obtained from the context
        this.context = context;
        this.logger = context.getLogger();
        this.fileUtils = new FileUtils(this.logger, context.getConstants());

        // values describing this particular database
        this.dbDirectory = dbDirectory;
        this.emptyInstance = instance;

        // in-memory structures, all empty to begin with
        this.data = new ConcurrentHashMap<>();
        this.registeredIndexes = new HashMap<>();
        this.partitioningMap = new HashMap<>();
    }

    /**
     * Used to cleanly stop the database.
     * <br>
     * In the case of {@link Db} this will interrupt its internal queue and tell it
     * to finish up processing.
     * <br>
     * In the case of {@link DbEngine2} this will flush data to disk.
     *
     * @see #stop(int, int)
     */
    public abstract void stop();

    /**
     * Used to cleanly stop the database, with extra allowance of time
     * for cleanup.
     * <br>
     * Note that this method mostly applies to {@link Db}, and not as much
     * to {@link DbEngine2}.  Only Db uses a processing queue on a thread which
     * is what requires a longer shutdown time for interruption.
     * @param count number of loops before we are done waiting for a clean close
     *              and instead forcibly close the instance.
     * @param sleepTime how long to wait, in milliseconds, for each iteration of the waiting loop.
     * @see #stop()
     */
    public abstract void stop(int count, int sleepTime);


    /**
     * Write data to the database.  Use an index of 0 to store new data, and a positive
     * non-zero value to update data.
     * <p><em>
     * Example of adding new data to the database:
     * </em></p>
     * {@snippet :
     *          final var newSalt = StringUtils.generateSecureRandomString(10);
     *          final var hashedPassword = CryptoUtils.createPasswordHash(newPassword, newSalt);
     *          final var newUser = new User(0L, newUsername, hashedPassword, newSalt);
     *          userDb.write(newUser);
     * }
     * <p><em>
     * Example of updating data:
     * </em></p>
     * {@snippet :
     *         // write the updated salted password to the database
     *         final var updatedUser = new User(
     *                 user().getIndex(),
     *                 user().getUsername(),
     *                 hashedPassword,
     *                 newSalt);
     *         userDb.write(updatedUser);
     * }
     *
     * @param newData the data we are writing
     * @return the data with its new index assigned.
     * @see #processDataIndex
     * @see #writeToMemory
     */
    public abstract T write(T newData);

    /**
     * Store an item in the in-memory map and bring the registered indexes up to date.
     * <br>
     * The item is put into the map under its own index value.  For an update, the
     * item that was previously stored under that index is first taken out of the
     * registered indexes.  In both cases the new item is then added to them.
     *
     * @param newData the item to store, which may be totally new or a replacement
     * @param newElementCreated if true, this is a create.  If false, an update.
     */
    protected void writeToMemory(T newData, boolean newElementCreated) {
        logger.logTrace(() -> "in thread %s, writing data %s".formatted(Thread.currentThread().getName(), newData));
        final T previous = data.put(newData.getIndex(), newData);

        // an update replaces an item that is already indexed, so the stale
        // entry has to leave the indexes before its replacement is added
        final boolean isUpdate = !newElementCreated;
        if (isUpdate) {
            removeFromIndexes(previous);
        }
        addToIndexes(newData);
    }

    /**
     * When new data comes in, we look at its "index" value. If
     * it is zero, it's a create, and we assign it a new value taken from
     * the database's index counter.  If it is anything else, it is an update,
     * and we had better find it in the database already, or else throw an exception.
     * <br>
     * Existence is checked with a direct key lookup, since the in-memory map
     * is keyed by each item's index.  This avoids scanning every stored
     * item on each update.
     *
     * @param newData the data about to be written; its index is assigned here when it is zero
     * @return true if a create, false if an update
     * @throws DbException if a non-zero index is provided but no data is stored under it
     */
    protected boolean processDataIndex(T newData) {
        final long requestedIndex = newData.getIndex();

        // an index of zero means "new data": hand out the next index value
        if (requestedIndex == 0) {
            newData.setIndex(index.getAndIncrement());
            return true;
        }

        // otherwise this must be an update of something we already hold
        if (!data.containsKey(requestedIndex)) {
            throw new DbException(
                    String.format("Positive indexes are only allowed when updating existing data. Index: %d",
                            requestedIndex));
        }
        return false;
    }

    /**
     * Delete data
     * <p><em>Example:</em></p>
     * {@snippet :
     *      userDb.delete(user);
     * }
     *
     * @param dataToDelete the data we are removing from the database
     * @see #deleteFromMemory
     */
    public abstract void delete(T dataToDelete);


    /**
     * Remove a particular item from the internal data structure in memory,
     * and from the registered indexes.
     * <br>
     * The item is looked up by the index value of the given data.  The item
     * that was actually stored under that index is the one used to clean up
     * the registered indexes, since that is the item the indexes were built from.
     * <br>
     * If the database is empty afterwards, the index counter is reset back to 1.
     *
     * @param dataToDelete the data to delete; only its index value is used to locate the stored item
     * @throws DbException if given null, or if nothing is stored under the index of the given data
     */
    protected void deleteFromMemory(T dataToDelete) {
        if (dataToDelete == null) {
            throw new DbException("Invalid to be given a null value to delete");
        }
        final long dataIndex = dataToDelete.getIndex();

        // remove and check in a single step, so that there is no gap between
        // confirming the item exists and taking it out of the map
        final T removed = data.remove(dataIndex);
        if (removed == null) {
            throw new DbException("no data was found with index of " + dataIndex);
        }
        logger.logTrace(() -> String.format("in thread %s, deleting data with index %d", Thread.currentThread().getName(), dataIndex));

        // use the stored item, not the caller's copy, to compute the index keys
        removeFromIndexes(removed);

        // if all the data was just now deleted, we need to
        // reset the index back to 1
        if (data.isEmpty()) {
            index.set(1);
        }
    }


    /**
     *  add the data to registered indexes.
     *  <br>
     *  For each of the registered indexes,
     *  get the stored function to obtain a string value which helps divide
     *  the overall data into partitions, and add the data to the set of
     *  items kept for that string value.
     *  <br>
     *  Finding (or creating) the set and adding to it happen together while
     *  synchronized on this database, the same lock used when removing from
     *  the indexes.  That way a set cannot be discarded as empty in between
     *  being obtained and being added to.
     *
     * @param dbData the data to add to every registered index
     */
    protected void addToIndexes(T dbData) {
        for (var entry : partitioningMap.entrySet()) {
            // a function provided by the user to obtain an index-key: a unique or semi-unique
            // value to help partition / index the data.  It is run outside of the
            // lock, since it is code we do not control.
            final Function<T, String> indexStringFunction = entry.getValue();
            final String propertyAsString = indexStringFunction.apply(dbData);
            final Map<String, Set<T>> stringIndexMap = registeredIndexes.get(entry.getKey());

            // if the index-key provides a 1-to-1 mapping to items, like UUIDs, then
            // each value will have only one item in the collection.  In other cases,
            // like when partitioning the data into multiple groups, there could easily
            // be many items per index value.
            synchronized (this) {
                stringIndexMap.computeIfAbsent(propertyAsString, k -> new HashSet<>()).add(dbData);
            }
        }
    }

    /**
     * Run when an item is deleted from the database, or is about to be
     * replaced by an updated version of itself.
     * <br>
     * For each registered index, the item's index-key is calculated and any
     * item with the same database index value is removed from the set kept
     * for that key.  If the set becomes empty, the mapping for that key is
     * removed.  If there is no set at all for the key, there is nothing to
     * remove and that index is skipped.
     *
     * @param dbData the item to remove from every registered index
     */
    private void removeFromIndexes(T dbData) {
        final long indexToRemove = dbData.getIndex();
        for (var entry : partitioningMap.entrySet()) {
            // a function provided by the user to obtain an index-key: a unique or semi-unique
            // value to help partition / index the data
            final Function<T, String> indexStringFunction = entry.getValue();
            final String propertyAsString = indexStringFunction.apply(dbData);
            final Map<String, Set<T>> stringIndexMap = registeredIndexes.get(entry.getKey());
            synchronized (this) {
                final Set<T> itemsForKey = stringIndexMap.get(propertyAsString);
                if (itemsForKey == null) {
                    // nothing was indexed under this key, so there is nothing to take out
                    continue;
                }
                itemsForKey.removeIf(x -> x.getIndex() == indexToRemove);

                // in certain cases, we're removing one of the items that is indexed but
                // there are more left.  If there's nothing left though, we'll remove the mapping.
                if (itemsForKey.isEmpty()) {
                    stringIndexMap.remove(propertyAsString);
                }
            }
        }
    }


    /**
     * Grabs all the data from disk.  This method is run by various
     * programs when the system first loads.
     * <br>
     * Nothing is returned.  Presumably implementations place what they read
     * into the in-memory {@link #data} map - confirm against the concrete
     * database classes.
     *
     * @throws IOException declared so that implementations may report a failure to read from disk
     */
    public abstract void loadData() throws IOException;

    /**
     * This method provides read capability for the values of a database.
     * <br>
     * The returned collection is a read-only view over the data, through {@link Collections#unmodifiableCollection(Collection)}
     *
     * <p><em>Example:</em></p>
     * {@snippet :
     * boolean doesUserAlreadyExist(String username) {
     *     return userDb.values().stream().anyMatch(x -> x.getUsername().equals(username));
     * }
     * }
     *
     * @return the items stored in this database
     */
    public abstract Collection<T> values();

    /**
     * Register an index in the database for higher performance data access.
     * <p>
     *     This command should be run immediately after database declaration,
     *     or more specifically, before any data is loaded from disk. Otherwise,
     *     it would be possible to skip indexing that data.
     * </p>
     * <br>
     * Example:
     *  {@snippet :
     *           final var myDatabase = context.getDb("photos", Photograph.EMPTY);
     *           myDatabase.registerIndex("url", photo -> photo.getUrl());
     *  }
     * <p>
     *     The checks made here are on the parameters and on duplicate
     *     registration.  This base implementation does not itself detect
     *     whether data has already been loaded.
     * </p>
     * @param indexName a string used to distinguish this index.  This string will be used again
     *                  when requesting data in a method like {@link #getIndexedData} or {@link #findExactlyOne}
     * @param keyObtainingFunction a function which obtains data from the data in this database, used
     *                             to partition the data into groups (potentially up to a 1-to-1 correspondence
     *                             between id and object)
     * @return true if the registration succeeded
     * @throws DbException if the function is null, if the name is null or blank, or if an index
     * of the same name has already been registered.  It is necessary that
     * this is run immediately after declaring the database. To explain further: the data is not
     * actually loaded until the first time it is needed, such as running a write or delete, or
     * if the {@link #loadData()} method is run.  Creating an index map for the data that
     * is read from disk only occurs once, at data load time.  Thus, it is crucial that the
     * registerIndex command is run before any data is loaded.
     */
    public boolean registerIndex(String indexName, Function<T, String> keyObtainingFunction) {
        // validate the inputs, in this order: the function, the name, then duplicates
        if (keyObtainingFunction == null) {
            throw new DbException("When registering an index, the partitioning algorithm must not be null");
        }
        final boolean nameIsMissing = indexName == null || indexName.isBlank();
        if (nameIsMissing) {
            throw new DbException("When registering an index, value must be a non-empty string");
        }
        final boolean alreadyRegistered = registeredIndexes.containsKey(indexName);
        if (alreadyRegistered) {
            throw new DbException("It is forbidden to register the same index more than once.  Duplicate index: \"%s\"".formatted(indexName));
        }

        // start the index off empty, then remember how to calculate its keys
        registeredIndexes.put(indexName, new HashMap<>());
        partitioningMap.put(indexName, keyObtainingFunction);
        return true;
    }

    /**
     * Given the name of a registered index (see {@link #registerIndex(String, Function)}),
     * use the key to find the collection of data that matches it.
     * <br>
     * The returned collection is read-only.  When data is found, it is an
     * unmodifiable view over the set kept by the index, so the index cannot
     * be altered through it.
     * @param indexName the name of an index
     * @param key a string value that matches a partition calculated from the partition
     *            function provided to {@link #registerIndex(String, Function)}
     * @return a collection of data, an empty collection if nothing found
     * @throws DbException if no index has been registered with the given name
     */
    public Collection<T> getIndexedData(String indexName, String key) {
        final Map<String, Set<T>> keyToItems = registeredIndexes.get(indexName);
        if (keyToItems == null) {
            throw new DbException("There is no index registered on the database Db<"+this.emptyInstance.getClass().getSimpleName()+"> with a name of \""+indexName+"\"");
        }
        final Set<T> values = keyToItems.get(key);
        // return an empty set rather than null
        if (values == null) {
            return Set.of();
        }
        // wrap the internal set so that callers are unable to modify the index
        return Collections.unmodifiableSet(values);
    }

    /**
     * Get a set of the currently-registered indexes on this database, useful
     * for debugging.
     * <br>
     * The returned set is a read-only view of the names of the registered
     * indexes.  Attempting to modify it throws {@link UnsupportedOperationException},
     * which prevents an index from being partly unregistered by accident.
     *
     * @return the names of the registered indexes
     */
    public Set<String> getSetOfIndexes() {
        return Collections.unmodifiableSet(partitioningMap.keySet());
    }

    /**
     * A utility to find exactly one item from the database.
     * <br>
     * This utility will search the indexes for a particular data by
     * indexName and indexKey.  If not found, it will return null. If
     * found, it will be returned. If more than one are found, an exception
     * will be thrown.  Use this tool when the data has been uniquely
     * indexed, like for example when setting a unique identifier into
     * each data.
     * @param indexName the name of the index, an arbitrary value set by the
     *                  user to help distinguish among potentially many indexes
     *                  set on this data
     * @param indexKey the key for this particular value, such as a UUID or a name
     *                 or any other way to partition the data
     * @return the single matching item, or null if there is none
     * @see #findExactlyOne(String, String, Callable)
     */
    public T findExactlyOne(String indexName, String indexKey) {
        // the alternate used when nothing is found simply provides null
        final Callable<T> nullWhenMissing = () -> null;
        return findExactlyOne(indexName, indexKey, nullWhenMissing);
    }

    /**
     * Find one item, with an alternate value if nothing is found
     * <br>
     * This utility will search the indexes for a particular data by
     * indexName and indexKey.  If not found, the result of running the
     * alternate is returned. If found, it will be returned. If more than one
     * are found, an exception will be thrown.  Use this tool when the data has
     * been uniquely indexed, like for example when setting a unique identifier into
     * each data.
     * @param indexName the name of the index, an arbitrary value set by the
     *                  user to help distinguish among potentially many indexes
     *                  set on this data
     * @param indexKey the key for this particular value, such as a UUID or a name
     *                 or any other way to partition the data
     * @param alternate a functional interface that will be run if the result would
     *                  have been null, useful for situations where you don't want
     *                  the output to be null when nothing is found.
     * @return the single matching item, or whatever the alternate provides when there is none
     * @throws DbException if more than one item is found, if running the alternate
     *                     fails (the failure is wrapped), or if the index is not registered
     * @see #findExactlyOne(String, String)
     */
    public T findExactlyOne(String indexName, String indexKey, Callable<T> alternate) {
        final Collection<T> matches = getIndexedData(indexName, indexKey);

        // nothing found: defer to the alternate, wrapping anything it throws
        if (matches.isEmpty()) {
            try {
                return alternate.call();
            } catch (Exception ex) {
                throw new DbException(ex);
            }
        }

        // more than one found: the index is not unique for this key
        if (matches.size() != 1) {
            throw new DbException("More than one item found when searching database Db<%s> on index \"%s\" with key %s"
                    .formatted(emptyInstance.getClass().getSimpleName(), indexName, indexKey));
        }

        return matches.stream().findFirst().orElseThrow();
    }
}