DatabaseConsolidator.java

1
package com.renomad.minum.database;
2
3
import com.renomad.minum.logging.ILogger;
4
import com.renomad.minum.state.Context;
5
import com.renomad.minum.utils.IFileUtils;
6
7
import java.io.IOException;
8
import java.nio.charset.StandardCharsets;
9
import java.nio.file.Path;
10
import java.text.ParseException;
11
import java.util.*;
12
import java.util.stream.Collectors;
13
14
import static com.renomad.minum.database.ChecksumUtility.buildChecksum;
15
import static com.renomad.minum.database.ChecksumUtility.compareWithChecksum;
16
import static com.renomad.minum.database.DatabaseAppender.simpleDateFormat;
17
18
/**
19
 * Consolidates the database append logs.
20
 * <br>
21
 * As the append logs get filled up, the consolidator comes
22
 * along after to analyze those changes and determine a
23
 * consolidated version.  For example, if the append logs
24
 * have three updates for a particular element, then the consolidated file
25
 * will have just the last update.
26
 */
27
final class DatabaseConsolidator {
28
29
    /**
30
     * This is the path to the append-only files, where incoming
31
     * changes to the data are quickly stored.
32
     */
33
    private final Path appendLogDirectory;
34
35
    /**
36
     * This is the path to where we store consolidated data, so that
37
     * database startup is as fast as possible.
38
     */
39
    private final Path consolidatedDataDirectory;
40
41
    private final ILogger logger;
42
43
    private final int maxLinesPerFile;
44
    private final IFileUtils fileUtils;
45
46
    /**
47
     * This represents an instruction for how to change the overall consolidated
48
     * database files on disk.  Instructions are either to UPDATE or DELETE. This
49
     * also encapsulates the data we're updating.
50
     */
51
    // Note: as built by parseDatabaseChangeInstructionString, "data" is the raw
    // append-log line with the leading action keyword and separator removed, so
    // it still begins with the index followed by a pipe symbol.
    record DatabaseChangeInstruction(DatabaseChangeAction action, long dataIndex, String data) {}
52
53
    DatabaseConsolidator(Path persistenceDirectory, Context context, IFileUtils fileUtils) throws IOException {
54
        this.fileUtils = fileUtils;
55
        this.appendLogDirectory = persistenceDirectory.resolve("append_logs");
56
        this.consolidatedDataDirectory = persistenceDirectory.resolve("consolidated_data");
57
        var constants = context.getConstants();
58
        this.logger = context.getLogger();
59 1 1. <init> : removed call to com/renomad/minum/utils/IFileUtils::makeDirectory → KILLED
        fileUtils.makeDirectory(this.consolidatedDataDirectory);
60
        this.maxLinesPerFile = constants.maxLinesPerConsolidatedDatabaseFile;
61
    }
62
63
    /**
64
     * Loop through all the append-only files
65
     */
66
    void consolidate() throws IOException, ParseException {
67
        logger.logDebug(() -> "Starting database consolidator");
68
        List<Date> sortedList = getSortedAppendLogs(appendLogDirectory);
69 1 1. consolidate : negated conditional → KILLED
        if (sortedList.isEmpty()) {
70
            logger.logDebug(() -> "No database files found to consolidate - exiting");
71
            return;
72
        } else {
73
            logger.logDebug(() -> "Files to consolidate: " + sortedList.stream().map(simpleDateFormat::format).collect(Collectors.joining(";")));
74
        }
75
76
        // process the files in order.  This does potentially cause
77
        // multiple updates for the consolidated files, but that's
78
        // safer than building up too large a structure in memory
79
        // before writing, and in any case, we're prioritizing efficiency
80
        // so there should only be one write to each file per loop.
81
        //
82
        // after each append-only file is fully processed, it gets deleted.
83
        for (Date date : sortedList) {
84
            String filename = simpleDateFormat.format(date);
85
            logger.logDebug(() -> "consolidator processing file " + filename + " in " + appendLogDirectory);
86 1 1. consolidate : removed call to com/renomad/minum/database/DatabaseConsolidator::processAppendLogFile → KILLED
            processAppendLogFile(filename, appendLogDirectory, fileUtils, maxLinesPerFile, logger, consolidatedDataDirectory);
87
            logger.logDebug(() -> "consolidator finished with file " + filename + " in " + appendLogDirectory);
88
        }
89
        logger.logDebug(() -> "Database consolidation finished");
90
    }
91
92
93
    /**
94
     * The expectation is that after we finish reading the X lines in
95
     * this append log, we will have a set of clear instructions to
96
     * apply to our previously consolidated files. There should end up
97
     * being just one action for each id - update or delete.
98
     * <br>
99
     * Build a data structure holding instructions for the next step.
100
     */
101
    static void processAppendLogFile(String filename, Path appendLogDirectory, IFileUtils fileUtils,
102
                                     int maxLinesPerFile, ILogger logger, Path consolidatedDataDirectory) throws IOException {
103
        Path fullPathToFile = appendLogDirectory.resolve(filename);
104
        List<String> lines = fileUtils.readAllLines(fullPathToFile);
105
106
        Map<Long, DatabaseChangeInstruction> resultingInstructions = new HashMap<>();
107
108
        // process each line from the file
109
110
        for (String line : lines) {
111
            DatabaseChangeInstruction databaseChange = parseDatabaseChangeInstructionString(line, filename);
112
113
            // the trick here is that by using a Map, only the last item added will remain at the end
114
            resultingInstructions.put(databaseChange.dataIndex(), databaseChange);
115
        }
116
117
        // now we have the concise list of state changes, but the next step is figuring out how
118
        // to organize them by their destination.  consolidated files will be grouped somehow.
119
        // For example, indexes 1 - 1000, 1001-2000, etc (there may be more than 1000 per file).
120
        // <br>
121
        // So, we will group our data that way,
122
        // and then efficiently update the files (a bad outcome, in contrast, would be updating
123
        // the files multiple times each).
124
125
        Map<Long, Collection<DatabaseChangeInstruction>> groupedInstructions = groupInstructionsByPartition(resultingInstructions, maxLinesPerFile);
126
127 1 1. processAppendLogFile : removed call to com/renomad/minum/database/DatabaseConsolidator::rewriteFiles → KILLED
        rewriteFiles(groupedInstructions, fileUtils, maxLinesPerFile, logger, consolidatedDataDirectory);
128
129
        // delete the file
130 1 1. processAppendLogFile : removed call to com/renomad/minum/utils/IFileUtils::delete → KILLED
        fileUtils.delete(fullPathToFile);
131
    }
132
133
    /**
134
     * Given a {@link Map} of database change instructions, grouped by keys representing the
135
     * first index in a group of indexes (like 1 to 100, or 101 to 200, etc), write the
136
     * data to files, with consideration for what might already exist.  That is to say,
137
     * if we are adding grouped instructions to an existing file such as "1_to_100", then
138
     * we want to merge our incoming data with what is already there.  Otherwise, we are just
139
     * creating a new file.
140
     */
141
    static void rewriteFiles(Map<Long, Collection<DatabaseChangeInstruction>> groupedInstructions,
142
                             IFileUtils fileUtils, int maxLinesPerFile, ILogger logger, Path consolidatedDataDirectory) throws IOException {
143
        for (Map.Entry<Long, Collection<DatabaseChangeInstruction>> instructions : groupedInstructions.entrySet()) {
144 2 1. rewriteFiles : Replaced long addition with subtraction → TIMED_OUT
2. rewriteFiles : Replaced integer subtraction with addition → KILLED
            String filename = String.format("%d_to_%d", instructions.getKey(), instructions.getKey() + (maxLinesPerFile - 1));
145
            logger.logTrace(() -> "Writing consolidated data to " + filename);
146
            List<String> data;
147
            // if the file doesn't exist, we'll just start with an empty list. If it
148
            // does exist, read its lines into a List data structure.
149
            Path fullPathToConsolidatedFile = consolidatedDataDirectory.resolve(filename);
150 1 1. rewriteFiles : negated conditional → KILLED
            if (!fileUtils.exists(fullPathToConsolidatedFile)) {
151
                data = new ArrayList<>();
152
            } else {
153
                data = readConsolidatedFileWithChecksum(fullPathToConsolidatedFile, fileUtils);
154
            }
155
156
            // update the data in memory per the instructions
157
            Collection<String> updatedData = updateData(filename, data, instructions.getValue());
158
159
            String checksumString = buildChecksum(updatedData);
160
161
            // write the data to disk
162
            fileUtils.write(fullPathToConsolidatedFile, updatedData, StandardCharsets.US_ASCII);
163
164
            // write a hash of the data to use as a checksum.  This value will be checked
165
            // when reading the data later on, to confirm nothing has changed since writing.
166
            Path fullPathToChecksumFile = consolidatedDataDirectory.resolve(filename + ".checksum");
167 1 1. rewriteFiles : removed call to com/renomad/minum/utils/IFileUtils::writeString → TIMED_OUT
            fileUtils.writeString(fullPathToChecksumFile, checksumString);
168
        }
169
    }
170
171
    /**
172
     * Reads data from consolidated file, confirming the checksum in the process.
173
     */
174
    static List<String> readConsolidatedFileWithChecksum(Path fullPathToConsolidatedFile, IFileUtils fileUtils) throws IOException {
175
        // get all the data from the consolidated file
176
        List<String> data = fileUtils.readAllLines(fullPathToConsolidatedFile);
177
178
        compareWithChecksum(fullPathToConsolidatedFile, data, fileUtils);
179
180 1 1. readConsolidatedFileWithChecksum : replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::readConsolidatedFileWithChecksum → KILLED
        return data;
181
    }
182
183
184
    /**
185
     * Here, we have raw lines of data from a file, and a list of instructions for updating
186
     * that data.  We will organize the raw data better, apply the instructions, and return
187
     * the updated data
188
     *
189
     * @param linesOfData  raw lines of data from a file
190
     * @param instructions details of how to change the data in the file, either UPDATE or DELETE
191
     * @return an updated and sorted list of strings (sorted by index, which is the first value on each line)
192
     */
193
    static Collection<String> updateData(String filename, List<String> linesOfData, Collection<DatabaseChangeInstruction> instructions) {
194
        SortedMap<Long, String> result = new TreeMap<>();
195
        // put the original data into a map
196
        for (String data : linesOfData) {
197
            // the first pipe symbol is where the index number ends.  Apologies for
198
            // the overlap of terms here, index and index.
199
            int indexOfFirstPipe = data.indexOf('|');
200 1 1. updateData : negated conditional → KILLED
            if (indexOfFirstPipe == -1) {
201
                throw new DbException(String.format("Error parsing line in file.  File: %s line: %s", filename, data));
202
            }
203
            String dataIndexString = data.substring(0, indexOfFirstPipe);
204
            long dataIndexLong;
205
            try {
206
                dataIndexLong = Long.parseLong(dataIndexString);
207
            } catch (NumberFormatException ex) {
208
                throw new DbException(String.format("Failed to parse index from line in file. File: %s line: %s", filename, data), ex);
209
            }
210
            result.put(dataIndexLong, data);
211
        }
212
213
        // change that data per instructions
214
        for (DatabaseChangeInstruction instruction : instructions) {
215 1 1. updateData : negated conditional → KILLED
            if (DatabaseChangeAction.UPDATE.equals(instruction.action())) {
216
                result.put(instruction.dataIndex(), instruction.data());
217
            } else {
218
                // only other option is DELETE
219
                result.remove(instruction.dataIndex());
220
            }
221
        }
222 1 1. updateData : replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::updateData → TIMED_OUT
        return result.values();
223
    }
224
225
    /**
226
     * This method will group the instructions for changes to the database by which
227
     * consolidated files they apply to, so that we only need to make one change
228
     * to each file.  Files are named like this: 1, 1001, etc., or
229
     * in other words, the starting index of each set of consolidated data.
230
     * @param databaseChangeInstructionMap this is a map between keys representing the
231
     *                                     index of the data, and the data itself.
232
     * @return a map consisting of keys representing the target file for the data, and
233
     * a collection of DatabaseChangeInstruction data to place in that file.
234
     */
235
    static Map<Long, Collection<DatabaseChangeInstruction>> groupInstructionsByPartition(
236
            Map<Long, DatabaseChangeInstruction> databaseChangeInstructionMap, int maxLinesPerFile) {
237
238
        // initialize a data structure to store our results
239
        Map<Long, Collection<DatabaseChangeInstruction>> instructionsGroupedByPartition = new HashMap<>();
240
241
        // loop through the incoming data, grouping and ordering as necessary
242
        for (var databaseChangeInstruction : databaseChangeInstructionMap.entrySet()) {
243
244
            // determine the expected filename for this file.  For example, if the index is 1234, then
245
            // the filename should be 1001
246 4 1. groupInstructionsByPartition : Replaced long division with multiplication → KILLED
2. groupInstructionsByPartition : Replaced long addition with subtraction → KILLED
3. groupInstructionsByPartition : Replaced long multiplication with division → KILLED
4. groupInstructionsByPartition : Replaced long subtraction with addition → KILLED
            long expectedFilename = (((databaseChangeInstruction.getKey() - 1) / maxLinesPerFile) * maxLinesPerFile) + 1;
247
248
            // If there is no key found, we need to add one, and add a new collection
249 1 1. lambda$groupInstructionsByPartition$7 : replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::lambda$groupInstructionsByPartition$7 → KILLED
            instructionsGroupedByPartition.computeIfAbsent(expectedFilename, x -> new ArrayList<>());
250
251
            // add a new item to the collection for this filename
252
            instructionsGroupedByPartition.get(expectedFilename).add(databaseChangeInstruction.getValue());
253
        }
254
255 1 1. groupInstructionsByPartition : replaced return value with Collections.emptyMap for com/renomad/minum/database/DatabaseConsolidator::groupInstructionsByPartition → KILLED
        return instructionsGroupedByPartition;
256
    }
257
258
    /**
259
     * read first 6 characters - is it update or delete?
260
     * skip a character
261
     * read digits until we hit a pipe symbol, that's our index.
262
     * read the rest of the content
263
     */
264
    static DatabaseChangeInstruction parseDatabaseChangeInstructionString(String databaseInstructionString, String filename) {
265
        String actionString = databaseInstructionString.substring(0, 6);
266
        DatabaseChangeAction action;
267 1 1. parseDatabaseChangeInstructionString : negated conditional → KILLED
        if ("UPDATE".equals(actionString)) {
268
            action = DatabaseChangeAction.UPDATE;
269 1 1. parseDatabaseChangeInstructionString : negated conditional → KILLED
        } else if ("DELETE".equals(actionString)) {
270
            action = DatabaseChangeAction.DELETE;
271
        } else {
272
            throw new DbException("Line in append-only log was missing an action (UPDATE or DELETE) in the first characters. Line was: " + databaseInstructionString);
273
        }
274
        // confusing overlap of terms - index is used here to mean two things:
275
        // a) where we find the first pipe symbol
276
        // b) the index value of the data
277
        int indexOfPipe = databaseInstructionString.indexOf('|', 7);
278 1 1. parseDatabaseChangeInstructionString : negated conditional → KILLED
        if (indexOfPipe == -1) {
279
            throw new DbException(
280
                    "Failed to find index of the first pipe in the file %s, with content %s".formatted(filename, databaseInstructionString));
281
        }
282
        String dataIndex = databaseInstructionString.substring(7, indexOfPipe);
283
        long dataIndexLong = Long.parseLong(dataIndex);
284
285 1 1. parseDatabaseChangeInstructionString : replaced return value with null for com/renomad/minum/database/DatabaseConsolidator::parseDatabaseChangeInstructionString → TIMED_OUT
        return new DatabaseChangeInstruction(action, dataIndexLong, databaseInstructionString.substring(7));
286
    }
287
288
    /**
289
     * Given a directory, convert the list of files into a sorted
290
     * list of dates.
291
     * @return a sorted list of dates, or an empty list if nothing found
292
     */
293
    static List<Date> getSortedAppendLogs(Path appendLogDirectory) throws ParseException {
294
        // get the list of file names, which are date-time stamps
295
        String[] fileList = appendLogDirectory.toFile().list();
296
297
        // if there aren't any append-only files, bail out with an empty list
298 1 1. getSortedAppendLogs : negated conditional → KILLED
        if (fileList == null) {
299
            return List.of();
300
        }
301
302
        List<Date> appendLogDates = convertFileListToDateList(fileList);
303
304
        // sort
305 1 1. getSortedAppendLogs : replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::getSortedAppendLogs → KILLED
        return appendLogDates.stream().sorted().toList();
306
    }
307
308
    /**
309
     * Convert a list of filenames to a list of dates
310
     */
311
    static List<Date> convertFileListToDateList(String[] listOfFiles) throws ParseException {
312
        // initialize a list which will hold the dates associated with each file name
313
        List<Date> appendLogDates = new ArrayList<>();
314
315
        // convert the names to dates
316
        for (String file : listOfFiles) {
317
            Date date = simpleDateFormat.parse(file);
318
            appendLogDates.add(date);
319
        }
320
321 1 1. convertFileListToDateList : replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::convertFileListToDateList → KILLED
        return appendLogDates;
322
    }
323
}

Mutations

59

1.1
Location : <init>
Killed by : com.renomad.minum.database.DatabaseConsolidatorTests
removed call to com/renomad/minum/utils/IFileUtils::makeDirectory → KILLED

69

1.1
Location : consolidate
Killed by : com.renomad.minum.database.DbFileConverterTests
negated conditional → KILLED

86

1.1
Location : consolidate
Killed by : com.renomad.minum.database.DbFileConverterTests
removed call to com/renomad/minum/database/DatabaseConsolidator::processAppendLogFile → KILLED

127

1.1
Location : processAppendLogFile
Killed by : com.renomad.minum.FunctionalTests.test_PathFunction_Response(com.renomad.minum.FunctionalTests)
removed call to com/renomad/minum/database/DatabaseConsolidator::rewriteFiles → KILLED

130

1.1
Location : processAppendLogFile
Killed by : com.renomad.minum.web.WebPerformanceTests.test3(com.renomad.minum.web.WebPerformanceTests)
removed call to com/renomad/minum/utils/IFileUtils::delete → KILLED

144

1.1
Location : rewriteFiles
Killed by : none
Replaced long addition with subtraction → TIMED_OUT

2.2
Location : rewriteFiles
Killed by : com.renomad.minum.security.TheBrigTests
Replaced integer subtraction with addition → KILLED

150

1.1
Location : rewriteFiles
Killed by : com.renomad.minum.security.TheBrigTests
negated conditional → KILLED

167

1.1
Location : rewriteFiles
Killed by : none
removed call to com/renomad/minum/utils/IFileUtils::writeString → TIMED_OUT

180

1.1
Location : readConsolidatedFileWithChecksum
Killed by : com.renomad.minum.security.TheBrigTests
replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::readConsolidatedFileWithChecksum → KILLED

200

1.1
Location : updateData
Killed by : com.renomad.minum.database.DatabaseConsolidatorTests
negated conditional → KILLED

215

1.1
Location : updateData
Killed by : com.renomad.minum.security.TheBrigTests
negated conditional → KILLED

222

1.1
Location : updateData
Killed by : none
replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::updateData → TIMED_OUT

246

1.1
Location : groupInstructionsByPartition
Killed by : com.renomad.minum.security.TheBrigTests
Replaced long division with multiplication → KILLED

2.2
Location : groupInstructionsByPartition
Killed by : com.renomad.minum.security.TheBrigTests
Replaced long addition with subtraction → KILLED

3.3
Location : groupInstructionsByPartition
Killed by : com.renomad.minum.security.TheBrigTests
Replaced long multiplication with division → KILLED

4.4
Location : groupInstructionsByPartition
Killed by : com.renomad.minum.security.TheBrigTests
Replaced long subtraction with addition → KILLED

249

1.1
Location : lambda$groupInstructionsByPartition$7
Killed by : com.renomad.minum.security.TheBrigTests
replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::lambda$groupInstructionsByPartition$7 → KILLED

255

1.1
Location : groupInstructionsByPartition
Killed by : com.renomad.minum.web.WebPerformanceTests.test3(com.renomad.minum.web.WebPerformanceTests)
replaced return value with Collections.emptyMap for com/renomad/minum/database/DatabaseConsolidator::groupInstructionsByPartition → KILLED

267

1.1
Location : parseDatabaseChangeInstructionString
Killed by : com.renomad.minum.database.DatabaseConsolidatorTests
negated conditional → KILLED

269

1.1
Location : parseDatabaseChangeInstructionString
Killed by : com.renomad.minum.database.DatabaseConsolidatorTests
negated conditional → KILLED

278

1.1
Location : parseDatabaseChangeInstructionString
Killed by : com.renomad.minum.database.DbFileConverterTests
negated conditional → KILLED

285

1.1
Location : parseDatabaseChangeInstructionString
Killed by : none
replaced return value with null for com/renomad/minum/database/DatabaseConsolidator::parseDatabaseChangeInstructionString → TIMED_OUT

298

1.1
Location : getSortedAppendLogs
Killed by : com.renomad.minum.database.DatabaseConsolidatorTests
negated conditional → KILLED

305

1.1
Location : getSortedAppendLogs
Killed by : com.renomad.minum.database.DbFileConverterTests
replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::getSortedAppendLogs → KILLED

321

1.1
Location : convertFileListToDateList
Killed by : com.renomad.minum.database.DbFileConverterTests
replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::convertFileListToDateList → KILLED

Active mutators

Tests examined


Report generated by PIT 1.17.0