DatabaseConsolidator.java

1
package com.renomad.minum.database;
2
3
import com.renomad.minum.logging.ILogger;
4
import com.renomad.minum.state.Context;
5
import com.renomad.minum.utils.FileUtils;
6
7
import java.io.IOException;
8
import java.nio.charset.StandardCharsets;
9
import java.nio.file.Files;
10
import java.nio.file.Path;
11
import java.text.ParseException;
12
import java.util.*;
13
import java.util.stream.Collectors;
14
15
import static com.renomad.minum.database.DatabaseAppender.simpleDateFormat;
16
17
/**
18
 * Consolidates the database append logs.
19
 * <br>
20
 * As the append logs get filled up, the consolidator comes
21
 * along after to analyze those changes and determine a
22
 * consolidated version.  For example, if the append logs
23
 * have three updates for a particular element, then the consolidated file
24
 * will have just the last update.
25
 */
26
final class DatabaseConsolidator {
27
28
    /**
29
     * This is the path to the append-only files, where incoming
30
     * changes to the data are quickly stored.
31
     */
32
    private final Path appendLogDirectory;
33
34
    /**
35
     * This is the path to where we store consolidated data, so that
36
     * database startup is as fast as possible.
37
     */
38
    private final Path consolidatedDataDirectory;
39
40
    private final ILogger logger;
41
42
    private final int maxLinesPerFile;
43
44
    /**
45
     * This represents an instruction for how to change the overall consolidated
46
     * database files on disk.  Instructions are either to UPDATE or DELETE. This
47
     * also encapsulates the data we're updating.
48
     */
49
    private record DatabaseChangeInstruction(DatabaseChangeAction action, long dataIndex, String data) {}
50
51
    DatabaseConsolidator(Path persistenceDirectory, Context context) {
52
        this.appendLogDirectory = persistenceDirectory.resolve("append_logs");
53
        this.consolidatedDataDirectory = persistenceDirectory.resolve("consolidated_data");
54
        var constants = context.getConstants();
55
        this.logger = context.getLogger();
56
        FileUtils fileUtils = new FileUtils(logger, constants);
57 1 1. <init> : removed call to com/renomad/minum/utils/FileUtils::makeDirectory → KILLED
        fileUtils.makeDirectory(this.consolidatedDataDirectory);
58
        this.maxLinesPerFile = constants.maxLinesPerConsolidatedDatabaseFile;
59
    }
60
61
    /**
62
     * Loop through all the append-only files
63
     */
64
    void consolidate() throws IOException {
65
        logger.logDebug(() -> "Starting database consolidator");
66
        List<Date> sortedList = getSortedAppendLogs(appendLogDirectory);
67 1 1. consolidate : negated conditional → KILLED
        if (sortedList.isEmpty()) {
68
            logger.logDebug(() -> "No database files found to consolidate - exiting");
69
            return;
70
        } else {
71
            logger.logDebug(() -> "Files to consolidate: " + sortedList.stream().map(simpleDateFormat::format).collect(Collectors.joining(";")));
72
        }
73
74
        // process the files in order.  This does potentially cause
75
        // multiple updates for the consolidated files, but that's
76
        // safer than building up too large a structure in memory
77
        // before writing, and in any case, we're prioritizing efficiency
78
        // so there should only be one write to each file per loop.
79
        //
80
        // after each append-only file is fully processed, it gets deleted.
81
        for (Date date : sortedList) {
82
            String filename = simpleDateFormat.format(date);
83
            logger.logDebug(() -> "consolidator processing file " + filename + " in " + appendLogDirectory);
84 1 1. consolidate : removed call to com/renomad/minum/database/DatabaseConsolidator::processAppendLogFile → KILLED
            processAppendLogFile(filename);
85
            logger.logDebug(() -> "consolidator finished with file " + filename + " in " + appendLogDirectory);
86
        }
87
        logger.logDebug(() -> "Database consolidation finished");
88
    }
89
90
91
    /**
92
     * The expectation is that after we finish reading the X lines in
93
     * this append log, we will have a set of clear instructions to
94
     * apply to our previously consolidated files. There should end up
95
     * being just one action for each id - update or delete.
96
     * <br>
97
     * Build a data structure holding instructions for the next step.
98
     */
99
    private void processAppendLogFile(String filename) throws IOException {
100
        Path fullPathToFile = this.appendLogDirectory.resolve(filename);
101
        List<String> lines = Files.readAllLines(fullPathToFile);
102
        Map<Long, DatabaseChangeInstruction> resultingInstructions = new HashMap<>();
103
104
        // process each line from the file
105
106
        for (String line : lines) {
107
            DatabaseChangeInstruction databaseChange = parseDatabaseChangeInstructionString(line, filename);
108
109
            // the trick here is that by using a Map, only the last item added will remain at the end
110
            resultingInstructions.put(databaseChange.dataIndex(), databaseChange);
111
        }
112
113
        // now we have the concise list of state changes, but the next step is figuring out how
114
        // to organize them by their destination.  consolidated files will be grouped somehow.
115
        // For example, indexes 1 - 1000, 1001-2000, etc (there may be more than 1000 per file).
116
        // <br>
117
        // So, we will group our data that way,
118
        // and then efficiently update the files (a bad outcome, in contrast, would be updating
119
        // the files multiple times each).
120
121
        Map<Long, Collection<DatabaseChangeInstruction>> groupedInstructions = groupInstructionsByPartition(resultingInstructions);
122
123 1 1. processAppendLogFile : removed call to com/renomad/minum/database/DatabaseConsolidator::rewriteFiles → KILLED
        rewriteFiles(groupedInstructions);
124
125
        // delete the file
126 1 1. processAppendLogFile : removed call to java/nio/file/Files::delete → KILLED
        Files.delete(fullPathToFile);
127
    }
128
129
    /**
130
     * Given a {@link Map} of database change instructions, grouped by keys representing the
131
     * first index in a group of indexes (like 1 to 100, or 101 to 200, etc), write the
132
     * data to files, with consideration for what might already exist.  That is to say,
133
     * if we are adding grouped instructions to an existing file such as "1_to_100", then
134
     * we want to merge our incoming data with what is already there.  Otherwise, we are just
135
     * creating a new file.
136
     */
137
    private void rewriteFiles(Map<Long, Collection<DatabaseChangeInstruction>> groupedInstructions) throws IOException {
138
        for (Map.Entry<Long, Collection<DatabaseChangeInstruction>> instructions : groupedInstructions.entrySet()) {
139 2 1. rewriteFiles : Replaced long addition with subtraction → KILLED
2. rewriteFiles : Replaced integer subtraction with addition → KILLED
            String filename = String.format("%d_to_%d", instructions.getKey(), instructions.getKey() + (maxLinesPerFile - 1));
140
            logger.logTrace(() -> "Writing consolidated data to " + filename);
141
            List<String> data;
142
            // if the file doesn't exist, we'll just start with an empty list. If it
143
            // does exist, read its lines into a List data structure.
144
            Path fullPathToConsolidatedFile = this.consolidatedDataDirectory.resolve(filename);
145 1 1. rewriteFiles : negated conditional → KILLED
            if (!Files.exists(fullPathToConsolidatedFile)) {
146
                data = new ArrayList<>();
147
            } else {
148
                data = Files.readAllLines(fullPathToConsolidatedFile);
149
            }
150
151
            // update the data in memory per the instructions
152
            Collection<String> updatedData = updateData(filename, data, instructions.getValue());
153
154
            // write the data to disk
155
            Files.write(fullPathToConsolidatedFile, updatedData, StandardCharsets.US_ASCII);
156
        }
157
    }
158
159
    /**
160
     * Here, we have raw lines of data from a file, and a list of instructions for updating
161
     * that data.  We will organize the raw data better, apply the instructions, and return
162
     * the updated data
163
     *
164
     * @param linesOfData  raw lines of data from a file
165
     * @param instructions details of how to change the data in the file, either UPDATE or DELETE
166
     * @return an updated and sorted list of strings (sorted by index, which is the first value on each line)
167
     */
168
    static Collection<String> updateData(String filename, List<String> linesOfData, Collection<DatabaseChangeInstruction> instructions) {
169
        SortedMap<Long, String> result = new TreeMap<>();
170
        // put the original data into a map
171
        for (String data : linesOfData) {
172
            // the first pipe symbol is where the index number ends.  Apologies for
173
            // the overlap of terms here, index and index.
174
            int indexOfFirstPipe = data.indexOf('|');
175 1 1. updateData : negated conditional → KILLED
            if (indexOfFirstPipe == -1) {
176
                throw new DbException(String.format("Error parsing line in file.  File: %s line: %s", filename, data));
177
            }
178
            String dataIndexString = data.substring(0, indexOfFirstPipe);
179
            long dataIndexLong;
180
            try {
181
                dataIndexLong = Long.parseLong(dataIndexString);
182
            } catch (NumberFormatException ex) {
183
                throw new DbException(String.format("Failed to parse index from line in file. File: %s line: %s", filename, data), ex);
184
            }
185
            result.put(dataIndexLong, data);
186
        }
187
188
        // change that data per instructions
189
        for (DatabaseChangeInstruction instruction : instructions) {
190 1 1. updateData : negated conditional → KILLED
            if (DatabaseChangeAction.UPDATE.equals(instruction.action())) {
191
                result.put(instruction.dataIndex(), instruction.data());
192
            } else {
193
                // only other option is DELETE
194
                result.remove(instruction.dataIndex());
195
            }
196
        }
197 1 1. updateData : replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::updateData → KILLED
        return result.values();
198
    }
199
200
    /**
201
     * This method will group the instructions for changes to the database by which
202
     * consolidated files they apply to, so that we only need to make one change
203
     * to each file.  Files are named like this: 1, 1001, etc., or
204
     * in other words, the starting index of each set of consolidated data.
205
     * @param databaseChangeInstructionMap this is a map between keys representing the
206
     *                                     index of the data, and the data itself.
207
     * @return a map consisting of keys representing the target file for the data, and
208
     * a collection of DatabaseChangeInstruction data to place in that file.
209
     */
210
    private Map<Long, Collection<DatabaseChangeInstruction>> groupInstructionsByPartition(
211
            Map<Long, DatabaseChangeInstruction> databaseChangeInstructionMap) {
212
213
        // initialize a data structure to store our results
214
        Map<Long, Collection<DatabaseChangeInstruction>> instructionsGroupedByPartition = new HashMap<>();
215
216
        // loop through the incoming data, grouping and ordering as necessary
217
        for (var databaseChangeInstruction : databaseChangeInstructionMap.entrySet()) {
218
219
            // determine the expected filename for this file.  For example, if the index is 1234, then
220
            // the filename should be 1001
221 4 1. groupInstructionsByPartition : Replaced long addition with subtraction → KILLED
2. groupInstructionsByPartition : Replaced long subtraction with addition → KILLED
3. groupInstructionsByPartition : Replaced long multiplication with division → KILLED
4. groupInstructionsByPartition : Replaced long division with multiplication → KILLED
            long expectedFilename = (((databaseChangeInstruction.getKey() - 1) / maxLinesPerFile) * maxLinesPerFile) + 1;
222
223
            // If there is no key found, we need to add one, and add a new collection
224 1 1. lambda$groupInstructionsByPartition$7 : replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::lambda$groupInstructionsByPartition$7 → KILLED
            instructionsGroupedByPartition.computeIfAbsent(expectedFilename, x -> new ArrayList<>());
225
226
            // add a new item to the collection for this filename
227
            instructionsGroupedByPartition.get(expectedFilename).add(databaseChangeInstruction.getValue());
228
        }
229
230 1 1. groupInstructionsByPartition : replaced return value with Collections.emptyMap for com/renomad/minum/database/DatabaseConsolidator::groupInstructionsByPartition → KILLED
        return instructionsGroupedByPartition;
231
    }
232
233
    /**
234
     * read first 6 characters - is it update or delete?
235
     * skip a character
236
     * read digits until we hit a pipe symbol, that's our index.
237
     * read the rest of the content
238
     */
239
    static DatabaseChangeInstruction parseDatabaseChangeInstructionString(String databaseInstructionString, String filename) {
240
        String actionString = databaseInstructionString.substring(0, 6);
241
        DatabaseChangeAction action;
242 1 1. parseDatabaseChangeInstructionString : negated conditional → KILLED
        if ("UPDATE".equals(actionString)) {
243
            action = DatabaseChangeAction.UPDATE;
244 1 1. parseDatabaseChangeInstructionString : negated conditional → KILLED
        } else if ("DELETE".equals(actionString)) {
245
            action = DatabaseChangeAction.DELETE;
246
        } else {
247
            throw new DbException("Line in append-only log was missing an action (UPDATE or DELETE) in the first characters. Line was: " + databaseInstructionString);
248
        }
249
        // confusing overlap of terms - index is used here to mean two things:
250
        // a) where we find the first pipe symbol
251
        // b) the index value of the data
252
        int indexOfPipe = databaseInstructionString.indexOf('|', 7);
253 1 1. parseDatabaseChangeInstructionString : negated conditional → KILLED
        if (indexOfPipe == -1) {
254
            throw new DbException(
255
                    "Failed to find index of the first pipe in the file %s, with content %s".formatted(filename, databaseInstructionString));
256
        }
257
        String dataIndex = databaseInstructionString.substring(7, indexOfPipe);
258
        long dataIndexLong = Long.parseLong(dataIndex);
259
260 1 1. parseDatabaseChangeInstructionString : replaced return value with null for com/renomad/minum/database/DatabaseConsolidator::parseDatabaseChangeInstructionString → KILLED
        return new DatabaseChangeInstruction(action, dataIndexLong, databaseInstructionString.substring(7));
261
    }
262
263
    /**
264
     * Given a directory, convert the list of files into a sorted
265
     * list of dates.
266
     * @return a sorted list of dates, or an empty list if nothing found
267
     */
268
    static List<Date> getSortedAppendLogs(Path appendLogDirectory) {
269
        // get the list of file names, which are date-time stamps
270
        String[] fileList = appendLogDirectory.toFile().list();
271
272
        // if there aren't any append-only files, bail out with an empty list
273 1 1. getSortedAppendLogs : negated conditional → KILLED
        if (fileList == null) {
274
            return List.of();
275
        }
276
277
        List<Date> appendLogDates = convertFileListToDateList(fileList);
278
279
        // sort
280 1 1. getSortedAppendLogs : replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::getSortedAppendLogs → KILLED
        return appendLogDates.stream().sorted().toList();
281
    }
282
283
    /**
284
     * Convert a list of filenames to a list of dates
285
     */
286
    static List<Date> convertFileListToDateList(String[] listOfFiles) {
287
        // initialize a list which will hold the dates associated with each file name
288
        List<Date> appendLogDates = new ArrayList<>();
289
290
        // convert the names to dates
291
        for (String file : listOfFiles) {
292
            Date date;
293
            try {
294
                date = simpleDateFormat.parse(file);
295
            } catch (ParseException e) {
296
                throw new DbException(e);
297
            }
298
            appendLogDates.add(date);
299
        }
300
301 1 1. convertFileListToDateList : replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::convertFileListToDateList → KILLED
        return appendLogDates;
302
    }
303
}

Mutations

57

1.1
Location : <init>
Killed by : com.renomad.minum.database.DbEngine2Tests.test_NegativeCase_NoIndex(com.renomad.minum.database.DbEngine2Tests)
removed call to com/renomad/minum/utils/FileUtils::makeDirectory → KILLED

67

1.1
Location : consolidate
Killed by : com.renomad.minum.database.DbFileConverterTests.testConvertDbEngine2FolderStructureToDbClassicForm_EdgeCase_CorruptData(com.renomad.minum.database.DbFileConverterTests)
negated conditional → KILLED

84

1.1
Location : consolidate
Killed by : com.renomad.minum.database.DbFileConverterTests.testConvertDbEngine2FolderStructureToDbClassicForm_EdgeCase_CorruptData(com.renomad.minum.database.DbFileConverterTests)
removed call to com/renomad/minum/database/DatabaseConsolidator::processAppendLogFile → KILLED

123

1.1
Location : processAppendLogFile
Killed by : com.renomad.minum.database.DbEngine2Tests.test_LoadingData_MultipleThreads(com.renomad.minum.database.DbEngine2Tests)
removed call to com/renomad/minum/database/DatabaseConsolidator::rewriteFiles → KILLED

126

1.1
Location : processAppendLogFile
Killed by : com.renomad.minum.database.DbEngine2Tests.test_ConvertingDatabase_Db_To_DbEngine2(com.renomad.minum.database.DbEngine2Tests)
removed call to java/nio/file/Files::delete → KILLED

139

1.1
Location : rewriteFiles
Killed by : com.renomad.minum.database.DbEngine2Tests.test_ConvertingDatabase_Db_To_DbEngine2(com.renomad.minum.database.DbEngine2Tests)
Replaced long addition with subtraction → KILLED

2.2
Location : rewriteFiles
Killed by : com.renomad.minum.database.DbEngine2Tests.test_ConvertingDatabase_Db_To_DbEngine2(com.renomad.minum.database.DbEngine2Tests)
Replaced integer subtraction with addition → KILLED

145

1.1
Location : rewriteFiles
Killed by : com.renomad.minum.database.DbEngine2Tests.test_LoadingData_MultipleThreads(com.renomad.minum.database.DbEngine2Tests)
negated conditional → KILLED

175

1.1
Location : updateData
Killed by : com.renomad.minum.database.DatabaseConsolidatorTests.testUpdatingData_EdgeCase_ParsingErrorForIndex(com.renomad.minum.database.DatabaseConsolidatorTests)
negated conditional → KILLED

190

1.1
Location : updateData
Killed by : com.renomad.minum.database.DbEngine2Tests.test_LoadingData_MultipleThreads(com.renomad.minum.database.DbEngine2Tests)
negated conditional → KILLED

197

1.1
Location : updateData
Killed by : com.renomad.minum.database.DbEngine2Tests.test_LoadingData_MultipleThreads(com.renomad.minum.database.DbEngine2Tests)
replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::updateData → KILLED

221

1.1
Location : groupInstructionsByPartition
Killed by : com.renomad.minum.database.DbEngine2Tests.test_ConvertingDatabase_Db_To_DbEngine2(com.renomad.minum.database.DbEngine2Tests)
Replaced long addition with subtraction → KILLED

2.2
Location : groupInstructionsByPartition
Killed by : com.renomad.minum.database.DbEngine2Tests.test_ConvertingDatabase_Db_To_DbEngine2(com.renomad.minum.database.DbEngine2Tests)
Replaced long subtraction with addition → KILLED

3.3
Location : groupInstructionsByPartition
Killed by : com.renomad.minum.database.DbEngine2Tests.test_ConvertingDatabase_Db_To_DbEngine2(com.renomad.minum.database.DbEngine2Tests)
Replaced long multiplication with division → KILLED

4.4
Location : groupInstructionsByPartition
Killed by : com.renomad.minum.database.DbEngine2Tests.test_Performance(com.renomad.minum.database.DbEngine2Tests)
Replaced long division with multiplication → KILLED

224

1.1
Location : lambda$groupInstructionsByPartition$7
Killed by : com.renomad.minum.database.DbEngine2Tests.test_LoadingData_MultipleThreads(com.renomad.minum.database.DbEngine2Tests)
replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::lambda$groupInstructionsByPartition$7 → KILLED

230

1.1
Location : groupInstructionsByPartition
Killed by : com.renomad.minum.database.DbEngine2Tests.test_LoadingData_MultipleThreads(com.renomad.minum.database.DbEngine2Tests)
replaced return value with Collections.emptyMap for com/renomad/minum/database/DatabaseConsolidator::groupInstructionsByPartition → KILLED

242

1.1
Location : parseDatabaseChangeInstructionString
Killed by : com.renomad.minum.database.DatabaseConsolidatorTests.testParsingDatabaseChangeStrings_EdgeCase_InvalidAction(com.renomad.minum.database.DatabaseConsolidatorTests)
negated conditional → KILLED

244

1.1
Location : parseDatabaseChangeInstructionString
Killed by : com.renomad.minum.database.DatabaseConsolidatorTests.testParsingDatabaseChangeStrings_EdgeCase_InvalidAction(com.renomad.minum.database.DatabaseConsolidatorTests)
negated conditional → KILLED

253

1.1
Location : parseDatabaseChangeInstructionString
Killed by : com.renomad.minum.database.DbFileConverterTests.testConvertDbEngine2FolderStructureToDbClassicForm_EdgeCase_CorruptData(com.renomad.minum.database.DbFileConverterTests)
negated conditional → KILLED

260

1.1
Location : parseDatabaseChangeInstructionString
Killed by : com.renomad.minum.database.DbEngine2Tests.test_LoadingData_MultipleThreads(com.renomad.minum.database.DbEngine2Tests)
replaced return value with null for com/renomad/minum/database/DatabaseConsolidator::parseDatabaseChangeInstructionString → KILLED

273

1.1
Location : getSortedAppendLogs
Killed by : com.renomad.minum.database.DbEngine2Tests.test_LoadingData_NegativeCase(com.renomad.minum.database.DbEngine2Tests)
negated conditional → KILLED

280

1.1
Location : getSortedAppendLogs
Killed by : com.renomad.minum.database.DbFileConverterTests.testConvertDbEngine2FolderStructureToDbClassicForm_EdgeCase_CorruptData(com.renomad.minum.database.DbFileConverterTests)
replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::getSortedAppendLogs → KILLED

301

1.1
Location : convertFileListToDateList
Killed by : com.renomad.minum.database.DbFileConverterTests.testConvertDbEngine2FolderStructureToDbClassicForm_EdgeCase_CorruptData(com.renomad.minum.database.DbFileConverterTests)
replaced return value with Collections.emptyList for com/renomad/minum/database/DatabaseConsolidator::convertFileListToDateList → KILLED

Active mutators

Tests examined


Report generated by PIT 1.17.0