| 1 | package com.renomad.minum.htmlparsing; | |
| 2 | ||
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
| 7 | ||
| 8 | /** | |
| 9 | * Represents the expected types of things we may encounter when parsing an HTML string, which | |
| 10 | * for our purposes is {@link ParseNodeType}. | |
| 11 | * <p> | |
| 12 | * See <a href="https://www.w3.org/TR/2011/WD-html-markup-20110113/syntax.html#syntax-elements">W3.org Elements</a> | |
| 13 | * </p> | |
| 14 | */ | |
| 15 | public final class HtmlParseNode { | |
| 16 | ||
| 17 | private final ParseNodeType type; | |
| 18 | private final TagInfo tagInfo; | |
| 19 | private final List<HtmlParseNode> innerContent; | |
| 20 | private final String textContent; | |
| 21 | ||
| 22 | public HtmlParseNode(ParseNodeType type, | |
| 23 | TagInfo tagInfo, | |
| 24 | List<HtmlParseNode> innerContent, | |
| 25 | String textContent) { | |
| 26 | ||
| 27 | this.type = type; | |
| 28 | this.tagInfo = tagInfo; | |
| 29 | this.innerContent = new ArrayList<>(innerContent); | |
| 30 | this.textContent = textContent; | |
| 31 | } | |
| 32 | ||
| 33 | public static final HtmlParseNode EMPTY = new HtmlParseNode(ParseNodeType.ELEMENT, TagInfo.EMPTY, List.of(), "EMPTY HTMLPARSENODE"); | |
| 34 | ||
| 35 | /** | |
| 36 | * Return a list of strings of the text content of the tree. | |
| 37 | * <p> | |
| 38 | * This method traverses the tree from this node downwards, | |
| 39 | * adding the text content as it goes. Its main purpose is to | |
| 40 | * quickly render all the strings out of an HTML document at once. | |
| 41 | * </p> | |
| 42 | */ | |
| 43 | public List<String> print() { | |
| 44 | var myList = new ArrayList<String>(); | |
| 45 |
1
1. print : removed call to com/renomad/minum/htmlparsing/HtmlParseNode::recursiveTreeWalk → KILLED |
recursiveTreeWalk(myList, innerContent, textContent); |
| 46 |
1
1. print : replaced return value with Collections.emptyList for com/renomad/minum/htmlparsing/HtmlParseNode::print → KILLED |
return myList; |
| 47 | } | |
| 48 | ||
| 49 | static void recursiveTreeWalk(List<String> myList, List<HtmlParseNode> innerContent, String textContent) { | |
| 50 | for (HtmlParseNode hpn : innerContent) { | |
| 51 |
1
1. recursiveTreeWalk : removed call to com/renomad/minum/htmlparsing/HtmlParseNode::recursiveTreeWalk → KILLED |
recursiveTreeWalk(myList, hpn.innerContent, hpn.textContent); |
| 52 | } | |
| 53 |
2
1. recursiveTreeWalk : negated conditional → KILLED 2. recursiveTreeWalk : negated conditional → KILLED |
if (textContent != null && ! textContent.isBlank()) { |
| 54 | myList.add(textContent); | |
| 55 | } | |
| 56 | } | |
| 57 | ||
| 58 | /** | |
| 59 | * Return a list of {@link HtmlParseNode} nodes in the HTML that match provided attributes. | |
| 60 | */ | |
| 61 | public List<HtmlParseNode> search(TagName tagName, Map<String, String> attributes) { | |
| 62 | var myList = new ArrayList<HtmlParseNode>(); | |
| 63 |
1
1. search : removed call to com/renomad/minum/htmlparsing/HtmlParseNode::recursiveTreeWalkSearch → KILLED |
recursiveTreeWalkSearch(myList, tagName, attributes); |
| 64 |
1
1. search : replaced return value with Collections.emptyList for com/renomad/minum/htmlparsing/HtmlParseNode::search → KILLED |
return myList; |
| 65 | } | |
| 66 | ||
| 67 | private void recursiveTreeWalkSearch(List<HtmlParseNode> myList, TagName tagName, Map<String, String> attributes) { | |
| 68 |
2
1. recursiveTreeWalkSearch : negated conditional → KILLED 2. recursiveTreeWalkSearch : negated conditional → KILLED |
if (this.tagInfo.getTagName().equals(tagName) && this.tagInfo.containsAllAttributes(attributes.entrySet())) { |
| 69 | myList.add(this); | |
| 70 | } | |
| 71 | for (var htmlParseNode : innerContent) { | |
| 72 |
1
1. recursiveTreeWalkSearch : removed call to com/renomad/minum/htmlparsing/HtmlParseNode::recursiveTreeWalkSearch → KILLED |
htmlParseNode.recursiveTreeWalkSearch(myList, tagName, attributes); |
| 73 | } | |
| 74 | } | |
| 75 | ||
| 76 | /** | |
| 77 | * Return the inner text of these nodes | |
| 78 | * <p> | |
| 79 | * If this element has only one inner | |
| 80 | * content item, and it's a {@link ParseNodeType#CHARACTERS} element, return its text content. | |
| 81 | * </p> | |
| 82 | * <p> | |
| 83 | * If there is more than one node, run the {@link #print()} command on each, appending | |
| 84 | * to a single string. | |
| 85 | * </p> | |
| 86 | */ | |
| 87 | static String innerText(List<HtmlParseNode> innerContent) { | |
| 88 |
1
1. innerText : negated conditional → KILLED |
if (innerContent == null) return ""; |
| 89 |
2
1. innerText : negated conditional → KILLED 2. innerText : negated conditional → KILLED |
if (innerContent.size() == 1 && innerContent.getFirst().type == ParseNodeType.CHARACTERS) { |
| 90 |
1
1. innerText : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::innerText → KILLED |
return innerContent.getFirst().textContent; |
| 91 | } else { | |
| 92 | StringBuilder sb = new StringBuilder(); | |
| 93 | for (HtmlParseNode hpn : innerContent) { | |
| 94 | sb.append(hpn.print()); | |
| 95 | } | |
| 96 |
1
1. innerText : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::innerText → KILLED |
return sb.toString(); |
| 97 | } | |
| 98 | } | |
| 99 | ||
| 100 | /** | |
| 101 | * Gets the type of this node - either it's an element, with opening and | |
| 102 | * closing tags and attributes and an inner content, or it's just plain text. | |
| 103 | */ | |
| 104 | public ParseNodeType getType() { | |
| 105 |
1
1. getType : replaced return value with null for com/renomad/minum/htmlparsing/HtmlParseNode::getType → KILLED |
return type; |
| 106 | } | |
| 107 | ||
| 108 | /** | |
| 109 | * Returns the {@link TagInfo}, which contains valuable information | |
| 110 | * like the type of element (p, a, div, and so on) and attributes | |
| 111 | * like class, id, etc. | |
| 112 | */ | |
| 113 | public TagInfo getTagInfo() { | |
| 114 |
1
1. getTagInfo : replaced return value with null for com/renomad/minum/htmlparsing/HtmlParseNode::getTagInfo → KILLED |
return tagInfo; |
| 115 | } | |
| 116 | ||
| 117 | /** | |
| 118 | * The inner content is the data between the opening and closing | |
| 119 | * tags of this element, comprised of potentially other complex | |
| 120 | * elements and/or characters or a mix (or nothing at all, which | |
| 121 | * will return an empty list). | |
| 122 | */ | |
| 123 | public List<HtmlParseNode> getInnerContent() { | |
| 124 |
1
1. getInnerContent : replaced return value with Collections.emptyList for com/renomad/minum/htmlparsing/HtmlParseNode::getInnerContent → KILLED |
return new ArrayList<>(innerContent); |
| 125 | } | |
| 126 | ||
| 127 | void addToInnerContent(HtmlParseNode htmlParseNode) { | |
| 128 | innerContent.add(htmlParseNode); | |
| 129 | } | |
| 130 | ||
| 131 | /** | |
| 132 | * If the {@link ParseNodeType} is {@link ParseNodeType#CHARACTERS}, then this | |
| 133 | * will have text content. Otherwise, it returns an empty string. | |
| 134 | */ | |
| 135 | public String getTextContent() { | |
| 136 |
1
1. getTextContent : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::getTextContent → KILLED |
return textContent; |
| 137 | } | |
| 138 | ||
| 139 | /** | |
| 140 | * Return the inner text of a node | |
| 141 | * <p> | |
| 142 | * If this element has only one inner | |
| 143 | * content item, and it's a {@link ParseNodeType#CHARACTERS} element, return its text content. | |
| 144 | * </p> | |
| 145 | * <p> | |
| 146 | * If there is more than one node, concatenates them to a single string, with each section wrapped | |
| 147 | * in square brackets. | |
| 148 | * </p> | |
| 149 | */ | |
| 150 | public String innerText() { | |
| 151 |
1
1. innerText : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::innerText → KILLED |
return innerText(innerContent); |
| 152 | } | |
| 153 | ||
| 154 | @Override | |
| 155 | public boolean equals(Object o) { | |
| 156 |
2
1. equals : negated conditional → KILLED 2. equals : replaced boolean return with false for com/renomad/minum/htmlparsing/HtmlParseNode::equals → KILLED |
if (this == o) return true; |
| 157 |
2
1. equals : negated conditional → KILLED 2. equals : replaced boolean return with true for com/renomad/minum/htmlparsing/HtmlParseNode::equals → KILLED |
if (!(o instanceof HtmlParseNode that)) return false; |
| 158 |
5
1. equals : negated conditional → KILLED 2. equals : negated conditional → KILLED 3. equals : negated conditional → KILLED 4. equals : negated conditional → KILLED 5. equals : replaced boolean return with true for com/renomad/minum/htmlparsing/HtmlParseNode::equals → KILLED |
return type == that.type && Objects.equals(tagInfo, that.tagInfo) && Objects.equals(innerContent, that.innerContent) && Objects.equals(textContent, that.textContent); |
| 159 | } | |
| 160 | ||
| 161 | @Override | |
| 162 | public int hashCode() { | |
| 163 |
1
1. hashCode : replaced int return with 0 for com/renomad/minum/htmlparsing/HtmlParseNode::hashCode → KILLED |
return Objects.hash(type, tagInfo, innerContent, textContent); |
| 164 | } | |
| 165 | ||
| 166 | @Override | |
| 167 | public String toString() { | |
| 168 |
1
1. toString : negated conditional → KILLED |
if (this.getType().equals(ParseNodeType.ELEMENT)) { |
| 169 | var sb = new StringBuilder(); | |
| 170 | sb.append("<"); | |
| 171 | String lowercaseElement = this.tagInfo.getTagName().toString().toLowerCase(); | |
| 172 | sb.append(lowercaseElement); | |
| 173 | ||
| 174 | for (Map.Entry<String, String> entry : this.tagInfo.getAttributes().entrySet()) { | |
| 175 | sb.append(" "); | |
| 176 | sb.append(entry.getKey()); | |
| 177 | sb.append("="); | |
| 178 | sb.append("\"").append(entry.getValue()).append("\""); | |
| 179 | } | |
| 180 | ||
| 181 | sb.append(">"); | |
| 182 | ||
| 183 | for (HtmlParseNode hpn : this.innerContent) { | |
| 184 | sb.append(hpn); | |
| 185 | } | |
| 186 | ||
| 187 | sb.append("</").append(lowercaseElement).append(">"); | |
| 188 |
1
1. toString : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::toString → KILLED |
return sb.toString(); |
| 189 | } else { | |
| 190 |
1
1. toString : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::toString → KILLED |
return textContent; |
| 191 | } | |
| 192 | ||
| 193 | ||
| 194 | } | |
| 195 | ||
| 196 | } | |
Mutations | ||
| 45 |
1.1 |
|
| 46 |
1.1 |
|
| 51 |
1.1 |
|
| 53 |
1.1 2.2 |
|
| 63 |
1.1 |
|
| 64 |
1.1 |
|
| 68 |
1.1 2.2 |
|
| 72 |
1.1 |
|
| 88 |
1.1 |
|
| 89 |
1.1 2.2 |
|
| 90 |
1.1 |
|
| 96 |
1.1 |
|
| 105 |
1.1 |
|
| 114 |
1.1 |
|
| 124 |
1.1 |
|
| 136 |
1.1 |
|
| 151 |
1.1 |
|
| 156 |
1.1 2.2 |
|
| 157 |
1.1 2.2 |
|
| 158 |
1.1 2.2 3.3 4.4 5.5 |
|
| 163 |
1.1 |
|
| 168 |
1.1 |
|
| 188 |
1.1 |
|
| 190 |
1.1 |