1 | package com.renomad.minum.htmlparsing; | |
2 | ||
3 | import java.util.ArrayList; | |
4 | import java.util.List; | |
5 | import java.util.Map; | |
6 | import java.util.Objects; | |
7 | ||
8 | /** | |
9 | * Represents the expected types of things we may encounter when parsing an HTML string, which | |
10 | * for our purposes is {@link ParseNodeType}. | |
11 | * <p> | |
12 | * See <a href="https://www.w3.org/TR/2011/WD-html-markup-20110113/syntax.html#syntax-elements">W3.org Elements</a> | |
13 | * </p> | |
14 | */ | |
15 | public final class HtmlParseNode { | |
16 | ||
17 | private final ParseNodeType type; | |
18 | private final TagInfo tagInfo; | |
19 | private final List<HtmlParseNode> innerContent; | |
20 | private final String textContent; | |
21 | ||
22 | public HtmlParseNode(ParseNodeType type, | |
23 | TagInfo tagInfo, | |
24 | List<HtmlParseNode> innerContent, | |
25 | String textContent) { | |
26 | ||
27 | this.type = type; | |
28 | this.tagInfo = tagInfo; | |
29 | this.innerContent = new ArrayList<>(innerContent); | |
30 | this.textContent = textContent; | |
31 | } | |
32 | ||
33 | public static final HtmlParseNode EMPTY = new HtmlParseNode(ParseNodeType.ELEMENT, TagInfo.EMPTY, List.of(), "EMPTY HTMLPARSENODE"); | |
34 | ||
35 | /** | |
36 | * Return a list of strings of the text content of the tree. | |
37 | * <p> | |
38 | * This method traverses the tree from this node downwards, | |
39 | * adding the text content as it goes. Its main purpose is to | |
40 | * quickly render all the strings out of an HTML document at once. | |
41 | * </p> | |
42 | */ | |
43 | public List<String> print() { | |
44 | var myList = new ArrayList<String>(); | |
45 |
1
1. print : removed call to com/renomad/minum/htmlparsing/HtmlParseNode::recursiveTreeWalk → KILLED |
recursiveTreeWalk(myList, innerContent, textContent); |
46 |
1
1. print : replaced return value with Collections.emptyList for com/renomad/minum/htmlparsing/HtmlParseNode::print → KILLED |
return myList; |
47 | } | |
48 | ||
49 | static void recursiveTreeWalk(List<String> myList, List<HtmlParseNode> innerContent, String textContent) { | |
50 | for (HtmlParseNode hpn : innerContent) { | |
51 |
1
1. recursiveTreeWalk : removed call to com/renomad/minum/htmlparsing/HtmlParseNode::recursiveTreeWalk → KILLED |
recursiveTreeWalk(myList, hpn.innerContent, hpn.textContent); |
52 | } | |
53 |
2
1. recursiveTreeWalk : negated conditional → KILLED 2. recursiveTreeWalk : negated conditional → KILLED |
if (textContent != null && ! textContent.isBlank()) { |
54 | myList.add(textContent); | |
55 | } | |
56 | } | |
57 | ||
58 | /** | |
59 | * Return a list of {@link HtmlParseNode} nodes in the HTML that match provided attributes. | |
60 | */ | |
61 | public List<HtmlParseNode> search(TagName tagName, Map<String, String> attributes) { | |
62 | var myList = new ArrayList<HtmlParseNode>(); | |
63 |
1
1. search : removed call to com/renomad/minum/htmlparsing/HtmlParseNode::recursiveTreeWalkSearch → KILLED |
recursiveTreeWalkSearch(myList, tagName, attributes); |
64 |
1
1. search : replaced return value with Collections.emptyList for com/renomad/minum/htmlparsing/HtmlParseNode::search → KILLED |
return myList; |
65 | } | |
66 | ||
67 | private void recursiveTreeWalkSearch(List<HtmlParseNode> myList, TagName tagName, Map<String, String> attributes) { | |
68 |
2
1. recursiveTreeWalkSearch : negated conditional → KILLED 2. recursiveTreeWalkSearch : negated conditional → KILLED |
if (this.tagInfo.getTagName().equals(tagName) && this.tagInfo.containsAllAttributes(attributes.entrySet())) { |
69 | myList.add(this); | |
70 | } | |
71 | for (var htmlParseNode : innerContent) { | |
72 |
1
1. recursiveTreeWalkSearch : removed call to com/renomad/minum/htmlparsing/HtmlParseNode::recursiveTreeWalkSearch → KILLED |
htmlParseNode.recursiveTreeWalkSearch(myList, tagName, attributes); |
73 | } | |
74 | } | |
75 | ||
76 | /** | |
77 | * Return the inner text of these nodes | |
78 | * <p> | |
79 | * If this element has only one inner | |
80 | * content item, and it's a {@link ParseNodeType#CHARACTERS} element, return its text content. | |
81 | * </p> | |
82 | * <p> | |
83 | * If there is more than one node, run the {@link #print()} command on each, appending | |
84 | * to a single string. | |
85 | * </p> | |
86 | */ | |
87 | static String innerText(List<HtmlParseNode> innerContent) { | |
88 |
1
1. innerText : negated conditional → KILLED |
if (innerContent == null) return ""; |
89 |
2
1. innerText : negated conditional → KILLED 2. innerText : negated conditional → KILLED |
if (innerContent.size() == 1 && innerContent.getFirst().type == ParseNodeType.CHARACTERS) { |
90 |
1
1. innerText : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::innerText → KILLED |
return innerContent.getFirst().textContent; |
91 | } else { | |
92 | StringBuilder sb = new StringBuilder(); | |
93 | for (HtmlParseNode hpn : innerContent) { | |
94 | sb.append(hpn.print()); | |
95 | } | |
96 |
1
1. innerText : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::innerText → KILLED |
return sb.toString(); |
97 | } | |
98 | } | |
99 | ||
100 | /** | |
101 | * Gets the type of this node - either it's an element, with opening and | |
102 | * closing tags and attributes and an inner content, or it's just plain text. | |
103 | */ | |
104 | public ParseNodeType getType() { | |
105 |
1
1. getType : replaced return value with null for com/renomad/minum/htmlparsing/HtmlParseNode::getType → KILLED |
return type; |
106 | } | |
107 | ||
108 | /** | |
109 | * Returns the {@link TagInfo}, which contains valuable information | |
110 | * like the type of element (p, a, div, and so on) and attributes | |
111 | * like class, id, etc. | |
112 | */ | |
113 | public TagInfo getTagInfo() { | |
114 |
1
1. getTagInfo : replaced return value with null for com/renomad/minum/htmlparsing/HtmlParseNode::getTagInfo → KILLED |
return tagInfo; |
115 | } | |
116 | ||
117 | /** | |
118 | * The inner content is the data between the opening and closing | |
119 | * tags of this element, comprised of potentially other complex | |
120 | * elements and/or characters or a mix (or nothing at all, which | |
121 | * will return an empty list). | |
122 | */ | |
123 | public List<HtmlParseNode> getInnerContent() { | |
124 |
1
1. getInnerContent : replaced return value with Collections.emptyList for com/renomad/minum/htmlparsing/HtmlParseNode::getInnerContent → KILLED |
return new ArrayList<>(innerContent); |
125 | } | |
126 | ||
127 | void addToInnerContent(HtmlParseNode htmlParseNode) { | |
128 | innerContent.add(htmlParseNode); | |
129 | } | |
130 | ||
131 | /** | |
132 | * If the {@link ParseNodeType} is {@link ParseNodeType#CHARACTERS}, then this | |
133 | * will have text content. Otherwise, it returns an empty string. | |
134 | */ | |
135 | public String getTextContent() { | |
136 |
1
1. getTextContent : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::getTextContent → KILLED |
return textContent; |
137 | } | |
138 | ||
139 | /** | |
140 | * Return the inner text of a node | |
141 | * <p> | |
142 | * If this element has only one inner | |
143 | * content item, and it's a {@link ParseNodeType#CHARACTERS} element, return its text content. | |
144 | * </p> | |
145 | * <p> | |
146 | * If there is more than one node, concatenates them to a single string, with each section wrapped | |
147 | * in square brackets. | |
148 | * </p> | |
149 | */ | |
150 | public String innerText() { | |
151 |
1
1. innerText : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::innerText → KILLED |
return innerText(innerContent); |
152 | } | |
153 | ||
154 | @Override | |
155 | public boolean equals(Object o) { | |
156 |
2
1. equals : negated conditional → KILLED 2. equals : replaced boolean return with false for com/renomad/minum/htmlparsing/HtmlParseNode::equals → KILLED |
if (this == o) return true; |
157 |
2
1. equals : negated conditional → KILLED 2. equals : replaced boolean return with true for com/renomad/minum/htmlparsing/HtmlParseNode::equals → KILLED |
if (!(o instanceof HtmlParseNode that)) return false; |
158 |
5
1. equals : negated conditional → KILLED 2. equals : negated conditional → KILLED 3. equals : negated conditional → KILLED 4. equals : negated conditional → KILLED 5. equals : replaced boolean return with true for com/renomad/minum/htmlparsing/HtmlParseNode::equals → KILLED |
return type == that.type && Objects.equals(tagInfo, that.tagInfo) && Objects.equals(innerContent, that.innerContent) && Objects.equals(textContent, that.textContent); |
159 | } | |
160 | ||
161 | @Override | |
162 | public int hashCode() { | |
163 |
1
1. hashCode : replaced int return with 0 for com/renomad/minum/htmlparsing/HtmlParseNode::hashCode → TIMED_OUT |
return Objects.hash(type, tagInfo, innerContent, textContent); |
164 | } | |
165 | ||
166 | @Override | |
167 | public String toString() { | |
168 |
1
1. toString : negated conditional → KILLED |
if (this.getType().equals(ParseNodeType.ELEMENT)) { |
169 | var sb = new StringBuilder(); | |
170 | sb.append("<"); | |
171 | String lowercaseElement = this.tagInfo.getTagName().toString().toLowerCase(); | |
172 | sb.append(lowercaseElement); | |
173 | ||
174 | for (Map.Entry<String, String> entry : this.tagInfo.getAttributes().entrySet()) { | |
175 | sb.append(" "); | |
176 | sb.append(entry.getKey()); | |
177 | sb.append("="); | |
178 | sb.append("\"").append(entry.getValue()).append("\""); | |
179 | } | |
180 | ||
181 | sb.append(">"); | |
182 | ||
183 | for (HtmlParseNode hpn : this.innerContent) { | |
184 | sb.append(hpn); | |
185 | } | |
186 | ||
187 | sb.append("</").append(lowercaseElement).append(">"); | |
188 |
1
1. toString : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::toString → KILLED |
return sb.toString(); |
189 | } else { | |
190 |
1
1. toString : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::toString → KILLED |
return textContent; |
191 | } | |
192 | ||
193 | ||
194 | } | |
195 | ||
196 | } | |
Mutations | ||
45 |
1.1 |
|
46 |
1.1 |
|
51 |
1.1 |
|
53 |
1.1 2.2 |
|
63 |
1.1 |
|
64 |
1.1 |
|
68 |
1.1 2.2 |
|
72 |
1.1 |
|
88 |
1.1 |
|
89 |
1.1 2.2 |
|
90 |
1.1 |
|
96 |
1.1 |
|
105 |
1.1 |
|
114 |
1.1 |
|
124 |
1.1 |
|
136 |
1.1 |
|
151 |
1.1 |
|
156 |
1.1 2.2 |
|
157 |
1.1 2.2 |
|
158 |
1.1 2.2 3.3 4.4 5.5 |
|
163 |
1.1 |
|
168 |
1.1 |
|
188 |
1.1 |
|
190 |
1.1 |