1 | package com.renomad.minum.htmlparsing; | |
2 | ||
3 | import java.util.ArrayList; | |
4 | import java.util.List; | |
5 | import java.util.Map; | |
6 | import java.util.Objects; | |
7 | ||
8 | /** | |
9 | * Represents the expected types of things we may encounter when parsing an HTML string, which | |
10 | * for our purposes is {@link ParseNodeType}. | |
11 | * <p> | |
12 | * See <a href="https://www.w3.org/TR/2011/WD-html-markup-20110113/syntax.html#syntax-elements">W3.org Elements</a> | |
13 | * </p> | |
14 | */ | |
15 | public final class HtmlParseNode { | |
16 | ||
17 | private final ParseNodeType type; | |
18 | private final TagInfo tagInfo; | |
19 | private final List<HtmlParseNode> innerContent; | |
20 | private final String textContent; | |
21 | ||
22 | public HtmlParseNode(ParseNodeType type, | |
23 | TagInfo tagInfo, | |
24 | List<HtmlParseNode> innerContent, | |
25 | String textContent) { | |
26 | ||
27 | this.type = type; | |
28 | this.tagInfo = tagInfo; | |
29 | this.innerContent = new ArrayList<>(innerContent); | |
30 | this.textContent = textContent; | |
31 | } | |
32 | ||
33 | public static final HtmlParseNode EMPTY = new HtmlParseNode(ParseNodeType.ELEMENT, TagInfo.EMPTY, List.of(), "EMPTY HTMLPARSENODE"); | |
34 | ||
35 | /** | |
36 | * Return a list of strings of the text content of the tree. | |
37 | * <p> | |
38 | * This method traverses the tree from this node downwards, | |
39 | * adding the text content as it goes. Its main purpose is to | |
40 | * quickly render all the strings out of an HTML document at once. | |
41 | * </p> | |
42 | */ | |
43 | public List<String> print() { | |
44 | var myList = new ArrayList<String>(); | |
45 |
1
1. print : removed call to com/renomad/minum/htmlparsing/HtmlParseNode::recursiveTreeWalk → KILLED |
recursiveTreeWalk(myList, innerContent, textContent); |
46 |
1
1. print : replaced return value with Collections.emptyList for com/renomad/minum/htmlparsing/HtmlParseNode::print → KILLED |
return myList; |
47 | } | |
48 | ||
49 | static void recursiveTreeWalk(List<String> myList, List<HtmlParseNode> innerContent, String textContent) { | |
50 | for (HtmlParseNode hpn : innerContent) { | |
51 |
1
1. recursiveTreeWalk : removed call to com/renomad/minum/htmlparsing/HtmlParseNode::recursiveTreeWalk → KILLED |
recursiveTreeWalk(myList, hpn.innerContent, hpn.textContent); |
52 | } | |
53 |
2
1. recursiveTreeWalk : negated conditional → KILLED 2. recursiveTreeWalk : negated conditional → KILLED |
if (textContent != null && ! textContent.isBlank()) { |
54 | myList.add(textContent); | |
55 | } | |
56 | } | |
57 | ||
58 | /** | |
59 | * Return a list of {@link HtmlParseNode} nodes in the HTML that match provided attributes. | |
60 | */ | |
61 | public List<HtmlParseNode> search(TagName tagName, Map<String, String> attributes) { | |
62 | var myList = new ArrayList<HtmlParseNode>(); | |
63 |
1
1. search : removed call to com/renomad/minum/htmlparsing/HtmlParseNode::recursiveTreeWalkSearch → KILLED |
recursiveTreeWalkSearch(myList, tagName, attributes); |
64 |
1
1. search : replaced return value with Collections.emptyList for com/renomad/minum/htmlparsing/HtmlParseNode::search → KILLED |
return myList; |
65 | } | |
66 | ||
67 | private void recursiveTreeWalkSearch(List<HtmlParseNode> myList, TagName tagName, Map<String, String> attributes) { | |
68 |
2
1. recursiveTreeWalkSearch : negated conditional → KILLED 2. recursiveTreeWalkSearch : negated conditional → KILLED |
if (this.tagInfo.getTagName().equals(tagName) && this.tagInfo.containsAllAttributes(attributes.entrySet())) { |
69 | myList.add(this); | |
70 | } | |
71 | for (var htmlParseNode : innerContent) { | |
72 |
1
1. recursiveTreeWalkSearch : removed call to com/renomad/minum/htmlparsing/HtmlParseNode::recursiveTreeWalkSearch → KILLED |
htmlParseNode.recursiveTreeWalkSearch(myList, tagName, attributes); |
73 | } | |
74 | } | |
75 | ||
76 | /** | |
77 | * Return the inner text of these nodes | |
78 | * <p> | |
79 | * If this element has only one inner | |
80 | * content item, and it's a {@link ParseNodeType#CHARACTERS} element, return its text content. | |
81 | * </p> | |
82 | * <p> | |
83 | * If there is more than one node, run the {@link #print()} command on each, appending | |
84 | * to a single string. | |
85 | * </p> | |
86 | */ | |
87 | static String innerText(List<HtmlParseNode> innerContent) { | |
88 |
1
1. innerText : negated conditional → KILLED |
if (innerContent == null) return ""; |
89 |
2
1. innerText : negated conditional → KILLED 2. innerText : negated conditional → KILLED |
if (innerContent.size() == 1 && innerContent.getFirst().type == ParseNodeType.CHARACTERS) { |
90 |
1
1. innerText : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::innerText → KILLED |
return innerContent.getFirst().textContent; |
91 | } else { | |
92 | StringBuilder sb = new StringBuilder(); | |
93 | for (HtmlParseNode hpn : innerContent) { | |
94 | sb.append(hpn.print()); | |
95 | } | |
96 |
1
1. innerText : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::innerText → KILLED |
return sb.toString(); |
97 | } | |
98 | } | |
99 | ||
100 | public ParseNodeType getType() { | |
101 |
1
1. getType : replaced return value with null for com/renomad/minum/htmlparsing/HtmlParseNode::getType → KILLED |
return type; |
102 | } | |
103 | ||
104 | public TagInfo getTagInfo() { | |
105 |
1
1. getTagInfo : replaced return value with null for com/renomad/minum/htmlparsing/HtmlParseNode::getTagInfo → KILLED |
return tagInfo; |
106 | } | |
107 | ||
108 | public List<HtmlParseNode> getInnerContent() { | |
109 |
1
1. getInnerContent : replaced return value with Collections.emptyList for com/renomad/minum/htmlparsing/HtmlParseNode::getInnerContent → KILLED |
return new ArrayList<>(innerContent); |
110 | } | |
111 | ||
112 | void addToInnerContent(HtmlParseNode htmlParseNode) { | |
113 | innerContent.add(htmlParseNode); | |
114 | } | |
115 | ||
116 | public String getTextContent() { | |
117 |
1
1. getTextContent : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::getTextContent → KILLED |
return textContent; |
118 | } | |
119 | ||
120 | @Override | |
121 | public boolean equals(Object o) { | |
122 |
2
1. equals : negated conditional → KILLED 2. equals : replaced boolean return with false for com/renomad/minum/htmlparsing/HtmlParseNode::equals → KILLED |
if (this == o) return true; |
123 |
2
1. equals : negated conditional → KILLED 2. equals : replaced boolean return with true for com/renomad/minum/htmlparsing/HtmlParseNode::equals → KILLED |
if (!(o instanceof HtmlParseNode that)) return false; |
124 |
5
1. equals : negated conditional → KILLED 2. equals : negated conditional → KILLED 3. equals : negated conditional → KILLED 4. equals : negated conditional → KILLED 5. equals : replaced boolean return with true for com/renomad/minum/htmlparsing/HtmlParseNode::equals → KILLED |
return type == that.type && Objects.equals(tagInfo, that.tagInfo) && Objects.equals(innerContent, that.innerContent) && Objects.equals(textContent, that.textContent); |
125 | } | |
126 | ||
127 | @Override | |
128 | public int hashCode() { | |
129 |
1
1. hashCode : replaced int return with 0 for com/renomad/minum/htmlparsing/HtmlParseNode::hashCode → KILLED |
return Objects.hash(type, tagInfo, innerContent, textContent); |
130 | } | |
131 | ||
132 | public String innerText() { | |
133 |
1
1. innerText : replaced return value with "" for com/renomad/minum/htmlparsing/HtmlParseNode::innerText → KILLED |
return innerText(innerContent); |
134 | } | |
135 | ||
136 | } | |
Mutations | ||
45 |
1.1 |
|
46 |
1.1 |
|
51 |
1.1 |
|
53 |
1.1 2.2 |
|
63 |
1.1 |
|
64 |
1.1 |
|
68 |
1.1 2.2 |
|
72 |
1.1 |
|
88 |
1.1 |
|
89 |
1.1 2.2 |
|
90 |
1.1 |
|
96 |
1.1 |
|
101 |
1.1 |
|
105 |
1.1 |
|
109 |
1.1 |
|
117 |
1.1 |
|
122 |
1.1 2.2 |
|
123 |
1.1 2.2 |
|
124 |
1.1 2.2 3.3 4.4 5.5 |
|
129 |
1.1 |
|
133 |
1.1 |