In answer to @Saurabh Verma's comment asking: how can I parse nested tables in HTML using Jsoup and convert the extracted data to JSON?
I had a use case where I needed to parse nested tables in HTML and extract the data into JSON format. Below is an example of how to navigate through nested tables and extract the data using Jsoup.
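The key step is selecting the nested table that sits in the cell right after the "Content Nodes:" label: `Element contentNodesTable = doc.select("td:contains(Content Nodes:) + td table").first();`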
public class JsoupExample {
public static void main(String[] args) {
String html = "<table border="1"><tr><td>Start Date:</td><td>2024-03-27 04:04:47.612PM</td></tr><tr><td>End Date:</td><td>2024-03-27 04:04:47.737PM</td></tr><tr><td>Duration:</td><td>0d 0h 0m 0s 125.237ms</td></tr><tr><td>Successful:</td><td>Yes</td></tr><tr><td>Content Nodes:</td><td><table border=\"1\" cellspacing=\"0\" cellpadding=\"1\"><tr><td># Created</td><td># Replaced</td><td># Skipped</td><td>Data Written</td><td># Properties</td></tr><tr><td>0</td><td>1</td><td>0</td><td>180.37kB</td><td>26</td></tr></table></td></tr></table>";
Document doc = Jsoup.parse(html);
Map<String, Object> rep = new HashMap<>();
rep.put("Start Date", doc.select("td:contains(Start Date:) + td").text());
rep.put("End Date", doc.select("td:contains(End Date:) + td").text());
rep.put("Duration", doc.select("td:contains(Duration:) + td").text());
rep.put("Successful", doc.select("td:contains(Successful:) + td").text());
Map<String, Object> contentNodes = new HashMap<>();
Element contentNodesTable = doc.select("td:contains(Content Nodes:) + td table").first();
// Extract data from the inner table
Elements rows = contentNodesTable.select("tr");
Elements headers = rows.get(0).select("td");
Elements values = rows.get(1).select("td");
for (int i = 0; i < headers.size(); i++) {
String header = headers.get(i).text();
String value = values.get(i).text();
contentNodes.put(header, value);
}
rep.put("Content Nodes", contentNodes);
System.out.println("JSON Response: " + rep);
}
}