So... I wrote a "simple" .xlsx parser only for getting the checkboxes since I couldn't get it working with apache poi, here you go.
The code currently still has 2 problems, which I would appreciate some help with:
package com.osiris.danielmanager.excel;
import org.junit.jupiter.api.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
public class ParseExcelForCheckboxes {
public static List<CheckboxInfo> parseXLSX(File file) throws Exception {
List<CheckboxInfo> checkboxes = new ArrayList<>();
Map<String, String> sheetNames = new HashMap<>();
Map<String, List<String>> sheetAndRelationshipPaths = new HashMap<>();
try (ZipInputStream zis = new ZipInputStream(new FileInputStream(file))) {
ZipEntry entry;
Map<String, String> xmlFiles = new HashMap<>();
// Extract XML files from .xlsx
while ((entry = zis.getNextEntry()) != null) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
byte[] buffer = new byte[1024];
int length;
while ((length = zis.read(buffer)) > 0) {
baos.write(buffer, 0, length);
}
xmlFiles.put(entry.getName(), baos.toString(StandardCharsets.UTF_8));
}
// Parse sheet names and relationships
if (xmlFiles.containsKey("xl/workbook.xml")) {
String workbookXml = xmlFiles.get("xl/workbook.xml");
Document doc = parseXml(workbookXml);
NodeList sheets = doc.getElementsByTagName("sheet");
for (int i = 0; i < sheets.getLength(); i++) {
Element sheet = (Element) sheets.item(i);
String sheetId = sheet.getAttribute("sheetId");
String sheetName = sheet.getAttribute("name");
sheetNames.put(sheetId, sheetName);
// Find the corresponding relationship for each sheet
String sheetRelsPath = "xl/worksheets/_rels/sheet" + sheetId + ".xml.rels";
if (xmlFiles.containsKey(sheetRelsPath)) {
String relsXml = xmlFiles.get(sheetRelsPath);
Document relsDoc = parseXml(relsXml);
NodeList relationships = relsDoc.getElementsByTagName("Relationship");
for (int j = 0; j < relationships.getLength(); j++) {
Element relationship = (Element) relationships.item(j);
String type = relationship.getAttribute("Type");
if (type.contains("ctrlProp")) {
String absolutePath = relationship.getAttribute("Target").replace("../ctrlProps/", "xl/ctrlProps/");
var list = sheetAndRelationshipPaths.get(sheetId);
if (list == null) {
list = new ArrayList<>();
sheetAndRelationshipPaths.put(sheetId, list);
}
list.add(absolutePath);
}
}
}
}
}
// Parse checkboxes in each sheet
for (String sheetId : sheetNames.keySet()) {
String sheetName = sheetNames.get(sheetId);
if (sheetAndRelationshipPaths.containsKey(sheetId)) {
// Extract the control properties xml for checkboxes
for (String xmlFilePath : sheetAndRelationshipPaths.get(sheetId)) {
String ctrlPropsXml = xmlFiles.get(xmlFilePath);
Objects.requireNonNull(ctrlPropsXml);
Document ctrlDoc = parseXml(ctrlPropsXml);
NodeList controls = ctrlDoc.getElementsByTagName("formControlPr");
for (int i = 0; i < controls.getLength(); i++) {
Element control = (Element) controls.item(i);
if ("CheckBox".equals(control.getAttribute("objectType"))) {
CheckboxInfo checkboxInfo = new CheckboxInfo();
checkboxInfo.sheetName = sheetName;
checkboxInfo.isChecked = "Checked".equalsIgnoreCase(control.getAttribute("checked"));
checkboxInfo.cellReference = control.getAttribute("cellReference");
checkboxes.add(checkboxInfo);
}
}
}
}
}
}
return checkboxes;
}
private static Document parseXml(String xmlContent) throws Exception {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
return builder.parse(new ByteArrayInputStream(xmlContent.getBytes()));
}
public static void main(String[] args) {
try {
File file = new File("example.xlsx"); // Replace with your .xlsx file path
List<CheckboxInfo> checkboxes = parseXLSX(file);
for (CheckboxInfo checkbox : checkboxes) {
System.out.println(checkbox);
}
} catch (Exception e) {
e.printStackTrace();
}
}
@Test
void test() throws Exception {
var f = Paths.get("./simple.xlsx").toFile();
var result = parseXLSX(f);
System.out.println();
}
public static class CheckboxInfo {
public String sheetName;
public boolean isChecked;
public String cellReference;
@Override
public String toString() {
return "Checkbox [Sheet: " + sheetName + ", Checked: " + isChecked + ", Cell: " + cellReference + "]";
}
}
}