79347161

Date: 2025-01-10 22:20:37
Score: 1
Natty:
Report link

So... I wrote a "simple" .xlsx parser only for getting the checkboxes since I couldn't get it working with apache poi, here you go.

The code currently still has 2 problems, which I would appreciate some help with:

  1. No approximate for checkbox position (row/column)
  2. The returned checkboxes list seems to be out of order, optimally the checkboxes should be added by going through a row, adding all checkboxes from left to right and so on...
package com.osiris.danielmanager.excel;

import org.junit.jupiter.api.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

public class ParseExcelForCheckboxes {

    public static List<CheckboxInfo> parseXLSX(File file) throws Exception {
        List<CheckboxInfo> checkboxes = new ArrayList<>();
        Map<String, String> sheetNames = new HashMap<>();
        Map<String, List<String>> sheetAndRelationshipPaths = new HashMap<>();

        try (ZipInputStream zis = new ZipInputStream(new FileInputStream(file))) {
            ZipEntry entry;
            Map<String, String> xmlFiles = new HashMap<>();

            // Extract XML files from .xlsx
            while ((entry = zis.getNextEntry()) != null) {
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                byte[] buffer = new byte[1024];
                int length;
                while ((length = zis.read(buffer)) > 0) {
                    baos.write(buffer, 0, length);
                }
                xmlFiles.put(entry.getName(), baos.toString(StandardCharsets.UTF_8));
            }

            // Parse sheet names and relationships
            if (xmlFiles.containsKey("xl/workbook.xml")) {
                String workbookXml = xmlFiles.get("xl/workbook.xml");
                Document doc = parseXml(workbookXml);
                NodeList sheets = doc.getElementsByTagName("sheet");
                for (int i = 0; i < sheets.getLength(); i++) {
                    Element sheet = (Element) sheets.item(i);
                    String sheetId = sheet.getAttribute("sheetId");
                    String sheetName = sheet.getAttribute("name");
                    sheetNames.put(sheetId, sheetName);

                    // Find the corresponding relationship for each sheet
                    String sheetRelsPath = "xl/worksheets/_rels/sheet" + sheetId + ".xml.rels";
                    if (xmlFiles.containsKey(sheetRelsPath)) {
                        String relsXml = xmlFiles.get(sheetRelsPath);
                        Document relsDoc = parseXml(relsXml);
                        NodeList relationships = relsDoc.getElementsByTagName("Relationship");
                        for (int j = 0; j < relationships.getLength(); j++) {
                            Element relationship = (Element) relationships.item(j);
                            String type = relationship.getAttribute("Type");
                            if (type.contains("ctrlProp")) {
                                String absolutePath = relationship.getAttribute("Target").replace("../ctrlProps/", "xl/ctrlProps/");
                                var list = sheetAndRelationshipPaths.get(sheetId);
                                if (list == null) {
                                    list = new ArrayList<>();
                                    sheetAndRelationshipPaths.put(sheetId, list);
                                }
                                list.add(absolutePath);
                            }
                        }
                    }
                }
            }

            // Parse checkboxes in each sheet
            for (String sheetId : sheetNames.keySet()) {
                String sheetName = sheetNames.get(sheetId);
                if (sheetAndRelationshipPaths.containsKey(sheetId)) {

                    // Extract the control properties xml for checkboxes
                    for (String xmlFilePath : sheetAndRelationshipPaths.get(sheetId)) {
                        String ctrlPropsXml = xmlFiles.get(xmlFilePath);
                        Objects.requireNonNull(ctrlPropsXml);
                        Document ctrlDoc = parseXml(ctrlPropsXml);

                        NodeList controls = ctrlDoc.getElementsByTagName("formControlPr");
                        for (int i = 0; i < controls.getLength(); i++) {
                            Element control = (Element) controls.item(i);
                            if ("CheckBox".equals(control.getAttribute("objectType"))) {
                                CheckboxInfo checkboxInfo = new CheckboxInfo();
                                checkboxInfo.sheetName = sheetName;
                                checkboxInfo.isChecked = "Checked".equalsIgnoreCase(control.getAttribute("checked"));
                                checkboxInfo.cellReference = control.getAttribute("cellReference");
                                checkboxes.add(checkboxInfo);
                            }
                        }
                    }
                }
            }
        }

        return checkboxes;
    }

    private static Document parseXml(String xmlContent) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        return builder.parse(new ByteArrayInputStream(xmlContent.getBytes()));
    }

    public static void main(String[] args) {
        try {
            File file = new File("example.xlsx"); // Replace with your .xlsx file path
            List<CheckboxInfo> checkboxes = parseXLSX(file);
            for (CheckboxInfo checkbox : checkboxes) {
                System.out.println(checkbox);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Test
    void test() throws Exception {
        var f = Paths.get("./simple.xlsx").toFile();
        var result = parseXLSX(f);
        System.out.println();
    }

    public static class CheckboxInfo {
        public String sheetName;
        public boolean isChecked;
        public String cellReference;

        @Override
        public String toString() {
            return "Checkbox [Sheet: " + sheetName + ", Checked: " + isChecked + ", Cell: " + cellReference + "]";
        }
    }
}

Reasons:
  • Blacklisted phrase (1.5): would appreciate
  • Long answer (-1):
  • Has code block (-0.5):
  • Self-answer (0.5):
  • Low reputation (0.5):
Posted by: Osiris Team