Thanks a lot for the suggestion! It was indeed much simpler to just modify the JSON results returned by the Employee vector search and the other vector indexes.
I implemented a solution that builds upon your approach and adds some robustness for handling the different kinds of empty values in my Neo4j vector search results.
def filter_empty_connections(results):
    """
    Filter out empty connections from vector search results across all data types
    (Employee, Work, Project, WorkAllocation, WorkScheduleRule).

    A connection is kept only when at least one of its fields other than
    ``"type"`` holds a non-empty value. ``None``, ``""``, and lists/dicts that
    are empty or contain only empty values (checked recursively) count as
    empty; ``0`` and ``False`` are treated as real data.

    Args:
        results: Mapping with a ``"results"`` list of item dicts. Each item
            may carry a ``"connections"`` list of connection dicts.

    Returns:
        ``results`` itself when it is falsy or has no ``"results"`` key;
        otherwise a new ``{"results": [...]}`` dict. Items whose connections
        get filtered are returned as shallow copies, so the caller's input is
        never modified.
    """
    if not results or "results" not in results:
        return results

    def is_empty_value(val):
        """Check if a value is considered empty."""
        if isinstance(val, dict):
            # all() over an empty container is True, which covers {} and [].
            return all(is_empty_value(v) for v in val.values())
        if isinstance(val, list):
            return all(is_empty_value(entry) for entry in val)
        return val is None or val == ""

    def has_data(conn):
        """Return True if any non-type field of the connection is non-empty."""
        return any(
            key != "type" and not is_empty_value(val)
            for key, val in conn.items()
        )

    def filter_single_item(item):
        """Return the item with empty entries removed from its connections."""
        connections = item.get("connections")
        # A missing or null connections field has nothing to filter.
        if connections is None:
            return item
        # Build a shallow copy instead of assigning into the caller's dict.
        return {**item, "connections": [c for c in connections if has_data(c)]}

    return {"results": [filter_single_item(item) for item in results["results"]]}
Then I integrated it into my index functions like this:
def employee_details_index(query, query_embedding, n_results=50):
    """Shape employee search rows into dicts and drop their empty connections.

    NOTE(review): the query execution that produces ``results`` is elided in
    this snippet; each row is assumed to expose ``employeeName``, ``score``,
    ``employeeJson`` and ``connectionsJson`` - confirm against the real query.
    """
    # ... existing query execution code ...
    structured_results = [
        {
            "employeeName": row["employeeName"],
            "score": row["score"],
            # Both JSON columns arrive as strings and are decoded here.
            "employee": json.loads(row["employeeJson"]),
            "connections": json.loads(row["connectionsJson"]),
        }
        for row in results
    ]
    # Apply filtering before returning
    return filter_empty_connections({"results": structured_results})
This approach successfully removed empty connections like "education": [] and "has_unavailability": [] from the results while keeping the connection entries that had actual data.
Thank you again for pointing me in the right direction! This solution worked perfectly for my use case.