medax_pipeline/graphCreation/process_references.py
2025-04-16 22:12:19 +02:00

103 lines
3.9 KiB
Python

import networkx as nx
def parse_synthea_reference(ref):
    """Reduce a Synthea reference string to the key used for node lookup.

    Three shapes are distinguished:

    * a string starting with ``'#'`` yields the sentinel ``'mock'``;
    * a string containing both ``'?'`` and ``'|'`` yields the text between
      the first and second ``'|'`` (or up to the end if there is only one);
    * anything else yields its third ``':'``-separated field, e.g.
      ``'urn:uuid:1234'`` -> ``'1234'``.

    Args:
        ref: The raw reference string.

    Returns:
        The extracted key, or ``'mock'`` for ``'#'``-prefixed references.

    Raises:
        IndexError: If *ref* falls into the last case but contains fewer
            than two ``':'`` characters.
    """
    # '#'-prefixed references are never resolved; callers filter on 'mock'.
    if ref.startswith('#'):
        return 'mock'

    has_query_and_pipe = '?' in ref and '|' in ref
    if has_query_and_pipe:
        pipe_fields = ref.split('|')
        return pipe_fields[1]

    colon_fields = ref.split(':')
    return colon_fields[2]
def _link_parent_to(graph, reference_node, target):
    """Add a 'reference' edge from the parent of *reference_node* to *target*.

    The parent is the first predecessor of *reference_node*; the new edge's
    ``reference_type`` attribute is the reference node's own ``label``.

    Raises:
        IndexError: If *reference_node* has no predecessor.
        KeyError: If *reference_node* has no ``label`` attribute.
    """
    parent = list(graph.predecessors(reference_node))[0]
    reference_type = graph.nodes[reference_node]['label']
    graph.add_edge(parent, target, edge_type='reference', reference_type=reference_type)


def process_references(graph, *, is_synthea=False):
    """Turn ``reference`` nodes into direct edges between their endpoints.

    Every node carrying a ``reference`` attribute is looked up against the
    other nodes of *graph*. For each resolved reference an edge with
    ``edge_type='reference'`` is added from the reference node's parent
    (its first predecessor) to the target, and the reference node itself is
    removed. Finally, all nodes labelled ``'root'``, ``'entry'`` or
    ``'request'`` are removed.

    Args:
        graph: Directed graph to rewrite in place. Assumed to be a
            ``networkx.DiGraph`` (``predecessors`` is required) -- confirm
            against the caller.
        is_synthea: Selects the resolution strategy. Defaults to ``False``,
            which matches the previously hard-coded behaviour.

            * ``False``: the reference string is matched verbatim against
              ``"<resourceType>/<id>"`` keys built from nodes having both
              attributes. Unmatched references are linked to a new node
              ``'dummy_<reference>'`` (``label='dummy'``).
            * ``True``: the reference is first normalised with
              ``parse_synthea_reference``; ``'mock'`` results are ignored.
              The result is matched against bare ``id`` attributes and,
              failing that, against the ``value`` of nodes labelled
              ``'identifier'`` (the edge then targets that identifier
              node's parent). Unmatched references are left in the graph.

    Returns:
        None. *graph* is modified in place.

    Raises:
        IndexError: If a resolved reference node (or, in Synthea mode, a
            matched identifier node) has no predecessor.
        KeyError: If a resolved reference node has no ``label`` attribute.
    """
    reference_nodes = [
        (node, attrs['reference'])
        for node, attrs in graph.nodes(data=True)
        if 'reference' in attrs
    ]
    direct_links = []    # (reference node, referenced node)
    indirect_links = []  # (reference node, matching identifier node)
    dummy_links = []     # (reference node, unresolved reference string)

    if is_synthea:
        id_to_node = {
            attrs['id']: node
            for node, attrs in graph.nodes(data=True)
            if 'id' in attrs
        }
        # .get() keeps nodes that have a 'value' but no 'label' from raising,
        # consistent with the label filter used for the final clean-up.
        identifier_to_node = {
            attrs['value']: node
            for node, attrs in graph.nodes(data=True)
            if 'value' in attrs and attrs.get('label') == 'identifier'
        }
        for ref_node, reference in reference_nodes:
            ref_id = parse_synthea_reference(reference)
            if ref_id == 'mock':
                continue
            if ref_id in id_to_node:
                direct_links.append((ref_node, id_to_node[ref_id]))
            elif ref_id in identifier_to_node:
                indirect_links.append((ref_node, identifier_to_node[ref_id]))
        # An identifier match points at the identifier node; the edge must
        # target the node that owns that identifier, i.e. its parent.
        for ref_node, identifier_node in indirect_links:
            owner = list(graph.predecessors(identifier_node))[0]
            _link_parent_to(graph, ref_node, owner)
    else:
        id_to_node = {
            attrs['resourceType'] + '/' + attrs['id']: node
            for node, attrs in graph.nodes(data=True)
            if 'id' in attrs and 'resourceType' in attrs
        }
        for ref_node, reference in reference_nodes:
            if reference in id_to_node:
                direct_links.append((ref_node, id_to_node[reference]))
            else:
                dummy_links.append((ref_node, reference))

    for ref_node, target in direct_links:
        _link_parent_to(graph, ref_node, target)

    for ref_node, reference in dummy_links:
        # Placeholder so the dangling reference is still visible in the graph.
        dummy_node = 'dummy_' + reference
        graph.add_node(dummy_node, label='dummy', unique_id=reference)
        _link_parent_to(graph, ref_node, dummy_node)

    # Only reference nodes that were turned into edges are dropped; anything
    # unresolved in Synthea mode stays in the graph.
    resolved = direct_links + indirect_links + dummy_links
    graph.remove_nodes_from([ref_node for ref_node, _ in resolved])

    structural_labels = ('root', 'entry', 'request')
    graph.remove_nodes_from([
        node
        for node, attrs in graph.nodes(data=True)
        if attrs.get('label') in structural_labels
    ])