import networkx as nx


def parse_synthea_reference(ref: str) -> str:
    """Extract the identifier part of a Synthea-style reference string.

    Three shapes are recognised:

    * a reference starting with ``'#'`` yields the sentinel ``'mock'``;
    * a reference containing both ``'?'`` and ``'|'`` yields the text
      between the first and second ``'|'`` (or up to the end of the string),
      e.g. ``'Practitioner?identifier=sys|999'`` -> ``'999'``;
    * anything else yields its third ``':'``-separated field,
      e.g. ``'urn:uuid:abc'`` -> ``'abc'``.

    Raises:
        IndexError: if the reference falls into the last case but contains
            fewer than two ``':'`` characters.
    """
    if ref.startswith('#'):
        return 'mock'
    if '?' in ref and '|' in ref:
        return ref.split('|')[1]
    return ref.split(':')[2]


def _link_reference(graph, reference_node, target):
    """Add a 'reference' edge from the parent of *reference_node* to *target*.

    The parent is the first predecessor of *reference_node*; the edge's
    ``reference_type`` is the ``label`` attribute of *reference_node*.
    Raises IndexError if the node has no predecessor and KeyError if it has
    no ``label``.
    """
    parent = list(graph.predecessors(reference_node))[0]
    reference_type = graph.nodes[reference_node]['label']
    graph.add_edge(parent, target, edge_type='reference',
                   reference_type=reference_type)


def process_references(graph: nx.DiGraph, *, is_synthea: bool = False) -> None:
    """Replace reference nodes by direct edges between the nodes they connect.

    Every node carrying a ``reference`` attribute is resolved to a target
    node. An edge ``parent(reference node) -> target`` is then added with
    ``edge_type='reference'`` and ``reference_type`` set to the reference
    node's ``label``, and the resolved reference node is removed. Finally all
    nodes labelled ``'root'``, ``'entry'`` or ``'request'`` are removed.
    The graph is modified in place. Nodes left isolated are not pruned.

    Args:
        graph: Directed graph (``predecessors`` is used) whose nodes carry
            attribute dicts.
        is_synthea: Selects the resolution strategy.

            * ``False`` (default): the raw ``reference`` string is looked up
              among nodes keyed by ``resourceType + '/' + id``. References
              with no match are linked to a newly created placeholder node
              ``'dummy_' + reference`` (``label='dummy'``).
            * ``True``: the reference is first reduced with
              :func:`parse_synthea_reference`. References that parse to
              ``'mock'`` are ignored. The remaining ones are matched against
              node ``id`` values, and otherwise against the ``value`` of
              nodes labelled ``'identifier'``, in which case the edge
              targets the identifier node's parent. Unmatched references are
              left untouched in the graph.

    Raises:
        IndexError: if a resolved reference node (or a matched identifier
            node) has no predecessor, or a Synthea reference is malformed.
        KeyError: if a resolved reference node has no ``label`` attribute.
    """
    reference_nodes = [
        (node, attrs['reference'])
        for node, attrs in graph.nodes(data=True)
        if 'reference' in attrs
    ]
    # Each entry is a (reference_node, resolved_target) pair.
    direct = []
    indirect = []
    dummy = []

    if is_synthea:
        # Parse everything up front so a malformed reference fails before
        # any lookup table is built or the graph is touched.
        parsed = [
            (node, parse_synthea_reference(reference))
            for node, reference in reference_nodes
        ]
        id_to_node = {
            attrs['id']: node
            for node, attrs in graph.nodes(data=True)
            if 'id' in attrs
        }
        # .get(): a node may carry a 'value' without having a 'label'.
        identifier_to_node = {
            attrs['value']: node
            for node, attrs in graph.nodes(data=True)
            if 'value' in attrs and attrs.get('label') == 'identifier'
        }
        for node, ref_id in parsed:
            if ref_id == 'mock':
                continue
            if ref_id in id_to_node:
                direct.append((node, id_to_node[ref_id]))
            elif ref_id in identifier_to_node:
                indirect.append((node, identifier_to_node[ref_id]))

        for reference_node, identifier_node in indirect:
            # The edge targets the node that owns the identifier, not the
            # identifier node itself.
            owner = list(graph.predecessors(identifier_node))[0]
            _link_reference(graph, reference_node, owner)
    else:
        id_to_node = {
            attrs['resourceType'] + '/' + attrs['id']: node
            for node, attrs in graph.nodes(data=True)
            if 'id' in attrs and 'resourceType' in attrs
        }
        for node, reference in reference_nodes:
            if reference in id_to_node:
                direct.append((node, id_to_node[reference]))
            else:
                dummy.append((node, reference))

    for reference_node, target in direct:
        _link_reference(graph, reference_node, target)

    for reference_node, reference in dummy:
        placeholder = 'dummy_' + reference
        graph.add_node(placeholder, label='dummy', unique_id=reference)
        _link_reference(graph, reference_node, placeholder)

    # Only reference nodes that were actually resolved are dropped.
    graph.remove_nodes_from(
        [reference_node for reference_node, _ in direct + indirect + dummy]
    )

    structural = [
        node
        for node, attrs in graph.nodes(data=True)
        if attrs.get('label') in ('root', 'entry', 'request')
    ]
    graph.remove_nodes_from(structural)