release commit

This commit is contained in:
2025-04-16 22:12:19 +02:00
commit a9db0be88a
89 changed files with 2336827 additions and 0 deletions

View File

@ -0,0 +1,43 @@
import json
import networkx as nx
def add_nodes_from_dict(graph, parent_node, current_dict):
    """Recursively mirror a nested dictionary as nodes and edges of ``graph``.

    Every nested dictionary becomes a child node named
    ``"<parent_node>.<key>"``, labelled with the key and linked from
    ``parent_node`` by an edge whose ``edge_type`` is the key. Dictionaries
    found inside a list are handled the same way; ``"[<index>]"`` is appended
    to the node name when the list holds more than one item. Every other
    value (scalars and lists that contain no dictionary) is stored as an
    attribute on ``parent_node``.

    Args:
        graph: Graph object providing ``add_node``, ``add_edge`` and a
            ``nodes`` mapping (for example a ``networkx.DiGraph``).
        parent_node: Name of an already existing node that receives the
            attributes and the outgoing edges.
        current_dict: Dictionary to unpack below ``parent_node``.

    Note:
        In a list that mixes dictionaries with other values only the
        dictionaries are processed; the remaining items are not stored.
    """
    for key, value in current_dict.items():
        if isinstance(value, dict):
            children = [(f"{parent_node}.{key}", value)]
        elif isinstance(value, list) and any(isinstance(item, dict) for item in value):
            # The index suffix is only needed to tell siblings apart, so a
            # single-element list reuses the plain "<parent>.<key>" name.
            needs_index = len(value) > 1
            children = [
                (
                    f"{parent_node}.{key}[{index}]" if needs_index else f"{parent_node}.{key}",
                    item,
                )
                for index, item in enumerate(value)
                if isinstance(item, dict)
            ]
        else:
            # Scalars and dictionary-free lists live directly on the parent.
            graph.nodes[parent_node][key] = value
            continue

        for child_node, child_dict in children:
            graph.add_node(child_node, label=key)
            graph.add_edge(parent_node, child_node, edge_type=key)
            add_nodes_from_dict(graph, child_node, child_dict)
def add_json_to_networkx(json_data, bundle_name, graph):
    """Insert a parsed JSON document into ``graph`` below a new root node.

    The root node is named ``bundle_name + '_bundle'`` and labelled
    ``'root'``; the contents of ``json_data`` are expanded underneath it by
    ``add_nodes_from_dict``. The graph is modified in place.

    Raises:
        ValueError: If ``graph`` is not a ``networkx.DiGraph``.
    """
    graph_is_directed = isinstance(graph, nx.DiGraph)
    if not graph_is_directed:
        raise ValueError("The provided graph must be a networkx.DiGraph")

    bundle_root = bundle_name + '_bundle'
    graph.add_node(bundle_root, label='root')
    add_nodes_from_dict(graph, bundle_root, json_data)

View File

@ -0,0 +1,40 @@
import networkx as nx
class Resource:
    """Base type for resource objects; remembers the resource type it was given."""

    def __init__(self, resource_type) -> None:
        # Kept as a plain public attribute so callers can read it directly.
        self.resource_type = resource_type
def create_resource_class(resource_type):
    """Build a new, empty ``Resource`` subclass whose name is ``resource_type``."""
    base_classes = (Resource,)
    class_namespace = {}
    return type(resource_type, base_classes, class_namespace)
def set_resource_type(graph):
    """Swap every ``Resource`` node for an instance of a type-specific subclass.

    For each node that is a ``Resource`` with a truthy ``resource_type``, a
    subclass named after that type is created with ``create_resource_class``
    and a fresh instance of it takes the node's place. Node attributes and
    all incoming and outgoing edges -- including their edge attributes --
    are carried over before the old node is removed. The graph is modified
    in place.

    Diagnostic output (a dump of all nodes, one line per inspected node and
    a warning for resources without a type) is printed to standard output.

    Args:
        graph: Directed networkx graph whose nodes may be ``Resource``
            instances.
    """
    for node, data in graph.nodes(data=True):
        print(node, data)
    print("-----------------------------")

    nodes_to_replace = []
    for node, data in graph.nodes(data=True):
        print(isinstance(node, Resource), node, type(node))
        if not isinstance(node, Resource):
            continue
        print("Found a resource: ", node)
        resource_type = node.resource_type
        if resource_type:
            # Dynamically create a new class based on the resource_type.
            new_resource_class = create_resource_class(resource_type)
            nodes_to_replace.append((node, new_resource_class(resource_type), data))
        else:
            print(f"Warning: Node {node} is a resource but has no resource_type")

    # Replace old nodes with new ones.
    for old_node, new_node, data in nodes_to_replace:
        # Snapshot the incident edges first so the graph is not mutated
        # while its adjacency views are being iterated.
        incoming = list(graph.in_edges(old_node, data=True))
        outgoing = list(graph.out_edges(old_node, data=True))

        graph.add_node(new_node, **data)
        for pred, _, edge_data in incoming:
            # A self-loop on the old node becomes a self-loop on the new one.
            source = new_node if pred == old_node else pred
            graph.add_edge(source, new_node, **edge_data)
        for _, succ, edge_data in outgoing:
            if succ == old_node:
                continue  # self-loop already re-created from the incoming side
            graph.add_edge(new_node, succ, **edge_data)
        graph.remove_node(old_node)

View File

@ -0,0 +1,102 @@
import networkx as nx
def parse_synthea_reference(ref):
    """Extract the identifier that a reference string points to.

    Three shapes are recognised:

    * a string starting with ``#`` yields the placeholder ``'mock'``;
    * a string containing both ``?`` and ``|`` yields the text that follows
      the first ``|`` (up to the next ``|``, if any);
    * any other string yields its third colon-separated field, so
      ``'urn:uuid:<id>'`` gives ``'<id>'``.

    A string of the last shape that has fewer than three colon-separated
    fields is returned unchanged rather than raising ``IndexError``.

    Args:
        ref: Reference string to parse.

    Returns:
        The extracted identifier, ``'mock'``, or ``ref`` itself when no
        identifier can be isolated.
    """
    if ref.startswith('#'):
        return 'mock'
    if '?' in ref and '|' in ref:
        return ref.split('|')[1]

    fields = ref.split(':')
    if len(fields) < 3:
        # Nothing to strip: hand the reference back as-is so the caller can
        # still try to resolve it.
        return ref
    return fields[2]
def _add_reference_edge(graph, reference_node, target_node):
    """Link the first predecessor of ``reference_node`` to ``target_node``.

    The new edge has ``edge_type='reference'`` and stores the label of
    ``reference_node`` as ``reference_type``.
    """
    source_node = list(graph.predecessors(reference_node))[0]
    graph.add_edge(
        source_node,
        target_node,
        edge_type='reference',
        reference_type=graph.nodes[reference_node]['label'],
    )


def process_references(graph, is_synthea=False):
    """Turn ``reference`` attributes into explicit reference edges.

    Every node carrying a ``reference`` attribute is resolved to a target
    node. An edge with ``edge_type='reference'`` is then added from the
    first predecessor of the referencing node to the target, and the
    referencing node itself is removed. Finally all nodes labelled
    ``'root'``, ``'entry'`` or ``'request'`` are removed. The graph is
    modified in place.

    Resolution depends on ``is_synthea``:

    * ``False`` (default): the reference is matched against
      ``"<resourceType>/<id>"`` of nodes that have both attributes. A
      reference without a match gets a placeholder node named
      ``'dummy_<reference>'`` (label ``'dummy'``, ``unique_id`` set to the
      reference) as its target.
    * ``True``: the reference is first reduced with
      ``parse_synthea_reference``; references parsed to ``'mock'`` are
      ignored. The result is matched against node ``id`` attributes and,
      failing that, against the ``value`` of nodes labelled
      ``'identifier'`` -- in which case the target is the first predecessor
      of that identifier node. References without a match are left alone.

    Args:
        graph: Directed networkx graph to rewrite.
        is_synthea: Select the Synthea-style resolution described above.

    Raises:
        IndexError: If a resolved referencing node (or, in Synthea mode, a
            matched identifier node) has no predecessor.
    """
    nodes_with_reference = [
        (node, attrs['reference'])
        for node, attrs in graph.nodes(data=True)
        if 'reference' in attrs
    ]
    directly_referenced_nodes = []
    indirectly_referenced_nodes = []
    dummy_references = []

    if is_synthea:
        id_to_node = {
            data["id"]: node
            for node, data in graph.nodes(data=True)
            if "id" in data
        }
        id_to_identifier_node = {
            data["value"]: node
            for node, data in graph.nodes(data=True)
            if "value" in data and data.get('label') == 'identifier'
        }
        for node, reference in nodes_with_reference:
            ref_id = parse_synthea_reference(reference)
            if ref_id == 'mock':
                continue
            if ref_id in id_to_node:
                directly_referenced_nodes.append((node, id_to_node[ref_id]))
            elif ref_id in id_to_identifier_node:
                indirectly_referenced_nodes.append((node, id_to_identifier_node[ref_id]))

        for reference_node, identifier_node in indirectly_referenced_nodes:
            # The identifier hangs below the node it identifies, so the edge
            # must point at the identifier's parent, not the identifier.
            target_node = list(graph.predecessors(identifier_node))[0]
            _add_reference_edge(graph, reference_node, target_node)
    else:
        id_to_node = {
            data["resourceType"] + '/' + data["id"]: node
            for node, data in graph.nodes(data=True)
            if "id" in data and "resourceType" in data
        }
        for node, reference in nodes_with_reference:
            if reference in id_to_node:
                directly_referenced_nodes.append((node, id_to_node[reference]))
            else:
                dummy_references.append((node, reference))

    for reference_node, target_node in directly_referenced_nodes:
        _add_reference_edge(graph, reference_node, target_node)

    for reference_node, reference in dummy_references:
        placeholder = 'dummy_' + reference
        graph.add_node(placeholder, label='dummy', unique_id=reference)
        _add_reference_edge(graph, reference_node, placeholder)

    # The referencing nodes are now represented by edges and can go.
    graph.remove_nodes_from([node for node, _ in directly_referenced_nodes])
    graph.remove_nodes_from([node for node, _ in indirectly_referenced_nodes])
    graph.remove_nodes_from([node for node, _ in dummy_references])

    structural_labels = ('root', 'entry', 'request')
    nodes_to_remove = [
        node
        for node, attrs in graph.nodes(data=True)
        if attrs.get('label') in structural_labels
    ]
    graph.remove_nodes_from(nodes_to_remove)

View File

@ -0,0 +1,107 @@
import networkx as nx
def find_paths(graph, start_node):
    """Collect two kinds of paths that start at ``start_node``.

    A depth-first walk follows successor edges, never crossing more than
    one edge whose ``edge_type`` is ``'reference'``.

    Returns:
        A ``(reference_paths, leaf_paths)`` tuple.

        * ``reference_paths`` is a list of paths (lists of nodes) longer
          than three nodes that crossed exactly one reference edge and end
          on a node labelled ``'resource'``.
        * ``leaf_paths`` maps a property key -- the text after the last
          ``'.'`` in the second node of the path -- to one flat list that
          concatenates the nodes of every path longer than two nodes that
          crossed no reference edge and ends on a node without outgoing
          edges.
    """
    reference_paths = []
    leaf_paths = {}

    def walk(trail, refs_seen):
        tail = trail[-1]

        # Pattern 1: exactly one reference edge, ending on a resource node.
        if refs_seen == 1 and len(trail) > 3:
            if graph.nodes[tail].get('label') == 'resource':
                reference_paths.append(list(trail))

        # Pattern 2: no reference edge, ending on a leaf.
        if refs_seen == 0 and len(trail) > 2:
            if graph.out_degree(tail) == 0:
                property_key = trail[1].split('.')[-1]
                leaf_paths.setdefault(property_key, []).extend(trail)

        for following in graph.successors(tail):
            crosses_reference = (
                graph.edges[tail, following].get('edge_type', None) == 'reference'
            )
            refs_after = refs_seen + 1 if crosses_reference else refs_seen
            if refs_after > 1:
                # A second reference edge ends this branch of the search.
                continue
            walk(trail + [following], refs_after)

    walk([start_node], 0)
    return reference_paths, leaf_paths
def property_convolution(graph):
    """Fold nested property nodes into the first-level property of each resource.

    Working in place on ``graph``:

    1. For every node labelled ``'resource'`` the paths reported by
       ``find_paths`` are gathered.
    2. For each path containing a reference edge, that edge is removed and
       re-created from the path's second node (the first-level property) to
       the referenced node, keeping its ``reference_type``. The path without
       its final node is then treated like a leaf path.
    3. For each group of leaf paths sharing a first-level property, the
       attributes (except ``'label'``) of every node from the third position
       onward are copied onto the first-level property under the name
       ``"<marker>_<attribute>"``, where the marker is built from the node
       name segments that follow ``'resource.<property>'`` joined by ``'|'``.
       Those deeper nodes are removed afterwards.
    4. Each resource gets ``unique_id = "<resourceType>/<id>"`` and each
       successor reached over a non-reference edge gets that id extended by
       ``'/'`` and the last dot-separated segment of its own name.
    """
    resources = [
        node
        for node, attrs in graph.nodes(data=True)
        if attrs.get('label') == 'resource'
    ]

    # Step 1: gather the reference paths (one flat list) and the leaf paths
    # (per resource, keyed by first-level property).
    paths_via_reference = []
    leaf_paths_by_resource = {}
    for resource in resources:
        found_reference_paths, found_leaf_paths = find_paths(graph, resource)
        paths_via_reference.extend(found_reference_paths)
        leaf_paths_by_resource[resource] = found_leaf_paths

    # Step 2: move each reference edge up to the first-level property node.
    for path in paths_via_reference:
        edge_source, referenced = path[-2], path[-1]
        reference_type = graph.get_edge_data(edge_source, referenced).get('reference_type')
        graph.remove_edge(edge_source, referenced)
        graph.add_edge(path[1], referenced, edge_type='reference', reference_type=reference_type)
        # With the edge gone the shortened path ends in a leaf, so it joins
        # the leaf paths of its resource under its property key.
        property_key = path[1].split('.')[-1]
        leaf_paths_by_resource[path[0]].setdefault(property_key, []).extend(path[:-1])

    # One de-duplicated, order-preserving node list per (resource, property).
    collapsed_paths = []
    for per_property in leaf_paths_by_resource.values():
        for nodes in per_property.values():
            collapsed_paths.append(list(dict.fromkeys(nodes)))

    # Step 3: copy attributes of deeper nodes onto the first-level property,
    # walking from the deepest node back towards the property.
    obsolete_nodes = []
    for nodes in collapsed_paths:
        for position in reversed(range(2, len(nodes))):
            deep_node = nodes[position]
            deep_attributes = graph.nodes[deep_node]
            marker = '|'.join(deep_node.split('resource.')[1].split('.')[1:])
            for attr, value in deep_attributes.items():
                if attr == 'label':
                    continue
                graph.nodes[nodes[1]][marker + '_' + attr] = value
            obsolete_nodes.append(deep_node)
    graph.remove_nodes_from(obsolete_nodes)

    # Step 4: assign unique ids to resources and their direct properties.
    for resource in resources:
        unique_resource_id = graph.nodes[resource]['resourceType'] + '/' + graph.nodes[resource]['id']
        graph.nodes[resource]['unique_id'] = unique_resource_id
        for child in graph.successors(resource):
            if graph[resource][child].get('edge_type') == 'reference':
                continue
            graph.nodes[child]['unique_id'] = unique_resource_id + '/' + child.split('.')[-1]