108 lines
4.6 KiB
Python
108 lines
4.6 KiB
Python
import networkx as nx
|
|
|
|
def find_paths(graph, start_node):
    """Collect the two kinds of property paths that begin at ``start_node``.

    A depth-first walk follows outgoing edges from ``start_node`` and never
    crosses more than one edge whose ``edge_type`` attribute equals
    ``'reference'``.

    Args:
        graph: Directed graph offering ``nodes[...]``, ``edges[u, v]``,
            ``successors(node)`` and ``out_degree(node)``. Node ids must be
            strings, because the property key is the last dot-separated
            segment of the second node on a path.
        start_node: Node at which every collected path starts.

    Returns:
        A ``(reference_paths, leaf_paths)`` tuple.

        ``reference_paths`` is a list of paths (lists of nodes) that are
        longer than 3 nodes, cross exactly one reference edge and end on a
        node whose ``label`` attribute is ``'resource'``.

        ``leaf_paths`` maps a property key to ONE flat list containing the
        nodes of every path that is longer than 2 nodes, crosses no
        reference edge and ends on a node without outgoing edges. Paths that
        share a key are concatenated into the same list.
    """
    reference_paths = []
    leaf_paths = {}

    # (node, reference_count) pairs on the branch currently being explored.
    # Meeting one of them again means the walk went round a cycle that
    # consists only of non-reference edges; without this guard such a cycle
    # recurses until RecursionError. On inputs without such a cycle a pair
    # can never repeat, so the guard does not alter any result.
    active_states = set()

    def is_leaf(node):
        # A leaf is a node with no outgoing edges.
        return graph.out_degree(node) == 0

    def custom_dfs(path, reference_count):
        # Extend ``path`` depth-first and record it whenever it matches one
        # of the two patterns described in the enclosing docstring.
        current_node = path[-1]
        state = (current_node, reference_count)
        if state in active_states:
            return
        active_states.add(state)

        # Pattern 1: more than 3 nodes, exactly one reference edge crossed,
        # ending on a node labelled 'resource'.
        if (
            len(path) > 3
            and reference_count == 1
            and graph.nodes[current_node].get('label') == 'resource'
        ):
            reference_paths.append(list(path))

        # Pattern 2: more than 2 nodes, no reference edge crossed, ending on
        # a leaf. The path is filed under the key of its first property node.
        if len(path) > 2 and reference_count == 0 and is_leaf(current_node):
            property_key = path[1].split('.')[-1]
            leaf_paths.setdefault(property_key, []).extend(path)

        for neighbor in graph.successors(current_node):
            edge_type = graph.edges[current_node, neighbor].get('edge_type')
            if edge_type == 'reference':
                next_count = reference_count + 1
            else:
                next_count = reference_count

            # Never cross a second reference edge.
            if next_count <= 1:
                custom_dfs(path + [neighbor], next_count)

        # Leaving this branch: the state may legitimately be reached again
        # through a different path.
        active_states.remove(state)

    custom_dfs([start_node], 0)

    return reference_paths, leaf_paths
|
|
|
|
def property_convolution(graph):
    """Fold nested property nodes into the first-level property nodes.

    The graph is modified in place and nothing is returned. Steps, in order:

    1. Every node whose ``label`` attribute is ``'resource'`` is used as the
       start node for ``find_paths``.
    2. For each reference path, the edge between its last two nodes is
       removed and re-created from the path's second node (the first
       property node) to its last node, keeping ``reference_type``.
    3. For every collection of leaf-path nodes, the attributes of all nodes
       from the third one onwards (except ``label``) are copied onto the
       first property node under the key ``<marker>_<attribute>``; those
       nodes are then removed from the graph.
    4. Each resource node gets ``unique_id = resourceType + '/' + id`` and
       each successor reached through a non-reference edge gets that id
       extended by the last dot-separated segment of its own node id.
    """
    # All nodes labelled 'resource'.
    resources = []
    for node, data in graph.nodes(data=True):
        if data.get('label') == 'resource':
            resources.append(node)

    # Gather both path families for every resource before the graph is
    # modified: reference paths in one flat list, leaf paths per resource.
    reference_paths = []
    leaf_paths_by_resource = {}
    for resource in resources:
        found_references, found_leaves = find_paths(graph, resource)
        reference_paths += found_references
        leaf_paths_by_resource[resource] = found_leaves

    # Move the final edge of every reference path so that it starts at the
    # first property node.
    for ref_path in reference_paths:
        owner, first_property = ref_path[0], ref_path[1]
        ref_source, ref_target = ref_path[-2], ref_path[-1]

        edge_data = graph.get_edge_data(ref_source, ref_target)
        reference_type = edge_data.get('reference_type')
        graph.remove_edge(ref_source, ref_target)
        graph.add_edge(
            first_property,
            ref_target,
            edge_type='reference',
            reference_type=reference_type,
        )

        # After the transfer, the path without its last node is filed with
        # the leaf paths of its resource, under the key of its property.
        property_key = first_property.split('.')[-1]
        per_property = leaf_paths_by_resource[owner]
        per_property.setdefault(property_key, []).extend(ref_path[:-1])

    # One node list per (resource, property key), duplicates dropped while
    # keeping first-seen order.
    merged_paths = []
    for per_property in leaf_paths_by_resource.values():
        for nodes in per_property.values():
            merged_paths.append(list(dict.fromkeys(nodes)))

    obsolete_nodes = []
    for nodes in merged_paths:
        # Walk from the last node back to the third one; the first two
        # (resource and first property node) are kept.
        for nested in reversed(nodes[2:]):
            nested_attributes = graph.nodes[nested]
            marker = '|'.join(nested.split('resource.')[1].split('.')[1:])

            # Copy the attributes onto the first property node.
            for attr, value in nested_attributes.items():
                if attr == 'label':
                    continue
                graph.nodes[nodes[1]][marker + '_' + attr] = value

            obsolete_nodes.append(nested)

    graph.remove_nodes_from(obsolete_nodes)

    for resource in resources:
        resource_attributes = graph.nodes[resource]
        resource_uid = resource_attributes['resourceType'] + '/' + resource_attributes['id']
        resource_attributes['unique_id'] = resource_uid

        for child in graph.successors(resource):
            if graph[resource][child].get('edge_type') == 'reference':
                continue
            graph.nodes[child]['unique_id'] = resource_uid + '/' + child.split('.')[-1]
|