189 lines
6.0 KiB
Python
189 lines
6.0 KiB
Python
#!/usr/bin/env python
|
|
# coding: utf-8
|
|
|
|
from pathlib import Path
|
|
|
|
#import networkx as nx
|
|
import yaml
|
|
from collections import defaultdict
|
|
|
|
# Extract all node types and generate the basic YAML config section for nodes.
|
|
|
|
def _node_label(graph, node):
    """Return the raw (un-capitalised) schema label for *node*.

    Nodes labelled ``'resource'`` are identified by their ``resourceType``
    attribute instead. When no usable label is present the node identifier
    itself is used, so callers never receive ``None``.
    """
    attrs = graph.nodes[node]
    label = attrs.get('label')
    if label == 'resource':
        label = attrs.get('resourceType')
    if label is None:
        label = node
    return str(label)


def _format_node_entry(label, spec):
    """Render one node type as a YAML mapping entry followed by a blank line."""
    lines = [
        f"{label}:",
        f"    is_a: {spec.get('is_a', 'named thing')}",
        f"    represented_as: {spec.get('represented_as', 'node')}",
        f"    preferred_id: {spec.get('preferred_id', 'fhir_id')}",
        # Emitted exactly once: a value from the loaded schema wins,
        # otherwise the label itself is used.
        f"    label_in_input: {spec.get('label_in_input', label)}",
        "    properties:",
    ]
    # Properties are indented one level deeper so they nest under
    # ``properties:`` instead of becoming siblings of it.
    lines.extend(
        f"        {name}: {kind}"
        for name, kind in (spec.get('properties') or {}).items()
    )
    return "\n".join(lines) + "\n\n"


def _format_edge_entry(label, spec):
    """Render one edge type as a YAML mapping entry followed by a blank line."""
    lines = [f"{label}:"]
    for key, value in spec.items():
        if key == 'properties':
            lines.append("    properties:")
            lines.extend(
                f"        {name}: {kind}"
                for name, kind in (value or {}).items()
            )
        else:
            # f-string formatting tolerates non-string values that a loaded
            # schema may contain.
            lines.append(f"    {key}: {value}")
    return "\n".join(lines) + "\n\n"


def write_automated_schema(graph, filePath, mSchemaPath):
    """Derive a schema from *graph* and write it to *filePath* as YAML text.

    An existing file at *filePath* is loaded first and extended; otherwise,
    if *mSchemaPath* is truthy, that manual schema is used as the starting
    point. Every attribute key found on a node is recorded as a property of
    type ``'str'`` for the node's (capitalised) label. Every edge whose
    source/target label pair is not already covered by a known association
    key gets a new ``"<Source> to <target> association"`` entry.

    Args:
        graph: Graph object exposing ``nodes()``, ``nodes[node]`` (attribute
            mapping) and ``edges(data=True)`` -- presumably a networkx graph;
            confirm against the caller.
        filePath: Destination path; also read as the base schema when it
            already exists.
        mSchemaPath: Optional path of a manual schema used when *filePath*
            does not exist yet.

    Returns:
        None. The schema is written to *filePath*.
    """
    schemaData = {
        'nodes': {},
        'edges': {},
    }

    if Path(filePath).exists():
        schemaData = loadManualSchema(filePath)
    elif mSchemaPath:
        print("using the manual schema")
        schemaData = loadManualSchema(mSchemaPath)

    node_specs = schemaData['nodes']
    edge_specs = schemaData['edges']

    # Collect node types and their property names.
    for node in graph.nodes():
        label = _node_label(graph, node).capitalize()
        spec = node_specs.setdefault(label, {})
        if not spec.get('properties'):
            # Covers both a missing key and an empty/null value from YAML.
            spec['properties'] = {}
        for attr_name in graph.nodes[node]:
            spec['properties'][attr_name] = 'str'

    # Collect relationship types. This happens before the output file is
    # opened so that a failure here cannot leave a truncated schema behind.
    connectives = ('to', 'derived from', 'has member', 'reasoned by', 'is')
    for u, v, attrs in graph.edges(data=True):
        source_label = _node_label(graph, u).capitalize()
        # The target label is intentionally left un-capitalised, matching
        # the key format this function has always produced.
        target_label = _node_label(graph, v)

        already_known = any(
            f"{source_label} {phrase} {target_label} association" in edge_specs
            for phrase in connectives
        )
        if already_known:
            continue

        # NOTE(review): edge properties carry the raw attribute values, while
        # node properties are always typed 'str' -- confirm this is intended.
        edge_specs[f"{source_label} to {target_label} association"] = {
            'is_a': 'association',
            'represented_as': 'edge',
            'label_in_input': source_label + '_to_' + target_label,
            'properties': dict(attrs),
        }

    with open(filePath, 'w') as file:
        for label, spec in node_specs.items():
            file.write(_format_node_entry(label, spec))
        file.write('\n')
        for label, spec in edge_specs.items():
            file.write(_format_edge_entry(label, spec))
|
|
|
|
def loadManualSchema(path):
    """Load a YAML schema file and split its entries into nodes and edges.

    Each top-level entry except ``'Title'`` is stored under its capitalised
    label. Entries whose ``represented_as`` value is ``'node'`` go into
    ``'nodes'``; all others go into ``'edges'``. If two labels capitalise to
    the same key, the later entry overwrites the earlier one.

    Args:
        path: Path of the YAML schema file to read.

    Returns:
        dict: ``{'nodes': {...}, 'edges': {...}}`` mapping capitalised labels
        to the attribute mappings loaded from the file. Both are empty when
        the file contains no document.

    Raises:
        KeyError: If an entry has no ``represented_as`` key.
    """
    schemaData = {
        'nodes': {},
        'edges': {},
    }

    with open(path, 'r') as file:
        # safe_load returns None for an empty document; treat that as an
        # empty schema instead of failing on ``None.items()``.
        data = yaml.safe_load(file) or {}

    for label, attrs in data.items():
        if label == 'Title':
            continue
        section = 'nodes' if attrs["represented_as"] == 'node' else 'edges'
        # Assumes labels are unique in the schema file; a repeated label
        # simply replaces the earlier entry.
        schemaData[section][label.capitalize()] = attrs

    return schemaData
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|