release commit
This commit is contained in:
188
schema_config_generation.py
Normal file
188
schema_config_generation.py
Normal file
@ -0,0 +1,188 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
#import networkx as nx
|
||||
import yaml
|
||||
from collections import defaultdict
|
||||
|
||||
#extract all node types and generate basic yaml config part for nodes
|
||||
|
||||
# Predicates under which an association between two labels may already be
# declared in the schema; the first one is used for newly generated entries.
_EDGE_PREDICATES = ('to', 'derived from', 'has member', 'reasoned by', 'is')


def _resolve_label(graph, node):
    """Return the schema label of ``node``; 'resource' nodes use their resourceType."""
    attrs = graph.nodes[node]
    label = attrs.get('label')
    if label == 'resource':
        label = attrs.get('resourceType')
    # Fall back to the node id so a missing label/resourceType cannot crash.
    return str(node) if label is None else str(label)


def _render_node(label, attrs):
    """Render one node entry of the schema as YAML text (with trailing blank line)."""
    def pick(key, default):
        value = attrs.get(key)
        return default if value is None else value

    lines = [
        f'{label}:',
        f"    is_a: {pick('is_a', 'named thing')}",
        f"    represented_as: {pick('represented_as', 'node')}",
        f"    preferred_id: {pick('preferred_id', 'fhir_id')}",
        # Written exactly once: the configured value wins, else the label.
        f"    label_in_input: {pick('label_in_input', label)}",
        '    properties:',
    ]
    # Properties are indented deeper than 'properties:' so they nest under it.
    for name, kind in (attrs.get('properties') or {}).items():
        lines.append(f'        {name}: {kind}')
    return '\n'.join(lines) + '\n\n'


def _render_edge(label, attrs):
    """Render one edge entry of the schema as YAML text (with trailing blank line)."""
    lines = [f'{label}:']
    for key, value in attrs.items():
        if key == 'properties':
            lines.append('    properties:')
            # An empty 'properties:' reloads as None; treat it as no properties.
            for name, kind in (value or {}).items():
                lines.append(f'        {name}: {kind}')
        else:
            lines.append(f"    {key}: {'null' if value is None else value}")
    return '\n'.join(lines) + '\n\n'


def write_automated_schema(graph, filePath, mSchemaPath):
    """Write a YAML schema config covering the node and edge types of ``graph``.

    The starting schema is read from ``filePath`` if that file already
    exists, otherwise from ``mSchemaPath`` if given, otherwise it is empty.
    Node and edge types found in the graph are merged into it and the
    result is written to ``filePath``, replacing any previous content.

    Args:
        graph: Object exposing ``nodes()``, ``nodes[node]`` (attribute
            mapping) and ``edges(data=True)`` - presumably a networkx
            graph; confirm against the caller.
        filePath: Path of the schema file to (re)generate.
        mSchemaPath: Optional path of a manually written schema used as the
            base when ``filePath`` does not exist yet; may be falsy.
    """
    schemaData = {'nodes': {}, 'edges': {}}

    if Path(filePath).exists():
        schemaData = loadManualSchema(filePath)
    elif mSchemaPath:
        print("using the manual schema")
        schemaData = loadManualSchema(mSchemaPath)

    # Collect node types and their attribute names.
    nodes = schemaData['nodes']
    for node in graph.nodes():
        label = _resolve_label(graph, node).capitalize()
        entry = nodes.setdefault(label, {})
        if not isinstance(entry.get('properties'), dict):
            entry['properties'] = {}
        for key in graph.nodes[node]:
            # Keep a type already declared in the loaded schema; default to str.
            entry['properties'].setdefault(key, 'str')

    # Collect edge types that are not declared yet.
    edges = schemaData['edges']
    for u, v, edge_attrs in graph.edges(data=True):
        source_label = _resolve_label(graph, u).capitalize()
        # NOTE(review): the target label is deliberately left uncapitalized,
        # as in the previous implementation - confirm this is intended.
        target_label = _resolve_label(graph, v)

        candidates = [
            f'{source_label} {predicate} {target_label} association'
            for predicate in _EDGE_PREDICATES
        ]
        # loadManualSchema stores labels via str.capitalize(), which lowercases
        # everything after the first character, so also test that form;
        # otherwise re-runs keep appending duplicates of existing entries.
        if any(c in edges or c.capitalize() in edges for c in candidates):
            continue

        edges[candidates[0]] = {
            'is_a': 'association',
            'represented_as': 'edge',
            'label_in_input': f'{source_label}_to_{target_label}',
            # Schema properties map attribute names to types (as for nodes),
            # not to the attribute values of one particular edge.
            'properties': {name: 'str' for name in edge_attrs},
        }

    # Render everything first so a failure cannot leave a half-written file.
    chunks = [_render_node(label, attrs) for label, attrs in nodes.items()]
    chunks.append('\n')
    chunks.extend(_render_edge(label, attrs) for label, attrs in edges.items())

    with open(filePath, 'w') as out:
        out.write(''.join(chunks))
|
||||
|
||||
def loadManualSchema(path):
    """Load a YAML schema file and split its entries into nodes and edges.

    Every top-level mapping entry whose ``represented_as`` value is
    ``'node'`` is stored under ``'nodes'``; every other mapping entry is
    stored under ``'edges'``. Labels are normalised with
    ``str.capitalize()``. If two labels normalise to the same key, the
    later entry overwrites the earlier one.

    Args:
        path: Path of the YAML schema file to read.

    Returns:
        dict: ``{'nodes': {label: attrs}, 'edges': {label: attrs}}``.

    Raises:
        KeyError: If a mapping entry has no ``represented_as`` key.
    """
    schemaData = {'nodes': {}, 'edges': {}}

    with open(path, 'r') as file:
        # safe_load returns None for an empty document; treat it as no entries.
        data = yaml.safe_load(file) or {}

    for label, attrs in data.items():
        # Scalar top-level entries such as 'Title' carry no schema
        # definition (and cannot be indexed by 'represented_as'); skip them.
        if label == 'Title' or not isinstance(attrs, dict):
            continue

        section = 'nodes' if attrs["represented_as"] == 'node' else 'edges'
        # Assumes labels are unique in the schema file; a repeated label
        # silently replaces the earlier definition.
        schemaData[section][str(label).capitalize()] = attrs

    return schemaData
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user