284 lines
7.6 KiB
YAML
284 lines
7.6 KiB
YAML
Title: BioCypher graph schema configuration file
|
|
|
|
# This configuration file establishes the hierarchy and connectivity in a newly
|
|
# set-up BioCypher property graph database. Naming should adhere to Biolink
|
|
# nomenclature (available at https://biolink.github.io/biolink-model/ or via
|
|
# the python module 'biolink-model-toolkit').
|
|
|
|
# The BioCypher YAML file specifies only the leaves of the hierarchy tree of
|
|
# the desired graph; the hierarchical structure of entities will be derived
|
|
# from the Biolink model + BRO model. Thus, only the immediate constituents
|
|
# of the graph need to be specified in the schema config.
|
|
|
|
|
|
# ---
|
|
# "Named Things"
|
|
# ---
|
|
# The implementation of named things is fairly straightforward, since they are
|
|
# usually represented in node form, which is also the Biolink recommendation.
|
|
# The same is not true for associations.
|
|
#
|
|
# A little more complex is the representation of aggregates of named things.
|
|
|
|
# Node type for input records labelled `clinicalStatus`; placed under
# ClinicalEntity and using `fhir_id` as its preferred identifier.
clinicalStatus:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: clinicalStatus
  properties:
    coding_system: str
    label: str
    coding_code: str
|
|
|
|
# Node type for input records labelled `Condition`; placed under ClinicalEntity
# and using `fhir_id` as its preferred identifier.
# NOTE(review): every other `properties` map visible in this file pairs a
# property name with the type `str`; the two values below read like descriptive
# metadata instead - confirm they are really meant to sit in the type position.
Condition:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Condition
  properties:
    input_format: HL7 FHIR
    data_specification: Medical Informatics Initiative Germany Core Data Set, Basic Modules
|
|
|
|
# Node type for input records labelled `diagnosis`; placed under ClinicalEntity
# and using `fhir_id` as its preferred identifier.
# Two property names contain a dot (`type.coding_code`, `type.coding_system`);
# they are plain keys here, not nested mappings.
diagnosis:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: diagnosis
  properties:
    type.coding_code: str
    sequence: str
    label: str
    type.coding_system: str
|
|
|
|
# Node type for input records labelled `DiagnosticReport`; placed under
# ClinicalEntity and using `fhir_id` as its preferred identifier.
DiagnosticReport:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: DiagnosticReport
  properties:
    resourceType: str
    label: str
    status: str
    id: str
|
|
|
|
# Node type for input records labelled `Encounter`; placed under ClinicalEntity
# and using `fhir_id` as its preferred identifier.
Encounter:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Encounter
  properties:
    resourceType: str
    label: str
    status: str
    id: str
|
|
|
|
# Node type for input records labelled `identifier`; placed under Attribute and
# using `fhir_id` as its preferred identifier. The `IDENTIFIED_BY_*` edge
# labels declared later in this file end in `_Identifier`.
identifier:
  is_a: Attribute
  represented_as: node
  preferred_id: fhir_id
  label_in_input: identifier
  properties:
    label: str
    value: str
    system: str
|
|
|
|
# Node type for input records labelled `interpretation`; attached directly to
# the root class `named thing` and using `fhir_id` as its preferred identifier.
# Property names containing a dot are plain keys, not nested mappings.
interpretation:
  is_a: named thing
  represented_as: node
  preferred_id: fhir_id
  label_in_input: interpretation
  properties:
    extension.valueCoding_system: str
    extension_url: str
    extension.valueCoding_display: str
    coding_code: str
    coding_system: str
    label: str
    extension.valueCoding_code: str
|
|
|
|
# Node type for input records labelled `maritalStatus`; placed under
# OrganismAttribute and using `fhir_id` as its preferred identifier.
maritalStatus:
  is_a: OrganismAttribute
  represented_as: node
  preferred_id: fhir_id
  label_in_input: maritalStatus
  properties:
    label: str
    coding_system: str
    coding_code: str
|
|
|
|
# Node type for input records labelled `Observation`; placed under
# ClinicalEntity and using `fhir_id` as its preferred identifier.
Observation:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Observation
  properties:
    resourceType: str
    label: str
    effectiveDateTime: str
    status: str
    id: str
|
|
|
|
# Node type for input records labelled `Organization`; placed under
# AdministrativeEntity and using `fhir_id` as its preferred identifier.
Organization:
  is_a: AdministrativeEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Organization
  properties:
    label: str
    id: str
    name: str
    resourceType: str
|
|
|
|
# Node type for input records labelled `Patient`; placed under Human and using
# `fhir_id` as its preferred identifier.
Patient:
  is_a: Human
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Patient
  properties:
    resourceType: str
    label: str
    gender: str
    id: str
    birthDate: str
|
|
|
|
# Node type for input records labelled `Procedure`, using `fhir_id` as its
# preferred identifier.
# NOTE(review): this entry has no active `is_a`; the original line is kept
# disabled below. Presumably `Procedure` is resolved directly as an ontology
# class - confirm against the ontology in use.
Procedure:
  # is_a: Procedure
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Procedure
  properties:
    label: str
    performedDateTime: str
    resourceType: str
    status: str
    id: str
|
|
|
|
# Node type for input records labelled `referenceRange`; attached directly to
# the root class `named thing` and using `fhir_id` as its preferred identifier.
# Only `high_*` bound properties are declared for this entry.
referenceRange:
  is_a: named thing
  represented_as: node
  preferred_id: fhir_id
  label_in_input: referenceRange
  properties:
    high_system: str
    high_value: str
    high_code: str
    label: str
    high_unit: str
|
|
|
|
# Node type for input records labelled `search`; attached directly to the root
# class `named thing` and using `fhir_id` as its preferred identifier.
search:
  is_a: named thing
  represented_as: node
  preferred_id: fhir_id
  label_in_input: search
  properties:
    label: str
    mode: str
|
|
|
|
# Node type for input records labelled `type`; placed under Attribute and using
# `fhir_id` as its preferred identifier.
type:
  is_a: Attribute
  represented_as: node
  preferred_id: fhir_id
  label_in_input: type
  properties:
    coding_system: str
    label: str
    coding_code: str
    coding_display: str
|
|
|
|
# Node type for input records labelled `verificationStatus`; placed under
# Attribute and using `fhir_id` as its preferred identifier.
verificationStatus:
  is_a: Attribute
  represented_as: node
  preferred_id: fhir_id
  label_in_input: verificationStatus
  properties:
    coding_system: str
    label: str
    coding_code: str
    coding_display: str
|
|
|
|
|
|
# ---
|
|
# Associations
|
|
# ---
|
|
# Associations are not supposed to be represented in node form as per the
|
|
# specifications of Biolink. However, in an analytic context, it often makes
|
|
# sense to represent interactions as nodes in Neo4j, because it enables, for
|
|
# instance, the annotation of a relationship with a publication as source of
|
|
# evidence (also known as reification in the knowledge graph world).
|
|
|
|
# The Biolink specifications for these types of relationships do
|
|
# not go into depth; for example, the hierarchy for molecular interactions
|
|
# (i.e., "associations") ends at "PairwiseMolecularInteraction"; there are no
|
|
# explicit terms for protein-protein-interaction, phosphorylation, miRNA-
|
|
# targeting, etc. Biolink proposes to use interaction identifiers from
|
|
# ontologies, such as https://www.ebi.ac.uk/ols/ontologies/mi/.
|
|
|
|
# association to connect anything to an identifier node
|
|
# if functional, includes:
|
|
# IDENTIFIED_BY_Condition_Identifier,
|
|
# IDENTIFIED_BY_DiagnosticReport_Identifier,
|
|
# IDENTIFIED_BY_Encounter_Identifier,
|
|
# IDENTIFIED_BY_Observation_Identifier,
|
|
# IDENTIFIED_BY_Organization_Identifier,
|
|
# IDENTIFIED_BY_Patient_Identifier,
|
|
# IDENTIFIED_BY_Procedure_Identifier
|
|
|
|
# Edge type for input relationships labelled
# `IDENTIFIED_BY_Condition_Identifier`.
condition to identifier association:
  is_a: association
  represented_as: edge
  label_in_input: IDENTIFIED_BY_Condition_Identifier
|
|
|
|
# Edge type for input relationships labelled
# `IDENTIFIED_BY_DiagnosticReport_Identifier`.
diagnostic report to identifier association:
  is_a: association
  represented_as: edge
  label_in_input: IDENTIFIED_BY_DiagnosticReport_Identifier
|
|
|
|
# Edge type for input relationships labelled
# `IDENTIFIED_BY_Observation_Identifier`.
observation to identifier association:
  is_a: association
  represented_as: edge
  label_in_input: IDENTIFIED_BY_Observation_Identifier
|
|
|
|
# Edge type for input relationships labelled
# `DERIVED_FROM_Observation_Observation` (Observation -> Observation).
observation derived from observation association:
  is_a: association
  represented_as: edge
  label_in_input: DERIVED_FROM_Observation_Observation
|
|
|
|
# Edge type for input relationships labelled
# `HAS_MEMBER_Observation_Observation` (Observation -> Observation).
observation has member observation association:
  is_a: association
  represented_as: edge
  label_in_input: HAS_MEMBER_Observation_Observation
|
|
|
|
# Edge type for input relationships labelled
# `IDENTIFIED_BY_Procedure_Identifier`.
procedure to identifier association:
  is_a: association
  represented_as: edge
  label_in_input: IDENTIFIED_BY_Procedure_Identifier
|
|
|
|
# Edge type declared for Procedure -> DiagnosticReport links.
# NOTE(review): `label_in_input` below is identical to the one used by the
# `procedure to identifier association` entry, and it names an Identifier
# target rather than a DiagnosticReport. This looks like a copy-paste slip:
# two entries sharing one input label cannot both be matched unambiguously.
# TODO: confirm the label the adapter emits for this relationship and use it.
procedure to diagnostic report association:
  is_a: association
  represented_as: edge
  label_in_input: IDENTIFIED_BY_Procedure_Identifier
|
|
|
|
# Edge type for input relationships labelled
# `HAS_REASON_REFERENCE_Procedure_Observation` (Procedure -> Observation).
procedure reasoned by observation association:
  is_a: association
  represented_as: edge
  label_in_input: HAS_REASON_REFERENCE_Procedure_Observation
|
|
|
|
# Edge type for input relationships labelled
# `HAS_ACTOR_ProcedurePerformer_Practitioner`.
# NOTE(review): no `ProcedurePerformer` or `Practitioner` node entry is visible
# in this file - confirm both endpoints are declared before relying on it.
procedure performer is practitioner association:
  is_a: association
  represented_as: edge
  label_in_input: HAS_ACTOR_ProcedurePerformer_Practitioner
|
|
|
|
#represented_as: edge
|
|
#label_in_input: DERIVED_FROM_Observation_Observation:
|
|
#represented_as: edge
|
|
#label_in_input: DERIVED_FROM_Observation_Observation
|
|
#protein interaction:
|
|
# is_a: Pairwise molecular interaction
|
|
# represented_as: edge
|
|
# label_in_input: protein_protein_interaction
|
|
|
|
#protein to disease association:
|
|
# is_a: Association
|
|
# represented_as: edge
|
|
# label_in_input: protein_disease_association
|