medax_pipeline/config/manual_schema_config.yaml
2025-04-16 22:12:19 +02:00

284 lines
7.6 KiB
YAML

Title: BioCypher graph schema configuration file
# This configuration file establishes the hierarchy and connectivity in a newly
# set-up BioCypher property graph database. Naming should adhere to Biolink
# nomenclature (available at https://biolink.github.io/biolink-model/ or via
# the python module 'biolink-model-toolkit').
# The BioCypher YAML file specifies only the leaves of the hierarchy tree of
# the desired graph; the hierarchical structure of entities will be derived
# from the Biolink model + BRO model. Thus, only the immediate constituents
# of the graph need to be specified in the schema config.
# ---
# "Named Things"
# ---
# The implementation of named things is fairly straightforward, since they are
# usually represented in node form, which is also the Biolink recommendation.
# The same is not true for associations.
#
# A little more complex is the representation of aggregates of named things.
# Node type for input records labelled `clinicalStatus`; child of
# ClinicalEntity, identified by `fhir_id`. All properties are strings.
clinicalStatus:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: clinicalStatus
  properties:
    coding_system: str
    label: str
    coding_code: str
# Node type for input records labelled `Condition`; child of ClinicalEntity,
# identified by `fhir_id`.
Condition:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Condition
  properties:
    # NOTE(review): unlike every other entry in this file, these two values
    # are descriptive text rather than a type name such as `str` — confirm
    # that this is intended.
    input_format: 'HL7 FHIR'
    data_specification: 'Medical Informatics Initiative Germany Core Data Set, Basic Modules'
# Node type for input records labelled `diagnosis`; child of ClinicalEntity,
# identified by `fhir_id`. Two property names carry a dotted `type.` prefix.
diagnosis:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: diagnosis
  properties:
    type.coding_code: str
    sequence: str
    label: str
    type.coding_system: str
# Node type for input records labelled `DiagnosticReport`; child of
# ClinicalEntity, identified by `fhir_id`. All properties are strings.
DiagnosticReport:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: DiagnosticReport
  properties:
    resourceType: str
    label: str
    status: str
    id: str
# Node type for input records labelled `Encounter`; child of ClinicalEntity,
# identified by `fhir_id`. All properties are strings.
Encounter:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Encounter
  properties:
    resourceType: str
    label: str
    status: str
    id: str
# Node type for input records labelled `identifier`; child of Attribute,
# identified by `fhir_id`. This is the node type the "... to identifier
# association" edge entries in this file are named after.
identifier:
  is_a: Attribute
  represented_as: node
  preferred_id: fhir_id
  label_in_input: identifier
  properties:
    label: str
    value: str
    system: str
# Node type for input records labelled `interpretation`; attached directly
# below the generic `named thing` class and identified by `fhir_id`.
# Property names mix plain, underscore-joined and dotted `extension.` forms.
interpretation:
  is_a: named thing
  represented_as: node
  preferred_id: fhir_id
  label_in_input: interpretation
  properties:
    extension.valueCoding_system: str
    extension_url: str
    extension.valueCoding_display: str
    coding_code: str
    coding_system: str
    label: str
    extension.valueCoding_code: str
# Node type for input records labelled `maritalStatus`; child of
# OrganismAttribute, identified by `fhir_id`. All properties are strings.
maritalStatus:
  is_a: OrganismAttribute
  represented_as: node
  preferred_id: fhir_id
  label_in_input: maritalStatus
  properties:
    label: str
    coding_system: str
    coding_code: str
# Node type for input records labelled `Observation`; child of
# ClinicalEntity, identified by `fhir_id`. Note that `effectiveDateTime` is
# declared as a plain string, not as a date type.
Observation:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Observation
  properties:
    resourceType: str
    label: str
    effectiveDateTime: str
    status: str
    id: str
# Node type for input records labelled `Organization`; child of
# AdministrativeEntity, identified by `fhir_id`. All properties are strings.
Organization:
  is_a: AdministrativeEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Organization
  properties:
    label: str
    id: str
    name: str
    resourceType: str
# Node type for input records labelled `Patient`; child of Human, identified
# by `fhir_id`. Note that `birthDate` is declared as a plain string, not as a
# date type.
Patient:
  is_a: Human
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Patient
  properties:
    resourceType: str
    label: str
    gender: str
    id: str
    birthDate: str
# Node type for input records labelled `Procedure`, identified by `fhir_id`.
# No `is_a` parent is set: the line below is deliberately left disabled.
# NOTE(review): presumably `Procedure` is resolved directly as an ontology
# class, which would make a self-referencing parent redundant — confirm.
Procedure:
  # is_a: Procedure
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Procedure
  properties:
    label: str
    performedDateTime: str
    resourceType: str
    status: str
    id: str
# Node type for input records labelled `referenceRange`; attached directly
# below the generic `named thing` class and identified by `fhir_id`.
# Only `high_*` bound properties are declared here.
referenceRange:
  is_a: named thing
  represented_as: node
  preferred_id: fhir_id
  label_in_input: referenceRange
  properties:
    high_system: str
    high_value: str
    high_code: str
    label: str
    high_unit: str
# Node type for input records labelled `search`; attached directly below the
# generic `named thing` class and identified by `fhir_id`.
search:
  is_a: named thing
  represented_as: node
  preferred_id: fhir_id
  label_in_input: search
  properties:
    label: str
    mode: str
# Node type for input records labelled `type`; child of Attribute,
# identified by `fhir_id`. All properties are strings.
type:
  is_a: Attribute
  represented_as: node
  preferred_id: fhir_id
  label_in_input: type
  properties:
    coding_system: str
    label: str
    coding_code: str
    coding_display: str
# Node type for input records labelled `verificationStatus`; child of
# Attribute, identified by `fhir_id`. All properties are strings.
verificationStatus:
  is_a: Attribute
  represented_as: node
  preferred_id: fhir_id
  label_in_input: verificationStatus
  properties:
    coding_system: str
    label: str
    coding_code: str
    coding_display: str
# ---
# Associations
# ---
# Associations are not supposed to be represented in node form as per the
# specifications of Biolink. However, in an analytic context, it often makes
# sense to represent interactions as nodes in Neo4j, because it enables, for
# instance, the annotation of a relationship with a publication as source of
# evidence (also known as reification in the knowledge graph world).
# The Biolink specifications for these types of relationships do
# not go into depth; for example, the hierarchy for molecular interactions
# (i.e., "associations") ends at "PairwiseMolecularInteraction"; there are no
# explicit terms for protein-protein interaction, phosphorylation, miRNA
# targeting, etc. Biolink proposes to use interaction identifiers from
# ontologies, such as https://www.ebi.ac.uk/ols/ontologies/mi/.
# Associations that connect anything to an identifier node.
# If functional, these include:
# IDENTIFIED_BY_Condition_Identifier,
# IDENTIFIED_BY_DiagnosticReport_Identifier,
# IDENTIFIED_BY_Encounter_Identifier,
# IDENTIFIED_BY_Observation_Identifier,
# IDENTIFIED_BY_Organization_Identifier,
# IDENTIFIED_BY_Patient_Identifier,
# IDENTIFIED_BY_Procedure_Identifier
# Edge type for input edges labelled `IDENTIFIED_BY_Condition_Identifier`.
condition to identifier association:
  is_a: association
  represented_as: edge
  label_in_input: IDENTIFIED_BY_Condition_Identifier
# Edge type for input edges labelled
# `IDENTIFIED_BY_DiagnosticReport_Identifier`.
diagnostic report to identifier association:
  is_a: association
  represented_as: edge
  label_in_input: IDENTIFIED_BY_DiagnosticReport_Identifier
# Edge type for input edges labelled `IDENTIFIED_BY_Observation_Identifier`.
observation to identifier association:
  is_a: association
  represented_as: edge
  label_in_input: IDENTIFIED_BY_Observation_Identifier
# Edge type for input edges labelled `DERIVED_FROM_Observation_Observation`
# (an Observation-to-Observation link).
observation derived from observation association:
  is_a: association
  represented_as: edge
  label_in_input: DERIVED_FROM_Observation_Observation
# Edge type for input edges labelled `HAS_MEMBER_Observation_Observation`
# (an Observation-to-Observation link).
observation has member observation association:
  is_a: association
  represented_as: edge
  label_in_input: HAS_MEMBER_Observation_Observation
# Edge type for input edges labelled `IDENTIFIED_BY_Procedure_Identifier`.
procedure to identifier association:
  is_a: association
  represented_as: edge
  label_in_input: IDENTIFIED_BY_Procedure_Identifier
# Edge type linking a Procedure to a DiagnosticReport.
# The input label must differ from `IDENTIFIED_BY_Procedure_Identifier`,
# which belongs to "procedure to identifier association"; sharing it would
# make the two entries indistinguishable and leave procedure-to-report edges
# unmatched.
# NOTE(review): the label below is inferred from the
# <RELATION>_<Source>_<Target> naming used by the sibling entries — confirm
# it against the edge labels the pipeline actually emits.
procedure to diagnostic report association:
  is_a: association
  represented_as: edge
  label_in_input: HAS_REPORT_Procedure_DiagnosticReport
# Edge type for input edges labelled
# `HAS_REASON_REFERENCE_Procedure_Observation`.
procedure reasoned by observation association:
  is_a: association
  represented_as: edge
  label_in_input: HAS_REASON_REFERENCE_Procedure_Observation
# Edge type for input edges labelled
# `HAS_ACTOR_ProcedurePerformer_Practitioner`.
# NOTE(review): no `ProcedurePerformer` or `Practitioner` node type is
# declared in the visible part of this file — confirm the endpoints exist.
procedure performer is practitioner association:
  is_a: association
  represented_as: edge
  label_in_input: HAS_ACTOR_ProcedurePerformer_Practitioner
#represented_as: edge
#label_in_input: DERIVED_FROM_Observation_Observation:
#represented_as: edge
#label_in_input: DERIVED_FROM_Observation_Observation
#protein interaction:
# is_a: Pairwise molecular interaction
# represented_as: edge
# label_in_input: protein_protein_interaction
#protein to disease association:
# is_a: Association
# represented_as: edge
# label_in_input: protein_disease_association