# Human-readable title of this schema file; its value is a plain string, not
# an entity definition.
Title: 'BioCypher graph schema configuration file'

# This configuration file establishes the hierarchy and connectivity in a newly
# set-up BioCypher property graph database. Naming should adhere to Biolink
# nomenclature (available at https://biolink.github.io/biolink-model/ or via 
# the python module 'biolink-model-toolkit').

# The BioCypher YAML file specifies only the leaves of the hierarchy tree of
# the desired graph; the hierarchical structure of entities will be derived 
# from the Biolink model + BRO model. Thus, only the immediate constituents
# of the graph need to be specified in the schema config.


# ---
# "Named Things"
# ---
# The implementation of named things is fairly straightforward, since they are
# usually represented in node form, which is also the Biolink recommendation.
# The same is not true for associations.
#
# A little more complex is the representation of aggregates of named things.

# Node type for `clinicalStatus` input records: a ClinicalEntity identified by
# fhir_id, carrying a label plus a coding system/code pair (all strings).
clinicalStatus:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: clinicalStatus
  properties:
    coding_system: str
    label: str
    coding_code: str

# Node type for `Condition` input records: a ClinicalEntity identified by
# fhir_id.
# NOTE(review): unlike every other entry in this file, the two property values
# below are descriptive text rather than a type name such as `str` -- confirm
# that this is intended.
Condition:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Condition
  properties:
    input_format: HL7 FHIR
    data_specification: Medical Informatics Initiative Germany Core Data Set, Basic Modules

# Node type for `diagnosis` input records: a ClinicalEntity identified by
# fhir_id. Two of its property names contain a dot (`type.coding_*`); all
# properties are declared as strings, including `sequence`.
diagnosis:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: diagnosis
  properties:
    type.coding_code: str
    sequence: str
    label: str
    type.coding_system: str

# Node type for `DiagnosticReport` input records: a ClinicalEntity identified
# by fhir_id, with resource type, label, status and id stored as strings.
DiagnosticReport:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: DiagnosticReport
  properties:
    resourceType: str
    label: str
    status: str
    id: str

# Node type for `Encounter` input records: a ClinicalEntity identified by
# fhir_id, with resource type, label, status and id stored as strings.
Encounter:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Encounter
  properties:
    resourceType: str
    label: str
    status: str
    id: str

# Node type for `identifier` input records: an Attribute identified by
# fhir_id, holding a label, a value and the system the value belongs to.
identifier:
  is_a: Attribute
  represented_as: node
  preferred_id: fhir_id
  label_in_input: identifier
  properties:
    label: str
    value: str
    system: str

# Node type for `interpretation` input records, attached directly below
# `named thing` and identified by fhir_id. Besides the plain coding
# system/code pair it declares extension fields (URL and valueCoding
# system/display/code), all as strings.
interpretation:
  is_a: named thing
  represented_as: node
  preferred_id: fhir_id
  label_in_input: interpretation
  properties:
    extension.valueCoding_system: str
    extension_url: str
    extension.valueCoding_display: str
    coding_code: str
    coding_system: str
    label: str
    extension.valueCoding_code: str

# Node type for `maritalStatus` input records: an OrganismAttribute identified
# by fhir_id, carrying a label plus a coding system/code pair (all strings).
maritalStatus:
  is_a: OrganismAttribute
  represented_as: node
  preferred_id: fhir_id
  label_in_input: maritalStatus
  properties:
    label: str
    coding_system: str
    coding_code: str

# Node type for `Observation` input records: a ClinicalEntity identified by
# fhir_id. `effectiveDateTime` is declared as a string, like every other
# property here.
Observation:
  is_a: ClinicalEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Observation
  properties:
    resourceType: str
    label: str
    effectiveDateTime: str
    status: str
    id: str

# Node type for `Organization` input records: an AdministrativeEntity
# identified by fhir_id, with label, id, name and resource type as strings.
Organization:
  is_a: AdministrativeEntity
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Organization
  properties:
    label: str
    id: str
    name: str
    resourceType: str

# Node type for `Patient` input records: typed as Human and identified by
# fhir_id. `birthDate` is declared as a string, like every other property
# here.
Patient:
  is_a: Human
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Patient
  properties:
    resourceType: str
    label: str
    gender: str
    id: str
    birthDate: str

# Node type for `Procedure` input records, identified by fhir_id.
# No `is_a` parent is set: the former `is_a: Procedure` line is kept commented
# out below (it would name this entry as its own parent).
# NOTE(review): presumably `Procedure` is resolved directly against the
# Biolink model -- confirm.
Procedure:
  # is_a: Procedure
  represented_as: node
  preferred_id: fhir_id
  label_in_input: Procedure
  properties:
    label: str
    performedDateTime: str
    resourceType: str
    status: str
    id: str

# Node type for `referenceRange` input records, attached directly below
# `named thing` and identified by fhir_id. Only `high_*` fields (system,
# value, code, unit) and a label are declared, all as strings.
referenceRange:
  is_a: named thing
  represented_as: node
  preferred_id: fhir_id
  label_in_input: referenceRange
  properties:
    high_system: str
    high_value: str
    high_code: str
    label: str
    high_unit: str

# Node type for `search` input records, attached directly below `named thing`
# and identified by fhir_id; carries only a label and a mode (both strings).
search:
  is_a: named thing
  represented_as: node
  preferred_id: fhir_id
  label_in_input: search
  properties:
    label: str
    mode: str

# Node type for `type` input records: an Attribute identified by fhir_id,
# carrying a label plus coding system, code and display text (all strings).
type:
  is_a: Attribute
  represented_as: node
  preferred_id: fhir_id
  label_in_input: type
  properties:
    coding_system: str
    label: str
    coding_code: str
    coding_display: str

# Node type for `verificationStatus` input records: an Attribute identified
# by fhir_id, carrying a label plus coding system, code and display text
# (all strings).
verificationStatus:
  is_a: Attribute
  represented_as: node
  preferred_id: fhir_id
  label_in_input: verificationStatus
  properties:
    coding_system: str
    label: str
    coding_code: str
    coding_display: str


# ---
# Associations
# ---
# Associations are not supposed to be represented in node form as per the 
# specifications of Biolink. However, in an analytic context, it often makes 
# sense to represent interactions as nodes in Neo4j, because it enables, for
# instance, the annotation of a relationship with a publication as source of
# evidence (also known as reification in the knowledge graph world). 

# The Biolink specifications for these types of relationships do
# not go into depth. For example, the hierarchy for molecular interactions
# (i.e., "associations") ends at "PairwiseMolecularInteraction"; there are no
# explicit terms for protein-protein interaction, phosphorylation, miRNA
# targeting, etc. Biolink proposes to use interaction identifiers from
# ontologies, such as https://www.ebi.ac.uk/ols/ontologies/mi/.

# Associations that connect any resource to an identifier node.
# If functional, these include:
#   IDENTIFIED_BY_Condition_Identifier,
#   IDENTIFIED_BY_DiagnosticReport_Identifier,
#   IDENTIFIED_BY_Encounter_Identifier,
#   IDENTIFIED_BY_Observation_Identifier,
#   IDENTIFIED_BY_Organization_Identifier,
#   IDENTIFIED_BY_Patient_Identifier,
#   IDENTIFIED_BY_Procedure_Identifier
# NOTE(review): the Encounter, Organization and Patient labels have no schema
# entry in the visible part of this file -- confirm whether they are needed.

# Edge type for input edges labelled IDENTIFIED_BY_Condition_Identifier.
condition to identifier association:
  is_a: association
  represented_as: edge
  label_in_input: IDENTIFIED_BY_Condition_Identifier

# Edge type for input edges labelled
# IDENTIFIED_BY_DiagnosticReport_Identifier.
diagnostic report to identifier association:
  is_a: association
  represented_as: edge
  label_in_input: IDENTIFIED_BY_DiagnosticReport_Identifier

# Edge type for input edges labelled IDENTIFIED_BY_Observation_Identifier.
observation to identifier association:
  is_a: association
  represented_as: edge
  label_in_input: IDENTIFIED_BY_Observation_Identifier

# Edge type between two observations, for input edges labelled
# DERIVED_FROM_Observation_Observation.
observation derived from observation association:
  is_a: association
  represented_as: edge
  label_in_input: DERIVED_FROM_Observation_Observation

# Edge type between two observations, for input edges labelled
# HAS_MEMBER_Observation_Observation.
observation has member observation association:
  is_a: association
  represented_as: edge
  label_in_input: HAS_MEMBER_Observation_Observation

# Edge type for input edges labelled IDENTIFIED_BY_Procedure_Identifier.
procedure to identifier association:
  is_a: association
  represented_as: edge
  label_in_input: IDENTIFIED_BY_Procedure_Identifier

# Edge type linking a procedure to a diagnostic report.
# NOTE(review): this entry previously reused the input label
# IDENTIFIED_BY_Procedure_Identifier, which already belongs to
# "procedure to identifier association"; two entries sharing one
# label_in_input make the mapping of those edges ambiguous. The label below
# is inferred from the VERB_Source_Target pattern used by the other edge
# labels in this file -- confirm it against the labels the input adapter
# actually emits.
procedure to diagnostic report association:
  is_a: association
  represented_as: edge
  label_in_input: HAS_REPORT_Procedure_DiagnosticReport

# Edge type for input edges labelled
# HAS_REASON_REFERENCE_Procedure_Observation.
procedure reasoned by observation association:
  is_a: association
  represented_as: edge
  label_in_input: HAS_REASON_REFERENCE_Procedure_Observation

# Edge type for input edges labelled
# HAS_ACTOR_ProcedurePerformer_Practitioner.
# NOTE(review): no `Practitioner` or `ProcedurePerformer` node type is defined
# in the visible part of this file -- confirm the endpoints exist.
procedure performer is practitioner association:
  is_a: association
  represented_as: edge
  label_in_input: HAS_ACTOR_ProcedurePerformer_Practitioner

  #represented_as: edge
  #label_in_input: DERIVED_FROM_Observation_Observation:
  #represented_as: edge
  #label_in_input: DERIVED_FROM_Observation_Observation
      #protein interaction:
      #    is_a: Pairwise molecular interaction
      #    represented_as: edge
      #    label_in_input: protein_protein_interaction

      #protein to disease association:
      #   is_a: Association
      #    represented_as: edge
      #    label_in_input: protein_disease_association