release commit
biocypher/__init__.py  (new file, 41 lines)
@@ -0,0 +1,41 @@
#!/usr/bin/env python

#
# Copyright 2021, Heidelberg University Clinic
#
# File author(s): Sebastian Lobentanzer
#                 ...
#
# Distributed under MIT licence, see the file `LICENSE`.
#
"""
BioCypher: a unifying framework for biomedical knowledge graphs.
"""

__all__ = [
    "__version__",
    "__author__",
    "module_data",
    "config",
    "logfile",
    "log",
    "Driver",
    "BioCypher",
    "Resource",
]

from ._get import Resource
from ._core import BioCypher
from ._config import config, module_data
from ._logger import log, logger, logfile
from ._metadata import __author__, __version__


class Driver(BioCypher):
    # initialise parent class but log a warning
    def __init__(self, *args, **kwargs):
        logger.warning(
            "The class `Driver` is deprecated and will be removed in a future "
            "release. Please use `BioCypher` instead."
        )
        super().__init__(*args, **kwargs)
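For orientation, a minimal offline usage sketch of the public API exported above; the example node tuple and property names are hypothetical and not part of this commit:

from biocypher import BioCypher

bc = BioCypher()  # dbms, offline mode etc. are read from biocypher_config.yaml

# hypothetical 3-tuple nodes: (id, input label, properties)
nodes = [("P12345", "protein", {"name": "example protein", "taxon": 9606})]
bc.write_nodes(nodes)    # translates the tuples and writes batch import files
bc.write_import_call()   # shell script for the configured DBMS (offline mode)
bc.summary()             # ontology tree, duplicates, missing input labels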
biocypher/_config/__init__.py  (new file, 148 lines)
@@ -0,0 +1,148 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Sebastian Lobentanzer
|
||||
# ...
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
Module data directory, including:
|
||||
|
||||
* The BioLink database schema
|
||||
* The default config files
|
||||
"""
|
||||
|
||||
from typing import Any, Optional
|
||||
import os
|
||||
import warnings
|
||||
|
||||
import yaml
|
||||
import appdirs
|
||||
|
||||
__all__ = ["module_data", "module_data_path", "read_config", "config", "reset"]
|
||||
|
||||
_USER_CONFIG_DIR = appdirs.user_config_dir("biocypher", "saezlab")
|
||||
_USER_CONFIG_FILE = os.path.join(_USER_CONFIG_DIR, "conf.yaml")
|
||||
|
||||
|
||||
class MyLoader(yaml.SafeLoader):
|
||||
def construct_scalar(self, node):
|
||||
# Check if the scalar contains double quotes and an escape sequence
|
||||
value = super().construct_scalar(node)
|
||||
q = bool(node.style == '"')
|
||||
b = bool("\\" in value.encode("unicode_escape").decode("utf-8"))
|
||||
if q and b:
|
||||
warnings.warn(
|
||||
(
|
||||
"Double quotes detected in YAML configuration scalar: "
|
||||
f"{value.encode('unicode_escape')}. "
|
||||
"These allow escape sequences and may cause problems, for "
|
||||
"instance with the Neo4j admin import files (e.g. '\\t'). "
|
||||
"Make sure you wanted to do this, and use single quotes "
|
||||
"whenever possible."
|
||||
),
|
||||
category=UserWarning,
|
||||
)
|
||||
return value
|
||||
|
||||
|
||||
def module_data_path(name: str) -> str:
|
||||
"""
|
||||
Absolute path to a YAML file shipped with the module.
|
||||
"""
|
||||
|
||||
here = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
return os.path.join(here, f"{name}.yaml")
|
||||
|
||||
|
||||
def module_data(name: str) -> Any:
|
||||
"""
|
||||
Retrieve the contents of a YAML file shipped with this module.
|
||||
"""
|
||||
|
||||
path = module_data_path(name)
|
||||
|
||||
return _read_yaml(path)
|
||||
|
||||
|
||||
def _read_yaml(path: str) -> Optional[dict]:
|
||||
if os.path.exists(path):
|
||||
with open(path, "r") as fp:
|
||||
return yaml.load(fp.read(), Loader=MyLoader)
|
||||
|
||||
|
||||
def read_config() -> dict:
|
||||
"""
|
||||
Read the module config.
|
||||
|
||||
Read and merge the built-in default, the user level and directory level
|
||||
configuration, with the latter taking precedence over the former.
|
||||
|
||||
TODO explain path configuration
|
||||
"""
|
||||
|
||||
defaults = module_data("biocypher_config")
|
||||
user = _read_yaml(_USER_CONFIG_FILE) or {}
|
||||
# TODO account for .yml?
|
||||
local = (
|
||||
_read_yaml("biocypher_config.yaml")
|
||||
or _read_yaml("config/biocypher_config.yaml")
|
||||
or {}
|
||||
)
|
||||
|
||||
for key in defaults:
|
||||
value = (
|
||||
local[key] if key in local else user[key] if key in user else None
|
||||
)
|
||||
|
||||
if value is not None:
|
||||
if isinstance(
|
||||
defaults[key], str
|
||||
): # first level config (like title)
|
||||
defaults[key] = value
|
||||
else:
|
||||
defaults[key].update(value)
|
||||
|
||||
return defaults
|
||||
|
||||
|
||||
def config(*args, **kwargs) -> Optional[Any]:
|
||||
"""
|
||||
Set or get module config parameters.
|
||||
"""
|
||||
|
||||
if args and kwargs:
|
||||
raise ValueError(
|
||||
"Setting and getting values in the same call is not allowed.",
|
||||
)
|
||||
|
||||
if args:
|
||||
result = tuple(globals()["_config"].get(key, None) for key in args)
|
||||
|
||||
return result[0] if len(result) == 1 else result
|
||||
|
||||
for key, value in kwargs.items():
|
||||
globals()["_config"][key].update(value)
|
||||
|
||||
|
||||
def reset():
|
||||
"""
|
||||
Reload configuration from the config files.
|
||||
"""
|
||||
|
||||
globals()["_config"] = read_config()
|
||||
|
||||
|
||||
reset()
|
||||
|
||||
|
||||
def update_from_file(path: str):
|
||||
"""
|
||||
Update the module configuration from a YAML file.
|
||||
"""
|
||||
|
||||
config(**_read_yaml(path))
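A minimal sketch of the get/set semantics of `config()` defined above (assuming the module is importable as `biocypher._config`; the values are illustrative):

from biocypher._config import config, reset

neo4j_settings = config("neo4j")                 # get: returns the 'neo4j' section as a dict
config(neo4j={"database_name": "my_database"})   # set: merges the given values into that section
# config("neo4j", neo4j={...}) would raise ValueError: getting and setting cannot be mixed
reset()                                          # re-read defaults, user-level and local YAML files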
biocypher/_config/biocypher_config.yaml  (new file, 141 lines)
@@ -0,0 +1,141 @@
|
||||
Title: BioCypher python module configuration file
|
||||
|
||||
## Some options are not used by default. Uncomment them to use them.
|
||||
|
||||
biocypher:
|
||||
### Required parameters ###
|
||||
## DBMS type
|
||||
|
||||
dbms: neo4j
|
||||
|
||||
## Schema configuration
|
||||
|
||||
# schema_config_path: config/schema_config.yaml
|
||||
|
||||
## Offline mode: do not connect to a running DBMS instance
|
||||
## Can be used e.g. for writing batch import files
|
||||
|
||||
offline: true
|
||||
|
||||
## Strict mode: do not allow creating new nodes or relationships without
|
||||
## specifying source, version, and license parameters
|
||||
|
||||
strict_mode: false
|
||||
|
||||
## Ontology configuration
|
||||
|
||||
head_ontology:
|
||||
url: https://github.com/biolink/biolink-model/raw/v3.2.1/biolink-model.owl.ttl
|
||||
root_node: entity
|
||||
# switch_label_and_id: true
|
||||
|
||||
### Optional parameters ###
|
||||
|
||||
## Logging
|
||||
# Write log to disk
|
||||
log_to_disk: true
|
||||
|
||||
# Activate more granular logging
|
||||
debug: true
|
||||
|
||||
# Change the log directory
|
||||
# log_directory: biocypher-log
|
||||
|
||||
## Data output directory
|
||||
# output_directory: biocypher-out
|
||||
|
||||
## Resource cache directory
|
||||
# cache_directory: .cache
|
||||
|
||||
## Optional tail ontologies
|
||||
|
||||
# tail_ontologies:
|
||||
# so:
|
||||
# url: test/ontologies/so.owl
|
||||
# head_join_node: sequence variant
|
||||
# tail_join_node: sequence_variant
|
||||
# switch_label_and_id: true
|
||||
# mondo:
|
||||
# url: test/ontologies/mondo.owl
|
||||
# head_join_node: disease
|
||||
# tail_join_node: disease
|
||||
# switch_label_and_id: true
|
||||
|
||||
### DBMS configuration ###
|
||||
|
||||
neo4j:
|
||||
### Neo4j configuration ###
|
||||
## Database name
|
||||
|
||||
database_name: neo4j
|
||||
|
||||
## Wipe DB before import (offline mode: --force)
|
||||
|
||||
wipe: true
|
||||
|
||||
## Neo4j authentication
|
||||
|
||||
uri: neo4j://localhost:7687
|
||||
user: neo4j
|
||||
password: neo4j
|
||||
|
||||
## Neo4j admin import batch writer settings
|
||||
|
||||
delimiter: ";"
|
||||
array_delimiter: "|"
|
||||
quote_character: "'"
|
||||
|
||||
## MultiDB functionality
|
||||
## Set to false for using community edition or older versions of Neo4j
|
||||
|
||||
multi_db: true
|
||||
|
||||
## Import options
|
||||
|
||||
skip_duplicate_nodes: false
|
||||
skip_bad_relationships: false
|
||||
|
||||
## Import call prefixes
|
||||
|
||||
# import_call_bin_prefix: bin/
|
||||
# import_call_file_prefix: path/to/files/
|
||||
|
||||
postgresql:
|
||||
### PostgreSQL configuration ###
|
||||
|
||||
# PostgreSQL connection credentials
|
||||
database_name: postgres # DB name
|
||||
user: postgres # user name
|
||||
password: postgres # password
|
||||
host: localhost # host
|
||||
port: 5432 # port
|
||||
|
||||
# PostgreSQL import batch writer settings
|
||||
quote_character: '"'
|
||||
delimiter: '\t'
|
||||
# import_call_bin_prefix: '' # path to "psql"
|
||||
# import_call_file_prefix: '/path/to/files'
|
||||
|
||||
rdf:
|
||||
### RDF configuration ###
|
||||
rdf_format: turtle
|
||||
|
||||
sqlite:
|
||||
### SQLite configuration ###
|
||||
|
||||
# SQLite connection credentials
|
||||
database_name: sqlite.db # DB name
|
||||
|
||||
# SQLite import batch writer settings
|
||||
quote_character: '"'
|
||||
delimiter: '\t'
|
||||
# import_call_bin_prefix: '' # path to "sqlite3"
|
||||
# import_call_file_prefix: '/path/to/files'
|
||||
|
||||
csv:
|
||||
### CSV/Pandas configuration ###
|
||||
delimiter: ","
|
||||
|
||||
networkx:
|
||||
### NetworkX configuration ###
|
||||
some_config: some_value # placeholder for technical reasons TODO
biocypher/_config/test_config.yaml  (new file, 5 lines)
@@ -0,0 +1,5 @@
# We test the quote detection

valid: 'This is a valid string'
also_valid: "This is also a valid string"
invalid: "\t"
biocypher/_config/test_schema_config.yaml  (new file, 140 lines)
@@ -0,0 +1,140 @@
|
||||
Title: BioCypher graph schema configuration file
|
||||
|
||||
# ---
|
||||
# "Named Things"
|
||||
# ---
|
||||
|
||||
protein:
|
||||
represented_as: node
|
||||
preferred_id: uniprot
|
||||
input_label: protein
|
||||
db_collection_name: proteins
|
||||
properties:
|
||||
name: str
|
||||
score: float
|
||||
taxon: int
|
||||
genes: str[]
|
||||
|
||||
microRNA:
|
||||
represented_as: node
|
||||
preferred_id: mirbase.mature
|
||||
input_label: mirna
|
||||
|
||||
complex:
|
||||
synonym_for: macromolecular complex
|
||||
represented_as: node
|
||||
preferred_id: complexportal
|
||||
input_label: complex
|
||||
|
||||
pathway:
|
||||
represented_as: node
|
||||
preferred_id: [reactome, wikipathways]
|
||||
input_label: [reactome, wikipathways]
|
||||
|
||||
gene:
|
||||
represented_as: node
|
||||
preferred_id: hgnc
|
||||
input_label: [hgnc, ensg]
|
||||
exclude_properties: accession
|
||||
|
||||
disease:
|
||||
represented_as: node
|
||||
preferred_id: doid
|
||||
input_label: Disease
|
||||
|
||||
side effect:
|
||||
is_a: phenotypic feature
|
||||
represented_as: node
|
||||
preferred_id: sider.effect
|
||||
input_label: sider
|
||||
|
||||
sequence variant:
|
||||
represented_as: node
|
||||
preferred_id: [clinically relevant, known, somatic]
|
||||
input_label: [Clinically_relevant_variant, Known_variant, Somatic_mutation]
|
||||
properties:
|
||||
source: str
|
||||
original_source: str
|
||||
effect: str
|
||||
biotype: str
|
||||
|
||||
snRNA sequence:
|
||||
is_a: nucleic acid entity
|
||||
represented_as: node
|
||||
preferred_id: [intact, rnacentral]
|
||||
input_label: [intact_snrna, rnacentral_snrna]
|
||||
properties:
|
||||
ac: str
|
||||
fullName: str
|
||||
shortName: str
|
||||
preferredName: str
|
||||
exclude_properties: sequence
|
||||
|
||||
DNA sequence:
|
||||
is_a: nucleic acid entity
|
||||
represented_as: node
|
||||
preferred_id: ensembl
|
||||
input_label: dna
|
||||
properties:
|
||||
ac: str
|
||||
fullName: str
|
||||
shortName: str
|
||||
preferredName: str
|
||||
sequence: str
|
||||
|
||||
dsDNA sequence:
|
||||
is_a: [DNA sequence, nucleic acid entity]
|
||||
inherit_properties: True
|
||||
represented_as: node
|
||||
preferred_id: [intact, uniparc]
|
||||
input_label: [intact_dsdna, uniprot_archive_dsdna]
|
||||
|
||||
# ---
|
||||
# Associations
|
||||
# ---
|
||||
|
||||
post translational interaction:
|
||||
is_a: pairwise molecular interaction
|
||||
represented_as: node
|
||||
label_as_edge: INTERACTS_POST_TRANSLATIONAL
|
||||
input_label: post_translational
|
||||
|
||||
phosphorylation:
|
||||
is_a: post translational interaction
|
||||
represented_as: edge
|
||||
input_label: phosphorylation
|
||||
|
||||
gene to disease association:
|
||||
represented_as: edge
|
||||
label_as_edge: PERTURBED_IN_DISEASE
|
||||
input_label: [protein_disease, gene_disease]
|
||||
exclude_properties: accession
|
||||
|
||||
mutation to tissue association:
|
||||
is_a: [genotype to tissue association, entity to tissue association, association]
|
||||
represented_as: edge
|
||||
label_as_edge: Is_Mutated_In
|
||||
input_label: Gene_Is_Mutated_In_Cell_Tissue
|
||||
|
||||
variant to gene association: # -> Known.... and Somatic....
|
||||
represented_as: edge
|
||||
source: [known.sequence variant, somatic.sequence variant]
|
||||
target: gene
|
||||
input_label: [
|
||||
VARIANT_FOUND_IN_GENE_Known_variant_Gene,
|
||||
VARIANT_FOUND_IN_GENE_Somatic_mutation_Gene
|
||||
]
|
||||
|
||||
gene to gene association:
|
||||
represented_as: edge
|
||||
input_label: gene_gene
|
||||
properties:
|
||||
directional: bool
|
||||
curated: bool
|
||||
score: float
|
||||
id: str # should be removed
|
||||
|
||||
gene to variant association: # should be removed
|
||||
is_a: gene to variant association
|
||||
represented_as: edge
|
||||
input_label: gene_variant
biocypher/_config/test_schema_config_disconnected.yaml  (new file, 3 lines)
@@ -0,0 +1,3 @@
disconnected:
  represented_as: node
  label_in_input: disconnected
biocypher/_config/test_schema_config_extended.yaml  (new file, 152 lines)
@@ -0,0 +1,152 @@
|
||||
Title: BioCypher graph schema configuration file
|
||||
|
||||
# ---
|
||||
# "Named Things"
|
||||
# ---
|
||||
|
||||
protein:
|
||||
represented_as: node
|
||||
preferred_id: uniprot
|
||||
input_label: protein
|
||||
db_collection_name: proteins
|
||||
properties:
|
||||
name: str
|
||||
score: float
|
||||
taxon: int
|
||||
genes: str[]
|
||||
|
||||
microRNA:
|
||||
represented_as: node
|
||||
preferred_id: mirbase.mature
|
||||
input_label: mirna
|
||||
|
||||
complex:
|
||||
synonym_for: macromolecular complex
|
||||
represented_as: node
|
||||
preferred_id: complexportal
|
||||
input_label: complex
|
||||
|
||||
pathway:
|
||||
represented_as: node
|
||||
preferred_id: [reactome, wikipathways]
|
||||
input_label: [reactome, wikipathways]
|
||||
|
||||
gene:
|
||||
represented_as: node
|
||||
preferred_id: hgnc
|
||||
input_label: [hgnc, ensg]
|
||||
exclude_properties: accession
|
||||
|
||||
disease:
|
||||
represented_as: node
|
||||
preferred_id: doid
|
||||
input_label: Disease
|
||||
|
||||
side effect:
|
||||
is_a: phenotypic feature
|
||||
represented_as: node
|
||||
preferred_id: sider.effect
|
||||
input_label: sider
|
||||
|
||||
sequence variant:
|
||||
represented_as: node
|
||||
preferred_id: [clinically relevant, known, somatic]
|
||||
input_label: [Clinically_relevant_variant, Known_variant, Somatic_mutation]
|
||||
properties:
|
||||
source: str
|
||||
original_source: str
|
||||
effect: str
|
||||
biotype: str
|
||||
|
||||
altered gene product level:
|
||||
represented_as: node
|
||||
input_label: agpl
|
||||
|
||||
decreased gene product level:
|
||||
represented_as: node
|
||||
input_label: agpl_decreased
|
||||
|
||||
lethal variant:
|
||||
represented_as: node
|
||||
input_label: lethal
|
||||
|
||||
snRNA sequence:
|
||||
is_a: nucleic acid entity
|
||||
represented_as: node
|
||||
preferred_id: [intact, rnacentral]
|
||||
input_label: [intact_snrna, rnacentral_snrna]
|
||||
properties:
|
||||
ac: str
|
||||
fullName: str
|
||||
shortName: str
|
||||
preferredName: str
|
||||
exclude_properties: sequence
|
||||
|
||||
DNA sequence:
|
||||
is_a: nucleic acid entity
|
||||
represented_as: node
|
||||
preferred_id: ensembl
|
||||
input_label: dna
|
||||
properties:
|
||||
ac: str
|
||||
fullName: str
|
||||
shortName: str
|
||||
preferredName: str
|
||||
sequence: str
|
||||
|
||||
dsDNA sequence:
|
||||
is_a: [DNA sequence, nucleic acid entity]
|
||||
inherit_properties: True
|
||||
represented_as: node
|
||||
preferred_id: [intact, uniparc]
|
||||
input_label: [intact_dsdna, uniprot_archive_dsdna]
|
||||
|
||||
# ---
|
||||
# Associations
|
||||
# ---
|
||||
|
||||
post translational interaction:
|
||||
is_a: pairwise molecular interaction
|
||||
represented_as: node
|
||||
label_as_edge: INTERACTS_POST_TRANSLATIONAL
|
||||
input_label: post_translational
|
||||
|
||||
phosphorylation:
|
||||
is_a: post translational interaction
|
||||
represented_as: edge
|
||||
use_id: false
|
||||
input_label: phosphorylation
|
||||
|
||||
gene to disease association:
|
||||
represented_as: edge
|
||||
label_as_edge: PERTURBED_IN_DISEASE
|
||||
input_label: [protein_disease, gene_disease]
|
||||
exclude_properties: accession
|
||||
|
||||
mutation to tissue association:
|
||||
is_a: [genotype to tissue association, entity to tissue association, association]
|
||||
represented_as: edge
|
||||
label_as_edge: Is_Mutated_In
|
||||
input_label: Gene_Is_Mutated_In_Cell_Tissue
|
||||
|
||||
variant to gene association: # -> Known.... and Somatic....
|
||||
represented_as: edge
|
||||
source: [known.sequence variant, somatic.sequence variant]
|
||||
target: gene
|
||||
input_label: [
|
||||
VARIANT_FOUND_IN_GENE_Known_variant_Gene,
|
||||
VARIANT_FOUND_IN_GENE_Somatic_mutation_Gene
|
||||
]
|
||||
|
||||
gene to gene association:
|
||||
represented_as: edge
|
||||
input_label: gene_gene
|
||||
properties:
|
||||
directional: bool
|
||||
curated: bool
|
||||
score: float
|
||||
|
||||
gene to variant association:
|
||||
is_a: gene to variant association
|
||||
represented_as: edge
|
||||
input_label: gene_variant
biocypher/_core.py  (new file, 734 lines)
@@ -0,0 +1,734 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Sebastian Lobentanzer
|
||||
# ...
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
BioCypher core module. Interfaces with the user and distributes tasks to
|
||||
submodules.
|
||||
"""
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
import os
|
||||
import json
|
||||
|
||||
from more_itertools import peekable
|
||||
import yaml
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from ._logger import logger
|
||||
|
||||
logger.debug(f"Loading module {__name__}.")
|
||||
|
||||
from ._get import Downloader
|
||||
from ._config import config as _config
|
||||
from ._config import update_from_file as _file_update
|
||||
from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
|
||||
from ._mapping import OntologyMapping
|
||||
from ._ontology import Ontology
|
||||
from ._translate import Translator
|
||||
from ._deduplicate import Deduplicator
|
||||
from .output.in_memory._pandas import Pandas
|
||||
from .output.write._get_writer import DBMS_TO_CLASS, get_writer
|
||||
from .output.connect._neo4j_driver import get_driver
|
||||
|
||||
__all__ = ["BioCypher"]
|
||||
|
||||
SUPPORTED_DBMS = DBMS_TO_CLASS.keys()
|
||||
|
||||
REQUIRED_CONFIG = [
|
||||
"dbms",
|
||||
"offline",
|
||||
"strict_mode",
|
||||
"head_ontology",
|
||||
]
|
||||
|
||||
|
||||
class BioCypher:
|
||||
"""
|
||||
Orchestration of BioCypher operations. Instantiate this class to interact
|
||||
with BioCypher.
|
||||
|
||||
Args:
|
||||
|
||||
dbms (str): The database management system to use. For supported
|
||||
systems see SUPPORTED_DBMS.
|
||||
|
||||
offline (bool): Whether to run in offline mode. If True, no
|
||||
connection to the database will be made.
|
||||
|
||||
strict_mode (bool): Whether to run in strict mode. If True, the
|
||||
translator will raise an error if a node or edge does not
|
||||
provide source, version, and licence information.
|
||||
|
||||
biocypher_config_path (str): Path to the BioCypher config file.
|
||||
|
||||
schema_config_path (str): Path to the user schema config
|
||||
file.
|
||||
|
||||
head_ontology (dict): The head ontology defined by URL ('url') and root
|
||||
node ('root_node').
|
||||
|
||||
tail_ontologies (dict): The tail ontologies defined by URL and
|
||||
join nodes for both head and tail ontology.
|
||||
|
||||
output_directory (str): Path to the output directory. If not
|
||||
provided, the default value 'biocypher-out' will be used.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
dbms: str = None,
|
||||
offline: bool = None,
|
||||
strict_mode: bool = None,
|
||||
biocypher_config_path: str = None,
|
||||
schema_config_path: str = None,
|
||||
head_ontology: dict = None,
|
||||
tail_ontologies: dict = None,
|
||||
output_directory: str = None,
|
||||
cache_directory: str = None,
|
||||
# legacy params
|
||||
db_name: str = None,
|
||||
):
|
||||
# Update configuration if custom path is provided
|
||||
if biocypher_config_path:
|
||||
_file_update(biocypher_config_path)
|
||||
|
||||
if db_name:
|
||||
logger.warning(
|
||||
"The parameter `db_name` is deprecated. Please set the "
|
||||
"`database_name` setting in the `biocypher_config.yaml` file "
|
||||
"instead."
|
||||
)
|
||||
_config(**{db_name: {"database_name": db_name}})
|
||||
|
||||
# Load configuration
|
||||
self.base_config = _config("biocypher")
|
||||
|
||||
# Check for required configuration
|
||||
for key in REQUIRED_CONFIG:
|
||||
if key not in self.base_config:
|
||||
raise ValueError(f"Configuration key {key} is required.")
|
||||
|
||||
# Set configuration - mandatory
|
||||
self._dbms = dbms or self.base_config["dbms"]
|
||||
|
||||
if offline is None:
|
||||
self._offline = self.base_config["offline"]
|
||||
else:
|
||||
self._offline = offline
|
||||
|
||||
if strict_mode is None:
|
||||
self._strict_mode = self.base_config["strict_mode"]
|
||||
else:
|
||||
self._strict_mode = strict_mode
|
||||
|
||||
self._schema_config_path = schema_config_path or self.base_config.get(
|
||||
"schema_config_path"
|
||||
)
|
||||
|
||||
if not self._schema_config_path:
|
||||
logger.warning("Running BioCypher without schema configuration.")
|
||||
else:
|
||||
logger.info(
|
||||
f"Running BioCypher with schema configuration from {self._schema_config_path}."
|
||||
)
|
||||
|
||||
self._head_ontology = head_ontology or self.base_config["head_ontology"]
|
||||
|
||||
# Set configuration - optional
|
||||
self._output_directory = output_directory or self.base_config.get(
|
||||
"output_directory"
|
||||
)
|
||||
self._cache_directory = cache_directory or self.base_config.get(
|
||||
"cache_directory"
|
||||
)
|
||||
self._tail_ontologies = tail_ontologies or self.base_config.get(
|
||||
"tail_ontologies"
|
||||
)
|
||||
|
||||
if self._dbms not in SUPPORTED_DBMS:
|
||||
raise ValueError(
|
||||
f"DBMS {self._dbms} not supported. "
|
||||
f"Please select from {SUPPORTED_DBMS}."
|
||||
)
|
||||
|
||||
# Initialize
|
||||
self._ontology_mapping = None
|
||||
self._deduplicator = None
|
||||
self._translator = None
|
||||
self._downloader = None
|
||||
self._ontology = None
|
||||
self._writer = None
|
||||
self._pd = None
|
||||
|
||||
def _get_deduplicator(self) -> Deduplicator:
|
||||
"""
|
||||
Create deduplicator if not exists and return.
|
||||
"""
|
||||
|
||||
if not self._deduplicator:
|
||||
self._deduplicator = Deduplicator()
|
||||
|
||||
return self._deduplicator
|
||||
|
||||
def _get_ontology_mapping(self) -> OntologyMapping:
|
||||
"""
|
||||
Create ontology mapping if not exists and return.
|
||||
"""
|
||||
|
||||
if not self._schema_config_path:
|
||||
self._ontology_mapping = OntologyMapping()
|
||||
|
||||
if not self._ontology_mapping:
|
||||
self._ontology_mapping = OntologyMapping(
|
||||
config_file=self._schema_config_path,
|
||||
)
|
||||
|
||||
return self._ontology_mapping
|
||||
|
||||
def _get_ontology(self) -> Ontology:
|
||||
"""
|
||||
Create ontology if not exists and return.
|
||||
"""
|
||||
|
||||
if not self._ontology:
|
||||
self._ontology = Ontology(
|
||||
ontology_mapping=self._get_ontology_mapping(),
|
||||
head_ontology=self._head_ontology,
|
||||
tail_ontologies=self._tail_ontologies,
|
||||
)
|
||||
|
||||
return self._ontology
|
||||
|
||||
def _get_translator(self) -> Translator:
|
||||
"""
|
||||
Create translator if not exists and return.
|
||||
"""
|
||||
|
||||
if not self._translator:
|
||||
self._translator = Translator(
|
||||
ontology=self._get_ontology(),
|
||||
strict_mode=self._strict_mode,
|
||||
)
|
||||
|
||||
return self._translator
|
||||
|
||||
def _get_writer(self):
|
||||
"""
|
||||
Create writer if in offline mode. Set as instance variable `self._writer`.
|
||||
"""
|
||||
|
||||
if self._offline:
|
||||
timestamp = lambda: datetime.now().strftime("%Y%m%d%H%M%S")
|
||||
outdir = self._output_directory or os.path.join(
|
||||
"biocypher-out", timestamp()
|
||||
)
|
||||
self._output_directory = os.path.abspath(outdir)
|
||||
|
||||
self._writer = get_writer(
|
||||
dbms=self._dbms,
|
||||
translator=self._get_translator(),
|
||||
deduplicator=self._get_deduplicator(),
|
||||
output_directory=self._output_directory,
|
||||
strict_mode=self._strict_mode,
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError("Cannot get writer in online mode.")
|
||||
|
||||
def _get_driver(self):
|
||||
"""
|
||||
Create driver if not exists. Set as instance variable `self._driver`.
|
||||
"""
|
||||
|
||||
if not self._offline:
|
||||
self._driver = get_driver(
|
||||
dbms=self._dbms,
|
||||
translator=self._get_translator(),
|
||||
deduplicator=self._get_deduplicator(),
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError("Cannot get driver in offline mode.")
|
||||
|
||||
def write_nodes(
|
||||
self, nodes, batch_size: int = int(1e6), force: bool = False
|
||||
) -> bool:
|
||||
"""
|
||||
Write nodes to database. Either takes an iterable of tuples (if given,
|
||||
translates to ``BioCypherNode`` objects) or an iterable of
|
||||
``BioCypherNode`` objects.
|
||||
|
||||
Args:
|
||||
nodes (iterable): An iterable of nodes to write to the database.
|
||||
|
||||
batch_size (int): The batch size to use when writing to disk.
|
||||
|
||||
force (bool): Whether to force writing to the output directory even
|
||||
if the node type is not present in the schema config file.
|
||||
|
||||
Returns:
|
||||
bool: True if successful.
|
||||
"""
|
||||
|
||||
if not self._writer:
|
||||
self._get_writer()
|
||||
|
||||
nodes = peekable(nodes)
|
||||
if not isinstance(nodes.peek(), BioCypherNode):
|
||||
tnodes = self._translator.translate_nodes(nodes)
|
||||
else:
|
||||
tnodes = nodes
|
||||
# write node files
|
||||
return self._writer.write_nodes(
|
||||
tnodes, batch_size=batch_size, force=force
|
||||
)
|
||||
|
||||
def write_edges(self, edges, batch_size: int = int(1e6)) -> bool:
|
||||
"""
|
||||
Write edges to database. Either takes an iterable of tuples (if given,
|
||||
translates to ``BioCypherEdge`` objects) or an iterable of
|
||||
``BioCypherEdge`` objects.
|
||||
|
||||
Args:
|
||||
edges (iterable): An iterable of edges to write to the database.
|
||||
|
||||
Returns:
|
||||
bool: True if successful.
|
||||
"""
|
||||
|
||||
if not self._writer:
|
||||
self._get_writer()
|
||||
|
||||
edges = peekable(edges)
|
||||
if not isinstance(edges.peek(), BioCypherEdge):
|
||||
tedges = self._translator.translate_edges(edges)
|
||||
else:
|
||||
tedges = edges
|
||||
# write edge files
|
||||
return self._writer.write_edges(tedges, batch_size=batch_size)
|
||||
|
||||
def to_df(self) -> list[pd.DataFrame]:
|
||||
"""
|
||||
Convert entities to a pandas DataFrame for each entity type and return
|
||||
a list.
|
||||
|
||||
Returns:
list[pd.DataFrame]: A list of pandas DataFrames, one per entity type.
|
||||
"""
|
||||
if not self._pd:
|
||||
raise ValueError(
|
||||
"No pandas instance found. Please call `add()` first."
|
||||
)
|
||||
|
||||
return self._pd.dfs
|
||||
|
||||
def add(self, entities) -> None:
|
||||
"""
|
||||
Function to add entities to the in-memory database. Accepts an iterable
|
||||
of tuples (if given, translates to ``BioCypherNode`` or
|
||||
``BioCypherEdge`` objects) or an iterable of ``BioCypherNode`` or
|
||||
``BioCypherEdge`` objects.
|
||||
|
||||
Args:
|
||||
entities (iterable): An iterable of entities to add to the database.
|
||||
Can be 3-tuples (nodes) or 5-tuples (edges); also accepts
|
||||
4-tuples for edges (deprecated).
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
if not self._pd:
|
||||
self._pd = Pandas(
|
||||
translator=self._get_translator(),
|
||||
deduplicator=self._get_deduplicator(),
|
||||
)
|
||||
|
||||
entities = peekable(entities)
|
||||
|
||||
if (
|
||||
isinstance(entities.peek(), BioCypherNode)
|
||||
or isinstance(entities.peek(), BioCypherEdge)
|
||||
or isinstance(entities.peek(), BioCypherRelAsNode)
|
||||
):
|
||||
tentities = entities
|
||||
elif len(entities.peek()) < 4:
|
||||
tentities = self._translator.translate_nodes(entities)
|
||||
else:
|
||||
tentities = self._translator.translate_edges(entities)
|
||||
|
||||
self._pd.add_tables(tentities)
|
||||
|
||||
def add_nodes(self, nodes) -> None:
|
||||
"""
|
||||
Wrapper for ``add()`` to add nodes to the in-memory database.
|
||||
|
||||
Args:
|
||||
nodes (iterable): An iterable of node tuples to add to the database.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
self.add(nodes)
|
||||
|
||||
def add_edges(self, edges) -> None:
|
||||
"""
|
||||
Wrapper for ``add()`` to add edges to the in-memory database.
|
||||
|
||||
Args:
|
||||
edges (iterable): An iterable of edge tuples to add to the database.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
self.add(edges)
|
||||
|
||||
def merge_nodes(self, nodes) -> bool:
|
||||
"""
|
||||
Merge nodes into database. Either takes an iterable of tuples (if given,
|
||||
translates to ``BioCypherNode`` objects) or an iterable of
|
||||
``BioCypherNode`` objects.
|
||||
|
||||
Args:
|
||||
nodes (iterable): An iterable of nodes to merge into the database.
|
||||
|
||||
Returns:
|
||||
bool: True if successful.
|
||||
"""
|
||||
|
||||
if not self._driver:
|
||||
self._get_driver()
|
||||
|
||||
nodes = peekable(nodes)
|
||||
if not isinstance(nodes.peek(), BioCypherNode):
|
||||
tnodes = self._translator.translate_nodes(nodes)
|
||||
else:
|
||||
tnodes = nodes
|
||||
# write node files
|
||||
return self._driver.add_biocypher_nodes(tnodes)
|
||||
|
||||
def merge_edges(self, edges) -> bool:
|
||||
"""
|
||||
Merge edges into database. Either takes an iterable of tuples (if given,
|
||||
translates to ``BioCypherEdge`` objects) or an iterable of
|
||||
``BioCypherEdge`` objects.
|
||||
|
||||
Args:
|
||||
edges (iterable): An iterable of edges to merge into the database.
|
||||
|
||||
Returns:
|
||||
bool: True if successful.
|
||||
"""
|
||||
|
||||
if not self._driver:
|
||||
self._get_driver()
|
||||
|
||||
edges = peekable(edges)
|
||||
if not isinstance(edges.peek(), BioCypherEdge):
|
||||
tedges = self._translator.translate_edges(edges)
|
||||
else:
|
||||
tedges = edges
|
||||
# write edge files
|
||||
return self._driver.add_biocypher_edges(tedges)
|
||||
|
||||
# DOWNLOAD AND CACHE MANAGEMENT METHODS ###
|
||||
|
||||
def _get_downloader(self, cache_dir: Optional[str] = None):
|
||||
"""
|
||||
Create downloader if not exists.
|
||||
"""
|
||||
|
||||
if not self._downloader:
|
||||
self._downloader = Downloader(self._cache_directory)
|
||||
|
||||
def download(self, *resources) -> None:
|
||||
"""
|
||||
Use the :class:`Downloader` class to download or load from cache the
|
||||
resources given by the adapter.
|
||||
"""
|
||||
|
||||
self._get_downloader()
|
||||
return self._downloader.download(*resources)
|
||||
|
||||
# OVERVIEW AND CONVENIENCE METHODS ###
|
||||
|
||||
def log_missing_input_labels(self) -> Optional[dict[str, list[str]]]:
|
||||
"""
|
||||
|
||||
Get the set of input labels encountered without an entry in the
|
||||
`schema_config.yaml` and print them to the logger.
|
||||
|
||||
Returns:
|
||||
|
||||
Optional[Dict[str, List[str]]]: A dictionary of Biolink types
|
||||
encountered without an entry in the `schema_config.yaml` file.
|
||||
|
||||
"""
|
||||
|
||||
mt = self._translator.get_missing_biolink_types()
|
||||
|
||||
if mt:
|
||||
msg = (
"Input entities not accounted for because they are not "
|
||||
f"present in the schema configuration file {self._schema_config_path} "
|
||||
"(this is not necessarily a problem, if you did not intend "
|
||||
"to include them in the database; see the log for details): \n"
|
||||
)
|
||||
for k, v in mt.items():
|
||||
msg += f" {k}: {v} \n"
|
||||
|
||||
logger.info(msg)
|
||||
return mt
|
||||
|
||||
else:
|
||||
logger.info("No missing labels in input.")
|
||||
return None
|
||||
|
||||
def log_duplicates(self) -> None:
|
||||
"""
|
||||
Get the set of duplicate nodes and edges encountered and print them to
|
||||
the logger.
|
||||
"""
|
||||
|
||||
dn = self._deduplicator.get_duplicate_nodes()
|
||||
|
||||
if dn:
|
||||
ntypes = dn[0]
|
||||
nids = dn[1]
|
||||
|
||||
msg = "Duplicate node types encountered (IDs in log): \n"
|
||||
for typ in ntypes:
|
||||
msg += f" {typ}\n"
|
||||
|
||||
logger.info(msg)
|
||||
|
||||
idmsg = "Duplicate node IDs encountered: \n"
|
||||
for _id in nids:
|
||||
idmsg += f" {_id}\n"
|
||||
|
||||
logger.debug(idmsg)
|
||||
|
||||
else:
|
||||
logger.info("No duplicate nodes in input.")
|
||||
|
||||
de = self._deduplicator.get_duplicate_edges()
|
||||
|
||||
if de:
|
||||
etypes = de[0]
|
||||
eids = de[1]
|
||||
|
||||
msg = "Duplicate edge types encountered (IDs in log): \n"
|
||||
for typ in etypes:
|
||||
msg += f" {typ}\n"
|
||||
|
||||
logger.info(msg)
|
||||
|
||||
idmsg = "Duplicate edge IDs encountered: \n"
|
||||
for _id in eids:
|
||||
idmsg += f" {_id}\n"
|
||||
|
||||
logger.debug(idmsg)
|
||||
|
||||
else:
|
||||
logger.info("No duplicate edges in input.")
|
||||
|
||||
def show_ontology_structure(self, **kwargs) -> None:
|
||||
"""
|
||||
Show the ontology structure using treelib or write to GRAPHML file.
|
||||
|
||||
Args:
|
||||
|
||||
to_disk (str): If specified, the ontology structure will be saved
|
||||
to disk as a GRAPHML file, to be opened in your favourite
|
||||
graph visualisation tool.
|
||||
|
||||
full (bool): If True, the full ontology structure will be shown,
|
||||
including all nodes and edges. If False, only the nodes and
|
||||
edges that are relevant to the extended schema will be shown.
|
||||
"""
|
||||
|
||||
if not self._ontology:
|
||||
self._get_ontology()
|
||||
|
||||
return self._ontology.show_ontology_structure(**kwargs)
|
||||
|
||||
def write_import_call(self) -> str:
|
||||
"""
|
||||
Write a shell script to import the database depending on the chosen
|
||||
DBMS.
|
||||
|
||||
Returns:
|
||||
str: path toward the file holding the import call.
|
||||
"""
|
||||
|
||||
if not self._offline:
|
||||
raise NotImplementedError(
|
||||
"Cannot write import call in online mode."
|
||||
)
|
||||
|
||||
return self._writer.write_import_call()
|
||||
|
||||
def write_schema_info(self, as_node: bool = False) -> None:
|
||||
"""
|
||||
Write an extended schema info YAML file that extends the
|
||||
`schema_config.yaml` with run-time information of the built KG. For
|
||||
instance, it includes information on whether something is present in the actual
|
||||
knowledge graph, whether it is a relationship (which is important in the
|
||||
case of representing relationships as nodes) and the actual sources and
|
||||
targets of edges. Since this file can be used in place of the original
|
||||
`schema_config.yaml` file, it indicates that it is the extended schema
|
||||
by setting `is_schema_info` to `true`.
|
||||
|
||||
We start by using the `extended_schema` dictionary from the ontology
|
||||
class instance, which contains all expanded entities and relationships.
|
||||
The information of whether something is a relationship can be gathered
|
||||
from the deduplicator instance, which keeps track of all entities that
|
||||
have been seen.
|
||||
"""
|
||||
|
||||
if not self._offline:
|
||||
raise NotImplementedError(
|
||||
"Cannot write schema info in online mode."
|
||||
)
|
||||
|
||||
ontology = self._get_ontology()
|
||||
schema = ontology.mapping.extended_schema.copy()
|
||||
schema["is_schema_info"] = True
|
||||
|
||||
deduplicator = self._get_deduplicator()
|
||||
for node in deduplicator.entity_types:
|
||||
if node in schema.keys():
|
||||
schema[node]["present_in_knowledge_graph"] = True
|
||||
schema[node]["is_relationship"] = False
|
||||
else:
|
||||
logger.info(
|
||||
f"Node {node} not present in extended schema. "
|
||||
"Skipping schema info."
|
||||
)
|
||||
|
||||
# find 'label_as_edge' cases in schema entries
|
||||
changed_labels = {}
|
||||
for k, v in schema.items():
|
||||
if not isinstance(v, dict):
|
||||
continue
|
||||
if "label_as_edge" in v.keys():
|
||||
if v["label_as_edge"] in deduplicator.seen_relationships.keys():
|
||||
changed_labels[v["label_as_edge"]] = k
|
||||
|
||||
for edge in deduplicator.seen_relationships.keys():
|
||||
if edge in changed_labels.keys():
|
||||
edge = changed_labels[edge]
|
||||
if edge in schema.keys():
|
||||
schema[edge]["present_in_knowledge_graph"] = True
|
||||
schema[edge]["is_relationship"] = True
|
||||
# TODO information about source and target nodes
|
||||
else:
|
||||
logger.info(
|
||||
f"Edge {edge} not present in extended schema. "
|
||||
"Skipping schema info."
|
||||
)
|
||||
|
||||
# write to output directory as YAML file
|
||||
path = os.path.join(self._output_directory, "schema_info.yaml")
|
||||
with open(path, "w") as f:
|
||||
f.write(yaml.dump(schema))
|
||||
|
||||
if as_node:
|
||||
# write as node
|
||||
node = BioCypherNode(
|
||||
node_id="schema_info",
|
||||
node_label="schema_info",
|
||||
properties={"schema_info": json.dumps(schema)},
|
||||
)
|
||||
self.write_nodes([node], force=True)
|
||||
|
||||
# override import call with added schema info node
|
||||
self.write_import_call()
|
||||
|
||||
return schema
|
||||
|
||||
# TRANSLATION METHODS ###
|
||||
|
||||
def translate_term(self, term: str) -> str:
|
||||
"""
|
||||
Translate a term to its BioCypher equivalent.
|
||||
|
||||
Args:
|
||||
term (str): The term to translate.
|
||||
|
||||
Returns:
|
||||
str: The BioCypher equivalent of the term.
|
||||
"""
|
||||
|
||||
# instantiate adapter if not exists
|
||||
self.start_ontology()
|
||||
|
||||
return self._translator.translate_term(term)
|
||||
|
||||
def summary(self) -> None:
|
||||
"""
|
||||
Wrapper for showing ontology structure and logging duplicates and
|
||||
missing input types.
|
||||
"""
|
||||
|
||||
self.show_ontology_structure()
|
||||
self.log_duplicates()
|
||||
self.log_missing_input_labels()
|
||||
|
||||
def reverse_translate_term(self, term: str) -> str:
|
||||
"""
|
||||
Reverse translate a term from its BioCypher equivalent.
|
||||
|
||||
Args:
|
||||
term (str): The BioCypher term to reverse translate.
|
||||
|
||||
Returns:
|
||||
str: The original term.
|
||||
"""
|
||||
|
||||
# instantiate adapter if not exists
|
||||
self.start_ontology()
|
||||
|
||||
return self._translator.reverse_translate_term(term)
|
||||
|
||||
def translate_query(self, query: str) -> str:
|
||||
"""
|
||||
Translate a query to its BioCypher equivalent.
|
||||
|
||||
Args:
|
||||
query (str): The query to translate.
|
||||
|
||||
Returns:
|
||||
str: The BioCypher equivalent of the query.
|
||||
"""
|
||||
|
||||
# instantiate adapter if not exists
|
||||
self.start_ontology()
|
||||
|
||||
return self._translator.translate(query)
|
||||
|
||||
def reverse_translate_query(self, query: str) -> str:
|
||||
"""
|
||||
Reverse translate a query from its BioCypher equivalent.
|
||||
|
||||
Args:
|
||||
query (str): The BioCypher query to reverse translate.
|
||||
|
||||
Returns:
|
||||
str: The original query.
|
||||
"""
|
||||
|
||||
# instantiate adapter if not exists
|
||||
self.start_ontology()
|
||||
|
||||
return self._translator.reverse_translate(query)
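A short sketch of the in-memory (pandas) path through `add()` and `to_df()` shown above; `dbms="csv"` is assumed to be among SUPPORTED_DBMS, and the input tuple is hypothetical:

from biocypher import BioCypher

bc = BioCypher(dbms="csv", offline=True)
bc.add([("P12345", "protein", {"name": "example"})])  # 3-tuples are translated as nodes
dfs = bc.to_df()  # list of DataFrames, one per entity type; raises if add() was never called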
biocypher/_create.py  (new file, 356 lines)
@@ -0,0 +1,356 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Sebastian Lobentanzer
|
||||
# ...
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
BioCypher 'create' module. Handles the creation of BioCypher node and edge
|
||||
dataclasses.
|
||||
"""
|
||||
from ._logger import logger
|
||||
|
||||
logger.debug(f"Loading module {__name__}.")
|
||||
|
||||
from typing import Union
|
||||
from dataclasses import field, dataclass
|
||||
import os
|
||||
|
||||
__all__ = [
|
||||
"BioCypherEdge",
|
||||
"BioCypherNode",
|
||||
"BioCypherRelAsNode",
|
||||
]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BioCypherNode:
|
||||
"""
|
||||
Handoff class to represent biomedical entities as Neo4j nodes.
|
||||
|
||||
Has id, label, property dict; id and label (in the Neo4j sense of a
|
||||
label, ie, the entity descriptor after the colon, such as
|
||||
":Protein") are non-optional and called node_id and node_label to
|
||||
avoid confusion with "label" properties. Node labels are written in
|
||||
PascalCase and as nouns, as per Neo4j consensus.
|
||||
|
||||
Args:
|
||||
node_id (string): consensus "best" id for biological entity
|
||||
node_label (string): primary type of entity, capitalised
|
||||
**properties (kwargs): collection of all other properties to be
|
||||
passed to neo4j for the respective node (dict)
|
||||
|
||||
Todo:
|
||||
- check and correct small inconsistencies such as capitalisation
|
||||
of ID names ("uniprot" vs "UniProt")
|
||||
- check for correct ID patterns (eg "ENSG" + string of numbers,
|
||||
uniprot length)
|
||||
- ID conversion using pypath translation facilities for now
|
||||
"""
|
||||
|
||||
node_id: str
|
||||
node_label: str
|
||||
preferred_id: str = "id"
|
||||
properties: dict = field(default_factory=dict)
|
||||
|
||||
def __post_init__(self):
|
||||
"""
|
||||
Add id field to properties.
|
||||
|
||||
Check for reserved keywords.
|
||||
|
||||
Replace unwanted characters in properties.
|
||||
"""
|
||||
self.properties["id"] = self.node_id
|
||||
self.properties["preferred_id"] = self.preferred_id or None
|
||||
# TODO actually make None possible here; as is, "id" is the default in
|
||||
# the dataclass as well as in the configuration file
|
||||
|
||||
if ":TYPE" in self.properties.keys():
|
||||
logger.warning(
|
||||
"Keyword ':TYPE' is reserved for Neo4j. "
|
||||
"Removing from properties.",
|
||||
# "Renaming to 'type'."
|
||||
)
|
||||
# self.properties["type"] = self.properties[":TYPE"]
|
||||
del self.properties[":TYPE"]
|
||||
|
||||
for k, v in self.properties.items():
|
||||
if isinstance(v, str):
|
||||
self.properties[k] = (
|
||||
v.replace(
|
||||
os.linesep,
|
||||
" ",
|
||||
)
|
||||
.replace(
|
||||
"\n",
|
||||
" ",
|
||||
)
|
||||
.replace(
|
||||
"\r",
|
||||
" ",
|
||||
)
|
||||
)
|
||||
|
||||
elif isinstance(v, list):
|
||||
# modified BioCypher, because the data contained integers in lists
|
||||
self.properties[k] = [
|
||||
(str(val) if isinstance(val, (int, float)) else val)
|
||||
.replace(os.linesep, " ")
|
||||
.replace("\n", " ")
|
||||
.replace("\r", " ")
|
||||
for val in v
|
||||
]
|
||||
|
||||
def get_id(self) -> str:
|
||||
"""
|
||||
Returns primary node identifier.
|
||||
|
||||
Returns:
|
||||
str: node_id
|
||||
"""
|
||||
return self.node_id
|
||||
|
||||
def get_label(self) -> str:
|
||||
"""
|
||||
Returns primary node label.
|
||||
|
||||
Returns:
|
||||
str: node_label
|
||||
"""
|
||||
return self.node_label
|
||||
|
||||
def get_type(self) -> str:
|
||||
"""
|
||||
Returns primary node label.
|
||||
|
||||
Returns:
|
||||
str: node_label
|
||||
"""
|
||||
return self.node_label
|
||||
|
||||
def get_preferred_id(self) -> str:
|
||||
"""
|
||||
Returns preferred id.
|
||||
|
||||
Returns:
|
||||
str: preferred_id
|
||||
"""
|
||||
return self.preferred_id
|
||||
|
||||
def get_properties(self) -> dict:
|
||||
"""
|
||||
Returns all other node properties apart from primary id and
|
||||
label as key-value pairs.
|
||||
|
||||
Returns:
|
||||
dict: properties
|
||||
"""
|
||||
return self.properties
|
||||
|
||||
def get_dict(self) -> dict:
|
||||
"""
|
||||
Return dict of id, labels, and properties.
|
||||
|
||||
Returns:
|
||||
dict: node_id and node_label as top-level key-value pairs,
|
||||
properties as second-level dict.
|
||||
"""
|
||||
return {
|
||||
"node_id": self.node_id,
|
||||
"node_label": self.node_label,
|
||||
"properties": self.properties,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BioCypherEdge:
|
||||
"""
|
||||
Handoff class to represent biomedical relationships in Neo4j.
|
||||
|
||||
Has source and target ids, label, property dict; ids and label (in
|
||||
the Neo4j sense of a label, ie, the entity descriptor after the
|
||||
colon, such as ":TARGETS") are non-optional and called source_id,
|
||||
target_id, and relationship_label to avoid confusion with properties
|
||||
called "label", which usually denotes the human-readable form.
|
||||
Relationship labels are written in UPPERCASE and as verbs, as per
|
||||
Neo4j consensus.
|
||||
|
||||
Args:
|
||||
|
||||
source_id (string): consensus "best" id for biological entity
|
||||
|
||||
target_id (string): consensus "best" id for biological entity
|
||||
|
||||
relationship_label (string): type of interaction, UPPERCASE
|
||||
|
||||
properties (dict): collection of all other properties of the
|
||||
respective edge
|
||||
|
||||
"""
|
||||
|
||||
source_id: str
|
||||
target_id: str
|
||||
relationship_label: str
|
||||
relationship_id: str = None
|
||||
properties: dict = field(default_factory=dict)
|
||||
|
||||
def __post_init__(self):
|
||||
"""
|
||||
Check for reserved keywords.
|
||||
"""
|
||||
|
||||
if ":TYPE" in self.properties.keys():
|
||||
logger.debug(
|
||||
"Keyword ':TYPE' is reserved for Neo4j. "
|
||||
"Removing from properties.",
|
||||
# "Renaming to 'type'."
|
||||
)
|
||||
# self.properties["type"] = self.properties[":TYPE"]
|
||||
del self.properties[":TYPE"]
|
||||
elif "id" in self.properties.keys():
|
||||
logger.debug(
|
||||
"Keyword 'id' is reserved for Neo4j. "
|
||||
"Removing from properties.",
|
||||
# "Renaming to 'type'."
|
||||
)
|
||||
# self.properties["type"] = self.properties[":TYPE"]
|
||||
del self.properties["id"]
|
||||
elif "_ID" in self.properties.keys():
|
||||
logger.debug(
|
||||
"Keyword '_ID' is reserved for Postgres. "
|
||||
"Removing from properties.",
|
||||
# "Renaming to 'type'."
|
||||
)
|
||||
# self.properties["type"] = self.properties[":TYPE"]
|
||||
del self.properties["_ID"]
|
||||
|
||||
def get_id(self) -> Union[str, None]:
|
||||
"""
|
||||
Returns primary node identifier or None.
|
||||
|
||||
Returns:
|
||||
str: node_id
|
||||
"""
|
||||
|
||||
return self.relationship_id
|
||||
|
||||
def get_source_id(self) -> str:
|
||||
"""
|
||||
Returns primary node identifier of relationship source.
|
||||
|
||||
Returns:
|
||||
str: source_id
|
||||
"""
|
||||
return self.source_id
|
||||
|
||||
def get_target_id(self) -> str:
|
||||
"""
|
||||
Returns primary node identifier of relationship target.
|
||||
|
||||
Returns:
|
||||
str: target_id
|
||||
"""
|
||||
return self.target_id
|
||||
|
||||
def get_label(self) -> str:
|
||||
"""
|
||||
Returns relationship label.
|
||||
|
||||
Returns:
|
||||
str: relationship_label
|
||||
"""
|
||||
return self.relationship_label
|
||||
|
||||
def get_type(self) -> str:
|
||||
"""
|
||||
Returns relationship label.
|
||||
|
||||
Returns:
|
||||
str: relationship_label
|
||||
"""
|
||||
return self.relationship_label
|
||||
|
||||
def get_properties(self) -> dict:
|
||||
"""
|
||||
Returns all other relationship properties apart from primary ids
|
||||
and label as key-value pairs.
|
||||
|
||||
Returns:
|
||||
dict: properties
|
||||
"""
|
||||
return self.properties
|
||||
|
||||
def get_dict(self) -> dict:
|
||||
"""
|
||||
Return dict of ids, label, and properties.
|
||||
|
||||
Returns:
|
||||
dict: source_id, target_id and relationship_label as
|
||||
top-level key-value pairs, properties as second-level
|
||||
dict.
|
||||
"""
|
||||
return {
|
||||
"relationship_id": self.relationship_id or None,
|
||||
"source_id": self.source_id,
|
||||
"target_id": self.target_id,
|
||||
"relationship_label": self.relationship_label,
|
||||
"properties": self.properties,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BioCypherRelAsNode:
|
||||
"""
|
||||
Class to represent relationships as nodes (with in- and outgoing
|
||||
edges) as a triplet of a BioCypherNode and two BioCypherEdges. Main
|
||||
usage in type checking (instances where the receiving function needs
|
||||
to check whether it receives a relationship as a single edge or as
|
||||
a triplet).
|
||||
|
||||
Args:
|
||||
|
||||
node (BioCypherNode): node representing the relationship
|
||||
|
||||
source_edge (BioCypherEdge): edge representing the source of the
|
||||
relationship
|
||||
|
||||
target_edge (BioCypherEdge): edge representing the target of the
|
||||
relationship
|
||||
|
||||
"""
|
||||
|
||||
node: BioCypherNode
|
||||
source_edge: BioCypherEdge
|
||||
target_edge: BioCypherEdge
|
||||
|
||||
def __post_init__(self):
|
||||
if not isinstance(self.node, BioCypherNode):
|
||||
raise TypeError(
|
||||
f"BioCypherRelAsNode.node must be a BioCypherNode, "
|
||||
f"not {type(self.node)}.",
|
||||
)
|
||||
|
||||
if not isinstance(self.source_edge, BioCypherEdge):
|
||||
raise TypeError(
|
||||
f"BioCypherRelAsNode.source_edge must be a BioCypherEdge, "
|
||||
f"not {type(self.source_edge)}.",
|
||||
)
|
||||
|
||||
if not isinstance(self.target_edge, BioCypherEdge):
|
||||
raise TypeError(
|
||||
f"BioCypherRelAsNode.target_edge must be a BioCypherEdge, "
|
||||
f"not {type(self.target_edge)}.",
|
||||
)
|
||||
|
||||
def get_node(self) -> BioCypherNode:
|
||||
return self.node
|
||||
|
||||
def get_source_edge(self) -> BioCypherEdge:
|
||||
return self.source_edge
|
||||
|
||||
def get_target_edge(self) -> BioCypherEdge:
|
||||
return self.target_edge
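To illustrate the handoff dataclasses defined in `_create.py`, a hedged construction example (the IDs, labels, and the IS_SOURCE_OF/IS_TARGET_OF edge labels are hypothetical):

from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode

node = BioCypherNode(node_id="P12345", node_label="protein", properties={"name": "example"})
edge = BioCypherEdge(source_id="P12345", target_id="DOID:1234", relationship_label="PERTURBED_IN_DISEASE")

# a relationship represented as a node, plus its two connecting edges
rel = BioCypherRelAsNode(
    node=BioCypherNode(node_id="intact:1", node_label="post translational interaction"),
    source_edge=BioCypherEdge(source_id="P12345", target_id="intact:1", relationship_label="IS_SOURCE_OF"),
    target_edge=BioCypherEdge(source_id="intact:1", target_id="P67890", relationship_label="IS_TARGET_OF"),
)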
biocypher/_deduplicate.py  (new file, 147 lines)
@@ -0,0 +1,147 @@
|
||||
from ._logger import logger
|
||||
|
||||
logger.debug(f"Loading module {__name__}.")
|
||||
|
||||
from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
|
||||
|
||||
|
||||
class Deduplicator:
|
||||
"""
|
||||
Singleton class responsible for deduplicating BioCypher inputs. Maintains
sets/dictionaries of node and edge types and their unique identifiers.

Node identifiers should be globally unique (represented as a set), while
|
||||
edge identifiers are only unique per edge type (represented as a dict of
|
||||
sets, keyed by edge type).
|
||||
|
||||
Stores collection of duplicate node and edge identifiers and types for
|
||||
troubleshooting and to avoid overloading the log.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.seen_entity_ids = set()
|
||||
self.duplicate_entity_ids = set()
|
||||
|
||||
self.entity_types = set()
|
||||
self.duplicate_entity_types = set()
|
||||
|
||||
self.seen_relationships = {}
|
||||
self.duplicate_relationship_ids = set()
|
||||
self.duplicate_relationship_types = set()
|
||||
|
||||
def node_seen(self, entity: BioCypherNode) -> bool:
|
||||
"""
|
||||
Adds a node to the instance and checks if it has been seen before.
|
||||
|
||||
Args:
|
||||
node: BioCypherNode to be added.
|
||||
|
||||
Returns:
|
||||
True if the node has been seen before, False otherwise.
|
||||
"""
|
||||
if entity.get_label() not in self.entity_types:
|
||||
self.entity_types.add(entity.get_label())
|
||||
|
||||
if entity.get_id() in self.seen_entity_ids:
|
||||
self.duplicate_entity_ids.add(entity.get_id())
|
||||
if entity.get_label() not in self.duplicate_entity_types:
|
||||
logger.warning(
|
||||
f"Duplicate node type {entity.get_label()} found. "
|
||||
)
|
||||
self.duplicate_entity_types.add(entity.get_label())
|
||||
return True
|
||||
|
||||
self.seen_entity_ids.add(entity.get_id())
|
||||
return False
|
||||
|
||||
def edge_seen(self, relationship: BioCypherEdge) -> bool:
|
||||
"""
|
||||
Adds an edge to the instance and checks if it has been seen before.
|
||||
|
||||
Args:
|
||||
edge: BioCypherEdge to be added.
|
||||
|
||||
Returns:
|
||||
True if the edge has been seen before, False otherwise.
|
||||
"""
|
||||
if relationship.get_type() not in self.seen_relationships:
|
||||
self.seen_relationships[relationship.get_type()] = set()
|
||||
|
||||
# concatenate source and target if no id is present
|
||||
if not relationship.get_id():
|
||||
_id = (
|
||||
f"{relationship.get_source_id()}_{relationship.get_target_id()}"
|
||||
)
|
||||
else:
|
||||
_id = relationship.get_id()
|
||||
|
||||
if _id in self.seen_relationships[relationship.get_type()]:
|
||||
self.duplicate_relationship_ids.add(_id)
|
||||
if relationship.get_type() not in self.duplicate_relationship_types:
|
||||
logger.warning(
|
||||
f"Duplicate edge type {relationship.get_type()} found. "
|
||||
)
|
||||
self.duplicate_relationship_types.add(relationship.get_type())
|
||||
return True
|
||||
|
||||
self.seen_relationships[relationship.get_type()].add(_id)
|
||||
return False
|
||||
|
||||
def rel_as_node_seen(self, rel_as_node: BioCypherRelAsNode) -> bool:
|
||||
"""
|
||||
Adds a rel_as_node to the instance (one entity and two relationships)
|
||||
and checks if it has been seen before. Only the node is relevant for
|
||||
identifying the rel_as_node as a duplicate.
|
||||
|
||||
Args:
|
||||
rel_as_node: BioCypherRelAsNode to be added.
|
||||
|
||||
Returns:
|
||||
True if the rel_as_node has been seen before, False otherwise.
|
||||
"""
|
||||
node = rel_as_node.get_node()
|
||||
|
||||
if node.get_label() not in self.seen_relationships:
|
||||
self.seen_relationships[node.get_label()] = set()
|
||||
|
||||
# rel as node always has an id
|
||||
_id = node.get_id()
|
||||
|
||||
if _id in self.seen_relationships[node.get_type()]:
|
||||
self.duplicate_relationship_ids.add(_id)
|
||||
if node.get_type() not in self.duplicate_relationship_types:
|
||||
logger.warning(f"Duplicate edge type {node.get_type()} found. ")
|
||||
self.duplicate_relationship_types.add(node.get_type())
|
||||
return True
|
||||
|
||||
self.seen_relationships[node.get_type()].add(_id)
|
||||
return False
|
||||
|
||||
def get_duplicate_nodes(self):
|
||||
"""
|
||||
Return the duplicate node types and IDs encountered so far.

Returns:
tuple | None: (duplicate node types, duplicate node IDs), or None.
|
||||
"""
|
||||
|
||||
if self.duplicate_entity_types:
|
||||
return (self.duplicate_entity_types, self.duplicate_entity_ids)
|
||||
else:
|
||||
return None
|
||||
|
||||
def get_duplicate_edges(self):
|
||||
"""
|
||||
Return the duplicate edge types and IDs encountered so far.

Returns:
tuple | None: (duplicate edge types, duplicate edge IDs), or None.
|
||||
"""
|
||||
|
||||
if self.duplicate_relationship_types:
|
||||
return (
|
||||
self.duplicate_relationship_types,
|
||||
self.duplicate_relationship_ids,
|
||||
)
|
||||
else:
|
||||
return None
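A brief sketch of how the Deduplicator above is used (the node is a hypothetical example):

from biocypher._create import BioCypherNode
from biocypher._deduplicate import Deduplicator

dedup = Deduplicator()
node = BioCypherNode(node_id="P12345", node_label="protein")
dedup.node_seen(node)        # False: first encounter, ID is recorded
dedup.node_seen(node)        # True: duplicate ID and type are recorded
dedup.get_duplicate_nodes()  # (duplicate types, duplicate IDs) or None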
biocypher/_get.py  (new file, 443 lines)
@@ -0,0 +1,443 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Sebastian Lobentanzer
|
||||
# ...
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
BioCypher get module. Used to download and cache data from external sources.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
import shutil
|
||||
|
||||
import requests
|
||||
|
||||
from ._logger import logger
|
||||
|
||||
logger.debug(f"Loading module {__name__}.")
|
||||
|
||||
from abc import ABC
|
||||
from datetime import datetime, timedelta
|
||||
from tempfile import TemporaryDirectory
|
||||
import os
|
||||
import json
|
||||
import ftplib
|
||||
|
||||
import pooch
|
||||
|
||||
from ._misc import to_list, is_nested
|
||||
|
||||
|
||||
class Resource(ABC):
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
url_s: str | list[str],
|
||||
lifetime: int = 0,
|
||||
):
|
||||
"""
|
||||
|
||||
A Resource is a file, a list of files, an API request, or a list of API
|
||||
requests, any of which can be downloaded from the given URL(s) and
|
||||
cached locally. This class captures the minimum information required for a
resource to be consumed by a BioCypher adapter.
|
||||
|
||||
Args:
|
||||
name (str): The name of the resource.
|
||||
|
||||
url_s (str | list[str]): The URL or URLs of the resource.
|
||||
|
||||
lifetime (int): The lifetime of the resource in days. If 0, the
|
||||
resource is considered to be permanent.
|
||||
"""
|
||||
self.name = name
|
||||
self.url_s = url_s
|
||||
self.lifetime = lifetime
|
||||
|
||||
|
||||
class FileDownload(Resource):
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
url_s: str | list[str],
|
||||
lifetime: int = 0,
|
||||
is_dir: bool = False,
|
||||
):
|
||||
"""
|
||||
Represents basic information for a File Download.
|
||||
|
||||
Args:
|
||||
name(str): The name of the File Download.
|
||||
|
||||
url_s(str|list[str]): The URL(s) of the File Download.
|
||||
|
||||
lifetime(int): The lifetime of the File Download in days. If 0, the
|
||||
File Download is cached indefinitely.
|
||||
|
||||
is_dir (bool): Whether the URL points to a directory or not.
|
||||
"""
|
||||
|
||||
super().__init__(name, url_s, lifetime)
|
||||
self.is_dir = is_dir
|
||||
|
||||
|
||||
class APIRequest(Resource):
|
||||
def __init__(self, name: str, url_s: str | list[str], lifetime: int = 0):
|
||||
"""
|
||||
Represents basic information for an API Request.
|
||||
|
||||
Args:
|
||||
name(str): The name of the API Request.
|
||||
|
||||
url_s(str|list): The URL of the API endpoint.
|
||||
|
||||
lifetime(int): The lifetime of the API Request in days. If 0, the
|
||||
API Request is cached indefinitely.
|
||||
|
||||
"""
|
||||
super().__init__(name, url_s, lifetime)
|
||||
|
||||
|
||||
class Downloader:
|
||||
def __init__(self, cache_dir: Optional[str] = None) -> None:
|
||||
"""
|
||||
The Downloader is a class that manages resources that can be downloaded
|
||||
and cached locally. It manages the lifetime of downloaded resources by
|
||||
keeping a JSON record of the download date of each resource.
|
||||
|
||||
Args:
|
||||
cache_dir (str): The directory where the resources are cached. If
|
||||
not given, a temporary directory is created.
|
||||
"""
|
||||
self.cache_dir = cache_dir or TemporaryDirectory().name
|
||||
self.cache_file = os.path.join(self.cache_dir, "cache.json")
|
||||
self.cache_dict = self._load_cache_dict()
|
||||
|
||||
def download(self, *resources: Resource):
|
||||
"""
|
||||
Download one or multiple resources. Load from cache if the resource is
|
||||
already downloaded and the cache is not expired.
|
||||
|
||||
Args:
|
||||
resources (Resource): The resource(s) to download or load from
|
||||
cache.
|
||||
|
||||
Returns:
|
||||
list[str]: The path or paths to the resource(s) that were downloaded
|
||||
or loaded from cache.
|
||||
|
||||
"""
|
||||
paths = []
|
||||
for resource in resources:
|
||||
paths.append(self._download_or_cache(resource))
|
||||
|
||||
# flatten list if it is nested
|
||||
if is_nested(paths):
|
||||
paths = [path for sublist in paths for path in sublist]
|
||||
|
||||
return paths
|
||||
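# Usage sketch (illustrative only; the URLs and resource names below are
# placeholders): downloading a file and an API response through the public
# `download` method defined above.
#
# downloader = Downloader(cache_dir="./.cache/biocypher")
# proteins = FileDownload(
#     name="example-proteins",
#     url_s="https://example.org/data/proteins.tsv.gz",
#     lifetime=7,  # re-download after one week
# )
# annotations = APIRequest(
#     name="example-annotations",
#     url_s="https://example.org/api/annotations",
# )
# paths = downloader.download(proteins, annotations)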
|
||||
def _download_or_cache(self, resource: Resource, cache: bool = True):
|
||||
"""
|
||||
Download a resource if it is not cached or exceeded its lifetime.
|
||||
|
||||
Args:
|
||||
resource (Resource): The resource to download.
|
||||
Returns:
|
||||
list[str]: The path or paths to the downloaded resource(s).
|
||||
|
||||
|
||||
"""
|
||||
expired = self._is_cache_expired(resource)
|
||||
|
||||
if expired or not cache:
|
||||
self._delete_expired_cache(resource)
|
||||
if isinstance(resource, FileDownload):
|
||||
logger.info(f"Asking for download of resource {resource.name}.")
|
||||
paths = self._download_files(cache, resource)
|
||||
elif isinstance(resource, APIRequest):
|
||||
logger.info(
|
||||
f"Asking for download of api request {resource.name}."
|
||||
)
|
||||
paths = self._download_api_request(resource)
|
||||
|
||||
else:
|
||||
raise TypeError(f"Unknown resource type: {type(resource)}")
|
||||
|
||||
else:
|
||||
paths = self.get_cached_version(resource)
|
||||
self._update_cache_record(resource)
|
||||
return paths
|
||||
|
||||
def _is_cache_expired(self, resource: Resource) -> bool:
|
||||
"""
|
||||
Check if resource or API request cache is expired.
|
||||
|
||||
Args:
|
||||
|
||||
resource (Resource): The resource or API request to download.
|
||||
|
||||
Returns:
|
||||
bool: True if cache is expired, False if not.
|
||||
"""
|
||||
cache_record = self._get_cache_record(resource)
|
||||
if cache_record:
|
||||
download_time = datetime.strptime(
|
||||
cache_record.get("date_downloaded"), "%Y-%m-%d %H:%M:%S.%f"
|
||||
)
|
||||
lifetime = timedelta(days=resource.lifetime)
|
||||
expired = download_time + lifetime < datetime.now()
|
||||
else:
|
||||
expired = True
|
||||
return expired
|
||||
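# The expiry check above reduces to simple datetime arithmetic; a small
# stand-alone illustration with made-up values:
#
# from datetime import datetime, timedelta
# download_time = datetime(2024, 1, 1, 12, 0, 0)
# lifetime = timedelta(days=7)
# expired = download_time + lifetime < datetime.now()  # True once a week has passed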
|
||||
def _delete_expired_cache(self, resource: Resource):
|
||||
cache_resource_path = os.path.join(self.cache_dir, resource.name)
|
||||
if os.path.exists(cache_resource_path) and os.path.isdir(
|
||||
cache_resource_path
|
||||
):
|
||||
shutil.rmtree(cache_resource_path)
|
||||
|
||||
def _download_files(self, cache, file_download: FileDownload):
|
||||
"""
|
||||
Download a resource given it is a file or a directory and return the
|
||||
path.
|
||||
|
||||
Args:
|
||||
cache (bool): Whether to cache the resource or not.
|
||||
file_download (FileDownload): The resource to download.
|
||||
|
||||
Returns:
|
||||
list[str]: The path or paths to the downloaded resource(s).
|
||||
"""
|
||||
if file_download.is_dir:
|
||||
files = self._get_files(file_download)
|
||||
file_download.url_s = [
|
||||
file_download.url_s + "/" + file for file in files
|
||||
]
|
||||
file_download.is_dir = False
|
||||
paths = self._download_or_cache(file_download, cache)
|
||||
elif isinstance(file_download.url_s, list):
|
||||
paths = []
|
||||
for url in file_download.url_s:
|
||||
fname = url[url.rfind("/") + 1 :].split("?")[0]
|
||||
paths.append(
|
||||
self._retrieve(
|
||||
url=url,
|
||||
fname=fname,
|
||||
path=os.path.join(self.cache_dir, file_download.name),
|
||||
)
|
||||
)
|
||||
else:
|
||||
paths = []
|
||||
fname = file_download.url_s[
|
||||
file_download.url_s.rfind("/") + 1 :
|
||||
].split("?")[0]
|
||||
results = self._retrieve(
|
||||
url=file_download.url_s,
|
||||
fname=fname,
|
||||
path=os.path.join(self.cache_dir, file_download.name),
|
||||
)
|
||||
if isinstance(results, list):
|
||||
paths.extend(results)
|
||||
else:
|
||||
paths.append(results)
|
||||
|
||||
# sometimes a compressed file contains multiple files
|
||||
# TODO ask for a list of files in the archive to be used from the
|
||||
# adapter
|
||||
return paths
|
||||
|
||||
def _download_api_request(self, api_request: APIRequest):
|
||||
"""
|
||||
Download an API request and return the path.
|
||||
|
||||
Args:
|
||||
api_request(APIRequest): The API request result that is being
|
||||
cached.
|
||||
Returns:
|
||||
list[str]: The path to the cached API request.
|
||||
|
||||
"""
|
||||
urls = (
|
||||
api_request.url_s
|
||||
if isinstance(api_request.url_s, list)
|
||||
else [api_request.url_s]
|
||||
)
|
||||
paths = []
|
||||
for url in urls:
|
||||
fname = url[url.rfind("/") + 1 :].rsplit(".", 1)[0]
|
||||
logger.info(
|
||||
f"Asking for caching API of {api_request.name} {fname}."
|
||||
)
|
||||
response = requests.get(url=url)
|
||||
|
||||
if response.status_code != 200:
|
||||
response.raise_for_status()
|
||||
response_data = response.json()
|
||||
api_path = os.path.join(
|
||||
self.cache_dir, api_request.name, f"{fname}.json"
|
||||
)
|
||||
|
||||
os.makedirs(os.path.dirname(api_path), exist_ok=True)
|
||||
with open(api_path, "w") as f:
|
||||
json.dump(response_data, f)
|
||||
logger.info(f"Caching API request to {api_path}.")
|
||||
paths.append(api_path)
|
||||
return paths
|
||||
|
||||
def get_cached_version(self, resource: Resource) -> list[str]:
|
||||
"""Get the cached version of a resource.
|
||||
|
||||
Args:
|
||||
resource(Resource): The resource to get the cached version of.
|
||||
|
||||
Returns:
|
||||
list[str]: The paths to the cached resource(s).
|
||||
|
||||
"""
|
||||
cached_location = os.path.join(self.cache_dir, resource.name)
|
||||
logger.info(f"Use cached version from {cached_location}.")
|
||||
paths = []
|
||||
for file in os.listdir(cached_location):
|
||||
paths.append(os.path.join(cached_location, file))
|
||||
return paths
|
||||
|
||||
def _retrieve(
|
||||
self,
|
||||
url: str,
|
||||
fname: str,
|
||||
path: str,
|
||||
known_hash: str = None,
|
||||
):
|
||||
"""
|
||||
Retrieve a file from a URL using Pooch. Infer type of file from
|
||||
extension and use appropriate processor.
|
||||
|
||||
Args:
|
||||
url (str): The URL to retrieve the file from.
|
||||
|
||||
fname (str): The name of the file.
|
||||
|
||||
path (str): The directory in which to store the file.

known_hash (str): Optional known hash of the downloaded file, passed
through to pooch for verification. Defaults to None.

Returns:
The local path(s) returned by pooch: a single file path, or a list of
paths when an archive is unpacked by a processor.
"""
|
||||
if fname.endswith(".zip"):
|
||||
return pooch.retrieve(
|
||||
url=url,
|
||||
known_hash=known_hash,
|
||||
fname=fname,
|
||||
path=path,
|
||||
processor=pooch.Unzip(),
|
||||
progressbar=True,
|
||||
)
|
||||
|
||||
elif fname.endswith(".tar.gz"):
|
||||
return pooch.retrieve(
|
||||
url=url,
|
||||
known_hash=known_hash,
|
||||
fname=fname,
|
||||
path=path,
|
||||
processor=pooch.Untar(),
|
||||
progressbar=True,
|
||||
)
|
||||
|
||||
elif fname.endswith(".gz"):
|
||||
return pooch.retrieve(
|
||||
url=url,
|
||||
known_hash=known_hash,
|
||||
fname=fname,
|
||||
path=path,
|
||||
processor=pooch.Decompress(),
|
||||
progressbar=True,
|
||||
)
|
||||
|
||||
else:
|
||||
return pooch.retrieve(
|
||||
url=url,
|
||||
known_hash=known_hash,
|
||||
fname=fname,
|
||||
path=path,
|
||||
progressbar=True,
|
||||
)
|
||||
|
||||
def _get_files(self, file_download: FileDownload):
|
||||
"""
|
||||
Get the files contained in a directory file.
|
||||
|
||||
Args:
|
||||
file_download (FileDownload): The directory file.
|
||||
|
||||
Returns:
|
||||
list: The files contained in the directory.
|
||||
"""
|
||||
if file_download.url_s.startswith("ftp://"):
|
||||
# remove protocol
|
||||
url = file_download.url_s[6:]
|
||||
# get base url
|
||||
url = url[: url.find("/")]
|
||||
# get directory (remove initial slash as well)
|
||||
dir = file_download.url_s[7 + len(url) :]
|
||||
# get files
|
||||
ftp = ftplib.FTP(url)
|
||||
ftp.login()
|
||||
ftp.cwd(dir)
|
||||
files = ftp.nlst()
|
||||
ftp.quit()
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
"Only FTP directories are supported at the moment."
|
||||
)
|
||||
|
||||
return files
|
||||
|
||||
def _load_cache_dict(self):
|
||||
"""
|
||||
Load the cache dictionary from the cache file. Create an empty cache
|
||||
file if it does not exist.
|
||||
"""
|
||||
if not os.path.exists(self.cache_dir):
|
||||
logger.info(f"Creating cache directory {self.cache_dir}.")
|
||||
os.makedirs(self.cache_dir)
|
||||
|
||||
if not os.path.exists(self.cache_file):
|
||||
logger.info(f"Creating cache file {self.cache_file}.")
|
||||
with open(self.cache_file, "w") as f:
|
||||
json.dump({}, f)
|
||||
|
||||
with open(self.cache_file, "r") as f:
|
||||
logger.info(f"Loading cache file {self.cache_file}.")
|
||||
return json.load(f)
|
||||
|
||||
def _get_cache_record(self, resource: Resource):
|
||||
"""
|
||||
Get the cache record of a resource.
|
||||
|
||||
Args:
|
||||
resource (Resource): The resource to get the cache record of.
|
||||
|
||||
Returns:
|
||||
The cache record of the resource.
|
||||
"""
|
||||
return self.cache_dict.get(resource.name, {})
|
||||
|
||||
def _update_cache_record(self, resource: Resource):
|
||||
"""
|
||||
Update the cache record of a resource.
|
||||
|
||||
Args:
|
||||
resource (Resource): The resource to update the cache record of.
|
||||
"""
|
||||
cache_record = {}
|
||||
cache_record["url"] = to_list(resource.url_s)
|
||||
cache_record["date_downloaded"] = str(datetime.now())
|
||||
cache_record["lifetime"] = resource.lifetime
|
||||
self.cache_dict[resource.name] = cache_record
|
||||
with open(self.cache_file, "w") as f:
|
||||
json.dump(self.cache_dict, f, default=str)
|
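# For reference, the cache.json maintained by _update_cache_record looks
# roughly like the following (illustrative values only):
#
# {
#     "example-proteins": {
#         "url": ["https://example.org/data/proteins.tsv.gz"],
#         "date_downloaded": "2024-01-01 12:00:00.000000",
#         "lifetime": 7
#     }
# }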
121
biocypher/_logger.py
Normal file
121
biocypher/_logger.py
Normal file
@ -0,0 +1,121 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Sebastian Lobentanzer
|
||||
# ...
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
Configuration of the module logger.
|
||||
"""
|
||||
|
||||
__all__ = ["get_logger", "log", "logfile"]
|
||||
|
||||
from datetime import datetime
|
||||
import os
|
||||
import pydoc
|
||||
import logging
|
||||
|
||||
from biocypher import _config
|
||||
from biocypher._metadata import __version__
|
||||
|
||||
|
||||
def get_logger(name: str = "biocypher") -> logging.Logger:
|
||||
"""
|
||||
Access the module logger, creating a new one if it does not exist yet.
|
||||
|
||||
Method providing the central logger instance to the main module. It is
called only from the main submodule, :mod:`biocypher.driver`. In child modules,
|
||||
the standard Python logging facility is called
|
||||
(using ``logging.getLogger(__name__)``), automatically inheriting
|
||||
the handlers from the central logger.
|
||||
|
||||
The file handler creates a log file named after the current date and
|
||||
time. Levels to output to file and console can be set here.
|
||||
|
||||
Args:
|
||||
name:
|
||||
Name of the logger instance.
|
||||
|
||||
Returns:
|
||||
An instance of the Python :py:mod:`logging.Logger`.
|
||||
"""
|
||||
|
||||
if not logging.getLogger(name).hasHandlers():
|
||||
# create logger
|
||||
logger = logging.getLogger(name)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
logger.propagate = True
|
||||
|
||||
# formatting
|
||||
file_formatter = logging.Formatter(
|
||||
"%(asctime)s\t%(levelname)s\tmodule:%(module)s\n%(message)s",
|
||||
)
|
||||
stdout_formatter = logging.Formatter("%(levelname)s -- %(message)s")
|
||||
|
||||
# file name and creation
|
||||
now = datetime.now()
|
||||
date_time = now.strftime("%Y%m%d-%H%M%S")
|
||||
|
||||
log_to_disk = _config.config("biocypher").get("log_to_disk")
|
||||
|
||||
if log_to_disk:
|
||||
logdir = (
|
||||
_config.config("biocypher").get("log_directory")
|
||||
or "biocypher-log"
|
||||
)
|
||||
os.makedirs(logdir, exist_ok=True)
|
||||
logfile = os.path.join(logdir, f"biocypher-{date_time}.log")
|
||||
|
||||
# file handler
|
||||
file_handler = logging.FileHandler(logfile)
|
||||
|
||||
if _config.config("biocypher").get("debug"):
|
||||
file_handler.setLevel(logging.DEBUG)
|
||||
else:
|
||||
file_handler.setLevel(logging.INFO)
|
||||
|
||||
file_handler.setFormatter(file_formatter)
|
||||
|
||||
logger.addHandler(file_handler)
|
||||
|
||||
# handlers
|
||||
# stream handler
|
||||
stdout_handler = logging.StreamHandler()
|
||||
stdout_handler.setLevel(logging.INFO)
|
||||
stdout_handler.setFormatter(stdout_formatter)
|
||||
|
||||
# add handlers
|
||||
logger.addHandler(stdout_handler)
|
||||
|
||||
# startup message
|
||||
logger.info(f"This is BioCypher v{__version__}.")
|
||||
if log_to_disk:
|
||||
logger.info(f"Logging into `{logfile}`.")
|
||||
else:
|
||||
logger.info("Logging into stdout.")
|
||||
|
||||
return logging.getLogger(name)
|
||||
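# As the docstring above notes, child modules do not call get_logger directly;
# a minimal sketch of the intended pattern (module name is an example):
#
# import logging
# logger = logging.getLogger(__name__)  # e.g. "biocypher._ontology"
# logger.info("message routed through the central BioCypher handlers")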
|
||||
|
||||
def logfile() -> str:
|
||||
"""
|
||||
Path to the log file (only available when logging to disk is enabled).
|
||||
"""
|
||||
|
||||
return get_logger().handlers[0].baseFilename
|
||||
|
||||
|
||||
def log():
|
||||
"""
|
||||
Browse the log file.
|
||||
"""
|
||||
|
||||
with open(logfile()) as fp:
|
||||
pydoc.pager(fp.read())
|
||||
|
||||
|
||||
logger = get_logger()
|
307
biocypher/_mapping.py
Normal file
307
biocypher/_mapping.py
Normal file
@ -0,0 +1,307 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Sebastian Lobentanzer
|
||||
# ...
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
BioCypher 'mapping' module. Handles the mapping of user-defined schema to the
|
||||
underlying ontology.
|
||||
"""
|
||||
from ._logger import logger
|
||||
|
||||
logger.debug(f"Loading module {__name__}.")
|
||||
|
||||
from typing import Optional
|
||||
from urllib.request import urlopen
|
||||
|
||||
import yaml
|
||||
|
||||
from . import _misc
|
||||
from ._config import config as _config
|
||||
|
||||
|
||||
class OntologyMapping:
|
||||
"""
|
||||
Class to store the ontology mapping and extensions.
|
||||
"""
|
||||
|
||||
def __init__(self, config_file: str = None):
|
||||
self.schema = self._read_config(config_file)
|
||||
|
||||
self.extended_schema = self._extend_schema()
|
||||
|
||||
def _read_config(self, config_file: str = None):
|
||||
"""
|
||||
Read the configuration file and store the ontology mapping and extensions.
|
||||
"""
|
||||
if config_file is None:
|
||||
schema_config = {}
|
||||
|
||||
# load yaml file from web
|
||||
elif config_file.startswith("http"):
|
||||
with urlopen(config_file) as f:
|
||||
schema_config = yaml.safe_load(f)
|
||||
|
||||
# get graph state from config (assume file is local)
|
||||
else:
|
||||
with open(config_file, "r") as f:
|
||||
schema_config = yaml.safe_load(f)
|
||||
|
||||
return schema_config
|
||||
|
||||
def _extend_schema(self, d: Optional[dict] = None) -> dict:
|
||||
"""
|
||||
Get leaves of the tree hierarchy from the data structure dict
|
||||
contained in the `schema_config.yaml`. Creates virtual leaves
|
||||
(as children) from entries that provide more than one preferred
|
||||
id type (and corresponding inputs).
|
||||
|
||||
Args:
|
||||
d:
|
||||
Data structure dict from yaml file.
|
||||
|
||||
"""
|
||||
|
||||
d = d or self.schema
|
||||
|
||||
extended_schema = dict()
|
||||
|
||||
# first pass: get parent leaves with direct representation in ontology
|
||||
for k, v in d.items():
|
||||
# k is not an entity
|
||||
if "represented_as" not in v:
|
||||
continue
|
||||
|
||||
# preferred_id optional: if not provided, use `id`
|
||||
if not v.get("preferred_id"):
|
||||
v["preferred_id"] = "id"
|
||||
|
||||
# k is an entity that is present in the ontology
|
||||
if "is_a" not in v:
|
||||
extended_schema[k] = v
|
||||
|
||||
# second pass: "vertical" inheritance
|
||||
d = self._vertical_property_inheritance(d)
|
||||
for k, v in d.items():
|
||||
if "is_a" in v:
|
||||
# prevent loops
|
||||
if k == v["is_a"]:
|
||||
logger.warning(
|
||||
f"Loop detected in ontology mapping: {k} -> {v}. "
|
||||
"Removing item. Please fix the inheritance if you want "
|
||||
"to use this item."
|
||||
)
|
||||
continue
|
||||
|
||||
extended_schema[k] = v
|
||||
|
||||
# "horizontal" inheritance: create siblings for multiple identifiers or
|
||||
# sources -> virtual leaves or implicit children
|
||||
mi_leaves = {}
|
||||
ms_leaves = {}
|
||||
for k, v in d.items():
|
||||
# k is not an entity
|
||||
if "represented_as" not in v:
|
||||
continue
|
||||
|
||||
if isinstance(v.get("preferred_id"), list):
|
||||
mi_leaves = self._horizontal_inheritance_pid(k, v)
|
||||
extended_schema.update(mi_leaves)
|
||||
|
||||
elif isinstance(v.get("source"), list):
|
||||
ms_leaves = self._horizontal_inheritance_source(k, v)
|
||||
extended_schema.update(ms_leaves)
|
||||
|
||||
return extended_schema
|
||||
|
||||
def _vertical_property_inheritance(self, d):
|
||||
"""
|
||||
Inherit properties from parents to children and update `d` accordingly.
|
||||
"""
|
||||
for k, v in d.items():
|
||||
# k is not an entity
|
||||
if "represented_as" not in v:
|
||||
continue
|
||||
|
||||
# k is an entity that is present in the ontology
|
||||
if "is_a" not in v:
|
||||
continue
|
||||
|
||||
# "vertical" inheritance: inherit properties from parent
|
||||
if v.get("inherit_properties", False):
|
||||
# get direct ancestor
|
||||
if isinstance(v["is_a"], list):
|
||||
parent = v["is_a"][0]
|
||||
else:
|
||||
parent = v["is_a"]
|
||||
|
||||
# ensure child has properties and exclude_properties
|
||||
if "properties" not in v:
|
||||
v["properties"] = {}
|
||||
if "exclude_properties" not in v:
|
||||
v["exclude_properties"] = {}
|
||||
|
||||
# update properties of child
|
||||
parent_props = self.schema[parent].get("properties", {})
|
||||
if parent_props:
|
||||
v["properties"].update(parent_props)
|
||||
|
||||
parent_excl_props = self.schema[parent].get(
|
||||
"exclude_properties", {}
|
||||
)
|
||||
if parent_excl_props:
|
||||
v["exclude_properties"].update(parent_excl_props)
|
||||
|
||||
# update schema (d)
|
||||
d[k] = v
|
||||
|
||||
return d
|
||||
|
||||
def _horizontal_inheritance_pid(self, key, value):
|
||||
"""
|
||||
Create virtual leaves for multiple preferred id types or sources.
|
||||
|
||||
If we create virtual leaves, input_label/label_in_input always has to be
|
||||
a list.
|
||||
"""
|
||||
|
||||
leaves = {}
|
||||
|
||||
preferred_id = value["preferred_id"]
|
||||
input_label = value.get("input_label") or value["label_in_input"]
|
||||
represented_as = value["represented_as"]
|
||||
|
||||
# adjust lengths
|
||||
max_l = max(
|
||||
[
|
||||
len(_misc.to_list(preferred_id)),
|
||||
len(_misc.to_list(input_label)),
|
||||
len(_misc.to_list(represented_as)),
|
||||
],
|
||||
)
|
||||
|
||||
# adjust pid length if necessary
|
||||
if isinstance(preferred_id, str):
|
||||
pids = [preferred_id] * max_l
|
||||
else:
|
||||
pids = preferred_id
|
||||
|
||||
# adjust rep length if necessary
|
||||
if isinstance(represented_as, str):
|
||||
reps = [represented_as] * max_l
|
||||
else:
|
||||
reps = represented_as
|
||||
|
||||
for pid, lab, rep in zip(pids, input_label, reps):
|
||||
skey = pid + "." + key
|
||||
svalue = {
|
||||
"preferred_id": pid,
|
||||
"input_label": lab,
|
||||
"represented_as": rep,
|
||||
# mark as virtual
|
||||
"virtual": True,
|
||||
}
|
||||
|
||||
# inherit is_a if exists
|
||||
if "is_a" in value.keys():
|
||||
# treat as multiple inheritance
|
||||
if isinstance(value["is_a"], list):
|
||||
v = list(value["is_a"])
|
||||
v.insert(0, key)
|
||||
svalue["is_a"] = v
|
||||
|
||||
else:
|
||||
svalue["is_a"] = [key, value["is_a"]]
|
||||
|
||||
else:
|
||||
# set parent as is_a
|
||||
svalue["is_a"] = key
|
||||
|
||||
# inherit everything except core attributes
|
||||
for k, v in value.items():
|
||||
if k not in [
|
||||
"is_a",
|
||||
"preferred_id",
|
||||
"input_label",
|
||||
"label_in_input",
|
||||
"represented_as",
|
||||
]:
|
||||
svalue[k] = v
|
||||
|
||||
leaves[skey] = svalue
|
||||
|
||||
return leaves
|
||||
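# Hedged illustration of the virtual leaves created above. Given a schema
# entry such as (example values only):
#
# value = {
#     "represented_as": "node",
#     "preferred_id": ["uniprot", "ensembl"],
#     "input_label": ["uniprot_protein", "ensembl_protein"],
# }
#
# _horizontal_inheritance_pid("protein", value) would yield the keys
# "uniprot.protein" and "ensembl.protein", each marked "virtual": True and
# with "is_a" set to "protein".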
|
||||
def _horizontal_inheritance_source(self, key, value):
|
||||
"""
|
||||
Create virtual leaves for multiple sources.
|
||||
|
||||
If we create virtual leaves, input_label/label_in_input always has to be
|
||||
a list.
|
||||
"""
|
||||
|
||||
leaves = {}
|
||||
|
||||
source = value["source"]
|
||||
input_label = value.get("input_label") or value["label_in_input"]
|
||||
represented_as = value["represented_as"]
|
||||
|
||||
# adjust lengths
|
||||
src_l = len(source)
|
||||
|
||||
# adjust label length if necessary
|
||||
if isinstance(input_label, str):
|
||||
labels = [input_label] * src_l
|
||||
else:
|
||||
labels = input_label
|
||||
|
||||
# adjust rep length if necessary
|
||||
if isinstance(represented_as, str):
|
||||
reps = [represented_as] * src_l
|
||||
else:
|
||||
reps = represented_as
|
||||
|
||||
for src, lab, rep in zip(source, labels, reps):
|
||||
skey = src + "." + key
|
||||
svalue = {
|
||||
"source": src,
|
||||
"input_label": lab,
|
||||
"represented_as": rep,
|
||||
# mark as virtual
|
||||
"virtual": True,
|
||||
}
|
||||
|
||||
# inherit is_a if exists
|
||||
if "is_a" in value.keys():
|
||||
# treat as multiple inheritance
|
||||
if isinstance(value["is_a"], list):
|
||||
v = list(value["is_a"])
|
||||
v.insert(0, key)
|
||||
svalue["is_a"] = v
|
||||
|
||||
else:
|
||||
svalue["is_a"] = [key, value["is_a"]]
|
||||
|
||||
else:
|
||||
# set parent as is_a
|
||||
svalue["is_a"] = key
|
||||
|
||||
# inherit everything except core attributes
|
||||
for k, v in value.items():
|
||||
if k not in [
|
||||
"is_a",
|
||||
"source",
|
||||
"input_label",
|
||||
"label_in_input",
|
||||
"represented_as",
|
||||
]:
|
||||
svalue[k] = v
|
||||
|
||||
leaves[skey] = svalue
|
||||
|
||||
return leaves
|
71
biocypher/_metadata.py
Normal file
71
biocypher/_metadata.py
Normal file
@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Sebastian Lobentanzer
|
||||
# ...
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
Package metadata (version, authors, etc).
|
||||
"""
|
||||
|
||||
__all__ = ["get_metadata"]
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
import importlib.metadata
|
||||
|
||||
import toml
|
||||
|
||||
_VERSION = "0.6.0"
|
||||
|
||||
|
||||
def get_metadata():
|
||||
"""
|
||||
Basic package metadata.
|
||||
|
||||
Retrieves package metadata from the current project directory or from
|
||||
the installed package.
|
||||
"""
|
||||
|
||||
here = pathlib.Path(__file__).parent
|
||||
pyproj_toml = "pyproject.toml"
|
||||
meta = {}
|
||||
|
||||
for project_dir in (here, here.parent):
|
||||
toml_path = str(project_dir.joinpath(pyproj_toml).absolute())
|
||||
|
||||
if os.path.exists(toml_path):
|
||||
pyproject = toml.load(toml_path)
|
||||
|
||||
meta = {
|
||||
"name": pyproject["tool"]["poetry"]["name"],
|
||||
"version": pyproject["tool"]["poetry"]["version"],
|
||||
"author": pyproject["tool"]["poetry"]["authors"],
|
||||
"license": pyproject["tool"]["poetry"]["license"],
|
||||
"full_metadata": pyproject,
|
||||
}
|
||||
|
||||
break
|
||||
|
||||
if not meta:
|
||||
try:
|
||||
meta = {
|
||||
k.lower(): v
|
||||
for k, v in importlib.metadata.metadata(here.name).items()
|
||||
}
|
||||
|
||||
except importlib.metadata.PackageNotFoundError:
|
||||
pass
|
||||
|
||||
meta["version"] = meta.get("version", None) or _VERSION
|
||||
|
||||
return meta
|
||||
|
||||
|
||||
metadata = get_metadata()
|
||||
__version__ = metadata.get("version", None)
|
||||
__author__ = metadata.get("author", None)
|
||||
__license__ = metadata.get("license", None)
|
264
biocypher/_misc.py
Normal file
264
biocypher/_misc.py
Normal file
@ -0,0 +1,264 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Sebastian Lobentanzer
|
||||
# ...
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
Handy functions for use in various places.
|
||||
"""
|
||||
from ._logger import logger
|
||||
|
||||
logger.debug(f"Loading module {__name__}.")
|
||||
|
||||
from typing import (
|
||||
Any,
|
||||
Union,
|
||||
Mapping,
|
||||
KeysView,
|
||||
Generator,
|
||||
ItemsView,
|
||||
ValuesView,
|
||||
)
|
||||
from collections.abc import Iterable
|
||||
import re
|
||||
|
||||
from treelib import Tree
|
||||
import networkx as nx
|
||||
import stringcase
|
||||
|
||||
__all__ = ["LIST_LIKE", "SIMPLE_TYPES", "ensure_iterable", "to_list"]
|
||||
|
||||
SIMPLE_TYPES = (
|
||||
bytes,
|
||||
str,
|
||||
int,
|
||||
float,
|
||||
bool,
|
||||
type(None),
|
||||
)
|
||||
|
||||
LIST_LIKE = (
|
||||
list,
|
||||
set,
|
||||
tuple,
|
||||
Generator,
|
||||
ItemsView,
|
||||
KeysView,
|
||||
Mapping,
|
||||
ValuesView,
|
||||
)
|
||||
|
||||
|
||||
def to_list(value: Any) -> list:
|
||||
"""
|
||||
Ensures that ``value`` is a list.
|
||||
"""
|
||||
|
||||
if isinstance(value, LIST_LIKE):
|
||||
value = list(value)
|
||||
|
||||
else:
|
||||
value = [value]
|
||||
|
||||
return value
|
||||
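# Doctest-style illustration of to_list (values are examples):
#
# >>> to_list("protein")
# ['protein']
# >>> to_list(("a", "b"))
# ['a', 'b']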
|
||||
|
||||
def ensure_iterable(value: Any) -> Iterable:
|
||||
"""
|
||||
Return list-like values unchanged; wrap anything else (including strings) in a tuple.
|
||||
"""
|
||||
|
||||
return value if isinstance(value, LIST_LIKE) else (value,)
|
||||
|
||||
|
||||
def create_tree_visualisation(inheritance_graph: Union[dict, nx.Graph]) -> Tree:
|
||||
"""
|
||||
Creates a visualisation of the inheritance tree using treelib.
|
||||
"""
|
||||
inheritance_tree = _get_inheritance_tree(inheritance_graph)
|
||||
classes, root = _find_root_node(inheritance_tree)
|
||||
|
||||
tree = Tree()
|
||||
tree.create_node(root, root)
|
||||
while classes:
|
||||
for child in classes:
|
||||
parent = inheritance_tree[child]
|
||||
if parent in tree.nodes.keys() or parent == root:
|
||||
tree.create_node(child, child, parent=parent)
|
||||
|
||||
for node in tree.nodes.keys():
|
||||
if node in classes:
|
||||
classes.remove(node)
|
||||
|
||||
return tree
|
||||
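# Hedged usage sketch for the function above; the node names are invented and
# the edges point from child to parent, as in the ontology graphs used
# elsewhere in this package.
#
# import networkx as nx
# g = nx.DiGraph()
# g.add_edge("protein", "entity")
# g.add_edge("gene", "entity")
# create_tree_visualisation(g).show()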
|
||||
|
||||
def _get_inheritance_tree(inheritance_graph: Union[dict, nx.Graph]) -> dict:
|
||||
"""Transforms an inheritance_graph into an inheritance_tree.
|
||||
|
||||
Args:
|
||||
inheritance_graph: A dict or nx.Graph representing the inheritance graph.
|
||||
|
||||
Returns:
|
||||
A dict representing the inheritance tree.
|
||||
"""
|
||||
if isinstance(inheritance_graph, nx.Graph):
|
||||
inheritance_tree = nx.to_dict_of_lists(inheritance_graph)
|
||||
|
||||
multiple_parents_present = _multiple_inheritance_present(
|
||||
inheritance_tree
|
||||
)
|
||||
if multiple_parents_present:
|
||||
logger.warning(
|
||||
"The ontology contains multiple inheritance (one child node "
|
||||
"has multiple parent nodes). This is not visualized in the "
|
||||
"following hierarchy tree (the child node is only added once). "
|
||||
"If you wish to browse all relationships of the parsed "
|
||||
"ontologies, write a graphml file to disk using "
|
||||
"`to_disk = <directory>` and view this file."
|
||||
)
|
||||
|
||||
# unlist values
|
||||
inheritance_tree = {k: v[0] for k, v in inheritance_tree.items() if v}
|
||||
return inheritance_tree
|
||||
elif not _multiple_inheritance_present(inheritance_graph):
|
||||
return inheritance_graph
|
||||
|
||||
|
||||
def _multiple_inheritance_present(inheritance_tree: dict) -> bool:
|
||||
"""Checks if multiple inheritance is present in the inheritance_tree."""
|
||||
return any(len(value) > 1 for value in inheritance_tree.values())
|
||||
|
||||
|
||||
def _find_root_node(inheritance_tree: dict) -> tuple[set, str]:
|
||||
classes = set(inheritance_tree.keys())
|
||||
parents = set(inheritance_tree.values())
|
||||
root = list(parents - classes)
|
||||
if len(root) > 1:
|
||||
if "entity" in root:
|
||||
root = "entity" # TODO: default: good standard?
|
||||
else:
|
||||
raise ValueError(
|
||||
"Inheritance tree cannot have more than one root node. "
|
||||
f"Found {len(root)}: {root}."
|
||||
)
|
||||
else:
|
||||
root = root[0]
|
||||
if not root:
|
||||
# find key whose value is None
|
||||
root = list(inheritance_tree.keys())[
|
||||
list(inheritance_tree.values()).index(None)
|
||||
]
|
||||
return classes, root
|
||||
|
||||
|
||||
# string conversion, adapted from Biolink Model Toolkit
|
||||
lowercase_pattern = re.compile(r"[a-zA-Z]*[a-z][a-zA-Z]*")
|
||||
underscore_pattern = re.compile(r"(?<!^)(?=[A-Z][a-z])")
|
||||
|
||||
|
||||
def from_pascal(s: str, sep: str = " ") -> str:
|
||||
underscored = underscore_pattern.sub(sep, s)
|
||||
lowercased = lowercase_pattern.sub(
|
||||
lambda match: match.group(0).lower(),
|
||||
underscored,
|
||||
)
|
||||
return lowercased
|
||||
|
||||
|
||||
def pascalcase_to_sentencecase(s: str) -> str:
|
||||
"""
|
||||
Convert PascalCase to sentence case.
|
||||
|
||||
Args:
|
||||
s: Input string in PascalCase
|
||||
|
||||
Returns:
|
||||
string in sentence case form
|
||||
"""
|
||||
return from_pascal(s, sep=" ")
|
||||
|
||||
|
||||
def snakecase_to_sentencecase(s: str) -> str:
|
||||
"""
|
||||
Convert snake_case to sentence case.
|
||||
|
||||
Args:
|
||||
s: Input string in snake_case
|
||||
|
||||
Returns:
|
||||
string in sentence case form
|
||||
"""
|
||||
return stringcase.sentencecase(s).lower()
|
||||
|
||||
|
||||
def sentencecase_to_snakecase(s: str) -> str:
|
||||
"""
|
||||
Convert sentence case to snake_case.
|
||||
|
||||
Args:
|
||||
s: Input string in sentence case
|
||||
|
||||
Returns:
|
||||
string in snake_case form
|
||||
"""
|
||||
return stringcase.snakecase(s).lower()
|
||||
|
||||
|
||||
def sentencecase_to_pascalcase(s: str, sep: str = r"\s") -> str:
|
||||
"""
|
||||
Convert sentence case to PascalCase.
|
||||
|
||||
Args:
|
||||
s: Input string in sentence case
|
||||
|
||||
Returns:
|
||||
string in PascalCase form
|
||||
"""
|
||||
return re.sub(
|
||||
r"(?:^|[" + sep + "])([a-zA-Z])",
|
||||
lambda match: match.group(1).upper(),
|
||||
s,
|
||||
)
|
||||
|
||||
|
||||
def to_lower_sentence_case(s: str) -> str:
|
||||
"""
|
||||
Convert any string to lower sentence case. Works with snake_case,
|
||||
PascalCase, and sentence case.
|
||||
|
||||
Args:
|
||||
s: Input string
|
||||
|
||||
Returns:
|
||||
string in lower sentence case form
|
||||
"""
|
||||
if "_" in s:
|
||||
return snakecase_to_sentencecase(s)
|
||||
elif " " in s:
|
||||
return s.lower()
|
||||
elif s[0].isupper():
|
||||
return pascalcase_to_sentencecase(s)
|
||||
else:
|
||||
return s
|
||||
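# Doctest-style illustration of the case conversions above:
#
# >>> to_lower_sentence_case("PascalCaseLabel")
# 'pascal case label'
# >>> to_lower_sentence_case("snake_case_label")
# 'snake case label'
# >>> sentencecase_to_pascalcase("pascal case label")
# 'PascalCaseLabel'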
|
||||
|
||||
def is_nested(lst) -> bool:
|
||||
"""
|
||||
Check if a list is nested.
|
||||
|
||||
Args:
|
||||
lst (list): The list to check.
|
||||
|
||||
Returns:
|
||||
bool: True if the list is nested, False otherwise.
|
||||
"""
|
||||
for item in lst:
|
||||
if isinstance(item, list):
|
||||
return True
|
||||
return False
|
886
biocypher/_ontology.py
Normal file
886
biocypher/_ontology.py
Normal file
@ -0,0 +1,886 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Sebastian Lobentanzer
|
||||
# ...
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
BioCypher 'ontology' module. Contains classes and functions to handle parsing
|
||||
and representation of single ontologies as well as their hybridisation and
|
||||
other advanced operations.
|
||||
"""
|
||||
import os
|
||||
|
||||
from ._logger import logger
|
||||
|
||||
logger.debug(f"Loading module {__name__}.")
|
||||
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
|
||||
from rdflib import Graph
|
||||
from rdflib.extras.external_graph_libs import rdflib_to_networkx_digraph
|
||||
import rdflib
|
||||
import networkx as nx
|
||||
|
||||
from ._misc import (
|
||||
to_list,
|
||||
to_lower_sentence_case,
|
||||
create_tree_visualisation,
|
||||
sentencecase_to_pascalcase,
|
||||
)
|
||||
from ._mapping import OntologyMapping
|
||||
|
||||
|
||||
class OntologyAdapter:
|
||||
"""
|
||||
Class that represents an ontology to be used in the BioCypher framework. Can
read from a variety of formats, such as OWL, RDF/XML, and Turtle (OBO is not
yet supported). The
|
||||
ontology is represented by a networkx.DiGraph object; an RDFlib graph is
|
||||
also kept. By default, the DiGraph reverses the label and identifier of the
|
||||
nodes, such that the node name in the graph is the human-readable label. The
|
||||
edges are oriented from child to parent.
|
||||
Labels are formatted in lower sentence case and underscores are replaced by spaces.
|
||||
Identifiers are taken as defined and the prefixes are removed by default.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
ontology_file: str,
|
||||
root_label: str,
|
||||
ontology_file_format: Optional[str] = None,
|
||||
head_join_node_label: Optional[str] = None,
|
||||
merge_nodes: Optional[bool] = True,
|
||||
switch_label_and_id: bool = True,
|
||||
remove_prefixes: bool = True,
|
||||
):
|
||||
"""
|
||||
Initialize the OntologyAdapter class.
|
||||
|
||||
Args:
|
||||
ontology_file (str): Path to the ontology file. Can be local or
|
||||
remote.
|
||||
|
||||
root_label (str): The label of the root node in the ontology. In
|
||||
case of a tail ontology, this is the tail join node.
|
||||
|
||||
ontology_file_format (str): The format of the ontology file (e.g. "application/rdf+xml")
|
||||
If format is not passed, it is determined automatically.
|
||||
|
||||
head_join_node_label (str): Optional variable to store the label of the
|
||||
node in the head ontology that should be used to join to the
|
||||
root node of the tail ontology. Defaults to None.
|
||||
|
||||
merge_nodes (bool): If True, head and tail join nodes will be
|
||||
merged, using the label of the head join node. If False, the
|
||||
tail join node will be attached as a child of the head join
|
||||
node.
|
||||
|
||||
switch_label_and_id (bool): If True, the node names in the graph will be
|
||||
the human-readable labels. If False, the node names will be the
|
||||
identifiers. Defaults to True.
|
||||
|
||||
remove_prefixes (bool): If True, the prefixes of the identifiers will
|
||||
be removed. Defaults to True.
|
||||
"""
|
||||
|
||||
logger.info(f"Instantiating OntologyAdapter class for {ontology_file}.")
|
||||
|
||||
self._ontology_file = ontology_file
|
||||
self._root_label = root_label
|
||||
self._format = ontology_file_format
|
||||
self._merge_nodes = merge_nodes
|
||||
self._head_join_node = head_join_node_label
|
||||
self._switch_label_and_id = switch_label_and_id
|
||||
self._remove_prefixes = remove_prefixes
|
||||
|
||||
self._rdf_graph = self._load_rdf_graph(ontology_file)
|
||||
|
||||
self._nx_graph = self._rdf_to_nx(
|
||||
self._rdf_graph, root_label, switch_label_and_id
|
||||
)
|
||||
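# Hedged instantiation sketch (file location and root label are placeholders):
#
# adapter = OntologyAdapter(
#     ontology_file="https://example.org/my_ontology.owl",
#     root_label="my root class",
# )
# nx_graph = adapter.get_nx_graph()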
|
||||
def _rdf_to_nx(
|
||||
self,
|
||||
_rdf_graph: rdflib.Graph,
|
||||
root_label: str,
|
||||
switch_label_and_id: bool,
|
||||
rename_nodes: bool = True,
|
||||
) -> nx.DiGraph:
|
||||
one_to_one_triples, one_to_many_dict = self._get_relevant_rdf_triples(
|
||||
_rdf_graph
|
||||
)
|
||||
nx_graph = self._convert_to_nx(one_to_one_triples, one_to_many_dict)
|
||||
nx_graph = self._add_labels_to_nodes(nx_graph, switch_label_and_id)
|
||||
nx_graph = self._change_nodes_to_biocypher_format(
|
||||
nx_graph, switch_label_and_id, rename_nodes
|
||||
)
|
||||
nx_graph = self._get_all_ancestors(
|
||||
nx_graph, root_label, switch_label_and_id, rename_nodes
|
||||
)
|
||||
return nx.DiGraph(nx_graph)
|
||||
|
||||
def _get_relevant_rdf_triples(self, g: rdflib.Graph) -> tuple:
|
||||
one_to_one_inheritance_graph = self._get_one_to_one_inheritance_triples(
|
||||
g
|
||||
)
|
||||
intersection = self._get_multiple_inheritance_dict(g)
|
||||
return one_to_one_inheritance_graph, intersection
|
||||
|
||||
def _get_one_to_one_inheritance_triples(
|
||||
self, g: rdflib.Graph
|
||||
) -> rdflib.Graph:
|
||||
"""Get the one to one inheritance triples from the RDF graph.
|
||||
|
||||
Args:
|
||||
g (rdflib.Graph): The RDF graph
|
||||
|
||||
Returns:
|
||||
rdflib.Graph: The one to one inheritance graph
|
||||
"""
|
||||
one_to_one_inheritance_graph = Graph()
|
||||
for s, p, o in g.triples((None, rdflib.RDFS.subClassOf, None)):
|
||||
if self.has_label(s, g):
|
||||
one_to_one_inheritance_graph.add((s, p, o))
|
||||
return one_to_one_inheritance_graph
|
||||
|
||||
def _get_multiple_inheritance_dict(self, g: rdflib.Graph) -> dict:
|
||||
"""Get the multiple inheritance dictionary from the RDF graph.
|
||||
|
||||
Args:
|
||||
g (rdflib.Graph): The RDF graph
|
||||
|
||||
Returns:
|
||||
dict: The multiple inheritance dictionary
|
||||
"""
|
||||
multiple_inheritance = g.triples(
|
||||
(None, rdflib.OWL.intersectionOf, None)
|
||||
)
|
||||
intersection = {}
|
||||
for (
|
||||
node,
|
||||
has_multiple_parents,
|
||||
first_node_of_intersection_list,
|
||||
) in multiple_inheritance:
|
||||
parents = self._retrieve_rdf_linked_list(
|
||||
first_node_of_intersection_list
|
||||
)
|
||||
child_name = None
|
||||
for s_, _, _ in g.triples((None, rdflib.RDFS.subClassOf, node)):
|
||||
child_name = s_
|
||||
|
||||
# Handle SNOMED CT post-coordinated expressions
|
||||
if not child_name:
|
||||
for s_, _, _ in g.triples(
|
||||
(None, rdflib.OWL.equivalentClass, node)
|
||||
):
|
||||
child_name = s_
|
||||
|
||||
if child_name:
|
||||
intersection[node] = {
|
||||
"child_name": child_name,
|
||||
"parent_node_names": parents,
|
||||
}
|
||||
return intersection
|
||||
|
||||
def has_label(self, node: rdflib.URIRef, g: rdflib.Graph) -> bool:
|
||||
"""Does the node have a label in g?
|
||||
|
||||
Args:
|
||||
node (rdflib.URIRef): The node to check
|
||||
g (rdflib.Graph): The graph to check in
|
||||
Returns:
|
||||
bool: True if the node has a label, False otherwise
|
||||
"""
|
||||
return (node, rdflib.RDFS.label, None) in g
|
||||
|
||||
def _retrieve_rdf_linked_list(self, subject: rdflib.URIRef) -> list:
|
||||
"""Recursively retrieves a linked list from RDF.
|
||||
Example RDF list with the items [item1, item2]:
|
||||
list_node - first -> item1
|
||||
list_node - rest -> list_node2
|
||||
list_node2 - first -> item2
|
||||
list_node2 - rest -> nil
|
||||
Args:
|
||||
subject (rdflib.URIRef): One list_node of the RDF list
|
||||
Returns:
|
||||
list: The items of the RDF list
|
||||
"""
|
||||
g = self._rdf_graph
|
||||
rdf_list = []
|
||||
for s, p, o in g.triples((subject, rdflib.RDF.first, None)):
|
||||
rdf_list.append(o)
|
||||
for s, p, o in g.triples((subject, rdflib.RDF.rest, None)):
|
||||
if o != rdflib.RDF.nil:
|
||||
rdf_list.extend(self._retrieve_rdf_linked_list(o))
|
||||
return rdf_list
|
||||
|
||||
def _convert_to_nx(
|
||||
self, one_to_one: rdflib.Graph, one_to_many: dict
|
||||
) -> nx.DiGraph:
|
||||
"""Convert the one to one and one to many inheritance graphs to networkx.
|
||||
|
||||
Args:
|
||||
one_to_one (rdflib.Graph): The one to one inheritance graph
|
||||
one_to_many (dict): The one to many inheritance dictionary
|
||||
|
||||
Returns:
|
||||
nx.DiGraph: The networkx graph
|
||||
"""
|
||||
nx_graph = rdflib_to_networkx_digraph(
|
||||
one_to_one, edge_attrs=lambda s, p, o: {}, calc_weights=False
|
||||
)
|
||||
for key, value in one_to_many.items():
|
||||
nx_graph.add_edges_from(
|
||||
[
|
||||
(value["child_name"], parent)
|
||||
for parent in value["parent_node_names"]
|
||||
]
|
||||
)
|
||||
if key in nx_graph.nodes:
|
||||
nx_graph.remove_node(key)
|
||||
return nx_graph
|
||||
|
||||
def _add_labels_to_nodes(
|
||||
self, nx_graph: nx.DiGraph, switch_label_and_id: bool
|
||||
) -> nx.DiGraph:
|
||||
"""Add labels to the nodes in the networkx graph.
|
||||
|
||||
Args:
|
||||
nx_graph (nx.DiGraph): The networkx graph
|
||||
switch_label_and_id (bool): If True, id and label are switched
|
||||
|
||||
Returns:
|
||||
nx.DiGraph: The networkx graph with labels
|
||||
"""
|
||||
for node in list(nx_graph.nodes):
|
||||
nx_id, nx_label = self._get_nx_id_and_label(
|
||||
node, switch_label_and_id
|
||||
)
|
||||
if nx_id == "none":
|
||||
# remove node if it has no id
|
||||
nx_graph.remove_node(node)
|
||||
continue
|
||||
|
||||
nx_graph.nodes[node]["label"] = nx_label
|
||||
return nx_graph
|
||||
|
||||
def _change_nodes_to_biocypher_format(
|
||||
self,
|
||||
nx_graph: nx.DiGraph,
|
||||
switch_label_and_id: bool,
|
||||
rename_nodes: bool = True,
|
||||
) -> nx.DiGraph:
|
||||
"""Change the nodes in the networkx graph to BioCypher format:
|
||||
- remove the prefix of the identifier
|
||||
- switch id and label
|
||||
- adapt the labels (replace _ with space and convert to lower sentence case)
|
||||
|
||||
Args:
|
||||
nx_graph (nx.DiGraph): The networkx graph
|
||||
switch_label_and_id (bool): If True, id and label are switched
|
||||
rename_nodes (bool): If True, the nodes are renamed
|
||||
|
||||
Returns:
|
||||
nx.DiGraph: The networkx ontology graph in BioCypher format
|
||||
"""
|
||||
mapping = {
|
||||
node: self._get_nx_id_and_label(
|
||||
node, switch_label_and_id, rename_nodes
|
||||
)[0]
|
||||
for node in nx_graph.nodes
|
||||
}
|
||||
renamed = nx.relabel_nodes(nx_graph, mapping, copy=False)
|
||||
return renamed
|
||||
|
||||
def _get_all_ancestors(
|
||||
self,
|
||||
renamed: nx.DiGraph,
|
||||
root_label: str,
|
||||
switch_label_and_id: bool,
|
||||
rename_nodes: bool = True,
|
||||
) -> nx.DiGraph:
|
||||
"""Get all ancestors of the root node in the networkx graph.
|
||||
|
||||
Args:
|
||||
renamed (nx.DiGraph): The renamed networkx graph
|
||||
root_label (str): The label of the root node in the ontology
|
||||
switch_label_and_id (bool): If True, id and label are switched
|
||||
rename_nodes (bool): If True, the nodes are renamed
|
||||
|
||||
Returns:
|
||||
nx.DiGraph: The filtered networkx graph
|
||||
"""
|
||||
root = self._get_nx_id_and_label(
|
||||
self._find_root_label(self._rdf_graph, root_label),
|
||||
switch_label_and_id,
|
||||
rename_nodes,
|
||||
)[0]
|
||||
ancestors = nx.ancestors(renamed, root)
|
||||
ancestors.add(root)
|
||||
filtered_graph = renamed.subgraph(ancestors)
|
||||
return filtered_graph
|
||||
|
||||
def _get_nx_id_and_label(
|
||||
self, node, switch_id_and_label: bool, rename_nodes: bool = True
|
||||
) -> tuple[str, str]:
|
||||
"""Rename node id and label for nx graph.
|
||||
|
||||
Args:
|
||||
node (str): The node to rename
|
||||
switch_id_and_label (bool): If True, switch id and label
|
||||
|
||||
Returns:
|
||||
tuple[str, str]: The renamed node id and label
|
||||
"""
|
||||
node_id_str = self._remove_prefix(str(node))
|
||||
node_label_str = str(self._rdf_graph.value(node, rdflib.RDFS.label))
|
||||
if rename_nodes:
|
||||
node_label_str = node_label_str.replace("_", " ")
|
||||
node_label_str = to_lower_sentence_case(node_label_str)
|
||||
nx_id = node_label_str if switch_id_and_label else node_id_str
|
||||
nx_label = node_id_str if switch_id_and_label else node_label_str
|
||||
return nx_id, nx_label
|
||||
|
||||
def _find_root_label(self, g, root_label):
|
||||
# Loop through all labels in the ontology
|
||||
for label_subject, _, label_in_ontology in g.triples(
|
||||
(None, rdflib.RDFS.label, None)
|
||||
):
|
||||
# If the label is the root label, set the root node to the label's subject
|
||||
if str(label_in_ontology) == root_label:
|
||||
root = label_subject
|
||||
break
|
||||
else:
|
||||
labels_in_ontology = []
|
||||
for label_subject, _, label_in_ontology in g.triples(
|
||||
(None, rdflib.RDFS.label, None)
|
||||
):
|
||||
labels_in_ontology.append(str(label_in_ontology))
|
||||
raise ValueError(
|
||||
f"Could not find root node with label '{root_label}'. "
|
||||
f"The ontology contains the following labels: {labels_in_ontology}"
|
||||
)
|
||||
return root
|
||||
|
||||
def _remove_prefix(self, uri: str) -> str:
|
||||
"""
|
||||
Remove the prefix of a URI. URIs can contain either "#" or "/" as a
|
||||
separator between the prefix and the local name. The prefix is
|
||||
everything before the last separator.
|
||||
"""
|
||||
if self._remove_prefixes:
|
||||
return uri.rsplit("#", 1)[-1].rsplit("/", 1)[-1]
|
||||
else:
|
||||
return uri
|
||||
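# Illustration of the prefix stripping above (URIs are made-up examples):
#
#   "http://purl.obolibrary.org/obo/GO_0008150"  ->  "GO_0008150"
#   "http://example.org/onto#my_class"           ->  "my_class"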
|
||||
def _load_rdf_graph(self, ontology_file):
|
||||
"""
|
||||
Load the ontology into an RDFlib graph. The ontology file can be in
|
||||
OWL, OBO, or RDF/XML format.
|
||||
"""
|
||||
g = rdflib.Graph()
|
||||
g.parse(ontology_file, format=self._get_format(ontology_file))
|
||||
return g
|
||||
|
||||
def _get_format(self, ontology_file):
|
||||
"""
|
||||
Get the format of the ontology file.
|
||||
"""
|
||||
if self._format:
|
||||
if self._format == "owl":
|
||||
return "application/rdf+xml"
|
||||
elif self._format == "obo":
|
||||
raise NotImplementedError("OBO format not yet supported")
|
||||
elif self._format == "rdf":
|
||||
return "application/rdf+xml"
|
||||
elif self._format == "ttl":
|
||||
return self._format
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Could not determine format of ontology file {ontology_file}"
|
||||
)
|
||||
|
||||
if ontology_file.endswith(".owl"):
|
||||
return "application/rdf+xml"
|
||||
elif ontology_file.endswith(".obo"):
|
||||
raise NotImplementedError("OBO format not yet supported")
|
||||
elif ontology_file.endswith(".rdf"):
|
||||
return "application/rdf+xml"
|
||||
elif ontology_file.endswith(".ttl"):
|
||||
return "ttl"
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Could not determine format of ontology file {ontology_file}"
|
||||
)
|
||||
|
||||
def get_nx_graph(self):
|
||||
"""
|
||||
Get the networkx graph representing the ontology.
|
||||
"""
|
||||
return self._nx_graph
|
||||
|
||||
def get_rdf_graph(self):
|
||||
"""
|
||||
Get the RDFlib graph representing the ontology.
|
||||
"""
|
||||
return self._rdf_graph
|
||||
|
||||
def get_root_node(self):
|
||||
"""
|
||||
Get root node in the ontology.
|
||||
|
||||
Returns:
|
||||
root_node: If _switch_label_and_id is True, the root node label is returned,
|
||||
otherwise the root node id is returned.
|
||||
"""
|
||||
|
||||
root_node = None
|
||||
root_label = self._root_label.replace("_", " ")
|
||||
|
||||
if self._switch_label_and_id:
|
||||
root_node = to_lower_sentence_case(root_label)
|
||||
elif not self._switch_label_and_id:
|
||||
for node, data in self.get_nx_graph().nodes(data=True):
|
||||
if "label" in data and data["label"] == to_lower_sentence_case(
|
||||
root_label
|
||||
):
|
||||
root_node = node
|
||||
break
|
||||
|
||||
return root_node
|
||||
|
||||
def get_ancestors(self, node_label):
|
||||
"""
|
||||
Get the ancestors of a node in the ontology.
|
||||
"""
|
||||
return nx.dfs_preorder_nodes(self._nx_graph, node_label)
|
||||
|
||||
def get_head_join_node(self):
|
||||
"""
|
||||
Get the head join node of the ontology.
|
||||
"""
|
||||
return self._head_join_node
|
||||
|
||||
|
||||
class Ontology:
|
||||
"""
|
||||
A class that represents the ontological "backbone" of a BioCypher knowledge
|
||||
graph. The ontology can be built from a single resource, or hybridised from
|
||||
a combination of resources, with one resource being the "head" ontology,
|
||||
while an arbitrary number of other resources can become "tail" ontologies at
|
||||
arbitrary fusion points inside the "head" ontology.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
head_ontology: dict,
|
||||
ontology_mapping: Optional["OntologyMapping"] = None,
|
||||
tail_ontologies: Optional[dict] = None,
|
||||
):
|
||||
"""
|
||||
Initialize the Ontology class.
|
||||
|
||||
Args:
|
||||
head_ontology (dict): Definition of the head ontology (url, root node,
and optional format settings).

ontology_mapping (OntologyMapping): The ontology mapping with user
extensions and properties. Defaults to None.

tail_ontologies (dict): Definitions of tail ontologies to be joined to
the head ontology, keyed by name. Defaults to None.
"""
|
||||
|
||||
self._head_ontology_meta = head_ontology
|
||||
self.mapping = ontology_mapping
|
||||
self._tail_ontology_meta = tail_ontologies
|
||||
|
||||
self._tail_ontologies = None
|
||||
self._nx_graph = None
|
||||
|
||||
# keep track of nodes that have been extended
|
||||
self._extended_nodes = set()
|
||||
|
||||
self._main()
|
||||
|
||||
def _main(self) -> None:
|
||||
"""
|
||||
Main method to be run on instantiation. Loads the ontologies, joins
|
||||
them, and returns the hybrid ontology. Loads only the head ontology
|
||||
if nothing else is given. Adds user extensions and properties from
|
||||
the mapping.
|
||||
"""
|
||||
self._load_ontologies()
|
||||
|
||||
if self._tail_ontologies:
|
||||
for adapter in self._tail_ontologies.values():
|
||||
head_join_node = self._get_head_join_node(adapter)
|
||||
self._join_ontologies(adapter, head_join_node)
|
||||
else:
|
||||
self._nx_graph = self._head_ontology.get_nx_graph()
|
||||
|
||||
if self.mapping:
|
||||
self._extend_ontology()
|
||||
|
||||
# experimental: add connections of disjoint classes to entity
|
||||
# self._connect_biolink_classes()
|
||||
|
||||
self._add_properties()
|
||||
|
||||
def _load_ontologies(self) -> None:
|
||||
"""
|
||||
For each ontology, load the OntologyAdapter object and store it as an
|
||||
instance variable (head) or a dictionary (tail).
|
||||
"""
|
||||
|
||||
logger.info("Loading ontologies...")
|
||||
|
||||
self._head_ontology = OntologyAdapter(
|
||||
ontology_file=self._head_ontology_meta["url"],
|
||||
root_label=self._head_ontology_meta["root_node"],
|
||||
ontology_file_format=self._head_ontology_meta.get("format", None),
|
||||
switch_label_and_id=self._head_ontology_meta.get(
|
||||
"switch_label_and_id", True
|
||||
),
|
||||
)
|
||||
|
||||
if self._tail_ontology_meta:
|
||||
self._tail_ontologies = {}
|
||||
for key, value in self._tail_ontology_meta.items():
|
||||
self._tail_ontologies[key] = OntologyAdapter(
|
||||
ontology_file=value["url"],
|
||||
root_label=value["tail_join_node"],
|
||||
head_join_node_label=value["head_join_node"],
|
||||
ontology_file_format=value.get("format", None),
|
||||
merge_nodes=value.get("merge_nodes", True),
|
||||
switch_label_and_id=value.get("switch_label_and_id", True),
|
||||
)
|
||||
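# Hedged example of the dictionaries consumed above; URLs and labels are
# placeholders, mirroring the keys accessed in _load_ontologies.
#
# head_ontology = {
#     "url": "https://example.org/biolink-model.owl.ttl",
#     "root_node": "entity",
#     "format": "ttl",
# }
# tail_ontologies = {
#     "so": {
#         "url": "https://example.org/so.owl",
#         "tail_join_node": "sequence variant",
#         "head_join_node": "biological entity",
#         "merge_nodes": True,
#     },
# }
# ontology = Ontology(head_ontology, tail_ontologies=tail_ontologies)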
|
||||
def _get_head_join_node(self, adapter: OntologyAdapter) -> str:
|
||||
"""
|
||||
Tries to find the head join node of the given ontology adapter in the
|
||||
head ontology. If the join node is not found, the method will raise an
|
||||
error.
|
||||
|
||||
Args:
|
||||
adapter (OntologyAdapter): The ontology adapter of which to find the
|
||||
join node in the head ontology.
|
||||
"""
|
||||
|
||||
head_join_node = None
|
||||
user_defined_head_join_node_label = adapter.get_head_join_node()
|
||||
head_join_node_label_in_bc_format = to_lower_sentence_case(
|
||||
user_defined_head_join_node_label.replace("_", " ")
|
||||
)
|
||||
|
||||
if self._head_ontology._switch_label_and_id:
|
||||
head_join_node = head_join_node_label_in_bc_format
|
||||
elif not self._head_ontology._switch_label_and_id:
|
||||
for node_id, data in self._head_ontology.get_nx_graph().nodes(
|
||||
data=True
|
||||
):
|
||||
if (
|
||||
"label" in data
|
||||
and data["label"] == head_join_node_label_in_bc_format
|
||||
):
|
||||
head_join_node = node_id
|
||||
break
|
||||
|
||||
if head_join_node not in self._head_ontology.get_nx_graph().nodes:
|
||||
head_ontology = self._head_ontology._rdf_to_nx(
|
||||
self._head_ontology.get_rdf_graph(),
|
||||
self._head_ontology._root_label,
|
||||
self._head_ontology._switch_label_and_id,
|
||||
rename_nodes=False,
|
||||
)
|
||||
raise ValueError(
|
||||
f"Head join node '{head_join_node}' not found in head ontology. "
|
||||
f"The head ontology contains the following nodes: {head_ontology.nodes}."
|
||||
)
|
||||
return head_join_node
|
||||
|
||||
def _join_ontologies(
|
||||
self, adapter: OntologyAdapter, head_join_node
|
||||
) -> None:
|
||||
"""
|
||||
Joins the ontologies by adding the tail ontology as a subgraph to the
|
||||
head ontology at the specified join nodes.
|
||||
|
||||
Args:
|
||||
adapter (OntologyAdapter): The ontology adapter of the tail ontology
|
||||
to be added to the head ontology.
|
||||
"""
|
||||
|
||||
if not self._nx_graph:
|
||||
self._nx_graph = self._head_ontology.get_nx_graph().copy()
|
||||
|
||||
tail_join_node = adapter.get_root_node()
|
||||
tail_ontology = adapter.get_nx_graph()
|
||||
|
||||
# subtree of tail ontology at join node
|
||||
tail_ontology_subtree = nx.dfs_tree(
|
||||
tail_ontology.reverse(), tail_join_node
|
||||
).reverse()
|
||||
|
||||
# transfer node attributes from tail ontology to subtree
|
||||
for node in tail_ontology_subtree.nodes:
|
||||
tail_ontology_subtree.nodes[node].update(tail_ontology.nodes[node])
|
||||
|
||||
# if merge_nodes is False, create parent of tail join node from head
|
||||
# join node
|
||||
if not adapter._merge_nodes:
|
||||
# add head join node from head ontology to tail ontology subtree
|
||||
# as parent of tail join node
|
||||
tail_ontology_subtree.add_node(
|
||||
head_join_node,
|
||||
**self._head_ontology.get_nx_graph().nodes[head_join_node],
|
||||
)
|
||||
tail_ontology_subtree.add_edge(tail_join_node, head_join_node)
|
||||
|
||||
# else rename tail join node to match head join node if necessary
|
||||
elif not tail_join_node == head_join_node:
|
||||
tail_ontology_subtree = nx.relabel_nodes(
|
||||
tail_ontology_subtree, {tail_join_node: head_join_node}
|
||||
)
|
||||
|
||||
# combine head ontology and tail subtree
|
||||
self._nx_graph = nx.compose(self._nx_graph, tail_ontology_subtree)
|
||||
|
||||
def _extend_ontology(self) -> None:
|
||||
"""
|
||||
Adds the user extensions to the ontology. Tries to find the parent in
|
||||
the ontology, adds it if necessary, and adds the child and a directed
|
||||
edge from child to parent. Can handle multiple parents.
|
||||
"""
|
||||
|
||||
if not self._nx_graph:
|
||||
self._nx_graph = self._head_ontology.get_nx_graph().copy()
|
||||
|
||||
for key, value in self.mapping.extended_schema.items():
|
||||
if not value.get("is_a"):
|
||||
if self._nx_graph.has_node(value.get("synonym_for")):
|
||||
continue
|
||||
|
||||
if not self._nx_graph.has_node(key):
|
||||
raise ValueError(
|
||||
f"Node {key} not found in ontology, but also has no "
|
||||
"inheritance definition. Please check your schema for "
|
||||
"spelling errors, capitalised first letters, use of underscores, "
|
||||
"a missing `is_a` definition (SubClassOf a root node), or missing "
|
||||
"labels in the class or its superclasses."
|
||||
)
|
||||
|
||||
continue
|
||||
|
||||
parents = to_list(value.get("is_a"))
|
||||
child = key
|
||||
|
||||
while parents:
|
||||
parent = parents.pop(0)
|
||||
|
||||
if parent not in self._nx_graph.nodes:
|
||||
self._nx_graph.add_node(parent)
|
||||
self._nx_graph.nodes[parent][
|
||||
"label"
|
||||
] = sentencecase_to_pascalcase(parent)
|
||||
|
||||
# mark parent as user extension
|
||||
self._nx_graph.nodes[parent]["user_extension"] = True
|
||||
self._extended_nodes.add(parent)
|
||||
|
||||
if child not in self._nx_graph.nodes:
|
||||
self._nx_graph.add_node(child)
|
||||
self._nx_graph.nodes[child][
|
||||
"label"
|
||||
] = sentencecase_to_pascalcase(child)
|
||||
|
||||
# mark child as user extension
|
||||
self._nx_graph.nodes[child]["user_extension"] = True
|
||||
self._extended_nodes.add(child)
|
||||
|
||||
self._nx_graph.add_edge(child, parent)
|
||||
|
||||
child = parent
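# Worked sketch of the extension loop above (hypothetical schema entry):
# given `{"adverse event": {"is_a": ["clinical finding", "entity"], ...}}`,
# the loop adds the missing nodes "adverse event" and "clinical finding"
# (PascalCase labels "AdverseEvent" and "ClinicalFinding"), flags them with
# `user_extension=True`, and chains the edges
# "adverse event" -> "clinical finding" -> "entity".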
|
||||
|
||||
def _connect_biolink_classes(self) -> None:
|
||||
"""
|
||||
Experimental: Adds edges from disjoint classes to the entity node.
|
||||
"""
|
||||
|
||||
if not self._nx_graph:
|
||||
self._nx_graph = self._head_ontology.get_nx_graph().copy()
|
||||
|
||||
if "entity" not in self._nx_graph.nodes:
|
||||
return
|
||||
|
||||
# biolink classes that are disjoint from entity
|
||||
disjoint_classes = [
|
||||
"frequency qualifier mixin",
|
||||
"chemical entity to entity association mixin",
|
||||
"ontology class",
|
||||
"relationship quantifier",
|
||||
"physical essence or occurrent",
|
||||
"gene or gene product",
|
||||
"subject of investigation",
|
||||
]
|
||||
|
||||
for node in disjoint_classes:
|
||||
if not self._nx_graph.nodes.get(node):
|
||||
self._nx_graph.add_node(node)
|
||||
self._nx_graph.nodes[node][
|
||||
"label"
|
||||
] = sentencecase_to_pascalcase(node)
|
||||
|
||||
self._nx_graph.add_edge(node, "entity")
|
||||
|
||||
def _add_properties(self) -> None:
|
||||
"""
|
||||
For each entity in the mapping, update the ontology with the properties
|
||||
specified in the mapping. Updates synonym information in the graph,
|
||||
setting the synonym as the primary node label.
|
||||
"""
|
||||
|
||||
for key, value in self.mapping.extended_schema.items():
|
||||
if key in self._nx_graph.nodes:
|
||||
self._nx_graph.nodes[key].update(value)
|
||||
|
||||
if value.get("synonym_for"):
|
||||
# change node label to synonym
|
||||
if value["synonym_for"] not in self._nx_graph.nodes:
|
||||
raise ValueError(
|
||||
f'Node {value["synonym_for"]} not found in ontology.'
|
||||
)
|
||||
|
||||
self._nx_graph = nx.relabel_nodes(
|
||||
self._nx_graph, {value["synonym_for"]: key}
|
||||
)
|
||||
|
||||
def get_ancestors(self, node_label: str) -> list:
|
||||
"""
|
||||
Get the ancestors of a node in the ontology.
|
||||
|
||||
Args:
|
||||
node_label (str): The label of the node in the ontology.
|
||||
|
||||
Returns:
|
||||
networkx.DiGraph: A directed tree (as returned by ``nx.dfs_tree``) containing the node and its ancestors.
|
||||
"""
|
||||
return nx.dfs_tree(self._nx_graph, node_label)
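# Illustrative usage (hypothetical node label; the return value is the
# networkx tree produced by `nx.dfs_tree`, not a plain list):
#
#     ancestors = ontology.get_ancestors("protein")
#     list(ancestors.nodes)
#     # e.g. ["protein", "polypeptide", "biological entity", ...]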
|
||||
|
||||
def show_ontology_structure(self, to_disk: str = None, full: bool = False):
|
||||
"""
|
||||
Show the ontology structure using treelib or write to GRAPHML file.
|
||||
|
||||
Args:
|
||||
|
||||
to_disk (str): If specified, the ontology structure will be saved
|
||||
to disk as a GRAPHML file at the location (directory) specified
|
||||
by the `to_disk` string, to be opened in your favourite graph
|
||||
visualisation tool.
|
||||
|
||||
full (bool): If True, the full ontology structure will be shown,
|
||||
including all nodes and edges. If False, only the nodes and
|
||||
edges that are relevant to the extended schema will be shown.
|
||||
"""
|
||||
|
||||
if not full and not self.mapping.extended_schema:
|
||||
raise ValueError(
|
||||
"You are attempting to visualise a subset of the loaded "
|
||||
"ontology, but have not provided a schema configuration. "
|
||||
"To display a partial ontology graph, please provide a schema "
|
||||
"configuration file; to visualise the full graph, please use "
|
||||
"the parameter `full = True`."
|
||||
)
|
||||
|
||||
if not self._nx_graph:
|
||||
raise ValueError("Ontology not loaded.")
|
||||
|
||||
if not self._tail_ontologies:
|
||||
msg = f"Showing ontology structure based on {self._head_ontology._ontology_file}"
|
||||
|
||||
else:
|
||||
msg = f"Showing ontology structure based on {len(self._tail_ontology_meta)+1} ontologies: "
|
||||
|
||||
logger.info(msg)
|
||||
|
||||
if not full:
|
||||
# set of leaves and their intermediate parents up to the root
|
||||
filter_nodes = set(self.mapping.extended_schema.keys())
|
||||
|
||||
for node in self.mapping.extended_schema.keys():
|
||||
filter_nodes.update(self.get_ancestors(node).nodes)
|
||||
|
||||
# filter graph
|
||||
G = self._nx_graph.subgraph(filter_nodes)
|
||||
|
||||
else:
|
||||
G = self._nx_graph
|
||||
|
||||
if not to_disk:
|
||||
# create tree
|
||||
tree = create_tree_visualisation(G)
|
||||
|
||||
# add synonym information
|
||||
for node in self.mapping.extended_schema:
|
||||
if not isinstance(self.mapping.extended_schema[node], dict):
|
||||
continue
|
||||
if self.mapping.extended_schema[node].get("synonym_for"):
|
||||
tree.nodes[node].tag = (
|
||||
f"{node} = "
|
||||
f"{self.mapping.extended_schema[node].get('synonym_for')}"
|
||||
)
|
||||
|
||||
logger.info(f"\n{tree}")
|
||||
|
||||
return tree
|
||||
|
||||
else:
|
||||
# convert lists/dicts to strings for vis only
|
||||
for node in G.nodes:
|
||||
# rename node and use former id as label
|
||||
label = G.nodes[node].get("label")
|
||||
|
||||
if not label:
|
||||
label = node
|
||||
|
||||
G = nx.relabel_nodes(G, {node: label})
|
||||
G.nodes[label]["label"] = node
|
||||
|
||||
for attrib in G.nodes[label]:
|
||||
if isinstance(G.nodes[label][attrib], (list, dict)):
|
||||
G.nodes[label][attrib] = str(G.nodes[label][attrib])
|
||||
|
||||
path = os.path.join(to_disk, "ontology_structure.graphml")
|
||||
|
||||
logger.info(f"Writing ontology structure to {path}.")
|
||||
|
||||
nx.write_graphml(G, path)
|
||||
|
||||
return True
|
||||
|
||||
def get_dict(self) -> dict:
|
||||
"""
|
||||
Returns a dictionary compatible with a BioCypher node for compatibility
|
||||
with the Neo4j driver.
|
||||
"""
|
||||
|
||||
d = {
|
||||
"node_id": self._get_current_id(),
|
||||
"node_label": "BioCypher",
|
||||
"properties": {
|
||||
"schema": "self.ontology_mapping.extended_schema",
|
||||
},
|
||||
}
|
||||
|
||||
return d
|
||||
|
||||
def _get_current_id(self):
|
||||
"""
|
||||
Instantiate a version ID for the current session. For now does simple
|
||||
versioning using datetime.
|
||||
|
||||
Can later implement incremental versioning, versioning from
|
||||
config file, or manual specification via argument.
|
||||
"""
|
||||
|
||||
now = datetime.now()
|
||||
return now.strftime("v%Y%m%d-%H%M%S")
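# Worked example of the identifier format produced above:
#
#     from datetime import datetime
#     datetime(2021, 6, 23, 9, 30, 5).strftime("v%Y%m%d-%H%M%S")
#     # -> 'v20210623-093005'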
|
480
biocypher/_translate.py
Normal file
@ -0,0 +1,480 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Sebastian Lobentanzer
|
||||
# ...
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
BioCypher 'translation' module. Responsible for translating between the raw
|
||||
input data and the BioCypherNode and BioCypherEdge objects.
|
||||
"""
|
||||
from ._logger import logger
|
||||
|
||||
logger.debug(f"Loading module {__name__}.")
|
||||
|
||||
from typing import Any, Union, Optional
|
||||
from collections.abc import Iterable, Generator
|
||||
|
||||
from more_itertools import peekable
|
||||
|
||||
from . import _misc
|
||||
from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
|
||||
from ._ontology import Ontology
|
||||
|
||||
__all__ = ["BiolinkAdapter", "Translator"]
|
||||
|
||||
|
||||
class Translator:
|
||||
"""
|
||||
Class responsible for executing the translation process that is configured in
|
||||
the schema_config.yaml file. Creates a mapping dictionary from that file,
|
||||
and, given nodes and edges, translates them into BioCypherNodes and
|
||||
BioCypherEdges. During this process, can also filter the properties of the
|
||||
entities if the schema_config.yaml file specifies a property whitelist or
|
||||
blacklist.
|
||||
|
||||
Provides utility functions for translating between input and output labels
|
||||
and cypher queries.
|
||||
"""
|
||||
|
||||
def __init__(self, ontology: "Ontology", strict_mode: bool = False):
|
||||
"""
|
||||
Args:
|
||||
ontology:
|
||||
Instance of :py:class:`Ontology` describing the hierarchy of the
|
||||
graph; its leaves are the entities that will be direct components
|
||||
of the graph, while the intermediary nodes are additional labels
|
||||
for filtering purposes.
|
||||
strict_mode:
|
||||
If True, the translator will raise an error if input data do not
|
||||
carry source, licence, and version information.
|
||||
"""
|
||||
|
||||
self.ontology = ontology
|
||||
self.strict_mode = strict_mode
|
||||
|
||||
# record nodes without biolink type configured in schema_config.yaml
|
||||
self.notype = {}
|
||||
|
||||
# mapping functionality for translating terms and queries
|
||||
self.mappings = {}
|
||||
self.reverse_mappings = {}
|
||||
|
||||
self._update_ontology_types()
|
||||
|
||||
def translate_nodes(
|
||||
self,
|
||||
node_tuples: Iterable,
|
||||
) -> Generator[BioCypherNode, None, None]:
|
||||
"""
|
||||
Translates input node representation to a representation that
|
||||
conforms to the schema of the given BioCypher graph. For now
|
||||
requires explicit statement of node type on pass.
|
||||
|
||||
Args:
|
||||
node_tuples (list of tuples): collection of tuples
|
||||
representing individual nodes by their unique id and a type
|
||||
that is translated from the original database notation to
|
||||
the corresponding BioCypher notation.
|
||||
|
||||
"""
|
||||
|
||||
self._log_begin_translate(node_tuples, "nodes")
|
||||
|
||||
for _id, _type, _props in node_tuples:
|
||||
# check for strict mode requirements
|
||||
required_props = ["source", "licence", "version"]
|
||||
|
||||
if self.strict_mode:
|
||||
# rename 'license' to 'licence' in _props
|
||||
if _props.get("license"):
|
||||
_props["licence"] = _props.pop("license")
|
||||
|
||||
for prop in required_props:
|
||||
if prop not in _props:
|
||||
raise ValueError(
|
||||
f"Property `{prop}` missing from node {_id}. "
|
||||
"Strict mode is enabled, so this is not allowed."
|
||||
)
|
||||
|
||||
# find the node in leaves that represents ontology node type
|
||||
_ontology_class = self._get_ontology_mapping(_type)
|
||||
|
||||
if _ontology_class:
|
||||
# filter properties for those specified in schema_config if any
|
||||
_filtered_props = self._filter_props(_ontology_class, _props)
|
||||
|
||||
# preferred id
|
||||
_preferred_id = self._get_preferred_id(_ontology_class)
|
||||
|
||||
yield BioCypherNode(
|
||||
node_id=_id,
|
||||
node_label=_ontology_class,
|
||||
preferred_id=_preferred_id,
|
||||
properties=_filtered_props,
|
||||
)
|
||||
|
||||
else:
|
||||
self._record_no_type(_type, _id)
|
||||
|
||||
self._log_finish_translate("nodes")
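# Illustrative usage sketch (assumes a schema entry mapping the input label
# "uniprot_protein" to the ontology class "protein"):
#
#     nodes = translator.translate_nodes(
#         [("P05067", "uniprot_protein", {"genesymbol": "APP"})]
#     )
#     biocypher_node = next(nodes)  # BioCypherNode labelled "protein"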
|
||||
|
||||
def _get_preferred_id(self, _bl_type: str) -> str:
|
||||
"""
|
||||
Returns the preferred id for the given Biolink type.
|
||||
"""
|
||||
|
||||
return (
|
||||
self.ontology.mapping.extended_schema[_bl_type]["preferred_id"]
|
||||
if "preferred_id"
|
||||
in self.ontology.mapping.extended_schema.get(_bl_type, {})
|
||||
else "id"
|
||||
)
|
||||
|
||||
def _filter_props(self, bl_type: str, props: dict) -> dict:
|
||||
"""
|
||||
Filters properties for those specified in schema_config if any.
|
||||
"""
|
||||
|
||||
filter_props = self.ontology.mapping.extended_schema[bl_type].get(
|
||||
"properties", {}
|
||||
)
|
||||
|
||||
# strict mode: add required properties (only if there is a whitelist)
|
||||
if self.strict_mode and filter_props:
|
||||
filter_props.update(
|
||||
{"source": "str", "licence": "str", "version": "str"},
|
||||
)
|
||||
|
||||
exclude_props = self.ontology.mapping.extended_schema[bl_type].get(
|
||||
"exclude_properties", []
|
||||
)
|
||||
|
||||
if isinstance(exclude_props, str):
|
||||
exclude_props = [exclude_props]
|
||||
|
||||
if filter_props and exclude_props:
|
||||
filtered_props = {
|
||||
k: v
|
||||
for k, v in props.items()
|
||||
if (k in filter_props.keys() and k not in exclude_props)
|
||||
}
|
||||
|
||||
elif filter_props:
|
||||
filtered_props = {
|
||||
k: v for k, v in props.items() if k in filter_props.keys()
|
||||
}
|
||||
|
||||
elif exclude_props:
|
||||
filtered_props = {
|
||||
k: v for k, v in props.items() if k not in exclude_props
|
||||
}
|
||||
|
||||
else:
|
||||
return props
|
||||
|
||||
missing_props = [
|
||||
k for k in filter_props.keys() if k not in filtered_props.keys()
|
||||
]
|
||||
# add missing properties with default values
|
||||
for k in missing_props:
|
||||
filtered_props[k] = None
|
||||
|
||||
return filtered_props
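# Worked sketch of the filtering above (hypothetical schema values, strict
# mode off): with `properties: {"name": "str", "sequence": "str"}` and
# `exclude_properties: "taxon"`, the input
# `{"name": "APP", "taxon": 9606, "length": 770}` is reduced to
# `{"name": "APP", "sequence": None}`; "taxon" and "length" are dropped, and
# the missing whitelisted key "sequence" is padded with None.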
|
||||
|
||||
def translate_edges(
|
||||
self,
|
||||
edge_tuples: Iterable,
|
||||
) -> Generator[Union[BioCypherEdge, BioCypherRelAsNode], None, None]:
|
||||
"""
|
||||
Translates input edge representation to a representation that
|
||||
conforms to the schema of the given BioCypher graph. For now
|
||||
requires explicit statement of edge type on pass.
|
||||
|
||||
Args:
|
||||
|
||||
edge_tuples (list of tuples):
|
||||
|
||||
collection of tuples representing source and target of
|
||||
an interaction via their unique ids as well as the type
|
||||
of interaction in the original database notation, which
|
||||
is translated to BioCypher notation using the ontology mapping.
|
||||
Can optionally possess its own ID.
|
||||
"""
|
||||
|
||||
self._log_begin_translate(edge_tuples, "edges")
|
||||
|
||||
# legacy: deal with 4-tuples (no edge id)
|
||||
# TODO remove for performance reasons once safe
|
||||
edge_tuples = peekable(edge_tuples)
|
||||
if len(edge_tuples.peek()) == 4:
|
||||
edge_tuples = [
|
||||
(None, src, tar, typ, props)
|
||||
for src, tar, typ, props in edge_tuples
|
||||
]
|
||||
|
||||
for _id, _src, _tar, _type, _props in edge_tuples:
|
||||
# check for strict mode requirements
|
||||
if self.strict_mode:
|
||||
if "source" not in _props:
|
||||
raise ValueError(
|
||||
f"Edge {_id if _id else (_src, _tar)} does not have a "
|
||||
"`source` property. This is required in strict mode."
|
||||
)
|
||||
if "licence" not in _props:
|
||||
raise ValueError(
|
||||
f"Edge {_id if _id else (_src, _tar)} does not have a "
|
||||
"`licence` property. This is required in strict mode."
|
||||
)
|
||||
|
||||
# match the input label (_type) to
|
||||
# a Biolink label from schema_config
|
||||
bl_type = self._get_ontology_mapping(_type)
|
||||
|
||||
if bl_type:
|
||||
# filter properties for those specified in schema_config if any
|
||||
_filtered_props = self._filter_props(bl_type, _props)
|
||||
|
||||
rep = self.ontology.mapping.extended_schema[bl_type][
|
||||
"represented_as"
|
||||
]
|
||||
|
||||
if rep == "node":
|
||||
if _id:
|
||||
# if it brings its own ID, use it
|
||||
node_id = _id
|
||||
|
||||
else:
|
||||
# source target concat
|
||||
node_id = (
|
||||
str(_src)
|
||||
+ "_"
|
||||
+ str(_tar)
|
||||
+ "_"
|
||||
+ "_".join(str(v) for v in _filtered_props.values())
|
||||
)
|
||||
|
||||
n = BioCypherNode(
|
||||
node_id=node_id,
|
||||
node_label=bl_type,
|
||||
properties=_filtered_props,
|
||||
)
|
||||
|
||||
# directionality check TODO generalise to account for
|
||||
# different descriptions of directionality or find a
|
||||
# more consistent solution for indicating directionality
|
||||
if _filtered_props.get("directed") is True:
|
||||
l1 = "IS_SOURCE_OF"
|
||||
l2 = "IS_TARGET_OF"
|
||||
|
||||
elif _filtered_props.get(
|
||||
"src_role",
|
||||
) and _filtered_props.get("tar_role"):
|
||||
l1 = _filtered_props.get("src_role")
|
||||
l2 = _filtered_props.get("tar_role")
|
||||
|
||||
else:
|
||||
l1 = l2 = "IS_PART_OF"
|
||||
|
||||
e_s = BioCypherEdge(
|
||||
source_id=_src,
|
||||
target_id=node_id,
|
||||
relationship_label=l1,
|
||||
# additional here
|
||||
)
|
||||
|
||||
e_t = BioCypherEdge(
|
||||
source_id=_tar,
|
||||
target_id=node_id,
|
||||
relationship_label=l2,
|
||||
# additional here
|
||||
)
|
||||
|
||||
yield BioCypherRelAsNode(n, e_s, e_t)
|
||||
|
||||
else:
|
||||
edge_label = self.ontology.mapping.extended_schema[
|
||||
bl_type
|
||||
].get("label_as_edge")
|
||||
|
||||
if edge_label is None:
|
||||
edge_label = bl_type
|
||||
|
||||
yield BioCypherEdge(
|
||||
relationship_id=_id,
|
||||
source_id=_src,
|
||||
target_id=_tar,
|
||||
relationship_label=edge_label,
|
||||
properties=_filtered_props,
|
||||
)
|
||||
|
||||
else:
|
||||
self._record_no_type(_type, (_src, _tar))
|
||||
|
||||
self._log_finish_translate("edges")
|
||||
|
||||
def _record_no_type(self, _type: Any, what: Any) -> None:
|
||||
"""
|
||||
Records the type of a node or edge that is not represented in the
|
||||
schema_config.
|
||||
"""
|
||||
|
||||
logger.debug(f"No ontology type defined for `{_type}`: {what}")
|
||||
|
||||
if self.notype.get(_type, None):
|
||||
self.notype[_type] += 1
|
||||
|
||||
else:
|
||||
self.notype[_type] = 1
|
||||
|
||||
def get_missing_biolink_types(self) -> dict:
|
||||
"""
|
||||
Returns a dictionary of types that were not represented in the
|
||||
schema_config.
|
||||
"""
|
||||
|
||||
return self.notype
|
||||
|
||||
@staticmethod
|
||||
def _log_begin_translate(_input: Iterable, what: str):
|
||||
n = f"{len(_input)} " if hasattr(_input, "__len__") else ""
|
||||
|
||||
logger.debug(f"Translating {n}{what} to BioCypher")
|
||||
|
||||
@staticmethod
|
||||
def _log_finish_translate(what: str):
|
||||
logger.debug(f"Finished translating {what} to BioCypher.")
|
||||
|
||||
def _update_ontology_types(self):
|
||||
"""
|
||||
Creates a dictionary to translate from input labels to ontology labels.
|
||||
|
||||
If an entity has multiple input labels, a mapping is created for each of them.
|
||||
"""
|
||||
|
||||
self._ontology_mapping = {}
|
||||
|
||||
for key, value in self.ontology.mapping.extended_schema.items():
|
||||
labels = value.get("input_label") or value.get("label_in_input")
|
||||
|
||||
if isinstance(labels, str):
|
||||
self._ontology_mapping[labels] = key
|
||||
|
||||
elif isinstance(labels, list):
|
||||
for label in labels:
|
||||
self._ontology_mapping[label] = key
|
||||
|
||||
if value.get("label_as_edge"):
|
||||
self._add_translation_mappings(labels, value["label_as_edge"])
|
||||
|
||||
else:
|
||||
self._add_translation_mappings(labels, key)
|
||||
|
||||
def _get_ontology_mapping(self, label: str) -> Optional[str]:
|
||||
"""
|
||||
For each given input type ("input_label" or "label_in_input"), find the
|
||||
corresponding ontology class in the leaves dictionary (from the
|
||||
`schema_config.yaml`).
|
||||
|
||||
Args:
|
||||
label:
|
||||
The input type to find (`input_label` or `label_in_input` in
|
||||
`schema_config.yaml`).
|
||||
"""
|
||||
|
||||
# commented out until behaviour of _update_bl_types is fixed
|
||||
return self._ontology_mapping.get(label, None)
|
||||
|
||||
def translate_term(self, term):
|
||||
"""
|
||||
Translate a single term.
|
||||
"""
|
||||
|
||||
return self.mappings.get(term, None)
|
||||
|
||||
def reverse_translate_term(self, term):
|
||||
"""
|
||||
Reverse translate a single term.
|
||||
"""
|
||||
|
||||
return self.reverse_mappings.get(term, None)
|
||||
|
||||
def translate(self, query):
|
||||
"""
|
||||
Translate a cypher query. Only translates labels as of now.
|
||||
"""
|
||||
for key in self.mappings:
|
||||
query = query.replace(":" + key, ":" + self.mappings[key])
|
||||
return query
|
||||
|
||||
def reverse_translate(self, query):
|
||||
"""
|
||||
Reverse translate a cypher query. Only translates labels as of
|
||||
now.
|
||||
"""
|
||||
for key in self.reverse_mappings:
|
||||
a = ":" + key + ")"
|
||||
b = ":" + key + "]"
|
||||
# TODO this conditional probably does not cover all cases
|
||||
if a in query or b in query:
|
||||
if isinstance(self.reverse_mappings[key], list):
|
||||
raise NotImplementedError(
|
||||
"Reverse translation of multiple inputs not "
|
||||
"implemented yet. Many-to-one mappings are "
|
||||
"not reversible. "
|
||||
f"({key} -> {self.reverse_mappings[key]})",
|
||||
)
|
||||
else:
|
||||
query = query.replace(
|
||||
a,
|
||||
":" + self.reverse_mappings[key] + ")",
|
||||
).replace(b, ":" + self.reverse_mappings[key] + "]")
|
||||
return query
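# Illustrative round trip (assumes the mappings {"uniprot_protein": "Protein"}
# and {"Protein": "uniprot_protein"} built by `_add_translation_mappings`):
#
#     translator.translate("MATCH (p:uniprot_protein) RETURN p")
#     # -> 'MATCH (p:Protein) RETURN p'
#     translator.reverse_translate("MATCH (p:Protein) RETURN p")
#     # -> 'MATCH (p:uniprot_protein) RETURN p'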
|
||||
|
||||
def _add_translation_mappings(self, original_name, biocypher_name):
|
||||
"""
|
||||
Add translation mappings for a label and name. We use here the
|
||||
PascalCase version of the BioCypher name, since sentence case is
|
||||
not useful for Cypher queries.
|
||||
"""
|
||||
if isinstance(original_name, list):
|
||||
for on in original_name:
|
||||
self.mappings[on] = self.name_sentence_to_pascal(
|
||||
biocypher_name,
|
||||
)
|
||||
else:
|
||||
self.mappings[original_name] = self.name_sentence_to_pascal(
|
||||
biocypher_name,
|
||||
)
|
||||
|
||||
if isinstance(biocypher_name, list):
|
||||
for bn in biocypher_name:
|
||||
self.reverse_mappings[
|
||||
self.name_sentence_to_pascal(
|
||||
bn,
|
||||
)
|
||||
] = original_name
|
||||
else:
|
||||
self.reverse_mappings[
|
||||
self.name_sentence_to_pascal(
|
||||
biocypher_name,
|
||||
)
|
||||
] = original_name
|
||||
|
||||
@staticmethod
|
||||
def name_sentence_to_pascal(name: str) -> str:
|
||||
"""
|
||||
Converts a name in sentence case to pascal case.
|
||||
"""
|
||||
# split on dots if dot is present
|
||||
if "." in name:
|
||||
return ".".join(
|
||||
[_misc.sentencecase_to_pascalcase(n) for n in name.split(".")],
|
||||
)
|
||||
else:
|
||||
return _misc.sentencecase_to_pascalcase(name)
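# Examples of the conversion performed above (illustrative inputs):
#
#     Translator.name_sentence_to_pascal("small molecule")
#     # -> 'SmallMolecule'
#     Translator.name_sentence_to_pascal("gene.gene product")
#     # -> 'Gene.GeneProduct'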
|
0
biocypher/output/__init__.py
Normal file
0
biocypher/output/connect/__init__.py
Normal file
422
biocypher/output/connect/_neo4j_driver.py
Normal file
@ -0,0 +1,422 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Sebastian Lobentanzer
|
||||
# ...
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
BioCypher 'online' mode. Handles connection and manipulation of a running DBMS.
|
||||
"""
|
||||
import subprocess
|
||||
|
||||
from biocypher._logger import logger
|
||||
|
||||
logger.debug(f"Loading module {__name__}.")
|
||||
|
||||
from collections.abc import Iterable
|
||||
import itertools
|
||||
|
||||
import neo4j_utils
|
||||
|
||||
from biocypher import _misc
|
||||
from biocypher._config import config as _config
|
||||
from biocypher._create import BioCypherEdge, BioCypherNode
|
||||
from biocypher._translate import Translator
|
||||
|
||||
__all__ = ["_Neo4jDriver"]
|
||||
|
||||
|
||||
class _Neo4jDriver:
|
||||
"""
|
||||
Manages a BioCypher connection to a Neo4j database using the
|
||||
``neo4j_utils.Driver`` class.
|
||||
|
||||
Args:
|
||||
|
||||
database_name (str): The name of the database to connect to.
|
||||
|
||||
wipe (bool): Whether to wipe the database before importing.
|
||||
|
||||
uri (str): The URI of the database.
|
||||
|
||||
user (str): The username to use for authentication.
|
||||
|
||||
password (str): The password to use for authentication.
|
||||
|
||||
multi_db (bool): Whether to use multi-database mode.
|
||||
|
||||
fetch_size (int): The number of records to fetch at a time.
|
||||
|
||||
increment_version (bool): Whether to increment the version number.
|
||||
|
||||
translator (Translator): The translator to use for mapping.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
database_name: str,
|
||||
uri: str,
|
||||
user: str,
|
||||
password: str,
|
||||
multi_db: bool,
|
||||
translator: Translator,
|
||||
wipe: bool = False,
|
||||
fetch_size: int = 1000,
|
||||
increment_version: bool = True,
|
||||
):
|
||||
self.translator = translator
|
||||
|
||||
self._driver = neo4j_utils.Driver(
|
||||
db_name=database_name,
|
||||
db_uri=uri,
|
||||
db_user=user,
|
||||
db_passwd=password,
|
||||
fetch_size=fetch_size,
|
||||
wipe=wipe,
|
||||
multi_db=multi_db,
|
||||
raise_errors=True,
|
||||
)
|
||||
|
||||
# check for biocypher config in connected graph
|
||||
|
||||
if wipe:
|
||||
self.init_db()
|
||||
|
||||
if increment_version:
|
||||
# set new current version node
|
||||
self._update_meta_graph()
|
||||
|
||||
def _update_meta_graph(self):
|
||||
logger.info("Updating Neo4j meta graph.")
|
||||
|
||||
# find current version node
|
||||
db_version = self._driver.query(
|
||||
"MATCH (v:BioCypher) " "WHERE NOT (v)-[:PRECEDES]->() " "RETURN v",
|
||||
)
|
||||
# add version node
|
||||
self.add_biocypher_nodes(self.translator.ontology)
|
||||
|
||||
# connect version node to previous
|
||||
if db_version[0]:
|
||||
previous = db_version[0][0]
|
||||
previous_id = previous["v"]["id"]
|
||||
e_meta = BioCypherEdge(
|
||||
previous_id,
|
||||
self.translator.ontology.get_dict().get("node_id"),
|
||||
"PRECEDES",
|
||||
)
|
||||
self.add_biocypher_edges(e_meta)
|
||||
|
||||
def init_db(self):
|
||||
"""
|
||||
Used to initialise a property graph database by setting up new
|
||||
constraints. Wipe has been performed by the ``neo4j_utils.Driver``
|
||||
class already.
|
||||
|
||||
Todo:
|
||||
- set up constraint creation interactively depending on the
|
||||
need of the database
|
||||
"""
|
||||
|
||||
logger.info("Initialising database.")
|
||||
self._create_constraints()
|
||||
|
||||
def _create_constraints(self):
|
||||
"""
|
||||
Creates constraints on node types in the graph. Used for
|
||||
initial setup.
|
||||
|
||||
Grabs leaves of the ``schema_config.yaml`` file and creates
|
||||
constraints on the id of all entities represented as nodes.
|
||||
"""
|
||||
|
||||
logger.info("Creating constraints for node types in config.")
|
||||
|
||||
major_neo4j_version = int(self._get_neo4j_version().split(".")[0])
|
||||
# get structure
|
||||
for leaf in self.translator.ontology.mapping.extended_schema.items():
|
||||
label = _misc.sentencecase_to_pascalcase(leaf[0], sep=r"\s\.")
|
||||
if leaf[1]["represented_as"] == "node":
|
||||
if major_neo4j_version >= 5:
|
||||
s = (
|
||||
f"CREATE CONSTRAINT `{label}_id` "
|
||||
f"IF NOT EXISTS FOR (n:`{label}`) "
|
||||
"REQUIRE n.id IS UNIQUE"
|
||||
)
|
||||
self._driver.query(s)
|
||||
else:
|
||||
s = (
|
||||
f"CREATE CONSTRAINT `{label}_id` "
|
||||
f"IF NOT EXISTS ON (n:`{label}`) "
|
||||
"ASSERT n.id IS UNIQUE"
|
||||
)
|
||||
self._driver.query(s)
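# Example of a generated statement for a hypothetical node class "protein"
# (PascalCase label "Protein") on Neo4j >= 5:
#
#     CREATE CONSTRAINT `Protein_id` IF NOT EXISTS
#     FOR (n:`Protein`) REQUIRE n.id IS UNIQUE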
|
||||
|
||||
def _get_neo4j_version(self):
|
||||
"""Get neo4j version."""
|
||||
try:
|
||||
neo4j_version = self._driver.query(
|
||||
"""
|
||||
CALL dbms.components()
|
||||
YIELD name, versions, edition
|
||||
UNWIND versions AS version
|
||||
RETURN version AS version
|
||||
""",
|
||||
)[0][0]["version"]
|
||||
return neo4j_version
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Error detecting Neo4j version: {e}. Using default version 4.0.0."
|
||||
)
|
||||
return "4.0.0"
|
||||
|
||||
def add_nodes(self, id_type_tuples: Iterable[tuple]) -> tuple:
|
||||
"""
|
||||
Generic node adder method to add any kind of input to the graph via the
|
||||
:class:`biocypher.create.BioCypherNode` class. Employs translation
|
||||
functionality and calls the :meth:`add_biocypher_nodes()` method.
|
||||
|
||||
Args:
|
||||
id_type_tuples (iterable of 3-tuple): for each node to add to
|
||||
the biocypher graph, a 3-tuple with the following layout:
|
||||
first, the (unique if constrained) ID of the node; second, the
|
||||
type of the node, capitalised or PascalCase and in noun form
|
||||
(Neo4j primary label, eg `:Protein`); and third, a dictionary
|
||||
of arbitrary properties the node should possess (can be empty).
|
||||
|
||||
Returns:
|
||||
2-tuple: the query result of :meth:`add_biocypher_nodes()`
|
||||
- first entry: data
|
||||
- second entry: Neo4j summary.
|
||||
"""
|
||||
|
||||
bn = self.translator.translate_nodes(id_type_tuples)
|
||||
return self.add_biocypher_nodes(bn)
|
||||
|
||||
def add_edges(self, id_src_tar_type_tuples: Iterable[tuple]) -> tuple:
|
||||
"""
|
||||
Generic edge adder method to add any kind of input to the graph
|
||||
via the :class:`biocypher.create.BioCypherEdge` class. Employs
|
||||
translation functionality and calls the
|
||||
:meth:`add_biocypher_edges()` method.
|
||||
|
||||
Args:
|
||||
|
||||
id_src_tar_type_tuples (iterable of 5-tuple):
|
||||
|
||||
for each edge to add to the biocypher graph, a 5-tuple
|
||||
with the following layout: first, the optional unique ID
|
||||
of the interaction. This can be `None` if there is no
|
||||
systematic identifier (which for many interactions is
|
||||
the case). Second and third, the (unique if constrained)
|
||||
IDs of the source and target nodes of the relationship;
|
||||
fourth, the type of the relationship; and fifth, a
|
||||
dictionary of arbitrary properties the edge should
|
||||
possess (can be empty).
|
||||
|
||||
Returns:
|
||||
|
||||
2-tuple: the query result of :meth:`add_biocypher_edges()`
|
||||
|
||||
- first entry: data
|
||||
- second entry: Neo4j summary.
|
||||
"""
|
||||
|
||||
bn = self.translator.translate_edges(id_src_tar_type_tuples)
|
||||
return self.add_biocypher_edges(bn)
|
||||
|
||||
def add_biocypher_nodes(
|
||||
self,
|
||||
nodes: Iterable[BioCypherNode],
|
||||
explain: bool = False,
|
||||
profile: bool = False,
|
||||
) -> bool:
|
||||
"""
|
||||
Accepts a node type handoff class
|
||||
(:class:`biocypher.create.BioCypherNode`) with id,
|
||||
label, and a dict of properties (passing on the type of
|
||||
property, ie, ``int``, ``str``, ...).
|
||||
|
||||
The dict retrieved by the
|
||||
:meth:`biocypher.create.BioCypherNode.get_dict()` method is
|
||||
passed into Neo4j as a map of maps, explicitly encoding node id
|
||||
and label, and adding all other properties from the 'properties'
|
||||
key of the dict. The merge is performed via APOC, matching only
|
||||
on node id to prevent duplicates. The same properties are set on
|
||||
match and on create, irrespective of the actual event.
|
||||
|
||||
Args:
|
||||
nodes:
|
||||
An iterable of :class:`biocypher.create.BioCypherNode` objects.
|
||||
explain:
|
||||
Call ``EXPLAIN`` on the CYPHER query.
|
||||
profile:
|
||||
Do profiling on the CYPHER query.
|
||||
|
||||
Returns:
|
||||
True for success, False otherwise.
|
||||
"""
|
||||
|
||||
try:
|
||||
nodes = _misc.to_list(nodes)
|
||||
|
||||
entities = [node.get_dict() for node in nodes]
|
||||
|
||||
except AttributeError:
|
||||
msg = "Nodes must have a `get_dict` method."
|
||||
logger.error(msg)
|
||||
|
||||
raise ValueError(msg)
|
||||
|
||||
logger.info(f"Merging {len(entities)} nodes.")
|
||||
|
||||
entity_query = (
|
||||
"UNWIND $entities AS ent "
|
||||
"CALL apoc.merge.node([ent.node_label], "
|
||||
"{id: ent.node_id}, ent.properties, ent.properties) "
|
||||
"YIELD node "
|
||||
"RETURN node"
|
||||
)
|
||||
|
||||
method = "explain" if explain else "profile" if profile else "query"
|
||||
|
||||
result = getattr(self._driver, method)(
|
||||
entity_query,
|
||||
parameters={
|
||||
"entities": entities,
|
||||
},
|
||||
)
|
||||
|
||||
logger.info("Finished merging nodes.")
|
||||
|
||||
return result
|
||||
|
||||
def add_biocypher_edges(
|
||||
self,
|
||||
edges: Iterable[BioCypherEdge],
|
||||
explain: bool = False,
|
||||
profile: bool = False,
|
||||
) -> bool:
|
||||
"""
|
||||
Accepts an edge type handoff class
|
||||
(:class:`biocypher.create.BioCypherEdge`) with source
|
||||
and target ids, label, and a dict of properties (passing on the
|
||||
type of property, ie, int, string ...).
|
||||
|
||||
The individual edge is either passed as a singleton, in the case
|
||||
of representation as an edge in the graph, or as a 4-tuple, in
|
||||
the case of representation as a node (with two edges connecting
|
||||
to interaction partners).
|
||||
|
||||
The dict retrieved by the
|
||||
:meth:`biocypher.create.BioCypherEdge.get_dict()` method is
|
||||
passed into Neo4j as a map of maps, explicitly encoding source
|
||||
and target ids and the relationship label, and adding all edge
|
||||
properties from the 'properties' key of the dict. The merge is
|
||||
performed via APOC, matching only on source and target id to
|
||||
prevent duplicates. The same properties are set on match and on
|
||||
create, irrespective of the actual event.
|
||||
|
||||
Args:
|
||||
edges:
|
||||
An iterable of :class:`biocypher.create.BioCypherEdge` objects.
|
||||
explain:
|
||||
Call ``EXPLAIN`` on the CYPHER query.
|
||||
profile:
|
||||
Do profiling on the CYPHER query.
|
||||
|
||||
Returns:
|
||||
`True` for success, `False` otherwise.
|
||||
"""
|
||||
|
||||
edges = _misc.ensure_iterable(edges)
|
||||
edges = itertools.chain(*(_misc.ensure_iterable(i) for i in edges))
|
||||
|
||||
nodes = []
|
||||
rels = []
|
||||
|
||||
try:
|
||||
for e in edges:
|
||||
if hasattr(e, "get_node"):
|
||||
nodes.append(e.get_node())
|
||||
rels.append(e.get_source_edge().get_dict())
|
||||
rels.append(e.get_target_edge().get_dict())
|
||||
|
||||
else:
|
||||
rels.append(e.get_dict())
|
||||
|
||||
except AttributeError:
|
||||
msg = "Edges and nodes must have a `get_dict` method."
|
||||
logger.error(msg)
|
||||
|
||||
raise ValueError(msg)
|
||||
|
||||
self.add_biocypher_nodes(nodes)
|
||||
logger.info(f"Merging {len(rels)} edges.")
|
||||
|
||||
# cypher query
|
||||
|
||||
# merging only on the ids of the entities, passing the
|
||||
# properties on match and on create;
|
||||
# TODO add node labels?
|
||||
node_query = (
|
||||
"UNWIND $rels AS r "
|
||||
"MERGE (src {id: r.source_id}) "
|
||||
"MERGE (tar {id: r.target_id}) "
|
||||
)
|
||||
|
||||
self._driver.query(node_query, parameters={"rels": rels})
|
||||
|
||||
edge_query = (
|
||||
"UNWIND $rels AS r "
|
||||
"MATCH (src {id: r.source_id}) "
|
||||
"MATCH (tar {id: r.target_id}) "
|
||||
"WITH src, tar, r "
|
||||
"CALL apoc.merge.relationship"
|
||||
"(src, r.relationship_label, NULL, "
|
||||
"r.properties, tar, r.properties) "
|
||||
"YIELD rel "
|
||||
"RETURN rel"
|
||||
)
|
||||
|
||||
method = "explain" if explain else "profile" if profile else "query"
|
||||
|
||||
result = getattr(self._driver, method)(
|
||||
edge_query, parameters={"rels": rels}
|
||||
)
|
||||
|
||||
logger.info("Finished merging edges.")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def get_driver(
|
||||
dbms: str,
|
||||
translator: "Translator",
|
||||
):
|
||||
"""
|
||||
Function to return the writer class.
|
||||
|
||||
Returns:
|
||||
class: the writer class
|
||||
"""
|
||||
|
||||
dbms_config = _config(dbms)
|
||||
|
||||
if dbms == "neo4j":
|
||||
return _Neo4jDriver(
|
||||
database_name=dbms_config["database_name"],
|
||||
wipe=dbms_config["wipe"],
|
||||
uri=dbms_config["uri"],
|
||||
user=dbms_config["user"],
|
||||
password=dbms_config["password"],
|
||||
multi_db=dbms_config["multi_db"],
|
||||
translator=translator,
|
||||
)
|
||||
|
||||
return None
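# Illustrative usage sketch (assumes a populated "neo4j" section in the
# BioCypher configuration and an existing Translator instance):
#
#     driver = get_driver("neo4j", translator=translator)
#     driver.add_nodes([("P05067", "protein", {"genesymbol": "APP"})])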
|
0
biocypher/output/in_memory/__init__.py
Normal file
90
biocypher/output/in_memory/_pandas.py
Normal file
@ -0,0 +1,90 @@
|
||||
import pandas as pd
|
||||
|
||||
from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
|
||||
|
||||
|
||||
class Pandas:
|
||||
def __init__(self, translator, deduplicator):
|
||||
self.translator = translator
|
||||
self.deduplicator = deduplicator
|
||||
|
||||
self.dfs = {}
|
||||
|
||||
def _separate_entity_types(self, entities):
|
||||
"""
|
||||
Given mixed iterable of BioCypher objects, separate them into lists by
|
||||
type. Also deduplicates using the `Deduplicator` instance.
|
||||
"""
|
||||
lists = {}
|
||||
for entity in entities:
|
||||
if (
|
||||
not isinstance(entity, BioCypherNode)
|
||||
and not isinstance(entity, BioCypherEdge)
|
||||
and not isinstance(entity, BioCypherRelAsNode)
|
||||
):
|
||||
raise TypeError(
|
||||
"Expected a BioCypherNode / BioCypherEdge / "
|
||||
f"BioCypherRelAsNode, got {type(entity)}."
|
||||
)
|
||||
|
||||
if isinstance(entity, BioCypherNode):
|
||||
seen = self.deduplicator.node_seen(entity)
|
||||
elif isinstance(entity, BioCypherEdge):
|
||||
seen = self.deduplicator.edge_seen(entity)
|
||||
elif isinstance(entity, BioCypherRelAsNode):
|
||||
seen = self.deduplicator.rel_as_node_seen(entity)
|
||||
|
||||
if seen:
|
||||
continue
|
||||
|
||||
if isinstance(entity, BioCypherRelAsNode):
|
||||
node = entity.get_node()
|
||||
source_edge = entity.get_source_edge()
|
||||
target_edge = entity.get_target_edge()
|
||||
|
||||
_type = node.get_type()
|
||||
if _type not in lists:
|
||||
lists[_type] = []
|
||||
lists[_type].append(node)
|
||||
|
||||
_source_type = source_edge.get_type()
|
||||
if _source_type not in lists:
|
||||
lists[_source_type] = []
|
||||
lists[_source_type].append(source_edge)
|
||||
|
||||
_target_type = target_edge.get_type()
|
||||
if _target_type not in lists:
|
||||
lists[_target_type] = []
|
||||
lists[_target_type].append(target_edge)
|
||||
continue
|
||||
|
||||
_type = entity.get_type()
|
||||
if _type not in lists:
|
||||
lists[_type] = []
|
||||
lists[_type].append(entity)
|
||||
|
||||
return lists
|
||||
|
||||
def add_tables(self, entities):
|
||||
"""
|
||||
Add Pandas dataframes for each node and edge type in the input.
|
||||
"""
|
||||
|
||||
lists = self._separate_entity_types(entities)
|
||||
|
||||
for _type, _entities in lists.items():
|
||||
self._add_entity_df(_type, _entities)
|
||||
|
||||
def _add_entity_df(self, _type, _entities):
|
||||
df = pd.DataFrame(
|
||||
pd.json_normalize([node.get_dict() for node in _entities])
|
||||
)
|
||||
# replace "properties." with "" in column names
|
||||
df.columns = [col.replace("properties.", "") for col in df.columns]
|
||||
if _type not in self.dfs:
|
||||
self.dfs[_type] = df
|
||||
else:
|
||||
self.dfs[_type] = pd.concat(
|
||||
[self.dfs[_type], df], ignore_index=True
|
||||
)
|
||||
return self.dfs[_type]
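# Illustrative usage sketch (hypothetical translator/deduplicator instances
# and node tuples):
#
#     in_memory = Pandas(translator, deduplicator)
#     in_memory.add_tables(translator.translate_nodes(node_tuples))
#     in_memory.dfs["protein"]  # one DataFrame per node or edge type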
|
0
biocypher/output/write/__init__.py
Normal file
1046
biocypher/output/write/_batch_writer.py
Normal file
File diff suppressed because it is too large
113
biocypher/output/write/_get_writer.py
Normal file
@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Sebastian Lobentanzer
|
||||
# Michael Hartung
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
BioCypher 'offline' module. Handles the writing of node and edge representations
|
||||
suitable for import into a DBMS.
|
||||
"""
|
||||
|
||||
from biocypher._logger import logger
|
||||
from biocypher.output.write.graph._rdf import _RDFWriter
|
||||
from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
|
||||
from biocypher.output.write.graph._arangodb import _ArangoDBBatchWriter
|
||||
from biocypher.output.write.graph._networkx import _NetworkXWriter
|
||||
from biocypher.output.write.relational._csv import _PandasCSVWriter
|
||||
from biocypher.output.write.relational._sqlite import _SQLiteBatchWriter
|
||||
from biocypher.output.write.relational._postgresql import _PostgreSQLBatchWriter
|
||||
|
||||
logger.debug(f"Loading module {__name__}.")
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from biocypher._config import config as _config
|
||||
|
||||
__all__ = ["get_writer", "DBMS_TO_CLASS"]
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from biocypher._translate import Translator
|
||||
from biocypher._deduplicate import Deduplicator
|
||||
|
||||
DBMS_TO_CLASS = {
|
||||
"neo": _Neo4jBatchWriter,
|
||||
"neo4j": _Neo4jBatchWriter,
|
||||
"Neo4j": _Neo4jBatchWriter,
|
||||
"postgres": _PostgreSQLBatchWriter,
|
||||
"postgresql": _PostgreSQLBatchWriter,
|
||||
"PostgreSQL": _PostgreSQLBatchWriter,
|
||||
"arango": _ArangoDBBatchWriter,
|
||||
"arangodb": _ArangoDBBatchWriter,
|
||||
"ArangoDB": _ArangoDBBatchWriter,
|
||||
"sqlite": _SQLiteBatchWriter,
|
||||
"sqlite3": _SQLiteBatchWriter,
|
||||
"rdf": _RDFWriter,
|
||||
"RDF": _RDFWriter,
|
||||
"csv": _PandasCSVWriter,
|
||||
"CSV": _PandasCSVWriter,
|
||||
"pandas": _PandasCSVWriter,
|
||||
"Pandas": _PandasCSVWriter,
|
||||
"networkx": _NetworkXWriter,
|
||||
"NetworkX": _NetworkXWriter,
|
||||
}
|
||||
|
||||
|
||||
def get_writer(
|
||||
dbms: str,
|
||||
translator: "Translator",
|
||||
deduplicator: "Deduplicator",
|
||||
output_directory: str,
|
||||
strict_mode: bool,
|
||||
):
|
||||
"""
|
||||
Function to return the writer class based on the selection in the config
|
||||
file.
|
||||
|
||||
Args:
|
||||
dbms: the database management system; for options, see DBMS_TO_CLASS.
|
||||
translator: the Translator object.
|
||||
deduplicator: the Deduplicator object.
|
||||
output_directory: the directory to write the output files to.
|
||||
strict_mode: whether to use strict mode.
|
||||
|
||||
Returns:
|
||||
instance: an instance of the selected writer class.
|
||||
"""
|
||||
|
||||
dbms_config = _config(dbms)
|
||||
|
||||
writer = DBMS_TO_CLASS.get(dbms)
|
||||
|
||||
if not writer:
|
||||
raise ValueError(f"Unknown dbms: {dbms}")
|
||||
|
||||
if writer is not None:
|
||||
return writer(
|
||||
translator=translator,
|
||||
deduplicator=deduplicator,
|
||||
delimiter=dbms_config.get("delimiter"),
|
||||
array_delimiter=dbms_config.get("array_delimiter"),
|
||||
quote=dbms_config.get("quote_character"),
|
||||
output_directory=output_directory,
|
||||
db_name=dbms_config.get("database_name"),
|
||||
import_call_bin_prefix=dbms_config.get("import_call_bin_prefix"),
|
||||
import_call_file_prefix=dbms_config.get("import_call_file_prefix"),
|
||||
wipe=dbms_config.get("wipe"),
|
||||
strict_mode=strict_mode,
|
||||
skip_bad_relationships=dbms_config.get(
|
||||
"skip_bad_relationships"
|
||||
), # neo4j
|
||||
skip_duplicate_nodes=dbms_config.get(
|
||||
"skip_duplicate_nodes"
|
||||
), # neo4j
|
||||
db_user=dbms_config.get("user"), # psql
|
||||
db_password=dbms_config.get("password"), # psql
|
||||
db_port=dbms_config.get("port"), # psql
|
||||
rdf_format=dbms_config.get("rdf_format"), # rdf
|
||||
rdf_namespaces=dbms_config.get("rdf_namespaces"), # rdf
|
||||
)
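# Illustrative usage sketch (assumes a populated "neo4j" output section in
# the BioCypher configuration):
#
#     writer = get_writer(
#         dbms="neo4j",
#         translator=translator,
#         deduplicator=deduplicator,
#         output_directory="biocypher-out",
#         strict_mode=False,
#     )
#     writer.write_nodes(translator.translate_nodes(node_tuples))
#     writer.write_import_call()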
|
200
biocypher/output/write/_writer.py
Normal file
@ -0,0 +1,200 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Union, Optional
|
||||
from collections.abc import Iterable
|
||||
import os
|
||||
|
||||
from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
|
||||
from biocypher._logger import logger
|
||||
from biocypher._translate import Translator
|
||||
from biocypher._deduplicate import Deduplicator
|
||||
|
||||
__all__ = ["_Writer"]
|
||||
|
||||
|
||||
class _Writer(ABC):
|
||||
"""Abstract class for writing node and edge representations to disk.
|
||||
Specifics of the different writers (e.g. neo4j, postgresql, csv, etc.)
|
||||
are implemented in the child classes. Any concrete writer needs to
|
||||
implement at least:
|
||||
- _write_node_data
|
||||
- _write_edge_data
|
||||
- _construct_import_call
|
||||
- _get_import_script_name
|
||||
|
||||
Args:
|
||||
translator (Translator): Instance of :py:class:`Translator` to enable translation of
|
||||
nodes and manipulation of properties.
|
||||
deduplicator (Deduplicator): Instance of :py:class:`Deduplicator` to enable deduplication
|
||||
of nodes and edges.
|
||||
output_directory (str, optional): Path for exporting CSV files. Defaults to None.
|
||||
strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
|
||||
|
||||
Raises:
|
||||
NotImplementedError: Writer implementation must override '_write_node_data'
|
||||
NotImplementedError: Writer implementation must override '_write_edge_data'
|
||||
NotImplementedError: Writer implementation must override '_construct_import_call'
|
||||
NotImplementedError: Writer implementation must override '_get_import_script_name'
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
translator: Translator,
|
||||
deduplicator: Deduplicator,
|
||||
output_directory: Optional[str] = None,
|
||||
strict_mode: bool = False,
|
||||
*args,
|
||||
**kwargs,
|
||||
):
|
||||
"""Abstract class for writing node and edge representations to disk.
|
||||
|
||||
Args:
|
||||
translator (Translator): Instance of :py:class:`Translator` to enable translation of
|
||||
nodes and manipulation of properties.
|
||||
deduplicator (Deduplicator): Instance of :py:class:`Deduplicator` to enable deduplication
|
||||
of nodes and edges.
|
||||
output_directory (str, optional): Path for exporting CSV files. Defaults to None.
|
||||
strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
|
||||
"""
|
||||
self.translator = translator
|
||||
self.deduplicator = deduplicator
|
||||
self.strict_mode = strict_mode
|
||||
self.output_directory = output_directory
|
||||
|
||||
if os.path.exists(self.output_directory):
|
||||
if kwargs.get("write_to_file", True):
|
||||
logger.warning(
|
||||
f"Output directory `{self.output_directory}` already exists. "
|
||||
"If this is not planned, file consistency may be compromised."
|
||||
)
|
||||
else:
|
||||
logger.info(f"Creating output directory `{self.output_directory}`.")
|
||||
os.makedirs(self.output_directory)
|
||||
|
||||
@abstractmethod
|
||||
def _write_node_data(
|
||||
self,
|
||||
nodes: Iterable[
|
||||
Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]
|
||||
],
|
||||
) -> bool:
|
||||
"""Implement how to write nodes to disk.
|
||||
|
||||
Args:
|
||||
nodes (Iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Writer implementation must override '_write_node_data'"
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
def _write_edge_data(
|
||||
self,
|
||||
edges: Iterable[
|
||||
Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]
|
||||
],
|
||||
) -> bool:
|
||||
"""Implement how to write edges to disk.
|
||||
|
||||
Args:
|
||||
edges (Iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Writer implementation must override '_write_edge_data'"
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
def _construct_import_call(self) -> str:
|
||||
"""
|
||||
Function to construct the import call detailing folder and
|
||||
individual node and edge headers and data files, as well as
|
||||
delimiters and database name. Built after all data has been
|
||||
processed to ensure that nodes are called before any edges.
|
||||
|
||||
Returns:
|
||||
str: command for importing the output files into a DBMS.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Writer implementation must override '_construct_import_call'"
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
def _get_import_script_name(self) -> str:
|
||||
"""Returns the name of the import script.
|
||||
|
||||
Returns:
|
||||
str: The name of the import script (ending in .sh)
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Writer implementation must override '_get_import_script_name'"
|
||||
)
|
||||
|
||||
def write_nodes(
|
||||
self, nodes, batch_size: int = int(1e6), force: bool = False
|
||||
):
|
||||
"""Wrapper for writing nodes.
|
||||
|
||||
Args:
|
||||
nodes (BioCypherNode): a list or generator of nodes in
|
||||
:py:class:`BioCypherNode` format
|
||||
batch_size (int): The batch size for writing nodes.
|
||||
force (bool): Whether to force writing nodes even if their type is
|
||||
not present in the schema.
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
passed = self._write_node_data(nodes)
|
||||
if not passed:
|
||||
logger.error("Error while writing node data.")
|
||||
return False
|
||||
return True
|
||||
|
||||
def write_edges(
|
||||
self, edges, batch_size: int = int(1e6), force: bool = False
|
||||
):
|
||||
"""Wrapper for writing edges.
|
||||
|
||||
Args:
|
||||
edges (BioCypherEdge): a list or generator of edges in
|
||||
:py:class:`BioCypherEdge` format
|
||||
batch_size (int): The batch size for writing edges.
|
||||
force (bool): Whether to force writing edges even if their type is
|
||||
not present in the schema.
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
passed = self._write_edge_data(edges)
|
||||
if not passed:
|
||||
logger.error("Error while writing edge data.")
|
||||
return False
|
||||
return True
|
||||
|
||||
def write_import_call(self):
|
||||
"""
|
||||
Function to write the import call detailing folder and
|
||||
individual node and edge headers and data files, as well as
|
||||
delimiters and database name, to the export folder as txt.
|
||||
|
||||
Returns:
|
||||
str: The path of the file holding the import call.
|
||||
"""
|
||||
file_path = os.path.join(
|
||||
self.output_directory, self._get_import_script_name()
|
||||
)
|
||||
logger.info(
|
||||
f"Writing {self.__class__.__name__} import call to `{file_path}`."
|
||||
)
|
||||
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
f.write(self._construct_import_call())
|
||||
|
||||
return file_path
|
0
biocypher/output/write/graph/__init__.py
Normal file
241
biocypher/output/write/graph/_arangodb.py
Normal file
@ -0,0 +1,241 @@
|
||||
import os
|
||||
|
||||
from biocypher._logger import logger
|
||||
from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
|
||||
|
||||
|
||||
class _ArangoDBBatchWriter(_Neo4jBatchWriter):
|
||||
"""
|
||||
Class for writing node and edge representations to disk using the format
|
||||
specified by ArangoDB for the use of "arangoimport". Output files are
|
||||
similar to Neo4j, but with a different header format.
|
||||
"""
|
||||
|
||||
def _get_default_import_call_bin_prefix(self):
|
||||
"""
|
||||
Method to provide the default string for the import call bin prefix.
|
||||
|
||||
Returns:
|
||||
str: The default import call bin prefix for arangoimp (an empty string).
|
||||
"""
|
||||
return ""
|
||||
|
||||
def _get_import_script_name(self) -> str:
|
||||
"""
|
||||
Returns the name of the ArangoDB import script
|
||||
|
||||
Returns:
|
||||
str: The name of the import script (ending in .sh)
|
||||
"""
|
||||
return "arangodb-import-call.sh"
|
||||
|
||||
def _write_node_headers(self):
|
||||
"""
|
||||
Writes single CSV file for a graph entity that is represented
|
||||
as a node as per the definition in the `schema_config.yaml`,
|
||||
containing only the header for this type of node.
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
# load headers from data parse
|
||||
if not self.node_property_dict:
|
||||
logger.error(
|
||||
"Header information not found. Was the data parsed first?",
|
||||
)
|
||||
return False
|
||||
|
||||
for label, props in self.node_property_dict.items():
|
||||
# create header CSV with ID, properties, labels
|
||||
|
||||
_id = "_key"
|
||||
|
||||
# translate label to PascalCase
|
||||
pascal_label = self.translator.name_sentence_to_pascal(label)
|
||||
|
||||
header = f"{pascal_label}-header.csv"
|
||||
header_path = os.path.join(
|
||||
self.outdir,
|
||||
header,
|
||||
)
|
||||
|
||||
# check if file already exists
|
||||
if os.path.exists(header_path):
|
||||
logger.warning(
|
||||
f"File {header_path} already exists. Overwriting."
|
||||
)
|
||||
|
||||
# concatenate key:value in props
|
||||
props_list = []
|
||||
for k in props.keys():
|
||||
props_list.append(f"{k}")
|
||||
|
||||
# create list of lists and flatten
|
||||
# removes need for empty check of property list
|
||||
out_list = [[_id], props_list]
|
||||
out_list = [val for sublist in out_list for val in sublist]
|
||||
|
||||
with open(header_path, "w", encoding="utf-8") as f:
|
||||
# concatenate with delimiter
|
||||
row = self.delim.join(out_list)
|
||||
f.write(row)
|
||||
|
||||
# add collection from schema config
|
||||
collection = self.translator.ontology.mapping.extended_schema[
|
||||
label
|
||||
].get("db_collection_name", None)
|
||||
|
||||
# add file path to the arangoimport statement
|
||||
# do once for each part file
|
||||
parts = self.parts.get(label, [])
|
||||
|
||||
if not parts:
|
||||
raise ValueError(
|
||||
f"No parts found for node label {label}. "
|
||||
f"Check that the data was parsed first.",
|
||||
)
|
||||
|
||||
for part in parts:
|
||||
import_call_header_path = os.path.join(
|
||||
self.import_call_file_prefix,
|
||||
header,
|
||||
)
|
||||
import_call_parts_path = os.path.join(
|
||||
self.import_call_file_prefix,
|
||||
part,
|
||||
)
|
||||
|
||||
self.import_call_nodes.add(
|
||||
(
|
||||
import_call_header_path,
|
||||
import_call_parts_path,
|
||||
collection,
|
||||
)
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
def _write_edge_headers(self):
|
||||
"""
|
||||
Writes single CSV file for a graph entity that is represented
|
||||
as an edge as per the definition in the `schema_config.yaml`,
|
||||
containing only the header for this type of edge.
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
# load headers from data parse
|
||||
if not self.edge_property_dict:
|
||||
logger.error(
|
||||
"Header information not found. Was the data parsed first?",
|
||||
)
|
||||
return False
|
||||
|
||||
for label, props in self.edge_property_dict.items():
|
||||
# translate label to PascalCase
|
||||
pascal_label = self.translator.name_sentence_to_pascal(label)
|
||||
|
||||
# paths
|
||||
header = f"{pascal_label}-header.csv"
|
||||
header_path = os.path.join(
|
||||
self.outdir,
|
||||
header,
|
||||
)
|
||||
parts = f"{pascal_label}-part.*"
|
||||
|
||||
# check for file exists
|
||||
if os.path.exists(header_path):
|
||||
logger.warning(
|
||||
f"Header file {header_path} already exists. Overwriting."
|
||||
)
|
||||
|
||||
# concatenate key:value in props
|
||||
props_list = []
|
||||
for k in props.keys():
|
||||
props_list.append(f"{k}")
|
||||
|
||||
out_list = ["_from", "_key", *props_list, "_to"]
|
||||
|
||||
with open(header_path, "w", encoding="utf-8") as f:
|
||||
# concatenate with delimiter
|
||||
row = self.delim.join(out_list)
|
||||
f.write(row)
|
||||
|
||||
# add collection from schema config
|
||||
if not self.translator.ontology.mapping.extended_schema.get(label):
|
||||
for (
|
||||
_,
|
||||
v,
|
||||
) in self.translator.ontology.mapping.extended_schema.items():
|
||||
if v.get("label_as_edge") == label:
|
||||
collection = v.get("db_collection_name", None)
|
||||
break
|
||||
|
||||
else:
|
||||
collection = self.translator.ontology.mapping.extended_schema[
|
||||
label
|
||||
].get("db_collection_name", None)
|
||||
|
||||
# add file path to the arangoimport statement (import call path
|
||||
# may be different from actual output path)
|
||||
header_import_call_path = os.path.join(
|
||||
self.import_call_file_prefix,
|
||||
header,
|
||||
)
|
||||
parts_import_call_path = os.path.join(
|
||||
self.import_call_file_prefix,
|
||||
parts,
|
||||
)
|
||||
self.import_call_edges.add(
|
||||
(
|
||||
header_import_call_path,
|
||||
parts_import_call_path,
|
||||
collection,
|
||||
)
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
def _construct_import_call(self) -> str:
|
||||
"""
|
||||
Function to construct the import call detailing folder and
|
||||
individual node and edge headers and data files, as well as
|
||||
delimiters and database name. Built after all data has been
|
||||
processed to ensure that nodes are called before any edges.
|
||||
|
||||
Returns:
|
||||
str: a bash command for neo4j-admin import
|
||||
"""
|
||||
import_call = (
|
||||
f"{self.import_call_bin_prefix}arangoimp "
|
||||
f"--type csv "
|
||||
f'--separator="{self.escaped_delim}" '
|
||||
)
|
||||
|
||||
if self.quote == "'":
|
||||
import_call += f'--quote="{self.quote}" '
|
||||
else:
|
||||
import_call += f"--quote='{self.quote}' "
|
||||
|
||||
node_lines = ""
|
||||
|
||||
# node import calls: one line per node type
|
||||
for header_path, parts_path, collection in self.import_call_nodes:
|
||||
line = (
|
||||
f"{import_call} "
|
||||
f"--headers-file {header_path} "
|
||||
f"--file= {parts_path} "
|
||||
)
|
||||
|
||||
if collection:
|
||||
line += f"--create-collection --collection {collection} "
|
||||
|
||||
node_lines += f"{line}\n"
|
||||
|
||||
edge_lines = ""
|
||||
|
||||
# edge import calls: one line per edge type
|
||||
for header_path, parts_path, collection in self.import_call_edges:
|
||||
# mirror the node import call above (one call per edge part file);
# --create-collection-type is assumed here so new collections are edge collections
line = f"{import_call} --headers-file {header_path} --file={parts_path} "
if collection:
    line += f"--create-collection --create-collection-type edge --collection {collection} "
edge_lines += f"{line}\n"
|
||||
|
||||
return node_lines + edge_lines
|
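For orientation, a minimal sketch of the kind of shell line this writer assembles per node part file follows; the binary prefix, delimiter, quote character, collection name and file paths are invented and only illustrate the string concatenation in _construct_import_call above.

# Illustrative only: assumed prefix "bin/", delimiter ";", quote "'",
# collection "Protein" and one part file; mirrors the f-strings above.
import_call = 'bin/arangoimp --type csv --separator=";" ' + "--quote=\"'\" "
node_line = (
    f"{import_call} "
    "--headers-file import/Protein-header.csv "
    "--file=import/Protein-part000.csv "
    "--create-collection --collection Protein "
)
print(node_line)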
502
biocypher/output/write/graph/_neo4j.py
Normal file
@ -0,0 +1,502 @@
|
||||
import os
|
||||
import glob
|
||||
import pandas as pd
|
||||
|
||||
from biocypher._logger import logger
|
||||
from biocypher.output.write._batch_writer import parse_label, _BatchWriter
|
||||
|
||||
|
||||
class _Neo4jBatchWriter(_BatchWriter):
|
||||
"""
|
||||
Class for writing node and edge representations to disk using the
|
||||
format specified by Neo4j for the use of admin import. Each batch
|
||||
writer instance has a fixed representation that needs to be passed
|
||||
at instantiation via the :py:attr:`schema` argument. The instance
|
||||
also expects an ontology adapter via :py:attr:`ontology_adapter` to be able
|
||||
to convert and extend the hierarchy.
|
||||
|
||||
This class inherits from the abstract class "_BatchWriter" and implements the
|
||||
Neo4j-specific methods:
|
||||
|
||||
- _write_node_headers
|
||||
- _write_edge_headers
|
||||
- _construct_import_call
|
||||
- _write_array_string
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
Constructor.
|
||||
|
||||
Checks the version of Neo4j and adds a command scope if the version is >= 5.
|
||||
|
||||
Returns:
|
||||
_Neo4jBatchWriter: An instance of the writer.
|
||||
"""
|
||||
|
||||
# Should read the configuration and setup import_call_bin_prefix.
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def _get_default_import_call_bin_prefix(self):
|
||||
"""
|
||||
Method to provide the default string for the import call bin prefix.
|
||||
|
||||
Returns:
|
||||
str: The default location for the neo4j admin import location
|
||||
"""
|
||||
|
||||
return "bin/"
|
||||
|
||||
def _write_array_string(self, string_list):
|
||||
"""
|
||||
Method to write the string representation of an array into a .csv file
|
||||
as required by the neo4j admin-import.
|
||||
|
||||
Args:
|
||||
string_list (list): list of ontology strings
|
||||
|
||||
Returns:
|
||||
str: The string representation of an array for the neo4j admin import
|
||||
"""
|
||||
string = self.adelim.join(string_list)
|
||||
return f"{self.quote}{string}{self.quote}"
|
||||
|
||||
def _write_node_headers(self):
|
||||
"""
|
||||
Writes a single CSV file for a graph entity that is represented
|
||||
as a node as per the definition in the `schema_config.yaml`,
|
||||
containing only the header for this type of node.
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
# load headers from data parse
|
||||
if not self.node_property_dict:
|
||||
logger.error(
|
||||
"Header information not found. Was the data parsed first?",
|
||||
)
|
||||
return False
|
||||
|
||||
for label, props in self.node_property_dict.items():
|
||||
_id = ":ID"
|
||||
|
||||
# MeDaX dev remark:
|
||||
# FHIR data yields case-sensitive labels; e.g. 'Procedure' and 'procedure' are two distinct node types,
|
||||
# because resources are converted to more specific node classes using their "resourceType" attribute.
|
||||
|
||||
# translate label to PascalCase
|
||||
pascal_label = self.translator.name_sentence_to_pascal(
|
||||
parse_label(label)
|
||||
)
|
||||
|
||||
header = f"{pascal_label}-header.csv"
|
||||
header_path = os.path.join(
|
||||
self.outdir,
|
||||
header,
|
||||
)
|
||||
parts = f"{pascal_label}-part.*"
|
||||
|
||||
existing_header = False
|
||||
# check if file already exists
|
||||
if os.path.exists(header_path):
|
||||
logger.warning(
|
||||
f"Header file `{header_path}` already exists. Overwriting.",
|
||||
)
|
||||
with open(header_path, "r", encoding="utf-8") as existing:
|
||||
existing_header = existing.read().strip().split(self.delim)
|
||||
|
||||
# concatenate key:value in props
|
||||
props_list = []
|
||||
for k, v in props.items():
|
||||
if v in ["int", "long", "integer"]:
|
||||
props_list.append(f"{k}:long")
|
||||
elif v in ["int[]", "long[]", "integer[]"]:
|
||||
props_list.append(f"{k}:long[]")
|
||||
elif v in ["float", "double", "dbl"]:
|
||||
props_list.append(f"{k}:double")
|
||||
elif v in ["float[]", "double[]"]:
|
||||
props_list.append(f"{k}:double[]")
|
||||
elif v in ["bool", "boolean"]:
|
||||
# TODO Neo4j boolean support / spelling?
|
||||
props_list.append(f"{k}:boolean")
|
||||
elif v in ["bool[]", "boolean[]"]:
|
||||
props_list.append(f"{k}:boolean[]")
|
||||
elif v in ["str[]", "string[]"]:
|
||||
props_list.append(f"{k}:string[]")
|
||||
else:
|
||||
props_list.append(f"{k}")
|
||||
|
||||
# create list of lists and flatten
|
||||
out_list = [[_id], props_list, [":LABEL"]]
|
||||
out_list = [val for sublist in out_list for val in sublist]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
with open(header_path, "w", encoding="utf-8") as f:
|
||||
# Check if header file already exists and has different columns
|
||||
if os.path.exists(header_path):
|
||||
if existing_header:
|
||||
|
||||
# Compare existing and new headers
|
||||
if set(existing_header) != set(out_list):
|
||||
|
||||
# Get part files associated with this header
|
||||
base_name = os.path.basename(header_path).replace("-header.csv", "")
|
||||
part_files = glob.glob(os.path.join(os.path.dirname(header_path), f"{base_name}-part*.csv"))
|
||||
|
||||
|
||||
# Find the highest numbered part file without full sorting
|
||||
highest_part = None
|
||||
highest_number = -1
|
||||
|
||||
for part_file in part_files:
|
||||
try:
|
||||
# Extract number from filename (assuming format like "part123.csv")
|
||||
file_name = os.path.basename(part_file)
|
||||
number_part = file_name.split("part")[1].split(".")[0]
|
||||
number = int(number_part)
|
||||
|
||||
if number > highest_number:
|
||||
highest_number = number
|
||||
highest_part = part_file
|
||||
except (IndexError, ValueError):
|
||||
# Skip files that don't match the expected pattern
|
||||
continue
|
||||
# Update each part file with the new columns
|
||||
for part_file in part_files:
|
||||
if part_file == highest_part:
|
||||
print(f"Skipping the highest part file: {highest_part}")
|
||||
continue
|
||||
try:
|
||||
#print("exi: ", existing_header)
|
||||
#print("out: ", out_list)
|
||||
df = self.adapt_csv_to_new_header(existing_header, out_list, part_file)
|
||||
# Read the file without headers
|
||||
|
||||
# Write back to file WITHOUT including the header
|
||||
df.to_csv(part_file, sep=self.delim, index=False, header=False)
|
||||
print(f"Updated {part_file} with new columns in correct positions")
|
||||
except Exception as e:
|
||||
print(f"Error updating {part_file}: {e}")
|
||||
|
||||
# Write the new header
|
||||
row = self.delim.join(out_list)
|
||||
f.write(row)
|
||||
|
||||
|
||||
# add file path to the neo4j-admin import statement (import call file
|
||||
# path may be different from actual file path)
|
||||
import_call_header_path = os.path.join(
|
||||
self.import_call_file_prefix,
|
||||
header,
|
||||
)
|
||||
import_call_parts_path = os.path.join(
|
||||
self.import_call_file_prefix,
|
||||
parts,
|
||||
)
|
||||
self.import_call_nodes.add(
|
||||
(import_call_header_path, import_call_parts_path)
|
||||
)
|
||||
|
||||
return True
|
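To make the property-type mapping above concrete, here is a hedged, self-contained sketch of the header row that would result for a hypothetical node type with a string, a float and a string-array property, assuming ";" as the delimiter; property names and types are invented.

# Illustrative only: invented property dict, reduced version of the mapping above.
props = {"name": "str", "score": "float", "synonyms": "string[]"}
props_list = []
for k, v in props.items():
    if v in ["float", "double", "dbl"]:
        props_list.append(f"{k}:double")
    elif v in ["str[]", "string[]"]:
        props_list.append(f"{k}:string[]")
    else:
        props_list.append(k)
row = ";".join([":ID", *props_list, ":LABEL"])
print(row)  # :ID;name;score:double;synonyms:string[];:LABEL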
||||
|
||||
def _write_edge_headers(self):
|
||||
"""
|
||||
Writes a single CSV file for a graph entity that is represented
|
||||
as an edge as per the definition in the `schema_config.yaml`,
|
||||
containing only the header for this type of edge.
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
# load headers from data parse
|
||||
if not self.edge_property_dict:
|
||||
logger.error(
|
||||
"Header information not found. Was the data parsed first?",
|
||||
)
|
||||
return False
|
||||
|
||||
for label, props in self.edge_property_dict.items():
|
||||
# translate label to PascalCase
|
||||
pascal_label = self.translator.name_sentence_to_pascal(
|
||||
parse_label(label)
|
||||
)
|
||||
|
||||
# paths
|
||||
header = f"{pascal_label}-header.csv"
|
||||
header_path = os.path.join(
|
||||
self.outdir,
|
||||
header,
|
||||
)
|
||||
parts = f"{pascal_label}-part.*"
|
||||
|
||||
# check if the file already exists
|
||||
if os.path.exists(header_path):
|
||||
logger.warning(
|
||||
f"File {header_path} already exists. Overwriting."
|
||||
)
|
||||
|
||||
# concatenate key:value in props
|
||||
props_list = []
|
||||
for k, v in props.items():
|
||||
if v in ["int", "long", "integer"]:
|
||||
props_list.append(f"{k}:long")
|
||||
elif v in ["int[]", "long[]", "integer[]"]:
|
||||
props_list.append(f"{k}:long[]")
|
||||
elif v in ["float", "double"]:
|
||||
props_list.append(f"{k}:double")
|
||||
elif v in ["float[]", "double[]"]:
|
||||
props_list.append(f"{k}:double[]")
|
||||
elif v in [
|
||||
"bool",
|
||||
"boolean",
|
||||
]: # TODO does Neo4j support bool?
|
||||
props_list.append(f"{k}:boolean")
|
||||
elif v in ["bool[]", "boolean[]"]:
|
||||
props_list.append(f"{k}:boolean[]")
|
||||
elif v in ["str[]", "string[]"]:
|
||||
props_list.append(f"{k}:string[]")
|
||||
else:
|
||||
props_list.append(f"{k}")
|
||||
|
||||
skip_id = False
|
||||
schema_label = None
|
||||
|
||||
if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
|
||||
skip_id = True
|
||||
elif not self.translator.ontology.mapping.extended_schema.get(
|
||||
label
|
||||
):
|
||||
# find label in schema by label_as_edge
|
||||
for (
|
||||
k,
|
||||
v,
|
||||
) in self.translator.ontology.mapping.extended_schema.items():
|
||||
if v.get("label_as_edge") == label:
|
||||
schema_label = k
|
||||
break
|
||||
else:
|
||||
schema_label = label
|
||||
|
||||
out_list = [":START_ID"]
|
||||
|
||||
if schema_label:
|
||||
if (
|
||||
self.translator.ontology.mapping.extended_schema.get(
|
||||
schema_label
|
||||
).get("use_id")
|
||||
is False
|
||||
):
|
||||
skip_id = True
|
||||
|
||||
if not skip_id:
|
||||
out_list.append("id")
|
||||
|
||||
out_list.extend(props_list)
|
||||
out_list.extend([":END_ID", ":TYPE"])
|
||||
|
||||
existing_header = False
|
||||
# check if file already exists
|
||||
if os.path.exists(header_path):
|
||||
logger.warning(
|
||||
f"Header file `{header_path}` already exists. Overwriting.",
|
||||
)
|
||||
with open(header_path, "r", encoding="utf-8") as existing:
|
||||
existing_header = existing.read().strip().split(self.delim)
|
||||
|
||||
|
||||
with open(header_path, "w", encoding="utf-8") as f:
|
||||
# Check if header file already exists and has different columns
|
||||
if os.path.exists(header_path):
|
||||
if existing_header:
|
||||
|
||||
# Compare existing and new headers
|
||||
if set(existing_header) != set(out_list):
|
||||
|
||||
# Get part files associated with this header
|
||||
base_name = os.path.basename(header_path).replace("-header.csv", "")
|
||||
part_files = glob.glob(os.path.join(os.path.dirname(header_path), f"{base_name}-part*.csv"))
|
||||
|
||||
|
||||
# Find the highest numbered part file without full sorting
|
||||
highest_part = None
|
||||
highest_number = -1
|
||||
|
||||
for part_file in part_files:
|
||||
try:
|
||||
# Extract number from filename (assuming format like "part123.csv")
|
||||
file_name = os.path.basename(part_file)
|
||||
number_part = file_name.split("part")[1].split(".")[0]
|
||||
number = int(number_part)
|
||||
|
||||
if number > highest_number:
|
||||
highest_number = number
|
||||
highest_part = part_file
|
||||
except (IndexError, ValueError):
|
||||
# Skip files that don't match the expected pattern
|
||||
continue
|
||||
# Update each part file with the new columns
|
||||
for part_file in part_files:
|
||||
if part_file == highest_part:
|
||||
print(f"Skipping the highest part file: {highest_part}")
|
||||
continue
|
||||
try:
|
||||
print("exi: ", existing_header)
|
||||
print("out: ", out_list)
|
||||
df = self.adapt_csv_to_new_header(existing_header, out_list, part_file)
|
||||
# Read the file without headers
|
||||
|
||||
# Write back to file WITHOUT including the header
|
||||
df.to_csv(part_file, sep=self.delim, index=False, header=False)
|
||||
print(f"Updated {part_file} with new columns in correct positions")
|
||||
except Exception as e:
|
||||
print(f"Error updating {part_file}: {e}")
|
||||
|
||||
# Write the new header
|
||||
row = self.delim.join(out_list)
|
||||
f.write(row)
|
||||
|
||||
# add file path to the neo4j-admin import statement (import call file
|
||||
# path may be different from actual file path)
|
||||
import_call_header_path = os.path.join(
|
||||
self.import_call_file_prefix,
|
||||
header,
|
||||
)
|
||||
import_call_parts_path = os.path.join(
|
||||
self.import_call_file_prefix,
|
||||
parts,
|
||||
)
|
||||
self.import_call_edges.add(
|
||||
(import_call_header_path, import_call_parts_path)
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
def _get_import_script_name(self) -> str:
|
||||
"""
|
||||
Returns the name of the neo4j admin import script
|
||||
|
||||
Returns:
|
||||
str: The name of the import script (ending in .sh)
|
||||
"""
|
||||
return "neo4j-admin-import-call.sh"
|
||||
|
||||
def _construct_import_call(self) -> str:
|
||||
"""
|
||||
Function to construct the import call detailing folder and
|
||||
individual node and edge headers and data files, as well as
|
||||
delimiters and database name. Built after all data has been
|
||||
processed to ensure that nodes are called before any edges.
|
||||
|
||||
Returns:
|
||||
str: a bash command for neo4j-admin import
|
||||
"""
|
||||
import_call_neo4j_v4 = self._get_import_call(
|
||||
"import", "--database=", "--force="
|
||||
)
|
||||
import_call_neo4j_v5 = self._get_import_call(
|
||||
"database import full", "", "--overwrite-destination="
|
||||
)
|
||||
neo4j_version_check = f"version=$({self._get_default_import_call_bin_prefix()}neo4j-admin --version | cut -d '.' -f 1)"
|
||||
|
||||
import_script = f"#!/bin/bash\n{neo4j_version_check}\nif [[ $version -ge 5 ]]; then\n\t{import_call_neo4j_v5}\nelse\n\t{import_call_neo4j_v4}\nfi"
|
||||
return import_script
|
||||
|
||||
def _get_import_call(
|
||||
self, import_cmd: str, database_cmd: str, wipe_cmd: str
|
||||
) -> str:
|
||||
"""Get parametrized import call for Neo4j 4 or 5+.
|
||||
|
||||
Args:
|
||||
import_cmd (str): The import command to use.
|
||||
database_cmd (str): The database command to use.
|
||||
wipe_cmd (str): The wipe command to use.
|
||||
|
||||
Returns:
|
||||
str: The import call.
|
||||
"""
|
||||
import_call = f"{self.import_call_bin_prefix}neo4j-admin {import_cmd} "
|
||||
|
||||
import_call += f"{database_cmd}{self.db_name} "
|
||||
|
||||
import_call += f'--delimiter="{self.escaped_delim}" '
|
||||
|
||||
import_call += f'--array-delimiter="{self.escaped_adelim}" '
|
||||
|
||||
if self.quote == "'":
|
||||
import_call += f'--quote="{self.quote}" '
|
||||
else:
|
||||
import_call += f"--quote='{self.quote}' "
|
||||
|
||||
if self.wipe:
|
||||
import_call += f"{wipe_cmd}true "
|
||||
if self.skip_bad_relationships:
|
||||
import_call += "--skip-bad-relationships=true "
|
||||
if self.skip_duplicate_nodes:
|
||||
import_call += "--skip-duplicate-nodes=true "
|
||||
|
||||
# append node import calls
|
||||
for header_path, parts_path in self.import_call_nodes:
|
||||
import_call += f'--nodes="{header_path},{parts_path}" '
|
||||
|
||||
# append edge import calls
|
||||
for header_path, parts_path in self.import_call_edges:
|
||||
import_call += f'--relationships="{header_path},{parts_path}" '
|
||||
|
||||
return import_call
|
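As a rough illustration of the command assembled by _get_import_call, the Neo4j 5 variant could look like the following; the database name, delimiters, quote character and file paths are invented for the example.

# Illustrative only; all names and paths are assumptions.
example_call = (
    "bin/neo4j-admin database import full neo4j "
    '--delimiter=";" --array-delimiter="|" '
    "--quote=\"'\" "
    "--overwrite-destination=true "
    '--nodes="import/Protein-header.csv,import/Protein-part.*" '
    '--relationships="import/Interacts-header.csv,import/Interacts-part.*" '
)
print(example_call)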
||||
|
||||
|
||||
|
||||
|
||||
def adapt_csv_to_new_header(self, old_header, new_header, csv_file_path):
|
||||
"""
|
||||
Adapt a CSV table to a new header structure, placing new columns in their correct positions.
|
||||
|
||||
Parameters:
|
||||
old_header (list): The original header columns
|
||||
new_header (list): The new header columns
|
||||
csv_file_path (str): Path to the CSV file
|
||||
|
||||
Returns:
|
||||
pandas.DataFrame: CSV data with the new header structure
|
||||
"""
|
||||
|
||||
# Step 1: Read the CSV data without headers
|
||||
df = pd.read_csv(csv_file_path, sep=self.delim, header=None)
|
||||
|
||||
# Step 2: If the file is empty, return empty DataFrame with new headers
|
||||
if df.empty:
|
||||
return pd.DataFrame(columns=new_header)
|
||||
|
||||
# Step 3: If column count doesn't match old_header length, handle the mismatch
|
||||
if len(df.columns) != len(old_header):
|
||||
print(f"Warning: CSV columns count ({len(df.columns)}) doesn't match the provided old header count ({len(old_header)})")
|
||||
# If file has fewer columns than old_header, pad with NaN
|
||||
if len(df.columns) < len(old_header):
|
||||
for i in range(len(df.columns), len(old_header)):
|
||||
df[i] = None
|
||||
# If file has more columns than old_header, truncate
|
||||
else:
|
||||
df = df.iloc[:, :len(old_header)]
|
||||
|
||||
# Step 4: Assign old header names to the dataframe
|
||||
df.columns = old_header
|
||||
|
||||
# Step 5: Create a new DataFrame with the correct structure
|
||||
new_df = pd.DataFrame(columns=new_header)
|
||||
|
||||
# Step 6: For each column in the new header, find its position in the old header
|
||||
for new_col_idx, new_col in enumerate(new_header):
|
||||
if new_col in old_header:
|
||||
# If column exists in old header, copy data
|
||||
new_df[new_col] = df[new_col]
|
||||
else:
|
||||
# If new column, add empty column
|
||||
new_df[new_col] = None
|
||||
|
||||
# Step 7: Ensure columns are in the exact order of new_header
|
||||
new_df = new_df[new_header]
|
||||
|
||||
return new_df
|
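A hedged, self-contained illustration of the column re-mapping that adapt_csv_to_new_header performs; the headers and the single data row are invented, but the copy-by-name and fill-with-None behaviour mirrors the steps above.

import pandas as pd

old_header = ["_ID", "name"]
new_header = ["_ID", "name", "synonyms"]
# one existing data row, as it would sit in a part file without header
df = pd.DataFrame([["p1", "TP53"]], columns=old_header)
new_df = pd.DataFrame(columns=new_header)
for col in new_header:
    # copy known columns by name, fill newly introduced columns with None
    new_df[col] = df[col] if col in old_header else None
print(new_df)  # one row: p1, TP53, None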
76
biocypher/output/write/graph/_networkx.py
Normal file
@ -0,0 +1,76 @@
|
||||
import pickle
|
||||
|
||||
import networkx as nx
|
||||
|
||||
from biocypher._logger import logger
|
||||
from biocypher.output.write._writer import _Writer
|
||||
from biocypher.output.write.relational._csv import _PandasCSVWriter
|
||||
|
||||
|
||||
class _NetworkXWriter(_Writer):
|
||||
"""
|
||||
Class for writing node and edges to a networkx DiGraph.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.csv_writer = _PandasCSVWriter(*args, write_to_file=False, **kwargs)
|
||||
self.G = nx.DiGraph()
|
||||
|
||||
def _construct_import_call(self) -> str:
|
||||
"""Function to construct the Python code to load all node and edge csv files again into Pandas dfs.
|
||||
|
||||
Returns:
|
||||
str: Python code to load the csv files into Pandas dfs.
|
||||
"""
|
||||
logger.info(
|
||||
f"Writing networkx {self.G} to pickle file networkx_graph.pkl."
|
||||
)
|
||||
with open(f"{self.output_directory}/networkx_graph.pkl", "wb") as f:
|
||||
pickle.dump(self.G, f)
|
||||
|
||||
import_call = "import pickle\n"
|
||||
import_call += "with open('./networkx_graph.pkl', 'rb') as f:\n\tG_loaded = pickle.load(f)"
|
||||
return import_call
|
||||
|
||||
def _get_import_script_name(self) -> str:
|
||||
"""Function to return the name of the import script."""
|
||||
return "import_networkx.py"
|
||||
|
||||
def _write_node_data(self, nodes) -> bool:
|
||||
passed = self.csv_writer._write_entities_to_file(nodes)
|
||||
self.add_to_networkx()
|
||||
return passed
|
||||
|
||||
def _write_edge_data(self, edges) -> bool:
|
||||
passed = self.csv_writer._write_entities_to_file(edges)
|
||||
self.add_to_networkx()
|
||||
return passed
|
||||
|
||||
def add_to_networkx(self) -> bool:
|
||||
all_dfs = self.csv_writer.stored_dfs
|
||||
node_dfs = [
|
||||
df
|
||||
for df in all_dfs.values()
|
||||
if df.columns.str.contains("node_id").any()
|
||||
]
|
||||
edge_dfs = [
|
||||
df
|
||||
for df in all_dfs.values()
|
||||
if df.columns.str.contains("source_id").any()
|
||||
and df.columns.str.contains("target_id").any()
|
||||
]
|
||||
for df in node_dfs:
|
||||
nodes = df.set_index("node_id").to_dict(orient="index")
|
||||
self.G.add_nodes_from(nodes.items())
|
||||
for df in edge_dfs:
|
||||
edges = df.set_index(["source_id", "target_id"]).to_dict(
|
||||
orient="index"
|
||||
)
|
||||
self.G.add_edges_from(
|
||||
(
|
||||
(source, target, attrs)
|
||||
for (source, target), attrs in edges.items()
|
||||
)
|
||||
)
|
||||
return True
|
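The conversion from stored DataFrames to the DiGraph can be sketched in isolation as follows; the column names node_id, source_id and target_id are the ones checked above, while the data values are invented.

import networkx as nx
import pandas as pd

node_df = pd.DataFrame(
    {"node_id": ["a", "b"], "node_label": ["protein", "protein"]}
)
edge_df = pd.DataFrame(
    {"source_id": ["a"], "target_id": ["b"], "relationship_label": ["interacts"]}
)
G = nx.DiGraph()
# remaining columns become node / edge attributes
G.add_nodes_from(node_df.set_index("node_id").to_dict(orient="index").items())
edges = edge_df.set_index(["source_id", "target_id"]).to_dict(orient="index")
G.add_edges_from((s, t, attrs) for (s, t), attrs in edges.items())
print(G.nodes(data=True), G.edges(data=True))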
515
biocypher/output/write/graph/_rdf.py
Normal file
@ -0,0 +1,515 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Loes van den Biggelaar
|
||||
# Sebastian Lobentanzer
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
BioCypher 'offline' module. Handles the writing of node and edge representations
|
||||
suitable for import into a DBMS.
|
||||
"""
|
||||
from types import GeneratorType
|
||||
from typing import Union
|
||||
import os
|
||||
|
||||
from rdflib import DC, RDF, RDFS, SKOS, DCTERMS, Graph, Literal, Namespace
|
||||
from rdflib.namespace import (
|
||||
_NAMESPACE_PREFIXES_CORE,
|
||||
_NAMESPACE_PREFIXES_RDFLIB,
|
||||
)
|
||||
|
||||
from biocypher._create import BioCypherEdge, BioCypherNode
|
||||
from biocypher._logger import logger
|
||||
from biocypher.output.write._batch_writer import _BatchWriter
|
||||
|
||||
|
||||
class _RDFWriter(_BatchWriter):
|
||||
"""
|
||||
Class to write BioCypher's property graph into an RDF format using
|
||||
rdflib and all the extensions it supports (RDF/XML, N3, NTriples,
|
||||
N-Quads, Turtle, TriX, Trig and JSON-LD). By default the conversion
|
||||
is done keeping only the minimum information about node and edges,
|
||||
skipping all properties.
|
||||
"""
|
||||
|
||||
def _get_import_script_name(self) -> str:
|
||||
"""
|
||||
Returns the name of the RDF admin import script.
|
||||
This function is applicable for RDF export.
|
||||
|
||||
Returns:
|
||||
str: The name of the import script (ending in .sh)
|
||||
"""
|
||||
return "rdf-import-call.sh"
|
||||
|
||||
def _get_default_import_call_bin_prefix(self):
|
||||
"""
|
||||
Method to provide the default string for the import call bin prefix.
|
||||
|
||||
Returns:
|
||||
str: The default location for the RDF admin import location
|
||||
"""
|
||||
return "bin/"
|
||||
|
||||
def _is_rdf_format_supported(self, rdf_format: str) -> bool:
|
||||
"""
|
||||
Function to check if the specified RDF format is supported.
|
||||
|
||||
Args:
|
||||
rdf_format (str): The RDF format to check.
|
||||
|
||||
Returns:
|
||||
bool: Returns True if rdf format supported, False otherwise.
|
||||
"""
|
||||
supported_formats = [
|
||||
"xml",
|
||||
"n3",
|
||||
"turtle",
|
||||
"nt",
|
||||
"pretty-xml",
|
||||
"trix",
|
||||
"trig",
|
||||
"nquads",
|
||||
"json-ld",
|
||||
]
|
||||
if rdf_format not in supported_formats:
|
||||
logger.error(
|
||||
f"{rdf_format}; Incorrect or unsupported RDF format, use one of the following: "
|
||||
f'"xml", "n3", "turtle", "nt", "pretty-xml", "trix", "trig", "nquads", "json-ld" ',
|
||||
)
|
||||
return False
|
||||
else:
|
||||
# The RDF graph does not support the 'ttl' format, only 'turtle'. However, the preferred file extension is always '.ttl'.
|
||||
if self.rdf_format == "turtle":
|
||||
self.extension = "ttl"
|
||||
elif self.rdf_format == "ttl":
|
||||
self.rdf_format = "turtle"
|
||||
self.extension = "ttl"
|
||||
else:
|
||||
self.extension = self.rdf_format
|
||||
return True
|
||||
|
||||
def _write_single_edge_list_to_file(
|
||||
self,
|
||||
edge_list: list,
|
||||
label: str,
|
||||
prop_dict: dict,
|
||||
):
|
||||
"""
|
||||
This function takes one list of biocypher edges and writes them
|
||||
to an RDF file with the given format.
|
||||
|
||||
Args:
|
||||
edge_list (list): list of BioCypherEdges to be written
|
||||
|
||||
label (str): the label (type) of the edge
|
||||
|
||||
prop_dict (dict): properties of node class passed from parsing
|
||||
function and their types
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
|
||||
if not all(isinstance(n, BioCypherEdge) for n in edge_list):
|
||||
logger.error("Edges must be passed as type BioCypherEdge.")
|
||||
return False
|
||||
|
||||
# translate label to PascalCase
|
||||
label_pascal = self.translator.name_sentence_to_pascal(label)
|
||||
|
||||
# create file name
|
||||
file_name = os.path.join(
|
||||
self.outdir, f"{label_pascal}.{self.extension}"
|
||||
)
|
||||
|
||||
# write data in graph
|
||||
graph = Graph()
|
||||
self._init_namespaces(graph)
|
||||
|
||||
for edge in edge_list:
|
||||
rdf_subject = edge.get_source_id()
|
||||
rdf_object = edge.get_target_id()
|
||||
rdf_predicate = edge.get_id()
|
||||
rdf_properties = edge.get_properties()
|
||||
if rdf_predicate is None:
|
||||
rdf_predicate = rdf_subject + rdf_object
|
||||
|
||||
edge_label = self.translator.name_sentence_to_pascal(
|
||||
edge.get_label()
|
||||
)
|
||||
edge_uri = self.rdf_namespaces["biocypher"][edge_label]
|
||||
graph.add((edge_uri, RDF.type, RDFS.Class))
|
||||
graph.add(
|
||||
(
|
||||
self.rdf_namespaces["biocypher"][rdf_predicate],
|
||||
RDF.type,
|
||||
edge_uri,
|
||||
)
|
||||
)
|
||||
graph.add(
|
||||
(
|
||||
self.rdf_namespaces["biocypher"][rdf_predicate],
|
||||
self.rdf_namespaces["biocypher"]["subject"],
|
||||
self.subject_to_uri(rdf_subject),
|
||||
)
|
||||
)
|
||||
graph.add(
|
||||
(
|
||||
self.rdf_namespaces["biocypher"][rdf_predicate],
|
||||
self.rdf_namespaces["biocypher"]["object"],
|
||||
self.subject_to_uri(rdf_object),
|
||||
)
|
||||
)
|
||||
|
||||
# add properties to the transformed edge --> node
|
||||
for key, value in rdf_properties.items():
|
||||
# only write value if it exists.
|
||||
if value:
|
||||
self.add_property_to_graph(graph, rdf_predicate, value, key)
|
||||
|
||||
graph.serialize(destination=file_name, format=self.rdf_format)
|
||||
|
||||
logger.info(
|
||||
f"Writing {len(edge_list)} entries to {label_pascal}.{self.rdf_format}",
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
def add_property_to_graph(
|
||||
self,
|
||||
graph: Graph,
|
||||
rdf_subject: str,
|
||||
rdf_object: str,
|
||||
rdf_predicate: str,
|
||||
):
|
||||
"""
|
||||
Function to add the properties to an RDF node. It takes the graph, the subject, object, and predicate of the RDF triple.
|
||||
It checks if the property is a list and adds it to the graph accordingly. Otherwise, it checks whether the string represents a list.
|
||||
If it does, it transforms it into a list and adds it to the graph. If not, it adds the property to the graph as a literal.
|
||||
If the property is neither a list nor a string, it is also added as a literal.
|
||||
|
||||
Args:
|
||||
graph (RDFLib.Graph): The RDF graph to add the nodes to.
|
||||
|
||||
rdf_subject (str): The subject of the RDF triple.
|
||||
|
||||
rdf_object (str): The object of the RDF triple.
|
||||
|
||||
rdf_predicate (str): The predicate of the RDF triple.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
if isinstance(rdf_object, list):
|
||||
for obj in rdf_object:
|
||||
graph.add(
|
||||
(
|
||||
self.subject_to_uri(rdf_subject),
|
||||
self.property_to_uri(rdf_predicate),
|
||||
Literal(obj),
|
||||
)
|
||||
)
|
||||
elif isinstance(rdf_object, str):
|
||||
if rdf_object.startswith("[") and rdf_object.endswith("]"):
|
||||
self.add_property_to_graph(
|
||||
graph,
|
||||
rdf_subject,
|
||||
self.transform_string_to_list(rdf_object),
|
||||
rdf_predicate,
|
||||
)
|
||||
else:
|
||||
graph.add(
|
||||
(
|
||||
self.subject_to_uri(rdf_subject),
|
||||
self.property_to_uri(rdf_predicate),
|
||||
Literal(rdf_object),
|
||||
)
|
||||
)
|
||||
else:
|
||||
graph.add(
|
||||
(
|
||||
self.subject_to_uri(rdf_subject),
|
||||
self.property_to_uri(rdf_predicate),
|
||||
Literal(rdf_object),
|
||||
)
|
||||
)
|
||||
|
||||
def transform_string_to_list(self, string_list: str) -> list:
|
||||
"""
|
||||
Function to transform a string representation of a list into a list.
|
||||
|
||||
Args:
|
||||
string_list (str): The string representation of the list.
|
||||
|
||||
Returns:
|
||||
list: The list representation of the input string.
|
||||
"""
|
||||
return (
|
||||
string_list.replace("[", "")
|
||||
.replace("]", "")
|
||||
.replace("'", "")
|
||||
.split(", ")
|
||||
)
|
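A small example of this normalisation, with an invented input value:

# "['GO:0005515', 'GO:0046872']" -> ['GO:0005515', 'GO:0046872']
value = "['GO:0005515', 'GO:0046872']"
as_list = value.replace("[", "").replace("]", "").replace("'", "").split(", ")
print(as_list)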
||||
|
||||
def _write_single_node_list_to_file(
|
||||
self,
|
||||
node_list: list,
|
||||
label: str,
|
||||
prop_dict: dict,
|
||||
labels: str,
|
||||
):
|
||||
"""
|
||||
This function takes a list of BioCypherNodes and writes them
|
||||
to an RDF file in the specified format.
|
||||
|
||||
Args:
|
||||
node_list (list): A list of BioCypherNodes to be written.
|
||||
|
||||
label (str): The label (type) of the nodes.
|
||||
|
||||
prop_dict (dict): A dictionary of properties and their types for the node class.
|
||||
|
||||
Returns:
|
||||
bool: True if the writing is successful, False otherwise.
|
||||
"""
|
||||
if not all(isinstance(n, BioCypherNode) for n in node_list):
|
||||
logger.error("Nodes must be passed as type BioCypherNode.")
|
||||
return False
|
||||
|
||||
# translate label to PascalCase
|
||||
label_pascal = self.translator.name_sentence_to_pascal(label)
|
||||
|
||||
# create file name
|
||||
file_name = os.path.join(
|
||||
self.outdir, f"{label_pascal}.{self.extension}"
|
||||
)
|
||||
|
||||
# write data in graph
|
||||
graph = Graph()
|
||||
self._init_namespaces(graph)
|
||||
|
||||
for n in node_list:
|
||||
rdf_subject = n.get_id()
|
||||
rdf_object = n.get_label()
|
||||
properties = n.get_properties()
|
||||
class_name = self.translator.name_sentence_to_pascal(rdf_object)
|
||||
graph.add(
|
||||
(
|
||||
self.rdf_namespaces["biocypher"][class_name],
|
||||
RDF.type,
|
||||
RDFS.Class,
|
||||
)
|
||||
)
|
||||
graph.add(
|
||||
(
|
||||
self.subject_to_uri(rdf_subject),
|
||||
RDF.type,
|
||||
self.rdf_namespaces["biocypher"][class_name],
|
||||
)
|
||||
)
|
||||
for key, value in properties.items():
|
||||
# only write value if it exists.
|
||||
if value:
|
||||
self.add_property_to_graph(graph, rdf_subject, value, key)
|
||||
|
||||
graph.serialize(destination=file_name, format=self.rdf_format)
|
||||
|
||||
logger.info(
|
||||
f"Writing {len(node_list)} entries to {label_pascal}.{self.rdf_format}",
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
def write_nodes(
|
||||
self, nodes, batch_size: int = int(1e6), force: bool = False
|
||||
) -> bool:
|
||||
"""
|
||||
Wrapper for writing nodes in RDF format. It calls the _write_node_data() function, specifying the node data.
|
||||
|
||||
Args:
|
||||
nodes (list or generator): A list or generator of nodes in BioCypherNode format.
|
||||
batch_size (int): The number of nodes to write in each batch.
|
||||
force (bool): Flag to force the writing even if the output file already exists.
|
||||
|
||||
Returns:
|
||||
bool: True if the writing is successful, False otherwise.
|
||||
"""
|
||||
# check if specified output format is correct
|
||||
passed = self._is_rdf_format_supported(self.rdf_format)
|
||||
if not passed:
|
||||
logger.error("Error while writing node data, wrong RDF format")
|
||||
return False
|
||||
# write node data using _write_node_data method
|
||||
passed = self._write_node_data(nodes, batch_size, force)
|
||||
if not passed:
|
||||
logger.error("Error while writing node data.")
|
||||
return False
|
||||
return True
|
||||
|
||||
def write_edges(
|
||||
self,
|
||||
edges: Union[list, GeneratorType],
|
||||
batch_size: int = int(1e6),
|
||||
) -> bool:
|
||||
"""
|
||||
Wrapper for writing edges in RDF format. It calls _write_edge_data()
|
||||
function, specifying its edge data.
|
||||
|
||||
Args:
|
||||
edges (BioCypherEdge): a list or generator of edges in
|
||||
:py:class:`BioCypherEdge` format
|
||||
batch_size (int): The number of edges to write in each batch.
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
# check if specified output format is correct
|
||||
passed = self._is_rdf_format_supported(self.rdf_format)
|
||||
if not passed:
|
||||
logger.error("Error while writing edge data, wrong RDF format")
|
||||
return False
|
||||
# write edge data using _write_edge_data method
|
||||
passed = self._write_edge_data(edges, batch_size=batch_size)
|
||||
if not passed:
|
||||
logger.error("Error while writing edge data.")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _construct_import_call(self) -> str:
|
||||
"""
|
||||
Function to write the import call.
|
||||
This function is not applicable for RDF.
|
||||
|
||||
Returns:
|
||||
str: An empty string, as no import call is needed for RDF.
|
||||
"""
|
||||
return ""
|
||||
|
||||
def _write_array_string(self, string_list):
|
||||
"""
|
||||
Abstract method to write the string representation of an array into a .csv file
|
||||
as required by the RDF admin-import.
|
||||
This function is not applicable for RDF.
|
||||
|
||||
Args:
|
||||
string_list (list): list of ontology strings
|
||||
|
||||
Returns:
|
||||
bool: Always True; arrays need no special handling for RDF output.
|
||||
"""
|
||||
|
||||
return True
|
||||
|
||||
def _write_node_headers(self):
|
||||
"""
|
||||
Abstract method that takes care of importing properties of a graph entity that is represented
|
||||
as a node as per the definition in the `schema_config.yaml`
|
||||
This function is not applicable for RDF.
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
return True
|
||||
|
||||
def _write_edge_headers(self):
|
||||
"""
|
||||
Abstract method to write a database import-file for a graph entity that is represented
|
||||
as an edge as per the definition in the `schema_config.yaml`,
|
||||
containing only the header for this type of edge.
|
||||
This function is not applicable for RDF.
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
return True
|
||||
|
||||
def subject_to_uri(self, subject: str) -> str:
|
||||
"""
|
||||
Converts the subject to a proper URI using the available namespaces.
|
||||
If the conversion fails, it defaults to the biocypher prefix.
|
||||
|
||||
Args:
|
||||
subject (str): The subject to be converted to a URI.
|
||||
|
||||
Returns:
|
||||
str: The corresponding URI for the subject.
|
||||
"""
|
||||
try:
|
||||
_pref, _id = subject.split(":")
|
||||
|
||||
if _pref in self.rdf_namespaces.keys():
|
||||
return self.rdf_namespaces[_pref][_id]
|
||||
else:
|
||||
return self.rdf_namespaces["biocypher"][subject]
|
||||
except ValueError:
|
||||
return self.rdf_namespaces["biocypher"][subject]
|
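The prefix handling can be illustrated with two invented namespaces; a known CURIE prefix resolves against its namespace, and anything else falls back to the biocypher namespace, as in the method above.

from rdflib import Namespace

rdf_namespaces = {
    "biocypher": Namespace("https://biocypher.org/biocypher#"),
    "uniprot": Namespace("https://purl.uniprot.org/uniprot/"),  # assumed prefix
}
print(rdf_namespaces["uniprot"]["P04637"])     # known prefix: uniprot:P04637
print(rdf_namespaces["biocypher"]["some_id"])  # fallback for unknown prefixes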
||||
|
||||
def property_to_uri(self, property_name: str) -> dict[str, str]:
|
||||
"""
|
||||
Converts a property name to its corresponding URI.
|
||||
|
||||
This function takes a property name and searches for its corresponding URI in various namespaces.
|
||||
It first checks the core namespaces for rdflib, including owl, rdf, rdfs, xsd, and xml.
|
||||
|
||||
Args:
|
||||
property_name (str): The property name to be converted to a URI.
|
||||
|
||||
Returns:
|
||||
str: The corresponding URI for the input property name.
|
||||
"""
|
||||
# These namespaces are core for rdflib; owl, rdf, rdfs, xsd and xml
|
||||
for namespace in _NAMESPACE_PREFIXES_CORE.values():
|
||||
if property_name in namespace:
|
||||
return namespace[property_name]
|
||||
|
||||
# If the property name is not found in the core namespaces, search in the SKOS, DC, and DCTERMS namespaces
|
||||
for namespace in [SKOS, DC, DCTERMS]:
|
||||
if property_name in namespace:
|
||||
return namespace[property_name]
|
||||
|
||||
# If the property name is still not found, try other namespaces from rdflib.
|
||||
for namespace in _NAMESPACE_PREFIXES_RDFLIB.values():
|
||||
if property_name in namespace:
|
||||
return namespace[property_name]
|
||||
|
||||
# If the property name is "licence", it recursively calls the function with "license" as the input.
|
||||
if property_name == "licence":
|
||||
return self.property_to_uri("license")
|
||||
|
||||
# TODO: add an option to search through manually implemented namespaces
|
||||
|
||||
# If the input is not found in any of the namespaces, it returns the corresponding URI from the biocypher namespace.
|
||||
# TODO: give a warning and try to prevent this option altogether
|
||||
return self.rdf_namespaces["biocypher"][property_name]
|
||||
|
||||
def _init_namespaces(self, graph: Graph):
|
||||
"""
|
||||
Initializes the namespaces for the RDF graph. These namespaces are used to convert nodes to URIs.
|
||||
|
||||
This function adds the biocypher standard namespace to the `rdf_namespaces` attribute of the class.
|
||||
If `rdf_namespaces` is empty, it sets it to the biocypher standard namespace. Otherwise, it merges
|
||||
the biocypher standard namespace with the namespaces defined in the biocypher_config.yaml.
|
||||
|
||||
Args:
|
||||
graph (RDFLib.Graph): The RDF graph to bind the namespaces to.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
# add biocypher standard to self.rdf_namespaces
|
||||
biocypher_standard = {"biocypher": "https://biocypher.org/biocypher#"}
|
||||
if not self.rdf_namespaces:
|
||||
self.rdf_namespaces = biocypher_standard
|
||||
else:
|
||||
self.rdf_namespaces = self.rdf_namespaces | biocypher_standard
|
||||
|
||||
for key, value in self.rdf_namespaces.items():
|
||||
namespace = Namespace(value)
|
||||
self.rdf_namespaces[key] = namespace
|
||||
graph.bind(key, namespace)
|
0
biocypher/output/write/relational/__init__.py
Normal file
76
biocypher/output/write/relational/_csv.py
Normal file
@ -0,0 +1,76 @@
|
||||
from more_itertools import peekable
|
||||
|
||||
from biocypher._logger import logger
|
||||
from biocypher.output.write._writer import _Writer
|
||||
from biocypher.output.in_memory._pandas import Pandas
|
||||
|
||||
|
||||
class _PandasCSVWriter(_Writer):
|
||||
"""
|
||||
Class for writing node and edge representations to a CSV file.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, write_to_file: bool = True, **kwargs):
|
||||
kwargs["write_to_file"] = write_to_file
|
||||
super().__init__(*args, **kwargs)
|
||||
self.in_memory_dfs = {}
|
||||
self.stored_dfs = {}
|
||||
self.pandas_in_memory = Pandas(
|
||||
translator=self.translator,
|
||||
deduplicator=self.deduplicator,
|
||||
)
|
||||
self.delimiter = kwargs.get("delimiter")
|
||||
if not self.delimiter:
|
||||
self.delimiter = ","
|
||||
self.write_to_file = write_to_file
|
||||
|
||||
def _construct_import_call(self) -> str:
|
||||
"""Function to construct the Python code to load all node and edge csv files again into Pandas dfs.
|
||||
|
||||
Returns:
|
||||
str: Python code to load the csv files into Pandas dfs.
|
||||
"""
|
||||
import_call = "import pandas as pd\n\n"
|
||||
for df_name in self.stored_dfs.keys():
|
||||
import_call += f"{df_name} = pd.read_csv('./{df_name}.csv', header=0, index_col=0)\n"
|
||||
return import_call
|
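For reference, a hedged sketch of what the generated import_pandas_csv.py could contain for a single stored DataFrame; the name "Protein" is invented.

# Illustrative only: content of the generated script for one stored DataFrame.
generated = (
    "import pandas as pd\n\n"
    "Protein = pd.read_csv('./Protein.csv', header=0, index_col=0)\n"
)
print(generated)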
||||
|
||||
def _get_import_script_name(self) -> str:
|
||||
"""Function to return the name of the import script."""
|
||||
return "import_pandas_csv.py"
|
||||
|
||||
def _write_node_data(self, nodes) -> bool:
|
||||
passed = self._write_entities_to_file(nodes)
|
||||
return passed
|
||||
|
||||
def _write_edge_data(self, edges) -> bool:
|
||||
passed = self._write_entities_to_file(edges)
|
||||
return passed
|
||||
|
||||
def _write_entities_to_file(self, entities: iter) -> bool:
|
||||
"""Function to output.write the entities to a CSV file.
|
||||
|
||||
Args:
|
||||
entities (iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
|
||||
"""
|
||||
entities = peekable(entities)
|
||||
entity_list = self.pandas_in_memory._separate_entity_types(entities)
|
||||
for entity_type, entities in entity_list.items():
|
||||
self.in_memory_dfs[
|
||||
entity_type
|
||||
] = self.pandas_in_memory._add_entity_df(entity_type, entities)
|
||||
for entity_type in self.in_memory_dfs.keys():
|
||||
entity_df = self.in_memory_dfs[entity_type]
|
||||
if " " in entity_type or "." in entity_type:
|
||||
entity_type = entity_type.replace(" ", "_").replace(".", "_")
|
||||
if self.write_to_file:
|
||||
logger.info(
|
||||
f"Writing {entity_df.shape[0]} entries to {entity_type}.csv."
|
||||
)
|
||||
entity_df.to_csv(
|
||||
f"{self.output_directory}/{entity_type}.csv",
|
||||
sep=self.delimiter,
|
||||
)
|
||||
self.stored_dfs[entity_type] = entity_df
|
||||
self.in_memory_dfs = {}
|
||||
return True
|
320
biocypher/output/write/relational/_postgresql.py
Normal file
@ -0,0 +1,320 @@
|
||||
import os
|
||||
import glob
|
||||
|
||||
from biocypher._logger import logger
|
||||
from biocypher.output.write._batch_writer import _BatchWriter
|
||||
|
||||
|
||||
class _PostgreSQLBatchWriter(_BatchWriter):
|
||||
"""
|
||||
Class for writing node and edge representations to disk using the
|
||||
format specified by PostgreSQL for the use of "COPY FROM...". Each batch
|
||||
writer instance has a fixed representation that needs to be passed
|
||||
at instantiation via the :py:attr:`schema` argument. The instance
|
||||
also expects an ontology adapter via :py:attr:`ontology_adapter` to be able
|
||||
to convert and extend the hierarchy.
|
||||
|
||||
This class inherits from the abstract class "_BatchWriter" and implements the
|
||||
PostgreSQL-specific methods:
|
||||
|
||||
- _write_node_headers
|
||||
- _write_edge_headers
|
||||
- _construct_import_call
|
||||
- _write_array_string
|
||||
"""
|
||||
|
||||
DATA_TYPE_LOOKUP = {
|
||||
"str": "VARCHAR", # VARCHAR needs limit
|
||||
"int": "INTEGER",
|
||||
"long": "BIGINT",
|
||||
"float": "NUMERIC",
|
||||
"double": "NUMERIC",
|
||||
"dbl": "NUMERIC",
|
||||
"boolean": "BOOLEAN",
|
||||
"str[]": "VARCHAR[]",
|
||||
"string[]": "VARCHAR[]",
|
||||
}
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self._copy_from_csv_commands = set()
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def _get_default_import_call_bin_prefix(self):
|
||||
"""
|
||||
Method to provide the default string for the import call bin prefix.
|
||||
|
||||
Returns:
|
||||
str: The default location for the psql command
|
||||
"""
|
||||
return ""
|
||||
|
||||
def _get_data_type(self, string) -> str:
|
||||
try:
|
||||
return self.DATA_TYPE_LOOKUP[string]
|
||||
except KeyError:
|
||||
logger.info(
|
||||
'Could not determine data type {string}. Using default "VARCHAR"'
|
||||
)
|
||||
return "VARCHAR"
|
||||
|
||||
def _write_array_string(self, string_list) -> str:
|
||||
"""
|
||||
Method to write the string representation of an array into a .csv file
|
||||
as required by the postgresql COPY command, with '{','}' brackets and ',' separation.
|
||||
|
||||
Args:
|
||||
string_list (list): list of ontology strings
|
||||
|
||||
Returns:
|
||||
str: The string representation of an array for postgres COPY
|
||||
"""
|
||||
string = ",".join(string_list)
|
||||
string = f'"{{{string}}}"'
|
||||
return string
|
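A short example of the array formatting for the PostgreSQL COPY command, with invented values:

# ["kinase", "enzyme"] -> "{kinase,enzyme}", quoted so COPY reads it as one field
string_list = ["kinase", "enzyme"]
string = ",".join(string_list)
print(f'"{{{string}}}"')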
||||
|
||||
def _get_import_script_name(self) -> str:
|
||||
"""
|
||||
Returns the name of the psql import script
|
||||
|
||||
Returns:
|
||||
str: The name of the import script (ending in .sh)
|
||||
"""
|
||||
return f"{self.db_name}-import-call.sh"
|
||||
|
||||
def _adjust_pascal_to_psql(self, string):
|
||||
string = string.replace(".", "_")
|
||||
string = string.lower()
|
||||
return string
|
||||
|
||||
def _write_node_headers(self):
|
||||
"""
|
||||
Writes a single CSV file for a graph entity that is represented
|
||||
as a node as per the definition in the `schema_config.yaml`,
|
||||
containing only the header for this type of node.
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
# load headers from data parse
|
||||
if not self.node_property_dict:
|
||||
logger.error(
|
||||
"Header information not found. Was the data parsed first?",
|
||||
)
|
||||
return False
|
||||
|
||||
for label, props in self.node_property_dict.items():
|
||||
# create header CSV with ID, properties, labels
|
||||
|
||||
# translate label to PascalCase
|
||||
pascal_label = self.translator.name_sentence_to_pascal(label)
|
||||
|
||||
parts = f"{pascal_label}-part*.csv"
|
||||
parts_paths = os.path.join(self.outdir, parts)
|
||||
parts_paths = glob.glob(parts_paths)
|
||||
parts_paths.sort()
|
||||
|
||||
# adjust label for import to psql
|
||||
pascal_label = self._adjust_pascal_to_psql(pascal_label)
|
||||
table_create_command_path = os.path.join(
|
||||
self.outdir,
|
||||
f"{pascal_label}-create_table.sql",
|
||||
)
|
||||
|
||||
# check if file already exists
|
||||
if os.path.exists(table_create_command_path):
|
||||
logger.warning(
|
||||
f"File {table_create_command_path} already exists. Overwriting.",
|
||||
)
|
||||
|
||||
# concatenate key:value in props
|
||||
columns = ["_ID VARCHAR"]
|
||||
for col_name, col_type in props.items():
|
||||
col_type = self._get_data_type(col_type)
|
||||
col_name = self._adjust_pascal_to_psql(col_name)
|
||||
columns.append(f"{col_name} {col_type}")
|
||||
columns.append("_LABEL VARCHAR[]")
|
||||
|
||||
with open(table_create_command_path, "w", encoding="utf-8") as f:
|
||||
command = ""
|
||||
if self.wipe:
|
||||
command += f"DROP TABLE IF EXISTS {pascal_label};\n"
|
||||
|
||||
# table creation requires comma separation
|
||||
command += (
|
||||
f'CREATE TABLE {pascal_label}({",".join(columns)});\n'
|
||||
)
|
||||
f.write(command)
|
||||
|
||||
for parts_path in parts_paths:
|
||||
# if import_call_file_prefix is set, replace actual path
|
||||
# with prefix
|
||||
if self.import_call_file_prefix != self.outdir:
|
||||
parts_path = parts_path.replace(
|
||||
self.outdir,
|
||||
self.import_call_file_prefix,
|
||||
)
|
||||
|
||||
self._copy_from_csv_commands.add(
|
||||
f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;"
|
||||
)
|
||||
|
||||
# add file path to import statement
|
||||
# if import_call_file_prefix is set, replace actual path
|
||||
# with prefix
|
||||
if self.import_call_file_prefix != self.outdir:
|
||||
table_create_command_path = table_create_command_path.replace(
|
||||
self.outdir,
|
||||
self.import_call_file_prefix,
|
||||
)
|
||||
|
||||
self.import_call_nodes.add(table_create_command_path)
|
||||
|
||||
return True
|
||||
|
||||
def _write_edge_headers(self):
|
||||
"""
|
||||
Writes a single CSV file for a graph entity that is represented
|
||||
as an edge as per the definition in the `schema_config.yaml`,
|
||||
containing only the header for this type of edge.
|
||||
|
||||
Returns:
|
||||
bool: The return value. True for success, False otherwise.
|
||||
"""
|
||||
# load headers from data parse
|
||||
if not self.edge_property_dict:
|
||||
logger.error(
|
||||
"Header information not found. Was the data parsed first?",
|
||||
)
|
||||
return False
|
||||
|
||||
for label, props in self.edge_property_dict.items():
|
||||
# translate label to PascalCase
|
||||
pascal_label = self.translator.name_sentence_to_pascal(label)
|
||||
|
||||
parts_paths = os.path.join(self.outdir, f"{pascal_label}-part*.csv")
|
||||
parts_paths = glob.glob(parts_paths)
|
||||
parts_paths.sort()
|
||||
|
||||
# adjust label for import to psql
|
||||
pascal_label = self._adjust_pascal_to_psql(pascal_label)
|
||||
table_create_command_path = os.path.join(
|
||||
self.outdir,
|
||||
f"{pascal_label}-create_table.sql",
|
||||
)
|
||||
|
||||
# check if the file already exists
|
||||
if os.path.exists(table_create_command_path):
|
||||
logger.warning(
|
||||
f"File {table_create_command_path} already exists. Overwriting.",
|
||||
)
|
||||
|
||||
# concatenate key:value in props
|
||||
columns = []
|
||||
for col_name, col_type in props.items():
|
||||
col_type = self._get_data_type(col_type)
|
||||
col_name = self._adjust_pascal_to_psql(col_name)
|
||||
if col_name == "_ID":
|
||||
# should ideally never happen
|
||||
raise ValueError(
|
||||
"Column name '_ID' is reserved for internal use, "
|
||||
"denoting the relationship ID. Please choose a "
|
||||
"different name for your column."
|
||||
)
|
||||
|
||||
columns.append(f"{col_name} {col_type}")
|
||||
|
||||
# create list of lists and flatten
|
||||
# removes need for empty check of property list
|
||||
out_list = [
|
||||
"_START_ID VARCHAR",
|
||||
"_ID VARCHAR",
|
||||
*columns,
|
||||
"_END_ID VARCHAR",
|
||||
"_TYPE VARCHAR",
|
||||
]
|
||||
|
||||
with open(table_create_command_path, "w", encoding="utf-8") as f:
|
||||
command = ""
|
||||
if self.wipe:
|
||||
command += f"DROP TABLE IF EXISTS {pascal_label};\n"
|
||||
|
||||
# table creation requires comma separation
|
||||
command += (
|
||||
f'CREATE TABLE {pascal_label}({",".join(out_list)});\n'
|
||||
)
|
||||
f.write(command)
|
||||
|
||||
for parts_path in parts_paths:
|
||||
# if import_call_file_prefix is set, replace actual path
|
||||
# with prefix
|
||||
if self.import_call_file_prefix != self.outdir:
|
||||
parts_path = parts_path.replace(
|
||||
self.outdir,
|
||||
self.import_call_file_prefix,
|
||||
)
|
||||
|
||||
self._copy_from_csv_commands.add(
|
||||
f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;"
|
||||
)
|
||||
|
||||
# add file path to import statement
|
||||
# if import_call_file_prefix is set, replace actual path
|
||||
# with prefix
|
||||
if self.import_call_file_prefix != self.outdir:
|
||||
table_create_command_path = table_create_command_path.replace(
|
||||
self.outdir,
|
||||
self.import_call_file_prefix,
|
||||
)
|
||||
|
||||
self.import_call_edges.add(table_create_command_path)
|
||||
|
||||
return True
|
||||
|
||||
def _construct_import_call(self) -> str:
|
||||
"""
|
||||
Function to construct the import call detailing folder and
|
||||
individual node and edge headers and data files, as well as
|
||||
delimiters and database name. Built after all data has been
|
||||
processed to ensure that nodes are called before any edges.
|
||||
|
||||
Returns:
|
||||
str: a bash command for postgresql import
|
||||
"""
|
||||
import_call = ""
|
||||
|
||||
# create tables
|
||||
# At this point, csv files of nodes and edges do not require differentiation
|
||||
for import_file_path in [
|
||||
*self.import_call_nodes,
|
||||
*self.import_call_edges,
|
||||
]:
|
||||
import_call += f'echo "Setup {import_file_path}..."\n'
|
||||
if self.db_password:
|
||||
# set password variable inline
|
||||
import_call += f"PGPASSWORD={self.db_password} "
|
||||
import_call += (
|
||||
f"{self.import_call_bin_prefix}psql -f {import_file_path}"
|
||||
)
|
||||
import_call += f" --dbname {self.db_name}"
|
||||
import_call += f" --host {self.db_host}"
|
||||
import_call += f" --port {self.db_port}"
|
||||
import_call += f" --user {self.db_user}"
|
||||
import_call += '\necho "Done!"\n'
|
||||
import_call += "\n"
|
||||
|
||||
# copy data to tables
|
||||
for command in self._copy_from_csv_commands:
|
||||
table_part = command.split(" ")[3]
|
||||
import_call += f'echo "Importing {table_part}..."\n'
|
||||
if self.db_password:
|
||||
# set password variable inline
|
||||
import_call += f"PGPASSWORD={self.db_password} "
|
||||
import_call += f'{self.import_call_bin_prefix}psql -c "{command}"'
|
||||
import_call += f" --dbname {self.db_name}"
|
||||
import_call += f" --host {self.db_host}"
|
||||
import_call += f" --port {self.db_port}"
|
||||
import_call += f" --user {self.db_user}"
|
||||
import_call += '\necho "Done!"\n'
|
||||
import_call += "\n"
|
||||
|
||||
return import_call
|
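As a hedged illustration, one block of the generated PostgreSQL import script could look like the following; the table name, password and connection parameters are invented.

# Illustrative only; all values are assumptions.
example_block = (
    'echo "Setup import/protein-create_table.sql..."\n'
    "PGPASSWORD=secret psql -f import/protein-create_table.sql"
    " --dbname biocypher --host localhost --port 5432 --user postgres"
    '\necho "Done!"\n'
)
print(example_block)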
51
biocypher/output/write/relational/_sqlite.py
Normal file
@ -0,0 +1,51 @@
|
||||
from biocypher.output.write.relational._postgresql import _PostgreSQLBatchWriter
|
||||
|
||||
|
||||
class _SQLiteBatchWriter(_PostgreSQLBatchWriter):
|
||||
"""
|
||||
Class for writing node and edge representations to a SQLite database.
|
||||
It uses the _PostgreSQLBatchWriter class under the hood, which already
|
||||
implements the logic to write the nodes/edges to a relational DBMS.
|
||||
Only the import bash script differs between PostgreSQL and SQLite
|
||||
and is therefore implemented in this class.
|
||||
|
||||
- _construct_import_call
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def _construct_import_call(self) -> str:
|
||||
"""
|
||||
Function to construct the import call detailing folder and
|
||||
individual node and edge headers and data files, as well as
|
||||
delimiters and database name. Built after all data has been
|
||||
processed to ensure that nodes are called before any edges.
|
||||
|
||||
Returns:
|
||||
str: a bash command for sqlite import
|
||||
"""
|
||||
import_call = ""
|
||||
|
||||
# create tables
|
||||
# At this point, csv files of nodes and edges do not require differentiation
|
||||
for import_file_path in [
|
||||
*self.import_call_nodes,
|
||||
*self.import_call_edges,
|
||||
]:
|
||||
import_call += f'echo "Setup {import_file_path}..."\n'
|
||||
import_call += f"{self.import_call_bin_prefix}sqlite3 {self.db_name} < {import_file_path}"
|
||||
import_call += '\necho "Done!"\n'
|
||||
import_call += "\n"
|
||||
|
||||
for command in self._copy_from_csv_commands:
|
||||
table_name = command.split(" ")[1]
|
||||
table_part = command.split(" ")[3].replace("'", "")
|
||||
import_call += f'echo "Importing {table_part}..."\n'
|
||||
separator = self.delim
|
||||
import_part = f".import {table_part} {table_name}"
|
||||
import_call += f"{self.import_call_bin_prefix}sqlite3 -separator $'{separator}' {self.db_name} \"{import_part}\""
|
||||
import_call += '\necho "Done!"\n'
|
||||
import_call += "\n"
|
||||
|
||||
return import_call
|
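For comparison, the SQLite variant emits two kinds of lines per type, sketched here with invented file and table names.

# Illustrative only; database, file and table names are assumptions.
setup_line = "sqlite3 biocypher.db < import/protein-create_table.sql"
load_line = (
    "sqlite3 -separator $';' biocypher.db "
    '".import import/Protein-part000.csv protein"'
)
print(setup_line)
print(load_line)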