release commit

2025-04-16 22:12:19 +02:00
commit a9db0be88a
89 changed files with 2336827 additions and 0 deletions


@ -0,0 +1,422 @@
#!/usr/bin/env python
#
# Copyright 2021, Heidelberg University Clinic
#
# File author(s): Sebastian Lobentanzer
# ...
#
# Distributed under MIT licence, see the file `LICENSE`.
#
"""
BioCypher 'online' mode. Handles connection and manipulation of a running DBMS.
"""
from biocypher._logger import logger
logger.debug(f"Loading module {__name__}.")
from collections.abc import Iterable
import itertools
import neo4j_utils
from biocypher import _misc
from biocypher._config import config as _config
from biocypher._create import BioCypherEdge, BioCypherNode
from biocypher._translate import Translator
__all__ = ["_Neo4jDriver"]
class _Neo4jDriver:
"""
Manages a BioCypher connection to a Neo4j database using the
``neo4j_utils.Driver`` class.
Args:
database_name (str): The name of the database to connect to.
wipe (bool): Whether to wipe the database before importing.
uri (str): The URI of the database.
user (str): The username to use for authentication.
password (str): The password to use for authentication.
multi_db (bool): Whether to use multi-database mode.
fetch_size (int): The number of records to fetch at a time.
increment_version (bool): Whether to increment the version number.
translator (Translator): The translator to use for mapping.
"""
def __init__(
self,
database_name: str,
uri: str,
user: str,
password: str,
multi_db: bool,
translator: Translator,
wipe: bool = False,
fetch_size: int = 1000,
increment_version: bool = True,
):
self.translator = translator
self._driver = neo4j_utils.Driver(
db_name=database_name,
db_uri=uri,
db_user=user,
db_passwd=password,
fetch_size=fetch_size,
wipe=wipe,
multi_db=multi_db,
raise_errors=True,
)
# check for biocypher config in connected graph
if wipe:
self.init_db()
if increment_version:
# set new current version node
self._update_meta_graph()
def _update_meta_graph(self):
logger.info("Updating Neo4j meta graph.")
# find current version node
db_version = self._driver.query(
"MATCH (v:BioCypher) " "WHERE NOT (v)-[:PRECEDES]->() " "RETURN v",
)
# add version node
self.add_biocypher_nodes(self.translator.ontology)
# connect version node to previous
if db_version[0]:
previous = db_version[0][0]
previous_id = previous["v"]["id"]
e_meta = BioCypherEdge(
previous_id,
self.translator.ontology.get_dict().get("node_id"),
"PRECEDES",
)
self.add_biocypher_edges(e_meta)
def init_db(self):
"""
Used to initialise a property graph database by setting up new
constraints. The wipe has already been performed by the
``neo4j_utils.Driver`` class.
Todo:
- set up constraint creation interactively depending on the
need of the database
"""
logger.info("Initialising database.")
self._create_constraints()
def _create_constraints(self):
"""
Creates constraints on node types in the graph. Used for
initial setup.
Grabs leaves of the ``schema_config.yaml`` file and creates
constraints on the id of all entities represented as nodes.
"""
logger.info("Creating constraints for node types in config.")
major_neo4j_version = int(self._get_neo4j_version().split(".")[0])
# get structure
for leaf in self.translator.ontology.mapping.extended_schema.items():
label = _misc.sentencecase_to_pascalcase(leaf[0], sep=r"\s\.")
if leaf[1]["represented_as"] == "node":
if major_neo4j_version >= 5:
s = (
f"CREATE CONSTRAINT `{label}_id` "
f"IF NOT EXISTS FOR (n:`{label}`) "
"REQUIRE n.id IS UNIQUE"
)
self._driver.query(s)
else:
s = (
f"CREATE CONSTRAINT `{label}_id` "
f"IF NOT EXISTS ON (n:`{label}`) "
"ASSERT n.id IS UNIQUE"
)
self._driver.query(s)
def _get_neo4j_version(self):
"""Get neo4j version."""
try:
neo4j_version = self._driver.query(
"""
CALL dbms.components()
YIELD name, versions, edition
UNWIND versions AS version
RETURN version AS version
""",
)[0][0]["version"]
return neo4j_version
except Exception as e:
logger.warning(
f"Error detecting Neo4j version: {e} use default version 4.0.0."
)
return "4.0.0"
def add_nodes(self, id_type_tuples: Iterable[tuple]) -> tuple:
"""
Generic node adder method to add any kind of input to the graph via the
:class:`biocypher.create.BioCypherNode` class. Employs translation
functionality and calls the :meth:`add_biocypher_nodes()` method.
Args:
id_type_tuples (iterable of 3-tuple): for each node to add to
the biocypher graph, a 3-tuple with the following layout:
first, the (unique if constrained) ID of the node; second, the
type of the node, capitalised or PascalCase and in noun form
(Neo4j primary label, eg `:Protein`); and third, a dictionary
of arbitrary properties the node should possess (can be empty).
Returns:
2-tuple: the query result of :meth:`add_biocypher_nodes()`
- first entry: data
- second entry: Neo4j summary.
"""
bn = self.translator.translate_nodes(id_type_tuples)
return self.add_biocypher_nodes(bn)
def add_edges(self, id_src_tar_type_tuples: Iterable[tuple]) -> tuple:
"""
Generic edge adder method to add any kind of input to the graph
via the :class:`biocypher.create.BioCypherEdge` class. Employs
translation functionality and calls the
:meth:`add_biocypher_edges()` method.
Args:
id_src_tar_type_tuples (iterable of 5-tuple):
for each edge to add to the biocypher graph, a 5-tuple
with the following layout: first, the optional unique ID
of the interaction. This can be `None` if there is no
systematic identifier (which for many interactions is
the case). Second and third, the (unique if constrained)
IDs of the source and target nodes of the relationship;
fourth, the type of the relationship; and fifth, a
dictionary of arbitrary properties the edge should
possess (can be empty).
Returns:
2-tuple: the query result of :meth:`add_biocypher_edges()`
- first entry: data
- second entry: Neo4j summary.
"""
bn = self.translator.translate_edges(id_src_tar_type_tuples)
return self.add_biocypher_edges(bn)
def add_biocypher_nodes(
self,
nodes: Iterable[BioCypherNode],
explain: bool = False,
profile: bool = False,
) -> bool:
"""
Accepts a node type handoff class
(:class:`biocypher.create.BioCypherNode`) with id,
label, and a dict of properties (passing on the type of
property, ie, ``int``, ``str``, ...).
The dict retrieved by the
:meth:`biocypher.create.BioCypherNode.get_dict()` method is
passed into Neo4j as a map of maps, explicitly encoding node id
and label, and adding all other properties from the 'properties'
key of the dict. The merge is performed via APOC, matching only
on node id to prevent duplicates. The same properties are set on
match and on create, irrespective of the actual event.
Args:
nodes:
An iterable of :class:`biocypher.create.BioCypherNode` objects.
explain:
Call ``EXPLAIN`` on the CYPHER query.
profile:
Do profiling on the CYPHER query.
Returns:
True for success, False otherwise.
"""
try:
nodes = _misc.to_list(nodes)
entities = [node.get_dict() for node in nodes]
except AttributeError:
msg = "Nodes must have a `get_dict` method."
logger.error(msg)
raise ValueError(msg)
logger.info(f"Merging {len(entities)} nodes.")
entity_query = (
"UNWIND $entities AS ent "
"CALL apoc.merge.node([ent.node_label], "
"{id: ent.node_id}, ent.properties, ent.properties) "
"YIELD node "
"RETURN node"
)
method = "explain" if explain else "profile" if profile else "query"
result = getattr(self._driver, method)(
entity_query,
parameters={
"entities": entities,
},
)
logger.info("Finished merging nodes.")
return result
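# --- Illustrative note (not part of the original code) ---
# The `entities` parameter bound to the APOC merge query above is a list of
# dicts of the shape produced by `BioCypherNode.get_dict()`, for example:
#
#   {"node_id": "uniprot:P12345", "node_label": "Protein",
#    "properties": {"name": "example protein"}}
#
# The id and property values shown here are placeholders.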
def add_biocypher_edges(
self,
edges: Iterable[BioCypherEdge],
explain: bool = False,
profile: bool = False,
) -> bool:
"""
Accepts an edge type handoff class
(:class:`biocypher.create.BioCypherEdge`) with source
and target ids, label, and a dict of properties (passing on the
type of property, ie, int, string ...).
The individual edge is either passed as a singleton, in the case
of representation as an edge in the graph, or as a 4-tuple, in
the case of representation as a node (with two edges connecting
to interaction partners).
The dict retrieved by the
:meth:`biocypher.create.BioCypherEdge.get_dict()` method is
passed into Neo4j as a map of maps, explicitly encoding source
and target ids and the relationship label, and adding all edge
properties from the 'properties' key of the dict. The merge is
performed via APOC, matching only on source and target id to
prevent duplicates. The same properties are set on match and on
create, irrespective of the actual event.
Args:
edges:
An iterable of :class:`biocypher.create.BioCypherEdge` objects.
explain:
Call ``EXPLAIN`` on the CYPHER query.
profile:
Do profiling on the CYPHER query.
Returns:
`True` for success, `False` otherwise.
"""
edges = _misc.ensure_iterable(edges)
edges = itertools.chain(*(_misc.ensure_iterable(i) for i in edges))
nodes = []
rels = []
try:
for e in edges:
if hasattr(e, "get_node"):
nodes.append(e.get_node())
rels.append(e.get_source_edge().get_dict())
rels.append(e.get_target_edge().get_dict())
else:
rels.append(e.get_dict())
except AttributeError:
msg = "Edges and nodes must have a `get_dict` method."
logger.error(msg)
raise ValueError(msg)
self.add_biocypher_nodes(nodes)
logger.info(f"Merging {len(rels)} edges.")
# cypher query
# merging only on the ids of the entities, passing the
# properties on match and on create;
# TODO add node labels?
node_query = (
"UNWIND $rels AS r "
"MERGE (src {id: r.source_id}) "
"MERGE (tar {id: r.target_id}) "
)
self._driver.query(node_query, parameters={"rels": rels})
edge_query = (
"UNWIND $rels AS r "
"MATCH (src {id: r.source_id}) "
"MATCH (tar {id: r.target_id}) "
"WITH src, tar, r "
"CALL apoc.merge.relationship"
"(src, r.relationship_label, NULL, "
"r.properties, tar, r.properties) "
"YIELD rel "
"RETURN rel"
)
method = "explain" if explain else "profile" if profile else "query"
result = getattr(self._driver, method)(
edge_query, parameters={"rels": rels}
)
logger.info("Finished merging edges.")
return result
def get_driver(
dbms: str,
translator: "Translator",
):
"""
Function to return the online driver instance for the selected DBMS.
Args:
    dbms: the database management system; only "neo4j" is handled here.
    translator: the Translator object.
Returns:
    instance: an instance of the selected driver class, or None if no
    online driver is available for the DBMS.
"""
dbms_config = _config(dbms)
if dbms == "neo4j":
return _Neo4jDriver(
database_name=dbms_config["database_name"],
wipe=dbms_config["wipe"],
uri=dbms_config["uri"],
user=dbms_config["user"],
password=dbms_config["password"],
multi_db=dbms_config["multi_db"],
translator=translator,
)
return None
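# --- Illustrative usage sketch (not part of the original module) ---
# How the online driver might be obtained and used, assuming a configured
# "neo4j" section in the BioCypher configuration and an existing Translator
# instance; all ids, labels, and properties below are placeholders.
#
#   translator = ...  # a BioCypher Translator built from the schema config
#   driver = get_driver(dbms="neo4j", translator=translator)
#   if driver is not None:
#       # 3-tuples: id, type, properties (see add_nodes above)
#       driver.add_nodes([("uniprot:P12345", "protein", {"name": "example"})])
#       # 5-tuples: optional id, source, target, type, properties (see add_edges)
#       driver.add_edges(
#           [(None, "uniprot:P12345", "uniprot:Q67890", "interacts_with", {})]
#       )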


@ -0,0 +1,90 @@
import pandas as pd
from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
class Pandas:
def __init__(self, translator, deduplicator):
self.translator = translator
self.deduplicator = deduplicator
self.dfs = {}
def _separate_entity_types(self, entities):
"""
Given mixed iterable of BioCypher objects, separate them into lists by
type. Also deduplicates using the `Deduplicator` instance.
"""
lists = {}
for entity in entities:
if (
not isinstance(entity, BioCypherNode)
and not isinstance(entity, BioCypherEdge)
and not isinstance(entity, BioCypherRelAsNode)
):
raise TypeError(
"Expected a BioCypherNode / BioCypherEdge / "
f"BioCypherRelAsNode, got {type(entity)}."
)
if isinstance(entity, BioCypherNode):
seen = self.deduplicator.node_seen(entity)
elif isinstance(entity, BioCypherEdge):
seen = self.deduplicator.edge_seen(entity)
elif isinstance(entity, BioCypherRelAsNode):
seen = self.deduplicator.rel_as_node_seen(entity)
if seen:
continue
if isinstance(entity, BioCypherRelAsNode):
node = entity.get_node()
source_edge = entity.get_source_edge()
target_edge = entity.get_target_edge()
_type = node.get_type()
if _type not in lists:
lists[_type] = []
lists[_type].append(node)
_source_type = source_edge.get_type()
if _source_type not in lists:
lists[_source_type] = []
lists[_source_type].append(source_edge)
_target_type = target_edge.get_type()
if _target_type not in lists:
lists[_target_type] = []
lists[_target_type].append(target_edge)
continue
_type = entity.get_type()
if _type not in lists:
lists[_type] = []
lists[_type].append(entity)
return lists
def add_tables(self, entities):
"""
Add Pandas dataframes for each node and edge type in the input.
"""
lists = self._separate_entity_types(entities)
for _type, _entities in lists.items():
self._add_entity_df(_type, _entities)
def _add_entity_df(self, _type, _entities):
df = pd.DataFrame(
pd.json_normalize([entity.get_dict() for entity in _entities])
)
# replace "properties." with "" in column names
df.columns = [col.replace("properties.", "") for col in df.columns]
if _type not in self.dfs:
self.dfs[_type] = df
else:
self.dfs[_type] = pd.concat(
[self.dfs[_type], df], ignore_index=True
)
return self.dfs[_type]
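# --- Illustrative sketch (not part of the original module) ---
# Shows the core of `_add_entity_df` with plain pandas: dicts of the shape
# produced by `BioCypherNode.get_dict()` are flattened with json_normalize
# and the "properties." prefix is stripped from the column names. The ids
# and property values are made up for illustration.
if __name__ == "__main__":
    example_dicts = [
        {"node_id": "p1", "node_label": "protein", "properties": {"name": "BRCA1"}},
        {"node_id": "p2", "node_label": "protein", "properties": {"name": "TP53"}},
    ]
    df = pd.DataFrame(pd.json_normalize(example_dicts))
    df.columns = [col.replace("properties.", "") for col in df.columns]
    print(df)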


File diff suppressed because it is too large.


@ -0,0 +1,113 @@
#!/usr/bin/env python
#
# Copyright 2021, Heidelberg University Clinic
#
# File author(s): Sebastian Lobentanzer
# Michael Hartung
#
# Distributed under MIT licence, see the file `LICENSE`.
#
"""
BioCypher 'offline' module. Handles the writing of node and edge representations
suitable for import into a DBMS.
"""
from biocypher._logger import logger
from biocypher.output.write.graph._rdf import _RDFWriter
from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
from biocypher.output.write.graph._arangodb import _ArangoDBBatchWriter
from biocypher.output.write.graph._networkx import _NetworkXWriter
from biocypher.output.write.relational._csv import _PandasCSVWriter
from biocypher.output.write.relational._sqlite import _SQLiteBatchWriter
from biocypher.output.write.relational._postgresql import _PostgreSQLBatchWriter
logger.debug(f"Loading module {__name__}.")
from typing import TYPE_CHECKING
from biocypher._config import config as _config
__all__ = ["get_writer", "DBMS_TO_CLASS"]
if TYPE_CHECKING:
from biocypher._translate import Translator
from biocypher._deduplicate import Deduplicator
DBMS_TO_CLASS = {
"neo": _Neo4jBatchWriter,
"neo4j": _Neo4jBatchWriter,
"Neo4j": _Neo4jBatchWriter,
"postgres": _PostgreSQLBatchWriter,
"postgresql": _PostgreSQLBatchWriter,
"PostgreSQL": _PostgreSQLBatchWriter,
"arango": _ArangoDBBatchWriter,
"arangodb": _ArangoDBBatchWriter,
"ArangoDB": _ArangoDBBatchWriter,
"sqlite": _SQLiteBatchWriter,
"sqlite3": _SQLiteBatchWriter,
"rdf": _RDFWriter,
"RDF": _RDFWriter,
"csv": _PandasCSVWriter,
"CSV": _PandasCSVWriter,
"pandas": _PandasCSVWriter,
"Pandas": _PandasCSVWriter,
"networkx": _NetworkXWriter,
"NetworkX": _NetworkXWriter,
}
def get_writer(
dbms: str,
translator: "Translator",
deduplicator: "Deduplicator",
output_directory: str,
strict_mode: bool,
):
"""
Function to return the writer class based on the selection in the config
file.
Args:
dbms: the database management system; for options, see DBMS_TO_CLASS.
translator: the Translator object.
deduplicator: the Deduplicator object.
output_directory: the directory to write the output files to.
strict_mode: whether to use strict mode.
Returns:
instance: an instance of the selected writer class.
"""
dbms_config = _config(dbms)
writer = DBMS_TO_CLASS.get(dbms)
if not writer:
raise ValueError(f"Unknown dbms: {dbms}")
if writer is not None:
return writer(
translator=translator,
deduplicator=deduplicator,
delimiter=dbms_config.get("delimiter"),
array_delimiter=dbms_config.get("array_delimiter"),
quote=dbms_config.get("quote_character"),
output_directory=output_directory,
db_name=dbms_config.get("database_name"),
import_call_bin_prefix=dbms_config.get("import_call_bin_prefix"),
import_call_file_prefix=dbms_config.get("import_call_file_prefix"),
wipe=dbms_config.get("wipe"),
strict_mode=strict_mode,
skip_bad_relationships=dbms_config.get(
"skip_bad_relationships"
), # neo4j
skip_duplicate_nodes=dbms_config.get(
"skip_duplicate_nodes"
), # neo4j
db_user=dbms_config.get("user"), # psql
db_password=dbms_config.get("password"), # psql
db_port=dbms_config.get("port"), # psql
rdf_format=dbms_config.get("rdf_format"), # rdf
rdf_namespaces=dbms_config.get("rdf_namespaces"), # rdf
)
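# --- Illustrative usage sketch (not part of the original module) ---
# get_writer() is typically called by the BioCypher core with a Translator
# and Deduplicator; the objects and the output path below are placeholders.
#
#   writer = get_writer(
#       dbms="neo4j",
#       translator=translator,
#       deduplicator=deduplicator,
#       output_directory="biocypher-out/",
#       strict_mode=False,
#   )
#
# The supported DBMS keys can be inspected directly:
if __name__ == "__main__":
    print(sorted(set(DBMS_TO_CLASS)))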


@ -0,0 +1,200 @@
from abc import ABC, abstractmethod
from typing import Union, Optional
from collections.abc import Iterable
import os
from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
from biocypher._logger import logger
from biocypher._translate import Translator
from biocypher._deduplicate import Deduplicator
__all__ = ["_Writer"]
class _Writer(ABC):
"""Abstract class for writing node and edge representations to disk.
Specifics of the different writers (e.g. neo4j, postgresql, csv, etc.)
are implemented in the child classes. Any concrete writer needs to
implement at least:
- _write_node_data
- _write_edge_data
- _construct_import_call
- _get_import_script_name
Args:
translator (Translator): Instance of :py:class:`Translator` to enable translation of
nodes and manipulation of properties.
deduplicator (Deduplicator): Instance of :py:class:`Deduplicator` to enable deduplication
of nodes and edges.
output_directory (str, optional): Path for exporting CSV files. Defaults to None.
strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
Raises:
NotImplementedError: Writer implementation must override '_write_node_data'
NotImplementedError: Writer implementation must override '_write_edge_data'
NotImplementedError: Writer implementation must override '_construct_import_call'
NotImplementedError: Writer implementation must override '_get_import_script_name'
"""
def __init__(
self,
translator: Translator,
deduplicator: Deduplicator,
output_directory: Optional[str] = None,
strict_mode: bool = False,
*args,
**kwargs,
):
"""Abstract class for writing node and edge representations to disk.
Args:
translator (Translator): Instance of :py:class:`Translator` to enable translation of
nodes and manipulation of properties.
deduplicator (Deduplicator): Instance of :py:class:`Deduplicator` to enable deduplication
of nodes and edges.
output_directory (str, optional): Path for exporting CSV files. Defaults to None.
strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
"""
self.translator = translator
self.deduplicator = deduplicator
self.strict_mode = strict_mode
self.output_directory = output_directory
if os.path.exists(self.output_directory):
if kwargs.get("write_to_file", True):
logger.warning(
f"Output directory `{self.output_directory}` already exists. "
"If this is not planned, file consistency may be compromised."
)
else:
logger.info(f"Creating output directory `{self.output_directory}`.")
os.makedirs(self.output_directory)
@abstractmethod
def _write_node_data(
self,
nodes: Iterable[
Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]
],
) -> bool:
"""Implement how to output.write nodes to disk.
Args:
nodes (Iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
Returns:
bool: The return value. True for success, False otherwise.
"""
raise NotImplementedError(
"Writer implementation must override 'write_nodes'"
)
@abstractmethod
def _write_edge_data(
self,
edges: Iterable[
Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]
],
) -> bool:
"""Implement how to output.write edges to disk.
Args:
edges (Iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
Returns:
bool: The return value. True for success, False otherwise.
"""
raise NotImplementedError(
"Writer implementation must override 'write_edges'"
)
@abstractmethod
def _construct_import_call(self) -> str:
"""
Function to construct the import call detailing folder and
individual node and edge headers and data files, as well as
delimiters and database name. Built after all data has been
processed to ensure that nodes are called before any edges.
Returns:
str: command for importing the output files into a DBMS.
"""
raise NotImplementedError(
"Writer implementation must override '_construct_import_call'"
)
@abstractmethod
def _get_import_script_name(self) -> str:
"""Returns the name of the import script.
Returns:
str: The name of the import script (ending in .sh)
"""
raise NotImplementedError(
"Writer implementation must override '_get_import_script_name'"
)
def write_nodes(
self, nodes, batch_size: int = int(1e6), force: bool = False
):
"""Wrapper for writing nodes.
Args:
nodes (BioCypherNode): a list or generator of nodes in
:py:class:`BioCypherNode` format
batch_size (int): The batch size for writing nodes.
force (bool): Whether to force writing nodes even if their type is
not present in the schema.
Returns:
bool: The return value. True for success, False otherwise.
"""
passed = self._write_node_data(nodes)
if not passed:
logger.error("Error while writing node data.")
return False
return True
def write_edges(
self, edges, batch_size: int = int(1e6), force: bool = False
):
"""Wrapper for writing edges.
Args:
edges (BioCypherEdge): a list or generator of edges in
:py:class:`BioCypherEdge` format
batch_size (int): The batch size for writing edges.
force (bool): Whether to force writing edges even if their type is
not present in the schema.
Returns:
bool: The return value. True for success, False otherwise.
"""
passed = self._write_edge_data(edges)
if not passed:
logger.error("Error while writing edge data.")
return False
return True
def write_import_call(self):
"""
Function to write the import call detailing folder and
individual node and edge headers and data files, as well as
delimiters and database name, to the export folder as txt.
Returns:
str: The path of the file holding the import call.
"""
file_path = os.path.join(
self.output_directory, self._get_import_script_name()
)
logger.info(
f"Writing {self.__class__.__name__} import call to `{file_path}`."
)
with open(file_path, "w", encoding="utf-8") as f:
f.write(self._construct_import_call())
return file_path
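# --- Illustrative sketch (not part of the original module) ---
# A minimal concrete `_Writer` subclass showing which methods a new output
# format must implement; the class name, script name, and command string are
# invented for illustration only.
class _NullWriter(_Writer):
    """Toy writer that discards all data; for demonstration purposes."""

    def _write_node_data(self, nodes) -> bool:
        # A real writer would serialise the nodes here.
        return True

    def _write_edge_data(self, edges) -> bool:
        # A real writer would serialise the edges here.
        return True

    def _construct_import_call(self) -> str:
        return "echo 'nothing to import'"

    def _get_import_script_name(self) -> str:
        return "null-import-call.sh"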


@ -0,0 +1,241 @@
import os
from biocypher._logger import logger
from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
class _ArangoDBBatchWriter(_Neo4jBatchWriter):
"""
Class for writing node and edge representations to disk using the format
specified by ArangoDB for the use of "arangoimport". Output files are
similar to Neo4j, but with a different header format.
"""
def _get_default_import_call_bin_prefix(self):
"""
Method to provide the default string for the import call bin prefix.
Returns:
str: The default location for the neo4j admin import location
"""
return ""
def _get_import_script_name(self) -> str:
"""
Returns the name of the ArangoDB import script.
Returns:
str: The name of the import script (ending in .sh)
"""
return "arangodb-import-call.sh"
def _write_node_headers(self):
"""
Writes single CSV file for a graph entity that is represented
as a node as per the definition in the `schema_config.yaml`,
containing only the header for this type of node.
Returns:
bool: The return value. True for success, False otherwise.
"""
# load headers from data parse
if not self.node_property_dict:
logger.error(
"Header information not found. Was the data parsed first?",
)
return False
for label, props in self.node_property_dict.items():
# create header CSV with ID, properties, labels
_id = "_key"
# translate label to PascalCase
pascal_label = self.translator.name_sentence_to_pascal(label)
header = f"{pascal_label}-header.csv"
header_path = os.path.join(
self.outdir,
header,
)
# check if file already exists
if os.path.exists(header_path):
logger.warning(
f"File {header_path} already exists. Overwriting."
)
# concatenate key:value in props
props_list = []
for k in props.keys():
props_list.append(f"{k}")
# create list of lists and flatten
# removes need for empty check of property list
out_list = [[_id], props_list]
out_list = [val for sublist in out_list for val in sublist]
with open(header_path, "w", encoding="utf-8") as f:
# concatenate with delimiter
row = self.delim.join(out_list)
f.write(row)
# add collection from schema config
collection = self.translator.ontology.mapping.extended_schema[
label
].get("db_collection_name", None)
# add file path to the arangoimport statement
# do once for each part file
parts = self.parts.get(label, [])
if not parts:
raise ValueError(
f"No parts found for node label {label}. "
f"Check that the data was parsed first.",
)
for part in parts:
import_call_header_path = os.path.join(
self.import_call_file_prefix,
header,
)
import_call_parts_path = os.path.join(
self.import_call_file_prefix,
part,
)
self.import_call_nodes.add(
(
import_call_header_path,
import_call_parts_path,
collection,
)
)
return True
def _write_edge_headers(self):
"""
Writes single CSV file for a graph entity that is represented
as an edge as per the definition in the `schema_config.yaml`,
containing only the header for this type of edge.
Returns:
bool: The return value. True for success, False otherwise.
"""
# load headers from data parse
if not self.edge_property_dict:
logger.error(
"Header information not found. Was the data parsed first?",
)
return False
for label, props in self.edge_property_dict.items():
# translate label to PascalCase
pascal_label = self.translator.name_sentence_to_pascal(label)
# paths
header = f"{pascal_label}-header.csv"
header_path = os.path.join(
self.outdir,
header,
)
parts = f"{pascal_label}-part.*"
# check for file exists
if os.path.exists(header_path):
logger.warning(
f"Header file {header_path} already exists. Overwriting."
)
# concatenate key:value in props
props_list = []
for k in props.keys():
props_list.append(f"{k}")
out_list = ["_from", "_key", *props_list, "_to"]
with open(header_path, "w", encoding="utf-8") as f:
# concatenate with delimiter
row = self.delim.join(out_list)
f.write(row)
# add collection from schema config
if not self.translator.ontology.mapping.extended_schema.get(label):
for (
_,
v,
) in self.translator.ontology.mapping.extended_schema.items():
if v.get("label_as_edge") == label:
collection = v.get("db_collection_name", None)
break
else:
collection = self.translator.ontology.mapping.extended_schema[
label
].get("db_collection_name", None)
# add file path to the arangoimport statement (import call path
# may be different from actual output path)
header_import_call_path = os.path.join(
self.import_call_file_prefix,
header,
)
parts_import_call_path = os.path.join(
self.import_call_file_prefix,
parts,
)
self.import_call_edges.add(
(
header_import_call_path,
parts_import_call_path,
collection,
)
)
return True
def _construct_import_call(self) -> str:
"""
Function to construct the import call detailing folder and
individual node and edge headers and data files, as well as
delimiters and database name. Built after all data has been
processed to ensure that nodes are called before any edges.
Returns:
str: a bash command for arangoimport
"""
import_call = (
f"{self.import_call_bin_prefix}arangoimp "
f"--type csv "
f'--separator="{self.escaped_delim}" '
)
if self.quote == "'":
import_call += f'--quote="{self.quote}" '
else:
import_call += f"--quote='{self.quote}' "
node_lines = ""
# node import calls: one line per node type
for header_path, parts_path, collection in self.import_call_nodes:
line = (
f"{import_call} "
f"--headers-file {header_path} "
f"--file= {parts_path} "
)
if collection:
line += f"--create-collection --collection {collection} "
node_lines += f"{line}\n"
edge_lines = ""
# edge import calls: one line per edge type
for header_path, parts_path, collection in self.import_call_edges:
    line = (
        f"{import_call} "
        f"--headers-file {header_path} "
        f"--file={parts_path} "
    )
    if collection:
        line += f"--create-collection --collection {collection} "
    edge_lines += f"{line}\n"
return node_lines + edge_lines
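# --- Illustrative sketch (not part of the original class) ---
# With delimiter ";" and double-quote quoting, one generated node import line
# would look roughly as follows (paths and the collection name are
# placeholders, and the binary name follows the bin prefix configured above):
#
#   arangoimp --type csv --separator=";" --quote='"' \
#       --headers-file /path/Protein-header.csv \
#       --file=/path/Protein-part000.csv \
#       --create-collection --collection proteins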


@ -0,0 +1,502 @@
import os
import glob
import pandas as pd
from biocypher._logger import logger
from biocypher.output.write._batch_writer import parse_label, _BatchWriter
class _Neo4jBatchWriter(_BatchWriter):
"""
Class for writing node and edge representations to disk using the
format specified by Neo4j for the use of admin import. Each batch
writer instance has a fixed representation that needs to be passed
at instantiation via the :py:attr:`schema` argument. The instance
also expects an ontology adapter via :py:attr:`ontology_adapter` to be able
to convert and extend the hierarchy.
This class inherits from the abstract class "_BatchWriter" and implements the
Neo4j-specific methods:
- _write_node_headers
- _write_edge_headers
- _construct_import_call
- _write_array_string
"""
def __init__(self, *args, **kwargs):
"""
Constructor.
Checks the version of Neo4j and adds a command scope if version >= 5.
Returns:
_Neo4jBatchWriter: An instance of the writer.
"""
# Should read the configuration and setup import_call_bin_prefix.
super().__init__(*args, **kwargs)
def _get_default_import_call_bin_prefix(self):
"""
Method to provide the default string for the import call bin prefix.
Returns:
str: The default location for the neo4j admin import location
"""
return "bin/"
def _write_array_string(self, string_list):
"""
Abstract method to write the string representation of an array into a .csv file
as required by the neo4j admin-import.
Args:
string_list (list): list of ontology strings
Returns:
str: The string representation of an array for the neo4j admin import
"""
string = self.adelim.join(string_list)
return f"{self.quote}{string}{self.quote}"
def _write_node_headers(self):
"""
Writes single CSV file for a graph entity that is represented
as a node as per the definition in the `schema_config.yaml`,
containing only the header for this type of node.
Returns:
bool: The return value. True for success, False otherwise.
"""
# load headers from data parse
if not self.node_property_dict:
logger.error(
"Header information not found. Was the data parsed first?",
)
return False
for label, props in self.node_property_dict.items():
_id = ":ID"
## MeDaX dev remark:
## FHIR data yields case-sensitive labels, e.g. 'Procedure' and 'procedure' are two distinct node types,
## because Resources are converted to more specific node classes using their "resourceType" attribute.
# translate label to PascalCase
pascal_label = self.translator.name_sentence_to_pascal(
parse_label(label)
)
header = f"{pascal_label}-header.csv"
header_path = os.path.join(
self.outdir,
header,
)
parts = f"{pascal_label}-part.*"
existing_header = False
# check if file already exists
if os.path.exists(header_path):
logger.warning(
f"Header file `{header_path}` already exists. Overwriting.",
)
with open(header_path, "r", encoding="utf-8") as existing:
existing_header = existing.read().strip().split(self.delim)
# concatenate key:value in props
props_list = []
for k, v in props.items():
if v in ["int", "long", "integer"]:
props_list.append(f"{k}:long")
elif v in ["int[]", "long[]", "integer[]"]:
props_list.append(f"{k}:long[]")
elif v in ["float", "double", "dbl"]:
props_list.append(f"{k}:double")
elif v in ["float[]", "double[]"]:
props_list.append(f"{k}:double[]")
elif v in ["bool", "boolean"]:
# TODO Neo4j boolean support / spelling?
props_list.append(f"{k}:boolean")
elif v in ["bool[]", "boolean[]"]:
props_list.append(f"{k}:boolean[]")
elif v in ["str[]", "string[]"]:
props_list.append(f"{k}:string[]")
else:
props_list.append(f"{k}")
# create list of lists and flatten
out_list = [[_id], props_list, [":LABEL"]]
out_list = [val for sublist in out_list for val in sublist]
with open(header_path, "w", encoding="utf-8") as f:
# Check if header file already exists and has different columns
if os.path.exists(header_path):
if existing_header:
#existing_header = existing.read().strip().split(self.delim)
# Compare existing and new headers
if set(existing_header) != set(out_list):
# Get part files associated with this header
base_name = os.path.basename(header_path).replace("-header.csv", "")
part_files = glob.glob(os.path.join(os.path.dirname(header_path), f"{base_name}-part*.csv"))
# Find the highest numbered part file without full sorting
highest_part = None
highest_number = -1
for part_file in part_files:
try:
# Extract number from filename (assuming format like "part123.csv")
file_name = os.path.basename(part_file)
number_part = file_name.split("part")[1].split(".")[0]
number = int(number_part)
if number > highest_number:
highest_number = number
highest_part = part_file
except (IndexError, ValueError):
# Skip files that don't match the expected pattern
continue
# Update each part file with the new columns
for part_file in part_files:
if part_file == highest_part:
print(f"Skipping the highest part file: {highest_part}")
continue
try:
#print("exi: ", existing_header)
#print("out: ", out_list)
df = self.adapt_csv_to_new_header(existing_header, out_list, part_file)
# Write the adapted data back WITHOUT including the header row
df.to_csv(part_file, sep=self.delim, index=False, header=False)
print(f"Updated {part_file} with new columns in correct positions")
except Exception as e:
print(f"Error updating {part_file}: {e}")
# Write the new header
row = self.delim.join(out_list)
f.write(row)
# add file path to the neo4j-admin import statement (import call file
# path may be different from actual file path)
import_call_header_path = os.path.join(
self.import_call_file_prefix,
header,
)
import_call_parts_path = os.path.join(
self.import_call_file_prefix,
parts,
)
self.import_call_nodes.add(
(import_call_header_path, import_call_parts_path)
)
return True
def _write_edge_headers(self):
"""
Writes single CSV file for a graph entity that is represented
as an edge as per the definition in the `schema_config.yaml`,
containing only the header for this type of edge.
Returns:
bool: The return value. True for success, False otherwise.
"""
# load headers from data parse
if not self.edge_property_dict:
logger.error(
"Header information not found. Was the data parsed first?",
)
return False
for label, props in self.edge_property_dict.items():
# translate label to PascalCase
pascal_label = self.translator.name_sentence_to_pascal(
parse_label(label)
)
# paths
header = f"{pascal_label}-header.csv"
header_path = os.path.join(
self.outdir,
header,
)
parts = f"{pascal_label}-part.*"
# check for file exists
if os.path.exists(header_path):
logger.warning(
f"File {header_path} already exists. Overwriting."
)
# concatenate key:value in props
props_list = []
for k, v in props.items():
if v in ["int", "long", "integer"]:
props_list.append(f"{k}:long")
elif v in ["int[]", "long[]", "integer[]"]:
props_list.append(f"{k}:long[]")
elif v in ["float", "double"]:
props_list.append(f"{k}:double")
elif v in ["float[]", "double[]"]:
props_list.append(f"{k}:double[]")
elif v in [
"bool",
"boolean",
]: # TODO does Neo4j support bool?
props_list.append(f"{k}:boolean")
elif v in ["bool[]", "boolean[]"]:
props_list.append(f"{k}:boolean[]")
elif v in ["str[]", "string[]"]:
props_list.append(f"{k}:string[]")
else:
props_list.append(f"{k}")
skip_id = False
schema_label = None
if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
skip_id = True
elif not self.translator.ontology.mapping.extended_schema.get(
label
):
# find label in schema by label_as_edge
for (
k,
v,
) in self.translator.ontology.mapping.extended_schema.items():
if v.get("label_as_edge") == label:
schema_label = k
break
else:
schema_label = label
out_list = [":START_ID"]
if schema_label:
if (
self.translator.ontology.mapping.extended_schema.get(
schema_label
).get("use_id")
== False
):
skip_id = True
if not skip_id:
out_list.append("id")
out_list.extend(props_list)
out_list.extend([":END_ID", ":TYPE"])
existing_header = False
# check if file already exists
if os.path.exists(header_path):
logger.warning(
f"Header file `{header_path}` already exists. Overwriting.",
)
with open(header_path, "r", encoding="utf-8") as existing:
existing_header = existing.read().strip().split(self.delim)
with open(header_path, "w", encoding="utf-8") as f:
# Check if header file already exists and has different columns
if os.path.exists(header_path):
if existing_header:
#existing_header = existing.read().strip().split(self.delim)
# Compare existing and new headers
if set(existing_header) != set(out_list):
# Get part files associated with this header
base_name = os.path.basename(header_path).replace("-header.csv", "")
part_files = glob.glob(os.path.join(os.path.dirname(header_path), f"{base_name}-part*.csv"))
# Find the highest numbered part file without full sorting
highest_part = None
highest_number = -1
for part_file in part_files:
try:
# Extract number from filename (assuming format like "part123.csv")
file_name = os.path.basename(part_file)
number_part = file_name.split("part")[1].split(".")[0]
number = int(number_part)
if number > highest_number:
highest_number = number
highest_part = part_file
except (IndexError, ValueError):
# Skip files that don't match the expected pattern
continue
# Update each part file with the new columns
for part_file in part_files:
if part_file == highest_part:
print(f"Skipping the highest part file: {highest_part}")
continue
try:
print("exi: ", existing_header)
print("out: ", out_list)
df = self.adapt_csv_to_new_header(existing_header, out_list, part_file)
# Write the adapted data back WITHOUT including the header row
df.to_csv(part_file, sep=self.delim, index=False, header=False)
print(f"Updated {part_file} with new columns in correct positions")
except Exception as e:
print(f"Error updating {part_file}: {e}")
# Write the new header
row = self.delim.join(out_list)
f.write(row)
# add file path to the neo4j-admin import statement (import call file
# path may be different from actual file path)
import_call_header_path = os.path.join(
self.import_call_file_prefix,
header,
)
import_call_parts_path = os.path.join(
self.import_call_file_prefix,
parts,
)
self.import_call_edges.add(
(import_call_header_path, import_call_parts_path)
)
return True
def _get_import_script_name(self) -> str:
"""
Returns the name of the neo4j admin import script
Returns:
str: The name of the import script (ending in .sh)
"""
return "neo4j-admin-import-call.sh"
def _construct_import_call(self) -> str:
"""
Function to construct the import call detailing folder and
individual node and edge headers and data files, as well as
delimiters and database name. Built after all data has been
processed to ensure that nodes are called before any edges.
Returns:
str: a bash command for neo4j-admin import
"""
import_call_neo4j_v4 = self._get_import_call(
"import", "--database=", "--force="
)
import_call_neo4j_v5 = self._get_import_call(
"database import full", "", "--overwrite-destination="
)
neo4j_version_check = f"version=$({self._get_default_import_call_bin_prefix()}neo4j-admin --version | cut -d '.' -f 1)"
import_script = f"#!/bin/bash\n{neo4j_version_check}\nif [[ $version -ge 5 ]]; then\n\t{import_call_neo4j_v5}\nelse\n\t{import_call_neo4j_v4}\nfi"
return import_script
def _get_import_call(
self, import_cmd: str, database_cmd: str, wipe_cmd: str
) -> str:
"""Get parametrized import call for Neo4j 4 or 5+.
Args:
import_cmd (str): The import command to use.
database_cmd (str): The database command to use.
wipe_cmd (str): The wipe command to use.
Returns:
str: The import call.
"""
import_call = f"{self.import_call_bin_prefix}neo4j-admin {import_cmd} "
import_call += f"{database_cmd}{self.db_name} "
import_call += f'--delimiter="{self.escaped_delim}" '
import_call += f'--array-delimiter="{self.escaped_adelim}" '
if self.quote == "'":
import_call += f'--quote="{self.quote}" '
else:
import_call += f"--quote='{self.quote}' "
if self.wipe:
import_call += f"{wipe_cmd}true "
if self.skip_bad_relationships:
import_call += "--skip-bad-relationships=true "
if self.skip_duplicate_nodes:
import_call += "--skip-duplicate-nodes=true "
# append node import calls
for header_path, parts_path in self.import_call_nodes:
import_call += f'--nodes="{header_path},{parts_path}" '
# append edge import calls
for header_path, parts_path in self.import_call_edges:
import_call += f'--relationships="{header_path},{parts_path}" '
return import_call
def adapt_csv_to_new_header(self, old_header, new_header, csv_file_path):
"""
Adapt a CSV table to a new header structure, placing new columns in their correct positions.
Parameters:
old_header (list): The original header columns
new_header (list): The new header columns
csv_file_path (str): Path to the CSV file
Returns:
pandas.DataFrame: CSV data with the new header structure
"""
# Step 1: Read the CSV data without headers
df = pd.read_csv(csv_file_path, sep=self.delim, header=None)
# Step 2: If the file is empty, return empty DataFrame with new headers
if df.empty:
return pd.DataFrame(columns=new_header)
# Step 3: If column count doesn't match old_header length, handle the mismatch
if len(df.columns) != len(old_header):
print(f"Warning: CSV columns count ({len(df.columns)}) doesn't match the provided old header count ({len(old_header)})")
# If file has fewer columns than old_header, pad with NaN
if len(df.columns) < len(old_header):
for i in range(len(df.columns), len(old_header)):
df[i] = None
# If file has more columns than old_header, truncate
else:
df = df.iloc[:, :len(old_header)]
# Step 4: Assign old header names to the dataframe
df.columns = old_header
# Step 5: Create a new DataFrame with the correct structure
new_df = pd.DataFrame(columns=new_header)
# Step 6: For each column in the new header, find its position in the old header
for new_col_idx, new_col in enumerate(new_header):
if new_col in old_header:
# If column exists in old header, copy data
new_df[new_col] = df[new_col]
else:
# If new column, add empty column
new_df[new_col] = None
# Step 7: Ensure columns are in the exact order of new_header
new_df = new_df[new_header]
return new_df
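# --- Illustrative sketch (not part of the original module) ---
# Demonstrates, with plain pandas, the column re-mapping idea behind
# `adapt_csv_to_new_header`: rows written under an old header are mapped onto
# a new header, and columns that did not exist before are filled with None.
# The header names and values are made up for illustration.
if __name__ == "__main__":
    old_header = [":ID", "name", ":LABEL"]
    new_header = [":ID", "name", "taxon:long", ":LABEL"]
    df = pd.DataFrame([["p1", "BRCA1", "Protein"]], columns=old_header)
    adapted = pd.DataFrame(columns=new_header)
    for col in new_header:
        adapted[col] = df[col] if col in old_header else None
    print(adapted)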


@ -0,0 +1,76 @@
import pickle
import networkx as nx
from biocypher._logger import logger
from biocypher.output.write._writer import _Writer
from biocypher.output.write.relational._csv import _PandasCSVWriter
class _NetworkXWriter(_Writer):
"""
Class for writing nodes and edges to a networkx DiGraph.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.csv_writer = _PandasCSVWriter(*args, write_to_file=False, **kwargs)
self.G = nx.DiGraph()
def _construct_import_call(self) -> str:
"""Function to construct the Python code to load all node and edge csv files again into Pandas dfs.
Returns:
str: Python code to load the csv files into Pandas dfs.
"""
logger.info(
f"Writing networkx {self.G} to pickle file networkx_graph.pkl."
)
with open(f"{self.output_directory}/networkx_graph.pkl", "wb") as f:
pickle.dump(self.G, f)
import_call = "import pickle\n"
import_call += "with open('./networkx_graph.pkl', 'rb') as f:\n\tG_loaded = pickle.load(f)"
return import_call
def _get_import_script_name(self) -> str:
"""Function to return the name of the import script."""
return "import_networkx.py"
def _write_node_data(self, nodes) -> bool:
passed = self.csv_writer._write_entities_to_file(nodes)
self.add_to_networkx()
return passed
def _write_edge_data(self, edges) -> bool:
passed = self.csv_writer._write_entities_to_file(edges)
self.add_to_networkx()
return passed
def add_to_networkx(self) -> bool:
all_dfs = self.csv_writer.stored_dfs
node_dfs = [
df
for df in all_dfs.values()
if df.columns.str.contains("node_id").any()
]
edge_dfs = [
df
for df in all_dfs.values()
if df.columns.str.contains("source_id").any()
and df.columns.str.contains("target_id").any()
]
for df in node_dfs:
nodes = df.set_index("node_id").to_dict(orient="index")
self.G.add_nodes_from(nodes.items())
for df in edge_dfs:
edges = df.set_index(["source_id", "target_id"]).to_dict(
orient="index"
)
self.G.add_edges_from(
(
(source, target, attrs)
for (source, target), attrs in edges.items()
)
)
return True
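# --- Illustrative sketch (not part of the original class) ---
# Mirrors `add_to_networkx` with hand-made dataframes instead of the stored
# CSV dataframes; the node ids, names, and edge type are placeholders.
if __name__ == "__main__":
    import pandas as pd

    node_df = pd.DataFrame({"node_id": ["p1", "p2"], "name": ["BRCA1", "TP53"]})
    edge_df = pd.DataFrame(
        {"source_id": ["p1"], "target_id": ["p2"], "type": ["interacts_with"]}
    )
    G = nx.DiGraph()
    G.add_nodes_from(node_df.set_index("node_id").to_dict(orient="index").items())
    edges = edge_df.set_index(["source_id", "target_id"]).to_dict(orient="index")
    G.add_edges_from((s, t, attrs) for (s, t), attrs in edges.items())
    print(G)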


@ -0,0 +1,515 @@
#!/usr/bin/env python
#
# Copyright 2021, Heidelberg University Clinic
#
# File author(s): Loes van den Biggelaar
# Sebastian Lobentanzer
#
# Distributed under MIT licence, see the file `LICENSE`.
#
"""
BioCypher 'offline' module. Handles the writing of node and edge representations
suitable for import into a DBMS.
"""
from types import GeneratorType
from typing import Union
import os
from rdflib import DC, RDF, RDFS, SKOS, DCTERMS, Graph, Literal, Namespace
from rdflib.namespace import (
_NAMESPACE_PREFIXES_CORE,
_NAMESPACE_PREFIXES_RDFLIB,
)
from biocypher._create import BioCypherEdge, BioCypherNode
from biocypher._logger import logger
from biocypher.output.write._batch_writer import _BatchWriter
class _RDFWriter(_BatchWriter):
"""
Class to write BioCypher's property graph into an RDF format using
rdflib and all the extensions it supports (RDF/XML, N3, NTriples,
N-Quads, Turtle, TriX, Trig and JSON-LD). By default the conversion
is done keeping only the minimum information about node and edges,
skipping all properties.
"""
def _get_import_script_name(self) -> str:
"""
Returns the name of the RDF admin import script.
This function is applicable for RDF export.
Returns:
str: The name of the import script (ending in .sh)
"""
return "rdf-import-call.sh"
def _get_default_import_call_bin_prefix(self):
"""
Method to provide the default string for the import call bin prefix.
Returns:
str: The default location for the RDF admin import location
"""
return "bin/"
def _is_rdf_format_supported(self, rdf_format: str) -> bool:
"""
Function to check if the specified RDF format is supported.
Args:
rdf_format (str): The RDF format to check.
Returns:
bool: Returns True if rdf format supported, False otherwise.
"""
supported_formats = [
"xml",
"n3",
"turtle",
"nt",
"pretty-xml",
"trix",
"trig",
"nquads",
"json-ld",
]
if rdf_format not in supported_formats:
logger.error(
f"{rdf_format}; Incorrect or unsupported RDF format, use one of the following: "
f'"xml", "n3", "turtle", "nt", "pretty-xml", "trix", "trig", "nquads", "json-ld" ',
)
return False
else:
# The RDF graph does not support the 'ttl' format, only 'turtle'; however, the preferred file extension is always '.ttl'.
if self.rdf_format == "turtle":
self.extension = "ttl"
elif self.rdf_format == "ttl":
self.rdf_format = "turtle"
self.extension = "ttl"
else:
self.extension = self.rdf_format
return True
def _write_single_edge_list_to_file(
self,
edge_list: list,
label: str,
prop_dict: dict,
):
"""
This function takes one list of biocypher edges and writes them
to an RDF file with the given format.
Args:
edge_list (list): list of BioCypherEdges to be written
label (str): the label (type) of the edge
prop_dict (dict): properties of node class passed from parsing
function and their types
Returns:
bool: The return value. True for success, False otherwise.
"""
if not all(isinstance(n, BioCypherEdge) for n in edge_list):
logger.error("Edges must be passed as type BioCypherEdge.")
return False
# translate label to PascalCase
label_pascal = self.translator.name_sentence_to_pascal(label)
# create file name
file_name = os.path.join(
self.outdir, f"{label_pascal}.{self.extension}"
)
# write data in graph
graph = Graph()
self._init_namespaces(graph)
for edge in edge_list:
rdf_subject = edge.get_source_id()
rdf_object = edge.get_target_id()
rdf_predicate = edge.get_id()
rdf_properties = edge.get_properties()
if rdf_predicate is None:
rdf_predicate = rdf_subject + rdf_object
edge_label = self.translator.name_sentence_to_pascal(
edge.get_label()
)
edge_uri = self.rdf_namespaces["biocypher"][edge_label]
graph.add((edge_uri, RDF.type, RDFS.Class))
graph.add(
(
self.rdf_namespaces["biocypher"][rdf_predicate],
RDF.type,
edge_uri,
)
)
graph.add(
(
self.rdf_namespaces["biocypher"][rdf_predicate],
self.rdf_namespaces["biocypher"]["subject"],
self.subject_to_uri(rdf_subject),
)
)
graph.add(
(
self.rdf_namespaces["biocypher"][rdf_predicate],
self.rdf_namespaces["biocypher"]["object"],
self.subject_to_uri(rdf_object),
)
)
# add properties to the transformed edge --> node
for key, value in rdf_properties.items():
# only write value if it exists.
if value:
self.add_property_to_graph(graph, rdf_predicate, value, key)
graph.serialize(destination=file_name, format=self.rdf_format)
logger.info(
f"Writing {len(edge_list)} entries to {label_pascal}.{self.rdf_format}",
)
return True
def add_property_to_graph(
self,
graph: Graph,
rdf_subject: str,
rdf_object: str,
rdf_predicate: str,
):
"""
Function to add the properties to an RDF node. It takes the graph, the subject, object, and predicate of the RDF triple.
It checks if the property is a list and adds each element to the graph accordingly. Otherwise, it checks whether the string represents a list.
If it does, it transforms it into a list and adds it to the graph. If not, it adds the property to the graph as a literal.
If the property is neither a list nor a string, it is also added as a literal.
Args:
graph (RDFLib.Graph): The RDF graph to add the nodes to.
rdf_subject (str): The subject of the RDF triple.
rdf_object (str): The object of the RDF triple.
rdf_predicate (str): The predicate of the RDF triple.
Returns:
None
"""
if isinstance(rdf_object, list):
for obj in rdf_object:
graph.add(
(
self.subject_to_uri(rdf_subject),
self.property_to_uri(rdf_predicate),
Literal(obj),
)
)
elif isinstance(rdf_object, str):
if rdf_object.startswith("[") and rdf_object.endswith("]"):
self.add_property_to_graph(
graph,
rdf_subject,
self.transform_string_to_list(rdf_object),
rdf_predicate,
)
else:
graph.add(
(
self.subject_to_uri(rdf_subject),
self.property_to_uri(rdf_predicate),
Literal(rdf_object),
)
)
else:
graph.add(
(
self.subject_to_uri(rdf_subject),
self.property_to_uri(rdf_predicate),
Literal(rdf_object),
)
)
def transform_string_to_list(self, string_list: str) -> list:
"""
Function to transform a string representation of a list into a list.
Args:
string_list (str): The string representation of the list.
Returns:
list: The list representation of the input string.
"""
return (
string_list.replace("[", "")
.replace("]", "")
.replace("'", "")
.split(", ")
)
def _write_single_node_list_to_file(
self,
node_list: list,
label: str,
prop_dict: dict,
labels: str,
):
"""
This function takes a list of BioCypherNodes and writes them
to an RDF file in the specified format.
Args:
node_list (list): A list of BioCypherNodes to be written.
label (str): The label (type) of the nodes.
prop_dict (dict): A dictionary of properties and their types for the node class.
Returns:
bool: True if the writing is successful, False otherwise.
"""
if not all(isinstance(n, BioCypherNode) for n in node_list):
logger.error("Nodes must be passed as type BioCypherNode.")
return False
# translate label to PascalCase
label_pascal = self.translator.name_sentence_to_pascal(label)
# create file name
file_name = os.path.join(
self.outdir, f"{label_pascal}.{self.extension}"
)
# write data in graph
graph = Graph()
self._init_namespaces(graph)
for n in node_list:
rdf_subject = n.get_id()
rdf_object = n.get_label()
properties = n.get_properties()
class_name = self.translator.name_sentence_to_pascal(rdf_object)
graph.add(
(
self.rdf_namespaces["biocypher"][class_name],
RDF.type,
RDFS.Class,
)
)
graph.add(
(
self.subject_to_uri(rdf_subject),
RDF.type,
self.rdf_namespaces["biocypher"][class_name],
)
)
for key, value in properties.items():
# only write value if it exists.
if value:
self.add_property_to_graph(graph, rdf_subject, value, key)
graph.serialize(destination=file_name, format=self.rdf_format)
logger.info(
f"Writing {len(node_list)} entries to {label_pascal}.{self.rdf_format}",
)
return True
def write_nodes(
self, nodes, batch_size: int = int(1e6), force: bool = False
) -> bool:
"""
Wrapper for writing nodes in RDF format. It calls the _write_node_data() function, specifying the node data.
Args:
nodes (list or generator): A list or generator of nodes in BioCypherNode format.
batch_size (int): The number of nodes to write in each batch.
force (bool): Flag to force the writing even if the output file already exists.
Returns:
bool: True if the writing is successful, False otherwise.
"""
# check if specified output format is correct
passed = self._is_rdf_format_supported(self.rdf_format)
if not passed:
logger.error("Error while writing node data, wrong RDF format")
return False
# write node data using _write_node_data method
passed = self._write_node_data(nodes, batch_size, force)
if not passed:
logger.error("Error while writing node data.")
return False
return True
def write_edges(
self,
edges: Union[list, GeneratorType],
batch_size: int = int(1e6),
) -> bool:
"""
Wrapper for writing edges in RDF format. It calls _write_edge_data()
function, specifying its edge data.
Args:
edges (BioCypherEdge): a list or generator of edges in
:py:class:`BioCypherEdge` format
batch_size (int): The number of edges to write in each batch.
Returns:
bool: The return value. True for success, False otherwise.
"""
# check if specified output format is correct
passed = self._is_rdf_format_supported(self.rdf_format)
if not passed:
logger.error("Error while writing edge data, wrong RDF format")
return False
# write edge data using _write_edge_data method
passed = self._write_edge_data(edges, batch_size=batch_size)
if not passed:
logger.error("Error while writing edge data.")
return False
return True
def _construct_import_call(self) -> str:
"""
Function to write the import call.
This function is not applicable for RDF.
Returns:
str: An empty string, as no import call is needed for RDF.
"""
return ""
def _write_array_string(self, string_list):
"""
Abstract method to write the string representation of an array into a .csv file
as required by the RDF admin-import.
This function is not applicable for RDF.
Args:
string_list (list): list of ontology strings
Returns:
bool: True, as this method is not applicable for RDF.
"""
return True
def _write_node_headers(self):
"""
Abstract method that takes care of importing properties of a graph entity that is represented
as a node as per the definition in the `schema_config.yaml`
This function is not applicable for RDF.
Returns:
bool: The return value. True for success, False otherwise.
"""
return True
def _write_edge_headers(self):
"""
Abstract method to write a database import-file for a graph entity that is represented
as an edge as per the definition in the `schema_config.yaml`,
containing only the header for this type of edge.
This function is not applicable for RDF.
Returns:
bool: The return value. True for success, False otherwise.
"""
return True
def subject_to_uri(self, subject: str) -> str:
"""
Converts the subject to a proper URI using the available namespaces.
If the conversion fails, it defaults to the biocypher prefix.
Args:
subject (str): The subject to be converted to a URI.
Returns:
str: The corresponding URI for the subject.
"""
try:
_pref, _id = subject.split(":")
if _pref in self.rdf_namespaces.keys():
return self.rdf_namespaces[_pref][_id]
else:
return self.rdf_namespaces["biocypher"][subject]
except ValueError:
return self.rdf_namespaces["biocypher"][subject]
def property_to_uri(self, property_name: str) -> str:
"""
Converts a property name to its corresponding URI.
This function takes a property name and searches for its corresponding URI in various namespaces.
It first checks the core namespaces for rdflib, including owl, rdf, rdfs, xsd, and xml.
Args:
property_name (str): The property name to be converted to a URI.
Returns:
str: The corresponding URI for the input property name.
"""
# These namespaces are core for rdflib; owl, rdf, rdfs, xsd and xml
for namespace in _NAMESPACE_PREFIXES_CORE.values():
if property_name in namespace:
return namespace[property_name]
# If the property name is not found in the core namespaces, search in the SKOS, DC, and DCTERMS namespaces
for namespace in [SKOS, DC, DCTERMS]:
if property_name in namespace:
return namespace[property_name]
# If the property name is still not found, try other namespaces from rdflib.
for namespace in _NAMESPACE_PREFIXES_RDFLIB.values():
if property_name in namespace:
return namespace[property_name]
# If the property name is "licence", it recursively calls the function with "license" as the input.
if property_name == "licence":
return self.property_to_uri("license")
# TODO: add an option to search trough manually implemented namespaces
# If the input is not found in any of the namespaces, it returns the corresponding URI from the biocypher namespace.
# TODO: give a warning and try to prevent this option altogether
return self.rdf_namespaces["biocypher"][property_name]
def _init_namespaces(self, graph: Graph):
"""
Initializes the namespaces for the RDF graph. These namespaces are used to convert nodes to URIs.
This function adds the biocypher standard namespace to the `rdf_namespaces` attribute of the class.
If `rdf_namespaces` is empty, it sets it to the biocypher standard namespace. Otherwise, it merges
the biocypher standard namespace with the namespaces defined in the biocypher_config.yaml.
Args:
graph (RDFLib.Graph): The RDF graph to bind the namespaces to.
Returns:
None
"""
# add biocypher standard to self.rdf_namespaces
biocypher_standard = {"biocypher": "https://biocypher.org/biocypher#"}
if not self.rdf_namespaces:
self.rdf_namespaces = biocypher_standard
else:
self.rdf_namespaces = self.rdf_namespaces | biocypher_standard
for key, value in self.rdf_namespaces.items():
namespace = Namespace(value)
self.rdf_namespaces[key] = namespace
graph.bind(key, namespace)
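# Hedged usage sketch, not part of the original module: illustrates the
# CURIE-to-URI fallback implemented by subject_to_uri above. The "uniprot"
# prefix and the identifiers are assumptions for illustration; in practice the
# namespaces come from biocypher_config.yaml via _init_namespaces.
if __name__ == "__main__":
    from rdflib import Namespace

    _example_namespaces = {
        "biocypher": Namespace("https://biocypher.org/biocypher#"),
        "uniprot": Namespace("http://purl.uniprot.org/uniprot/"),
    }

    def _to_uri(subject: str) -> str:
        # mirror subject_to_uri: prefer a known prefix, otherwise biocypher
        try:
            prefix, local_id = subject.split(":")
            if prefix in _example_namespaces:
                return _example_namespaces[prefix][local_id]
            return _example_namespaces["biocypher"][subject]
        except ValueError:
            return _example_namespaces["biocypher"][subject]

    print(_to_uri("uniprot:P12345"))  # http://purl.uniprot.org/uniprot/P12345
    print(_to_uri("some_label"))      # https://biocypher.org/biocypher#some_label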

View File

@ -0,0 +1,76 @@
from collections.abc import Iterable
from more_itertools import peekable
from biocypher._logger import logger
from biocypher.output.write._writer import _Writer
from biocypher.output.in_memory._pandas import Pandas
class _PandasCSVWriter(_Writer):
"""
Class for writing node and edge representations to CSV files, one file per entity type.
"""
def __init__(self, *args, write_to_file: bool = True, **kwargs):
kwargs["write_to_file"] = write_to_file
super().__init__(*args, **kwargs)
self.in_memory_dfs = {}
self.stored_dfs = {}
self.pandas_in_memory = Pandas(
translator=self.translator,
deduplicator=self.deduplicator,
)
self.delimiter = kwargs.get("delimiter")
if not self.delimiter:
self.delimiter = ","
self.write_to_file = write_to_file
def _construct_import_call(self) -> str:
"""Function to construct the Python code to load all node and edge csv files again into Pandas dfs.
Returns:
str: Python code to load the csv files into Pandas dfs.
"""
import_call = "import pandas as pd\n\n"
for df_name in self.stored_dfs.keys():
import_call += f"{df_name} = pd.read_csv('./{df_name}.csv', header=0, index_col=0)\n"
return import_call
def _get_import_script_name(self) -> str:
"""Function to return the name of the import script."""
return "import_pandas_csv.py"
def _write_node_data(self, nodes) -> bool:
passed = self._write_entities_to_file(nodes)
return passed
def _write_edge_data(self, edges) -> bool:
passed = self._write_entities_to_file(edges)
return passed
def _write_entities_to_file(self, entities: Iterable) -> bool:
"""Function to write the entities to a CSV file.
Args:
entities (Iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
Returns:
bool: True for success, False otherwise.
"""
entities = peekable(entities)
entity_list = self.pandas_in_memory._separate_entity_types(entities)
for entity_type, entities in entity_list.items():
self.in_memory_dfs[
entity_type
] = self.pandas_in_memory._add_entity_df(entity_type, entities)
for entity_type in self.in_memory_dfs.keys():
entity_df = self.in_memory_dfs[entity_type]
if " " in entity_type or "." in entity_type:
entity_type = entity_type.replace(" ", "_").replace(".", "_")
if self.write_to_file:
logger.info(
f"Writing {entity_df.shape[0]} entries to {entity_type}.csv."
)
entity_df.to_csv(
f"{self.output_directory}/{entity_type}.csv",
sep=self.delimiter,
)
self.stored_dfs[entity_type] = entity_df
self.in_memory_dfs = {}
return True
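# Hedged sketch of the generated import script (named as returned by
# _get_import_script_name above). The entity types "Protein" and "Interaction"
# are hypothetical; the generated code simply reloads each stored CSV into a
# DataFrame, mirroring _construct_import_call:
#
#     import pandas as pd
#
#     Protein = pd.read_csv('./Protein.csv', header=0, index_col=0)
#     Interaction = pd.read_csv('./Interaction.csv', header=0, index_col=0)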

View File

@ -0,0 +1,320 @@
import os
import glob
from biocypher._logger import logger
from biocypher.output.write._batch_writer import _BatchWriter
class _PostgreSQLBatchWriter(_BatchWriter):
"""
Class for writing node and edge representations to disk using the
format specified by PostgreSQL for the use of "COPY FROM...". Each batch
writer instance has a fixed representation that needs to be passed
at instantiation via the :py:attr:`schema` argument. The instance
also expects an ontology adapter via :py:attr:`ontology_adapter` to be able
to convert and extend the hierarchy.
This class inherits from the abstract class "_BatchWriter" and implements the
PostgreSQL-specific methods:
- _write_node_headers
- _write_edge_headers
- _construct_import_call
- _write_array_string
"""
DATA_TYPE_LOOKUP = {
"str": "VARCHAR", # VARCHAR needs limit
"int": "INTEGER",
"long": "BIGINT",
"float": "NUMERIC",
"double": "NUMERIC",
"dbl": "NUMERIC",
"boolean": "BOOLEAN",
"str[]": "VARCHAR[]",
"string[]": "VARCHAR[]",
}
def __init__(self, *args, **kwargs):
self._copy_from_csv_commands = set()
super().__init__(*args, **kwargs)
def _get_default_import_call_bin_prefix(self):
"""
Method to provide the default string for the import call bin prefix.
Returns:
str: The default location for the psql command
"""
return ""
def _get_data_type(self, string) -> str:
try:
return self.DATA_TYPE_LOOKUP[string]
except KeyError:
logger.info(
f'Could not determine data type {string}. Using default "VARCHAR".'
)
return "VARCHAR"
def _write_array_string(self, string_list) -> str:
"""
Abstract method to write the string representation of an array into a .csv file
as required by the PostgreSQL COPY command, with '{', '}' brackets and ',' separation.
Args:
string_list (list): list of ontology strings
Returns:
str: The string representation of an array for postgres COPY
"""
string = ",".join(string_list)
string = f'"{{{string}}}"'
return string
def _get_import_script_name(self) -> str:
"""
Returns the name of the psql import script
Returns:
str: The name of the import script (ending in .sh)
"""
return f"{self.db_name}-import-call.sh"
def _adjust_pascal_to_psql(self, string):
string = string.replace(".", "_")
string = string.lower()
return string
def _write_node_headers(self):
"""
Writes a SQL table-creation script for each graph entity that is represented
as a node, as per the definition in the `schema_config.yaml`, and registers
the corresponding COPY commands for its CSV part files.
Returns:
bool: The return value. True for success, False otherwise.
"""
# load headers from data parse
if not self.node_property_dict:
logger.error(
"Header information not found. Was the data parsed first?",
)
return False
for label, props in self.node_property_dict.items():
# create header CSV with ID, properties, labels
# translate label to PascalCase
pascal_label = self.translator.name_sentence_to_pascal(label)
parts = f"{pascal_label}-part*.csv"
parts_paths = os.path.join(self.outdir, parts)
parts_paths = glob.glob(parts_paths)
parts_paths.sort()
# adjust label for import to psql
pascal_label = self._adjust_pascal_to_psql(pascal_label)
table_create_command_path = os.path.join(
self.outdir,
f"{pascal_label}-create_table.sql",
)
# check if file already exists
if os.path.exists(table_create_command_path):
logger.warning(
f"File {table_create_command_path} already exists. Overwriting.",
)
# concatenate key:value in props
columns = ["_ID VARCHAR"]
for col_name, col_type in props.items():
col_type = self._get_data_type(col_type)
col_name = self._adjust_pascal_to_psql(col_name)
columns.append(f"{col_name} {col_type}")
columns.append("_LABEL VARCHAR[]")
with open(table_create_command_path, "w", encoding="utf-8") as f:
command = ""
if self.wipe:
command += f"DROP TABLE IF EXISTS {pascal_label};\n"
# table creation requires comma separation
command += (
f'CREATE TABLE {pascal_label}({",".join(columns)});\n'
)
f.write(command)
for parts_path in parts_paths:
# if import_call_file_prefix is set, replace actual path
# with prefix
if self.import_call_file_prefix != self.outdir:
parts_path = parts_path.replace(
self.outdir,
self.import_call_file_prefix,
)
self._copy_from_csv_commands.add(
f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;"
)
# add file path to import statement
# if import_call_file_prefix is set, replace actual path
# with prefix
if self.import_call_file_prefix != self.outdir:
table_create_command_path = table_create_command_path.replace(
self.outdir,
self.import_call_file_prefix,
)
self.import_call_nodes.add(table_create_command_path)
return True
def _write_edge_headers(self):
"""
Writes a SQL table-creation script for each graph entity that is represented
as an edge, as per the definition in the `schema_config.yaml`, and registers
the corresponding COPY commands for its CSV part files.
Returns:
bool: The return value. True for success, False otherwise.
"""
# load headers from data parse
if not self.edge_property_dict:
logger.error(
"Header information not found. Was the data parsed first?",
)
return False
for label, props in self.edge_property_dict.items():
# translate label to PascalCase
pascal_label = self.translator.name_sentence_to_pascal(label)
parts_paths = os.path.join(self.outdir, f"{pascal_label}-part*.csv")
parts_paths = glob.glob(parts_paths)
parts_paths.sort()
# adjust label for import to psql
pascal_label = self._adjust_pascal_to_psql(pascal_label)
table_create_command_path = os.path.join(
self.outdir,
f"{pascal_label}-create_table.sql",
)
# check if file already exists
if os.path.exists(table_create_command_path):
logger.warning(
f"File {table_create_command_path} already exists. Overwriting.",
)
# concatenate key:value in props
columns = []
for col_name, col_type in props.items():
col_type = self._get_data_type(col_type)
col_name = self._adjust_pascal_to_psql(col_name)
if col_name == "_ID":
# should ideally never happen
raise ValueError(
"Column name '_ID' is reserved for internal use, "
"denoting the relationship ID. Please choose a "
"different name for your column."
)
columns.append(f"{col_name} {col_type}")
# assemble the full column list around the property columns;
# the unpacking also avoids an empty check of the property list
out_list = [
"_START_ID VARCHAR",
"_ID VARCHAR",
*columns,
"_END_ID VARCHAR",
"_TYPE VARCHAR",
]
with open(table_create_command_path, "w", encoding="utf-8") as f:
command = ""
if self.wipe:
command += f"DROP TABLE IF EXISTS {pascal_label};\n"
# table creation requires comma separation
command += (
f'CREATE TABLE {pascal_label}({",".join(out_list)});\n'
)
f.write(command)
for parts_path in parts_paths:
# if import_call_file_prefix is set, replace actual path
# with prefix
if self.import_call_file_prefix != self.outdir:
parts_path = parts_path.replace(
self.outdir,
self.import_call_file_prefix,
)
self._copy_from_csv_commands.add(
f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;"
)
# add file path to import statement
# if import_call_file_prefix is set, replace actual path
# with prefix
if self.import_call_file_prefix != self.outdir:
table_create_command_path = table_create_command_path.replace(
self.outdir,
self.import_call_file_prefix,
)
self.import_call_edges.add(table_create_command_path)
return True
def _construct_import_call(self) -> str:
"""
Function to construct the import call detailing folder and
individual node and edge headers and data files, as well as
delimiters and database name. Built after all data has been
processed to ensure that nodes are called before any edges.
Returns:
str: a bash command for postgresql import
"""
import_call = ""
# create tables
# At this point, csv files of nodes and edges do not require differentiation
for import_file_path in [
*self.import_call_nodes,
*self.import_call_edges,
]:
import_call += f'echo "Setup {import_file_path}..."\n'
if self.db_password:
# set password variable inline
import_call += f"PGPASSWORD={self.db_password} "
import_call += (
f"{self.import_call_bin_prefix}psql -f {import_file_path}"
)
import_call += f" --dbname {self.db_name}"
import_call += f" --host {self.db_host}"
import_call += f" --port {self.db_port}"
import_call += f" --user {self.db_user}"
import_call += '\necho "Done!"\n'
import_call += "\n"
# copy data to tables
for command in self._copy_from_csv_commands:
table_part = command.split(" ")[3]
import_call += f'echo "Importing {table_part}..."\n'
if self.db_password:
# set password variable inline
import_call += f"PGPASSWORD={self.db_password} "
import_call += f'{self.import_call_bin_prefix}psql -c "{command}"'
import_call += f" --dbname {self.db_name}"
import_call += f" --host {self.db_host}"
import_call += f" --port {self.db_port}"
import_call += f" --user {self.db_user}"
import_call += '\necho "Done!"\n'
import_call += "\n"
return import_call
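# Hedged standalone sketch (illustrative, not part of the original module):
# reproduces how _write_node_headers above maps a node label and property dict
# to a CREATE TABLE statement, including the VARCHAR fallback of _get_data_type
# and the label lowering of _adjust_pascal_to_psql. The "Protein" schema used
# here is an assumption.
if __name__ == "__main__":

    def _example_create_table(pascal_label: str, props: dict) -> str:
        lookup = _PostgreSQLBatchWriter.DATA_TYPE_LOOKUP
        label = pascal_label.replace(".", "_").lower()
        columns = ["_ID VARCHAR"]
        for col_name, col_type in props.items():
            sql_type = lookup.get(col_type, "VARCHAR")  # default for unknown types
            columns.append(f"{col_name.replace('.', '_').lower()} {sql_type}")
        columns.append("_LABEL VARCHAR[]")
        return f"CREATE TABLE {label}({','.join(columns)});"

    print(_example_create_table("Protein", {"name": "str", "synonyms": "str[]"}))
    # CREATE TABLE protein(_ID VARCHAR,name VARCHAR,synonyms VARCHAR[],_LABEL VARCHAR[]);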

View File

@ -0,0 +1,51 @@
from biocypher.output.write.relational._postgresql import _PostgreSQLBatchWriter
class _SQLiteBatchWriter(_PostgreSQLBatchWriter):
"""
Class for writing node and edge representations to a SQLite database.
It uses the _PostgreSQLBatchWriter class under the hood, which already
implements the logic to write the nodes/edges to a relational DBMS.
Only the import bash script differs between PostgreSQL and SQLite,
and is therefore implemented in this class:
- _construct_import_call
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def _construct_import_call(self) -> str:
"""
Function to construct the import call detailing folder and
individual node and edge headers and data files, as well as
delimiters and database name. Built after all data has been
processed to ensure that nodes are called before any edges.
Returns:
str: a bash command for sqlite import
"""
import_call = ""
# create tables
# At this point, csv files of nodes and edges do not require differentiation
for import_file_path in [
*self.import_call_nodes,
*self.import_call_edges,
]:
import_call += f'echo "Setup {import_file_path}..."\n'
import_call += f"{self.import_call_bin_prefix}sqlite3 {self.db_name} < {import_file_path}"
import_call += '\necho "Done!"\n'
import_call += "\n"
for command in self._copy_from_csv_commands:
table_name = command.split(" ")[1]
table_part = command.split(" ")[3].replace("'", "")
import_call += f'echo "Importing {table_part}..."\n'
separator = self.delim
import_part = f".import {table_part} {table_name}"
import_call += f"{self.import_call_bin_prefix}sqlite3 -separator $'{separator}' {self.db_name} \"{import_part}\""
import_call += '\necho "Done!"\n'
import_call += "\n"
return import_call
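# Hedged standalone sketch (illustrative): shows how _construct_import_call
# above reuses an inherited PostgreSQL "\copy" command to derive the sqlite3
# ".import" call. The command string, paths and database name are assumptions.
if __name__ == "__main__":
    copy_command = (
        "\\copy protein FROM '/tmp/biocypher-out/Protein-part000.csv' "
        "DELIMITER E';' CSV;"
    )
    table_name = copy_command.split(" ")[1]                 # "protein"
    csv_path = copy_command.split(" ")[3].replace("'", "")  # the part file path
    import_part = f".import {csv_path} {table_name}"
    print(f"sqlite3 -separator $';' biocypher \"{import_part}\"")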