release commit
This commit is contained in:
148
biocypher/_config/__init__.py
Normal file
148
biocypher/_config/__init__.py
Normal file
@ -0,0 +1,148 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# Copyright 2021, Heidelberg University Clinic
|
||||
#
|
||||
# File author(s): Sebastian Lobentanzer
|
||||
# ...
|
||||
#
|
||||
# Distributed under MIT licence, see the file `LICENSE`.
|
||||
#
|
||||
"""
|
||||
Module data directory, including:
|
||||
|
||||
* The BioLink database schema
|
||||
* The default config files
|
||||
"""
|
||||
|
||||
from typing import Any, Optional
|
||||
import os
|
||||
import warnings
|
||||
|
||||
import yaml
|
||||
import appdirs
|
||||
|
||||
# Names exported by a star-import of this module.
# NOTE(review): ``update_from_file`` is defined in this module but is not
# listed here -- confirm whether that omission is intentional.
__all__ = [
    "module_data",
    "module_data_path",
    "read_config",
    "config",
    "reset",
]

# User-level configuration directory as resolved by ``appdirs``, and the
# configuration file looked up inside it.
_USER_CONFIG_DIR = appdirs.user_config_dir(
    appname="biocypher",
    appauthor="saezlab",
)
_USER_CONFIG_FILE = os.path.join(_USER_CONFIG_DIR, "conf.yaml")
|
||||
|
||||
|
||||
class MyLoader(yaml.SafeLoader):
    """
    ``yaml.SafeLoader`` subclass that warns about double-quoted scalars
    whose ``unicode_escape`` encoding contains a backslash.
    """

    def construct_scalar(self, node):
        """
        Construct the scalar for ``node`` via the parent loader and return it
        unchanged, emitting a ``UserWarning`` first if the scalar was written
        in double quotes and its escaped form contains a backslash.
        """

        value = super().construct_scalar(node)
        escaped = value.encode("unicode_escape")

        double_quoted = node.style == '"'
        has_backslash = "\\" in escaped.decode("utf-8")

        # Nothing to report unless both conditions hold.
        if not (double_quoted and has_backslash):
            return value

        message = (
            "Double quotes detected in YAML configuration scalar: "
            f"{escaped}. "
            "These allow escape sequences and may cause problems, for "
            "instance with the Neo4j admin import files (e.g. '\\t'). "
            "Make sure you wanted to do this, and use single quotes "
            "whenever possible."
        )
        warnings.warn(message, category=UserWarning)

        return value
|
||||
|
||||
|
||||
def module_data_path(name: str) -> str:
    """
    Build the absolute path of ``<name>.yaml`` inside this module's
    directory.
    """

    module_dir = os.path.dirname(os.path.abspath(__file__))
    file_name = "{}.yaml".format(name)

    return os.path.join(module_dir, file_name)
|
||||
|
||||
|
||||
def module_data(name: str) -> Any:
    """
    Load the YAML file ``<name>.yaml`` located in this module's directory
    and return its parsed contents.
    """

    return _read_yaml(module_data_path(name))
|
||||
|
||||
|
||||
def _read_yaml(path: str) -> Optional[dict]:
|
||||
if os.path.exists(path):
|
||||
with open(path, "r") as fp:
|
||||
return yaml.load(fp.read(), Loader=MyLoader)
|
||||
|
||||
|
||||
def read_config() -> dict:
    """
    Read the module config.

    Read and merge the built-in default, the user level and directory level
    configuration, with the latter taking precedence over the former.

    Only keys that exist in the built-in defaults are considered. Layers are
    applied in order (user, then directory level): dictionary sections are
    updated key by key, any other value is replaced. Keys whose value is
    ``None`` in a layer are skipped for that layer.

    TODO explain path configuration

    Returns:
        The merged configuration dictionary.
    """

    defaults = module_data("biocypher_config")
    user = _read_yaml(_USER_CONFIG_FILE) or {}
    # TODO account for .yml?
    local = (
        _read_yaml("biocypher_config.yaml")
        or _read_yaml("config/biocypher_config.yaml")
        or {}
    )

    # Apply the layers from lowest to highest precedence, so that a section
    # present in both files keeps the user-level values that the directory
    # level file does not override.
    for layer in (user, local):
        for key in defaults:
            value = layer[key] if key in layer else None

            if value is None:
                continue

            if isinstance(defaults[key], dict):
                # Section: merge the individual settings.
                defaults[key].update(value)
            else:
                # First level config (like title): replace the value.
                defaults[key] = value

    return defaults
|
||||
|
||||
|
||||
def config(*args, **kwargs) -> Optional[Any]:
    """
    Set or get module config parameters.

    Positional arguments are keys to look up in the module-level
    configuration; keyword arguments are key/value pairs to set.

    When setting, a key whose current value is a dictionary is updated in
    place with the given value; any other key (including one that does not
    exist yet) is assigned the value directly.

    Returns:
        For a single positional key, its value (``None`` if the key is
        absent); for several keys, a tuple of their values in the given
        order; ``None`` when setting or when called without arguments.

    Raises:
        ValueError: If positional and keyword arguments are mixed.
    """

    if args and kwargs:
        raise ValueError(
            "Setting and getting values in the same call is not allowed.",
        )

    if args:
        current = globals()["_config"]
        result = tuple(current.get(key, None) for key in args)

        return result[0] if len(result) == 1 else result

    for key, value in kwargs.items():
        current = globals()["_config"]
        existing = current.get(key)

        if isinstance(existing, dict):
            # Section: merge the new settings into the existing ones.
            existing.update(value)
        else:
            # Scalar or previously unknown key: ``.update`` is not available,
            # so store the value as is.
            current[key] = value

    return None
|
||||
|
||||
|
||||
def reset():
    """
    Re-read the configuration and store it as the module-level ``_config``.
    """

    global _config

    _config = read_config()
|
||||
|
||||
|
||||
# Populate the module-level ``_config`` once when the module is imported.
reset()
|
||||
|
||||
|
||||
def update_from_file(path: str):
    """
    Update the module configuration from a YAML file.

    Args:
        path: Path of the YAML file whose top-level keys are passed to
            ``config`` as keyword arguments.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        TypeError: If the file content is not a mapping.
    """

    # ``_read_yaml`` returns ``None`` for a missing file, which would surface
    # as an opaque error from the ``**`` unpacking below; fail clearly instead.
    if not os.path.exists(path):
        raise FileNotFoundError(f"Configuration file not found: {path}")

    data = _read_yaml(path)

    # Nothing was loaded from the file, so there is nothing to update.
    if data is None:
        return

    if not isinstance(data, dict):
        raise TypeError(
            f"Configuration file must contain a mapping at top level: {path}"
        )

    config(**data)
|
141
biocypher/_config/biocypher_config.yaml
Normal file
141
biocypher/_config/biocypher_config.yaml
Normal file
@ -0,0 +1,141 @@
|
||||
Title: BioCypher python module configuration file
|
||||
|
||||
## Some options are not used by default. Uncomment them to use them.
|
||||
|
||||
biocypher:
|
||||
### Required parameters ###
|
||||
## DBMS type
|
||||
|
||||
dbms: neo4j
|
||||
|
||||
## Schema configuration
|
||||
|
||||
# schema_config_path: config/schema_config.yaml
|
||||
|
||||
## Offline mode: do not connect to a running DBMS instance
|
||||
## Can be used e.g. for writing batch import files
|
||||
|
||||
offline: true
|
||||
|
||||
## Strict mode: do not allow creating new nodes or relationships without
|
||||
## specifying source, version, and license parameters
|
||||
|
||||
strict_mode: false
|
||||
|
||||
## Ontology configuration
|
||||
|
||||
head_ontology:
|
||||
url: https://github.com/biolink/biolink-model/raw/v3.2.1/biolink-model.owl.ttl
|
||||
root_node: entity
|
||||
# switch_label_and_id: true
|
||||
|
||||
### Optional parameters ###
|
||||
|
||||
## Logging
|
||||
# Write log to disk
|
||||
log_to_disk: true
|
||||
|
||||
# Activate more granular logging
|
||||
debug: true
|
||||
|
||||
# Change the log directory
|
||||
# log_directory: biocypher-log
|
||||
|
||||
## Data output directory
|
||||
# output_directory: biocypher-out
|
||||
|
||||
## Resource cache directory
|
||||
# cache_directory: .cache
|
||||
|
||||
## Optional tail ontologies
|
||||
|
||||
# tail_ontologies:
|
||||
# so:
|
||||
# url: test/ontologies/so.owl
|
||||
# head_join_node: sequence variant
|
||||
# tail_join_node: sequence_variant
|
||||
# switch_label_and_id: true
|
||||
# mondo:
|
||||
# url: test/ontologies/mondo.owl
|
||||
# head_join_node: disease
|
||||
# tail_join_node: disease
|
||||
# switch_label_and_id: true
|
||||
|
||||
### DBMS configuration ###
|
||||
|
||||
neo4j:
|
||||
### Neo4j configuration ###
|
||||
## Database name
|
||||
|
||||
database_name: neo4j
|
||||
|
||||
## Wipe DB before import (offline mode: --force)
|
||||
|
||||
wipe: true
|
||||
|
||||
## Neo4j authentication
|
||||
|
||||
uri: neo4j://localhost:7687
|
||||
user: neo4j
|
||||
password: neo4j
|
||||
|
||||
## Neo4j admin import batch writer settings
|
||||
|
||||
delimiter: ";"
|
||||
array_delimiter: "|"
|
||||
quote_character: "'"
|
||||
|
||||
## MultiDB functionality
|
||||
## Set to false when using the community edition or older versions of Neo4j
|
||||
|
||||
multi_db: true
|
||||
|
||||
## Import options
|
||||
|
||||
skip_duplicate_nodes: false
|
||||
skip_bad_relationships: false
|
||||
|
||||
## Import call prefixes
|
||||
|
||||
# import_call_bin_prefix: bin/
|
||||
# import_call_file_prefix: path/to/files/
|
||||
|
||||
postgresql:
|
||||
### PostgreSQL configuration ###
|
||||
|
||||
# PostgreSQL connection credentials
|
||||
database_name: postgres # DB name
|
||||
user: postgres # user name
|
||||
password: postgres # password
|
||||
host: localhost # host
|
||||
port: 5432 # port
|
||||
|
||||
# PostgreSQL import batch writer settings
|
||||
quote_character: '"'
|
||||
delimiter: '\t'
|
||||
# import_call_bin_prefix: '' # path to "psql"
|
||||
# import_call_file_prefix: '/path/to/files'
|
||||
|
||||
rdf:
|
||||
### RDF configuration ###
|
||||
rdf_format: turtle
|
||||
|
||||
sqlite:
|
||||
### SQLite configuration ###
|
||||
|
||||
# SQLite connection credentials
|
||||
database_name: sqlite.db # DB name
|
||||
|
||||
# SQLite import batch writer settings
|
||||
quote_character: '"'
|
||||
delimiter: '\t'
|
||||
# import_call_bin_prefix: '' # path to "sqlite3"
|
||||
# import_call_file_prefix: '/path/to/files'
|
||||
|
||||
csv:
|
||||
### CSV/Pandas configuration ###
|
||||
delimiter: ","
|
||||
|
||||
networkx:
|
||||
### NetworkX configuration ###
|
||||
some_config: some_value # placeholder for technical reasons TODO
|
5
biocypher/_config/test_config.yaml
Normal file
5
biocypher/_config/test_config.yaml
Normal file
@ -0,0 +1,5 @@
|
||||
# We test the quote detection
|
||||
|
||||
valid: 'This is a valid string'
|
||||
also_valid: "This is also a valid string"
|
||||
invalid: "\t"
|
140
biocypher/_config/test_schema_config.yaml
Normal file
140
biocypher/_config/test_schema_config.yaml
Normal file
@ -0,0 +1,140 @@
|
||||
Title: BioCypher graph schema configuration file
|
||||
|
||||
# ---
|
||||
# "Named Things"
|
||||
# ---
|
||||
|
||||
protein:
|
||||
represented_as: node
|
||||
preferred_id: uniprot
|
||||
input_label: protein
|
||||
db_collection_name: proteins
|
||||
properties:
|
||||
name: str
|
||||
score: float
|
||||
taxon: int
|
||||
genes: str[]
|
||||
|
||||
microRNA:
|
||||
represented_as: node
|
||||
preferred_id: mirbase.mature
|
||||
input_label: mirna
|
||||
|
||||
complex:
|
||||
synonym_for: macromolecular complex
|
||||
represented_as: node
|
||||
preferred_id: complexportal
|
||||
input_label: complex
|
||||
|
||||
pathway:
|
||||
represented_as: node
|
||||
preferred_id: [reactome, wikipathways]
|
||||
input_label: [reactome, wikipathways]
|
||||
|
||||
gene:
|
||||
represented_as: node
|
||||
preferred_id: hgnc
|
||||
input_label: [hgnc, ensg]
|
||||
exclude_properties: accession
|
||||
|
||||
disease:
|
||||
represented_as: node
|
||||
preferred_id: doid
|
||||
input_label: Disease
|
||||
|
||||
side effect:
|
||||
is_a: phenotypic feature
|
||||
represented_as: node
|
||||
preferred_id: sider.effect
|
||||
input_label: sider
|
||||
|
||||
sequence variant:
|
||||
represented_as: node
|
||||
preferred_id: [clinically relevant, known, somatic]
|
||||
input_label: [Clinically_relevant_variant, Known_variant, Somatic_mutation]
|
||||
properties:
|
||||
source: str
|
||||
original_source: str
|
||||
effect: str
|
||||
biotype: str
|
||||
|
||||
snRNA sequence:
|
||||
is_a: nucleic acid entity
|
||||
represented_as: node
|
||||
preferred_id: [intact, rnacentral]
|
||||
input_label: [intact_snrna, rnacentral_snrna]
|
||||
properties:
|
||||
ac: str
|
||||
fullName: str
|
||||
shortName: str
|
||||
preferredName: str
|
||||
exclude_properties: sequence
|
||||
|
||||
DNA sequence:
|
||||
is_a: nucleic acid entity
|
||||
represented_as: node
|
||||
preferred_id: ensembl
|
||||
input_label: dna
|
||||
properties:
|
||||
ac: str
|
||||
fullName: str
|
||||
shortName: str
|
||||
preferredName: str
|
||||
sequence: str
|
||||
|
||||
dsDNA sequence:
|
||||
is_a: [DNA sequence, nucleic acid entity]
|
||||
inherit_properties: True
|
||||
represented_as: node
|
||||
preferred_id: [intact, uniparc]
|
||||
input_label: [intact_dsdna, uniprot_archive_dsdna]
|
||||
|
||||
# ---
|
||||
# Associations
|
||||
# ---
|
||||
|
||||
post translational interaction:
|
||||
is_a: pairwise molecular interaction
|
||||
represented_as: node
|
||||
label_as_edge: INTERACTS_POST_TRANSLATIONAL
|
||||
input_label: post_translational
|
||||
|
||||
phosphorylation:
|
||||
is_a: post translational interaction
|
||||
represented_as: edge
|
||||
input_label: phosphorylation
|
||||
|
||||
gene to disease association:
|
||||
represented_as: edge
|
||||
label_as_edge: PERTURBED_IN_DISEASE
|
||||
input_label: [protein_disease, gene_disease]
|
||||
exclude_properties: accession
|
||||
|
||||
mutation to tissue association:
|
||||
is_a: [genotype to tissue association, entity to tissue association, association]
|
||||
represented_as: edge
|
||||
label_as_edge: Is_Mutated_In
|
||||
input_label: Gene_Is_Mutated_In_Cell_Tissue
|
||||
|
||||
variant to gene association: # -> Known.... and Somatic....
|
||||
represented_as: edge
|
||||
source: [known.sequence variant, somatic.sequence variant]
|
||||
target: gene
|
||||
input_label: [
|
||||
VARIANT_FOUND_IN_GENE_Known_variant_Gene,
|
||||
VARIANT_FOUND_IN_GENE_Somatic_mutation_Gene
|
||||
]
|
||||
|
||||
gene to gene association:
|
||||
represented_as: edge
|
||||
input_label: gene_gene
|
||||
properties:
|
||||
directional: bool
|
||||
curated: bool
|
||||
score: float
|
||||
id: str # should be removed
|
||||
|
||||
gene to variant association: # should be removed
|
||||
is_a: gene to variant association
|
||||
represented_as: edge
|
||||
input_label: gene_variant
|
3
biocypher/_config/test_schema_config_disconnected.yaml
Normal file
3
biocypher/_config/test_schema_config_disconnected.yaml
Normal file
@ -0,0 +1,3 @@
|
||||
disconnected:
|
||||
represented_as: node
|
||||
label_in_input: disconnected
|
152
biocypher/_config/test_schema_config_extended.yaml
Normal file
152
biocypher/_config/test_schema_config_extended.yaml
Normal file
@ -0,0 +1,152 @@
|
||||
Title: BioCypher graph schema configuration file
|
||||
|
||||
# ---
|
||||
# "Named Things"
|
||||
# ---
|
||||
|
||||
protein:
|
||||
represented_as: node
|
||||
preferred_id: uniprot
|
||||
input_label: protein
|
||||
db_collection_name: proteins
|
||||
properties:
|
||||
name: str
|
||||
score: float
|
||||
taxon: int
|
||||
genes: str[]
|
||||
|
||||
microRNA:
|
||||
represented_as: node
|
||||
preferred_id: mirbase.mature
|
||||
input_label: mirna
|
||||
|
||||
complex:
|
||||
synonym_for: macromolecular complex
|
||||
represented_as: node
|
||||
preferred_id: complexportal
|
||||
input_label: complex
|
||||
|
||||
pathway:
|
||||
represented_as: node
|
||||
preferred_id: [reactome, wikipathways]
|
||||
input_label: [reactome, wikipathways]
|
||||
|
||||
gene:
|
||||
represented_as: node
|
||||
preferred_id: hgnc
|
||||
input_label: [hgnc, ensg]
|
||||
exclude_properties: accession
|
||||
|
||||
disease:
|
||||
represented_as: node
|
||||
preferred_id: doid
|
||||
input_label: Disease
|
||||
|
||||
side effect:
|
||||
is_a: phenotypic feature
|
||||
represented_as: node
|
||||
preferred_id: sider.effect
|
||||
input_label: sider
|
||||
|
||||
sequence variant:
|
||||
represented_as: node
|
||||
preferred_id: [clinically relevant, known, somatic]
|
||||
input_label: [Clinically_relevant_variant, Known_variant, Somatic_mutation]
|
||||
properties:
|
||||
source: str
|
||||
original_source: str
|
||||
effect: str
|
||||
biotype: str
|
||||
|
||||
altered gene product level:
|
||||
represented_as: node
|
||||
input_label: agpl
|
||||
|
||||
decreased gene product level:
|
||||
represented_as: node
|
||||
input_label: agpl_decreased
|
||||
|
||||
lethal variant:
|
||||
represented_as: node
|
||||
input_label: lethal
|
||||
|
||||
snRNA sequence:
|
||||
is_a: nucleic acid entity
|
||||
represented_as: node
|
||||
preferred_id: [intact, rnacentral]
|
||||
input_label: [intact_snrna, rnacentral_snrna]
|
||||
properties:
|
||||
ac: str
|
||||
fullName: str
|
||||
shortName: str
|
||||
preferredName: str
|
||||
exclude_properties: sequence
|
||||
|
||||
DNA sequence:
|
||||
is_a: nucleic acid entity
|
||||
represented_as: node
|
||||
preferred_id: ensembl
|
||||
input_label: dna
|
||||
properties:
|
||||
ac: str
|
||||
fullName: str
|
||||
shortName: str
|
||||
preferredName: str
|
||||
sequence: str
|
||||
|
||||
dsDNA sequence:
|
||||
is_a: [DNA sequence, nucleic acid entity]
|
||||
inherit_properties: True
|
||||
represented_as: node
|
||||
preferred_id: [intact, uniparc]
|
||||
input_label: [intact_dsdna, uniprot_archive_dsdna]
|
||||
|
||||
# ---
|
||||
# Associations
|
||||
# ---
|
||||
|
||||
post translational interaction:
|
||||
is_a: pairwise molecular interaction
|
||||
represented_as: node
|
||||
label_as_edge: INTERACTS_POST_TRANSLATIONAL
|
||||
input_label: post_translational
|
||||
|
||||
phosphorylation:
|
||||
is_a: post translational interaction
|
||||
represented_as: edge
|
||||
use_id: false
|
||||
input_label: phosphorylation
|
||||
|
||||
gene to disease association:
|
||||
represented_as: edge
|
||||
label_as_edge: PERTURBED_IN_DISEASE
|
||||
input_label: [protein_disease, gene_disease]
|
||||
exclude_properties: accession
|
||||
|
||||
mutation to tissue association:
|
||||
is_a: [genotype to tissue association, entity to tissue association, association]
|
||||
represented_as: edge
|
||||
label_as_edge: Is_Mutated_In
|
||||
input_label: Gene_Is_Mutated_In_Cell_Tissue
|
||||
|
||||
variant to gene association: # -> Known.... and Somatic....
|
||||
represented_as: edge
|
||||
source: [known.sequence variant, somatic.sequence variant]
|
||||
target: gene
|
||||
input_label: [
|
||||
VARIANT_FOUND_IN_GENE_Known_variant_Gene,
|
||||
VARIANT_FOUND_IN_GENE_Somatic_mutation_Gene
|
||||
]
|
||||
|
||||
gene to gene association:
|
||||
represented_as: edge
|
||||
input_label: gene_gene
|
||||
properties:
|
||||
directional: bool
|
||||
curated: bool
|
||||
score: float
|
||||
|
||||
gene to variant association:
|
||||
is_a: gene to variant association
|
||||
represented_as: edge
|
||||
input_label: gene_variant
|
Reference in New Issue
Block a user