Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .github/workflows/documentation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Builds the MkDocs site and publishes it to GitHub Pages.
name: Publish mkdocs documentation

on:
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:
  # release:
  #   types: [created]
  push:
    # The CI workflow in this repository targets `main`; `master` is kept so
    # the trigger keeps firing wherever that branch name is still in use.
    branches:
      - main
      - master
    paths:
      - 'docs/**'

jobs:
  build-and-publish:
    name: Publish mkdocs documentation
    runs-on: ubuntu-latest
    steps:
      - name: Checkout main
        uses: actions/checkout@v3
      - name: Deploy docs
        uses: mhausenblas/mkdocs-deploy-gh-pages@master
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # NOTE(review): the MkDocs config committed with this workflow is
          # named `mkdocks.yml`; rename that file or update this value so the
          # deploy step can find its configuration.
          CONFIG_FILE: mkdocs.yml
77 changes: 77 additions & 0 deletions .github/workflows/qc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Lints the code base and runs the test suite on pushes and pull requests to main.
name: Build and test

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  lint:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [ '3.9' ]
    steps:
      # Same action versions as the `test` job below, so both jobs run on the
      # same action runtime.
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          pip install flake8 flake8-black flake8-isort
      - name: Check code quality
        run: |
          flake8 --ignore F401 src/ test/

  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [ '3.9' ]

    steps:

      #----------------------------------------------
      #       check-out repo and set-up python
      #----------------------------------------------
      - name: Check out repository
        uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      #----------------------------------------------
      #       install & configure poetry
      #----------------------------------------------
      - name: Install Poetry
        uses: snok/install-poetry@v1.3
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true

      #----------------------------------------------
      #       load cached venv if cache exists
      #----------------------------------------------
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        with:
          path: .venv
          # The Python version is part of the key so a venv built for one
          # interpreter is never restored for another matrix entry.
          key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}

      #----------------------------------------------
      #  install dependencies if cache does not exist
      #----------------------------------------------
      - name: Install dependencies
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
        run: poetry install --no-interaction --no-root

      #----------------------------------------------
      #       run test suite
      #----------------------------------------------
      - name: Run tests
        run: poetry run pytest
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,5 @@ dmypy.json

# Pyre type checker
.pyre/

src/pandasaurus/main.py
13 changes: 13 additions & 0 deletions mkdocks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# MkDocs configuration for the PandaSaurus documentation site.
site_name: PandaSaurus
theme: readthedocs
repo_url: https://github.com/INCATools/PandaSaurus
# Extra directory to watch besides the docs folder
# (presumably so docstring changes under src/ trigger a rebuild — confirm).
watch: ['src/']

# Plugins enabled for the build; each must be installed in the docs environment.
plugins:
- search
- autorefs
- section-index
- mkdocstrings

# Site navigation: a single "Home" entry pointing at README.md.
nav:
- Home: README.md
4,440 changes: 4,440 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Package metadata consumed by Poetry.
[tool.poetry]
name = "pandasaurus"
version = "0.1.0"
description = "Supporting simple queries over ontology annotations in dataframes, using UberGraph queries."
authors = ["Ismail Ugur Bayindir <ugur@ebi.ac.uk>"]
# SPDX identifier for the Apache License 2.0 (previously the licence URL).
license = "Apache-2.0"
readme = "README.md"

# Runtime dependencies.
[tool.poetry.dependencies]
python = "^3.9"
oaklib = "^0.5.3"

# Development-only tooling: tests, formatting and linting.
[tool.poetry.group.dev.dependencies]
pytest = "^7.3.1"
black = "^23.3.0"
isort = "^5.12.0"
flake8 = "^6.0.0"
flake8-black = "^0.3.6"
flake8-isort = "^6.0.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.black]
line-length = 100

# Keep isort's output compatible with black's formatting.
[tool.isort]
profile = "black"
Empty file added src/__init__.py
Empty file.
Empty file added src/pandasaurus/__init__.py
Empty file.
1 change: 1 addition & 0 deletions src/pandasaurus/config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Re-export the loaded defaults from the sibling ``config`` module. The import is
# explicitly relative: inside this package an absolute ``from config import ...``
# would resolve to the package itself, which does not define ``default_config``.
from .config import default_config
2 changes: 2 additions & 0 deletions src/pandasaurus/config/config.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
; Default settings read by config.py.
; Values are deliberately unquoted: configparser does not strip quotes, so a
; quoted URL would be returned with the quote characters included.
[DEFAULT]
UBERGRAPH_ENDPOINT = https://ubergraph.apps.renci.org/sparql
10 changes: 10 additions & 0 deletions src/pandasaurus/config/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import configparser


def get_config():
    """Load ``config.ini`` from the directory that contains this module.

    The path is resolved relative to this file rather than the current working
    directory, so the result does not depend on where the process was started.

    Returns:
        configparser.ConfigParser: Parser populated from ``config.ini``. If the file
        cannot be read the parser is returned without any values, because
        ``ConfigParser.read`` silently skips files it cannot open.

    """
    from pathlib import Path

    conf = configparser.ConfigParser()
    conf.read(Path(__file__).resolve().parent / "config.ini", encoding="utf-8")
    return conf


# Module-level shortcut to the [DEFAULT] section; get_config() runs once, at import time.
default_config = get_config()["DEFAULT"]
79 changes: 79 additions & 0 deletions src/pandasaurus/curie_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from typing import List, Tuple

from utils.query_utils import (
    run_curie_list_query,
    run_curie_prefix_query,
    run_obsolete_term_query,
    run_replaced_by_query,
)


class CurieValidator:
    """Validate CURIE prefixes and CURIEs of slim terms and suggest replacements for obsoleted terms.

    Every method is a static stub: it forwards ``curie_list`` to the matching query helper,
    discards the result and returns ``None`` until the implementation is added.
    """

    def __init__(self):
        # We might also use Slim object in here
        pass

    @staticmethod
    def validate_curie_prefixes(curie_list: List[str]) -> List[Tuple[str, bool]]:
        """Report whether the CURIE prefixes are valid or not.

        Args:
            curie_list: List of CURIEs

        Returns:
            Intended result is one ``(CURIE, is_valid)`` pair per term. Not implemented yet:
            the query result is discarded and ``None`` is returned.

        """
        # Is there any way to validate prefixes via Ubergraph or are we going to validate them
        # via a config file? OAK?
        run_curie_prefix_query(curie_list)
        # TODO Add missing implementation

    @staticmethod
    def validate_curie_list(curie_list: List[str]) -> List[Tuple[str, bool]]:
        """Report whether the CURIEs are valid or not.

        Args:
            curie_list: List of CURIEs

        Returns:
            Intended result is one ``(CURIE, is_valid)`` pair per term. Not implemented yet:
            the query result is discarded and ``None`` is returned.

        """
        run_curie_list_query(curie_list)
        # TODO Add missing implementation

    @staticmethod
    def find_obsolete_terms(curie_list: List[str]) -> List[Tuple[str, bool]]:
        """Report whether the terms in ``curie_list`` are obsoleted or not.

        Args:
            curie_list: List of CURIEs

        Returns:
            Intended result is one ``(CURIE, is_obsolete)`` pair per term. Not implemented yet:
            the query result is discarded and ``None`` is returned.

        """
        run_obsolete_term_query(curie_list)
        # TODO Add missing implementation

    @staticmethod
    def find_obsolete_term_replacement(curie_list: List[str]):
        """Suggest a term for each obsoleted term in ``curie_list``.

        Args:
            curie_list: List of CURIEs

        Returns:
            Intended result is a list of suggested terms. Not implemented yet: the query
            result is discarded and ``None`` is returned.

        """
        run_replaced_by_query(curie_list)
        # TODO Add missing implementation
103 changes: 103 additions & 0 deletions src/pandasaurus/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import logging
from typing import List

import pandas as pd
from utils.query_utils import (
retrieve_contextual_slim_triples,
retrieve_full_slim_triples,
retrieve_minimal_slim_triples,
retrieve_simple_slim_triples,
)


class Query:
    """Hold a seed list of classes and the dataframe produced by enriching it.

    The intended result is the non-redundant graph for ``s subClassOf o`` as a simple Pandas
    dataframe, built from two sets of classes, S(s) and S(o), derived from an initial seed S(i).
    The enrichment methods are placeholders for now: each one logs the seed list, stores an
    empty dataframe in ``enriched_df`` and returns it.
    """

    def __init__(self, seed_list: List[str]):
        # Starts out empty; every enrichment call replaces it.
        self.enriched_df: pd.DataFrame = pd.DataFrame()
        self.seed_list = seed_list

    def simple_enrichment(self) -> pd.DataFrame:
        """Simple enrichment; S(s) = S(i); S(o) = S(i).

        Returns:
            pd.DataFrame: Enriched dataframe (currently always empty)

        """
        logging.info(self.seed_list)
        # TODO Add missing implementation
        self.enriched_df = pd.DataFrame()
        return self.enriched_df

    def minimal_slim_enrichment(self, slim_list: List[str]) -> pd.DataFrame:
        """Minimal slim enrichment; S(s) = S(i); S(o) = S(i) + all classes in some specified slims.

        A class is in a slim when it is tagged with the corresponding 'subset' axiom.

        Args:
            slim_list (List[str]): Slim list (not used yet)

        Returns:
            pd.DataFrame: Enriched dataframe (currently always empty)

        """
        logging.info(self.seed_list)
        # TODO Add missing implementation
        self.enriched_df = pd.DataFrame()
        return self.enriched_df

    def full_slim_enrichment(self, slim_list: List[str]) -> pd.DataFrame:
        """Full slim enrichment; S(s) = S(i); S(o) = S(i) + all classes in some specified slims.

        A class is in a slim when it is tagged with the corresponding 'subset' axiom. Unlike the
        minimal variant, this is meant to use a transitive query of the redundant graph, such as
        owl:subClassOf*.

        Args:
            slim_list (List[str]): Slim list (not used yet)

        Returns:
            pd.DataFrame: Enriched dataframe (currently always empty)

        """
        logging.info(self.seed_list)
        # TODO Add missing implementation
        self.enriched_df = pd.DataFrame()
        return self.enriched_df

    def contextual_slim_enrichment(self, context: List[str]) -> pd.DataFrame:
        """Contextual slim enrichment; S(s) = S(i); S(o) = S(i) + context-restricted classes.

        The added classes are those satisfied by some set of existential restrictions in the
        ubergraph redundant graph (e.g. part_of 'Kidney').

        Args:
            context: Organ/tissue/multicellular anatomical structure list to determine the
                redundant graph via existential restrictions (not used yet)

        Returns:
            pd.DataFrame: Enriched dataframe (currently always empty)

        """
        logging.info(self.seed_list)
        # TODO Add missing implementation
        self.enriched_df = pd.DataFrame()
        return self.enriched_df

    def query(self, column_name: str, query_term: str) -> pd.DataFrame:
        """Filter the enriched dataframe by an object label or synonym.

        Intended behaviour: join on ``column_name`` to the subject of ``enriched_df`` and look up
        the object name or synonym via a name_lookup query. Not implemented yet, so
        ``enriched_df`` is returned unchanged.

        Args:
            column_name (str): Column name (not used yet)
            query_term (str): Object label or synonym (not used yet)

        Returns:
            pd.DataFrame: Filtered dataframe

        """
        # TODO Add missing implementation
        return self.enriched_df
Empty file.
42 changes: 42 additions & 0 deletions src/pandasaurus/resources/slim.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from typing import List

from .term import Term


class Slim:
    """An upper level slim: a name, a description and the list of terms it contains."""

    def __init__(self, name: str, description: str, term_list: List[Term]):
        # Stored as given; no copying or validation is performed.
        self.term_list = term_list
        self.description = description
        self.name = name

    def get_name(self) -> str:
        """Return the slim name.

        Returns:
            str: Slim name

        """
        return self.name

    def get_description(self) -> str:
        """Return the slim description.

        Returns:
            str: Slim description

        """
        return self.description

    def get_term_list(self) -> List[Term]:
        """Return the terms of the slim (the stored list itself, not a copy).

        Returns:
            List[Term]: Term list

        """
        return self.term_list

    def __str__(self):
        # Human-readable summary; the term list is intentionally left out.
        return f"Name:{self.name}, Description: {self.description}"
Loading