INCATools · ubyndr · Apr 28, 2023 · Apr 3, 2023 · Apr 3, 2023 · Apr 3, 2023
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
@@ -0,0 +1,25 @@
+name: Publish mkdocs documentation
+
+on:
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+#  release:
+#    types: [created]
+  push:
+    # Same branch the "Build and test" workflow (qc.yml) is triggered on
+    branches:
+      - main
+    # Only publish when the documentation sources change
+    paths:
+      - 'docs/**'
+
+jobs:
+  build-and-publish:
+    name: Publish mkdocs documentation
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout main
+        uses: actions/checkout@v3
+      - name: Deploy docs
+        uses: mhausenblas/mkdocs-deploy-gh-pages@master
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          CONFIG_FILE: mkdocs.yml
diff --git a/.github/workflows/qc.yml b/.github/workflows/qc.yml
@@ -0,0 +1,77 @@
+name: Build and test
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [ '3.9' ]
+    steps:
+      # Action versions match the ones used by the `test` job of this workflow
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          pip install flake8 flake8-black flake8-isort
+      - name: Check code quality
+        # --extend-ignore adds F401 to flake8's default ignore list; --ignore would replace that list
+        run: |
+          flake8 --extend-ignore F401 src/ test/
+
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [ '3.9' ]
+
+    steps:
+
+      #----------------------------------------------
+      #       check-out repo and set-up python
+      #----------------------------------------------
+      - name: Check out repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      #----------------------------------------------
+      #          install & configure poetry
+      #----------------------------------------------
+      - name: Install Poetry
+        uses: snok/install-poetry@v1.3
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+
+      #----------------------------------------------
+      #       load cached venv if cache exists
+      #----------------------------------------------
+      - name: Load cached venv
+        id: cached-poetry-dependencies
+        uses: actions/cache@v3
+        with:
+          path: .venv
+          # The Python version is part of the key so that a venv built for one
+          # interpreter of the matrix is never restored for another one
+          key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
+
+      #----------------------------------------------
+      # install dependencies if cache does not exist
+      #----------------------------------------------
+      - name: Install dependencies
+        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
+        run: poetry install --no-interaction --no-root
+
+      #----------------------------------------------
+      #              run test suite
+      #----------------------------------------------
+      - name: Run tests
+        run: poetry run pytest
diff --git a/.gitignore b/.gitignore
@@ -127,3 +127,5 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+src/pandasaurus/main.py
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -0,0 +1,13 @@
+site_name: PandaSaurus
+theme: readthedocs
+repo_url: https://github.com/INCATools/PandaSaurus
+watch: ['src/']
+
+plugins:
+- search
+- autorefs
+- section-index
+- mkdocstrings
+
+nav:
+  - Home: README.md
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,29 @@
+[tool.poetry]
+name = "pandasaurus"
+version = "0.1.0"
+description = "Supporting simple queries over ontology annotations in dataframes, using UberGraph queries."
+authors = ["Ismail Ugur Bayindir <ugur@ebi.ac.uk>"]
+# SPDX identifier of the Apache License 2.0 (the notation Poetry recommends), not a URL
+license = "Apache-2.0"
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.9"
+oaklib = "^0.5.3"
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.3.1"
+black = "^23.3.0"
+isort = "^5.12.0"
+flake8 = "^6.0.0"
+flake8-black = "^0.3.6"
+flake8-isort = "^6.0.0"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.black]
+line-length = 100
+
+[tool.isort]
+profile = "black"
diff --git a/src/__init__.py b/src/__init__.py
diff --git a/src/pandasaurus/__init__.py b/src/pandasaurus/__init__.py
diff --git a/src/pandasaurus/config/__init__.py b/src/pandasaurus/config/__init__.py
@@ -0,0 +1 @@
+# Re-export default_config from the sibling config.py module. The import is explicitly
+# relative because a bare `config` would resolve to this package itself (or to nothing).
+from .config import default_config
diff --git a/src/pandasaurus/config/config.ini b/src/pandasaurus/config/config.ini
@@ -0,0 +1,2 @@
+[DEFAULT]
+; Left unquoted on purpose: configparser keeps quote characters as part of the value
+UBERGRAPH_ENDPOINT = https://ubergraph.apps.renci.org/sparql
diff --git a/src/pandasaurus/config/config.py b/src/pandasaurus/config/config.py
@@ -0,0 +1,10 @@
+import configparser
+
+
+def get_config():
+    """Load the settings stored in the ``config.ini`` file located next to this module.
+
+    Returns:
+        configparser.ConfigParser: Parser holding the values read from ``config.ini``. If the
+        file does not exist the parser is returned without them, because ``ConfigParser.read``
+        silently skips files it cannot open.
+
+    """
+    from pathlib import Path
+
+    conf = configparser.ConfigParser()
+    # Resolve the file against this module instead of the current working directory; otherwise
+    # the settings are only found when the process is started from this very folder.
+    conf.read(Path(__file__).resolve().parent / "config.ini")
+    return conf
+
+
+# Evaluated once at import time: the [DEFAULT] section of the parser built by get_config().
+default_config = get_config()["DEFAULT"]
diff --git a/src/pandasaurus/curie_validator.py b/src/pandasaurus/curie_validator.py
@@ -0,0 +1,79 @@
+from typing import List, Tuple
+
+from utils.query_utils import (
+    run_curie_list_query,
+    run_curie_prefix_query,
+    run_obsolete_term_query,
+    run_replaced_by_query,
+)
+
+
+class CurieValidator:
+    """Validates CURIE prefixes and CURIEs of slim terms.
+
+    It is also meant to suggest replacements for obsoleted slim terms. Every method is still a
+    stub: it forwards ``curie_list`` to the matching query helper, ignores the result and
+    returns ``None``.
+    """
+
+    def __init__(self):
+        # We might also use Slim object in here
+        pass
+
+    @staticmethod
+    def validate_curie_prefixes(curie_list: List[str]) -> List[Tuple[str, bool]]:
+        """Reports whether the CURIE prefixes are valid or not
+
+        Args:
+            curie_list: List of CURIEs
+
+        Returns:
+            List[Tuple[str, bool]]: True or False status of the prefix validation for each term
+            (presumably paired with the term itself; to be confirmed once implemented)
+
+        """
+        # Is there anyway to validate prefixes via Ubergraph or are we going to validate them via
+        # a config file? OAK?
+        run_curie_prefix_query(curie_list)
+        # TODO Add missing implementation
+
+    @staticmethod
+    def validate_curie_list(curie_list: List[str]) -> List[Tuple[str, bool]]:
+        """Reports whether the CURIEs are valid or not
+
+        Args:
+            curie_list: List of CURIEs
+
+        Returns:
+            List[Tuple[str, bool]]: True or False status of the CURIE validation for each term
+            (presumably paired with the term itself; to be confirmed once implemented)
+
+        """
+        run_curie_list_query(curie_list)
+        # TODO Add missing implementation
+
+    @staticmethod
+    def find_obsolete_terms(curie_list: List[str]) -> List[Tuple[str, bool]]:
+        """Reports whether the terms in the curie_list are obsoleted or not
+
+        Args:
+            curie_list: List of CURIEs
+
+        Returns:
+            List[Tuple[str, bool]]: True or False obsoletion status for each term
+            (presumably paired with the term itself; to be confirmed once implemented)
+
+        """
+        run_obsolete_term_query(curie_list)
+        # TODO Add missing implementation
+
+    @staticmethod
+    def find_obsolete_term_replacement(curie_list: List[str]):
+        """Suggests terms for each obsoleted terms in the curie_list
+
+        Args:
+            curie_list: List of CURIEs
+
+        Returns:
+            List of suggested terms. The exact shape is not settled yet (presumably pairs of
+            obsoleted term and suggested replacement; TODO confirm); nothing is returned for now.
+
+        """
+        run_replaced_by_query(curie_list)
+        # TODO Add missing implementation
diff --git a/src/pandasaurus/query.py b/src/pandasaurus/query.py
@@ -0,0 +1,103 @@
+import logging
+from typing import List
+
+import pandas as pd
+from utils.query_utils import (
+    retrieve_contextual_slim_triples,
+    retrieve_full_slim_triples,
+    retrieve_minimal_slim_triples,
+    retrieve_simple_slim_triples,
+)
+
+
+class Query:
+    """Returns the non-redundant graph for s subClassOf o as a simple Pandas dataframe.
+
+    The two seeds of classes, S(s) and S(o), are both derived from an initial seed, S(i).
+    """
+
+    def __init__(self, seed_list: List[str]):
+        # Empty until one of the enrichment methods replaces it
+        self.enriched_df: pd.DataFrame = pd.DataFrame()
+        self.seed_list = seed_list
+
+    def simple_enrichment(self) -> pd.DataFrame:
+        """Simple enrichment: S(s) = S(i); S(o) = S(i)
+
+        Returns:
+            pd.DataFrame: Enriched df
+
+        """
+        logging.info(self.seed_list)
+        # TODO Add missing implementation
+        self.enriched_df = pd.DataFrame()
+        return self.enriched_df
+
+    def minimal_slim_enrichment(self, slim_list: List[str]) -> pd.DataFrame:
+        """Minimal slim enrichment: S(s) = S(i); S(o) = S(i) + all classes in some specified slims
+
+        A class is in a slim when it is tagged with some specified 'subset' axiom.
+
+        Args:
+            slim_list (List[str]): Slim list
+
+        Returns:
+            pd.DataFrame: Enriched df
+
+        """
+        logging.info(self.seed_list)
+        # TODO Add missing implementation
+        self.enriched_df = pd.DataFrame()
+        return self.enriched_df
+
+    def full_slim_enrichment(self, slim_list: List[str]) -> pd.DataFrame:
+        """Full slim enrichment: S(s) = S(i); S(o) = S(i) + all classes in some specified slims
+
+        A class is in a slim when it is tagged with some specified 'subset' axiom. The redundant
+        graph is meant to be queried transitively, e.g. with owl:subClassOf*.
+
+        Args:
+            slim_list (List[str]): Slim list
+
+        Returns:
+            pd.DataFrame: Enriched df
+
+        """
+        logging.info(self.seed_list)
+        # TODO Add missing implementation
+        self.enriched_df = pd.DataFrame()
+        return self.enriched_df
+
+    def contextual_slim_enrichment(self, context: List[str]) -> pd.DataFrame:
+        """Contextual slim enrichment: S(s) = S(i); S(o) = S(i) + context classes
+
+        Context classes are all classes satisfied by some set of existential restrictions in the
+        ubergraph redundant graph (e.g. part_of 'Kidney').
+
+        Args:
+            context: Organ/tissue/multicellular anatomical structure list to determine the
+            redundant graph via existential restrictions
+
+        Returns:
+            pd.DataFrame: Enriched df
+
+        """
+        logging.info(self.seed_list)
+        # TODO Add missing implementation
+        self.enriched_df = pd.DataFrame()
+        return self.enriched_df
+
+    def query(self, column_name: str, query_term: str) -> pd.DataFrame:
+        """Filters a dataframe via a join on a column to the subject of enriched_df
+
+        The object name or synonym is meant to be looked up via a query of name_lookup. For now
+        the enriched dataframe is handed back unfiltered.
+
+        Args:
+            column_name (str): Column name
+            query_term (str): Object label or synonym
+
+        Returns:
+            pd.DataFrame: Filtered dataframe
+
+        """
+        # TODO Add missing implementation
+        return self.enriched_df
diff --git a/src/pandasaurus/resources/__init__.py b/src/pandasaurus/resources/__init__.py
diff --git a/src/pandasaurus/resources/slim.py b/src/pandasaurus/resources/slim.py
@@ -0,0 +1,42 @@
+from typing import List
+
+from .term import Term
+
+
+class Slim:
+    """An upper level slim: a name, a description and the list of its terms"""
+
+    def __init__(self, name: str, description: str, term_list: List[Term]):
+        self.term_list = term_list
+        self.description = description
+        self.name = name
+
+    def __str__(self):
+        # The term list is deliberately not part of the textual form
+        return f"Name:{self.name}, Description: {self.description}"
+
+    def get_name(self) -> str:
+        """Accessor for the slim name
+
+        Returns:
+            str: Slim name
+
+        """
+        return self.name
+
+    def get_description(self) -> str:
+        """Accessor for the slim description
+
+        Returns:
+            str: Slim description
+
+        """
+        return self.description
+
+    def get_term_list(self) -> List[Term]:
+        """Accessor for the terms of the slim
+
+        Returns:
+            List[Term]: Term list
+
+        """
+        return self.term_list
Original file line number	Diff line number	Diff line change
Expand Up		@@ -127,3 +127,5 @@ dmypy.json

		# Pyre type checker
		.pyre/

		src/pandasaurus/main.py
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		[DEFAULT]
		UBERGRAPH_ENDPOINT = "https://ubergraph.apps.renci.org/sparql"