HelikarLab · JoshLoecker · Feb 9, 2026 · Feb 9, 2026 · Feb 9, 2026 · Feb 9, 2026
diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py
diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -12,11 +12,14 @@ dependencies = [
     "cobamp@git+https://github.com/JoshLoecker/cobamp@master",
     "cobra>=0.28.0",
     "fast-bioservices>=0.3.9",
+    "joypy>=0.2.6",
     "kaleido>=1.0.0",
     "loguru>=0.7.2",
+    "notebook>=7.4.7",
     "numpy>=2",
     "openpyxl>=3.1.5",
     "pandas>=1.3.5",
+    "python-louvain",
     "scanpy>=1.10.4",
     "scikit-learn>=1.5.2",
     "scipy>=1.13.0",
@@ -25,6 +28,7 @@ dependencies = [
     "statsmodels>=0.13.0; python_version < '3.12'",
     "statsmodels>=0.14.0; python_version >= '3.12'",
     "troppo@git+https://github.com/JoshLoecker/troppo@master",
+    "zfpkm>=1.0.3",
 ]
 
 [project.optional-dependencies]
@@ -36,19 +40,11 @@ interactive = [
     "jupyterlab>=4.3.2"
 ]
 dev = [
-    "commitizen>=4.8.3",
-    "commitlint>=1.3.1",
     "como",
     "hatchling>=1.27.0",
-    "pandas-stubs>=2.3.2.250827",
-    "pre-commit>=4.2.0",
-    "pyright>=1.1.405",
-    "pytest>=8.4.1",
     "pytest-asyncio>=1.1.0",
     "pytest-cov>=6.2.1",
-    "ruff>=0.12.11",
-    "scipy-stubs>=1.16.1.1",
-    "types-aiofiles>=24.1.0.20250822",
+    "pytest>=8.4.1",
 ]
 
 [tool.hatch.version]
@@ -62,3 +58,6 @@ allow-direct-references = true
 
 [tool.pytest.ini_options]
 pythonpath = ["main/src"]
+
+[tool.uv.sources]
+python-louvain = { git = "https://github.com/taynaud/python-louvain" }
diff --git a/ruff.toml b/ruff.toml
@@ -1,11 +1,12 @@
-line-length = 150
+line-length = 120
 extend-include = ["docs/**/*.py", "tests/**/*.py", "**/*.ipynb"]
 
 [format]
 quote-style = "double"
 docstring-code-format = true
 
 [lint]
+extend-fixable = ["I001"]
 # Linting rules: https://docs.astral.sh/ruff/rules/
 unfixable = [
     "F401", # warn about, but do not remove, unused imports

diff --git a/tests/fixtures/collect_files.py b/tests/fixtures/collect_files.py
@@ -5,7 +5,8 @@
 from _pytest.fixtures import SubRequest
 
 _fragment_size_filepaths = list(Path("main/data/COMO_input").absolute().rglob("*fragment_size*.txt"))
-_gene_count_filepaths = list(Path("main/data/COMO_input").absolute().rglob("*.tab"))
+_quant_filepaths = list(Path("main/data/COMO_input").absolute().rglob("*.sf"))
+# Superseded by _quant_filepaths: _gene_count_filepaths = list(Path("main/data/COMO_input").absolute().rglob("*.tab"))
 _insert_size_filepaths = list(Path("main/data/COMO_input").absolute().rglob("*_insert_size.txt"))
 _layout_filepaths = list(Path("main/data/COMO_input").absolute().rglob("*_layout.txt"))
 _preparation_method_filepaths = list(Path("main/data/COMO_input").absolute().rglob("*_prep_method.txt"))
@@ -27,14 +28,14 @@ def fragment_size_filepath(request: SubRequest) -> Path:
     return request.param
 
 
-@pytest.fixture(params=_gene_count_filepaths)
-def gene_count_filepath(request: SubRequest) -> Path:
+@pytest.fixture(params=_quant_filepaths)
+def quant_filepaths(request: SubRequest) -> Path:  # parametrized: each run receives ONE path from _quant_filepaths
     return request.param
 
 
 @pytest.fixture
 def all_gene_count_filepaths() -> list[Path]:
-    return _gene_count_filepaths
+    return _quant_filepaths  # NOTE(review): name still says "gene_count" but this now returns the *.sf paths
 
 
 @pytest.fixture(params=_insert_size_filepaths)
@@ -62,7 +63,7 @@ def strand_filepath(request: SubRequest) -> Path:
         file
         for filepaths in [
             _fragment_size_filepaths,
-            _gene_count_filepaths,
+            _quant_filepaths,
             _insert_size_filepaths,
             _layout_filepaths,
             _preparation_method_filepaths,

diff --git a/tests/unit/test_rnaseq_preprocess.py b/tests/unit/test_rnaseq_preprocess.py
@@ -7,8 +7,8 @@
 from como.rnaseq_preprocess import (
     _organize_gene_counts_files,
     _process_first_multirun_sample,
+    _QuantInformation,
     _sample_name_from_filepath,
-    _STARinformation,
     _StudyMetrics,
 )
 
@@ -22,26 +22,25 @@
 )
 
 
-class TestSTARInformation:
-    valid_data = Path("main/data/COMO_input/naiveB/geneCounts/S1/naiveB_S1R1.tab").resolve()
-    invalid_data = Path("main/data/COMO_input/naiveB/fragmentSizes/S1/naiveB_S1R1_fragment_size.txt").resolve()
+class TestQuantInformation:
+    valid_data = Path("main/data/COMO_input/naiveB/quantification/S1/naiveB_S1R1_quant.genes.sf").resolve()
+    invalid_data = Path("main/data/COMO_input/naiveB/strandedness/S1/naiveB_S1R1_strandedness.txt").resolve()
 
-    @pytest.mark.asyncio
-    async def test_build_from_tab_valid_file(self) -> None:
-        """Validate building STAR information object."""
-        star: _STARinformation = await _STARinformation.build_from_tab(TestSTARInformation.valid_data)
+    def test_build_from_sf_valid_file(self) -> None:  # happy path: build from the bundled naiveB S1R1 '.sf' file
+        quant: _QuantInformation = _QuantInformation.build_from_sf(TestQuantInformation.valid_data)
+        assert len(quant.gene_names) == len(quant.count_matrix) == 78900
+        assert quant.sample_name == "naiveB_S1R1"
+        assert quant.filepath.as_posix().endswith(  # no "/COMO/" prefix: must pass in any checkout directory
+            "/main/data/COMO_input/naiveB/quantification/S1/naiveB_S1R1_quant.genes.sf"
+        )
 
-        assert len(star.gene_names) == len(star.count_matrix) == 61541
-        assert len(star.num_unmapped) == 3
-        assert len(star.num_multimapping) == 3
-        assert len(star.num_no_feature) == 3
-        assert len(star.num_ambiguous) == 3
+    def test_build_from_sf_invalid_file(self) -> None:  # a non-'.sf' input must raise ValueError
+        with pytest.raises(ValueError, match=r"Building quantification information requires a '\.sf' file; received: "):
+            _QuantInformation.build_from_sf(TestQuantInformation.invalid_data)
 
-    @pytest.mark.asyncio
-    async def test_build_from_tab_invalid_file(self):
-        """Validate error on invalid file."""
-        with pytest.raises(ValueError, match=r"Building STAR information requires a '\.tab' file"):
-            await _STARinformation.build_from_tab(TestSTARInformation.invalid_data)
+    def test_build_from_missing_file(self) -> None:  # '.sf' suffix, so only the missing-file check can fire
+        with pytest.raises(FileNotFoundError, match=r"Unable to find the \.sf file: "):
+            _QuantInformation.build_from_sf(Path("missing_file.sf"))
 
 
 def test_sample_name_from_filepath(any_como_input_filepath: Path):
@@ -52,9 +51,9 @@ def test_sample_name_from_filepath(any_como_input_filepath: Path):
 def test_organize_gene_counts_files(como_input_data_directory: Path):
     metric: _StudyMetrics
     for metric in _organize_gene_counts_files(como_input_data_directory):
-        assert len(metric.sample_names) == metric.num_samples == len(metric.count_files) == len(metric.strand_files)
+        assert len(metric.sample_names) == metric.num_samples == len(metric.quant_files) == len(metric.strand_files)
 
-        for file in metric.count_files:
+        for file in metric.quant_files:
             assert f"/{metric.study_name}/" in file.as_posix()
             assert "geneCounts" in file.as_posix()
             assert file.suffix == ".tab"
@@ -65,9 +64,8 @@ def test_organize_gene_counts_files(como_input_data_directory: Path):
             assert file.suffix == ".txt"
 
 
-@pytest.mark.asyncio
-async def test_process_first_multirun_sample(strand_filepath: Path, all_gene_count_filepaths: list[Path]):
-    result: pd.DataFrame = await _process_first_multirun_sample(strand_filepath, all_gene_count_filepaths)
+def test_process_first_multirun_sample(strand_filepath: Path, all_gene_count_filepaths: list[Path]):
+    result: pd.DataFrame = _process_first_multirun_sample(strand_filepath, all_gene_count_filepaths)
     assert result.columns[0] == "ensembl_gene_id"
     assert len(result.columns) == 2
     assert result.columns.tolist()[1] in strand_filepath.as_posix()