diff --git a/docs/tutorials/virtual_db_tutorial.ipynb b/docs/tutorials/virtual_db_tutorial.ipynb index bb07e75..c1aecab 100644 --- a/docs/tutorials/virtual_db_tutorial.ipynb +++ b/docs/tutorials/virtual_db_tutorial.ipynb @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "cell-2", "metadata": {}, "outputs": [ @@ -33,7 +33,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Config saved to: /tmp/tmp_krovt13/vdb_config.yaml\n" + "Config saved to: /tmp/tmp6rrutwjn/vdb_config.yaml\n" ] } ], @@ -157,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "cell-4", "metadata": {}, "outputs": [ @@ -186,6 +186,41 @@ "print(repr(vdb))" ] }, + { + "cell_type": "markdown", + "id": "ea9cc76c", + "metadata": {}, + "source": [ + "## Listing datasets\n", + "\n", + "To list the datasets available in the VirtualDB instance, use `get_datasets()`." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "896d3ee1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Datasets:\n", + "- dto\n", + "- hackett\n", + "- harbison\n", + "- kemmeren\n" + ] + } + ], + "source": [ + "print(\"\\nDatasets:\")\n", + "for dataset in vdb.get_datasets():\n", + " print(f\"- {dataset}\")" + ] + }, { "cell_type": "markdown", "id": "0f10c138", @@ -204,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "f7d73db0", "metadata": {}, "outputs": [ @@ -254,7 +289,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "cell-6", "metadata": {}, "outputs": [ @@ -269,11 +304,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 10407.70it/s]\n", - "Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 1770.50it/s]\n", - "Fetching 1 files: 100%|██████████| 1/1 [00:20<00:00, 20.31s/it]\n", + "Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 11305.40it/s]\n", + "Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 6442.86it/s]\n", + "Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 9868.95it/s]\n", "No metadata fields found for data config 'dto' in repo 'BrentLab/yeast_comparative_analysis' -- no embedded metadata_fields and no metadata config with applies_to\n", - "Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 55091.56it/s]\n", + "Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 7124.69it/s]\n", "Key 'carbon_source' not found at path 'media.carbon_source' (current keys: ['name'])\n", "Key 'carbon_source' not found at path 'media.carbon_source' (current keys: ['name'])\n", "Key 'carbon_source' not found at path 'media.carbon_source' (current keys: ['name'])\n", @@ -303,7 +338,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "id": "pdebujnqb9q", "metadata": {}, "outputs": [ @@ -352,7 +387,7 @@ "type": "unknown" } ], - "ref": "955566a4-2a55-483f-a0d4-11f1757f6a28", + "ref": "3dce54e1-e78b-4f8b-9241-eeceae15b6f4", "rows": [ [ "0", @@ -523,7 +558,7 @@ "5 harbison_meta temperature_celsius DOUBLE YES None None None" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -536,7 +571,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "id": "9deee334", "metadata": {}, "outputs": [ @@ -585,7 +620,7 @@ "type": "unknown" } ], - "ref": "012ff714-cded-469d-9c53-642872a5d487", + "ref": "c37ac38f-1f94-4cb1-8e3a-d87122c08b1a", "rows": [ [ "0", @@ -861,7 +896,7 @@ "10 harbison temperature_celsius DOUBLE YES None None None" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -873,7 +908,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "id": "cell-9", "metadata": {}, "outputs": [ @@ -907,7 +942,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "1a705f1c", "metadata": {}, "outputs": [ @@ -951,50 +986,50 @@ "type": "float" } ], - "ref": "b5c797e5-c834-4f9d-a8fc-89789ef0cc68", + "ref": "f6d762b2-08cc-4514-93fb-89fac1ce6c8b", "rows": [ [ "0", - "300", - "YPD", - "YOL116W", - "MSN1", + "118", + "H2O2Hi", + "YGL073W", + "HSF1", "glucose", "30.0" ], [ "1", - "113", + "216", "YPD", - "YGL035C", - "MIG1", + "YKR064W", + "OAF3", "glucose", "30.0" ], [ "2", - "81", - "RAPA", - "YEL009C", - "GCN4", - "glucose", + "314", + "SM", + "YOR358W", + "HAP5", + "unspecified", "30.0" ], [ "3", - "279", + "330", "YPD", - "YNL139C", - "THO2", + "YPL177C", + "CUP9", "glucose", "30.0" ], [ "4", - "73", - "H2O2Hi", - "YDR423C", - "CAD1", + "9", + "RAPA", + "YBL103C", + "RTG3", "glucose", "30.0" ] @@ -1034,46 +1069,46 @@ " \n", " \n", " 0\n", - " 300\n", - " YPD\n", - " YOL116W\n", - " MSN1\n", + " 118\n", + " H2O2Hi\n", + " YGL073W\n", + " HSF1\n", " glucose\n", " 30.0\n", " \n", " \n", " 1\n", - " 113\n", + " 216\n", " YPD\n", - " YGL035C\n", - " MIG1\n", + " YKR064W\n", + " OAF3\n", " glucose\n", " 30.0\n", " \n", " \n", " 2\n", - " 81\n", - " RAPA\n", - " YEL009C\n", - " GCN4\n", - " glucose\n", + " 314\n", + " SM\n", + " YOR358W\n", + " HAP5\n", + " unspecified\n", " 30.0\n", " \n", " \n", " 3\n", - " 279\n", + " 330\n", " YPD\n", - " YNL139C\n", - " THO2\n", + " YPL177C\n", + " CUP9\n", " glucose\n", " 30.0\n", " \n", " \n", " 4\n", - " 73\n", - " H2O2Hi\n", - " YDR423C\n", - " CAD1\n", + " 9\n", + " RAPA\n", + " YBL103C\n", + " RTG3\n", " glucose\n", " 30.0\n", " \n", @@ -1083,11 +1118,11 @@ ], "text/plain": [ " sample_id condition regulator_locus_tag regulator_symbol carbon_source \\\n", - "0 300 YPD YOL116W MSN1 glucose \n", - "1 113 YPD YGL035C MIG1 glucose \n", - "2 81 RAPA YEL009C GCN4 glucose \n", - "3 279 YPD YNL139C THO2 glucose \n", - "4 73 H2O2Hi YDR423C CAD1 glucose \n", + "0 118 H2O2Hi YGL073W HSF1 glucose \n", + "1 216 YPD YKR064W OAF3 glucose \n", + "2 314 SM YOR358W HAP5 unspecified \n", + "3 330 YPD YPL177C CUP9 glucose \n", + "4 9 RAPA YBL103C RTG3 glucose \n", "\n", " temperature_celsius \n", "0 30.0 \n", @@ -1097,7 +1132,7 @@ "4 30.0 " ] }, - "execution_count": 8, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -1123,7 +1158,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "id": "cell-17", "metadata": {}, "outputs": [ @@ -1192,75 +1227,75 @@ "type": "float" } ], - "ref": "06d7e391-9665-4a5f-9276-359ff8e71c3e", + "ref": "e270aed6-9c2d-445a-b7d5-9fe1f96455ff", "rows": [ [ "0", - "15", - "14.0", + "13", + "12.0", "YBR049C", "REB1", - "YPD", + "H2O2Hi", "YPR204W", "YPR204W", - "0.85288861", - "0.76943045", + "0.90161323", + "0.6769426", "glucose", "30.0" ], [ "1", - "15", - "14.0", + "13", + "12.0", "YBR049C", "REB1", - "YPD", + "H2O2Hi", "YPR203W", "YPR203W", - "1.2490028", - "0.11237602", + "1.0534522", + "0.38440432", "glucose", "30.0" ], [ "2", - "15", - "14.0", + "13", + "12.0", "YBR049C", "REB1", - "YPD", + "H2O2Hi", "YPR202W", "YPR202W", - "1.2490028", - "0.11237602", + "1.0534522", + "0.38440432", "glucose", "30.0" ], [ "3", - "15", - "14.0", + "13", + "12.0", "YBR049C", "REB1", - "YPD", + "H2O2Hi", "YPR201W", "ARR3", - "1.5137073", - "0.1681333", + "0.84429803", + "0.66537467", "glucose", "30.0" ], [ "4", - "15", - "14.0", + "13", + "12.0", "YBR049C", "REB1", - "YPD", + "H2O2Hi", "YPR200C", "ARR2", - "1.5137073", - "0.1681333", + "0.84429803", + "0.66537467", "glucose", "30.0" ] @@ -1305,71 +1340,71 @@ " \n", " \n", " 0\n", - " 15\n", - " 14.0\n", + " 13\n", + " 12.0\n", " YBR049C\n", " REB1\n", - " YPD\n", + " H2O2Hi\n", " YPR204W\n", " YPR204W\n", - " 0.852889\n", - " 0.769430\n", + " 0.901613\n", + " 0.676943\n", " glucose\n", " 30.0\n", " \n", " \n", " 1\n", - " 15\n", - " 14.0\n", + " 13\n", + " 12.0\n", " YBR049C\n", " REB1\n", - " YPD\n", + " H2O2Hi\n", " YPR203W\n", " YPR203W\n", - " 1.249003\n", - " 0.112376\n", + " 1.053452\n", + " 0.384404\n", " glucose\n", " 30.0\n", " \n", " \n", " 2\n", - " 15\n", - " 14.0\n", + " 13\n", + " 12.0\n", " YBR049C\n", " REB1\n", - " YPD\n", + " H2O2Hi\n", " YPR202W\n", " YPR202W\n", - " 1.249003\n", - " 0.112376\n", + " 1.053452\n", + " 0.384404\n", " glucose\n", " 30.0\n", " \n", " \n", " 3\n", - " 15\n", - " 14.0\n", + " 13\n", + " 12.0\n", " YBR049C\n", " REB1\n", - " YPD\n", + " H2O2Hi\n", " YPR201W\n", " ARR3\n", - " 1.513707\n", - " 0.168133\n", + " 0.844298\n", + " 0.665375\n", " glucose\n", " 30.0\n", " \n", " \n", " 4\n", - " 15\n", - " 14.0\n", + " 13\n", + " 12.0\n", " YBR049C\n", " REB1\n", - " YPD\n", + " H2O2Hi\n", " YPR200C\n", " ARR2\n", - " 1.513707\n", - " 0.168133\n", + " 0.844298\n", + " 0.665375\n", " glucose\n", " 30.0\n", " \n", @@ -1379,18 +1414,18 @@ ], "text/plain": [ " sample_id db_id regulator_locus_tag regulator_symbol condition \\\n", - "0 15 14.0 YBR049C REB1 YPD \n", - "1 15 14.0 YBR049C REB1 YPD \n", - "2 15 14.0 YBR049C REB1 YPD \n", - "3 15 14.0 YBR049C REB1 YPD \n", - "4 15 14.0 YBR049C REB1 YPD \n", + "0 13 12.0 YBR049C REB1 H2O2Hi \n", + "1 13 12.0 YBR049C REB1 H2O2Hi \n", + "2 13 12.0 YBR049C REB1 H2O2Hi \n", + "3 13 12.0 YBR049C REB1 H2O2Hi \n", + "4 13 12.0 YBR049C REB1 H2O2Hi \n", "\n", " target_locus_tag target_symbol effect pvalue carbon_source \\\n", - "0 YPR204W YPR204W 0.852889 0.769430 glucose \n", - "1 YPR203W YPR203W 1.249003 0.112376 glucose \n", - "2 YPR202W YPR202W 1.249003 0.112376 glucose \n", - "3 YPR201W ARR3 1.513707 0.168133 glucose \n", - "4 YPR200C ARR2 1.513707 0.168133 glucose \n", + "0 YPR204W YPR204W 0.901613 0.676943 glucose \n", + "1 YPR203W YPR203W 1.053452 0.384404 glucose \n", + "2 YPR202W YPR202W 1.053452 0.384404 glucose \n", + "3 YPR201W ARR3 0.844298 0.665375 glucose \n", + "4 YPR200C ARR2 0.844298 0.665375 glucose \n", "\n", " temperature_celsius \n", "0 30.0 \n", @@ -1400,7 +1435,7 @@ "4 30.0 " ] }, - "execution_count": 9, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -1429,7 +1464,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "id": "cell-19", "metadata": {}, "outputs": [ @@ -1453,7 +1488,7 @@ "type": "integer" } ], - "ref": "6d8b4d37-3b6b-40f1-833d-aa6711694bcb", + "ref": "1f789068-26a6-466d-b977-ce3a58e6b547", "rows": [ [ "0", @@ -1467,17 +1502,17 @@ ], [ "2", - "STE12", + "HSF1", "4" ], [ "3", - "RTG3", + "STE12", "4" ], [ "4", - "DIG1", + "SKN7", "4" ], [ @@ -1487,102 +1522,102 @@ ], [ "6", - "HSF1", + "DIG1", "4" ], [ "7", - "SKN7", + "RTG3", "4" ], [ "8", - "RPN4", + "PHO2", "3" ], [ "9", - "GAT1", + "ROX1", "3" ], [ "10", - "AFT2", + "GZF3", "3" ], [ "11", - "YAP7", + "SFP1", "3" ], [ "12", - "TEC1", + "KSS1", "3" ], [ "13", - "MOT3", + "CIN5", "3" ], [ "14", - "ROX1", + "NRG1", "3" ], [ "15", - "GZF3", + "MBP1", "3" ], [ "16", - "PHO2", + "GAT1", "3" ], [ "17", - "MAL33", + "AFT2", "3" ], [ "18", - "CIN5", + "MOT3", "3" ], [ "19", - "SFP1", + "PHD1", "3" ], [ "20", - "KSS1", + "TEC1", "3" ], [ "21", - "YAP6", + "YAP7", "3" ], [ "22", - "RPH1", + "RIM101", "3" ], [ "23", - "NRG1", + "AFT1", "3" ], [ "24", - "PHD1", + "YJL206C", "3" ], [ "25", - "FHL1", + "RPN4", "3" ], [ @@ -1592,117 +1627,117 @@ ], [ "27", - "FKH2", + "FHL1", "3" ], [ "28", - "MBP1", + "FKH2", "3" ], [ "29", - "RIM101", + "MAL33", "3" ], [ "30", - "YJL206C", + "RPH1", "3" ], [ "31", - "AFT1", + "YAP6", "3" ], [ "32", - "RLM1", + "SOK2", "2" ], [ "33", - "XBP1", + "HAP2", "2" ], [ "34", - "IME4", + "CAD1", "2" ], [ "35", - "MCM1", + "MAC1", "2" ], [ "36", - "DAL80", + "UME6", "2" ], [ "37", - "YAP3", + "YAP5", "2" ], [ "38", - "YAP5", + "MOT2", "2" ], [ "39", - "MAC1", + "UME1", "2" ], [ "40", - "UME6", + "DAL81", "2" ], [ "41", - "PDR1", + "GLN3", "2" ], [ "42", - "UME1", + "ARR1", "2" ], [ "43", - "CAD1", + "IME4", "2" ], [ "44", - "MGA1", + "ASH1", "2" ], [ "45", - "HAP4", + "RLM1", "2" ], [ "46", - "MIG2", + "MSS11", "2" ], [ "47", - "GCN4", + "MCM1", "2" ], [ "48", - "RTG1", + "MGA1", "2" ], [ "49", - "PUT3", + "RDS1", "2" ] ], @@ -1747,17 +1782,17 @@ " \n", " \n", " 2\n", - " STE12\n", + " HSF1\n", " 4\n", " \n", " \n", " 3\n", - " RTG3\n", + " STE12\n", " 4\n", " \n", " \n", " 4\n", - " DIG1\n", + " SKN7\n", " 4\n", " \n", " \n", @@ -1767,27 +1802,27 @@ " \n", " \n", " 58\n", - " IME1\n", + " PUT3\n", " 2\n", " \n", " \n", " 59\n", - " RDS1\n", + " RTG1\n", " 2\n", " \n", " \n", " 60\n", - " MSS11\n", + " ADR1\n", " 2\n", " \n", " \n", " 61\n", - " HAP2\n", + " UGA3\n", " 2\n", " \n", " \n", " 62\n", - " ARR1\n", + " PDR1\n", " 2\n", " \n", " \n", @@ -1799,20 +1834,20 @@ " regulator_symbol n\n", "0 MSN2 6\n", "1 MSN4 5\n", - "2 STE12 4\n", - "3 RTG3 4\n", - "4 DIG1 4\n", + "2 HSF1 4\n", + "3 STE12 4\n", + "4 SKN7 4\n", ".. ... ..\n", - "58 IME1 2\n", - "59 RDS1 2\n", - "60 MSS11 2\n", - "61 HAP2 2\n", - "62 ARR1 2\n", + "58 PUT3 2\n", + "59 RTG1 2\n", + "60 ADR1 2\n", + "61 UGA3 2\n", + "62 PDR1 2\n", "\n", "[63 rows x 2 columns]" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1853,7 +1888,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "id": "cell-21", "metadata": {}, "outputs": [ @@ -1942,7 +1977,7 @@ "type": "string" } ], - "ref": "1ce4dce9-5191-4116-b848-394fcdb3b5fc", + "ref": "426c5717-57fa-4c0d-aa1f-b1947914421c", "rows": [ [ "0", @@ -2131,7 +2166,7 @@ "2 harbison 11 BrentLab/hughes_2006;overexpression " ] }, - "execution_count": 11, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -2143,7 +2178,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "id": "cell-22", "metadata": {}, "outputs": [ @@ -2197,117 +2232,117 @@ "type": "float" } ], - "ref": "8e604dcf-efad-42a8-a049-7bf684faa9b6", + "ref": "43a63e85-fc7c-4630-873c-6a44f8af7442", "rows": [ [ "0", - "18", - "YPD", - "YBR083W", - "TEC1", - "glucose", + "314", + "SM", + "YOR358W", + "HAP5", + "unspecified", "30.0", "0.0", - "0.08188235294117648" + "0.047097156398104266" ], [ "1", - "157", - "H2O2Hi", - "YHR206W", - "SKN7", + "240", + "YPD", + "YML007W", + "YAP1", "glucose", "30.0", "0.0", - "0.13931986462735127" + "0.14091317634369943" ], [ "2", - "93", + "330", "YPD", - "YER111C", - "SWI4", + "YPL177C", + "CUP9", "glucose", "30.0", "0.0", - "0.17005078106191404" + "0.00039874225300765584" ], [ "3", - "72", - "YPD", - "YDR421W", - "ARO80", + "114", + "H2O2Hi", + "YGL071W", + "AFT1", "glucose", "30.0", "0.0", - "0.00011392635800218739" + "0.09653511969862681" ], [ "4", - "71", - "SM", - "YDR421W", - "ARO80", - "unspecified", + "118", + "H2O2Hi", + "YGL073W", + "HSF1", + "glucose", "30.0", "0.0", - "0.00011392635800218739" + "0.03150882247029168" ], [ "5", - "346", - "RAPA", - "YPR104C", - "FHL1", + "31", + "H2O2Hi", + "YDL020C", + "RPN4", "glucose", "30.0", "0.0", - "0.019746237283784218" + "0.12466961356179365" ], [ "6", - "226", + "303", "YPD", - "YLR182W", - "SWI6", + "YOR028C", + "CIN5", "glucose", "30.0", "0.0", - "0.07368989186287292" + "0.03621718920889537" ], [ "7", - "286", - "YPD", - "YNL309W", - "STB1", + "36", + "H2O2Lo", + "YDL056W", + "MBP1", "glucose", "30.0", "0.0", - "0.1821470588235294" + "0.04300429120153643" ], [ "8", - "172", - "SM", - "YIR023W", - "DAL81", - "unspecified", + "15", + "YPD", + "YBR049C", + "REB1", + "glucose", "30.0", "0.0", - "0.21656240134694307" + "0.07954075079166496" ], [ "9", - "320", - "YPD", - "YPL038W", - "MET31", + "162", + "H2O2Lo", + "YIL101C", + "XBP1", "glucose", "30.0", "0.0", - "0.0661219662690251" + "0.22690440962955793" ] ], "shape": { @@ -2347,113 +2382,113 @@ " \n", " \n", " 0\n", - " 18\n", - " YPD\n", - " YBR083W\n", - " TEC1\n", - " glucose\n", + " 314\n", + " SM\n", + " YOR358W\n", + " HAP5\n", + " unspecified\n", " 30.0\n", " 0.0\n", - " 0.081882\n", + " 0.047097\n", " \n", " \n", " 1\n", - " 157\n", - " H2O2Hi\n", - " YHR206W\n", - " SKN7\n", + " 240\n", + " YPD\n", + " YML007W\n", + " YAP1\n", " glucose\n", " 30.0\n", " 0.0\n", - " 0.139320\n", + " 0.140913\n", " \n", " \n", " 2\n", - " 93\n", + " 330\n", " YPD\n", - " YER111C\n", - " SWI4\n", + " YPL177C\n", + " CUP9\n", " glucose\n", " 30.0\n", " 0.0\n", - " 0.170051\n", + " 0.000399\n", " \n", " \n", " 3\n", - " 72\n", - " YPD\n", - " YDR421W\n", - " ARO80\n", + " 114\n", + " H2O2Hi\n", + " YGL071W\n", + " AFT1\n", " glucose\n", " 30.0\n", " 0.0\n", - " 0.000114\n", + " 0.096535\n", " \n", " \n", " 4\n", - " 71\n", - " SM\n", - " YDR421W\n", - " ARO80\n", - " unspecified\n", + " 118\n", + " H2O2Hi\n", + " YGL073W\n", + " HSF1\n", + " glucose\n", " 30.0\n", " 0.0\n", - " 0.000114\n", + " 0.031509\n", " \n", " \n", " 5\n", - " 346\n", - " RAPA\n", - " YPR104C\n", - " FHL1\n", + " 31\n", + " H2O2Hi\n", + " YDL020C\n", + " RPN4\n", " glucose\n", " 30.0\n", " 0.0\n", - " 0.019746\n", + " 0.124670\n", " \n", " \n", " 6\n", - " 226\n", + " 303\n", " YPD\n", - " YLR182W\n", - " SWI6\n", + " YOR028C\n", + " CIN5\n", " glucose\n", " 30.0\n", " 0.0\n", - " 0.073690\n", + " 0.036217\n", " \n", " \n", " 7\n", - " 286\n", - " YPD\n", - " YNL309W\n", - " STB1\n", + " 36\n", + " H2O2Lo\n", + " YDL056W\n", + " MBP1\n", " glucose\n", " 30.0\n", " 0.0\n", - " 0.182147\n", + " 0.043004\n", " \n", " \n", " 8\n", - " 172\n", - " SM\n", - " YIR023W\n", - " DAL81\n", - " unspecified\n", + " 15\n", + " YPD\n", + " YBR049C\n", + " REB1\n", + " glucose\n", " 30.0\n", " 0.0\n", - " 0.216562\n", + " 0.079541\n", " \n", " \n", " 9\n", - " 320\n", - " YPD\n", - " YPL038W\n", - " MET31\n", + " 162\n", + " H2O2Lo\n", + " YIL101C\n", + " XBP1\n", " glucose\n", " 30.0\n", " 0.0\n", - " 0.066122\n", + " 0.226904\n", " \n", " \n", "\n", @@ -2461,31 +2496,31 @@ ], "text/plain": [ " sample_id condition regulator_locus_tag regulator_symbol carbon_source \\\n", - "0 18 YPD YBR083W TEC1 glucose \n", - "1 157 H2O2Hi YHR206W SKN7 glucose \n", - "2 93 YPD YER111C SWI4 glucose \n", - "3 72 YPD YDR421W ARO80 glucose \n", - "4 71 SM YDR421W ARO80 unspecified \n", - "5 346 RAPA YPR104C FHL1 glucose \n", - "6 226 YPD YLR182W SWI6 glucose \n", - "7 286 YPD YNL309W STB1 glucose \n", - "8 172 SM YIR023W DAL81 unspecified \n", - "9 320 YPD YPL038W MET31 glucose \n", + "0 314 SM YOR358W HAP5 unspecified \n", + "1 240 YPD YML007W YAP1 glucose \n", + "2 330 YPD YPL177C CUP9 glucose \n", + "3 114 H2O2Hi YGL071W AFT1 glucose \n", + "4 118 H2O2Hi YGL073W HSF1 glucose \n", + "5 31 H2O2Hi YDL020C RPN4 glucose \n", + "6 303 YPD YOR028C CIN5 glucose \n", + "7 36 H2O2Lo YDL056W MBP1 glucose \n", + "8 15 YPD YBR049C REB1 glucose \n", + "9 162 H2O2Lo YIL101C XBP1 glucose \n", "\n", " temperature_celsius dto_empirical_pvalue dto_fdr \n", - "0 30.0 0.0 0.081882 \n", - "1 30.0 0.0 0.139320 \n", - "2 30.0 0.0 0.170051 \n", - "3 30.0 0.0 0.000114 \n", - "4 30.0 0.0 0.000114 \n", - "5 30.0 0.0 0.019746 \n", - "6 30.0 0.0 0.073690 \n", - "7 30.0 0.0 0.182147 \n", - "8 30.0 0.0 0.216562 \n", - "9 30.0 0.0 0.066122 " + "0 30.0 0.0 0.047097 \n", + "1 30.0 0.0 0.140913 \n", + "2 30.0 0.0 0.000399 \n", + "3 30.0 0.0 0.096535 \n", + "4 30.0 0.0 0.031509 \n", + "5 30.0 0.0 0.124670 \n", + "6 30.0 0.0 0.036217 \n", + "7 30.0 0.0 0.043004 \n", + "8 30.0 0.0 0.079541 \n", + "9 30.0 0.0 0.226904 " ] }, - "execution_count": 12, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -2506,7 +2541,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "id": "cell-23", "metadata": {}, "outputs": [ @@ -2540,77 +2575,77 @@ "type": "string" } ], - "ref": "18363370-be4d-4693-8836-96409a2ac869", + "ref": "75bcfd39-bdd3-40ed-8c32-57a86f2e5145", "rows": [ [ "0", - "15", - "REB1", + "289", + "DAL82", "0.0", - "100_242" + "1213" ], [ "1", - "303", - "CIN5", + "224", + "ACE2", "0.0", - "1280" + "901" ], [ "2", - "330", - "CUP9", + "283", + "RAP1", "0.0", - "256" + "96_238" ], [ "3", - "114", - "AFT1", + "8", + "RTG3", "0.0", - "87" + "57" ], [ "4", - "9", - "RTG3", + "75", + "CAD1", "0.0", - "57" + "360" ], [ "5", - "118", - "HSF1", + "246", + "ARG81", "0.0", - "88" + "1023" ], [ "6", - "15", - "REB1", + "209", + "HAP4", "0.0", - "100_242" + "802" ], [ "7", - "162", - "XBP1", + "83", + "GCN4", "0.0", - "24" + "357" ], [ "8", - "240", - "YAP1", + "55", + "SWI5", "0.0", - "182" + "253" ], [ "9", - "150", - "STP2", + "189", + "HIR3", "0.0", - "604" + "772" ] ], "shape": { @@ -2646,73 +2681,73 @@ " \n", " \n", " 0\n", - " 15\n", - " REB1\n", + " 289\n", + " DAL82\n", " 0.0\n", - " 100_242\n", + " 1213\n", " \n", " \n", " 1\n", - " 303\n", - " CIN5\n", + " 224\n", + " ACE2\n", " 0.0\n", - " 1280\n", + " 901\n", " \n", " \n", " 2\n", - " 330\n", - " CUP9\n", + " 283\n", + " RAP1\n", " 0.0\n", - " 256\n", + " 96_238\n", " \n", " \n", " 3\n", - " 114\n", - " AFT1\n", + " 8\n", + " RTG3\n", " 0.0\n", - " 87\n", + " 57\n", " \n", " \n", " 4\n", - " 9\n", - " RTG3\n", + " 75\n", + " CAD1\n", " 0.0\n", - " 57\n", + " 360\n", " \n", " \n", " 5\n", - " 118\n", - " HSF1\n", + " 246\n", + " ARG81\n", " 0.0\n", - " 88\n", + " 1023\n", " \n", " \n", " 6\n", - " 15\n", - " REB1\n", + " 209\n", + " HAP4\n", " 0.0\n", - " 100_242\n", + " 802\n", " \n", " \n", " 7\n", - " 162\n", - " XBP1\n", + " 83\n", + " GCN4\n", " 0.0\n", - " 24\n", + " 357\n", " \n", " \n", " 8\n", - " 240\n", - " YAP1\n", + " 55\n", + " SWI5\n", " 0.0\n", - " 182\n", + " 253\n", " \n", " \n", " 9\n", - " 150\n", - " STP2\n", + " 189\n", + " HIR3\n", " 0.0\n", - " 604\n", + " 772\n", " \n", " \n", "\n", @@ -2720,19 +2755,19 @@ ], "text/plain": [ " harbison_sample_id regulator_symbol dto_empirical_pvalue hackett_sample_id\n", - "0 15 REB1 0.0 100_242\n", - "1 303 CIN5 0.0 1280\n", - "2 330 CUP9 0.0 256\n", - "3 114 AFT1 0.0 87\n", - "4 9 RTG3 0.0 57\n", - "5 118 HSF1 0.0 88\n", - "6 15 REB1 0.0 100_242\n", - "7 162 XBP1 0.0 24\n", - "8 240 YAP1 0.0 182\n", - "9 150 STP2 0.0 604" + "0 289 DAL82 0.0 1213\n", + "1 224 ACE2 0.0 901\n", + "2 283 RAP1 0.0 96_238\n", + "3 8 RTG3 0.0 57\n", + "4 75 CAD1 0.0 360\n", + "5 246 ARG81 0.0 1023\n", + "6 209 HAP4 0.0 802\n", + "7 83 GCN4 0.0 357\n", + "8 55 SWI5 0.0 253\n", + "9 189 HIR3 0.0 772" ] }, - "execution_count": 13, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -2770,7 +2805,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "id": "f03e942a", "metadata": {}, "outputs": [ @@ -2809,12 +2844,12 @@ "type": "integer" } ], - "ref": "0736a331-fb06-4ba3-abe4-dff7ac0e65a3", + "ref": "933fb2fe-799d-4a25-ae4e-ef95ed28bbc4", "rows": [ [ "0", "SWI1", - "5.0", + "15.0", "ZEV", "P", "3" @@ -2838,7 +2873,7 @@ [ "3", "SWI1", - "10.0", + "5.0", "ZEV", "P", "3" @@ -2862,7 +2897,7 @@ [ "6", "SWI1", - "15.0", + "45.0", "ZEV", "P", "3" @@ -2870,72 +2905,72 @@ [ "7", "SWI1", - "45.0", + "10.0", "ZEV", "P", "3" ], [ "8", - "RDS2", - "10.0", - "ZEV", + "MAC1", + "90.0", + "GEV", "P", "2" ], [ "9", - "MAC1", - "90.0", - "GEV", + "RDS2", + "20.0", + "ZEV", "P", "2" ], [ "10", "MAC1", - "15.0", + "45.0", "GEV", "P", "2" ], [ "11", - "RDS2", - "20.0", - "ZEV", + "MAC1", + "15.0", + "GEV", "P", "2" ], [ "12", - "MAC1", - "45.0", - "GEV", + "RDS2", + "30.0", + "ZEV", "P", "2" ], [ "13", - "RDS2", + "MAC1", "30.0", - "ZEV", + "GEV", "P", "2" ], [ "14", - "GCN4", - "15.0", + "RDS2", + "45.0", "ZEV", "P", "2" ], [ "15", - "MAC1", - "30.0", - "GEV", + "RDS2", + "15.0", + "ZEV", "P", "2" ], @@ -2950,15 +2985,15 @@ [ "17", "GCN4", - "45.0", + "15.0", "ZEV", "P", "2" ], [ "18", - "GCN4", - "90.0", + "RDS2", + "10.0", "ZEV", "P", "2" @@ -2966,7 +3001,7 @@ [ "19", "RDS2", - "45.0", + "0.0", "ZEV", "P", "2" @@ -2974,15 +3009,15 @@ [ "20", "RDS2", - "0.0", + "90.0", "ZEV", "P", "2" ], [ "21", - "RDS2", - "90.0", + "GCN4", + "45.0", "ZEV", "P", "2" @@ -3014,15 +3049,15 @@ [ "25", "GCN4", - "0.0", + "90.0", "ZEV", "P", "2" ], [ "26", - "RDS2", - "15.0", + "GCN4", + "0.0", "ZEV", "P", "2" @@ -3063,7 +3098,7 @@ " \n", " 0\n", " SWI1\n", - " 5.0\n", + " 15.0\n", " ZEV\n", " P\n", " 3\n", @@ -3087,7 +3122,7 @@ " \n", " 3\n", " SWI1\n", - " 10.0\n", + " 5.0\n", " ZEV\n", " P\n", " 3\n", @@ -3111,7 +3146,7 @@ " \n", " 6\n", " SWI1\n", - " 15.0\n", + " 45.0\n", " ZEV\n", " P\n", " 3\n", @@ -3119,72 +3154,72 @@ " \n", " 7\n", " SWI1\n", - " 45.0\n", + " 10.0\n", " ZEV\n", " P\n", " 3\n", " \n", " \n", " 8\n", - " RDS2\n", - " 10.0\n", - " ZEV\n", + " MAC1\n", + " 90.0\n", + " GEV\n", " P\n", " 2\n", " \n", " \n", " 9\n", - " MAC1\n", - " 90.0\n", - " GEV\n", + " RDS2\n", + " 20.0\n", + " ZEV\n", " P\n", " 2\n", " \n", " \n", " 10\n", " MAC1\n", - " 15.0\n", + " 45.0\n", " GEV\n", " P\n", " 2\n", " \n", " \n", " 11\n", - " RDS2\n", - " 20.0\n", - " ZEV\n", + " MAC1\n", + " 15.0\n", + " GEV\n", " P\n", " 2\n", " \n", " \n", " 12\n", - " MAC1\n", - " 45.0\n", - " GEV\n", + " RDS2\n", + " 30.0\n", + " ZEV\n", " P\n", " 2\n", " \n", " \n", " 13\n", - " RDS2\n", + " MAC1\n", " 30.0\n", - " ZEV\n", + " GEV\n", " P\n", " 2\n", " \n", " \n", " 14\n", - " GCN4\n", - " 15.0\n", + " RDS2\n", + " 45.0\n", " ZEV\n", " P\n", " 2\n", " \n", " \n", " 15\n", - " MAC1\n", - " 30.0\n", - " GEV\n", + " RDS2\n", + " 15.0\n", + " ZEV\n", " P\n", " 2\n", " \n", @@ -3199,15 +3234,15 @@ " \n", " 17\n", " GCN4\n", - " 45.0\n", + " 15.0\n", " ZEV\n", " P\n", " 2\n", " \n", " \n", " 18\n", - " GCN4\n", - " 90.0\n", + " RDS2\n", + " 10.0\n", " ZEV\n", " P\n", " 2\n", @@ -3215,7 +3250,7 @@ " \n", " 19\n", " RDS2\n", - " 45.0\n", + " 0.0\n", " ZEV\n", " P\n", " 2\n", @@ -3223,15 +3258,15 @@ " \n", " 20\n", " RDS2\n", - " 0.0\n", + " 90.0\n", " ZEV\n", " P\n", " 2\n", " \n", " \n", " 21\n", - " RDS2\n", - " 90.0\n", + " GCN4\n", + " 45.0\n", " ZEV\n", " P\n", " 2\n", @@ -3263,15 +3298,15 @@ " \n", " 25\n", " GCN4\n", - " 0.0\n", + " 90.0\n", " ZEV\n", " P\n", " 2\n", " \n", " \n", " 26\n", - " RDS2\n", - " 15.0\n", + " GCN4\n", + " 0.0\n", " ZEV\n", " P\n", " 2\n", @@ -3282,36 +3317,36 @@ ], "text/plain": [ " regulator_symbol time mechanism restriction n\n", - "0 SWI1 5.0 ZEV P 3\n", + "0 SWI1 15.0 ZEV P 3\n", "1 SWI1 30.0 ZEV P 3\n", "2 SWI1 20.0 ZEV P 3\n", - "3 SWI1 10.0 ZEV P 3\n", + "3 SWI1 5.0 ZEV P 3\n", "4 SWI1 90.0 ZEV P 3\n", "5 SWI1 0.0 ZEV P 3\n", - "6 SWI1 15.0 ZEV P 3\n", - "7 SWI1 45.0 ZEV P 3\n", - "8 RDS2 10.0 ZEV P 2\n", - "9 MAC1 90.0 GEV P 2\n", - "10 MAC1 15.0 GEV P 2\n", - "11 RDS2 20.0 ZEV P 2\n", - "12 MAC1 45.0 GEV P 2\n", - "13 RDS2 30.0 ZEV P 2\n", - "14 GCN4 15.0 ZEV P 2\n", - "15 MAC1 30.0 GEV P 2\n", + "6 SWI1 45.0 ZEV P 3\n", + "7 SWI1 10.0 ZEV P 3\n", + "8 MAC1 90.0 GEV P 2\n", + "9 RDS2 20.0 ZEV P 2\n", + "10 MAC1 45.0 GEV P 2\n", + "11 MAC1 15.0 GEV P 2\n", + "12 RDS2 30.0 ZEV P 2\n", + "13 MAC1 30.0 GEV P 2\n", + "14 RDS2 45.0 ZEV P 2\n", + "15 RDS2 15.0 ZEV P 2\n", "16 MAC1 5.0 GEV P 2\n", - "17 GCN4 45.0 ZEV P 2\n", - "18 GCN4 90.0 ZEV P 2\n", - "19 RDS2 45.0 ZEV P 2\n", - "20 RDS2 0.0 ZEV P 2\n", - "21 RDS2 90.0 ZEV P 2\n", + "17 GCN4 15.0 ZEV P 2\n", + "18 RDS2 10.0 ZEV P 2\n", + "19 RDS2 0.0 ZEV P 2\n", + "20 RDS2 90.0 ZEV P 2\n", + "21 GCN4 45.0 ZEV P 2\n", "22 GCN4 30.0 ZEV P 2\n", "23 MAC1 0.0 GEV P 2\n", "24 RDS2 5.0 ZEV P 2\n", - "25 GCN4 0.0 ZEV P 2\n", - "26 RDS2 15.0 ZEV P 2" + "25 GCN4 90.0 ZEV P 2\n", + "26 GCN4 0.0 ZEV P 2" ] }, - "execution_count": 14, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -3330,7 +3365,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "id": "4d869036", "metadata": {}, "outputs": [ @@ -3394,7 +3429,7 @@ "type": "float" } ], - "ref": "0f36c45d-0bab-4761-98f1-0e2a625be2df", + "ref": "58631e0d-0adf-41e4-9676-3e51aecbc7dd", "rows": [ [ "0", @@ -3411,26 +3446,26 @@ ], [ "1", - "1628", + "1620", "20161117", "ZEV", "YPL016W", "SWI1", "P", - "SMY2266b", + "SMY2266a", "20.0", "glucose", "30.0" ], [ "2", - "1620", + "1628", "20161117", "ZEV", "YPL016W", "SWI1", "P", - "SMY2266a", + "SMY2266b", "20.0", "glucose", "30.0" @@ -3488,26 +3523,26 @@ " \n", " \n", " 1\n", - " 1628\n", + " 1620\n", " 20161117\n", " ZEV\n", " YPL016W\n", " SWI1\n", " P\n", - " SMY2266b\n", + " SMY2266a\n", " 20.0\n", " glucose\n", " 30.0\n", " \n", " \n", " 2\n", - " 1620\n", + " 1628\n", " 20161117\n", " ZEV\n", " YPL016W\n", " SWI1\n", " P\n", - " SMY2266a\n", + " SMY2266b\n", " 20.0\n", " glucose\n", " 30.0\n", @@ -3519,16 +3554,16 @@ "text/plain": [ " sample_id date mechanism regulator_locus_tag regulator_symbol \\\n", "0 1636 20161117 ZEV YPL016W SWI1 \n", - "1 1628 20161117 ZEV YPL016W SWI1 \n", - "2 1620 20161117 ZEV YPL016W SWI1 \n", + "1 1620 20161117 ZEV YPL016W SWI1 \n", + "2 1628 20161117 ZEV YPL016W SWI1 \n", "\n", " restriction strain time carbon_source temperature_celsius \n", "0 P SMY2266c 20.0 glucose 30.0 \n", - "1 P SMY2266b 20.0 glucose 30.0 \n", - "2 P SMY2266a 20.0 glucose 30.0 " + "1 P SMY2266a 20.0 glucose 30.0 \n", + "2 P SMY2266b 20.0 glucose 30.0 " ] }, - "execution_count": 15, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -3546,7 +3581,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "id": "89408d2b", "metadata": {}, "outputs": [ @@ -3554,7 +3589,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "['MAC1', 'SWI1', 'GCN4', 'RDS2']\n" + "['SWI1', 'GCN4', 'RDS2', 'MAC1']\n" ] } ], @@ -3574,7 +3609,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "id": "5a3b802b", "metadata": {}, "outputs": [ @@ -3582,7 +3617,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "['MAC1', 'SWI1', 'GCN4', 'RDS2', 'GEV']\n" + "['SWI1', 'GCN4', 'RDS2', 'MAC1', 'GEV']\n" ] } ], @@ -3594,7 +3629,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "id": "abed8bc2", "metadata": {}, "outputs": [ @@ -3683,7 +3718,7 @@ "type": "string" } ], - "ref": "b0a3d538-3af3-4f72-8610-7722a73a7a4f", + "ref": "bf27f61f-dbfa-482f-a0a0-235eeabc3fee", "rows": [ [ "0", @@ -4926,7 +4961,7 @@ "[29804 rows x 15 columns]" ] }, - "execution_count": 18, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -4937,7 +4972,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "id": "cell-25", "metadata": {}, "outputs": [ @@ -4952,47 +4987,54 @@ "3 448 ACA1 15.0 ZEV \n", "4 448 ACA1 15.0 ZEV \n", "\n", - " binding_id \\\n", - "0 BrentLab/callingcards;annotated_features;146 \n", - "1 BrentLab/callingcards;annotated_features;156 \n", - "2 BrentLab/harbison_2004;harbison_2004;88 \n", - "3 BrentLab/callingcards;annotated_features;146 \n", - "4 BrentLab/callingcards;annotated_features;803 \n", + " binding_id \\\n", + "0 BrentLab/callingcards;annotated_features;803 \n", + "1 BrentLab/harbison_2004;harbison_2004;88 \n", + "2 BrentLab/mahendrawada_2025;chec_mahendrawada_m... \n", + "3 BrentLab/callingcards;annotated_features;126 \n", + "4 BrentLab/callingcards;annotated_features;156 \n", "\n", " perturbation_id binding_rank_threshold \\\n", - "0 BrentLab/hackett_2020;hackett_2020;448 452.0 \n", - "1 BrentLab/hackett_2020;hackett_2020;448 296.0 \n", - "2 BrentLab/hackett_2020;hackett_2020;448 122.0 \n", - "3 BrentLab/hackett_2020;hackett_2020;448 35.0 \n", - "4 BrentLab/hackett_2020;hackett_2020;448 544.0 \n", + "0 BrentLab/hackett_2020;hackett_2020;448 110.0 \n", + "1 BrentLab/hackett_2020;hackett_2020;448 334.0 \n", + "2 BrentLab/hackett_2020;hackett_2020;448 3882.0 \n", + "3 BrentLab/hackett_2020;hackett_2020;448 437.0 \n", + "4 BrentLab/hackett_2020;hackett_2020;448 374.0 \n", "\n", " perturbation_rank_threshold binding_set_size perturbation_set_size \\\n", - "0 1.0 454.0 5591.0 \n", - "1 346.0 297.0 346.0 \n", - "2 218.0 122.0 218.0 \n", - "3 407.0 35.0 407.0 \n", - "4 1.0 544.0 5591.0 \n", + "0 346.0 113.0 346.0 \n", + "1 1.0 334.0 5524.0 \n", + "2 1.0 3883.0 5591.0 \n", + "3 1.0 442.0 5591.0 \n", + "4 1.0 376.0 5591.0 \n", "\n", " dto_fdr dto_empirical_pvalue pr_ranking_column \\\n", - "0 0.000000 1.000 pvalue \n", - "1 0.277211 0.000 log2fc \n", - "2 0.612736 0.917 log2fc \n", - "3 0.116834 0.000 log2fc \n", + "0 0.236207 0.001 log2fc \n", + "1 0.000000 1.000 pvalue \n", + "2 0.000000 1.000 pvalue \n", + "3 0.000000 1.000 pvalue \n", "4 0.000000 1.000 pvalue \n", "\n", - " binding_repo_dataset perturbation_repo_dataset binding_id_id \\\n", - "0 callingcards-annotated_features hackett_2020-hackett_2020 146 \n", - "1 callingcards-annotated_features hackett_2020-hackett_2020 156 \n", - "2 harbison_2004-harbison_2004 hackett_2020-hackett_2020 88 \n", - "3 callingcards-annotated_features hackett_2020-hackett_2020 146 \n", - "4 callingcards-annotated_features hackett_2020-hackett_2020 803 \n", + " binding_repo_dataset \\\n", + "0 callingcards-annotated_features \n", + "1 harbison_2004-harbison_2004 \n", + "2 mahendrawada_2025-chec_mahendrawada_m2025_af_c... \n", + "3 callingcards-annotated_features \n", + "4 callingcards-annotated_features \n", "\n", - " binding_id_source perturbation_id_id \\\n", - "0 BrentLab/callingcards;annotated_features 448 \n", - "1 BrentLab/callingcards;annotated_features 448 \n", - "2 harbison 448 \n", - "3 BrentLab/callingcards;annotated_features 448 \n", - "4 BrentLab/callingcards;annotated_features 448 \n", + " perturbation_repo_dataset binding_id_id \\\n", + "0 hackett_2020-hackett_2020 803 \n", + "1 hackett_2020-hackett_2020 88 \n", + "2 hackett_2020-hackett_2020 59 \n", + "3 hackett_2020-hackett_2020 126 \n", + "4 hackett_2020-hackett_2020 156 \n", + "\n", + " binding_id_source perturbation_id_id \\\n", + "0 BrentLab/callingcards;annotated_features 448 \n", + "1 harbison 448 \n", + "2 BrentLab/mahendrawada_2025;chec_mahendrawada_m... 448 \n", + "3 BrentLab/callingcards;annotated_features 448 \n", + "4 BrentLab/callingcards;annotated_features 448 \n", "\n", " perturbation_id_source \n", "0 hackett \n", @@ -5027,7 +5069,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 22, "id": "cell-26", "metadata": {}, "outputs": [], diff --git a/tfbpapi/tests/test_virtual_db.py b/tfbpapi/tests/test_virtual_db.py index cb64592..fa02695 100644 --- a/tfbpapi/tests/test_virtual_db.py +++ b/tfbpapi/tests/test_virtual_db.py @@ -664,6 +664,31 @@ def test_vdb_get_tags_no_views_needed(self, tmp_path): assert tags == {"assay": "binding"} assert not vdb._views_registered + def test_vdb_get_datasets(self, tmp_path): + """VirtualDB.get_datasets() returns sorted db_names without registering + views.""" + vdb = self._make_vdb( + """ + repositories: + BrentLab/harbison: + dataset: + harbison_2004: + db_name: harbison + sample_id: + field: sample_id + BrentLab/kemmeren: + dataset: + kemmeren_2014: + db_name: kemmeren + sample_id: + field: sample_id + """, + tmp_path, + ) + assert not vdb._views_registered + assert vdb.get_datasets() == ["harbison", "kemmeren"] + assert not vdb._views_registered + # ------------------------------------------------------------------ # Tests: View registration diff --git a/tfbpapi/virtual_db.py b/tfbpapi/virtual_db.py index 1ac968c..86c9ea3 100644 --- a/tfbpapi/virtual_db.py +++ b/tfbpapi/virtual_db.py @@ -390,6 +390,24 @@ def get_common_fields(self) -> list[str]: common = set.intersection(*sets) return sorted(common) + def get_datasets(self) -> list[str]: + """ + Return the sorted list of dataset names known to this VirtualDB. + + Dataset names are the resolved ``db_name`` values from the + configuration (falling back to the config_name when ``db_name`` + is not explicitly set). These are the names accepted by + :meth:`get_tags` and queryable via :meth:`query`. + + Unlike :meth:`tables`, this method reads directly from the + configuration and does not require views to be registered, so + no data is downloaded. + + :return: Sorted list of dataset names + + """ + return sorted(self._db_name_map) + def get_tags(self, db_name: str) -> dict[str, str]: """ Return the merged tags for a dataset.