diff --git a/docs/tutorials/virtual_db_tutorial.ipynb b/docs/tutorials/virtual_db_tutorial.ipynb
index bb07e75..c1aecab 100644
--- a/docs/tutorials/virtual_db_tutorial.ipynb
+++ b/docs/tutorials/virtual_db_tutorial.ipynb
@@ -25,7 +25,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 2,
"id": "cell-2",
"metadata": {},
"outputs": [
@@ -33,7 +33,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Config saved to: /tmp/tmp_krovt13/vdb_config.yaml\n"
+ "Config saved to: /tmp/tmp6rrutwjn/vdb_config.yaml\n"
]
}
],
@@ -157,7 +157,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
"id": "cell-4",
"metadata": {},
"outputs": [
@@ -186,6 +186,41 @@
"print(repr(vdb))"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "ea9cc76c",
+ "metadata": {},
+ "source": [
+ "## Listing datasets\n",
+ "\n",
+ "To list the datasets available in the VirtualDB instance, use `get_datasets()`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "896d3ee1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Datasets:\n",
+ "- dto\n",
+ "- hackett\n",
+ "- harbison\n",
+ "- kemmeren\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"\\nDatasets:\")\n",
+ "for dataset in vdb.get_datasets():\n",
+ " print(f\"- {dataset}\")"
+ ]
+ },
{
"cell_type": "markdown",
"id": "0f10c138",
@@ -204,7 +239,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 5,
"id": "f7d73db0",
"metadata": {},
"outputs": [
@@ -254,7 +289,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 6,
"id": "cell-6",
"metadata": {},
"outputs": [
@@ -269,11 +304,11 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 10407.70it/s]\n",
- "Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 1770.50it/s]\n",
- "Fetching 1 files: 100%|██████████| 1/1 [00:20<00:00, 20.31s/it]\n",
+ "Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 11305.40it/s]\n",
+ "Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 6442.86it/s]\n",
+ "Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 9868.95it/s]\n",
"No metadata fields found for data config 'dto' in repo 'BrentLab/yeast_comparative_analysis' -- no embedded metadata_fields and no metadata config with applies_to\n",
- "Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 55091.56it/s]\n",
+ "Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 7124.69it/s]\n",
"Key 'carbon_source' not found at path 'media.carbon_source' (current keys: ['name'])\n",
"Key 'carbon_source' not found at path 'media.carbon_source' (current keys: ['name'])\n",
"Key 'carbon_source' not found at path 'media.carbon_source' (current keys: ['name'])\n",
@@ -303,7 +338,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 7,
"id": "pdebujnqb9q",
"metadata": {},
"outputs": [
@@ -352,7 +387,7 @@
"type": "unknown"
}
],
- "ref": "955566a4-2a55-483f-a0d4-11f1757f6a28",
+ "ref": "3dce54e1-e78b-4f8b-9241-eeceae15b6f4",
"rows": [
[
"0",
@@ -523,7 +558,7 @@
"5 harbison_meta temperature_celsius DOUBLE YES None None None"
]
},
- "execution_count": 5,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -536,7 +571,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 8,
"id": "9deee334",
"metadata": {},
"outputs": [
@@ -585,7 +620,7 @@
"type": "unknown"
}
],
- "ref": "012ff714-cded-469d-9c53-642872a5d487",
+ "ref": "c37ac38f-1f94-4cb1-8e3a-d87122c08b1a",
"rows": [
[
"0",
@@ -861,7 +896,7 @@
"10 harbison temperature_celsius DOUBLE YES None None None"
]
},
- "execution_count": 6,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -873,7 +908,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 9,
"id": "cell-9",
"metadata": {},
"outputs": [
@@ -907,7 +942,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 10,
"id": "1a705f1c",
"metadata": {},
"outputs": [
@@ -951,50 +986,50 @@
"type": "float"
}
],
- "ref": "b5c797e5-c834-4f9d-a8fc-89789ef0cc68",
+ "ref": "f6d762b2-08cc-4514-93fb-89fac1ce6c8b",
"rows": [
[
"0",
- "300",
- "YPD",
- "YOL116W",
- "MSN1",
+ "118",
+ "H2O2Hi",
+ "YGL073W",
+ "HSF1",
"glucose",
"30.0"
],
[
"1",
- "113",
+ "216",
"YPD",
- "YGL035C",
- "MIG1",
+ "YKR064W",
+ "OAF3",
"glucose",
"30.0"
],
[
"2",
- "81",
- "RAPA",
- "YEL009C",
- "GCN4",
- "glucose",
+ "314",
+ "SM",
+ "YOR358W",
+ "HAP5",
+ "unspecified",
"30.0"
],
[
"3",
- "279",
+ "330",
"YPD",
- "YNL139C",
- "THO2",
+ "YPL177C",
+ "CUP9",
"glucose",
"30.0"
],
[
"4",
- "73",
- "H2O2Hi",
- "YDR423C",
- "CAD1",
+ "9",
+ "RAPA",
+ "YBL103C",
+ "RTG3",
"glucose",
"30.0"
]
@@ -1034,46 +1069,46 @@
"
\n",
" \n",
" | 0 | \n",
- " 300 | \n",
- " YPD | \n",
- " YOL116W | \n",
- " MSN1 | \n",
+ " 118 | \n",
+ " H2O2Hi | \n",
+ " YGL073W | \n",
+ " HSF1 | \n",
" glucose | \n",
" 30.0 | \n",
"
\n",
" \n",
" | 1 | \n",
- " 113 | \n",
+ " 216 | \n",
" YPD | \n",
- " YGL035C | \n",
- " MIG1 | \n",
+ " YKR064W | \n",
+ " OAF3 | \n",
" glucose | \n",
" 30.0 | \n",
"
\n",
" \n",
" | 2 | \n",
- " 81 | \n",
- " RAPA | \n",
- " YEL009C | \n",
- " GCN4 | \n",
- " glucose | \n",
+ " 314 | \n",
+ " SM | \n",
+ " YOR358W | \n",
+ " HAP5 | \n",
+ " unspecified | \n",
" 30.0 | \n",
"
\n",
" \n",
" | 3 | \n",
- " 279 | \n",
+ " 330 | \n",
" YPD | \n",
- " YNL139C | \n",
- " THO2 | \n",
+ " YPL177C | \n",
+ " CUP9 | \n",
" glucose | \n",
" 30.0 | \n",
"
\n",
" \n",
" | 4 | \n",
- " 73 | \n",
- " H2O2Hi | \n",
- " YDR423C | \n",
- " CAD1 | \n",
+ " 9 | \n",
+ " RAPA | \n",
+ " YBL103C | \n",
+ " RTG3 | \n",
" glucose | \n",
" 30.0 | \n",
"
\n",
@@ -1083,11 +1118,11 @@
],
"text/plain": [
" sample_id condition regulator_locus_tag regulator_symbol carbon_source \\\n",
- "0 300 YPD YOL116W MSN1 glucose \n",
- "1 113 YPD YGL035C MIG1 glucose \n",
- "2 81 RAPA YEL009C GCN4 glucose \n",
- "3 279 YPD YNL139C THO2 glucose \n",
- "4 73 H2O2Hi YDR423C CAD1 glucose \n",
+ "0 118 H2O2Hi YGL073W HSF1 glucose \n",
+ "1 216 YPD YKR064W OAF3 glucose \n",
+ "2 314 SM YOR358W HAP5 unspecified \n",
+ "3 330 YPD YPL177C CUP9 glucose \n",
+ "4 9 RAPA YBL103C RTG3 glucose \n",
"\n",
" temperature_celsius \n",
"0 30.0 \n",
@@ -1097,7 +1132,7 @@
"4 30.0 "
]
},
- "execution_count": 8,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -1123,7 +1158,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 11,
"id": "cell-17",
"metadata": {},
"outputs": [
@@ -1192,75 +1227,75 @@
"type": "float"
}
],
- "ref": "06d7e391-9665-4a5f-9276-359ff8e71c3e",
+ "ref": "e270aed6-9c2d-445a-b7d5-9fe1f96455ff",
"rows": [
[
"0",
- "15",
- "14.0",
+ "13",
+ "12.0",
"YBR049C",
"REB1",
- "YPD",
+ "H2O2Hi",
"YPR204W",
"YPR204W",
- "0.85288861",
- "0.76943045",
+ "0.90161323",
+ "0.6769426",
"glucose",
"30.0"
],
[
"1",
- "15",
- "14.0",
+ "13",
+ "12.0",
"YBR049C",
"REB1",
- "YPD",
+ "H2O2Hi",
"YPR203W",
"YPR203W",
- "1.2490028",
- "0.11237602",
+ "1.0534522",
+ "0.38440432",
"glucose",
"30.0"
],
[
"2",
- "15",
- "14.0",
+ "13",
+ "12.0",
"YBR049C",
"REB1",
- "YPD",
+ "H2O2Hi",
"YPR202W",
"YPR202W",
- "1.2490028",
- "0.11237602",
+ "1.0534522",
+ "0.38440432",
"glucose",
"30.0"
],
[
"3",
- "15",
- "14.0",
+ "13",
+ "12.0",
"YBR049C",
"REB1",
- "YPD",
+ "H2O2Hi",
"YPR201W",
"ARR3",
- "1.5137073",
- "0.1681333",
+ "0.84429803",
+ "0.66537467",
"glucose",
"30.0"
],
[
"4",
- "15",
- "14.0",
+ "13",
+ "12.0",
"YBR049C",
"REB1",
- "YPD",
+ "H2O2Hi",
"YPR200C",
"ARR2",
- "1.5137073",
- "0.1681333",
+ "0.84429803",
+ "0.66537467",
"glucose",
"30.0"
]
@@ -1305,71 +1340,71 @@
" \n",
" \n",
" | 0 | \n",
- " 15 | \n",
- " 14.0 | \n",
+ " 13 | \n",
+ " 12.0 | \n",
" YBR049C | \n",
" REB1 | \n",
- " YPD | \n",
+ " H2O2Hi | \n",
" YPR204W | \n",
" YPR204W | \n",
- " 0.852889 | \n",
- " 0.769430 | \n",
+ " 0.901613 | \n",
+ " 0.676943 | \n",
" glucose | \n",
" 30.0 | \n",
"
\n",
" \n",
" | 1 | \n",
- " 15 | \n",
- " 14.0 | \n",
+ " 13 | \n",
+ " 12.0 | \n",
" YBR049C | \n",
" REB1 | \n",
- " YPD | \n",
+ " H2O2Hi | \n",
" YPR203W | \n",
" YPR203W | \n",
- " 1.249003 | \n",
- " 0.112376 | \n",
+ " 1.053452 | \n",
+ " 0.384404 | \n",
" glucose | \n",
" 30.0 | \n",
"
\n",
" \n",
" | 2 | \n",
- " 15 | \n",
- " 14.0 | \n",
+ " 13 | \n",
+ " 12.0 | \n",
" YBR049C | \n",
" REB1 | \n",
- " YPD | \n",
+ " H2O2Hi | \n",
" YPR202W | \n",
" YPR202W | \n",
- " 1.249003 | \n",
- " 0.112376 | \n",
+ " 1.053452 | \n",
+ " 0.384404 | \n",
" glucose | \n",
" 30.0 | \n",
"
\n",
" \n",
" | 3 | \n",
- " 15 | \n",
- " 14.0 | \n",
+ " 13 | \n",
+ " 12.0 | \n",
" YBR049C | \n",
" REB1 | \n",
- " YPD | \n",
+ " H2O2Hi | \n",
" YPR201W | \n",
" ARR3 | \n",
- " 1.513707 | \n",
- " 0.168133 | \n",
+ " 0.844298 | \n",
+ " 0.665375 | \n",
" glucose | \n",
" 30.0 | \n",
"
\n",
" \n",
" | 4 | \n",
- " 15 | \n",
- " 14.0 | \n",
+ " 13 | \n",
+ " 12.0 | \n",
" YBR049C | \n",
" REB1 | \n",
- " YPD | \n",
+ " H2O2Hi | \n",
" YPR200C | \n",
" ARR2 | \n",
- " 1.513707 | \n",
- " 0.168133 | \n",
+ " 0.844298 | \n",
+ " 0.665375 | \n",
" glucose | \n",
" 30.0 | \n",
"
\n",
@@ -1379,18 +1414,18 @@
],
"text/plain": [
" sample_id db_id regulator_locus_tag regulator_symbol condition \\\n",
- "0 15 14.0 YBR049C REB1 YPD \n",
- "1 15 14.0 YBR049C REB1 YPD \n",
- "2 15 14.0 YBR049C REB1 YPD \n",
- "3 15 14.0 YBR049C REB1 YPD \n",
- "4 15 14.0 YBR049C REB1 YPD \n",
+ "0 13 12.0 YBR049C REB1 H2O2Hi \n",
+ "1 13 12.0 YBR049C REB1 H2O2Hi \n",
+ "2 13 12.0 YBR049C REB1 H2O2Hi \n",
+ "3 13 12.0 YBR049C REB1 H2O2Hi \n",
+ "4 13 12.0 YBR049C REB1 H2O2Hi \n",
"\n",
" target_locus_tag target_symbol effect pvalue carbon_source \\\n",
- "0 YPR204W YPR204W 0.852889 0.769430 glucose \n",
- "1 YPR203W YPR203W 1.249003 0.112376 glucose \n",
- "2 YPR202W YPR202W 1.249003 0.112376 glucose \n",
- "3 YPR201W ARR3 1.513707 0.168133 glucose \n",
- "4 YPR200C ARR2 1.513707 0.168133 glucose \n",
+ "0 YPR204W YPR204W 0.901613 0.676943 glucose \n",
+ "1 YPR203W YPR203W 1.053452 0.384404 glucose \n",
+ "2 YPR202W YPR202W 1.053452 0.384404 glucose \n",
+ "3 YPR201W ARR3 0.844298 0.665375 glucose \n",
+ "4 YPR200C ARR2 0.844298 0.665375 glucose \n",
"\n",
" temperature_celsius \n",
"0 30.0 \n",
@@ -1400,7 +1435,7 @@
"4 30.0 "
]
},
- "execution_count": 9,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -1429,7 +1464,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 12,
"id": "cell-19",
"metadata": {},
"outputs": [
@@ -1453,7 +1488,7 @@
"type": "integer"
}
],
- "ref": "6d8b4d37-3b6b-40f1-833d-aa6711694bcb",
+ "ref": "1f789068-26a6-466d-b977-ce3a58e6b547",
"rows": [
[
"0",
@@ -1467,17 +1502,17 @@
],
[
"2",
- "STE12",
+ "HSF1",
"4"
],
[
"3",
- "RTG3",
+ "STE12",
"4"
],
[
"4",
- "DIG1",
+ "SKN7",
"4"
],
[
@@ -1487,102 +1522,102 @@
],
[
"6",
- "HSF1",
+ "DIG1",
"4"
],
[
"7",
- "SKN7",
+ "RTG3",
"4"
],
[
"8",
- "RPN4",
+ "PHO2",
"3"
],
[
"9",
- "GAT1",
+ "ROX1",
"3"
],
[
"10",
- "AFT2",
+ "GZF3",
"3"
],
[
"11",
- "YAP7",
+ "SFP1",
"3"
],
[
"12",
- "TEC1",
+ "KSS1",
"3"
],
[
"13",
- "MOT3",
+ "CIN5",
"3"
],
[
"14",
- "ROX1",
+ "NRG1",
"3"
],
[
"15",
- "GZF3",
+ "MBP1",
"3"
],
[
"16",
- "PHO2",
+ "GAT1",
"3"
],
[
"17",
- "MAL33",
+ "AFT2",
"3"
],
[
"18",
- "CIN5",
+ "MOT3",
"3"
],
[
"19",
- "SFP1",
+ "PHD1",
"3"
],
[
"20",
- "KSS1",
+ "TEC1",
"3"
],
[
"21",
- "YAP6",
+ "YAP7",
"3"
],
[
"22",
- "RPH1",
+ "RIM101",
"3"
],
[
"23",
- "NRG1",
+ "AFT1",
"3"
],
[
"24",
- "PHD1",
+ "YJL206C",
"3"
],
[
"25",
- "FHL1",
+ "RPN4",
"3"
],
[
@@ -1592,117 +1627,117 @@
],
[
"27",
- "FKH2",
+ "FHL1",
"3"
],
[
"28",
- "MBP1",
+ "FKH2",
"3"
],
[
"29",
- "RIM101",
+ "MAL33",
"3"
],
[
"30",
- "YJL206C",
+ "RPH1",
"3"
],
[
"31",
- "AFT1",
+ "YAP6",
"3"
],
[
"32",
- "RLM1",
+ "SOK2",
"2"
],
[
"33",
- "XBP1",
+ "HAP2",
"2"
],
[
"34",
- "IME4",
+ "CAD1",
"2"
],
[
"35",
- "MCM1",
+ "MAC1",
"2"
],
[
"36",
- "DAL80",
+ "UME6",
"2"
],
[
"37",
- "YAP3",
+ "YAP5",
"2"
],
[
"38",
- "YAP5",
+ "MOT2",
"2"
],
[
"39",
- "MAC1",
+ "UME1",
"2"
],
[
"40",
- "UME6",
+ "DAL81",
"2"
],
[
"41",
- "PDR1",
+ "GLN3",
"2"
],
[
"42",
- "UME1",
+ "ARR1",
"2"
],
[
"43",
- "CAD1",
+ "IME4",
"2"
],
[
"44",
- "MGA1",
+ "ASH1",
"2"
],
[
"45",
- "HAP4",
+ "RLM1",
"2"
],
[
"46",
- "MIG2",
+ "MSS11",
"2"
],
[
"47",
- "GCN4",
+ "MCM1",
"2"
],
[
"48",
- "RTG1",
+ "MGA1",
"2"
],
[
"49",
- "PUT3",
+ "RDS1",
"2"
]
],
@@ -1747,17 +1782,17 @@
" \n",
" \n",
" | 2 | \n",
- " STE12 | \n",
+ " HSF1 | \n",
" 4 | \n",
"
\n",
" \n",
" | 3 | \n",
- " RTG3 | \n",
+ " STE12 | \n",
" 4 | \n",
"
\n",
" \n",
" | 4 | \n",
- " DIG1 | \n",
+ " SKN7 | \n",
" 4 | \n",
"
\n",
" \n",
@@ -1767,27 +1802,27 @@
"
\n",
" \n",
" | 58 | \n",
- " IME1 | \n",
+ " PUT3 | \n",
" 2 | \n",
"
\n",
" \n",
" | 59 | \n",
- " RDS1 | \n",
+ " RTG1 | \n",
" 2 | \n",
"
\n",
" \n",
" | 60 | \n",
- " MSS11 | \n",
+ " ADR1 | \n",
" 2 | \n",
"
\n",
" \n",
" | 61 | \n",
- " HAP2 | \n",
+ " UGA3 | \n",
" 2 | \n",
"
\n",
" \n",
" | 62 | \n",
- " ARR1 | \n",
+ " PDR1 | \n",
" 2 | \n",
"
\n",
" \n",
@@ -1799,20 +1834,20 @@
" regulator_symbol n\n",
"0 MSN2 6\n",
"1 MSN4 5\n",
- "2 STE12 4\n",
- "3 RTG3 4\n",
- "4 DIG1 4\n",
+ "2 HSF1 4\n",
+ "3 STE12 4\n",
+ "4 SKN7 4\n",
".. ... ..\n",
- "58 IME1 2\n",
- "59 RDS1 2\n",
- "60 MSS11 2\n",
- "61 HAP2 2\n",
- "62 ARR1 2\n",
+ "58 PUT3 2\n",
+ "59 RTG1 2\n",
+ "60 ADR1 2\n",
+ "61 UGA3 2\n",
+ "62 PDR1 2\n",
"\n",
"[63 rows x 2 columns]"
]
},
- "execution_count": 10,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -1853,7 +1888,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 13,
"id": "cell-21",
"metadata": {},
"outputs": [
@@ -1942,7 +1977,7 @@
"type": "string"
}
],
- "ref": "1ce4dce9-5191-4116-b848-394fcdb3b5fc",
+ "ref": "426c5717-57fa-4c0d-aa1f-b1947914421c",
"rows": [
[
"0",
@@ -2131,7 +2166,7 @@
"2 harbison 11 BrentLab/hughes_2006;overexpression "
]
},
- "execution_count": 11,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -2143,7 +2178,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 14,
"id": "cell-22",
"metadata": {},
"outputs": [
@@ -2197,117 +2232,117 @@
"type": "float"
}
],
- "ref": "8e604dcf-efad-42a8-a049-7bf684faa9b6",
+ "ref": "43a63e85-fc7c-4630-873c-6a44f8af7442",
"rows": [
[
"0",
- "18",
- "YPD",
- "YBR083W",
- "TEC1",
- "glucose",
+ "314",
+ "SM",
+ "YOR358W",
+ "HAP5",
+ "unspecified",
"30.0",
"0.0",
- "0.08188235294117648"
+ "0.047097156398104266"
],
[
"1",
- "157",
- "H2O2Hi",
- "YHR206W",
- "SKN7",
+ "240",
+ "YPD",
+ "YML007W",
+ "YAP1",
"glucose",
"30.0",
"0.0",
- "0.13931986462735127"
+ "0.14091317634369943"
],
[
"2",
- "93",
+ "330",
"YPD",
- "YER111C",
- "SWI4",
+ "YPL177C",
+ "CUP9",
"glucose",
"30.0",
"0.0",
- "0.17005078106191404"
+ "0.00039874225300765584"
],
[
"3",
- "72",
- "YPD",
- "YDR421W",
- "ARO80",
+ "114",
+ "H2O2Hi",
+ "YGL071W",
+ "AFT1",
"glucose",
"30.0",
"0.0",
- "0.00011392635800218739"
+ "0.09653511969862681"
],
[
"4",
- "71",
- "SM",
- "YDR421W",
- "ARO80",
- "unspecified",
+ "118",
+ "H2O2Hi",
+ "YGL073W",
+ "HSF1",
+ "glucose",
"30.0",
"0.0",
- "0.00011392635800218739"
+ "0.03150882247029168"
],
[
"5",
- "346",
- "RAPA",
- "YPR104C",
- "FHL1",
+ "31",
+ "H2O2Hi",
+ "YDL020C",
+ "RPN4",
"glucose",
"30.0",
"0.0",
- "0.019746237283784218"
+ "0.12466961356179365"
],
[
"6",
- "226",
+ "303",
"YPD",
- "YLR182W",
- "SWI6",
+ "YOR028C",
+ "CIN5",
"glucose",
"30.0",
"0.0",
- "0.07368989186287292"
+ "0.03621718920889537"
],
[
"7",
- "286",
- "YPD",
- "YNL309W",
- "STB1",
+ "36",
+ "H2O2Lo",
+ "YDL056W",
+ "MBP1",
"glucose",
"30.0",
"0.0",
- "0.1821470588235294"
+ "0.04300429120153643"
],
[
"8",
- "172",
- "SM",
- "YIR023W",
- "DAL81",
- "unspecified",
+ "15",
+ "YPD",
+ "YBR049C",
+ "REB1",
+ "glucose",
"30.0",
"0.0",
- "0.21656240134694307"
+ "0.07954075079166496"
],
[
"9",
- "320",
- "YPD",
- "YPL038W",
- "MET31",
+ "162",
+ "H2O2Lo",
+ "YIL101C",
+ "XBP1",
"glucose",
"30.0",
"0.0",
- "0.0661219662690251"
+ "0.22690440962955793"
]
],
"shape": {
@@ -2347,113 +2382,113 @@
" \n",
" \n",
" | 0 | \n",
- " 18 | \n",
- " YPD | \n",
- " YBR083W | \n",
- " TEC1 | \n",
- " glucose | \n",
+ " 314 | \n",
+ " SM | \n",
+ " YOR358W | \n",
+ " HAP5 | \n",
+ " unspecified | \n",
" 30.0 | \n",
" 0.0 | \n",
- " 0.081882 | \n",
+ " 0.047097 | \n",
"
\n",
" \n",
" | 1 | \n",
- " 157 | \n",
- " H2O2Hi | \n",
- " YHR206W | \n",
- " SKN7 | \n",
+ " 240 | \n",
+ " YPD | \n",
+ " YML007W | \n",
+ " YAP1 | \n",
" glucose | \n",
" 30.0 | \n",
" 0.0 | \n",
- " 0.139320 | \n",
+ " 0.140913 | \n",
"
\n",
" \n",
" | 2 | \n",
- " 93 | \n",
+ " 330 | \n",
" YPD | \n",
- " YER111C | \n",
- " SWI4 | \n",
+ " YPL177C | \n",
+ " CUP9 | \n",
" glucose | \n",
" 30.0 | \n",
" 0.0 | \n",
- " 0.170051 | \n",
+ " 0.000399 | \n",
"
\n",
" \n",
" | 3 | \n",
- " 72 | \n",
- " YPD | \n",
- " YDR421W | \n",
- " ARO80 | \n",
+ " 114 | \n",
+ " H2O2Hi | \n",
+ " YGL071W | \n",
+ " AFT1 | \n",
" glucose | \n",
" 30.0 | \n",
" 0.0 | \n",
- " 0.000114 | \n",
+ " 0.096535 | \n",
"
\n",
" \n",
" | 4 | \n",
- " 71 | \n",
- " SM | \n",
- " YDR421W | \n",
- " ARO80 | \n",
- " unspecified | \n",
+ " 118 | \n",
+ " H2O2Hi | \n",
+ " YGL073W | \n",
+ " HSF1 | \n",
+ " glucose | \n",
" 30.0 | \n",
" 0.0 | \n",
- " 0.000114 | \n",
+ " 0.031509 | \n",
"
\n",
" \n",
" | 5 | \n",
- " 346 | \n",
- " RAPA | \n",
- " YPR104C | \n",
- " FHL1 | \n",
+ " 31 | \n",
+ " H2O2Hi | \n",
+ " YDL020C | \n",
+ " RPN4 | \n",
" glucose | \n",
" 30.0 | \n",
" 0.0 | \n",
- " 0.019746 | \n",
+ " 0.124670 | \n",
"
\n",
" \n",
" | 6 | \n",
- " 226 | \n",
+ " 303 | \n",
" YPD | \n",
- " YLR182W | \n",
- " SWI6 | \n",
+ " YOR028C | \n",
+ " CIN5 | \n",
" glucose | \n",
" 30.0 | \n",
" 0.0 | \n",
- " 0.073690 | \n",
+ " 0.036217 | \n",
"
\n",
" \n",
" | 7 | \n",
- " 286 | \n",
- " YPD | \n",
- " YNL309W | \n",
- " STB1 | \n",
+ " 36 | \n",
+ " H2O2Lo | \n",
+ " YDL056W | \n",
+ " MBP1 | \n",
" glucose | \n",
" 30.0 | \n",
" 0.0 | \n",
- " 0.182147 | \n",
+ " 0.043004 | \n",
"
\n",
" \n",
" | 8 | \n",
- " 172 | \n",
- " SM | \n",
- " YIR023W | \n",
- " DAL81 | \n",
- " unspecified | \n",
+ " 15 | \n",
+ " YPD | \n",
+ " YBR049C | \n",
+ " REB1 | \n",
+ " glucose | \n",
" 30.0 | \n",
" 0.0 | \n",
- " 0.216562 | \n",
+ " 0.079541 | \n",
"
\n",
" \n",
" | 9 | \n",
- " 320 | \n",
- " YPD | \n",
- " YPL038W | \n",
- " MET31 | \n",
+ " 162 | \n",
+ " H2O2Lo | \n",
+ " YIL101C | \n",
+ " XBP1 | \n",
" glucose | \n",
" 30.0 | \n",
" 0.0 | \n",
- " 0.066122 | \n",
+ " 0.226904 | \n",
"
\n",
" \n",
"\n",
@@ -2461,31 +2496,31 @@
],
"text/plain": [
" sample_id condition regulator_locus_tag regulator_symbol carbon_source \\\n",
- "0 18 YPD YBR083W TEC1 glucose \n",
- "1 157 H2O2Hi YHR206W SKN7 glucose \n",
- "2 93 YPD YER111C SWI4 glucose \n",
- "3 72 YPD YDR421W ARO80 glucose \n",
- "4 71 SM YDR421W ARO80 unspecified \n",
- "5 346 RAPA YPR104C FHL1 glucose \n",
- "6 226 YPD YLR182W SWI6 glucose \n",
- "7 286 YPD YNL309W STB1 glucose \n",
- "8 172 SM YIR023W DAL81 unspecified \n",
- "9 320 YPD YPL038W MET31 glucose \n",
+ "0 314 SM YOR358W HAP5 unspecified \n",
+ "1 240 YPD YML007W YAP1 glucose \n",
+ "2 330 YPD YPL177C CUP9 glucose \n",
+ "3 114 H2O2Hi YGL071W AFT1 glucose \n",
+ "4 118 H2O2Hi YGL073W HSF1 glucose \n",
+ "5 31 H2O2Hi YDL020C RPN4 glucose \n",
+ "6 303 YPD YOR028C CIN5 glucose \n",
+ "7 36 H2O2Lo YDL056W MBP1 glucose \n",
+ "8 15 YPD YBR049C REB1 glucose \n",
+ "9 162 H2O2Lo YIL101C XBP1 glucose \n",
"\n",
" temperature_celsius dto_empirical_pvalue dto_fdr \n",
- "0 30.0 0.0 0.081882 \n",
- "1 30.0 0.0 0.139320 \n",
- "2 30.0 0.0 0.170051 \n",
- "3 30.0 0.0 0.000114 \n",
- "4 30.0 0.0 0.000114 \n",
- "5 30.0 0.0 0.019746 \n",
- "6 30.0 0.0 0.073690 \n",
- "7 30.0 0.0 0.182147 \n",
- "8 30.0 0.0 0.216562 \n",
- "9 30.0 0.0 0.066122 "
+ "0 30.0 0.0 0.047097 \n",
+ "1 30.0 0.0 0.140913 \n",
+ "2 30.0 0.0 0.000399 \n",
+ "3 30.0 0.0 0.096535 \n",
+ "4 30.0 0.0 0.031509 \n",
+ "5 30.0 0.0 0.124670 \n",
+ "6 30.0 0.0 0.036217 \n",
+ "7 30.0 0.0 0.043004 \n",
+ "8 30.0 0.0 0.079541 \n",
+ "9 30.0 0.0 0.226904 "
]
},
- "execution_count": 12,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -2506,7 +2541,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 15,
"id": "cell-23",
"metadata": {},
"outputs": [
@@ -2540,77 +2575,77 @@
"type": "string"
}
],
- "ref": "18363370-be4d-4693-8836-96409a2ac869",
+ "ref": "75bcfd39-bdd3-40ed-8c32-57a86f2e5145",
"rows": [
[
"0",
- "15",
- "REB1",
+ "289",
+ "DAL82",
"0.0",
- "100_242"
+ "1213"
],
[
"1",
- "303",
- "CIN5",
+ "224",
+ "ACE2",
"0.0",
- "1280"
+ "901"
],
[
"2",
- "330",
- "CUP9",
+ "283",
+ "RAP1",
"0.0",
- "256"
+ "96_238"
],
[
"3",
- "114",
- "AFT1",
+ "8",
+ "RTG3",
"0.0",
- "87"
+ "57"
],
[
"4",
- "9",
- "RTG3",
+ "75",
+ "CAD1",
"0.0",
- "57"
+ "360"
],
[
"5",
- "118",
- "HSF1",
+ "246",
+ "ARG81",
"0.0",
- "88"
+ "1023"
],
[
"6",
- "15",
- "REB1",
+ "209",
+ "HAP4",
"0.0",
- "100_242"
+ "802"
],
[
"7",
- "162",
- "XBP1",
+ "83",
+ "GCN4",
"0.0",
- "24"
+ "357"
],
[
"8",
- "240",
- "YAP1",
+ "55",
+ "SWI5",
"0.0",
- "182"
+ "253"
],
[
"9",
- "150",
- "STP2",
+ "189",
+ "HIR3",
"0.0",
- "604"
+ "772"
]
],
"shape": {
@@ -2646,73 +2681,73 @@
" \n",
" \n",
" | 0 | \n",
- " 15 | \n",
- " REB1 | \n",
+ " 289 | \n",
+ " DAL82 | \n",
" 0.0 | \n",
- " 100_242 | \n",
+ " 1213 | \n",
"
\n",
" \n",
" | 1 | \n",
- " 303 | \n",
- " CIN5 | \n",
+ " 224 | \n",
+ " ACE2 | \n",
" 0.0 | \n",
- " 1280 | \n",
+ " 901 | \n",
"
\n",
" \n",
" | 2 | \n",
- " 330 | \n",
- " CUP9 | \n",
+ " 283 | \n",
+ " RAP1 | \n",
" 0.0 | \n",
- " 256 | \n",
+ " 96_238 | \n",
"
\n",
" \n",
" | 3 | \n",
- " 114 | \n",
- " AFT1 | \n",
+ " 8 | \n",
+ " RTG3 | \n",
" 0.0 | \n",
- " 87 | \n",
+ " 57 | \n",
"
\n",
" \n",
" | 4 | \n",
- " 9 | \n",
- " RTG3 | \n",
+ " 75 | \n",
+ " CAD1 | \n",
" 0.0 | \n",
- " 57 | \n",
+ " 360 | \n",
"
\n",
" \n",
" | 5 | \n",
- " 118 | \n",
- " HSF1 | \n",
+ " 246 | \n",
+ " ARG81 | \n",
" 0.0 | \n",
- " 88 | \n",
+ " 1023 | \n",
"
\n",
" \n",
" | 6 | \n",
- " 15 | \n",
- " REB1 | \n",
+ " 209 | \n",
+ " HAP4 | \n",
" 0.0 | \n",
- " 100_242 | \n",
+ " 802 | \n",
"
\n",
" \n",
" | 7 | \n",
- " 162 | \n",
- " XBP1 | \n",
+ " 83 | \n",
+ " GCN4 | \n",
" 0.0 | \n",
- " 24 | \n",
+ " 357 | \n",
"
\n",
" \n",
" | 8 | \n",
- " 240 | \n",
- " YAP1 | \n",
+ " 55 | \n",
+ " SWI5 | \n",
" 0.0 | \n",
- " 182 | \n",
+ " 253 | \n",
"
\n",
" \n",
" | 9 | \n",
- " 150 | \n",
- " STP2 | \n",
+ " 189 | \n",
+ " HIR3 | \n",
" 0.0 | \n",
- " 604 | \n",
+ " 772 | \n",
"
\n",
" \n",
"\n",
@@ -2720,19 +2755,19 @@
],
"text/plain": [
" harbison_sample_id regulator_symbol dto_empirical_pvalue hackett_sample_id\n",
- "0 15 REB1 0.0 100_242\n",
- "1 303 CIN5 0.0 1280\n",
- "2 330 CUP9 0.0 256\n",
- "3 114 AFT1 0.0 87\n",
- "4 9 RTG3 0.0 57\n",
- "5 118 HSF1 0.0 88\n",
- "6 15 REB1 0.0 100_242\n",
- "7 162 XBP1 0.0 24\n",
- "8 240 YAP1 0.0 182\n",
- "9 150 STP2 0.0 604"
+ "0 289 DAL82 0.0 1213\n",
+ "1 224 ACE2 0.0 901\n",
+ "2 283 RAP1 0.0 96_238\n",
+ "3 8 RTG3 0.0 57\n",
+ "4 75 CAD1 0.0 360\n",
+ "5 246 ARG81 0.0 1023\n",
+ "6 209 HAP4 0.0 802\n",
+ "7 83 GCN4 0.0 357\n",
+ "8 55 SWI5 0.0 253\n",
+ "9 189 HIR3 0.0 772"
]
},
- "execution_count": 13,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -2770,7 +2805,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 16,
"id": "f03e942a",
"metadata": {},
"outputs": [
@@ -2809,12 +2844,12 @@
"type": "integer"
}
],
- "ref": "0736a331-fb06-4ba3-abe4-dff7ac0e65a3",
+ "ref": "933fb2fe-799d-4a25-ae4e-ef95ed28bbc4",
"rows": [
[
"0",
"SWI1",
- "5.0",
+ "15.0",
"ZEV",
"P",
"3"
@@ -2838,7 +2873,7 @@
[
"3",
"SWI1",
- "10.0",
+ "5.0",
"ZEV",
"P",
"3"
@@ -2862,7 +2897,7 @@
[
"6",
"SWI1",
- "15.0",
+ "45.0",
"ZEV",
"P",
"3"
@@ -2870,72 +2905,72 @@
[
"7",
"SWI1",
- "45.0",
+ "10.0",
"ZEV",
"P",
"3"
],
[
"8",
- "RDS2",
- "10.0",
- "ZEV",
+ "MAC1",
+ "90.0",
+ "GEV",
"P",
"2"
],
[
"9",
- "MAC1",
- "90.0",
- "GEV",
+ "RDS2",
+ "20.0",
+ "ZEV",
"P",
"2"
],
[
"10",
"MAC1",
- "15.0",
+ "45.0",
"GEV",
"P",
"2"
],
[
"11",
- "RDS2",
- "20.0",
- "ZEV",
+ "MAC1",
+ "15.0",
+ "GEV",
"P",
"2"
],
[
"12",
- "MAC1",
- "45.0",
- "GEV",
+ "RDS2",
+ "30.0",
+ "ZEV",
"P",
"2"
],
[
"13",
- "RDS2",
+ "MAC1",
"30.0",
- "ZEV",
+ "GEV",
"P",
"2"
],
[
"14",
- "GCN4",
- "15.0",
+ "RDS2",
+ "45.0",
"ZEV",
"P",
"2"
],
[
"15",
- "MAC1",
- "30.0",
- "GEV",
+ "RDS2",
+ "15.0",
+ "ZEV",
"P",
"2"
],
@@ -2950,15 +2985,15 @@
[
"17",
"GCN4",
- "45.0",
+ "15.0",
"ZEV",
"P",
"2"
],
[
"18",
- "GCN4",
- "90.0",
+ "RDS2",
+ "10.0",
"ZEV",
"P",
"2"
@@ -2966,7 +3001,7 @@
[
"19",
"RDS2",
- "45.0",
+ "0.0",
"ZEV",
"P",
"2"
@@ -2974,15 +3009,15 @@
[
"20",
"RDS2",
- "0.0",
+ "90.0",
"ZEV",
"P",
"2"
],
[
"21",
- "RDS2",
- "90.0",
+ "GCN4",
+ "45.0",
"ZEV",
"P",
"2"
@@ -3014,15 +3049,15 @@
[
"25",
"GCN4",
- "0.0",
+ "90.0",
"ZEV",
"P",
"2"
],
[
"26",
- "RDS2",
- "15.0",
+ "GCN4",
+ "0.0",
"ZEV",
"P",
"2"
@@ -3063,7 +3098,7 @@
" \n",
" | 0 | \n",
" SWI1 | \n",
- " 5.0 | \n",
+ " 15.0 | \n",
" ZEV | \n",
" P | \n",
" 3 | \n",
@@ -3087,7 +3122,7 @@
"
\n",
" | 3 | \n",
" SWI1 | \n",
- " 10.0 | \n",
+ " 5.0 | \n",
" ZEV | \n",
" P | \n",
" 3 | \n",
@@ -3111,7 +3146,7 @@
"
\n",
" | 6 | \n",
" SWI1 | \n",
- " 15.0 | \n",
+ " 45.0 | \n",
" ZEV | \n",
" P | \n",
" 3 | \n",
@@ -3119,72 +3154,72 @@
"
\n",
" | 7 | \n",
" SWI1 | \n",
- " 45.0 | \n",
+ " 10.0 | \n",
" ZEV | \n",
" P | \n",
" 3 | \n",
"
\n",
" \n",
" | 8 | \n",
- " RDS2 | \n",
- " 10.0 | \n",
- " ZEV | \n",
+ " MAC1 | \n",
+ " 90.0 | \n",
+ " GEV | \n",
" P | \n",
" 2 | \n",
"
\n",
" \n",
" | 9 | \n",
- " MAC1 | \n",
- " 90.0 | \n",
- " GEV | \n",
+ " RDS2 | \n",
+ " 20.0 | \n",
+ " ZEV | \n",
" P | \n",
" 2 | \n",
"
\n",
" \n",
" | 10 | \n",
" MAC1 | \n",
- " 15.0 | \n",
+ " 45.0 | \n",
" GEV | \n",
" P | \n",
" 2 | \n",
"
\n",
" \n",
" | 11 | \n",
- " RDS2 | \n",
- " 20.0 | \n",
- " ZEV | \n",
+ " MAC1 | \n",
+ " 15.0 | \n",
+ " GEV | \n",
" P | \n",
" 2 | \n",
"
\n",
" \n",
" | 12 | \n",
- " MAC1 | \n",
- " 45.0 | \n",
- " GEV | \n",
+ " RDS2 | \n",
+ " 30.0 | \n",
+ " ZEV | \n",
" P | \n",
" 2 | \n",
"
\n",
" \n",
" | 13 | \n",
- " RDS2 | \n",
+ " MAC1 | \n",
" 30.0 | \n",
- " ZEV | \n",
+ " GEV | \n",
" P | \n",
" 2 | \n",
"
\n",
" \n",
" | 14 | \n",
- " GCN4 | \n",
- " 15.0 | \n",
+ " RDS2 | \n",
+ " 45.0 | \n",
" ZEV | \n",
" P | \n",
" 2 | \n",
"
\n",
" \n",
" | 15 | \n",
- " MAC1 | \n",
- " 30.0 | \n",
- " GEV | \n",
+ " RDS2 | \n",
+ " 15.0 | \n",
+ " ZEV | \n",
" P | \n",
" 2 | \n",
"
\n",
@@ -3199,15 +3234,15 @@
" \n",
" | 17 | \n",
" GCN4 | \n",
- " 45.0 | \n",
+ " 15.0 | \n",
" ZEV | \n",
" P | \n",
" 2 | \n",
"
\n",
" \n",
" | 18 | \n",
- " GCN4 | \n",
- " 90.0 | \n",
+ " RDS2 | \n",
+ " 10.0 | \n",
" ZEV | \n",
" P | \n",
" 2 | \n",
@@ -3215,7 +3250,7 @@
"
\n",
" | 19 | \n",
" RDS2 | \n",
- " 45.0 | \n",
+ " 0.0 | \n",
" ZEV | \n",
" P | \n",
" 2 | \n",
@@ -3223,15 +3258,15 @@
"
\n",
" | 20 | \n",
" RDS2 | \n",
- " 0.0 | \n",
+ " 90.0 | \n",
" ZEV | \n",
" P | \n",
" 2 | \n",
"
\n",
" \n",
" | 21 | \n",
- " RDS2 | \n",
- " 90.0 | \n",
+ " GCN4 | \n",
+ " 45.0 | \n",
" ZEV | \n",
" P | \n",
" 2 | \n",
@@ -3263,15 +3298,15 @@
"
\n",
" | 25 | \n",
" GCN4 | \n",
- " 0.0 | \n",
+ " 90.0 | \n",
" ZEV | \n",
" P | \n",
" 2 | \n",
"
\n",
" \n",
" | 26 | \n",
- " RDS2 | \n",
- " 15.0 | \n",
+ " GCN4 | \n",
+ " 0.0 | \n",
" ZEV | \n",
" P | \n",
" 2 | \n",
@@ -3282,36 +3317,36 @@
],
"text/plain": [
" regulator_symbol time mechanism restriction n\n",
- "0 SWI1 5.0 ZEV P 3\n",
+ "0 SWI1 15.0 ZEV P 3\n",
"1 SWI1 30.0 ZEV P 3\n",
"2 SWI1 20.0 ZEV P 3\n",
- "3 SWI1 10.0 ZEV P 3\n",
+ "3 SWI1 5.0 ZEV P 3\n",
"4 SWI1 90.0 ZEV P 3\n",
"5 SWI1 0.0 ZEV P 3\n",
- "6 SWI1 15.0 ZEV P 3\n",
- "7 SWI1 45.0 ZEV P 3\n",
- "8 RDS2 10.0 ZEV P 2\n",
- "9 MAC1 90.0 GEV P 2\n",
- "10 MAC1 15.0 GEV P 2\n",
- "11 RDS2 20.0 ZEV P 2\n",
- "12 MAC1 45.0 GEV P 2\n",
- "13 RDS2 30.0 ZEV P 2\n",
- "14 GCN4 15.0 ZEV P 2\n",
- "15 MAC1 30.0 GEV P 2\n",
+ "6 SWI1 45.0 ZEV P 3\n",
+ "7 SWI1 10.0 ZEV P 3\n",
+ "8 MAC1 90.0 GEV P 2\n",
+ "9 RDS2 20.0 ZEV P 2\n",
+ "10 MAC1 45.0 GEV P 2\n",
+ "11 MAC1 15.0 GEV P 2\n",
+ "12 RDS2 30.0 ZEV P 2\n",
+ "13 MAC1 30.0 GEV P 2\n",
+ "14 RDS2 45.0 ZEV P 2\n",
+ "15 RDS2 15.0 ZEV P 2\n",
"16 MAC1 5.0 GEV P 2\n",
- "17 GCN4 45.0 ZEV P 2\n",
- "18 GCN4 90.0 ZEV P 2\n",
- "19 RDS2 45.0 ZEV P 2\n",
- "20 RDS2 0.0 ZEV P 2\n",
- "21 RDS2 90.0 ZEV P 2\n",
+ "17 GCN4 15.0 ZEV P 2\n",
+ "18 RDS2 10.0 ZEV P 2\n",
+ "19 RDS2 0.0 ZEV P 2\n",
+ "20 RDS2 90.0 ZEV P 2\n",
+ "21 GCN4 45.0 ZEV P 2\n",
"22 GCN4 30.0 ZEV P 2\n",
"23 MAC1 0.0 GEV P 2\n",
"24 RDS2 5.0 ZEV P 2\n",
- "25 GCN4 0.0 ZEV P 2\n",
- "26 RDS2 15.0 ZEV P 2"
+ "25 GCN4 90.0 ZEV P 2\n",
+ "26 GCN4 0.0 ZEV P 2"
]
},
- "execution_count": 14,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -3330,7 +3365,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 17,
"id": "4d869036",
"metadata": {},
"outputs": [
@@ -3394,7 +3429,7 @@
"type": "float"
}
],
- "ref": "0f36c45d-0bab-4761-98f1-0e2a625be2df",
+ "ref": "58631e0d-0adf-41e4-9676-3e51aecbc7dd",
"rows": [
[
"0",
@@ -3411,26 +3446,26 @@
],
[
"1",
- "1628",
+ "1620",
"20161117",
"ZEV",
"YPL016W",
"SWI1",
"P",
- "SMY2266b",
+ "SMY2266a",
"20.0",
"glucose",
"30.0"
],
[
"2",
- "1620",
+ "1628",
"20161117",
"ZEV",
"YPL016W",
"SWI1",
"P",
- "SMY2266a",
+ "SMY2266b",
"20.0",
"glucose",
"30.0"
@@ -3488,26 +3523,26 @@
"
\n",
" \n",
" | 1 | \n",
- " 1628 | \n",
+ " 1620 | \n",
" 20161117 | \n",
" ZEV | \n",
" YPL016W | \n",
" SWI1 | \n",
" P | \n",
- " SMY2266b | \n",
+ " SMY2266a | \n",
" 20.0 | \n",
" glucose | \n",
" 30.0 | \n",
"
\n",
" \n",
" | 2 | \n",
- " 1620 | \n",
+ " 1628 | \n",
" 20161117 | \n",
" ZEV | \n",
" YPL016W | \n",
" SWI1 | \n",
" P | \n",
- " SMY2266a | \n",
+ " SMY2266b | \n",
" 20.0 | \n",
" glucose | \n",
" 30.0 | \n",
@@ -3519,16 +3554,16 @@
"text/plain": [
" sample_id date mechanism regulator_locus_tag regulator_symbol \\\n",
"0 1636 20161117 ZEV YPL016W SWI1 \n",
- "1 1628 20161117 ZEV YPL016W SWI1 \n",
- "2 1620 20161117 ZEV YPL016W SWI1 \n",
+ "1 1620 20161117 ZEV YPL016W SWI1 \n",
+ "2 1628 20161117 ZEV YPL016W SWI1 \n",
"\n",
" restriction strain time carbon_source temperature_celsius \n",
"0 P SMY2266c 20.0 glucose 30.0 \n",
- "1 P SMY2266b 20.0 glucose 30.0 \n",
- "2 P SMY2266a 20.0 glucose 30.0 "
+ "1 P SMY2266a 20.0 glucose 30.0 \n",
+ "2 P SMY2266b 20.0 glucose 30.0 "
]
},
- "execution_count": 15,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -3546,7 +3581,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 18,
"id": "89408d2b",
"metadata": {},
"outputs": [
@@ -3554,7 +3589,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "['MAC1', 'SWI1', 'GCN4', 'RDS2']\n"
+ "['SWI1', 'GCN4', 'RDS2', 'MAC1']\n"
]
}
],
@@ -3574,7 +3609,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 19,
"id": "5a3b802b",
"metadata": {},
"outputs": [
@@ -3582,7 +3617,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "['MAC1', 'SWI1', 'GCN4', 'RDS2', 'GEV']\n"
+ "['SWI1', 'GCN4', 'RDS2', 'MAC1', 'GEV']\n"
]
}
],
@@ -3594,7 +3629,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 20,
"id": "abed8bc2",
"metadata": {},
"outputs": [
@@ -3683,7 +3718,7 @@
"type": "string"
}
],
- "ref": "b0a3d538-3af3-4f72-8610-7722a73a7a4f",
+ "ref": "bf27f61f-dbfa-482f-a0a0-235eeabc3fee",
"rows": [
[
"0",
@@ -4926,7 +4961,7 @@
"[29804 rows x 15 columns]"
]
},
- "execution_count": 18,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -4937,7 +4972,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 21,
"id": "cell-25",
"metadata": {},
"outputs": [
@@ -4952,47 +4987,54 @@
"3 448 ACA1 15.0 ZEV \n",
"4 448 ACA1 15.0 ZEV \n",
"\n",
- " binding_id \\\n",
- "0 BrentLab/callingcards;annotated_features;146 \n",
- "1 BrentLab/callingcards;annotated_features;156 \n",
- "2 BrentLab/harbison_2004;harbison_2004;88 \n",
- "3 BrentLab/callingcards;annotated_features;146 \n",
- "4 BrentLab/callingcards;annotated_features;803 \n",
+ " binding_id \\\n",
+ "0 BrentLab/callingcards;annotated_features;803 \n",
+ "1 BrentLab/harbison_2004;harbison_2004;88 \n",
+ "2 BrentLab/mahendrawada_2025;chec_mahendrawada_m... \n",
+ "3 BrentLab/callingcards;annotated_features;126 \n",
+ "4 BrentLab/callingcards;annotated_features;156 \n",
"\n",
" perturbation_id binding_rank_threshold \\\n",
- "0 BrentLab/hackett_2020;hackett_2020;448 452.0 \n",
- "1 BrentLab/hackett_2020;hackett_2020;448 296.0 \n",
- "2 BrentLab/hackett_2020;hackett_2020;448 122.0 \n",
- "3 BrentLab/hackett_2020;hackett_2020;448 35.0 \n",
- "4 BrentLab/hackett_2020;hackett_2020;448 544.0 \n",
+ "0 BrentLab/hackett_2020;hackett_2020;448 110.0 \n",
+ "1 BrentLab/hackett_2020;hackett_2020;448 334.0 \n",
+ "2 BrentLab/hackett_2020;hackett_2020;448 3882.0 \n",
+ "3 BrentLab/hackett_2020;hackett_2020;448 437.0 \n",
+ "4 BrentLab/hackett_2020;hackett_2020;448 374.0 \n",
"\n",
" perturbation_rank_threshold binding_set_size perturbation_set_size \\\n",
- "0 1.0 454.0 5591.0 \n",
- "1 346.0 297.0 346.0 \n",
- "2 218.0 122.0 218.0 \n",
- "3 407.0 35.0 407.0 \n",
- "4 1.0 544.0 5591.0 \n",
+ "0 346.0 113.0 346.0 \n",
+ "1 1.0 334.0 5524.0 \n",
+ "2 1.0 3883.0 5591.0 \n",
+ "3 1.0 442.0 5591.0 \n",
+ "4 1.0 376.0 5591.0 \n",
"\n",
" dto_fdr dto_empirical_pvalue pr_ranking_column \\\n",
- "0 0.000000 1.000 pvalue \n",
- "1 0.277211 0.000 log2fc \n",
- "2 0.612736 0.917 log2fc \n",
- "3 0.116834 0.000 log2fc \n",
+ "0 0.236207 0.001 log2fc \n",
+ "1 0.000000 1.000 pvalue \n",
+ "2 0.000000 1.000 pvalue \n",
+ "3 0.000000 1.000 pvalue \n",
"4 0.000000 1.000 pvalue \n",
"\n",
- " binding_repo_dataset perturbation_repo_dataset binding_id_id \\\n",
- "0 callingcards-annotated_features hackett_2020-hackett_2020 146 \n",
- "1 callingcards-annotated_features hackett_2020-hackett_2020 156 \n",
- "2 harbison_2004-harbison_2004 hackett_2020-hackett_2020 88 \n",
- "3 callingcards-annotated_features hackett_2020-hackett_2020 146 \n",
- "4 callingcards-annotated_features hackett_2020-hackett_2020 803 \n",
+ " binding_repo_dataset \\\n",
+ "0 callingcards-annotated_features \n",
+ "1 harbison_2004-harbison_2004 \n",
+ "2 mahendrawada_2025-chec_mahendrawada_m2025_af_c... \n",
+ "3 callingcards-annotated_features \n",
+ "4 callingcards-annotated_features \n",
"\n",
- " binding_id_source perturbation_id_id \\\n",
- "0 BrentLab/callingcards;annotated_features 448 \n",
- "1 BrentLab/callingcards;annotated_features 448 \n",
- "2 harbison 448 \n",
- "3 BrentLab/callingcards;annotated_features 448 \n",
- "4 BrentLab/callingcards;annotated_features 448 \n",
+ " perturbation_repo_dataset binding_id_id \\\n",
+ "0 hackett_2020-hackett_2020 803 \n",
+ "1 hackett_2020-hackett_2020 88 \n",
+ "2 hackett_2020-hackett_2020 59 \n",
+ "3 hackett_2020-hackett_2020 126 \n",
+ "4 hackett_2020-hackett_2020 156 \n",
+ "\n",
+ " binding_id_source perturbation_id_id \\\n",
+ "0 BrentLab/callingcards;annotated_features 448 \n",
+ "1 harbison 448 \n",
+ "2 BrentLab/mahendrawada_2025;chec_mahendrawada_m... 448 \n",
+ "3 BrentLab/callingcards;annotated_features 448 \n",
+ "4 BrentLab/callingcards;annotated_features 448 \n",
"\n",
" perturbation_id_source \n",
"0 hackett \n",
@@ -5027,7 +5069,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 22,
"id": "cell-26",
"metadata": {},
"outputs": [],
diff --git a/tfbpapi/tests/test_virtual_db.py b/tfbpapi/tests/test_virtual_db.py
index cb64592..fa02695 100644
--- a/tfbpapi/tests/test_virtual_db.py
+++ b/tfbpapi/tests/test_virtual_db.py
@@ -664,6 +664,31 @@ def test_vdb_get_tags_no_views_needed(self, tmp_path):
assert tags == {"assay": "binding"}
assert not vdb._views_registered
+ def test_vdb_get_datasets(self, tmp_path):
+ """get_datasets() returns sorted db_names without registering views."""
+ vdb = self._make_vdb(
+ """
+ repositories:
+ BrentLab/harbison:
+ dataset:
+ harbison_2004:
+ db_name: harbison
+ sample_id:
+ field: sample_id
+ BrentLab/kemmeren:
+ dataset:
+ kemmeren_2014:
+ db_name: kemmeren
+ sample_id:
+ field: sample_id
+ """,
+ tmp_path,
+ )
+ # Checked before and after the call: get_datasets() must not register views.
+ assert not vdb._views_registered
+ assert vdb.get_datasets() == ["harbison", "kemmeren"]
+ assert not vdb._views_registered
+
# ------------------------------------------------------------------
# Tests: View registration
diff --git a/tfbpapi/virtual_db.py b/tfbpapi/virtual_db.py
index 1ac968c..86c9ea3 100644
--- a/tfbpapi/virtual_db.py
+++ b/tfbpapi/virtual_db.py
@@ -390,6 +390,24 @@ def get_common_fields(self) -> list[str]:
common = set.intersection(*sets)
return sorted(common)
+ def get_datasets(self) -> list[str]:
+ """
+ Return the sorted list of dataset names known to this VirtualDB.
+
+ Dataset names are the resolved ``db_name`` values from the
+ configuration (falling back to the config_name when ``db_name``
+ is not explicitly set). These are the names accepted by
+ :meth:`get_tags` and queryable via :meth:`query`.
+
+ Unlike :meth:`tables`, this method reads directly from the configuration and
+ does not require views to be registered, so no data is downloaded.
+
+ :return: New list of dataset names in ascending sorted order
+
+ """
+ # Iterating _db_name_map yields the db_names (presumably its dict keys).
+ return sorted(self._db_name_map)
+
def get_tags(self, db_name: str) -> dict[str, str]:
"""
Return the merged tags for a dataset.