-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmake_sample_list.py
More file actions
39 lines (34 loc) · 1 KB
/
make_sample_list.py
File metadata and controls
39 lines (34 loc) · 1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd
# Build the sample list for the new CIDR CEPH cohort from the master sample key.
sample_info = pd.read_csv(
    "Quinlan_Released_Data/Sample_Info/QuinlanNeklason_Master_Sample_Key.csv",
    # Read IDs as text rather than letting pandas infer a dtype for the column.
    dtype={
        "Subject_ID": "string",
    },
)

# Drop missing IDs before stripping: with the nullable "string" dtype a missing
# value is pd.NA, which has no .strip() and would otherwise crash the loop.
# Strip *before* de-duplicating so IDs that differ only by surrounding
# whitespace are written once. unique() keeps first-appearance order.
cidr_sample_ids = sample_info["Subject_ID"].dropna().str.strip().unique()

# One sample ID per line; the with-block closes the file on exit.
with open("sample_lists/CIDR_2025.samples.txt", "w") as outfh:
    for sample_id in cidr_sample_ids:
        print(sample_id, file=outfh)
# Build the sample list for the original CEPH 2019 study from its PED file.
orig_ped_path = "ped/16-08-06_WashU-Yandell-CEPH.ped"

# The PED file is read as tab-separated text. Column names are supplied here,
# so pandas treats every row of the file (including the first) as data.
orig_ped = pd.read_csv(
    orig_ped_path,
    sep="\t",
    names=[
        "FAMILY_ID",
        "SAMPLE_ID",
        "FATHER_ID",
        "MOTHER_ID",
        "SEX",
        "PHENOTYPE",
        "MISC",
    ],
    # Read the ID columns as text rather than letting pandas infer a dtype.
    dtype={"FAMILY_ID": "string", "SAMPLE_ID": "string"},
)

# Drop missing IDs before stripping: with the nullable "string" dtype a missing
# value is pd.NA, which has no .strip() and would otherwise crash the loop.
# Strip *before* de-duplicating so IDs that differ only by surrounding
# whitespace are written once. unique() keeps first-appearance order.
elife_sample_ids = orig_ped["SAMPLE_ID"].dropna().str.strip().unique()

# One sample ID per line; the with-block closes the file on exit.
with open("sample_lists/ELIFE_2019.samples.txt", "w") as outfh:
    for sample_id in elife_sample_ids:
        print(sample_id, file=outfh)