Skip to content

Commit 00932fd

Browse files
committed
changes to handle diverse data groups
1 parent 2e33ddf commit 00932fd

7 files changed

Lines changed: 153 additions & 89 deletions

File tree

src/DataBUS/neotomaHelpers/pull_params.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def pull_params(params, yml_dict, csv_template, table=None, name = None, values
4343
clean_valor = clean_column(val.get("column"),
4444
csv_template,
4545
clean=not val.get("rowwise"))
46-
except KeyError:
46+
except KeyError as e:
4747
continue
4848
if clean_valor:
4949
match val.get("type"):
@@ -83,7 +83,8 @@ def pull_params(params, yml_dict, csv_template, table=None, name = None, values
8383
add_unit_inputs[key] = {}
8484
if clean_valor is None:
8585
continue
86-
elif not all(x is None for x in clean_valor):
86+
elif ((isinstance(clean_valor, list) and not all(x is None for x in clean_valor))
87+
or (not isinstance(clean_valor, list) and clean_valor is not None)):
8788
if 'chronologyname' in val:
8889
if not add_unit_inputs[key].get(val['chronologyname']):
8990
add_unit_inputs[key][val.get('chronologyname', f'Chron_{chron_counter}')] = {}
@@ -106,6 +107,8 @@ def pull_params(params, yml_dict, csv_template, table=None, name = None, values
106107
add_unit_inputs[val['taxonname']][f"uncertaintybasis"] = val['uncertaintybasis']
107108
else:
108109
add_unit_inputs[i] = clean_valor
110+
else:
111+
add_unit_inputs[i] = None
109112
else:
110113
add_unit_inputs[i] = None
111114
if 'notes' in add_unit_inputs.keys():
@@ -115,13 +118,19 @@ def pull_params(params, yml_dict, csv_template, table=None, name = None, values
115118
if any(k in add_unit_inputs.keys() for k in ('chronologies', 'sampleages')):
116119
key = ('chronologies' if 'chronologies' in add_unit_inputs.keys()
117120
else 'sampleages' if 'sampleages' in add_unit_inputs.keys() else None)
118-
add_unit_inputs[key] = {name: chron
119-
for name, chron in add_unit_inputs[key].items()
120-
if not all(all(v is None
121-
for v in chron[key])
122-
for key in ('age', 'ageyounger', 'ageolder',
123-
'age', 'ageboundolder',
124-
'ageboundyounger'))}
121+
if isinstance(add_unit_inputs[key].values(), list):
122+
add_unit_inputs[key] = {name: chron
123+
for name, chron in add_unit_inputs[key].items()
124+
if not all(all(v is None
125+
for v in chron[k])
126+
for k in ('ageyounger', 'ageolder',
127+
'age', 'ageboundolder',
128+
'ageboundyounger'))}
129+
else:
130+
add_unit_inputs[key] = {name: chron
131+
for name, chron in add_unit_inputs[key].items()
132+
if not all(v is None
133+
for v in chron.values())}
125134
return add_unit_inputs
126135
elif isinstance(table, list):
127136
for item in table:

src/DataBUS/neotomaUploader/insert_publication.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@ def list_flattener(original_list, delim =', '):
3434
inputs = nh.pull_params(params, yml_dict, csv_file, "ndb.publications")
3535
inputs['doi'] = list_flattener(inputs['doi'])
3636
inputs['publicationid'] = list_flattener(inputs['publicationid'])
37-
inputs['citation'] = list_flattener(inputs['citation'], delim=' | ')
38-
37+
inputs['citation'] = list_flattener(inputs.get('citation', None), delim=' | ')
3938
if inputs["publicationid"]:
4039
inputs["publicationid"] = [value if value != "NA" else None for value in inputs["publicationid"]]
4140
inputs["publicationid"] = inputs["publicationid"][0]
@@ -51,14 +50,14 @@ def list_flattener(original_list, delim =', '):
5150
doi_q = """SELECT *, similarity(LOWER(doi), %(doi)s) as SIM
5251
FROM ndb.publications
5352
WHERE doi IS NOT NULL
54-
AND similarity(LOWER(doi), %(doi)s) > .65
53+
AND similarity(LOWER(doi), %(doi)s) > .60
5554
ORDER BY similarity(LOWER(doi), %(doi)s) DESC
5655
LIMIT 1; """
5756

5857
dataset_pub_q = """SELECT ts.insertdatasetpublication(%(datasetid)s,
5958
%(publicationid)s,
6059
%(primarypub)s)"""
61-
if inputs.get('publicationid', None) is None:
60+
if not inputs.get('publicationid', None):
6261
response.message.append(f"? No ID present")
6362
response.valid.append(True)
6463
if inputs.get('citation', None):
@@ -70,9 +69,13 @@ def list_flattener(original_list, delim =', '):
7069
response.message.append(f"✔ Found Publication: "
7170
f"{obs[1]} in Neotoma")
7271
response.valid.append(True)
73-
cur.execute(dataset_pub_q, {'datasetid': uploader["datasets"].datasetid,
74-
'publicationid': pub_id[0],
75-
'primarypub': True})
72+
try:
73+
cur.execute(dataset_pub_q, {'datasetid': uploader["datasets"].datasetid,
74+
'publicationid': pub_id[0],
75+
'primarypub': True})
76+
except Exception as e:
77+
response.message.append("✗ Could not associate dataset ID to publication ID")
78+
response.valid.append(False)
7679
else:
7780
if inputs.get('doi', None):
7881
cur.execute(doi_q, {'doi': inputs['doi'][i].lower()})

src/DataBUS/neotomaUploader/insert_uth_series.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,13 @@ def insert_uth_series(cur, yml_dict, csv_file, uploader):
2121
return response
2222

2323
else:
24-
params = ['geochronid', 'decayconstantid',
24+
params = ['decayconstantid',
2525
'ratio230th232th', 'ratiouncertainty230th232th',
2626
'activity230th238u', 'activityuncertainty230th238u',
2727
'activity234u238u', 'activityuncertainty234u238u',
2828
'iniratio230th232th', 'iniratiouncertainty230th232th']
2929
inputs = nh.pull_params(params, yml_dict, csv_file, "ndb.uraniumseries")
30+
inputs['geochronid'] = uploader['geochron'].id
3031
if isinstance(inputs.get('decayconstantid'), list):
3132
elements = [x for x in params if x not in {'geochronid'}]
3233
else:
@@ -51,6 +52,7 @@ def insert_uth_series(cur, yml_dict, csv_file, uploader):
5152
response.valid.append(True)
5253
response.message.append("✔ Decay constant found in database")
5354
else:
55+
new_dc.append(None)
5456
response.valid.append(False)
5557
response.message.append(f"✗ Decay constant {dc} not found in database")
5658
inputs['decayconstantid'] = new_dc
@@ -64,8 +66,10 @@ def insert_uth_series(cur, yml_dict, csv_file, uploader):
6466
response.valid.append(True)
6567
response.message.append("✔ Decay constant found in database")
6668
else:
69+
inputs['decayconstantid'] = None
6770
response.valid.append(False)
6871
response.message.append(f"✗ Decay constant {inputs['decayconstantid']} not found in database")
72+
6973
if not indices:
7074
response.message.append("✔ No UTh Series data to insert")
7175
response.valid.append(True)
@@ -112,7 +116,7 @@ def insert_uth_series(cur, yml_dict, csv_file, uploader):
112116
uthdata = {k: [v for v in vals if v is not None] for k, vals in uthdata.items()}
113117
if uthdata:
114118
for k, v in uthdata.items():
115-
if isinstance(v, list) and len(v):
119+
if isinstance(v, list) and len(v):
116120
for i in range(len(v)):
117121
try:
118122
insert_uraniumseriesdata(cur, v[i], inputs['geochronid'][i])

src/DataBUS/neotomaValidator/valid_chronologies.py

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,17 @@ def valid_chronologies(cur, yml_dict, csv_file):
1515
ValueError: If there is an issue with the extracted parameters.
1616
AssertionError: If the date format in the CSV file is incorrect.
1717
"""
18+
def collapse_into_chronology(data, chron_k='chronologies'):
    """Fold top-level parameters into the sole chronology, if there is exactly one.

    When ``data[chron_k]`` holds exactly one entry, every other top-level
    key of ``data`` is merged into that entry and a dictionary containing
    only ``chron_k`` is returned. In any other case (key missing, ``None``,
    empty, or more than one entry) ``data`` is returned unchanged.

    Args:
        data (dict): Extracted parameters; may contain a ``chron_k`` mapping
            of name -> per-chronology dict.
        chron_k (str): Key under which the named entries are stored.

    Returns:
        dict: ``{chron_k: {name: merged}}`` when a single entry exists,
        otherwise the original ``data`` object.
    """
    # `or {}` also covers the key being present with a None value, which
    # would otherwise make len() raise TypeError.
    chronologies = data.get(chron_k) or {}
    if len(chronologies) != 1:
        return data

    name, chron = next(iter(chronologies.items()))
    # Work on a shallow copy so the caller's nested dict is not silently
    # modified as a side effect of building the return value.
    merged = dict(chron)
    # Top-level values take precedence over same-named keys already present
    # in the chronology entry.
    merged.update({k: v for k, v in data.items() if k != chron_k})
    return {chron_k: {name: merged}}
28+
1829
response = ChronResponse()
1930

2031
params = ['ageboundolder', 'ageboundyounger', 'agemodel', #'chronologyname', 'isdefault', <- don't need anymore because we use a dictionary that retrieves this from yml
@@ -38,28 +49,31 @@ def valid_chronologies(cur, yml_dict, csv_file):
3849
new_date = None
3950
else:
4051
new_date = None
41-
if 'age' in params:
42-
params.remove('age')
43-
inputs = nh.pull_params(params, yml_dict, csv_file, "ndb.chronologies")
44-
inputs['age'] = new_date
45-
response.valid.append(True)
52+
if 'age' in params:
53+
params.remove('age')
54+
inputs = nh.pull_params(params, yml_dict, csv_file, "ndb.chronologies")
55+
inputs['age'] = new_date
56+
response.valid.append(True)
57+
else:
58+
inputs = {}
4659
except Exception as inner_e:
60+
inputs = {}
4761
response.validAll = False
4862
response.message.append(f"Chronology parameters cannot be properly extracted. {e}\n"
4963
f"{str(inner_e)}")
5064
return response
51-
65+
66+
inputs = collapse_into_chronology(inputs)
5267
if len(inputs['chronologies']) >1:
5368
response.message.append("✔ File with multiple chronologies")
5469
response.message.append(f"{list(inputs['chronologies'].keys())}")
55-
5670
for chron in inputs['chronologies']:
5771
ch = inputs['chronologies'][chron]
58-
if ch.get("agetype", inputs['agetype']) is not None:
59-
ch.get("agetype", inputs['agetype']).replace("cal yr BP", 'Calendar years BP')
72+
if ch.get("agetype", inputs.get('agetype')) is not None:
73+
ch.get("agetype", inputs.get('agetype')).replace("cal yr BP", 'Calendar years BP')
6074
agetype_query = """SELECT agetypeid FROM ndb.agetypes
6175
WHERE LOWER(agetype) = %(agetype)s"""
62-
cur.execute(agetype_query, {'agetype': ch.get("agetype", inputs['agetype']).lower()})
76+
cur.execute(agetype_query, {'agetype': ch.get("agetype", inputs.get('agetype')).lower()})
6377
id = cur.fetchone()
6478
if id:
6579
ch['agetypeid'] = id[0]
@@ -94,15 +108,23 @@ def valid_chronologies(cur, yml_dict, csv_file):
94108
elif isinstance(ch.get('age', inputs.get('age')), list):
95109
ch['age'] = [1950 - value.year if isinstance(value, datetime) else 1950 - value
96110
for value in ch.get('age', inputs.get('age'))]
97-
c["ageboundolder"]= int(max([num for num in ch.get('ageboundolder', ch.get('age')) if num is not None]))
98-
c["ageboundyounger"]= int(min([num for num in ch.get('ageboundyounger', ch.get('age')) if num is not None]))
111+
for param in ['ageboundolder', 'ageboundyounger']:
112+
if param in ch:
113+
if isinstance(ch[param], list):
114+
c[param]= int(min([num for num in ch.get(param, ch.get('age')) if num is not None])) if param == 'ageboundyounger' else int(max([num for num in ch.get(param, ch.get('age')) if num is not None]))
115+
else:
116+
c[param]= ch[param]
117+
else:
118+
if isinstance(ch['age'], list):
119+
c[param]= int(min([num for num in ch.get('age') if num is not None])) if param == 'ageboundyounger' else int(max([num for num in ch.get('age') if num is not None]))
120+
else:
121+
c[param]= None
99122
Chronology(**c)
100123
response.valid.append(True)
101124
response.message.append("✔ Chronology can be created")
102125
except Exception as e:
103126
response.valid.append(False)
104127
response.message.append(f"✗ Chronology cannot be created: {e}")
105-
106128
response.validAll = all(response.valid)
107129
response.message = list(set(response.message))
108130
return response

src/DataBUS/neotomaValidator/valid_publication.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ def list_flattener(original_list, delim =', '):
6363
obs = cur.fetchone()
6464
pub_id = obs if obs is not None else None
6565
if pub_id:
66+
print(obs[1])
6667
response.message.append(f"✔ Found Publication: "
6768
f"{obs[1]} in Neotoma")
6869
response.valid.append(True)

0 commit comments

Comments
 (0)