changes to handle diverse data groups

sedv8808 · sedv8808 · commit 00932fd2d6f0 · 2025-11-27T12:43:00.000-08:00
diff --git a/src/DataBUS/neotomaHelpers/pull_params.py b/src/DataBUS/neotomaHelpers/pull_params.py
@@ -43,7 +43,7 @@ def pull_params(params, yml_dict, csv_template, table=None, name = None, values
                         clean_valor = clean_column(val.get("column"), 
                                                csv_template, 
                                                clean=not val.get("rowwise"))
-                    except KeyError:
+                    except KeyError as e:
                         continue
                     if clean_valor:
                         match val.get("type"):
@@ -83,7 +83,8 @@ def pull_params(params, yml_dict, csv_template, table=None, name = None, values
                                 add_unit_inputs[key] = {}
                             if clean_valor is None:
                                 continue
-                            elif not all(x is None for x in clean_valor): 
+                            elif ((isinstance(clean_valor, list) and not all(x is None for x in clean_valor))
+                                   or (not isinstance(clean_valor, list) and clean_valor is not None)):
                                 if 'chronologyname' in val:
                                     if not add_unit_inputs[key].get(val['chronologyname']):
                                         add_unit_inputs[key][val.get('chronologyname', f'Chron_{chron_counter}')] = {}
@@ -106,6 +107,8 @@ def pull_params(params, yml_dict, csv_template, table=None, name = None, values
                                     add_unit_inputs[val['taxonname']][f"uncertaintybasis"] = val['uncertaintybasis']
                         else:
                             add_unit_inputs[i] = clean_valor
+                    else:
+                        add_unit_inputs[i] = None
             else:
                 add_unit_inputs[i] = None
         if 'notes' in add_unit_inputs.keys():
@@ -115,13 +118,19 @@ def pull_params(params, yml_dict, csv_template, table=None, name = None, values
             if any(k in add_unit_inputs.keys() for k in ('chronologies', 'sampleages')):
                 key = ('chronologies' if 'chronologies' in add_unit_inputs.keys()
                         else 'sampleages' if 'sampleages' in  add_unit_inputs.keys() else None)
-                add_unit_inputs[key] = {name: chron
-                                            for name, chron in add_unit_inputs[key].items()
-                                            if not all(all(v is None 
-                                                            for v in chron[key]) 
-                                                            for key in ('age', 'ageyounger', 'ageolder', 
-                                                                        'age', 'ageboundolder', 
-                                                                        'ageboundyounger'))}
+                if isinstance(add_unit_inputs[key].values(), list):
+                    add_unit_inputs[key] = {name: chron
+                                        for name, chron in add_unit_inputs[key].items()
+                                        if not all(all(v is None 
+                                                        for v in chron[k]) 
+                                                        for k in ('ageyounger', 'ageolder', 
+                                                                    'age', 'ageboundolder', 
+                                                                    'ageboundyounger'))}
+                else:
+                    add_unit_inputs[key] = {name: chron
+                                        for name, chron in add_unit_inputs[key].items()
+                                        if not all(v is None 
+                                                    for v in chron.values())}
             return add_unit_inputs
     elif isinstance(table, list):
         for item in table:
diff --git a/src/DataBUS/neotomaUploader/insert_publication.py b/src/DataBUS/neotomaUploader/insert_publication.py
@@ -34,8 +34,7 @@ def list_flattener(original_list, delim =', '):
     inputs = nh.pull_params(params, yml_dict, csv_file, "ndb.publications")
     inputs['doi'] = list_flattener(inputs['doi'])
     inputs['publicationid'] = list_flattener(inputs['publicationid'])
-    inputs['citation'] = list_flattener(inputs['citation'],  delim=' | ')
-
+    inputs['citation'] = list_flattener(inputs.get('citation', None),  delim=' | ')
     if inputs["publicationid"]:
         inputs["publicationid"] = [value if value != "NA" else None for value in inputs["publicationid"]]
         inputs["publicationid"] = inputs["publicationid"][0]
@@ -51,14 +50,14 @@ def list_flattener(original_list, delim =', '):
     doi_q = """SELECT *, similarity(LOWER(doi), %(doi)s) as SIM
                FROM ndb.publications
                WHERE doi IS NOT NULL
-                AND similarity(LOWER(doi), %(doi)s) > .65
+                AND similarity(LOWER(doi), %(doi)s) > .60
                ORDER BY similarity(LOWER(doi), %(doi)s) DESC
                LIMIT 1; """
     
     dataset_pub_q = """SELECT ts.insertdatasetpublication(%(datasetid)s, 
                                                           %(publicationid)s, 
                                                           %(primarypub)s)"""
-    if inputs.get('publicationid', None) is None:
+    if not inputs.get('publicationid', None):
         response.message.append(f"? No ID present")
         response.valid.append(True)
         if inputs.get('citation', None):
@@ -70,9 +69,13 @@ def list_flattener(original_list, delim =', '):
                     response.message.append(f"✔  Found Publication: "
                                             f"{obs[1]} in Neotoma")
                     response.valid.append(True)
-                    cur.execute(dataset_pub_q, {'datasetid': uploader["datasets"].datasetid,
-                                                'publicationid': pub_id[0],
-                                                'primarypub': True})
+                    try:
+                        cur.execute(dataset_pub_q, {'datasetid': uploader["datasets"].datasetid,
+                                                    'publicationid': pub_id[0],
+                                                    'primarypub': True})
+                    except Exception as e:
+                        response.message.append("✗  Could not associate dataset ID to publication ID")
+                        response.valid.append(False)
                 else:
                     if inputs.get('doi', None):
                         cur.execute(doi_q, {'doi': inputs['doi'][i].lower()})
diff --git a/src/DataBUS/neotomaUploader/insert_uth_series.py b/src/DataBUS/neotomaUploader/insert_uth_series.py
@@ -21,12 +21,13 @@ def insert_uth_series(cur, yml_dict, csv_file, uploader):
         return response
     
     else:
-        params = ['geochronid', 'decayconstantid',
+        params = ['decayconstantid',
                 'ratio230th232th', 'ratiouncertainty230th232th',
                 'activity230th238u', 'activityuncertainty230th238u', 
                 'activity234u238u', 'activityuncertainty234u238u',  
                 'iniratio230th232th', 'iniratiouncertainty230th232th']
         inputs = nh.pull_params(params, yml_dict, csv_file, "ndb.uraniumseries")
+        inputs['geochronid'] = uploader['geochron'].id
     if isinstance(inputs.get('decayconstantid'), list):
         elements = [x for x in params if x not in {'geochronid'}]
     else:
@@ -51,6 +52,7 @@ def insert_uth_series(cur, yml_dict, csv_file, uploader):
                     response.valid.append(True)
                     response.message.append("✔ Decay constant found in database")
                 else:
+                    new_dc.append(None)
                     response.valid.append(False)
                     response.message.append(f"✗ Decay constant {dc} not found in database")
             inputs['decayconstantid'] = new_dc
@@ -64,8 +66,10 @@ def insert_uth_series(cur, yml_dict, csv_file, uploader):
                 response.valid.append(True)
                 response.message.append("✔ Decay constant found in database")
             else:
+                inputs['decayconstantid'] = None
                 response.valid.append(False)
                 response.message.append(f"✗ Decay constant {inputs['decayconstantid']} not found in database")
+        
         if not indices:
             response.message.append("✔ No UTh Series data to insert")
             response.valid.append(True)
@@ -112,7 +116,7 @@ def insert_uth_series(cur, yml_dict, csv_file, uploader):
         uthdata = {k: [v for v in vals if v is not None] for k, vals in uthdata.items()}
         if uthdata:
             for k, v in uthdata.items():
-                if isinstance(v, list) and len(v):
+                if isinstance(v, list) and len(v): 
                     for i in range(len(v)):
                         try:
                             insert_uraniumseriesdata(cur, v[i], inputs['geochronid'][i])
diff --git a/src/DataBUS/neotomaValidator/valid_chronologies.py b/src/DataBUS/neotomaValidator/valid_chronologies.py
@@ -15,6 +15,17 @@ def valid_chronologies(cur, yml_dict, csv_file):
         ValueError: If there is an issue with the extracted parameters.
         AssertionError: If the date format in the CSV file is incorrect.
     """
+    def collapse_into_chronology(data, chron_k='chronologies'):  # fold top-level entries of `data` into its only chronology, if there is exactly one
+        chron = data.get(chron_k, {})  # mapping stored under chron_k; empty dict when the key is absent
+        if len(chron) == 1:  # collapse only when exactly one chronology is present
+            key = next(iter(chron))  # name of that single chronology
+            inner = chron[key]  # same object held by `data`, so the loop below mutates it in place
+            for k, v in data.items():  # walk every top-level entry of `data`
+                if k != chron_k:  # skip the chronology mapping itself
+                    inner[k] = v  # NOTE(review): overwrites a same-named chronology-level value; later code reads ch.get(k, inputs.get(k)) - confirm precedence
+            return {chron_k: {key: inner}}  # new dict holding only chron_k; the other top-level keys now live inside the chronology
+        return data  # zero or several chronologies: `data` is returned unchanged
+
     response = ChronResponse()
 
     params = ['ageboundolder', 'ageboundyounger', 'agemodel', #'chronologyname', 'isdefault',  <- don't need anymore because we use a dictionary that retrieves this from yml
@@ -38,28 +49,31 @@ def valid_chronologies(cur, yml_dict, csv_file):
                         new_date = None
                 else:
                     new_date = None
-            if 'age' in params:
-                params.remove('age')
-                inputs = nh.pull_params(params, yml_dict, csv_file, "ndb.chronologies")
-                inputs['age'] = new_date
-                response.valid.append(True)
+                if 'age' in params:
+                    params.remove('age')
+                    inputs = nh.pull_params(params, yml_dict, csv_file, "ndb.chronologies")
+                    inputs['age'] = new_date
+                    response.valid.append(True)
+            else:
+                inputs = {}
         except Exception as inner_e:
+            inputs = {}
             response.validAll = False
             response.message.append(f"Chronology parameters cannot be properly extracted. {e}\n"
                                     f"{str(inner_e)}")
             return response
-    
+        
+    inputs = collapse_into_chronology(inputs)
     if len(inputs['chronologies']) >1:
         response.message.append("✔ File with multiple chronologies")
         response.message.append(f"{list(inputs['chronologies'].keys())}")
-
     for chron in inputs['chronologies']:
         ch = inputs['chronologies'][chron]
-        if ch.get("agetype", inputs['agetype']) is not None: 
-            ch.get("agetype", inputs['agetype']).replace("cal yr BP", 'Calendar years BP')
+        if ch.get("agetype", inputs.get('agetype')) is not None: 
+            ch.get("agetype", inputs.get('agetype')).replace("cal yr BP", 'Calendar years BP')
             agetype_query = """SELECT agetypeid FROM ndb.agetypes
                                 WHERE LOWER(agetype) = %(agetype)s"""
-            cur.execute(agetype_query, {'agetype': ch.get("agetype", inputs['agetype']).lower()})
+            cur.execute(agetype_query, {'agetype': ch.get("agetype", inputs.get('agetype')).lower()})
             id = cur.fetchone()
             if id:
                 ch['agetypeid'] = id[0]
@@ -94,15 +108,23 @@ def valid_chronologies(cur, yml_dict, csv_file):
                 elif isinstance(ch.get('age', inputs.get('age')), list):
                     ch['age'] = [1950 - value.year if isinstance(value, datetime) else 1950 - value
                                     for value in ch.get('age', inputs.get('age'))]
-            c["ageboundolder"]= int(max([num for num in ch.get('ageboundolder', ch.get('age')) if num is not None]))
-            c["ageboundyounger"]= int(min([num for num in ch.get('ageboundyounger', ch.get('age')) if num is not None]))
+            for param in ['ageboundolder', 'ageboundyounger']:
+                if param in ch:
+                    if isinstance(ch[param], list):
+                        c[param]= int(min([num for num in ch.get(param, ch.get('age')) if num is not None])) if param == 'ageboundyounger' else int(max([num for num in ch.get(param, ch.get('age')) if num is not None]))
+                    else:
+                        c[param]= ch[param]
+                else:
+                    if isinstance(ch['age'], list):
+                        c[param]= int(min([num for num in ch.get('age') if num is not None])) if param == 'ageboundyounger' else int(max([num for num in ch.get('age') if num is not None]))
+                    else:
+                        c[param]= None
             Chronology(**c)
             response.valid.append(True)
             response.message.append("✔  Chronology can be created")
         except Exception as e:
             response.valid.append(False)
             response.message.append(f"✗  Chronology cannot be created: {e}")
-    
     response.validAll = all(response.valid)
     response.message = list(set(response.message))
     return response
diff --git a/src/DataBUS/neotomaValidator/valid_publication.py b/src/DataBUS/neotomaValidator/valid_publication.py
@@ -63,6 +63,7 @@ def list_flattener(original_list, delim =', '):
                 obs = cur.fetchone()
                 pub_id = obs if obs is not None else None
                 if pub_id:
+                    print(obs[1])
                     response.message.append(f"✔  Found Publication: "
                                             f"{obs[1]} in Neotoma")
                     response.valid.append(True)
diff --git a/src/DataBUS/neotomaValidator/valid_sample_age.py b/src/DataBUS/neotomaValidator/valid_sample_age.py
diff --git a/src/DataBUS/neotomaValidator/valid_uth_series.py b/src/DataBUS/neotomaValidator/valid_uth_series.py