diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index 08b1d68..50a828d 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -84,7 +84,24 @@ def get_records(self, *args, **kw): q = q.filter(" and ".join(fs)) q = q.expand("Thing/Locations") - return [di.thing.locations.entities[0] for di in q.list()] + + # NM ENV has multiple datastreams per parameter per location (e.g. id 8 and arsenic) + # because of this duplicative site information is retrieved (we operated under the assumption one datastream per location per parameter) + # so we need to filter out duplicates, otherwise there will be multiple site records and duplicative parameter records + all_sites = [di.thing.locations.entities[0] for di in q.list()] + + # can't do list(set(all_sites)) because the Location entities are not hashable + site_dictionary = {} + for site in all_sites: + site_id = site.id + if site_id not in site_dictionary.keys(): + site_dictionary[site_id] = site + + distinct_sites = list(site_dictionary.values()) + # print( + # f"Found {len(all_sites)} datastreams for {analyte} and {len(distinct_sites)} distinct sites." + # ) + return distinct_sites class DWBAnalyteSource(STAnalyteSource): @@ -120,16 +137,20 @@ def get_records(self, site, *args, **kw): f"Thing/Locations/id eq {site.id} and ObservedProperty/id eq {analyte}" ) - ds = q.list().entities[0] + # NMED DWB has multiple datastreams per parameter per location (e.g. id 8 and arsenic) + # print( + # f"Found {len(q.list().entities)} datastreams for {site.id} and {analyte}." + # ) rs = [] - for obs in ds.get_observations().query().list(): - rs.append( - { - "location": site, - "datastream": ds, - "observation": obs, - } - ) + for datastream in q.list().entities: + for obs in datastream.get_observations().query().list(): + rs.append( + { + "location": site, + "datastream": datastream, + "observation": obs, + } + ) return rs diff --git a/backend/transformer.py b/backend/transformer.py index cb3afe5..85b6a9f 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -530,11 +530,7 @@ def do_transform( if warning_msg != "": msg = f"{warning_msg} for {klassed_record.id}" self.warn(msg) - except TypeError: - msg = f"Keeping {source_result} for {klassed_record.id} on {klassed_record.date_measured} for time series data" - self.warn(msg) - converted_result = source_result - except ValueError: + except (TypeError, ValueError): msg = f"Keeping {source_result} for {klassed_record.id} on {klassed_record.date_measured} for time series data" self.warn(msg) converted_result = source_result diff --git a/setup.py b/setup.py index 6dad4c9..2675e8e 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ setup( name="nmuwd", - version="0.9.4", + version="0.9.5", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description,