diff --git a/README.md b/README.md index bb05d17..e8db69e 100644 --- a/README.md +++ b/README.md @@ -38,8 +38,9 @@ Data comes from the following sources. We are continuously adding new sources as - Available data: `water levels` - [Pecos Valley Artesian Conservancy District (PVACD)](https://st2.newmexicowaterdata.org/FROST-Server/v1.1/Locations?$filter=properties/agency%20eq%20%27PVACD%27) - Available data: `water levels` -- [USGS (NWIS)](https://waterdata.usgs.gov/nwis) +- [USGS (NWIS)](https://api.waterdata.usgs.gov/docs/) - Available data: `water levels` + - **IMPORTANT:** The USGS now uses API keys. To prevent yourself from hitting the rate limit please [acquire an API key](https://api.waterdata.usgs.gov/signup/), save it, and provide it via the `--usgs-api-key` flag when gathering water level data from the USGS. - [Water Quality Portal (WQP)](https://www.waterqualitydata.us/) - Available data: `water levels`, `water quality` @@ -189,6 +190,14 @@ The Data Integration Engine enables the user to obtain groundwater level and gro - `--no-pvacd` to exclude Pecos Valley Artesian Convservancy District (PVACD) data - `--no-wqp` to exclude Water Quality Portal (WQP) data +### USGS API Keys + +The USGS now uses [API keys](https://api.waterdata.usgs.gov/signup/) to increase the query rate limit to their APIs. If you intend to include USGS water level data in your output please acquire an API key, save it somewhere, and provide it via the `--usgs-api-key` flag. For example: + +``` +die weave waterlevels --output-type timeseries_unified --usgs-api-key FAKE_API_KEY +``` + ### Geographic Filters [In Development] The following flags can be used to geographically filter data: diff --git a/backend/config.py b/backend/config.py index 2b4af34..d45e95d 100644 --- a/backend/config.py +++ b/backend/config.py @@ -281,8 +281,19 @@ def get_config_and_false_agencies(self): "nwis", "pvacd", ] + elif self.parameter in [BICARBONATE]: + config_agencies = ["nmbgmr_amp", "nmed_dwb", "nmose_isc_seven_rivers", "wqp"] + false_agencies = [ + "bor", + "bernco", + "cabq", + "ebid", + "nmose_roswell", + "nmose_pod", + "nwis", + "pvacd", + ] elif self.parameter in [ - BICARBONATE, CALCIUM, CHLORIDE, FLUORIDE, diff --git a/backend/connectors/bor/source.py b/backend/connectors/bor/source.py index 5ad03e1..4a28ee2 100644 --- a/backend/connectors/bor/source.py +++ b/backend/connectors/bor/source.py @@ -57,7 +57,7 @@ def get_records(self): # locationTypeId 10 is for wells url = "https://data.usbr.gov/rise/api/location" params = {"stateId": "NM", "locationTypeId": 10} - return self._execute_json_request(url, params) + return self._execute_json_request(url, params, tag="data") def parse_dt(dt): @@ -119,13 +119,14 @@ def get_records(self, site_record): code = get_analyte_search_param(self.config.parameter, BOR_ANALYTE_MAPPING) catalog_record_data = self._execute_json_request( - f"https://data.usbr.gov{site_record.catalogRecords[0]['id']}" + f"https://data.usbr.gov{site_record.catalogRecords[0]['id']}", + tag="data" ) catalog_items = catalog_record_data["relationships"]["catalogItems"]["data"] for i, item in enumerate(self._reorder_catalog_items(catalog_items)): - data = self._execute_json_request(f'https://data.usbr.gov{item["id"]}') + data = self._execute_json_request(f'https://data.usbr.gov{item["id"]}', tag="data") if not data: continue @@ -142,6 +143,7 @@ def get_records(self, site_record): return self._execute_json_request( "https://data.usbr.gov/rise/api/result", params={"itemId": data["attributes"]["_id"]}, + tag="data" ) diff 
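Note: with the `tag is None -> "data"` default removed from `backend/source.py` (see that hunk later in this diff), every caller now names the payload key explicitly, which is why the BOR and ISC Seven Rivers requests above gain `tag="data"`. A rough sketch of the intended behavior, assuming the helper returns the tagged key of the decoded JSON when one is supplied; the function name and body below are illustrative, not the actual `BaseSource._execute_json_request`:

```python
import httpx


def execute_json_request(url, params=None, tag=None, **kw):
    """Illustrative stand-in for the request helper in backend/source.py."""
    resp = httpx.get(url, params=params, **kw)
    if resp.status_code != 200:
        return None
    obj = resp.json()
    # When a tag is supplied, return just that key's payload (e.g. "data" for
    # BOR's RISE API, which wraps records in {"data": [...]}); otherwise return
    # the whole decoded object.
    if tag and isinstance(obj, dict):
        return obj.get(tag)
    return obj


# records = execute_json_request(
#     "https://data.usbr.gov/rise/api/location",
#     params={"stateId": "NM", "locationTypeId": 10},
#     tag="data",
# )
```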
--git a/backend/connectors/isc_seven_rivers/source.py b/backend/connectors/isc_seven_rivers/source.py index 5679fad..691bbac 100644 --- a/backend/connectors/isc_seven_rivers/source.py +++ b/backend/connectors/isc_seven_rivers/source.py @@ -85,6 +85,7 @@ def health(self): def get_records(self): return self._execute_json_request( _make_url("getMonitoringPoints.ashx"), + tag="data", ) @@ -100,7 +101,7 @@ def _get_analyte_id_and_name(self, analyte): """ """ if self._analyte_ids is None: - resp = self._execute_json_request(_make_url("getAnalytes.ashx")) + resp = self._execute_json_request(_make_url("getAnalytes.ashx"), tag="data") if resp: self._analyte_ids = {r["name"]: r["id"] for r in resp} @@ -164,7 +165,7 @@ def get_records(self, site_record): self._source_parameter_name = analyte_id_and_name["name"] return self._execute_json_request( - _make_url("getReadings.ashx"), params=params + _make_url("getReadings.ashx"), params=params, tag="data" ) @@ -184,6 +185,7 @@ def get_records(self, site_record): return self._execute_json_request( _make_url("getWaterLevels.ashx"), params=params, + tag="data", ) def _clean_records(self, records): diff --git a/backend/connectors/usgs/source.py b/backend/connectors/usgs/source.py index cac4f2a..3da177f 100644 --- a/backend/connectors/usgs/source.py +++ b/backend/connectors/usgs/source.py @@ -13,90 +13,42 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== -from datetime import datetime import httpx +import os +import time +import json from backend.connectors import NM_STATE_BOUNDING_POLYGON from backend.constants import ( - FEET, DTW, - DTW_UNITS, DT_MEASURED, PARAMETER_NAME, PARAMETER_VALUE, PARAMETER_UNITS, SOURCE_PARAMETER_NAME, SOURCE_PARAMETER_UNITS, - EARLIEST, - LATEST, ) from backend.connectors.usgs.transformer import ( NWISSiteTransformer, NWISWaterLevelTransformer, ) +from backend.exceptions import USGSRateLimitError, PartialOrNoDataError from backend.source import ( - BaseSource, BaseWaterLevelSource, BaseSiteSource, make_site_list, get_terminal_record, ) - -def parse_rdb(text): - """' - Parses rdb tab-delimited responses for NWIS Site Services - """ - - def line_generator(): - header = None - for line in text.split("\n"): - if line.startswith("#"): - continue - elif line.startswith("agency_cd"): - header = [h.strip() for h in line.split("\t")] - continue - elif line.startswith("5s"): - continue - elif line == "": - continue - - vals = [v.strip() for v in line.split("\t")] - if header and any(vals): - yield dict(zip(header, vals)) - - return list(line_generator()) - - -def parse_json(data): - """ - Parses JSON responses for NWIS Groundwater Level Services - """ - records = [] - - for location in data["timeSeries"]: - site_code = location["sourceInfo"]["siteCode"][0]["value"] - agency = location["sourceInfo"]["siteCode"][0]["agencyCode"] - source_parameter_name = location["variable"]["variableName"] - source_parameter_units = location["variable"]["unit"]["unitCode"] - for value in location["values"][0]["value"]: - record = { - "site_id": f"{agency}-{site_code}", - "source_parameter_name": source_parameter_name, - "value": value["value"], - "datetime_measured": value["dateTime"], - # "date_measured": value["dateTime"].split("T")[0], - # "time_measured": value["dateTime"].split("T")[1], - "source_parameter_units": source_parameter_units, - } - records.append(record) - return records - +LIMIT = 50000 +TIMEOUT=15*60 # 15 
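Condensed sketch of the retry pattern the rewritten connector relies on: up to `MAX_RETRIES` attempts, a linearly growing `time.sleep` between attempts, and an immediate abort on HTTP 429. The exception class mirrors `backend/exceptions.py`; the helper function and the `print` calls (standing in for `self.warn`) are illustrative only:

```python
import time
import httpx

TIMEOUT = 15 * 60  # 15 minutes, to allow for retries and large requests
MAX_RETRIES = 7


class USGSRateLimitError(Exception):
    pass


def get_with_retries(url, params, headers):
    """Illustrative retry loop: linear back-off, fail fast on rate limiting."""
    for tries in range(MAX_RETRIES):
        try:
            response = httpx.get(url, params=params, headers=headers, timeout=TIMEOUT)
            if response.status_code == 200:
                return response.json()
            if response.status_code == 429:
                # A 429 will not resolve by retrying without an API key.
                raise USGSRateLimitError("Rate limit exceeded")
            print(f"Received status code {response.status_code}. Retrying... {tries + 1}/{MAX_RETRIES}")
        except USGSRateLimitError:
            raise
        except Exception as e:
            print(f"Request failed: {e}. Retrying... {tries + 1}/{MAX_RETRIES}")
        time.sleep(tries + 1)  # back off a little longer on each attempt
    return {}
```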
minutes, to allow for retries and large requests +MAX_RETRIES = 7 class NWISSiteSource(BaseSiteSource): transformer_klass = NWISSiteTransformer chunk_size = 500 bounding_polygon = NM_STATE_BOUNDING_POLYGON + sites_url: str = "https://api.waterdata.usgs.gov/ogcapi/v0/collections/combined-metadata/items" def __repr__(self): return "NWISSiteSource" @@ -107,80 +59,255 @@ def tag(self): def health(self): try: - self._execute_text_request( - "https://waterservices.usgs.gov/nwis/site/", - { - "format": "rdb", - "siteOutput": "expanded", - "siteType": "GW", - "site": "325754103461301", - }, + if os.environ.get("USGS_API_KEY"): + headers = {"X-API-Key": os.environ["USGS_API_KEY"]} + else: + headers = {} + response = httpx.get( + url=self.sites_url, + params={"limit": 1, "parameter_code": "72019", "site_type_code": "GW", "state_code": "35"}, + timeout=TIMEOUT, + headers=headers ) + response.raise_for_status() return True except httpx.HTTPStatusError: - pass + return False def get_records(self): - params = {"format": "rdb", "siteOutput": "expanded", "siteType": "GW"} - config = self.config + params: dict = { + "limit": LIMIT, + "parameter_code": "72019", + "site_type_code": "GW", + } - if config.has_bounds(): - bbox = config.bbox_bounding_points() - params["bBox"] = ",".join([str(b) for b in bbox]) + if self.config.has_bounds(): + bbox: tuple = self.config.bbox_bounding_points() + params["bbox"] = ",".join([str(b) for b in bbox]) else: - params["stateCd"] = "NM" - - if config.start_date: - params["startDt"] = config.start_dt.date().isoformat() - if config.end_date: - params["endDt"] = config.end_dt.date().isoformat() - - text = self._execute_text_request( - "https://waterservices.usgs.gov/nwis/site/", params - ) - if text: - records = parse_rdb(text) - self.log(f"Retrieved {len(records)} records") - return records + params["state_code"] = "35" + + if self.config.start_date: + begin: str = self.config.start_dt.date().isoformat() + begin = f"{begin}T00:00:00Z" + params["begin"] = begin + if self.config.end_date: + end: str = self.config.end_dt.date().isoformat() + end = f"{end}T23:59:59Z" + params["end"] = end + + data: dict = {} + tries: int = 0 + + while tries < MAX_RETRIES: + try: + if os.environ.get("USGS_API_KEY"): + headers = {"X-API-Key": os.environ["USGS_API_KEY"]} + else: + headers = {} + response = httpx.get( + url=self.sites_url, + params=params, + timeout=TIMEOUT, + headers=headers + ) + + if response.status_code == 200: + data = response.json() + break + elif response.status_code == 429: + raise USGSRateLimitError() + else: + self.warn(f"Received status code {response.status_code}. Retrying... {tries + 1}/{MAX_RETRIES}") + + except USGSRateLimitError: + self.warn("Rate limit exceeded. Please provide a valid USGS API key via the --usgs-api-key flag to increase your rate limit and try again.") + raise USGSRateLimitError("Rate limit exceeded") + except json.JSONDecodeError as e: + self.warn(f"Failed to decode JSON response: {e}. Retrying... {tries + 1}/{MAX_RETRIES}") + except Exception as e: + self.warn(f"Error retrieving site records: {e}. Retrying... 
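A standalone sketch of the new site query against the OGC API `combined-metadata` collection. The query parameters (`parameter_code` 72019, `site_type_code` GW, `state_code` 35) and the `X-API-Key` header follow the code above; the bounding-box value shown in the comment is only an example:

```python
import os
import httpx

SITES_URL = "https://api.waterdata.usgs.gov/ogcapi/v0/collections/combined-metadata/items"

params = {
    "limit": 50000,
    "parameter_code": "72019",  # depth to water level below land surface
    "site_type_code": "GW",     # groundwater sites only
    "state_code": "35",         # New Mexico FIPS code
}
# A bounding box replaces the state filter when the config supplies one, e.g.
# params["bbox"] = "-107.0,33.0,-105.0,35.0"  (example coordinates only)

# The API key is optional, but supplying it raises the query rate limit.
headers = {}
if os.environ.get("USGS_API_KEY"):
    headers["X-API-Key"] = os.environ["USGS_API_KEY"]

response = httpx.get(SITES_URL, params=params, headers=headers, timeout=15 * 60)
response.raise_for_status()
sites = response.json().get("features", [])
print(f"Retrieved {len(sites)} site features")
```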
{tries + 1}/{MAX_RETRIES}") + + tries += 1 + time.sleep(tries) + + if data == {}: + self.warn("Failed to retrieve site records after multiple attempts.") + raise PartialOrNoDataError("Failed to retrieve site records after multiple attempts.") + + records: list = data.get("features", []) + + return records class NWISWaterLevelSource(BaseWaterLevelSource): transformer_klass = NWISWaterLevelTransformer + # USGS complex queries allow up to 250 sites to be queried at once + # https://api.waterdata.usgs.gov/docs/ogcapi/complex-queries + num_sites = 250 def __repr__(self): return "NWISWaterLevelSource" def get_records(self, site_record): - # query sites with the agency, which need to be in the form of "{agency}:{site number}" - sites = make_site_list(site_record) - sites_with_colons = [s.replace("-", ":") for s in sites] - - params = { - "format": "json", - "siteType": "GW", - "siteStatus": "all", - "parameterCd": "72019", - "sites": ",".join(sites_with_colons), + params: dict = { + "limit": LIMIT, + "parameter_code": "72019", } - config = self.config - if config.start_date: - params["startDt"] = config.start_dt.date().isoformat() - else: - params["startDt"] = "1900-01-01" - - if config.end_date: - params["endDt"] = config.end_dt.date().isoformat() - - data = self._execute_json_request( - url="https://waterservices.usgs.gov/nwis/gwlevels/", - params=params, - tag="value", - ) - if data: - records = parse_json(data) - self.log(f"Retrieved {len(records)} records") - return records + begin: str = "" + end: str = "" + + if self.config.start_date: + begin = self.config.start_dt.date().isoformat() + begin = f"{begin}T00:00:00Z" + if self.config.end_date: + end = self.config.end_dt.date().isoformat() + end = f"{end}T23:59:59Z" + + if begin and end: + params["datetime"] = f"{begin}/{end}" + elif begin: + params["datetime"] = f"{begin}/.." + elif end: + params["datetime"] = f"../{end}" + + records: list = [] + sites: list = make_site_list(site_record) + + # group sites into batches of num_sites to pass to the API + # USGS APIs allow up to 250 sites to be queried at once with complex queries + list_of_lists_of_sites: list = [] + for i in range(0, len(sites), self.num_sites): + list_of_lists_of_sites.append(sites[i:i + self.num_sites]) + + + for list_of_sites in list_of_lists_of_sites: + json_data: dict = { + "op": "in", + "args": [ + {"property": "monitoring_location_id"}, + list_of_sites + ] + } + + data: dict = {} + tries: int = 0 + + while tries < MAX_RETRIES: + try: + if os.environ.get("USGS_API_KEY"): + headers = {"X-API-Key": os.environ["USGS_API_KEY"], "Content-Type": "application/query-cql-json"} + else: + headers = {"Content-Type": "application/query-cql-json"} + response = httpx.post( + url="https://api.waterdata.usgs.gov/ogcapi/v0/collections/field-measurements/items", + json=json_data, + headers=headers, + params=params, + timeout=TIMEOUT, + ) + if response.status_code == 200: + data = response.json() + break + elif response.status_code == 429: + raise USGSRateLimitError() + else: + self.warn(f"Received status code {response.status_code}. Retrying... {tries + 1}/{MAX_RETRIES}") + + except USGSRateLimitError: + self.warn("Rate limit exceeded. Please provide a valid USGS API key via the --usgs-api-key flag to increase your rate limit and try again.") + raise USGSRateLimitError("Rate limit exceeded") + except json.JSONDecodeError as e: + self.warn(f"Failed to decode JSON response: {e}. Retrying... 
{tries + 1}/{MAX_RETRIES}") + except Exception as e: + self.warn(f"Error retrieving water level records: {e}. Retrying... {tries + 1}/{MAX_RETRIES}") + + tries += 1 + time.sleep(tries) + + if data == {}: + self.warn("Failed to retrieve water level records after multiple attempts.") + raise PartialOrNoDataError("Failed to retrieve water level records after multiple attempts.") + + features: list[dict] = data.get("features", []) + + standard_features: list[dict] = [self._standardize_record(feature) for feature in features] + records.extend(standard_features) + + """ + The following commented-out code handles pagination for cases where there are more than LIMIT records for a given batch of sites. + However, in testing, I have not encountered any cases where this is necessary. Furthermore, cursor-based pagination is broken as + of 4/29/26 when the limit query parameter is used, and it can't be used in combination with other parameters via complex queries. + If we do encounter cases where there are more than LIMIT records, we can use the following code to handle pagination (when it is fixed). + + found_next_link: bool = False + links: list[dict] = data.get("links", []) + for link in links: + if link["rel"] == "next": + next_link_url = link["href"] + found_next_link = True + break + + # use GET requests for the paginated responses after the initial POST to avoid issues with httpx and long URLs with many site ids + # USGS APIs use cursor pagination, so we can just follow the "next" links until there are no more + while found_next_link: + tries: int = 0 + data: dict = {} + while tries < MAX_RETRIES: + try: + response = httpx.get( + url=next_link_url, + headers=headers, + timeout=TIMEOUT, + ) + if response.status_code == 200: + data = response.json() + break + elif response.status_code == 429: + raise USGSRateLimitError() + else: + self.warn(f"Received status code {response.status_code} for paginated request. Retrying... {tries + 1}/{MAX_RETRIES}") + except USGSRateLimitError: + self.warn("Rate limit exceeded. Please provide a valid USGS API key via the --usgs-api-key flag to increase your rate limit and try again.") + raise USGSRateLimitError("Rate limit exceeded") + except json.JSONDecodeError as e: + self.warn(f"Failed to decode JSON response: {e}. Retrying... {tries + 1}/{MAX_RETRIES}") + except Exception as e: + self.warn(f"Error retrieving paginated water level records: {e}. Retrying... 
{tries + 1}/{MAX_RETRIES}") + + tries += 1 + time.sleep(tries) + + if data == {}: + self.warn("Failed to retrieve paginated water level records after multiple attempts.") + raise PartialOrNoDataError("Failed to retrieve paginated water level records after multiple attempts") + + features: list[dict] = data.get("features", []) + standard_features: list[dict] = [self._standardize_record(feature) for feature in features] + records.extend(standard_features) + + found_next_link: bool = False + links: list = data.get("links", []) + for link in links: + if link["rel"] == "next": + next_link_url = link["href"] + found_next_link = True + break + """ + + self.log(f"Retrieved {len(records)} records") + + return records + + def _standardize_record(self, record: dict) -> dict: + return { + "site_id": record["properties"]["monitoring_location_id"], + "source_parameter_name": "Water level, depth LSD", + "value": record["properties"]["value"], + "datetime_measured": record["properties"]["time"], + "source_parameter_units": record["properties"]["unit_of_measure"] + } def _extract_site_records(self, records, site_record): return [ri for ri in records if ri["site_id"] == site_record.id] diff --git a/backend/connectors/usgs/transformer.py b/backend/connectors/usgs/transformer.py index 379b8bd..fc95212 100644 --- a/backend/connectors/usgs/transformer.py +++ b/backend/connectors/usgs/transformer.py @@ -19,35 +19,27 @@ class NWISSiteTransformer(SiteTransformer): def _transform(self, record): - elevation = record["alt_va"] + elevation = record["properties"]["altitude"] try: elevation = float(elevation) except (ValueError, TypeError): elevation = None - lng = record["dec_long_va"] - lat = record["dec_lat_va"] - datum = record["coord_datum_cd"] - - # if not self.contained(lng, lat): - # return - - agency = record["agency_cd"] - site_no = record["site_no"] - site_id = f"{agency}-{site_no}" + # this data comes from OGC API, which requires the use of WGS84 for the horizontal datum + datum = "WGS84" rec = { "source": "USGS-NWIS", - "id": site_id, - "name": record["station_nm"], - "latitude": lat, - "longitude": lng, + "id": record["properties"]["monitoring_location_id"], + "name": record["properties"]["monitoring_location_name"], + "latitude": record["geometry"]["coordinates"][1], + "longitude": record["geometry"]["coordinates"][0], "elevation": elevation, "elevation_units": "ft", "horizontal_datum": datum, - "vertical_datum": record["alt_datum_cd"], - "aquifer": record["nat_aqfr_cd"], - "well_depth": record["well_depth_va"], + "vertical_datum": record["properties"]["vertical_datum"], + "aquifer": record["properties"]["national_aquifer_code"], + "well_depth": record["properties"]["well_constructed_depth"], "well_depth_units": "ft", } return rec diff --git a/backend/exceptions.py b/backend/exceptions.py new file mode 100644 index 0000000..80b7b84 --- /dev/null +++ b/backend/exceptions.py @@ -0,0 +1,6 @@ +class USGSRateLimitError(Exception): + pass + + +class PartialOrNoDataError(Exception): + pass \ No newline at end of file diff --git a/backend/source.py b/backend/source.py index 5189258..be4208d 100644 --- a/backend/source.py +++ b/backend/source.py @@ -252,9 +252,6 @@ def _execute_json_request( the json response """ resp = httpx.get(url, params=params, **kw) - if tag is None: - tag = "data" - if resp.status_code == 200: try: obj = resp.json() diff --git a/backend/transformer.py b/backend/transformer.py index a42ca89..4c8cbb8 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -259,6 +259,7 @@ def 
standardize_datetime(dt, record_id): "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ", + "%Y-%m-%dT%H:%M:%S+00:00", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S+00:00", "%Y-%m-%d %H:%M", diff --git a/backend/unifier.py b/backend/unifier.py index b070631..d044d71 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -21,6 +21,7 @@ from backend.persister import BasePersister from backend.persisters.geoserver import GeoServerPersister from backend.source import BaseSiteSource +from backend.exceptions import USGSRateLimitError, PartialOrNoDataError def health_check(source: BaseSiteSource) -> bool | None: @@ -131,10 +132,18 @@ def _site_wrapper(site_source, parameter_source, persister, config): # in the future make discover required # return + # used to revert back to initial state if a rate limit error is hit, so there aren't partial records + initial_sites_len = len(persister.sites) + initial_timeseries_len = len(persister.timeseries) + initial_records_len = len(persister.records) + use_summarize = config.output_summary site_limit = config.site_limit - sites = site_source.read() + try: + sites = site_source.read() + except (USGSRateLimitError, PartialOrNoDataError): + sites = [] if not sites: return @@ -158,18 +167,32 @@ def _site_wrapper(site_source, parameter_source, persister, config): end_ind += n if use_summarize: - summary_records = parameter_source.read( - site_records, use_summarize, start_ind, end_ind - ) + try: + summary_records = parameter_source.read( + site_records, use_summarize, start_ind, end_ind + ) + except (USGSRateLimitError, PartialOrNoDataError): + # remove partial records to prevent incomplete data from being saved + persister.sites = persister.sites[:initial_sites_len] + persister.timeseries = persister.timeseries[:initial_timeseries_len] + persister.records = persister.records[:initial_records_len] + break if summary_records: persister.records.extend(summary_records) sites_with_records_count += len(summary_records) else: continue else: - results = parameter_source.read( - site_records, use_summarize, start_ind, end_ind - ) + try: + results = parameter_source.read( + site_records, use_summarize, start_ind, end_ind + ) + except (USGSRateLimitError, PartialOrNoDataError): + # remove partial records to prevent incomplete data from being saved + persister.sites = persister.sites[:initial_sites_len] + persister.timeseries = persister.timeseries[:initial_timeseries_len] + persister.records = persister.records[:initial_records_len] + break # no records are returned if there is no site record for parameter # or if the record isn't clean (doesn't have the correct fields) # don't count these sites to apply to site_limit diff --git a/frontend/cli.py b/frontend/cli.py index 3efb46b..34a3a34 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== -import sys +import os import click @@ -206,6 +206,13 @@ def cli(): ), ] +USGS_API_KEY_OPTION = [ + click.option( + "--usgs-api-key", + default=None, + help="USGS API key. 
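The `unifier.py` change above snapshots the persister's list lengths before each read and truncates back to them when the USGS source raises, so a failed or rate-limited run never keeps partial USGS records. A compact illustration of that snapshot-and-truncate pattern, using a stand-in persister with plain lists:

```python
class FakePersister:
    """Stand-in with the three lists the real persister keeps."""
    def __init__(self):
        self.sites, self.timeseries, self.records = [], [], []


class PartialOrNoDataError(Exception):
    pass


def read_with_rollback(persister, read_batch):
    # Remember how much data existed before this source ran.
    initial_sites = len(persister.sites)
    initial_timeseries = len(persister.timeseries)
    initial_records = len(persister.records)
    try:
        persister.records.extend(read_batch())
    except PartialOrNoDataError:
        # Drop anything appended during the failed read so output stays consistent.
        persister.sites = persister.sites[:initial_sites]
        persister.timeseries = persister.timeseries[:initial_timeseries]
        persister.records = persister.records[:initial_records]


def failing_batch():
    raise PartialOrNoDataError("rate limited midway through")


p = FakePersister()
read_with_rollback(p, lambda: ["rec-1", "rec-2"])  # succeeds; records kept
read_with_rollback(p, failing_batch)               # fails; nothing partial is kept
print(p.records)  # ['rec-1', 'rec-2']
```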
Can also be set via USGS_API_KEY environment variable", + ) +] def add_options(options): def _add_options(func): @@ -230,6 +237,7 @@ def _add_options(func): @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) @add_options(OUTPUT_FORMAT_OPTIONS) +@add_options(USGS_API_KEY_OPTION) def weave( parameter, config_path, @@ -256,10 +264,15 @@ def weave( dry, yes, output_format, + usgs_api_key, ): """ Get parameter timeseries or summary data """ + # set USGS_API_KEY environment variable if usgs_api_key is provided + if usgs_api_key is not None: + os.environ["USGS_API_KEY"] = usgs_api_key + # instantiate config and set up parameter config = setup_config( tag=parameter, @@ -334,6 +347,7 @@ def weave( @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) @add_options(OUTPUT_FORMAT_OPTIONS) +@add_options(USGS_API_KEY_OPTION) def sites( config_path, bbox, @@ -356,10 +370,15 @@ def sites( dry, yes, output_format, + usgs_api_key, ): """ Get sites """ + # set USGS_API_KEY environment variable if usgs_api_key is provided + if usgs_api_key is not None: + os.environ["USGS_API_KEY"] = usgs_api_key + config = setup_config( "sites", config_path, bbox, county, wkt, site_limit, dry, output_format ) diff --git a/requirements.txt b/requirements.txt index 15193eb..bd7f502 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ click==8.2.1 +python-dotenv flask frost_sta_client Geoalchemy2 diff --git a/setup.py b/setup.py index fb987e1..81bc3c1 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ ], install_requires=[ "click==8.2.1", + "python-dotenv", "flask", "frost_sta_client", "Geoalchemy2", diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py index 2029c38..7e3f21a 100644 --- a/tests/test_cli/__init__.py +++ b/tests/test_cli/__init__.py @@ -3,6 +3,7 @@ from pathlib import Path import pytest from typing import List +import os from backend.config import SOURCE_KEYS from backend.constants import ( @@ -57,6 +58,7 @@ def _test_weave( bbox: str | None = None, county: str | None = None, wkt: str | None = None, + usgs_api_key: str | None = None, ): # Arrange # turn off all sources except for the one being tested @@ -100,6 +102,11 @@ def _test_weave( arguments.extend(no_agencies) + # save original USGS_API_KEY to reset after test, and set to test value if provided + original_usgs_api_key = os.getenv("USGS_API_KEY", None) + if usgs_api_key: + arguments.extend(["--usgs-api-key", usgs_api_key]) + # Act result = self.runner.invoke(weave, arguments, standalone_mode=False) @@ -177,6 +184,15 @@ def _test_weave( # 9 assert getattr(config, "output_format") == output_format + + # 10 + if usgs_api_key: + assert os.getenv("USGS_API_KEY") == usgs_api_key + + if original_usgs_api_key is not None: + os.environ["USGS_API_KEY"] = original_usgs_api_key + else: + del os.environ["USGS_API_KEY"] except Exception as e: print(result) assert False @@ -217,6 +233,13 @@ def test_weave_wkt(self): wkt="POLYGON((-106.0 32.0, -102.0 32.0, -102.0 36.0, -106.0 36.0, -106.0 32.0))", ) + def test_weave_usgs_api_key(self): + self._test_weave( + parameter=WATERLEVELS, + output_type="summary", + usgs_api_key="TEST_API_KEY", + ) + def test_weave_waterlevels(self): self._test_weave(parameter=WATERLEVELS, output_type="summary") diff --git a/tests/test_sources/test_nmbgmr_amp.py b/tests/test_sources/test_nmbgmr_amp.py index b56fd5b..be75957 100644 --- a/tests/test_sources/test_nmbgmr_amp.py +++ b/tests/test_sources/test_nmbgmr_amp.py @@ -4,11 +4,8 @@ from backend.constants import WATERLEVELS, CALCIUM, 
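`python-dotenv` is added to the requirements and used in the NWIS tests to pull `USGS_API_KEY` from a local `.env` file. A minimal sketch, assuming a `.env` in the working directory containing a `USGS_API_KEY=...` line:

```python
import os
from dotenv import load_dotenv

# Reads key=value pairs from ./.env into the process environment.
# override=True lets the file win over a value already exported in the shell,
# mirroring the fixture in tests/test_sources/test_nwis.py.
load_dotenv(override=True)

api_key = os.environ.get("USGS_API_KEY")
if api_key:
    headers = {"X-API-Key": api_key}
else:
    headers = {}  # requests still work, just at the lower anonymous rate limit
print("API key configured:", bool(api_key))
```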
MILLIGRAMS_PER_LITER, FEET from tests.test_sources import BaseSourceTestClass -os.environ["IS_TESTING_ENV"] = "True" - - @pytest.fixture(autouse=True) -def setup(): +def setup_nmbgmr_amp(): # SETUP CODE ----------------------------------------------------------- os.environ["IS_TESTING_ENV"] = "True" diff --git a/tests/test_sources/test_nwis.py b/tests/test_sources/test_nwis.py index b7bf272..0c45618 100644 --- a/tests/test_sources/test_nwis.py +++ b/tests/test_sources/test_nwis.py @@ -1,6 +1,20 @@ +import os +from dotenv import load_dotenv +import pytest + from backend.constants import WATERLEVELS, FEET from tests.test_sources import BaseSourceTestClass +@pytest.fixture(autouse=True) +def setup_nwis(): + # SETUP CODE ----------------------------------------------------------- + load_dotenv(override=True) + + # RUN TESTS ------------------------------------------------------------ + yield + + # TEARDOWN CODE --------------------------------------------------------- + os.environ["USGS_API_KEY"] = "" class TestNWISWaterlevels(BaseSourceTestClass):