|
34 | 34 | import json |
35 | 35 | import os |
36 | 36 | import sys |
| 37 | +from urllib.error import HTTPError, URLError |
| 38 | +from urllib.parse import quote |
| 39 | +from urllib.request import Request, urlopen |
37 | 40 |
|
38 | 41 | # Add parent dir to path for shared helpers |
39 | 42 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) |
|
75 | 78 | USGS_PARAMETER_CODES = "https://api.waterdata.usgs.gov/ogcapi/v0/collections/parameter-codes" |
76 | 79 | USGS_STATISTIC_CODES = "https://api.waterdata.usgs.gov/ogcapi/v0/collections/statistic-codes" |
77 | 80 | USGS_API_REGISTRATION = "https://api.usgs.gov/" |
| 81 | +USGS_NIMS_API_BASE = "https://api.waterdata.usgs.gov/nims/v0" |
| 82 | +USGS_NIMS_IMAGE_BASE = "https://usgs-nims-images.s3.amazonaws.com" |
78 | 83 |
|
79 | 84 | # Series semantics |
80 | 85 | STATISTIC_INSTANTANEOUS = "00011" |
@@ -131,6 +136,36 @@ def _combined_metadata_url(nwis_id: str, parameter_code: str) -> str: |
131 | 136 | ) |
132 | 137 |
|
133 | 138 |
|
| 139 | +def _nims_latest_image_doc(station: dict) -> dict | None: |
| 140 | + cam_id = station.get("camId") |
| 141 | + if not cam_id: |
| 142 | + return None |
| 143 | + |
| 144 | + url = f"{USGS_NIMS_API_BASE}/listFiles?camId={quote(cam_id)}&limit=1&recent=true" |
| 145 | + try: |
| 146 | + req = Request(url, headers={"Accept": "application/json"}) |
| 147 | + with urlopen(req, timeout=10) as resp: |
| 148 | + files = json.loads(resp.read().decode()) |
| 149 | + except (HTTPError, URLError, TimeoutError, ValueError, OSError) as exc: |
| 150 | + print(f" [WARN] NIMS thumbnail lookup skipped for {cam_id}: {exc}") |
| 151 | + return None |
| 152 | + |
| 153 | + if not isinstance(files, list) or not files: |
| 154 | + return None |
| 155 | + |
| 156 | + filename = files[0] |
| 157 | + if not isinstance(filename, str) or not filename.lower().endswith(".jpg"): |
| 158 | + return None |
| 159 | + |
| 160 | + thumb_url = f"{USGS_NIMS_IMAGE_BASE}/thumbnail/{quote(cam_id)}/{quote(filename)}" |
| 161 | + return { |
| 162 | + "role": "http://dbpedia.org/resource/Photograph", |
| 163 | + "name": "USGS NIMS Camera Image", |
| 164 | + "description": f"Latest available USGS NIMS camera thumbnail for {station['name']}.", |
| 165 | + "link": {"href": thumb_url, "type": "image/jpeg"}, |
| 166 | + } |
| 167 | + |
| 168 | + |
134 | 169 | # ====================================================================== |
135 | 170 | # Resource definitions |
136 | 171 | # ====================================================================== |
@@ -350,6 +385,63 @@ def _system_sml(station: dict) -> dict: |
350 | 385 | "value": station["camId"], |
351 | 386 | }) |
352 | 387 |
|
| 388 | + documents = [ |
| 389 | + doc for doc in [_nims_latest_image_doc(station)] if doc |
| 390 | + ] |
| 391 | + documents.extend([ |
| 392 | + { |
| 393 | + "role": "http://dbpedia.org/resource/Web_page", |
| 394 | + "name": "Monitoring Location", |
| 395 | + "description": f"USGS monitoring-location resource for site {nwis_id}.", |
| 396 | + "link": { |
| 397 | + "href": station.get("monitoringLocationUrl", _monitoring_location_url(nwis_id)), |
| 398 | + "type": "application/geo+json", |
| 399 | + }, |
| 400 | + }, |
| 401 | + { |
| 402 | + "role": "http://dbpedia.org/resource/Web_page", |
| 403 | + "name": "Latest Continuous - Discharge", |
| 404 | + "description": f"Latest discharge values for site {nwis_id}.", |
| 405 | + "link": { |
| 406 | + "href": station.get("latestContinuous00060Url", _latest_continuous_url(nwis_id, "00060")), |
| 407 | + "type": "application/geo+json", |
| 408 | + }, |
| 409 | + }, |
| 410 | + { |
| 411 | + "role": "http://dbpedia.org/resource/Web_page", |
| 412 | + "name": "Latest Continuous - Gage Height", |
| 413 | + "description": f"Latest gage-height values for site {nwis_id}.", |
| 414 | + "link": { |
| 415 | + "href": station.get("latestContinuous00065Url", _latest_continuous_url(nwis_id, "00065")), |
| 416 | + "type": "application/geo+json", |
| 417 | + }, |
| 418 | + }, |
| 419 | + { |
| 420 | + "role": "http://dbpedia.org/resource/Web_page", |
| 421 | + "name": "Time Series Metadata - Discharge", |
| 422 | + "description": f"Time-series metadata for discharge at site {nwis_id}.", |
| 423 | + "link": { |
| 424 | + "href": station.get("timeSeries00060Url", _time_series_metadata_url(nwis_id, "00060")), |
| 425 | + "type": "application/geo+json", |
| 426 | + }, |
| 427 | + }, |
| 428 | + { |
| 429 | + "role": "http://dbpedia.org/resource/Web_page", |
| 430 | + "name": "Time Series Metadata - Gage Height", |
| 431 | + "description": f"Time-series metadata for gage height at site {nwis_id}.", |
| 432 | + "link": { |
| 433 | + "href": station.get("timeSeries00065Url", _time_series_metadata_url(nwis_id, "00065")), |
| 434 | + "type": "application/geo+json", |
| 435 | + }, |
| 436 | + }, |
| 437 | + { |
| 438 | + "role": "http://dbpedia.org/resource/Web_page", |
| 439 | + "name": "USGS Water Data OGC API", |
| 440 | + "description": "Official USGS Water Data OGC API documentation.", |
| 441 | + "link": {"href": USGS_API_DOCS, "type": "text/html"}, |
| 442 | + }, |
| 443 | + ]) |
| 444 | + |
353 | 445 | return { |
354 | 446 | "type": "PhysicalSystem", |
355 | 447 | "id": _system_uid(nwis_id), |
@@ -432,59 +524,7 @@ def _system_sml(station: dict) -> dict: |
432 | 524 | }, |
433 | 525 | }, |
434 | 526 | ], |
435 | | - "documents": [ |
436 | | - { |
437 | | - "role": "http://dbpedia.org/resource/Web_page", |
438 | | - "name": "Monitoring Location", |
439 | | - "description": f"USGS monitoring-location resource for site {nwis_id}.", |
440 | | - "link": { |
441 | | - "href": station.get("monitoringLocationUrl", _monitoring_location_url(nwis_id)), |
442 | | - "type": "application/geo+json", |
443 | | - }, |
444 | | - }, |
445 | | - { |
446 | | - "role": "http://dbpedia.org/resource/Web_page", |
447 | | - "name": "Latest Continuous - Discharge", |
448 | | - "description": f"Latest discharge values for site {nwis_id}.", |
449 | | - "link": { |
450 | | - "href": station.get("latestContinuous00060Url", _latest_continuous_url(nwis_id, "00060")), |
451 | | - "type": "application/geo+json", |
452 | | - }, |
453 | | - }, |
454 | | - { |
455 | | - "role": "http://dbpedia.org/resource/Web_page", |
456 | | - "name": "Latest Continuous - Gage Height", |
457 | | - "description": f"Latest gage-height values for site {nwis_id}.", |
458 | | - "link": { |
459 | | - "href": station.get("latestContinuous00065Url", _latest_continuous_url(nwis_id, "00065")), |
460 | | - "type": "application/geo+json", |
461 | | - }, |
462 | | - }, |
463 | | - { |
464 | | - "role": "http://dbpedia.org/resource/Web_page", |
465 | | - "name": "Time Series Metadata - Discharge", |
466 | | - "description": f"Time-series metadata for discharge at site {nwis_id}.", |
467 | | - "link": { |
468 | | - "href": station.get("timeSeries00060Url", _time_series_metadata_url(nwis_id, "00060")), |
469 | | - "type": "application/geo+json", |
470 | | - }, |
471 | | - }, |
472 | | - { |
473 | | - "role": "http://dbpedia.org/resource/Web_page", |
474 | | - "name": "Time Series Metadata - Gage Height", |
475 | | - "description": f"Time-series metadata for gage height at site {nwis_id}.", |
476 | | - "link": { |
477 | | - "href": station.get("timeSeries00065Url", _time_series_metadata_url(nwis_id, "00065")), |
478 | | - "type": "application/geo+json", |
479 | | - }, |
480 | | - }, |
481 | | - { |
482 | | - "role": "http://dbpedia.org/resource/Web_page", |
483 | | - "name": "USGS Water Data OGC API", |
484 | | - "description": "Official USGS Water Data OGC API documentation.", |
485 | | - "link": {"href": USGS_API_DOCS, "type": "text/html"}, |
486 | | - }, |
487 | | - ], |
| 527 | + "documents": documents, |
488 | 528 | "characteristics": [ |
489 | 529 | { |
490 | 530 | "label": "Station Properties", |
@@ -528,6 +568,15 @@ def _system_sml(station: dict) -> dict: |
528 | 568 | } |
529 | 569 |
|
530 | 570 |
|
| 571 | +def _go_compatible_system_sml(sml: dict, base_url: str) -> dict: |
| 572 | + if "csapi-go" not in base_url: |
| 573 | + return sml |
| 574 | + compat = dict(sml) |
| 575 | + compat.pop("characteristics", None) |
| 576 | + compat.pop("capabilities", None) |
| 577 | + return compat |
| 578 | + |
| 579 | + |
531 | 580 | def _discharge_datastream_schema(site_no: str = "") -> dict: |
532 | 581 | """SWE DataRecord schema for the discharge (streamflow) datastream.""" |
533 | 582 | uid_suffix = f":{site_no}" if site_no else "" |
@@ -829,7 +878,7 @@ def bootstrap(*, clean: bool = False, clean_only: bool = False, |
829 | 878 | uid = _system_uid(nwis_id) |
830 | 879 |
|
831 | 880 | stub = _system_stub(st, proc_id or "pending") |
832 | | - sml = _system_sml(st) |
| 881 | + sml = _go_compatible_system_sml(_system_sml(st), base_url) |
833 | 882 |
|
834 | 883 | sys_id = ensure_system(base_url, auth, uid, stub, sml, |
835 | 884 | dry_run=dry_run, stats=stats, |
|
0 commit comments