Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 0 additions & 10 deletions crates/clients/admin/src/datasets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,6 @@ impl<'a> DatasetsClient<'a> {
"MANIFEST_VALIDATION_ERROR" => Err(RegisterError::ManifestValidationError(
error_response.into(),
)),
"UNSUPPORTED_DATASET_KIND" => {
Err(RegisterError::UnsupportedDatasetKind(error_response.into()))
}
"MANIFEST_REGISTRATION_ERROR" => Err(RegisterError::ManifestRegistrationError(
error_response.into(),
)),
Expand Down Expand Up @@ -1526,13 +1523,6 @@ pub enum RegisterError {
#[error("dependency validation error: {0}")]
ManifestValidationError(#[source] ApiError),

/// Unsupported dataset kind (400, UNSUPPORTED_DATASET_KIND)
///
/// This occurs when:
/// - Dataset kind is not one of the supported types (manifest, evm-rpc, firehose)
#[error("unsupported dataset kind")]
UnsupportedDatasetKind(#[source] ApiError),

/// Failed to register manifest in the system (500, MANIFEST_REGISTRATION_ERROR)
///
/// This occurs when:
Expand Down
11 changes: 0 additions & 11 deletions crates/clients/admin/src/manifests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,6 @@ impl<'a> ManifestsClient<'a> {
"MANIFEST_VALIDATION_ERROR" => Err(RegisterError::ManifestValidationError(
error_response.into(),
)),
"UNSUPPORTED_DATASET_KIND" => {
Err(RegisterError::UnsupportedDatasetKind(error_response.into()))
}
"MANIFEST_STORAGE_ERROR" => {
Err(RegisterError::ManifestStorageError(error_response.into()))
}
Expand Down Expand Up @@ -575,14 +572,6 @@ pub enum RegisterError {
#[error("dependency validation error")]
ManifestValidationError(#[source] ApiError),

/// Unsupported dataset kind (400, UNSUPPORTED_DATASET_KIND)
///
/// This occurs when:
/// - Dataset kind is not one of the supported types (manifest, evm-rpc, firehose)
/// - The 'kind' field in the manifest contains an unrecognized value
#[error("unsupported dataset kind")]
UnsupportedDatasetKind(#[source] ApiError),

/// Failed to write manifest to object store (500, MANIFEST_STORAGE_ERROR)
///
/// This occurs when:
Expand Down
68 changes: 66 additions & 2 deletions crates/core/datasets-common/src/dataset_kind_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,24 @@
pub struct DatasetKindStr(String);

impl DatasetKindStr {
/// Creates a new [`DatasetKindStr`] from a string identifier.
pub fn new(kind: String) -> Self {
/// Creates a new [`DatasetKindStr`] from a string identifier without validation.
///
/// # Safety
/// The caller must ensure the provided string is a valid dataset kind identifier
/// (e.g., originates from a strongly-typed ZST kind or a trusted database value).
pub fn new_unchecked(kind: String) -> Self {
Self(kind)
}

/// Returns the dataset kind as a string slice.
pub fn as_str(&self) -> &str {
&self.0
}

/// Consumes the [`DatasetKindStr`] and returns the inner [`String`].
pub fn into_inner(self) -> String {
self.0
}
}

impl AsRef<str> for DatasetKindStr {
Expand Down Expand Up @@ -53,3 +62,58 @@ impl PartialEq<DatasetKindStr> for &str {
*self == other.0
}
}

#[cfg(feature = "metadata-db")]
impl From<metadata_db::manifests::ManifestKindOwned> for DatasetKindStr {
    /// Wraps the string held by a `ManifestKindOwned` in a [`DatasetKindStr`].
    ///
    /// The string is taken over as-is; no validation is performed here.
    fn from(value: metadata_db::manifests::ManifestKindOwned) -> Self {
        let raw: String = value.into_inner();
        // SAFETY: relies on the database only holding kind strings that were validated
        // before they were inserted.
        // NOTE(review): rows that predate the `kind` column presumably carry the migration
        // default 'unknown', so that placeholder can reach this conversion too — confirm
        // callers tolerate it.
        Self::new_unchecked(raw)
    }
}

#[cfg(feature = "metadata-db")]
impl From<DatasetKindStr> for metadata_db::manifests::ManifestKindOwned {
    /// Moves the inner string of a [`DatasetKindStr`] into an owned manifest kind.
    ///
    /// The string is handed over without being re-validated.
    fn from(value: DatasetKindStr) -> Self {
        // Take the inner `String` by move; no copy is made.
        let DatasetKindStr(raw) = value;
        // SAFETY: a `DatasetKindStr` is only built through `new_unchecked`, whose contract
        // requires the caller to supply a valid kind identifier, so the string is trusted.
        metadata_db::manifests::ManifestKind::from_owned_unchecked(raw)
    }
}

#[cfg(feature = "metadata-db")]
impl<'a> From<&'a DatasetKindStr> for metadata_db::manifests::ManifestKind<'a> {
    /// Borrows the string of a [`DatasetKindStr`] as a manifest kind tied to lifetime `'a`.
    ///
    /// The borrowed string is not re-validated.
    fn from(value: &'a DatasetKindStr) -> Self {
        let raw: &'a str = value.as_str();
        // SAFETY: a `DatasetKindStr` is only built through `new_unchecked`, whose contract
        // requires the caller to supply a valid kind identifier, so the string is trusted.
        metadata_db::manifests::ManifestKind::from_ref_unchecked(raw)
    }
}

#[cfg(feature = "metadata-db")]
impl<'a> PartialEq<metadata_db::manifests::ManifestKind<'a>> for DatasetKindStr {
    /// Compares the two kinds by their string form.
    fn eq(&self, other: &metadata_db::manifests::ManifestKind<'a>) -> bool {
        let lhs = self.as_str();
        let rhs = other.as_str();
        lhs == rhs
    }
}

#[cfg(feature = "metadata-db")]
impl<'a> PartialEq<DatasetKindStr> for metadata_db::manifests::ManifestKind<'a> {
    /// Compares the two kinds by their string form.
    fn eq(&self, other: &DatasetKindStr) -> bool {
        let lhs = self.as_str();
        let rhs = other.as_str();
        lhs == rhs
    }
}

#[cfg(feature = "metadata-db")]
impl<'a> PartialEq<metadata_db::manifests::ManifestKind<'a>> for &DatasetKindStr {
    /// Compares a borrowed [`DatasetKindStr`] with a manifest kind by their string form.
    fn eq(&self, other: &metadata_db::manifests::ManifestKind<'a>) -> bool {
        let lhs = self.as_str();
        let rhs = other.as_str();
        lhs == rhs
    }
}

#[cfg(feature = "metadata-db")]
impl<'a> PartialEq<&DatasetKindStr> for metadata_db::manifests::ManifestKind<'a> {
    /// Compares a manifest kind with a borrowed [`DatasetKindStr`] by their string form.
    fn eq(&self, other: &&DatasetKindStr) -> bool {
        let lhs = self.as_str();
        let rhs = other.as_str();
        lhs == rhs
    }
}
4 changes: 3 additions & 1 deletion crates/core/datasets-derived/src/dataset_kind.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ impl DerivedDatasetKind {

impl From<DerivedDatasetKind> for DatasetKindStr {
fn from(value: DerivedDatasetKind) -> Self {
DatasetKindStr::new(value.to_string())
// SAFETY: DerivedDatasetKind is a strongly-typed ZST whose Display impl produces
// a valid dataset kind string.
DatasetKindStr::new_unchecked(value.to_string())
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/core/datasets-derived/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub mod sql_str;

pub use self::{
dataset::Dataset,
dataset_kind::{DerivedDatasetKind, DerivedDatasetKindError},
dataset_kind::DerivedDatasetKind,
func_name::FuncName,
function::{Function, FunctionSource},
manifest::Manifest,
Expand Down
4 changes: 3 additions & 1 deletion crates/core/datasets-raw/src/dataset_kind.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ macro_rules! define_dataset_kind {

impl From<$Name> for DatasetKindStr {
fn from(value: $Name) -> Self {
DatasetKindStr::new(value.to_string())
// SAFETY: $Name is a strongly-typed ZST whose Display impl produces
// a valid dataset kind string.
DatasetKindStr::new_unchecked(value.to_string())
}
}

Expand Down
7 changes: 4 additions & 3 deletions crates/core/datasets-registry/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::collections::BTreeSet;

use datasets_common::{
hash::Hash, hash_reference::HashReference, name::Name, namespace::Namespace,
reference::Reference, revision::Revision, version::Version,
dataset_kind_str::DatasetKindStr, hash::Hash, hash_reference::HashReference, name::Name,
namespace::Namespace, reference::Reference, revision::Revision, version::Version,
};
use metadata_db::MetadataDb;

Expand Down Expand Up @@ -63,14 +63,15 @@ impl DatasetsRegistry {
pub async fn register_manifest(
&self,
hash: &Hash,
kind: &DatasetKindStr,
content: String,
) -> Result<(), RegisterManifestError> {
let path = self
.store
.store(hash, content)
.await
.map_err(RegisterManifestError::ManifestStorage)?;
metadata_db::manifests::register(&self.metadata_db, hash, path)
metadata_db::manifests::register(&self.metadata_db, hash, kind, path)
.await
.map_err(RegisterManifestError::MetadataRegistration)?;
Ok(())
Expand Down
4 changes: 3 additions & 1 deletion crates/core/datasets-static/src/dataset_kind.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ impl StaticDatasetKind {

impl From<StaticDatasetKind> for DatasetKindStr {
fn from(value: StaticDatasetKind) -> Self {
DatasetKindStr::new(value.to_string())
// SAFETY: StaticDatasetKind is a strongly-typed ZST whose Display impl produces
// a valid dataset kind string.
DatasetKindStr::new_unchecked(value.to_string())
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- =============================================================
-- Migration: Add kind column to manifest_files
-- =============================================================
-- Adds a TEXT kind column to manifest_files to store the dataset
-- kind (e.g., evm-rpc, derived, static) for each manifest file.
-- The column is NOT NULL with DEFAULT 'unknown', so rows that
-- already exist are backfilled with 'unknown'. The default stays
-- on the column, so a later INSERT that omits kind also gets
-- 'unknown'.
-- =============================================================

ALTER TABLE manifest_files ADD COLUMN kind TEXT NOT NULL DEFAULT 'unknown';
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the first of 3 PRs that will require either metadata DB fiddling or redeploying the manifests to upgrade to the latest version

12 changes: 8 additions & 4 deletions crates/core/metadata-db/src/manifests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,41 @@
//!
//! This module provides operations for managing manifest files:
//! - **manifest_files**: Content-addressable manifest storage indexed by SHA256 hash
//! - **Type definitions**: ManifestHash, ManifestPath
//! - **Type definitions**: ManifestHash, ManifestPath, ManifestKind
//!
//! ## Database Tables
//!
//! - **manifest_files**: Content-addressable manifest storage with hash → path mapping
//! - **manifest_files**: Content-addressable manifest storage with hash → (path, kind) mapping

mod hash;
mod kind;
mod path;
pub(crate) mod sql;

pub use self::{
hash::{Hash as ManifestHash, HashOwned as ManifestHashOwned},
kind::{Kind as ManifestKind, KindOwned as ManifestKindOwned},
path::{Path as ManifestPath, PathOwned as ManifestPathOwned},
sql::ManifestSummary,
};
use crate::{db::Executor, error::Error};

/// Register manifest file in content-addressable storage
///
/// Inserts manifest hash and path into `manifest_files` table with ON
/// Inserts manifest hash, path, and kind into `manifest_files` table with ON
/// CONFLICT DO NOTHING. This operation is idempotent - duplicate
/// registrations are silently ignored.
#[tracing::instrument(skip(exe), err)]
pub async fn register<'c, E>(
exe: E,
hash: impl Into<ManifestHash<'_>> + std::fmt::Debug,
kind: impl Into<ManifestKind<'_>> + std::fmt::Debug,
path: impl Into<ManifestPath<'_>> + std::fmt::Debug,
) -> Result<(), Error>
where
E: Executor<'c>,
{
sql::insert(exe, hash.into(), path.into())
sql::insert(exe, hash.into(), kind.into(), path.into())
.await
.map_err(Error::Database)
}
Expand Down Expand Up @@ -74,6 +77,7 @@ where
///
/// Queries for all manifests in the `manifest_files` table, returning:
/// - Hash (content-addressable identifier)
/// - Kind (dataset kind, e.g., "evm-rpc", "solana", "firehose", "manifest")
/// - Dataset count (number of datasets using this manifest)
///
/// Results are ordered by hash.
Expand Down
Loading
Loading