Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions src/compute/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@ mz-dyncfgs = { path = "../dyncfgs" }
mz-expr = { path = "../expr" }
mz-metrics = { path = "../metrics" }
mz-ore = { path = "../ore", features = ["async", "process", "tracing", "columnar", "differential-dataflow", "region"] }
mz-proto = { path = "../proto" }
mz-persist-client = { path = "../persist-client" }
mz-persist-types = { path = "../persist-types" }
mz-repr = { path = "../repr" }
mz-storage-operators = { path = "../storage-operators" }
mz-storage-types = { path = "../storage-types" }
mz-timely-util = { path = "../timely-util" }
mz-txn-wal = { path = "../txn-wal" }
prost = "0.14.3"
prometheus = { version = "0.14.0", default-features = false }
scopeguard = "1.2.0"
serde = { version = "1.0.219", features = ["derive"] }
Expand All @@ -49,5 +51,8 @@ uuid = { version = "1.19.0", features = ["serde", "v4"] }
[target.'cfg(not(target_os = "macos"))'.dependencies]
core_affinity = "0.8.3"

[dev-dependencies]
proptest = { version = "1.10.0", default-features = false, features = ["std"] }

[features]
default = []
4 changes: 2 additions & 2 deletions src/compute/src/logging/initialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ use mz_dyncfg::ConfigSet;
use mz_ore::metrics::MetricsRegistry;
use mz_repr::{Diff, Timestamp};
use mz_storage_operators::persist_source::Subtime;
use mz_storage_types::errors::DataflowError;
use mz_timely_util::columnar::Column;
use mz_timely_util::columnar::builder::ColumnBuilder;
use mz_timely_util::operator::CollectionExt;
Expand All @@ -37,6 +36,7 @@ use crate::arrangement::manager::TraceBundle;
use crate::extensions::arrange::{KeyCollection, MzArrange};
use crate::logging::compute::{ComputeEvent, ComputeEventBuilder};
use crate::logging::{BatchLogger, EventQueue, SharedLoggingState};
use crate::render::errors::DataflowErrorSer;
use crate::typedefs::{ErrBatcher, ErrBuilder};

/// Initialize logging dataflows.
Expand Down Expand Up @@ -184,7 +184,7 @@ impl<A: Allocate + 'static> LoggingContext<'_, A> {
collections.extend(prometheus_collections);

let errs = scope.scoped("logging errors", |scope| {
let collection: KeyCollection<_, DataflowError, Diff> =
let collection: KeyCollection<_, DataflowErrorSer, Diff> =
VecCollection::empty(scope).into();
collection
.mz_arrange::<ErrBatcher<_, _>, ErrBuilder<_, _>, _>("Arrange logging err")
Expand Down
41 changes: 21 additions & 20 deletions src/compute/src/render.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ use mz_repr::explain::DummyHumanizer;
use mz_repr::{Datum, DatumVec, Diff, GlobalId, ReprRelationType, Row, SharedRow};
use mz_storage_operators::persist_source;
use mz_storage_types::controller::CollectionMetadata;
use mz_storage_types::errors::DataflowError;
use mz_timely_util::operator::{CollectionExt, StreamExt};
use mz_timely_util::probe::{Handle as MzProbeHandle, ProbeNotify};
use mz_timely_util::scope_label::ScopeExt;
Expand Down Expand Up @@ -163,12 +162,13 @@ use crate::logging::compute::{
};
use crate::render::context::{ArrangementFlavor, Context};
use crate::render::continual_task::ContinualTaskCtx;
use crate::render::errors::DataflowErrorSer;
use crate::row_spine::{DatumSeq, RowRowBatcher, RowRowBuilder};
use crate::typedefs::{ErrBatcher, ErrBuilder, ErrSpine, KeyBatcher, MzTimestamp};

pub mod context;
pub(crate) mod continual_task;
mod errors;
pub(crate) mod errors;
mod flat_map;
mod join;
mod reduce;
Expand Down Expand Up @@ -282,21 +282,22 @@ pub fn build_compute_dataflow<A: Allocate>(

// Note: For correctness, we require that sources only emit times advanced by
// `dataflow.as_of`. `persist_source` is documented to provide this guarantee.
let (mut ok_stream, err_stream, token) = persist_source::persist_source(
inner,
*source_id,
Arc::clone(&compute_state.persist_clients),
&compute_state.txns_ctx,
import.desc.storage_metadata.clone(),
read_schema,
dataflow.as_of.clone(),
snapshot_mode,
until.clone(),
mfp.as_mut(),
compute_state.dataflow_max_inflight_bytes(),
start_signal.clone(),
ErrorHandler::Halt("compute_import"),
);
let (mut ok_stream, err_stream, token) =
persist_source::persist_source::<_, DataflowErrorSer>(
inner,
*source_id,
Arc::clone(&compute_state.persist_clients),
&compute_state.txns_ctx,
import.desc.storage_metadata.clone(),
read_schema,
dataflow.as_of.clone(),
snapshot_mode,
until.clone(),
mfp.as_mut(),
compute_state.dataflow_max_inflight_bytes(),
start_signal.clone(),
ErrorHandler::Halt("compute_import"),
);

// If `mfp` is non-identity, we need to apply what remains.
// For the moment, assert that it is either trivial or `None`.
Expand Down Expand Up @@ -967,9 +968,9 @@ where
oks = VecCollection::new(in_limit);
if !limit.return_at_limit {
err = err.concat(VecCollection::new(over_limit).map(move |_data| {
DataflowError::EvalError(Box::new(EvalError::LetRecLimitExceeded(
DataflowErrorSer::from(EvalError::LetRecLimitExceeded(
format!("{}", limit.max_iters.get()).into(),
)))
))
}));
}
}
Expand Down Expand Up @@ -1202,7 +1203,7 @@ where
.into_iter()
.map(move |e| {
(
DataflowError::from(e),
DataflowErrorSer::from(e),
<G::Timestamp as Refines<mz_repr::Timestamp>>::to_inner(error_time),
Diff::ONE,
)
Expand Down
25 changes: 15 additions & 10 deletions src/compute/src/render/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ use mz_ore::soft_assert_or_log;
use mz_repr::fixed_length::ToDatumIter;
use mz_repr::{DatumVec, DatumVecBorrow, Diff, GlobalId, Row, RowArena, SharedRow};
use mz_storage_types::controller::CollectionMetadata;
use mz_storage_types::errors::DataflowError;
use mz_timely_util::columnar::builder::ColumnBuilder;
use mz_timely_util::columnar::{Col2ValBatcher, columnar_exchange};
use mz_timely_util::operator::CollectionExt;
Expand All @@ -44,7 +43,7 @@ use timely::progress::{Antichain, Timestamp};

use crate::compute_state::ComputeState;
use crate::extensions::arrange::{KeyCollection, MzArrange, MzArrangeCore};
use crate::render::errors::ErrorLogger;
use crate::render::errors::{DataflowErrorSer, ErrorLogger};
use crate::render::{LinearJoinSpec, RenderTimestamp};
use crate::row_spine::{DatumSeq, RowRowBuilder};
use crate::typedefs::{
Expand Down Expand Up @@ -267,7 +266,7 @@ where
&self,
) -> (
VecCollection<S, Row, Diff>,
VecCollection<S, DataflowError, Diff>,
VecCollection<S, DataflowErrorSer, Diff>,
) {
let mut datums = DatumVec::new();
let logic = move |k: DatumSeq, v: DatumSeq| {
Expand Down Expand Up @@ -305,7 +304,10 @@ where
key: Option<&Row>,
max_demand: usize,
mut logic: L,
) -> (StreamVec<S, I::Item>, VecCollection<S, DataflowError, Diff>)
) -> (
StreamVec<S, I::Item>,
VecCollection<S, DataflowErrorSer, Diff>,
)
where
I: IntoIterator<Item = (D, S::Timestamp, Diff)>,
D: Data,
Expand Down Expand Up @@ -402,7 +404,7 @@ where
{
pub collection: Option<(
VecCollection<S, Row, Diff>,
VecCollection<S, DataflowError, Diff>,
VecCollection<S, DataflowErrorSer, Diff>,
)>,
pub arranged: BTreeMap<Vec<MirScalarExpr>, ArrangementFlavor<S, T>>,
}
Expand All @@ -415,7 +417,7 @@ where
/// Construct a new collection bundle from update streams.
pub fn from_collections(
oks: VecCollection<S, Row, Diff>,
errs: VecCollection<S, DataflowError, Diff>,
errs: VecCollection<S, DataflowErrorSer, Diff>,
) -> Self {
Self {
collection: Some((oks, errs)),
Expand Down Expand Up @@ -526,7 +528,7 @@ where
config_set: &ConfigSet,
) -> (
VecCollection<S, Row, Diff>,
VecCollection<S, DataflowError, Diff>,
VecCollection<S, DataflowErrorSer, Diff>,
) {
// Any operator that uses this method was told to use a particular
// collection during LIR planning, where we should have made
Expand Down Expand Up @@ -576,7 +578,10 @@ where
key_val: Option<(Vec<MirScalarExpr>, Option<Row>)>,
max_demand: usize,
mut logic: L,
) -> (StreamVec<S, I::Item>, VecCollection<S, DataflowError, Diff>)
) -> (
StreamVec<S, I::Item>,
VecCollection<S, DataflowErrorSer, Diff>,
)
where
I: IntoIterator<Item = (D, S::Timestamp, Diff)>,
D: Data,
Expand Down Expand Up @@ -713,7 +718,7 @@ where
config_set: &ConfigSet,
) -> (
VecCollection<S, mz_repr::Row, Diff>,
VecCollection<S, DataflowError, Diff>,
VecCollection<S, DataflowErrorSer, Diff>,
) {
mfp.optimize();
let mfp_plan = mfp.clone().into_plan().unwrap();
Expand Down Expand Up @@ -868,7 +873,7 @@ where
thinning: Vec<usize>,
) -> (
Arranged<S, RowRowAgent<S::Timestamp, Diff>>,
VecCollection<S, DataflowError, Diff>,
VecCollection<S, DataflowErrorSer, Diff>,
VecCollection<S, Row, Diff>,
) {
// This operator implements a `map_fallible`, but produces columnar updates for the ok
Expand Down
10 changes: 5 additions & 5 deletions src/compute/src/render/continual_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@ use mz_persist_types::codec_impls::UnitSchema;
use mz_repr::{Diff, GlobalId, Row, Timestamp};
use mz_storage_types::StorageDiff;
use mz_storage_types::controller::CollectionMetadata;
use mz_storage_types::errors::DataflowError;
use mz_storage_types::sources::SourceData;
use mz_timely_util::builder_async::{Button, Event, OperatorBuilder as AsyncOperatorBuilder};
use mz_timely_util::operator::CollectionExt;
Expand All @@ -188,6 +187,7 @@ use tracing::debug;

use crate::compute_state::ComputeState;
use crate::render::StartSignal;
use crate::render::errors::DataflowErrorSer;
use crate::render::sinks::SinkRender;
use crate::sink::ConsolidatingVec;

Expand Down Expand Up @@ -270,10 +270,10 @@ impl ContinualTaskSourceTransformer {
pub fn transform<S: Scope<Timestamp = Timestamp>>(
&self,
oks: VecCollection<S, Row, Diff>,
errs: VecCollection<S, DataflowError, Diff>,
errs: VecCollection<S, DataflowErrorSer, Diff>,
) -> (
VecCollection<S, Row, Diff>,
VecCollection<S, DataflowError, Diff>,
VecCollection<S, DataflowErrorSer, Diff>,
VecCollection<S, (), Diff>,
) {
use ContinualTaskSourceTransformer::*;
Expand Down Expand Up @@ -435,15 +435,15 @@ where
as_of: Antichain<Timestamp>,
start_signal: StartSignal,
oks: VecCollection<G, Row, Diff>,
errs: VecCollection<G, DataflowError, Diff>,
errs: VecCollection<G, DataflowErrorSer, Diff>,
append_times: Option<VecCollection<G, (), Diff>>,
flow_control_probe: &probe::Handle<Timestamp>,
) -> Option<Rc<dyn Any>> {
let name = sink_id.to_string();

let to_append = oks
.map(|x| SourceData(Ok(x)))
.concat(errs.map(|x| SourceData(Err(x))));
.concat(errs.map(|x| SourceData(Err(x.deserialize()))));
let append_times = append_times.expect("should be provided by ContinualTaskCtx");

let write_handle = {
Expand Down
Loading
Loading