diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d1b566..6b2245c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### New features * New `DataFrame.Typed.TH.deriveSchemaFromType` Template Haskell splice generates a typed schema synonym and a `HasSchema` instance from a Haskell record ADT. Pair with `DataFrame.fromRecords` / `DataFrame.toRecords` (or `DataFrame.Typed.fromRecordsTyped` / `toRecordsTyped`) to convert between `[Order]` and `DataFrame`/`TypedDataFrame OrderSchema`. Field names are translated `camelCase → snake_case` by default; the transform is configurable via `SchemaOptions`. * New `DataFrame.Typed.Generic` exposes `SchemaOf`/`SchemaOfRaw` plus `genericToColumns` / `genericFromColumns`, so users who prefer `GHC.Generics` over a TH splice can derive the same schema and row bridge. -* New `DataFrame.Internal.Schema.deriveSchema` Template Haskell splice generates, from a record ADT, both a runtime `Schema` value (`orderSchema :: Schema`, suitable for `readCsvWithSchema` / `readCsvWithOpts`) and one `Expr` accessor per field (`orderCustomerId :: Expr Int`, etc.), so expression-DSL code can refer to columns by typed name without writing `col @T "snake_case_name"` at each call site. Re-exported from `DataFrame`. +* New `DataFrame.Internal.Schema.deriveSchema` Template Haskell splice generates, from a record ADT, both a runtime `Schema` value (`orderSchema :: Schema`, suitable for `readCsvWithSchema` / `readCsvWithOpts`) and one `Expr` accessor per field (`orderCustomerId :: Expr Int`, etc.), so expression-DSL code can refer to columns by typed name without writing `col T "snake_case_name"` at each call site. Re-exported from `DataFrame`. 
### Refactor * The untyped Template Haskell splices (`declareColumns`, `declareColumnsFromCsvFile`, `declareColumnsFromCsvWithOpts`, `declareColumnsFromParquetFile`, `declareColumnsWithPrefix`, `declareColumnsWithPrefix'`) have moved from `DataFrame.Functions` to a new `DataFrame.TH` module (re-exported from `DataFrame`). Update imports accordingly; the bundled `dataframe.ghci` already points to the new module. @@ -413,7 +413,7 @@ ```haskell ghci> df |> D.innerJoin ["key_1", "key_2"] other ``` -* Aggregations are now expressions allowing for more expressive aggregation logic. Previously: `D.aggregate [("quantity", D.Mean), ("price", D.Sum)] df` now ``D.aggregate [(F.sum (F.col @Double "label") / (F.count (F.col @Double "label")) `F.as` "positive_rate")]`` +* Aggregations are now expressions allowing for more expressive aggregation logic. Previously: `D.aggregate [("quantity", D.Mean), ("price", D.Sum)] df` now ``D.aggregate [(F.sum (F.col Double "label") / (F.count (F.col Double "label")) `F.as` "positive_rate")]`` * In GHCI, you can now create type-safe bindings for each column and use those in expressions. ```haskell @@ -451,7 +451,7 @@ let withTotalPrice = D.deriveFrom (["quantity", "item_price"], D.func multiply) Now, we have a column expression syntax that mirrors Pyspark and Polars. 
```haskell -let withTotalPrice = D.derive "total_price" (D.lift fromIntegral (D.col @Int "quantity") * (D.col @Double"item_price")) df +let withTotalPrice = D.derive "total_price" (D.lift fromIntegral (D.col Int "quantity") * (D.col Double "item_price")) df ``` ### Adds a coverage report to the repository (thanks to @oforero) diff --git a/README.md b/README.md index 0a6010b..f44c6c1 100644 --- a/README.md +++ b/README.md @@ -101,8 +101,8 @@ sales = D.fromNamedColumns -- Group by product and compute totals sales |> D.groupBy ["product"] - |> D.aggregate [ F.sum (F.col @Int "amount") `as` "total" - , F.count (F.col @Int "amount") `as` "orders" + |> D.aggregate [ F.sum (F.col Int "amount") `as` "total" + , F.count (F.col Int "amount") `as` "orders" ] |> D.toMarkdown' ``` @@ -166,7 +166,7 @@ D.describeColumns df |> D.toMarkdown' > | longitude | 20640 | 0 | Double | -The `:declareColumns` macro (`$(D.declareColumns df)` outside the REPL) generates typed column references from a dataframe, so you can use column names directly in expressions instead of writing `F.col @Double "median_income"` every time: +The `:declareColumns` macro (`$(D.declareColumns df)` outside the REPL) generates typed column references from a dataframe, so you can use column names directly in expressions instead of writing `F.col Double "median_income"` every time: ```haskell @@ -265,8 +265,8 @@ Compare this to the manual version which requires spelling out every column name ```haskell -- Without TH — every column needs its name and type spelled out df |> D.derive "rooms_per_household" - (F.col @Double "total_rooms" / F.col @Double "households") - |> D.filterWhere (F.col @Double "median_income" .>. F.lit 5) + (F.col Double "total_rooms" / F.col Double "households") + |> D.filterWhere (F.col Double "median_income" .>. 
F.lit 5) |> D.take 5 |> D.toMarkdown' ``` @@ -378,7 +378,7 @@ typed-dataframe machinery), there's a companion splice in $(D.deriveSchema ''Order) -- emits: -- orderSchema :: Schema --- orderSchema = makeSchema [("order_id", schemaType @Int64), ...] +-- orderSchema = makeSchema [("order_id", schemaType Int64), ...] -- orderOrderId :: Expr Int64 -- orderOrderId = col "order_id" -- orderRegion :: Expr Text @@ -441,8 +441,8 @@ employees <- D.readCsv "./data/employees.csv" case DT.freeze @EmployeeSchema employees of Nothing -> "Schema mismatch!" Just tdf -> tdf - |> DT.derive @"bonus" (DT.col @"salary" * DT.lit 0.1) - |> DT.filterWhere (DT.col @"salary" DT..>. DT.lit 50000) + |> DT.derive "bonus" (DT.col "salary" * DT.lit 0.1) + |> DT.filterWhere (DT.col "salary" DT..>. DT.lit 50000) |> DT.select @'["name", "bonus"] |> DT.thaw |> D.toMarkdown' @@ -462,11 +462,11 @@ case DT.freeze @EmployeeSchema employees of ```text -- Typo in column name -> compile error -tdf |> DT.filterWhere (DT.col @"slary" DT..>. DT.lit 50000) +tdf |> DT.filterWhere (DT.col "slary" DT..>. DT.lit 50000) -- error: Column "slary" not found in schema -- Wrong type -> compile error -tdf |> DT.filterWhere (DT.col @"name" DT..>. DT.lit 50000) +tdf |> DT.filterWhere (DT.col "name" DT..>. DT.lit 50000) -- error: Couldn't match type 'Text' with 'Double' ``` @@ -486,7 +486,7 @@ Just stdf = DT.freeze @ScoreSchema scoresDf -- filterAllJust drops the null row and changes the column type from -- (Maybe Double) to Double, so `scaled` can multiply it directly. 
-DT.thaw (DT.filterAllJust stdf |> DT.derive @"scaled" (DT.col @"score" * DT.lit 100)) |> D.toMarkdown' +DT.thaw (DT.filterAllJust stdf |> DT.derive "scaled" (DT.col "score" * DT.lit 100)) |> D.toMarkdown' ``` > @@ -502,7 +502,7 @@ DT.thaw (DT.filterAllJust stdf |> DT.derive @"scaled" (DT.col @"score" * DT.lit **Operations**: filter, select, derive, groupBy, aggregate, joins (inner, left, right, full outer), sort, sample, stratified sample, distinct, k-fold splits. -**Expressions**: typed column references (`F.col @Double "x"`), arithmetic, comparisons, logical operators, nullable-aware three-valued logic (`.==`, `.&&`), string matching (`like`, `regex`), casting, and user-defined functions via `lift`/`lift2`. +**Expressions**: typed column references (`F.col Double "x"`), arithmetic, comparisons, logical operators, nullable-aware three-valued logic (`.==`, `.&&`), string matching (`like`, `regex`), casting, and user-defined functions via `lift`/`lift2`. **Statistics**: mean, median, mode, variance, standard deviation, percentiles, inter-quartile range, correlation, skewness, frequency tables, imputation. 
@@ -526,23 +526,23 @@ import qualified DataFrame.Lazy as L import DataFrame.Internal.Schema (schemaType, makeSchema) housingSchema = makeSchema - [ ("longitude", schemaType @Double) - , ("latitude", schemaType @Double) - , ("housing_median_age", schemaType @Double) - , ("total_rooms", schemaType @Double) - , ("total_bedrooms", schemaType @(Maybe Double)) - , ("population", schemaType @Double) - , ("households", schemaType @Double) - , ("median_income", schemaType @Double) - , ("median_house_value", schemaType @Double) - , ("ocean_proximity", schemaType @Text) + [ ("longitude", schemaType Double) + , ("latitude", schemaType Double) + , ("housing_median_age", schemaType Double) + , ("total_rooms", schemaType Double) + , ("total_bedrooms", schemaType (Maybe Double)) + , ("population", schemaType Double) + , ("households", schemaType Double) + , ("median_income", schemaType Double) + , ("median_house_value", schemaType Double) + , ("ocean_proximity", schemaType Text) ] lazyResult <- L.runDataFrame $ L.scanCsv housingSchema "./data/housing.csv" - |> L.filter (F.col @Double "median_income" .>. F.lit 5) + |> L.filter (F.col Double "median_income" .>. 
F.lit 5) |> L.derive "value_per_income" - (F.col @Double "median_house_value" / F.col @Double "median_income") + (F.col Double "median_house_value" / F.col Double "median_income") |> L.select ["ocean_proximity", "median_house_value", "value_per_income"] |> L.take 1000 diff --git a/app/Benchmark.hs b/app/Benchmark.hs index f686738..cb9e155 100644 --- a/app/Benchmark.hs +++ b/app/Benchmark.hs @@ -23,14 +23,14 @@ main = do let generationTime = diffUTCTime endGeneration startGeneration putStrLn $ "Data generation Time: " ++ show generationTime startCalculation <- getCurrentTime - print $ D.mean (F.col @Double "0") df - print $ D.variance (F.col @Double "1") df + print $ D.mean (F.col Double "0") df + print $ D.variance (F.col Double "1") df print $ D.correlation "1" "2" df endCalculation <- getCurrentTime let calculationTime = diffUTCTime endCalculation startCalculation putStrLn $ "Calculation Time: " ++ show calculationTime startFilter <- getCurrentTime - print $ D.filter (F.col @Double "0") (> 0.971) df D.|> D.take 10 + print $ D.filter (F.col Double "0") (> 0.971) df D.|> D.take 10 endFilter <- getCurrentTime let filterTime = diffUTCTime endFilter startFilter putStrLn $ "Filter Time: " ++ show filterTime diff --git a/app/LazyBenchmark.hs b/app/LazyBenchmark.hs index 7b67e9a..cfc6bff 100644 --- a/app/LazyBenchmark.hs +++ b/app/LazyBenchmark.hs @@ -214,10 +214,10 @@ main = do let schema = Schema $ M.fromList - [ ("id", schemaType @Int) - , ("x", schemaType @Double) - , ("y", schemaType @Double) - , ("category", schemaType @T.Text) + [ ("id", schemaType Int) + , ("x", schemaType Double) + , ("y", schemaType Double) + , ("category", schemaType T.Text) ] -- Q1: Preview — limit 20, no filter. @@ -233,7 +233,7 @@ main = do runQuery "Q2 — filter (x > 0.999), limit 20" $ L.runDataFrame $ L.take 20 $ - L.filter (col @Double "x" .> lit (0.999 :: Double)) $ + L.filter (col Double "x" .> lit (0.999 :: Double)) $ L.scanCsv schema pathT -- Q3: Filter + derive + select + limit. 
@@ -243,8 +243,8 @@ main = do L.runDataFrame $ L.take 20 $ L.select ["id", "z"] $ - L.derive "z" (col @Double "x" * col @Double "y") $ - L.filter (col @Double "x" .> lit (0.999 :: Double)) $ + L.derive "z" (col Double "x" * col Double "y") $ + L.filter (col Double "x" .> lit (0.999 :: Double)) $ L.scanCsv schema pathT -- Q4: Filter fusion demo. @@ -254,8 +254,8 @@ main = do runQuery "Q4 — filter fusion: (x > 0.5) . (y > 0.5), limit 20" $ L.runDataFrame $ L.take 20 $ - L.filter (col @Double "y" .> lit (0.5 :: Double)) $ - L.filter (col @Double "x" .> lit (0.5 :: Double)) $ + L.filter (col Double "y" .> lit (0.5 :: Double)) $ + L.filter (col Double "x" .> lit (0.5 :: Double)) $ L.scanCsv schema pathT -- Q5: Full scan, heavy filter, count results. @@ -269,7 +269,7 @@ main = do ) $ L.runDataFrame $ L.select ["id", "x"] - $ L.filter (col @Double "x" .> lit (0.999 :: Double)) + $ L.filter (col Double "x" .> lit (0.999 :: Double)) $ L.scanCsv schema pathT putStrLn "\nDone." diff --git a/app/Synthesis.hs b/app/Synthesis.hs index b9361fb..b663439 100644 --- a/app/Synthesis.hs +++ b/app/Synthesis.hs @@ -31,7 +31,7 @@ type RawPredSchema = '[DT.Column "Survived" (Maybe Int), DT.Column "prediction" Int] prediction :: D.Expr Int -prediction = F.col @Int "prediction" +prediction = F.col "prediction" main :: IO () main = do @@ -39,9 +39,9 @@ main = do rawTest <- D.readCsv "./data/titanic/test.csv" train <- - maybe (fail "train.csv schema mismatch") pure (DT.freeze @TrainSchema rawTrain) + maybe (fail "train.csv schema mismatch") pure (DT.freeze TrainSchema rawTrain) test <- - maybe (fail "test.csv schema mismatch") pure (DT.freeze @TestSchema rawTest) + maybe (fail "test.csv schema mismatch") pure (DT.freeze TestSchema rawTest) let (trainDf, validDf) = D.randomSplit (mkStdGen 4232) 0.7 (DT.thaw (clean train)) @@ -66,7 +66,7 @@ main = do } } ) - (F.fromMaybe 0 (F.col @(Maybe Int) "Survived")) + (F.fromMaybe 0 (F.col (Maybe Int) "Survived")) (trainDf |> D.exclude ["PassengerId"]) 
print model @@ -95,7 +95,7 @@ clean :: DT.TypedDataFrame cols -> DT.TypedDataFrame ( DT.RenameManyInSchema - '[ '("Name", "title") + [ '("Name", "title") , '("Cabin", "cabin_prefix") , '("Pclass", "passenger_class") , '("SibSp", "number_of_siblings_and_spouses") @@ -105,15 +105,15 @@ clean :: ) clean tdf = tdf - |> DT.replaceColumn @"Ticket" (DT.nullLift (T.filter isAlpha) (DT.col @"Ticket")) - |> DT.replaceColumn @"Name" (DT.nullLift extractTitle (DT.col @"Name")) - |> DT.replaceColumn @"Cabin" (DT.nullLift (T.take 1) (DT.col @"Cabin")) + |> DT.replaceColumn "Ticket" (DT.nullLift (T.filter isAlpha) (DT.col "Ticket")) + |> DT.replaceColumn "Name" (DT.nullLift extractTitle (DT.col "Name")) + |> DT.replaceColumn "Cabin" (DT.nullLift (T.take 1) (DT.col "Cabin")) |> DT.renameMany - @'[ '("Name", "title") - , '("Cabin", "cabin_prefix") - , '("Pclass", "passenger_class") - , '("SibSp", "number_of_siblings_and_spouses") - , '("Parch", "number_of_parents_and_children") + [ ("Name", "title") + , ("Cabin", "cabin_prefix") + , ("Pclass", "passenger_class") + , ("SibSp", "number_of_siblings_and_spouses") + , ("Parch", "number_of_parents_and_children") ] -- | Extract title (e.g. "Mr", "Mrs") from a full Titanic passenger name. @@ -129,11 +129,11 @@ extractTitle fullName = computeAccuracy :: D.DataFrame -> Double computeAccuracy df = let tdf = - DT.impute @"Survived" 0 $ + DT.impute "Survived" 0 $ DT.unsafeFreeze @RawPredSchema $ df |> D.select ["Survived", "prediction"] - survived = DT.col @"Survived" - predCol = DT.col @"prediction" + survived = DT.col "Survived" + predCol = DT.col "prediction" count expr = fromIntegral (DT.nRows (DT.filterWhere expr tdf)) tp = count ((survived DT..==. DT.lit 1) DT..&&. (predCol DT..==. DT.lit 1)) tn = count ((survived DT..==. DT.lit 0) DT..&&. (predCol DT..==. 
DT.lit 0)) diff --git a/benchmark/Main.hs b/benchmark/Main.hs index 172e97d..a5b2f8b 100644 --- a/benchmark/Main.hs +++ b/benchmark/Main.hs @@ -59,9 +59,9 @@ groupByHaskell = do df |> D.groupBy ["ocean_proximity"] |> D.aggregate - [ F.minimum (F.col @Double "median_house_value") + [ F.minimum (F.col Double "median_house_value") `as` "minimum_median_house_value" - , F.maximum (F.col @Double "median_house_value") + , F.maximum (F.col Double "median_house_value") `as` "maximum_median_house_value" ] diff --git a/dataframe-arrow/ffi-export/DataFrame/FFI.hs b/dataframe-arrow/ffi-export/DataFrame/FFI.hs index b849c85..a036ad6 100644 --- a/dataframe-arrow/ffi-export/DataFrame/FFI.hs +++ b/dataframe-arrow/ffi-export/DataFrame/FFI.hs @@ -1,9 +1,9 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE ExplicitNamespaces #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE ForeignFunctionInterface #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} @@ -254,30 +254,30 @@ inferTargetType target df = dispatchType (columnTypeRep (unsafeGetColumn target fitTreeWithType :: T.Text -> TreeConfig -> T.Text -> DataFrame -> IO BS.ByteString fitTreeWithType ttag cfg target df = case ttag of - "int" -> fit @Int - "int8" -> fit @Int8 - "int16" -> fit @Int16 - "int32" -> fit @Int32 - "int64" -> fit @Int64 - "word" -> fit @Word - "word8" -> fit @Word8 - "word16" -> fit @Word16 - "word32" -> fit @Word32 - "word64" -> fit @Word64 - "integer" -> fit @Integer - "double" -> fit @Double - "float" -> fit @Float - "bool" -> fit @Bool - "char" -> fit @Char - "text" -> fit @T.Text - "string" -> fit @String + "int" -> fit Int + "int8" -> fit Int8 + "int16" -> fit Int16 + "int32" -> fit Int32 + "int64" -> fit Int64 + "word" -> fit Word + "word8" -> fit Word8 + "word16" -> fit Word16 + "word32" -> fit Word32 + "word64" -> fit Word64 + "integer" -> fit Integer + "double" -> fit Double + 
"float" -> fit Float + "bool" -> fit Bool + "char" -> fit Char + "text" -> fit T.Text + "string" -> fit String other -> ioError . userError $ "DataFrame.FFI.fitTreeWithType: unsupported target type tag: " ++ T.unpack other where - fit :: forall a. (Columnable a, Ord a) => IO BS.ByteString - fit = do + fit :: forall a -> (Columnable a, Ord a) => IO BS.ByteString + fit a = do let expr = fitDecisionTree @a cfg (Col @a target) df case encodeExprToBytes expr of Right bs -> return bs diff --git a/dataframe-core/src/DataFrame/Internal/Column.hs b/dataframe-core/src/DataFrame/Internal/Column.hs index b1aa539..363c36d 100644 --- a/dataframe-core/src/DataFrame/Internal/Column.hs +++ b/dataframe-core/src/DataFrame/Internal/Column.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DataKinds #-} @@ -11,8 +10,11 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PolyKinds #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeAbstractions #-} +{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UndecidableInstances #-} @@ -158,7 +160,7 @@ Always attaches a bitmap so the column is recognized as nullable even when no 'Nothing' values are present (preserves the Maybe type marker). -} fromMaybeVec :: forall a. (Columnable a) => VB.Vector (Maybe a) -> Column -fromMaybeVec v = case sUnbox @a of +fromMaybeVec v = case sUnbox a of STrue -> fromMaybeVecUnboxed v SFalse -> let n = VB.length v @@ -233,7 +235,7 @@ allMissing _ = False -- | Checks if a column contains numeric values. 
isNumeric :: Column -> Bool -isNumeric (UnboxedColumn _ (_vec :: VU.Vector a)) = case sNumeric @a of +isNumeric (UnboxedColumn _ (_vec :: VU.Vector a)) = case sNumeric a of STrue -> True _ -> False isNumeric (BoxedColumn _ (_vec :: VB.Vector a)) = case testEquality (typeRep @a) (typeRep @Integer) of @@ -244,8 +246,8 @@ isNumeric (BoxedColumn _ (_vec :: VB.Vector a)) = case testEquality (typeRep @a) For nullable columns (@BoxedColumn (Just _)@ or @UnboxedColumn (Just _)@), also returns @True@ when @a = Maybe b@ and the column stores @b@ internally. -} -hasElemType :: forall a. (Columnable a) => Column -> Bool -hasElemType = \case +hasElemType :: forall a -> (Columnable a) => Column -> Bool +hasElemType a = \case BoxedColumn bm (_column :: VB.Vector b) -> checkBoxed bm (typeRep @b) UnboxedColumn bm (_column :: VU.Vector b) -> checkUnboxed bm (typeRep @b) where @@ -277,13 +279,13 @@ of a column. -} columnTypeString :: Column -> String columnTypeString column = case column of - BoxedColumn Nothing (_ :: VB.Vector a) -> show (typeRep @a) - BoxedColumn (Just _) (_ :: VB.Vector a) -> showMaybeType @a - UnboxedColumn Nothing (_ :: VU.Vector a) -> show (typeRep @a) - UnboxedColumn (Just _) (_ :: VU.Vector a) -> showMaybeType @a + BoxedColumn @a Nothing _ -> show (typeRep @a) + BoxedColumn @a (Just _) _ -> showMaybeType a + UnboxedColumn @a Nothing _ -> show (typeRep @a) + UnboxedColumn @a (Just _) _ -> showMaybeType a where - showMaybeType :: forall a. (Typeable a) => String - showMaybeType = + showMaybeType :: forall a -> (Typeable a) => String + showMaybeType a = let s = show (typeRep @a) in "Maybe " ++ if ' ' `elem` s then "(" ++ s ++ ")" else s @@ -370,7 +372,7 @@ instance Eq Column where Given each Rep we tell the `toColumnRep` function which Column type to pick. -} class ColumnifyRep (r :: Rep) a where - toColumnRep :: VB.Vector a -> Column + toColumnRep :: forall r' -> (r ~ r') => VB.Vector a -> Column -- | Constraint synonym for what we can put into columns. 
type Columnable a = @@ -389,22 +391,19 @@ instance (Columnable a, VU.Unbox a) => ColumnifyRep 'RUnboxed a where - toColumnRep :: (Columnable a, VUM.Unbox a) => VB.Vector a -> Column - toColumnRep v = UnboxedColumn Nothing (VU.convert v) + toColumnRep _ v = UnboxedColumn Nothing (VU.convert v) instance (Columnable a) => ColumnifyRep 'RBoxed a where - toColumnRep :: (Columnable a) => VB.Vector a -> Column - toColumnRep = BoxedColumn Nothing + toColumnRep _ = BoxedColumn Nothing instance (Columnable a) => ColumnifyRep 'RNullableBoxed (Maybe a) where - toColumnRep :: (Columnable a) => VB.Vector (Maybe a) -> Column - toColumnRep = fromMaybeVec + toColumnRep _ = fromMaybeVec {- | O(n) Convert a vector to a column. Automatically picks the best representation of a vector to store the underlying data in. @@ -420,7 +419,7 @@ fromVector :: forall a. (Columnable a, ColumnifyRep (KindOf a) a) => VB.Vector a -> Column -fromVector = toColumnRep @(KindOf a) +fromVector = toColumnRep (KindOf a) {- | O(n) Convert an unboxed vector to a column. This avoids the extra conversion if you already have the data in an unboxed vector. @@ -449,7 +448,7 @@ fromList :: forall a. (Columnable a, ColumnifyRep (KindOf a) a) => [a] -> Column -fromList = toColumnRep @(KindOf a) . VB.fromList +fromList = toColumnRep (KindOf a) . VB.fromList {- | O(n) Create a column of random elements within a range. @@ -477,9 +476,9 @@ mkRandom pureGen k lo hi = fromList $ go pureGen k -- An internal helper for type errors throwTypeMismatch :: - forall (a :: Type) (b :: Type). + forall (a :: Type) (b :: Type) -> (Typeable a, Typeable b) => Either DataFrameException Column -throwTypeMismatch = +throwTypeMismatch a b = Left $ TypeMismatchException MkTypeErrorContext @@ -508,7 +507,7 @@ mapColumn f = \case let !n = VB.length col in -- Build result directly without intermediate Maybe vector to avoid -- fusion forcing null slots via VU.convert. 
- Right $ case sUnbox @c of + Right $ case sUnbox c of STrue -> UnboxedColumn Nothing $ VU.generate n $ \i -> f @@ -526,10 +525,10 @@ mapColumn f = \case Nothing -> case testEquality (typeRep @a) (typeRep @b) of Just Refl -> -- user maps over inner type a; preserve bitmap - Right $ case sUnbox @c of + Right $ case sUnbox c of STrue -> UnboxedColumn bm (VU.generate (VB.length col) (f . VB.unsafeIndex col)) SFalse -> BoxedColumn bm (VB.map f col) - Nothing -> throwTypeMismatch @a @b + Nothing -> throwTypeMismatch a b runUnboxed :: forall a. @@ -538,7 +537,7 @@ mapColumn f = \case runUnboxed bm col = case testEquality (typeRep @b) (typeRep @(Maybe a)) of Just Refl -> let !n = VU.length col - in Right $ case sUnbox @c of + in Right $ case sUnbox c of STrue -> UnboxedColumn Nothing $ VU.generate n $ \i -> f @@ -554,10 +553,10 @@ mapColumn f = \case else Nothing ) Nothing -> case testEquality (typeRep @a) (typeRep @b) of - Just Refl -> Right $ case sUnbox @c of + Just Refl -> Right $ case sUnbox c of STrue -> UnboxedColumn bm (VU.map f col) SFalse -> BoxedColumn bm (VB.generate (VU.length col) (f . VU.unsafeIndex col)) - Nothing -> throwTypeMismatch @a @b + Nothing -> throwTypeMismatch a b {-# INLINEABLE mapColumn #-} -- | Applies a function that returns an unboxed result to an unboxed vector, storing the result in a column. @@ -574,23 +573,23 @@ imapColumn f = \case (Columnable a) => Maybe Bitmap -> VB.Vector a -> Either DataFrameException Column runBoxed bm col = case testEquality (typeRep @a) (typeRep @b) of - Just Refl -> Right $ case sUnbox @c of + Just Refl -> Right $ case sUnbox c of STrue -> UnboxedColumn bm (VU.generate (VB.length col) (\i -> f i (VB.unsafeIndex col i))) SFalse -> BoxedColumn bm (VB.imap f col) - Nothing -> throwTypeMismatch @a @b + Nothing -> throwTypeMismatch a b runUnboxed :: forall a. 
(Columnable a, VU.Unbox a) => Maybe Bitmap -> VU.Vector a -> Either DataFrameException Column runUnboxed bm col = case testEquality (typeRep @a) (typeRep @b) of - Just Refl -> Right $ case sUnbox @c of + Just Refl -> Right $ case sUnbox c of STrue -> UnboxedColumn bm (VU.imap f col) SFalse -> BoxedColumn bm (VB.imap f (VG.convert col)) - Nothing -> throwTypeMismatch @a @b + Nothing -> throwTypeMismatch a b -- | O(1) Gets the number of elements in the column. columnLength :: Column -> Int @@ -917,7 +916,7 @@ foldLinearGroups f seed col rowToGroup nGroups -- indirection per read/write) and returns UnboxedColumn directly — -- avoiding a round-trip through VB.Vector. runWith :: ((Int -> IO acc) -> (Int -> acc -> IO ()) -> IO ()) -> IO Column - runWith body = case sUnbox @acc of + runWith body = case sUnbox acc of STrue -> do accs <- VUM.replicate nGroups seed body (VUM.unsafeRead accs) (VUM.unsafeWrite accs) @@ -1027,7 +1026,7 @@ zipWithColumns f (UnboxedColumn bmL (column :: VU.Vector d)) (UnboxedColumn bmR -- Fast path: both plain unboxed, no bitmaps involved in the output type | isNothing bmL , isNothing bmR -> - pure $ case sUnbox @c of + pure $ case sUnbox c of STrue -> UnboxedColumn Nothing (VU.zipWith f column other) SFalse -> fromVector $ VB.zipWith f (VG.convert column) (VG.convert other) -- Type mismatch or bitmap involvement: fall through to general toVector path @@ -1468,7 +1467,7 @@ toDoubleVector column = Just Refl -> case bm of Nothing -> Right f Just bitmap -> Right $ VU.imap (\i x -> if bitmapTestBit bitmap i then x else read "NaN") f - Nothing -> case sFloating @a of + Nothing -> case sFloating a of STrue -> Right ( VU.imap @@ -1478,7 +1477,7 @@ toDoubleVector column = ) f ) - SFalse -> case sIntegral @a of + SFalse -> case sIntegral a of STrue -> Right ( VU.imap @@ -1550,7 +1549,7 @@ toFloatVector column = Just Refl -> case bm of Nothing -> Right f Just bitmap -> Right $ VU.imap (\i x -> if bitmapTestBit bitmap i then x else read "NaN") f - Nothing 
-> case sFloating @a of + Nothing -> case sFloating a of STrue -> Right ( VU.imap @@ -1560,7 +1559,7 @@ toFloatVector column = ) f ) - SFalse -> case sIntegral @a of + SFalse -> case sIntegral a of STrue -> Right ( VU.imap @@ -1631,9 +1630,9 @@ toIntVector column = case column of UnboxedColumn _ (f :: VU.Vector a) -> case testEquality (typeRep @a) (typeRep @Int) of Just Refl -> Right f - Nothing -> case sFloating @a of + Nothing -> case sFloating a of STrue -> Right (VU.map (round . (realToFrac :: a -> Double)) f) - SFalse -> case sIntegral @a of + SFalse -> case sIntegral a of STrue -> Right (VU.map fromIntegral f) SFalse -> Left $ diff --git a/dataframe-core/src/DataFrame/Internal/DataFrame.hs b/dataframe-core/src/DataFrame/Internal/DataFrame.hs index d46f720..a8aba08 100644 --- a/dataframe-core/src/DataFrame/Internal/DataFrame.hs +++ b/dataframe-core/src/DataFrame/Internal/DataFrame.hs @@ -1,10 +1,10 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE ExplicitNamespaces #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE InstanceSigs #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} @@ -173,14 +173,14 @@ asTextWith fmt mTrunc d = ) getType :: Column -> T.Text - showMaybeType :: forall a. 
(Typeable a) => String - showMaybeType = + showMaybeType :: forall a -> (Typeable a) => String + showMaybeType a = let s = show (typeRep @a) in "Maybe " <> if ' ' `elem` s then "(" <> s <> ")" else s getType (BoxedColumn Nothing (_ :: V.Vector a)) = T.pack $ show (typeRep @a) - getType (BoxedColumn (Just _) (_ :: V.Vector a)) = T.pack $ showMaybeType @a + getType (BoxedColumn (Just _) (_ :: V.Vector a)) = T.pack $ showMaybeType a getType (UnboxedColumn Nothing (_ :: VU.Vector a)) = T.pack $ show (typeRep @a) - getType (UnboxedColumn (Just _) (_ :: VU.Vector a)) = T.pack $ showMaybeType @a + getType (UnboxedColumn (Just _) (_ :: VU.Vector a)) = T.pack $ showMaybeType a -- Separate out cases dynamically so we don't end up making round trip -- string copies. diff --git a/dataframe-core/src/DataFrame/Internal/Expression.hs b/dataframe-core/src/DataFrame/Internal/Expression.hs index f15898d..955c481 100644 --- a/dataframe-core/src/DataFrame/Internal/Expression.hs +++ b/dataframe-core/src/DataFrame/Internal/Expression.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DisambiguateRecordFields #-} {-# LANGUAGE ExplicitNamespaces #-} {-# LANGUAGE FlexibleContexts #-} @@ -272,7 +271,7 @@ instance (Floating a, Columnable a) => Floating (Expr a) where instance (Show a) => Show (Expr a) where show :: Expr a -> String - show (Col name) = "(col @" ++ show (typeRep @a) ++ " " ++ show name ++ ")" + show (Col name) = "(col " ++ show (typeRep @a) ++ " " ++ show name ++ ")" show (CastWith name tag _) = "(castWith " ++ show tag ++ " " ++ show name ++ ")" show (CastExprWith tag _ inner) = "(castExprWith " ++ show tag ++ " " ++ show inner ++ ")" show (Lit value) = "(lit (" ++ show value ++ "))" diff --git a/dataframe-core/src/DataFrame/Internal/Grouping.hs b/dataframe-core/src/DataFrame/Internal/Grouping.hs index 3fcd9ba..c614894 100644 --- a/dataframe-core/src/DataFrame/Internal/Grouping.hs +++ b/dataframe-core/src/DataFrame/Internal/Grouping.hs @@ -86,11 +86,11 @@ 
groupBy names df case testEquality (typeRep @a) (typeRep @Double) of Just Refl -> hashUnboxed mh ubm mixDouble v Nothing -> - case sIntegral @a of + case sIntegral a of STrue -> hashUnboxed mh ubm (\h d -> mixInt h (fromIntegral @a @Int d)) v SFalse -> - case sFloating @a of + case sFloating a of STrue -> hashUnboxed mh ubm (\h d -> mixDouble h (realToFrac d :: Double)) v SFalse -> diff --git a/dataframe-core/src/DataFrame/Internal/Interpreter.hs b/dataframe-core/src/DataFrame/Internal/Interpreter.hs index 2e10f42..6bf07b6 100644 --- a/dataframe-core/src/DataFrame/Internal/Interpreter.hs +++ b/dataframe-core/src/DataFrame/Internal/Interpreter.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE ExplicitNamespaces #-} {-# LANGUAGE FlexibleContexts #-} @@ -9,6 +8,8 @@ {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeAbstractions #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE UndecidableInstances #-} {-# OPTIONS_GHC -Wno-orphans #-} @@ -341,7 +342,7 @@ materialize _ (Group _) = efficient representation. -} broadcastScalar :: forall a. (Columnable a) => Int -> a -> Column -broadcastScalar n v = case sUnbox @a of +broadcastScalar n v = case sUnbox a of STrue -> fromUnboxedVector (VU.replicate n v) SFalse -> fromVector (V.replicate n v) @@ -402,7 +403,7 @@ branchValue cond (Scalar l) r = branchValue cond l (Scalar r) = liftValue2 (\c lv -> if c then lv else r) cond l branchValue (Flat cc) (Flat lc) (Flat rc) = - Flat <$> branchColumn @a cc lc rc + Flat <$> branchColumn a cc lc rc branchValue (Group cgs) (Group lgs) (Group rgs) | V.length cgs == V.length lgs && V.length lgs == V.length rgs = @@ -410,7 +411,7 @@ branchValue (Group cgs) (Group lgs) (Group rgs) <$> V.generateM (V.length cgs) ( \i -> - branchColumn @a (cgs V.! i) (lgs V.! i) (rgs V.! i) + branchColumn a (cgs V.! i) (lgs V.! i) (rgs V.! 
i) ) branchValue _ _ _ = Left $ @@ -423,13 +424,13 @@ branchValue _ _ _ = columns, produce the element-wise selection. -} branchColumn :: - forall a. + forall a -> (Columnable a) => Column -> Column -> Column -> Either DataFrameException Column -branchColumn cc lc rc = do +branchColumn a cc lc rc = do cs <- toVector @Bool @V.Vector cc ls <- toVector @a @V.Vector lc rs <- toVector @a @V.Vector rc @@ -526,8 +527,8 @@ promoteColumnWith :: (Columnable a, Columnable b, Read a) => (Either String a -> b) -> Column -> Either DataFrameException Column promoteColumnWith onResult col - | hasElemType @b col = Right col - | hasElemType @a col = mapColumn @a (onResult . Right) col + | hasElemType b col = Right col + | hasElemType a col = mapColumn @a (onResult . Right) col | Just result <- tryMaybeWrap @a @b onResult col = result | otherwise = case testEquality (typeRep @a) (typeRep @Double) of @@ -546,19 +547,19 @@ promoteToDoubleWith :: (Either String Double -> b) -> Column -> Either DataFrameException Column promoteToDoubleWith onResult col = case col of UnboxedColumn Nothing (v :: VU.Vector c) -> - case sFloating @c of + case sFloating c of STrue -> Right $ fromVector @b (V.map (onResult . Right . (realToFrac :: c -> Double)) (VG.convert v)) - SFalse -> case sIntegral @c of + SFalse -> case sIntegral c of STrue -> Right $ fromVector @b (V.map (onResult . Right . 
(fromIntegral :: c -> Double)) (VG.convert v)) - SFalse -> castMismatch @c @b + SFalse -> castMismatch c b UnboxedColumn (Just bm) (v :: VU.Vector c) -> - case sFloating @c of + case sFloating c of STrue -> Right $ fromVector @b @@ -567,7 +568,7 @@ promoteToDoubleWith onResult col = case col of then onResult (Right (realToFrac (VU.unsafeIndex v i) :: Double)) else onResult (Left "null") ) - SFalse -> case sIntegral @c of + SFalse -> case sIntegral c of STrue -> Right $ fromVector @b @@ -576,7 +577,7 @@ promoteToDoubleWith onResult col = case col of then onResult (Right (fromIntegral (VU.unsafeIndex v i) :: Double)) else onResult (Left "null") ) - SFalse -> castMismatch @c @b + SFalse -> castMismatch c b BoxedColumn _ _ -> tryParseWith @Double onResult col promoteToFloatWith :: @@ -585,19 +586,19 @@ promoteToFloatWith :: (Either String Float -> b) -> Column -> Either DataFrameException Column promoteToFloatWith onResult col = case col of UnboxedColumn Nothing (v :: VU.Vector c) -> - case sFloating @c of + case sFloating c of STrue -> Right $ fromVector @b (V.map (onResult . Right . (realToFrac :: c -> Float)) (VG.convert v)) - SFalse -> case sIntegral @c of + SFalse -> case sIntegral c of STrue -> Right $ fromVector @b (V.map (onResult . Right . 
(fromIntegral :: c -> Float)) (VG.convert v)) - SFalse -> castMismatch @c @b + SFalse -> castMismatch c b UnboxedColumn (Just bm) (v :: VU.Vector c) -> - case sFloating @c of + case sFloating c of STrue -> Right $ fromVector @b @@ -606,7 +607,7 @@ promoteToFloatWith onResult col = case col of then onResult (Right (realToFrac (VU.unsafeIndex v i) :: Float)) else onResult (Left "null") ) - SFalse -> case sIntegral @c of + SFalse -> case sIntegral c of STrue -> Right $ fromVector @b @@ -615,7 +616,7 @@ promoteToFloatWith onResult col = case col of then onResult (Right (fromIntegral (VU.unsafeIndex v i) :: Float)) else onResult (Left "null") ) - SFalse -> castMismatch @c @b + SFalse -> castMismatch c b BoxedColumn _ _ -> tryParseWith @Float onResult col promoteToIntWith :: @@ -624,19 +625,19 @@ promoteToIntWith :: (Either String Int -> b) -> Column -> Either DataFrameException Column promoteToIntWith onResult col = case col of UnboxedColumn Nothing (v :: VU.Vector c) -> - case sFloating @c of + case sFloating c of STrue -> Right $ fromVector @b (V.map (onResult . Right . (round . (realToFrac :: c -> Double))) (VG.convert v)) - SFalse -> case sIntegral @c of + SFalse -> case sIntegral c of STrue -> Right $ fromVector @b (V.map (onResult . Right . 
(fromIntegral :: c -> Int)) (VG.convert v)) - SFalse -> castMismatch @c @b + SFalse -> castMismatch c b UnboxedColumn (Just bm) (v :: VU.Vector c) -> - case sFloating @c of + case sFloating c of STrue -> Right $ fromVector @b @@ -645,7 +646,7 @@ promoteToIntWith onResult col = case col of then onResult (Right (round (realToFrac (VU.unsafeIndex v i) :: Double))) else onResult (Left "null") ) - SFalse -> case sIntegral @c of + SFalse -> case sIntegral c of STrue -> Right $ fromVector @b @@ -654,7 +655,7 @@ promoteToIntWith onResult col = case col of then onResult (Right (fromIntegral (VU.unsafeIndex v i) :: Int)) else onResult (Left "null") ) - SFalse -> castMismatch @c @b + SFalse -> castMismatch c b BoxedColumn _ _ -> tryParseWith @Int onResult col -- | Single parse primitive: apply @onResult@ to the result of 'reads'. @@ -696,7 +697,7 @@ tryParseWith onResult col = case col of else onResult (Left "null") ) v - Nothing -> castMismatch @c @b + Nothing -> castMismatch c b UnboxedColumn bm (v :: VU.Vector c) -> case bm of Nothing -> Right $ fromVector @b $ V.map (parseWith onResult . show) (V.convert v) Just bitmap -> @@ -742,10 +743,10 @@ tryMaybeWrap _onResult col = case col of _ -> Nothing castMismatch :: - forall src tgt. 
+ forall src tgt -> (Typeable src, Typeable tgt) => Either DataFrameException Column -castMismatch = +castMismatch src tgt = Left $ TypeMismatchException MkTypeErrorContext @@ -775,7 +776,7 @@ eval (FlatCtx df) (Col name) = Nothing -> Left $ ColumnsNotFoundException [name] "" (M.keys $ columnIndices df) Just c - | hasElemType @a c -> Right (Flat c) + | hasElemType a c -> Right (Flat c) | otherwise -> Left $ TypeMismatchException @@ -796,7 +797,7 @@ eval (GroupCtx gdf) (Col name) = "" (M.keys $ columnIndices $ fullDataframe gdf) Just c - | hasElemType @a c -> + | hasElemType a c -> Right (Group (sliceGroups c (offsets gdf) (valueIndices gdf))) | otherwise -> Left $ diff --git a/dataframe-core/src/DataFrame/Internal/Nullable.hs b/dataframe-core/src/DataFrame/Internal/Nullable.hs index d4af803..bae3cda 100644 --- a/dataframe-core/src/DataFrame/Internal/Nullable.hs +++ b/dataframe-core/src/DataFrame/Internal/Nullable.hs @@ -1,7 +1,7 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FunctionalDependencies #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} @@ -364,38 +364,38 @@ When @a ~ b@ the coercions are identity; otherwise one operand is widened (e.g. 'Int' → 'Double'). -} class (Columnable (Promote a b)) => NumericWidenOp a b where - widen1 :: a -> Promote a b - widen2 :: b -> Promote a b + widen1 :: forall a' b' -> (a ~ a', b ~ b') => a -> Promote a b + widen2 :: forall a' b' -> (a ~ a', b ~ b') => b -> Promote a b -- | Same type: identity coercions. 
instance {-# OVERLAPPING #-} (Columnable a) => NumericWidenOp a a where - widen1 = id - widen2 = id + widen1 _ _ = id + widen2 _ _ = id -instance NumericWidenOp Int Double where widen1 = fromIntegral; widen2 = id +instance NumericWidenOp Int Double where widen1 _ _ = fromIntegral; widen2 _ _ = id instance NumericWidenOp Double Int where - widen1 = id - widen2 = fromIntegral -instance NumericWidenOp Float Double where widen1 = realToFrac; widen2 = id + widen1 _ _ = id + widen2 _ _ = fromIntegral +instance NumericWidenOp Float Double where widen1 _ _ = realToFrac; widen2 _ _ = id instance NumericWidenOp Double Float where - widen1 = id - widen2 = realToFrac -instance NumericWidenOp Int32 Float where widen1 = fromIntegral; widen2 = id + widen1 _ _ = id + widen2 _ _ = realToFrac +instance NumericWidenOp Int32 Float where widen1 _ _ = fromIntegral; widen2 _ _ = id instance NumericWidenOp Float Int32 where - widen1 = id - widen2 = fromIntegral -instance NumericWidenOp Int32 Double where widen1 = fromIntegral; widen2 = id + widen1 _ _ = id + widen2 _ _ = fromIntegral +instance NumericWidenOp Int32 Double where widen1 _ _ = fromIntegral; widen2 _ _ = id instance NumericWidenOp Double Int32 where - widen1 = id - widen2 = fromIntegral -instance NumericWidenOp Int64 Float where widen1 = fromIntegral; widen2 = id + widen1 _ _ = id + widen2 _ _ = fromIntegral +instance NumericWidenOp Int64 Float where widen1 _ _ = fromIntegral; widen2 _ _ = id instance NumericWidenOp Float Int64 where - widen1 = id - widen2 = fromIntegral -instance NumericWidenOp Int64 Double where widen1 = fromIntegral; widen2 = id + widen1 _ _ = id + widen2 _ _ = fromIntegral +instance NumericWidenOp Int64 Double where widen1 _ _ = fromIntegral; widen2 _ _ = id instance NumericWidenOp Double Int64 where - widen1 = id - widen2 = fromIntegral + widen1 _ _ = id + widen2 _ _ = fromIntegral -- | Apply an arithmetic function after widening both operands to their common type. 
widenArithOp :: @@ -405,7 +405,7 @@ widenArithOp :: a -> b -> Promote a b -widenArithOp f x y = f (widen1 @a @b x) (widen2 @a @b y) +widenArithOp f x y = f (widen1 a b x) (widen2 a b y) -- | Apply a comparison function after widening both operands to their common type. widenCmpOp :: @@ -415,7 +415,7 @@ widenCmpOp :: a -> b -> Bool -widenCmpOp f x y = f (widen1 @a @b x) (widen2 @a @b y) +widenCmpOp f x y = f (widen1 a b x) (widen2 a b y) -- | Result type of a widening binary operator, accounting for nullable wrappers. type WidenResult a b = NullLift2Result a b (Promote (BaseType a) (BaseType b)) @@ -429,61 +429,61 @@ Floating types still dominate (Double > Float), and any two integral types (same or mixed) are both widened to Double. -} class (Columnable (PromoteDiv a b)) => DivWidenOp a b where - divWiden1 :: a -> PromoteDiv a b - divWiden2 :: b -> PromoteDiv a b + divWiden1 :: forall a' b' -> (a ~ a', b ~ b') => a -> PromoteDiv a b + divWiden2 :: forall a' b' -> (a ~ a', b ~ b') => b -> PromoteDiv a b -- Floating same-type (identity) -instance DivWidenOp Double Double where divWiden1 = id; divWiden2 = id -instance DivWidenOp Float Float where divWiden1 = id; divWiden2 = id +instance DivWidenOp Double Double where divWiden1 _ _ = id; divWiden2 _ _ = id +instance DivWidenOp Float Float where divWiden1 _ _ = id; divWiden2 _ _ = id -- Mixed Double/Float -instance DivWidenOp Double Float where divWiden1 = id; divWiden2 = realToFrac -instance DivWidenOp Float Double where divWiden1 = realToFrac; divWiden2 = id +instance DivWidenOp Double Float where divWiden1 _ _ = id; divWiden2 _ _ = realToFrac +instance DivWidenOp Float Double where divWiden1 _ _ = realToFrac; divWiden2 _ _ = id -- Double beats integral -instance DivWidenOp Double Int where divWiden1 = id; divWiden2 = fromIntegral -instance DivWidenOp Int Double where divWiden1 = fromIntegral; divWiden2 = id -instance DivWidenOp Double Int32 where divWiden1 = id; divWiden2 = fromIntegral -instance DivWidenOp Int32 
Double where divWiden1 = fromIntegral; divWiden2 = id -instance DivWidenOp Double Int64 where divWiden1 = id; divWiden2 = fromIntegral -instance DivWidenOp Int64 Double where divWiden1 = fromIntegral; divWiden2 = id +instance DivWidenOp Double Int where divWiden1 _ _ = id; divWiden2 _ _ = fromIntegral +instance DivWidenOp Int Double where divWiden1 _ _ = fromIntegral; divWiden2 _ _ = id +instance DivWidenOp Double Int32 where divWiden1 _ _ = id; divWiden2 _ _ = fromIntegral +instance DivWidenOp Int32 Double where divWiden1 _ _ = fromIntegral; divWiden2 _ _ = id +instance DivWidenOp Double Int64 where divWiden1 _ _ = id; divWiden2 _ _ = fromIntegral +instance DivWidenOp Int64 Double where divWiden1 _ _ = fromIntegral; divWiden2 _ _ = id -- Float beats integral -instance DivWidenOp Float Int where divWiden1 = id; divWiden2 = fromIntegral -instance DivWidenOp Int Float where divWiden1 = fromIntegral; divWiden2 = id -instance DivWidenOp Float Int32 where divWiden1 = id; divWiden2 = fromIntegral -instance DivWidenOp Int32 Float where divWiden1 = fromIntegral; divWiden2 = id -instance DivWidenOp Float Int64 where divWiden1 = id; divWiden2 = fromIntegral -instance DivWidenOp Int64 Float where divWiden1 = fromIntegral; divWiden2 = id +instance DivWidenOp Float Int where divWiden1 _ _ = id; divWiden2 _ _ = fromIntegral +instance DivWidenOp Int Float where divWiden1 _ _ = fromIntegral; divWiden2 _ _ = id +instance DivWidenOp Float Int32 where divWiden1 _ _ = id; divWiden2 _ _ = fromIntegral +instance DivWidenOp Int32 Float where divWiden1 _ _ = fromIntegral; divWiden2 _ _ = id +instance DivWidenOp Float Int64 where divWiden1 _ _ = id; divWiden2 _ _ = fromIntegral +instance DivWidenOp Int64 Float where divWiden1 _ _ = fromIntegral; divWiden2 _ _ = id -- Integral × integral → Double instance DivWidenOp Int Int where - divWiden1 = fromIntegral - divWiden2 = fromIntegral + divWiden1 _ _ = fromIntegral + divWiden2 _ _ = fromIntegral instance DivWidenOp Int32 Int32 where - 
divWiden1 = fromIntegral - divWiden2 = fromIntegral + divWiden1 _ _ = fromIntegral + divWiden2 _ _ = fromIntegral instance DivWidenOp Int64 Int64 where - divWiden1 = fromIntegral - divWiden2 = fromIntegral + divWiden1 _ _ = fromIntegral + divWiden2 _ _ = fromIntegral instance DivWidenOp Int Int32 where - divWiden1 = fromIntegral - divWiden2 = fromIntegral + divWiden1 _ _ = fromIntegral + divWiden2 _ _ = fromIntegral instance DivWidenOp Int32 Int where - divWiden1 = fromIntegral - divWiden2 = fromIntegral + divWiden1 _ _ = fromIntegral + divWiden2 _ _ = fromIntegral instance DivWidenOp Int Int64 where - divWiden1 = fromIntegral - divWiden2 = fromIntegral + divWiden1 _ _ = fromIntegral + divWiden2 _ _ = fromIntegral instance DivWidenOp Int64 Int where - divWiden1 = fromIntegral - divWiden2 = fromIntegral + divWiden1 _ _ = fromIntegral + divWiden2 _ _ = fromIntegral instance DivWidenOp Int32 Int64 where - divWiden1 = fromIntegral - divWiden2 = fromIntegral + divWiden1 _ _ = fromIntegral + divWiden2 _ _ = fromIntegral instance DivWidenOp Int64 Int32 where - divWiden1 = fromIntegral - divWiden2 = fromIntegral + divWiden1 _ _ = fromIntegral + divWiden2 _ _ = fromIntegral -- | Apply an arithmetic function after widening both operands via 'PromoteDiv'. divArithOp :: @@ -493,7 +493,7 @@ divArithOp :: a -> b -> PromoteDiv a b -divArithOp f x y = f (divWiden1 @a @b x) (divWiden2 @a @b y) +divArithOp f x y = f (divWiden1 a b x) (divWiden2 a b y) -- | Result type of a division-widening binary operator, accounting for nullable wrappers. 
type WidenResultDiv a b = diff --git a/dataframe-core/src/DataFrame/Internal/Row.hs b/dataframe-core/src/DataFrame/Internal/Row.hs index a10d38e..b0a628a 100644 --- a/dataframe-core/src/DataFrame/Internal/Row.hs +++ b/dataframe-core/src/DataFrame/Internal/Row.hs @@ -158,7 +158,7 @@ toRowVector names df = V.generate (fst (dataframeDimensions df)) (mkRowRep df na ==== __Examples__ ->>> map (rowValue (F.col @Int "age")) (toRowList df) +>>> map (rowValue (F.col Int "age")) (toRowList df) [25,30, ...] -} rowValue :: forall a. Expr a -> [(T.Text, Any)] -> Maybe a diff --git a/dataframe-core/src/DataFrame/Internal/Simplify.hs b/dataframe-core/src/DataFrame/Internal/Simplify.hs index 22d674a..e763ab4 100644 --- a/dataframe-core/src/DataFrame/Internal/Simplify.hs +++ b/dataframe-core/src/DataFrame/Internal/Simplify.hs @@ -1,9 +1,9 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} @@ -45,14 +45,14 @@ import DataFrame.Operators ( simplify :: forall a. (Columnable a) => Expr a -> Expr a simplify e - | isBoolish @a = fixpoint (10 :: Int) e + | isBoolish a = fixpoint (10 :: Int) e | otherwise = e where fixpoint 0 x = x fixpoint n x = let x' = simplifyB x in if eqExpr x x' then x else fixpoint (n - 1) x' -isBoolish :: forall a. (Columnable a) => Bool -isBoolish = +isBoolish :: forall a -> (Columnable a) => Bool +isBoolish a = case ( testEquality (typeRep @a) (typeRep @Bool) , testEquality (typeRep @a) (typeRep @(Maybe Bool)) ) of @@ -196,8 +196,8 @@ isLower c = c == CGt || c == CGeq isUpper c = c == CLt || c == CLeq -- | True if @x@ is a @Maybe _@ type. -isMaybeTy :: forall x. 
(Columnable x) => Bool -isMaybeTy = case typeRep @x of +isMaybeTy :: forall x -> (Columnable x) => Bool +isMaybeTy x = case typeRep @x of App con _ -> case eqTypeRep con (typeRep @Maybe) of Just HRefl -> True; _ -> False _ -> False @@ -221,12 +221,12 @@ integralColE :: forall c. (Columnable c) => Expr c -> Bool integralColE (Unary op _) = unaryName op == "toDouble" integralColE _ = or - [ matches @Int - , matches @(Maybe Int) + [ matches Int + , matches (Maybe Int) ] where - matches :: forall t. (Columnable t) => Bool - matches = case testEquality (typeRep @c) (typeRep @t) of Just Refl -> True; _ -> False + matches :: forall t -> (Columnable t) => Bool + matches t = case testEquality (typeRep @c) (typeRep @t) of Just Refl -> True; _ -> False atomOf :: forall a. (Columnable a) => Expr a -> Maybe Atom atomOf (Unary fm (Binary (op :: op c b r) (colE :: Expr c) litE)) @@ -237,7 +237,7 @@ atomOf (Unary fm (Binary (op :: op c b r) (colE :: Expr c) litE)) atomOf (Binary (op :: op c b a) (colE :: Expr c) litE) | Just cmp <- cmpOf op , Just t <- litDouble litE = - let nk = if isMaybeTy @c then UnknownOnNull else Total + let nk = if isMaybeTy c then UnknownOnNull else Total in Just (Atom cmp t (show (normalize colE)) nk (integralColE colE)) atomOf _ = Nothing diff --git a/dataframe-core/src/DataFrame/Internal/Types.hs b/dataframe-core/src/DataFrame/Internal/Types.hs index e482e46..b35a0d2 100644 --- a/dataframe-core/src/DataFrame/Internal/Types.hs +++ b/dataframe-core/src/DataFrame/Internal/Types.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE DeriveTraversable #-} @@ -7,6 +6,7 @@ {-# LANGUAGE GADTs #-} {-# LANGUAGE PolyKinds #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} @@ -94,11 +94,11 @@ instance SBoolI 'True where sbool = STrue instance SBoolI 'False where sbool = 
SFalse -- | Type-level function to determine whether or not a type is unboxa -sUnbox :: forall a. (SBoolI (Unboxable a)) => SBool (Unboxable a) -sUnbox = sbool @(Unboxable a) +sUnbox :: forall a -> (SBoolI (Unboxable a)) => SBool (Unboxable a) +sUnbox a = sbool @(Unboxable a) -sNumeric :: forall a. (SBoolI (Numeric a)) => SBool (Numeric a) -sNumeric = sbool @(Numeric a) +sNumeric :: forall a -> (SBoolI (Numeric a)) => SBool (Numeric a) +sNumeric a = sbool @(Numeric a) type family When (flag :: Bool) (c :: Constraint) :: Constraint where When 'True c = c @@ -120,8 +120,8 @@ type family IntegralTypes (a :: Type) :: Bool where IntegralTypes Word64 = 'True IntegralTypes _ = 'False -sIntegral :: forall a. (SBoolI (IntegralTypes a)) => SBool (IntegralTypes a) -sIntegral = sbool @(IntegralTypes a) +sIntegral :: forall a -> (SBoolI (IntegralTypes a)) => SBool (IntegralTypes a) +sIntegral a = sbool @(IntegralTypes a) type IntegralIf a = When (IntegralTypes a) (Integral a) @@ -130,8 +130,8 @@ type family FloatingTypes (a :: Type) :: Bool where FloatingTypes Double = 'True FloatingTypes _ = 'False -sFloating :: forall a. (SBoolI (FloatingTypes a)) => SBool (FloatingTypes a) -sFloating = sbool @(FloatingTypes a) +sFloating :: forall a -> (SBoolI (FloatingTypes a)) => SBool (FloatingTypes a) +sFloating a = sbool @(FloatingTypes a) type FloatingIf a = When (FloatingTypes a) (Real a, Fractional a) diff --git a/dataframe-core/src/DataFrame/Operators.hs b/dataframe-core/src/DataFrame/Operators.hs index 8c3825c..5281dfc 100644 --- a/dataframe-core/src/DataFrame/Operators.hs +++ b/dataframe-core/src/DataFrame/Operators.hs @@ -1,6 +1,8 @@ +{-# LANGUAGE ExplicitForAll #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} @@ -58,8 +60,11 @@ name other = error $ "You must call `name` on a column reference. 
Not the expression: " ++ show other -col :: (Columnable a) => T.Text -> Expr a -col = Col +col :: forall a -> (Columnable a) => T.Text -> Expr a +col _ = Col + +col' :: forall a. (Columnable a) => T.Text -> Expr a +col' = Col ifThenElse :: (Columnable a) => Expr Bool -> Expr a -> Expr a -> Expr a ifThenElse = If diff --git a/dataframe-core/src/DataFrame/Typed/Freeze.hs b/dataframe-core/src/DataFrame/Typed/Freeze.hs index a0cd654..a73f724 100644 --- a/dataframe-core/src/DataFrame/Typed/Freeze.hs +++ b/dataframe-core/src/DataFrame/Typed/Freeze.hs @@ -1,7 +1,7 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} @@ -30,7 +30,7 @@ then wrap it. Returns 'Nothing' on mismatch. -} freeze :: forall cols. (KnownSchema cols) => D.DataFrame -> Maybe (TypedDataFrame cols) -freeze df = case validateSchema @cols df of +freeze df = case validateSchema cols df of Left _ -> Nothing Right _ -> Just (TDF df) @@ -39,7 +39,7 @@ freezeWithError :: forall cols. (KnownSchema cols) => D.DataFrame -> Either T.Text (TypedDataFrame cols) -freezeWithError df = case validateSchema @cols df of +freezeWithError df = case validateSchema cols df of Left err -> Left err Right _ -> Right (TDF df) @@ -56,10 +56,10 @@ unsafeFreeze :: D.DataFrame -> TypedDataFrame cols unsafeFreeze = TDF validateSchema :: - forall cols. 
+ forall cols -> (KnownSchema cols) => D.DataFrame -> Either T.Text () -validateSchema df = mapM_ checkCol (schemaEvidence @cols) +validateSchema cols df = mapM_ checkCol (schemaEvidence cols) where checkCol :: (T.Text, SomeTypeRep) -> Either T.Text () checkCol (name, expectedRep) = case D.getColumn name df of diff --git a/dataframe-core/src/DataFrame/Typed/Generic.hs b/dataframe-core/src/DataFrame/Typed/Generic.hs index 447af2c..6b425fc 100644 --- a/dataframe-core/src/DataFrame/Typed/Generic.hs +++ b/dataframe-core/src/DataFrame/Typed/Generic.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} diff --git a/dataframe-core/src/DataFrame/Typed/Record.hs b/dataframe-core/src/DataFrame/Typed/Record.hs index d1fc65f..4ed42aa 100644 --- a/dataframe-core/src/DataFrame/Typed/Record.hs +++ b/dataframe-core/src/DataFrame/Typed/Record.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} diff --git a/dataframe-core/src/DataFrame/Typed/Schema.hs b/dataframe-core/src/DataFrame/Typed/Schema.hs index de1f013..7b443ae 100644 --- a/dataframe-core/src/DataFrame/Typed/Schema.hs +++ b/dataframe-core/src/DataFrame/Typed/Schema.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} @@ -7,6 +6,7 @@ {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE PolyKinds #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} @@ -422,25 +422,25 @@ type family -- | Provides runtime evidence of a schema: a list of (name, TypeRep) pairs. 
class KnownSchema (cols :: [Type]) where - schemaEvidence :: [(T.Text, SomeTypeRep)] + schemaEvidence :: forall cols' -> (cols ~ cols') => [(T.Text, SomeTypeRep)] instance KnownSchema '[] where - schemaEvidence = [] + schemaEvidence _ = [] instance (KnownSymbol name, Typeable a, Columnable a, KnownSchema rest) => KnownSchema (Column name a ': rest) where - schemaEvidence = + schemaEvidence _ = (T.pack (symbolVal (Proxy @name)), someTypeRep (Proxy @a)) - : schemaEvidence @rest + : schemaEvidence rest -- | A class that provides a list of 'Text' values for a type-level list of Symbols. class AllKnownSymbol (names :: [Symbol]) where - symbolVals :: [T.Text] + symbolVals :: forall names' -> (names ~ names') => [T.Text] instance AllKnownSymbol '[] where - symbolVals = [] + symbolVals _ = [] instance (KnownSymbol n, AllKnownSymbol ns) => AllKnownSymbol (n ': ns) where - symbolVals = T.pack (symbolVal (Proxy @n)) : symbolVals @ns + symbolVals _ = T.pack (symbolVal (Proxy @n)) : symbolVals ns diff --git a/dataframe-core/src/DataFrame/Typed/Types.hs b/dataframe-core/src/DataFrame/Typed/Types.hs index dfa93c8..1e4c551 100644 --- a/dataframe-core/src/DataFrame/Typed/Types.hs +++ b/dataframe-core/src/DataFrame/Typed/Types.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE ExistentialQuantification #-} {-# LANGUAGE FlexibleContexts #-} diff --git a/dataframe-csv/src/DataFrame/IO/CSV.hs b/dataframe-csv/src/DataFrame/IO/CSV.hs index dd0c78c..256226e 100644 --- a/dataframe-csv/src/DataFrame/IO/CSV.hs +++ b/dataframe-csv/src/DataFrame/IO/CSV.hs @@ -431,7 +431,7 @@ initializeColumns names _row opts = zipWithM initColumn names (map lookupType na BuilderBS <$> newPagedVector <*> pure validityRef initColumn _ mtype = do validityRef <- newPagedUnboxedVector - let t = fromMaybe (schemaType @T.Text) mtype + let t = fromMaybe (schemaType T.Text) mtype case t of SType (_ :: P.Proxy a) -> case testEquality (typeRep @a) (typeRep @Int) of Just Refl 
-> BuilderInt <$> newPagedUnboxedVector <*> pure validityRef diff --git a/dataframe-fusion/src/DataFrame/Fusion/Typed.hs b/dataframe-fusion/src/DataFrame/Fusion/Typed.hs index 35cfac1..c64ae26 100644 --- a/dataframe-fusion/src/DataFrame/Fusion/Typed.hs +++ b/dataframe-fusion/src/DataFrame/Fusion/Typed.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} diff --git a/dataframe-fusion/tests/Main.hs b/dataframe-fusion/tests/Main.hs index 4f2e8f5..8fce7e5 100644 --- a/dataframe-fusion/tests/Main.hs +++ b/dataframe-fusion/tests/Main.hs @@ -104,19 +104,19 @@ main = do assertEqual "row count after take 2" 2 (Core.nRows df) , TestLabel "select projects to fewer columns" $ TestCase $ do fdf <- scan - fdf' <- Fusion.select @'["id", "name"] fdf + fdf' <- Fusion.select '["id", "name"] fdf df <- thaw <$> Fusion.run fdf' assertEqual "columns after select" 2 (Core.nColumns df) , TestLabel "filter drops non-matching rows" $ TestCase $ do fdf <- scan - let pred_ = Fusion.col @"id" Fusion..>. Fusion.lit (2 :: Int) + let pred_ = Fusion.col "id" Fusion..>. Fusion.lit (2 :: Int) fdf' <- Fusion.filter pred_ fdf df <- thaw <$> Fusion.run fdf' assertEqual "row count after filter id > 2" 2 (Core.nRows df) , TestLabel "derive adds a computed column" $ TestCase $ do fdf <- scan - let doubled = Fusion.col @"score" Fusion..*. Fusion.lit (2.0 :: Double) - fdf' <- Fusion.derive @"doubled" doubled fdf + let doubled = Fusion.col "score" Fusion..*. 
Fusion.lit (2.0 :: Double) + fdf' <- Fusion.derive "doubled" doubled fdf df <- thaw <$> Fusion.run fdf' assertEqual "row count after derive" 4 (Core.nRows df) assertEqual "column count after derive" 4 (Core.nColumns df) @@ -130,7 +130,7 @@ main = do -- group by name, sum scores (each name unique here, so 4 groups) fdf' <- Fusion.aggregate - (Fusion.as @"total" (Fusion.sum (Fusion.col @"score"))) + (Fusion.as @"total" (Fusion.sum (Fusion.col "score"))) (Fusion.groupBy @'["name"] fdf) df <- thaw <$> Fusion.run fdf' assertEqual "row count after groupBy" 4 (Core.nRows df) diff --git a/dataframe-lazy/src/DataFrame/Lazy/Internal/DataFrame.hs b/dataframe-lazy/src/DataFrame/Lazy/Internal/DataFrame.hs index da55d05..35911ab 100644 --- a/dataframe-lazy/src/DataFrame/Lazy/Internal/DataFrame.hs +++ b/dataframe-lazy/src/DataFrame/Lazy/Internal/DataFrame.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE NumericUnderscores #-} diff --git a/dataframe-lazy/src/DataFrame/Typed/Lazy.hs b/dataframe-lazy/src/DataFrame/Typed/Lazy.hs index 8949659..77a8228 100644 --- a/dataframe-lazy/src/DataFrame/Typed/Lazy.hs +++ b/dataframe-lazy/src/DataFrame/Typed/Lazy.hs @@ -1,8 +1,8 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE KindSignatures #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} @@ -136,21 +136,21 @@ take n (TLD ldf) = TLD (L.take n ldf) -- | Add a computed column. derive :: - forall name a cols. + forall name -> forall a cols. (KnownSymbol name, C.Columnable a, AssertAbsent name cols) => TExpr cols a -> TypedLazyDataFrame cols -> TypedLazyDataFrame (Snoc cols (Column name a)) -derive (TExpr expr) (TLD ldf) = +derive name (TExpr expr) (TLD ldf) = TLD (L.derive (T.pack (symbolVal (Proxy @name))) expr ldf) -- | Retain only the listed columns. 
select :: - forall (names :: [Symbol]) cols. + forall (names :: [Symbol]) -> forall cols. (AllKnownSymbol names, AssertAllPresent names cols) => TypedLazyDataFrame cols -> TypedLazyDataFrame (SubsetSchema names cols) -select (TLD ldf) = TLD (L.select (DataFrame.Typed.Schema.symbolVals @names) ldf) +select names (TLD ldf) = TLD (L.select (DataFrame.Typed.Schema.symbolVals names) ldf) -- | A typed lazy grouped query. newtype TypedLazyGrouped (keys :: [Symbol]) (cols :: [Type]) = TLG @@ -163,7 +163,7 @@ groupBy :: (AllKnownSymbol keys, AssertAllPresent keys cols) => TypedLazyDataFrame cols -> TypedLazyGrouped keys cols -groupBy (TLD ldf) = TLG (DataFrame.Typed.Schema.symbolVals @keys, ldf) +groupBy (TLD ldf) = TLG (DataFrame.Typed.Schema.symbolVals keys, ldf) -- | Aggregate a grouped lazy query. aggregate :: diff --git a/dataframe-learn/src/DataFrame/DecisionTree/Cart.hs b/dataframe-learn/src/DataFrame/DecisionTree/Cart.hs index fb4188a..94c4d1c 100644 --- a/dataframe-learn/src/DataFrame/DecisionTree/Cart.hs +++ b/dataframe-learn/src/DataFrame/DecisionTree/Cart.hs @@ -207,9 +207,9 @@ featuresOfColumn df c = case unsafeGetColumn c df of numericFeature :: forall b. (Columnable b, VU.Unbox b) => T.Text -> VU.Vector b -> [CartFeature] numericFeature c v = case testEquality (typeRep @b) (typeRep @Double) of - Just Refl -> [CartFeature v (\t -> F.col @Double c .<=. F.lit t)] - Nothing -> case sIntegral @b of - STrue -> [CartFeature (VU.map fromIntegral v) (\t -> F.toDouble (F.col @b c) .<=. F.lit t)] + Just Refl -> [CartFeature v (\t -> F.col Double c .<=. F.lit t)] + Nothing -> case sIntegral b of + STrue -> [CartFeature (VU.map fromIntegral v) (\t -> F.toDouble (F.col b c) .<=. F.lit t)] SFalse -> [] oneHotFeatures :: forall b. 
(Columnable b) => Int -> T.Text -> V.Vector b -> [CartFeature] @@ -219,7 +219,7 @@ oneHotFeatures nAll c v = case testEquality (typeRep @b) (typeRep @T.Text) of oneHot :: Int -> T.Text -> V.Vector T.Text -> T.Text -> CartFeature oneHot nAll c v cat = - CartFeature (VU.generate nAll (\i -> if v V.! i == cat then 1 else 0)) (const (F.col @T.Text c ./=. F.lit cat)) + CartFeature (VU.generate nAll (\i -> if v V.! i == cat then 1 else 0)) (const (F.col T.Text c ./=. F.lit cat)) -- | Target column as string labels (matches pandas @y.astype(str)@). cartTargetLabels :: T.Text -> DataFrame -> V.Vector T.Text diff --git a/dataframe-learn/src/DataFrame/DecisionTree/Categorical.hs b/dataframe-learn/src/DataFrame/DecisionTree/Categorical.hs index d00ec4b..1d0dc9d 100644 --- a/dataframe-learn/src/DataFrame/DecisionTree/Categorical.hs +++ b/dataframe-learn/src/DataFrame/DecisionTree/Categorical.hs @@ -1,7 +1,7 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} @@ -183,10 +183,10 @@ catValueListsFor :: (Ord a, Ord target) => CatCtx target -> V.Vector a -> [[a]] catValueListsFor ctx = catValueLists (ccBinary ctx) (ccPos ctx) (ccTargets ctx) (ccSubsetCap ctx) -- | True for numeric columns (handled by the numeric pool, not here). -isNumericKind :: forall a. (Columnable a) => Bool -isNumericKind = case sFloating @a of +isNumericKind :: forall a -> (Columnable a) => Bool +isNumericKind a = case sFloating a of STrue -> True - SFalse -> case sIntegral @a of + SFalse -> case sIntegral a of STrue -> True SFalse -> False @@ -208,7 +208,7 @@ nonNullColConds ctx colName column = nullableColConds :: forall a target. 
(Columnable a, Ord target) => CatCtx target -> T.Text -> Bitmap -> V.Vector a -> [Expr Bool] nullableColConds ctx colName bm column - | isNumericKind @a || V.null valid = [] + | isNumericKind a || V.null valid = [] | otherwise = fromMaybe [] (withOrdFrom @a (ccOrds ctx) (map (orEqs (eqJustFor @a colName)) (catValueListsFor ctx valid))) where valid = validBoxedValues bm column @@ -233,29 +233,29 @@ isDisallowedPair cfg l r = pairConds :: ColumnOrdering -> DataFrame -> (T.Text, T.Text) -> [Expr Bool] pairConds ords df (l, r) = case (unsafeGetColumn l df, unsafeGetColumn r df) of - (BoxedColumn Nothing (_ :: V.Vector a), BoxedColumn Nothing (_ :: V.Vector b)) -> strictPairConds @a @b l r - (BoxedColumn (Just _) (_ :: V.Vector a), BoxedColumn (Just _) (_ :: V.Vector b)) -> nullablePairConds @a @b ords l r + (BoxedColumn Nothing (_ :: V.Vector a), BoxedColumn Nothing (_ :: V.Vector b)) -> strictPairConds a b l r + (BoxedColumn (Just _) (_ :: V.Vector a), BoxedColumn (Just _) (_ :: V.Vector b)) -> nullablePairConds a b ords l r _ -> [] -strictPairConds :: forall a b. (Columnable a, Columnable b) => T.Text -> T.Text -> [Expr Bool] -strictPairConds l r = case testEquality (typeRep @a) (typeRep @b) of +strictPairConds :: forall a b -> (Columnable a, Columnable b) => T.Text -> T.Text -> [Expr Bool] +strictPairConds a b l r = case testEquality (typeRep @a) (typeRep @b) of Just Refl -> [Col @a l .==. Col @a r] Nothing -> [] -nullablePairConds :: forall a b. (Columnable a, Columnable b) => ColumnOrdering -> T.Text -> T.Text -> [Expr Bool] -nullablePairConds ords l r = case testEquality (typeRep @a) (typeRep @b) of +nullablePairConds :: forall a b -> (Columnable a, Columnable b) => ColumnOrdering -> T.Text -> T.Text -> [Expr Bool] +nullablePairConds a b ords l r = case testEquality (typeRep @a) (typeRep @b) of Nothing -> [] - Just Refl -> nullableEqOrLe @a ords l r + Just Refl -> nullableEqOrLe a ords l r -nullableEqOrLe :: forall a. 
(Columnable a) => ColumnOrdering -> T.Text -> T.Text -> [Expr Bool] -nullableEqOrLe ords l r - | isTextType @a = eqOnly +nullableEqOrLe :: forall a -> (Columnable a) => ColumnOrdering -> T.Text -> T.Text -> [Expr Bool] +nullableEqOrLe a ords l r + | isTextType a = eqOnly | otherwise = maybe eqOnly (++ eqOnly) (withOrdFrom @a ords [Col @(Maybe a) l .<=. Col @(Maybe a) r]) where eqOnly = [Col @(Maybe a) l .==. Col @(Maybe a) r] -isTextType :: forall a. (Columnable a) => Bool -isTextType = case testEquality (typeRep @a) (typeRep @T.Text) of +isTextType :: forall a -> (Columnable a) => Bool +isTextType a = case testEquality (typeRep @a) (typeRep @T.Text) of Just Refl -> True Nothing -> False diff --git a/dataframe-learn/src/DataFrame/DecisionTree/Fit.hs b/dataframe-learn/src/DataFrame/DecisionTree/Fit.hs index c75968d..22c0d9d 100644 --- a/dataframe-learn/src/DataFrame/DecisionTree/Fit.hs +++ b/dataframe-learn/src/DataFrame/DecisionTree/Fit.hs @@ -1,5 +1,5 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} @@ -58,14 +58,14 @@ fitDecisionTree :: forall a. (Columnable a, Ord a) => TreeConfig -> Expr a -> Da fitDecisionTree cfg (Col target) df = pruneExpr (treeToExpr (taoOptimizeCV @a cfg target condVecs df indices initialTree)) where - condVecs = candidatePool @a cfg target df + condVecs = candidatePool a cfg target df initialTree = buildCartTree @a cfg target df indices = V.enumFromN 0 (nRows df) fitDecisionTree _ expr _ = error ("Cannot create tree for compound expression: " ++ show expr) -- | The deduplicated numeric + discrete candidate pool for a target column. -candidatePool :: forall a. 
(Columnable a, Ord a) => TreeConfig -> T.Text -> DataFrame -> [CondVec] -candidatePool cfg target df = dedupCVByExpr (numericCVs ++ discreteCVs) +candidatePool :: forall a -> (Columnable a, Ord a) => TreeConfig -> T.Text -> DataFrame -> [CondVec] +candidatePool a cfg target df = dedupCVByExpr (numericCVs ++ discreteCVs) where dfNoTarget = exclude [target] df numericCVs = numericCondVecs cfg dfNoTarget df @@ -89,8 +89,8 @@ partitionDataFrame :: Expr Bool -> DataFrame -> (DataFrame, DataFrame) partitionDataFrame cond df = (filterWhere cond df, filterWhere (F.not cond) df) -- | Laplace-smoothed Gini impurity of the target distribution. -calculateGini :: forall a. (Columnable a, Ord a) => T.Text -> DataFrame -> Double -calculateGini target df +calculateGini :: forall a -> (Columnable a, Ord a) => T.Text -> DataFrame -> Double +calculateGini a target df | n == 0 = 0 | otherwise = 1 - sum (map (^ (2 :: Int)) probs) where diff --git a/dataframe-learn/src/DataFrame/DecisionTree/Numeric.hs b/dataframe-learn/src/DataFrame/DecisionTree/Numeric.hs index b2d115a..1d162fd 100644 --- a/dataframe-learn/src/DataFrame/DecisionTree/Numeric.hs +++ b/dataframe-learn/src/DataFrame/DecisionTree/Numeric.hs @@ -1,7 +1,7 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} @@ -200,21 +200,21 @@ numericCols df = concatMap (numExprsOfColumn df) (columnNames df) numExprsOfColumn :: DataFrame -> T.Text -> [NumExpr] numExprsOfColumn df colName = case unsafeGetColumn colName df of - UnboxedColumn Nothing (_ :: VU.Vector b) -> strictNumeric @b colName - BoxedColumn (Just _) (_ :: V.Vector b) -> nullableNumeric @b colName - UnboxedColumn (Just _) (_ :: VU.Vector b) -> nullableNumeric @b colName + UnboxedColumn Nothing (_ :: VU.Vector b) -> strictNumeric b colName + BoxedColumn (Just _) (_ :: V.Vector b) -> 
nullableNumeric b colName + UnboxedColumn (Just _) (_ :: VU.Vector b) -> nullableNumeric b colName _ -> [] -strictNumeric :: forall b. (Columnable b) => T.Text -> [NumExpr] -strictNumeric c = case testEquality (typeRep @b) (typeRep @Double) of +strictNumeric :: forall b -> (Columnable b) => T.Text -> [NumExpr] +strictNumeric b c = case testEquality (typeRep @b) (typeRep @Double) of Just Refl -> [NDouble (Col c)] - Nothing -> case sIntegral @b of + Nothing -> case sIntegral b of STrue -> [NDouble (F.toDouble (Col @b c))] SFalse -> [] -nullableNumeric :: forall b. (Columnable b) => T.Text -> [NumExpr] -nullableNumeric c = case testEquality (typeRep @b) (typeRep @Double) of +nullableNumeric :: forall b -> (Columnable b) => T.Text -> [NumExpr] +nullableNumeric b c = case testEquality (typeRep @b) (typeRep @Double) of Just Refl -> [NMaybeDouble (Col @(Maybe b) c)] - Nothing -> case sIntegral @b of + Nothing -> case sIntegral b of STrue -> [NMaybeDouble (F.whenPresent (realToFrac @b @Double) (Col @(Maybe b) c))] SFalse -> [] diff --git a/dataframe-learn/src/DataFrame/DecisionTree/Types.hs b/dataframe-learn/src/DataFrame/DecisionTree/Types.hs index 349e21d..c772eac 100644 --- a/dataframe-learn/src/DataFrame/DecisionTree/Types.hs +++ b/dataframe-learn/src/DataFrame/DecisionTree/Types.hs @@ -1,7 +1,7 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} @@ -132,8 +132,8 @@ instance Monoid ColumnOrdering where mempty = ColumnOrdering M.empty -- | Register a type as orderable for decision-tree splits. -orderable :: forall a. 
(Columnable a, Ord a) => ColumnOrdering -orderable = ColumnOrdering (M.singleton (SomeTypeRep (typeRep @a)) (OrdDict (Proxy @a))) +orderable :: forall a -> (Columnable a, Ord a) => ColumnOrdering +orderable a = ColumnOrdering (M.singleton (SomeTypeRep (typeRep @a)) (OrdDict (Proxy @a))) -- | All standard numeric, text, and primitive types. defaultColumnOrdering :: ColumnOrdering @@ -141,24 +141,24 @@ defaultColumnOrdering = mconcat (numericOrderings ++ otherOrderings) numericOrderings :: [ColumnOrdering] numericOrderings = - [ orderable @Int - , orderable @Int8 - , orderable @Int16 - , orderable @Int32 - , orderable @Int64 - , orderable @Word - , orderable @Word8 - , orderable @Word16 - , orderable @Word32 - , orderable @Word64 - , orderable @Integer - , orderable @Double - , orderable @Float + [ orderable Int + , orderable Int8 + , orderable Int16 + , orderable Int32 + , orderable Int64 + , orderable Word + , orderable Word8 + , orderable Word16 + , orderable Word32 + , orderable Word64 + , orderable Integer + , orderable Double + , orderable Float ] otherOrderings :: [ColumnOrdering] otherOrderings = - [orderable @Bool, orderable @Char, orderable @T.Text, orderable @String] + [orderable Bool, orderable Char, orderable T.Text, orderable String] -- | Existential @Ord@ dictionary keyed by type representation. 
data OrdDict where diff --git a/dataframe-operations/src/DataFrame/Operations/Aggregation.hs b/dataframe-operations/src/DataFrame/Operations/Aggregation.hs index 20f99f8..586f29f 100644 --- a/dataframe-operations/src/DataFrame/Operations/Aggregation.hs +++ b/dataframe-operations/src/DataFrame/Operations/Aggregation.hs @@ -69,11 +69,11 @@ computeRowHashes indices df = runST $ do Just Refl -> hashKeyUnboxed mv ubm mixDouble v Nothing -> - case sIntegral @a of + case sIntegral a of STrue -> hashKeyUnboxed mv ubm (\h d -> mixInt h (fromIntegral @a @Int d)) v SFalse -> - case sFloating @a of + case sFloating a of STrue -> hashKeyUnboxed mv ubm (\h d -> mixDouble h (realToFrac d :: Double)) v SFalse -> diff --git a/dataframe-operations/src/DataFrame/Operations/Core.hs b/dataframe-operations/src/DataFrame/Operations/Core.hs index 665cf58..346bc4f 100644 --- a/dataframe-operations/src/DataFrame/Operations/Core.hs +++ b/dataframe-operations/src/DataFrame/Operations/Core.hs @@ -814,10 +814,10 @@ You must specify the type via type applications. ==== __Examples__ ->>> columnAsVector (F.col @Int "age") df +>>> columnAsVector (F.col Int "age") df Right [25, 30, 35, ...] ->>> columnAsVector (F.col @Text "name") df +>>> columnAsVector (F.col Text "name") df Right ["Alice", "Bob", "Charlie", ...] -} columnAsVector :: @@ -918,10 +918,10 @@ You must specify the type via type applications. ==== __Examples__ ->>> columnAsList @Int "age" df +>>> columnAsList Int "age" df [25, 30, 35, ...] ->>> columnAsList @Text "name" df +>>> columnAsList Text "name" df ["Alice", "Bob", "Charlie", ...] 
==== __Throws__ diff --git a/dataframe-operations/src/DataFrame/Operations/Statistics.hs b/dataframe-operations/src/DataFrame/Operations/Statistics.hs index 0992920..6eb3c78 100644 --- a/dataframe-operations/src/DataFrame/Operations/Statistics.hs +++ b/dataframe-operations/src/DataFrame/Operations/Statistics.hs @@ -219,9 +219,9 @@ _getColumnAsDouble :: T.Text -> DataFrame -> Maybe (VU.Vector Double) _getColumnAsDouble name df = case getColumn name df of Just (UnboxedColumn _ (f :: VU.Vector a)) -> case testEquality (typeRep @a) (typeRep @Double) of Just Refl -> Just f - Nothing -> case sIntegral @a of + Nothing -> case sIntegral a of STrue -> Just (VU.map fromIntegral f) - SFalse -> case sFloating @a of + SFalse -> case sFloating a of STrue -> Just (VU.map realToFrac f) SFalse -> Nothing Nothing -> diff --git a/dataframe-operations/src/DataFrame/Operations/Subset.hs b/dataframe-operations/src/DataFrame/Operations/Subset.hs index 1962c76..0acdf96 100644 --- a/dataframe-operations/src/DataFrame/Operations/Subset.hs +++ b/dataframe-operations/src/DataFrame/Operations/Subset.hs @@ -208,7 +208,7 @@ filterBy = flip filter {- | O(k) filters the dataframe with a boolean expression. 
-> filterWhere (F.col @Int x + F.col y F.> 5) df +> filterWhere (F.col Int x + F.col y F.> 5) df -} filterWhere :: Expr Bool -> DataFrame -> DataFrame filterWhere expr df = @@ -408,7 +408,7 @@ sample :: (RandomGen g) => g -> Double -> DataFrame -> DataFrame sample pureGen p df = let rand = mkRandom pureGen (fst (dataframeDimensions df)) (0 :: Double) 1 - cRand = col @Double "__rand__" + cRand = col Double "__rand__" in df & insertColumn (name cRand) rand @@ -429,7 +429,7 @@ randomSplit :: randomSplit pureGen p df = let rand = mkRandom pureGen (fst (dataframeDimensions df)) (0 :: Double) 1 - cRand = col @Double "__rand__" + cRand = col Double "__rand__" withRand = df & insertColumn (name cRand) rand in ( withRand @@ -454,7 +454,7 @@ kFolds :: (RandomGen g) => g -> Int -> DataFrame -> [DataFrame] kFolds pureGen folds df = let rand = mkRandom pureGen (fst (dataframeDimensions df)) (0 :: Double) 1 - cRand = col @Double "__rand__" + cRand = col Double "__rand__" withRand = df & insertColumn (name cRand) rand partitionSize = 1 / fromIntegral folds singleFold n d = diff --git a/dataframe-operations/src/DataFrame/Operations/Transformations.hs b/dataframe-operations/src/DataFrame/Operations/Transformations.hs index ef1a96a..edb6a13 100644 --- a/dataframe-operations/src/DataFrame/Operations/Transformations.hs +++ b/dataframe-operations/src/DataFrame/Operations/Transformations.hs @@ -5,6 +5,7 @@ {-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE UndecidableInstances #-} @@ -85,7 +86,7 @@ add the result into `alias` column but ==== __Examples__ ->>> (z, df') = deriveWithExpr "z" (F.col @Int "x" + F.col "y") df +>>> (z, df') = deriveWithExpr "z" (F.col Int "x" + F.col "y") df >>> filterWhere (z .>= 50) -} deriveWithExpr :: diff --git a/dataframe-operations/src/DataFrame/Operations/Typing.hs 
b/dataframe-operations/src/DataFrame/Operations/Typing.hs index 94d32f5..4908455 100644 --- a/dataframe-operations/src/DataFrame/Operations/Typing.hs +++ b/dataframe-operations/src/DataFrame/Operations/Typing.hs @@ -1,8 +1,8 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} @@ -417,10 +417,10 @@ parseWithTypes resolveMode ts df -- 'SafeReadMode'. @toStr@ converts column elements to a 'String' ready for -- 'Read'. plainType :: - forall a b. + forall a -> forall b. (Columnable a, Read a) => SafeReadMode -> V.Vector b -> (b -> String) -> Column - plainType mode col toStr = case mode of + plainType a mode col toStr = case mode of NoSafeRead -> fromVector (V.map ((read @a) . toStr) col) MaybeRead -> fromVector (V.map ((readMaybe @a) . toStr) col) EitherRead -> fromVector (V.map ((readEitherRaw @a) . 
toStr) col) @@ -443,14 +443,14 @@ parseWithTypes resolveMode ts df Nothing -> case testEquality (typeRep @a) (typeRep @b) of Just Refl -> c Nothing -> case testEquality (typeRep @T.Text) (typeRep @b) of - Just Refl -> plainType @a mode col T.unpack - Nothing -> plainType @a mode col show + Just Refl -> plainType a mode col T.unpack + Nothing -> plainType a mode col show _ -> c _ -> case testEquality (typeRep @a) (typeRep @b) of Just Refl -> c Nothing -> case testEquality (typeRep @T.Text) (typeRep @b) of - Just Refl -> plainType @a mode col T.unpack - Nothing -> plainType @a mode col show + Just Refl -> plainType a mode col T.unpack + Nothing -> plainType a mode col show asType _ _ c = c readAsMaybe :: (Read a) => String -> Maybe a diff --git a/dataframe-operations/src/DataFrame/Typed/Access.hs b/dataframe-operations/src/DataFrame/Typed/Access.hs index daf71fa..fbfd806 100644 --- a/dataframe-operations/src/DataFrame/Typed/Access.hs +++ b/dataframe-operations/src/DataFrame/Typed/Access.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeAbstractions #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} @@ -28,28 +29,28 @@ import DataFrame.Typed.Types (TypedDataFrame (..)) the schema. The column must exist (enforced at compile time). -} columnAsVector :: - forall name cols a. + forall name -> forall cols a. ( KnownSymbol name , a ~ SafeLookup name cols , Columnable a , AssertPresent name cols ) => TypedDataFrame cols -> V.Vector a -columnAsVector (TDF df) = +columnAsVector name @_ @a (TDF df) = either throw id $ D.columnAsVector (Col @a colName) df where colName = T.pack (symbolVal (Proxy @name)) -- | Retrieve a column as a list, with the type determined by the schema. columnAsList :: - forall name cols a. + forall name -> forall cols a. 
( KnownSymbol name , a ~ SafeLookup name cols , Columnable a , AssertPresent name cols ) => TypedDataFrame cols -> [a] -columnAsList (TDF df) = +columnAsList name @_ @a (TDF df) = D.columnAsList (Col @a colName) df where colName = T.pack (symbolVal (Proxy @name)) diff --git a/dataframe-operations/src/DataFrame/Typed/Aggregate.hs b/dataframe-operations/src/DataFrame/Typed/Aggregate.hs index c1df1e1..212fbc5 100644 --- a/dataframe-operations/src/DataFrame/Typed/Aggregate.hs +++ b/dataframe-operations/src/DataFrame/Typed/Aggregate.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} @@ -44,7 +43,7 @@ groupBy :: forall (keys :: [Symbol]) cols. (AllKnownSymbol keys, AssertAllPresent keys cols) => TypedDataFrame cols -> TypedGrouped keys cols -groupBy (TDF df) = TGD (DA.groupBy (symbolVals @keys) df) +groupBy (TDF df) = TGD (DA.groupBy (symbolVals keys) df) {- | Build a named aggregation entry. The result column name is supplied via @TypeApplications@; the underlying expression is validated against the diff --git a/dataframe-operations/src/DataFrame/Typed/Expr.hs b/dataframe-operations/src/DataFrame/Typed/Expr.hs index fae1fdb..4f87139 100644 --- a/dataframe-operations/src/DataFrame/Typed/Expr.hs +++ b/dataframe-operations/src/DataFrame/Typed/Expr.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE DisambiguateRecordFields #-} {-# LANGUAGE FlexibleContexts #-} @@ -7,6 +6,7 @@ {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} @@ -29,15 +29,15 @@ type Schema = '[Column \"age\" Int, Column \"salary\" Double] -- This compiles: goodExpr :: TExpr Schema Double -goodExpr = col \@\"salary\" +goodExpr = col \"salary\" -- This gives a compile-time error (column 
not found): badExpr :: TExpr Schema Double -badExpr = col \@\"nonexistent\" +badExpr = col \"nonexistent\" -- This gives a compile-time error (type mismatch): wrongType :: TExpr Schema Int -wrongType = col \@\"salary\" -- salary is Double, not Int +wrongType = col \"salary\" -- salary is Double, not Int @ -} module DataFrame.Typed.Expr ( @@ -179,17 +179,17 @@ salary = col \@\"salary\" @ -} col :: - forall (name :: Symbol) cols a. + forall (name :: Symbol) -> forall cols a. ( KnownSymbol name , a ~ SafeLookup name cols , Columnable a , AssertPresent name cols ) => TExpr cols a -col = TExpr (Col (T.pack (symbolVal (Proxy @name)))) +col name = TExpr (Col (T.pack (symbolVal (Proxy @name)))) {- | Use a column name as an @OverloadedLabels@ label: @#age@ is sugar for -@col \@\"age\"@. Enable @OverloadedLabels@ at the use site. +@col \"age\"@. Enable @OverloadedLabels@ at the use site. @ adults = filterWhere (#age .>=. lit 18) people @@ -203,7 +203,7 @@ instance ) => IsLabel name (TExpr cols a) where - fromLabel = col @name + fromLabel = col name {- | Create a literal expression. Valid for any schema since it references no columns. @@ -601,10 +601,10 @@ collect :: (Columnable a) => TExpr cols a -> TExpr cols [a] collect (TExpr e) = TExpr (F.collect e) over :: - forall (names :: [Symbol]) cols a. + forall (names :: [Symbol]) -> forall cols a. 
(Columnable a, AllKnownSymbol names, AssertAllPresent names cols) => TExpr cols a -> TExpr cols a -over (TExpr e) = TExpr{unTExpr = F.over (symbolVals @names) e} +over names (TExpr e) = TExpr{unTExpr = F.over (symbolVals names) e} ------------------------------------------------------------------------------- -- Cast / coercion expressions diff --git a/dataframe-operations/src/DataFrame/Typed/Join.hs b/dataframe-operations/src/DataFrame/Typed/Join.hs index dfa9210..47803e3 100644 --- a/dataframe-operations/src/DataFrame/Typed/Join.hs +++ b/dataframe-operations/src/DataFrame/Typed/Join.hs @@ -1,8 +1,8 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} @@ -25,48 +25,48 @@ import DataFrame.Typed.Types (TypedDataFrame (..)) -- | Typed inner join on one or more key columns. innerJoin :: - forall (keys :: [Symbol]) left right. + forall (keys :: [Symbol]) -> forall left right. (AllKnownSymbol keys) => TypedDataFrame left -> TypedDataFrame right -> TypedDataFrame (InnerJoinSchema keys left right) -innerJoin (TDF l) (TDF r) = +innerJoin keys (TDF l) (TDF r) = unsafeFreeze (DJ.innerJoin keyNames r l) where - keyNames = symbolVals @keys + keyNames = symbolVals keys -- | Typed left join. leftJoin :: - forall (keys :: [Symbol]) left right. + forall (keys :: [Symbol]) -> forall left right. (AllKnownSymbol keys) => TypedDataFrame left -> TypedDataFrame right -> TypedDataFrame (LeftJoinSchema keys left right) -leftJoin (TDF l) (TDF r) = +leftJoin keys (TDF l) (TDF r) = unsafeFreeze (DJ.leftJoin keyNames l r) where - keyNames = symbolVals @keys + keyNames = symbolVals keys -- | Typed right join. rightJoin :: - forall (keys :: [Symbol]) left right. + forall (keys :: [Symbol]) -> forall left right. 
(AllKnownSymbol keys) => TypedDataFrame left -> TypedDataFrame right -> TypedDataFrame (RightJoinSchema keys left right) -rightJoin (TDF l) (TDF r) = +rightJoin keys (TDF l) (TDF r) = unsafeFreeze (DJ.rightJoin keyNames l r) where - keyNames = symbolVals @keys + keyNames = symbolVals keys -- | Typed full outer join. fullOuterJoin :: - forall (keys :: [Symbol]) left right. + forall (keys :: [Symbol]) -> forall left right. (AllKnownSymbol keys) => TypedDataFrame left -> TypedDataFrame right -> TypedDataFrame (FullOuterJoinSchema keys left right) -fullOuterJoin (TDF l) (TDF r) = +fullOuterJoin keys (TDF l) (TDF r) = unsafeFreeze (DJ.fullOuterJoin keyNames r l) where - keyNames = symbolVals @keys + keyNames = symbolVals keys diff --git a/dataframe-operations/src/DataFrame/Typed/Operations.hs b/dataframe-operations/src/DataFrame/Typed/Operations.hs index d812aa6..23f17f7 100644 --- a/dataframe-operations/src/DataFrame/Typed/Operations.hs +++ b/dataframe-operations/src/DataFrame/Typed/Operations.hs @@ -1,10 +1,11 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeAbstractions #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} @@ -123,12 +124,12 @@ filterJust \@\"x\" df @ -} filterJust :: - forall name cols. + forall name -> forall cols. ( KnownSymbol name , AssertPresent name cols ) => TypedDataFrame cols -> TypedDataFrame (StripMaybeAt name cols) -filterJust (TDF df) = unsafeFreeze (D.filterJust colName df) +filterJust name (TDF df) = unsafeFreeze (D.filterJust colName df) where colName = T.pack (symbolVal (Proxy @name)) @@ -136,12 +137,12 @@ filterJust (TDF df) = unsafeFreeze (D.filterJust colName df) Schema is preserved (column types unchanged, just fewer rows). 
-} filterNothing :: - forall name cols. + forall name -> forall cols. ( KnownSymbol name , AssertPresent name cols ) => TypedDataFrame cols -> TypedDataFrame cols -filterNothing (TDF df) = TDF (D.filterNothing colName df) +filterNothing name (TDF df) = TDF (D.filterNothing colName df) where colName = T.pack (symbolVal (Proxy @name)) @@ -206,7 +207,7 @@ df' = derive \@\"total\" (col \@\"price\" * col \@\"qty\") df @ -} derive :: - forall name a cols. + forall name -> forall a cols. ( KnownSymbol name , Columnable a , AssertAbsent name cols @@ -214,12 +215,12 @@ derive :: TExpr cols a -> TypedDataFrame cols -> TypedDataFrame (Snoc cols (T.Column name a)) -derive (TExpr expr) (TDF df) = unsafeFreeze (D.derive colName expr df) +derive name (TExpr expr) (TDF df) = unsafeFreeze (D.derive colName expr df) where colName = T.pack (symbolVal (Proxy @name)) impute :: - forall name a cols. + forall name -> forall a cols. ( KnownSymbol name , Columnable a , Maybe a ~ Lookup name cols @@ -227,105 +228,105 @@ impute :: a -> TypedDataFrame cols -> TypedDataFrame (Impute name cols) -impute value (TDF df) = +impute name @a value (TDF df) = unsafeFreeze - (D.derive colName (DF.fromMaybe value (DF.col @(Maybe a) colName)) df) + (D.derive colName (DF.fromMaybe value (DF.col (Maybe a) colName)) df) where colName = T.pack (symbolVal (Proxy @name)) -- | Select a subset of columns by name. select :: - forall (names :: [Symbol]) cols. + forall (names :: [Symbol]) -> forall cols. (AllKnownSymbol names, AssertAllPresent names cols) => TypedDataFrame cols -> TypedDataFrame (SubsetSchema names cols) -select (TDF df) = unsafeFreeze (D.select (symbolVals @names) df) +select names (TDF df) = unsafeFreeze (D.select (symbolVals names) df) -- | Exclude columns by name. exclude :: - forall (names :: [Symbol]) cols. + forall (names :: [Symbol]) -> forall cols. 
(AllKnownSymbol names) => TypedDataFrame cols -> TypedDataFrame (ExcludeSchema names cols) -exclude (TDF df) = unsafeFreeze (D.exclude (symbolVals @names) df) +exclude names (TDF df) = unsafeFreeze (D.exclude (symbolVals names) df) -- | Rename a column. rename :: - forall old new cols. + forall old new -> forall cols. (KnownSymbol old, KnownSymbol new) => TypedDataFrame cols -> TypedDataFrame (RenameInSchema old new cols) -rename (TDF df) = unsafeFreeze (D.rename oldName newName df) +rename old new (TDF df) = unsafeFreeze (D.rename oldName newName df) where oldName = T.pack (symbolVal (Proxy @old)) newName = T.pack (symbolVal (Proxy @new)) -- | Rename multiple columns from a type-level list of pairs. renameMany :: - forall (pairs :: [(Symbol, Symbol)]) cols. + forall (pairs :: [(Symbol, Symbol)]) -> forall cols. (AllKnownPairs pairs) => TypedDataFrame cols -> TypedDataFrame (RenameManyInSchema pairs cols) -renameMany (TDF df) = unsafeFreeze (foldRenames (pairVals @pairs) df) +renameMany pairs (TDF df) = unsafeFreeze (foldRenames (pairVals pairs) df) where foldRenames [] df' = df' foldRenames ((old, new) : rest) df' = foldRenames rest (D.rename old new df') -- | Insert a new column from a Foldable container. insert :: - forall name a cols t. + forall name -> forall a cols t. ( KnownSymbol name , Columnable a , Foldable t , AssertAbsent name cols ) => t a -> TypedDataFrame cols -> TypedDataFrame (T.Column name a ': cols) -insert xs (TDF df) = unsafeFreeze (D.insert colName xs df) +insert name xs (TDF df) = unsafeFreeze (D.insert colName xs df) where colName = T.pack (symbolVal (Proxy @name)) -- | Insert a raw 'Column' value. insertColumn :: - forall name a cols. + forall name -> forall a cols. 
( KnownSymbol name , Columnable a , AssertAbsent name cols ) => C.Column -> TypedDataFrame cols -> TypedDataFrame (T.Column name a ': cols) -insertColumn col (TDF df) = unsafeFreeze (D.insertColumn colName col df) +insertColumn name col (TDF df) = unsafeFreeze (D.insertColumn colName col df) where colName = T.pack (symbolVal (Proxy @name)) -- | Insert a boxed 'Vector'. insertVector :: - forall name a cols. + forall name -> forall a cols. ( KnownSymbol name , Columnable a , AssertAbsent name cols ) => V.Vector a -> TypedDataFrame cols -> TypedDataFrame (T.Column name a ': cols) -insertVector vec (TDF df) = unsafeFreeze (D.insertVector colName vec df) +insertVector name vec (TDF df) = unsafeFreeze (D.insertVector colName vec df) where colName = T.pack (symbolVal (Proxy @name)) -- | Clone an existing column under a new name. cloneColumn :: - forall old new cols. + forall old new -> forall cols. ( KnownSymbol old , KnownSymbol new , AssertPresent old cols , AssertAbsent new cols ) => TypedDataFrame cols -> TypedDataFrame (T.Column new (Lookup old cols) ': cols) -cloneColumn (TDF df) = unsafeFreeze (D.cloneColumn oldName newName df) +cloneColumn old new (TDF df) = unsafeFreeze (D.cloneColumn oldName newName df) where oldName = T.pack (symbolVal (Proxy @old)) newName = T.pack (symbolVal (Proxy @new)) -- | Drop a column by name. dropColumn :: - forall name cols. + forall name -> forall cols. ( KnownSymbol name , AssertPresent name cols ) => TypedDataFrame cols -> TypedDataFrame (RemoveColumn name cols) -dropColumn (TDF df) = unsafeFreeze (D.exclude [colName] df) +dropColumn name (TDF df) = unsafeFreeze (D.exclude [colName] df) where colName = T.pack (symbolVal (Proxy @name)) @@ -333,14 +334,14 @@ dropColumn (TDF df) = unsafeFreeze (D.exclude [colName] df) The column must already exist and the new type must match. -} replaceColumn :: - forall name a cols. + forall name -> forall a cols. 
( KnownSymbol name , Columnable a , a ~ SafeLookup name cols , AssertPresent name cols ) => TExpr cols a -> TypedDataFrame cols -> TypedDataFrame cols -replaceColumn (TExpr expr) (TDF df) = unsafeFreeze (D.derive colName expr df) +replaceColumn name (TExpr expr) (TDF df) = unsafeFreeze (D.derive colName expr df) where colName = T.pack (symbolVal (Proxy @name)) @@ -396,17 +397,17 @@ columnNames (TDF df) = D.columnNames df -- | Helper class for extracting [(Text, Text)] from type-level pairs. class AllKnownPairs (pairs :: [(Symbol, Symbol)]) where - pairVals :: [(T.Text, T.Text)] + pairVals :: forall pairs' -> (pairs ~ pairs') => [(T.Text, T.Text)] instance AllKnownPairs '[] where - pairVals = [] + pairVals _ = [] instance (KnownSymbol a, KnownSymbol b, AllKnownPairs rest) => AllKnownPairs ('(a, b) ': rest) where - pairVals = + pairVals _ = ( T.pack (symbolVal (Proxy @a)) , T.pack (symbolVal (Proxy @b)) ) - : pairVals @rest + : pairVals rest diff --git a/dataframe-parquet/dataframe-parquet.cabal b/dataframe-parquet/dataframe-parquet.cabal index 133a75c..bb13dce 100644 --- a/dataframe-parquet/dataframe-parquet.cabal +++ b/dataframe-parquet/dataframe-parquet.cabal @@ -58,7 +58,7 @@ library pinch >= 0.5 && < 1, snappy-hs ^>= 0.1, streamly-bytestring >= 0.2.0 && < 0.4, - streamly-core >= 0.2.3 && < 0.4, + streamly-core >= 0.3.0 && < 0.4, text >= 2.0 && < 3, time >= 1.12 && < 2, vector ^>= 0.13, diff --git a/dataframe-parsing/src/DataFrame/Internal/Schema.hs b/dataframe-parsing/src/DataFrame/Internal/Schema.hs index d48f3ec..6572f4b 100644 --- a/dataframe-parsing/src/DataFrame/Internal/Schema.hs +++ b/dataframe-parsing/src/DataFrame/Internal/Schema.hs @@ -2,6 +2,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE InstanceSigs #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} @@ -35,7 +36,7 @@ data SchemaType where ==== __Examples__ >>> :set 
-XTypeApplications ->>> show (schemaType @Bool) +>>> show (schemaType Bool) "Bool" -} instance Show SchemaType where @@ -46,10 +47,10 @@ instance Show SchemaType where ==== __Examples__ >>> :set -XTypeApplications ->>> schemaType @Int == schemaType @Int +>>> schemaType Int == schemaType Int True ->>> schemaType @Int == schemaType @Integer +>>> schemaType Int == schemaType Integer False -} instance Eq SchemaType where @@ -61,14 +62,14 @@ instance Eq SchemaType where ==== __Examples__ >>> :set -XTypeApplications ->>> schemaType @T.Text == schemaType @T.Text +>>> schemaType T.Text == schemaType T.Text True ->>> show (schemaType @Double) +>>> show (schemaType Double) "Double" -} -schemaType :: forall a. (Columnable a, Read a) => SchemaType -schemaType = SType (P.Proxy @a) +schemaType :: forall a -> (Columnable a, Read a) => SchemaType +schemaType a = SType (P.Proxy @a) {- | Logical schema of a 'DataFrame': a mapping from column names to their element types ('SchemaType'). diff --git a/dataframe-persistent/README.md b/dataframe-persistent/README.md index a1e6d34..5c477b1 100644 --- a/dataframe-persistent/README.md +++ b/dataframe-persistent/README.md @@ -26,7 +26,7 @@ write a `persistent` entity, a `persistLowerCase` block, or any instances. 
| Tier | You write | You get | |------|-----------|---------| | **Runtime** | `readTable db "artists"` | a `DataFrame`, types inferred from the schema | -| **Typed** | `$(declareTable db "artists")` + `readTableTyped @Schema` | a compile-time schema type; columns checked by `col @"Name"` | +| **Typed** | `$(declareTable db "artists")` + `readTableTyped Schema` | a compile-time schema type; columns checked by `col "Name"` | | **Persistent** | `$(declareEntity db "artists")` | a full `persistent` entity: typed `Filter` DSL, write-back | ## Tier 0: runtime reads @@ -114,7 +114,7 @@ D.toMarkdown' <$> readTableWith "./data/chinook.db" "artists" (allRows & limit 3 `declareTable` reads the schema at compile time and emits just the schema type. You read into it with `readTableTyped`, where the schema is a type argument and the database and table are ordinary -values. Column references go through `col @"Name"`, checked against the schema, so a typo or a wrong +values. Column references go through `col "Name"`, checked against the schema, so a typo or a wrong type is a compile error. Nothing is keyed on a generated function name. ```haskell @@ -134,11 +134,11 @@ $(declareTable "./data/chinook.db" "artists") > -`readTableTyped @ArtistsSchema` reads any database/table into a `TypedDataFrame ArtistsSchema` (it +`readTableTyped ArtistsSchema` reads any database/table into a `TypedDataFrame ArtistsSchema` (it validates the schema as it reads). You can bind your own reader: ```haskell -artists = readTableTyped @ArtistsSchema "./data/chinook.db" "artists" +artists = readTableTyped ArtistsSchema "./data/chinook.db" "artists" ``` > @@ -158,11 +158,11 @@ D.toMarkdown' . D.take 5 . DT.thaw <$> artists > | 4 | Just "Alanis Morissette" | > | 5 | Just "Alice In Chains" | -Column access is checked against the schema. `col @"Name"` only compiles because `"Name"` is a +Column access is checked against the schema. 
`col "Name"` only compiles because `"Name"` is a column of `ArtistsSchema` (its element type is `Maybe Text`): ```haskell -DT.columnAsList @"Name" . DT.take 3 <$> artists +DT.columnAsList "Name" . DT.take 3 <$> artists ``` > @@ -172,7 +172,7 @@ A filter on a column that doesn't exist (or has the wrong type) is a compile err runtime surprise: ```haskell -D.toMarkdown' . DT.thaw . DT.filterWhere (DT.col @"Name" .==. DT.lit (Just "Accept")) <$> artists +D.toMarkdown' . DT.thaw . DT.filterWhere (DT.col "Name" .==. DT.lit (Just "Accept")) <$> artists ``` > @@ -181,15 +181,15 @@ D.toMarkdown' . DT.thaw . DT.filterWhere (DT.col @"Name" .==. DT.lit (Just "Acce > | 2 | Just "Accept" | Because the database is a value, reading the same table from two sources to join them is just two -calls with the same `@ArtistsSchema`: +calls with the same `ArtistsSchema`: ```text -a <- readTableTyped @ArtistsSchema "europe.sqlite" "artists" -b <- readTableTyped @ArtistsSchema "us.sqlite" "artists" +a <- readTableTyped ArtistsSchema "europe.sqlite" "artists" +b <- readTableTyped ArtistsSchema "us.sqlite" "artists" -- DT.thaw a / DT.thaw b, then DataFrame.innerJoin on "ArtistId", etc. ``` -(`readSqlTyped @cols db "SELECT ... JOIN ..."` does the same for an arbitrary query. The Postgres +(`readSqlTyped cols db "SELECT ... JOIN ..."` does the same for an arbitrary query. The Postgres section below reads this same `ArtistsSchema` from a different backend.) ## Tier 2: generate a `persistent` entity diff --git a/dataframe-persistent/docs/base_scripts/base_readme.md b/dataframe-persistent/docs/base_scripts/base_readme.md index bc85706..6ddcec8 100644 --- a/dataframe-persistent/docs/base_scripts/base_readme.md +++ b/dataframe-persistent/docs/base_scripts/base_readme.md @@ -26,7 +26,7 @@ write a `persistent` entity, a `persistLowerCase` block, or any instances. 
| Tier | You write | You get | |------|-----------|---------| | **Runtime** | `readTable db "artists"` | a `DataFrame`, types inferred from the schema | -| **Typed** | `$(declareTable db "artists")` + `readTableTyped @Schema` | a compile-time schema type; columns checked by `col @"Name"` | +| **Typed** | `$(declareTable db "artists")` + `readTableTyped @Schema` | a compile-time schema type; columns checked by `col "Name"` | | **Persistent** | `$(declareEntity db "artists")` | a full `persistent` entity: typed `Filter` DSL, write-back | ## Tier 0: runtime reads @@ -78,7 +78,7 @@ D.toMarkdown' <$> readTableWith "./data/chinook.db" "artists" (allRows & limit 3 `declareTable` reads the schema at compile time and emits just the schema type. You read into it with `readTableTyped`, where the schema is a type argument and the database and table are ordinary -values. Column references go through `col @"Name"`, checked against the schema, so a typo or a wrong +values. Column references go through `col "Name"`, checked against the schema, so a typo or a wrong type is a compile error. Nothing is keyed on a generated function name. ```haskell @@ -107,18 +107,18 @@ artists = readTableTyped @ArtistsSchema "./data/chinook.db" "artists" D.toMarkdown' . D.take 5 . DT.thaw <$> artists ``` -Column access is checked against the schema. `col @"Name"` only compiles because `"Name"` is a +Column access is checked against the schema. `col "Name"` only compiles because `"Name"` is a column of `ArtistsSchema` (its element type is `Maybe Text`): ```haskell -DT.columnAsList @"Name" . DT.take 3 <$> artists +DT.columnAsList "Name" . DT.take 3 <$> artists ``` A filter on a column that doesn't exist (or has the wrong type) is a compile error rather than a runtime surprise: ```haskell -D.toMarkdown' . DT.thaw . DT.filterWhere (DT.col @"Name" .==. DT.lit (Just "Accept")) <$> artists +D.toMarkdown' . DT.thaw . DT.filterWhere (DT.col "Name" .==. 
DT.lit (Just "Accept")) <$> artists ``` Because the database is a value, reading the same table from two sources to join them is just two diff --git a/dataframe-persistent/src/DataFrame/IO/Persistent.hs b/dataframe-persistent/src/DataFrame/IO/Persistent.hs index 12d50d2..700898c 100644 --- a/dataframe-persistent/src/DataFrame/IO/Persistent.hs +++ b/dataframe-persistent/src/DataFrame/IO/Persistent.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE ExistentialQuantification #-} diff --git a/dataframe-persistent/src/DataFrame/IO/Persistent/Read.hs b/dataframe-persistent/src/DataFrame/IO/Persistent/Read.hs index aeca828..653cd01 100644 --- a/dataframe-persistent/src/DataFrame/IO/Persistent/Read.hs +++ b/dataframe-persistent/src/DataFrame/IO/Persistent/Read.hs @@ -1,6 +1,6 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} @@ -233,15 +233,15 @@ selectToDataFrame filters opts = entitiesToDataFrame :: forall r. (PersistEntity r) => [Entity r] -> DataFrame entitiesToDataFrame ents = - assembleInferred ("id" : entityFieldNames @r) (map entityRow ents) + assembleInferred ("id" : entityFieldNames r) (map entityRow ents) -- @keyToValues@ decodes any key (single or backend) without a @ToBackendKey@ -- constraint; single-column keys (the common case) yield the @id@ column. entityRow :: (PersistEntity r) => Entity r -> [PersistValue] entityRow (Entity k v) = keyToValues k ++ toPersistFields v -entityFieldNames :: forall r. (PersistEntity r) => [Text] -entityFieldNames = +entityFieldNames :: forall r -> (PersistEntity r) => [Text] +entityFieldNames r = map (unFieldNameHS . 
fieldHaskell) (getEntityFields (entityDef (Nothing :: Maybe r))) diff --git a/dataframe-persistent/src/DataFrame/IO/Persistent/Read/Columns.hs b/dataframe-persistent/src/DataFrame/IO/Persistent/Read/Columns.hs index 3740373..69b7853 100644 --- a/dataframe-persistent/src/DataFrame/IO/Persistent/Read/Columns.hs +++ b/dataframe-persistent/src/DataFrame/IO/Persistent/Read/Columns.hs @@ -1,6 +1,6 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} -- 'buildNullableColumn' deliberately constrains @Columnable (Maybe a)@ (needed to @@ -44,16 +44,16 @@ import DataFrame.Typed.Types (TypedDataFrame) type ColumnReader = [PersistValue] -> Column -- | Build a non-null column, decoding each value with 'fromPersistValue'. -buildColumn :: forall a. (Columnable a, PersistField a) => ColumnReader -buildColumn pvs = case traverse fromPersistValue pvs of - Left err -> error (decodeError (typeName @a) err) +buildColumn :: forall a -> (Columnable a, PersistField a) => ColumnReader +buildColumn a pvs = case traverse fromPersistValue pvs of + Left err -> error (decodeError (typeName a) err) Right (xs :: [a]) -> fromList xs -- | Build a nullable column (@PersistNull@ becomes @Nothing@). buildNullableColumn :: - forall a. 
(Columnable (Maybe a), PersistField a) => ColumnReader -buildNullableColumn pvs = case traverse fromPersistValue pvs of - Left err -> error (decodeError (typeName @(Maybe a)) err) + forall a -> (Columnable (Maybe a), PersistField a) => ColumnReader +buildNullableColumn a pvs = case traverse fromPersistValue pvs of + Left err -> error (decodeError (typeName (Maybe a)) err) Right (xs :: [Maybe a]) -> fromList xs {- | Infer a column's type by sniffing the first non-null value; nullable if any @@ -67,24 +67,24 @@ columnReaderFor ht True = nullableReaderFor ht columnReaderFor ht False = readerFor ht readerFor :: HaskellType -> ColumnReader -readerFor HTInt = buildColumn @Int -readerFor HTDouble = buildColumn @Double -readerFor HTText = buildColumn @Text -readerFor HTBool = buildColumn @Bool -readerFor HTByteString = buildColumn @ByteString -readerFor HTDay = buildColumn @Day -readerFor HTUTCTime = buildColumn @UTCTime -readerFor HTTimeOfDay = buildColumn @TimeOfDay +readerFor HTInt = buildColumn Int +readerFor HTDouble = buildColumn Double +readerFor HTText = buildColumn Text +readerFor HTBool = buildColumn Bool +readerFor HTByteString = buildColumn ByteString +readerFor HTDay = buildColumn Day +readerFor HTUTCTime = buildColumn UTCTime +readerFor HTTimeOfDay = buildColumn TimeOfDay nullableReaderFor :: HaskellType -> ColumnReader -nullableReaderFor HTInt = buildNullableColumn @Int -nullableReaderFor HTDouble = buildNullableColumn @Double -nullableReaderFor HTText = buildNullableColumn @Text -nullableReaderFor HTBool = buildNullableColumn @Bool -nullableReaderFor HTByteString = buildNullableColumn @ByteString -nullableReaderFor HTDay = buildNullableColumn @Day -nullableReaderFor HTUTCTime = buildNullableColumn @UTCTime -nullableReaderFor HTTimeOfDay = buildNullableColumn @TimeOfDay +nullableReaderFor HTInt = buildNullableColumn Int +nullableReaderFor HTDouble = buildNullableColumn Double +nullableReaderFor HTText = buildNullableColumn Text +nullableReaderFor HTBool = 
buildNullableColumn Bool +nullableReaderFor HTByteString = buildNullableColumn ByteString +nullableReaderFor HTDay = buildNullableColumn Day +nullableReaderFor HTUTCTime = buildNullableColumn UTCTime +nullableReaderFor HTTimeOfDay = buildNullableColumn TimeOfDay inferHaskellType :: [PersistValue] -> HaskellType inferHaskellType = maybe HTText haskellTypeOf . find (not . isNull) @@ -103,8 +103,8 @@ isNull :: PersistValue -> Bool isNull PersistNull = True isNull _ = False -typeName :: forall a. (Columnable a) => String -typeName = show (typeRep (Proxy @a)) +typeName :: forall a -> (Columnable a) => String +typeName a = show (typeRep (Proxy @a)) decodeError :: String -> Text -> String decodeError ty err = "buildColumn: failed to decode column as " <> ty <> ": " <> T.unpack err diff --git a/dataframe-persistent/src/DataFrame/IO/Persistent/TH.hs b/dataframe-persistent/src/DataFrame/IO/Persistent/TH.hs index 3b7dc56..be1e599 100644 --- a/dataframe-persistent/src/DataFrame/IO/Persistent/TH.hs +++ b/dataframe-persistent/src/DataFrame/IO/Persistent/TH.hs @@ -70,7 +70,7 @@ deriveEntityToDataFrame entityName = do trace (nm <> " :: Expr " <> show ty) pure () let n = mkName nm sig <- sigD n [t|Expr $(pure ty)|] - val <- valD (varP n) (normalB [|col $(lift colName)|]) [] + val <- valD (varP n) (normalB [|col $(pure $ TypeE ty) $(lift colName)|]) [] pure [sig, val] return (instanceDec : concat dataframeExprs) diff --git a/dataframe-persistent/tests/PersistentTests.hs b/dataframe-persistent/tests/PersistentTests.hs index 31dda3c..6343637 100644 --- a/dataframe-persistent/tests/PersistentTests.hs +++ b/dataframe-persistent/tests/PersistentTests.hs @@ -197,7 +197,7 @@ testDataFrameOperations = TestCase $ withTestDb $ do assertEqual "Young users count" 2 (nRows youngUsers) -- Sort operation - let sorted = DF.sortBy [DF.Asc (F.col @Int "age")] df + let sorted = DF.sortBy [DF.Asc (F.col Int "age")] df let ages = DF.columnAsList test_user_age sorted assertEqual "Ages sorted" [25, 28, 
30, 35] ages diff --git a/dataframe-persistent/tests/SchemaTHTests.hs b/dataframe-persistent/tests/SchemaTHTests.hs index cc106d6..eab7805 100644 --- a/dataframe-persistent/tests/SchemaTHTests.hs +++ b/dataframe-persistent/tests/SchemaTHTests.hs @@ -88,7 +88,7 @@ testDeclareTableTyped = TestCase $ do tdf <- readTableTyped @ArtistsSchema "./data/chinook.db" "artists" assertEqual "typed row count" 275 (nRows (DT.thaw tdf)) -- @col \@"Name"@ here is checked against the generated ArtistsSchema: - let names = DT.columnAsList @"Name" tdf + let names = DT.columnAsList "Name" tdf assertEqual "first artist" (Just (Just "AC/DC")) (listToMaybe names) -- Tier 2: persistent entity generation ------------------------------------- diff --git a/dataframe-th/src/DataFrame/Internal/Schema/TH.hs b/dataframe-th/src/DataFrame/Internal/Schema/TH.hs index 0a5740e..0f89c11 100644 --- a/dataframe-th/src/DataFrame/Internal/Schema/TH.hs +++ b/dataframe-th/src/DataFrame/Internal/Schema/TH.hs @@ -52,7 +52,7 @@ deriveSchema tyName = do tupleE (colName, _, fTy) = TupE [ Just (AppE (VarE 'T.pack) (LitE (StringL colName))) - , Just (AppTypeE (VarE 'schemaType) fTy) + , Just (AppE (VarE 'schemaType) (TypeE fTy)) ] schemaBody = AppE (VarE 'makeSchema) (ListE (map tupleE entries)) @@ -67,7 +67,10 @@ deriveSchema tyName = do (VarP accName) ( NormalB ( AppE - (VarE 'col) + ( AppE + (VarE 'col) + (TypeE fTy) + ) ( AppE (VarE 'T.pack) (LitE (StringL colName)) diff --git a/dataframe-th/src/DataFrame/TH/Records.hs b/dataframe-th/src/DataFrame/TH/Records.hs index 051b292..7ae57bb 100644 --- a/dataframe-th/src/DataFrame/TH/Records.hs +++ b/dataframe-th/src/DataFrame/TH/Records.hs @@ -108,5 +108,5 @@ declareColumnsWithPrefix' prefix df = ty <- typeFromString (words tyStr) let n = mkName (T.unpack nm) sig <- sigD n [t|Expr $(pure ty)|] - val <- valD (varP n) (normalB [|col $(TH.lift raw)|]) [] + val <- valD (varP n) (normalB [| col $(pure $ TypeE ty) $(TH.lift raw) |]) [] pure [sig, val] diff --git 
a/dataframe-viz/README.md b/dataframe-viz/README.md index b23092d..f8417bf 100644 --- a/dataframe-viz/README.md +++ b/dataframe-viz/README.md @@ -64,9 +64,9 @@ df = fromNamedColumns , ("region", fromList (["INLAND","NEAR BAY","INLAND","NEAR OCEAN","ISLAND","INLAND","NEAR BAY","INLAND"] :: [Text])) ] -income = F.col @Double "income" -value = F.col @Double "value" -region = F.col @Text "region" +income = F.col Double "income" +value = F.col Double "value" +region = F.col Text "region" -- show a Vega-Lite spec without its (verbose) inlined data. -- Returns String so scripths prints it raw rather than show-escaped. diff --git a/dataframe-viz/docs/base_scripts/base_readme.md b/dataframe-viz/docs/base_scripts/base_readme.md index 77429a9..2b56d70 100644 --- a/dataframe-viz/docs/base_scripts/base_readme.md +++ b/dataframe-viz/docs/base_scripts/base_readme.md @@ -64,9 +64,9 @@ df = fromNamedColumns , ("region", fromList (["INLAND","NEAR BAY","INLAND","NEAR OCEAN","ISLAND","INLAND","NEAR BAY","INLAND"] :: [Text])) ] -income = F.col @Double "income" -value = F.col @Double "value" -region = F.col @Text "region" +income = F.col Double "income" +value = F.col Double "value" +region = F.col Text "region" -- show a Vega-Lite spec without its (verbose) inlined data. -- Returns String so scripths prints it raw rather than show-escaped. diff --git a/dataframe-viz/src/DataFrame/Display/Internal/Common.hs b/dataframe-viz/src/DataFrame/Display/Internal/Common.hs index 7a167a6..cef3b5c 100644 --- a/dataframe-viz/src/DataFrame/Display/Internal/Common.hs +++ b/dataframe-viz/src/DataFrame/Display/Internal/Common.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE ExplicitNamespaces #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} @@ -175,9 +174,9 @@ vectorToDoubles :: forall a. 
(Columnable a, Show a) => V.Vector a -> [Double] vectorToDoubles vec = case testEquality (typeRep @a) (typeRep @Double) of Just Refl -> V.toList vec - Nothing -> case sIntegral @a of + Nothing -> case sIntegral a of STrue -> V.toList $ V.map fromIntegral vec - SFalse -> case sFloating @a of + SFalse -> case sFloating a of STrue -> V.toList $ V.map realToFrac vec SFalse -> error $ "Column is not numeric (type: " ++ show (typeRep @a) ++ ")" @@ -187,9 +186,9 @@ unboxedVectorToDoubles :: unboxedVectorToDoubles vec = case testEquality (typeRep @a) (typeRep @Double) of Just Refl -> VU.toList vec - Nothing -> case sIntegral @a of + Nothing -> case sIntegral a of STrue -> VU.toList $ VU.map fromIntegral vec - SFalse -> case sFloating @a of + SFalse -> case sFloating a of STrue -> VU.toList $ VU.map realToFrac vec SFalse -> error $ "Column is not numeric (type: " ++ show (typeRep @a) ++ ")" diff --git a/dataframe-viz/src/DataFrame/Display/Internal/VegaLite.hs b/dataframe-viz/src/DataFrame/Display/Internal/VegaLite.hs index 4280f52..22941cf 100644 --- a/dataframe-viz/src/DataFrame/Display/Internal/VegaLite.hs +++ b/dataframe-viz/src/DataFrame/Display/Internal/VegaLite.hs @@ -1,9 +1,9 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE PolyKinds #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} @@ -164,8 +164,8 @@ emptySpec m = VLSpec m [] [] Nothing 600 400 [] 'Quantitative', date/time → 'Temporal', everything else → 'Nominal'. @Maybe a@ classifies as its inner type. -} -fieldTypeOf :: forall a. (Columnable a) => FieldType -fieldTypeOf = classify (typeRep @a) +fieldTypeOf :: forall a -> (Columnable a) => FieldType +fieldTypeOf a = classify (typeRep @a) classify :: forall k (x :: k). TypeRep x -> FieldType classify tr @@ -214,7 +214,7 @@ stored under the given fallback name. 
resolveField :: forall a. (Columnable a) => DataFrame -> T.Text -> Expr a -> ResolvedField resolveField df fallbackName expr = - let ft = fieldTypeOf @a + let ft = fieldTypeOf a (name, col) = case expr of Col cname -> (cname, lookupCol cname) _ -> (fallbackName, materialiseExpr df expr) diff --git a/docs/base/haskell_for_data_analysis.md b/docs/base/haskell_for_data_analysis.md index 8ddc9b0..bdd155b 100644 --- a/docs/base/haskell_for_data_analysis.md +++ b/docs/base/haskell_for_data_analysis.md @@ -99,11 +99,11 @@ The type of `D.take` is `Int -> DataFrame -> DataFrame` — an integer in, a dat ### Opting into stronger type safety -`F.col @Type "colName"` tells the compiler what type to expect in a column. If you get the type wrong you hear about it immediately. +`F.col Type "colName"` tells the compiler what type to expect in a column. If you get the type wrong you hear about it immediately. ```haskell -- Runtime-checked (flexible): -D.mean (F.col @Double "High Temperature (C)") weather +D.mean (F.col Double "High Temperature (C)") weather -- Compile-time-checked — $(D.declareColumns …) generates typed bindings: $(D.declareColumns weather) @@ -165,9 +165,9 @@ Passing a character instead of an integer to `take`: Passing a `Double` column expression where an `Int` is expected: ```haskell --- D.mean (F.col @Int "longitude") housing +-- D.mean (F.col Int "longitude") housing -- error: Couldn't match type 'Double' with 'Int' for column "longitude" --- Fix: D.mean (F.col @Double "longitude") housing +-- Fix: D.mean (F.col Double "longitude") housing ``` Errors tell you the exact location and what went wrong. Over time you learn to read them as precise hints. 
@@ -214,13 +214,13 @@ The companions `filterNothing` and `filterAllNothing` do the opposite — they l ```haskell TIO.putStrLn $ D.toMarkdown - (D.impute (F.col @(Maybe Int) "id") 0 messy) + (D.impute (F.col (Maybe Int) "id") 0 messy) ``` Notice the `@(Maybe Int)` type annotation — it tells the imputer what type the column holds. Passing the wrong type throws a clear runtime error: ```haskell --- D.impute (F.col @(Maybe Double) "id") 0 messy +-- D.impute (F.col (Maybe Double) "id") 0 messy -- Exception: Type Mismatch — expected 'Maybe Double' but column is 'Maybe Integer' ``` @@ -230,7 +230,7 @@ Notice the `@(Maybe Int)` type annotation — it tells the imputer what type the ```haskell TIO.putStrLn $ D.toMarkdown $ D.take 10 - (D.imputeWith F.mean (F.col @(Maybe Double) "total_bedrooms") housing) + (D.imputeWith F.mean (F.col (Maybe Double) "total_bedrooms") housing) ``` ### Removing duplicates @@ -282,23 +282,23 @@ TIO.putStrLn $ D.toMarkdown meat ```haskell TIO.putStrLn $ D.toMarkdown - (D.derive "kilograms" (F.col @Double "ounces" * 0.03) meat) + (D.derive "kilograms" (F.col Double "ounces" * 0.03) meat) ``` ### Expressions and F.col -`F.col @Type "name"` creates an *expression* — a typed reference to a column that can be combined with arithmetic operators, boolean operators, or custom functions. +`F.col Type "name"` creates an *expression* — a typed reference to a column that can be combined with arithmetic operators, boolean operators, or custom functions. 
``` -F.col @Double "ounces" -- :: Expr Double -F.col @Text "food" -- :: Expr Text +F.col Double "ounces" -- :: Expr Double +F.col Text "food" -- :: Expr Text ``` You can compose expressions: ```haskell roomsPerHousehold = D.derive "rooms_per_household" - (F.col @Double "total_rooms" / F.col @Double "households") + (F.col Double "total_rooms" / F.col Double "households") housing TIO.putStrLn $ D.toMarkdown (D.take 5 roomsPerHousehold) @@ -321,7 +321,7 @@ meatToAnimal "nova lox" = "salmon" meatToAnimal _ = "unknown" TIO.putStrLn $ D.toMarkdown - (D.derive "animal" (F.lift meatToAnimal (F.col @Text "food")) meat) + (D.derive "animal" (F.lift meatToAnimal (F.col Text "food")) meat) ``` ### Recoding values @@ -332,7 +332,7 @@ TIO.putStrLn $ D.toMarkdown animalMapping = [("bacon","pig"),("pulled pork","pig"),("pastrami","cow"),("corned beef","cow"),("honey ham","pig"),("nova lox","salmon")] TIO.putStrLn $ D.toMarkdown - (D.derive "animal2" (F.recode animalMapping (F.col @Text "food")) meat) + (D.derive "animal2" (F.recode animalMapping (F.col Text "food")) meat) ``` ### Opting into stronger type safety @@ -502,7 +502,7 @@ TIO.putStrLn $ D.toMarkdown (D.take 5 withIncomeCast) All of the above reads the entire file into memory upfront. For datasets larger than available RAM, `DataFrame.Lazy` offers a pull-based streaming executor: operations build a logical plan tree and nothing is read from disk until `runDataFrame` is called. The optimizer pushes `filter` predicates down to the scan, so unneeded rows are discarded before any column is allocated. -The Lazy path requires an explicit schema — there is no inference. Build one with `schemaType @T`: +The Lazy path requires an explicit schema — there is no inference. 
Build one with `schemaType T`: ```haskell -- cabal: build-depends: containers @@ -511,16 +511,16 @@ import DataFrame.Internal.Schema (Schema (..), schemaType) import qualified Data.Map.Strict as M housingSchema = Schema $ M.fromList - [ ("longitude", schemaType @Double) - , ("latitude", schemaType @Double) - , ("housing_median_age", schemaType @Double) - , ("total_rooms", schemaType @Double) - , ("total_bedrooms", schemaType @(Maybe Double)) - , ("population", schemaType @Double) - , ("households", schemaType @Double) - , ("median_income", schemaType @Double) - , ("median_house_value", schemaType @Double) - , ("ocean_proximity", schemaType @T.Text) + [ ("longitude", schemaType Double) + , ("latitude", schemaType Double) + , ("housing_median_age", schemaType Double) + , ("total_rooms", schemaType Double) + , ("total_bedrooms", schemaType (Maybe Double)) + , ("population", schemaType Double) + , ("households", schemaType Double) + , ("median_income", schemaType Double) + , ("median_house_value", schemaType Double) + , ("ocean_proximity", schemaType T.Text) ] ``` @@ -529,7 +529,7 @@ Build and run a lazy pipeline: ```haskell lazyQuery = L.scanCsv housingSchema "../data/housing.csv" - |> L.filter (F.col @Double "median_house_value" .>=. 300000) + |> L.filter (F.col Double "median_house_value" .>=. 300000) |> L.select ["longitude","latitude","median_house_value","ocean_proximity"] |> L.take 10 @@ -611,7 +611,7 @@ Three functions manipulate `Text` columns inside expressions. 
emails = D.fromNamedColumns [ ("email", D.fromList ["alice@example.com", "bob@haskell.org", "cara@data.io" :: T.Text]) ] -emailParts = D.derive "parts" (F.splitOn "@" (F.col @T.Text "email")) emails +emailParts = D.derive "parts" (F.splitOn "@" (F.col T.Text "email")) emails TIO.putStrLn $ D.toMarkdown emailParts ``` @@ -621,7 +621,7 @@ TIO.putStrLn $ D.toMarkdown emailParts `F.match pattern expr` returns `Just` the first regex match, or `Nothing` if there is none: ```haskell -domains = D.derive "domain" (F.match "[a-z]+\\.[a-z]+" (F.col @T.Text "email")) emails +domains = D.derive "domain" (F.match "[a-z]+\\.[a-z]+" (F.col T.Text "email")) emails TIO.putStrLn $ D.toMarkdown domains ``` @@ -631,7 +631,7 @@ TIO.putStrLn $ D.toMarkdown domains `F.matchAll pattern expr` returns a list of *all* matches: ```haskell -withWords = D.derive "words" (F.matchAll "[a-z]+" (F.col @T.Text "email")) emails +withWords = D.derive "words" (F.matchAll "[a-z]+" (F.col T.Text "email")) emails TIO.putStrLn $ D.toMarkdown withWords ``` @@ -650,7 +650,7 @@ events = D.fromNamedColumns , ("date_text", D.fromList ["2025-03-01","2025-06-15","2025-09-30" :: T.Text]) ] -withDates = D.derive "date" (F.parseDate @Day "%Y-%m-%d" (F.col @T.Text "date_text")) events +withDates = D.derive "date" (F.parseDate @Day "%Y-%m-%d" (F.col T.Text "date_text")) events TIO.putStrLn $ D.toMarkdown withDates ``` @@ -849,7 +849,7 @@ Histograms show the distribution of a single numeric variable. ```haskell import qualified DataFrame.Display.Terminal.Plot as P -houseValues = D.columnAsList (F.col @Double "median_house_value") housing +houseValues = D.columnAsList (F.col Double "median_house_value") housing TIO.putStrLn $ histogram @@ -875,8 +875,8 @@ P.plotHistogram "median_house_value" housing Scatter plots reveal relationships between two numeric variables. 
```haskell -incomes = D.columnAsList (F.col @Double "median_income") housing -values = D.columnAsList (F.col @Double "median_house_value") housing +incomes = D.columnAsList (F.col Double "median_income") housing +values = D.columnAsList (F.col Double "median_house_value") housing TIO.putStrLn $ scatter @@ -915,7 +915,7 @@ P.plotBoxPlots ["median_house_value", "median_income"] housing Line graphs show trends along a continuous x-axis. The second argument is a list of y-axis column names. ```haskell -ages = D.columnAsList (F.col @Double "housing_median_age") housing +ages = D.columnAsList (F.col Double "housing_median_age") housing TIO.putStrLn $ lineGraph @@ -937,10 +937,10 @@ P.plotCorrelationMatrix housing ### Opting into stronger type safety -Using `F.col @Double` in `D.columnAsList` ensures only numeric columns reach the plotting functions. Passing a `Text` column to a histogram is a compile error: +Using `F.col Double` in `D.columnAsList` ensures only numeric columns reach the plotting functions. 
Passing a `Text` column to a histogram is a compile error: ```haskell --- D.columnAsList (F.col @Double "ocean_proximity") housing +-- D.columnAsList (F.col Double "ocean_proximity") housing -- error: column "ocean_proximity" has type Text, not Double ``` @@ -960,11 +960,11 @@ import DataFrame.Operators (as) grouped = D.groupBy ["ocean_proximity"] housing summary = D.aggregate - [ F.count (F.col @Double "median_house_value") `as` "count" - , F.mean (F.col @Double "median_house_value") `as` "mean_value" - , F.median (F.col @Double "median_house_value") `as` "median_value" - , F.maximum (F.col @Double "median_house_value") `as` "max_value" - , F.minimum (F.col @Double "median_house_value") `as` "min_value" + [ F.count (F.col Double "median_house_value") `as` "count" + , F.mean (F.col Double "median_house_value") `as` "mean_value" + , F.median (F.col Double "median_house_value") `as` "median_value" + , F.maximum (F.col Double "median_house_value") `as` "max_value" + , F.minimum (F.col Double "median_house_value") `as` "min_value" ] grouped TIO.putStrLn $ D.toMarkdown summary @@ -999,7 +999,7 @@ $(D.declareColumns meat) meatGrouped = D.groupBy ["food"] meat meatSummary = D.aggregate - [ F.count (F.col @T.Text "food") `as` "count" + [ F.count (F.col T.Text "food") `as` "count" , F.sum ounces `as` "total_oz" , F.mean ounces `as` "mean_oz" ] meatGrouped @@ -1012,7 +1012,7 @@ TIO.putStrLn $ D.toMarkdown meatSummary `D.frequencies expr df` returns a frequency table — row counts and percentages for each unique value: ```haskell -TIO.putStrLn $ D.toMarkdown (D.frequencies (F.col @T.Text "ocean_proximity") housing) +TIO.putStrLn $ D.toMarkdown (D.frequencies (F.col T.Text "ocean_proximity") housing) ``` ### Pearson correlation @@ -1031,7 +1031,7 @@ Aggregate first, then derive new columns from the summary: totalRows = fromIntegral (D.nRows housing) :: Double withShare = D.derive "pct_of_total" - (F.toDouble (F.col @Int "count") / F.lit totalRows * F.lit 100.0) + (F.toDouble 
(F.col Int "count") / F.lit totalRows * F.lit 100.0) summary TIO.putStrLn $ D.toMarkdown withShare @@ -1043,8 +1043,8 @@ A full group-normalise pipeline: group → compute mean and stddev → derive z- ```haskell incomeByProx = D.aggregate - [ F.mean (F.col @Double "median_income") `as` "mean_income" - , F.stddev (F.col @Double "median_income") `as` "stddev_income" + [ F.mean (F.col Double "median_income") `as` "mean_income" + , F.stddev (F.col Double "median_income") `as` "stddev_income" ] (D.groupBy ["ocean_proximity"] housing) TIO.putStrLn $ D.toMarkdown incomeByProx @@ -1071,7 +1071,7 @@ A type-level mistake on an aggregated column — say, treating `count` (an `Int` ### Motivation -Throughout this guide we have used `F.col @Type "colName"` to reference columns. The expressions are typed (the `@Type` annotation is checked), but the schema of the *dataframe* is not in the Haskell type. This creates a subtle foot-gun: an `Expr` built against one dataframe can be silently applied to a completely different dataframe — the compiler will accept it, but at runtime you get either a missing-column error or, worse, wrong results from a same-named column with different semantics. +Throughout this guide we have used `F.col Type "colName"` to reference columns. The expressions are typed (the `@Type` annotation is checked), but the schema of the *dataframe* is not in the Haskell type. This creates a subtle foot-gun: an `Expr` built against one dataframe can be silently applied to a completely different dataframe — the compiler will accept it, but at runtime you get either a missing-column error or, worse, wrong results from a same-named column with different semantics. `DataFrame.Typed` solves this by moving the full column schema into the type system. The schema becomes a type-level list of `DT.Column "name" Type` entries. If a column does not exist in a `TypedDataFrame`, or if you use it with the wrong type, the code will not compile. 
@@ -1115,12 +1115,12 @@ thousing <- either (error . show) id . DT.freezeWithError @Housing <$> D.readCsv ### Typed transforms -`DT.col @"colName"` looks up the column in the schema at compile time. The type of the expression is inferred automatically — no `@Type` annotation needed. `DT.derive @"newCol"` adds the column to the schema type so subsequent steps can reference it: +`DT.col "colName"` looks up the column in the schema at compile time. The type of the expression is inferred automatically — no `Type` argument needed. `DT.derive "newCol"` adds the column to the schema type so subsequent steps can reference it: ```haskell typedResult = thousing - |> DT.derive @"rooms_per_household" (DT.col @"total_rooms" / DT.col @"households") - |> DT.derive @"bedrooms_per_household" (DT.col @"total_bedrooms" / DT.col @"households") + |> DT.derive "rooms_per_household" (DT.col "total_rooms" / DT.col "households") + |> DT.derive "bedrooms_per_household" (DT.col "total_bedrooms" / DT.col "households") TIO.putStrLn $ D.toMarkdown (DT.thaw typedResult) ``` @@ -1132,7 +1132,7 @@ TIO.putStrLn $ D.toMarkdown (DT.thaw typedResult) ```haskell typedGrouped = thousing |> DT.groupBy @'["ocean_proximity"] - |> DT.aggregate (DT.as @"count" (DT.count (DT.col @"median_house_value"))) + |> DT.aggregate (DT.as @"count" (DT.count (DT.col "median_house_value"))) TIO.putStrLn $ D.toMarkdown (DT.thaw typedGrouped) ``` @@ -1143,7 +1143,7 @@ TIO.putStrLn $ D.toMarkdown (DT.thaw typedGrouped) | Layer | Best for | |---|---| -| Eager string API (`F.col @T "name"`) | exploration, quick scripts, one-off analyses | +| Eager string API (`F.col T "name"`) | exploration, quick scripts, one-off analyses | | `FrameM` | multi-step transformation pipelines where threading `df` by hand is noisy | | `DataFrame.Lazy` | files larger than RAM; push-down filters; ETL pipelines | | `DataFrame.Typed` | production pipelines where schema correctness must be guaranteed at compile time | diff --git 
a/docs/base_scripts/base_exploratory_data_analysis_primer.md b/docs/base_scripts/base_exploratory_data_analysis_primer.md index e3c593d..74a09d9 100644 --- a/docs/base_scripts/base_exploratory_data_analysis_primer.md +++ b/docs/base_scripts/base_exploratory_data_analysis_primer.md @@ -66,9 +66,9 @@ For a given column calculating the mean and median is fairly straightfoward and ```haskell import qualified DataFrame.Functions as F -D.mean (F.col @Double "housing_median_age") df +D.mean (F.col Double "housing_median_age") df -D.median (F.col @Double "housing_median_age") df +D.median (F.col Double "housing_median_age") df ``` Note: You need to pass the expression for the column into these functions not the column name so the program knows that you are actually calling `mean` or `median` on a column containing numbers. @@ -103,7 +103,7 @@ From the small sample it does seem like there are some wild deviations. The firs ```haskell df |> D.derive "deviation" (abs (median_house_value - (F.mean median_house_value))) |> D.select ["median_house_value", "deviation"] - |> D.mean (F.col @Double "deviation") + |> D.mean (F.col Double "deviation") ``` Getting the mean of the deviations was as simple as tacking `D.mean "deviation"` to the end of our existing pipeline. Composability is a big strength of Haskell code. 
@@ -125,7 +125,7 @@ $(D.declareColumns withDeviation) import Data.Maybe sumOfSqureDifferences = withDeviation |> D.derive "deviation^2" (F.pow deviation 2) - |> D.sum (F.col @Double "deviation^2") + |> D.sum (F.col Double "deviation^2") n = fromIntegral (fst (D.dimensions df) - 1) @@ -136,7 +136,7 @@ The standard deviation being larger than the mean absolute deviation means we do We can calculate the standard deviation in one line as follows: ```haskell -D.standardDeviation (F.col @Double "median_house_value") df +D.standardDeviation (F.col Double "median_house_value") df ``` ## Interquartile range (IQR) @@ -147,7 +147,7 @@ The IQR is a more robust measure of spread than the variance or standard deviati For our dataset: ```haskell -D.interQuartileRange (F.col @Double "median_house_value") df +D.interQuartileRange (F.col Double "median_house_value") df ``` This is larger than the standard deviation but not by much. This means that outliers don't have a significant influence on the distribution and most values are close to typical. @@ -158,7 +158,7 @@ Variance is the square of the standard deviation. It is much more sensitive to o In our example it's a very large number: ``` haskell -D.variance (F.col @Double "median_house_value") df +D.variance (F.col Double "median_house_value") df ``` The variance is more useful when comparing different datasets. If the variance of house prices in Minnesota was lower than California this would mean there were much fewer really cheap and really expensive house in Minnesota. @@ -173,7 +173,7 @@ The intuition behind why a positive skew is left shifted follows from the formul A skewness score between -0.5 and 0.5 means the data has little skew. A score between -0.5 and -1 or 0.5 and 1 means the data has moderate skew. A skewness greater than 1 or less than -1 means the data is heavily skewed. 
```haskell -D.skewness (F.col @Double "median_house_value") df +D.skewness (F.col Double "median_house_value") df ``` So the median house value is moderately skewed to the left. That is, there are more houses that are cheaper than the mean values and a tail of expensive outliers. Having lived in California, I can confirm that this data reflects reality. @@ -212,7 +212,7 @@ import Granite.Svg import qualified Data.Text.IO as T import qualified Data.Text as T -let houseValues = D.columnAsList (F.col @Double "median_house_value") df +let houseValues = D.columnAsList (F.col Double "median_house_value") df T.putStrLn $ histogram diff --git a/docs/base_scripts/base_readme.md b/docs/base_scripts/base_readme.md index 63a757c..a66ff6a 100644 --- a/docs/base_scripts/base_readme.md +++ b/docs/base_scripts/base_readme.md @@ -94,8 +94,8 @@ sales = D.fromNamedColumns -- Group by product and compute totals sales |> D.groupBy ["product"] - |> D.aggregate [ F.sum (F.col @Int "amount") `as` "total" - , F.count (F.col @Int "amount") `as` "orders" + |> D.aggregate [ F.sum (F.col Int "amount") `as` "total" + , F.count (F.col Int "amount") `as` "orders" ] |> D.toMarkdown' ``` @@ -125,7 +125,7 @@ D.dimensions df D.describeColumns df |> D.toMarkdown' ``` -The `:declareColumns` macro (`$(D.declareColumns df)` outside the REPL) generates typed column references from a dataframe, so you can use column names directly in expressions instead of writing `F.col @Double "median_income"` every time: +The `:declareColumns` macro (`$(D.declareColumns df)` outside the REPL) generates typed column references from a dataframe, so you can use column names directly in expressions instead of writing `F.col Double "median_income"` every time: ```haskell $(D.declareColumns df) @@ -187,8 +187,8 @@ Compare this to the manual version which requires spelling out every column name ```haskell -- Without TH — every column needs its name and type spelled out df |> D.derive "rooms_per_household" - (F.col @Double 
"total_rooms" / F.col @Double "households") - |> D.filterWhere (F.col @Double "median_income" .>. F.lit 5) + (F.col Double "total_rooms" / F.col Double "households") + |> D.filterWhere (F.col Double "median_income" .>. F.lit 5) |> D.take 5 |> D.toMarkdown' ``` @@ -264,7 +264,7 @@ typed-dataframe machinery), there's a companion splice in $(D.deriveSchema ''Order) -- emits: -- orderSchema :: Schema --- orderSchema = makeSchema [("order_id", schemaType @Int64), ...] +-- orderSchema = makeSchema [("order_id", schemaType Int64), ...] -- orderOrderId :: Expr Int64 -- orderOrderId = col "order_id" -- orderRegion :: Expr Text @@ -319,9 +319,9 @@ employees <- D.readCsv "./data/employees.csv" case DT.freeze @EmployeeSchema employees of Nothing -> "Schema mismatch!" Just tdf -> tdf - |> DT.derive @"bonus" (DT.col @"salary" * DT.lit 0.1) - |> DT.filterWhere (DT.col @"salary" DT..>. DT.lit 50000) - |> DT.select @'["name", "bonus"] + |> DT.derive "bonus" (DT.col "salary" * DT.lit 0.1) + |> DT.filterWhere (DT.col "salary" DT..>. DT.lit 50000) + |> DT.select ["name", "bonus"] |> DT.thaw |> D.toMarkdown' ``` @@ -330,11 +330,11 @@ case DT.freeze @EmployeeSchema employees of ```text -- Typo in column name -> compile error -tdf |> DT.filterWhere (DT.col @"slary" DT..>. DT.lit 50000) +tdf |> DT.filterWhere (DT.col "slary" DT..>. DT.lit 50000) -- error: Column "slary" not found in schema -- Wrong type -> compile error -tdf |> DT.filterWhere (DT.col @"name" DT..>. DT.lit 50000) +tdf |> DT.filterWhere (DT.col "name" DT..>. DT.lit 50000) -- error: Couldn't match type 'Text' with 'Double' ``` @@ -352,7 +352,7 @@ Just stdf = DT.freeze @ScoreSchema scoresDf -- filterAllJust drops the null row and changes the column type from -- (Maybe Double) to Double, so `scaled` can multiply it directly. 
-DT.thaw (DT.filterAllJust stdf |> DT.derive @"scaled" (DT.col @"score" * DT.lit 100)) |> D.toMarkdown' +DT.thaw (DT.filterAllJust stdf |> DT.derive "scaled" (DT.col "score" * DT.lit 100)) |> D.toMarkdown' ``` ## Features @@ -361,7 +361,7 @@ DT.thaw (DT.filterAllJust stdf |> DT.derive @"scaled" (DT.col @"score" * DT.lit **Operations**: filter, select, derive, groupBy, aggregate, joins (inner, left, right, full outer), sort, sample, stratified sample, distinct, k-fold splits. -**Expressions**: typed column references (`F.col @Double "x"`), arithmetic, comparisons, logical operators, nullable-aware three-valued logic (`.==`, `.&&`), string matching (`like`, `regex`), casting, and user-defined functions via `lift`/`lift2`. +**Expressions**: typed column references (`F.col Double "x"`), arithmetic, comparisons, logical operators, nullable-aware three-valued logic (`.==`, `.&&`), string matching (`like`, `regex`), casting, and user-defined functions via `lift`/`lift2`. **Statistics**: mean, median, mode, variance, standard deviation, percentiles, inter-quartile range, correlation, skewness, frequency tables, imputation. 
@@ -384,23 +384,23 @@ import qualified DataFrame.Lazy as L import DataFrame.Internal.Schema (schemaType, makeSchema) housingSchema = makeSchema - [ ("longitude", schemaType @Double) - , ("latitude", schemaType @Double) - , ("housing_median_age", schemaType @Double) - , ("total_rooms", schemaType @Double) - , ("total_bedrooms", schemaType @(Maybe Double)) - , ("population", schemaType @Double) - , ("households", schemaType @Double) - , ("median_income", schemaType @Double) - , ("median_house_value", schemaType @Double) - , ("ocean_proximity", schemaType @Text) + [ ("longitude", schemaType Double) + , ("latitude", schemaType Double) + , ("housing_median_age", schemaType Double) + , ("total_rooms", schemaType Double) + , ("total_bedrooms", schemaType (Maybe Double)) + , ("population", schemaType Double) + , ("households", schemaType Double) + , ("median_income", schemaType Double) + , ("median_house_value", schemaType Double) + , ("ocean_proximity", schemaType Text) ] lazyResult <- L.runDataFrame $ L.scanCsv housingSchema "./data/housing.csv" - |> L.filter (F.col @Double "median_income" .>. F.lit 5) + |> L.filter (F.col Double "median_income" .>. F.lit 5) |> L.derive "value_per_income" - (F.col @Double "median_house_value" / F.col @Double "median_income") + (F.col Double "median_house_value" / F.col Double "median_income") |> L.select ["ocean_proximity", "median_house_value", "value_per_income"] |> L.take 1000 diff --git a/docs/coming_from_other_implementations.md b/docs/coming_from_other_implementations.md index ecbd4e7..42c710d 100644 --- a/docs/coming_from_other_implementations.md +++ b/docs/coming_from_other_implementations.md @@ -23,7 +23,7 @@ same underlying `DataFrame` type at runtime. 
| **Frame monad** | x | - | Sequential pipelines where an intermediate column feeds later steps | | **Typed** | x | x | Production code, libraries, anywhere a schema change should be a compile error | -**Untyped** is what most of this guide uses: expressions built with `F.col @Type "name"` and +**Untyped** is what most of this guide uses: expressions built with `F.col Type "name"` and operations like `D.derive`, `D.filterWhere`, `D.groupBy`. Column names are strings — typos only surface at runtime. @@ -34,8 +34,8 @@ The whole computation stays pure — `execFrameM df m` just runs it. **Typed** wraps `DataFrame` in a phantom type that tracks the full schema as a type-level list of `Column "name" Type` entries. The `freeze`/`freezeWithError` boundary validates the runtime -frame against the declared schema. After that, every column access (`T.col @"salary"`), every -derivation (`T.derive @"bonus"`), and every `select`/`exclude`/`rename` is checked at compile time. +frame against the declared schema. After that, every column access (`T.col "salary"`), every +derivation (`T.derive "bonus"`), and every `select`/`exclude`/`rename` is checked at compile time. Operations like `T.filterAllJust` go further — they change the **type**, promoting `Maybe Int` columns to `Int` in the result schema so that downstream code can no longer treat them as optional. @@ -293,7 +293,7 @@ python> df.sort_values(by='E') Since we don't support row indexes, we only provide column-based sorting. 
You specify the sort direction and column names: ```haskell -dataframe> D.sortBy [D.Asc (F.col @Transport "E")] df +dataframe> D.sortBy [D.Asc (F.col Transport "E")] df ------------------------------------------------------- A | B | C | D | E | F --------|------------|-------|-----|-----------|------- @@ -393,8 +393,8 @@ import qualified DataFrame as D import qualified DataFrame.Functions as F import DataFrame.Operators -df |> D.derive "doubled_A" (F.col @Double "A" * F.lit 2) - |> D.filterWhere (F.col @Double "doubled_A" .>. F.lit 1.0) +df |> D.derive "doubled_A" (F.col Double "A" * F.lit 2) + |> D.filterWhere (F.col Double "doubled_A" .>. F.lit 1.0) |> D.select ["date", "doubled_A"] ``` @@ -407,7 +407,7 @@ but forget to update `filterWhere`, the error only appears when the code runs. import DataFrame.Monad execFrameM df $ do - doubledA <- deriveM "doubled_A" (F.col @Double "A" * F.lit 2) + doubledA <- deriveM "doubled_A" (F.col Double "A" * F.lit 2) filterWhereM (doubledA .>. F.lit 1.0) modifyM (D.select ["date", "doubled_A"]) ``` @@ -437,13 +437,13 @@ type MySchema = '[ T.Column "date" Day case T.freeze @MySchema df of Nothing -> error "schema mismatch at startup" Just tdf -> - tdf |> T.derive @"doubled_A" (T.col @"A" * T.lit 2) - |> T.filterWhere (T.col @"doubled_A" .>. T.lit 1.0) + tdf |> T.derive "doubled_A" (T.col "A" * T.lit 2) + |> T.filterWhere (T.col "doubled_A" .>. T.lit 1.0) |> T.select @'["date", "doubled_A"] ``` -**Why it's better**: `T.col @"A"` is a compile-time error if the column `"A"` does not exist in -`MySchema` or has the wrong type. The same applies to `T.col @"doubled_A"` in `filterWhere` — if +**Why it's better**: `T.col "A"` is a compile-time error if the column `"A"` does not exist in +`MySchema` or has the wrong type. The same applies to `T.col "doubled_A"` in `filterWhere` — if you accidentally filter before deriving, or use the wrong type annotation, the code does not compile. 
Typos in column names, wrong aggregation types, and schema-breaking refactors are all caught before the program runs. @@ -481,7 +481,7 @@ python> df.fillna(5) In Haskell, we use the `impute` function: ```haskell -dataframe> D.impute (F.col @Integer "G") 5 df' +dataframe> D.impute (F.col Integer "G") 5 df' ----------------------------------------------------------------- A | B | C | D | E | F | G --------|------------|-------|-----|-----------|--------|-------- @@ -643,8 +643,8 @@ main = do let year d = let (y, _, _) = toGregorian d in y print $ df_csv - |> D.derive "birth_year" (F.lift year (F.col @Day "birthdate")) - |> D.derive "bmi" (F.col @Double "weight" / (F.pow (F.col @Double "height") 2)) + |> D.derive "birth_year" (F.lift year (F.col Day "birthdate")) + |> D.derive "bmi" (F.col Double "weight" / (F.pow (F.col Double "height") 2)) |> D.select ["name", "birth_year", "bmi"] ``` @@ -666,7 +666,7 @@ main = do 1. `derive "birth_year"` creates a new column by extracting years from birthdates - `F.lift` adapts a regular Haskell function to work with columns - - `F.col @Day "birthdate"` references the birthdate column with explicit type + - `F.col Day "birthdate"` references the birthdate column with explicit type 2. `derive "bmi"` creates another column with the BMI formula - `F.pow 2` squares the height - Division works directly on column expressions @@ -721,7 +721,7 @@ df_csv **However**, you can use standard Haskell functions to reduce repetition: ```haskell -let reduce name = D.derive (name <> "-5%") ((F.col @Double name) * (F.lit 0.95)) +let reduce name = D.derive (name <> "-5%") ((F.col Double name) * (F.lit 0.95)) df_csv |> foldl (flip reduce) ["weight", "height"] |> D.select ["name", "weight-5%", "height-5%"] @@ -813,9 +813,9 @@ Alternatively, you can use `filterWhere` with boolean expression combinations: ```haskell df_csv - |> D.filterWhere ((F.col @Int "birth_year" .>=. 1982) - .&&. (F.col @Int "birth_year" .<=. 1996) - .&&. (F.col @Double "height" .>. 
1.7)) + |> D.filterWhere ((F.col Int "birth_year" .>=. 1982) + .&&. (F.col Int "birth_year" .<=. 1996) + .&&. (F.col Double "height" .>. 1.7)) ``` ### Grouping and Aggregation @@ -841,9 +841,9 @@ let decade d = let (y, _, _) = toGregorian d in (y `div` 10) * 10 df_csv - |> D.derive "decade" (F.lift decade (F.col @Day "birthdate")) + |> D.derive "decade" (F.lift decade (F.col Day "birthdate")) |> D.groupBy ["decade"] - |> D.aggregate [F.count (F.col @Day "decade") `as` "Count"] + |> D.aggregate [F.count (F.col Day "decade") `as` "Count"] ``` **Output:** @@ -890,11 +890,11 @@ let decade d = let (y, _, _) = toGregorian d in (y `div` 10) * 10 df_csv - |> D.derive "decade" (F.lift decade (F.col @Day "birthdate")) + |> D.derive "decade" (F.lift decade (F.col Day "birthdate")) |> D.groupBy ["decade"] - |> D.aggregate [ F.count (F.col @Day "decade") `as` "sample_size" - , F.mean (F.col @Double "weight") `as` "avg_weight" - , F.max (F.col @Double "height") `as` "tallest" + |> D.aggregate [ F.count (F.col Day "decade") `as` "sample_size" + , F.mean (F.col Double "weight") `as` "avg_weight" + , F.max (F.col Double "height") `as` "tallest" ] ``` @@ -911,7 +911,7 @@ df_csv ``` The `aggregate` function takes a list of aggregation expressions. Each expression specifies: -- What column to aggregate (`F.col @Type "name"`) +- What column to aggregate (`F.col Type "name"`) - What aggregation to perform (`mean`, `max`, `count`, etc.) - What to name the result (`as "new_name"`) @@ -945,13 +945,13 @@ let decade d = let (y, _, _) = toGregorian d firstName = head . 
T.split (== ' ') df_csv - |> D.derive "name" (F.lift firstName (F.col @T.Text "name")) - |> D.derive "decade" (F.lift decade (F.col @Day "birthdate")) + |> D.derive "name" (F.lift firstName (F.col T.Text "name")) + |> D.derive "decade" (F.lift decade (F.col Day "birthdate")) |> D.exclude ["birthdate"] |> D.groupBy ["decade"] - |> D.aggregate [ F.mean (F.col @Double "weight") `as` "avg_weight" - , F.mean (F.col @Double "height") `as` "avg_height" - , F.collect (F.col @T.Text "name") `as` "names" + |> D.aggregate [ F.mean (F.col Double "weight") `as` "avg_weight" + , F.mean (F.col Double "height") `as` "avg_height" + , F.collect (F.col T.Text "name") `as` "names" ] ``` @@ -982,15 +982,15 @@ let decade d = let (y, _, _) = toGregorian d in (y `div` 10) * 10 firstName = head . T.split (== ' ') execFrameM df_csv $ do - modifyM (D.derive "name" (F.lift firstName (F.col @T.Text "name"))) - modifyM (D.derive "decade" (F.lift decade (F.col @Day "birthdate"))) + modifyM (D.derive "name" (F.lift firstName (F.col T.Text "name"))) + modifyM (D.derive "decade" (F.lift decade (F.col Day "birthdate"))) modifyM (D.exclude ["birthdate"]) shape <- inspectM D.dimensions -- peek: how many rows/cols so far? modifyM (D.groupBy ["decade"]) modifyM $ D.aggregate - [ F.mean (F.col @Double "weight") `as` "avg_weight" - , F.mean (F.col @Double "height") `as` "avg_height" - , F.collect (F.col @T.Text "name") `as` "names" + [ F.mean (F.col Double "weight") `as` "avg_weight" + , F.mean (F.col Double "height") `as` "avg_height" + , F.collect (F.col T.Text "name") `as` "names" ] ``` @@ -1019,14 +1019,14 @@ type BirthdateSchema = '[ T.Column "name" T.Text example :: T.TypedDataFrame BirthdateSchema -> IO () example tdf = do let result = T.aggregate - ( T.as @"avg_weight" (T.mean (T.col @"weight")) - . T.as @"avg_height" (T.mean (T.col @"height")) - . T.as @"names" (T.collect (T.col @"name")) + ( T.as @"avg_weight" (T.mean (T.col "weight")) + . T.as @"avg_height" (T.mean (T.col "height")) + . 
T.as @"names" (T.collect (T.col "name")) ) (T.groupBy @'["decade"] tdf') tdf' = tdf - |> T.derive @"name" (T.lift firstName (T.col @"name")) - |> T.derive @"decade" (T.lift decade (T.col @"birthdate")) + |> T.derive "name" (T.lift firstName (T.col "name")) + |> T.derive "decade" (T.lift decade (T.col "birthdate")) |> T.exclude @'["birthdate"] print (T.thaw result) where @@ -1034,7 +1034,7 @@ example tdf = do firstName = head . T.split (== ' ') ``` -**Why it's better**: `T.as @"avg_weight" (T.mean (T.col @"weight"))` is checked in two ways at +**Why it's better**: `T.as @"avg_weight" (T.mean (T.col "weight"))` is checked in two ways at compile time — `"weight"` must exist in the schema with type `Double`, and the result column `"avg_weight"` will have type `Double` in the output schema. A wrong output-type annotation causes a type error before the program runs. @@ -1071,18 +1071,18 @@ import DataFrame.Internal.Schema (Schema, schemaType) import Data.Proxy (Proxy (..)) mySchema :: Schema -mySchema = [ ("name", schemaType @T.Text) - , ("weight", schemaType @Double) - , ("height", schemaType @Double) +mySchema = [ ("name", schemaType T.Text) + , ("weight", schemaType Double) + , ("height", schemaType Double) ] result :: IO DataFrame result = L.runDataFrame $ L.scanCsv mySchema "data.csv" - |> L.filter (F.col @Double "height" .>. F.lit 1.7) + |> L.filter (F.col Double "height" .>. F.lit 1.7) |> L.select ["name", "weight", "height"] - |> L.derive "bmi" (F.col @Double "weight" ./ - (F.col @Double "height" * F.col @Double "height")) + |> L.derive "bmi" (F.col Double "weight" ./ + (F.col Double "height" * F.col Double "height")) ``` **When Polars lazy is excellent too**: Polars lazy is mature, fast, and ergonomic for Python @@ -1110,7 +1110,7 @@ workflows. Its type inference is automatic — you rarely need to declare schem result :: IO DataFrame result = L.runDataFrame $ L.scanParquet mySchema "warehouse/events.parquet" - |> L.filter (F.col @T.Text "country" .==. 
F.lit "US") + |> L.filter (F.col T.Text "country" .==. F.lit "US") |> L.select ["event_id", "country", "revenue"] |> L.take 1000 ``` @@ -1154,7 +1154,7 @@ starwars %>% import qualified Data.Text as T starwars - |> D.filterWhere (F.col @Text "species" .==. "Droid") + |> D.filterWhere (F.col Text "species" .==. "Droid") ``` **Output (truncated for readability):** @@ -1240,7 +1240,7 @@ starwars -- Remove the maybes. |> D.filterJust "mass" |> D.filterJust "height" - |> D.derive "bmi" (F.col @Double "mass" / F.pow (F.col @Double "height" / F.lit 100) 2) + |> D.derive "bmi" (F.col Double "mass" / F.pow (F.col Double "height" / F.lit 100) 2) |> D.select ["name", "height", "mass", "bmi"] |> D.take 5 ``` @@ -1346,7 +1346,7 @@ starwars |> D.aggregate [ F.mean mass `as` "mean_mass" , F.count mass `as` "count" ] - |> D.filterWhere ((F.col @Int "count" .>. 1) .&&. (F.col @Double "mean_mass" .>. 50)) + |> D.filterWhere ((F.col Int "count" .>. 1) .&&. (F.col Double "mean_mass" .>. 50)) ``` **Output:** @@ -1383,8 +1383,8 @@ import DataFrame.Monad execFrameM starwars $ do modifyM D.filterAllJust -- drop rows with any Nothing bmiCol <- deriveM "bmi" - (F.col @Double "mass" - / F.pow (F.col @Double "height" / F.lit 100) 2) + (F.col Double "mass" + / F.pow (F.col Double "height" / F.lit 100) 2) filterWhereM (bmiCol .>. F.lit 20.0) modifyM (D.select ["name", "height", "mass", "bmi"]) ``` @@ -1416,16 +1416,16 @@ example :: T.TypedDataFrame StarwarsSchema -> T.TypedDataFrame _ example tdf = let stripped = T.filterAllJust tdf -- After filterAllJust, "height" and "mass" are Double (not Maybe Double) - withBmi = T.derive @"bmi" - (T.col @"mass" / (T.col @"height" / T.lit 100) ^ 2) + withBmi = T.derive "bmi" + (T.col "mass" / (T.col "height" / T.lit 100) ^ 2) stripped in withBmi - |> T.filterWhere (T.col @"bmi" .>. T.lit 20.0) + |> T.filterWhere (T.col "bmi" .>. 
T.lit 20.0) |> T.select @'["name", "height", "mass", "bmi"] ``` -**Why it's better**: after `T.filterAllJust`, `T.col @"height"` has type `TExpr cols Double` — -using it in an arithmetic expression just works. Before the strip, `T.col @"height"` has type +**Why it's better**: after `T.filterAllJust`, `T.col "height"` has type `TExpr cols Double` — +using it in an arithmetic expression just works. Before the strip, `T.col "height"` has type `TExpr cols (Maybe Double)`, so the same arithmetic expression would be a type error. The compiler enforces that you handle missing values before doing math on them. @@ -1523,7 +1523,7 @@ purchases$amount |> sum() **dataframe:** ```haskell -D.sum (F.col @Int "amount") df +D.sum (F.col Int "amount") df -- 17210 ``` @@ -1571,7 +1571,7 @@ definitions, each with a substantial type signature, to express "group by countr ```haskell df |> D.groupBy ["country"] - |> D.aggregate [F.sum (F.col @Int "amount") `as` "total"] + |> D.aggregate [F.sum (F.col Int "amount") `as` "total"] ``` ``` @@ -1615,9 +1615,9 @@ the input (`Amount`, `Discount`), and adding a new field requires more type-leve **dataframe:** ```haskell -df |> D.derive "net" (F.col @Int "amount" - F.col @Int "discount") +df |> D.derive "net" (F.col Int "amount" - F.col Int "discount") |> D.groupBy ["country"] - |> D.aggregate [F.sum (F.col @Int "net") `as` "total"] + |> D.aggregate [F.sum (F.col Int "net") `as` "total"] ``` ``` @@ -1664,12 +1664,12 @@ purchases |> **dataframe:** ```haskell -let med = D.median (F.col @Double "amount") df +let med = D.median (F.col Double "amount") df -df |> D.filterWhere (F.col @Double "amount" .<=. F.lit (med * 10)) - |> D.derive "net" (F.col @Int "amount" - F.col @Int "discount") +df |> D.filterWhere (F.col Double "amount" .<=. 
F.lit (med * 10)) + |> D.derive "net" (F.col Int "amount" - F.col Int "discount") |> D.groupBy ["country"] - |> D.aggregate [F.sum (F.col @Int "net") `as` "total"] + |> D.aggregate [F.sum (F.col Int "net") `as` "total"] ``` ### Filter within groups @@ -1715,13 +1715,13 @@ original grouped data, and filter each group separately. This is the point in th -- Compute each country's median and join it back let medians = df |> D.groupBy ["country"] - |> D.aggregate [F.median (F.col @Double "amount") `as` "country_median"] + |> D.aggregate [F.median (F.col Double "amount") `as` "country_median"] D.innerJoin ["country"] df medians - |> D.filterWhere (F.col @Double "amount" .<=. F.col @Double "country_median" * 10) - |> D.derive "net" (F.col @Int "amount" - F.col @Int "discount") + |> D.filterWhere (F.col Double "amount" .<=. F.col Double "country_median" * 10) + |> D.derive "net" (F.col Int "amount" - F.col Int "discount") |> D.groupBy ["country"] - |> D.aggregate [F.sum (F.col @Int "net") `as` "total"] + |> D.aggregate [F.sum (F.col Int "net") `as` "total"] ``` Not quite as concise as R (which has implicit grouped-filter semantics), but still a @@ -1753,12 +1753,12 @@ main = do Nothing -> putStrLn "Schema mismatch!" Just tdf -> do let result = T.aggregate - (T.as @"total" (T.sum (T.col @"amount"))) + (T.as @"total" (T.sum (T.col "amount"))) (T.groupBy @'["country"] tdf) print (T.thaw result) ``` -`T.col @"amount"` is checked against `PurchaseSchema` at compile time — same guarantee as +`T.col "amount"` is checked against `PurchaseSchema` at compile time — same guarantee as Frames' vinyl records, but without the lens imports, `Foldl` plumbing, or map-reduce pipeline. 
### Key differences from Frames @@ -1766,7 +1766,7 @@ Frames' vinyl records, but without the lens imports, `Foldl` plumbing, or map-re | | Frames | dataframe | |---|---|---| | **Record type** | Vinyl `Record '[Field1, Field2, ...]` | Untyped `DataFrame` or phantom-typed `TypedDataFrame` | -| **Column access** | Lens (`r ^. amount`) or `rgetField @Amount r` | Expression DSL (`F.col @Int "amount"`) or TH-generated bindings (`amount`) | +| **Column access** | Lens (`r ^. amount`) or `rgetField @Amount r` | Expression DSL (`F.col Int "amount"`) or TH-generated bindings (`amount`) | | **GroupBy + aggregate** | Map-reduce fold pipeline (unpack, assign, reduce) | `D.groupBy` + `D.aggregate` | | **Naming results** | Output shares input field names; new fields require type-level work | `as "name"` on any aggregation expression | | **Deriving columns** | Define new record fields or manipulate type-level lists | `D.derive "name" expr` | @@ -1796,13 +1796,13 @@ and opt into compile-time types when you need them. **Untyped** ```haskell -df |> D.derive "bonus" (F.col @Double "salary" * F.lit 0.1) +df |> D.derive "bonus" (F.col Double "salary" * F.lit 0.1) ``` **Frame monad** ```haskell execFrameM df $ do - bonus <- deriveM "bonus" (F.col @Double "salary" * F.lit 0.1) + bonus <- deriveM "bonus" (F.col Double "salary" * F.lit 0.1) -- `bonus` is now an Expr Double you can reuse without repeating "bonus" ... ``` @@ -1810,24 +1810,24 @@ execFrameM df $ do **Typed** ```haskell -- T.derive prepends Column "bonus" Double to the schema type -tdf |> T.derive @"bonus" (T.col @"salary" * T.lit 0.1) +tdf |> T.derive "bonus" (T.col "salary" * T.lit 0.1) ``` #### Filtering rows **Untyped** ```haskell -df |> D.filterWhere (F.col @Double "salary" .>. F.lit 50000) +df |> D.filterWhere (F.col Double "salary" .>. F.lit 50000) ``` **Frame monad** ```haskell -execFrameM df $ filterWhereM (F.col @Double "salary" .>. F.lit 50000) +execFrameM df $ filterWhereM (F.col Double "salary" .>. 
F.lit 50000) ``` **Typed** ```haskell -tdf |> T.filterWhere (T.col @"salary" .>. T.lit 50000) +tdf |> T.filterWhere (T.col "salary" .>. T.lit 50000) ``` #### Grouping and aggregating @@ -1835,8 +1835,8 @@ tdf |> T.filterWhere (T.col @"salary" .>. T.lit 50000) **Untyped** ```haskell df |> D.groupBy ["dept"] - |> D.aggregate [ F.mean (F.col @Double "salary") `as` "avg_salary" - , F.count (F.col @Double "salary") `as` "n" + |> D.aggregate [ F.mean (F.col Double "salary") `as` "avg_salary" + , F.count (F.col Double "salary") `as` "n" ] ``` @@ -1845,16 +1845,16 @@ df |> D.groupBy ["dept"] execFrameM df $ do modifyM (D.groupBy ["dept"]) modifyM $ D.aggregate - [ F.mean (F.col @Double "salary") `as` "avg_salary" - , F.count (F.col @Double "salary") `as` "n" + [ F.mean (F.col Double "salary") `as` "avg_salary" + , F.count (F.col Double "salary") `as` "n" ] ``` **Typed** ```haskell T.aggregate - ( T.as @"avg_salary" (T.mean (T.col @"salary")) - . T.as @"n" (T.count (T.col @"salary")) + ( T.as @"avg_salary" (T.mean (T.col "salary")) + . T.as @"n" (T.count (T.col "salary")) ) (T.groupBy @'["dept"] tdf) ``` @@ -1867,14 +1867,14 @@ T.aggregate df |> D.filterJust "col" -- Fill Nothing with a default -df |> D.impute (F.col @Type "col") defaultVal +df |> D.impute (F.col Type "col") defaultVal ``` **Frame monad** ```haskell execFrameM df $ do - col' <- filterJustM (F.col @(Maybe Double) "col") - _ <- imputeM (F.col @(Maybe Int) "other") 0 + col' <- filterJustM (F.col (Maybe Double) "col") + _ <- imputeM (F.col (Maybe Int) "other") 0 ... ``` @@ -1894,9 +1894,9 @@ import qualified DataFrame.Lazy as L result <- L.runDataFrame $ L.scanCsv mySchema "large_file.csv" - |> L.filter (F.col @Double "revenue" .>. F.lit 1000) + |> L.filter (F.col Double "revenue" .>. 
F.lit 1000) |> L.select ["id", "region", "revenue"] - |> L.derive "tax" (F.col @Double "revenue" * F.lit 0.2) + |> L.derive "tax" (F.col Double "revenue" * F.lit 0.2) |> L.take 10000 ``` @@ -1908,7 +1908,7 @@ The optimizer pushes the filter into the scan and drops unreferenced columns bef Our library often requires type annotations to disambiguate operations: ```haskell -F.col @Double "weight" -- Specify column contains Doubles +F.col Double "weight" -- Specify column contains Doubles F.lit @Int 5 -- Specify literal is an Int filter "col" (== ("text" :: T.Text)) -- Specify string type ``` diff --git a/docs/cookbook.md b/docs/cookbook.md index 6854168..ffc1032 100644 --- a/docs/cookbook.md +++ b/docs/cookbook.md @@ -537,16 +537,16 @@ main = do submissions' |> DT.groupBy @'["challenge_id"] |> DT.aggregate - ( (DT.sum (DT.col @"total_submissions") |> DT.as @"total_submissions") - . (DT.sum (DT.col @"total_accepted_submissions") + ( (DT.sum (DT.col "total_submissions") |> DT.as @"total_submissions") + . (DT.sum (DT.col "total_accepted_submissions") |> DT.as @"total_accepted_submissions") ) let viewTotals = views' |> DT.groupBy @'["challenge_id"] |> DT.aggregate - ( (DT.sum (DT.col @"total_views") |> DT.as @"total_views") - . (DT.sum (DT.col @"total_unique_views") |> DT.as @"total_unique_views") + ( (DT.sum (DT.col "total_views") |> DT.as @"total_views") + . (DT.sum (DT.col "total_unique_views") |> DT.as @"total_unique_views") ) print $ contestsWithColleges @@ -566,10 +566,10 @@ main = do |> DT.impute @"total_submissions" (0 :: Int) |> DT.impute @"total_accepted_submissions" (0 :: Int) |> DT.filterWhere - ( DT.col @"total_unique_views" - + DT.col @"total_views" - + DT.col @"total_submissions" - + DT.col @"total_accepted_submissions" .>. DT.lit 0 + ( DT.col "total_unique_views" + + DT.col "total_views" + + DT.col "total_submissions" + + DT.col "total_accepted_submissions" .>. 
DT.lit 0 ) ``` diff --git a/docs/exploratory_data_analysis_primer.md b/docs/exploratory_data_analysis_primer.md index f97944e..d5d88a3 100644 --- a/docs/exploratory_data_analysis_primer.md +++ b/docs/exploratory_data_analysis_primer.md @@ -91,9 +91,9 @@ For a given column calculating the mean and median is fairly straightfoward and ```haskell import qualified DataFrame.Functions as F -D.mean (F.col @Double "housing_median_age") df +D.mean (F.col Double "housing_median_age") df -D.median (F.col @Double "housing_median_age") df +D.median (F.col Double "housing_median_age") df ``` @@ -150,7 +150,7 @@ From the small sample it does seem like there are some wild deviations. The firs ```haskell df |> D.derive "deviation" (abs (median_house_value - (F.mean median_house_value))) |> D.select ["median_house_value", "deviation"] - |> D.mean (F.col @Double "deviation") + |> D.mean (F.col Double "deviation") ``` @@ -177,7 +177,7 @@ $(D.declareColumns withDeviation) import Data.Maybe sumOfSqureDifferences = withDeviation |> D.derive "deviation^2" (F.pow deviation 2) - |> D.sum (F.col @Double "deviation^2") + |> D.sum (F.col Double "deviation^2") n = fromIntegral (fst (D.dimensions df) - 1) @@ -193,7 +193,7 @@ We can calculate the standard deviation in one line as follows: ```haskell -D.standardDeviation (F.col @Double "median_house_value") df +D.standardDeviation (F.col Double "median_house_value") df ``` @@ -209,7 +209,7 @@ For our dataset: ```haskell -D.interQuartileRange (F.col @Double "median_house_value") df +D.interQuartileRange (F.col Double "median_house_value") df ``` @@ -225,7 +225,7 @@ In our example it's a very large number: ```haskell -D.variance (F.col @Double "median_house_value") df +D.variance (F.col Double "median_house_value") df ``` @@ -245,7 +245,7 @@ A skewness score between -0.5 and 0.5 means the data has little skew. 
A score be ```haskell -D.skewness (F.col @Double "median_house_value") df +D.skewness (F.col Double "median_house_value") df ``` @@ -305,7 +305,7 @@ import Granite.Svg import qualified Data.Text.IO as T import qualified Data.Text as T -let houseValues = D.columnAsList (F.col @Double "median_house_value") df +let houseValues = D.columnAsList (F.col Double "median_house_value") df T.putStrLn $ histogram diff --git a/docs/haskell_for_data_analysis.md b/docs/haskell_for_data_analysis.md index ddca115..c37cd30 100644 --- a/docs/haskell_for_data_analysis.md +++ b/docs/haskell_for_data_analysis.md @@ -140,12 +140,12 @@ The type of `D.take` is `Int -> DataFrame -> DataFrame` — an integer in, a dat ### Opting into stronger type safety -`F.col @Type "colName"` tells the compiler what type to expect in a column. If you get the type wrong you hear about it immediately. +`F.col Type "colName"` tells the compiler what type to expect in a column. If you get the type wrong you hear about it immediately. 
```haskell -- Runtime-checked (flexible): -D.mean (F.col @Double "High Temperature (C)") weather +D.mean (F.col Double "High Temperature (C)") weather -- Compile-time-checked — $(D.declareColumns …) generates typed bindings: $(D.declareColumns weather) @@ -276,9 +276,9 @@ Passing a `Double` column expression where an `Int` is expected: ```haskell --- D.mean (F.col @Int "longitude") housing +-- D.mean (F.col Int "longitude") housing -- error: Couldn't match type 'Double' with 'Int' for column "longitude" --- Fix: D.mean (F.col @Double "longitude") housing +-- Fix: D.mean (F.col Double "longitude") housing ``` > @@ -354,7 +354,7 @@ The companions `filterNothing` and `filterAllNothing` do the opposite — they l ```haskell TIO.putStrLn $ D.toMarkdown - (D.impute (F.col @(Maybe Int) "id") 0 messy) + (D.impute (F.col (Maybe Int) "id") 0 messy) ``` > @@ -370,7 +370,7 @@ Notice the `@(Maybe Int)` type annotation — it tells the imputer what type the ```haskell --- D.impute (F.col @(Maybe Double) "id") 0 messy +-- D.impute (F.col (Maybe Double) "id") 0 messy -- Exception: Type Mismatch — expected 'Maybe Double' but column is 'Maybe Integer' ``` @@ -384,7 +384,7 @@ Notice the `@(Maybe Int)` type annotation — it tells the imputer what type the ```haskell TIO.putStrLn $ D.toMarkdown $ D.take 10 - (D.imputeWith F.mean (F.col @(Maybe Double) "total_bedrooms") housing) + (D.imputeWith F.mean (F.col (Maybe Double) "total_bedrooms") housing) ``` > @@ -495,7 +495,7 @@ TIO.putStrLn $ D.toMarkdown meat ```haskell TIO.putStrLn $ D.toMarkdown - (D.derive "kilograms" (F.col @Double "ounces" * 0.03) meat) + (D.derive "kilograms" (F.col Double "ounces" * 0.03) meat) ``` > @@ -514,12 +514,12 @@ TIO.putStrLn $ D.toMarkdown ### Expressions and F.col -`F.col @Type "name"` creates an *expression* — a typed reference to a column that can be combined with arithmetic operators, boolean operators, or custom functions. 
+`F.col Type "name"` creates an *expression* — a typed reference to a column that can be combined with arithmetic operators, boolean operators, or custom functions. ``` -F.col @Double "ounces" -- :: Expr Double -F.col @Text "food" -- :: Expr Text +F.col Double "ounces" -- :: Expr Double +F.col Text "food" -- :: Expr Text ``` @@ -528,7 +528,7 @@ You can compose expressions: ```haskell roomsPerHousehold = D.derive "rooms_per_household" - (F.col @Double "total_rooms" / F.col @Double "households") + (F.col Double "total_rooms" / F.col Double "households") housing TIO.putStrLn $ D.toMarkdown (D.take 5 roomsPerHousehold) @@ -562,7 +562,7 @@ meatToAnimal "nova lox" = "salmon" meatToAnimal _ = "unknown" TIO.putStrLn $ D.toMarkdown - (D.derive "animal" (F.lift meatToAnimal (F.col @Text "food")) meat) + (D.derive "animal" (F.lift meatToAnimal (F.col Text "food")) meat) ``` > @@ -588,7 +588,7 @@ TIO.putStrLn $ D.toMarkdown animalMapping = [("bacon","pig"),("pulled pork","pig"),("pastrami","cow"),("corned beef","cow"),("honey ham","pig"),("nova lox","salmon")] TIO.putStrLn $ D.toMarkdown - (D.derive "animal2" (F.recode animalMapping (F.col @Text "food")) meat) + (D.derive "animal2" (F.recode animalMapping (F.col Text "food")) meat) ``` > @@ -851,7 +851,7 @@ After loading, use `F.cast` to produce a column expression with the exact type y ```haskell -- Retype a text column to Double, converting unparseable values to Nothing: -withIncomeCast = D.derive "income_cast" (F.cast @Double "median_income") housingFull +withIncomeCast = D.derive "income_cast" (F.cast Double "median_income") housingFull TIO.putStrLn $ D.toMarkdown (D.take 5 withIncomeCast) ``` @@ -861,7 +861,7 @@ TIO.putStrLn $ D.toMarkdown (D.take 5 withIncomeCast) All of the above reads the entire file into memory upfront. For datasets larger than available RAM, `DataFrame.Lazy` offers a pull-based streaming executor: operations build a logical plan tree and nothing is read from disk until `runDataFrame` is called. 
The optimizer pushes `filter` predicates down to the scan, so unneeded rows are discarded before any column is allocated. -The Lazy path requires an explicit schema — there is no inference. Build one with `schemaType @T`: +The Lazy path requires an explicit schema — there is no inference. Build one with `schemaType T`: ```haskell @@ -871,16 +871,16 @@ import DataFrame.Internal.Schema (Schema (..), schemaType) import qualified Data.Map.Strict as M housingSchema = Schema $ M.fromList - [ ("longitude", schemaType @Double) - , ("latitude", schemaType @Double) - , ("housing_median_age", schemaType @Double) - , ("total_rooms", schemaType @Double) - , ("total_bedrooms", schemaType @(Maybe Double)) - , ("population", schemaType @Double) - , ("households", schemaType @Double) - , ("median_income", schemaType @Double) - , ("median_house_value", schemaType @Double) - , ("ocean_proximity", schemaType @T.Text) + [ ("longitude", schemaType Double) + , ("latitude", schemaType Double) + , ("housing_median_age", schemaType Double) + , ("total_rooms", schemaType Double) + , ("total_bedrooms", schemaType (Maybe Double)) + , ("population", schemaType Double) + , ("households", schemaType Double) + , ("median_income", schemaType Double) + , ("median_house_value", schemaType Double) + , ("ocean_proximity", schemaType T.Text) ] ``` @@ -891,7 +891,7 @@ Build and run a lazy pipeline: ```haskell lazyQuery = L.scanCsv housingSchema "../data/housing.csv" - |> L.filter (F.col @Double "median_house_value" .>=. 300000) + |> L.filter (F.col Double "median_house_value" .>=. 
300000) |> L.select ["longitude","latitude","median_house_value","ocean_proximity"] |> L.take 10 @@ -938,7 +938,7 @@ Three functions handle column re-typing with different failure modes: messyNums = D.fromNamedColumns [ ("raw", D.fromList ["1.5", "2.0", "bad", "3.7", "" :: T.Text]) ] -withCast = D.derive "as_double" (F.cast @Double "raw") messyNums +withCast = D.derive "as_double" (F.cast Double "raw") messyNums TIO.putStrLn $ D.toMarkdown withCast ``` @@ -960,7 +960,7 @@ TIO.putStrLn $ D.toMarkdown withDefault ```haskell -withAudit = D.derive "audit" (F.castEither @Double "raw") messyNums +withAudit = D.derive "audit" (F.castEither Double "raw") messyNums TIO.putStrLn $ D.toMarkdown withAudit ``` @@ -981,7 +981,7 @@ Three functions manipulate `Text` columns inside expressions. emails = D.fromNamedColumns [ ("email", D.fromList ["alice@example.com", "bob@haskell.org", "cara@data.io" :: T.Text]) ] -emailParts = D.derive "parts" (F.splitOn "@" (F.col @T.Text "email")) emails +emailParts = D.derive "parts" (F.splitOn "@" (F.col T.Text "email")) emails TIO.putStrLn $ D.toMarkdown emailParts ``` @@ -993,7 +993,7 @@ TIO.putStrLn $ D.toMarkdown emailParts ```haskell -domains = D.derive "domain" (F.match "[a-z]+\\.[a-z]+" (F.col @T.Text "email")) emails +domains = D.derive "domain" (F.match "[a-z]+\\.[a-z]+" (F.col T.Text "email")) emails TIO.putStrLn $ D.toMarkdown domains ``` @@ -1005,7 +1005,7 @@ TIO.putStrLn $ D.toMarkdown domains ```haskell -withWords = D.derive "words" (F.matchAll "[a-z]+" (F.col @T.Text "email")) emails +withWords = D.derive "words" (F.matchAll "[a-z]+" (F.col T.Text "email")) emails TIO.putStrLn $ D.toMarkdown withWords ``` @@ -1026,7 +1026,7 @@ events = D.fromNamedColumns , ("date_text", D.fromList ["2025-03-01","2025-06-15","2025-09-30" :: T.Text]) ] -withDates = D.derive "date" (F.parseDate @Day "%Y-%m-%d" (F.col @T.Text "date_text")) events +withDates = D.derive "date" (F.parseDate Day "%Y-%m-%d" (F.col T.Text "date_text")) events TIO.putStrLn 
$ D.toMarkdown withDates ``` @@ -1259,7 +1259,7 @@ Histograms show the distribution of a single numeric variable. ```haskell import qualified DataFrame.Display.Terminal.Plot as P -houseValues = D.columnAsList (F.col @Double "median_house_value") housing +houseValues = D.columnAsList (F.col Double "median_house_value") housing TIO.putStrLn $ histogram @@ -1289,8 +1289,8 @@ Scatter plots reveal relationships between two numeric variables. ```haskell -incomes = D.columnAsList (F.col @Double "median_income") housing -values = D.columnAsList (F.col @Double "median_house_value") housing +incomes = D.columnAsList (F.col Double "median_income") housing +values = D.columnAsList (F.col Double "median_house_value") housing TIO.putStrLn $ scatter @@ -1339,7 +1339,7 @@ Line graphs show trends along a continuous x-axis. The second argument is a list ```haskell -ages = D.columnAsList (F.col @Double "housing_median_age") housing +ages = D.columnAsList (F.col Double "housing_median_age") housing TIO.putStrLn $ lineGraph @@ -1366,11 +1366,11 @@ P.plotCorrelationMatrix housing ### Opting into stronger type safety -Using `F.col @Double` in `D.columnAsList` ensures only numeric columns reach the plotting functions. Passing a `Text` column to a histogram is a compile error: +Using `F.col Double` in `D.columnAsList` ensures only numeric columns reach the plotting functions. 
Passing a `Text` column to a histogram is a compile error: ```haskell --- D.columnAsList (F.col @Double "ocean_proximity") housing +-- D.columnAsList (F.col Double "ocean_proximity") housing -- error: column "ocean_proximity" has type Text, not Double ``` @@ -1392,11 +1392,11 @@ import DataFrame.Operators (as) grouped = D.groupBy ["ocean_proximity"] housing summary = D.aggregate - [ F.count (F.col @Double "median_house_value") `as` "count" - , F.mean (F.col @Double "median_house_value") `as` "mean_value" - , F.median (F.col @Double "median_house_value") `as` "median_value" - , F.maximum (F.col @Double "median_house_value") `as` "max_value" - , F.minimum (F.col @Double "median_house_value") `as` "min_value" + [ F.count (F.col Double "median_house_value") `as` "count" + , F.mean (F.col Double "median_house_value") `as` "mean_value" + , F.median (F.col Double "median_house_value") `as` "median_value" + , F.maximum (F.col Double "median_house_value") `as` "max_value" + , F.minimum (F.col Double "median_house_value") `as` "min_value" ] grouped TIO.putStrLn $ D.toMarkdown summary @@ -1433,7 +1433,7 @@ $(D.declareColumns meat) meatGrouped = D.groupBy ["food"] meat meatSummary = D.aggregate - [ F.count (F.col @T.Text "food") `as` "count" + [ F.count (F.col T.Text "food") `as` "count" , F.sum ounces `as` "total_oz" , F.mean ounces `as` "mean_oz" ] meatGrouped @@ -1448,7 +1448,7 @@ TIO.putStrLn $ D.toMarkdown meatSummary ```haskell -TIO.putStrLn $ D.toMarkdown (D.frequencies (F.col @T.Text "ocean_proximity") housing) +TIO.putStrLn $ D.toMarkdown (D.frequencies (F.col T.Text "ocean_proximity") housing) ``` @@ -1471,7 +1471,7 @@ Aggregate first, then derive new columns from the summary: totalRows = fromIntegral (D.nRows housing) :: Double withShare = D.derive "pct_of_total" - (F.toDouble (F.col @Int "count") / F.lit totalRows * F.lit 100.0) + (F.toDouble (F.col Int "count") / F.lit totalRows * F.lit 100.0) summary TIO.putStrLn $ D.toMarkdown withShare @@ -1485,8 +1485,8 @@ A 
full group-normalise pipeline: group → compute mean and stddev → derive z- ```haskell incomeByProx = D.aggregate - [ F.mean (F.col @Double "median_income") `as` "mean_income" - , F.stddev (F.col @Double "median_income") `as` "stddev_income" + [ F.mean (F.col Double "median_income") `as` "mean_income" + , F.stddev (F.col Double "median_income") `as` "stddev_income" ] (D.groupBy ["ocean_proximity"] housing) TIO.putStrLn $ D.toMarkdown incomeByProx @@ -1516,7 +1516,7 @@ A type-level mistake on an aggregated column — say, treating `count` (an `Int` ### Motivation -Throughout this guide we have used `F.col @Type "colName"` to reference columns. The expressions are typed (the `@Type` annotation is checked), but the schema of the *dataframe* is not in the Haskell type. This creates a subtle foot-gun: an `Expr` built against one dataframe can be silently applied to a completely different dataframe — the compiler will accept it, but at runtime you get either a missing-column error or, worse, wrong results from a same-named column with different semantics. +Throughout this guide we have used `F.col Type "colName"` to reference columns. The expressions are typed (the `Type` argument is checked), but the schema of the *dataframe* is not in the Haskell type. This creates a subtle foot-gun: an `Expr` built against one dataframe can be silently applied to a completely different dataframe — the compiler will accept it, but at runtime you get either a missing-column error or, worse, wrong results from a same-named column with different semantics. `DataFrame.Typed` solves this by moving the full column schema into the type system. The schema becomes a type-level list of `DT.Column "name" Type` entries. If a column does not exist in a `TypedDataFrame`, or if you use it with the wrong type, the code will not compile. @@ -1566,13 +1566,13 @@ thousing <- either (error . show) id . 
DT.freezeWithError @Housing <$> D.readCsv ### Typed transforms -`DT.col @"colName"` looks up the column in the schema at compile time. The type of the expression is inferred automatically — no `@Type` annotation needed. `DT.derive @"newCol"` adds the column to the schema type so subsequent steps can reference it: +`DT.col "colName"` looks up the column in the schema at compile time. The type of the expression is inferred automatically — no `Type` argument needed. `DT.derive "newCol"` adds the column to the schema type so subsequent steps can reference it: ```haskell typedResult = thousing - |> DT.derive @"rooms_per_household" (DT.col @"total_rooms" / DT.col @"households") - |> DT.derive @"bedrooms_per_household" (DT.col @"total_bedrooms" / DT.col @"households") + |> DT.derive "rooms_per_household" (DT.col "total_rooms" / DT.col "households") + |> DT.derive "bedrooms_per_household" (DT.col "total_bedrooms" / DT.col "households") TIO.putStrLn $ D.toMarkdown (DT.thaw typedResult) ``` @@ -1586,7 +1586,7 @@ TIO.putStrLn $ D.toMarkdown (DT.thaw typedResult) ```haskell typedGrouped = thousing |> DT.groupBy @'["ocean_proximity"] - |> DT.aggregate (DT.as @"count" (DT.count (DT.col @"median_house_value"))) + |> DT.aggregate (DT.as @"count" (DT.count (DT.col "median_house_value"))) TIO.putStrLn $ D.toMarkdown (DT.thaw typedGrouped) ``` @@ -1598,7 +1598,7 @@ TIO.putStrLn $ D.toMarkdown (DT.thaw typedGrouped) | Layer | Best for | |---|---| -| Eager string API (`F.col @T "name"`) | exploration, quick scripts, one-off analyses | +| Eager string API (`F.col T "name"`) | exploration, quick scripts, one-off analyses | | `FrameM` | multi-step transformation pipelines where threading `df` by hand is noisy | | `DataFrame.Lazy` | files larger than RAM; push-down filters; ETL pipelines | | `DataFrame.Typed` | production pipelines where schema correctness must be guaranteed at compile time | diff --git a/docs/intro_to_probability_and_data.md b/docs/intro_to_probability_and_data.md 
index afa4ca4..4c785c0 100644 --- a/docs/intro_to_probability_and_data.md +++ b/docs/intro_to_probability_and_data.md @@ -29,7 +29,7 @@ dataframe> :script dataframe.ghci 💡 Use prefix 'D' for core functionality. ● E.g. D.readCsv "/path/to/file" 💡 Use prefix 'F' for expression functions. - ● E.g. F.sum (F.col @Int "value") + ● E.g. F.sum (F.col Int "value") ✅ Ready. dataframe> df <- D.readCsv "./data/arbuthnot.csv" @@ -179,8 +179,8 @@ year, but there is a faster way. If we add the vector for baptisms for boys to that of girls, Haskell will compute all sums simultaneously. ```haskell -dataframe> bs = D.columnAsList @Int "boys" df -dataframe> gs = D.columnAsList @Int "girls" df +dataframe> bs = D.columnAsList Int "boys" df +dataframe> gs = D.columnAsList Int "girls" df dataframe> zipWith (+) bs gs ``` @@ -193,7 +193,7 @@ We'll be using this new vector to generate some plots, so we'll want to save it as a permanent column in our data frame. ```haskell -dataframe> withTotal = df |> D.derive "total" (F.col @Int "boys" + F.col @Int "girls") +dataframe> withTotal = df |> D.derive "total" (F.col Int "boys" + F.col Int "girls") dataframe> D.take 10 withTotal ``` @@ -214,7 +214,7 @@ This is essentially equivalent to going through each row and adding up the boys and girls counts for that year and recording that value in a new column called total. -The `F.col @Int "boys" + F.col @Int "girls"` part is how we right expressions for our dataframe. Read left to right, this expression says take the `Int` called boys and add it to the `Int` column called girls. This saves us the work of having to work with vectors directly. But having to remember the name and type of each column is tedious and error prone. We can ask Haskell to expose correct references to these columns by using `:declareColumns`. +The `F.col Int "boys" + F.col Int "girls"` part is how we write expressions for our dataframe. 
Read left to right, this expression says take the `Int` called boys and add it to the `Int` column called girls. This saves us the work of having to work with vectors directly. But having to remember the name and type of each column is tedious and error prone. We can ask Haskell to expose correct references to these columns by using `:declareColumns`. ```haskell dataframe> :declareColumns df @@ -353,7 +353,7 @@ dataframe> :script dataframe.ghci 💡 Use prefix 'D' for core functionality. ● E.g. D.readCsv "/path/to/file" 💡 Use prefix 'F' for expression functions. - ● E.g. F.sum (F.col @Int "value") + ● E.g. F.sum (F.col Int "value") ✅ Ready. dataframe> df <- D.readCsv "./data/present.csv" diff --git a/docs/persistent_integration.md b/docs/persistent_integration.md index 3973396..e9014b7 100644 --- a/docs/persistent_integration.md +++ b/docs/persistent_integration.md @@ -111,13 +111,13 @@ analyzeUsers = runSqlite "test.db" $ do -- Derive columns let withAgeGroup = DF.derive "age_group" - (F.ifThenElse (F.col @Int "age" `F.lt` F.lit 30) + (F.ifThenElse (F.col Int "age" `F.lt` F.lit 30) (F.lit @Text "young") (F.lit @Text "adult")) df -- Get column values - let ages = DF.columnAsList (F.col @Int "age") sorted + let ages = DF.columnAsList (F.col Int "age") sorted print ages -- [25, 28, 30, 35] ``` @@ -198,4 +198,4 @@ See the `dataframe-persistent/tests/PersistentTests.hs` file for comprehensive e 1. **Column Names**: Column names are automatically cleaned to remove table prefixes (e.g., `test_user_name` becomes `name`) 2. **Type Application**: Use TypeApplications syntax for cleaner code: `fromPersistent @TestUser []` 3. **Vector Operations**: Use `V.toList $ DF.columnAsVector @Type "column"` to extract column values -4. **Empty DataFrames**: Empty result sets still preserve the column structure \ No newline at end of file +4. 
**Empty DataFrames**: Empty result sets still preserve the column structure diff --git a/docs/tutorial.md b/docs/tutorial.md index 3f3c5d8..6174ece 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -85,7 +85,7 @@ Let's say we want to filter all flowers whose petal width is greater than 6. We There are a couple of core functions we can use to define columns: ```haskell -F.col @Int "x" -- this says "I have an int column called 'x'" +F.col Int "x" -- this says "I have an int column called 'x'" F.lit @Int 5 -- this says "I have a literal value `5` that is an integer`. ``` @@ -94,11 +94,11 @@ Expressions support regular math operations: ```haskell -- Take the value at x and add 5 to it. -- The type of this is `Expr Int` -F.col @Int "x" + F.lit @Int 5 +F.col Int "x" + F.lit @Int 5 -- Compute the sine of the column x and add the cosine of 0.5. -- The type of this is `Expr Double` -sin (F.col @Double "x") + cos (F.lit @Double 0.5) +sin (F.col Double "x") + cos (F.lit @Double 0.5) ``` However, for comparison operations we have a special syntax. Same-type (non-nullable) operators are wrapped in dots on both sides; nullable-aware operators use a single leading dot. @@ -106,7 +106,7 @@ However, for comparison operations we have a special syntax. Same-type (non-null ```haskell -- Is x greater than 5? (both sides are non-nullable Int) -- The type of this is `Expr Bool` -F.col @Int "x" .>=. F.lit @Int 5 +F.col Int "x" .>=. F.lit @Int 5 ``` The expression language eliminates a class of bugs from column operations (like adding a string to an integer). @@ -116,7 +116,7 @@ Armed with this knowledge, we can go back and filter all flowers with petal leng We can see from the sample we printed before that `petal.length` is of type `Double`. We write the expression as follows: ```haskell -df |> D.filterWhere (F.col @Double "petal.length" .>. F.lit @Double 6) +df |> D.filterWhere (F.col Double "petal.length" .>. 
F.lit @Double 6) |> D.select ["petal.width", "petal.length", "variety"] ``` @@ -126,7 +126,7 @@ Suppose we write out the wrong types in the expression. That is suppose we say t ```haskell -- This fails at runtime because "petal.length" is Double, not Int: -df |> D.filterWhere (F.col @Int "petal.length" .>. F.lit @Int 6) +df |> D.filterWhere (F.col Int "petal.length" .>. F.lit @Int 6) ``` ![Screenshot of filtering with type error](./_static/filter_wrong_type.png) diff --git a/docs/using_dataframe_in_a_standalone_script.md b/docs/using_dataframe_in_a_standalone_script.md index fc447fb..2ae8938 100644 --- a/docs/using_dataframe_in_a_standalone_script.md +++ b/docs/using_dataframe_in_a_standalone_script.md @@ -50,7 +50,7 @@ import qualified DataFrame.Functions as F main :: IO () main = do df <- D.readCsv "./housing.csv" - print (D.derive "rooms_per_household" (F.col @Double "total_rooms" / F.col @Double "households") df) + print (D.derive "rooms_per_household" (F.col Double "total_rooms" / F.col Double "households") df) ``` You'll notice the dataframe now has a new column called `rooms_per_household` at the end. This is great! But what if we had made a mistake? What if we had instead typed "totalrooms"? Or what if we had assumed both columns were ints and used integer division? These would have both resulted in runtime errors. This gets worse as we rewrite more transformations using the same columns (we're more likely to have a stray typo or get the type wrong). 
@@ -73,8 +73,8 @@ module Main where import qualified DataFrame as D import qualified DataFrame.Functions as F -total_rooms = F.col @Double "total_rooms" -households = F.col @Double "households" +total_rooms = F.col Double "total_rooms" +households = F.col Double "households" main :: IO () main = do @@ -147,7 +147,7 @@ main = do meanBedrooms = D.meanMaybe total_bedrooms dfWithRoomsPerHousehold dfWithTotalBedroomsImputed = D.impute total_bedrooms meanBedrooms dfWithRoomsPerHousehold - print $ dfWithTotalBedroomsImputed |> D.filterWhere (isExpensive .&&. roomsPerHousehold .>=. 7 .&&. (F.col @Double "total_bedrooms") .>=. 200) + print $ dfWithTotalBedroomsImputed |> D.filterWhere (isExpensive .&&. roomsPerHousehold .>=. 7 .&&. (F.col Double "total_bedrooms") .>=. 200) ``` Our code is now inundated with different dataframe variables and a bunch of pipeline management. Should we just revert back to using the old `derive` and keep track of names and types ourselves? Not at all. We can use a structure called a `FrameM` to remove this boilerplate while still keeping the guarantees we unlocked before. @@ -308,7 +308,7 @@ main = do ### Statically Typed Transformations -Once your dataframe is safely frozen, you can perform transformations with complete confidence. You will notice the use of the `@` symbol in functions like `DT.derive` and `DT.col`. This syntax is enabled by the `TypeApplications` extension in Haskell. Instead of passing standard string values, we are passing type level strings directly to the compiler. When you write `DT.col @"total_rooms"`, the compiler checks the schema of your dataframe immediately. It verifies that the column exists and knows exactly what type of data it holds. Every time you derive a new column, the compiler automatically updates the schema to include it for the very next step. +Once your dataframe is safely frozen, you can perform transformations with complete confidence. 
You will notice the use of the `@` symbol in functions like `DT.freezeWithError` and `DT.impute`. This syntax is enabled by the `TypeApplications` extension in Haskell. `DT.derive` and `DT.col` instead take the column name as a required type argument (the `RequiredTypeArguments` extension), so it is written without the `@`. Either way, instead of passing standard string values, we are passing type level strings directly to the compiler. When you write `DT.col "total_rooms"`, the compiler checks the schema of your dataframe immediately. It verifies that the column exists and knows exactly what type of data it holds. Every time you derive a new column, the compiler automatically updates the schema to include it for the very next step. ```haskell {-# LANGUAGE DataKinds #-} @@ -328,12 +328,12 @@ main = do df <- D.readCsv "./data/housing.csv" let tdf = either (error . show) id (DT.freezeWithError @Housing df) -- We could generate this with `D.declareColumnsFromCsvFile` as before - let total_bedrooms = F.col @(Maybe Double) "total_bedrooms" + let total_bedrooms = F.col (Maybe Double) "total_bedrooms" print $ tdf - |> DT.derive @"rooms_per_household" (DT.col @"total_rooms" / DT.col @"households") + |> DT.derive "rooms_per_household" (DT.col "total_rooms" / DT.col "households") |> DT.impute @"total_bedrooms" (D.meanMaybe total_bedrooms df) - |> DT.derive @"bedrooms_per_household" (DT.col @"total_bedrooms" / DT.col @"households") - |> DT.derive @"population_per_household" (DT.col @"population" / DT.col @"households") + |> DT.derive "bedrooms_per_household" (DT.col "total_bedrooms" / DT.col "households") + |> DT.derive "population_per_household" (DT.col "population" / DT.col "households") ``` ### Complex Operations @@ -358,18 +358,18 @@ main :: IO () main = do df <- D.readCsv "./data/housing.csv" -- We could generate this with `D.declareColumnsFromCsvFile` as before - let total_bedrooms = F.col @(Maybe Double) "total_bedrooms" + let total_bedrooms = F.col (Maybe Double) "total_bedrooms" let tdf = either (error . 
show) id (DT.freezeWithError @Housing df) print $ tdf - |> DT.derive @"rooms_per_household" (DT.col @"total_rooms" / DT.col @"households") + |> DT.derive "rooms_per_household" (DT.col "total_rooms" / DT.col "households") |> DT.impute @"total_bedrooms" (D.meanMaybe total_bedrooms df) - |> DT.derive @"bedrooms_per_household" (DT.col @"total_bedrooms" / DT.col @"households") - |> DT.derive @"population_per_household" (DT.col @"population" / DT.col @"households") + |> DT.derive "bedrooms_per_household" (DT.col "total_bedrooms" / DT.col "households") + |> DT.derive "population_per_household" (DT.col "population" / DT.col "households") print $ tdf |> DT.groupBy @'["ocean_proximity"] |> DT.aggregate - (DT.as @"total" (DT.count (DT.col @"median_house_value"))) + (DT.as @"total" (DT.count (DT.col "median_house_value"))) ``` ## The Trade Off diff --git a/docs/working_with_nulls.md b/docs/working_with_nulls.md index 9354ec7..ef5e283 100644 --- a/docs/working_with_nulls.md +++ b/docs/working_with_nulls.md @@ -55,14 +55,14 @@ D.filterAllJust df import qualified DataFrame.Functions as F -- Replace every Nothing in "score" with 0 -D.impute (F.col @(Maybe Int) "score") 0 df +D.impute (F.col (Maybe Int) "score") 0 df ``` ### Fill with a computed aggregate (mean, median, etc.) ```haskell -- Replace every Nothing in "score" with the mean of the non-missing values -D.imputeWith F.mean (F.col @(Maybe Int) "score") df +D.imputeWith F.mean (F.col (Maybe Int) "score") df ``` `imputeWith` accepts any aggregate expression (`F.mean`, `F.median`, a custom fold, etc.) and computes it over the non-null rows before filling. @@ -79,7 +79,7 @@ These work on non-nullable columns and return plain `Bool`. ```haskell -- Both columns must be non-nullable -D.filterWhere (F.col @Int "id" .==. F.lit 2) df +D.filterWhere (F.col Int "id" .==. 
F.lit 2) df ``` ### Nullable-aware operators (`.+`, `.-`, `.*`, `./`, `.==`, `.<`, …) @@ -90,10 +90,10 @@ These accept any combination of nullable and non-nullable operands and propagate import DataFrame.Operators -- Int column + Maybe Int column → Maybe Int column -D.derive "adjusted" (F.col @Int "id" .+ F.col @(Maybe Int) "score") df +D.derive "adjusted" (F.col Int "id" .+ F.col (Maybe Int) "score") df -- Comparison: Maybe Int .== Int → Maybe Bool column -D.derive "match" (F.col @(Maybe Int) "score" .== F.lit 90) df +D.derive "match" (F.col (Maybe Int) "score" .== F.lit 90) df ``` The result type is determined at compile time: @@ -111,16 +111,16 @@ When the built-in operators don't cover your function, use `nullLift` (unary) an ```haskell -- Unary: negate over Maybe Int column → Maybe Int -D.derive "neg_score" (F.nullLift negate (F.col @(Maybe Int) "score")) df +D.derive "neg_score" (F.nullLift negate (F.col (Maybe Int) "score")) df -- Unary: negate over plain Int column → Int (no wrapping) -D.derive "neg_id" (F.nullLift negate (F.col @Int "id")) df +D.derive "neg_id" (F.nullLift negate (F.col Int "id")) df -- Binary: mixed nullable — result type follows the same table above -D.derive "sum" (F.nullLift2 (+) (F.col @Int "id") (F.col @(Maybe Int) "score")) df +D.derive "sum" (F.nullLift2 (+) (F.col Int "id") (F.col (Maybe Int) "score")) df -- Binary: custom function, both non-nullable -D.derive "product" (F.nullLift2 (*) (F.col @Int "id") (F.col @Int "id")) df +D.derive "product" (F.nullLift2 (*) (F.col Int "id") (F.col Int "id")) df ``` `nullLift` / `nullLift2` work for **any** function, including those returning a different type: @@ -130,7 +130,7 @@ import qualified Data.Text as T -- Convert nullable Int to nullable Text D.derive "score_text" - (F.nullLift (T.pack . show) (F.col @(Maybe Int) "score")) + (F.nullLift (T.pack . 
show) (F.col (Maybe Int) "score")) df -- produces a Maybe Text column ``` @@ -140,7 +140,7 @@ D.derive "score_text" `whenBothPresent` predates `nullLift2` and is retained for backward compatibility. It handles the both-nullable case for operands of the same type: ```haskell -F.whenBothPresent (+) (F.col @(Maybe Int) "a") (F.col @(Maybe Int) "b") +F.whenBothPresent (+) (F.col (Maybe Int) "a") (F.col (Maybe Int) "b") -- equivalent to: F.nullLift2 (+) ... ``` @@ -168,7 +168,7 @@ This means you rarely need to write `D.apply @(Maybe Int) (fmap negate)` explici ```haskell -- Wrap the plain Int "id" column in Maybe -D.derive "maybe_id" (F.cast @(Maybe Int) Nothing "id") df +D.derive "maybe_id" (F.cast (Maybe Int) Nothing "id") df -- if "id" is already Maybe Int, the column is used as-is -- if "id" is plain Int, each value is wrapped in Just ``` @@ -176,7 +176,7 @@ D.derive "maybe_id" (F.cast @(Maybe Int) Nothing "id") df `F.unsafeCast` strips `Maybe` when you know (at runtime) there are no `Nothing` values: ```haskell -D.derive "bare_score" (F.coerce @Int "score") df +D.derive "bare_score" (F.coerce Int "score") df ``` --- @@ -198,11 +198,11 @@ stripped = T.filterAllJust typedDf -- Nullable-aware expression sumExpr :: TE.TExpr MySchema (Maybe Int) -sumExpr = TE.col @"id" TE..+ TE.col @"score" -- Int + Maybe Int → Maybe Int +sumExpr = TE.col "id" TE..+ TE.col "score" -- Int + Maybe Int → Maybe Int -- nullLift on a typed expression negScore :: TE.TExpr MySchema (Maybe Int) -negScore = TE.nullLift negate (TE.col @"score") +negScore = TE.nullLift negate (TE.col "score") ``` ### `filterAllJust` removes `Maybe` from the schema type diff --git a/examples/CaliforniaHousing.hs b/examples/CaliforniaHousing.hs index a174e4b..8482d7a 100644 --- a/examples/CaliforniaHousing.hs +++ b/examples/CaliforniaHousing.hs @@ -1,6 +1,7 @@ {-# LANGUAGE BangPatterns #-} {-# LANGUAGE NumericUnderscores #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE 
ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeApplications #-} @@ -76,11 +77,11 @@ normalizeFeatures df = ( \name d -> let -- Convenience reference to the column. - col = F.col @Double name + col = F.col Prelude.Double name in D.derive name ((col - F.minimum col) / (F.maximum col - F.minimum col)) d ) - (D.columnNames (df |> D.selectBy [D.byProperty (D.hasElemType @Double)])) + (D.columnNames (df |> D.selectBy [D.byProperty (D.hasElemType Prelude.Double)])) model :: Linear -> Tensor -> Tensor model state input = squeezeAll $ linear state input diff --git a/examples/Chipotle.hs b/examples/Chipotle.hs index 03402c5..db10d7f 100644 --- a/examples/Chipotle.hs +++ b/examples/Chipotle.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeApplications #-} @@ -53,7 +54,7 @@ run = do , "max" .= F.maximum quantity , "mean" .= F.mean quantity ] - |> D.sortBy [D.Desc (F.col "sum" `asTypeOf` quantity)] + |> D.sortBy [D.Desc (F.col' "sum" `asTypeOf` quantity)] let firstOrder = df diff --git a/examples/Iris.lhs b/examples/Iris.lhs index dff1a14..465d203 100644 --- a/examples/Iris.lhs +++ b/examples/Iris.lhs @@ -293,7 +293,7 @@ Convert the text labels to integers using our Iris type: > df > |> D.derive > "variety" -> (F.lift (fromEnum . read @Iris . T.unpack) (F.col "variety")) +> (F.lift (fromEnum . read @Iris . T.unpack) (F.col' "variety")) The `|>` operator pipes data left-to-right (like Unix pipes or method chaining). 
This converts: "Setosa" → 0, "Versicolor" → 1, "Virginica" → 2 @@ -323,8 +323,8 @@ Extract the four measurement columns as our features: Extract the labels (species) as integers: -> let trainLabels = either throw id (D.columnAsIntVector (F.col @Int "variety") trainDf) -> let testLabels = either throw id (D.columnAsIntVector (F.col @Int "variety") testDf) +> let trainLabels = either throw id (D.columnAsIntVector (F.col Int "variety") trainDf) +> let testLabels = either throw id (D.columnAsIntVector (F.col Int "variety") testDf) Convert labels to one-hot encoding for neural network training: - 0 (Setosa) → [1.0, 0.0, 0.0] diff --git a/examples/OneBillionRowChallenge.hs b/examples/OneBillionRowChallenge.hs index f61b0a5..bc856e4 100644 --- a/examples/OneBillionRowChallenge.hs +++ b/examples/OneBillionRowChallenge.hs @@ -16,13 +16,13 @@ import DataFrame.Operators run :: IO () run = do - let city = F.col @Text "names" - let measurement = F.col @Double "temperature" + let city = F.col Text "names" + let measurement = F.col Double "temperature" let schema = Schema $ M.fromList - [ (F.name city, schemaType @Text) - , (F.name measurement, schemaType @Double) + [ (F.name city, schemaType Text) + , (F.name measurement, schemaType Double) ] startCalculation <- getCurrentTime diff --git a/examples/titanic.ipynb b/examples/titanic.ipynb index 1048da8..f2bbec3 100644 --- a/examples/titanic.ipynb +++ b/examples/titanic.ipynb @@ -196,7 +196,7 @@ "-- using the function `filterNothing`)\n", "\n", "\n", - "df |> D.filterJust \"Age\" |> D.mean (F.col @Double \"Age\")" + "df |> D.filterJust \"Age\" |> D.mean (F.col Double \"Age\")" ] }, { diff --git a/ffi/DataFrame/IR.hs b/ffi/DataFrame/IR.hs index 0dcfb6a..ac51ef5 100644 --- a/ffi/DataFrame/IR.hs +++ b/ffi/DataFrame/IR.hs @@ -1,9 +1,9 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE ExplicitNamespaces #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} +{-# 
LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} @@ -310,16 +310,16 @@ buildSchema pairs = do return (makeSchema sch) where resolve (name, tag) = case tag of - "int" -> return (name, schemaType @Int) - "int8" -> return (name, schemaType @Int8) - "int16" -> return (name, schemaType @Int16) - "int32" -> return (name, schemaType @Int32) - "int64" -> return (name, schemaType @Int64) - "double" -> return (name, schemaType @Double) - "float" -> return (name, schemaType @Float) - "bool" -> return (name, schemaType @Bool) - "text" -> return (name, schemaType @T.Text) - "string" -> return (name, schemaType @String) + "int" -> return (name, schemaType Int) + "int8" -> return (name, schemaType Int8) + "int16" -> return (name, schemaType Int16) + "int32" -> return (name, schemaType Int32) + "int64" -> return (name, schemaType Int64) + "double" -> return (name, schemaType Double) + "float" -> return (name, schemaType Float) + "bool" -> return (name, schemaType Bool) + "text" -> return (name, schemaType T.Text) + "string" -> return (name, schemaType String) other -> ioError . userError $ "DataFrame.IR.buildSchema: unsupported schema type tag '" @@ -336,28 +336,28 @@ runFrequencies colName df = dispatchType (columnTypeRep (unsafeGetColumn colName columnTypeRep (UnboxedColumn _ (_ :: VU.Vector a)) = SomeTypeRep (typeRep @a) columnTypeRep (BoxedColumn _ (_ :: V.Vector a)) = SomeTypeRep (typeRep @a) - fr :: forall a. 
(Columnable a, Ord a) => IO DataFrame - fr = return $ Stats.frequencies (Col @a colName) df + fr :: forall a -> (Columnable a, Ord a) => IO DataFrame + fr a = return $ Stats.frequencies (Col @a colName) df dispatchType :: SomeTypeRep -> IO DataFrame dispatchType (SomeTypeRep tr) - | Just HRefl <- eqTypeRep tr (typeRep @Int) = fr @Int - | Just HRefl <- eqTypeRep tr (typeRep @Int8) = fr @Int8 - | Just HRefl <- eqTypeRep tr (typeRep @Int16) = fr @Int16 - | Just HRefl <- eqTypeRep tr (typeRep @Int32) = fr @Int32 - | Just HRefl <- eqTypeRep tr (typeRep @Int64) = fr @Int64 - | Just HRefl <- eqTypeRep tr (typeRep @Word) = fr @Word - | Just HRefl <- eqTypeRep tr (typeRep @Word8) = fr @Word8 - | Just HRefl <- eqTypeRep tr (typeRep @Word16) = fr @Word16 - | Just HRefl <- eqTypeRep tr (typeRep @Word32) = fr @Word32 - | Just HRefl <- eqTypeRep tr (typeRep @Word64) = fr @Word64 - | Just HRefl <- eqTypeRep tr (typeRep @Integer) = fr @Integer - | Just HRefl <- eqTypeRep tr (typeRep @Double) = fr @Double - | Just HRefl <- eqTypeRep tr (typeRep @Float) = fr @Float - | Just HRefl <- eqTypeRep tr (typeRep @Bool) = fr @Bool - | Just HRefl <- eqTypeRep tr (typeRep @Char) = fr @Char - | Just HRefl <- eqTypeRep tr (typeRep @T.Text) = fr @T.Text - | Just HRefl <- eqTypeRep tr (typeRep @String) = fr @String + | Just HRefl <- eqTypeRep tr (typeRep @Int) = fr Int + | Just HRefl <- eqTypeRep tr (typeRep @Int8) = fr Int8 + | Just HRefl <- eqTypeRep tr (typeRep @Int16) = fr Int16 + | Just HRefl <- eqTypeRep tr (typeRep @Int32) = fr Int32 + | Just HRefl <- eqTypeRep tr (typeRep @Int64) = fr Int64 + | Just HRefl <- eqTypeRep tr (typeRep @Word) = fr Word + | Just HRefl <- eqTypeRep tr (typeRep @Word8) = fr Word8 + | Just HRefl <- eqTypeRep tr (typeRep @Word16) = fr Word16 + | Just HRefl <- eqTypeRep tr (typeRep @Word32) = fr Word32 + | Just HRefl <- eqTypeRep tr (typeRep @Word64) = fr Word64 + | Just HRefl <- eqTypeRep tr (typeRep @Integer) = fr Integer + | Just HRefl <- eqTypeRep tr (typeRep 
@Double) = fr Double + | Just HRefl <- eqTypeRep tr (typeRep @Float) = fr Float + | Just HRefl <- eqTypeRep tr (typeRep @Bool) = fr Bool + | Just HRefl <- eqTypeRep tr (typeRep @Char) = fr Char + | Just HRefl <- eqTypeRep tr (typeRep @T.Text) = fr T.Text + | Just HRefl <- eqTypeRep tr (typeRep @String) = fr String | otherwise = ioError . userError $ "DataFrame.IR.Frequencies: unsupported column type for '" diff --git a/ffi/DataFrame/IR/ExprJson.hs b/ffi/DataFrame/IR/ExprJson.hs index 130f848..672676b 100644 --- a/ffi/DataFrame/IR/ExprJson.hs +++ b/ffi/DataFrame/IR/ExprJson.hs @@ -1,9 +1,9 @@ -{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE ExplicitNamespaces #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} @@ -85,8 +85,8 @@ data SomeExpr where SomeExpr :: (Columnable a) => TypeRep a -> Expr a -> SomeExpr -- | Map a Haskell type to its wire-format tag string. -typeTagOf :: forall a. (Typeable a) => Maybe T.Text -typeTagOf +typeTagOf :: forall a -> (Typeable a) => Maybe T.Text +typeTagOf a | Just _ <- testEquality (typeRep @a) (typeRep @Int) = Just "int" | Just _ <- testEquality (typeRep @a) (typeRep @Int8) = Just "int8" | Just _ <- testEquality (typeRep @a) (typeRep @Int16) = Just "int16" @@ -234,7 +234,7 @@ names (binaryUdf, unaryUdf, …). encodeExpr :: forall a. 
(Columnable a) => Expr a -> Either String Aeson.Value encodeExpr expr = case expr of Col name -> do - outTag <- requireTypeTag @a + outTag <- requireTypeTag a Right $ object [ "node" .= ("col" :: T.Text) @@ -242,7 +242,7 @@ encodeExpr expr = case expr of , "name" .= name ] Lit v -> do - outTag <- requireTypeTag @a + outTag <- requireTypeTag a litVal <- encodeLit @a v Right $ object @@ -251,8 +251,8 @@ encodeExpr expr = case expr of , "value" .= litVal ] Unary op (arg :: Expr b) -> do - outTag <- requireTypeTag @a - argTag <- requireTypeTag @b + outTag <- requireTypeTag a + argTag <- requireTypeTag b opTag <- recognizeUnary (unaryName op) argEnc <- encodeExpr arg Right $ @@ -264,8 +264,8 @@ encodeExpr expr = case expr of , "arg" .= argEnc ] Binary op (lhs :: Expr c) (rhs :: Expr b) -> do - outTag <- requireTypeTag @a - argTag <- requireTypeTag @c + outTag <- requireTypeTag a + argTag <- requireTypeTag c opTag <- recognizeBinary (binaryName op) lEnc <- encodeExpr lhs rEnc <- encodeExpr rhs @@ -279,7 +279,7 @@ encodeExpr expr = case expr of , "rhs" .= rEnc ] If cond th el -> do - outTag <- requireTypeTag @a + outTag <- requireTypeTag a cEnc <- encodeExpr cond tEnc <- encodeExpr th eEnc <- encodeExpr el @@ -298,8 +298,8 @@ encodeExpr expr = case expr of Left "DataFrame.IR.ExprJson.encodeExpr: CastExprWith is not supported in the wire format" Agg (strat :: F.AggStrategy a b) (inner :: F.Expr b) -> do - outTag <- requireTypeTag @a - argTag <- requireTypeTag @b + outTag <- requireTypeTag a + argTag <- requireTypeTag b innerEnc <- encodeExpr inner Right $ object @@ -310,7 +310,7 @@ encodeExpr expr = case expr of , "arg" .= innerEnc ] Over names inner -> do - outTag <- requireTypeTag @a + outTag <- requireTypeTag a innerEnc <- encodeExpr inner Right $ object @@ -320,8 +320,8 @@ encodeExpr expr = case expr of , "arg" .= innerEnc ] where - requireTypeTag :: forall x. 
(Typeable x) => Either String T.Text - requireTypeTag = case typeTagOf @x of + requireTypeTag :: forall x -> (Typeable x) => Either String T.Text + requireTypeTag x = case typeTagOf x of Just t -> Right t Nothing -> Left $ diff --git a/tests/DecisionTree.hs b/tests/DecisionTree.hs index 7160878..c22bd08 100644 --- a/tests/DecisionTree.hs +++ b/tests/DecisionTree.hs @@ -1,5 +1,6 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RequiredTypeArguments #-} {-# LANGUAGE TypeApplications #-} module DecisionTree where @@ -52,7 +53,7 @@ rightTree = Leaf "B" -- x <= 2.5: True for idx 0,1 (→ left); False for idx 2,3 (→ right) splitCond :: Expr Bool -splitCond = F.col @Double "x" .<= F.lit (2.5 :: Double) +splitCond = F.col Double "x" .<= F.lit (2.5 :: Double) -- Pre-computed care points for the full fixture carePoints3 :: [CarePoint] @@ -198,7 +199,7 @@ countErrorsAllCorrect = TestCase $ do -- CarePoint 0 GoLeft → goesLeft=True, shouldGoLeft=True → correct -- CarePoint 1 GoRight → goesLeft=False, shouldGoLeft=False → correct let cps = [CarePoint 0 GoLeft, CarePoint 1 GoRight] - cond = F.col @Double "x" .<= F.lit (1.5 :: Double) + cond = F.col Double "x" .<= F.lit (1.5 :: Double) errs = countCarePointErrors cond fixtureDF cps assertEqual "condition routes all care points correctly" 0 errs @@ -208,7 +209,7 @@ countErrorsAllWrong = TestCase $ do -- CarePoint 0 GoLeft → goesLeft=False, shouldGoLeft=True → wrong -- CarePoint 1 GoRight → goesLeft=True, shouldGoLeft=False → wrong let cps = [CarePoint 0 GoLeft, CarePoint 1 GoRight] - cond = F.col @Double "x" .> F.lit (1.5 :: Double) + cond = F.col Double "x" .> F.lit (1.5 :: Double) errs = countCarePointErrors cond fixtureDF cps assertEqual "reversed condition misroutes all care points" 2 errs @@ -253,8 +254,8 @@ sepDF = -- Candidate conditions that bracket the decision boundary sepConds :: [Expr Bool] sepConds = - [ F.col @Double "x" .<= F.lit (10.5 :: Double) - , F.col @Double "x" .> F.lit (10.5 :: 
Double) + [ F.col Double "x" .<= F.lit (10.5 :: Double) + , F.col Double "x" .> F.lit (10.5 :: Double) ] testCfg :: TreeConfig @@ -269,7 +270,7 @@ testCfg = wrongStump :: Tree T.Text wrongStump = Branch - (F.col @Double "x" .> F.lit (10.5 :: Double)) + (F.col Double "x" .> F.lit (10.5 :: Double)) (Leaf "pos") (Leaf "neg") @@ -320,7 +321,7 @@ taoDeadBranchNoCrash :: Test taoDeadBranchNoCrash = TestCase $ do -- Threshold below all x values: x <= 0.5 is False for every row -- → all indices route to the right child; left partition is always empty - let badCond = F.col @Double "x" .<= F.lit (0.5 :: Double) + let badCond = F.col Double "x" .<= F.lit (0.5 :: Double) indices = V.enumFromN 0 20 initTree = Branch badCond (Leaf "pos") (Leaf "neg") :: Tree T.Text result = @@ -352,20 +353,20 @@ taoRecoversSingleObliqueDerived :: Test taoRecoversSingleObliqueDerived = TestCase $ do let labelExpr = F.ifThenElse - ((F.col @Double "x" + F.col @Double "y") .<= F.lit (4.5 :: Double)) + ((F.col Double "x" + F.col Double "y") .<= F.lit (4.5 :: Double)) (F.lit ("pos" :: T.Text)) (F.lit ("neg" :: T.Text)) df = D.derive @T.Text "label" labelExpr gridBaseDF indices = V.enumFromN 0 16 initTree = Branch - (F.col @Double "x" .<= F.lit (2.5 :: Double)) + (F.col Double "x" .<= F.lit (2.5 :: Double)) (Leaf "pos") (Leaf "neg") :: Tree T.Text conds = - [ (F.col @Double "x" + F.col @Double "y") .<= F.lit (4.5 :: Double) - , (F.col @Double "x" + F.col @Double "y") .> F.lit (4.5 :: Double) + [ (F.col Double "x" + F.col Double "y") .<= F.lit (4.5 :: Double) + , (F.col Double "x" + F.col Double "y") .> F.lit (4.5 :: Double) ] cfg = defaultTreeConfig{taoIterations = 5, expressionPairs = 4, minLeafSize = 1} result = taoOptimize @T.Text cfg "label" conds df indices initTree @@ -379,10 +380,10 @@ taoRecoversNestedObliqueDerived :: Test taoRecoversNestedObliqueDerived = TestCase $ do let labelExpr = F.ifThenElse - ((F.col @Double "x" + F.col @Double "y") .<= F.lit (4.5 :: Double)) + ((F.col Double "x" + 
F.col Double "y") .<= F.lit (4.5 :: Double)) (F.lit ("low" :: T.Text)) ( F.ifThenElse - ((F.col @Double "x" - F.col @Double "y") .<= F.lit (0.5 :: Double)) + ((F.col Double "x" - F.col Double "y") .<= F.lit (0.5 :: Double)) (F.lit "mid") (F.lit "high") ) @@ -390,19 +391,19 @@ taoRecoversNestedObliqueDerived = TestCase $ do indices = V.enumFromN 0 16 initTree = Branch - (F.col @Double "x" .<= F.lit (1.5 :: Double)) + (F.col Double "x" .<= F.lit (1.5 :: Double)) (Leaf "low") ( Branch - (F.col @Double "y" .<= F.lit (3.5 :: Double)) + (F.col Double "y" .<= F.lit (3.5 :: Double)) (Leaf "mid") (Leaf "high") ) :: Tree T.Text conds = - [ (F.col @Double "x" + F.col @Double "y") .<= F.lit (4.5 :: Double) - , (F.col @Double "x" + F.col @Double "y") .> F.lit (4.5 :: Double) - , (F.col @Double "x" - F.col @Double "y") .<= F.lit (0.5 :: Double) - , (F.col @Double "x" - F.col @Double "y") .> F.lit (0.5 :: Double) + [ (F.col Double "x" + F.col Double "y") .<= F.lit (4.5 :: Double) + , (F.col Double "x" + F.col Double "y") .> F.lit (4.5 :: Double) + , (F.col Double "x" - F.col Double "y") .<= F.lit (0.5 :: Double) + , (F.col Double "x" - F.col Double "y") .> F.lit (0.5 :: Double) ] cfg = defaultTreeConfig{taoIterations = 5, expressionPairs = 4, minLeafSize = 1} result = taoOptimize @T.Text cfg "label" conds df indices initTree @@ -418,17 +419,17 @@ obliqueAxisAlignedFixture :: obliqueAxisAlignedFixture = let labelExpr = F.ifThenElse - ((F.col @Double "x" + F.col @Double "y") .<= F.lit (4.5 :: Double)) + ((F.col Double "x" + F.col Double "y") .<= F.lit (4.5 :: Double)) (F.lit ("pos" :: T.Text)) (F.lit ("neg" :: T.Text)) df = D.derive @T.Text "label" labelExpr gridBaseDF indices = V.enumFromN 0 16 axisConds = - [F.col @Double "x" .<= F.lit (t :: Double) | t <- [1.5, 2.5, 3.5]] - ++ [F.col @Double "y" .<= F.lit (t :: Double) | t <- [1.5, 2.5, 3.5]] + [F.col Double "x" .<= F.lit (t :: Double) | t <- [1.5, 2.5, 3.5]] + ++ [F.col Double "y" .<= F.lit (t :: Double) | t <- [1.5, 2.5, 3.5]] 
initTree = Branch - (F.col @Double "x" .<= F.lit (2.5 :: Double)) + (F.col Double "x" .<= F.lit (2.5 :: Double)) (Leaf "pos") (Leaf "neg") :: Tree T.Text @@ -555,7 +556,7 @@ nullValueRoutesFalseTest = TestCase $ do ] -- Nothing <= 6.0 = Nothing -> fromMaybe False = False -> right -- Just 5.0 <= 6.0 = Just True -> fromMaybe False = True -> left - cond = F.fromMaybe False (F.col @(Maybe Double) "x" .<= F.lit (6.0 :: Double)) + cond = F.fromMaybe False (F.col (Maybe Double) "x" .<= F.lit (6.0 :: Double)) (lft, rgt) = partitionIndices cond df (V.fromList [0, 1]) assertBool "null row (idx 0) routes to right (false) partition" (0 `V.elem` rgt) assertBool "Just 5.0 <= 6.0 routes to left (true) partition" (1 `V.elem` lft) @@ -772,10 +773,10 @@ taoRecoversNestedObliqueWithoutHint :: Test taoRecoversNestedObliqueWithoutHint = TestCase $ do let labelExpr = F.ifThenElse - ((F.col @Double "x" + F.col @Double "y") .<= F.lit (4.5 :: Double)) + ((F.col Double "x" + F.col Double "y") .<= F.lit (4.5 :: Double)) (F.lit ("low" :: T.Text)) ( F.ifThenElse - ((F.col @Double "x" - F.col @Double "y") .<= F.lit (0.5 :: Double)) + ((F.col Double "x" - F.col Double "y") .<= F.lit (0.5 :: Double)) (F.lit "mid") (F.lit "high") ) @@ -783,17 +784,17 @@ taoRecoversNestedObliqueWithoutHint = TestCase $ do indices = V.enumFromN 0 16 initTree = Branch - (F.col @Double "x" .<= F.lit (1.5 :: Double)) + (F.col Double "x" .<= F.lit (1.5 :: Double)) (Leaf "low") ( Branch - (F.col @Double "y" .<= F.lit (3.5 :: Double)) + (F.col Double "y" .<= F.lit (3.5 :: Double)) (Leaf "mid") (Leaf "high") ) :: Tree T.Text axisOnlyConds = - [F.col @Double "x" .<= F.lit (t :: Double) | t <- [1.5, 2.5, 3.5]] - ++ [F.col @Double "y" .<= F.lit (t :: Double) | t <- [1.5, 2.5, 3.5]] + [F.col Double "x" .<= F.lit (t :: Double) | t <- [1.5, 2.5, 3.5]] + ++ [F.col Double "y" .<= F.lit (t :: Double) | t <- [1.5, 2.5, 3.5]] cfg = defaultTreeConfig { taoIterations = 20 @@ -1129,90 +1130,90 @@ assertEqExpr msg expected actual = 
threshAndLeq :: Test threshAndLeq = TestCase $ do - let a = materializeOrFail (F.col @Double "x" .<=. F.lit (3.0 :: Double)) - b = materializeOrFail (F.col @Double "x" .<=. F.lit (1.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .<=. F.lit (3.0 :: Double)) + b = materializeOrFail (F.col Double "x" .<=. F.lit (1.0 :: Double)) r = combineAndVec a b assertEqExpr "AND of x≤3 and x≤1 collapses to x≤1" - (F.col @Double "x" .<=. F.lit (1.0 :: Double)) + (F.col Double "x" .<=. F.lit (1.0 :: Double)) (cvExpr r) threshOrLeq :: Test threshOrLeq = TestCase $ do - let a = materializeOrFail (F.col @Double "x" .<=. F.lit (3.0 :: Double)) - b = materializeOrFail (F.col @Double "x" .<=. F.lit (1.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .<=. F.lit (3.0 :: Double)) + b = materializeOrFail (F.col Double "x" .<=. F.lit (1.0 :: Double)) r = combineOrVec a b assertEqExpr "OR of x≤3 and x≤1 collapses to x≤3" - (F.col @Double "x" .<=. F.lit (3.0 :: Double)) + (F.col Double "x" .<=. F.lit (3.0 :: Double)) (cvExpr r) threshAndLt :: Test threshAndLt = TestCase $ do - let a = materializeOrFail (F.col @Double "x" .<. F.lit (3.0 :: Double)) - b = materializeOrFail (F.col @Double "x" .<. F.lit (1.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .<. F.lit (3.0 :: Double)) + b = materializeOrFail (F.col Double "x" .<. F.lit (1.0 :: Double)) r = combineAndVec a b assertEqExpr "AND of x<3 and x<1 collapses to x<1" - (F.col @Double "x" .<. F.lit (1.0 :: Double)) + (F.col Double "x" .<. F.lit (1.0 :: Double)) (cvExpr r) threshOrLt :: Test threshOrLt = TestCase $ do - let a = materializeOrFail (F.col @Double "x" .<. F.lit (3.0 :: Double)) - b = materializeOrFail (F.col @Double "x" .<. F.lit (1.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .<. F.lit (3.0 :: Double)) + b = materializeOrFail (F.col Double "x" .<. F.lit (1.0 :: Double)) r = combineOrVec a b assertEqExpr "OR of x<3 and x<1 collapses to x<3" - (F.col @Double "x" .<. 
F.lit (3.0 :: Double)) + (F.col Double "x" .<. F.lit (3.0 :: Double)) (cvExpr r) threshAndGeq :: Test threshAndGeq = TestCase $ do - let a = materializeOrFail (F.col @Double "x" .>=. F.lit (1.0 :: Double)) - b = materializeOrFail (F.col @Double "x" .>=. F.lit (3.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .>=. F.lit (1.0 :: Double)) + b = materializeOrFail (F.col Double "x" .>=. F.lit (3.0 :: Double)) r = combineAndVec a b assertEqExpr "AND of x≥1 and x≥3 collapses to x≥3" - (F.col @Double "x" .>=. F.lit (3.0 :: Double)) + (F.col Double "x" .>=. F.lit (3.0 :: Double)) (cvExpr r) threshOrGeq :: Test threshOrGeq = TestCase $ do - let a = materializeOrFail (F.col @Double "x" .>=. F.lit (1.0 :: Double)) - b = materializeOrFail (F.col @Double "x" .>=. F.lit (3.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .>=. F.lit (1.0 :: Double)) + b = materializeOrFail (F.col Double "x" .>=. F.lit (3.0 :: Double)) r = combineOrVec a b assertEqExpr "OR of x≥1 and x≥3 collapses to x≥1" - (F.col @Double "x" .>=. F.lit (1.0 :: Double)) + (F.col Double "x" .>=. F.lit (1.0 :: Double)) (cvExpr r) threshAndGt :: Test threshAndGt = TestCase $ do - let a = materializeOrFail (F.col @Double "x" .>. F.lit (1.0 :: Double)) - b = materializeOrFail (F.col @Double "x" .>. F.lit (3.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .>. F.lit (1.0 :: Double)) + b = materializeOrFail (F.col Double "x" .>. F.lit (3.0 :: Double)) r = combineAndVec a b assertEqExpr "AND of x>1 and x>3 collapses to x>3" - (F.col @Double "x" .>. F.lit (3.0 :: Double)) + (F.col Double "x" .>. F.lit (3.0 :: Double)) (cvExpr r) threshOrGt :: Test threshOrGt = TestCase $ do - let a = materializeOrFail (F.col @Double "x" .>. F.lit (1.0 :: Double)) - b = materializeOrFail (F.col @Double "x" .>. F.lit (3.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .>. F.lit (1.0 :: Double)) + b = materializeOrFail (F.col Double "x" .>. 
F.lit (3.0 :: Double)) r = combineOrVec a b assertEqExpr "OR of x>1 and x>3 collapses to x>1" - (F.col @Double "x" .>. F.lit (1.0 :: Double)) + (F.col Double "x" .>. F.lit (1.0 :: Double)) (cvExpr r) -- Six negative cases: rewrite must NOT fire. threshNegMixedDirection :: Test threshNegMixedDirection = TestCase $ do - let a = materializeOrFail (F.col @Double "x" .<. F.lit (3.0 :: Double)) - b = materializeOrFail (F.col @Double "x" .>=. F.lit (1.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .<. F.lit (3.0 :: Double)) + b = materializeOrFail (F.col Double "x" .>=. F.lit (1.0 :: Double)) r = combineAndVec a b -- Mixed directions (< vs ≥): consolidation deliberately out-of-scope. -- Expect the generic F.and form. @@ -1223,8 +1224,8 @@ threshNegMixedDirection = TestCase $ do threshNegCrossColumn :: Test threshNegCrossColumn = TestCase $ do - let a = materializeOrFail (F.col @Double "x" .>. F.lit (1.0 :: Double)) - b = materializeOrFail (F.col @Double "y" .>. F.lit (3.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .>. F.lit (1.0 :: Double)) + b = materializeOrFail (F.col Double "y" .>. F.lit (3.0 :: Double)) r = combineAndVec a b -- Same op, different columns: no rewrite. assertEqExpr @@ -1234,8 +1235,8 @@ threshNegCrossColumn = TestCase $ do threshNegMixedOpFamily :: Test threshNegMixedOpFamily = TestCase $ do - let a = materializeOrFail (F.col @Double "x" .>. F.lit (1.0 :: Double)) - b = materializeOrFail (F.col @Double "x" .<. F.lit (4.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .>. F.lit (1.0 :: Double)) + b = materializeOrFail (F.col Double "x" .<. F.lit (4.0 :: Double)) r = combineAndVec a b -- > and < are different op families: no rewrite. assertEqExpr @@ -1245,8 +1246,8 @@ threshNegMixedOpFamily = TestCase $ do threshNegEqualityOp :: Test threshNegEqualityOp = TestCase $ do - let a = materializeOrFail (F.col @Double "x" .==. F.lit (3.0 :: Double)) - b = materializeOrFail (F.col @Double "x" .==. 
F.lit (1.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .==. F.lit (3.0 :: Double)) + b = materializeOrFail (F.col Double "x" .==. F.lit (1.0 :: Double)) r = combineOrVec a b -- Equality is not in the threshold family; consolidate doesn't fire. assertEqExpr @@ -1257,8 +1258,8 @@ threshNegEqualityOp = TestCase $ do threshNegLitOnLeft :: Test threshNegLitOnLeft = TestCase $ do -- Lit on LEFT of the comparison: pattern requires (Col, Lit) ordering. - let a = materializeOrFail (F.lit (1.0 :: Double) .<. F.col @Double "x") - b = materializeOrFail (F.lit (3.0 :: Double) .<. F.col @Double "x") + let a = materializeOrFail (F.lit (1.0 :: Double) .<. F.col Double "x") + b = materializeOrFail (F.lit (3.0 :: Double) .<. F.col Double "x") r = combineAndVec a b assertEqExpr "Lit-on-left AND keeps generic F.and form" @@ -1268,8 +1269,8 @@ threshNegLitOnLeft = TestCase $ do threshNegNonLiteralRhs :: Test threshNegNonLiteralRhs = TestCase $ do -- RHS is a Col, not a Lit: pattern doesn't match. - let a = materializeOrFail (F.col @Double "x" .>. F.col @Double "y") - b = materializeOrFail (F.col @Double "x" .>. F.lit (3.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .>. F.col Double "y") + b = materializeOrFail (F.col Double "x" .>. F.lit (3.0 :: Double)) r = combineAndVec a b assertEqExpr "non-literal RHS AND keeps generic F.and form" @@ -1282,8 +1283,8 @@ threshNegNonLiteralRhs = TestCase $ do -- the inputs at every row of a synthetic DataFrame. threshSemanticPreservation :: Test threshSemanticPreservation = TestCase $ do - let a = materializeOrFail (F.col @Double "x" .>. F.lit (1.0 :: Double)) - b = materializeOrFail (F.col @Double "x" .>. F.lit (3.0 :: Double)) + let a = materializeOrFail (F.col Double "x" .>. F.lit (1.0 :: Double)) + b = materializeOrFail (F.col Double "x" .>. 
F.lit (3.0 :: Double)) rAnd = combineAndVec a b rOr = combineOrVec a b expectedAnd = VU.zipWith (&&) (cvVec a) (cvVec b) diff --git a/tests/Functions.hs b/tests/Functions.hs index 2445706..16a92ea 100644 --- a/tests/Functions.hs +++ b/tests/Functions.hs @@ -76,7 +76,7 @@ testSum = , ("sum", DI.fromList (replicate 10 (55 :: Int))) ] ) - (D.derive "sum" (F.sum (F.col @Int "A")) df) + (D.derive @Int "sum" (F.sum (F.col Int "A")) df) ) tests :: [Test] diff --git a/tests/LazyParquet.hs b/tests/LazyParquet.hs index 2632828..acb8391 100644 --- a/tests/LazyParquet.hs +++ b/tests/LazyParquet.hs @@ -19,17 +19,17 @@ allTypesSchema :: Schema allTypesSchema = Schema $ M.fromList - [ ("id", schemaType @Int32) - , ("bool_col", schemaType @Bool) - , ("tinyint_col", schemaType @Int32) - , ("smallint_col", schemaType @Int32) - , ("int_col", schemaType @Int32) - , ("bigint_col", schemaType @Int64) - , ("float_col", schemaType @Float) - , ("double_col", schemaType @Double) - , ("date_string_col", schemaType @T.Text) - , ("string_col", schemaType @T.Text) - , ("timestamp_col", schemaType @UTCTime) + [ ("id", schemaType Int32) + , ("bool_col", schemaType Bool) + , ("tinyint_col", schemaType Int32) + , ("smallint_col", schemaType Int32) + , ("int_col", schemaType Int32) + , ("bigint_col", schemaType Int64) + , ("float_col", schemaType Float) + , ("double_col", schemaType Double) + , ("date_string_col", schemaType T.Text) + , ("string_col", schemaType T.Text) + , ("timestamp_col", schemaType UTCTime) ] plainPath :: FilePath @@ -64,7 +64,7 @@ filterPushdown = actual <- L.runDataFrame ( L.filter - (F.geq (F.col @Int32 "id") (F.lit 6)) + (F.geq (F.col Int32 "id") (F.lit 6)) (L.scanParquet allTypesSchema (T.pack plainPath)) ) assertEqual "filterPushdown" (2, 11) (D.dimensions actual) @@ -80,7 +80,7 @@ filterAndProject = ( L.select ["id", "bool_col"] ( L.filter - (F.geq (F.col @Int32 "id") (F.lit 6)) + (F.geq (F.col Int32 "id") (F.lit 6)) (L.scanParquet allTypesSchema (T.pack plainPath)) ) ) 
diff --git a/tests/Operations/Aggregations.hs b/tests/Operations/Aggregations.hs index 0fd3672..d931a50 100644 --- a/tests/Operations/Aggregations.hs +++ b/tests/Operations/Aggregations.hs @@ -39,8 +39,8 @@ foldAggregation = ) ( testData & D.groupBy ["test1"] - & D.aggregate [F.count (F.col @Int "test2") `as` "test2"] - & D.sortBy [D.Asc (F.col @Int "test1")] + & D.aggregate [F.count (F.col Int "test2") `as` "test2"] + & D.sortBy [D.Asc (F.col Int "test1")] ) ) @@ -57,7 +57,7 @@ countAllAggregation = ( testData & D.groupBy ["test1"] & D.aggregate [F.countAll `as` "n"] - & D.sortBy [D.Asc (F.col @Int "test1")] + & D.sortBy [D.Asc (F.col Int "test1")] ) ) @@ -83,7 +83,7 @@ countAllAggregationTyped = ] & DT.groupBy @'["test1"] & DT.aggregate (DT.as @"n" DT.countAll) - & DT.sortBy [DT.asc (DT.col @"test1")] + & DT.sortBy [DT.asc (DT.col "test1")] & DT.thaw ) ) @@ -109,8 +109,8 @@ foldAggregationTyped = , DT.Column "test6" Integer ] & DT.groupBy @'["test1"] - & DT.aggregate (DT.as @"test2_count" (DT.count (DT.col @"test2"))) - & DT.sortBy [DT.asc (DT.col @"test1")] + & DT.aggregate (DT.as @"test2_count" (DT.count (DT.col "test2"))) + & DT.sortBy [DT.asc (DT.col "test1")] & DT.thaw ) ) @@ -127,8 +127,8 @@ numericAggregation = ) ( testData & D.groupBy ["test1"] - & D.aggregate [F.mean (F.col @Int "test2") `as` "test2"] - & D.sortBy [D.Asc (F.col @Int "test1")] + & D.aggregate [F.mean (F.col Int "test2") `as` "test2"] + & D.sortBy [D.Asc (F.col Int "test1")] ) ) @@ -153,8 +153,8 @@ numericAggregationTyped = , DT.Column "test6" Integer ] & DT.groupBy @'["test1"] - & DT.aggregate (DT.as @"test2_mean" (DT.mean (DT.col @"test2"))) - & DT.sortBy [DT.asc (DT.col @"test1")] + & DT.aggregate (DT.as @"test2_mean" (DT.mean (DT.col "test2"))) + & DT.sortBy [DT.asc (DT.col "test1")] & DT.thaw ) ) @@ -172,9 +172,9 @@ numericAggregationOfUnaggregatedUnaryOp = ( testData & D.groupBy ["test1"] & D.aggregate - [ F.mean (F.lift (fromIntegral @Int @Double) (F.col @Int "test2")) `as` "test2" 
+ [ F.mean (F.lift (fromIntegral @Int @Double) (F.col Int "test2")) `as` "test2" ] - & D.sortBy [D.Asc (F.col @Int "test1")] + & D.sortBy [D.Asc (F.col Int "test1")] ) ) @@ -190,8 +190,8 @@ numericAggregationOfUnaggregatedBinaryOp = ) ( testData & D.groupBy ["test1"] - & D.aggregate [F.mean (F.col @Int "test2" + F.col @Int "test2") `as` "test2"] - & D.sortBy [D.Asc (F.col @Int "test1")] + & D.aggregate [F.mean (F.col Int "test2" + F.col Int "test2") `as` "test2"] + & D.sortBy [D.Asc (F.col Int "test1")] ) ) @@ -208,10 +208,10 @@ reduceAggregationOfUnaggregatedUnaryOp = ( testData & D.groupBy ["test1"] & D.aggregate - [ F.maximum (F.lift (fromIntegral @Int @Double) (F.col @Int "test2")) + [ F.maximum (F.lift (fromIntegral @Int @Double) (F.col Int "test2")) `as` "test2" ] - & D.sortBy [D.Asc (F.col @Int "test1")] + & D.sortBy [D.Asc (F.col Int "test1")] ) ) @@ -228,8 +228,8 @@ reduceAggregationOfUnaggregatedBinaryOp = ( testData & D.groupBy ["test1"] & D.aggregate - [F.maximum (F.col @Int "test2" + F.col @Int "test2") `as` "test2"] - & D.sortBy [D.Asc (F.col @Int "test1")] + [F.maximum (F.col Int "test2" + F.col Int "test2") `as` "test2"] + & D.sortBy [D.Asc (F.col Int "test1")] ) ) @@ -247,7 +247,7 @@ aggregationOnNoRows = & D.drop 12 & D.groupBy ["test1"] & D.aggregate - [F.sum (F.col @Int "test2") `as` "sum(test2)"] + [F.sum (F.col Int "test2") `as` "sum(test2)"] ) ) @@ -319,7 +319,7 @@ groupByOptionalColumn = ( D.nRows ( optGroupByDf & D.groupBy ["key"] - & D.aggregate [F.count (F.col @Int "val") `as` "val"] + & D.aggregate [F.count (F.col Int "val") `as` "val"] ) ) ) diff --git a/tests/Operations/Apply.hs b/tests/Operations/Apply.hs index 1d4c411..737f836 100644 --- a/tests/Operations/Apply.hs +++ b/tests/Operations/Apply.hs @@ -225,7 +225,7 @@ imputeHappyPath = ( assertEqual "impute fills Nothing with the given value" (Just $ DI.UnboxedColumn Nothing (VU.fromList [1 :: Int, 0, 3])) - (DI.getColumn "opt" $ impute (F.col @(Maybe Int) "opt") 0 imputeData) + 
(DI.getColumn "opt" $ impute (F.col (Maybe Int) "opt") 0 imputeData) ) imputeColumnNotFound :: Test @@ -234,7 +234,7 @@ imputeColumnNotFound = ( assertExpectException "[Error Case]" (DE.columnNotFound "missing" "impute" (D.columnNames imputeData)) - (print $ impute (F.col @(Maybe Int) "missing") 0 imputeData) + (print $ impute (F.col (Maybe Int) "missing") 0 imputeData) ) imputeOnNonOptional :: Test @@ -243,7 +243,7 @@ imputeOnNonOptional = ( assertEqual "impute is a no-op on a non-nullable column" imputeData - (impute (F.col @(Maybe Int) "plain") 0 imputeData) + (impute (F.col (Maybe Int) "plain") 0 imputeData) ) imputePlainNoOp :: Test @@ -252,7 +252,7 @@ imputePlainNoOp = ( assertEqual "impute with non-Maybe expr is always a no-op" imputeData - (impute (F.col @Int "plain") 0 imputeData) + (impute (F.col Int "plain") 0 imputeData) ) imputeWithPlainNoOp :: Test @@ -261,7 +261,7 @@ imputeWithPlainNoOp = ( assertEqual "imputeWith with non-Maybe expr is always a no-op" imputeData - (imputeWith id (F.col @Int "plain") imputeData) + (imputeWith id (F.col Int "plain") imputeData) ) tests :: [Test] diff --git a/tests/Operations/Derive.hs b/tests/Operations/Derive.hs index 801c634..6f6fc05 100644 --- a/tests/Operations/Derive.hs +++ b/tests/Operations/Derive.hs @@ -40,8 +40,8 @@ deriveWAI = "test4" ( F.lift2 (++) - (F.lift show (F.col @Int "test1")) - (F.lift (: ([] :: [Char])) (F.col @Char "test3")) + (F.lift show (F.col Int "test1")) + (F.lift (: ([] :: [Char])) (F.col Char "test3")) ) testData ) @@ -53,13 +53,13 @@ deriveWAITyped = ( assertEqual "typed derive works with column expression" (zipWith (\n c -> show n ++ [c]) ([1 .. 26] :: [Int]) ['a' .. 'z']) - ( DT.columnAsList @"test4" $ + ( DT.columnAsList "test4" $ DT.derive - @"test4" + "test4" ( DT.lift2 (++) - (DT.lift show (DT.col @"test1")) - (DT.lift (: ([] :: [Char])) (DT.col @"test3")) + (DT.lift show (DT.col "test1")) + (DT.lift (: ([] :: [Char])) (DT.col "test3")) ) ( either (error . 
show) diff --git a/tests/Operations/Filter.hs b/tests/Operations/Filter.hs index 3154ca6..2e44140 100644 --- a/tests/Operations/Filter.hs +++ b/tests/Operations/Filter.hs @@ -33,7 +33,7 @@ filterColumnDoesNotExist = ( assertExpectException "[Error Case]" (D.columnNotFound "test0" "filter" (D.columnNames testData)) - (print $ D.filter (F.col @Int "test0") even testData) + (print $ D.filter (F.col Int "test0") even testData) ) filterColumnWrongType :: Test @@ -42,7 +42,7 @@ filterColumnWrongType = ( assertExpectException "[Error Case]" (D.typeMismatchError (show $ typeRep @Integer) (show $ typeRep @Int)) - (print $ D.filter (F.col @Integer "test1") even testData) + (print $ D.filter (F.col Integer "test1") even testData) ) filterByColumnDoesNotExist :: Test @@ -51,7 +51,7 @@ filterByColumnDoesNotExist = ( assertExpectException "[Error Case]" (D.columnNotFound "test0" "filter" (D.columnNames testData)) - (print $ D.filterBy even (F.col @Int "test0") testData) + (print $ D.filterBy even (F.col Int "test0") testData) ) filterByColumnWrongType :: Test @@ -60,7 +60,7 @@ filterByColumnWrongType = ( assertExpectException "[Error Case]" (D.typeMismatchError (show $ typeRep @Integer) (show $ typeRep @Int)) - (print $ D.filterBy even (F.col @Integer "test1") testData) + (print $ D.filterBy even (F.col Integer "test1") testData) ) filterColumnInexistentValues :: Test @@ -69,7 +69,7 @@ filterColumnInexistentValues = ( assertEqual "Non existent filter value returns no rows" (0, 8) - (D.dimensions $ D.filter (F.col @Int "test1") (< 0) testData) + (D.dimensions $ D.filter (F.col Int "test1") (< 0) testData) ) filterColumnAllValues :: Test @@ -78,7 +78,7 @@ filterColumnAllValues = ( assertEqual "Filters all columns" (26, 8) - (D.dimensions $ D.filter (F.col @Int "test1") (const True) testData) + (D.dimensions $ D.filter (F.col Int "test1") (const True) testData) ) filterJustWAI :: Test diff --git a/tests/Operations/Join.hs b/tests/Operations/Join.hs index 7919e5d..f04598d 100644 --- 
a/tests/Operations/Join.hs +++ b/tests/Operations/Join.hs @@ -54,7 +54,7 @@ testInnerJoin = , ("B", D.fromList ["B0" :: Text, "B1", "B2"]) ] ) - (D.sortBy [D.Asc (F.col @Text "key")] (innerJoin ["key"] df1 df2)) + (D.sortBy [D.Asc (F.col Text "key")] (innerJoin ["key"] df1 df2)) ) testLeftJoin :: Test @@ -68,7 +68,7 @@ testLeftJoin = , ("B", D.fromList [Just "B0", Just "B1" :: Maybe Text, Just "B2"]) ] ) - (D.sortBy [D.Asc (F.col @Text "key")] (leftJoin ["key"] df1 df2)) + (D.sortBy [D.Asc (F.col Text "key")] (leftJoin ["key"] df1 df2)) ) testRightJoin :: Test @@ -82,7 +82,7 @@ testRightJoin = , ("B", D.fromList ["B0" :: Text, "B1", "B2"]) ] ) - (D.sortBy [D.Asc (F.col @Text "key")] (rightJoin ["key"] df1 df2)) + (D.sortBy [D.Asc (F.col Text "key")] (rightJoin ["key"] df1 df2)) ) tdf1 :: DT.TypedDataFrame [DT.Column "key" Text, DT.Column "A" Text] @@ -102,7 +102,7 @@ testInnerJoinTyped = , ("B", D.fromList ["B0" :: Text, "B1", "B2"]) ] ) - (DT.thaw $ DT.sortBy [DT.asc (DT.col @"key")] (DT.innerJoin @'["key"] tdf1 tdf2)) + (DT.thaw $ DT.sortBy [DT.asc (DT.col "key")] (DT.innerJoin ["key"] tdf1 tdf2)) ) testLeftJoinTyped :: Test @@ -116,7 +116,7 @@ testLeftJoinTyped = , ("B", D.fromList [Just "B0", Just "B1" :: Maybe Text, Just "B2"]) ] ) - (DT.thaw $ DT.sortBy [DT.asc (DT.col @"key")] (DT.leftJoin @'["key"] tdf1 tdf2)) + (DT.thaw $ DT.sortBy [DT.asc (DT.col "key")] (DT.leftJoin ["key"] tdf1 tdf2)) ) -- A right-hand frame whose payload column is already optional. 
@@ -150,13 +150,13 @@ testLeftJoinTypedOptional = ) ] ) - (DT.thaw $ DT.sortBy [DT.asc (DT.col @"key")] joined) + (DT.thaw $ DT.sortBy [DT.asc (DT.col "key")] joined) ) where joined :: DT.TypedDataFrame [DT.Column "key" Text, DT.Column "A" Text, DT.Column "C" (Maybe Int)] - joined = DT.leftJoin @'["key"] tdf1 tdfOptional + joined = DT.leftJoin ["key"] tdf1 tdfOptional testRightJoinTyped :: Test testRightJoinTyped = @@ -169,7 +169,7 @@ testRightJoinTyped = , ("B", D.fromList ["B0" :: Text, "B1", "B2"]) ] ) - (DT.thaw $ DT.sortBy [DT.asc (DT.col @"key")] (DT.rightJoin @'["key"] tdf1 tdf2)) + (DT.thaw $ DT.sortBy [DT.asc (DT.col "key")] (DT.rightJoin ["key"] tdf1 tdf2)) ) staffDf :: D.DataFrame @@ -216,7 +216,7 @@ testFullOuterJoin = ) ] ) - (D.sortBy [D.Asc (F.col @Text "Name")] (fullOuterJoin ["Name"] studentDf staffDf)) + (D.sortBy [D.Asc (F.col Text "Name")] (fullOuterJoin ["Name"] studentDf staffDf)) ) dfL :: D.DataFrame @@ -247,7 +247,7 @@ testInnerJoinWithCollisions = , ("Ronly", D.fromList [10 :: Int, 11]) ] ) - (D.sortBy [D.Asc (F.col @Text "key")] (innerJoin ["key"] dfL dfR)) + (D.sortBy [D.Asc (F.col Text "key")] (innerJoin ["key"] dfL dfR)) ) testLeftJoinWithCollisions :: Test @@ -265,7 +265,7 @@ testLeftJoinWithCollisions = , ("Ronly", D.fromList [Just 10 :: Maybe Int, Just 11, Nothing]) ] ) - (D.sortBy [D.Asc (F.col @Text "key")] (leftJoin ["key"] dfL dfR)) + (D.sortBy [D.Asc (F.col Text "key")] (leftJoin ["key"] dfL dfR)) ) testRightJoinWithCollisions :: Test @@ -283,7 +283,7 @@ testRightJoinWithCollisions = , ("Lonly", D.fromList [Just "L0" :: Maybe Text, Just "L1", Nothing]) ] ) - (D.sortBy [D.Asc (F.col @Text "key")] (rightJoin ["key"] dfL dfR)) + (D.sortBy [D.Asc (F.col Text "key")] (rightJoin ["key"] dfL dfR)) ) testOuterJoinWithCollisions :: Test @@ -306,7 +306,7 @@ testOuterJoinWithCollisions = , ("Ronly", D.fromList [Just 10 :: Maybe Int, Just 11, Nothing, Just 13]) ] ) - (D.sortBy [D.Asc (F.col @Text "key")] (fullOuterJoin ["key"] dfL dfR)) + 
(D.sortBy [D.Asc (F.col Text "key")] (fullOuterJoin ["key"] dfL dfR)) ) testInnerJoinMissingKey :: Test diff --git a/tests/Operations/Nullable.hs b/tests/Operations/Nullable.hs index 877689f..b8994fa 100644 --- a/tests/Operations/Nullable.hs +++ b/tests/Operations/Nullable.hs @@ -30,7 +30,7 @@ testData = , ("y", DI.fromVector (V.fromList [Just 10, Nothing, Just 30 :: Maybe Int])) ] --- | col @Int .+ col @(Maybe Int) should give Maybe Int column +-- | col Int .+ col (Maybe Int) should give Maybe Int column addIntMaybeInt :: Test addIntMaybeInt = TestCase @@ -40,12 +40,12 @@ addIntMaybeInt = ( DI.getColumn "result" $ D.derive "result" - (F.col @Int "x" .+ F.col @(Maybe Int) "y") + (F.col Int "x" .+ F.col (Maybe Int) "y") testData ) ) --- | col @(Maybe Int) .+ col @Int should give Maybe Int column +-- | col (Maybe Int) .+ col Int should give Maybe Int column addMaybeIntInt :: Test addMaybeIntInt = TestCase @@ -55,12 +55,12 @@ addMaybeIntInt = ( DI.getColumn "result" $ D.derive "result" - (F.col @(Maybe Int) "y" .+ F.col @Int "x") + (F.col (Maybe Int) "y" .+ F.col Int "x") testData ) ) --- | col @Int .+ col @Int (same-type non-nullable) should give Int column +-- | col Int .+ col Int (same-type non-nullable) should give Int column addIntInt :: Test addIntInt = TestCase @@ -70,12 +70,12 @@ addIntInt = ( DI.getColumn "result" $ D.derive "result" - (F.col @Int "x" .+ F.col @Int "x") + (F.col Int "x" .+ F.col Int "x") testData ) ) --- | col @(Maybe Int) .+ col @(Maybe Int) should give Maybe Int column +-- | col (Maybe Int) .+ col (Maybe Int) should give Maybe Int column addMaybeMaybe :: Test addMaybeMaybe = TestCase @@ -85,7 +85,7 @@ addMaybeMaybe = ( DI.getColumn "result" $ D.derive "result" - (F.col @(Maybe Int) "y" .+ F.col @(Maybe Int) "y") + (F.col (Maybe Int) "y" .+ F.col (Maybe Int) "y") testData ) ) @@ -102,7 +102,7 @@ subIntMaybeInt = ( DI.getColumn "result" $ D.derive "result" - (F.col @Int "x" .- F.col @(Maybe Int) "y") + (F.col Int "x" .- F.col (Maybe Int) "y") 
testData ) ) @@ -119,7 +119,7 @@ eqIntMaybeInt = ( DI.getColumn "result" $ D.derive "result" - (F.col @Int "x" .== F.col @(Maybe Int) "y" :: Expr (Maybe Bool)) + (F.col Int "x" .== F.col (Maybe Int) "y" :: Expr (Maybe Bool)) testData ) ) @@ -134,7 +134,7 @@ eqIntInt = ( DI.getColumn "result" $ D.derive "result" - (F.col @Int "x" .== F.lit (1 :: Int) :: Expr Bool) + (F.col Int "x" .== F.lit (1 :: Int) :: Expr Bool) testData ) ) @@ -155,7 +155,7 @@ nullLiftMaybeInt = ( DI.getColumn "result" $ D.derive "result" - (F.nullLift negate (F.col @(Maybe Int) "y") :: Expr (Maybe Int)) + (F.nullLift negate (F.col (Maybe Int) "y") :: Expr (Maybe Int)) testData ) ) @@ -170,7 +170,7 @@ nullLiftInt = ( DI.getColumn "result" $ D.derive "result" - (F.nullLift negate (F.col @Int "x") :: Expr Int) + (F.nullLift negate (F.col Int "x") :: Expr Int) testData ) ) @@ -185,7 +185,7 @@ nullLift2IntMaybeInt = ( DI.getColumn "result" $ D.derive "result" - (F.nullLift2 (+) (F.col @Int "x") (F.col @(Maybe Int) "y") :: Expr (Maybe Int)) + (F.nullLift2 (+) (F.col Int "x") (F.col (Maybe Int) "y") :: Expr (Maybe Int)) testData ) ) @@ -200,7 +200,7 @@ nullLift2MaybeIntInt = ( DI.getColumn "result" $ D.derive "result" - (F.nullLift2 (+) (F.col @(Maybe Int) "y") (F.col @Int "x") :: Expr (Maybe Int)) + (F.nullLift2 (+) (F.col (Maybe Int) "y") (F.col Int "x") :: Expr (Maybe Int)) testData ) ) @@ -215,7 +215,7 @@ nullLift2IntInt = ( DI.getColumn "result" $ D.derive "result" - (F.nullLift2 (+) (F.col @Int "x") (F.col @Int "x") :: Expr Int) + (F.nullLift2 (+) (F.col Int "x") (F.col Int "x") :: Expr Int) testData ) ) @@ -237,7 +237,7 @@ crossData = ) ] --- | col @Int .+ col @Double → Double +-- | col Int .+ col Double → Double addIntDouble :: Test addIntDouble = TestCase @@ -245,11 +245,11 @@ addIntDouble = "Int .+ Double = Double" (Just $ DI.fromList [2.5, 4.5, 6.5 :: Double]) ( DI.getColumn "result" $ - D.derive "result" (F.col @Int "x" .+ F.col @Double "d") crossData + D.derive "result" (F.col Int "x" .+ 
F.col Double "d") crossData ) ) --- | col @Double .+ col @Int → Double +-- | col Double .+ col Int → Double addDoubleInt :: Test addDoubleInt = TestCase @@ -257,11 +257,11 @@ addDoubleInt = "Double .+ Int = Double" (Just $ DI.fromList [2.5, 4.5, 6.5 :: Double]) ( DI.getColumn "result" $ - D.derive "result" (F.col @Double "d" .+ F.col @Int "x") crossData + D.derive "result" (F.col Double "d" .+ F.col Int "x") crossData ) ) --- | col @(Maybe Int) .+ col @Double → Maybe Double +-- | col (Maybe Int) .+ col Double → Maybe Double addMaybeIntDouble :: Test addMaybeIntDouble = TestCase @@ -271,11 +271,11 @@ addMaybeIntDouble = DI.fromVector (V.fromList [Just 11.5, Nothing, Just 33.5 :: Maybe Double]) ) ( DI.getColumn "result" $ - D.derive "result" (F.col @(Maybe Int) "y" .+ F.col @Double "d") crossData + D.derive "result" (F.col (Maybe Int) "y" .+ F.col Double "d") crossData ) ) --- | col @Int .+ col @(Maybe Double) → Maybe Double +-- | col Int .+ col (Maybe Double) → Maybe Double addIntMaybeDouble :: Test addIntMaybeDouble = TestCase @@ -285,11 +285,11 @@ addIntMaybeDouble = DI.fromVector (V.fromList [Just 11.5, Nothing, Just 33.5 :: Maybe Double]) ) ( DI.getColumn "result" $ - D.derive "result" (F.col @Int "x" .+ F.col @(Maybe Double) "md") crossData + D.derive "result" (F.col Int "x" .+ F.col (Maybe Double) "md") crossData ) ) --- | col @(Maybe Int) .+ col @(Maybe Double) → Maybe Double +-- | col (Maybe Int) .+ col (Maybe Double) → Maybe Double addMaybeIntMaybeDouble :: Test addMaybeIntMaybeDouble = TestCase @@ -301,12 +301,12 @@ addMaybeIntMaybeDouble = ( DI.getColumn "result" $ D.derive "result" - (F.col @(Maybe Int) "y" .+ F.col @(Maybe Double) "md") + (F.col (Maybe Int) "y" .+ F.col (Maybe Double) "md") crossData ) ) --- | col @Int .- col @Double → Double +-- | col Int .- col Double → Double subIntDouble :: Test subIntDouble = TestCase @@ -314,11 +314,11 @@ subIntDouble = "Int .- Double = Double" (Just $ DI.fromList [-0.5, -0.5, -0.5 :: Double]) ( DI.getColumn 
"result" $ - D.derive "result" (F.col @Int "x" .- F.col @Double "d") crossData + D.derive "result" (F.col Int "x" .- F.col Double "d") crossData ) ) --- | col @Int .* col @Double → Double +-- | col Int .* col Double → Double mulIntDouble :: Test mulIntDouble = TestCase @@ -326,11 +326,11 @@ mulIntDouble = "Int .* Double = Double" (Just $ DI.fromList [1.5, 5.0, 10.5 :: Double]) ( DI.getColumn "result" $ - D.derive "result" (F.col @Int "x" .* F.col @Double "d") crossData + D.derive "result" (F.col Int "x" .* F.col Double "d") crossData ) ) --- | col @Double .- col @Int → Double +-- | col Double .- col Int → Double subDoubleInt :: Test subDoubleInt = TestCase @@ -338,11 +338,11 @@ subDoubleInt = "Double .- Int = Double" (Just $ DI.fromList [0.5, 0.5, 0.5 :: Double]) ( DI.getColumn "result" $ - D.derive "result" (F.col @Double "d" .- F.col @Int "x") crossData + D.derive "result" (F.col Double "d" .- F.col Int "x") crossData ) ) --- | col @(Maybe Int) .- col @Double → Maybe Double +-- | col (Maybe Int) .- col Double → Maybe Double subMaybeIntDouble :: Test subMaybeIntDouble = TestCase @@ -352,11 +352,11 @@ subMaybeIntDouble = DI.fromVector (V.fromList [Just 8.5, Nothing, Just 26.5 :: Maybe Double]) ) ( DI.getColumn "result" $ - D.derive "result" (F.col @(Maybe Int) "y" .- F.col @Double "d") crossData + D.derive "result" (F.col (Maybe Int) "y" .- F.col Double "d") crossData ) ) --- | col @Int .- col @(Maybe Double) → Maybe Double +-- | col Int .- col (Maybe Double) → Maybe Double subIntMaybeDouble :: Test subIntMaybeDouble = TestCase @@ -367,11 +367,11 @@ subIntMaybeDouble = (V.fromList [Just (-9.5), Nothing, Just (-27.5) :: Maybe Double]) ) ( DI.getColumn "result" $ - D.derive "result" (F.col @Int "x" .- F.col @(Maybe Double) "md") crossData + D.derive "result" (F.col Int "x" .- F.col (Maybe Double) "md") crossData ) ) --- | col @(Maybe Int) .- col @(Maybe Double) → Maybe Double +-- | col (Maybe Int) .- col (Maybe Double) → Maybe Double subMaybeIntMaybeDouble :: Test 
subMaybeIntMaybeDouble = TestCase @@ -384,12 +384,12 @@ subMaybeIntMaybeDouble = ( DI.getColumn "result" $ D.derive "result" - (F.col @(Maybe Int) "y" .- F.col @(Maybe Double) "md") + (F.col (Maybe Int) "y" .- F.col (Maybe Double) "md") crossData ) ) --- | col @Double .* col @Int → Double +-- | col Double .* col Int → Double mulDoubleInt :: Test mulDoubleInt = TestCase @@ -397,11 +397,11 @@ mulDoubleInt = "Double .* Int = Double" (Just $ DI.fromList [1.5, 5.0, 10.5 :: Double]) ( DI.getColumn "result" $ - D.derive "result" (F.col @Double "d" .* F.col @Int "x") crossData + D.derive "result" (F.col Double "d" .* F.col Int "x") crossData ) ) --- | col @(Maybe Int) .* col @Double → Maybe Double +-- | col (Maybe Int) .* col Double → Maybe Double mulMaybeIntDouble :: Test mulMaybeIntDouble = TestCase @@ -411,11 +411,11 @@ mulMaybeIntDouble = DI.fromVector (V.fromList [Just 15.0, Nothing, Just 105.0 :: Maybe Double]) ) ( DI.getColumn "result" $ - D.derive "result" (F.col @(Maybe Int) "y" .* F.col @Double "d") crossData + D.derive "result" (F.col (Maybe Int) "y" .* F.col Double "d") crossData ) ) --- | col @Int .* col @(Maybe Double) → Maybe Double +-- | col Int .* col (Maybe Double) → Maybe Double mulIntMaybeDouble :: Test mulIntMaybeDouble = TestCase @@ -425,11 +425,11 @@ mulIntMaybeDouble = DI.fromVector (V.fromList [Just 10.5, Nothing, Just 91.5 :: Maybe Double]) ) ( DI.getColumn "result" $ - D.derive "result" (F.col @Int "x" .* F.col @(Maybe Double) "md") crossData + D.derive "result" (F.col Int "x" .* F.col (Maybe Double) "md") crossData ) ) --- | col @(Maybe Int) .* col @(Maybe Double) → Maybe Double +-- | col (Maybe Int) .* col (Maybe Double) → Maybe Double mulMaybeIntMaybeDouble :: Test mulMaybeIntMaybeDouble = TestCase @@ -441,7 +441,7 @@ mulMaybeIntMaybeDouble = ( DI.getColumn "result" $ D.derive "result" - (F.col @(Maybe Int) "y" .* F.col @(Maybe Double) "md") + (F.col (Maybe Int) "y" .* F.col (Maybe Double) "md") crossData ) ) @@ -459,7 +459,7 @@ divData = ) ] 
--- | col @Int ./ col @Double → Double +-- | col Int ./ col Double → Double divIntDouble :: Test divIntDouble = TestCase @@ -467,11 +467,11 @@ divIntDouble = "Int ./ Double = Double" (Just $ DI.fromList [2.0, 2.0, 2.0 :: Double]) ( DI.getColumn "result" $ - D.derive "result" (F.col @Int "x" ./ F.col @Double "d") divData + D.derive "result" (F.col Int "x" ./ F.col Double "d") divData ) ) --- | col @Double ./ col @Int → Double +-- | col Double ./ col Int → Double divDoubleInt :: Test divDoubleInt = TestCase @@ -479,11 +479,11 @@ divDoubleInt = "Double ./ Int = Double" (Just $ DI.fromList [0.5, 0.5, 0.5 :: Double]) ( DI.getColumn "result" $ - D.derive "result" (F.col @Double "d" ./ F.col @Int "x") divData + D.derive "result" (F.col Double "d" ./ F.col Int "x") divData ) ) --- | col @(Maybe Int) ./ col @Double → Maybe Double +-- | col (Maybe Int) ./ col Double → Maybe Double divMaybeIntDouble :: Test divMaybeIntDouble = TestCase @@ -493,11 +493,11 @@ divMaybeIntDouble = DI.fromVector (V.fromList [Just 4.0, Nothing, Just 2.0 :: Maybe Double]) ) ( DI.getColumn "result" $ - D.derive "result" (F.col @(Maybe Int) "y" ./ F.col @Double "d") divData + D.derive "result" (F.col (Maybe Int) "y" ./ F.col Double "d") divData ) ) --- | col @Int ./ col @(Maybe Double) → Maybe Double +-- | col Int ./ col (Maybe Double) → Maybe Double divIntMaybeDouble :: Test divIntMaybeDouble = TestCase @@ -507,11 +507,11 @@ divIntMaybeDouble = DI.fromVector (V.fromList [Just 2.0, Nothing, Just 2.0 :: Maybe Double]) ) ( DI.getColumn "result" $ - D.derive "result" (F.col @Int "x" ./ F.col @(Maybe Double) "md") divData + D.derive "result" (F.col Int "x" ./ F.col (Maybe Double) "md") divData ) ) --- | col @(Maybe Int) ./ col @(Maybe Double) → Maybe Double +-- | col (Maybe Int) ./ col (Maybe Double) → Maybe Double divMaybeIntMaybeDouble :: Test divMaybeIntMaybeDouble = TestCase @@ -521,7 +521,7 @@ divMaybeIntMaybeDouble = DI.fromVector (V.fromList [Just 4.0, Nothing, Just 2.0 :: Maybe Double]) ) ( 
DI.getColumn "result" $ - D.derive "result" (F.col @(Maybe Int) "y" ./ F.col @(Maybe Double) "md") divData + D.derive "result" (F.col (Maybe Int) "y" ./ F.col (Maybe Double) "md") divData ) ) @@ -541,75 +541,75 @@ typedCrossData = either (error . show) id $ DT.freezeWithError @CrossSchema crossData --- | Typed: col @"x" .+ col @"d" → Double +-- | Typed: col "x" .+ col "d" → Double typedAddIntDouble :: Test typedAddIntDouble = TestCase ( assertEqual "Typed: Int .+ Double = Double" [2.5, 4.5, 6.5 :: Double] - ( DT.columnAsList @"result" $ - DT.derive @"result" (TE.col @"x" TE..+ TE.col @"d") typedCrossData + ( DT.columnAsList "result" $ + DT.derive "result" (TE.col "x" TE..+ TE.col "d") typedCrossData ) ) --- | Typed: col @"y" .+ col @"d" → Maybe Double +-- | Typed: col "y" .+ col "d" → Maybe Double typedAddMaybeIntDouble :: Test typedAddMaybeIntDouble = TestCase ( assertEqual "Typed: Maybe Int .+ Double = Maybe Double" [Just 11.5, Nothing, Just 33.5] - ( DT.columnAsList @"result" $ - DT.derive @"result" (TE.col @"y" TE..+ TE.col @"d") typedCrossData + ( DT.columnAsList "result" $ + DT.derive "result" (TE.col "y" TE..+ TE.col "d") typedCrossData ) ) --- | Typed: col @"x" .+ col @"md" → Maybe Double +-- | Typed: col "x" .+ col "md" → Maybe Double typedAddIntMaybeDouble :: Test typedAddIntMaybeDouble = TestCase ( assertEqual "Typed: Int .+ Maybe Double = Maybe Double" [Just 11.5, Nothing, Just 33.5] - ( DT.columnAsList @"result" $ - DT.derive @"result" (TE.col @"x" TE..+ TE.col @"md") typedCrossData + ( DT.columnAsList "result" $ + DT.derive "result" (TE.col "x" TE..+ TE.col "md") typedCrossData ) ) --- | Typed: col @"y" .+ col @"md" → Maybe Double +-- | Typed: col "y" .+ col "md" → Maybe Double typedAddMaybeIntMaybeDouble :: Test typedAddMaybeIntMaybeDouble = TestCase ( assertEqual "Typed: Maybe Int .+ Maybe Double = Maybe Double" [Just 20.5, Nothing, Just 60.5] - ( DT.columnAsList @"result" $ - DT.derive @"result" (TE.col @"y" TE..+ TE.col @"md") typedCrossData + ( 
DT.columnAsList "result" $ + DT.derive "result" (TE.col "y" TE..+ TE.col "md") typedCrossData ) ) --- | Typed: col @"x" .- col @"d" → Double +-- | Typed: col "x" .- col "d" → Double typedSubIntDouble :: Test typedSubIntDouble = TestCase ( assertEqual "Typed: Int .- Double = Double" [-0.5, -0.5, -0.5 :: Double] - ( DT.columnAsList @"result" $ - DT.derive @"result" (TE.col @"x" TE..- TE.col @"d") typedCrossData + ( DT.columnAsList "result" $ + DT.derive "result" (TE.col "x" TE..- TE.col "d") typedCrossData ) ) --- | Typed: col @"x" .* col @"d" → Double +-- | Typed: col "x" .* col "d" → Double typedMulIntDouble :: Test typedMulIntDouble = TestCase ( assertEqual "Typed: Int .* Double = Double" [1.5, 5.0, 10.5 :: Double] - ( DT.columnAsList @"result" $ - DT.derive @"result" (TE.col @"x" TE..* TE.col @"d") typedCrossData + ( DT.columnAsList "result" $ + DT.derive "result" (TE.col "x" TE..* TE.col "d") typedCrossData ) ) @@ -624,17 +624,17 @@ typedTestData = either (error . show) id $ DT.freezeWithError @TestSchema testData --- | Typed: col @"x" .+ col @"y" should give Maybe Int column +-- | Typed: col "x" .+ col "y" should give Maybe Int column typedAddIntMaybeInt :: Test typedAddIntMaybeInt = TestCase ( assertEqual "Typed: Int .+ Maybe Int = Maybe Int" [Just 11, Nothing, Just 33] - ( DT.columnAsList @"result" $ + ( DT.columnAsList "result" $ DT.derive - @"result" - (TE.col @"x" TE..+ TE.col @"y") + "result" + (TE.col "x" TE..+ TE.col "y") typedTestData ) ) @@ -646,10 +646,10 @@ typedNullLiftMaybeInt = ( assertEqual "Typed nullLift negate (Maybe Int) propagates Nothing" [Just (-10), Nothing, Just (-30 :: Int)] - ( DT.columnAsList @"result" $ + ( DT.columnAsList "result" $ DT.derive - @"result" - (TE.nullLift negate (TE.col @"y") :: TExpr TestSchema (Maybe Int)) + "result" + (TE.nullLift negate (TE.col "y") :: TExpr TestSchema (Maybe Int)) typedTestData ) ) @@ -661,40 +661,40 @@ typedNullLiftInt = ( assertEqual "Typed nullLift negate (Int) gives Int column" [-1, -2, -3 :: 
Int] - ( DT.columnAsList @"result" $ + ( DT.columnAsList "result" $ DT.derive - @"result" - (TE.nullLift negate (TE.col @"x") :: TExpr TestSchema Int) + "result" + (TE.nullLift negate (TE.col "x") :: TExpr TestSchema Int) typedTestData ) ) --- | Typed: nullLift2 (+) col @"x" col @"y" → Maybe Int +-- | Typed: nullLift2 (+) col "x" col "y" → Maybe Int typedNullLift2IntMaybeInt :: Test typedNullLift2IntMaybeInt = TestCase ( assertEqual "Typed nullLift2 (+) Int (Maybe Int) = Maybe Int" [Just 11, Nothing, Just 33 :: Maybe Int] - ( DT.columnAsList @"result" $ + ( DT.columnAsList "result" $ DT.derive - @"result" - (TE.nullLift2 (+) (TE.col @"x") (TE.col @"y") :: TExpr TestSchema (Maybe Int)) + "result" + (TE.nullLift2 (+) (TE.col "x") (TE.col "y") :: TExpr TestSchema (Maybe Int)) typedTestData ) ) --- | Typed: col @"y" .== col @"y" should give Maybe Bool column +-- | Typed: col "y" .== col "y" should give Maybe Bool column typedEqMaybeMaybe :: Test typedEqMaybeMaybe = TestCase ( assertEqual "Typed: Maybe Int .== Maybe Int = Maybe Bool" [Just True, Nothing, Just True] - ( DT.columnAsList @"result" $ + ( DT.columnAsList "result" $ DT.derive - @"result" - (TE.col @"y" TE..== TE.col @"y") + "result" + (TE.col "y" TE..== TE.col "y") typedTestData ) ) diff --git a/tests/Operations/Provenance.hs b/tests/Operations/Provenance.hs index 39fafe5..cb4eae4 100644 --- a/tests/Operations/Provenance.hs +++ b/tests/Operations/Provenance.hs @@ -22,7 +22,7 @@ base = -- A frame with one derived column "z". 
withZ :: D.DataFrame -withZ = D.derive "z" (F.col @Int "x" + F.col "y") base +withZ = D.derive "z" (F.col Int "x" + F.col Int "y") base -- ── insertColumn ────────────────────────────────────────────────────────────── @@ -42,7 +42,7 @@ insertPreservesProvenance = insertOverwriteDropsOwnExpr :: Test insertOverwriteDropsOwnExpr = let - df2 = D.derive "w" (F.col @Int "x") withZ + df2 = D.derive "w" (F.col Int "x") withZ df3 = D.insertColumn "z" (DI.fromList [99 :: Int]) df2 in TestCase $ do @@ -67,7 +67,7 @@ deriveTracksExpression = -- Multiple derives accumulate. deriveManyTracksAll :: Test deriveManyTracksAll = - let df = D.derive "w" (F.col @Int "x") withZ + let df = D.derive "w" (F.col Int "x") withZ in TestCase ( assertEqual "two derive calls should leave two expressions" @@ -78,7 +78,7 @@ deriveManyTracksAll = -- Re-deriving a column replaces its expression and keeps the count stable. deriveOverwriteReplacesExpression :: Test deriveOverwriteReplacesExpression = - let df = D.derive "z" (F.col @Int "y") withZ -- overwrite z + let df = D.derive "z" (F.col Int "y") withZ -- overwrite z in TestCase ( assertEqual "re-deriving z should not duplicate the entry" @@ -91,7 +91,7 @@ deriveOverwriteReplacesExpression = -- deriveWithExpr should also track the expression. deriveWithExprTracksExpression :: Test deriveWithExprTracksExpression = - let (_, df) = D.deriveWithExpr @Int "z" (F.col @Int "x" + F.col "y") base + let (_, df) = D.deriveWithExpr @Int "z" (F.col Int "x" + F.col Int "y") base in TestCase ( assertBool "deriveWithExpr should record z in derivingExpressions" @@ -133,7 +133,7 @@ semiGroupPreservesLeft = semiGroupPreservesBoth :: Test semiGroupPreservesBoth = - let dfW = D.derive "w" (F.col @Int "y") base + let dfW = D.derive "w" (F.col Int "y") base merged = withZ <> dfW in TestCase ( assertEqual @@ -145,8 +145,8 @@ semiGroupPreservesBoth = -- Left frame wins when both sides have an expression for the same column. 
semiGroupLeftBias :: Test semiGroupLeftBias = - let dfLeft = D.derive "z" (F.col @Int "x") base - dfRight = D.derive "z" (F.col @Int "y") base + let dfLeft = D.derive "z" (F.col Int "x") base + dfRight = D.derive "z" (F.col Int "y") base merged = dfLeft <> dfRight in TestCase ( assertEqual @@ -176,7 +176,7 @@ emptyWithSemiGroup = horizontalMergePreservesLeft :: Test horizontalMergePreservesLeft = - let dfW = D.derive "w" (F.col @Int "y") base + let dfW = D.derive "w" (F.col Int "y") base extra = D.fromNamedColumns [("q", DI.fromList [0 :: Int, 0, 0, 0, 0])] merged = dfW ||| extra in TestCase @@ -191,7 +191,7 @@ horizontalMergePreservesRight = dfW = D.derive "w" - (F.col @Int "y") + (F.col Int "y") (D.fromNamedColumns [("y", DI.fromList [2 .. 6 :: Int])]) merged = extra ||| dfW in TestCase @@ -205,7 +205,7 @@ horizontalMergePreservesBoth = let dfZ = withZ dfW = D.fromNamedColumns [("q", DI.fromList [0 :: Int, 0, 0, 0, 0])] -- give dfW a derived column on a separate base - dfWD = D.derive "w" (F.col @Int "q") dfW + dfWD = D.derive "w" (F.col Int "q") dfW merged = dfZ ||| dfWD in TestCase ( assertEqual diff --git a/tests/Operations/ReadCsv.hs b/tests/Operations/ReadCsv.hs index 32cdb6d..f896986 100644 --- a/tests/Operations/ReadCsv.hs +++ b/tests/Operations/ReadCsv.hs @@ -34,7 +34,7 @@ specifyTypesNoInferenceFallback = D.defaultReadOptions { D.typeSpec = D.SpecifyTypes - [("year", D.schemaType @Int)] + [("year", D.schemaType Int)] D.NoInference } arbuthnotPath @@ -56,7 +56,7 @@ specifyTypesInferFallback = D.defaultReadOptions { D.typeSpec = D.SpecifyTypes - [("year", D.schemaType @Int)] + [("year", D.schemaType Int)] (D.InferFromSample 100) } arbuthnotPath diff --git a/tests/Operations/Record.hs b/tests/Operations/Record.hs index 31f10c0..9beb5ad 100644 --- a/tests/Operations/Record.hs +++ b/tests/Operations/Record.hs @@ -234,15 +234,15 @@ deriveSchemaSplice = TestCase $ do (M.keys (IS.elements orderSchema)) assertEqual "order_id is Int64" - (Just (IS.schemaType 
@Int64)) + (Just (IS.schemaType Int64)) (M.lookup "order_id" (IS.elements orderSchema)) assertEqual "region is Text" - (Just (IS.schemaType @T.Text)) + (Just (IS.schemaType T.Text)) (M.lookup "region" (IS.elements orderSchema)) assertEqual "amount is Double" - (Just (IS.schemaType @Double)) + (Just (IS.schemaType Double)) (M.lookup "amount" (IS.elements orderSchema)) deriveSchemaNullable :: Test @@ -253,15 +253,15 @@ deriveSchemaNullable = TestCase $ do (M.keys (IS.elements userSchema)) assertEqual "user_id is Int64" - (Just (IS.schemaType @Int64)) + (Just (IS.schemaType Int64)) (M.lookup "user_id" (IS.elements userSchema)) assertEqual "user_name is Maybe Text" - (Just (IS.schemaType @(Maybe T.Text))) + (Just (IS.schemaType (Maybe T.Text))) (M.lookup "user_name" (IS.elements userSchema)) assertEqual "user_age is Maybe Int" - (Just (IS.schemaType @(Maybe Int))) + (Just (IS.schemaType (Maybe Int))) (M.lookup "user_age" (IS.elements userSchema)) deriveSchemaWide :: Test @@ -272,11 +272,11 @@ deriveSchemaWide = TestCase $ do (M.size (IS.elements wideSchema)) assertEqual "f1 is Int" - (Just (IS.schemaType @Int)) + (Just (IS.schemaType Int)) (M.lookup "f1" (IS.elements wideSchema)) assertEqual "f8 is Int" - (Just (IS.schemaType @Int)) + (Just (IS.schemaType Int)) (M.lookup "f8" (IS.elements wideSchema)) deriveSchemaReadsCsv :: Test @@ -335,7 +335,7 @@ deriveSchemaAccessorDerive = TestCase $ do assertEqual "accessor composes in derive expression" [20.0, 40.0] - (D.columnAsList (D.col @Double "double_amount") df') + (D.columnAsList (D.col Double "double_amount") df') labelColumnFilter :: Test labelColumnFilter = TestCase $ do @@ -346,7 +346,7 @@ labelColumnFilter = TestCase $ do Left e -> assertFailure (T.unpack e) Right xs -> assertEqual - "#region OverloadedLabel resolves to col @\"region\"" + "#region OverloadedLabel resolves to col \"region\"" [Order 1 "us" 10.0] xs diff --git a/tests/Operations/SetOps.hs b/tests/Operations/SetOps.hs index 25f24b2..d56fb77 100644 --- 
a/tests/Operations/SetOps.hs +++ b/tests/Operations/SetOps.hs @@ -13,7 +13,7 @@ import Test.HUnit hash-bucket order) can be compared deterministically. -} sortByA :: D.DataFrame -> D.DataFrame -sortByA = D.sortBy [D.Asc (F.col @Int "A")] +sortByA = D.sortBy [D.Asc (F.col Int "A")] dfA :: D.DataFrame dfA = diff --git a/tests/Operations/Shuffle.hs b/tests/Operations/Shuffle.hs index 4ccf3e0..92e0ae8 100644 --- a/tests/Operations/Shuffle.hs +++ b/tests/Operations/Shuffle.hs @@ -48,7 +48,7 @@ shufflePreservesData :: Test shufflePreservesData = let gen = mkStdGen 1234 shuffled = shuffle gen testDataFrame - sortedShuffled = D.sortBy [D.Asc (D.col @Int "numbers")] shuffled + sortedShuffled = D.sortBy [D.Asc (D.col Int "numbers")] shuffled in TestCase (assertEqual "sort recovers initial numbers" testDataFrame sortedShuffled) diff --git a/tests/Operations/Sort.hs b/tests/Operations/Sort.hs index 5eab698..2584545 100644 --- a/tests/Operations/Sort.hs +++ b/tests/Operations/Sort.hs @@ -42,7 +42,7 @@ sortByAscendingWAI = , ("test2", DI.fromList ['a' .. 'z']) ] ) - (D.sortBy [D.Asc (F.col @Int "test1")] testData) + (D.sortBy [D.Asc (F.col Int "test1")] testData) ) sortByDescendingWAI :: Test @@ -55,7 +55,7 @@ sortByDescendingWAI = , ("test2", DI.fromList $ reverse ['a' .. 'z']) ] ) - (D.sortBy [D.Desc (F.col @Int "test1")] testData) + (D.sortBy [D.Desc (F.col Int "test1")] testData) ) sortByTwoColumns :: Test @@ -64,7 +64,7 @@ sortByTwoColumns = ( assertEqual "Sorting moreTestData (which is already sorted) is idempotent." moreTestData - (D.sortBy [D.Asc (F.col @Int "test1"), D.Asc (F.col @Int "test2")] moreTestData) + (D.sortBy [D.Asc (F.col Int "test1"), D.Asc (F.col Int "test2")] moreTestData) ) sortByOneColumnAscOneColumnDesc :: Test @@ -77,7 +77,7 @@ sortByOneColumnAscOneColumnDesc = , ("test2", DI.fromList $ [10 :: Int, 9 .. 1] ++ [10, 9 .. 
1]) ] ) - (D.sortBy [D.Asc (F.col @Int "test1"), D.Desc (F.col @Int "test2")] moreTestData) + (D.sortBy [D.Asc (F.col Int "test1"), D.Desc (F.col Int "test2")] moreTestData) ) sortByColumnDoesNotExist :: Test @@ -86,7 +86,7 @@ sortByColumnDoesNotExist = ( assertExpectException "[Error Case]" (D.columnsNotFound ["test0"] "sortBy" (D.columnNames testData)) - (print $ D.sortBy [D.Asc (F.col @Int "test0")] testData) + (print $ D.sortBy [D.Asc (F.col Int "test0")] testData) ) compoundTestData :: D.DataFrame @@ -106,7 +106,7 @@ sortByCompoundExpression = , ("b", DI.fromList ([10, 20, 30, 40, 50] :: [Int])) ] ) - (D.sortBy [D.Asc (F.col @Int "a" + F.col @Int "b")] compoundTestData) + (D.sortBy [D.Asc (F.col Int "a" + F.col Int "b")] compoundTestData) ) sortByCompoundExpressionDescending :: Test @@ -119,7 +119,7 @@ sortByCompoundExpressionDescending = , ("b", DI.fromList ([50, 40, 30, 20, 10] :: [Int])) ] ) - (D.sortBy [D.Desc (F.col @Int "b" - F.col @Int "a")] compoundTestData) + (D.sortBy [D.Desc (F.col Int "b" - F.col Int "a")] compoundTestData) ) sortByCompoundMixedWithBareColumn :: Test @@ -133,7 +133,7 @@ sortByCompoundMixedWithBareColumn = ] ) ( D.sortBy - [D.Asc (F.col @Int "a" * 2), D.Desc (F.col @Int "b")] + [D.Asc (F.col Int "a" * 2), D.Desc (F.col Int "b")] compoundTestData ) ) @@ -150,7 +150,7 @@ sortByCompoundMissingColumn = ) ( print $ D.sortBy - [D.Asc (F.col @Int "nope" + F.col @Int "a")] + [D.Asc (F.col Int "nope" + F.col Int "a")] compoundTestData ) ) diff --git a/tests/Operations/Subset.hs b/tests/Operations/Subset.hs index cef527f..6378e68 100644 --- a/tests/Operations/Subset.hs +++ b/tests/Operations/Subset.hs @@ -87,7 +87,7 @@ prop_stratifiedSplit_deterministic _ = [ ("label", Col.fromList (replicate 50 ("A" :: T.Text) ++ replicate 50 "B")) , ("val", Col.fromList ([1 .. 
100] :: [Int])) ] - (tr, va) = D.stratifiedSplit (mkStdGen 314) 0.7 (D.col @T.Text "label") df + (tr, va) = D.stratifiedSplit (mkStdGen 314) 0.7 (D.col T.Text "label") df in fst (dataframeDimensions tr) + fst (dataframeDimensions va) == 100 strataDf :: DataFrame @@ -100,7 +100,7 @@ strataDf = unit_stratifiedSample_full :: Test unit_stratifiedSample_full = TestCase $ - let sampled = D.stratifiedSample (mkStdGen 42) 1.0 (D.col @T.Text "label") strataDf + let sampled = D.stratifiedSample (mkStdGen 42) 1.0 (D.col T.Text "label") strataDf in assertEqual "p=1.0 preserves row count" (fst $ dataframeDimensions strataDf) @@ -109,7 +109,7 @@ unit_stratifiedSample_full = unit_stratifiedSplit_rowCount :: Test unit_stratifiedSplit_rowCount = TestCase $ - let (tr, va) = D.stratifiedSplit (mkStdGen 99) 0.8 (D.col @T.Text "label") strataDf + let (tr, va) = D.stratifiedSplit (mkStdGen 99) 0.8 (D.col T.Text "label") strataDf in assertEqual "train+validation == total" (fst $ dataframeDimensions strataDf) @@ -123,7 +123,7 @@ unit_stratifiedSplit_singleRowStratum = [ ("label", Col.fromList (["A", "A", "A", "A", "A", "B"] :: [T.Text])) , ("val", Col.fromList ([1 .. 6] :: [Int])) ] - (tr, va) = D.stratifiedSplit (mkStdGen 7) 0.8 (D.col @T.Text "label") tinyDf + (tr, va) = D.stratifiedSplit (mkStdGen 7) 0.8 (D.col T.Text "label") tinyDf in assertEqual "single-row stratum: no rows lost" (fst $ dataframeDimensions tinyDf) @@ -152,7 +152,7 @@ unit_stratifiedSplit_proportions = ) , ("val", Col.fromList ([1 .. 
aCount + bCount] :: [Int])) ] - (tr, va) = D.stratifiedSplit (mkStdGen 42) 0.8 (D.col @T.Text "label") df + (tr, va) = D.stratifiedSplit (mkStdGen 42) 0.8 (D.col T.Text "label") df origProp = labelProportion "label" "A" df trProp = labelProportion "label" "A" tr vaProp = labelProportion "label" "A" va diff --git a/tests/Operations/Window.hs b/tests/Operations/Window.hs index da7d8a2..2cde58e 100644 --- a/tests/Operations/Window.hs +++ b/tests/Operations/Window.hs @@ -34,7 +34,7 @@ globalFilterTest = 7 ( purchases & D.filterWhere - (F.col @Double "amount" .<=. F.median (F.col @Double "amount") * 10) + (F.col Double "amount" .<=. F.median (F.col Double "amount") * 10) & D.nRows ) ) @@ -51,11 +51,11 @@ overMedianFilterTest = actual = purchases & D.filterWhere - ( F.col @Double "amount" - .<=. F.over ["country"] (F.median (F.col @Double "amount")) + ( F.col Double "amount" + .<=. F.over ["country"] (F.median (F.col Double "amount")) * 10 ) - & D.sortBy [D.Asc (F.col @T.Text "country"), D.Asc (F.col @Double "amount")] + & D.sortBy [D.Asc (F.col T.Text "country"), D.Asc (F.col Double "amount")] expected = D.fromNamedColumns [ @@ -77,17 +77,17 @@ globalVsOverDifferentResults = globalResult = purchases & D.filterWhere - (F.col @Double "amount" .<=. F.median (F.col @Double "amount") * 3) - & D.sortBy [D.Asc (F.col @T.Text "country"), D.Asc (F.col @Double "amount")] + (F.col Double "amount" .<=. F.median (F.col Double "amount") * 3) + & D.sortBy [D.Asc (F.col T.Text "country"), D.Asc (F.col Double "amount")] overResult = purchases & D.filterWhere - ( F.col @Double "amount" - .<=. F.over ["country"] (F.median (F.col @Double "amount")) + ( F.col Double "amount" + .<=. 
F.over ["country"] (F.median (F.col Double "amount")) * 3 ) - & D.sortBy [D.Asc (F.col @T.Text "country"), D.Asc (F.col @Double "amount")] + & D.sortBy [D.Asc (F.col T.Text "country"), D.Asc (F.col Double "amount")] overMeanDeriveTest :: Test overMeanDeriveTest = @@ -105,8 +105,8 @@ overMeanDeriveTest = ] result = simpleData - & D.derive "group_mean" (F.over ["group"] (F.mean (F.col @Double "value"))) - & D.sortBy [D.Asc (F.col @T.Text "group"), D.Asc (F.col @Double "value")] + & D.derive "group_mean" (F.over ["group"] (F.mean (F.col Double "value"))) + & D.sortBy [D.Asc (F.col T.Text "group"), D.Asc (F.col Double "value")] -- A mean = 15.0, B mean = 60.0 expectedMeans = [15.0, 15.0, 60.0, 60.0, 60.0] :: [Double] actualMeans = case DI.getColumn "group_mean" result of @@ -129,8 +129,8 @@ overSumTest = ] result = simpleData - & D.derive "group_sum" (F.over ["group"] (F.sum (F.col @Int "value"))) - & D.sortBy [D.Asc (F.col @T.Text "group"), D.Asc (F.col @Int "value")] + & D.derive "group_sum" (F.over ["group"] (F.sum (F.col Int "value"))) + & D.sortBy [D.Asc (F.col T.Text "group"), D.Asc (F.col Int "value")] expectedSums = [30, 30, 300, 300] :: [Int] actualSums = case DI.getColumn "group_sum" result of Nothing -> error "group_sum column not found" @@ -152,8 +152,8 @@ overCountTest = ] result = simpleData - & D.derive "group_count" (F.over ["group"] (F.count (F.col @Int "value"))) - & D.sortBy [D.Asc (F.col @T.Text "group"), D.Asc (F.col @Int "value")] + & D.derive "group_count" (F.over ["group"] (F.count (F.col Int "value"))) + & D.sortBy [D.Asc (F.col T.Text "group"), D.Asc (F.col Int "value")] expectedCounts = [3, 3, 3, 2, 2] :: [Int] actualCounts = case DI.getColumn "group_count" result of Nothing -> error "group_count column not found" @@ -177,8 +177,8 @@ mixedGlobalAndOverTest = simpleData & D.derive "deviation" - (F.col @Double "value" - F.over ["group"] (F.mean (F.col @Double "value"))) - & D.sortBy [D.Asc (F.col @T.Text "group"), D.Asc (F.col @Double "value")] 
+ (F.col Double "value" - F.over ["group"] (F.mean (F.col Double "value"))) + & D.sortBy [D.Asc (F.col T.Text "group"), D.Asc (F.col Double "value")] expectedDeviations = [-5.0, 5.0, -50.0, 50.0] :: [Double] actualDeviations = case DI.getColumn "deviation" result of Nothing -> error "deviation column not found" @@ -194,14 +194,14 @@ blogPostExampleGlobal = actual ) where - amount = F.col @Double "amount" - discount = F.col @Double "discount" + amount = F.col Double "amount" + discount = F.col Double "discount" actual = purchases & D.filterWhere (amount .<=. F.median amount * 10) & D.groupBy ["country"] & D.aggregate [F.sum (amount - discount) `as` "total"] - & D.sortBy [D.Asc (F.col @T.Text "country")] + & D.sortBy [D.Asc (F.col T.Text "country")] -- Global median = 60, threshold = 600 → removes 5000 and 800 -- France: (30-1)+(40-2)+(35-1) = 101, UK: (50-2)+(60-3) = 105, US: (100-5)+(200-10) = 285 expected = @@ -219,14 +219,14 @@ blogPostExampleOver = actual ) where - amount = F.col @Double "amount" - discount = F.col @Double "discount" + amount = F.col Double "amount" + discount = F.col Double "discount" actual = purchases & D.filterWhere (amount .<=. 
F.over ["country"] (F.median amount) * 10) & D.groupBy ["country"] & D.aggregate [F.sum (amount - discount) `as` "total"] - & D.sortBy [D.Asc (F.col @T.Text "country")] + & D.sortBy [D.Asc (F.col T.Text "country")] expected = D.fromNamedColumns [ ("country", DI.fromList (["France", "UK", "US"] :: [T.Text])) diff --git a/tests/Parquet.hs b/tests/Parquet.hs index c0bd8f4..3bb7174 100644 --- a/tests/Parquet.hs +++ b/tests/Parquet.hs @@ -98,7 +98,7 @@ allTypesPlainSnappy = testBothReadParquetPaths $ \readParquet -> TestCase ( assertEqual "allTypesPlainSnappy" - (D.filter (F.col @Int32 "id") (`elem` [6, 7]) allTypes) + (D.filter (F.col Int32 "id") (`elem` [6, 7]) allTypes) (unsafePerformIO (readParquet "./tests/data/alltypes_plain.snappy.parquet")) ) @@ -107,7 +107,7 @@ allTypesDictionary = testBothReadParquetPaths $ \readParquet -> TestCase ( assertEqual "allTypesPlainSnappy" - (D.filter (F.col @Int32 "id") (`elem` [0, 1]) allTypes) + (D.filter (F.col Int32 "id") (`elem` [0, 1]) allTypes) (unsafePerformIO (readParquet "./tests/data/alltypes_dictionary.parquet")) ) @@ -153,7 +153,7 @@ predicateWithOpts = , D.predicate = Just ( F.geq - (F.col @Int32 "id") + (F.col Int32 "id") (F.lit (6 :: Int32)) ) } @@ -176,7 +176,7 @@ predicateUsesNonSelectedColumnWithOpts = , D.predicate = Just ( F.geq - (F.col @Int32 "id") + (F.col Int32 "id") (F.lit (6 :: Int32)) ) } @@ -209,10 +209,10 @@ safeColumnsWithOpts = safeDf assertBool "safeColumns id type" - (hasElemType @(Maybe Int32) (unsafeGetColumn "id" safeDf)) + (hasElemType (Maybe Int32) (unsafeGetColumn "id" safeDf)) assertBool "safeColumns bool_col type" - (hasElemType @(Maybe Bool) (unsafeGetColumn "bool_col" safeDf)) + (hasElemType (Maybe Bool) (unsafeGetColumn "bool_col" safeDf)) safeColumnsWithSelectedColumns :: Test safeColumnsWithSelectedColumns = @@ -233,10 +233,10 @@ safeColumnsWithSelectedColumns = df assertBool "safeColumns projected id type" - (hasElemType @(Maybe Int32) (unsafeGetColumn "id" df)) + (hasElemType (Maybe 
Int32) (unsafeGetColumn "id" df)) assertBool "safeColumns projected bool_col type" - (hasElemType @(Maybe Bool) (unsafeGetColumn "bool_col" df)) + (hasElemType (Maybe Bool) (unsafeGetColumn "bool_col" df)) predicateWithOptsAcrossFiles :: Test predicateWithOptsAcrossFiles = @@ -252,7 +252,7 @@ predicateWithOptsAcrossFiles = , D.predicate = Just ( F.geq - (F.col @Int32 "id") + (F.col Int32 "id") (F.lit (6 :: Int32)) ) } @@ -1059,7 +1059,7 @@ byteStreamSplitExtendedGzip = -- columns as raw-byte text; proper float16 value decoding is not yet -- implemented. -- TODO: When IEEE 754 half-precision (float16) decoding is implemented, --- add a value-level assertion using hasElemType @Float (or a dedicated +-- add a value-level assertion using hasElemType Float (or a dedicated -- Float16 type if one is introduced). Verify that the decoded values match -- the known reference values for float16_nonzeros_and_nans.parquet. -- The column should no longer be exposed as raw-byte Text. diff --git a/tests/Plotting.hs b/tests/Plotting.hs index 54f1904..b7aa2a0 100644 --- a/tests/Plotting.hs +++ b/tests/Plotting.hs @@ -69,8 +69,8 @@ fieldTypeInference = TestCase $ do C.toVegaSpec ( C.chart mixedFrame & C.mark C.Point - & C.enc C.X (col @Double "a") - & C.enc C.Y (col @T.Text "g") + & C.enc C.X (col Double "a") + & C.enc C.Y (col T.Text "g") ) assertEqual "numeric column -> quantitative" @@ -84,7 +84,7 @@ fieldTypeInference = TestCase $ do boxIsBoxplot :: Test boxIsBoxplot = TestCase $ do let spec = - C.toVegaSpec (C.chart numFrame & C.mark C.Boxplot & C.enc C.Y (col @Double "a")) + C.toVegaSpec (C.chart numFrame & C.mark C.Boxplot & C.enc C.Y (col Double "a")) assertEqual "Chart box uses boxplot mark" (Just (String "boxplot")) @@ -103,7 +103,7 @@ legacyBoxIsBoxplot = TestCase $ do nanBecomesNull :: Test nanBecomesNull = TestCase $ do let df = D.fromNamedColumns [("a", D.fromList ([0 / 0, 1.0] :: [Double]))] - spec = C.toVegaSpec (C.chart df & C.enc C.Y (col @Double "a")) + spec = 
C.toVegaSpec (C.chart df & C.enc C.Y (col Double "a")) firstA = lookupKey "a" (fromMaybe Null (dataValues spec V.!? 0)) assertEqual "NaN inlines as null" (Just Null) firstA @@ -111,7 +111,7 @@ escapingSafe :: Test escapingSafe = TestCase $ do let weird = "we\"ir\\d" df = D.fromNamedColumns [(weird, D.fromList ([1.0, 2.0] :: [Double]))] - spec = C.toVegaSpec (C.chart df & C.enc C.X (col @Double weird)) + spec = C.toVegaSpec (C.chart df & C.enc C.X (col Double weird)) row0 = fromMaybe Null (dataValues spec V.!? 0) assertBool "weird column name present as a data key" @@ -125,7 +125,7 @@ computedExpr :: Test computedExpr = TestCase $ do let spec = C.toVegaSpec - (C.chart numFrame & C.enc C.Y (col @Double "a" + col @Double "a")) + (C.chart numFrame & C.enc C.Y (col Double "a" + col Double "a")) row0 = fromMaybe Null (dataValues spec V.!? 0) assertEqual "computed field named after channel" @@ -145,15 +145,15 @@ typedParity = TestCase $ do C.toVegaSpec ( C.chart numFrame & C.mark C.Point - & C.enc C.X (col @Double "a") - & C.enc C.Y (col @Double "b") + & C.enc C.X (col Double "a") + & C.enc C.Y (col Double "b") ) specT = CT.toVegaSpec ( CT.chart tdf & CT.mark CT.Point - & CT.enc CT.X (DT.col @"a") - & CT.enc CT.Y (DT.col @"b") + & CT.enc CT.X (DT.col "a") + & CT.enc CT.Y (DT.col "b") ) assertEqual "typed spec equals untyped spec" specU specT diff --git a/tests/Properties/Simplify.hs b/tests/Properties/Simplify.hs index 78b63e1..8d8ac40 100644 --- a/tests/Properties/Simplify.hs +++ b/tests/Properties/Simplify.hs @@ -67,18 +67,18 @@ genAtomBool = do t <- elements thresholds oneof [ elements - [ F.col @Double "x" .< F.lit t - , F.col @Double "x" .<= F.lit t - , F.col @Double "x" .> F.lit t - , F.col @Double "x" .>= F.lit t - , F.col @Double "x" .== F.lit t - , F.col @Double "x" ./= F.lit t + [ F.col Double "x" .< F.lit t + , F.col Double "x" .<= F.lit t + , F.col Double "x" .> F.lit t + , F.col Double "x" .>= F.lit t + , F.col Double "x" .== F.lit t + , F.col Double "x" ./= 
F.lit t ] , elements - [ F.toDouble (F.col @Int "n") .< F.lit t - , F.toDouble (F.col @Int "n") .<= F.lit t - , F.toDouble (F.col @Int "n") .> F.lit t - , F.toDouble (F.col @Int "n") .>= F.lit t + [ F.toDouble (F.col Int "n") .< F.lit t + , F.toDouble (F.col Int "n") .<= F.lit t + , F.toDouble (F.col Int "n") .> F.lit t + , F.toDouble (F.col Int "n") .>= F.lit t ] ] @@ -98,12 +98,12 @@ genAtomMaybe :: Gen (Expr (Maybe Bool)) genAtomMaybe = do t <- elements thresholds elements - [ F.col @(Maybe Double) "m" .< F.lit t - , F.col @(Maybe Double) "m" .<= F.lit t - , F.col @(Maybe Double) "m" .> F.lit t - , F.col @(Maybe Double) "m" .>= F.lit t - , F.col @(Maybe Double) "m" .== F.lit t - , F.col @(Maybe Double) "m" ./= F.lit t + [ F.col (Maybe Double) "m" .< F.lit t + , F.col (Maybe Double) "m" .<= F.lit t + , F.col (Maybe Double) "m" .> F.lit t + , F.col (Maybe Double) "m" .>= F.lit t + , F.col (Maybe Double) "m" .== F.lit t + , F.col (Maybe Double) "m" ./= F.lit t ] genMaybeExpr :: Int -> Gen (Expr (Maybe Bool)) diff --git a/tests/Simplify.hs b/tests/Simplify.hs index f871155..9a36f47 100644 --- a/tests/Simplify.hs +++ b/tests/Simplify.hs @@ -30,31 +30,31 @@ sameDirection = [ simplifiesTo "and lower bounds keeps max" ( F.and - (F.col @Double "age" .> F.lit (20 :: Double)) - (F.col @Double "age" .> F.lit (25 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (25 :: Double)) ) - (F.col @Double "age" .> F.lit (25 :: Double)) + (F.col Double "age" .> F.lit (25 :: Double)) , simplifiesTo "and upper bounds keeps min" ( F.and - (F.col @Double "age" .< F.lit (50 :: Double)) - (F.col @Double "age" .< F.lit (40 :: Double)) + (F.col Double "age" .< F.lit (50 :: Double)) + (F.col Double "age" .< F.lit (40 :: Double)) ) - (F.col @Double "age" .< F.lit (40 :: Double)) + (F.col Double "age" .< F.lit (40 :: Double)) , simplifiesTo "or lower bounds keeps min" ( F.or - (F.col @Double "age" .> F.lit (20 :: Double)) - (F.col @Double "age" .> F.lit 
(25 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (25 :: Double)) ) - (F.col @Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) , simplifiesTo "or upper bounds keeps max" ( F.or - (F.col @Double "age" .< F.lit (50 :: Double)) - (F.col @Double "age" .< F.lit (40 :: Double)) + (F.col Double "age" .< F.lit (50 :: Double)) + (F.col Double "age" .< F.lit (40 :: Double)) ) - (F.col @Double "age" .< F.lit (50 :: Double)) + (F.col Double "age" .< F.lit (50 :: Double)) ] mixedDirection :: [Test] @@ -62,52 +62,52 @@ mixedDirection = [ simplifiesTo "closed interval at a point becomes equality" ( F.and - (F.col @Double "age" .>= F.lit (30 :: Double)) - (F.col @Double "age" .<= F.lit (30 :: Double)) + (F.col Double "age" .>= F.lit (30 :: Double)) + (F.col Double "age" .<= F.lit (30 :: Double)) ) - (F.col @Double "age" .== F.lit (30 :: Double)) + (F.col Double "age" .== F.lit (30 :: Double)) , simplifiesTo "open contradiction becomes False" ( F.and - (F.col @Double "age" .> F.lit (30 :: Double)) - (F.col @Double "age" .< F.lit (30 :: Double)) + (F.col Double "age" .> F.lit (30 :: Double)) + (F.col Double "age" .< F.lit (30 :: Double)) ) (F.lit False) , simplifiesTo "disjoint bounds become False" ( F.and - (F.col @Double "age" .> F.lit (30 :: Double)) - (F.col @Double "age" .< F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (30 :: Double)) + (F.col Double "age" .< F.lit (20 :: Double)) ) (F.lit False) , simplifiesTo "distinct points conjoined become False" ( F.and - (F.col @Double "age" .== F.lit (30 :: Double)) - (F.col @Double "age" .== F.lit (40 :: Double)) + (F.col Double "age" .== F.lit (30 :: Double)) + (F.col Double "age" .== F.lit (40 :: Double)) ) (F.lit False) , simplifiesTo "point inside half-space becomes the point" ( F.and - (F.col @Double "age" .== F.lit (30 :: Double)) - (F.col @Double "age" .> F.lit (25 :: Double)) + (F.col Double "age" .== F.lit (30 :: Double)) + (F.col Double 
"age" .> F.lit (25 :: Double)) ) - (F.col @Double "age" .== F.lit (30 :: Double)) + (F.col Double "age" .== F.lit (30 :: Double)) , simplifiesTo "point outside half-space becomes False" ( F.and - (F.col @Double "age" .== F.lit (30 :: Double)) - (F.col @Double "age" .> F.lit (40 :: Double)) + (F.col Double "age" .== F.lit (30 :: Double)) + (F.col Double "age" .> F.lit (40 :: Double)) ) (F.lit False) , simplifiesTo "negation redundant under bound drops" ( F.and - (F.col @Double "age" ./= F.lit (30 :: Double)) - (F.col @Double "age" .> F.lit (40 :: Double)) + (F.col Double "age" ./= F.lit (30 :: Double)) + (F.col Double "age" .> F.lit (40 :: Double)) ) - (F.col @Double "age" .> F.lit (40 :: Double)) + (F.col Double "age" .> F.lit (40 :: Double)) ] tautologies :: [Test] @@ -115,22 +115,22 @@ tautologies = [ simplifiesTo "integral exhaustive cover becomes True" ( F.or - (F.toDouble (F.col @Int "ai") .<= F.lit (30 :: Double)) - (F.toDouble (F.col @Int "ai") .> F.lit (30 :: Double)) + (F.toDouble (F.col Int "ai") .<= F.lit (30 :: Double)) + (F.toDouble (F.col Int "ai") .> F.lit (30 :: Double)) ) (F.lit True) , simplifiesTo "distinct inequalities cover everything" ( F.or - (F.col @Double "age" ./= F.lit (30 :: Double)) - (F.col @Double "age" ./= F.lit (40 :: Double)) + (F.col Double "age" ./= F.lit (30 :: Double)) + (F.col Double "age" ./= F.lit (40 :: Double)) ) (F.lit True) , simplifiesTo "inequality or equality at same point" ( F.or - (F.col @Double "age" ./= F.lit (30 :: Double)) - (F.col @Double "age" .== F.lit (30 :: Double)) + (F.col Double "age" ./= F.lit (30 :: Double)) + (F.col Double "age" .== F.lit (30 :: Double)) ) (F.lit True) ] @@ -140,32 +140,32 @@ booleanAlgebra = [ simplifiesTo "idempotent and" ( F.and - (F.col @Double "age" .> F.lit (20 :: Double)) - (F.col @Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) ) - (F.col @Double "age" .> F.lit (20 :: Double)) + (F.col Double 
"age" .> F.lit (20 :: Double)) , simplifiesTo "absorption and over or" ( F.and - (F.col @Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) ( F.or - (F.col @Double "age" .> F.lit (20 :: Double)) - (F.col @Double "hours" .> F.lit (40 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) + (F.col Double "hours" .> F.lit (40 :: Double)) ) ) - (F.col @Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) , simplifiesTo "true and unit" - (F.and (F.lit True) (F.col @Double "hours" .> F.lit (40 :: Double))) - (F.col @Double "hours" .> F.lit (40 :: Double)) + (F.and (F.lit True) (F.col Double "hours" .> F.lit (40 :: Double))) + (F.col Double "hours" .> F.lit (40 :: Double)) , simplifiesTo "false and annihilates" - (F.and (F.lit False) (F.col @Double "hours" .> F.lit (40 :: Double))) + (F.and (F.lit False) (F.col Double "hours" .> F.lit (40 :: Double))) (F.lit False) , simplifiesTo "double negation" - (F.not (F.not (F.col @Double "age" .> F.lit (20 :: Double)))) - (F.col @Double "age" .> F.lit (20 :: Double)) + (F.not (F.not (F.col Double "age" .> F.lit (20 :: Double)))) + (F.col Double "age" .> F.lit (20 :: Double)) ] ifCollapse :: [Test] @@ -173,19 +173,19 @@ ifCollapse = [ simplifiesTo "boolean if becomes its condition" ( F.ifThenElse - (F.col @Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) (F.lit True) (F.lit False) ) - (F.col @Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) , simplifiesTo "if with equal branches collapses" ( F.ifThenElse - (F.col @Double "hours" .> F.lit (40 :: Double)) - (F.col @Double "age" .> F.lit (20 :: Double)) - (F.col @Double "age" .> F.lit (20 :: Double)) + (F.col Double "hours" .> F.lit (40 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) ) - (F.col @Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) ] 
multiPass :: [Test] @@ -195,34 +195,34 @@ multiPass = ( F.and ( F.and ( F.and - (F.col @Double "age" .> F.lit (10 :: Double)) - (F.col @Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (10 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) ) - (F.col @Double "age" .> F.lit (30 :: Double)) + (F.col Double "age" .> F.lit (30 :: Double)) ) - (F.col @Double "age" .> F.lit (40 :: Double)) + (F.col Double "age" .> F.lit (40 :: Double)) ) - (F.col @Double "age" .> F.lit (40 :: Double)) + (F.col Double "age" .> F.lit (40 :: Double)) , simplifiesTo "consolidate then contradiction" ( F.and ( F.and - (F.col @Double "age" .>= F.lit (30 :: Double)) - (F.col @Double "age" .>= F.lit (40 :: Double)) + (F.col Double "age" .>= F.lit (30 :: Double)) + (F.col Double "age" .>= F.lit (40 :: Double)) ) - (F.col @Double "age" .<= F.lit (35 :: Double)) + (F.col Double "age" .<= F.lit (35 :: Double)) ) (F.lit False) , simplifiesTo "cascade of contradictions" ( F.or ( F.and - (F.col @Double "age" .> F.lit (30 :: Double)) - (F.col @Double "age" .< F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (30 :: Double)) + (F.col Double "age" .< F.lit (20 :: Double)) ) ( F.and - (F.col @Double "hours" .> F.lit (200 :: Double)) - (F.col @Double "hours" .< F.lit (10 :: Double)) + (F.col Double "hours" .> F.lit (200 :: Double)) + (F.col Double "hours" .< F.lit (10 :: Double)) ) ) (F.lit False) @@ -230,21 +230,21 @@ multiPass = "consolidate enabling idempotence" ( F.and ( F.or - (F.col @Double "age" .> F.lit (20 :: Double)) - (F.col @Double "age" .> F.lit (25 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (25 :: Double)) ) ( F.or - (F.col @Double "age" .> F.lit (20 :: Double)) - (F.col @Double "age" .> F.lit (30 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (30 :: Double)) ) ) - (F.col @Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) , simplifiesTo "de 
morgan over contradiction" ( F.not ( F.and - (F.col @Double "age" .> F.lit (30 :: Double)) - (F.col @Double "age" .< F.lit (20 :: Double)) + (F.col Double "age" .> F.lit (30 :: Double)) + (F.col Double "age" .< F.lit (20 :: Double)) ) ) (F.lit True) @@ -252,12 +252,12 @@ multiPass = "interior contradiction collapses the conjunction" ( F.and ( F.and - (F.col @Double "age" .> F.lit (10 :: Double)) - (F.col @Double "hours" .> F.lit (40 :: Double)) + (F.col Double "age" .> F.lit (10 :: Double)) + (F.col Double "hours" .> F.lit (40 :: Double)) ) ( F.and - (F.col @Double "age" .> F.lit (30 :: Double)) - (F.col @Double "age" .< F.lit (25 :: Double)) + (F.col Double "age" .> F.lit (30 :: Double)) + (F.col Double "age" .< F.lit (25 :: Double)) ) ) (F.lit False) @@ -267,45 +267,45 @@ nullAware :: [Test] nullAware = [ simplifiesTo "just-literal lower bounds keep max" - ( (F.col @Int "age" .> F.lit (Just (30 :: Int))) - .&& (F.col @Int "age" .> F.lit (Just (35 :: Int))) + ( (F.col Int "age" .> F.lit (Just (30 :: Int))) + .&& (F.col Int "age" .> F.lit (Just (35 :: Int))) ) - (F.col @Int "age" .> F.lit (Just (35 :: Int))) + (F.col Int "age" .> F.lit (Just (35 :: Int))) , simplifiesTo "just-literal contradiction over non-null column becomes Just False" - ( (F.col @Int "age" .> F.lit (Just (30 :: Int))) - .&& (F.col @Int "age" .< F.lit (Just (20 :: Int))) + ( (F.col Int "age" .> F.lit (Just (30 :: Int))) + .&& (F.col Int "age" .< F.lit (Just (20 :: Int))) ) (F.lit (Just False)) , unchanged "nullable column contradiction stays unknown" - ( (F.col @(Maybe Int) "w" .> F.lit (Just (30 :: Int))) - .&& (F.col @(Maybe Int) "w" .< F.lit (Just (20 :: Int))) + ( (F.col (Maybe Int) "w" .> F.lit (Just (30 :: Int))) + .&& (F.col (Maybe Int) "w" .< F.lit (Just (20 :: Int))) ) , unchanged "nullable column tautology stays unknown" - ( (F.col @(Maybe Int) "w" .<= F.lit (Just (30 :: Int))) - .|| (F.col @(Maybe Int) "w" .> F.lit (Just (30 :: Int))) + ( (F.col (Maybe Int) "w" .<= F.lit (Just (30 :: 
Int))) + .|| (F.col (Maybe Int) "w" .> F.lit (Just (30 :: Int))) ) , simplifiesTo "fromMaybe consolidation keeps tighter" ( F.and - (F.fromMaybe False (F.col @(Maybe Double) "w" .<= F.lit (5 :: Double))) - (F.fromMaybe False (F.col @(Maybe Double) "w" .<= F.lit (3 :: Double))) + (F.fromMaybe False (F.col (Maybe Double) "w" .<= F.lit (5 :: Double))) + (F.fromMaybe False (F.col (Maybe Double) "w" .<= F.lit (3 :: Double))) ) - (F.fromMaybe False (F.col @(Maybe Double) "w" .<= F.lit (3 :: Double))) + (F.fromMaybe False (F.col (Maybe Double) "w" .<= F.lit (3 :: Double))) , simplifiesTo "fromMaybe contradiction becomes False" ( F.and - (F.fromMaybe False (F.col @(Maybe Double) "w" .> F.lit (30 :: Double))) - (F.fromMaybe False (F.col @(Maybe Double) "w" .< F.lit (20 :: Double))) + (F.fromMaybe False (F.col (Maybe Double) "w" .> F.lit (30 :: Double))) + (F.fromMaybe False (F.col (Maybe Double) "w" .< F.lit (20 :: Double))) ) (F.lit False) , unchanged "fromMaybe tautology stays unsimplified" ( F.or - (F.fromMaybe False (F.col @(Maybe Double) "w" .<= F.lit (30 :: Double))) - (F.fromMaybe False (F.col @(Maybe Double) "w" .> F.lit (30 :: Double))) + (F.fromMaybe False (F.col (Maybe Double) "w" .<= F.lit (30 :: Double))) + (F.fromMaybe False (F.col (Maybe Double) "w" .> F.lit (30 :: Double))) ) ] @@ -314,38 +314,38 @@ bailing = [ unchanged "proper interval is not collapsed" ( F.and - (F.col @Double "age" .>= F.lit (20 :: Double)) - (F.col @Double "age" .<= F.lit (65 :: Double)) + (F.col Double "age" .>= F.lit (20 :: Double)) + (F.col Double "age" .<= F.lit (65 :: Double)) ) , unchanged "or with a gap is not a tautology" ( F.or - (F.col @Double "age" .<= F.lit (30 :: Double)) - (F.col @Double "age" .> F.lit (40 :: Double)) + (F.col Double "age" .<= F.lit (30 :: Double)) + (F.col Double "age" .> F.lit (40 :: Double)) ) , unchanged "two inequalities are not an interval" ( F.and - (F.col @Double "age" ./= F.lit (30 :: Double)) - (F.col @Double "age" ./= F.lit (40 :: Double)) + 
(F.col Double "age" ./= F.lit (30 :: Double)) + (F.col Double "age" ./= F.lit (40 :: Double)) ) , unchanged "cross-column conjunction is left alone" ( F.and - (F.col @Double "age" .> F.lit (50 :: Double)) - (F.col @Double "hours" .> F.lit (40 :: Double)) + (F.col Double "age" .> F.lit (50 :: Double)) + (F.col Double "hours" .> F.lit (40 :: Double)) ) , unchanged "double exhaustive cover bails (NaN)" ( F.or - (F.col @Double "age" .<= F.lit (30 :: Double)) - (F.col @Double "age" .> F.lit (30 :: Double)) + (F.col Double "age" .<= F.lit (30 :: Double)) + (F.col Double "age" .> F.lit (30 :: Double)) ) , unchanged "punctured interval is not a single atom" ( F.and - (F.col @Double "age" ./= F.lit (30 :: Double)) - (F.col @Double "age" .> F.lit (20 :: Double)) + (F.col Double "age" ./= F.lit (30 :: Double)) + (F.col Double "age" .> F.lit (20 :: Double)) ) ] diff --git a/tests/TreePruning.hs b/tests/TreePruning.hs index ac197bb..eee4540 100644 --- a/tests/TreePruning.hs +++ b/tests/TreePruning.hs @@ -34,19 +34,19 @@ pathEntailment = [ prunesTo "ancestor entails child keeps true subtree" ( Branch - (F.col @Double "age" .> F.lit (50 :: Double)) - (Branch (F.col @Double "age" .> F.lit (30 :: Double)) (Leaf "a") (Leaf "b")) + (F.col Double "age" .> F.lit (50 :: Double)) + (Branch (F.col Double "age" .> F.lit (30 :: Double)) (Leaf "a") (Leaf "b")) (Leaf "c") ) - (Branch (F.col @Double "age" .> F.lit (50 :: Double)) (Leaf "a") (Leaf "c")) + (Branch (F.col Double "age" .> F.lit (50 :: Double)) (Leaf "a") (Leaf "c")) , prunesTo "ancestor refutes child keeps false subtree" ( Branch - (F.col @Double "age" .> F.lit (50 :: Double)) - (Branch (F.col @Double "age" .< F.lit (40 :: Double)) (Leaf "a") (Leaf "b")) + (F.col Double "age" .> F.lit (50 :: Double)) + (Branch (F.col Double "age" .< F.lit (40 :: Double)) (Leaf "a") (Leaf "b")) (Leaf "c") ) - (Branch (F.col @Double "age" .> F.lit (50 :: Double)) (Leaf "b") (Leaf "c")) + (Branch (F.col Double "age" .> F.lit (50 :: Double)) (Leaf 
"b") (Leaf "c")) ] falseEdgeGate :: [Test] @@ -54,16 +54,16 @@ falseEdgeGate = [ prunesTo "integral false edge entails child" ( Branch - (F.toDouble (F.col @Int "ai") .> F.lit (50 :: Double)) + (F.toDouble (F.col Int "ai") .> F.lit (50 :: Double)) (Leaf "c") ( Branch - (F.toDouble (F.col @Int "ai") .< F.lit (60 :: Double)) + (F.toDouble (F.col Int "ai") .< F.lit (60 :: Double)) (Leaf "a") (Leaf "b") ) ) ( Branch - (F.toDouble (F.col @Int "ai") .> F.lit (50 :: Double)) + (F.toDouble (F.col Int "ai") .> F.lit (50 :: Double)) (Leaf "c") (Leaf "a") ) @@ -73,13 +73,13 @@ sameBranchCollapse :: [Test] sameBranchCollapse = [ prunesTo "equal leaves collapse the branch" - (Branch (F.col @Double "age" .> F.lit (50 :: Double)) (Leaf "a") (Leaf "a")) + (Branch (F.col Double "age" .> F.lit (50 :: Double)) (Leaf "a") (Leaf "a")) (Leaf "a") , prunesTo "collapse cascades upward" ( Branch - (F.col @Double "age" .> F.lit (50 :: Double)) - (Branch (F.col @Double "hours" .> F.lit (40 :: Double)) (Leaf "a") (Leaf "a")) + (F.col Double "age" .> F.lit (50 :: Double)) + (Branch (F.col Double "hours" .> F.lit (40 :: Double)) (Leaf "a") (Leaf "a")) (Leaf "a") ) (Leaf "a") @@ -90,22 +90,22 @@ preservedTrees = [ preserved "child not tight enough is kept" ( Branch - (F.col @Double "age" .> F.lit (50 :: Double)) - (Branch (F.col @Double "age" .> F.lit (60 :: Double)) (Leaf "a") (Leaf "b")) + (F.col Double "age" .> F.lit (50 :: Double)) + (Branch (F.col Double "age" .> F.lit (60 :: Double)) (Leaf "a") (Leaf "b")) (Leaf "c") ) , preserved "double false edge is kept (NaN)" ( Branch - (F.col @Double "weight" .> F.lit (50 :: Double)) + (F.col Double "weight" .> F.lit (50 :: Double)) (Leaf "c") - (Branch (F.col @Double "weight" .< F.lit (60 :: Double)) (Leaf "a") (Leaf "b")) + (Branch (F.col Double "weight" .< F.lit (60 :: Double)) (Leaf "a") (Leaf "b")) ) , preserved "cross-column descendant is kept" ( Branch - (F.col @Double "age" .> F.lit (50 :: Double)) - (Branch (F.col @Double "income" .> F.lit 
(30000 :: Double)) (Leaf "a") (Leaf "b")) + (F.col Double "age" .> F.lit (50 :: Double)) + (Branch (F.col Double "income" .> F.lit (30000 :: Double)) (Leaf "a") (Leaf "b")) (Leaf "c") ) ] diff --git a/tests/Worklist.hs b/tests/Worklist.hs index 405433c..0196160 100644 --- a/tests/Worklist.hs +++ b/tests/Worklist.hs @@ -60,17 +60,17 @@ mat e = (materializeCondVec fixtureDF e) xGt, xLt, yGt, yLt, zGt, zLt :: Double -> CondVec -xGt n = mat (F.col @Double "x" .>. F.lit n) -xLt n = mat (F.col @Double "x" .<. F.lit n) -yGt n = mat (F.col @Double "y" .>. F.lit n) -yLt n = mat (F.col @Double "y" .<. F.lit n) -zGt n = mat (F.col @Double "z" .>. F.lit n) -zLt n = mat (F.col @Double "z" .<. F.lit n) +xGt n = mat (F.col Double "x" .>. F.lit n) +xLt n = mat (F.col Double "x" .<. F.lit n) +yGt n = mat (F.col Double "y" .>. F.lit n) +yLt n = mat (F.col Double "y" .<. F.lit n) +zGt n = mat (F.col Double "z" .>. F.lit n) +zLt n = mat (F.col Double "z" .<. F.lit n) -- Same truth vector as 'xGt 2' ([F,F,F,T,T,T]) but eSize 4 vs 3 — a non-degenerate -- truth-vector collision for the min-eSize representative rule. notLe2 :: CondVec -notLe2 = mat (F.not (F.col @Double "x" .<=. F.lit 2)) +notLe2 = mat (F.not (F.col Double "x" .<=. F.lit 2)) litTrue :: CondVec litTrue = mat (F.lit True) @@ -126,9 +126,9 @@ matW e = wideBase :: [CondVec] wideBase = - [ matW (F.col @Double "a" .>. F.lit 3) - , matW (F.col @Double "b" .<. F.lit 5) - , matW (F.col @Double "c" .>=. F.lit 3) + [ matW (F.col Double "a" .>. F.lit 3) + , matW (F.col Double "b" .<. F.lit 5) + , matW (F.col Double "c" .>=. F.lit 3) ] ------------------------------------------------------------------------