DataHaskell · jvanbruegge · Jun 7, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,7 +4,7 @@
 ### New features
 * New `DataFrame.Typed.TH.deriveSchemaFromType` Template Haskell splice generates a typed schema synonym and a `HasSchema` instance from a Haskell record ADT. Pair with `DataFrame.fromRecords` / `DataFrame.toRecords` (or `DataFrame.Typed.fromRecordsTyped` / `toRecordsTyped`) to convert between `[Order]` and `DataFrame`/`TypedDataFrame OrderSchema`. Field names are translated `camelCase → snake_case` by default; the transform is configurable via `SchemaOptions`.
 * New `DataFrame.Typed.Generic` exposes `SchemaOf`/`SchemaOfRaw` plus `genericToColumns` / `genericFromColumns`, so users who prefer `GHC.Generics` over a TH splice can derive the same schema and row bridge.
-* New `DataFrame.Internal.Schema.deriveSchema` Template Haskell splice generates, from a record ADT, both a runtime `Schema` value (`orderSchema :: Schema`, suitable for `readCsvWithSchema` / `readCsvWithOpts`) and one `Expr` accessor per field (`orderCustomerId :: Expr Int`, etc.), so expression-DSL code can refer to columns by typed name without writing `col @T "snake_case_name"` at each call site. Re-exported from `DataFrame`.
+* New `DataFrame.Internal.Schema.deriveSchema` Template Haskell splice generates, from a record ADT, both a runtime `Schema` value (`orderSchema :: Schema`, suitable for `readCsvWithSchema` / `readCsvWithOpts`) and one `Expr` accessor per field (`orderCustomerId :: Expr Int`, etc.), so expression-DSL code can refer to columns by typed name without writing `col T "snake_case_name"` at each call site. Re-exported from `DataFrame`.
 
 ### Refactor
 * The untyped Template Haskell splices (`declareColumns`, `declareColumnsFromCsvFile`, `declareColumnsFromCsvWithOpts`, `declareColumnsFromParquetFile`, `declareColumnsWithPrefix`, `declareColumnsWithPrefix'`) have moved from `DataFrame.Functions` to a new `DataFrame.TH` module (re-exported from `DataFrame`). Update imports accordingly; the bundled `dataframe.ghci` already points to the new module.
@@ -413,7 +413,7 @@
 ```haskell
 ghci> df |> D.innerJoin ["key_1", "key_2"] other
 ```
-* Aggregations are now expressions allowing for more expressive aggregation logic. Previously: `D.aggregate [("quantity", D.Mean), ("price", D.Sum)] df` now ``D.aggregate [(F.sum (F.col @Double "label") / (F.count (F.col @Double "label")) `F.as` "positive_rate")]``
+* Aggregations are now expressions, allowing for more expressive aggregation logic. Previously: `D.aggregate [("quantity", D.Mean), ("price", D.Sum)] df`; now: ``D.aggregate [(F.sum (F.col Double "label") / (F.count (F.col Double "label")) `F.as` "positive_rate")]``
 * In GHCI, you can now create type-safe bindings for each column and use those in expressions.
 
 ```haskell
@@ -451,7 +451,7 @@ let withTotalPrice = D.deriveFrom (["quantity", "item_price"], D.func multiply)
 Now, we have a column expression syntax that mirrors Pyspark and Polars.
 
 ```haskell
-let withTotalPrice = D.derive "total_price" (D.lift fromIntegral (D.col @Int "quantity") * (D.col @Double"item_price")) df
+let withTotalPrice = D.derive "total_price" (D.lift fromIntegral (D.col Int "quantity") * (D.col Double "item_price")) df
 ```
 
 ### Adds a coverage report to the repository (thanks to @oforero)

diff --git a/README.md b/README.md
@@ -101,8 +101,8 @@ sales = D.fromNamedColumns
 -- Group by product and compute totals
 sales
     |> D.groupBy ["product"]
-    |> D.aggregate [ F.sum (F.col @Int "amount") `as` "total"
-                   , F.count (F.col @Int "amount") `as` "orders"
+    |> D.aggregate [ F.sum (F.col Int "amount") `as` "total"
+                   , F.count (F.col Int "amount") `as` "orders"
                    ]
     |> D.toMarkdown'
 ```
@@ -166,7 +166,7 @@ D.describeColumns df |> D.toMarkdown'
 > | longitude           | 20640                    | 0                    | Double       |
 
 
-The `:declareColumns` macro (`$(D.declareColumns df)` outside the REPL) generates typed column references from a dataframe, so you can use column names directly in expressions instead of writing `F.col @Double "median_income"` every time:
+The `:declareColumns` macro (`$(D.declareColumns df)` outside the REPL) generates typed column references from a dataframe, so you can use column names directly in expressions instead of writing `F.col Double "median_income"` every time:
 
 
 ```haskell
@@ -265,8 +265,8 @@ Compare this to the manual version which requires spelling out every column name
 ```haskell
 -- Without TH — every column needs its name and type spelled out
 df |> D.derive "rooms_per_household"
-        (F.col @Double "total_rooms" / F.col @Double "households")
-   |> D.filterWhere (F.col @Double "median_income" .>. F.lit 5)
+        (F.col Double "total_rooms" / F.col Double "households")
+   |> D.filterWhere (F.col Double "median_income" .>. F.lit 5)
    |> D.take 5
    |> D.toMarkdown'
 ```
@@ -378,7 +378,7 @@ typed-dataframe machinery), there's a companion splice in
 $(D.deriveSchema ''Order)
 -- emits:
 --   orderSchema     :: Schema
---   orderSchema     = makeSchema [("order_id", schemaType @Int64), ...]
+--   orderSchema     = makeSchema [("order_id", schemaType Int64), ...]
 --   orderOrderId    :: Expr Int64
 --   orderOrderId    = col "order_id"
 --   orderRegion     :: Expr Text
@@ -441,8 +441,8 @@ employees <- D.readCsv "./data/employees.csv"
-case DT.freeze @EmployeeSchema employees of
+case DT.freeze EmployeeSchema employees of
     Nothing  -> "Schema mismatch!"
     Just tdf -> tdf
-        |> DT.derive @"bonus" (DT.col @"salary" * DT.lit 0.1)
-        |> DT.filterWhere (DT.col @"salary" DT..>. DT.lit 50000)
+        |> DT.derive "bonus" (DT.col "salary" * DT.lit 0.1)
+        |> DT.filterWhere (DT.col "salary" DT..>. DT.lit 50000)
         |> DT.select @'["name", "bonus"]
         |> DT.thaw
         |> D.toMarkdown'
@@ -462,11 +462,11 @@ case DT.freeze @EmployeeSchema employees of
 
 ```text
 -- Typo in column name -> compile error
-tdf |> DT.filterWhere (DT.col @"slary" DT..>. DT.lit 50000)
+tdf |> DT.filterWhere (DT.col "slary" DT..>. DT.lit 50000)
 -- error: Column "slary" not found in schema
 
 -- Wrong type -> compile error
-tdf |> DT.filterWhere (DT.col @"name" DT..>. DT.lit 50000)
+tdf |> DT.filterWhere (DT.col "name" DT..>. DT.lit 50000)
 -- error: Couldn't match type 'Text' with 'Double'
 ```
 
@@ -486,7 +486,7 @@ Just stdf = DT.freeze @ScoreSchema scoresDf
 
 -- filterAllJust drops the null row and changes the column type from
 -- (Maybe Double) to Double, so `scaled` can multiply it directly.
-DT.thaw (DT.filterAllJust stdf |> DT.derive @"scaled" (DT.col @"score" * DT.lit 100)) |> D.toMarkdown'
+DT.thaw (DT.filterAllJust stdf |> DT.derive "scaled" (DT.col "score" * DT.lit 100)) |> D.toMarkdown'
 ```
 
 > <!-- sabela:mime text/plain -->
@@ -502,7 +502,7 @@ DT.thaw (DT.filterAllJust stdf |> DT.derive @"scaled" (DT.col @"score" * DT.lit
 
 **Operations**: filter, select, derive, groupBy, aggregate, joins (inner, left, right, full outer), sort, sample, stratified sample, distinct, k-fold splits.
 
-**Expressions**: typed column references (`F.col @Double "x"`), arithmetic, comparisons, logical operators, nullable-aware three-valued logic (`.==`, `.&&`), string matching (`like`, `regex`), casting, and user-defined functions via `lift`/`lift2`.
+**Expressions**: typed column references (`F.col Double "x"`), arithmetic, comparisons, logical operators, nullable-aware three-valued logic (`.==`, `.&&`), string matching (`like`, `regex`), casting, and user-defined functions via `lift`/`lift2`.
 
 **Statistics**: mean, median, mode, variance, standard deviation, percentiles, inter-quartile range, correlation, skewness, frequency tables, imputation.
 
@@ -526,23 +526,23 @@ import qualified DataFrame.Lazy as L
 import DataFrame.Internal.Schema (schemaType, makeSchema)
 
 housingSchema = makeSchema
-    [ ("longitude",          schemaType @Double)
-    , ("latitude",           schemaType @Double)
-    , ("housing_median_age", schemaType @Double)
-    , ("total_rooms",        schemaType @Double)
-    , ("total_bedrooms",     schemaType @(Maybe Double))
-    , ("population",         schemaType @Double)
-    , ("households",         schemaType @Double)
-    , ("median_income",      schemaType @Double)
-    , ("median_house_value", schemaType @Double)
-    , ("ocean_proximity",    schemaType @Text)
+    [ ("longitude",          schemaType Double)
+    , ("latitude",           schemaType Double)
+    , ("housing_median_age", schemaType Double)
+    , ("total_rooms",        schemaType Double)
+    , ("total_bedrooms",     schemaType (Maybe Double))
+    , ("population",         schemaType Double)
+    , ("households",         schemaType Double)
+    , ("median_income",      schemaType Double)
+    , ("median_house_value", schemaType Double)
+    , ("ocean_proximity",    schemaType Text)
     ]
 
 lazyResult <- L.runDataFrame $
     L.scanCsv housingSchema "./data/housing.csv"
-    |> L.filter  (F.col @Double "median_income" .>. F.lit 5)
+    |> L.filter  (F.col Double "median_income" .>. F.lit 5)
     |> L.derive  "value_per_income"
-                 (F.col @Double "median_house_value" / F.col @Double "median_income")
+                 (F.col Double "median_house_value" / F.col Double "median_income")
     |> L.select  ["ocean_proximity", "median_house_value", "value_per_income"]
     |> L.take 1000
 

diff --git a/app/Benchmark.hs b/app/Benchmark.hs
@@ -23,14 +23,14 @@ main = do
     let generationTime = diffUTCTime endGeneration startGeneration
     putStrLn $ "Data generation Time: " ++ show generationTime
     startCalculation <- getCurrentTime
-    print $ D.mean (F.col @Double "0") df
-    print $ D.variance (F.col @Double "1") df
+    print $ D.mean (F.col Double "0") df
+    print $ D.variance (F.col Double "1") df
     print $ D.correlation "1" "2" df
     endCalculation <- getCurrentTime
     let calculationTime = diffUTCTime endCalculation startCalculation
     putStrLn $ "Calculation Time: " ++ show calculationTime
     startFilter <- getCurrentTime
-    print $ D.filter (F.col @Double "0") (> 0.971) df D.|> D.take 10
+    print $ D.filter (F.col Double "0") (> 0.971) df D.|> D.take 10
     endFilter <- getCurrentTime
     let filterTime = diffUTCTime endFilter startFilter
     putStrLn $ "Filter Time: " ++ show filterTime

diff --git a/app/LazyBenchmark.hs b/app/LazyBenchmark.hs
@@ -214,10 +214,10 @@ main = do
     let schema =
             Schema $
                 M.fromList
-                    [ ("id", schemaType @Int)
-                    , ("x", schemaType @Double)
-                    , ("y", schemaType @Double)
-                    , ("category", schemaType @T.Text)
+                    [ ("id", schemaType Int)
+                    , ("x", schemaType Double)
+                    , ("y", schemaType Double)
+                    , ("category", schemaType T.Text)
                     ]
 
     -- Q1: Preview — limit 20, no filter.
@@ -233,7 +233,7 @@ main = do
     runQuery "Q2 — filter (x > 0.999), limit 20" $
         L.runDataFrame $
             L.take 20 $
-                L.filter (col @Double "x" .> lit (0.999 :: Double)) $
+                L.filter (col Double "x" .> lit (0.999 :: Double)) $
                     L.scanCsv schema pathT
 
     -- Q3: Filter + derive + select + limit.
@@ -243,8 +243,8 @@ main = do
         L.runDataFrame $
             L.take 20 $
                 L.select ["id", "z"] $
-                    L.derive "z" (col @Double "x" * col @Double "y") $
-                        L.filter (col @Double "x" .> lit (0.999 :: Double)) $
+                    L.derive "z" (col Double "x" * col Double "y") $
+                        L.filter (col Double "x" .> lit (0.999 :: Double)) $
                             L.scanCsv schema pathT
 
     -- Q4: Filter fusion demo.
@@ -254,8 +254,8 @@ main = do
     runQuery "Q4 — filter fusion: (x > 0.5) . (y > 0.5), limit 20" $
         L.runDataFrame $
             L.take 20 $
-                L.filter (col @Double "y" .> lit (0.5 :: Double)) $
-                    L.filter (col @Double "x" .> lit (0.5 :: Double)) $
+                L.filter (col Double "y" .> lit (0.5 :: Double)) $
+                    L.filter (col Double "x" .> lit (0.5 :: Double)) $
                         L.scanCsv schema pathT
 
     -- Q5: Full scan, heavy filter, count results.
@@ -269,7 +269,7 @@ main = do
         )
         $ L.runDataFrame
         $ L.select ["id", "x"]
-        $ L.filter (col @Double "x" .> lit (0.999 :: Double))
+        $ L.filter (col Double "x" .> lit (0.999 :: Double))
         $ L.scanCsv schema pathT
 
     putStrLn "\nDone."

diff --git a/app/Synthesis.hs b/app/Synthesis.hs
@@ -31,17 +31,17 @@ type RawPredSchema =
     '[DT.Column "Survived" (Maybe Int), DT.Column "prediction" Int]
 
 prediction :: D.Expr Int
-prediction = F.col @Int "prediction"
+prediction = F.col Int "prediction"
 
 main :: IO ()
 main = do
     rawTrain <- D.readCsv "./data/titanic/train.csv"
     rawTest <- D.readCsv "./data/titanic/test.csv"
 
     train <-
-        maybe (fail "train.csv schema mismatch") pure (DT.freeze @TrainSchema rawTrain)
+        maybe (fail "train.csv schema mismatch") pure (DT.freeze TrainSchema rawTrain)
     test <-
-        maybe (fail "test.csv schema mismatch") pure (DT.freeze @TestSchema rawTest)
+        maybe (fail "test.csv schema mismatch") pure (DT.freeze TestSchema rawTest)
 
     let (trainDf, validDf) =
             D.randomSplit (mkStdGen 4232) 0.7 (DT.thaw (clean train))
@@ -66,7 +66,7 @@ main = do
                             }
                     }
                 )
-                (F.fromMaybe 0 (F.col @(Maybe Int) "Survived"))
+                (F.fromMaybe 0 (F.col (Maybe Int) "Survived"))
                 (trainDf |> D.exclude ["PassengerId"])
 
     print model
@@ -95,7 +95,7 @@ clean ::
     DT.TypedDataFrame cols ->
     DT.TypedDataFrame
         ( DT.RenameManyInSchema
-            '[ '("Name", "title")
+             [ '("Name", "title")
              , '("Cabin", "cabin_prefix")
              , '("Pclass", "passenger_class")
              , '("SibSp", "number_of_siblings_and_spouses")
@@ -105,15 +105,15 @@ clean ::
         )
 clean tdf =
     tdf
-        |> DT.replaceColumn @"Ticket" (DT.nullLift (T.filter isAlpha) (DT.col @"Ticket"))
-        |> DT.replaceColumn @"Name" (DT.nullLift extractTitle (DT.col @"Name"))
-        |> DT.replaceColumn @"Cabin" (DT.nullLift (T.take 1) (DT.col @"Cabin"))
+        |> DT.replaceColumn "Ticket" (DT.nullLift (T.filter isAlpha) (DT.col "Ticket"))
+        |> DT.replaceColumn "Name" (DT.nullLift extractTitle (DT.col "Name"))
+        |> DT.replaceColumn "Cabin" (DT.nullLift (T.take 1) (DT.col "Cabin"))
         |> DT.renameMany
-            @'[ '("Name", "title")
-              , '("Cabin", "cabin_prefix")
-              , '("Pclass", "passenger_class")
-              , '("SibSp", "number_of_siblings_and_spouses")
-              , '("Parch", "number_of_parents_and_children")
+              [ ("Name", "title")
+              , ("Cabin", "cabin_prefix")
+              , ("Pclass", "passenger_class")
+              , ("SibSp", "number_of_siblings_and_spouses")
+              , ("Parch", "number_of_parents_and_children")
               ]
 
 -- | Extract title (e.g. "Mr", "Mrs") from a full Titanic passenger name.
@@ -129,11 +129,11 @@ extractTitle fullName =
 computeAccuracy :: D.DataFrame -> Double
 computeAccuracy df =
     let tdf =
-            DT.impute @"Survived" 0 $
+            DT.impute "Survived" 0 $
                 DT.unsafeFreeze @RawPredSchema $
                     df |> D.select ["Survived", "prediction"]
-        survived = DT.col @"Survived"
-        predCol = DT.col @"prediction"
+        survived = DT.col "Survived"
+        predCol = DT.col "prediction"
         count expr = fromIntegral (DT.nRows (DT.filterWhere expr tdf))
         tp = count ((survived DT..==. DT.lit 1) DT..&&. (predCol DT..==. DT.lit 1))
         tn = count ((survived DT..==. DT.lit 0) DT..&&. (predCol DT..==. DT.lit 0))

diff --git a/benchmark/Main.hs b/benchmark/Main.hs
@@ -59,9 +59,9 @@ groupByHaskell = do
         df
             |> D.groupBy ["ocean_proximity"]
             |> D.aggregate
-                [ F.minimum (F.col @Double "median_house_value")
+                [ F.minimum (F.col Double "median_house_value")
                     `as` "minimum_median_house_value"
-                , F.maximum (F.col @Double "median_house_value")
+                , F.maximum (F.col Double "median_house_value")
                     `as` "maximum_median_house_value"
                 ]
 

diff --git a/dataframe-arrow/ffi-export/DataFrame/FFI.hs b/dataframe-arrow/ffi-export/DataFrame/FFI.hs
@@ -1,9 +1,9 @@
-{-# LANGUAGE AllowAmbiguousTypes #-}
 {-# LANGUAGE ExplicitNamespaces #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE ForeignFunctionInterface #-}
 {-# LANGUAGE GADTs #-}
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RequiredTypeArguments #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE TypeApplications #-}
 
@@ -254,30 +254,30 @@ inferTargetType target df = dispatchType (columnTypeRep (unsafeGetColumn target
 fitTreeWithType ::
     T.Text -> TreeConfig -> T.Text -> DataFrame -> IO BS.ByteString
 fitTreeWithType ttag cfg target df = case ttag of
-    "int" -> fit @Int
-    "int8" -> fit @Int8
-    "int16" -> fit @Int16
-    "int32" -> fit @Int32
-    "int64" -> fit @Int64
-    "word" -> fit @Word
-    "word8" -> fit @Word8
-    "word16" -> fit @Word16
-    "word32" -> fit @Word32
-    "word64" -> fit @Word64
-    "integer" -> fit @Integer
-    "double" -> fit @Double
-    "float" -> fit @Float
-    "bool" -> fit @Bool
-    "char" -> fit @Char
-    "text" -> fit @T.Text
-    "string" -> fit @String
+    "int" -> fit Int
+    "int8" -> fit Int8
+    "int16" -> fit Int16
+    "int32" -> fit Int32
+    "int64" -> fit Int64
+    "word" -> fit Word
+    "word8" -> fit Word8
+    "word16" -> fit Word16
+    "word32" -> fit Word32
+    "word64" -> fit Word64
+    "integer" -> fit Integer
+    "double" -> fit Double
+    "float" -> fit Float
+    "bool" -> fit Bool
+    "char" -> fit Char
+    "text" -> fit T.Text
+    "string" -> fit String
     other ->
         ioError . userError $
             "DataFrame.FFI.fitTreeWithType: unsupported target type tag: "
                 ++ T.unpack other
   where
-    fit :: forall a. (Columnable a, Ord a) => IO BS.ByteString
-    fit = do
+    fit :: forall a -> (Columnable a, Ord a) => IO BS.ByteString
+    fit a = do
         let expr = fitDecisionTree @a cfg (Col @a target) df
         case encodeExprToBytes expr of
             Right bs -> return bs