diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 81d69be7..7d927139 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -1,10 +1,8 @@ -import datetime -import decimal import os import pathlib import typing -import uuid from typing_extensions import Self +from ._expression import Expression if typing.TYPE_CHECKING: import fsspec @@ -12,28 +10,27 @@ if typing.TYPE_CHECKING: import polars import pandas import pyarrow.lib + from builtins import list as lst from collections.abc import Callable, Iterable, Sequence, Mapping + from ._typing import ( + ParquetFieldsOptions, + IntoExpr, + IntoExprColumn, + PythonLiteral, + IntoValues, + IntoDType, + IntoFields, + StrIntoDType, + JoinType, + JsonCompression, + JsonFormat, + JsonRecordOptions, + CsvEncoding, + CsvCompression, + HiveTypes, + ColumnsTypes, + ) from duckdb import sqltypes, func - from builtins import list as lst # needed to avoid mypy error on DuckDBPyRelation.list method shadowing - - # the field_ids argument to to_parquet and write_parquet has a recursive structure - ParquetFieldIdsType = Mapping[str, int | "ParquetFieldIdsType"] - -_ExpressionLike: typing.TypeAlias = ( - "Expression" - | str - | int - | float - | bool - | bytes - | None - | datetime.date - | datetime.datetime - | datetime.time - | datetime.timedelta - | decimal.Decimal - | uuid.UUID -) __all__: lst[str] = [ "BinderException", @@ -213,7 +210,7 @@ class DuckDBPyConnection: def __enter__(self) -> Self: ... def __exit__(self, exc_type: object, exc: object, traceback: object) -> None: ... def append(self, table_name: str, df: pandas.DataFrame, *, by_name: bool = False) -> DuckDBPyConnection: ... - def array_type(self, type: sqltypes.DuckDBPyType, size: typing.SupportsInt) -> sqltypes.DuckDBPyType: ... + def array_type(self, type: IntoDType, size: typing.SupportsInt) -> sqltypes.DuckDBPyType: ... 
def arrow(self, rows_per_batch: typing.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: """Alias of to_arrow_reader(). We recommend using to_arrow_reader() instead.""" ... @@ -226,9 +223,9 @@ class DuckDBPyConnection: def create_function( self, name: str, - function: Callable[..., typing.Any], - parameters: lst[sqltypes.DuckDBPyType] | None = None, - return_type: sqltypes.DuckDBPyType | None = None, + function: Callable[..., PythonLiteral], + parameters: lst[IntoDType] | None = None, + return_type: IntoDType | None = None, *, type: func.PythonUDFType = ..., null_handling: func.FunctionNullHandling = ..., @@ -238,7 +235,7 @@ class DuckDBPyConnection: def cursor(self) -> DuckDBPyConnection: ... def decimal_type(self, width: typing.SupportsInt, scale: typing.SupportsInt) -> sqltypes.DuckDBPyType: ... def df(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... - def dtype(self, type_str: str) -> sqltypes.DuckDBPyType: ... + def dtype(self, type_str: StrIntoDType) -> sqltypes.DuckDBPyType: ... def duplicate(self) -> DuckDBPyConnection: ... def enum_type(self, name: str, type: sqltypes.DuckDBPyType, values: lst[typing.Any]) -> sqltypes.DuckDBPyType: ... def execute(self, query: Statement | str, parameters: object = None) -> DuckDBPyConnection: ... 
@@ -265,18 +262,18 @@ class DuckDBPyConnection: self, path_or_buffer: str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes], header: bool | int | None = None, - compression: str | None = None, + compression: CsvCompression | None = None, sep: str | None = None, delimiter: str | None = None, files_to_sniff: int | None = None, comment: str | None = None, thousands: str | None = None, - dtype: dict[str, str] | lst[str] | None = None, + dtype: IntoFields | None = None, na_values: str | lst[str] | None = None, skiprows: int | None = None, quotechar: str | None = None, escapechar: str | None = None, - encoding: str | None = None, + encoding: CsvEncoding | None = None, parallel: bool | None = None, date_format: str | None = None, timestamp_format: str | None = None, @@ -287,8 +284,8 @@ class DuckDBPyConnection: null_padding: bool | None = None, names: lst[str] | None = None, lineterminator: str | None = None, - columns: dict[str, str] | None = None, - auto_type_candidates: lst[str] | None = None, + columns: ColumnsTypes | None = None, + auto_type_candidates: lst[StrIntoDType] | None = None, max_line_size: int | None = None, ignore_errors: bool | None = None, store_rejects: bool | None = None, @@ -302,7 +299,7 @@ class DuckDBPyConnection: filename: bool | str | None = None, hive_partitioning: bool | None = None, union_by_name: bool | None = None, - hive_types: dict[str, str] | None = None, + hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, strict_mode: bool | None = None, ) -> DuckDBPyRelation: ... @@ -347,9 +344,9 @@ class DuckDBPyConnection: def disable_profiling(self) -> None: ... def interrupt(self) -> None: ... def list_filesystems(self) -> lst[str]: ... - def list_type(self, type: sqltypes.DuckDBPyType) -> sqltypes.DuckDBPyType: ... + def list_type(self, type: IntoDType) -> sqltypes.DuckDBPyType: ... def load_extension(self, extension: str) -> None: ... 
- def map_type(self, key: sqltypes.DuckDBPyType, value: sqltypes.DuckDBPyType) -> sqltypes.DuckDBPyType: ... + def map_type(self, key: IntoDType, value: IntoDType) -> sqltypes.DuckDBPyType: ... @typing.overload def pl( self, rows_per_batch: typing.SupportsInt = 1000000, *, lazy: typing.Literal[False] = ... @@ -366,18 +363,18 @@ class DuckDBPyConnection: self, path_or_buffer: str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes], header: bool | int | None = None, - compression: str | None = None, + compression: CsvCompression | None = None, sep: str | None = None, delimiter: str | None = None, files_to_sniff: int | None = None, comment: str | None = None, thousands: str | None = None, - dtype: dict[str, str] | lst[str] | None = None, + dtype: IntoFields | None = None, na_values: str | lst[str] | None = None, skiprows: int | None = None, quotechar: str | None = None, escapechar: str | None = None, - encoding: str | None = None, + encoding: CsvEncoding | None = None, parallel: bool | None = None, date_format: str | None = None, timestamp_format: str | None = None, @@ -388,8 +385,8 @@ class DuckDBPyConnection: null_padding: bool | None = None, names: lst[str] | None = None, lineterminator: str | None = None, - columns: dict[str, str] | None = None, - auto_type_candidates: lst[str] | None = None, + columns: ColumnsTypes | None = None, + auto_type_candidates: lst[StrIntoDType] | None = None, max_line_size: int | None = None, ignore_errors: bool | None = None, store_rejects: bool | None = None, @@ -403,7 +400,7 @@ class DuckDBPyConnection: filename: bool | str | None = None, hive_partitioning: bool | None = None, union_by_name: bool | None = None, - hive_types: dict[str, str] | None = None, + hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, strict_mode: bool | None = None, ) -> DuckDBPyRelation: ... 
@@ -411,14 +408,14 @@ class DuckDBPyConnection: self, path_or_buffer: str | bytes | os.PathLike[str], *, - columns: dict[str, str] | None = None, + columns: ColumnsTypes | None = None, sample_size: int | None = None, maximum_depth: int | None = None, - records: str | None = None, - format: str | None = None, + records: JsonRecordOptions | None = None, + format: JsonFormat | None = None, date_format: str | None = None, timestamp_format: str | None = None, - compression: str | None = None, + compression: JsonCompression | None = None, maximum_object_size: int | None = None, ignore_errors: bool | None = None, convert_strings_to_integers: bool | None = None, @@ -428,7 +425,7 @@ class DuckDBPyConnection: filename: bool | str | None = None, hive_partitioning: bool | None = None, union_by_name: bool | None = None, - hive_types: dict[str, str] | None = None, + hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, ) -> DuckDBPyRelation: ... @typing.overload @@ -459,26 +456,20 @@ class DuckDBPyConnection: def register_filesystem(self, filesystem: fsspec.AbstractFileSystem) -> None: ... def remove_function(self, name: str) -> DuckDBPyConnection: ... def rollback(self) -> DuckDBPyConnection: ... - def row_type( - self, fields: dict[str, sqltypes.DuckDBPyType] | lst[sqltypes.DuckDBPyType] - ) -> sqltypes.DuckDBPyType: ... + def row_type(self, fields: IntoFields) -> sqltypes.DuckDBPyType: ... def sql(self, query: Statement | str, *, alias: str = "", params: object = None) -> DuckDBPyRelation: ... def sqltype(self, type_str: str) -> sqltypes.DuckDBPyType: ... def string_type(self, collation: str = "") -> sqltypes.DuckDBPyType: ... - def struct_type( - self, fields: dict[str, sqltypes.DuckDBPyType] | lst[sqltypes.DuckDBPyType] - ) -> sqltypes.DuckDBPyType: ... + def struct_type(self, fields: IntoFields) -> sqltypes.DuckDBPyType: ... def table(self, table_name: str) -> DuckDBPyRelation: ... 
def table_function(self, name: str, parameters: object = None) -> DuckDBPyRelation: ... def tf(self) -> dict[str, typing.Any]: ... def torch(self) -> dict[str, typing.Any]: ... def type(self, type_str: str) -> sqltypes.DuckDBPyType: ... - def union_type( - self, members: lst[sqltypes.DuckDBPyType] | dict[str, sqltypes.DuckDBPyType] - ) -> sqltypes.DuckDBPyType: ... + def union_type(self, members: IntoFields) -> sqltypes.DuckDBPyType: ... def unregister(self, view_name: str) -> DuckDBPyConnection: ... def unregister_filesystem(self, name: str) -> None: ... - def values(self, *args: lst[typing.Any] | tuple[Expression, ...] | Expression) -> DuckDBPyRelation: ... + def values(self, *args: IntoValues) -> DuckDBPyRelation: ... def view(self, view_name: str) -> DuckDBPyRelation: ... @property def description(self) -> lst[tuple[str, sqltypes.DuckDBPyType, None, None, None, None, None]]: ... @@ -491,9 +482,7 @@ class DuckDBPyRelation: def __getattr__(self, name: str) -> DuckDBPyRelation: ... def __getitem__(self, name: str) -> DuckDBPyRelation: ... def __len__(self) -> int: ... - def aggregate( - self, aggr_expr: str | Iterable[_ExpressionLike], group_expr: _ExpressionLike = "" - ) -> DuckDBPyRelation: ... + def aggregate(self, aggr_expr: str | Iterable[IntoExpr], group_expr: IntoExpr = "") -> DuckDBPyRelation: ... def any_value( self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" ) -> DuckDBPyRelation: ... @@ -549,13 +538,13 @@ class DuckDBPyRelation: ) -> DuckDBPyRelation: ... def create(self, table_name: str) -> None: ... def create_view(self, view_name: str, replace: bool = True) -> DuckDBPyRelation: ... - def cross(self, other_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... + def cross(self, other_rel: Self) -> DuckDBPyRelation: ... def cume_dist(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def dense_rank(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... 
def describe(self) -> DuckDBPyRelation: ... def df(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... def distinct(self) -> DuckDBPyRelation: ... - def except_(self, other_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... + def except_(self, other_rel: Self) -> DuckDBPyRelation: ... def execute(self) -> DuckDBPyRelation: ... def explain(self, type: ExplainType = ExplainType.STANDARD) -> str: ... def favg( @@ -578,7 +567,7 @@ class DuckDBPyRelation: def fetchmany(self, size: typing.SupportsInt = 1) -> lst[tuple[typing.Any, ...]]: ... def fetchnumpy(self) -> dict[str, np.typing.NDArray[typing.Any] | pandas.Categorical]: ... def fetchone(self) -> tuple[typing.Any, ...] | None: ... - def filter(self, filter_expr: Expression | str) -> DuckDBPyRelation: ... + def filter(self, filter_expr: IntoExprColumn) -> DuckDBPyRelation: ... def first(self, expression: str, groups: str = "", projected_columns: str = "") -> DuckDBPyRelation: ... def first_value(self, expression: str, window_spec: str = "", projected_columns: str = "") -> DuckDBPyRelation: ... def fsum( @@ -590,13 +579,8 @@ class DuckDBPyRelation: ) -> DuckDBPyRelation: ... def insert(self, values: lst[object]) -> None: ... def insert_into(self, table_name: str) -> None: ... - def intersect(self, other_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... - def join( - self, - other_rel: DuckDBPyRelation, - condition: Expression | str, - how: typing.Literal["inner", "left", "right", "outer", "semi", "anti"] = "inner", - ) -> DuckDBPyRelation: ... + def intersect(self, other_rel: Self) -> DuckDBPyRelation: ... + def join(self, other_rel: Self, condition: IntoExprColumn, how: JoinType = "inner") -> DuckDBPyRelation: ... def lag( self, expression: str, @@ -622,7 +606,7 @@ class DuckDBPyRelation: self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" ) -> DuckDBPyRelation: ... 
def map( - self, map_function: Callable[..., typing.Any], *, schema: dict[str, sqltypes.DuckDBPyType] | None = None + self, map_function: Callable[..., PythonLiteral], *, schema: dict[str, sqltypes.DuckDBPyType] | None = None ) -> DuckDBPyRelation: ... def max( self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" @@ -665,7 +649,7 @@ class DuckDBPyRelation: def product( self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" ) -> DuckDBPyRelation: ... - def project(self, *args: _ExpressionLike, groups: str = "") -> DuckDBPyRelation: ... + def project(self, *args: IntoExpr, groups: str = "") -> DuckDBPyRelation: ... def quantile( self, expression: str, @@ -694,9 +678,9 @@ class DuckDBPyRelation: def rank(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def rank_dense(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def row_number(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... - def select(self, *args: _ExpressionLike, groups: str = "") -> DuckDBPyRelation: ... - def select_dtypes(self, types: lst[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... - def select_types(self, types: lst[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... + def select(self, *args: IntoExpr, groups: str = "") -> DuckDBPyRelation: ... + def select_dtypes(self, types: lst[sqltypes.DuckDBPyType | StrIntoDType]) -> DuckDBPyRelation: ... + def select_types(self, types: lst[sqltypes.DuckDBPyType | StrIntoDType]) -> DuckDBPyRelation: ... def set_alias(self, alias: str) -> DuckDBPyRelation: ... def show( self, @@ -707,7 +691,7 @@ class DuckDBPyRelation: null_value: str | None = None, render_mode: RenderMode | None = None, ) -> None: ... - def sort(self, *args: _ExpressionLike) -> DuckDBPyRelation: ... + def sort(self, *args: IntoExpr) -> DuckDBPyRelation: ... def sql_query(self) -> str: ... 
def std( self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" @@ -740,8 +724,8 @@ class DuckDBPyRelation: date_format: str | None = None, timestamp_format: str | None = None, quoting: str | int | None = None, - encoding: str | None = None, - compression: str | None = None, + encoding: CsvEncoding | None = None, + compression: CsvCompression | None = None, overwrite: bool | None = None, per_thread_output: bool | None = None, use_tmp_file: bool | None = None, @@ -754,7 +738,7 @@ class DuckDBPyRelation: file_name: str, *, compression: str | None = None, - field_ids: ParquetFieldIdsType | typing.Literal["auto"] | None = None, + field_ids: ParquetFieldsOptions | None = None, row_group_size_bytes: int | str | None = None, row_group_size: int | None = None, overwrite: bool | None = None, @@ -769,9 +753,9 @@ class DuckDBPyRelation: def to_table(self, table_name: str) -> None: ... def to_view(self, view_name: str, replace: bool = True) -> DuckDBPyRelation: ... def torch(self) -> dict[str, typing.Any]: ... - def union(self, union_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... + def union(self, union_rel: Self) -> DuckDBPyRelation: ... def unique(self, unique_aggr: str) -> DuckDBPyRelation: ... - def update(self, set: dict[str, _ExpressionLike], *, condition: _ExpressionLike | None = None) -> None: ... + def update(self, set: Mapping[str, IntoExpr], *, condition: IntoExpr = None) -> None: ... def value_counts(self, expression: str, groups: str = "") -> DuckDBPyRelation: ... 
def var( self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" @@ -797,8 +781,8 @@ class DuckDBPyRelation: date_format: str | None = None, timestamp_format: str | None = None, quoting: str | int | None = None, - encoding: str | None = None, - compression: str | None = None, + encoding: CsvEncoding | None = None, + compression: CsvCompression | None = None, overwrite: bool | None = None, per_thread_output: bool | None = None, use_tmp_file: bool | None = None, @@ -810,7 +794,7 @@ class DuckDBPyRelation: file_name: str, *, compression: str | None = None, - field_ids: ParquetFieldIdsType | typing.Literal["auto"] | None = None, + field_ids: ParquetFieldsOptions | None = None, row_group_size_bytes: str | int | None = None, row_group_size: int | None = None, overwrite: bool | None = None, @@ -878,56 +862,6 @@ class ExplainType: @property def value(self) -> int: ... -class Expression: - def __add__(self, other: _ExpressionLike) -> Expression: ... - def __and__(self, other: _ExpressionLike) -> Expression: ... - def __div__(self, other: _ExpressionLike) -> Expression: ... - def __eq__(self, other: _ExpressionLike) -> Expression: ... # type: ignore[override] - def __floordiv__(self, other: _ExpressionLike) -> Expression: ... - def __ge__(self, other: _ExpressionLike) -> Expression: ... - def __gt__(self, other: _ExpressionLike) -> Expression: ... - @typing.overload - def __init__(self, arg0: str) -> None: ... - @typing.overload - def __init__(self, arg0: typing.Any) -> None: ... - def __invert__(self) -> Expression: ... - def __le__(self, other: _ExpressionLike) -> Expression: ... - def __lt__(self, other: _ExpressionLike) -> Expression: ... - def __mod__(self, other: _ExpressionLike) -> Expression: ... - def __mul__(self, other: _ExpressionLike) -> Expression: ... - def __ne__(self, other: _ExpressionLike) -> Expression: ... # type: ignore[override] - def __neg__(self) -> Expression: ... 
- def __or__(self, other: _ExpressionLike) -> Expression: ... - def __pow__(self, other: _ExpressionLike) -> Expression: ... - def __radd__(self, other: _ExpressionLike) -> Expression: ... - def __rand__(self, other: _ExpressionLike) -> Expression: ... - def __rdiv__(self, other: _ExpressionLike) -> Expression: ... - def __rfloordiv__(self, other: _ExpressionLike) -> Expression: ... - def __rmod__(self, other: _ExpressionLike) -> Expression: ... - def __rmul__(self, other: _ExpressionLike) -> Expression: ... - def __ror__(self, other: _ExpressionLike) -> Expression: ... - def __rpow__(self, other: _ExpressionLike) -> Expression: ... - def __rsub__(self, other: _ExpressionLike) -> Expression: ... - def __rtruediv__(self, other: _ExpressionLike) -> Expression: ... - def __sub__(self, other: _ExpressionLike) -> Expression: ... - def __truediv__(self, other: _ExpressionLike) -> Expression: ... - def alias(self, name: str) -> Expression: ... - def asc(self) -> Expression: ... - def between(self, lower: _ExpressionLike, upper: _ExpressionLike) -> Expression: ... - def cast(self, type: sqltypes.DuckDBPyType) -> Expression: ... - def collate(self, collation: str) -> Expression: ... - def desc(self) -> Expression: ... - def get_name(self) -> str: ... - def isin(self, *args: _ExpressionLike) -> Expression: ... - def isnotin(self, *args: _ExpressionLike) -> Expression: ... - def isnotnull(self) -> Expression: ... - def isnull(self) -> Expression: ... - def nulls_first(self) -> Expression: ... - def nulls_last(self) -> Expression: ... - def otherwise(self, value: _ExpressionLike) -> Expression: ... - def show(self) -> None: ... - def when(self, condition: _ExpressionLike, value: _ExpressionLike) -> Expression: ... - class FatalException(DatabaseError): ... class HTTPException(IOException): @@ -1078,18 +1012,18 @@ class token_type: @property def value(self) -> int: ... -def CaseExpression(condition: _ExpressionLike, value: _ExpressionLike) -> Expression: ... 
-def CoalesceOperator(*args: _ExpressionLike) -> Expression: ... +def CaseExpression(condition: IntoExpr, value: IntoExpr) -> Expression: ... +def CoalesceOperator(*args: IntoExpr) -> Expression: ... def ColumnExpression(*args: str) -> Expression: ... -def ConstantExpression(value: typing.Any) -> Expression: ... +def ConstantExpression(value: PythonLiteral) -> Expression: ... def DefaultExpression() -> Expression: ... -def FunctionExpression(function_name: str, *args: _ExpressionLike) -> Expression: ... -def LambdaExpression(lhs: typing.Any, rhs: _ExpressionLike) -> Expression: ... +def FunctionExpression(function_name: str, *args: IntoExpr) -> Expression: ... +def LambdaExpression(lhs: IntoExprColumn | tuple[IntoExprColumn, ...], rhs: IntoExpr) -> Expression: ... def SQLExpression(expression: str) -> Expression: ... -def StarExpression(*, exclude: Iterable[str | Expression] | None = None) -> Expression: ... +def StarExpression(*, exclude: Iterable[IntoExprColumn] | None = None) -> Expression: ... def aggregate( df: pandas.DataFrame, - aggr_expr: str | Iterable[_ExpressionLike], + aggr_expr: str | Iterable[IntoExpr], group_expr: str = "", *, connection: DuckDBPyConnection | None = None, @@ -1099,7 +1033,7 @@ def append( table_name: str, df: pandas.DataFrame, *, by_name: bool = False, connection: DuckDBPyConnection | None = None ) -> DuckDBPyConnection: ... def array_type( - type: sqltypes.DuckDBPyType, size: typing.SupportsInt, *, connection: DuckDBPyConnection | None = None + type: IntoDType, size: typing.SupportsInt, *, connection: DuckDBPyConnection | None = None ) -> sqltypes.DuckDBPyType: ... @typing.overload def arrow( @@ -1127,9 +1061,9 @@ def connect( ) -> DuckDBPyConnection: ... 
def create_function( name: str, - function: Callable[..., typing.Any], - parameters: lst[sqltypes.DuckDBPyType] | None = None, - return_type: sqltypes.DuckDBPyType | None = None, + function: Callable[..., PythonLiteral], + parameters: lst[IntoDType] | None = None, + return_type: IntoDType | None = None, *, type: func.PythonUDFType = ..., null_handling: func.FunctionNullHandling = ..., @@ -1150,7 +1084,7 @@ def df(*, date_as_object: bool = False, connection: DuckDBPyConnection | None = @typing.overload def df(df: pandas.DataFrame, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... def distinct(df: pandas.DataFrame, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... -def dtype(type_str: str, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... +def dtype(type_str: StrIntoDType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def duplicate(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... def enum_type( name: str, @@ -1203,7 +1137,7 @@ def fetchone(*, connection: DuckDBPyConnection | None = None) -> tuple[typing.An def filesystem_is_registered(name: str, *, connection: DuckDBPyConnection | None = None) -> bool: ... def filter( df: pandas.DataFrame, - filter_expr: Expression | str, + filter_expr: IntoExprColumn, *, connection: DuckDBPyConnection | None = None, ) -> DuckDBPyRelation: ... 
@@ -1215,18 +1149,18 @@ def from_arrow( def from_csv_auto( path_or_buffer: str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes], header: bool | int | None = None, - compression: str | None = None, + compression: CsvCompression | None = None, sep: str | None = None, delimiter: str | None = None, files_to_sniff: int | None = None, comment: str | None = None, thousands: str | None = None, - dtype: dict[str, str] | lst[str] | None = None, + dtype: IntoFields | None = None, na_values: str | lst[str] | None = None, skiprows: int | None = None, quotechar: str | None = None, escapechar: str | None = None, - encoding: str | None = None, + encoding: CsvEncoding | None = None, parallel: bool | None = None, date_format: str | None = None, timestamp_format: str | None = None, @@ -1237,8 +1171,8 @@ def from_csv_auto( null_padding: bool | None = None, names: lst[str] | None = None, lineterminator: str | None = None, - columns: dict[str, str] | None = None, - auto_type_candidates: lst[str] | None = None, + columns: ColumnsTypes | None = None, + auto_type_candidates: lst[StrIntoDType] | None = None, max_line_size: int | None = None, ignore_errors: bool | None = None, store_rejects: bool | None = None, @@ -1252,7 +1186,7 @@ def from_csv_auto( filename: bool | str | None = None, hive_partitioning: bool | None = None, union_by_name: bool | None = None, - hive_types: dict[str, str] | None = None, + hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, strict_mode: bool | None = None, ) -> DuckDBPyRelation: ... @@ -1312,15 +1246,10 @@ def get_profiling_information(*, connection: DuckDBPyConnection | None = None, f def enable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ... def disable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ... def list_filesystems(*, connection: DuckDBPyConnection | None = None) -> lst[str]: ... 
-def list_type( - type: sqltypes.DuckDBPyType, *, connection: DuckDBPyConnection | None = None -) -> sqltypes.DuckDBPyType: ... +def list_type(type: IntoDType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def load_extension(extension: str, *, connection: DuckDBPyConnection | None = None) -> None: ... def map_type( - key: sqltypes.DuckDBPyType, - value: sqltypes.DuckDBPyType, - *, - connection: DuckDBPyConnection | None = None, + key: IntoDType, value: IntoDType, *, connection: DuckDBPyConnection | None = None ) -> sqltypes.DuckDBPyType: ... def order( df: pandas.DataFrame, order_expr: str, *, connection: DuckDBPyConnection | None = None @@ -1347,7 +1276,7 @@ def pl( connection: DuckDBPyConnection | None = None, ) -> polars.DataFrame | polars.LazyFrame: ... def project( - df: pandas.DataFrame, *args: _ExpressionLike, groups: str = "", connection: DuckDBPyConnection | None = None + df: pandas.DataFrame, *args: IntoExpr, groups: str = "", connection: DuckDBPyConnection | None = None ) -> DuckDBPyRelation: ... def query( query: Statement | str, @@ -1367,18 +1296,18 @@ def query_progress(*, connection: DuckDBPyConnection | None = None) -> float: .. 
def read_csv( path_or_buffer: str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes], header: bool | int | None = None, - compression: str | None = None, + compression: CsvCompression | None = None, sep: str | None = None, delimiter: str | None = None, files_to_sniff: int | None = None, comment: str | None = None, thousands: str | None = None, - dtype: dict[str, str] | lst[str] | None = None, + dtype: IntoFields | None = None, na_values: str | lst[str] | None = None, skiprows: int | None = None, quotechar: str | None = None, escapechar: str | None = None, - encoding: str | None = None, + encoding: CsvEncoding | None = None, parallel: bool | None = None, date_format: str | None = None, timestamp_format: str | None = None, @@ -1389,8 +1318,8 @@ def read_csv( null_padding: bool | None = None, names: lst[str] | None = None, lineterminator: str | None = None, - columns: dict[str, str] | None = None, - auto_type_candidates: lst[str] | None = None, + columns: ColumnsTypes | None = None, + auto_type_candidates: lst[StrIntoDType] | None = None, max_line_size: int | None = None, ignore_errors: bool | None = None, store_rejects: bool | None = None, @@ -1404,21 +1333,21 @@ def read_csv( filename: bool | str | None = None, hive_partitioning: bool | None = None, union_by_name: bool | None = None, - hive_types: dict[str, str] | None = None, + hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, strict_mode: bool | None = None, ) -> DuckDBPyRelation: ... 
def read_json( path_or_buffer: str | bytes | os.PathLike[str], *, - columns: dict[str, str] | None = None, + columns: ColumnsTypes | None = None, sample_size: int | None = None, maximum_depth: int | None = None, - records: str | None = None, - format: str | None = None, + records: JsonRecordOptions | None = None, + format: JsonFormat | None = None, date_format: str | None = None, timestamp_format: str | None = None, - compression: str | None = None, + compression: JsonCompression | None = None, maximum_object_size: int | None = None, ignore_errors: bool | None = None, convert_strings_to_integers: bool | None = None, @@ -1428,7 +1357,7 @@ def read_json( filename: bool | str | None = None, hive_partitioning: bool | None = None, union_by_name: bool | None = None, - hive_types: dict[str, str] | None = None, + hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, ) -> DuckDBPyRelation: ... @typing.overload @@ -1466,11 +1395,7 @@ def register_filesystem( ) -> None: ... def remove_function(name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... def rollback(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... -def row_type( - fields: dict[str, sqltypes.DuckDBPyType] | lst[sqltypes.DuckDBPyType], - *, - connection: DuckDBPyConnection | None = None, -) -> sqltypes.DuckDBPyType: ... +def row_type(fields: IntoFields, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def rowcount(*, connection: DuckDBPyConnection | None = None) -> int: ... def set_default_connection(connection: DuckDBPyConnection) -> None: ... def sql( @@ -1482,11 +1407,7 @@ def sql( ) -> DuckDBPyRelation: ... def sqltype(type_str: str, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def string_type(collation: str = "", *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... 
-def struct_type( - fields: dict[str, sqltypes.DuckDBPyType] | lst[sqltypes.DuckDBPyType], - *, - connection: DuckDBPyConnection | None = None, -) -> sqltypes.DuckDBPyType: ... +def struct_type(fields: IntoFields, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def table(table_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... def table_function( name: str, @@ -1498,16 +1419,10 @@ def tf(*, connection: DuckDBPyConnection | None = None) -> dict[str, typing.Any] def tokenize(query: str) -> lst[tuple[int, token_type]]: ... def torch(*, connection: DuckDBPyConnection | None = None) -> dict[str, typing.Any]: ... def type(type_str: str, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... -def union_type( - members: dict[str, sqltypes.DuckDBPyType] | lst[sqltypes.DuckDBPyType], - *, - connection: DuckDBPyConnection | None = None, -) -> sqltypes.DuckDBPyType: ... +def union_type(members: IntoFields, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def unregister(view_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... def unregister_filesystem(name: str, *, connection: DuckDBPyConnection | None = None) -> None: ... -def values( - *args: lst[typing.Any] | tuple[Expression, ...] | Expression, connection: DuckDBPyConnection | None = None -) -> DuckDBPyRelation: ... +def values(*args: IntoValues, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... def view(view_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... 
def write_csv( df: pandas.DataFrame, @@ -1521,8 +1436,8 @@ def write_csv( date_format: str | None = None, timestamp_format: str | None = None, quoting: str | int | None = None, - encoding: str | None = None, - compression: str | None = None, + encoding: CsvEncoding | None = None, + compression: CsvCompression | None = None, overwrite: bool | None = None, per_thread_output: bool | None = None, use_tmp_file: bool | None = None, diff --git a/_duckdb-stubs/_expression.pyi b/_duckdb-stubs/_expression.pyi new file mode 100644 index 00000000..b4d4b1a6 --- /dev/null +++ b/_duckdb-stubs/_expression.pyi @@ -0,0 +1,54 @@ +from typing import TYPE_CHECKING, Any, overload + +if TYPE_CHECKING: + from ._typing import IntoExpr, IntoDType + +class Expression: + def __add__(self, other: IntoExpr) -> Expression: ... + def __and__(self, other: IntoExpr) -> Expression: ... + def __div__(self, other: IntoExpr) -> Expression: ... + def __eq__(self, other: IntoExpr) -> Expression: ... # type: ignore[override] + def __floordiv__(self, other: IntoExpr) -> Expression: ... + def __ge__(self, other: IntoExpr) -> Expression: ... + def __gt__(self, other: IntoExpr) -> Expression: ... + @overload + def __init__(self, arg0: str) -> None: ... + @overload + def __init__(self, arg0: Any) -> None: ... + def __invert__(self) -> Expression: ... + def __le__(self, other: IntoExpr) -> Expression: ... + def __lt__(self, other: IntoExpr) -> Expression: ... + def __mod__(self, other: IntoExpr) -> Expression: ... + def __mul__(self, other: IntoExpr) -> Expression: ... + def __ne__(self, other: IntoExpr) -> Expression: ... # type: ignore[override] + def __neg__(self) -> Expression: ... + def __or__(self, other: IntoExpr) -> Expression: ... + def __pow__(self, other: IntoExpr) -> Expression: ... + def __radd__(self, other: IntoExpr) -> Expression: ... + def __rand__(self, other: IntoExpr) -> Expression: ... + def __rdiv__(self, other: IntoExpr) -> Expression: ... 
+ def __rfloordiv__(self, other: IntoExpr) -> Expression: ... + def __rmod__(self, other: IntoExpr) -> Expression: ... + def __rmul__(self, other: IntoExpr) -> Expression: ... + def __ror__(self, other: IntoExpr) -> Expression: ... + def __rpow__(self, other: IntoExpr) -> Expression: ... + def __rsub__(self, other: IntoExpr) -> Expression: ... + def __rtruediv__(self, other: IntoExpr) -> Expression: ... + def __sub__(self, other: IntoExpr) -> Expression: ... + def __truediv__(self, other: IntoExpr) -> Expression: ... + def alias(self, name: str) -> Expression: ... + def asc(self) -> Expression: ... + def between(self, lower: IntoExpr, upper: IntoExpr) -> Expression: ... + def cast(self, type: IntoDType) -> Expression: ... + def collate(self, collation: str) -> Expression: ... + def desc(self) -> Expression: ... + def get_name(self) -> str: ... + def isin(self, *args: IntoExpr) -> Expression: ... + def isnotin(self, *args: IntoExpr) -> Expression: ... + def isnotnull(self) -> Expression: ... + def isnull(self) -> Expression: ... + def nulls_first(self) -> Expression: ... + def nulls_last(self) -> Expression: ... + def otherwise(self, value: IntoExpr) -> Expression: ... + def show(self) -> None: ... + def when(self, condition: IntoExpr, value: IntoExpr) -> Expression: ... diff --git a/_duckdb-stubs/_sqltypes.pyi b/_duckdb-stubs/_sqltypes.pyi index 82e768eb..f5942805 100644 --- a/_duckdb-stubs/_sqltypes.pyi +++ b/_duckdb-stubs/_sqltypes.pyi @@ -1,5 +1,6 @@ import duckdb import typing +from ._typing import StrIntoDType, DTypeIdentifiers __all__: list[str] = [ "BIGINT", @@ -39,13 +40,13 @@ class DuckDBPyType: def __getitem__(self, name: str) -> DuckDBPyType: ... def __hash__(self) -> int: ... @typing.overload - def __init__(self, type_str: str, connection: duckdb.DuckDBPyConnection) -> None: ... + def __init__(self, type_str: StrIntoDType, connection: duckdb.DuckDBPyConnection) -> None: ... @typing.overload def __init__(self, obj: object) -> None: ... 
@property def children(self) -> list[tuple[str, DuckDBPyType | int | list[str]]]: ... @property - def id(self) -> str: ... + def id(self) -> DTypeIdentifiers: ... BIGINT: DuckDBPyType # value = BIGINT BIT: DuckDBPyType # value = BIT diff --git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi new file mode 100644 index 00000000..83ce1183 --- /dev/null +++ b/_duckdb-stubs/_typing.pyi @@ -0,0 +1,206 @@ +from __future__ import annotations + +from typing import TypeAlias, TYPE_CHECKING, Protocol, Any, TypeVar, Generic, Literal +from datetime import date, datetime, time, timedelta +from decimal import Decimal +from uuid import UUID +from collections.abc import Mapping, Iterator, Sequence + +if TYPE_CHECKING: + from ._expression import Expression + from ._sqltypes import DuckDBPyType + +# Numpy protocols + +_T_co = TypeVar("_T_co", covariant=True) +_S_co = TypeVar("_S_co", bound=tuple[Any, ...], covariant=True) +_D_co = TypeVar("_D_co", covariant=True) + +class NPProtocol(Protocol): + """Base Protocol for numpy objects.""" + @property + def dtype(self) -> Any: ... + @property + def ndim(self) -> int: ... + def __array__(self, *args: Any, **kwargs: Any) -> Any: ... + def __array_wrap__(self, *args: Any, **kwargs: Any) -> Any: ... + @property + def __array_interface__(self) -> dict[str, Any]: ... + @property + def __array_priority__(self) -> float: ... + +class NPScalarTypeLike(NPProtocol, Protocol): + @property + def itemsize(self) -> int: ... + +class NPArrayLike(NPProtocol, Generic[_S_co, _D_co], Protocol): + """`numpy.ndarray` Protocol. + + This is needed to accept numpy arrays as literals in expressions, without emitting type checker errors about unknown symbol if the user doesn't have numpy installed. + + Note: + Using `np.typing.NDArray` is still the best option for return types. + """ + def __len__(self) -> int: ... + def __contains__(self, value: object, /) -> bool: ... + def __iter__(self) -> Iterator[_D_co]: ... 
+    def __array_finalize__(self, *args: Any, **kwargs: Any) -> None: ...
+    def __getitem__(self, *args: Any, **kwargs: Any) -> Any: ...
+    def __setitem__(self, *args: Any, **kwargs: Any) -> None: ...
+    @property
+    def shape(self) -> _S_co: ...
+    @property
+    def size(self) -> int: ...
+
+# Expression and values conversions
+
+NumericLiteral: TypeAlias = int | float
+"""Python objects that can be converted to a numerical `Expression` or `DuckDBPyType` (integer or floating point numbers)."""
+TemporalLiteral: TypeAlias = date | datetime | time | timedelta
+BlobLiteral: TypeAlias = bytes | bytearray
+"""Python objects that can be converted to a `BLOB` `ConstantExpression` or `DuckDBPyType`.
+
+Note:
+    `bytes` can also be converted to a `BITSTRING`.
+"""
+ScalarLiteral: TypeAlias = NumericLiteral | BlobLiteral | str | bool
+NonNestedLiteral: TypeAlias = ScalarLiteral | TemporalLiteral | UUID | Decimal | memoryview
+
+# NOTE:
+# Using `Sequence` and `Mapping` instead of `list | tuple` and `dict` would make the covariance of the element types work.
+# Thus, this would allow avoiding the use of `Any` for them.
+# However, this would also be incorrect at runtime, since only the 3 aforementioned container types are accepted.
+NestedLiteral: TypeAlias = list[Any] | tuple[Any, ...] | dict[Any, Any] | NPArrayLike[Any, Any]
+"""Container types that can be converted to a nested `ConstantExpression` (e.g. to `ARRAY` or `STRUCT`).
+
+These types can be arbitrarily nested, as long as their leaf values are `PythonLiteral`."""
+
+PythonLiteral: TypeAlias = NonNestedLiteral | NestedLiteral | None
+"""Python objects that can be converted to a `ConstantExpression`."""
+
+IntoExprColumn: TypeAlias = Expression | str
+"""Types that are, or can be used as, a `ColumnExpression`."""
+
+IntoExpr: TypeAlias = IntoExprColumn | PythonLiteral
+"""Any type that can be converted to an `Expression` (or is already one).
+
+See Also:
+    https://duckdb.org/docs/stable/clients/python/conversion
+"""
+
+IntoValues: TypeAlias = list[PythonLiteral] | tuple[Expression, ...] | Expression
+"""Types that can be converted to a table."""
+# Datatype conversions
+
+Builtins: TypeAlias = Literal[
+    "bigint",
+    "bit",
+    "bignum",
+    "blob",
+    "boolean",
+    "date",
+    "double",
+    "float",
+    "hugeint",
+    "integer",
+    "interval",
+    "smallint",
+    "null",
+    "time_tz",
+    "time",
+    "timestamp_ms",
+    "timestamp_ns",
+    "timestamp_s",
+    "timestamp_tz",
+    "timestamp",
+    "tinyint",
+    "ubigint",
+    "uhugeint",
+    "uinteger",
+    "usmallint",
+    "utinyint",
+    "uuid",
+    "varchar",
+]
+"""Literal strings convertible into `DuckDBPyType` instances.
+
+Note:
+    Passing the same values in uppercase is also accepted.
+    We use lowercase here to be able to reuse this `Literal` in the `DTypeIdentifiers` `Literal`.
+"""
+
+NestedIds: TypeAlias = Literal["list", "struct", "array", "enum", "map", "decimal", "union"]
+"""Identifiers for nested types in `DuckDBPyType.id`."""
+
+DTypeIdentifiers: TypeAlias = Builtins | NestedIds
+"""All possible identifiers for `DuckDBPyType.id`."""
+
+StrIntoDType = Builtins | Literal["json"] | str
+"""Any `str` that can be converted into a `DuckDBPyType`.
+
+The dtypes not present in the literal values are the composed ones, like `STRUCT` or `DECIMAL`.
+
+Note:
+    A `StrEnum` will be handled the same way as a `str`."""
+
+# NOTE:
+# the `dict` and `list` types are `Any` due to the same limitation mentioned in `NestedLiteral`.
+IntoDType: TypeAlias = (
+    DuckDBPyType
+    | StrIntoDType
+    | type[NPScalarTypeLike]
+    | type[ScalarLiteral]
+    | type[list[Any]]
+    | type[dict[Any, Any]]
+    | dict[Any, Any]
+)
+"""All types that can be converted to a `DuckDBPyType`.
+
+They can be arbitrarily nested as long as their leaf values are convertible to `DuckDBPyType`.
+
+See Also:
+    https://duckdb.org/docs/stable/clients/python/types
+"""
+
+# NOTE: here we keep the covariance "hack" and warn the user in the docstring,
+# because otherwise we can just resort to `Any` for the `dict` and `list` types.
+IntoFields: TypeAlias = Mapping[str, IntoDType] | Sequence[IntoDType]
+"""Types that can be converted either into:
+
+- a nested `DuckDBPyType` (e.g. `STRUCT` or `UNION`)
+- a schema for file reads
+
+Warning:
+    Only `dict` and `list` containers are accepted at runtime.
+    We use `Mapping` and `Sequence` here to satisfy the covariance of the element types.
+"""
+
+# File-related
+
+# NOTE: ideally HiveTypes should also be accepted as a Mapping[str, StrIntoDType].
+ColumnsTypes: TypeAlias = Mapping[str, StrIntoDType]
+HiveTypes: TypeAlias = dict[str, StrIntoDType]
+ParquetFieldIdsType: TypeAlias = Mapping[str, int | ParquetFieldIdsType]
+
+_Auto: TypeAlias = Literal["auto"]
+ParquetFieldsOptions: TypeAlias = _Auto | ParquetFieldIdsType
+"""Types accepted for the `field_ids` parameter in parquet writing methods."""
+
+_CompressionOptions: TypeAlias = Literal["none", "gzip", "zstd"]
+"""Generally available compression options."""
+
+CsvCompression: TypeAlias = _Auto | _CompressionOptions
+CsvEncoding: TypeAlias = Literal["utf-8", "utf-16", "latin-1"] | str
+"""Encoding options.
+
+All available options not in the literal values can be seen here:
+    https://duckdb.org/docs/stable/core_extensions/encodings
+"""
+JsonCompression: TypeAlias = _CompressionOptions | Literal["auto_detect"]
+JsonFormat: TypeAlias = _Auto | Literal["unstructured", "newline_delimited", "array"]
+JsonRecordOptions: TypeAlias = _Auto | Literal["true", "false"]
+
+# Other
+
+JoinType = Literal["inner", "left", "right", "outer", "semi", "anti"]
+"""Types of join accepted by `DuckDBPyRelation.join` method."""
diff --git a/external/duckdb b/external/duckdb
index 461ef9e3..4213f2ba 160000
--- a/external/duckdb
+++ b/external/duckdb
@@ -1 +1 @@
-Subproject commit 461ef9e350c2599d77df623a6cb9b3aa84213a25
+Subproject commit 4213f2bae35fd8790c0d0551c4d24246a039a0c8