googleapis
diff --git a/‎bigframes/bigquery/__init__.py‎
Lines changed: 5 additions & 0 deletions b/‎bigframes/bigquery/__init__.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎bigframes/bigquery/_operations/ml.py‎
Lines changed: 150 additions & 1 deletion b/‎bigframes/bigquery/_operations/ml.py‎
Lines changed: 150 additions & 1 deletion
diff --git a/‎bigframes/bigquery/_operations/table.py‎
Lines changed: 103 additions & 0 deletions b/‎bigframes/bigquery/_operations/table.py‎
Lines changed: 103 additions & 0 deletions
diff --git a/‎bigframes/bigquery/ml.py‎
Lines changed: 4 additions & 0 deletions b/‎bigframes/bigquery/ml.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎bigframes/core/compile/sqlglot/aggregations/unary_compiler.py‎
Lines changed: 1 addition & 1 deletion b/‎bigframes/core/compile/sqlglot/aggregations/unary_compiler.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎bigframes/core/compile/sqlglot/compiler.py‎
Lines changed: 17 additions & 16 deletions b/‎bigframes/core/compile/sqlglot/compiler.py‎
Lines changed: 17 additions & 16 deletions
@@ -60,6 +60,7 @@
 from bigframes.bigquery._operations.search import create_vector_index, vector_search
 from bigframes.bigquery._operations.sql import sql_scalar
 from bigframes.bigquery._operations.struct import struct
+from bigframes.bigquery._operations.table import create_external_table
 from bigframes.core.logging import log_adapter
 
 _functions = [
@@ -104,6 +105,8 @@
     sql_scalar,
     # struct ops
     struct,
+    # table ops
+    create_external_table,
 ]
 
 _module = sys.modules[__name__]
@@ -155,6 +158,8 @@
     "sql_scalar",
     # struct ops
     "struct",
+    # table ops
+    "create_external_table",
     # Modules / SQL namespaces
     "ai",
     "ml",
 
@@ -14,7 +14,7 @@
 
 from __future__ import annotations
 
-from typing import cast, Mapping, Optional, Union
+from typing import cast, List, Mapping, Optional, Union
 
 import bigframes_vendored.constants
 import google.cloud.bigquery
@@ -431,3 +431,152 @@ def transform(
         return bpd.read_gbq_query(sql)
     else:
         return session.read_gbq_query(sql)
+
+
+@log_adapter.method_logger(custom_base_name="bigquery_ml")
+def generate_text(
+    model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
+    input_: Union[pd.DataFrame, dataframe.DataFrame, str],
+    *,
+    temperature: Optional[float] = None,
+    max_output_tokens: Optional[int] = None,
+    top_k: Optional[int] = None,
+    top_p: Optional[float] = None,
+    flatten_json_output: Optional[bool] = None,
+    stop_sequences: Optional[List[str]] = None,
+    ground_with_google_search: Optional[bool] = None,
+    request_type: Optional[str] = None,
+) -> dataframe.DataFrame:
+    """
+    Generates text using a BigQuery ML model.
+
+    See the `BigQuery ML GENERATE_TEXT function syntax
+    <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-generate-text>`_
+    for additional reference.
+
+    Args:
+        model (bigframes.ml.base.BaseEstimator, str, or pandas.Series):
+            The model to use for text generation.
+        input_ (Union[bigframes.pandas.DataFrame, pandas.DataFrame, str]):
+            The DataFrame or query to use for text generation.
+        temperature (float, optional):
+            A FLOAT64 value that controls the randomness of sampling. The value
+            must be in the range ``[0.0, 1.0]``. A lower temperature works well
+            for prompts that expect a more deterministic and less open-ended
+            or creative response, while a higher temperature can lead to more
+            diverse or creative results. A temperature of ``0`` is
+            deterministic, meaning that the highest probability response is
+            always selected.
+        max_output_tokens (int, optional):
+            An INT64 value that sets the maximum number of tokens in the
+            generated text.
+        top_k (int, optional):
+            An INT64 value that changes how the model selects tokens for
+            output. A ``top_k`` of ``1`` means the next selected token is the
+            most probable among all tokens in the model's vocabulary. A
+            ``top_k`` of ``3`` means that the next token is selected from
+            among the three most probable tokens by using temperature. The
+            default value is ``40``.
+        top_p (float, optional):
+            A FLOAT64 value that changes how the model selects tokens for
+            output. Tokens are selected from most probable to least probable
+            until the sum of their probabilities equals the ``top_p`` value.
+            For example, if tokens A, B, and C have a probability of 0.3, 0.2,
+            and 0.1 and the ``top_p`` value is ``0.5``, then the model will
+            select either A or B as the next token by using temperature. The
+            default value is ``0.95``.
+        flatten_json_output (bool, optional):
+            A BOOL value that determines the content of the generated JSON column.
+        stop_sequences (List[str], optional):
+            An ARRAY<STRING> value that contains the stop sequences for the model.
+        ground_with_google_search (bool, optional):
+            A BOOL value that determines whether to ground the model with Google Search.
+        request_type (str, optional):
+            A STRING value that contains the request type for the model.
+
+    Returns:
+        bigframes.pandas.DataFrame:
+            The generated text.
+    """
+    import bigframes.pandas as bpd  # Only needed for the no-session fallback below.
+
+    model_name, session = _get_model_name_and_session(model, input_)
+    table_sql = _to_sql(input_)
+
+    sql = bigframes.core.sql.ml.generate_text(
+        model_name=model_name,
+        table=table_sql,
+        temperature=temperature,
+        max_output_tokens=max_output_tokens,
+        top_k=top_k,
+        top_p=top_p,
+        flatten_json_output=flatten_json_output,
+        stop_sequences=stop_sequences,
+        ground_with_google_search=ground_with_google_search,
+        request_type=request_type,
+    )
+
+    if session is None:  # No session came back from the helper above.
+        return bpd.read_gbq_query(sql)
+    else:
+        return session.read_gbq_query(sql)
+
+
+@log_adapter.method_logger(custom_base_name="bigquery_ml")
+def generate_embedding(
+    model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
+    input_: Union[pd.DataFrame, dataframe.DataFrame, str],
+    *,
+    flatten_json_output: Optional[bool] = None,
+    task_type: Optional[str] = None,
+    output_dimensionality: Optional[int] = None,
+) -> dataframe.DataFrame:
+    """
+    Generates text embedding using a BigQuery ML model.
+
+    See the `BigQuery ML GENERATE_EMBEDDING function syntax
+    <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-generate-embedding>`_
+    for additional reference.
+
+    Args:
+        model (bigframes.ml.base.BaseEstimator, str, or pandas.Series):
+            The model to use for text embedding.
+        input_ (Union[bigframes.pandas.DataFrame, pandas.DataFrame, str]):
+            The DataFrame or query to use for text embedding.
+        flatten_json_output (bool, optional):
+            A BOOL value that determines the content of the generated JSON column.
+        task_type (str, optional):
+            A STRING value that specifies the intended downstream application task.
+            Supported values are:
+            - ``RETRIEVAL_QUERY``
+            - ``RETRIEVAL_DOCUMENT``
+            - ``SEMANTIC_SIMILARITY``
+            - ``CLASSIFICATION``
+            - ``CLUSTERING``
+            - ``QUESTION_ANSWERING``
+            - ``FACT_VERIFICATION``
+            - ``CODE_RETRIEVAL_QUERY``
+        output_dimensionality (int, optional):
+            An INT64 value that specifies the size of the output embedding.
+
+    Returns:
+        bigframes.pandas.DataFrame:
+            The generated text embedding.
+    """
+    import bigframes.pandas as bpd  # Only needed for the no-session fallback below.
+
+    model_name, session = _get_model_name_and_session(model, input_)
+    table_sql = _to_sql(input_)
+
+    sql = bigframes.core.sql.ml.generate_embedding(
+        model_name=model_name,
+        table=table_sql,
+        flatten_json_output=flatten_json_output,
+        task_type=task_type,
+        output_dimensionality=output_dimensionality,
+    )
+
+    if session is None:  # No session came back from the helper above.
+        return bpd.read_gbq_query(sql)
+    else:
+        return session.read_gbq_query(sql)
@@ -0,0 +1,103 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Mapping, Optional, Union
+
+import bigframes_vendored.constants
+import google.cloud.bigquery
+import pandas as pd
+
+import bigframes.core.logging.log_adapter as log_adapter
+import bigframes.core.sql.table
+import bigframes.session
+
+
+def _get_table_metadata(
+    *,
+    bqclient: google.cloud.bigquery.Client,
+    table_name: str,
+) -> pd.Series:
+    table_metadata = bqclient.get_table(table_name)  # Look the table up by name.
+    table_dict = table_metadata.to_api_repr()  # API representation of the table.
+    return pd.Series(table_dict)  # Wrap the representation in a Series.
+
+
+@log_adapter.method_logger(custom_base_name="bigquery_table")
+def create_external_table(
+    table_name: str,
+    *,
+    replace: bool = False,
+    if_not_exists: bool = False,
+    columns: Optional[Mapping[str, str]] = None,
+    partition_columns: Optional[Mapping[str, str]] = None,
+    connection_name: Optional[str] = None,
+    options: Mapping[str, Union[str, int, float, bool, list]],
+    session: Optional[bigframes.session.Session] = None,
+) -> pd.Series:
+    """
+    Creates a BigQuery external table and returns its metadata.
+
+    See the `BigQuery CREATE EXTERNAL TABLE DDL syntax
+    <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_external_table_statement>`_
+    for additional reference.
+
+    Args:
+        table_name (str):
+            The name of the table in BigQuery.
+        replace (bool, default False):
+            Whether to replace the table if it already exists.
+        if_not_exists (bool, default False):
+            Whether to ignore the error if the table already exists.
+        columns (Mapping[str, str], optional):
+            The table's schema.
+        partition_columns (Mapping[str, str], optional):
+            The table's partition columns.
+        connection_name (str, optional):
+            The connection to use for the table.
+        options (Mapping[str, Union[str, int, float, bool, list]]):
+            The OPTIONS clause, which specifies the table options. Required.
+        session (bigframes.session.Session, optional):
+            The session to use. If not provided, the default session is used.
+
+    Returns:
+        pandas.Series:
+            A Series with object dtype containing the table metadata. Reference
+            the `BigQuery Table REST API reference
+            <https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table>`_
+            for available fields.
+    """
+    import bigframes.pandas as bpd  # Only needed when no session is passed in.
+
+    sql = bigframes.core.sql.table.create_external_table_ddl(
+        table_name=table_name,
+        replace=replace,
+        if_not_exists=if_not_exists,
+        columns=columns,
+        partition_columns=partition_columns,
+        connection_name=connection_name,
+        options=options,
+    )
+
+    if session is None:
+        bpd.read_gbq_query(sql)  # Run the DDL; the query result itself is unused.
+        session = bpd.get_global_session()  # Its bqclient is used for the lookup below.
+        assert (
+            session is not None
+        ), f"Missing connection to BigQuery. Please report how you encountered this error at {bigframes_vendored.constants.FEEDBACK_LINK}."
+    else:
+        session.read_gbq_query(sql)  # Run the DDL; the query result itself is unused.
+
+    return _get_table_metadata(bqclient=session.bqclient, table_name=table_name)
@@ -23,6 +23,8 @@
     create_model,
     evaluate,
     explain_predict,
+    generate_embedding,
+    generate_text,
     global_explain,
     predict,
     transform,
@@ -35,4 +37,6 @@
     "explain_predict",
     "global_explain",
     "transform",
+    "generate_text",
+    "generate_embedding",
 ]
@@ -527,7 +527,7 @@ def _(
     else:
         result = apply_window_if_present(result, window)
 
-    if op.should_floor_result:
+    if op.should_floor_result or column.dtype == dtypes.TIMEDELTA_DTYPE:
         result = sge.Cast(this=sge.func("FLOOR", result), to="INT64")
     return result
 
 
@@ -42,8 +42,6 @@
 def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
     """Compiles a BigFrameNode according to the request into SQL using SQLGlot."""
 
-    # Generator for unique identifiers.
-    uid_gen = guid.SequentialUIDGenerator()
     output_names = tuple((expression.DerefOp(id), id.sql) for id in request.node.ids)
     result_node = nodes.ResultNode(
         request.node,
@@ -62,12 +60,8 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
     )
     if request.sort_rows:
         result_node = typing.cast(nodes.ResultNode, rewrite.column_pruning(result_node))
-        result_node = _remap_variables(result_node, uid_gen)
-        result_node = typing.cast(
-            nodes.ResultNode, rewrite.defer_selection(result_node)
-        )
         encoded_type_refs = data_type_logger.encode_type_refs(result_node)
-        sql = _compile_result_node(result_node, uid_gen)
+        sql = _compile_result_node(result_node)
         return configs.CompileResult(
             sql,
             result_node.schema.to_bigquery(),
@@ -78,9 +72,6 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
     ordering: typing.Optional[bf_ordering.RowOrdering] = result_node.order_by
     result_node = dataclasses.replace(result_node, order_by=None)
     result_node = typing.cast(nodes.ResultNode, rewrite.column_pruning(result_node))
-
-    result_node = _remap_variables(result_node, uid_gen)
-    result_node = typing.cast(nodes.ResultNode, rewrite.defer_selection(result_node))
     encoded_type_refs = data_type_logger.encode_type_refs(result_node)
-    sql = _compile_result_node(result_node, uid_gen)
+    sql = _compile_result_node(result_node)
     # Return the ordering iff no extra columns are needed to define the row order
@@ -105,11 +96,16 @@ def _remap_variables(
     return typing.cast(nodes.ResultNode, result_node)
 
 
-def _compile_result_node(
-    root: nodes.ResultNode, uid_gen: guid.SequentialUIDGenerator
-) -> str:
+def _compile_result_node(root: nodes.ResultNode) -> str:
+    # Create UIDs to standardize variable names and ensure consistent compilation
+    # of nodes using the same generator.
+    uid_gen = guid.SequentialUIDGenerator()
+    root = _remap_variables(root, uid_gen)
+    root = typing.cast(nodes.ResultNode, rewrite.defer_selection(root))
+
     # Have to bind schema as the final step before compilation.
     root = typing.cast(nodes.ResultNode, schema_binding.bind_schema_to_tree(root))
+
     selected_cols: tuple[tuple[str, sge.Expression], ...] = tuple(
         (name, scalar_compiler.scalar_op_compiler.compile_expression(ref))
         for ref, name in root.output_cols
@@ -135,7 +131,6 @@ def _compile_result_node(
     return sqlglot_ir.sql
 
 
-@functools.lru_cache(maxsize=5000)
 def compile_node(
     node: nodes.BigFrameNode, uid_gen: guid.SequentialUIDGenerator
 ) -> ir.SQLGlotIR:
@@ -274,10 +269,16 @@ def compile_concat(node: nodes.ConcatNode, *children: ir.SQLGlotIR) -> ir.SQLGlo
     assert len(children) >= 1
     uid_gen = children[0].uid_gen
 
-    output_ids = [id.sql for id in node.output_ids]
+    # BigQuery `UNION` query takes the column names from the first `SELECT` clause.
+    default_output_ids = [field.id.sql for field in node.child_nodes[0].fields]
+    output_aliases = [
+        (default_output_id, output_id.sql)
+        for default_output_id, output_id in zip(default_output_ids, node.output_ids)
+    ]
+
     return ir.SQLGlotIR.from_union(
         [child.expr for child in children],
-        output_ids=output_ids,
+        output_aliases=output_aliases,
         uid_gen=uid_gen,
     )