From 53ec027715e8771fde1eccc1f8caf8497d5a0895 Mon Sep 17 00:00:00 2001 From: ajosh0504 Date: Tue, 17 Feb 2026 07:52:52 -0800 Subject: [PATCH 1/2] Renaming --- ...ipynb => multimodal_ecommerce_agent_voyageai_pixeltable.ipynb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename partners/pixeltable/{ecommerce-search-voyageai-webinar-agentic.ipynb => multimodal_ecommerce_agent_voyageai_pixeltable.ipynb} (100%) diff --git a/partners/pixeltable/ecommerce-search-voyageai-webinar-agentic.ipynb b/partners/pixeltable/multimodal_ecommerce_agent_voyageai_pixeltable.ipynb similarity index 100% rename from partners/pixeltable/ecommerce-search-voyageai-webinar-agentic.ipynb rename to partners/pixeltable/multimodal_ecommerce_agent_voyageai_pixeltable.ipynb From a2e2b404a8492a0c838d7b9ca4b54f6b907df4aa Mon Sep 17 00:00:00 2001 From: ajosh0504 Date: Tue, 17 Feb 2026 07:58:37 -0800 Subject: [PATCH 2/2] Pre-commit checks pass --- ..._ecommerce_agent_voyageai_pixeltable.ipynb | 265 ++++++++++-------- 1 file changed, 141 insertions(+), 124 deletions(-) diff --git a/partners/pixeltable/multimodal_ecommerce_agent_voyageai_pixeltable.ipynb b/partners/pixeltable/multimodal_ecommerce_agent_voyageai_pixeltable.ipynb index a1d4c9e..02506d9 100644 --- a/partners/pixeltable/multimodal_ecommerce_agent_voyageai_pixeltable.ipynb +++ b/partners/pixeltable/multimodal_ecommerce_agent_voyageai_pixeltable.ipynb @@ -96,11 +96,11 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "import getpass\n", + "import os\n", "\n", - "if 'VOYAGE_API_KEY' not in os.environ:\n", - " os.environ['VOYAGE_API_KEY'] = getpass.getpass('Enter your Voyage AI API key: ')" + "if \"VOYAGE_API_KEY\" not in os.environ:\n", + " os.environ[\"VOYAGE_API_KEY\"] = getpass.getpass(\"Enter your Voyage AI API key: \")" ] }, { @@ -126,8 +126,8 @@ "metadata": {}, "outputs": [], "source": [ - "pxt.drop_dir('ecommerce_search', force=True)\n", - "pxt.create_dir('ecommerce_search')" + "pxt.drop_dir(\"ecommerce_search\", 
force=True)\n", + "pxt.create_dir(\"ecommerce_search\")" ] }, { @@ -170,7 +170,7 @@ "metadata": {}, "outputs": [], "source": [ - "DATASET_URL = 'https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/amazon_products_with_images.parquet'" + "DATASET_URL = \"https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/amazon_products_with_images.parquet\"" ] }, { @@ -196,20 +196,21 @@ ], "source": [ "import pandas as pd\n", + "\n", "df = pd.read_parquet(DATASET_URL)\n", "\n", "products = pxt.create_table(\n", - " 'ecommerce_search.products',\n", + " \"ecommerce_search.products\",\n", " source=df,\n", " schema_overrides={\n", - " 'Uniq_Id': pxt.String,\n", - " 'Product_Name': pxt.String,\n", - " 'Category': pxt.String,\n", - " 'Selling_Price': pxt.String,\n", - " 'About_Product': pxt.String,\n", - " 'Image': pxt.Image,\n", + " \"Uniq_Id\": pxt.String,\n", + " \"Product_Name\": pxt.String,\n", + " \"Category\": pxt.String,\n", + " \"Selling_Price\": pxt.String,\n", + " \"About_Product\": pxt.String,\n", + " \"Image\": pxt.Image,\n", " },\n", - " on_error='ignore'\n", + " on_error=\"ignore\",\n", ")" ] }, @@ -246,7 +247,7 @@ } ], "source": [ - "pxt.list_tables('ecommerce_search')" + "pxt.list_tables(\"ecommerce_search\")" ] }, { @@ -258,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -273,7 +274,7 @@ } ], "source": [ - "products.delete(where=(products.About_Product == None) | (products.About_Product == ''))" + "products.delete(where=(products.About_Product == None) | (products.About_Product == \"\"))  # noqa: E711 - Pixeltable needs ==, not is" ] }, { @@ -388,7 +389,7 @@ " products.Product_Name,\n", " products.Image,\n", " products.Selling_Price,\n", - " products.About_Product\n", + " products.About_Product,\n", ").tail(3)" ] }, @@ -467,7 +468,7 @@ } ], "source": [ - "products.where(products.About_Product.contains('dino')).select(\n", + 
"products.where(products.About_Product.contains(\"dino\")).select(\n", " products.Product_Name,\n", " products.Image,\n", " products.Selling_Price,\n", @@ -509,13 +510,10 @@ "outputs": [], "source": [ "products.add_embedding_index(\n", - " idx_name='txt_idx',\n", + " idx_name=\"txt_idx\",\n", " column=products.About_Product,\n", - " embedding=pxtf.voyageai.embeddings.using(\n", - " model='voyage-3.5', \n", - " input_type='document'\n", - " ),\n", - " if_exists='replace'\n", + " embedding=pxtf.voyageai.embeddings.using(model=\"voyage-3.5\", input_type=\"document\"),\n", + " if_exists=\"replace\",\n", ")" ] }, @@ -533,13 +531,12 @@ "outputs": [], "source": [ "products.add_embedding_index(\n", - " idx_name='img_idx',\n", + " idx_name=\"img_idx\",\n", " column=products.Image,\n", " embedding=pxtf.voyageai.multimodal_embed.using(\n", - " model='voyage-multimodal-3.5',\n", - " input_type='document'\n", + " model=\"voyage-multimodal-3.5\", input_type=\"document\"\n", " ),\n", - " if_exists='replace'\n", + " if_exists=\"replace\",\n", ")" ] }, @@ -742,8 +739,8 @@ "metadata": {}, "outputs": [], "source": [ - "query = 'realistic, scientifically accurate dinosaur figurines'\n", - "txt_sim = products.About_Product.similarity(string=query, idx='txt_idx')" + "query = \"realistic, scientifically accurate dinosaur figurines\"\n", + "txt_sim = products.About_Product.similarity(string=query, idx=\"txt_idx\")" ] }, { @@ -777,10 +774,9 @@ "source": [ "txt_results = (\n", " products.order_by(txt_sim, asc=False)\n", - " .select(products.Product_Name, \n", - " products.Image,\n", - " products.About_Product,\n", - " score=txt_sim)\n", + " .select(\n", + " products.Product_Name, products.Image, products.About_Product, score=txt_sim\n", + " )\n", " .where(txt_sim > SIM_THRESHOLD)\n", ")" ] @@ -922,13 +918,13 @@ } ], "source": [ - "sim_i = products.Image.similarity(string='plush, cuddly stuffed dinosaur', idx='img_idx')\n", + "sim_i = products.Image.similarity(\n", + " string=\"plush, cuddly 
stuffed dinosaur\", idx=\"img_idx\"\n", + ")\n", "\n", "img_results = (\n", " products.order_by(sim_i, asc=False)\n", - " .select(products.Product_Name,\n", - " products.Image,\n", - " score=sim_i)\n", + " .select(products.Product_Name, products.Image, score=sim_i)\n", " .where(sim_i > SIM_THRESHOLD)\n", ")\n", "\n", @@ -969,10 +965,9 @@ "source": [ "@pxt.query\n", "def text_search(query_text: str, limit: int = 5):\n", - " sim = products.About_Product.similarity(string=query_text, idx='txt_idx')\n", + " sim = products.About_Product.similarity(string=query_text, idx=\"txt_idx\")\n", " return (\n", - " products\n", - " .where(sim > SIM_THRESHOLD)\n", + " products.where(sim > SIM_THRESHOLD)\n", " .order_by(sim, asc=False)\n", " .limit(limit)\n", " .select(\n", @@ -983,12 +978,12 @@ " )\n", " )\n", "\n", + "\n", "@pxt.query\n", "def image_search(query_text: str, limit: int = 5):\n", - " sim = products.Image.similarity(string=query_text, idx='img_idx')\n", + " sim = products.Image.similarity(string=query_text, idx=\"img_idx\")\n", " return (\n", - " products\n", - " .where(sim > SIM_THRESHOLD)\n", + " products.where(sim > SIM_THRESHOLD)\n", " .order_by(sim, asc=False)\n", " .limit(limit * 3)\n", " .select(\n", @@ -1023,9 +1018,9 @@ "source": [ "# Table to hold user queries\n", "searches = pxt.create_table(\n", - " 'ecommerce_search.searches_live',\n", - " {'query': pxt.String},\n", - " if_exists='replace',\n", + " \"ecommerce_search.searches_live\",\n", + " {\"query\": pxt.String},\n", + " if_exists=\"replace\",\n", ")" ] }, @@ -1131,12 +1126,12 @@ "# Computed columns execute the query functions\n", "searches.add_computed_column(\n", " text_results=text_search(searches.query, limit=5),\n", - " if_exists='replace',\n", + " if_exists=\"replace\",\n", ")\n", "\n", "searches.add_computed_column(\n", " image_results=image_search(searches.query, limit=5),\n", - " if_exists='replace',\n", + " if_exists=\"replace\",\n", ")" ] }, @@ -1199,11 +1194,13 @@ ], "source": [ "# Insert 
queries\n", - "searches.insert([\n", - " {'query': 'realistic, scientifically accurate dinosaur figurines'},\n", - " {'query': 'colorful, cuddly plush dinosaurs'},\n", - " {'query': 'teach math skills with dinosaurs'},\n", - "])" + "searches.insert(\n", + " [\n", + " {\"query\": \"realistic, scientifically accurate dinosaur figurines\"},\n", + " {\"query\": \"colorful, cuddly plush dinosaurs\"},\n", + " {\"query\": \"teach math skills with dinosaurs\"},\n", + " ]\n", + ")" ] }, { @@ -1381,7 +1378,7 @@ "searches.select(\n", " searches.query,\n", " img_matches=searches.image_results[0:3].Product_Name,\n", - " prod_prices=searches.image_results[0:3].Selling_Price\n", + " prod_prices=searches.image_results[0:3].Selling_Price,\n", ").collect()" ] }, @@ -1422,8 +1419,8 @@ "metadata": {}, "outputs": [], "source": [ - "if 'OPENAI_API_KEY' not in os.environ:\n", - " os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key: ')" + "if \"OPENAI_API_KEY\" not in os.environ:\n", + " os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")" ] }, { @@ -1464,9 +1461,7 @@ ], "source": [ "mm_agent = pxt.create_table(\n", - " 'ecommerce_search.mm_agent',\n", - " {'question': pxt.String},\n", - " if_exists='replace'\n", + " \"ecommerce_search.mm_agent\", {\"question\": pxt.String}, if_exists=\"replace\"\n", ")" ] }, @@ -1529,18 +1524,18 @@ "mm_agent.add_computed_column(\n", " llm_response=pxtf.openai.chat_completions(\n", " messages=[\n", - " {'role': 'system', 'content': SYSTEM_PROMPT},\n", - " {'role': 'user', 'content': mm_agent.question}\n", + " {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n", + " {\"role\": \"user\", \"content\": mm_agent.question},\n", " ],\n", - " model='gpt-4o-mini',\n", - " tools=agent_tools\n", + " model=\"gpt-4o-mini\",\n", + " tools=agent_tools,\n", " ),\n", - " if_exists='replace'\n", + " if_exists=\"replace\",\n", ")\n", "\n", "mm_agent.add_computed_column(\n", " 
tool_results=pxtf.openai.invoke_tools(agent_tools, mm_agent.llm_response),\n", - " if_exists='replace'\n", + " if_exists=\"replace\",\n", ")" ] }, @@ -1571,17 +1566,25 @@ " for row in vals:\n", " if not isinstance(row, dict):\n", " continue\n", - " pid = row.get('Uniq_Id', '')\n", + " pid = row.get(\"Uniq_Id\", \"\")\n", " if pid in seen:\n", " continue\n", " seen.add(pid)\n", - " rows.append(f\"- {row.get('Product_Name', 'Unknown')} - {row.get('Selling_Price', 'N/A')}\")\n", + " rows.append(\n", + " f\"- {row.get('Product_Name', 'Unknown')} - {row.get('Selling_Price', 'N/A')}\"\n", + " )\n", "\n", - " context = '\\n'.join(rows[:10]) if rows else 'No results found.'\n", + " context = \"\\n\".join(rows[:10]) if rows else \"No results found.\"\n", " return [\n", - " {'role': 'system', 'content': 'You summarize product options clearly and briefly.'},\n", - " {'role': 'user', 'content': f\"Question: {question}\\n\\nProducts Found:\\n{context}\\n\\nGive a concise recommendation.\"}\n", - " ]\n" + " {\n", + " \"role\": \"system\",\n", + " \"content\": \"You summarize product options clearly and briefly.\",\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": f\"Question: {question}\\n\\nProducts Found:\\n{context}\\n\\nGive a concise recommendation.\",\n", + " },\n", + " ]" ] }, { @@ -1618,15 +1621,14 @@ "source": [ "mm_agent.add_computed_column(\n", " answer_prompt=assemble_answer_prompt(mm_agent.question, mm_agent.tool_results),\n", - " if_exists='replace'\n", + " if_exists=\"replace\",\n", ")\n", "\n", "mm_agent.add_computed_column(\n", " answer=pxtf.openai.chat_completions(\n", - " messages=mm_agent.answer_prompt,\n", - " model='gpt-4o-mini'\n", - " )['choices'][0]['message']['content'],\n", - " if_exists='replace'\n", + " messages=mm_agent.answer_prompt, model=\"gpt-4o-mini\"\n", + " )[\"choices\"][0][\"message\"][\"content\"],\n", + " if_exists=\"replace\",\n", ")" ] }, @@ -1759,11 +1761,13 @@ } ], "source": [ - "mm_agent.insert([\n", - " 
{'question': 'Find me realistic, scientifically accurate dinosaur figurines'},\n", - " {'question': 'Show me colorful, cuddly plush dinosaurs'},\n", - " {'question': 'What is a good toy to teach math skills with dinosaurs?'},\n", - "])" + "mm_agent.insert(\n", + " [\n", + " {\"question\": \"Find me realistic, scientifically accurate dinosaur figurines\"},\n", + " {\"question\": \"Show me colorful, cuddly plush dinosaurs\"},\n", + " {\"question\": \"What is a good toy to teach math skills with dinosaurs?\"},\n", + " ]\n", + ")" ] }, { @@ -1882,7 +1886,7 @@ ], "source": [ "# Check the tool's answer against text similarity search\n", - "searches.insert([{'query': 'realistic, scientifically accurate dinosaur figurines'}])\n", + "searches.insert([{\"query\": \"realistic, scientifically accurate dinosaur figurines\"}])\n", "searches.select(\n", " searches.query,\n", " text_names=searches.text_results[0:5].Product_Name,\n", @@ -1940,11 +1944,11 @@ ], "source": [ "# Check the tool's answer against image similarity search\n", - "searches.insert([{'query': 'colorful, cuddly plush dinosaurs'}])\n", + "searches.insert([{\"query\": \"colorful, cuddly plush dinosaurs\"}])\n", "searches.select(\n", " searches.query,\n", " image_names=searches.image_results[0:5].Product_Name,\n", - " image_prices=searches.image_results[0:5].Selling_Price\n", + " image_prices=searches.image_results[0:5].Selling_Price,\n", ").tail(1)" ] }, @@ -1978,10 +1982,9 @@ "@pxt.query\n", "def text_search_max(query_text: str, limit: int = 5):\n", " \"\"\"Richer text search for analysis/reranking flows.\"\"\"\n", - " sim = products.About_Product.similarity(string=query_text, idx='txt_idx')\n", + " sim = products.About_Product.similarity(string=query_text, idx=\"txt_idx\")\n", " return (\n", - " products\n", - " .where(sim > SIM_THRESHOLD)\n", + " products.where(sim > SIM_THRESHOLD)\n", " .order_by(sim, asc=False)\n", " .limit(limit * 3)\n", " .select(\n", @@ -2004,34 +2007,34 @@ "@pxt.udf\n", "def 
extract_descriptions(results: list[dict], limit: int = 15) -> list[str]:\n", " if not results:\n", - " return ['no matching product description']\n", + " return [\"no matching product description\"]\n", " seen_ids = set()\n", " descriptions = []\n", " for r in results:\n", - " pid = r.get('Uniq_Id')\n", - " desc = r.get('About_Product', '')\n", + " pid = r.get(\"Uniq_Id\")\n", + " desc = r.get(\"About_Product\", \"\")\n", " if pid not in seen_ids and desc:\n", " seen_ids.add(pid)\n", " descriptions.append(desc)\n", " if len(descriptions) >= limit:\n", " break\n", - " return descriptions if descriptions else ['no matching product description']\n", + " return descriptions if descriptions else [\"no matching product description\"]\n", "\n", "\n", "@pxt.udf\n", "def format_reranked(reranked: dict) -> list[str]:\n", " import re\n", "\n", - " if not reranked or 'results' not in reranked:\n", + " if not reranked or \"results\" not in reranked:\n", " return []\n", "\n", " cleaned = []\n", - " for r in reranked['results']:\n", - " doc = r.get('document', '')\n", + " for r in reranked[\"results\"]:\n", + " doc = r.get(\"document\", \"\")\n", "\n", - " prefix = 'Make sure this fits by entering your model number.'\n", - " doc = re.sub(rf\"^{re.escape(prefix)}\\s*\\|\\s*\", '', doc, flags=re.IGNORECASE)\n", - " doc = re.sub(r'\\s+', ' ', doc).strip()\n", + " prefix = \"Make sure this fits by entering your model number.\"\n", + " doc = re.sub(rf\"^{re.escape(prefix)}\\s*\\|\\s*\", \"\", doc, flags=re.IGNORECASE)\n", + " doc = re.sub(r\"\\s+\", \" \", doc).strip()\n", "\n", " if not doc:\n", " continue\n", @@ -2190,18 +2193,17 @@ ], "source": [ "searches.add_computed_column(\n", - " candidates=text_search_max(searches.query, limit=15),\n", - " if_exists='replace'\n", + " candidates=text_search_max(searches.query, limit=15), if_exists=\"replace\"\n", ")\n", "\n", "searches.add_computed_column(\n", " reranked=pxtf.voyageai.rerank(\n", " searches.query,\n", " 
extract_descriptions(searches.candidates),\n", - " model='rerank-2.5',\n", - " top_k=5\n", + " model=\"rerank-2.5\",\n", + " top_k=5,\n", " ),\n", - " if_exists='replace'\n", + " if_exists=\"replace\",\n", ")" ] }, @@ -2327,9 +2329,11 @@ } ], "source": [ - "searches.insert([\n", - " {'query': 'toys for kids who love dinosaurs and math'},\n", - "])" + "searches.insert(\n", + " [\n", + " {\"query\": \"toys for kids who love dinosaurs and math\"},\n", + " ]\n", + ")" ] }, { @@ -2406,20 +2410,28 @@ " import re\n", "\n", " lines = []\n", - " if reranked and 'results' in reranked:\n", - " prefix = 'Make sure this fits by entering your model number.'\n", - " for r in reranked['results'][:5]:\n", - " doc = r.get('document', '')\n", - " doc = re.sub(rf\"^{re.escape(prefix)}\\\\s*\\\\|\\\\s*\", '', doc, flags=re.IGNORECASE)\n", - " doc = re.sub(r'\\\\s+', ' ', doc).strip()\n", + " if reranked and \"results\" in reranked:\n", + " prefix = \"Make sure this fits by entering your model number.\"\n", + " for r in reranked[\"results\"][:5]:\n", + " doc = r.get(\"document\", \"\")\n", + " doc = re.sub(\n", + " rf\"^{re.escape(prefix)}\\s*\\|\\s*\", \"\", doc, flags=re.IGNORECASE\n", + " )\n", + " doc = re.sub(r\"\\s+\", \" \", doc).strip()  # collapse whitespace runs\n", " if not doc:\n", " continue\n", " lines.append(f\"- {doc[:140]} (score: {r.get('relevance_score', 0):.3f})\")\n", "\n", - " context = '\\n'.join(lines) if lines else 'No useful reranked evidence found.'\n", + " context = \"\\n\".join(lines) if lines else \"No useful reranked evidence found.\"\n", " return [\n", - " {'role': 'system', 'content': 'Use reranked evidence only. Be concise and avoid made-up products.'},\n", - " {'role': 'user', 'content': f\"Question: {question}\\n\\nReranked Evidence:\\n{context}\\n\\nGive a concise recommendation.\"}\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": \"Use reranked evidence only. 
Be concise and avoid made-up products.\",\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": f\"Question: {question}\\n\\nReranked Evidence:\\n{context}\\n\\nGive a concise recommendation.\",\n", + " },\n", " ]" ] }, @@ -2461,7 +2473,7 @@ " query_text=mm_agent.question,\n", " limit=15,\n", " ),\n", - " if_exists='replace',\n", + " if_exists=\"replace\",\n", ")\n", "\n", "mm_agent.add_computed_column(\n", @@ -2471,10 +2483,10 @@ " results=mm_agent.text_candidates,\n", " limit=15,\n", " ),\n", - " model='rerank-2.5',\n", + " model=\"rerank-2.5\",\n", " top_k=5,\n", " ),\n", - " if_exists='replace',\n", + " if_exists=\"replace\",\n", ")\n", "\n", "mm_agent.add_computed_column(\n", @@ -2483,10 +2495,10 @@ " question=mm_agent.question,\n", " reranked=mm_agent.text_reranked,\n", " ),\n", - " model='gpt-4o-mini',\n", - " )['choices'][0]['message']['content'],\n", - " if_exists='replace',\n", - ")\n" + " model=\"gpt-4o-mini\",\n", + " )[\"choices\"][0][\"message\"][\"content\"],\n", + " if_exists=\"replace\",\n", + ")" ] }, { @@ -2540,7 +2552,7 @@ " mm_agent.question,\n", " mm_agent.answer,\n", " mm_agent.answer_reranked,\n", - " reranked_top_5=format_reranked(mm_agent.text_reranked)\n", + " reranked_top_5=format_reranked(mm_agent.text_reranked),\n", ").tail(1)" ] }, @@ -2594,6 +2606,11 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.9" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {} + } } }, "nbformat": 4,