Skip to content

Commit 19b9a65

Browse files
committed
Merge branch 'release-tooling'
# Conflicts: # .gitignore # pyproject.toml
2 parents d5b5109 + c3001a6 commit 19b9a65

16 files changed

Lines changed: 659 additions & 133 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
__pycache__
44
.pytest_cache
55
.env
6+
uv.lock
67
dist/
8+
build/
9+
*.egg-info/
710

811
# Claude
912
.claude/settings.local.json

Makefile

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
.PHONY: test test-live build clean bump-patch bump-minor bump-major set-token-pypi set-token-testpypi release-test release verify-release-test verify-release
2+
3+
test:
4+
uv run --extra dev pytest
5+
6+
test-live:
7+
uv run --extra dev pytest -m live
8+
9+
clean:
10+
rm -rf dist build *.egg-info
11+
12+
build: clean
13+
uv build
14+
15+
# Version bumps: edits pyproject.toml in place and prints old => new.
16+
bump-patch:
17+
uv version --bump patch
18+
19+
bump-minor:
20+
uv version --bump minor
21+
22+
bump-major:
23+
uv version --bump major
24+
25+
# Store a PyPI / TestPyPI token in macOS Keychain. Prompts with hidden input
26+
# (bash `read -rsp`), so the token never appears on screen, in shell history,
27+
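# or in `make` output. (It is still handed to `security` as a command-line
# argument, so it is briefly visible in the process list.) Re-running
# overwrites the existing entry.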
28+
set-token-pypi:
# Runs under bash because `read -s` (hidden input) is not POSIX sh.
# Abort before touching the Keychain if the prompt is cancelled or the token
# is empty, so a stray Enter cannot wipe the existing entry or store "".
	@bash -c 'read -rsp "Paste PyPI token: " TOKEN && echo || exit 1; \
		if [ -z "$$TOKEN" ]; then \
			echo "ERROR: empty token; Keychain entry left unchanged." >&2; \
			exit 1; \
		fi; \
		security delete-generic-password -s pypi-token-pypi >/dev/null 2>&1; \
		security add-generic-password -a "$$USER" -s pypi-token-pypi -w "$$TOKEN" && \
		echo "stored in Keychain under service: pypi-token-pypi"'
33+
34+
set-token-testpypi:
# Runs under bash because `read -s` (hidden input) is not POSIX sh.
# Abort before touching the Keychain if the prompt is cancelled or the token
# is empty, so a stray Enter cannot wipe the existing entry or store "".
	@bash -c 'read -rsp "Paste TestPyPI token: " TOKEN && echo || exit 1; \
		if [ -z "$$TOKEN" ]; then \
			echo "ERROR: empty token; Keychain entry left unchanged." >&2; \
			exit 1; \
		fi; \
		security delete-generic-password -s pypi-token-testpypi >/dev/null 2>&1; \
		security add-generic-password -a "$$USER" -s pypi-token-testpypi -w "$$TOKEN" && \
		echo "stored in Keychain under service: pypi-token-testpypi"'
39+
40+
# Publish to TestPyPI. Token comes from macOS Keychain (service: pypi-token-testpypi).
41+
# The `@` on the recipe lines hides the actual command so the token never appears in output.
42+
release-test: build
# Step 1: refuse to upload a version that TestPyPI already has.
# An empty VERSION means pyproject.toml could not be parsed; stop early
# instead of querying a malformed URL.
	@VERSION=$$(grep '^version' pyproject.toml | head -1 | cut -d'"' -f2); \
	if [ -z "$$VERSION" ]; then \
		echo "ERROR: could not read the version from pyproject.toml."; \
		exit 1; \
	fi; \
	STATUS=$$(curl -s -o /dev/null -w "%{http_code}" "https://test.pypi.org/pypi/diffbot-python/$$VERSION/json") && \
	if [ "$$STATUS" = "200" ]; then \
		echo "ERROR: diffbot-python $$VERSION is already on TestPyPI. Bump the version in pyproject.toml."; \
		exit 1; \
	fi
# Step 2: read the token from the Keychain and publish.
# `security` exits non-zero when the entry is missing, so its status is
# ignored (`|| true`) and the empty-token check below reports the problem.
	@TOKEN=$$(security find-generic-password -s pypi-token-testpypi -w 2>/dev/null || true); \
	if [ -z "$$TOKEN" ]; then \
		echo "ERROR: no Keychain entry for pypi-token-testpypi. Run 'make set-token-testpypi' first."; \
		exit 1; \
	fi; \
	UV_PUBLISH_TOKEN="$$TOKEN" uv publish --publish-url https://test.pypi.org/legacy/
55+
56+
# Publish to real PyPI. Confirmation gate before upload (PyPI does not allow re-uploads).
57+
release: build
# Step 1: refuse to re-upload an existing version, then require the user to
# type the version back as confirmation.
# The empty-VERSION guard matters here: without it a bare Enter would match
# an empty version and pass the confirmation gate.
# `printf` + `read -r` replaces `read -p`, which is a bashism; make runs
# recipes with /bin/sh.
	@VERSION=$$(grep '^version' pyproject.toml | head -1 | cut -d'"' -f2); \
	if [ -z "$$VERSION" ]; then \
		echo "ERROR: could not read the version from pyproject.toml."; \
		exit 1; \
	fi; \
	STATUS=$$(curl -s -o /dev/null -w "%{http_code}" "https://pypi.org/pypi/diffbot-python/$$VERSION/json") && \
	if [ "$$STATUS" = "200" ]; then \
		echo "ERROR: diffbot-python $$VERSION is already on PyPI. Bump the version in pyproject.toml."; \
		exit 1; \
	fi && \
	echo "About to publish diffbot-python $$VERSION to PyPI. This cannot be undone." && \
	printf "Type the version to confirm: " && \
	read -r CONFIRM && \
	[ "$$CONFIRM" = "$$VERSION" ] || { echo "Aborted."; exit 1; }
# Step 2: read the token from the Keychain and publish.
# `security` exits non-zero when the entry is missing, so its status is
# ignored (`|| true`) and the empty-token check below reports the problem.
	@TOKEN=$$(security find-generic-password -s pypi-token-pypi -w 2>/dev/null || true); \
	if [ -z "$$TOKEN" ]; then \
		echo "ERROR: no Keychain entry for pypi-token-pypi. Run 'make set-token-pypi' first."; \
		exit 1; \
	fi; \
	UV_PUBLISH_TOKEN="$$TOKEN" uv publish
73+
74+
# Smoke-test installs from each index in a throwaway venv.
75+
# `cd $$TMP` before running python so CWD doesn't shadow the venv install with this repo's source.
76+
# Deps live on prod PyPI, so TestPyPI install needs --extra-index-url.
77+
verify-release-test:
# The EXIT trap removes the throwaway directory whether the install/import
# succeeds or fails, so failed runs no longer leave temp venvs behind.
# Paths are quoted in case the temp dir contains spaces.
	@VERSION=$$(grep '^version' pyproject.toml | head -1 | cut -d'"' -f2) && \
	TMP=$$(mktemp -d) && \
	trap 'rm -rf "$$TMP"' EXIT && \
	uv venv --python 3.12 "$$TMP/.venv" >/dev/null 2>&1 && \
	uv pip install --quiet --python "$$TMP/.venv/bin/python" \
		--index-url https://test.pypi.org/simple/ \
		--extra-index-url https://pypi.org/simple/ \
		"diffbot-python==$$VERSION" && \
	(cd "$$TMP" && "$$TMP/.venv/bin/python" -c "import diffbot; print('TestPyPI install OK:', diffbot.__version__)")
87+
88+
verify-release:
# The EXIT trap removes the throwaway directory whether the install/import
# succeeds or fails, so failed runs no longer leave temp venvs behind.
# Paths are quoted in case the temp dir contains spaces.
	@VERSION=$$(grep '^version' pyproject.toml | head -1 | cut -d'"' -f2) && \
	TMP=$$(mktemp -d) && \
	trap 'rm -rf "$$TMP"' EXIT && \
	uv venv --python 3.12 "$$TMP/.venv" >/dev/null 2>&1 && \
	uv pip install --quiet --python "$$TMP/.venv/bin/python" "diffbot-python==$$VERSION" && \
	(cd "$$TMP" && "$$TMP/.venv/bin/python" -c "import diffbot; print('PyPI install OK:', diffbot.__version__)")

README.md

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,38 @@ pip install -e ".[dev]"
1818
## Usage
1919

2020
### Authentication
21-
Set your Diffbot API token in your environment or .env.
21+
22+
The CLI and the library can share a single credential. The token always has to be
23+
passed to the client explicitly, but `resolve_token()` gives you the same lookup the
24+
CLI uses, in this order:
25+
26+
1. An explicit token passed to `resolve_token(token)`.
27+
2. The `DIFFBOT_API_TOKEN` environment variable.
28+
3. A `DIFFBOT_API_TOKEN=...` line in `~/.diffbot/credentials`.
29+
30+
Set it once and it works for both the CLI and your scripts. Either export it:
2231

2332
```bash
2433
export DIFFBOT_API_TOKEN=<TOKEN>
2534
```
2635

36+
…or write it to the shared credentials file (handy for keeping it out of your shell environment):
37+
38+
```bash
39+
mkdir -p ~/.diffbot
40+
printf 'DIFFBOT_API_TOKEN=%s\n' '<TOKEN>' > ~/.diffbot/credentials
41+
chmod 600 ~/.diffbot/credentials
42+
```
43+
44+
With either in place, resolve the token and pass it to the client:
45+
46+
```python
47+
from diffbot import Diffbot, resolve_token
48+
49+
db = Diffbot(token=resolve_token()) # from env var or ~/.diffbot/credentials
50+
data = db.extract("https://www.example.com")
51+
```
52+
2753
### Extract structured content
2854
```python
2955
from diffbot import Diffbot
@@ -166,7 +192,15 @@ asyncio.run(main())
166192

167193
## CLI
168194

169-
This library also includes a CLI.
195+
This library also includes a CLI exposed as the `db` command.
196+
197+
To make `db` available from anywhere, install it as an isolated tool with [uv](https://docs.astral.sh/uv/):
198+
199+
```bash
200+
uv tool install .
201+
```
202+
203+
This drops a `db` executable into `~/.local/bin` (ensure it is on your `PATH`). Use `--force` to reinstall or upgrade after changes, or `--editable` to have source edits take effect immediately. Alternatively, a plain `pip install .` (or `pip install -e .`) also installs the `db` entry point into the active environment.
170204

171205
```bash
172206
export DIFFBOT_API_TOKEN=your-token-here
@@ -189,7 +223,9 @@ Run the mock test suite:
189223
python -m pytest
190224
```
191225

192-
Run live integration tests against the real API (requires a valid token):
226+
Run live integration tests against the real API (requires a valid token).
227+
The token is resolved the same way as everywhere else — the `DIFFBOT_API_TOKEN`
228+
environment variable or `~/.diffbot/credentials`:
193229
```bash
194-
DIFFBOT_TOKEN=your_token python -m pytest -m live
230+
DIFFBOT_API_TOKEN=your_token python -m pytest -m live
195231
```

pyproject.toml

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,39 @@ version = "0.1.0"
88
description = "Python client library for Diffbot APIs"
99
readme = "README.md"
1010
requires-python = ">=3.10"
11-
classifiers = [
12-
"Programming Language :: Python :: 3",
13-
"Operating System :: OS Independent",
14-
"Topic :: Software Development :: Libraries",
15-
"Topic :: Scientific/Engineering :: Artificial Intelligence",
16-
"Topic :: Internet :: WWW/HTTP :: Indexing/Search"
17-
]
1811
license = "MIT"
1912
license-files = ["LICEN[CS]E*"]
2013
authors = [
2114
{ name = "Jerome Choo", email = "jerome@diffbot.com" },
2215
{ name = "Mike Tung", email = "miket@diffbot.com" }
2316
]
17+
keywords = [
18+
"diffbot",
19+
"knowledge-graph",
20+
"web-scraping",
21+
"extract",
22+
"crawler",
23+
"nlp",
24+
"llm",
25+
"api-client",
26+
]
27+
classifiers = [
28+
"Development Status :: 3 - Alpha",
29+
"Intended Audience :: Developers",
30+
"Operating System :: OS Independent",
31+
"Programming Language :: Python :: 3",
32+
"Programming Language :: Python :: 3 :: Only",
33+
"Programming Language :: Python :: 3.10",
34+
"Programming Language :: Python :: 3.11",
35+
"Programming Language :: Python :: 3.12",
36+
"Programming Language :: Python :: 3.13",
37+
"Topic :: Internet :: WWW/HTTP",
38+
"Topic :: Internet :: WWW/HTTP :: Indexing/Search",
39+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
40+
"Topic :: Software Development :: Libraries :: Python Modules",
41+
"Topic :: Text Processing :: Markup :: HTML",
42+
"Typing :: Typed",
43+
]
2444
dependencies = [
2545
"httpx>=0.27.0",
2646
"click>=8.1.0",
@@ -34,6 +54,7 @@ dev = [
3454

3555
[project.urls]
3656
Homepage = "https://github.com/diffbot/diffbot-python"
57+
Documentation = "https://github.com/diffbot/diffbot-python#readme"
3758
Repository = "https://github.com/diffbot/diffbot-python"
3859
Issues = "https://github.com/diffbot/diffbot-python/issues"
3960

@@ -43,6 +64,15 @@ db = "diffbot.cli:main"
4364
[tool.hatch.build.targets.wheel]
4465
packages = ["src/diffbot"]
4566

67+
[tool.hatch.build.targets.sdist]
68+
include = [
69+
"/src",
70+
"/tests",
71+
"/README.md",
72+
"/LICENSE",
73+
"/pyproject.toml",
74+
]
75+
4676
[tool.pytest.ini_options]
47-
markers = ["live: marks tests as live integration tests requiring a real DIFFBOT_TOKEN"]
48-
addopts = "-m 'not live'"
77+
markers = ["live: marks tests as live integration tests requiring a real DIFFBOT_API_TOKEN"]
78+
addopts = "-m 'not live'"

src/diffbot/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
__version__ = "0.1.0"
66

7+
from ._auth import resolve_token
78
from .client import Diffbot, DiffbotAsync
89
from .crawl import CrawlEvent, CrawlEventType
910
from .errors import (
@@ -14,12 +15,15 @@
1415
RateLimitError,
1516
ValidationError,
1617
)
18+
from .ontology import Ontology
1719

1820
__all__ = [
1921
"Diffbot",
2022
"DiffbotAsync",
23+
"resolve_token",
2124
"CrawlEvent",
2225
"CrawlEventType",
26+
"Ontology",
2327
"DiffbotError",
2428
"AuthError",
2529
"ExtractionError",

src/diffbot/_auth.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
"""Shared Diffbot credential resolution for both the library and the CLI.
2+
3+
The same lookup chain is used everywhere so a single credential works for the
4+
``db`` CLI and any Python script that constructs a client:
5+
6+
1. An explicit token passed to ``resolve_token``.
7+
2. The ``DIFFBOT_API_TOKEN`` environment variable.
8+
3. A ``DIFFBOT_API_TOKEN=...`` line in ``~/.diffbot/credentials``.
9+
"""
10+
11+
import os
12+
import pathlib
13+
from typing import Optional
14+
15+
TOKEN_ENV_VAR = "DIFFBOT_API_TOKEN"
16+
CREDENTIALS_PATH = pathlib.Path.home() / ".diffbot" / "credentials"
17+
18+
19+
def _read_credentials_file() -> str:
    """Return the token stored in the shared credentials file.

    Reads ``CREDENTIALS_PATH`` and returns the value of the first
    ``<TOKEN_ENV_VAR>=...`` line, with surrounding whitespace removed.

    Returns:
        The token, or an empty string when the file is missing, cannot be
        read or decoded, or contains no matching line.
    """
    prefix = f"{TOKEN_ENV_VAR}="
    try:
        # Read directly rather than checking exists() first: the path can
        # vanish, be a directory, or be unreadable between check and read.
        contents = CREDENTIALS_PATH.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return ""

    for raw_line in contents.splitlines():
        line = raw_line.strip()
        if line.startswith(prefix):
            return line[len(prefix):].strip()
    return ""
27+
28+
29+
def resolve_token(token: Optional[str] = None) -> str:
    """Look up the Diffbot API token using the shared resolution chain.

    Sources are consulted in order, and the first non-blank value is returned
    with surrounding whitespace stripped:

    1. the ``token`` argument,
    2. the environment variable named by ``TOKEN_ENV_VAR``,
    3. the credentials file, via ``_read_credentials_file()``.

    Returns an empty string if no token can be found.
    """
    explicit = token.strip() if token else ""
    if explicit:
        return explicit

    from_env = os.environ.get(TOKEN_ENV_VAR, "").strip()
    return from_env or _read_credentials_file()

src/diffbot/cli/_common.py

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,20 @@
1-
import os
2-
import pathlib
3-
41
import click
52

6-
from diffbot import Diffbot
7-
8-
CREDENTIALS_PATH = pathlib.Path.home() / ".diffbot" / "credentials"
9-
10-
11-
def resolve_token() -> str:
12-
"""Return the Diffbot API token from the env var, falling back to ~/.diffbot/credentials."""
13-
token = os.environ.get("DIFFBOT_API_TOKEN", "").strip()
14-
if token:
15-
return token
16-
17-
if CREDENTIALS_PATH.exists():
18-
for line in CREDENTIALS_PATH.read_text().splitlines():
19-
line = line.strip()
20-
if line.startswith("DIFFBOT_API_TOKEN="):
21-
return line[len("DIFFBOT_API_TOKEN="):].strip()
22-
23-
return ""
3+
from diffbot import Diffbot, resolve_token
4+
from diffbot._auth import CREDENTIALS_PATH, TOKEN_ENV_VAR
245

256

267
def get_client() -> Diffbot:
8+
"""Build a Diffbot client using the shared credential resolution chain.
9+
10+
Looks at the DIFFBOT_API_TOKEN env var, then ~/.diffbot/credentials.
11+
"""
2712
token = resolve_token()
2813
if not token:
2914
click.echo(
3015
"Error: no Diffbot API token found.\n"
31-
" Set a DIFFBOT_API_TOKEN environment variable, or\n"
32-
f" write 'DIFFBOT_API_TOKEN=YOUR_TOKEN' to {CREDENTIALS_PATH}",
16+
f" Set a {TOKEN_ENV_VAR} environment variable, or\n"
17+
f" write '{TOKEN_ENV_VAR}=YOUR_TOKEN' to {CREDENTIALS_PATH}",
3318
err=True,
3419
)
3520
raise click.Abort()

src/diffbot/cli/dql.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
from diffbot import DiffbotError
1616

1717
from . import ontology
18-
from ._common import get_client, resolve_token
18+
from diffbot import resolve_token
19+
20+
from ._common import get_client
1921

2022

2123
class _DqlGroup(click.Group):

0 commit comments

Comments
 (0)