diff --git a/pyproject.toml b/pyproject.toml
index e1eaed6..25de5e7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,6 +9,7 @@ authors = [
 ]
 requires-python = ">=3.10"
 dependencies = [
+    "boto3",
     "click",
     "click-default-group",
     "httpx",
@@ -32,6 +33,7 @@ build-backend = "uv_build"
 
 [dependency-groups]
 dev = [
+    "moto[s3]>=5.0.0",
     "pytest>=9.0.2",
     "pytest-httpx>=0.35.0",
     "syrupy>=5.0.0",
diff --git a/src/claude_code_transcripts/__init__.py b/src/claude_code_transcripts/__init__.py
index fa383dc..689ccde 100644
--- a/src/claude_code_transcripts/__init__.py
+++ b/src/claude_code_transcripts/__init__.py
@@ -12,6 +12,8 @@
 from datetime import datetime
 from pathlib import Path
 
+import boto3
+from botocore.exceptions import ClientError
 import click
 from click_default_group import DefaultGroup
 import httpx
@@ -1564,6 +1566,52 @@ def fetch_url_to_tempfile(url):
     return temp_file
 
 
+def is_s3_url(path):
+    """Check if a path is an S3 URL (starts with s3://)."""
+    return path.startswith("s3://")
+
+
+def fetch_s3_to_tempfile(s3_url):
+    """Fetch an S3 object and save to a temporary file.
+
+    Returns the Path to the temporary file.
+    Raises click.ClickException on invalid URLs or S3 errors.
+    """
+    # Parse s3://bucket/key format
+    if not s3_url.startswith("s3://"):
+        raise click.ClickException(f"Invalid S3 URL: {s3_url}")
+
+    parts = s3_url[5:].split("/", 1)  # Remove "s3://" prefix
+    # Reject both "s3://bucket" and "s3://bucket/" (empty key would only
+    # fail later with a confusing S3 error).
+    if len(parts) < 2 or not parts[1]:
+        raise click.ClickException(f"Invalid S3 URL (missing key): {s3_url}")
+
+    bucket = parts[0]
+    key = parts[1]
+
+    try:
+        s3 = boto3.client("s3")
+        response = s3.get_object(Bucket=bucket, Key=key)
+        content = response["Body"].read().decode("utf-8")
+    except ClientError as e:
+        # Chain the cause so --debug tracebacks show the original S3 error.
+        raise click.ClickException(f"Failed to fetch S3 object: {e}") from e
+
+    # Determine file extension from key
+    if key.endswith(".jsonl"):
+        suffix = ".jsonl"
+    elif key.endswith(".json"):
+        suffix = ".json"
+    else:
+        suffix = ".jsonl"  # Default to JSONL
+
+    # Extract a name from the key for the temp file
+    key_name = Path(key).stem or "session"
+
+    temp_dir = Path(tempfile.gettempdir())
+    temp_file = temp_dir / f"claude-s3-{key_name}{suffix}"
+    temp_file.write_text(content, encoding="utf-8")
+    return temp_file
+
+
 @cli.command("json")
 @click.argument("json_file", type=click.Path())
 @click.option(
@@ -1608,6 +1656,13 @@ def json_cmd(json_file, output, output_auto, repo, gist, include_json, open_brow
         json_file_path = temp_file
         # Use URL path for naming
         url_name = Path(json_file.split("?")[0]).stem or "session"
+    elif is_s3_url(json_file):
+        click.echo(f"Fetching {json_file}...")
+        temp_file = fetch_s3_to_tempfile(json_file)
+        json_file_path = temp_file
+        # Use S3 key for naming
+        s3_key = json_file[5:].split("/", 1)[1] if "/" in json_file[5:] else "session"
+        url_name = Path(s3_key).stem or "session"
     else:
         # Validate that local file exists
         json_file_path = Path(json_file)
diff --git a/tests/test_s3_url.py b/tests/test_s3_url.py
new file mode 100644
index 0000000..3db6418
--- /dev/null
+++ b/tests/test_s3_url.py
@@ -0,0 +1,98 @@
+"""Tests for S3 URL support."""
+
+import json
+
+import boto3
+import pytest
+from moto import mock_aws
+
+from claude_code_transcripts import is_s3_url, fetch_s3_to_tempfile
+
+
+class TestIsS3Url:
+    """Tests for S3 URL detection."""
+
+    def test_detects_s3_url(self):
+        """Test that s3:// URLs are detected."""
+        assert is_s3_url("s3://my-bucket/path/to/file.jsonl") is True
+
+    def test_detects_s3_url_with_nested_path(self):
+        """Test that s3:// URLs with nested paths are detected."""
+        assert is_s3_url("s3://bucket/a/b/c/session.json") is True
+
+    def test_rejects_http_url(self):
+        """Test that http:// URLs are not detected as S3."""
+        assert is_s3_url("http://example.com/file.jsonl") is False
+
+    def test_rejects_https_url(self):
+        """Test that https:// URLs are not detected as S3."""
+        assert is_s3_url("https://example.com/file.jsonl") is False
+
+    def test_rejects_local_path(self):
+        """Test that local paths are not detected as S3."""
+        assert is_s3_url("/path/to/file.jsonl") is False
+        assert is_s3_url("relative/path.json") is False
+
+
+class TestFetchS3ToTempfile:
+    """Tests for S3 file fetching."""
+
+    @mock_aws
+    def test_fetches_jsonl_file(self):
+        """Test fetching a JSONL file from S3."""
+        # Set up mock S3
+        s3 = boto3.client("s3", region_name="us-east-1")
+        s3.create_bucket(Bucket="test-bucket")
+
+        # Upload test content
+        content = '{"type":"user","message":{"content":"Hello"}}\n'
+        s3.put_object(Bucket="test-bucket", Key="sessions/test.jsonl", Body=content)
+
+        # Fetch the file
+        temp_file = fetch_s3_to_tempfile("s3://test-bucket/sessions/test.jsonl")
+
+        assert temp_file.exists()
+        assert temp_file.suffix == ".jsonl"
+        assert temp_file.read_text() == content
+
+    @mock_aws
+    def test_fetches_json_file(self):
+        """Test fetching a JSON file from S3."""
+        # Set up mock S3
+        s3 = boto3.client("s3", region_name="us-east-1")
+        s3.create_bucket(Bucket="test-bucket")
+
+        # Upload test content
+        content = json.dumps({"loglines": []})
+        s3.put_object(Bucket="test-bucket", Key="session.json", Body=content)
+
+        # Fetch the file
+        temp_file = fetch_s3_to_tempfile("s3://test-bucket/session.json")
+
+        assert temp_file.exists()
+        assert temp_file.suffix == ".json"
+        assert temp_file.read_text() == content
+
+    @mock_aws
+    def test_raises_on_missing_bucket(self):
+        """Test that missing bucket raises an error."""
+        import click
+
+        with pytest.raises(click.ClickException) as exc_info:
+            fetch_s3_to_tempfile("s3://nonexistent-bucket/file.jsonl")
+
+        assert "Failed to fetch S3 object" in str(exc_info.value)
+
+    @mock_aws
+    def test_raises_on_missing_key(self):
+        """Test that missing key raises an error."""
+        import click
+
+        # Set up mock S3 with empty bucket
+        s3 = boto3.client("s3", region_name="us-east-1")
+        s3.create_bucket(Bucket="test-bucket")
+
+        with pytest.raises(click.ClickException) as exc_info:
+            fetch_s3_to_tempfile("s3://test-bucket/nonexistent.jsonl")
+
+        assert "Failed to fetch S3 object" in str(exc_info.value)