Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/datapoint/commands/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,13 @@ def survey_status(ctx, job_id, with_results, as_json):
@survey.command("responses")
@click.argument("job_id")
@click.option("--page", type=int, default=1, show_default=True)
@click.option("--per-page", type=int, default=100, show_default=True, help="Responses per page (max 200).")
@click.option(
"--per-page",
type=int,
default=100,
show_default=True,
help="Minimum responses per page; pages keep each annotator's full answer set together.",
)
@click.option("--include-abandoned", is_flag=True, help="Include answers from abandoned chains.")
@click.option("--include-in-progress", is_flag=True, help="Include answers from in-flight chains.")
@json_option
Expand Down
64 changes: 34 additions & 30 deletions src/datapoint/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ def _format_chain_outcome(r: dict) -> str:
return f"chain: {' / '.join(parts)}" if parts else ""


def _format_response_row(r: dict) -> str:
def _format_response_row(r: dict, include_annotator: bool = True) -> str:
annotator = (r.get("annotator_id") or "?")[:8]
timestamp = r.get("timestamp") or "?"
response_text = r.get("response")
Expand All @@ -381,7 +381,8 @@ def _format_response_row(r: dict) -> str:
loc_str = f" — {location}" if location else ""
chain_outcome = _format_chain_outcome(r)
outcome_str = f" [{chain_outcome}]" if chain_outcome else ""
return f"{annotator} @ {timestamp}: {rendered}{rt_str}{loc_str}{outcome_str}"
prefix = f"{annotator} @ {timestamp}" if include_annotator else f"@ {timestamp}"
return f"{prefix}: {rendered}{rt_str}{loc_str}{outcome_str}"


def render_responses(
Expand All @@ -401,9 +402,16 @@ def render_responses(
steps_meta = data.get("steps")
is_chain = bool(steps_meta) or any(r.get("step_index") is not None for r in responses)

# Pages hold whole annotator groups, so they can exceed per_page and the
# page count is not derivable from total/per_page. Older servers don't
# send total_pages; estimate from the row count for them.
total_pages = data.get("total_pages")
if total_pages is None:
total_pages = -(-total // per_page) if per_page > 0 else 1

lines = [
f"Raw responses — job {job_id}",
f"Showing {len(responses)} of {total} total (page {page}, {per_page} per page)",
f"Showing {len(responses)} of {total} total (page {page} of {total_pages})",
]
if include_abandoned:
lines.append("Including answers from abandoned chains.")
Expand Down Expand Up @@ -438,37 +446,33 @@ def render_responses(
if instruction or opts:
lines.append("")

if is_chain:
by_dp_step: dict[int, dict[int, list[dict]]] = {}
for r in responses:
dp = r.get("datapoint_index", -1)
si = r.get("step_index", -1)
by_dp_step.setdefault(dp, {}).setdefault(si, []).append(r)
for dp_idx in sorted(by_dp_step):
steps = by_dp_step[dp_idx]
total_rows = sum(len(rs) for rs in steps.values())
# Rows arrive annotator-major (each page holds whole annotator answer
# sets, in earliest-activity order) — render one block per annotator.
by_annotator: dict[str, list[dict]] = {}
for r in responses:
by_annotator.setdefault(r.get("annotator_id") or "?", []).append(r)

for annotator, items in by_annotator.items():
if is_chain:
datapoints = {r.get("datapoint_index", -1) for r in items}
lines.append(
f"Datapoint {dp_idx} ({_pluralize(total_rows, 'response')} across {_pluralize(len(steps), 'step')}):"
f"Annotator {annotator[:8]} ({_pluralize(len(items), 'answer')} "
f"across {_pluralize(len(datapoints), 'datapoint')}):"
)
for step_idx in sorted(steps):
items = steps[step_idx]
tt = items[0].get("task_type", "?")
lines.append(f" Step {step_idx} [{tt}] — {_pluralize(len(items), 'response')}:")
for r in items:
lines.append(f" - {_format_response_row(r)}")
lines.append("")
else:
by_datapoint: dict[int, list[dict]] = {}
for r in responses:
by_datapoint.setdefault(r.get("datapoint_index", -1), []).append(r)
for idx in sorted(by_datapoint):
items = by_datapoint[idx]
lines.append(f"Datapoint {idx} ({_pluralize(len(items), 'response')}):")
for r in items:
lines.append(f" - {_format_response_row(r)}")
lines.append("")
dp = r.get("datapoint_index", "?")
si = r.get("step_index", "?")
tt = r.get("task_type", "?")
lines.append(
f" - Datapoint {dp}, step {si} [{tt}]: {_format_response_row(r, include_annotator=False)}"
)
else:
lines.append(f"Annotator {annotator[:8]} ({_pluralize(len(items), 'response')}):")
for r in items:
dp = r.get("datapoint_index", "?")
lines.append(f" - Datapoint {dp}: {_format_response_row(r, include_annotator=False)}")
lines.append("")

total_pages = -(-total // per_page) if per_page > 0 else 1
if page < total_pages:
lines.append(f"More responses available — re-run with --page {page + 1}.")
return "\n".join(lines)
Expand Down
44 changes: 44 additions & 0 deletions tests/test_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,50 @@ def test_render_responses_includes_label():
assert "Logo A" in out


def test_render_responses_groups_by_annotator():
out = f.render_responses(
{
"responses": [
{"annotator_id": "anon_a", "response": "yes", "datapoint_index": 0},
{"annotator_id": "anon_a", "response": "no", "datapoint_index": 1},
{"annotator_id": "anon_b", "response": "yes", "datapoint_index": 0},
],
"total_responses": 3,
"total_pages": 1,
},
job_id="job_1",
page=1,
per_page=100,
)
assert "Annotator anon_a (2 responses):" in out
assert "Annotator anon_b (1 response):" in out
# Blocks preserve the server's annotator order.
assert out.index("anon_a") < out.index("anon_b")
assert "More responses" not in out


def test_render_responses_uses_server_total_pages():
# Pages overshoot per_page under annotator grouping: 8 of 12 rows on page
# 1 of 2. The naive row-count estimate would claim 3 pages.
row = {"annotator_id": "anon_a", "response": "yes", "datapoint_index": 0}
out = f.render_responses(
{"responses": [row] * 8, "total_responses": 12, "total_pages": 2},
job_id="job_1",
page=1,
per_page=5,
)
assert "(page 1 of 2)" in out
assert "--page 2" in out
out_last = f.render_responses(
{"responses": [row] * 4, "total_responses": 12, "total_pages": 2},
job_id="job_1",
page=2,
per_page=5,
)
assert "(page 2 of 2)" in out_last
assert "More responses" not in out_last


def test_render_subscription():
out = f.render_subscription(
{"tier": "pro", "status": "active", "monthly_credits": 1000, "current_period_end": "2026-07-01T00:00:00Z"}
Expand Down
Loading