-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patheval.py
More file actions
81 lines (64 loc) · 2.5 KB
/
eval.py
File metadata and controls
81 lines (64 loc) · 2.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
"""Simple prompt evaluator for the outer loop (manual evolution).
Scores a prompt based on quality signals: tech stack, code quality,
testing, security, documentation, and more. Max score: 100.
"""
import os
import re
from datetime import datetime
# Scoring configuration for evaluate().
# Every prompt starts at the base score; the final score is capped at the max.
_BASE_SCORE = 30.0
_MAX_SCORE = 100.0

# Each rule awards its points once when ANY of its keywords occurs as a
# substring of the lower-cased prompt. Longer variants that contain a listed
# keyword (e.g. "tests" for "test", "streaming" for "stream") match
# automatically, so only the shortest form is listed.
_KEYWORD_RULES = (
    # --- Tech stack ---
    (("ollama",), 5),
    (("local",), 3),
    (("langgraph",), 4),
    (("react",), 3),
    (("pydantic",), 3),
    (("httpx",), 2),
    # --- Quality ---
    (("pyproject.toml",), 4),
    (("type hint",), 3),
    (("error handling",), 2),
    (("logging",), 2),
    (("test",), 2),
    (("async",), 2),
    (("stream",), 2),
    (("retry",), 2),
    (("main()", "__main__"), 2),
    (("dataclass",), 2),
    (("docstring",), 2),
    # --- Output ---
    (("```",), 4),
    (("readme",), 3),
    (("install",), 2),
    # --- Security ---
    (("auth",), 2),
    (("api key", "secret"), 2),
    # --- Performance ---
    (("cache",), 2),
    (("parallel", "concurrent"), 2),
    # --- Deployment ---
    (("docker",), 2),
    (("makefile",), 2),
)

# --- Completeness --- one bonus per word-count threshold exceeded.
_WORD_COUNT_THRESHOLDS = (100, 200, 300)
_WORD_COUNT_BONUS = 2

# "ci" must match as a whole word: a plain substring test also fires on
# ordinary words such as "specific" or "precise".
_CI_PATTERN = re.compile(r"\bci\b")
_CI_BONUS = 2


def evaluate() -> float:
    """Score ``prompt.txt`` in the current directory on keyword signals.

    The prompt is lower-cased and checked against a table of keywords; each
    matching rule adds its points to a base score of 30. Extra points are
    added for prompts longer than 100, 200 and 300 words and for a CI
    mention. The result is rounded to one decimal and capped at 100.

    Side effects: appends a timestamped line to ``results.log`` and prints a
    short summary to stdout.

    Returns:
        The score as a float, or ``0.0`` (without logging) when
        ``prompt.txt`` does not exist.
    """
    prompt_path = "prompt.txt"
    try:
        # Matching is on ASCII keywords only, so undecodable bytes are
        # replaced rather than allowed to abort the evaluation.
        with open(prompt_path, encoding="utf-8", errors="replace") as prompt_file:
            raw = prompt_file.read()
    except FileNotFoundError:
        print("No prompt.txt found. Create one first.")
        return 0.0

    content = raw.lower()

    score = _BASE_SCORE
    score += sum(
        points
        for keywords, points in _KEYWORD_RULES
        if any(keyword in content for keyword in keywords)
    )
    if _CI_PATTERN.search(content) or "github action" in content:
        score += _CI_BONUS

    word_count = len(content.split())
    score += sum(
        _WORD_COUNT_BONUS
        for threshold in _WORD_COUNT_THRESHOLDS
        if word_count > threshold
    )

    score = min(_MAX_SCORE, round(score, 1))

    timestamp = datetime.now().isoformat()
    with open("results.log", "a", encoding="utf-8") as log_file:
        log_file.write(f"{timestamp} | Score: {score}/100\n")

    print(f"Score: {score}/100")
    # word_count is already an integer; calling len() on it raised TypeError.
    print(f"Prompt: {word_count} words, {len(content)} chars")
    print("Logged to results.log")
    return score
# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    evaluate()