-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Expand file tree
/
Copy pathnotice_extraction.py
More file actions
106 lines (96 loc) · 3.34 KB
/
notice_extraction.py
File metadata and controls
106 lines (96 loc) · 3.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from datetime import date, datetime
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field, computed_field
class NoticeEmailExtract(BaseModel):
    """Structured fields extracted from a notice email.

    Every field is optional and defaults to ``None``. The two raw date
    strings (``date_of_notice_str`` and ``compliance_deadline_str``) are
    declared with ``exclude=True`` and ``repr=False``; their parsed values
    are exposed through the computed fields ``date_of_notice`` and
    ``compliance_deadline``.
    """

    # Raw date string; parsed by the ``date_of_notice`` computed field.
    date_of_notice_str: str | None = Field(
        default=None,
        exclude=True,
        repr=False,
        description=(
            "The date of the notice (if any) reformatted\n"
            "to match YYYY-mm-dd"
        ),
    )
    entity_name: str | None = Field(
        default=None,
        description=(
            "The name of the entity sending the notice (if present\n"
            "in the message)"
        ),
    )
    entity_phone: str | None = Field(
        default=None,
        description=(
            "The phone number of the entity sending the notice\n"
            "(if present in the message)"
        ),
    )
    entity_email: str | None = Field(
        default=None,
        description=(
            "The email of the entity sending the notice\n"
            "(if present in the message)"
        ),
    )
    project_id: int | None = Field(
        default=None,
        description=(
            "The project ID (if present in the message) -\n"
            "must be an integer"
        ),
    )
    site_location: str | None = Field(
        default=None,
        description=(
            "The site location of the project (if present\n"
            "in the message). Use the full address if possible."
        ),
    )
    violation_type: str | None = Field(
        default=None,
        description=(
            "The type of violation (if present in the\n"
            "message)"
        ),
    )
    required_changes: str | None = Field(
        default=None,
        description=(
            "The required changes specified by the entity\n"
            "(if present in the message)"
        ),
    )
    # Raw date string; parsed by the ``compliance_deadline`` computed field.
    compliance_deadline_str: str | None = Field(
        default=None,
        exclude=True,
        repr=False,
        description=(
            "The date that the company must comply (if any)\n"
            "reformatted to match YYYY-mm-dd"
        ),
    )
    max_potential_fine: float | None = Field(
        default=None,
        description=(
            "The maximum potential fine\n"
            "(if any)"
        ),
    )

    @staticmethod
    def _convert_string_to_date(date_str: str | None) -> date | None:
        """Parse a ``YYYY-mm-dd`` string into a ``date``.

        Args:
            date_str: The raw date string, or ``None`` when no date was
                extracted.

        Returns:
            The parsed date, or ``None`` when ``date_str`` is ``None`` or
            cannot be parsed with the ``%Y-%m-%d`` format.
        """
        # A missing date is the ordinary "field not present" case, so it is
        # handled up front instead of letting strptime raise and print a
        # TypeError for every notice without a date.
        if date_str is None:
            return None
        try:
            return datetime.strptime(date_str, "%Y-%m-%d").date()
        except (TypeError, ValueError) as e:
            # Malformed or non-string input: report it and fall back to None
            # rather than failing the whole extraction.
            print(e)
            return None

    @computed_field
    @property
    def date_of_notice(self) -> date | None:
        """Return ``date_of_notice_str`` parsed as a date, or ``None``."""
        return self._convert_string_to_date(self.date_of_notice_str)

    @computed_field
    @property
    def compliance_deadline(self) -> date | None:
        """Return ``compliance_deadline_str`` parsed as a date, or ``None``."""
        return self._convert_string_to_date(self.compliance_deadline_str)
# System message template for the extraction prompt. ``{message}`` is the
# template variable that receives the notice text.
_NOTICE_PARSE_SYSTEM_TEMPLATE = (
    "\n"
    "Parse the date of notice, sending entity name, sending entity\n"
    "phone, sending entity email, project id, site location, violation\n"
    "type, required changes, compliance deadline, and maximum potential\n"
    "fine from the message. If any of the fields aren't present, don't\n"
    "populate them. Try to cast dates into the YYYY-mm-dd format. Don't\n"
    "populate fields if they're not present in the message.\n"
    "Here's the notice message:\n"
    "{message}\n"
)

# Prompt consisting of a single system message built from the template above.
info_parse_prompt = ChatPromptTemplate.from_messages(
    [("system", _NOTICE_PARSE_SYSTEM_TEMPLATE)]
)

# Chat model used for parsing notices (gpt-4o-mini, temperature 0).
notice_parser_model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# The model wrapped so that its output is structured as NoticeEmailExtract.
_structured_notice_model = notice_parser_model.with_structured_output(
    NoticeEmailExtract
)

# Full chain: the prompt piped into the structured-output model.
NOTICE_PARSER_CHAIN = info_parse_prompt | _structured_notice_model