-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathapp.py
More file actions
137 lines (110 loc) · 4.29 KB
/
Copy pathapp.py
File metadata and controls
137 lines (110 loc) · 4.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import os
import markdown
import itertools
from glob import iglob
# Web app
import streamlit as st
from dotenv import load_dotenv
# File scraping
from bs4 import BeautifulSoup
# Langchain
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
# HTML
from htmlTemplates import css, bot_template, user_template
from enum import Enum
class SDK(Enum):
    """Platforms whose documentation can be loaded into the chat bot.

    The value of each member is interpolated into the ``./data/<value>/``
    search path built by ``load_text_from_docs``.
    """

    Android = "Android"
    Angular = "Angular"
    Flutter = "Flutter"
    iOS = "iOS"
    React = "React"
    # Unlike the other members, this value is all lower-case.
    ReactNative = "reactnative"
def handle_question(question):
    """Send ``question`` to the conversation bot and render the chat history.

    The chain stored in ``st.session_state.conversation_bot`` is called with
    the question. The ``"chat_history"`` entry of its response replaces
    ``st.session_state.chat_history``, and every message in it is written to
    the page, most recent message first.

    Args:
        question: The text the user entered.
    """
    # Imported locally so this function does not depend on a file-level import.
    from html import escape

    response = st.session_state.conversation_bot({"question": question})
    st.session_state.chat_history = response["chat_history"]
    print("Chat history:", st.session_state.chat_history)
    print("Response:", response)

    for message in reversed(st.session_state.chat_history):
        template = bot_template if message.type == "ai" else user_template
        # The templates are rendered with unsafe_allow_html=True, so message
        # text (which includes raw user input) must be escaped; otherwise any
        # "<", ">" or "&" in it would be interpreted as markup.
        st.write(
            template.replace("{{MSG}}", escape(message.content)),
            unsafe_allow_html=True,
        )
def create_conversation_bot(vector_store):
    """Build a conversational retrieval chain on top of ``vector_store``.

    Args:
        vector_store: Object whose ``as_retriever()`` result is handed to the
            chain as its retriever.

    Returns:
        The chain produced by ``ConversationalRetrievalChain.from_llm`` using
        a default-constructed ``ChatOpenAI`` model and a
        ``ConversationBufferMemory`` keyed on ``"chat_history"``.
    """
    print("Creating conversation bot")
    chat_model = ChatOpenAI()
    # return_messages=True makes the memory hand back message objects.
    history = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vector_store.as_retriever(),
        memory=history,
    )
def put_data_into_db(text_chunks):
    """Embed ``text_chunks`` and index them in a FAISS vector store.

    Args:
        text_chunks: The texts passed to ``FAISS.from_texts``.

    Returns:
        The vector store created by ``FAISS.from_texts`` with
        ``OpenAIEmbeddings`` as the embedding.
    """
    # TODO: use HF embeddings for free
    print("Putting data into DB")
    return FAISS.from_texts(texts=text_chunks, embedding=OpenAIEmbeddings())
def load_text_from_docs(sdk):
    """Collect plain-text chunks from the Markdown docs of one SDK.

    Every ``md``/``mdx`` file found recursively below ``./data/<sdk.value>/``
    is rendered to HTML. The top-level elements of each file are split at
    headlines (``h1``-``h5``), and the text of each run of non-headline
    elements is joined with spaces into one chunk.

    Args:
        sdk: Object whose ``value`` names the directory under ``./data/``
            (``main`` passes an ``SDK`` member).

    Returns:
        A list of strings, one per run of non-headline elements, in the order
        the files were found. Empty when no matching file exists.
    """
    rootdir = f"./data/{sdk.value}/**/*"
    print("Searching for files in", rootdir)
    file_types = ("md", "mdx")
    headlines = ("h1", "h2", "h3", "h4", "h5")

    filtered_files = [
        path
        for path in iglob(rootdir, recursive=True)
        if os.path.isfile(path) and path.split(".")[-1] in file_types
    ]
    print(f"Found {len(filtered_files)} relevant files.")

    text_elements = []
    for file in filtered_files:
        print("Processing file:", file)
        # Read with an explicit encoding so the result does not depend on the
        # platform default (assumes the docs are UTF-8 -- TODO confirm).
        with open(file, "r", encoding="utf-8") as f:
            rendered = markdown.markdown(f.read())
        # recursive=False keeps only top-level elements. Walking every nested
        # tag as well would emit the text of list items, links, inline code
        # etc. twice: once via the parent and once via the child.
        elements = BeautifulSoup(rendered, "html.parser").find_all(
            recursive=False
        )
        # TODOS:
        # - Headlines are omitted - should be part of the text
        # - Some elements are still showing up (e.g. :::note)
        for is_headline, group in itertools.groupby(
            elements, lambda el: el.name in headlines
        ):
            if not is_headline:
                text_elements.append(" ".join(el.get_text() for el in group))
    return text_elements
def main():
    """Render the Streamlit page and answer the question entered, if any.

    If the session state has no ``conversation_bot`` yet, the docs for
    ``SDK.iOS`` are loaded, embedded into a vector store, and the resulting
    bot is stored in the session state. A submitted question is then passed
    to ``handle_question``.
    """
    load_dotenv()
    print("Main is executed")

    # setup streamlit app
    st.set_page_config(page_title="Chat with Stream docs", page_icon=":boat:")
    st.write(css, unsafe_allow_html=True)
    st.header("Chat with Stream docs :boat:")

    state = st.session_state

    if "conversation_bot" not in state:
        print("Conversation bot not found")
        with st.spinner("Loading data..."):
            # docs -> text chunks -> vector store -> conversation bot
            doc_chunks = load_text_from_docs(SDK.iOS)
            print(f"Found {len(doc_chunks)} chunks of texts.")
            index = put_data_into_db(doc_chunks)
            state.conversation_bot = create_conversation_bot(index)

    # Add chat history to session state if not present
    if "chat_history" not in state:
        state.chat_history = None

    question = st.text_input("Ask a question to our documentation:")
    if not question:
        # Nothing was entered, so there is nothing to answer.
        return
    with st.spinner("Thinking..."):
        handle_question(question)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()