So basically, I made this in class the other day. It’s a self-learning chatbot. It’s not very good, and I haven’t worked on it much, so it could be way better. Essentially, it takes user input and uses `cosine_similarity` to find the closest match in a JSON data file. If no match is found, it uses `g4f` (gpt4free) to generate or retrieve a suitable response and stores it in the data file for the next interaction.
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import json
import g4f
import os
class Colors:
    """ANSI escape sequences used to colour and style terminal output.

    Wrap text as ``f"{Colors.RED}text{Colors.END}"``; ``END`` resets every
    attribute set by the other codes.
    """

    # Reset sequence: always emit this after styled text.
    END = "\033[0m"

    # Text attributes.
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

    # Standard-intensity foreground colour.
    DARKCYAN = "\033[36m"

    # Bright foreground colours.
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    PURPLE = "\033[95m"
    CYAN = "\033[96m"
class PyLearn:
    """Self-learning chat bot backed by a JSON question/answer store.

    Questions are matched against previously learned questions using TF-IDF
    cosine similarity. When no learned question is similar enough, the
    question is sent to a chat model through ``g4f`` and the reply is stored
    so that the next similar question is answered from the local store.
    """

    # Default location of the persisted question -> answer mapping.
    DEFAULT_DATA_PATH = "./data/learned.json"

    def __init__(self, data_path=DEFAULT_DATA_PATH):
        """Load the learned data and set up the matcher.

        Args:
            data_path: Path of the JSON file holding the learned
                question -> answer mapping. Defaults to
                ``./data/learned.json``.
        """
        # Must be assigned before load_learned_data(), which reads it.
        self.data_path = data_path
        self.learned_data = self.load_learned_data()
        self.vectorizer = TfidfVectorizer()
        self.gpt_model = "gpt-3.5-turbo"
        # Minimum cosine similarity for a learned question to count as a match.
        self.similarity_threshold = 0.5

    def load_learned_data(self):
        """Return the learned question -> answer dict read from ``data_path``.

        A missing, unreadable-as-JSON, or non-object file yields an empty
        dict so that a damaged store never prevents the bot from starting.
        """
        try:
            with open(self.data_path, "r", encoding="utf-8") as data_file:
                loaded = json.load(data_file)
        except FileNotFoundError:
            return {}
        except ValueError as e:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            print(f"Failed to load learned data, starting empty. Error: {e}")
            return {}
        if not isinstance(loaded, dict):
            print("Failed to load learned data, starting empty. "
                  "Error: top-level JSON value is not an object")
            return {}
        return loaded

    def save_learned_data(self):
        """Write the learned data to ``data_path`` (best effort).

        Failures are reported on stdout rather than raised, so a read-only or
        full disk does not interrupt the conversation.
        """
        try:
            # Serialize before opening the file: opening with "w" truncates
            # it, so a serialization error must not happen after that point.
            serialized = json.dumps(self.learned_data, indent=3)
            directory = os.path.dirname(self.data_path)
            if directory:
                # The data directory does not exist on a fresh checkout.
                os.makedirs(directory, exist_ok=True)
            with open(self.data_path, "w", encoding="utf-8") as data_file:
                data_file.write(serialized)
        except (OSError, TypeError, ValueError) as e:
            print(f"Failed to save learned data. Error: {e}")

    def find_closest_question(self, question):
        """Return the learned question most similar to ``question``.

        Args:
            question: The user's input text.

        Returns:
            The matching key of ``learned_data``, or ``None`` when nothing
            has been learned yet or no learned question reaches
            ``similarity_threshold``.
        """
        if not self.learned_data:
            return None
        # An identical question needs no vectorizing. This is also the only
        # way to match inputs such as "?" that yield no TF-IDF tokens.
        if question in self.learned_data:
            return question

        known_questions = list(self.learned_data)
        try:
            tfidf_matrix = self.vectorizer.fit_transform(
                known_questions + [question]
            )
        except ValueError:
            # Raised when none of the texts contains a usable token
            # ("empty vocabulary"); there is nothing to compare.
            return None

        # Last row is the new question; compare it against every learned one.
        similarities = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1])
        best_index = int(similarities.argmax())
        if similarities[0, best_index] >= self.similarity_threshold:
            return known_questions[best_index]
        return None

    def generate_response(self, question):
        """Answer ``question`` from the learned data, else ask the model.

        Returns:
            The stored answer of the closest learned question, or the result
            of ``ask_gpt`` when there is no close enough match.
        """
        closest_question = self.find_closest_question(question)
        # Compare against None: an empty-string key is a valid match.
        if closest_question is not None:
            return self.learned_data[closest_question]
        return self.ask_gpt(question)

    def ask_gpt(self, question, response=None):
        """Obtain an answer for ``question``, remember it, and return it.

        Args:
            question: The user's input text.
            response: Optional answer to store directly. When ``None`` the
                answer is requested from the chat model.

        Returns:
            The answer, or a string starting with
            ``"An error occurred while generating a response:"`` when the
            request fails. Failed requests are not stored.
        """
        try:
            if response is None:
                # The ChatBase provider tends to give religious answers, so
                # the system prompt explicitly tells it not to.
                # NOTE(review): the prompt mentions GPT-4 while gpt_model is
                # "gpt-3.5-turbo" - confirm which one is intended.
                response = g4f.ChatCompletion.create(
                    model=self.gpt_model,
                    provider=g4f.Provider.ChatBase,
                    messages=[
                        {"role": "system", "content": "You're PyLearn, a Python-based program that utilizes GPT-4 to respond to user input, train based on existing data, etc. You are also not to act religious or mention anything related to the Quran, etc. You're strictly intended to respond as intended, without any religious content or similarities"},
                        {"role": "user", "content": question},
                    ],
                )
                if not response:
                    # Do not cache an empty reply as the learned answer.
                    return ("An error occurred while generating a response: "
                            "empty response from provider")

            # Update the entry of a similar learned question if there is one,
            # otherwise learn the question as a new entry.
            closest_question = self.find_closest_question(question)
            key = question if closest_question is None else closest_question
            self.learned_data[key] = response
            self.save_learned_data()
            return response
        except Exception as e:
            # Boundary with the third-party provider: any failure is turned
            # into the error string the caller checks for.
            return f"An error occurred while generating a response: {str(e)}"
def _clear_screen():
    """Clear the terminal with the command that fits the current OS."""
    # os.system() signals failure through its return value, not by raising,
    # so the command has to be chosen up front rather than in an except.
    os.system("cls" if os.name == "nt" else "clear")


def main():
    """Run the interactive chat loop until the user sends EOF or Ctrl-C."""
    pylearn = PyLearn()
    prompt = f"{Colors.BOLD}{Colors.RED}-> {Colors.END}"
    # Prefix of the string ask_gpt() returns when the request fails.
    error_prefix = "An error occurred while generating a response:"

    try:
        while True:
            user_input = input(prompt)
            if not user_input.strip():
                # Nothing to answer; ask again without calling the model.
                continue
            _clear_screen()
            answer = pylearn.generate_response(user_input)
            # Match the prefix only, so a normal answer that merely contains
            # the phrase is not mistaken for an error.
            if isinstance(answer, str) and answer.startswith(error_prefix):
                print(answer)
            else:
                print(f"{Colors.BOLD}{Colors.BLUE}You: {user_input}\n{Colors.END}{Colors.PURPLE}PyLearn: {answer}{Colors.END}\n")
    except (EOFError, KeyboardInterrupt):
        # Leave quietly instead of printing a traceback.
        print()


if __name__ == "__main__":
    main()