from pyhere import here
import sys
import os
import pandas as pd

sys.path.append(os.path.abspath('../..'))  # adding the absolute path

from openai import OpenAI

client = OpenAI()
6 Custom Memory
6.1 Text Only Memory
6.1.0.1 Fn: Helper _get_messages_user_assistant_text()
from typing import List
import pandas as pd

def _get_messages_user_assistant_text(mem_df: pd.DataFrame,
                                      user_key = "user",
                                      assistant_key = "assistant") -> list[dict]:
    """Convert user-assistant DataFrame into a list of message dictionaries."""
    messages = []
    for i in range(len(mem_df)):
        if mem_df[user_key][i]:  # Check if 'user' cell is not empty
            messages.append({"role": user_key, "content": mem_df[user_key][i]})
        if mem_df[assistant_key][i]:  # Check if 'assistant' cell is not empty
            messages.append({"role": assistant_key, "content": mem_df[assistant_key][i]})
    return messages
# Example usage
mem_df = pd.DataFrame({"user": ["Hi", "Nothing"], "assistant": ["How can I help you?", "Bye"]})
messages = _get_messages_user_assistant_text(mem_df)
print(messages)
[{'role': 'user', 'content': 'Hi'}, {'role': 'assistant', 'content': 'How can I help you?'}, {'role': 'user', 'content': 'Nothing'}, {'role': 'assistant', 'content': 'Bye'}]
mem_df = pd.DataFrame({"user": ["Hi", "There"], "assistant": ["How can I...", "Hello"]})
mem_df.loc[1]
mem_df.loc[1, "assistant"] = "Ha"
# _get_messages_user_assistant(mem_df)
mem_df.loc[1, "assistant"]

'Ha'
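Because both appends are guarded by truthiness checks, a row whose assistant cell is still empty contributes only its user message. That is what the main function below relies on when it inserts a pending turn as `[prompt, None]`. A quick check with made-up values:

pending_df = pd.DataFrame({"user": ["Hi", "What's my name?"], "assistant": ["Hello!", None]})
_get_messages_user_assistant_text(pending_df)
# -> the last row yields only the user message; the empty assistant cell is skipped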
6.1.0.2 Main Fn
from typing import List
import pandas as pd

def get_completions_text_mem_df(prompts: List[str], model = "gpt-4o"):

    msg: List[dict[str, str]]
    mem_df = pd.DataFrame({"user": [], "assistant": []})

    for i in range(len(prompts)):
        # Get user text
        mem_df.loc[i] = [prompts[i], None]
        # Convert to user-assistant messages
        msg = _get_messages_user_assistant_text(mem_df)
        # Generate Response
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                *msg
            ]
        )
        # response_text = "Resp: " + prompts[i]  # For Debug
        response_text = completion.choices[0].message.content
        # Update memory DF
        mem_df.loc[i, "assistant"] = response_text

    return mem_df

get_completions_text_mem_df(["Hi, my name is Kittipos.", "What's my name?"])
|   | user | assistant |
|---|------|-----------|
| 0 | Hi, my name is Kittipos. | Hello, Kittipos! How can I assist you today? |
| 1 | What's my name? | Your name is Kittipos. How can I help you furt... |
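Since the function returns its memory DataFrame, the conversation can be resumed later: append a new, unanswered user row and rebuild the message list with the same helper. A minimal sketch that re-runs the example above, keeps the returned DataFrame, and then asks one more question against the accumulated memory (variable names here are illustrative):

text_mem_df = get_completions_text_mem_df(["Hi, my name is Kittipos.", "What's my name?"])
# Append a pending user turn, replay the whole memory, then store the new answer.
text_mem_df.loc[len(text_mem_df)] = ["Thanks, that's all.", None]
msg = _get_messages_user_assistant_text(text_mem_df)
completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "system", "content": "You are a helpful assistant."}, *msg]
)
text_mem_df.loc[len(text_mem_df) - 1, "assistant"] = completion.choices[0].message.content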
6.2 Vision Memory
6.2.0.1 Helper: _get_messages_user_assistant_text_image()
import pandas as pd

mem_vision_df = pd.DataFrame({
    "user_text": ["Convert data from this image to markdown text"] * 2,
    "user_image_url": ["url1", "url2"],
    "assistant_text": ["out1", "out2"]
})
mem_vision_df
|   | user_text | user_image_url | assistant_text |
|---|-----------|----------------|----------------|
| 0 | Convert data from this image to markdown text | url1 | out1 |
| 1 | Convert data from this image to markdown text | url2 | out2 |
def _get_messages_user_assistant_text_image(mem_vision_df: pd.DataFrame, image_detail = "high") -> list[dict]:
    """Convert user (text + image) + assistant DataFrame into a list of message dictionaries."""
    messages: List[dict[str, str | List]] = []

    for i in range(len(mem_vision_df)):
        user_text = mem_vision_df["user_text"][i]
        user_image_url = mem_vision_df["user_image_url"][i]
        assistant_text = mem_vision_df["assistant_text"][i]

        if user_text and user_image_url:
            messages.append({"role": "user",
                             "content": [
                                 {"type": "text", "text": user_text},
                                 {"type": "image_url", "image_url": {"url": user_image_url, "detail": image_detail}}
                             ]})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    return messages
_get_messages_user_assistant_text_image(mem_vision_df)
[{'role': 'user',
'content': [{'type': 'text',
'text': 'Convert data from this image to markdown text'},
{'type': 'image_url', 'image_url': {'url': 'url1', 'detail': 'high'}}]},
{'role': 'assistant', 'content': 'out1'},
{'role': 'user',
'content': [{'type': 'text',
'text': 'Convert data from this image to markdown text'},
{'type': 'image_url', 'image_url': {'url': 'url2', 'detail': 'high'}}]},
{'role': 'assistant', 'content': 'out2'}]
mem_vision_df
|   | user_text | user_image_url | assistant_text |
|---|-----------|----------------|----------------|
| 0 | Convert data from this image to markdown text | url1 | out1 |
| 1 | Convert data from this image to markdown text | url2 | out2 |
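The `image_detail` argument is copied into every `image_url` part, so the same memory can be replayed at a cheaper detail level; to the best of my knowledge the Chat Completions image input also accepts "low" and "auto" here. For example:

_get_messages_user_assistant_text_image(mem_vision_df, image_detail="low")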
6.2.0.2 Main Fn
from typing import List
import pandas as pd

def get_completions_vision_mem_df(image_prompt: str,
                                  image_prompt_next: str | None = None,
                                  image_urls: List[str] | None = None,
                                  base64_images: List[str] | None = None,
                                  system_prompt: str = "You are a helpful assistant.",
                                  model = "gpt-4o"):

    msg: List[dict[str, str | List]] = []
    mem_vision_df = pd.DataFrame({"user_text": [], "user_image_url": [], "assistant_text": []})

    if all([base64_images, image_urls]):
        raise ValueError("Must choose one of: `base64_images` or `image_urls`")
    # If provided `base64_images`, ignore `image_urls`
    if base64_images:
        image_urls = [f"data:image/png;base64,{base64_image}" for base64_image in base64_images]

    for i in range(len(image_urls)):
        # For non-first iteration, if next image prompt is provided, use it.
        if i != 0 and image_prompt_next:
            image_prompt = image_prompt_next

        # Add image prompt and URL to memory DF
        mem_vision_df.loc[i] = [image_prompt, image_urls[i], None]

        # Convert to user-assistant messages
        msg = _get_messages_user_assistant_text_image(mem_vision_df)

        # Generate Response
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                *msg
            ]
        )
        response_text = completion.choices[0].message.content
        # response_text = "Resp: " + image_prompt  # For Debug
        # Update memory DF
        mem_vision_df.loc[i, "assistant_text"] = response_text

    return mem_vision_df
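Because only one image source is allowed per call, passing both trips the guard before any request is sent. A quick check with hypothetical placeholder inputs:

try:
    get_completions_vision_mem_df(image_prompt="Describe this",
                                  image_urls=["https://example.com/a.png"],
                                  base64_images=["iVBORw0KGgo..."])
except ValueError as e:
    print(e)  # Must choose one of: `base64_images` or `image_urls`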
6.2.1 Vision Mem Execute
# Image URL
resp_vision_df_1 = get_completions_vision_mem_df(image_prompt="Give exactly 3 words that gives impression to this image",
                                                 image_prompt_next="This too",
                                                 image_urls=["https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
                                                             "https://upload.wikimedia.org/wikipedia/commons/f/f0/Hintersee-Hochkalter.jpg"]
                                                 )
resp_vision_df_1
|   | user_text | user_image_url | assistant_text |
|---|-----------|----------------|----------------|
| 0 | Give exactly 3 words that gives impression to ... | https://upload.wikimedia.org/wikipedia/commons... | Peaceful, serene, scenic. |
| 1 | This too | https://upload.wikimedia.org/wikipedia/commons... | Majestic, tranquil, natural. |
import base64

def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

img_paths = ["../../img/misc/70.png", "../../img/misc/100.png"]
imgs_enc = [encode_image(img_path) for img_path in img_paths]
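For reference, `get_completions_vision_mem_df` wraps each of these strings in a PNG data URL before it reaches the message list; the manual equivalent is a one-liner (for a valid PNG the payload starts with the base64-encoded PNG header, `iVBORw0KGgo`):

data_url = f"data:image/png;base64,{imgs_enc[0]}"
data_url[:40]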
resp_vision_df_2 = get_completions_vision_mem_df(image_prompt="What is the speed?",
                                                 system_prompt="""
You are the car speed reporter. Report the current speed and previous speed in the prior image (if provided).
""",
                                                 base64_images=imgs_enc)

resp_vision_df_2["assistant_text"].to_list()
['The current speed is 100 kilometers per hour.',
'The current speed is 110 kilometers per hour. The previous speed was 100 kilometers per hour.']
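The returned vision memory can be extended the same way as the text memory: append another pending row, rebuild the messages, and call the API again. A minimal sketch, assuming a hypothetical third dashboard image at `../../img/misc/120.png`:

new_img = encode_image("../../img/misc/120.png")   # hypothetical extra image
mem_df3 = resp_vision_df_2.copy()
# Add a pending user turn (text + data URL, no answer yet), then replay the whole memory.
mem_df3.loc[len(mem_df3)] = ["What is the speed?", f"data:image/png;base64,{new_img}", None]
msg = _get_messages_user_assistant_text_image(mem_df3)
completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "system", "content": "You are the car speed reporter."}, *msg]
)
print(completion.choices[0].message.content)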