# Using an LLM to transform the query
from openai import OpenAI
transformation_prompt = """
Divide the following query into two distinct parts: one for spoken content and one for visual content. The spoken content should refer to any narration, dialogue, or verbal explanations, and the visual content should refer to any images, videos, or graphical representations. Format the response strictly as:\nSpoken: <spoken_query>\nVisual: <visual_query>\n\nQuery: {query}
"""
# Initialize the OpenAI client (reads the OPENAI_API_KEY environment variable by default)
client = OpenAI()
def divide_query(query):
    # Use the OpenAI client to create a chat completion with the structured prompt
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": transformation_prompt.format(query=query)}
        ],
    )
    # The prompt instructs the model to answer in exactly two lines:
    # "Spoken: <spoken_query>" and "Visual: <visual_query>"
    message = response.choices[0].message.content
    divided_query = message.strip().split("\n")
    spoken_query = divided_query[0].replace("Spoken:", "").strip()
    visual_query = divided_query[1].replace("Visual:", "").strip()
    return spoken_query, visual_query
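
# The parse above assumes the model follows the two-line format exactly.
# A slightly more defensive variant (a sketch; divide_query_safe is not from
# the original tutorial) matches lines by prefix, so a blank line or a
# reordered reply doesn't break the split, and falls back to the full query
# when a prefix is missing.
def divide_query_safe(query):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": transformation_prompt.format(query=query)}
        ],
    )
    lines = response.choices[0].message.content.strip().split("\n")
    # Pick the first line carrying each prefix; default to the original query
    spoken_query = next((l.replace("Spoken:", "").strip() for l in lines if l.startswith("Spoken:")), query)
    visual_query = next((l.replace("Visual:", "").strip() for l in lines if l.startswith("Visual:")), query)
    return spoken_query, visual_query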
# Test the query division on a sample query
query = "Show me the footage where the narrator talks about the terrestrial planets and Mercury, Venus, Earth are visible on the screen"
spoken_query, visual_query = divide_query(query)
print(f"Spoken Query: {spoken_query}")
print(f"Visual Query: {visual_query}")