Answer Correctness

The Answer Correctness evaluator compares each generated answer against a ground-truth answer and returns a per-question correctness score. The example below evaluates two question/answer pairs using an OpenAI model.

import logging
import sys
from dotenv import find_dotenv, load_dotenv
from dynamiq.evaluations.metrics import AnswerCorrectnessEvaluator
from dynamiq.nodes.llms import OpenAI
# Load environment variables for the OpenAI API
load_dotenv(find_dotenv())
# Configure logging level
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# Initialize the OpenAI language model
llm = OpenAI(model="gpt-4o-mini")
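# Note: "gpt-4o-mini" keeps the example inexpensive; any chat model name your
# OpenAI account supports (e.g. "gpt-4o") can be passed here instead.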
# Sample data
questions = [
    "What powers the sun and what is its primary function?",
    "What is the boiling point of water?",
]
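# Candidate answers to score. The first answer is deliberately inaccurate
# (fission instead of fusion), so it should receive a lower correctness score.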
answers = [
    (
        "The sun is powered by nuclear fission, similar to nuclear reactors on Earth."
        " Its primary function is to provide light to the solar system."
    ),
    "The boiling point of water is 100 degrees Celsius at sea level.",
]
ground_truth_answers = [
    (
        "The sun is powered by nuclear fusion, where hydrogen atoms fuse to form helium."
        " This fusion process releases a tremendous amount of energy. The sun provides"
        " heat and light, which are essential for life on Earth."
    ),
    (
        "The boiling point of water is 100 degrees Celsius (212 degrees Fahrenheit) at"
        " sea level. The boiling point can change with altitude."
    ),
]
# Initialize evaluator
evaluator = AnswerCorrectnessEvaluator(llm=llm)
# Evaluate
correctness_scores = evaluator.run(
    questions=questions,
    answers=answers,
    ground_truth_answers=ground_truth_answers,
    verbose=False,  # Set verbose=True to enable logging
)
# Print the results
for idx, score in enumerate(correctness_scores):
    print(f"Question: {questions[idx]}")
    print(f"Answer Correctness Score: {score}")
    print("-" * 50)

print("Answer Correctness Scores:")
print(correctness_scores)
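If you want a single summary number across all questions, you can aggregate the per-question results. A minimal sketch, assuming each entry in correctness_scores is a numeric score as printed above:

# Aggregate the per-question scores into one average value.
# Assumes correctness_scores is a flat list of numbers.
average_score = sum(correctness_scores) / len(correctness_scores)
print(f"Average Answer Correctness: {average_score:.2f}")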