Skip to content
# Setup (run in a shell, or prefix with "!" in a notebook cell): pip install --upgrade openai -q
import os
from openai import OpenAI
# Shared API client used by every step below. The key is read from the
# environment so it never appears in the source; a missing OPENAI_API_KEY
# raises KeyError right here.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
# Dataset
# Training examples as prompt/completion records. Every prompt ends with the
# "->" separator and every completion starts with a single space and ends
# with a newline; the comprehension applies that framing so the
# (question, answer) pairs below stay easy to read and extend.
training_data = [
    {"prompt": f"{question}->", "completion": f" {answer}\n"}
    for question, answer in (
        (
            "What is the capital of France?",
            "The capital of France is Paris.",
        ),
        (
            "What is the primary function of the heart?",
            "The primary function of the heart is to pump blood throughout the body.",
        ),
        (
            "What is photosynthesis?",
            "Photosynthesis is the process by which green plants and some other organisms convert sunlight into chemical energy stored in the form of glucose.",
        ),
        (
            "Who wrote the play 'Romeo and Juliet'?",
            "William Shakespeare wrote the play 'Romeo and Juliet'.",
        ),
        (
            "Which element has the atomic number 1?",
            "Hydrogen has the atomic number 1.",
        ),
        (
            "What is the largest planet in our solar system?",
            "Jupiter is the largest planet in our solar system.",
        ),
        (
            "What is the freezing point of water in Celsius?",
            "The freezing point of water in Celsius is 0 degrees.",
        ),
        (
            "What is the square root of 144?",
            "The square root of 144 is 12.",
        ),
        (
            "Who is the author of 'To Kill a Mockingbird'?",
            "The author of 'To Kill a Mockingbird' is Harper Lee.",
        ),
        (
            "What is the smallest unit of life?",
            "The smallest unit of life is the cell.",
        ),
    )
]
# Held-out examples, framed exactly like the training records: the prompt is
# the question followed by "->", and the completion is a space, the answer,
# and a trailing newline.
validation_data = [
    {"prompt": f"{question}->", "completion": f" {answer}\n"}
    for question, answer in (
        (
            "Which gas do plants use for photosynthesis?",
            "Plants use carbon dioxide for photosynthesis.",
        ),
        (
            "What are the three primary colors of light?",
            "The three primary colors of light are red, green, and blue.",
        ),
        (
            "Who discovered penicillin?",
            "Sir Alexander Fleming discovered penicillin.",
        ),
        (
            "What is the chemical formula for water?",
            "The chemical formula for water is H2O.",
        ),
        (
            "What is the largest country by land area?",
            "Russia is the largest country by land area.",
        ),
        (
            "What is the speed of light in a vacuum?",
            "The speed of light in a vacuum is approximately 299,792 kilometers per second.",
        ),
        (
            "What is the currency of Japan?",
            "The currency of Japan is the Japanese Yen.",
        ),
        (
            "What is the smallest bone in the human body?",
            "The stapes, located in the middle ear, is the smallest bone in the human body.",
        ),
    )
]
# Saving the Dataset
import json
# On-disk JSONL file names for the two dataset splits; the upload step opens
# these same paths.
training_file_name, validation_file_name = (
    "training_data.jsonl",
    "validation_data.jsonl",
)
def prepare_data(dictionary_data, final_file_name):
    """Write *dictionary_data* to *final_file_name* as JSON Lines.

    Each entry is serialized with ``json.dump`` and followed by a single
    newline, so the file holds one JSON document per line. An existing file
    at the same path is overwritten.

    Args:
        dictionary_data: Iterable of JSON-serializable entries.
        final_file_name: Path of the file to create or overwrite.
    """
    # Pin the encoding and the newline so the bytes written do not depend on
    # the platform's default locale or on "\n" -> "\r\n" translation.
    with open(final_file_name, "w", encoding="utf-8", newline="\n") as outfile:
        for entry in dictionary_data:
            json.dump(entry, outfile)
            outfile.write("\n")
# Write both splits to disk. The file-name constants are reused (instead of
# repeating the literals) so the files written here are guaranteed to be the
# ones opened later for upload.
prepare_data(training_data, training_file_name)
prepare_data(validation_data, validation_file_name)
# Uploading the Dataset
# Upload both JSONL files with purpose "fine-tune". Each file is opened in a
# `with` block so its handle is closed as soon as the upload call returns,
# rather than being left open for the rest of the session.
#
# Despite their names, training_file_id / validation_file_id hold the objects
# returned by files.create; later code reads the identifier via `.id`.
with open(training_file_name, "rb") as training_fh:
    training_file_id = client.files.create(
        file=training_fh,
        purpose="fine-tune",
    )

with open(validation_file_name, "rb") as validation_fh:
    validation_file_id = client.files.create(
        file=validation_fh,
        purpose="fine-tune",
    )

print(f"Training File ID: {training_file_id}")
print(f"Validation File ID: {validation_file_id}")
# Bare expression, presumably kept from a notebook cell where it displays the
# id; it has no effect when run as a script.
training_file_id.id
# Finetuning
# Start the fine-tuning job on top of "davinci-002" using the two uploaded
# files, then keep the job id and initial status for the monitoring steps.
response = client.fine_tuning.jobs.create(
    model="davinci-002",
    training_file=training_file_id.id,
    validation_file=validation_file_id.id,
    hyperparameters=dict(
        n_epochs=15,
        batch_size=3,
        learning_rate_multiplier=0.3,
    ),
)
job_id, status = response.id, response.status

# One print call, one line per message: same output as three separate prints.
print(
    f'Fine-tunning model with jobID: {job_id}.',
    f"Training Response: {response}",
    f"Training Status: {status}",
    sep="\n",
)
# Monitoring the Jobs
import signal
import datetime
def signal_handler(sig, frame):
    """Print the fine-tuning job's current status when a signal arrives.

    The handler only reports; it neither raises nor exits, so the code that
    was running when the signal arrived resumes afterwards.

    Args:
        sig: Signal number passed in by the ``signal`` module (unused).
        frame: Stack frame that was interrupted (unused).
    """
    job = client.fine_tuning.jobs.retrieve(job_id)
    status = job.status
    print(f"Stream interrupted. Job is still {status}.")
print(f"Streaming events for the fine-tuning job: {job_id}")

# Install the reporting handler only while events are being listed, and keep
# the handler it replaces so it can be put back afterwards. Leaving
# signal_handler installed would make Ctrl+C ineffective for everything that
# runs later, because that handler only prints and returns.
previous_sigint_handler = signal.signal(signal.SIGINT, signal_handler)
try:
    events = client.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id)
    try:
        for event in events:
            print(
                f'{datetime.datetime.fromtimestamp(event.created_at)} {event.message}'
            )
    except Exception as exc:
        # Deliberately best-effort: a failure while listing events must not
        # abort the script, but the cause is shown instead of being hidden.
        print("Stream interrupted (client disconnected).")
        print(f"Reason: {exc!r}")
finally:
    # signal.signal() returns None when the previous handler was not
    # installed from Python; None cannot be passed back in, so skip it.
    if previous_sigint_handler is not None:
        signal.signal(signal.SIGINT, previous_sigint_handler)
import time
# Statuses after which the job will not change again. "cancelled" is included
# because a cancelled job never becomes "succeeded" or "failed"; without it
# the wait loop below would poll forever.
_TERMINAL_STATUSES = ("succeeded", "failed", "cancelled")

status = client.fine_tuning.jobs.retrieve(job_id).status
if status not in _TERMINAL_STATUSES:
    print(f"Job not in terminal status: {status}. Waiting.")
    # Poll every 2 seconds until the job reaches a terminal status.
    while status not in _TERMINAL_STATUSES:
        time.sleep(2)
        status = client.fine_tuning.jobs.retrieve(job_id).status
        print(f"Status: {status}")
else:
    print(f"Finetune job {job_id} finished with status: {status}")

print("Checking other finetune jobs in the subscription.")
result = client.fine_tuning.jobs.list()
# NOTE(review): result.data looks like a single page of results, so this
# count may be lower than the real total for accounts with many jobs - confirm.
print(f"Found {len(result.data)} finetune jobs.")