# Amazon Web Scraper Project
# import libraries
from bs4 import BeautifulSoup
import requests
import time
import datetime
import smtplib# Connect to a website
# Connect to a website
URL = 'https://www.amazon.com/dp/B01B3ET8IG/ref=sspa_dk_detail_2?psc=1&pd_rd_i=B01B3ET8IG&pd_rd_w=0uSIj&content-id=amzn1.sym.eb7c1ac5-7c51-4df5-ba34-ca810f1f119a&pf_rd_p=eb7c1ac5-7c51-4df5-ba34-ca810f1f119a&pf_rd_r=7G3F0KCT5SPC3C7SVWGV&pd_rd_wg=daOen&pd_rd_r=9b1280e6-b1c6-4982-812b-daa63e2baafb&s=apparel&sp_csd=d2lkZ2V0TmFtZT1zcF9kZXRhaWw'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    "Accept-Encoding": "gzip, deflate",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "DNT": "1",
    "Connection": "close",
    "Upgrade-Insecure-Requests": "1",
}

# Without a timeout, requests waits indefinitely on a stalled connection.
page = requests.get(URL, headers=headers, timeout=30)
# Surface HTTP errors here instead of failing later on a missing element.
page.raise_for_status()

soup1 = BeautifulSoup(page.content, 'html.parser')
# Re-parse the prettified markup, exactly as the original notebook did.
soup2 = BeautifulSoup(soup1.prettify(), 'html.parser')

title = soup2.find(id='productTitle').get_text()
print(title)
# The price text is taken from the first <span> nested inside the
# element carrying the "a-price" class.
price = soup2.find('span', {'class': 'a-price'}).find('span').text
print(price)

# Clean up the data a little bit
# Trim whitespace, then drop the first character (presumably the
# currency symbol -- confirm against the page being scraped).
price = price.strip()[1:]
title = title.strip()
print(title)
print(price)
# Bare expression kept from the notebook cell; it only displays a value
# in an interactive session and does nothing when run as a script.
type(price)

# Create a timestamp for the output to track when the data was collected
import datetime
today = datetime.date.today()
print(today)

import csv
header = ['Title', 'Price', 'Date']
data = [title, price, today]
# Mode 'w' truncates the file, so this (re)creates the dataset with a
# header row followed by the first observation.
with open('AmazonWebScrapperDataset.csv', 'w', newline='', encoding='UTF8') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerow(data)

import pandas as pd
# NOTE(review): this absolute path is user-specific, while the writes above
# use a path relative to the working directory -- confirm they are the
# same file before running elsewhere.
df = pd.read_csv(r"C:\Users\sylvi\AmazonWebScrapperDataset.csv")
print(df)

# Appending data to the csv
with open('AmazonWebScrapperDataset.csv', 'a+', newline='', encoding='UTF8') as f:
    writer = csv.writer(f)
    writer.writerow(data)
# Combine all of the above code into one function to automate the process
def check_price():
    """Scrape the product page once and append one row to the CSV dataset.

    Downloads the hard-coded Amazon product URL, extracts the product title
    and price, and appends ``[title, price, today]`` to
    ``AmazonWebScrapperDataset.csv`` in the current working directory.

    Returns:
        None. The only effect is the appended CSV row.

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with an HTTP error status.
        ValueError: if the title or price element is not present in the
            returned page.
    """
    import csv
    import datetime

    URL = 'https://www.amazon.com/dp/B01B3ET8IG/ref=sspa_dk_detail_2?psc=1&pd_rd_i=B01B3ET8IG&pd_rd_w=0uSIj&content-id=amzn1.sym.eb7c1ac5-7c51-4df5-ba34-ca810f1f119a&pf_rd_p=eb7c1ac5-7c51-4df5-ba34-ca810f1f119a&pf_rd_r=7G3F0KCT5SPC3C7SVWGV&pd_rd_wg=daOen&pd_rd_r=9b1280e6-b1c6-4982-812b-daa63e2baafb&s=apparel&sp_csd=d2lkZ2V0TmFtZT1zcF9kZXRhaWw'
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
        "Accept-Encoding": "gzip, deflate",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "DNT": "1",
        "Connection": "close",
        "Upgrade-Insecure-Requests": "1",
    }

    # Without a timeout, a stalled connection would block the caller forever.
    page = requests.get(URL, headers=headers, timeout=30)
    page.raise_for_status()

    soup1 = BeautifulSoup(page.content, 'html.parser')
    # Re-parse the prettified markup, exactly as the original code did.
    soup2 = BeautifulSoup(soup1.prettify(), 'html.parser')

    title_tag = soup2.find(id='productTitle')
    price_box = soup2.find('span', {'class': 'a-price'})
    price_tag = price_box.find('span') if price_box is not None else None
    if title_tag is None or price_tag is None:
        # find() returns None when nothing matches; report that explicitly
        # rather than crashing with an AttributeError on None.
        raise ValueError('Could not find the product title or price in the page')

    title = title_tag.get_text().strip()
    # Trim whitespace, then drop the first character (presumably the
    # currency symbol -- confirm against the page being scraped).
    price = price_tag.text.strip()[1:]

    today = datetime.date.today()
    data = [title, price, today]

    # Append only: the header row is expected to exist in the file already.
    with open('AmazonWebScrapperDataset.csv', 'a+', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        writer.writerow(data)
# Runs check_price after a set time (per day) and inputs data into your CSV
# Poll once per day (86400 seconds). The loop has no exit condition of its
# own, so stop it with Ctrl+C; the interrupt is caught so that the dataset
# preview below can still run instead of being unreachable.
try:
    while True:
        check_price()
        time.sleep(86400)
except KeyboardInterrupt:
    pass

import pandas as pd
# NOTE(review): this absolute path is user-specific, while check_price()
# appends to a path relative to the working directory -- confirm they are
# the same file before running elsewhere.
df = pd.read_csv(r"C:\Users\sylvi\AmazonWebScrapperDataset.csv")
print(df)