Twitter scraping with snscrape
!pip install snscrape
!pip install hvplot panel
!pip install wordcloud
import pandas as pd
import snscrape.modules.twitter as sntwitter
import datetime as dt
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import seaborn as sns
import panel as pn
import hvplot.pandas
import hvplot as hv
pn.extension('tabulator', sizing_mode='stretch_width')
hv.extension('bokeh')
# defining color palette (CSS color names; single-letter matplotlib codes are not valid CSS backgrounds)
PAL = ['red', 'yellow', 'green']
pn.Row(pn.layout.HSpacer(height=50, background=PAL[0]))
list1 = []
for tweet in sntwitter.TwitterHashtagScraper('DataFestAfrica22').get_items():
    list1.append([tweet.date, tweet.id,
                  tweet.retweetedTweet.id if tweet.retweetedTweet else None,  # retweetedTweet is a Tweet object, so take its id
                  tweet.content, tweet.user.username, tweet.replyCount,
                  tweet.likeCount, tweet.retweetCount, tweet.source])
column_names = ['tweet_datetime', 'tweet_id', 'retweeted_tweet_id', 'tweet_content',
                'username', 'numb_reply', 'numb_likes', 'numb_retweet', 'tweet_source']
tweetdf = pd.DataFrame(list1, columns=column_names)
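Hashtag scrapes can run for a long time on busy tags; a minimal sketch of capping the pull with enumerate (the 500-tweet limit is an arbitrary choice, not part of the original run):
capped = []
for i, tweet in enumerate(sntwitter.TwitterHashtagScraper('DataFestAfrica22').get_items()):
    if i >= 500:  # arbitrary cap; raise or drop it as needed
        break
    capped.append([tweet.date, tweet.id, tweet.content])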
tweetdf['tweet_date'] = tweetdf.tweet_datetime.dt.date
print(tweetdf.shape)
print(tweetdf.info())
tweetdf.head()
tweetdf.to_csv('datafest22_twitter_data.csv', index=False)
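If the notebook is restarted later, the saved file can be reloaded instead of re-scraping; a sketch assuming the filename used above:
tweetdf = pd.read_csv('datafest22_twitter_data.csv', parse_dates=['tweet_datetime'])
tweetdf['tweet_date'] = tweetdf.tweet_datetime.dt.date  # dates round-trip as strings, so recompute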
# daily tweet counts; sort_index() keeps the dates in order on the x-axis
tweetdf.tweet_date.value_counts().sort_index().plot();
plt.xticks(rotation=90);
tweetdf.username.value_counts()[0:10].plot(kind='bar')
(tweetdf.groupby('username')[['numb_likes', 'numb_reply']]
    .sum()
    .sort_values(by=['numb_likes', 'numb_reply'], ascending=False)[0:10]
    .plot(kind='bar'))
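hvplot and panel are imported above but never used; a minimal interactive counterpart to the top-10 bar chart, using hvplot's pandas accessor (the title and labels are my own wording):
top10_bar = tweetdf.username.value_counts().head(10).hvplot.bar(
    xlabel='username', ylabel='number of tweets', title='Top 10 tweeters', rot=90)
pn.Column(top10_bar)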
top10_tweeters = tweetdf.username.value_counts()[0:10].index
top10_df = tweetdf[tweetdf.username.isin(top10_tweeters)]
data = top10_df.groupby(['tweet_date', 'username']).count()['tweet_id'].reset_index()
g = sns.relplot(data=data, x='tweet_date', y='tweet_id', col='username',
                col_wrap=2, kind='line', col_order=top10_tweeters)
g.set(ylabel='number of tweets', xlabel='date');
plt.xticks(rotation=90)
g.fig.suptitle('changes in number of tweets with date for top 10 tweeters', x=.5, y=1);
wordlist = " ".join(content.replace('https', '') for content in tweetdf.tweet_content)
word_cloud = WordCloud(collocations=False, background_color='black').generate(wordlist)
# Display the generated Word Cloud
plt.imshow(word_cloud, interpolation='bilinear')
plt.axis("off")
plt.show()
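The simple 'https' replacement above leaves URL fragments and @-mentions in the cloud; a sketch of a stricter cleanup with re (the patterns are an assumption, adjust to taste):
import re
clean_text = " ".join(
    re.sub(r'https?://\S+|@\w+|#\w+', '', content)  # strip URLs, mentions, and hashtags
    for content in tweetdf.tweet_content
)
word_cloud = WordCloud(collocations=False, background_color='black').generate(clean_text)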
Scheduled daily scraping of a hashtag
!pip install snscrape
import csv
import datetime
import time
import snscrape.modules.twitter as sntwitter
# Define the hashtag
hashtag = "#datafam"
# Scrape the previous day's tweets, then repeat once every 24 hours
while True:
    # Define the date range and output file name
    start_date = datetime.date.today() - datetime.timedelta(days=1)
    end_date = datetime.date.today()
    output_file = f"{hashtag}_{start_date}_{end_date}.csv"
    # Define the search query
    query = f"{hashtag} since:{start_date} until:{end_date}"
    # Scrape the tweets and save to a CSV file
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["id", "date", "content", "username", "url"])
        for tweet in sntwitter.TwitterSearchScraper(query).get_items():
            writer.writerow([tweet.id, tweet.date, tweet.content, tweet.user.username, tweet.url])
    # Wait for 24 hours before scraping again
    time.sleep(86400)
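The query string accepts the same operators as Twitter's advanced search, so the daily pull can be narrowed further; a sketch that could be swapped in for the query line above (the operators are standard Twitter search syntax, the thresholds are arbitrary):
query = (f"{hashtag} since:{start_date} until:{end_date} "
         "lang:en min_faves:10 -filter:retweets")  # English only, 10+ likes, no retweets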