Skip to content
Extract Mentions and Hashtags from Tweets
Extract mentions and hashtags from tweets
Extract mentions and hashtags from your Twitter data with regex. This is a helpful first step in topic identification.
# Load packages
import pandas as pd
import nltk
from nltk.tokenize import regexp_tokenize
import pandas as pd
# Upload your data as a csv or json file and load it as a data frame
import pandas as pd
# Load the tweets into a data frame, preferring the CSV export and falling
# back to the JSON export when the CSV cannot be used.
try:
    df = pd.read_csv('twitter.csv')
except (OSError, ValueError):
    # OSError covers a missing or unreadable file; pandas' CSV parsing errors
    # (ParserError, EmptyDataError) and decoding errors are ValueError
    # subclasses. Anything else (e.g. KeyboardInterrupt) is left to propagate
    # instead of being silently swallowed by a bare `except:`.
    df = pd.read_json('twitter.json', orient='index').T
# Preview the first rows (displayed when run as a notebook cell).
df.head()
# Define a regex pattern to find hashtags: pattern1
pattern1 = r"#\w+"
# Define a regex pattern to find mentions (@): pattern2
# No capturing parentheses: NLTK's regexp tokenizer documents that token
# patterns must not contain capturing groups. The matched strings are the same.
pattern2 = r"@\w+"

# One list of tokens per tweet, kept in the same order as the rows of df so
# they can be assigned back as columns.
hashtags = []
mentions = []
for tweet in df['text']:
    if not isinstance(tweet, str):
        # Missing text (pandas represents it as NaN, a float) cannot be
        # tokenized; record "no tokens" so both lists stay aligned with df.
        hashtags.append([])
        mentions.append([])
        continue
    hashtags.append(regexp_tokenize(tweet, pattern1))
    mentions.append(regexp_tokenize(tweet, pattern2))

# Attach the extracted tokens to the data frame, one list per row.
df['mentions'] = mentions
df['hashtags'] = hashtags
# Preview the result (displayed when run as a notebook cell).
df.head()