# InternProve_DS_1
# INTERNPROVE_DS_TASK1
# Load necessary libraries
library(tidyverse)
library(dplyr)
library(readr)
library(ggplot2)
# Locate the input data without changing the session's working directory:
# setwd() in a script alters global state for everything that runs afterwards,
# so the file is addressed by an explicit path instead.
data_dir <- "/work/files/workspace"
# Load the twitter_training.csv file
twitter_training <- read_csv(file.path(data_dir, "twitter_training.csv"))
# Fail early, with a clear message, if the columns used by the later cleaning
# and aggregation steps are absent (for example, if the CSV has no header row).
required_cols <- c("entity", "sentiment")
missing_cols <- setdiff(required_cols, names(twitter_training))
if (length(missing_cols) > 0) {
  stop(
    "twitter_training.csv is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}
# Display the first few rows of the dataset
head(twitter_training)
# Clean and preprocess the data: fold the "Irrelevant" sentiment label into
# "Neutral". Every other value, including missing ones, is left unchanged.
twitter_cleaned <- mutate(
  twitter_training,
  sentiment = replace(sentiment, sentiment == "Irrelevant", "Neutral")
)
# Print the cleaned data
twitter_cleaned
# Sentiment analysis: number of rows for each entity/sentiment combination.
# Passing the columns straight to count() returns an ungrouped tibble, so no
# grouping is left attached to the result (group_by() followed by a bare
# count() keeps the result grouped by entity and sentiment).
twitter_sentiment <- twitter_cleaned %>%
  count(entity, sentiment, sort = TRUE) # sort = TRUE orders by n, descending
# View the result
twitter_sentiment
# Visualize sentiment patterns.
# Bar chart of the pre-computed counts: entities on the y axis, counts on the
# x axis, bars filled by sentiment. geom_col() plots the values in `n` as-is.
ggplot(
  data = twitter_sentiment,
  mapping = aes(x = n, y = entity, fill = sentiment)
) +
  geom_col() +
  theme_minimal() +
  labs(title = "Sentiment Distribution", x = "Count", y = "Entity")