# INTERNPROVE_DS_TASK1

# Load the required libraries
library(tidyverse)
library(dplyr)
library(readr)
library(ggplot2)

# Directory that holds the input data. The full path is built with file.path()
# instead of calling setwd(), so the working directory of the session is left
# untouched.
data_dir <- "/work/files/workspace"

# Load the twitter_training.csv file
twitter_training <- read_csv(file.path(data_dir, "twitter_training.csv"))

# The later steps of this script use the `entity` and `sentiment` columns, so
# fail early with a clear message if the file does not provide them (for
# example when the CSV has no header row).
missing_cols <- setdiff(c("entity", "sentiment"), names(twitter_training))
if (length(missing_cols) > 0) {
  stop(
    "twitter_training.csv is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Display the first few rows of the dataset
head(twitter_training)

# Clean and preprocess the data: every "Irrelevant" sentiment label is folded
# into "Neutral"; all other values (including missing ones) are left as is.
twitter_cleaned <- mutate(
  twitter_training,
  sentiment = replace(sentiment, sentiment %in% "Irrelevant", "Neutral")
)

# Show the cleaned data
twitter_cleaned

# Sentiment analysis: count the rows for every entity/sentiment pair and sort
# the pairs from most to least frequent. count() groups, tallies and sorts in
# a single call and returns an ungrouped tibble, so later steps do not
# silently inherit the grouping that group_by() would leave behind.
twitter_sentiment <- twitter_cleaned %>%
  count(entity, sentiment, sort = TRUE)


# View the result
twitter_sentiment

# Visualize sentiment patterns.
# Bar chart with one bar per entity: bar length is the row count `n`, and the
# fill colour splits each bar by sentiment.
ggplot(data = twitter_sentiment) +
  geom_col(mapping = aes(x = n, y = entity, fill = sentiment)) +
  ggtitle("Sentiment Distribution") +
  xlab("Count") +
  ylab("Entity") +
  theme_minimal()