# Course Notes: Regular Expressions in Python

# Import any packages you want to use here
import numpy as np

movie = (
    'fox and kelley soon become bitter rivals because the new fox books '
    'store is opening up right across the block from the small business .'
)

"""
Knowing how to manipulate strings will help you perform many data science tasks faster and easier.
"""
# Count how many characters the review contains
length_string = len(movie)

# Render that count as text so it can be combined with other strings
to_string = "{}".format(length_string)

# Fixed label that introduces the count
statement = "Number of characters in this review:"

# Glue label and count together with a single space and show the result
print(" ".join([statement, to_string]))

movie1 = 'the most significant tension of _election_ is the potential relationship between a teacher and his student .'
movie2 = 'the most significant tension of _rushmore_ is the potential relationship between a teacher and his student .'

# Everything before the title (first 32 characters) and everything after it
# (43rd character onwards) comes from movie1
first_part, last_part = movie1[:32], movie1[42:]

# The title itself (33rd to 42nd character) comes from movie2
middle_part = movie2[32:42]

# The stitched-together pieces, followed by movie2 for comparison
print("".join((first_part, middle_part, last_part)))
print(movie2)

movie_with_palindrome_title = 'oh my God! desserts I stressed was an ugly movie'

# The candidate title occupies indices 11 through 29 of the sentence
movie_title = movie_with_palindrome_title[slice(11, 30)]

# Mirror image of the title, built by walking it back to front
palindrome = "".join(reversed(movie_title))

# Only a title that reads the same in both directions is printed
if palindrome == movie_title:
    print(movie_title)

movie_title_3 = '$I supposed that coming from MTV Films I should expect no less$'

# Convert to lowercase and print the result
print(movie_title_3.lower())

# Split on spaces starting from the right, at most twice, and print the result
print(movie_title_3.rsplit(sep=" ", maxsplit=2))

# Remove the surrounding "$" characters and print the result
stripped_title = movie_title_3.strip("$")
print(stripped_title)

# Split the stripped string into words and print the result.
# The split is computed once and reused below instead of being repeated.
splitter = stripped_title.split(sep=" ", maxsplit=100)
print(splitter)

# Select the root word and print the result: dropping the last character of
# "supposed" leaves "suppose". (A [::-2] slice would instead walk the word
# backwards two characters at a time and produce "dspu".)
root_word = splitter[1][:-1]
print(root_word)

movie = 'the film,however,is all good<\\i>'

# Remove the tag at the end and print the result.
# The exact suffix is sliced off instead of calling rstrip() with the tag:
# rstrip() treats its argument as a set of characters, so it could also eat
# trailing letters of the text itself (e.g. a final "i").
closing_tag = '<\\i>'
movie_tag = movie[:-len(closing_tag)] if movie.endswith(closing_tag) else movie
print(movie_tag)

# Split the tag-free string using commas and print results
movie_no_comma = movie_tag.split(sep=',', maxsplit=2)
print(movie_no_comma)

# Join back together with spaces and print results
movie_join = " ".join(movie_no_comma)
print(movie_join)

"""
The difference between split() and splitlines() is that splitlines() breaks a string at line boundaries, while split() breaks it into pieces at each occurrence of the given separator.
"""
file = (
    'mtv films election, a high school comedy, is a current example\n'
    'from there, director steven spielberg wastes no time, taking us into the water on a midnight swim'
)

# One list entry per line of the text
file_split = file.splitlines()

# Show the individual lines
print(file_split)

# Break every line into at most three comma-separated pieces and show them
for substring in file_split:
    substring_split = substring.split(',', 2)
    print(substring_split)

import pandas as pd

data = {
    'text': [
        "it's clear that he's passionate about his beliefs...",
        "I believe you I always said that the actor act...",
        "it's astonishing how frightening the actor act..."
    ]
}

movies = pd.DataFrame(data, index=[200, 201, 202])

# NOTE: iterating a DataFrame directly yields its column labels (here just
# the string "text"), not the reviews. Every loop below therefore walks the
# "text" column so that each `movie` is an actual review string.
for movie in movies["text"]:
    # If actor is not found between character 37 and 41 inclusive
    # Print word not found
    if movie.find("actor", 37, 42) == -1:
        print("Word not found")
    # Count occurrences and replace two with one
    elif movie.count("actor") == 2:
        print(movie.replace("actor actor", "actor"))
    else:
        # Replace three occurrences with one
        print(movie.replace("actor actor actor", "actor"))

for movie in movies["text"]:
    # Find the first occurrence of word; find() returns -1 when it is absent
    print(movie.find('money', 12, 51))

for movie in movies["text"]:
    try:
        # Find the first occurrence of word; unlike find(), index() raises
        # ValueError when the word is absent
        print(movie.index('money', 12, 51))
    except ValueError:
        print("substring not found")

movie_title = "the rest of the story isn't important because all it does is serve as a mere backdrop for the two stars to share the screen ."

# Turn the negation into an affirmation: cut the text at every "isn't" and
# stitch the pieces back together with "is"
movies_no_negation = "is".join(movie_title.split("isn't"))

# Swap the adjective for its antonym in the same way
movies_antonym = "insignificant".join(movies_no_negation.split("important"))

# Show the rewritten sentence
print(movies_antonym)

wikipedia_article = 'In computer science, artificial intelligence (AI), sometimes called machine intelligence, is intelligence demonstrated by machines, in contrast to the natural intelligence displayed by humans and animals.'

# Lowercase the article once, then cut out the two phrases of interest
lowered_article = wikipedia_article.lower()
first_pos = lowered_article[3:19]
second_pos = lowered_article[21:44]

# Two templates: one with positional placeholders filled in order, one with
# explicitly numbered placeholders that swap the arguments
my_list = [
    "The tool {} is used in {}",
    "The tool {1} is used in {0}",
]

# Fill each template with the same two phrases and show the outcome
for my_string in my_list:
    print(my_string.format(first_pos, second_pos))

print(my_list)

courses = ['artificial intelligence', 'neural networks']

# Create a dictionary that maps each role to the matching course name
plan = {"field": courses[0], "tool": courses[1]}
print(type(plan))