Skip to content
Course Notes: Regular Expressions in Python
  • AI Chat
  • Code
  • Report
  • # Import any packages you want to use here
    import numpy as np
    movie = 'fox and kelley soon become bitter rivals because the new fox books store is opening up right across the block from the small business .'
    """
    Knowing how to manipulate strings will help you perform many data science tasks faster and easier.
    """
    # Label shown in front of the character count
    statement = "Number of characters in this review:"

    # Count every character in the review (spaces and punctuation included)
    length_string = len(movie)

    # Render the count as text so it can be combined with the label
    to_string = f"{length_string}"

    # Join label and count with a single space, exactly as print(a, b) would
    print(" ".join([statement, to_string]))
    movie1 = 'the most significant tension of _election_ is the potential relationship between a teacher and his student .'
    movie2 = 'the most significant tension of _rushmore_ is the potential relationship between a teacher and his student .'

    # The two reviews differ only in the 10-character title at indices 32-41
    title_start, title_end = 32, 42

    # Everything before the title, taken from movie1
    first_part = movie1[:title_start]

    # The title itself, taken from movie2
    middle_part = movie2[title_start:title_end]

    # Everything after the title, taken from movie1
    last_part = movie1[title_end:]

    # The stitched string should read the same as movie2
    print("".join((first_part, middle_part, last_part)))
    print(movie2)
    movie_with_palindrome_title = 'oh my God! desserts I stressed was an ugly movie'

    # The candidate title sits at indices 11-29 of the sentence
    movie_title = movie_with_palindrome_title[slice(11, 30)]

    # Build the mirror image of the title, character by character
    palindrome = "".join(reversed(movie_title))

    # A palindrome reads the same in both directions; print it only then
    if palindrome == movie_title:
        print(movie_title)
    movie_title_3 = '$I supposed that coming from MTV Films I should expect no less$'

    # Convert to lowercase and print the result
    print(movie_title_3.lower())

    # Split from the right at most twice (peels off the last two words) and print the result
    print(movie_title_3.rsplit(sep=" ", maxsplit=2))

    # Remove the surrounding "$" characters and print the result
    movie_no_sign = movie_title_3.strip("$")
    print(movie_no_sign)

    # Split the string into words and print the result.
    # No maxsplit: every space is a split point, however long the title is.
    splitter = movie_no_sign.split(sep=" ")
    print(splitter)

    # Select root word and print the result: dropping the final "d" of
    # "supposed" leaves "suppose". (A [::-2] slice would instead return every
    # second character in reverse order, "dspu", which is not the root.)
    word_root = splitter[1][:-1]
    print(word_root)
    movie = 'the film,however,is all good<\\i>'

    # Remove the tag at the end and print results.
    # splitlines() would not remove anything here (there is no line break),
    # and rstrip("<\\i>") strips a *set* of characters rather than a suffix,
    # so the tag is cut off explicitly only when it is actually present.
    tag = "<\\i>"
    movie_tag = movie[:-len(tag)] if movie.endswith(tag) else movie
    print(movie_tag)

    # Split the tag-free string using commas and print results
    movie_no_comma = movie_tag.split(sep=',')
    print(movie_no_comma)

    # Join back together with spaces and print results
    movie_join = " ".join(movie_no_comma)
    print(movie_join)
    """
    The difference between split() and splitlines() is that splitlines() breaks a string at line boundaries, while split() uses a separator to break a string into pieces.
    """
    file = 'mtv films election, a high school comedy, is a current example\nfrom there, director steven spielberg wastes no time, taking us into the water on a midnight swim'

    # Break the text into one entry per line
    file_split = str.splitlines(file)

    # Show the list of lines
    print(file_split)

    # For each line, cut at the first two commas only and show the pieces
    for substring in file_split:
        substring_split = substring.split(sep=',', maxsplit=2)
        print(substring_split)
    import pandas as pd
    
    data = {
        'text': [
            "it's clear that he's passionate about his beliefs...",
            "I believe you I always said that the actor act...",
            "it's astonishing how frightening the actor act..."
        ]
    }

    movies = pd.DataFrame(data, index=[200, 201, 202])

    # Iterating a DataFrame directly yields its column labels (here just the
    # string 'text'), not the reviews, so every loop below walks the values of
    # the 'text' column instead.
    for movie in movies["text"]:
        # If actor is not found between character 37 and 41 inclusive
        # Print word not found
        if movie.find("actor", 37, 42) == -1:
            print("Word not found")
        # Count occurrences and replace two with one
        elif movie.count("actor") == 2:
            print(movie.replace("actor actor", "actor"))
        else:
            # Replace three occurrences with one
            print(movie.replace("actor actor actor", "actor"))

    for movie in movies["text"]:
        # Find the first occurrence of word; find() returns -1 when absent
        print(movie.find('money', 12, 51))

    for movie in movies["text"]:
        try:
            # Find the first occurrence of word; index() raises when absent
            print(movie.index('money', 12, 51))
        except ValueError:
            print("substring not found")
    movie_title = "the rest of the story isn't important because all it does is serve as a mere backdrop for the two stars to share the screen ."

    # Step 1: turn the negated verb into its positive form
    movies_no_negation = str.replace(movie_title, "isn't", "is")

    # Step 2: swap the adjective for its antonym so the meaning is kept
    movies_antonym = str.replace(movies_no_negation, "important", "insignificant")

    # Show the rewritten sentence
    print(movies_antonym)
    
    wikipedia_article = 'In computer science, artificial intelligence (AI), sometimes called machine intelligence, is intelligence demonstrated by machines, in contrast to the natural intelligence displayed by humans and animals.'

    # Lowercase the article once, then cut out the two phrases of interest
    lowered_article = wikipedia_article.lower()
    first_pos = lowered_article[3:19]
    second_pos = lowered_article[21:44]

    # Two templates: automatic placeholder order, then explicitly swapped order
    my_list = [
        "The tool {} is used in {}",
        "The tool {1} is used in {0}",
    ]

    # Fill each template with the same two arguments and print the result
    for my_string in my_list:
        print(my_string.format(first_pos, second_pos))

    print(my_list)
    courses = ['artificial intelligence', 'neural networks']

    # Map each role to its course: the first is the field, the second the tool
    plan = dict(field=courses[0], tool=courses[1])
    print(type(plan))