# (Web-page residue: "Skip to content")
# Import pandas
import pandas as pd
# Load CSV into the rides variable
# Columns that pandas is asked to parse as dates while reading the CSV
date_columns = ['Start date', 'End date']
rides = pd.read_csv('capital-onebike.csv', parse_dates=date_columns)
# Show the first (position 0) row of the table
first_ride = rides.iloc[0]
print(first_ride)
# Collect the length of every trip in seconds. Both ends are converted to
# UTC before subtracting so the elapsed time is computed between instants
# rather than between wall-clock readings.
# NOTE(review): assumes each trip's 'start'/'end' are timezone-aware
# datetimes -- confirm where onebike_datetimes is built.
trip_durations = []
for trip in onebike_datetimes:
    # When the start is later than the end, set the fold to be 1 so the
    # end is treated as the second occurrence of that wall-clock time
    if trip['start'] > trip['end']:
        trip['end'] = tz.enfold(trip['end'])
    # Convert to UTC
    start = trip['start'].astimezone(tz.UTC)
    end = trip['end'].astimezone(tz.UTC)
    # Subtract the difference
    trip_length_seconds = (end - start).total_seconds()
    trip_durations.append(trip_length_seconds)

# Take the shortest trip duration
print("Shortest trip: " + str(min(trip_durations)))
# Loop over trips and report every start or end that falls on an
# ambiguous wall-clock time according to tz.datetime_ambiguous()
for trip in onebike_datetimes:
    # Rides with ambiguous start
    if tz.datetime_ambiguous(trip['start']):
        print("Ambiguous start at " + str(trip['start']))
    # Rides with ambiguous end
    if tz.datetime_ambiguous(trip['end']):
        print("Ambiguous end at " + str(trip['end']))
# Demonstrate an ambiguous wall-clock time: build the same 1:00 AM reading
# twice, mark the second one with fold=1, and measure the gap in UTC.
eastern = tz.gettz("US/Eastern")
# 2017-11-05 01:00:00
first_1am = datetime(2017, 11, 5, 1, 0, 0, tzinfo=eastern)
# Bare expression: the result is only displayed in an interactive session
tz.datetime_ambiguous(first_1am)
second_1am = datetime(2017, 11, 5, 1, 0, 0, tzinfo=eastern)
# Keep the enfolded value under the same name so the UTC conversion below
# actually uses it
second_1am = tz.enfold(second_1am)
# Compare as UTC instants rather than wall-clock readings
first_1am = first_1am.astimezone(tz.UTC)
second_1am = second_1am.astimezone(tz.UTC)
(second_1am - first_1am).total_seconds()
# Import datetime and tz
from datetime import datetime
from dateutil import tz
# Create starting date: midnight on 29 March 2000 in the Europe/London zone
dt = datetime(2000, 3, 29, tzinfo=tz.gettz('Europe/London'))
# Loop over the years 2000-2010, replacing the year, and print the ISO
# timestamp of each resulting datetime
for y in range(2000, 2011):
    print(dt.replace(year=y).isoformat())
# Import datetime, timedelta, tz, timezone
from datetime import datetime, timedelta, timezone
from dateutil import tz
# Midnight on March 12, 2017 in New York, and the datetime six hours later
new_york = tz.gettz('America/New_York')
start = datetime(2017, 3, 12, tzinfo=new_york)
end = start + timedelta(hours=6)
print(start.isoformat() + " to " + end.isoformat())
# Hours elapsed when the two local datetimes are subtracted directly
seconds_per_hour = 60 * 60
print((end - start).total_seconds() / seconds_per_hour)
# Hours elapsed when both are converted to UTC before subtracting
end_utc = end.astimezone(timezone.utc)
start_utc = start.astimezone(timezone.utc)
print((end_utc - start_utc).total_seconds() / seconds_per_hour)
# Import datetime, timedelta, tz, timezone
from datetime import datetime, timedelta, timezone
from dateutil import tz
# Build midnight, March 12, 2017 (New York) and the datetime 6 hours after it
ny_zone = tz.gettz('America/New_York')
start = datetime(2017, 3, 12, tzinfo=ny_zone)
six_hours = timedelta(hours=6)
end = start + six_hours
# Show the pair as "<start> to <end>" in ISO form
print(" to ".join([start.isoformat(), end.isoformat()]))
# Attach fixed offsets to the two datetimes. replace(tzinfo=...) only swaps
# the tzinfo; it does not shift the clock reading.
# NOTE(review): EST, EDT and the two datetimes are defined outside this
# snippet -- confirm their values.
spring_ahead_a59am = spring_ahead_a59am.replace(tzinfo=EST)
spring_ahead_a59am.isoformat()
spring_ahead_3am = spring_ahead_3am.replace(tzinfo=EDT)
spring_ahead_3am.isoformat()
# Seconds component of the gap between the two datetimes
gap = spring_ahead_3am - spring_ahead_a59am
gap.seconds
from dateutil import tz
# Build the same two spring-forward datetimes using a dateutil zone
eastern = tz.gettz('America/New_York')
# 1:59:59 AM and 3:00:00 AM on March 12, 2017; `tzinfo` is the keyword
# datetime() accepts for the timezone
spring_ahead_a59am = datetime(2017, 3, 12, 1, 59, 59, tzinfo=eastern)
spring_ahead_3am = datetime(2017, 3, 12, 3, 0, 0, tzinfo=eastern)
# Timezone object for Pacific/Apia
sm = tz.gettz('Pacific/Apia')
# Start of the first trip in the list
first_trip = onebike_datetimes[0]
local = first_trip['start']
# The same datetime expressed in the Pacific/Apia zone
notlocal = local.astimezone(sm)
# Print both ISO strings, local first, to compare them
for moment in (local, notlocal):
    print(moment.isoformat())
# Import tz
from dateutil import tz
# Create a timezone object for Eastern Time
et = tz.gettz('America/New_York')
# Loop over the first ten trips, attaching Eastern Time to both datetimes.
# replace(tzinfo=...) keeps the clock reading and only sets the zone.
for trip in onebike_datetimes[:10]:
    # Update trip['start'] and trip['end']
    trip['start'] = trip['start'].replace(tzinfo=et)
    trip['end'] = trip['end'].replace(tzinfo=et)
# Loop over the first ten trips
for trip in onebike_datetimes[:10]:
    # Pull out the start
    dt = trip['start']
    # Move dt to be in UTC
    dt = dt.astimezone(timezone.utc)
    # Print the original start next to its UTC ISO string
    print('Original:', trip['start'], '| UTC:', dt.isoformat())
###import
from datetime import datetime
from dateutil import tz
# Eastern time. tz.gettz() takes an IANA zone name in 'Continent/City' format.
et = tz.gettz("America/New_York")
# Import datetime, timedelta, timezone
from datetime import datetime, timedelta, timezone
# Create a timezone for Australian Eastern Daylight Time, or UTC+11
aedt = timezone(timedelta(hours=11))
# October 1, 2017 at 15:26:26, UTC+11
dt = datetime(2017, 10, 1, 15, 26, 26, tzinfo=aedt)
# Print the ISO 8601 string, which includes the +11:00 offset
print(dt.isoformat())
# Create a timezone object corresponding to UTC-4
edt = timezone(timedelta(hours=-4))
# Loop over the first ten trips, attaching the UTC-4 offset to the start
# and end datetimes (the clock readings themselves are left unchanged)
for trip in onebike_datetimes[:10]:
    # Update trip['start'] and trip['end']
    trip['start'] = trip['start'].replace(tzinfo=edt)
    trip['end'] = trip['end'].replace(tzinfo=edt)
# Turning dates into strings
# .isoformat() renders the value in ISO 8601 format
# NOTE(review): `d` is defined elsewhere -- confirm it is set before this runs.
print([d.isoformat()])
# .strftime() takes a format string, e.g. strftime("%Y") for the year
print(d.strftime("%Y"))
# Literal text in the format string is copied through unchanged
print(d.strftime(" Year is %Y"))
from datetime import date
# Earliest date in the hurricane list
first_date = min(florida_hurricane_dates)
# Build the ISO (YYYY-MM-DD) and US (MM/DD/YYYY) messages from a shared prefix
prefix = "Our earliest hurricane date: "
iso = prefix + first_date.isoformat()
us = prefix + first_date.strftime("%m/%d/%Y")
# Label and print each message
print(f"ISO: {iso}")
print(f"US: {us}")
# Import date
from datetime import date
# Create a date object
andrew = date(1992, 8, 26)
# Print the date in the format 'YYYY-DDD'.
# %j is the zero-padded day of the year; %B would give the full month name.
print(andrew.strftime('%Y-%j'))
# Import datetime
from datetime import datetime
# October 1, 2017 at 15:26:26, with no timezone attached
dt = datetime(year=2017, month=10, day=1, hour=15, minute=26, second=26)
# Render it in ISO 8601 format and print the text
iso_text = dt.isoformat()
print(iso_text)
# Import datetime
from datetime import datetime
# December 31, 2017 at 15:19:13
dt = datetime(year=2017, month=12, day=31, hour=15, minute=19, second=13)
# Copy of dt with the year swapped for 1917; replace() returns a new
# object and leaves dt itself untouched
dt_old = dt.replace(year=1917)
# Print the result (print() uses str(), which puts a space between the
# date and the time)
print(dt_old)
# Count how many trips start before noon ('AM') and at or after noon ('PM')
trip_counts = {'AM': 0, 'PM': 0}
# Loop over all trips
for trip in onebike_datetimes:
    # Check to see if the trip starts before noon
    if trip['start'].hour < 12:
        # Increment the counter for before noon
        trip_counts['AM'] += 1
    else:
        # Increment the counter for noon and later
        trip_counts['PM'] += 1

print(trip_counts)
# Convert the timestamp `ts` to a datetime and print it.
# NOTE(review): `ts` is not assigned before this line in the visible file --
# confirm where it is supposed to come from.
print(datetime.fromtimestamp(ts))
# Import the datetime class
from datetime import datetime
# Text to parse (YYYY-MM-DD HH:MM:SS) and the strptime pattern that matches it
s, fmt = '2017-02-03 00:00:01', '%Y-%m-%d %H:%M:%S'
# Parse the text into a datetime object and show it
d = datetime.strptime(s, fmt)
print(d)
# Import the datetime class
from datetime import datetime
# Text to parse (MM/DD/YYYY HH:MM:SS) paired with its strptime pattern
s, fmt = '12/15/1986 08:00:00', '%m/%d/%Y %H:%M:%S'
# Turn the text into a datetime object, then display it
d = datetime.strptime(s, fmt)
print(d)
# Format shared by every start/end string
fmt = "%Y-%m-%d %H:%M:%S"
# Initialize a list for holding the pairs of datetime objects
onebike_datetimes = []
# Loop over all (start, end) string pairs, parsing each into a trip dict
for (start, end) in onebike_datetime_strings:
    trip = {'start': datetime.strptime(start, fmt),
            'end': datetime.strptime(end, fmt)}
    # Append the trip
    onebike_datetimes.append(trip)
# Import datetime
from datetime import datetime
# Start datetime of the first trip
first_start = onebike_datetimes[0]['start']
# strftime() pattern spelling out the same layout by hand
fmt = "%Y-%m-%dT%H:%M:%S"
# Print the .isoformat() text, then the .strftime() text, to compare them
iso_text = first_start.isoformat()
print(iso_text)
strftime_text = first_start.strftime(fmt)
print(strftime_text)
# Import datetime
from datetime import datetime
# Starting timestamps
timestamps = [1514665153, 1514664543]
# Datetime objects, one per timestamp, in the same order.
# fromtimestamp() with no tz argument yields naive local-time datetimes.
dts = [datetime.fromtimestamp(ts) for ts in timestamps]
# Print results
print(dts)
# Initialize a list for all the trip durations (in seconds)
onebike_durations = []
for trip in onebike_datetimes:
    # Create a timedelta object corresponding to the length of the trip
    trip_duration = trip['end'] - trip['start']
    # Get the total elapsed seconds in trip_duration
    trip_length_seconds = trip_duration.total_seconds()
    # Append the results to our list
    onebike_durations.append(trip_length_seconds)

# Calculate shortest and longest trips
# (min()/max() raise ValueError if there were no trips at all)
shortest_trip = min(onebike_durations)
longest_trip = max(onebike_durations)
# Print out the results
print("The shortest trip was " + str(shortest_trip) + " seconds")
print("The longest trip was " + str(longest_trip) + " seconds")
# Hidden output
# Working with Dates and Times in Python
# Import date from datetime
from datetime import date
# August 24, 1992
hurricane_andrew = date(year=1992, month=8, day=24)
# weekday() numbers the days from Monday = 0 through Sunday = 6
day_index = hurricane_andrew.weekday()
print(day_index)
# Counter for how many hurricanes fall before June 1
early_hurricanes = 0
# We loop over the dates
for hurricane in florida_hurricane_dates:
    # Check if the month is before June (month number 6)
    if hurricane.month < 6:
        early_hurricanes += 1

# Report the total once, after the whole list has been scanned
print(early_hurricanes)
# Import date
from datetime import date
# May 9th, 2007
start = date(year=2007, month=5, day=9)
# December 13th, 2007
end = date(year=2007, month=12, day=13)
# The difference of two dates is a timedelta; print its whole-day count
elapsed = end - start
print(elapsed.days)
# A dictionary to count hurricanes per calendar month, keyed 1..12 and
# starting every month at zero so months with no hurricanes still appear
hurricanes_each_month = {month: 0 for month in range(1, 13)}
# Loop over all hurricanes
for hurricane in florida_hurricane_dates:
    # Pull out the month
    month = hurricane.month
    # Increment the count in the dictionary by one
    hurricanes_each_month[month] += 1

print(hurricanes_each_month)
# Show the first (index 0) and last (index -1) entries of the scrambled dates
for position in (0, -1):
    print(dates_scrambled[position])
# First and last entries before sorting
for position in (0, -1):
    print(dates_scrambled[position])
# sorted() returns a new ascending list and leaves dates_scrambled as it was
dates_ordered = sorted(dates_scrambled)
# First and last entries after sorting
earliest, latest = dates_ordered[0], dates_ordered[-1]
print(earliest)
print(latest)
## pandas alternative to looping:
# the DataFrame .apply() method
# Example:
def _row_run_diff(row):
    """Return calc_run_diff() applied to one row's 'RS' and 'RA' values."""
    return calc_run_diff(row['RS'], row['RA'])


# axis=1 hands each row to the helper, giving one result per row
run_diffs_apply = baseball_df.apply(_row_run_diff, axis=1)
# Store the results as the 'RD' column and show the table
baseball_df['RD'] = run_diffs_apply
print(baseball_df)
# Display the first five rows of the DataFrame (the notes do this twice)
for _ in range(2):
    print(dbacks_df.head())


def _win_perc_for_row(row):
    """Return calc_win_perc() applied to one row's 'W' and 'G' values."""
    return calc_win_perc(row['W'], row['G'])


# Win percentage Series: one value per row (axis=1 iterates over rows)
win_percs = dbacks_df.apply(_win_perc_for_row, axis=1)
print(win_percs, '\n')
# Preview the first five rows of the DataFrame
print(dbacks_df.head())


def _row_win_perc(row):
    """Return calc_win_perc() applied to one row's 'W' and 'G' values."""
    return calc_win_perc(row['W'], row['G'])


# Win percentage Series, computed row by row
win_percs = dbacks_df.apply(_row_win_perc, axis=1)
print(win_percs, '\n')
# Attach the Series to dbacks_df as the 'WP' column
dbacks_df['WP'] = win_percs
print(dbacks_df, '\n')
# Show only the rows whose WP is at least 0.50
at_least_half = dbacks_df['WP'] >= 0.50
print(dbacks_df[at_least_half])