Skip to content
Autism vs Autism Spectrum Disorder Part 2 Cleaning Geo Data
import pandas as pd
def scrape_filenames():
    """List the entries of the current working directory.

    Runs ``ls -1`` so that every entry is reported on its own line, in the
    same order plain ``ls`` would list them.  Names are returned untouched,
    so a file name containing spaces stays in one piece.

    Returns:
        A list of file names (strings), empty lines removed.

    Raises:
        subprocess.CalledProcessError: if ``ls`` exits with a non-zero status.
    """
    # Local import: the rest of the file only needs pandas.
    import subprocess

    listing = subprocess.run(
        ['ls', '-1'], capture_output=True, text=True, check=True
    ).stdout
    return [name for name in listing.splitlines() if name]


geo_list = scrape_filenames()
print(geo_list)


def read_batch(file_list):
    """Merge the CSV files named in *file_list* into one data frame.

    The frame is seeded with 'afrikaans_short_geoMap.csv'.  Every file in
    *file_list* (except the notebook files and 'rsv_countries.csv') is read
    with its first two lines skipped, its second column is renamed to the
    file name without extension, and it is outer-merged on the 'Kraj'
    column.  Finally 'Kraj' is renamed to 'country' and the seed file's
    original value column is dropped.

    Args:
        file_list: iterable of file names relative to the working directory.

    Returns:
        The merged pandas DataFrame with one column per processed file.
    """
    skipped = ('notebook.ipynb', 'rsv_countries.csv', 'autism_geo.ipynb')

    all_countries = pd.read_csv('afrikaans_short_geoMap.csv', skiprows=2)
    for name in file_list:
        if name in skipped:
            continue
        col_name = name.split('.')[0]
        df = pd.read_csv(name, skiprows=2)
        df = df.rename(columns={df.columns[1]: col_name})
        all_countries = all_countries.merge(df, how='outer', on='Kraj')

    all_countries = all_countries.rename(columns={'Kraj': 'country'})
    # The seed file is merged again inside the loop under its file-based
    # column name, so the seed's original value column is redundant.
    all_countries = all_countries.drop(
        columns='Outisme: (Od 1.01.2004 do 13.10.2023)'
    )
    return all_countries


all_countries = read_batch(geo_list)
all_countries.head()

# Countries missing from a file have no value after the outer merge.
all_countries = all_countries.fillna(0)
all_countries.head()

countries_pol = all_countries['country'].values

# NOTE(review): this list is later paired with countries_pol by position, so
# it presumably has to follow the row order of all_countries -- confirm
# whenever the input files or the pandas version change.
countries_eng = ['South Africa', 'Kiribati',
    'Sao Tome and Principe', 'Tonga', 'Turks and Caicos',
    'East Timor', 'South Sudan', 'Grenada',
    'Antigua and Barbuda', 'US Virgin Islands',
    'French Polynesia', 'Namibia', 'Lesotho', 'Burundi',
    'Sierra Leone', 'Curaçao', 'Malawi', 'Maldives', 'Fiji',
    'Turkmenistan', 'Cuba', 'Trinidad and Tobago', 'Nepal', 'Cameroon',
    'Luxembourg', 'Netherlands', 'Belgium', 'Cambodia', 'Tunisia',
    'Algeria', 'Morocco', 'Kuwait', 'Sudan', 'El Salvador', 'Qatar',
    'Nigeria', 'Bolivia', 'Lithuania', 'Denmark', 'Romania', 'Slovakia',
    'Taiwan', 'Malaysia', 'Singapore', 'Switzerland', 'France', 'Hungary',
    'Philippines', 'Iran', 'Portugal', 'Peru', 'Australia',
    'UK', 'Indonesia', 'Thailand', 'Turkey', 'Germany',
    'India', 'Mexico', 'Italy', 'United States', 'Brazil',
    'Aruba', 'Afghanistan', 'Angola', 'Anguilla', 'Åland Islands',
    'Albania', 'Andorra', 'United Arab Emirates', 'Argentina',
    'Armenia', 'American Samoa', 'Antarctica',
    'French Southern and Antarctic Territories', 'Austria',
    'Azerbaijan', 'Benin', 'Caribbean Netherlands', 'Burkina Faso',
    'Bangladesh', 'Bulgaria', 'Bahrain', 'Bahamas',
    'Bosnia and Herzegovina', 'Saint-Barthélemy', 'Belarus', 'Belize',
    'Bermuda', 'Barbados', 'Brunei', 'Bhutan', 'Bouvet Island',
    'Botswana', 'Central African Republic', 'Canada',
    'Cocos Islands', 'Chile', 'China', 'Côte d\'Ivoire',
    'Democratic Republic of the Congo', 'Congo', 'Cook Islands',
    'Colombia', 'Comoros', 'Cape Verde', 'Costa Rica',
    'Christmas Island', 'Cayman Islands', 'Cyprus', 'Czech Republic', 'Djibouti',
    'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'Eritrea',
    'Western Sahara', 'Spain', 'Estonia', 'Ethiopia', 'Finland',
    'Falkland Islands (Malvinas)', 'Faroe Islands', 'Micronesia', 'Gabon',
    'Georgia', 'Guernsey', 'Ghana', 'Gibraltar', 'Guinea', 'Guadeloupe',
    'Gambia', 'Guinea-Bissau', 'Equatorial Guinea', 'Greece',
    'Greenland', 'Guatemala', 'French Guiana', 'Guam', 'Guyana',
    'Hong Kong', 'Heard and McDonald Islands', 'Honduras', 'Croatia',
    'Haiti', 'Isle of Man', 'British Indian Ocean Territory',
    'Ireland', 'Iraq', 'Iceland', 'Israel', 'Jamaica', 'Jersey',
    'Jordan', 'Japan', 'Kazakhstan', 'Kenya', 'Kyrgyzstan',
    'Saint Kitts and Nevis', 'South Korea', 'Laos', 'Lebanon',
    'Liberia', 'Libya', 'Saint Lucia', 'Liechtenstein', 'Sri Lanka',
    'Latvia', 'Macau', 'Saint-Martin', 'Monaco', 'Moldova',
    'Madagascar', 'Marshall Islands', 'North Macedonia', 'Mali',
    'Malta', 'Myanmar (Burma)', 'Montenegro', 'Mongolia',
    'Northern Mariana Islands', 'Mozambique', 'Mauritania', 'Montserrat',
    'Martinique', 'Mauritius', 'Mayotte', 'New Caledonia', 'Niger',
    'Norfolk', 'Nicaragua', 'Niue', 'Norway', 'Nauru',
    'New Zealand', 'Oman', 'Pakistan', 'Panama', 'Pitcairn', 'Palau',
    'Papua New Guinea', 'Poland', 'Puerto Rico', 'North Korea',
    'Paraguay', 'Palestine', 'Reunion', 'Russia', 'Rwanda',
    'Saudi Arabia', 'Senegal',
    'South Georgia and the South Sandwich Islands', 'St. Helena Island',
    'Svalbard and Jan Mayen', 'Solomon Islands', 'San Marino', 'Somalia',
    'Saint-Pierre and Miquelon', 'Serbia', 'Suriname', 'Slovenia',
    'Sweden', 'Eswatini', 'Sint Maarten', 'Seychelles', 'Syria', 'Chad',
    'Togo', 'Tajikistan', 'Tokelau', 'Tuvalu', 'Tanzania', 'Uganda',
    'Ukraine', 'United States Minor Outlying Islands',
    'Uruguay', 'Uzbekistan', 'Vatican', 'Saint Vincent and the Grenadines',
    'Venezuela', 'British Virgin Islands', 'Vietnam', 'Vanuatu',
    'Wallis and Futuna', 'Samoa', 'Kosovo', 'Yemen', 'Zambia',
    'Zimbabwe']
country_dict = dict(map(lambda i,j : (i,j) , countries_pol,countries_eng))
print(country_dict)all_countries = all_countries.set_index('country')
all_countries = all_countries.rename(index=country_dict)
all_countries.head()!pip install pycountry
import pycountry
def alpha3code(column):
    """Look up the pycountry ``alpha_3`` code for each name in *column*.

    Args:
        column: iterable of country names, matched against pycountry's
            ``name`` field.

    Returns:
        A list with one entry per input name, in input order: the
        ``alpha_3`` code when pycountry knows the name, otherwise the
        string ``'None'``.
    """
    codes = []
    for country in column:
        try:
            code = pycountry.countries.get(name=country).alpha_3
        except (AttributeError, LookupError):
            # A failed lookup either returns None (AttributeError on
            # ``.alpha_3``) or raises a LookupError/KeyError, depending on
            # the pycountry version.  Other errors are left to propagate.
            code = 'None'
        codes.append(code)
    return codes
# create a column holding each country's alpha-3 code