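# "Skip to content" -- stray page-navigation text, likely left over from a web export of this notebook; kept as a comment so the file parses.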
import pandas as pd
def scrape_filenames():
  '''
  List the visible entries of the current working directory.

  Runs ``ls -1`` through ``subprocess`` rather than the IPython ``!ls``
  shell escape, so the function is valid plain Python. The output has
  exactly one entry per line, which means names containing spaces, tabs
  or commas are returned intact instead of being split apart.

  ``ls`` itself is kept (rather than ``os.listdir``) so that the ordering
  and the omission of dot-prefixed entries stay whatever ``ls`` produces.

  Returns:
    list of str: entry names in the order ``ls`` prints them.

  Raises:
    subprocess.CalledProcessError: if ``ls`` exits with a non-zero status.
    FileNotFoundError: if no ``ls`` executable can be found.
  '''
  # Local import: keeps this block self-contained without touching the
  # file's import section.
  import subprocess

  listing = subprocess.run(
      ['ls', '-1'], capture_output=True, text=True, check=True
  ).stdout

  # Drop empty lines only; names are not stripped, so leading/trailing
  # spaces that are genuinely part of a file name are preserved.
  return [name for name in listing.splitlines() if name]
# Collect the file names reported by scrape_filenames() and print them
# so the list can be inspected before loading.
geo_list = scrape_filenames()
print(geo_list)
def read_batch(file_list, *,
               base_file='afrikaans_short_geoMap.csv',
               skip_files=('notebook.ipynb', 'rsv_countries.csv',
                           'autism_geo.ipynb'),
               drop_column='Outisme: (Od 1.01.2004 do 13.10.2023)'):
  '''
  Merge a batch of per-file CSV tables into one data frame.

  Every file is read with its first two lines skipped and must contain a
  'Kraj' column, which is the merge key. The second column of each file
  is renamed to the file name up to its first dot, so each file
  contributes one column named after itself.

  Args:
    file_list: iterable of file names (paths relative to the working
      directory) to merge.
    base_file: CSV that seeds the merged table. If it also appears in
      ``file_list`` it is merged again like any other file, which is
      what gives it a column named after the file.
    skip_files: names in ``file_list`` that are ignored.
    drop_column: column removed from the result (by default the
      original, un-renamed data column of the default base file); pass
      ``None`` to keep everything.

  Returns:
    pandas.DataFrame: outer merge of all tables, with 'Kraj' renamed to
    'country'.

  Raises:
    KeyError: if a file lacks the 'Kraj' column or ``drop_column`` is not
      present in the merged table.
  '''
  all_countries = pd.read_csv(base_file, skiprows=2)
  excluded = frozenset(skip_files)

  for name in file_list:
    if name in excluded:
      continue
    col_name = name.split('.')[0]
    df = pd.read_csv(name, skiprows=2)
    # Name the file's data column after the file so columns stay distinct.
    df = df.rename(columns={df.columns[1]: col_name})
    # Outer merge keeps countries that appear in only some of the files.
    all_countries = all_countries.merge(df, how='outer', on='Kraj')

  all_countries = all_countries.rename(columns={'Kraj': 'country'})
  if drop_column is not None:
    all_countries = all_countries.drop(columns=drop_column)

  return all_countries
# Load and merge every listed file into a single table.
all_countries = read_batch(geo_list)
# Notebook-style preview; as a plain script statement the result is discarded.
all_countries.head()
# Replace missing values (countries absent from some files) with 0.
all_countries = all_countries.fillna(0)
all_countries.head()
# Country names exactly as they appear in the merged table's 'country'
# column (presumably Polish, judging by the `_pol` suffix -- TODO confirm).
countries_pol = all_countries['country'].values
# Hand-written English names. They are paired with countries_pol by
# position below, so entry i must be the English name of row i of
# all_countries -- the order of this list is significant.
countries_eng = ['South Africa', 'Kiribati',
        'Sao Tome and Principe', 'Tonga', 'Turks and Caicos',
        'East Timor', 'South Sudan', 'Grenada',
        'Antigua and Barbuda', 'US Virgin Islands',
        'French Polynesia', 'Namibia', 'Lesotho', 'Burundi',
        'Sierra Leone', 'Curaçao', 'Malawi', 'Maldives', 'Fiji',
        'Turkmenistan', 'Cuba', 'Trinidad and Tobago', 'Nepal', 'Cameroon',
        'Luxembourg', 'Netherlands', 'Belgium', 'Cambodia', 'Tunisia',
        'Algeria', 'Morocco', 'Kuwait', 'Sudan', 'El Salvador', 'Qatar',
        'Nigeria', 'Bolivia', 'Lithuania', 'Denmark', 'Romania', 'Slovakia',
        'Taiwan', 'Malaysia', 'Singapore', 'Switzerland', 'France', 'Hungary',
        'Philippines', 'Iran', 'Portugal', 'Peru', 'Australia',
        'UK', 'Indonesia', 'Thailand', 'Turkey', 'Germany',
        'India', 'Mexico', 'Italy', 'United States', 'Brazil',
        'Aruba', 'Afghanistan', 'Angola', 'Anguilla', 'Åland Islands',
        'Albania', 'Andorra', 'United Arab Emirates', 'Argentina',
        'Armenia', 'American Samoa', 'Antarctica',
        'French Southern and Antarctic Territories', 'Austria',
        'Azerbaijan', 'Benin', 'Caribbean Netherlands', 'Burkina Faso',
        'Bangladesh', 'Bulgaria', 'Bahrain', 'Bahamas',
        'Bosnia and Herzegovina', 'Saint-Barthélemy', 'Belarus', 'Belize',
        'Bermuda', 'Barbados', 'Brunei', 'Bhutan', 'Bouvet Island',
        'Botswana', 'Central African Republic', 'Canada',
        'Cocos Islands', 'Chile', 'China', 'Côte d\'Ivoire',
        'Democratic Republic of the Congo', 'Congo', 'Cook Islands',
        'Colombia', 'Comoros', 'Cape Verde', 'Costa Rica',
        'Christmas Island', 'Cayman Islands', 'Cyprus', 'Czech Republic', 'Djibouti',
        'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'Eritrea',
        'Western Sahara', 'Spain', 'Estonia', 'Ethiopia', 'Finland',
        'Falkland Islands (Malvinas)', 'Faroe Islands', 'Micronesia', 'Gabon',
        'Georgia', 'Guernsey', 'Ghana', 'Gibraltar', 'Guinea', 'Guadeloupe',
        'Gambia', 'Guinea-Bissau', 'Equatorial Guinea', 'Greece',
        'Greenland', 'Guatemala', 'French Guiana', 'Guam', 'Guyana',
        'Hong Kong', 'Heard and McDonald Islands', 'Honduras', 'Croatia',
        'Haiti', 'Isle of Man', 'British Indian Ocean Territory',
        'Ireland', 'Iraq', 'Iceland', 'Israel', 'Jamaica', 'Jersey',
        'Jordan', 'Japan', 'Kazakhstan', 'Kenya', 'Kyrgyzstan',
        'Saint Kitts and Nevis', 'South Korea', 'Laos', 'Lebanon',
        'Liberia', 'Libya', 'Saint Lucia', 'Liechtenstein', 'Sri Lanka',
        'Latvia', 'Macau', 'Saint-Martin', 'Monaco', 'Moldova',
        'Madagascar', 'Marshall Islands', 'North Macedonia', 'Mali',
        'Malta', 'Myanmar (Burma)', 'Montenegro', 'Mongolia',
        'Northern Mariana Islands', 'Mozambique', 'Mauritania', 'Montserrat',
        'Martinique', 'Mauritius', 'Mayotte', 'New Caledonia', 'Niger',
        'Norfolk', 'Nicaragua', 'Niue', 'Norway', 'Nauru',
        'New Zealand', 'Oman', 'Pakistan', 'Panama', 'Pitcairn', 'Palau',
        'Papua New Guinea', 'Poland', 'Puerto Rico', 'North Korea',
        'Paraguay', 'Palestine', 'Reunion', 'Russia', 'Rwanda',
        'Saudi Arabia', 'Senegal',
        'South Georgia and the South Sandwich Islands', 'St. Helena Island',
        'Svalbard and Jan Mayen', 'Solomon Islands', 'San Marino', 'Somalia',
        'Saint-Pierre and Miquelon', 'Serbia', 'Suriname', 'Slovenia',
        'Sweden', 'Eswatini', 'Sint Maarten', 'Seychelles', 'Syria', 'Chad',
        'Togo', 'Tajikistan', 'Tokelau', 'Tuvalu', 'Tanzania', 'Uganda',
        'Ukraine', 'United States Minor Outlying Islands',
        'Uruguay', 'Uzbekistan', 'Vatican', 'Saint Vincent and the Grenadines',
        'Venezuela', 'British Virgin Islands', 'Vietnam', 'Vanuatu',
        'Wallis and Futuna', 'Samoa', 'Kosovo', 'Yemen', 'Zambia',
        'Zimbabwe']

# Build the source-name -> English-name lookup by pairing the two
# sequences position by position; pairing stops at the shorter one.
country_dict = dict(zip(countries_pol, countries_eng))
print(country_dict)

# Index the table by country and translate the index labels through the
# lookup; labels with no entry in the lookup are left unchanged.
all_countries = all_countries.set_index('country').rename(index=country_dict)
all_countries.head()
!pip install pycountry
import pycountry
def alpha3code(column):
    '''
    Map country names to their alpha-3 country codes via pycountry.

    Each name is looked up with ``pycountry.countries.get(name=...)``.
    Names that cannot be resolved produce the *string* ``'None'`` (not
    the ``None`` object), so the result always has one entry per input.

    Args:
        column: iterable of country names.

    Returns:
        list of str: the ``alpha_3`` code for each name, or ``'None'``
        where the lookup failed, in input order.
    '''
    codes = []
    for country in column:
        try:
            match = pycountry.countries.get(name=country)
        except (AttributeError, LookupError):
            # Depending on the pycountry version, an unknown or non-string
            # name raises one of these instead of returning None.
            match = None
        codes.append(match.alpha_3 if match is not None else 'None')
    return codes
# create a column for code