Lecture 2 exercise 2 solution#
Using “International migrants and refugees” from the UN.
import pandas as pd
migrants = pd.read_csv(
"https://data.un.org/_Docs/SYB/CSV/SYB67_327_202411_International%20Migrants%20and%20Refugees.csv",
skiprows=1,
)
migrants
Region/Country/Area | Unnamed: 1 | Year | Series | Value | Footnotes | Source | |
---|---|---|---|---|---|---|---|
0 | 1 | Total, all countries or areas | 2005 | International migrant stock: Both sexes (number) | 191,446,828 | NaN | United Nations Population Division, New York, ... |
1 | 1 | Total, all countries or areas | 2005 | International migrant stock: Both sexes (% tot... | 2.9 | NaN | United Nations Population Division, New York, ... |
2 | 1 | Total, all countries or areas | 2005 | International migrant stock: Male (% total Pop... | 3.0 | NaN | United Nations Population Division, New York, ... |
3 | 1 | Total, all countries or areas | 2005 | International migrant stock: Female (% total P... | 2.9 | NaN | United Nations Population Division, New York, ... |
4 | 1 | Total, all countries or areas | 2010 | International migrant stock: Both sexes (number) | 220,983,187 | NaN | United Nations Population Division, New York, ... |
... | ... | ... | ... | ... | ... | ... | ... |
7228 | 716 | Zimbabwe | 2020 | Total population of concern to UNHCR (number) | 292,147 | NaN | United Nations High Commissioner for Refugees ... |
7229 | 716 | Zimbabwe | 2023 | Total refugees and people in refugee-like situ... | 10,181 | NaN | United Nations High Commissioner for Refugees ... |
7230 | 716 | Zimbabwe | 2023 | Asylum seekers, including pending cases (number) | 11,986 | NaN | United Nations High Commissioner for Refugees ... |
7231 | 716 | Zimbabwe | 2023 | Other of concern to UNHCR (number) | 972 | NaN | United Nations High Commissioner for Refugees ... |
7232 | 716 | Zimbabwe | 2023 | Total population of concern to UNHCR (number) | 23,139 | NaN | United Nations High Commissioner for Refugees ... |
7233 rows × 7 columns
Clean up the columns.
migrants = migrants.drop(columns=["Region/Country/Area", "Footnotes", "Source"])
migrants = migrants.rename(columns={"Unnamed: 1": "Region/Country/Area"})
migrants
Region/Country/Area | Year | Series | Value | |
---|---|---|---|---|
0 | Total, all countries or areas | 2005 | International migrant stock: Both sexes (number) | 191,446,828 |
1 | Total, all countries or areas | 2005 | International migrant stock: Both sexes (% tot... | 2.9 |
2 | Total, all countries or areas | 2005 | International migrant stock: Male (% total Pop... | 3.0 |
3 | Total, all countries or areas | 2005 | International migrant stock: Female (% total P... | 2.9 |
4 | Total, all countries or areas | 2010 | International migrant stock: Both sexes (number) | 220,983,187 |
... | ... | ... | ... | ... |
7228 | Zimbabwe | 2020 | Total population of concern to UNHCR (number) | 292,147 |
7229 | Zimbabwe | 2023 | Total refugees and people in refugee-like situ... | 10,181 |
7230 | Zimbabwe | 2023 | Asylum seekers, including pending cases (number) | 11,986 |
7231 | Zimbabwe | 2023 | Other of concern to UNHCR (number) | 972 |
7232 | Zimbabwe | 2023 | Total population of concern to UNHCR (number) | 23,139 |
7233 rows × 4 columns
migrants["Series"].unique()
array(['International migrant stock: Both sexes (number)',
'International migrant stock: Both sexes (% total population)',
'International migrant stock: Male (% total Population)',
'International migrant stock: Female (% total Population)',
'Total refugees and people in refugee-like situations (number)',
'Asylum seekers, including pending cases (number)',
'Other of concern to UNHCR (number)',
'Total population of concern to UNHCR (number)'], dtype=object)
migrants_total = migrants[
migrants["Series"] == "International migrant stock: Both sexes (number)"
]
migrants_total
Region/Country/Area | Year | Series | Value | |
---|---|---|---|---|
0 | Total, all countries or areas | 2005 | International migrant stock: Both sexes (number) | 191,446,828 |
4 | Total, all countries or areas | 2010 | International migrant stock: Both sexes (number) | 220,983,187 |
8 | Total, all countries or areas | 2015 | International migrant stock: Both sexes (number) | 247,958,644 |
16 | Total, all countries or areas | 2020 | International migrant stock: Both sexes (number) | 280,598,105 |
28 | Africa | 2005 | International migrant stock: Both sexes (number) | 16,040,087 |
... | ... | ... | ... | ... |
7186 | Zambia | 2020 | International migrant stock: Both sexes (number) | 187,955 |
7198 | Zimbabwe | 2005 | International migrant stock: Both sexes (number) | 402,226 |
7206 | Zimbabwe | 2010 | International migrant stock: Both sexes (number) | 398,307 |
7213 | Zimbabwe | 2015 | International migrant stock: Both sexes (number) | 400,482 |
7221 | Zimbabwe | 2020 | International migrant stock: Both sexes (number) | 416,141 |
1045 rows × 4 columns
list(migrants_total["Region/Country/Area"].unique())
['Total, all countries or areas',
'Africa',
'Northern Africa',
'Sub-Saharan Africa',
'Eastern Africa',
'Middle Africa',
'Southern Africa',
'Western Africa',
'Northern America',
'Latin America & the Caribbean',
'Caribbean',
'Central America',
'South America',
'Asia',
'Central Asia',
'Eastern Asia',
'South-central Asia',
'South-eastern Asia',
'Southern Asia',
'Western Asia',
'Europe',
'Eastern Europe',
'Northern Europe',
'Southern Europe',
'Western Europe',
'Oceania',
'Australia and New Zealand',
'Melanesia',
'Micronesia',
'Polynesia',
'Afghanistan',
'Albania',
'Algeria',
'American Samoa',
'Andorra',
'Angola',
'Anguilla',
'Antigua and Barbuda',
'Argentina',
'Armenia',
'Aruba',
'Australia',
'Austria',
'Azerbaijan',
'Bahamas',
'Bahrain',
'Bangladesh',
'Barbados',
'Belarus',
'Belgium',
'Belize',
'Benin',
'Bermuda',
'Bhutan',
'Bolivia (Plurin. State of)',
'Bonaire, St. Eustatius & Saba',
'Bosnia and Herzegovina',
'Botswana',
'Brazil',
'British Virgin Islands',
'Brunei Darussalam',
'Bulgaria',
'Burkina Faso',
'Burundi',
'Cabo Verde',
'Cambodia',
'Cameroon',
'Canada',
'Cayman Islands',
'Central African Republic',
'Chad',
'Channel Islands',
'Chile',
'China',
'China, Hong Kong SAR',
'China, Macao SAR',
'Colombia',
'Comoros',
'Congo',
'Cook Islands',
'Costa Rica',
'Côte d’Ivoire',
'Croatia',
'Cuba',
'Curaçao',
'Cyprus',
'Czechia',
"Dem. People's Rep. Korea",
'Dem. Rep. of the Congo',
'Denmark',
'Djibouti',
'Dominica',
'Dominican Republic',
'Ecuador',
'Egypt',
'El Salvador',
'Equatorial Guinea',
'Eritrea',
'Estonia',
'Eswatini',
'Ethiopia',
'Falkland Islands (Malvinas)',
'Faroe Islands',
'Fiji',
'Finland',
'France',
'French Guiana',
'French Polynesia',
'Gabon',
'Gambia',
'Georgia',
'Germany',
'Ghana',
'Gibraltar',
'Greece',
'Greenland',
'Grenada',
'Guadeloupe',
'Guam',
'Guatemala',
'Guinea',
'Guinea-Bissau',
'Guyana',
'Haiti',
'Holy See',
'Honduras',
'Hungary',
'Iceland',
'India',
'Indonesia',
'Iran (Islamic Republic of)',
'Iraq',
'Ireland',
'Isle of Man',
'Israel',
'Italy',
'Jamaica',
'Japan',
'Jordan',
'Kazakhstan',
'Kenya',
'Kiribati',
'Kuwait',
'Kyrgyzstan',
"Lao People's Dem. Rep.",
'Latvia',
'Lebanon',
'Lesotho',
'Liberia',
'Libya',
'Liechtenstein',
'Lithuania',
'Luxembourg',
'Madagascar',
'Malawi',
'Malaysia',
'Maldives',
'Mali',
'Malta',
'Marshall Islands',
'Martinique',
'Mauritania',
'Mauritius',
'Mayotte',
'Mexico',
'Micronesia (Fed. States of)',
'Monaco',
'Mongolia',
'Montenegro',
'Montserrat',
'Morocco',
'Mozambique',
'Myanmar',
'Namibia',
'Nauru',
'Nepal',
'Netherlands (Kingdom of the)',
'New Caledonia',
'New Zealand',
'Nicaragua',
'Niger',
'Nigeria',
'Niue',
'North Macedonia',
'Northern Mariana Islands',
'Norway',
'Oman',
'Pakistan',
'Palau',
'Panama',
'Papua New Guinea',
'Paraguay',
'Peru',
'Philippines',
'Poland',
'Portugal',
'Puerto Rico',
'Qatar',
'Republic of Korea',
'Republic of Moldova',
'Réunion',
'Romania',
'Russian Federation',
'Rwanda',
'Saint Helena',
'Saint Kitts and Nevis',
'Saint Lucia',
'Saint Pierre and Miquelon',
'Saint Vincent & Grenadines',
'Samoa',
'San Marino',
'Sao Tome and Principe',
'Saudi Arabia',
'Senegal',
'Serbia',
'Seychelles',
'Sierra Leone',
'Singapore',
'Sint Maarten (Dutch part)',
'Slovakia',
'Slovenia',
'Solomon Islands',
'Somalia',
'South Africa',
'South Sudan',
'Spain',
'Sri Lanka',
'State of Palestine',
'Sudan',
'Suriname',
'Sweden',
'Switzerland',
'Syrian Arab Republic',
'Tajikistan',
'Thailand',
'Timor-Leste',
'Togo',
'Tokelau',
'Tonga',
'Trinidad and Tobago',
'Tunisia',
'Türkiye',
'Turkmenistan',
'Turks and Caicos Islands',
'Tuvalu',
'Uganda',
'Ukraine',
'United Arab Emirates',
'United Kingdom',
'United Rep. of Tanzania',
'United States of America',
'United States Virgin Islands',
'Uruguay',
'Uzbekistan',
'Vanuatu',
'Venezuela (Boliv. Rep. of)',
'Viet Nam',
'Wallis and Futuna Islands',
'Western Sahara',
'Yemen',
'Zambia',
'Zimbabwe']
demographics = pd.read_csv(
"https://data.un.org/_Docs/SYB/CSV/SYB67_1_202411_Population,%20Surface%20Area%20and%20Density.csv",
skiprows=1,
)
demographics
Region/Country/Area | Unnamed: 1 | Year | Series | Value | Footnotes | Source | |
---|---|---|---|---|---|---|---|
0 | 1 | Total, all countries or areas | 2010 | Population mid-year estimates (millions) | 7,021.73 | NaN | United Nations Population Division, New York, ... |
1 | 1 | Total, all countries or areas | 2010 | Population mid-year estimates for males (milli... | 3,533.14 | NaN | United Nations Population Division, New York, ... |
2 | 1 | Total, all countries or areas | 2010 | Population mid-year estimates for females (mil... | 3,488.59 | NaN | United Nations Population Division, New York, ... |
3 | 1 | Total, all countries or areas | 2010 | Sex ratio (males per 100 females) | 101.3 | NaN | United Nations Population Division, New York, ... |
4 | 1 | Total, all countries or areas | 2010 | Population aged 0 to 14 years old (percentage) | 27.3 | Calculated by the UN Statistics Division. | United Nations Population Division, New York, ... |
... | ... | ... | ... | ... | ... | ... | ... |
7705 | 722 | SIDS | 2024 | Population mid-year estimates for females (mil... | 36.77 | Projected estimate (medium fertility variant). | United Nations Population Division, New York, ... |
7706 | 722 | SIDS | 2024 | Sex ratio (males per 100 females) | 99.8 | Projected estimate (medium fertility variant). | United Nations Population Division, New York, ... |
7707 | 722 | SIDS | 2024 | Population aged 0 to 14 years old (percentage) | 24.5 | Projected estimate (medium fertility variant).... | United Nations Population Division, New York, ... |
7708 | 722 | SIDS | 2024 | Population aged 60+ years old (percentage) | 13.8 | Projected estimate (medium fertility variant).... | United Nations Population Division, New York, ... |
7709 | 722 | SIDS | 2024 | Population density | 30.8 | Projected estimate (medium fertility variant). | United Nations Population Division, New York, ... |
7710 rows × 7 columns
demographics = demographics.drop(columns=["Region/Country/Area", "Footnotes", "Source"])
demographics = demographics.rename(columns={"Unnamed: 1": "Region/Country/Area"})
demographics
Region/Country/Area | Year | Series | Value | |
---|---|---|---|---|
0 | Total, all countries or areas | 2010 | Population mid-year estimates (millions) | 7,021.73 |
1 | Total, all countries or areas | 2010 | Population mid-year estimates for males (milli... | 3,533.14 |
2 | Total, all countries or areas | 2010 | Population mid-year estimates for females (mil... | 3,488.59 |
3 | Total, all countries or areas | 2010 | Sex ratio (males per 100 females) | 101.3 |
4 | Total, all countries or areas | 2010 | Population aged 0 to 14 years old (percentage) | 27.3 |
... | ... | ... | ... | ... |
7705 | SIDS | 2024 | Population mid-year estimates for females (mil... | 36.77 |
7706 | SIDS | 2024 | Sex ratio (males per 100 females) | 99.8 |
7707 | SIDS | 2024 | Population aged 0 to 14 years old (percentage) | 24.5 |
7708 | SIDS | 2024 | Population aged 60+ years old (percentage) | 13.8 |
7709 | SIDS | 2024 | Population density | 30.8 |
7710 rows × 4 columns
demographics["Series"].unique()
array(['Population mid-year estimates (millions)',
'Population mid-year estimates for males (millions)',
'Population mid-year estimates for females (millions)',
'Sex ratio (males per 100 females)',
'Population aged 0 to 14 years old (percentage)',
'Population aged 60+ years old (percentage)', 'Population density',
'Surface area (thousand km2)'], dtype=object)
pop = demographics[demographics["Series"] == "Population mid-year estimates (millions)"]
pop
Region/Country/Area | Year | Series | Value | |
---|---|---|---|---|
0 | Total, all countries or areas | 2010 | Population mid-year estimates (millions) | 7,021.73 |
7 | Total, all countries or areas | 2015 | Population mid-year estimates (millions) | 7,470.49 |
14 | Total, all countries or areas | 2022 | Population mid-year estimates (millions) | 8,021.41 |
22 | Total, all countries or areas | 2024 | Population mid-year estimates (millions) | 8,161.97 |
29 | Africa | 2010 | Population mid-year estimates (millions) | 1,072.22 |
... | ... | ... | ... | ... |
7675 | LDC§ | 2015 | Population mid-year estimates (millions) | 962.71 |
7682 | LDC§ | 2024 | Population mid-year estimates (millions) | 1,188.01 |
7689 | SIDS | 2010 | Population mid-year estimates (millions) | 64.59 |
7696 | SIDS | 2015 | Population mid-year estimates (millions) | 68.19 |
7703 | SIDS | 2024 | Population mid-year estimates (millions) | 73.46 |
1065 rows × 4 columns
merged = pd.merge(migrants_total, pop, on=["Region/Country/Area", "Year"])
merged
Region/Country/Area | Year | Series_x | Value_x | Series_y | Value_y | |
---|---|---|---|---|---|---|
0 | Total, all countries or areas | 2010 | International migrant stock: Both sexes (number) | 220,983,187 | Population mid-year estimates (millions) | 7,021.73 |
1 | Total, all countries or areas | 2015 | International migrant stock: Both sexes (number) | 247,958,644 | Population mid-year estimates (millions) | 7,470.49 |
2 | Africa | 2010 | International migrant stock: Both sexes (number) | 17,806,677 | Population mid-year estimates (millions) | 1,072.22 |
3 | Africa | 2015 | International migrant stock: Both sexes (number) | 22,860,792 | Population mid-year estimates (millions) | 1,220.08 |
4 | Northern Africa | 2010 | International migrant stock: Both sexes (number) | 1,952,040 | Population mid-year estimates (millions) | 210.94 |
... | ... | ... | ... | ... | ... | ... |
515 | Yemen | 2015 | International migrant stock: Both sexes (number) | 379,882 | Population mid-year estimates (millions) | 31.16 |
516 | Zambia | 2010 | International migrant stock: Both sexes (number) | 149,962 | Population mid-year estimates (millions) | 13.97 |
517 | Zambia | 2015 | International migrant stock: Both sexes (number) | 132,107 | Population mid-year estimates (millions) | 16.40 |
518 | Zimbabwe | 2010 | International migrant stock: Both sexes (number) | 398,307 | Population mid-year estimates (millions) | 13.36 |
519 | Zimbabwe | 2015 | International migrant stock: Both sexes (number) | 400,482 | Population mid-year estimates (millions) | 14.40 |
520 rows × 6 columns
Clean up columns.
merged = merged.rename(
columns={
"Value_x": "Migrants",
"Value_y": "Population",
}
)
merged = merged.drop(columns=["Series_x", "Series_y"])
merged
Region/Country/Area | Year | Migrants | Population | |
---|---|---|---|---|
0 | Total, all countries or areas | 2010 | 220,983,187 | 7,021.73 |
1 | Total, all countries or areas | 2015 | 247,958,644 | 7,470.49 |
2 | Africa | 2010 | 17,806,677 | 1,072.22 |
3 | Africa | 2015 | 22,860,792 | 1,220.08 |
4 | Northern Africa | 2010 | 1,952,040 | 210.94 |
... | ... | ... | ... | ... |
515 | Yemen | 2015 | 379,882 | 31.16 |
516 | Zambia | 2010 | 149,962 | 13.97 |
517 | Zambia | 2015 | 132,107 | 16.40 |
518 | Zimbabwe | 2010 | 398,307 | 13.36 |
519 | Zimbabwe | 2015 | 400,482 | 14.40 |
520 rows × 4 columns
merged["Population"] * 1000000
0 7,021.737,021.737,021.737,021.737,021.737,021....
1 7,470.497,470.497,470.497,470.497,470.497,470....
2 1,072.221,072.221,072.221,072.221,072.221,072....
3 1,220.081,220.081,220.081,220.081,220.081,220....
4 210.94210.94210.94210.94210.94210.94210.94210....
...
515 31.1631.1631.1631.1631.1631.1631.1631.1631.163...
516 13.9713.9713.9713.9713.9713.9713.9713.9713.971...
517 16.4016.4016.4016.4016.4016.4016.4016.4016.401...
518 13.3613.3613.3613.3613.3613.3613.3613.3613.361...
519 14.4014.4014.4014.4014.4014.4014.4014.4014.401...
Name: Population, Length: 520, dtype: object
merged.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 520 entries, 0 to 519
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Region/Country/Area 520 non-null object
1 Year 520 non-null int64
2 Migrants 520 non-null object
3 Population 520 non-null object
dtypes: int64(1), object(3)
memory usage: 16.4+ KB
pop_float = merged["Population"].str.replace(",", "").astype(float)
merged["Population"] = pop_float * 1000000
merged["Migrants"] = merged["Migrants"].str.replace(",", "").astype(float)
merged.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 520 entries, 0 to 519
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Region/Country/Area 520 non-null object
1 Year 520 non-null int64
2 Migrants 520 non-null float64
3 Population 520 non-null float64
dtypes: float64(2), int64(1), object(1)
memory usage: 16.4+ KB
merged["Migrant %"] = merged["Migrants"] / merged["Population"]
merged
Region/Country/Area | Year | Migrants | Population | Migrant % | |
---|---|---|---|---|---|
0 | Total, all countries or areas | 2010 | 220983187.0 | 7.021730e+09 | 0.031471 |
1 | Total, all countries or areas | 2015 | 247958644.0 | 7.470490e+09 | 0.033192 |
2 | Africa | 2010 | 17806677.0 | 1.072220e+09 | 0.016607 |
3 | Africa | 2015 | 22860792.0 | 1.220080e+09 | 0.018737 |
4 | Northern Africa | 2010 | 1952040.0 | 2.109400e+08 | 0.009254 |
... | ... | ... | ... | ... | ... |
515 | Yemen | 2015 | 379882.0 | 3.116000e+07 | 0.012191 |
516 | Zambia | 2010 | 149962.0 | 1.397000e+07 | 0.010735 |
517 | Zambia | 2015 | 132107.0 | 1.640000e+07 | 0.008055 |
518 | Zimbabwe | 2010 | 398307.0 | 1.336000e+07 | 0.029813 |
519 | Zimbabwe | 2015 | 400482.0 | 1.440000e+07 | 0.027811 |
520 rows × 5 columns
merged.sort_values("Migrant %", ascending=False)
Region/Country/Area | Year | Migrants | Population | Migrant % | |
---|---|---|---|---|---|
360 | Niue | 2010 | 589.0 | 0.000000e+00 | inf |
470 | Tokelau | 2010 | 1128.0 | 0.000000e+00 | inf |
471 | Tokelau | 2015 | 1185.0 | 0.000000e+00 | inf |
361 | Niue | 2015 | 588.0 | 0.000000e+00 | inf |
200 | Falkland Islands (Malvinas) | 2010 | 1436.0 | 0.000000e+00 | inf |
... | ... | ... | ... | ... | ... |
508 | Viet Nam | 2010 | 61756.0 | 8.746000e+07 | 0.000706 |
145 | China | 2015 | 978046.0 | 1.396130e+09 | 0.000701 |
164 | Cuba | 2010 | 7373.0 | 1.130000e+07 | 0.000652 |
144 | China | 2010 | 849861.0 | 1.351560e+09 | 0.000629 |
165 | Cuba | 2015 | 4649.0 | 1.128000e+07 | 0.000412 |
520 rows × 5 columns
has_pop = merged["Population"] > 0
pop_2015 = merged["Year"] == 2015
merged_2015 = merged[has_pop & pop_2015]
merged_2015.sort_values("Migrant %", ascending=False)
Region/Country/Area | Year | Migrants | Population | Migrant % | |
---|---|---|---|---|---|
111 | Bonaire, St. Eustatius & Saba | 2015 | 24571.0 | 2.000000e+04 | 1.228550 |
491 | United Arab Emirates | 2015 | 7995126.0 | 8.670000e+06 | 0.922160 |
281 | Kuwait | 2015 | 2866136.0 | 3.830000e+06 | 0.748338 |
391 | Qatar | 2015 | 1687640.0 | 2.430000e+06 | 0.694502 |
433 | Sint Maarten (Dutch part) | 2015 | 27295.0 | 4.000000e+04 | 0.682375 |
... | ... | ... | ... | ... | ... |
255 | Indonesia | 2015 | 338124.0 | 2.618000e+08 | 0.001292 |
303 | Madagascar | 2015 | 32075.0 | 2.543000e+07 | 0.001261 |
509 | Viet Nam | 2015 | 72793.0 | 9.282000e+07 | 0.000784 |
145 | China | 2015 | 978046.0 | 1.396130e+09 | 0.000701 |
165 | Cuba | 2015 | 4649.0 | 1.128000e+07 | 0.000412 |
256 rows × 5 columns
Save the data to a file.
# merged_2015.to_csv("data/migrants_by_country.csv", index=False)