This repository has been archived on 2025-09-30. You can view files and clone it, but cannot push or open issues or pull requests.
Files
FlightUnrest/clean_data.ipynb
2025-09-26 00:11:14 -05:00

4689 lines
109 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "3e53265d",
"metadata": {},
"outputs": [],
"source": [
"# Module Imports\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import torch\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"from mappings import country_name_map\n",
"from haversine import calculate_served_population, assign_events_to_cities"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "38eee6f5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1899697/1743729008.py:8: DtypeWarning: Columns (28,30,31,32,34,35,36,37,38,39,40,41,42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" acled = pd.read_csv(\"./Data/acled.csv\")\n"
]
}
],
"source": [
"# Data Imports\n",
"# Load every raw dataset used by this notebook from the local ./Data directory.\n",
"\n",
"vdem = pd.read_csv(\"./Data/vdem.csv\")\n",
"hdi = pd.read_excel(\"./Data/hdi.xlsx\")\n",
"flights = pd.read_csv(\"./Data/routes.csv\")\n",
"airports = pd.read_csv(\"./Data/airports.csv\")\n",
"wb = pd.read_csv(\"./Data/wb.csv\")\n",
"# low_memory=False makes pandas infer each column's dtype from the whole file instead of\n",
"# chunk by chunk, which avoids the DtypeWarning about mixed-type columns in acled.csv.\n",
"acled = pd.read_csv(\"./Data/acled.csv\", low_memory=False)\n",
"# cities.csv is semicolon-delimited.\n",
"cities = pd.read_csv(\"./Data/cities.csv\", sep=\";\")\n"
]
},
{
"cell_type": "markdown",
"id": "cb2cf447",
"metadata": {},
"source": [
"First we'll start off by cleaning and organizing the three most important datasets: flights, cities, and acled."
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "f7c52db9",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "Origin",
"rawType": "object",
"type": "string"
},
{
"name": "Number_of_Flights",
"rawType": "int64",
"type": "integer"
},
{
"name": "0",
"rawType": "float64",
"type": "float"
},
{
"name": "Name",
"rawType": "object",
"type": "unknown"
},
{
"name": "City",
"rawType": "object",
"type": "unknown"
},
{
"name": "Country",
"rawType": "object",
"type": "unknown"
},
{
"name": "IATA",
"rawType": "object",
"type": "unknown"
},
{
"name": "ICAO",
"rawType": "object",
"type": "unknown"
},
{
"name": "Latitude",
"rawType": "float64",
"type": "float"
},
{
"name": "Longitude",
"rawType": "float64",
"type": "float"
},
{
"name": "Altitude",
"rawType": "float64",
"type": "float"
},
{
"name": "Timezone",
"rawType": "object",
"type": "unknown"
},
{
"name": "DST",
"rawType": "object",
"type": "unknown"
},
{
"name": "Tz Database",
"rawType": "object",
"type": "unknown"
},
{
"name": "Type",
"rawType": "object",
"type": "unknown"
},
{
"name": "Source",
"rawType": "object",
"type": "unknown"
}
],
"conversionMethod": "pd.DataFrame",
"ref": "690bbaa3-62cd-427a-ad28-5b35f048f8cb",
"rows": [
[
"0",
"AAE",
"9",
"220.0",
"Rabah Bitat Airport",
"Annaba",
"Algeria",
"AAE",
"DABB",
"36.822201",
"7.809174",
"16.0",
"1",
"N",
"Africa/Algiers",
"airport",
"OurAirports"
],
[
"1",
"AAL",
"20",
"628.0",
"Aalborg Airport",
"Aalborg",
"Denmark",
"AAL",
"EKYT",
"57.0927589138",
"9.84924316406",
"10.0",
"1",
"E",
"Europe/Copenhagen",
"airport",
"OurAirports"
],
[
"2",
"AAN",
"2",
"5937.0",
"Al Ain International Airport",
"Al Ain",
"United Arab Emirates",
"AAN",
"OMAL",
"24.2616996765137",
"55.6091995239258",
"869.0",
"4",
"U",
"Asia/Dubai",
"airport",
"OurAirports"
],
[
"3",
"AAQ",
"3",
"4353.0",
"Anapa Vityazevo Airport",
"Anapa",
"Russia",
"AAQ",
"URKA",
"45.002101898193",
"37.347301483154",
"174.0",
"3",
"N",
"Europe/Moscow",
"airport",
"OurAirports"
],
[
"4",
"AAR",
"8",
"607.0",
"Aarhus Airport",
"Aarhus",
"Denmark",
"AAR",
"EKAH",
"56.2999992371",
"10.6190004349",
"82.0",
"1",
"E",
"Europe/Copenhagen",
"airport",
"OurAirports"
],
[
"5",
"AAT",
"2",
"6946.0",
"Altay Air Base",
"Altay",
"China",
"AAT",
"ZWAT",
"47.7498855591",
"88.0858078003",
"2460.0",
"8",
"U",
"Asia/Shanghai",
"airport",
"OurAirports"
],
[
"6",
"AAX",
"1",
"7395.0",
"Romeu Zema Airport",
"Araxa",
"Brazil",
"AAX",
"SBAX",
"-19.563199996948",
"-46.960399627686",
"3276.0",
"-3",
"S",
"America/Sao_Paulo",
"airport",
"OurAirports"
],
[
"7",
"AAY",
"1",
"3979.0",
"Al Ghaidah International Airport",
"Al Ghaidah Intl",
"Yemen",
"AAY",
"OYGD",
"16.1916999816895",
"52.1749992370606",
"134.0",
"3",
"U",
"Asia/Aden",
"airport",
"OurAirports"
],
[
"8",
"ABA",
"4",
"2955.0",
"Abakan Airport",
"Abakan",
"Russia",
"ABA",
"UNAA",
"53.7400016784668",
"91.3850021362305",
"831.0",
"7",
"N",
"Asia/Krasnoyarsk",
"airport",
"OurAirports"
],
[
"9",
"ABB",
"2",
"9825.0",
"Asaba International Airport",
"Asaba",
"Nigeria",
"ABB",
"DNAS",
"6.204167",
"6.665278",
"305.0",
"1",
"N",
"Africa/Lagos",
"airport",
"OurAirports"
],
[
"10",
"ABD",
"6",
"2097.0",
"Abadan Airport",
"Abadan",
"Iran",
"ABD",
"OIAA",
"30.371099472",
"48.2282981873",
"10.0",
"3.5",
"E",
"Asia/Tehran",
"airport",
"OurAirports"
],
[
"11",
"ABE",
"13",
"4355.0",
"Lehigh Valley International Airport",
"Allentown",
"United States",
"ABE",
"KABE",
"40.652099609375",
"-75.440803527832",
"393.0",
"-5",
"A",
"America/New_York",
"airport",
"OurAirports"
],
[
"12",
"ABI",
"2",
"3718.0",
"Abilene Regional Airport",
"Abilene",
"United States",
"ABI",
"KABI",
"32.4113006592",
"-99.6819000244",
"1791.0",
"-6",
"A",
"America/Chicago",
"airport",
"OurAirports"
],
[
"13",
"ABJ",
"50",
"253.0",
"Port Bouet Airport",
"Abidjan",
"Cote d'Ivoire",
"ABJ",
"DIAP",
"5.261390209198",
"-3.9262900352478",
"21.0",
"0",
"N",
"Africa/Abidjan",
"airport",
"OurAirports"
],
[
"14",
"ABL",
"4",
"7177.0",
"Ambler Airport",
"Ambler",
"United States",
"ABL",
"PAFM",
"67.1063",
"-157.856989",
"334.0",
"-9",
"A",
"America/Anchorage",
"airport",
"OurAirports"
],
[
"15",
"ABM",
"1",
"3318.0",
"Northern Peninsula Airport",
"Amberley",
"Australia",
"ABM",
"YBAM",
"-10.9508",
"142.459",
"34.0",
"10",
"O",
"Australia/Brisbane",
"airport",
"OurAirports"
],
[
"16",
"ABQ",
"42",
"4019.0",
"Albuquerque International Sunport",
"Albuquerque",
"United States",
"ABQ",
"KABQ",
"35.040199",
"-106.609001",
"5355.0",
"-7",
"A",
"America/Denver",
"airport",
"OurAirports"
],
[
"17",
"ABR",
"1",
"5714.0",
"Aberdeen Regional Airport",
"Aberdeen",
"United States",
"ABR",
"KABR",
"45.4491004943848",
"-98.4217987060547",
"1302.0",
"-6",
"A",
"America/Chicago",
"airport",
"OurAirports"
],
[
"18",
"ABS",
"1",
"1127.0",
"Abu Simbel Airport",
"Abu Simbel",
"Egypt",
"ABS",
"HEBL",
"22.3759994507",
"31.611700058",
"616.0",
"2",
"U",
"Africa/Cairo",
"airport",
"OurAirports"
],
[
"19",
"ABT",
"3",
"2061.0",
"Al Baha Airport",
"El-baha",
"Saudi Arabia",
"ABT",
"OEBA",
"20.2961006165",
"41.6343002319",
"5486.0",
"3",
"U",
"Asia/Riyadh",
"airport",
"OurAirports"
],
[
"20",
"ABV",
"30",
"260.0",
"Nnamdi Azikiwe International Airport",
"Abuja",
"Nigeria",
"ABV",
"DNAA",
"9.00679016113281",
"7.26316976547241",
"1123.0",
"1",
"N",
"Africa/Lagos",
"airport",
"OurAirports"
],
[
"21",
"ABX",
"4",
"3333.0",
"Albury Airport",
"Albury",
"Australia",
"ABX",
"YMAY",
"-36.067798614502",
"146.957992553711",
"539.0",
"10",
"O",
"Australia/Sydney",
"airport",
"OurAirports"
],
[
"22",
"ABY",
"4",
"5715.0",
"Southwest Georgia Regional Airport",
"Albany",
"United States",
"ABY",
"KABY",
"31.5354995727539",
"-84.1945037841797",
"197.0",
"-5",
"A",
"America/New_York",
"airport",
"OurAirports"
],
[
"23",
"ABZ",
"41",
"532.0",
"Aberdeen Dyce Airport",
"Aberdeen",
"United Kingdom",
"ABZ",
"EGPD",
"57.2019004821777",
"-2.19777989387512",
"215.0",
"0",
"E",
"Europe/London",
"airport",
"OurAirports"
],
[
"24",
"ACA",
"8",
"1783.0",
"General Juan N Alvarez International Airport",
"Acapulco",
"Mexico",
"ACA",
"MMAA",
"16.7570991516113",
"-99.7539978027344",
"16.0",
"-6",
"S",
"America/Mexico_City",
"airport",
"OurAirports"
],
[
"25",
"ACC",
"54",
"248.0",
"Kotoka International Airport",
"Accra",
"Ghana",
"ACC",
"DGAA",
"5.60518980026245",
"-0.166786000132561",
"205.0",
"0",
"N",
"Africa/Accra",
"airport",
"OurAirports"
],
[
"26",
"ACE",
"116",
"1055.0",
"Lanzarote Airport",
"Arrecife",
"Spain",
"ACE",
"GCRR",
"28.945499",
"-13.6052",
"46.0",
"0",
"E",
"Atlantic/Canary",
"airport",
"OurAirports"
],
[
"27",
"ACH",
"2",
"1679.0",
"St Gallen Altenrhein Airport",
"Altenrhein",
"Switzerland",
"ACH",
"LSZR",
"47.4850006104",
"9.56077003479",
"1306.0",
"1",
"E",
"Europe/Zurich",
"airport",
"OurAirports"
],
[
"28",
"ACI",
"2",
"497.0",
"Alderney Airport",
"Alderney",
"Guernsey",
"ACI",
"EGJA",
"49.706104",
"-2.21472",
"290.0",
"0",
"E",
"Europe/Guernsey",
"airport",
"OurAirports"
],
[
"29",
"ACK",
"6",
"3517.0",
"Nantucket Memorial Airport",
"Nantucket",
"United States",
"ACK",
"KACK",
"41.25310135",
"-70.06020355",
"47.0",
"-5",
"A",
"America/New_York",
"airport",
"OurAirports"
],
[
"30",
"ACR",
"2",
"6740.0",
"Araracuara Airport",
"Araracuara",
"Colombia",
"ACR",
"SKAC",
"-0.5833",
"-72.4083",
"1250.0",
"-5",
"U",
"America/Bogota",
"airport",
"OurAirports"
],
[
"31",
"ACT",
"2",
"3700.0",
"Waco Regional Airport",
"Waco",
"United States",
"ACT",
"KACT",
"31.6112995147705",
"-97.2304992675781",
"516.0",
"-6",
"A",
"America/Chicago",
"airport",
"OurAirports"
],
[
"32",
"ACU",
"1",
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
[
"33",
"ACV",
"3",
"4384.0",
"California Redwood Coast-Humboldt County Airport",
"Arcata CA",
"United States",
"ACV",
"KACV",
"40.978101",
"-124.109",
"221.0",
"-8",
"A",
"America/Los_Angeles",
"airport",
"OurAirports"
],
[
"34",
"ACX",
"6",
"7506.0",
"Xingyi Airport",
"Xingyi",
"China",
"ACX",
"ZUYI",
"25.0863888889",
"104.959444444",
"4150.0",
"8",
"U",
"Asia/Shanghai",
"airport",
"OurAirports"
],
[
"35",
"ACY",
"10",
"3524.0",
"Atlantic City International Airport",
"Atlantic City",
"United States",
"ACY",
"KACY",
"39.4575996398926",
"-74.5772018432617",
"75.0",
"-5",
"A",
"America/New_York",
"airport",
"OurAirports"
],
[
"36",
"ACZ",
"1",
"2165.0",
"Zabol Airport",
"Zabol",
"Iran",
"ACZ",
"OIZB",
"31.0983009338379",
"61.5438995361328",
"1628.0",
"3.5",
"E",
"Asia/Tehran",
"airport",
"OurAirports"
],
[
"37",
"ADA",
"17",
"1685.0",
"Adana Airport",
"Adana",
"Turkey",
"ADA",
"LTAF",
"36.9822006226",
"35.2803993225",
"65.0",
"3",
"E",
"Europe/Istanbul",
"airport",
"OurAirports"
],
[
"38",
"ADB",
"66",
"1706.0",
"Adnan Menderes International Airport",
"Izmir",
"Turkey",
"ADB",
"LTBJ",
"38.2924003601",
"27.156999588",
"412.0",
"3",
"E",
"Europe/Istanbul",
"airport",
"OurAirports"
],
[
"39",
"ADD",
"105",
"1107.0",
"Addis Ababa Bole International Airport",
"Addis Ababa",
"Ethiopia",
"ADD",
"HAAB",
"8.97789001465",
"38.7993011475",
"7630.0",
"3",
"U",
"Africa/Addis_Ababa",
"airport",
"OurAirports"
],
[
"40",
"ADE",
"23",
"3977.0",
"Aden International Airport",
"Aden",
"Yemen",
"ADE",
"OYAA",
"12.8295001983643",
"45.0288009643555",
"7.0",
"3",
"U",
"Asia/Aden",
"airport",
"OurAirports"
],
[
"41",
"ADF",
"2",
"5800.0",
"Adıyaman Airport",
"Adiyaman",
"Turkey",
"ADF",
"LTCP",
"37.7313995361",
"38.4688987732",
"2216.0",
"3",
"E",
"Europe/Istanbul",
"airport",
"OurAirports"
],
[
"42",
"ADK",
"1",
"5959.0",
"Adak Airport",
"Adak Island",
"United States",
"ADK",
"PADK",
"51.8779983520508",
"-176.64599609375",
"18.0",
"-10",
"A",
"America/Adak",
"airport",
"OurAirports"
],
[
"43",
"ADL",
"51",
"3341.0",
"Adelaide International Airport",
"Adelaide",
"Australia",
"ADL",
"YPAD",
"-34.945",
"138.531006",
"20.0",
"9.5",
"O",
"Australia/Adelaide",
"airport",
"OurAirports"
],
[
"44",
"ADQ",
"11",
"3531.0",
"Kodiak Airport",
"Kodiak",
"United States",
"ADQ",
"PADQ",
"57.75",
"-152.4940033",
"78.0",
"-9",
"A",
"America/Anchorage",
"airport",
"OurAirports"
],
[
"45",
"ADU",
"3",
"5935.0",
"Ardabil Airport",
"Ardabil",
"Iran",
"ADU",
"OITL",
"38.3256988525",
"48.4244003296",
"4315.0",
"3.5",
"E",
"Asia/Tehran",
"airport",
"OurAirports"
],
[
"46",
"ADZ",
"11",
"2749.0",
"Gustavo Rojas Pinilla International Airport",
"San Andres Island",
"Colombia",
"ADZ",
"SKSP",
"12.5836",
"-81.7112",
"19.0",
"-5",
"U",
"America/Bogota",
"airport",
"OurAirports"
],
[
"47",
"AEB",
"3",
"8082.0",
"Baise Youjiang Airport",
"Baise",
"China",
"AEB",
"ZGBS",
"23.7206001282",
"106.959999084",
"490.0",
"8",
"N",
"Asia/Shanghai",
"airport",
"OurAirports"
],
[
"48",
"AEP",
"65",
"2442.0",
"Jorge Newbery Airpark",
"Buenos Aires",
"Argentina",
"AEP",
"SABE",
"-34.5592",
"-58.4156",
"18.0",
"-3",
"N",
"America/Buenos_Aires",
"airport",
"OurAirports"
],
[
"49",
"AER",
"26",
"2965.0",
"Sochi International Airport",
"Sochi",
"Russia",
"AER",
"URSS",
"43.449902",
"39.9566",
"89.0",
"3",
"N",
"Europe/Moscow",
"airport",
"OurAirports"
]
],
"shape": {
"columns": 16,
"rows": 3409
}
},
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Origin</th>\n",
" <th>Number_of_Flights</th>\n",
" <th>0</th>\n",
" <th>Name</th>\n",
" <th>City</th>\n",
" <th>Country</th>\n",
" <th>IATA</th>\n",
" <th>ICAO</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" <th>Altitude</th>\n",
" <th>Timezone</th>\n",
" <th>DST</th>\n",
" <th>Tz Database</th>\n",
" <th>Type</th>\n",
" <th>Source</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>AAE</td>\n",
" <td>9</td>\n",
" <td>220.0</td>\n",
" <td>Rabah Bitat Airport</td>\n",
" <td>Annaba</td>\n",
" <td>Algeria</td>\n",
" <td>AAE</td>\n",
" <td>DABB</td>\n",
" <td>36.822201</td>\n",
" <td>7.809174</td>\n",
" <td>16.0</td>\n",
" <td>1</td>\n",
" <td>N</td>\n",
" <td>Africa/Algiers</td>\n",
" <td>airport</td>\n",
" <td>OurAirports</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>AAL</td>\n",
" <td>20</td>\n",
" <td>628.0</td>\n",
" <td>Aalborg Airport</td>\n",
" <td>Aalborg</td>\n",
" <td>Denmark</td>\n",
" <td>AAL</td>\n",
" <td>EKYT</td>\n",
" <td>57.092759</td>\n",
" <td>9.849243</td>\n",
" <td>10.0</td>\n",
" <td>1</td>\n",
" <td>E</td>\n",
" <td>Europe/Copenhagen</td>\n",
" <td>airport</td>\n",
" <td>OurAirports</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>AAN</td>\n",
" <td>2</td>\n",
" <td>5937.0</td>\n",
" <td>Al Ain International Airport</td>\n",
" <td>Al Ain</td>\n",
" <td>United Arab Emirates</td>\n",
" <td>AAN</td>\n",
" <td>OMAL</td>\n",
" <td>24.261700</td>\n",
" <td>55.609200</td>\n",
" <td>869.0</td>\n",
" <td>4</td>\n",
" <td>U</td>\n",
" <td>Asia/Dubai</td>\n",
" <td>airport</td>\n",
" <td>OurAirports</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>AAQ</td>\n",
" <td>3</td>\n",
" <td>4353.0</td>\n",
" <td>Anapa Vityazevo Airport</td>\n",
" <td>Anapa</td>\n",
" <td>Russia</td>\n",
" <td>AAQ</td>\n",
" <td>URKA</td>\n",
" <td>45.002102</td>\n",
" <td>37.347301</td>\n",
" <td>174.0</td>\n",
" <td>3</td>\n",
" <td>N</td>\n",
" <td>Europe/Moscow</td>\n",
" <td>airport</td>\n",
" <td>OurAirports</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>AAR</td>\n",
" <td>8</td>\n",
" <td>607.0</td>\n",
" <td>Aarhus Airport</td>\n",
" <td>Aarhus</td>\n",
" <td>Denmark</td>\n",
" <td>AAR</td>\n",
" <td>EKAH</td>\n",
" <td>56.299999</td>\n",
" <td>10.619000</td>\n",
" <td>82.0</td>\n",
" <td>1</td>\n",
" <td>E</td>\n",
" <td>Europe/Copenhagen</td>\n",
" <td>airport</td>\n",
" <td>OurAirports</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3404</th>\n",
" <td>ZUH</td>\n",
" <td>60</td>\n",
" <td>6355.0</td>\n",
" <td>Zhuhai Jinwan Airport</td>\n",
" <td>Zhuhai</td>\n",
" <td>China</td>\n",
" <td>ZUH</td>\n",
" <td>ZGSD</td>\n",
" <td>22.006399</td>\n",
" <td>113.375999</td>\n",
" <td>23.0</td>\n",
" <td>8</td>\n",
" <td>U</td>\n",
" <td>Asia/Shanghai</td>\n",
" <td>airport</td>\n",
" <td>OurAirports</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3405</th>\n",
" <td>ZUM</td>\n",
" <td>2</td>\n",
" <td>5550.0</td>\n",
" <td>Churchill Falls Airport</td>\n",
" <td>Churchill Falls</td>\n",
" <td>Canada</td>\n",
" <td>ZUM</td>\n",
" <td>CZUM</td>\n",
" <td>53.561901</td>\n",
" <td>-64.106400</td>\n",
" <td>1442.0</td>\n",
" <td>-4</td>\n",
" <td>A</td>\n",
" <td>America/Halifax</td>\n",
" <td>airport</td>\n",
" <td>OurAirports</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3406</th>\n",
" <td>ZVK</td>\n",
" <td>3</td>\n",
" <td>3118.0</td>\n",
" <td>Savannakhet Airport</td>\n",
" <td>Savannakhet</td>\n",
" <td>Laos</td>\n",
" <td>ZVK</td>\n",
" <td>VLSK</td>\n",
" <td>16.556601</td>\n",
" <td>104.760002</td>\n",
" <td>509.0</td>\n",
" <td>7</td>\n",
" <td>U</td>\n",
" <td>Asia/Vientiane</td>\n",
" <td>airport</td>\n",
" <td>OurAirports</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3407</th>\n",
" <td>ZYI</td>\n",
" <td>15</td>\n",
" <td>9846.0</td>\n",
" <td>Zunyi Xinzhou Airport</td>\n",
" <td>Zunyi</td>\n",
" <td>China</td>\n",
" <td>ZYI</td>\n",
" <td>ZUZY</td>\n",
" <td>27.589500</td>\n",
" <td>107.000700</td>\n",
" <td>2920.0</td>\n",
" <td>8</td>\n",
" <td>N</td>\n",
" <td>Asia/Shanghai</td>\n",
" <td>airport</td>\n",
" <td>OurAirports</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3408</th>\n",
" <td>ZYL</td>\n",
" <td>4</td>\n",
" <td>3074.0</td>\n",
" <td>Osmany International Airport</td>\n",
" <td>Sylhet Osmani</td>\n",
" <td>Bangladesh</td>\n",
" <td>ZYL</td>\n",
" <td>VGSY</td>\n",
" <td>24.963200</td>\n",
" <td>91.866798</td>\n",
" <td>50.0</td>\n",
" <td>6</td>\n",
" <td>U</td>\n",
" <td>Asia/Dhaka</td>\n",
" <td>airport</td>\n",
" <td>OurAirports</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3409 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" Origin Number_of_Flights 0 Name \\\n",
"0 AAE 9 220.0 Rabah Bitat Airport \n",
"1 AAL 20 628.0 Aalborg Airport \n",
"2 AAN 2 5937.0 Al Ain International Airport \n",
"3 AAQ 3 4353.0 Anapa Vityazevo Airport \n",
"4 AAR 8 607.0 Aarhus Airport \n",
"... ... ... ... ... \n",
"3404 ZUH 60 6355.0 Zhuhai Jinwan Airport \n",
"3405 ZUM 2 5550.0 Churchill Falls Airport \n",
"3406 ZVK 3 3118.0 Savannakhet Airport \n",
"3407 ZYI 15 9846.0 Zunyi Xinzhou Airport \n",
"3408 ZYL 4 3074.0 Osmany International Airport \n",
"\n",
" City Country IATA ICAO Latitude Longitude \\\n",
"0 Annaba Algeria AAE DABB 36.822201 7.809174 \n",
"1 Aalborg Denmark AAL EKYT 57.092759 9.849243 \n",
"2 Al Ain United Arab Emirates AAN OMAL 24.261700 55.609200 \n",
"3 Anapa Russia AAQ URKA 45.002102 37.347301 \n",
"4 Aarhus Denmark AAR EKAH 56.299999 10.619000 \n",
"... ... ... ... ... ... ... \n",
"3404 Zhuhai China ZUH ZGSD 22.006399 113.375999 \n",
"3405 Churchill Falls Canada ZUM CZUM 53.561901 -64.106400 \n",
"3406 Savannakhet Laos ZVK VLSK 16.556601 104.760002 \n",
"3407 Zunyi China ZYI ZUZY 27.589500 107.000700 \n",
"3408 Sylhet Osmani Bangladesh ZYL VGSY 24.963200 91.866798 \n",
"\n",
" Altitude Timezone DST Tz Database Type Source \n",
"0 16.0 1 N Africa/Algiers airport OurAirports \n",
"1 10.0 1 E Europe/Copenhagen airport OurAirports \n",
"2 869.0 4 U Asia/Dubai airport OurAirports \n",
"3 174.0 3 N Europe/Moscow airport OurAirports \n",
"4 82.0 1 E Europe/Copenhagen airport OurAirports \n",
"... ... ... .. ... ... ... \n",
"3404 23.0 8 U Asia/Shanghai airport OurAirports \n",
"3405 1442.0 -4 A America/Halifax airport OurAirports \n",
"3406 509.0 7 U Asia/Vientiane airport OurAirports \n",
"3407 2920.0 8 N Asia/Shanghai airport OurAirports \n",
"3408 50.0 6 U Asia/Dhaka airport OurAirports \n",
"\n",
"[3409 rows x 16 columns]"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count the routes departing from each origin airport, then attach the airport metadata\n",
"# (name, city, country, coordinates, ...) by matching the origin code to the IATA column.\n",
"\n",
"# One row per origin code. groupby builds a new frame, so `flights` itself is not modified.\n",
"flights_count = (\n",
"    flights.groupby(\"Origin\")\n",
"    .size()\n",
"    .reset_index(name=\"Number_of_Flights\")\n",
")\n",
"\n",
"# Left join: origins without a matching IATA code are kept, with empty airport columns.\n",
"airports_with_flights = flights_count.merge(\n",
"    airports,\n",
"    how='left',\n",
"    left_on='Origin',\n",
"    right_on='IATA',\n",
")\n",
"airports_with_flights\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "539edd39",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "Country",
"rawType": "object",
"type": "unknown"
},
{
"name": "ASCII Name",
"rawType": "object",
"type": "unknown"
},
{
"name": "Population",
"rawType": "int64",
"type": "integer"
},
{
"name": "Latitude",
"rawType": "float64",
"type": "float"
},
{
"name": "Longitude",
"rawType": "float64",
"type": "float"
}
],
"conversionMethod": "pd.DataFrame",
"ref": "8fa05524-d0ab-465a-bc26-1ee0d3abd366",
"rows": [
[
"0",
"Peru",
"Huayllati",
"515",
"-13.92862",
"-72.48496"
],
[
"1",
"Peru",
"Duraznopampa",
"249",
"-6.59306",
"-77.80806"
],
[
"2",
"Peru",
"Kimbiri",
"4369",
"-12.61935",
"-73.78814"
],
[
"3",
"Peru",
"Urb. Santo Domingo",
"5000",
"-11.87655",
"-77.03345"
],
[
"4",
"Peru",
"Cono Norte",
"14542",
"-16.30111",
"-71.61647"
],
[
"5",
"Peru",
"Miramar",
"9614",
"-8.17945",
"-78.99381"
],
[
"6",
"Papua New Guinea",
"Kimbe",
"18847",
"-5.55085",
"150.13766"
],
[
"7",
"Philippines",
"NIA Valencia",
"83591",
"7.90639",
"125.09417"
],
[
"8",
"Philippines",
"Upper Klinan",
"3768",
"6.22722",
"125.12083"
],
[
"9",
"Philippines",
"Unidos",
"2111",
"8.5942",
"123.668"
],
[
"10",
"Philippines",
"Tumarbong",
"3261",
"10.37659",
"119.4579"
],
[
"11",
"Philippines",
"Tumalaytay",
"2340",
"12.27584",
"123.23201"
],
[
"12",
"Philippines",
"Tuhian",
"2646",
"13.6222",
"122.2178"
],
[
"13",
"Philippines",
"Tubao",
"0",
"16.3482",
"120.4122"
],
[
"14",
"Philippines",
"Topdac",
"2288",
"16.5553",
"120.7116"
],
[
"15",
"Philippines",
"Tongouson",
"6267",
"5.0214",
"120.1328"
],
[
"16",
"Philippines",
"Toledo",
"207314",
"10.3773",
"123.6386"
],
[
"17",
"Philippines",
"Tiwi",
"2269",
"10.92972",
"122.73417"
],
[
"18",
"Philippines",
"Tinutulan",
"2305",
"7.1",
"124.65"
],
[
"19",
"Philippines",
"Tinongan",
"62146",
"10.215",
"123.03528"
],
[
"20",
"Philippines",
"Tinawagan",
"2632",
"13.65",
"123.48333"
],
[
"21",
"Philippines",
"Tinambac",
"10299",
"13.8164",
"123.3261"
],
[
"22",
"Philippines",
"Tignapalan",
"4098",
"8.26389",
"124.57472"
],
[
"23",
"Philippines",
"Tigbauan",
"12659",
"10.67466",
"122.3776"
],
[
"24",
"Philippines",
"Tiblawan",
"2843",
"6.47834",
"126.10828"
],
[
"25",
"Philippines",
"Tapon",
"5245",
"10.0631",
"123.4453"
],
[
"26",
"Philippines",
"Tapia",
"2720",
"7.75417",
"126.01194"
],
[
"27",
"Philippines",
"Tandayag",
"3533",
"9.4547",
"123.2299"
],
[
"28",
"Philippines",
"Tamnag",
"6302",
"6.57298",
"124.88004"
],
[
"29",
"Philippines",
"Tambilil",
"4740",
"5.96222",
"124.66972"
],
[
"30",
"Philippines",
"Talipao",
"0",
"5.97639",
"121.11611"
],
[
"31",
"Philippines",
"Tagdanua",
"4116",
"7.2043",
"125.8841"
],
[
"32",
"Philippines",
"Tabon",
"2537",
"15.27098",
"120.91273"
],
[
"33",
"Philippines",
"Surup",
"2119",
"6.39035",
"126.14558"
],
[
"34",
"Philippines",
"Sumagui",
"3495",
"12.79532",
"121.47401"
],
[
"35",
"Philippines",
"Solano",
"36222",
"16.51918",
"121.18124"
],
[
"36",
"Philippines",
"Sison",
"3171",
"9.66028",
"125.52861"
],
[
"37",
"Philippines",
"San Vicente",
"3247",
"6.45375",
"124.78005"
],
[
"38",
"Philippines",
"Santiago",
"4939",
"14.0211",
"121.2792"
],
[
"39",
"Philippines",
"Santa Fe",
"2358",
"12.15656",
"121.99411"
],
[
"40",
"Philippines",
"Santa Fe",
"2204",
"10.2375",
"124.77194"
],
[
"41",
"Philippines",
"Santa Barbara",
"15445",
"16.0009",
"120.4023"
],
[
"42",
"Philippines",
"Santa Ana",
"47158",
"15.0955",
"120.767"
],
[
"43",
"Philippines",
"Santa",
"2283",
"17.486",
"120.4348"
],
[
"44",
"Philippines",
"San Patricio",
"3647",
"15.09775",
"120.72876"
],
[
"45",
"Philippines",
"San Nicolas",
"0",
"16.0703",
"120.7624"
],
[
"46",
"Philippines",
"San Miguel",
"2375",
"9.9869",
"124.3411"
],
[
"47",
"Philippines",
"San Jose",
"0",
"10.745",
"121.9415"
],
[
"48",
"Philippines",
"San Francisco",
"2535",
"13.89806",
"121.24806"
],
[
"49",
"Philippines",
"San Francisco",
"0",
"9.77694",
"125.42472"
]
],
"shape": {
"columns": 5,
"rows": 147035
}
},
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Country</th>\n",
" <th>ASCII Name</th>\n",
" <th>Population</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Peru</td>\n",
" <td>Huayllati</td>\n",
" <td>515</td>\n",
" <td>-13.92862</td>\n",
" <td>-72.48496</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Peru</td>\n",
" <td>Duraznopampa</td>\n",
" <td>249</td>\n",
" <td>-6.59306</td>\n",
" <td>-77.80806</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Peru</td>\n",
" <td>Kimbiri</td>\n",
" <td>4369</td>\n",
" <td>-12.61935</td>\n",
" <td>-73.78814</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Peru</td>\n",
" <td>Urb. Santo Domingo</td>\n",
" <td>5000</td>\n",
" <td>-11.87655</td>\n",
" <td>-77.03345</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Peru</td>\n",
" <td>Cono Norte</td>\n",
" <td>14542</td>\n",
" <td>-16.30111</td>\n",
" <td>-71.61647</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147038</th>\n",
" <td>Hong Kong</td>\n",
" <td>Chak On Estate</td>\n",
" <td>3913</td>\n",
" <td>22.34029</td>\n",
" <td>114.16454</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147039</th>\n",
" <td>Hong Kong</td>\n",
" <td>Hing Man Estate</td>\n",
" <td>5990</td>\n",
" <td>22.26682</td>\n",
" <td>114.23270</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147040</th>\n",
" <td>Hong Kong</td>\n",
" <td>Baguio Villa</td>\n",
" <td>5339</td>\n",
" <td>22.26234</td>\n",
" <td>114.13364</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147041</th>\n",
" <td>Hong Kong</td>\n",
" <td>Tsui Chuk Garden</td>\n",
" <td>10071</td>\n",
" <td>22.34639</td>\n",
" <td>114.18766</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147042</th>\n",
" <td>Spain</td>\n",
" <td>Rabade</td>\n",
" <td>1635</td>\n",
" <td>43.11700</td>\n",
" <td>-7.61714</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>147035 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" Country ASCII Name Population Latitude Longitude\n",
"0 Peru Huayllati 515 -13.92862 -72.48496\n",
"1 Peru Duraznopampa 249 -6.59306 -77.80806\n",
"2 Peru Kimbiri 4369 -12.61935 -73.78814\n",
"3 Peru Urb. Santo Domingo 5000 -11.87655 -77.03345\n",
"4 Peru Cono Norte 14542 -16.30111 -71.61647\n",
"... ... ... ... ... ...\n",
"147038 Hong Kong Chak On Estate 3913 22.34029 114.16454\n",
"147039 Hong Kong Hing Man Estate 5990 22.26682 114.23270\n",
"147040 Hong Kong Baguio Villa 5339 22.26234 114.13364\n",
"147041 Hong Kong Tsui Chuk Garden 10071 22.34639 114.18766\n",
"147042 Spain Rabade 1635 43.11700 -7.61714\n",
"\n",
"[147035 rows x 5 columns]"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Next we do the initial cleanup for our Cities dataset:\n",
"#   1. split the 'Coordinates' text column into numeric Latitude / Longitude,\n",
"#   2. standardize country names into a new 'Country' column,\n",
"#   3. drop a few cities by name,\n",
"#   4. build `population_locations`, the frame used for matching in later cells.\n",
"\n",
"# Split the coordinates once and reuse both halves.\n",
"# NOTE(review): assumes 'Coordinates' is formatted as \"<lat>,<lon>\" - confirm against the raw file.\n",
"coordinate_parts = cities['Coordinates'].str.split(',', expand=True)\n",
"cities['Latitude'] = coordinate_parts[0].astype(float)\n",
"cities['Longitude'] = coordinate_parts[1].str.strip().astype(float)\n",
"\n",
"# Apply country name mapping to standardize country names.\n",
"# Work on a copy so the imported country_name_map is not mutated.\n",
"extended_country_map = country_name_map.copy()\n",
"\n",
"# Country names containing commas that need an explicit mapping.\n",
"problematic_countries = {\n",
"    'Taiwan, China': 'Taiwan',\n",
"    'Tanzania, United Republic of': 'Tanzania',\n",
"    'Sudan, The Republic of': 'Sudan',\n",
"    \"Korea, Dem. People's Rep. of\": 'North Korea',\n",
"    'Korea, Republic of': 'South Korea',\n",
"    'Moldova, Republic of': 'Moldova',\n",
"    'Macedonia, The former Yugoslav Rep. of': 'North Macedonia',\n",
"    'Iran, Islamic Rep. of': 'Iran',\n",
"    'Congo, Democratic Republic of the': 'Democratic Republic of the Congo',\n",
"    'Venezuela, Bolivarian Rep. of': 'Venezuela',\n",
"    'South Sudan, The Republic of': 'South Sudan',\n",
"    'Macau, China': 'Macau'\n",
"}\n",
"\n",
"# These specific cases take precedence over any entries already in the base mapping.\n",
"extended_country_map.update(problematic_countries)\n",
"\n",
"# Names without a mapping entry are left unchanged by replace().\n",
"cities[\"Country\"] = cities[\"Country name EN\"].replace(extended_country_map)\n",
"\n",
"# NOTE(review): the match is on 'ASCII Name' only, so every city with one of these names is\n",
"# dropped regardless of country - confirm that this is intended.\n",
"cities_to_drop = ['New York City', 'London', 'Hong Kong']\n",
"# .copy() keeps `cities` an independent frame, so re-running this cell (which assigns new\n",
"# columns above) does not write into a filtered slice and raise SettingWithCopyWarning.\n",
"cities = cities[~cities['ASCII Name'].isin(cities_to_drop)].copy()\n",
"\n",
"# Create the population dataframe that will be used for matching in later cells\n",
"population_locations = cities[['Country', 'ASCII Name', 'Population', 'Latitude', 'Longitude']].copy()\n",
"\n",
"# Display the resulting frame (pandas truncates the middle rows of long frames)\n",
"population_locations"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "3c40d636",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"📊 Assigning using weighted score: 100%|██████████| 74/74 [00:37<00:00, 1.97it/s]\n"
]
}
],
"source": [
"# Distance threshold, in kilometres, passed to the served-population calculation.\n",
"DISTANCE_PARAMETER = 50  # km\n",
"\n",
"# NOTE(review): calculate_served_population is imported from the local haversine module;\n",
"# presumably it returns one population figure per row of airports_df - confirm.\n",
"served_populations = calculate_served_population(\n",
"    cities_df=population_locations,\n",
"    airports_df=airports_with_flights,\n",
"    batch_size=2000,\n",
"    device=\"cpu\",\n",
"    distance_threshold_km=DISTANCE_PARAMETER,\n",
")\n",
"\n",
"# Store the result as integers in a new column next to the airport rows.\n",
"airports_with_flights[\"Served_Population\"] = served_populations.astype(int)\n"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "6ce1ee35",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "City",
"rawType": "object",
"type": "string"
},
{
"name": "Country",
"rawType": "object",
"type": "string"
},
{
"name": "Number_of_Flights",
"rawType": "int64",
"type": "integer"
},
{
"name": "Airports",
"rawType": "int64",
"type": "integer"
},
{
"name": "Latitude",
"rawType": "float64",
"type": "float"
},
{
"name": "Longitude",
"rawType": "float64",
"type": "float"
},
{
"name": "Served_Population",
"rawType": "int64",
"type": "integer"
}
],
"conversionMethod": "pd.DataFrame",
"ref": "e9281a0f-da39-4b80-ae27-55d84e8f26a5",
"rows": [
[
"0",
"Aalborg",
"Denmark",
"20",
"1",
"57.0927589138",
"9.84924316406",
"343777"
],
[
"1",
"Aarhus",
"Denmark",
"8",
"1",
"56.2999992371",
"10.6190004349",
"563419"
],
[
"3",
"Abadan",
"Iran",
"6",
"1",
"30.371099472",
"48.2282981873",
"700786"
],
[
"4",
"Abakan",
"Russia",
"4",
"1",
"53.7400016784668",
"91.3850021362305",
"358031"
],
[
"5",
"Abbotsford",
"Canada",
"2",
"1",
"49.0252990722656",
"-122.361000061035",
"288453"
],
[
"6",
"Aberdeen",
"United Kingdom",
"41",
"1",
"57.2019004821777",
"-2.19777989387512",
"368130"
],
[
"7",
"Aberdeen",
"United States",
"1",
"1",
"45.4491004943848",
"-98.4217987060547",
"30569"
],
[
"8",
"Abha",
"Saudi Arabia",
"15",
"1",
"18.2404003143",
"42.6566009521",
"598439"
],
[
"9",
"Abidjan",
"Cote d'Ivoire",
"50",
"1",
"5.261390209198",
"-3.9262900352478",
"8510351"
],
[
"10",
"Abilene",
"United States",
"2",
"1",
"32.4113006592",
"-99.6819000244",
"139636"
],
[
"12",
"Abu Dhabi",
"United Arab Emirates",
"245",
"2",
"24.43065071105955",
"54.554599761962905",
"2277923"
],
[
"13",
"Abu Simbel",
"Egypt",
"1",
"1",
"22.3759994507",
"31.611700058",
"5000"
],
[
"14",
"Abuja",
"Nigeria",
"30",
"1",
"9.00679016113281",
"7.26316976547241",
"2967502"
],
[
"15",
"Acapulco",
"Mexico",
"8",
"1",
"16.7570991516113",
"-99.7539978027344",
"813885"
],
[
"16",
"Accra",
"Ghana",
"54",
"1",
"5.60518980026245",
"-0.166786000132561",
"3729607"
],
[
"18",
"Adana",
"Turkey",
"17",
"1",
"36.9822006226",
"35.2803993225",
"2270308"
],
[
"19",
"Addis Ababa",
"Ethiopia",
"105",
"1",
"8.97789001465",
"38.7993011475",
"4365800"
],
[
"20",
"Adelaide",
"Australia",
"51",
"1",
"-34.945",
"138.531006",
"2691219"
],
[
"21",
"Aden",
"Yemen",
"23",
"1",
"12.8295001983643",
"45.0288009643555",
"886375"
],
[
"22",
"Adiyaman",
"Turkey",
"2",
"1",
"37.7313995361",
"38.4688987732",
"359925"
],
[
"23",
"Adrar",
"Algeria",
"6",
"1",
"27.8376007080078",
"-0.186414003372192",
"68276"
],
[
"24",
"Agadir",
"Morocco",
"28",
"1",
"30.3250007629395",
"-9.41306972503662",
"1649383"
],
[
"26",
"Agartala",
"India",
"8",
"1",
"23.8869991302",
"91.2404022217",
"287229"
],
[
"27",
"Agatti Island",
"India",
"1",
"1",
"10.8236999511719",
"72.1760025024414",
"7521"
],
[
"28",
"Agen",
"France",
"2",
"1",
"44.1747016906738",
"0.590556025505066",
"204820"
],
[
"29",
"Agra",
"India",
"2",
"1",
"27.1557998657227",
"77.9608993530273",
"2826199"
],
[
"30",
"Agri",
"Turkey",
"2",
"1",
"39.654541015625",
"43.0259780883789",
"236320"
],
[
"32",
"Aguascalientes",
"Mexico",
"9",
"1",
"21.705601",
"-102.318001",
"1054038"
],
[
"34",
"Ahmedabad",
"India",
"35",
"1",
"23.0771999359",
"72.6346969604",
"7180293"
],
[
"35",
"Ahwaz",
"Iran",
"24",
"1",
"31.3374004364",
"48.7620010376",
"846410"
],
[
"37",
"Aizwal",
"India",
"6",
"1",
"23.8405990601",
"92.6196975708",
"317200"
],
[
"38",
"Ajaccio",
"France",
"39",
"1",
"41.9235992431641",
"8.8029203414917",
"88987"
],
[
"41",
"Akita",
"Japan",
"8",
"1",
"39.6156005859375",
"140.218994140625",
"531435"
],
[
"42",
"Akron",
"United States",
"22",
"1",
"40.9160995483398",
"-81.4421997070313",
"412251"
],
[
"43",
"Aksu",
"China",
"6",
"1",
"41.262501",
"80.291702",
"546917"
],
[
"44",
"Aktau",
"Kazakhstan",
"21",
"1",
"43.8601",
"51.091999",
"149328"
],
[
"45",
"Aktyubinsk",
"Kazakhstan",
"7",
"1",
"50.2458",
"57.206699",
"500757"
],
[
"47",
"Akureyri",
"Iceland",
"1",
"1",
"65.6600036621094",
"-18.0727005004883",
"21579"
],
[
"48",
"Akutan",
"United States",
"1",
"1",
"54.1337704415",
"-165.778895617",
"1040"
],
[
"49",
"Al Ain",
"United Arab Emirates",
"2",
"1",
"24.2616996765137",
"55.6091995239258",
"846747"
],
[
"51",
"Al Ghaidah Intl",
"Yemen",
"1",
"1",
"16.1916999816895",
"52.1749992370606",
"10948"
],
[
"52",
"Al Hociema",
"Morocco",
"1",
"1",
"35.1771011352539",
"-3.83951997756958",
"534402"
],
[
"53",
"Al-Jawf",
"Saudi Arabia",
"2",
"1",
"29.7851009368897",
"40.0999984741211",
"136847"
],
[
"54",
"Al-Ula",
"Saudi Arabia",
"2",
"1",
"26.48",
"38.1288888889",
"32413"
],
[
"55",
"Al-ahsa",
"Saudi Arabia",
"3",
"1",
"25.2852993011475",
"49.4851989746094",
"694996"
],
[
"57",
"Alamosa",
"United States",
"6",
"1",
"37.434898",
"-105.866997",
"18451"
],
[
"58",
"Alanya",
"Turkey",
"6",
"1",
"36.299217",
"32.300598",
"155567"
],
[
"59",
"Albany",
"Australia",
"3",
"1",
"-34.9432983398438",
"117.80899810791",
"63320"
],
[
"60",
"Albany",
"United States",
"35",
"2",
"37.141899108886705",
"-78.99810028076175",
"742897"
],
[
"61",
"Albuquerque",
"United States",
"42",
"1",
"35.040199",
"-106.609001",
"900135"
]
],
"shape": {
"columns": 7,
"rows": 2674
}
},
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>City</th>\n",
" <th>Country</th>\n",
" <th>Number_of_Flights</th>\n",
" <th>Airports</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" <th>Served_Population</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Aalborg</td>\n",
" <td>Denmark</td>\n",
" <td>20</td>\n",
" <td>1</td>\n",
" <td>57.092759</td>\n",
" <td>9.849243</td>\n",
" <td>343777</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Aarhus</td>\n",
" <td>Denmark</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>56.299999</td>\n",
" <td>10.619000</td>\n",
" <td>563419</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Abadan</td>\n",
" <td>Iran</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>30.371099</td>\n",
" <td>48.228298</td>\n",
" <td>700786</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Abakan</td>\n",
" <td>Russia</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>53.740002</td>\n",
" <td>91.385002</td>\n",
" <td>358031</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Abbotsford</td>\n",
" <td>Canada</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>49.025299</td>\n",
" <td>-122.361000</td>\n",
" <td>288453</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3176</th>\n",
" <td>Zurich</td>\n",
" <td>Switzerland</td>\n",
" <td>247</td>\n",
" <td>1</td>\n",
" <td>47.464699</td>\n",
" <td>8.549170</td>\n",
" <td>3466448</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3177</th>\n",
" <td>Zweibruecken</td>\n",
" <td>Germany</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>49.209400</td>\n",
" <td>7.400560</td>\n",
" <td>276108</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3178</th>\n",
" <td>Ängelholm</td>\n",
" <td>Sweden</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>56.296101</td>\n",
" <td>12.847100</td>\n",
" <td>282496</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3179</th>\n",
" <td>Çorlu</td>\n",
" <td>Turkey</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>41.138199</td>\n",
" <td>27.919100</td>\n",
" <td>448217</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3181</th>\n",
" <td>Östersund</td>\n",
" <td>Sweden</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>63.194401</td>\n",
" <td>14.500300</td>\n",
" <td>58570</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2674 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" City Country Number_of_Flights Airports Latitude \\\n",
"0 Aalborg Denmark 20 1 57.092759 \n",
"1 Aarhus Denmark 8 1 56.299999 \n",
"3 Abadan Iran 6 1 30.371099 \n",
"4 Abakan Russia 4 1 53.740002 \n",
"5 Abbotsford Canada 2 1 49.025299 \n",
"... ... ... ... ... ... \n",
"3176 Zurich Switzerland 247 1 47.464699 \n",
"3177 Zweibruecken Germany 5 1 49.209400 \n",
"3178 Ängelholm Sweden 3 1 56.296101 \n",
"3179 Çorlu Turkey 1 1 41.138199 \n",
"3181 Östersund Sweden 3 1 63.194401 \n",
"\n",
" Longitude Served_Population \n",
"0 9.849243 343777 \n",
"1 10.619000 563419 \n",
"3 48.228298 700786 \n",
"4 91.385002 358031 \n",
"5 -122.361000 288453 \n",
"... ... ... \n",
"3176 8.549170 3466448 \n",
"3177 7.400560 276108 \n",
"3178 12.847100 282496 \n",
"3179 27.919100 448217 \n",
"3181 14.500300 58570 \n",
"\n",
"[2674 rows x 7 columns]"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Group by city AND country to count airports and aggregate flight data\n",
"\n",
"flights_by_city = airports_with_flights.groupby(['City', 'Country']).agg(\n",
" Number_of_Flights=('Number_of_Flights', 'sum'), # Sum all flights from all airports in the city\n",
" Airports=('IATA', 'count'), # Count number of airports in the city\n",
" Latitude=('Latitude', 'mean'), # Average latitude of all airports in the city\n",
" Longitude=('Longitude', 'mean'), # Average longitude of all airports in the city\n",
" Served_Population=('Served_Population', 'sum')\n",
").reset_index()\n",
"\n",
"\n",
"# Now make sure the dataset conforms to our country name mapping conventions\n",
"flights_by_city['Country'] = flights_by_city['Country'].replace(country_name_map)\n",
"flights_by_city = flights_by_city[flights_by_city['Served_Population'] != 0]\n",
"flights_by_city"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "df1a7fda",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "Country",
"rawType": "object",
"type": "string"
},
{
"name": "Latitude",
"rawType": "float64",
"type": "float"
},
{
"name": "Longitude",
"rawType": "float64",
"type": "float"
}
],
"conversionMethod": "pd.DataFrame",
"ref": "36fdee89-b686-4131-b971-9b183830142a",
"rows": [
[
"0",
"Bolivia",
"-16.3991",
"-67.719"
],
[
"1",
"Yemen",
"14.1381",
"43.9717"
],
[
"2",
"Yemen",
"16.6537",
"43.2992"
],
[
"3",
"Yemen",
"17.019",
"43.2983"
],
[
"4",
"Yemen",
"16.791",
"43.3719"
],
[
"5",
"Yemen",
"17.1439",
"43.2487"
],
[
"6",
"Yemen",
"16.9411",
"43.5175"
],
[
"7",
"Yemen",
"13.2824",
"44.2092"
],
[
"8",
"Yemen",
"13.2685",
"44.3043"
],
[
"9",
"Yemen",
"13.7161",
"44.2125"
],
[
"10",
"Yemen",
"13.658",
"43.8351"
],
[
"11",
"Argentina",
"-32.9528",
"-60.6473"
],
[
"12",
"Argentina",
"-31.4189",
"-64.498"
],
[
"13",
"Argentina",
"-42.7672",
"-65.0363"
],
[
"14",
"Argentina",
"-33.2964",
"-66.3279"
],
[
"15",
"Bahrain",
"26.2294",
"50.4806"
],
[
"16",
"Belize",
"17.25",
"-88.7667"
],
[
"17",
"Bolivia",
"-17.65",
"-62.75"
],
[
"18",
"Bolivia",
"-17.6425",
"-62.4925"
],
[
"19",
"Bolivia",
"-17.622",
"-62.2243"
],
[
"20",
"Bolivia",
"-17.6361",
"-61.7121"
],
[
"21",
"Brazil",
"-9.6658",
"-35.7353"
],
[
"22",
"Brazil",
"-19.9208",
"-43.9378"
],
[
"23",
"Brazil",
"-21.7631",
"-41.3193"
],
[
"24",
"Chile",
"-33.4569",
"-70.6483"
],
[
"25",
"Colombia",
"2.4638",
"-77.2356"
],
[
"26",
"Colombia",
"4.572",
"-74.1124"
],
[
"27",
"Democratic Republic of Congo",
"0.1447",
"29.3352"
],
[
"28",
"Democratic Republic of Congo",
"-1.6459",
"29.2016"
],
[
"29",
"Democratic Republic of Congo",
"-2.3554",
"28.7932"
],
[
"30",
"Democratic Republic of Congo",
"-2.097",
"28.905"
],
[
"31",
"Ethiopia",
"11.2212",
"37.6491"
],
[
"32",
"Ethiopia",
"12.9521",
"36.1563"
],
[
"33",
"Ethiopia",
"11.3125",
"39.6791"
],
[
"34",
"Ethiopia",
"11.6",
"37.3833"
],
[
"35",
"Ghana",
"9.0345",
"-2.4834"
],
[
"36",
"Ghana",
"6.6936",
"-1.6218"
],
[
"37",
"Ghana",
"5.9144",
"0.0262"
],
[
"38",
"Honduras",
"14.839",
"-87.2842"
],
[
"39",
"Honduras",
"15.6144",
"-87.953"
],
[
"40",
"Honduras",
"15.7597",
"-86.7822"
],
[
"41",
"Honduras",
"14.0818",
"-87.2068"
],
[
"42",
"Iran",
"29.5791",
"50.517"
],
[
"43",
"Iran",
"29.5791",
"50.517"
],
[
"44",
"Iran",
"34.7992",
"48.5146"
],
[
"45",
"Iran",
"33.3066",
"47.0371"
],
[
"46",
"Iran",
"32.8663",
"59.2211"
],
[
"47",
"Iran",
"36.6764",
"48.4963"
],
[
"48",
"Iraq",
"36.1926",
"44.0106"
],
[
"49",
"Iraq",
"33.754",
"44.6052"
]
],
"shape": {
"columns": 3,
"rows": 428678
}
},
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Country</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Bolivia</td>\n",
" <td>-16.3991</td>\n",
" <td>-67.7190</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Yemen</td>\n",
" <td>14.1381</td>\n",
" <td>43.9717</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Yemen</td>\n",
" <td>16.6537</td>\n",
" <td>43.2992</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Yemen</td>\n",
" <td>17.0190</td>\n",
" <td>43.2983</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Yemen</td>\n",
" <td>16.7910</td>\n",
" <td>43.3719</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>428673</th>\n",
" <td>India</td>\n",
" <td>32.8083</td>\n",
" <td>74.8958</td>\n",
" </tr>\n",
" <tr>\n",
" <th>428674</th>\n",
" <td>South Africa</td>\n",
" <td>-33.9502</td>\n",
" <td>18.5545</td>\n",
" </tr>\n",
" <tr>\n",
" <th>428675</th>\n",
" <td>France</td>\n",
" <td>-17.5343</td>\n",
" <td>-149.5654</td>\n",
" </tr>\n",
" <tr>\n",
" <th>428676</th>\n",
" <td>Israel</td>\n",
" <td>31.7690</td>\n",
" <td>35.2163</td>\n",
" </tr>\n",
" <tr>\n",
" <th>428677</th>\n",
" <td>Israel</td>\n",
" <td>31.7690</td>\n",
" <td>35.2163</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>428678 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Country Latitude Longitude\n",
"0 Bolivia -16.3991 -67.7190\n",
"1 Yemen 14.1381 43.9717\n",
"2 Yemen 16.6537 43.2992\n",
"3 Yemen 17.0190 43.2983\n",
"4 Yemen 16.7910 43.3719\n",
"... ... ... ...\n",
"428673 India 32.8083 74.8958\n",
"428674 South Africa -33.9502 18.5545\n",
"428675 France -17.5343 -149.5654\n",
"428676 Israel 31.7690 35.2163\n",
"428677 Israel 31.7690 35.2163\n",
"\n",
"[428678 rows x 3 columns]"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# And the same for ACLED\n",
"\n",
"acled.loc[acled[\"admin1\"] == \"Hong Kong\", \"country\"] = \"Hong Kong\" \n",
"acled[\"Country\"] = acled[\"country\"].replace(country_name_map)\n",
"acled[\"Latitude\"] = acled[\"latitude\"]\n",
"acled[\"Longitude\"] = acled[\"longitude\"]\n",
"acled_filtered = acled[[\"Country\", \"Latitude\", \"Longitude\"]]\n",
"acled_filtered"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "67984435",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"📍 Assigning events to cities: 100%|██████████| 43/43 [00:04<00:00, 9.65it/s]\n"
]
}
],
"source": [
"# Constants\n",
"\n",
"DISTANCE_PARAMETER = 50 #km\n",
"\n",
"flights_events = assign_events_to_cities(flights_by_city, acled_filtered, \n",
" DISTANCE_PARAMETER, device=\"cpu\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60cf0dbb",
"metadata": {},
"outputs": [],
"source": [
"wb = wb[wb[\"2019 [YR2019]\"] != \"..\"] # Filter out rows with missing 2019 data\n",
"wb = wb[wb[\"Series Name\"].isin([\"Unemployment, total (% of total labor force) (modeled ILO estimate)\",\n",
" \"Population, total\",\n",
" \"GDP per capita (current US$)\",\n",
" \"Land area (sq. km)\"])] # Filter for required indicators\n",
"wb = wb.pivot(index=\"Country Name\", columns=\"Series Name\", values=\"2019 [YR2019]\") # Pivot the table\n",
"wb.reset_index(inplace=True) # Reset index for cleaner DataFrame\n",
"\n",
"# Clean and prepare World Bank data\n",
"# Converting string values to numeric for calculations\n",
"wb['GDP per capita (current US$)'] = pd.to_numeric(wb['GDP per capita (current US$)'], errors='coerce')\n",
"wb['Land area (sq. km)'] = pd.to_numeric(wb['Land area (sq. km)'], errors='coerce')\n",
"wb['Unemployment, total (% of total labor force) (modeled ILO estimate)'] = pd.to_numeric(wb['Unemployment, total (% of total labor force) (modeled ILO estimate)'], errors='coerce')\n",
"\n",
"wb['Country Name'] = wb['Country Name'].replace(country_name_map)\n",
"\n",
"gdp_map = wb.set_index('Country Name')['GDP per capita (current US$)'].to_dict()\n",
"land_area_map = wb.set_index('Country Name')['Land area (sq. km)'].to_dict()\n",
"unemployment_map = wb.set_index('Country Name')['Unemployment, total (% of total labor force) (modeled ILO estimate)'].to_dict()\n",
"\n",
"flights_events_wb = flights_events.copy()\n",
"\n",
"flights_events_wb['GDP_per_capita'] = flights_events['Country'].map(gdp_map)\n",
"flights_events_wb['Land_area'] = flights_events['Country'].map(land_area_map)\n",
"flights_events_wb['Unemployment'] = flights_events['Country'].map(unemployment_map)\n"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "7edbc9e3",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_17006/2197385407.py:4: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" vdem['country_name'] = vdem['country_name'].replace(country_name_map)\n"
]
}
],
"source": [
"vdem = vdem[vdem['year'] == 2021]\n",
"\n",
"# Apply the mapping to the Country column\n",
"vdem['country_name'] = vdem['country_name'].replace(country_name_map)\n",
"\n",
"vdem_rename_map = {\n",
" 'v2x_regime': 'Regime Type',\n",
" 'v2x_polyarchy': 'Electoral Democracy Index',\n",
" 'v2x_libdem': 'Liberal Democracy Index',\n",
" 'v2x_egaldem': 'Egalitarian Democracy Index',\n",
" 'v2x_freexp_altinf': 'Freedom of Expression & Alt Info',\n",
" 'v2xcl_rol': 'Rule of Law Index',\n",
" 'v2xcs_ccsi': 'Core Civil Society Index',\n",
" 'v2x_cspart': 'Civil Society Participation',\n",
" 'v2x_clphy': 'Freedom from Political Violence',\n",
" 'v2xps_party': 'Party System Institutionalization',\n",
" 'v2x_execorr': 'Executive Corruption Index',\n",
" 'v2x_corr': 'Political Corruption Index',\n",
" 'v2x_suffr': 'Suffrage'\n",
"}\n",
"vdem = vdem.rename(columns=vdem_rename_map)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "957d1a67",
"metadata": {},
"outputs": [],
"source": [
"freedom_expr_map = vdem.set_index('country_name')['Freedom of Expression & Alt Info'].to_dict()\n",
"civil_society_map = vdem.set_index('country_name')['Core Civil Society Index'].to_dict()\n",
"flights_events_vdem = flights_events_wb.copy()\n",
"flights_events_vdem['Freedom_of_Expression'] = flights_events_vdem['Country'].map(freedom_expr_map)\n",
"flights_events_vdem['Civil_Society_Index'] = flights_events_vdem['Country'].map(civil_society_map)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "6d2abf8b",
"metadata": {},
"outputs": [],
"source": [
"hdi = hdi.pivot(index=[\"country\"], columns=\"indicator\", values=\"value\").reset_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22af0ad6",
"metadata": {},
"outputs": [],
"source": [
"# Create a dictionary mapping countries to their HDI values\n",
"hdi_map = hdi.set_index('country')['Human Development Index (value)'].to_dict()\n",
"life_expectancy_map = hdi.set_index('country')['Life Expectancy at Birth (years)'].to_dict()\n",
"schooling_map = hdi.set_index('country')['Mean Years of Schooling (years)'].to_dict()\n",
"\n",
"# Replace country names in the maps if needed\n",
"for old_name, new_name in country_name_map.items():\n",
" if old_name in hdi_map:\n",
" hdi_map[new_name] = hdi_map.pop(old_name)\n",
" if old_name in life_expectancy_map:\n",
" life_expectancy_map[new_name] = life_expectancy_map.pop(old_name)\n",
" if old_name in schooling_map:\n",
" schooling_map[new_name] = schooling_map.pop(old_name)\n",
"\n",
"flights_events_hdi = flights_events_vdem.copy()\n",
"\n",
"# Add HDI, Life Expectancy, and Mean Years of Schooling to cities dataframe\n",
"flights_events_hdi['HDI'] = flights_events_hdi['Country'].map(hdi_map)\n",
"flights_events_hdi['Life_Expectancy'] = flights_events_hdi['Country'].map(life_expectancy_map)\n",
"flights_events_hdi['Mean_Schooling_Years'] = flights_events_hdi['Country'].map(schooling_map)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "708e9daa",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "City",
"rawType": "object",
"type": "string"
},
{
"name": "Country",
"rawType": "object",
"type": "string"
},
{
"name": "Number_of_Flights",
"rawType": "int64",
"type": "integer"
},
{
"name": "Served_Population",
"rawType": "int64",
"type": "integer"
},
{
"name": "Number_of_Events",
"rawType": "int64",
"type": "integer"
},
{
"name": "GDP_per_capita",
"rawType": "float64",
"type": "float"
},
{
"name": "Land_area",
"rawType": "float64",
"type": "float"
},
{
"name": "Unemployment",
"rawType": "float64",
"type": "float"
},
{
"name": "Freedom_of_Expression",
"rawType": "float64",
"type": "float"
},
{
"name": "Civil_Society_Index",
"rawType": "float64",
"type": "float"
},
{
"name": "HDI",
"rawType": "float64",
"type": "float"
},
{
"name": "Life_Expectancy",
"rawType": "float64",
"type": "float"
},
{
"name": "Mean_Schooling_Years",
"rawType": "float64",
"type": "float"
}
],
"conversionMethod": "pd.DataFrame",
"ref": "96aae5a8-bf6e-4261-b206-2470ef0e566b",
"rows": [
[
"0",
"Aalborg",
"Denmark",
"20",
"343777",
"57",
"63064.6343127366",
"1396.0",
null,
"0.985",
"0.969",
"0.959",
"81.291",
"13.0273206"
],
[
"1",
"Aarhus",
"Denmark",
"8",
"563419",
"110",
"63064.6343127366",
"1396.0",
null,
"0.985",
"0.969",
"0.959",
"81.291",
"13.0273206"
],
[
"3",
"Abadan",
"Iran",
"6",
"700786",
"66",
"3831.27894763337",
"1622500.0",
"10.74",
"0.166",
"0.081",
"0.793",
"76.799",
"10.84671198"
],
[
"4",
"Abakan",
"Russia",
"4",
"358031",
"2",
"11447.701171875",
"16376870.0",
"4.513",
"0.267",
"0.246",
"0.826",
"72.517",
"12.40999985"
],
[
"5",
"Abbotsford",
"Canada",
"2",
"288453",
"17",
"46352.8693445211",
"8965590.0",
"5.69",
"0.939",
"0.874",
"0.935",
"81.249",
"13.86999989"
],
[
"6",
"Aberdeen",
"United Kingdom",
"41",
"368130",
"18",
"42662.5353740311",
"241930.0",
"3.613",
"0.941",
"0.909",
"0.946",
"81.074",
"13.48999977"
],
[
"7",
"Aberdeen",
"United States",
"1",
"30569",
"0",
"65604.6815198734",
"9147420.0",
"3.669",
"0.931",
"0.969",
"0.93",
"77.979",
"13.90999985"
],
[
"8",
"Abha",
"Saudi Arabia",
"15",
"598439",
"0",
"27892.8421521179",
"2149690.0",
"5.636",
"0.088",
"0.121",
"0.893",
"77.31",
"11.60642778"
],
[
"9",
"Abidjan",
"Cote d'Ivoire",
"50",
"8510351",
"87",
"2141.76836170115",
"318000.0",
"2.401",
"0.652",
"0.827",
"0.565",
"61.562",
"4.869999886"
],
[
"10",
"Abilene",
"United States",
"2",
"139636",
"2",
"65604.6815198734",
"9147420.0",
"3.669",
"0.931",
"0.969",
"0.93",
"77.979",
"13.90999985"
],
[
"12",
"Abu Dhabi",
"United Arab Emirates",
"245",
"2277923",
"1",
"44251.4541390148",
"71020.0",
"2.331",
"0.054",
"0.159",
"0.921",
"80.487",
"12.98999977"
],
[
"13",
"Abu Simbel",
"Egypt",
"1",
"5000",
"0",
"2962.98892023003",
"995450.0",
"7.851",
"0.151",
"0.135",
"0.751",
"71.01",
"10.13611962"
],
[
"14",
"Abuja",
"Nigeria",
"30",
"2967502",
"482",
"2265.15520863056",
"910770.0",
"5.206",
"0.866",
"0.764",
"0.557",
"54.079",
"7.585969925"
],
[
"15",
"Acapulco",
"Mexico",
"8",
"813885",
"1027",
"10369.5553568761",
"1943950.0",
"3.477",
"0.76",
"0.727",
"0.783",
"73.973",
"9.349510934"
],
[
"16",
"Accra",
"Ghana",
"54",
"3729607",
"227",
"2186.18924111824",
"227533.0",
"3.16",
"0.864",
"0.914",
"0.625",
"65.246",
"7.107210159"
],
[
"18",
"Adana",
"Turkey",
"17",
"2270308",
"443",
"9215.44049888114",
"769630.0",
"13.73",
"0.189",
"0.228",
"0.853",
"77.591",
"8.98624366"
],
[
"19",
"Addis Ababa",
"Ethiopia",
"105",
"4365800",
"69",
"828.70897229725",
"1128570.9",
"2.917",
"0.397",
"0.655",
"0.494",
"66.897",
"2.402539813"
],
[
"20",
"Adelaide",
"Australia",
"51",
"2691219",
"110",
"54972.7017885437",
"7692020.0",
"5.159",
"0.906",
"0.9",
"0.952",
"82.766",
"12.86999989"
],
[
"21",
"Aden",
"Yemen",
"23",
"886375",
"490",
"623.376164898164",
"527970.0",
"17.202",
"0.149",
"0.248",
"0.466",
"67.952",
"5.267992865"
],
[
"22",
"Adiyaman",
"Turkey",
"2",
"359925",
"70",
"9215.44049888114",
"769630.0",
"13.73",
"0.189",
"0.228",
"0.853",
"77.591",
"8.98624366"
],
[
"23",
"Adrar",
"Algeria",
"6",
"68276",
"2",
"4468.45341883656",
"2381741.0",
"12.259",
"0.504",
"0.191",
"0.761",
"76.129",
"7.416386987"
],
[
"24",
"Agadir",
"Morocco",
"28",
"1649383",
"449",
"3508.09765625",
"446300.0",
"9.194",
"0.589",
"0.443",
"0.704",
"75.161",
"6.198"
],
[
"26",
"Agartala",
"India",
"8",
"287229",
"640",
"2041.42863698585",
"2973190.0",
"6.51",
"0.462",
"0.418",
"0.676",
"71.698",
"6.570000172"
],
[
"27",
"Agatti Island",
"India",
"1",
"7521",
"0",
"2041.42863698585",
"2973190.0",
"6.51",
"0.462",
"0.418",
"0.676",
"71.698",
"6.570000172"
],
[
"28",
"Agen",
"France",
"2",
"204820",
"114",
"33411.3358835672",
"18280.0",
"10.88",
"0.971",
"0.835",
"0.916",
"82.475",
"11.7656677"
],
[
"29",
"Agra",
"India",
"2",
"2826199",
"469",
"2041.42863698585",
"2973190.0",
"6.51",
"0.462",
"0.418",
"0.676",
"71.698",
"6.570000172"
],
[
"30",
"Agri",
"Turkey",
"2",
"236320",
"55",
"9215.44049888114",
"769630.0",
"13.73",
"0.189",
"0.228",
"0.853",
"77.591",
"8.98624366"
],
[
"32",
"Aguascalientes",
"Mexico",
"9",
"1054038",
"80",
"10369.5553568761",
"1943950.0",
"3.477",
"0.76",
"0.727",
"0.783",
"73.973",
"9.349510934"
],
[
"34",
"Ahmedabad",
"India",
"35",
"7180293",
"154",
"2041.42863698585",
"2973190.0",
"6.51",
"0.462",
"0.418",
"0.676",
"71.698",
"6.570000172"
],
[
"35",
"Ahwaz",
"Iran",
"24",
"846410",
"444",
"3831.27894763337",
"1622500.0",
"10.74",
"0.166",
"0.081",
"0.793",
"76.799",
"10.84671198"
],
[
"37",
"Aizwal",
"India",
"6",
"317200",
"56",
"2041.42863698585",
"2973190.0",
"6.51",
"0.462",
"0.418",
"0.676",
"71.698",
"6.570000172"
],
[
"38",
"Ajaccio",
"France",
"39",
"88987",
"121",
"33411.3358835672",
"18280.0",
"10.88",
"0.971",
"0.835",
"0.916",
"82.475",
"11.7656677"
],
[
"41",
"Akita",
"Japan",
"8",
"531435",
"8",
"40415.9567649547",
"364500.0",
"2.351",
"0.848",
"0.955",
"0.921",
"84.054",
"12.68404671"
],
[
"42",
"Akron",
"United States",
"22",
"412251",
"56",
"65604.6815198734",
"9147420.0",
"3.669",
"0.931",
"0.969",
"0.93",
"77.979",
"13.90999985"
],
[
"43",
"Aksu",
"China",
"6",
"546917",
"2",
"10143.8602206165",
"9388210.0",
"4.56",
"0.059",
"0.08",
"0.796",
"78.202",
"8.036181597"
],
[
"44",
"Aktau",
"Kazakhstan",
"21",
"149328",
"117",
"9457.12614657136",
"2699700.0",
"4.8",
"0.337",
"0.376",
"0.831",
"73.503",
"12.54717423"
],
[
"45",
"Aktyubinsk",
"Kazakhstan",
"7",
"500757",
"73",
"9457.12614657136",
"2699700.0",
"4.8",
"0.337",
"0.376",
"0.831",
"73.503",
"12.54717423"
],
[
"47",
"Akureyri",
"Iceland",
"1",
"21579",
"4",
"68452.23622306",
"100830.0",
"3.507",
"0.927",
"0.927",
"0.964",
"81.588",
"13.90892628"
],
[
"48",
"Akutan",
"United States",
"1",
"1040",
"0",
"65604.6815198734",
"9147420.0",
"3.669",
"0.931",
"0.969",
"0.93",
"77.979",
"13.90999985"
],
[
"49",
"Al Ain",
"United Arab Emirates",
"2",
"846747",
"0",
"44251.4541390148",
"71020.0",
"2.331",
"0.054",
"0.159",
"0.921",
"80.487",
"12.98999977"
],
[
"51",
"Al Ghaidah Intl",
"Yemen",
"1",
"10948",
"15",
"623.376164898164",
"527970.0",
"17.202",
"0.149",
"0.248",
"0.466",
"67.952",
"5.267992865"
],
[
"52",
"Al Hociema",
"Morocco",
"1",
"534402",
"60",
"3508.09765625",
"446300.0",
"9.194",
"0.589",
"0.443",
"0.704",
"75.161",
"6.198"
],
[
"53",
"Al-Jawf",
"Saudi Arabia",
"2",
"136847",
"0",
"27892.8421521179",
"2149690.0",
"5.636",
"0.088",
"0.121",
"0.893",
"77.31",
"11.60642778"
],
[
"54",
"Al-Ula",
"Saudi Arabia",
"2",
"32413",
"0",
"27892.8421521179",
"2149690.0",
"5.636",
"0.088",
"0.121",
"0.893",
"77.31",
"11.60642778"
],
[
"55",
"Al-ahsa",
"Saudi Arabia",
"3",
"694996",
"0",
"27892.8421521179",
"2149690.0",
"5.636",
"0.088",
"0.121",
"0.893",
"77.31",
"11.60642778"
],
[
"57",
"Alamosa",
"United States",
"6",
"18451",
"0",
"65604.6815198734",
"9147420.0",
"3.669",
"0.931",
"0.969",
"0.93",
"77.979",
"13.90999985"
],
[
"58",
"Alanya",
"Turkey",
"6",
"155567",
"19",
"9215.44049888114",
"769630.0",
"13.73",
"0.189",
"0.228",
"0.853",
"77.591",
"8.98624366"
],
[
"59",
"Albany",
"Australia",
"3",
"63320",
"4",
"54972.7017885437",
"7692020.0",
"5.159",
"0.906",
"0.9",
"0.952",
"82.766",
"12.86999989"
],
[
"60",
"Albany",
"United States",
"35",
"742897",
"1",
"65604.6815198734",
"9147420.0",
"3.669",
"0.931",
"0.969",
"0.93",
"77.979",
"13.90999985"
],
[
"61",
"Albuquerque",
"United States",
"42",
"900135",
"87",
"65604.6815198734",
"9147420.0",
"3.669",
"0.931",
"0.969",
"0.93",
"77.979",
"13.90999985"
]
],
"shape": {
"columns": 13,
"rows": 2674
}
},
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>City</th>\n",
" <th>Country</th>\n",
" <th>Number_of_Flights</th>\n",
" <th>Served_Population</th>\n",
" <th>Number_of_Events</th>\n",
" <th>GDP_per_capita</th>\n",
" <th>Land_area</th>\n",
" <th>Unemployment</th>\n",
" <th>Freedom_of_Expression</th>\n",
" <th>Civil_Society_Index</th>\n",
" <th>HDI</th>\n",
" <th>Life_Expectancy</th>\n",
" <th>Mean_Schooling_Years</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Aalborg</td>\n",
" <td>Denmark</td>\n",
" <td>20</td>\n",
" <td>343777</td>\n",
" <td>57</td>\n",
" <td>63064.634313</td>\n",
" <td>1396.00</td>\n",
" <td>NaN</td>\n",
" <td>0.985</td>\n",
" <td>0.969</td>\n",
" <td>0.959</td>\n",
" <td>81.291</td>\n",
" <td>13.027321</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Aarhus</td>\n",
" <td>Denmark</td>\n",
" <td>8</td>\n",
" <td>563419</td>\n",
" <td>110</td>\n",
" <td>63064.634313</td>\n",
" <td>1396.00</td>\n",
" <td>NaN</td>\n",
" <td>0.985</td>\n",
" <td>0.969</td>\n",
" <td>0.959</td>\n",
" <td>81.291</td>\n",
" <td>13.027321</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Abadan</td>\n",
" <td>Iran</td>\n",
" <td>6</td>\n",
" <td>700786</td>\n",
" <td>66</td>\n",
" <td>3831.278948</td>\n",
" <td>1622500.00</td>\n",
" <td>10.740</td>\n",
" <td>0.166</td>\n",
" <td>0.081</td>\n",
" <td>0.793</td>\n",
" <td>76.799</td>\n",
" <td>10.846712</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Abakan</td>\n",
" <td>Russia</td>\n",
" <td>4</td>\n",
" <td>358031</td>\n",
" <td>2</td>\n",
" <td>11447.701172</td>\n",
" <td>16376870.00</td>\n",
" <td>4.513</td>\n",
" <td>0.267</td>\n",
" <td>0.246</td>\n",
" <td>0.826</td>\n",
" <td>72.517</td>\n",
" <td>12.410000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Abbotsford</td>\n",
" <td>Canada</td>\n",
" <td>2</td>\n",
" <td>288453</td>\n",
" <td>17</td>\n",
" <td>46352.869345</td>\n",
" <td>8965590.00</td>\n",
" <td>5.690</td>\n",
" <td>0.939</td>\n",
" <td>0.874</td>\n",
" <td>0.935</td>\n",
" <td>81.249</td>\n",
" <td>13.870000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3176</th>\n",
" <td>Zurich</td>\n",
" <td>Switzerland</td>\n",
" <td>247</td>\n",
" <td>3466448</td>\n",
" <td>61</td>\n",
" <td>84121.931030</td>\n",
" <td>39509.63</td>\n",
" <td>4.394</td>\n",
" <td>0.978</td>\n",
" <td>0.954</td>\n",
" <td>0.966</td>\n",
" <td>83.200</td>\n",
" <td>13.949121</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3177</th>\n",
" <td>Zweibruecken</td>\n",
" <td>Germany</td>\n",
" <td>5</td>\n",
" <td>276108</td>\n",
" <td>61</td>\n",
" <td>47623.865607</td>\n",
" <td>349390.00</td>\n",
" <td>3.163</td>\n",
" <td>0.955</td>\n",
" <td>0.897</td>\n",
" <td>0.955</td>\n",
" <td>80.580</td>\n",
" <td>14.296372</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3178</th>\n",
" <td>Ängelholm</td>\n",
" <td>Sweden</td>\n",
" <td>3</td>\n",
" <td>282496</td>\n",
" <td>21</td>\n",
" <td>51773.046456</td>\n",
" <td>407280.00</td>\n",
" <td>6.833</td>\n",
" <td>0.969</td>\n",
" <td>0.967</td>\n",
" <td>0.959</td>\n",
" <td>83.046</td>\n",
" <td>12.740326</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3179</th>\n",
" <td>Çorlu</td>\n",
" <td>Turkey</td>\n",
" <td>1</td>\n",
" <td>448217</td>\n",
" <td>85</td>\n",
" <td>9215.440499</td>\n",
" <td>769630.00</td>\n",
" <td>13.730</td>\n",
" <td>0.189</td>\n",
" <td>0.228</td>\n",
" <td>0.853</td>\n",
" <td>77.591</td>\n",
" <td>8.986244</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3181</th>\n",
" <td>Östersund</td>\n",
" <td>Sweden</td>\n",
" <td>3</td>\n",
" <td>58570</td>\n",
" <td>18</td>\n",
" <td>51773.046456</td>\n",
" <td>407280.00</td>\n",
" <td>6.833</td>\n",
" <td>0.969</td>\n",
" <td>0.967</td>\n",
" <td>0.959</td>\n",
" <td>83.046</td>\n",
" <td>12.740326</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2674 rows × 13 columns</p>\n",
"</div>"
],
"text/plain": [
" City Country Number_of_Flights Served_Population \\\n",
"0 Aalborg Denmark 20 343777 \n",
"1 Aarhus Denmark 8 563419 \n",
"3 Abadan Iran 6 700786 \n",
"4 Abakan Russia 4 358031 \n",
"5 Abbotsford Canada 2 288453 \n",
"... ... ... ... ... \n",
"3176 Zurich Switzerland 247 3466448 \n",
"3177 Zweibruecken Germany 5 276108 \n",
"3178 Ängelholm Sweden 3 282496 \n",
"3179 Çorlu Turkey 1 448217 \n",
"3181 Östersund Sweden 3 58570 \n",
"\n",
" Number_of_Events GDP_per_capita Land_area Unemployment \\\n",
"0 57 63064.634313 1396.00 NaN \n",
"1 110 63064.634313 1396.00 NaN \n",
"3 66 3831.278948 1622500.00 10.740 \n",
"4 2 11447.701172 16376870.00 4.513 \n",
"5 17 46352.869345 8965590.00 5.690 \n",
"... ... ... ... ... \n",
"3176 61 84121.931030 39509.63 4.394 \n",
"3177 61 47623.865607 349390.00 3.163 \n",
"3178 21 51773.046456 407280.00 6.833 \n",
"3179 85 9215.440499 769630.00 13.730 \n",
"3181 18 51773.046456 407280.00 6.833 \n",
"\n",
" Freedom_of_Expression Civil_Society_Index HDI Life_Expectancy \\\n",
"0 0.985 0.969 0.959 81.291 \n",
"1 0.985 0.969 0.959 81.291 \n",
"3 0.166 0.081 0.793 76.799 \n",
"4 0.267 0.246 0.826 72.517 \n",
"5 0.939 0.874 0.935 81.249 \n",
"... ... ... ... ... \n",
"3176 0.978 0.954 0.966 83.200 \n",
"3177 0.955 0.897 0.955 80.580 \n",
"3178 0.969 0.967 0.959 83.046 \n",
"3179 0.189 0.228 0.853 77.591 \n",
"3181 0.969 0.967 0.959 83.046 \n",
"\n",
" Mean_Schooling_Years \n",
"0 13.027321 \n",
"1 13.027321 \n",
"3 10.846712 \n",
"4 12.410000 \n",
"5 13.870000 \n",
"... ... \n",
"3176 13.949121 \n",
"3177 14.296372 \n",
"3178 12.740326 \n",
"3179 8.986244 \n",
"3181 12.740326 \n",
"\n",
"[2674 rows x 13 columns]"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clean_data = flights_events_hdi.drop(columns=[\"Longitude\", \"Latitude\", \"Airports\"])\n",
"\n",
"taiwan_data = {\n",
" 'GDP_per_capita': 33000,\n",
" 'Land_area': 36197,\n",
" 'Unemployment': 3.4,\n",
" 'Freedom_of_Expression': 0.80,\n",
" 'Civil_Society_Index': 0.82,\n",
" 'HDI': 0.915,\n",
" 'Life_Expectancy': 80.5,\n",
" 'Mean_Schooling_Years': 12.3\n",
"}\n",
"\n",
"for col, val in taiwan_data.items():\n",
" clean_data.loc[clean_data['Country'] == 'Taiwan', col] = val\n",
"\n",
"clean_data"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "3.12.9",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}