final

2025-09-26 00:11:14 -05:00
commit 3f6cd4ba59
14 changed files with 688056 additions and 0 deletions
--- a/Data/acled.csv
+++ b/Data/acled.csv
--- a/Data/airports.csv
+++ b/Data/airports.csv
--- a/Data/cities.csv
+++ b/Data/cities.csv
--- a/Data/hdi.xlsx
+++ b/Data/hdi.xlsx
--- a/Data/routes.csv
+++ b/Data/routes.csv
--- a/Data/vdem.csv
+++ b/Data/vdem.csv
--- a/Data/wb.csv
+++ b/Data/wb.csv
--- a/FINAL_r_reg_final.ipynb
+++ b/FINAL_r_reg_final.ipynb
@@ -0,0 +1,362 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "d0c8ee28",
   "metadata": {
    "vscode": {
     "languageId": "r"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "corrplot 0.95 loaded\n",
      "\n"
     ]
    }
   ],
   "source": [
    "library(pscl)\n",
    "library(MASS)\n",
    "library(dplyr)\n",
    "library(car)\n",
    "library(corrplot)\n",
    "df <- read.csv(\"clean_data.csv\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b2731cb4",
   "metadata": {
    "vscode": {
     "languageId": "r"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                     [,1]\n",
      "Number_of_Flights_Mean       2.435639e+01\n",
      "Number_of_Flights_Median     5.000000e+00\n",
      "Number_of_Flights_SD         6.742273e+01\n",
      "Number_of_Flights_Min        1.000000e+00\n",
      "Number_of_Flights_Max        1.226000e+03\n",
      "Served_Population_Mean       9.937426e+05\n",
      "Served_Population_Median     2.699935e+05\n",
      "Served_Population_SD         2.479144e+06\n",
      "Served_Population_Min        2.300000e+01\n",
      "Served_Population_Max        3.372453e+07\n",
      "Number_of_Events_Mean        9.763500e+01\n",
      "Number_of_Events_Median      1.700000e+01\n",
      "Number_of_Events_SD          2.590523e+02\n",
      "Number_of_Events_Min         0.000000e+00\n",
      "Number_of_Events_Max         6.079000e+03\n",
      "GDP_per_capita_Mean          2.690592e+04\n",
      "GDP_per_capita_Median        1.144770e+04\n",
      "GDP_per_capita_SD            2.487818e+04\n",
      "GDP_per_capita_Min           2.102365e+02\n",
      "GDP_per_capita_Max           1.127264e+05\n",
      "Land_area_Mean               4.148564e+06\n",
      "Land_area_Median             1.557258e+06\n",
      "Land_area_SD                 4.617248e+06\n",
      "Land_area_Min                2.000000e+01\n",
      "Land_area_Max                1.637687e+07\n",
      "Unemployment_Mean            6.307765e+00\n",
      "Unemployment_Median          4.560000e+00\n",
      "Unemployment_SD              4.294981e+00\n",
      "Unemployment_Min             1.000000e-01\n",
      "Unemployment_Max             2.846800e+01\n",
      "Freedom_of_Expression_Mean   6.868388e-01\n",
      "Freedom_of_Expression_Median 8.080000e-01\n",
      "Freedom_of_Expression_SD     3.023572e-01\n",
      "Freedom_of_Expression_Min    1.200000e-02\n",
      "Freedom_of_Expression_Max    9.850000e-01\n",
      "Civil_Society_Index_Mean     6.990277e-01\n",
      "Civil_Society_Index_Median   8.370000e-01\n",
      "Civil_Society_Index_SD       3.056219e-01\n",
      "Civil_Society_Index_Min      1.500000e-02\n",
      "Civil_Society_Index_Max      9.790000e-01\n",
      "HDI_Mean                     8.239568e-01\n",
      "HDI_Median                   8.260000e-01\n",
      "HDI_SD                       1.211331e-01\n",
      "HDI_Min                      3.850000e-01\n",
      "HDI_Max                      9.670000e-01\n",
      "Life_Expectancy_Mean         7.612774e+01\n",
      "Life_Expectancy_Median       7.797900e+01\n",
      "Life_Expectancy_SD           5.569156e+00\n",
      "Life_Expectancy_Min          1.881800e+01\n",
      "Life_Expectancy_Max          8.405400e+01\n",
      "Mean_Schooling_Years_Mean    1.069395e+01\n",
      "Mean_Schooling_Years_Median  1.118458e+01\n",
      "Mean_Schooling_Years_SD      2.839204e+00\n",
      "Mean_Schooling_Years_Min     1.412289e+00\n",
      "Mean_Schooling_Years_Max     1.429637e+01\n"
     ]
    }
   ],
   "source": [
    "# Descriptive statistics (mean, median, SD, min, max) for every numeric column of df.\n",
    "numeric_vars <- select(df, where(is.numeric))\n",
    "\n",
    "summary_stats <- summarise(\n",
    "  numeric_vars,\n",
    "  across(\n",
    "    everything(),\n",
    "    list(\n",
    "      Mean   = ~mean(.x, na.rm = TRUE),\n",
    "      Median = ~median(.x, na.rm = TRUE),\n",
    "      SD     = ~sd(.x, na.rm = TRUE),\n",
    "      Min    = ~min(.x, na.rm = TRUE),\n",
    "      Max    = ~max(.x, na.rm = TRUE)\n",
    "    ),\n",
    "    .names = \"{.col}_{.fn}\"\n",
    "  )\n",
    ")\n",
    "\n",
    "# Transpose the one-row result so each statistic is printed on its own line.\n",
    "summary_stats_t <- t(summary_stats)\n",
    "print(summary_stats_t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "965aa97f",
   "metadata": {
    "vscode": {
     "languageId": "r"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    Served_Population      Number_of_Events        GDP_per_capita \n",
      "             1.424350              1.430105              4.591687 \n",
      "            Land_area          Unemployment Freedom_of_Expression \n",
      "             1.574135              1.270525             14.585125 \n",
      "  Civil_Society_Index                   HDI       Life_Expectancy \n",
      "            13.931561             30.454321              9.553996 \n",
      " Mean_Schooling_Years \n",
      "            11.736238 \n"
     ]
    }
   ],
   "source": [
    "# Variance inflation factors used to screen the numeric columns for collinearity.\n",
    "# The response is named explicitly instead of being picked by column position,\n",
    "# so reordering the columns of clean_data.csv cannot silently change this model.\n",
    "# NOTE(review): Number_of_Events, the outcome of the count model fitted further down,\n",
    "# is one of the regressors here, so these VIFs describe this screening regression,\n",
    "# not the design matrix of that model.\n",
    "vif_model <- lm(Number_of_Flights ~ ., data = numeric_vars)\n",
    "vif_values <- vif(vif_model)\n",
    "print(vif_values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d97fa5af",
   "metadata": {
    "vscode": {
     "languageId": "r"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                      Number_of_Flights Served_Population Number_of_Events\n",
      "Number_of_Flights           1.000000000       0.671699241      0.367851267\n",
      "Served_Population           0.671699241       1.000000000      0.506996810\n",
      "Number_of_Events            0.367851267       0.506996810      1.000000000\n",
      "GDP_per_capita              0.083266657      -0.107883557     -0.097270848\n",
      "Land_area                   0.007304305      -0.009571527     -0.159943385\n",
      "Unemployment               -0.039639363      -0.026894147      0.077944485\n",
      "Freedom_of_Expression      -0.013162926      -0.154830547      0.021788094\n",
      "Civil_Society_Index        -0.015615442      -0.153716921      0.004262525\n",
      "HDI                         0.102218228      -0.075387890     -0.098668654\n",
      "Life_Expectancy             0.111275449      -0.035340725     -0.048171784\n",
      "Mean_Schooling_Years        0.067418917      -0.112865885     -0.111281573\n",
      "                      GDP_per_capita    Land_area Unemployment\n",
      "Number_of_Flights         0.08326666  0.007304305  -0.03963936\n",
      "Served_Population        -0.10788356 -0.009571527  -0.02689415\n",
      "Number_of_Events         -0.09727085 -0.159943385   0.07794448\n",
      "GDP_per_capita            1.00000000  0.318471442  -0.30005120\n",
      "Land_area                 0.31847144  1.000000000  -0.20297109\n",
      "Unemployment             -0.30005120 -0.202971089   1.00000000\n",
      "Freedom_of_Expression     0.59430211 -0.143566301  -0.01042337\n",
      "Civil_Society_Index       0.58418574 -0.125211593  -0.01680905\n",
      "HDI                       0.79047286  0.287845223  -0.14071643\n",
      "Life_Expectancy           0.62585773  0.116634966  -0.09027353\n",
      "Mean_Schooling_Years      0.79576076  0.337944765  -0.22155575\n",
      "                      Freedom_of_Expression Civil_Society_Index         HDI\n",
      "Number_of_Flights               -0.01316293        -0.015615442  0.10221823\n",
      "Served_Population               -0.15483055        -0.153716921 -0.07538789\n",
      "Number_of_Events                 0.02178809         0.004262525 -0.09866865\n",
      "GDP_per_capita                   0.59430211         0.584185742  0.79047286\n",
      "Land_area                       -0.14356630        -0.125211593  0.28784522\n",
      "Unemployment                    -0.01042337        -0.016809049 -0.14071643\n",
      "Freedom_of_Expression            1.00000000         0.961643751  0.42397498\n",
      "Civil_Society_Index              0.96164375         1.000000000  0.38741832\n",
      "HDI                              0.42397498         0.387418317  1.00000000\n",
      "Life_Expectancy                  0.29538471         0.253887855  0.88027648\n",
      "Mean_Schooling_Years             0.47456356         0.445093459  0.90374489\n",
      "                      Life_Expectancy Mean_Schooling_Years\n",
      "Number_of_Flights          0.11127545           0.06741892\n",
      "Served_Population         -0.03534072          -0.11286588\n",
      "Number_of_Events          -0.04817178          -0.11128157\n",
      "GDP_per_capita             0.62585773           0.79576076\n",
      "Land_area                  0.11663497           0.33794476\n",
      "Unemployment              -0.09027353          -0.22155575\n",
      "Freedom_of_Expression      0.29538471           0.47456356\n",
      "Civil_Society_Index        0.25388786           0.44509346\n",
      "HDI                        0.88027648           0.90374489\n",
      "Life_Expectancy            1.00000000           0.65439475\n",
      "Mean_Schooling_Years       0.65439475           1.00000000\n"
     ]
    }
   ],
   "source": [
    "numeric_complete <- na.omit(numeric_vars)\n",
    "cor_matrix <- cor(numeric_complete)\n",
    "print(cor_matrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "8d22a613",
   "metadata": {
    "vscode": {
     "languageId": "r"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\n",
       "Call:\n",
       "glm.nb(formula = Number_of_Events ~ ., data = df_model, control = glm.control(maxit = 100), \n",
       "    init.theta = 0.4173593549, link = log)\n",
       "\n",
       "Coefficients:\n",
       "                        Estimate Std. Error z value Pr(>|z|)    \n",
       "(Intercept)            5.271e+00  2.296e-01  22.952  < 2e-16 ***\n",
       "Number_of_Flights      4.101e-03  6.156e-04   6.661 2.72e-11 ***\n",
       "HDI                   -2.751e+00  3.066e-01  -8.973  < 2e-16 ***\n",
       "Served_Population      3.587e-07  1.679e-08  21.357  < 2e-16 ***\n",
       "Land_area             -1.192e-07  7.361e-09 -16.190  < 2e-16 ***\n",
       "Unemployment           4.779e-02  7.225e-03   6.616 3.70e-11 ***\n",
       "Freedom_of_Expression  1.136e+00  1.185e-01   9.592  < 2e-16 ***\n",
       "---\n",
       "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n",
       "\n",
       "(Dispersion parameter for Negative Binomial(0.4174) family taken to be 1)\n",
       "\n",
       "    Null deviance: 4369.5  on 2619  degrees of freedom\n",
       "Residual deviance: 3164.7  on 2613  degrees of freedom\n",
       "AIC: 25106\n",
       "\n",
       "Number of Fisher Scoring iterations: 1\n",
       "\n",
       "\n",
       "              Theta:  0.4174 \n",
       "          Std. Err.:  0.0107 \n",
       "\n",
       " 2 x log-likelihood:  -25090.2560 "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Keep the outcome plus the six chosen predictors, then drop rows with any missing value.\n",
    "df_model <- df %>%\n",
    "  dplyr::select(Number_of_Events, Number_of_Flights, HDI,\n",
    "                Served_Population, Land_area,\n",
    "                Unemployment, Freedom_of_Expression) %>%\n",
    "  na.omit()\n",
    "\n",
    "# Negative binomial regression of the event count on every other column of df_model.\n",
    "nb_model <- glm.nb(Number_of_Events ~ ., data = df_model, control = glm.control(maxit = 100))\n",
    "\n",
    "summary(nb_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "32cdb5b8",
   "metadata": {
    "vscode": {
     "languageId": "r"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fitting null model for pseudo-r2\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<style>\n",
       ".dl-inline {width: auto; margin:0; padding: 0}\n",
       ".dl-inline>dt, .dl-inline>dd {float: none; width: auto; display: inline-block}\n",
       ".dl-inline>dt::after {content: \":\\0020\"; padding-right: .5ex}\n",
       ".dl-inline>dt:not(:first-of-type) {padding-left: .5ex}\n",
       "</style><dl class=dl-inline><dt>llh</dt><dd>-12545.1280487307</dd><dt>llhNull</dt><dd>-13056.4659219827</dd><dt>G2</dt><dd>1022.67574650416</dd><dt>McFadden</dt><dd>0.0391635743016152</dd><dt>r2ML</dt><dd>0.323169396916811</dd><dt>r2CU</dt><dd>0.323184565266037</dd></dl>\n"
      ],
      "text/latex": [
       "\\begin{description*}\n",
       "\\item[llh] -12545.1280487307\n",
       "\\item[llhNull] -13056.4659219827\n",
       "\\item[G2] 1022.67574650416\n",
       "\\item[McFadden] 0.0391635743016152\n",
       "\\item[r2ML] 0.323169396916811\n",
       "\\item[r2CU] 0.323184565266037\n",
       "\\end{description*}\n"
      ],
      "text/markdown": [
       "llh\n",
       ":   -12545.1280487307llhNull\n",
       ":   -13056.4659219827G2\n",
       ":   1022.67574650416McFadden\n",
       ":   0.0391635743016152r2ML\n",
       ":   0.323169396916811r2CU\n",
       ":   0.323184565266037\n",
       "\n"
      ],
      "text/plain": [
       "          llh       llhNull            G2      McFadden          r2ML \n",
       "-1.254513e+04 -1.305647e+04  1.022676e+03  3.916357e-02  3.231694e-01 \n",
       "         r2CU \n",
       " 3.231846e-01 "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pR2(nb_model)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "4.5.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/clean_data.csv
+++ b/clean_data.csv
--- a/clean_data.ipynb
+++ b/clean_data.ipynb
--- a/final_paper.odt
+++ b/final_paper.odt
--- a/final_paper.pdf
+++ b/final_paper.pdf
--- a/haversine.py
+++ b/haversine.py
@@ -0,0 +1,134 @@
 import torch
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
 def assign_events_to_cities(
    city_df, event_df, distance_threshold_km, device,
    city_lat='Latitude', city_lon='Longitude', city_country='Country',
    event_lat='Latitude', event_lon='Longitude', event_country='Country',
    batch_size=10000
 ):
    """
    Count, for every city, the events whose nearest same-country city it is.

    Each event is matched to the closest city (haversine distance) among the
    cities whose country label equals the event's own. A match farther away
    than ``distance_threshold_km`` is discarded, as is an event with no city in
    its country.

    Args:
        city_df: DataFrame with one row per city, holding the columns named by
            ``city_lat`` / ``city_lon`` (degrees) and ``city_country``.
        event_df: DataFrame with one row per event, holding the columns named by
            ``event_lat`` / ``event_lon`` (degrees) and ``event_country``.
        distance_threshold_km: Largest event-to-city distance (km) still counted.
        device: torch device (or device string) the tensors are created on.
        city_lat, city_lon, city_country: Column names in ``city_df``.
        event_lat, event_lon, event_country: Column names in ``event_df``.
        batch_size: Number of events per distance matrix; bounds peak memory at
            ``batch_size * len(city_df)`` elements.

    Returns:
        A copy of ``city_df`` with an integer ``Number_of_Events`` column
        (0 for cities that received no event). ``city_df`` is not modified.
    """
    def to_rad_tensor(df, lat_col, lon_col):
        # (n, 2) float32 tensor of [lat, lon], converted from degrees to radians.
        return torch.deg2rad(torch.tensor(df[[lat_col, lon_col]].values, dtype=torch.float32, device=device))

    def haversine_distance_matrix(src_rad, dst_rad):
        # src_rad is (m, 2) and dst_rad is (n, 2); the result is (m, n) in km.
        src_lat, src_lon = src_rad[:, 0:1], src_rad[:, 1:2]  # kept as columns so they broadcast against rows
        dst_lat, dst_lon = dst_rad[:, 0], dst_rad[:, 1]
        dlat = dst_lat - src_lat
        dlon = dst_lon - src_lon
        a = torch.sin(dlat / 2) ** 2 + torch.cos(src_lat) * torch.cos(dst_lat) * torch.sin(dlon / 2) ** 2
        # float32 rounding can push `a` marginally outside [0, 1]; without the
        # clamp arcsin would return NaN and the event would be dropped.
        a = torch.clamp(a, min=0.0, max=1.0)
        return 6371 * (2 * torch.arcsin(torch.sqrt(a)))  # 6371 km: Earth radius

    result_df = city_df.copy()
    result_df['Number_of_Events'] = 0
    # Nothing to match. Without this guard torch.cat([]) (no events) or
    # torch.min over an empty axis (no cities) would raise.
    if len(event_df) == 0 or len(city_df) == 0:
        return result_df

    event_coords = to_rad_tensor(event_df, event_lat, event_lon)
    city_coords = to_rad_tensor(city_df, city_lat, city_lon)
    event_countries = np.asarray(event_df[event_country].values)
    city_countries = np.asarray(city_df[city_country].values)

    min_dists, min_idxs = [], []
    for start in tqdm(range(0, len(event_df), batch_size), desc="📍 Assigning events to cities"):
        end = min(start + batch_size, len(event_df))
        # The same-country mask is built per batch so that the full
        # events x cities boolean matrix never has to exist at once.
        same_country = torch.tensor(
            event_countries[start:end, None] == city_countries[None, :],
            dtype=torch.bool, device=device,
        )
        dist_matrix = haversine_distance_matrix(event_coords[start:end], city_coords)
        # Cities in another country can never win: +inf exceeds every finite threshold.
        dist_matrix = dist_matrix.masked_fill(~same_country, float('inf'))
        min_dist, min_idx = torch.min(dist_matrix, dim=1)
        min_dists.append(min_dist)
        min_idxs.append(min_idx)
    min_dists = torch.cat(min_dists)
    min_idxs = torch.cat(min_idxs)

    valid_mask = min_dists <= distance_threshold_km
    assigned_idxs = min_idxs[valid_mask].cpu().numpy()
    # assigned_idxs are row positions in city_df, so a positional bincount gives
    # the per-city totals directly (independent of the DataFrame index labels).
    result_df['Number_of_Events'] = np.bincount(assigned_idxs, minlength=len(city_df))
    return result_df
 def calculate_served_population(airports_df, cities_df, distance_threshold_km, device, batch_size=2000):
    """
    Sum, per airport, the population of the cities assigned to it.

    Every city is assigned to at most one airport, the one with the highest
        score = Departures / (Distance_km + 1)
    among the airports that share the city's country label and lie within
    ``distance_threshold_km`` of it. A city with no such airport is not counted.

    Args:
        airports_df: DataFrame with 'Latitude', 'Longitude' (degrees),
            'Number_of_Flights' and 'Country' columns.
        cities_df: DataFrame with 'Latitude', 'Longitude' (degrees),
            'Population' and 'Country' columns.
        distance_threshold_km: Largest city-to-airport distance (km) considered.
        device: torch device (or device string) the tensors are moved to.
        batch_size: Number of cities per progress-bar step / cache flush.

    Returns:
        float32 numpy array with one served-population total per row of
        ``airports_df``, in row order.
    """
    # Tensors
    airport_coords = torch.tensor(airports_df[['Latitude', 'Longitude']].values, dtype=torch.float32).to(device)
    city_coords = torch.tensor(cities_df[['Latitude', 'Longitude']].values, dtype=torch.float32).to(device)
    city_pops = torch.tensor(cities_df['Population'].values, dtype=torch.float32).to(device)
    airport_departures = torch.tensor(airports_df['Number_of_Flights'].values, dtype=torch.float32).to(device)
    airport_countries = airports_df['Country'].values
    city_countries = cities_df['Country'].values

    def haversine_batch(lat1, lon1, lat2, lon2):
        # Great-circle distance in km between (lat1, lon1) and (lat2, lon2), all in degrees.
        lat1_rad = torch.deg2rad(lat1)
        lon1_rad = torch.deg2rad(lon1)
        lat2_rad = torch.deg2rad(lat2)
        lon2_rad = torch.deg2rad(lon2)
        dlon = lon2_rad - lon1_rad
        dlat = lat2_rad - lat1_rad
        a = torch.sin(dlat / 2) ** 2 + torch.cos(lat1_rad) * torch.cos(lat2_rad) * torch.sin(dlon / 2) ** 2
        # float32 rounding can push `a` marginally outside [0, 1]; clamp so asin never yields NaN.
        a = torch.clamp(a, min=0.0, max=1.0)
        return 6371 * (2 * torch.asin(torch.sqrt(a)))

    # Group airport row positions by country once, instead of rescanning every
    # airport for every city.
    positions_by_country = {}
    for j, c in enumerate(airport_countries):
        if c != c:  # NaN label: never equal to anything, so it can never match a city
            continue
        positions_by_country.setdefault(c, []).append(j)
    # Per country: (row positions, coordinates, departures), all on `device` so
    # that masks computed on `device` can index them.
    airports_by_country = {}
    for c, positions in positions_by_country.items():
        idx = torch.tensor(positions, dtype=torch.long, device=device)
        airports_by_country[c] = (idx, airport_coords[idx], airport_departures[idx])

    assigned_pop = torch.zeros(len(airport_coords), device=device)
    for start in tqdm(range(0, len(city_coords), batch_size), desc="📊 Assigning using weighted score"):
        end = min(start + batch_size, len(city_coords))
        for i in range(start, end):
            candidates = airports_by_country.get(city_countries[i])
            if candidates is None:
                continue
            idx, coords, deps = candidates
            # Scalar city coordinates broadcast against the (k,) airport vectors,
            # so `dists` is 1-D even when the country has a single airport.
            dists = haversine_batch(
                city_coords[i, 0], city_coords[i, 1],
                coords[:, 0], coords[:, 1]
            )
            in_range_mask = dists <= distance_threshold_km
            if torch.any(in_range_mask):
                scores = deps[in_range_mask] / (dists[in_range_mask] + 1)
                chosen_airport = idx[in_range_mask][torch.argmax(scores)]
                assigned_pop[chosen_airport] += city_pops[i]
        torch.cuda.empty_cache()
    return assigned_pop.cpu().numpy()
--- a/mappings.py
+++ b/mappings.py
@@ -0,0 +1,127 @@
 # This dictionary is used to ensure standardized country naming conventions for each dataset:
 # every key is a spelling found in one of the sources, every value is the canonical name.
 # Each key is listed exactly once. A repeated key in a dict literal is not an error in
 # Python (the last entry silently wins), so duplicates only hide conflicting edits.
 country_name_map = {
    # --- Dependent territories, folded into their sovereign state ---
    'Puerto Rico': 'United States',
    'Guam': 'United States',
    'Northern Mariana Islands': 'United States',
    'Netherlands Antilles': 'Netherlands',
    'Aruba': 'Netherlands',
    'Greenland': 'Denmark',  # part of the Kingdom of Denmark, not the Netherlands
    'Faroe Islands': 'Denmark',
    'Jersey': 'United Kingdom',
    'Guernsey': 'United Kingdom',
    'Gibraltar': 'United Kingdom',
    'Isle of Man': 'United Kingdom',
    'Bermuda': 'United Kingdom',
    'Anguilla': 'United Kingdom',
    'New Caledonia': 'France',
    'Reunion': 'France',
    'Réunion': 'France',
    'Martinique': 'France',
    'Guadeloupe': 'France',
    'French Guiana': 'France',
    'French Polynesia': 'France',
    'Saint Pierre and Miquelon': 'France',
    'Wallis and Futuna': 'France',
    'Mayotte': 'France',
    'Cook Islands': 'New Zealand',
    'Western Sahara': 'Morocco',

    # --- Alternative spellings of the same country ---
    'United States of America': 'United States',
    'Türkiye': 'Turkey',
    'Turkiye': 'Turkey',
    'Czechia': 'Czech Republic',
    'Slovak Republic': 'Slovakia',
    'Russian Federation': 'Russia',
    'Republic of Moldova': 'Moldova',
    'Moldova (Republic of)': 'Moldova',
    'North Macedonia': 'Macedonia',
    'Macedonia, The former Yugoslav Rep. of': 'Macedonia',
    'Kyrgyz Republic': 'Kyrgyzstan',
    'Syrian Arab Republic': 'Syria',
    'Yemen, Rep.': 'Yemen',
    'Egypt, Arab Rep.': 'Egypt',
    'Iran, Islamic Rep.': 'Iran',
    'Iran, Islamic Rep. of': 'Iran',
    'Iran (Islamic Republic of)': 'Iran',
    'Burma/Myanmar': 'Burma',
    'Myanmar': 'Burma',
    'Viet Nam': 'Vietnam',
    'Lao PDR': 'Laos',
    "Lao People's Democratic Republic": 'Laos',
    'Brunei Darussalam': 'Brunei',
    'Timor-Leste': 'East Timor',
    'Taiwan, China': 'Taiwan',
    'Hong Kong SAR, China': 'Hong Kong',
    'Hong Kong, China (SAR)': 'Hong Kong',
    'Hong Kong, China': 'Hong Kong',
    'Macao SAR, China': 'Macau',
    'Korea, Rep.': 'South Korea',
    'Korea, Republic of': 'South Korea',
    'Korea (Republic of)': 'South Korea',
    'Republic of Korea': 'South Korea',
    "Korea, Dem. People's Rep.": 'North Korea',
    "Korea (Democratic People's Rep. of)": 'North Korea',
    'Venezuela, Bolivarian Rep. of': 'Venezuela',
    'Venezuela, RB': 'Venezuela',
    'Venezuela (Bolivarian Republic of)': 'Venezuela',
    'Bolivia (Plurinational State of)': 'Bolivia',
    'Bahamas, The': 'Bahamas',
    'Gambia, The': 'Gambia',
    'Cabo Verde': 'Cape Verde',
    'Ivory Coast': "Cote d'Ivoire",
    "Côte d'Ivoire": "Cote d'Ivoire",
    'South Sudan, The Republic of': 'South Sudan',
    'Sudan, The Republic of': 'Sudan',
    'Tanzania, United Republic of': 'Tanzania',
    'Tanzania (United Republic of)': 'Tanzania',

    # --- The two Congos ---
    'Congo (Kinshasa)': 'Democratic Republic of the Congo',
    'Congo, Democratic Republic of the': 'Democratic Republic of the Congo',
    'Congo, Dem. Rep.': 'Democratic Republic of the Congo',
    'Congo (Democratic Republic of the)': 'Democratic Republic of the Congo',
    'Congo (Brazzaville)': 'Republic of the Congo',
    'Congo, Rep.': 'Republic of the Congo',
    'Congo': 'Republic of the Congo',
 }