{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "d0c8ee28", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "corrplot 0.95 loaded\n", "\n" ] } ], "source": [ "# Packages used by the analysis. Attach order matters: MASS and dplyr both\n", "# export select(), and whichever package is attached last masks the other,\n", "# so MASS is attached before dplyr; that keeps a bare select() bound to the\n", "# dplyr version.\n", "# car supplies vif() and MASS supplies glm.nb().\n", "# NOTE(review): pscl is presumably needed by the pseudo-R2 step further down,\n", "# and no corrplot call is visible in this part of the notebook - confirm both.\n", "library(pscl)\n", "library(MASS)\n", "library(dplyr)\n", "library(car)\n", "library(corrplot)\n", "# Load the analysis data; the relative path is resolved against the working directory.\n", "df <- read.csv(\"clean_data.csv\")\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "b2731cb4", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " [,1]\n", "Number_of_Flights_Mean 2.435639e+01\n", "Number_of_Flights_Median 5.000000e+00\n", "Number_of_Flights_SD 6.742273e+01\n", "Number_of_Flights_Min 1.000000e+00\n", "Number_of_Flights_Max 1.226000e+03\n", "Served_Population_Mean 9.937426e+05\n", "Served_Population_Median 2.699935e+05\n", "Served_Population_SD 2.479144e+06\n", "Served_Population_Min 2.300000e+01\n", "Served_Population_Max 3.372453e+07\n", "Number_of_Events_Mean 9.763500e+01\n", "Number_of_Events_Median 1.700000e+01\n", "Number_of_Events_SD 2.590523e+02\n", "Number_of_Events_Min 0.000000e+00\n", "Number_of_Events_Max 6.079000e+03\n", "GDP_per_capita_Mean 2.690592e+04\n", "GDP_per_capita_Median 1.144770e+04\n", "GDP_per_capita_SD 2.487818e+04\n", "GDP_per_capita_Min 2.102365e+02\n", "GDP_per_capita_Max 1.127264e+05\n", "Land_area_Mean 4.148564e+06\n", "Land_area_Median 1.557258e+06\n", "Land_area_SD 4.617248e+06\n", "Land_area_Min 2.000000e+01\n", "Land_area_Max 1.637687e+07\n", "Unemployment_Mean 6.307765e+00\n", "Unemployment_Median 4.560000e+00\n", "Unemployment_SD 4.294981e+00\n", "Unemployment_Min 1.000000e-01\n", "Unemployment_Max 2.846800e+01\n", "Freedom_of_Expression_Mean 6.868388e-01\n", "Freedom_of_Expression_Median 8.080000e-01\n", "Freedom_of_Expression_SD 3.023572e-01\n", "Freedom_of_Expression_Min 1.200000e-02\n", "Freedom_of_Expression_Max 9.850000e-01\n", "Civil_Society_Index_Mean 6.990277e-01\n",
"Civil_Society_Index_Median 8.370000e-01\n", "Civil_Society_Index_SD 3.056219e-01\n", "Civil_Society_Index_Min 1.500000e-02\n", "Civil_Society_Index_Max 9.790000e-01\n", "HDI_Mean 8.239568e-01\n", "HDI_Median 8.260000e-01\n", "HDI_SD 1.211331e-01\n", "HDI_Min 3.850000e-01\n", "HDI_Max 9.670000e-01\n", "Life_Expectancy_Mean 7.612774e+01\n", "Life_Expectancy_Median 7.797900e+01\n", "Life_Expectancy_SD 5.569156e+00\n", "Life_Expectancy_Min 1.881800e+01\n", "Life_Expectancy_Max 8.405400e+01\n", "Mean_Schooling_Years_Mean 1.069395e+01\n", "Mean_Schooling_Years_Median 1.118458e+01\n", "Mean_Schooling_Years_SD 2.839204e+00\n", "Mean_Schooling_Years_Min 1.412289e+00\n", "Mean_Schooling_Years_Max 1.429637e+01\n" ] } ], "source": [ "# Keep only the numeric columns. The explicit dplyr:: prefix keeps this call\n", "# independent of the order in which MASS and dplyr were attached.\n", "numeric_vars <- df %>%\n", "  dplyr::select(where(is.numeric))\n", "\n", "# Mean, median, SD, min and max of every numeric column, ignoring NAs.\n", "# Result columns are named COLUMN_STATISTIC via the .names glue pattern.\n", "summary_stats <- numeric_vars %>%\n", "  summarise(across(everything(),\n", "                   list(Mean = ~mean(., na.rm = TRUE),\n", "                        Median = ~median(., na.rm = TRUE),\n", "                        SD = ~sd(., na.rm = TRUE),\n", "                        Min = ~min(., na.rm = TRUE),\n", "                        Max = ~max(., na.rm = TRUE)),\n", "                   .names = \"{.col}_{.fn}\"))\n", "\n", "# Transpose so that each statistic is printed on its own row.\n", "summary_stats_t <- t(summary_stats)\n", "print(summary_stats_t)" ] }, { "cell_type": "code", "execution_count": null, "id": "965aa97f", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [], "source": [ "# Variance inflation factors for the candidate predictors of Number_of_Events.\n", "# VIFs depend only on the predictor set, so the response is named explicitly\n", "# rather than taken by column position: the outcome that is modelled with\n", "# glm.nb() must sit on the left-hand side and must not appear among the\n", "# predictors being screened for collinearity.\n", "# lm() drops rows with missing values through its default na.action.\n", "vif_model <- lm(Number_of_Events ~ ., data = numeric_vars)\n", "vif_values <- vif(vif_model)\n", "print(vif_values)" ] }, { "cell_type": "code", "execution_count": 6, "id": "d97fa5af", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "
Number_of_Flights Served_Population Number_of_Events\n", "Number_of_Flights 1.000000000 0.671699241 0.367851267\n", "Served_Population 0.671699241 1.000000000 0.506996810\n", "Number_of_Events 0.367851267 0.506996810 1.000000000\n", "GDP_per_capita 0.083266657 -0.107883557 -0.097270848\n", "Land_area 0.007304305 -0.009571527 -0.159943385\n", "Unemployment -0.039639363 -0.026894147 0.077944485\n", "Freedom_of_Expression -0.013162926 -0.154830547 0.021788094\n", "Civil_Society_Index -0.015615442 -0.153716921 0.004262525\n", "HDI 0.102218228 -0.075387890 -0.098668654\n", "Life_Expectancy 0.111275449 -0.035340725 -0.048171784\n", "Mean_Schooling_Years 0.067418917 -0.112865885 -0.111281573\n", " GDP_per_capita Land_area Unemployment\n", "Number_of_Flights 0.08326666 0.007304305 -0.03963936\n", "Served_Population -0.10788356 -0.009571527 -0.02689415\n", "Number_of_Events -0.09727085 -0.159943385 0.07794448\n", "GDP_per_capita 1.00000000 0.318471442 -0.30005120\n", "Land_area 0.31847144 1.000000000 -0.20297109\n", "Unemployment -0.30005120 -0.202971089 1.00000000\n", "Freedom_of_Expression 0.59430211 -0.143566301 -0.01042337\n", "Civil_Society_Index 0.58418574 -0.125211593 -0.01680905\n", "HDI 0.79047286 0.287845223 -0.14071643\n", "Life_Expectancy 0.62585773 0.116634966 -0.09027353\n", "Mean_Schooling_Years 0.79576076 0.337944765 -0.22155575\n", " Freedom_of_Expression Civil_Society_Index HDI\n", "Number_of_Flights -0.01316293 -0.015615442 0.10221823\n", "Served_Population -0.15483055 -0.153716921 -0.07538789\n", "Number_of_Events 0.02178809 0.004262525 -0.09866865\n", "GDP_per_capita 0.59430211 0.584185742 0.79047286\n", "Land_area -0.14356630 -0.125211593 0.28784522\n", "Unemployment -0.01042337 -0.016809049 -0.14071643\n", "Freedom_of_Expression 1.00000000 0.961643751 0.42397498\n", "Civil_Society_Index 0.96164375 1.000000000 0.38741832\n", "HDI 0.42397498 0.387418317 1.00000000\n", "Life_Expectancy 0.29538471 0.253887855 0.88027648\n", "Mean_Schooling_Years 
0.47456356 0.445093459 0.90374489\n", " Life_Expectancy Mean_Schooling_Years\n", "Number_of_Flights 0.11127545 0.06741892\n", "Served_Population -0.03534072 -0.11286588\n", "Number_of_Events -0.04817178 -0.11128157\n", "GDP_per_capita 0.62585773 0.79576076\n", "Land_area 0.11663497 0.33794476\n", "Unemployment -0.09027353 -0.22155575\n", "Freedom_of_Expression 0.29538471 0.47456356\n", "Civil_Society_Index 0.25388786 0.44509346\n", "HDI 0.88027648 0.90374489\n", "Life_Expectancy 1.00000000 0.65439475\n", "Mean_Schooling_Years 0.65439475 1.00000000\n" ] } ], "source": [ "# Correlations are computed on complete cases only: rows with any missing\n", "# numeric value are dropped first, so every coefficient uses the same rows.\n", "numeric_complete <- numeric_vars %>% na.omit()\n", "\n", "# Pairwise correlation matrix (cor() defaults to Pearson).\n", "cor_matrix <- numeric_complete %>% cor()\n", "print(cor_matrix)" ] }, { "cell_type": "code", "execution_count": 17, "id": "8d22a613", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "data": { "text/plain": [ "\n", "Call:\n", "glm.nb(formula = Number_of_Events ~ ., data = df_model, control = glm.control(maxit = 100), \n", " init.theta = 0.4173593549, link = log)\n", "\n", "Coefficients:\n", " Estimate Std. Error z value Pr(>|z|) \n", "(Intercept) 5.271e+00 2.296e-01 22.952 < 2e-16 ***\n", "Number_of_Flights 4.101e-03 6.156e-04 6.661 2.72e-11 ***\n", "HDI -2.751e+00 3.066e-01 -8.973 < 2e-16 ***\n", "Served_Population 3.587e-07 1.679e-08 21.357 < 2e-16 ***\n", "Land_area -1.192e-07 7.361e-09 -16.190 < 2e-16 ***\n", "Unemployment 4.779e-02 7.225e-03 6.616 3.70e-11 ***\n", "Freedom_of_Expression 1.136e+00 1.185e-01 9.592 < 2e-16 ***\n", "---\n", "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n", "\n", "(Dispersion parameter for Negative Binomial(0.4174) family taken to be 1)\n", "\n", " Null deviance: 4369.5 on 2619 degrees of freedom\n", "Residual deviance: 3164.7 on 2613 degrees of freedom\n", "AIC: 25106\n", "\n", "Number of Fisher Scoring iterations: 1\n", "\n", "\n", " Theta: 0.4174 \n", " Std.
Err.: 0.0107 \n", "\n", " 2 x log-likelihood: -25090.2560 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Outcome first, then the predictors. Because the model formula below is\n", "# Number_of_Events ~ ., this column order is also the coefficient order.\n", "model_columns <- c(\n", "  \"Number_of_Events\",\n", "  \"Number_of_Flights\",\n", "  \"HDI\",\n", "  \"Served_Population\",\n", "  \"Land_area\",\n", "  \"Unemployment\",\n", "  \"Freedom_of_Expression\"\n", ")\n", "\n", "# Restrict to the modelling columns and drop rows with any missing value.\n", "df_model <- na.omit(df[, model_columns])\n", "\n", "# Negative binomial GLM of the event counts on every other column of\n", "# df_model; maxit = 100 raises the iteration cap passed via glm.control().\n", "nb_model <- glm.nb(Number_of_Events ~ ., data = df_model, control = glm.control(maxit = 100))\n", "\n", "summary(nb_model)" ] }, { "cell_type": "code", "execution_count": 18, "id": "32cdb5b8", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "fitting null model for pseudo-r2\n" ] }, { "data": { "text/html": [ "