This repository has been archived on 2025-09-30. You can view files and clone it, but cannot push or open issues or pull requests.
Files
FlightUnrest/FINAL_r_reg_final.ipynb
2025-09-26 00:11:14 -05:00

363 lines
14 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "d0c8ee28",
"metadata": {
"vscode": {
"languageId": "r"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"corrplot 0.95 loaded\n",
"\n"
]
}
],
"source": [
"library(pscl)\n",
"library(MASS)\n",
"library(dplyr)\n",
"library(car)\n",
"library(corrplot)\n",
"df <- read.csv(\"clean_data.csv\")\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b2731cb4",
"metadata": {
"vscode": {
"languageId": "r"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" [,1]\n",
"Number_of_Flights_Mean 2.435639e+01\n",
"Number_of_Flights_Median 5.000000e+00\n",
"Number_of_Flights_SD 6.742273e+01\n",
"Number_of_Flights_Min 1.000000e+00\n",
"Number_of_Flights_Max 1.226000e+03\n",
"Served_Population_Mean 9.937426e+05\n",
"Served_Population_Median 2.699935e+05\n",
"Served_Population_SD 2.479144e+06\n",
"Served_Population_Min 2.300000e+01\n",
"Served_Population_Max 3.372453e+07\n",
"Number_of_Events_Mean 9.763500e+01\n",
"Number_of_Events_Median 1.700000e+01\n",
"Number_of_Events_SD 2.590523e+02\n",
"Number_of_Events_Min 0.000000e+00\n",
"Number_of_Events_Max 6.079000e+03\n",
"GDP_per_capita_Mean 2.690592e+04\n",
"GDP_per_capita_Median 1.144770e+04\n",
"GDP_per_capita_SD 2.487818e+04\n",
"GDP_per_capita_Min 2.102365e+02\n",
"GDP_per_capita_Max 1.127264e+05\n",
"Land_area_Mean 4.148564e+06\n",
"Land_area_Median 1.557258e+06\n",
"Land_area_SD 4.617248e+06\n",
"Land_area_Min 2.000000e+01\n",
"Land_area_Max 1.637687e+07\n",
"Unemployment_Mean 6.307765e+00\n",
"Unemployment_Median 4.560000e+00\n",
"Unemployment_SD 4.294981e+00\n",
"Unemployment_Min 1.000000e-01\n",
"Unemployment_Max 2.846800e+01\n",
"Freedom_of_Expression_Mean 6.868388e-01\n",
"Freedom_of_Expression_Median 8.080000e-01\n",
"Freedom_of_Expression_SD 3.023572e-01\n",
"Freedom_of_Expression_Min 1.200000e-02\n",
"Freedom_of_Expression_Max 9.850000e-01\n",
"Civil_Society_Index_Mean 6.990277e-01\n",
"Civil_Society_Index_Median 8.370000e-01\n",
"Civil_Society_Index_SD 3.056219e-01\n",
"Civil_Society_Index_Min 1.500000e-02\n",
"Civil_Society_Index_Max 9.790000e-01\n",
"HDI_Mean 8.239568e-01\n",
"HDI_Median 8.260000e-01\n",
"HDI_SD 1.211331e-01\n",
"HDI_Min 3.850000e-01\n",
"HDI_Max 9.670000e-01\n",
"Life_Expectancy_Mean 7.612774e+01\n",
"Life_Expectancy_Median 7.797900e+01\n",
"Life_Expectancy_SD 5.569156e+00\n",
"Life_Expectancy_Min 1.881800e+01\n",
"Life_Expectancy_Max 8.405400e+01\n",
"Mean_Schooling_Years_Mean 1.069395e+01\n",
"Mean_Schooling_Years_Median 1.118458e+01\n",
"Mean_Schooling_Years_SD 2.839204e+00\n",
"Mean_Schooling_Years_Min 1.412289e+00\n",
"Mean_Schooling_Years_Max 1.429637e+01\n"
]
}
],
"source": [
"numeric_vars <- df %>%\n",
" select(where(is.numeric))\n",
"\n",
"summary_stats <- numeric_vars %>%\n",
" summarise(across(everything(),\n",
" list(Mean = ~mean(., na.rm = TRUE),\n",
" Median = ~median(., na.rm = TRUE),\n",
" SD = ~sd(., na.rm = TRUE),\n",
" Min = ~min(., na.rm = TRUE),\n",
" Max = ~max(., na.rm = TRUE)),\n",
" .names = \"{.col}_{.fn}\"))\n",
"\n",
"summary_stats_t <- t(summary_stats)\n",
"print(summary_stats_t)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "965aa97f",
"metadata": {
"vscode": {
"languageId": "r"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Served_Population Number_of_Events GDP_per_capita \n",
" 1.424350 1.430105 4.591687 \n",
" Land_area Unemployment Freedom_of_Expression \n",
" 1.574135 1.270525 14.585125 \n",
" Civil_Society_Index HDI Life_Expectancy \n",
" 13.931561 30.454321 9.553996 \n",
" Mean_Schooling_Years \n",
" 11.736238 \n"
]
}
],
"source": [
"vif_model <- lm(numeric_vars[[1]] ~ ., data = numeric_vars[, -1])\n",
"vif_values <- vif(vif_model)\n",
"print(vif_values)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d97fa5af",
"metadata": {
"vscode": {
"languageId": "r"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Number_of_Flights Served_Population Number_of_Events\n",
"Number_of_Flights 1.000000000 0.671699241 0.367851267\n",
"Served_Population 0.671699241 1.000000000 0.506996810\n",
"Number_of_Events 0.367851267 0.506996810 1.000000000\n",
"GDP_per_capita 0.083266657 -0.107883557 -0.097270848\n",
"Land_area 0.007304305 -0.009571527 -0.159943385\n",
"Unemployment -0.039639363 -0.026894147 0.077944485\n",
"Freedom_of_Expression -0.013162926 -0.154830547 0.021788094\n",
"Civil_Society_Index -0.015615442 -0.153716921 0.004262525\n",
"HDI 0.102218228 -0.075387890 -0.098668654\n",
"Life_Expectancy 0.111275449 -0.035340725 -0.048171784\n",
"Mean_Schooling_Years 0.067418917 -0.112865885 -0.111281573\n",
" GDP_per_capita Land_area Unemployment\n",
"Number_of_Flights 0.08326666 0.007304305 -0.03963936\n",
"Served_Population -0.10788356 -0.009571527 -0.02689415\n",
"Number_of_Events -0.09727085 -0.159943385 0.07794448\n",
"GDP_per_capita 1.00000000 0.318471442 -0.30005120\n",
"Land_area 0.31847144 1.000000000 -0.20297109\n",
"Unemployment -0.30005120 -0.202971089 1.00000000\n",
"Freedom_of_Expression 0.59430211 -0.143566301 -0.01042337\n",
"Civil_Society_Index 0.58418574 -0.125211593 -0.01680905\n",
"HDI 0.79047286 0.287845223 -0.14071643\n",
"Life_Expectancy 0.62585773 0.116634966 -0.09027353\n",
"Mean_Schooling_Years 0.79576076 0.337944765 -0.22155575\n",
" Freedom_of_Expression Civil_Society_Index HDI\n",
"Number_of_Flights -0.01316293 -0.015615442 0.10221823\n",
"Served_Population -0.15483055 -0.153716921 -0.07538789\n",
"Number_of_Events 0.02178809 0.004262525 -0.09866865\n",
"GDP_per_capita 0.59430211 0.584185742 0.79047286\n",
"Land_area -0.14356630 -0.125211593 0.28784522\n",
"Unemployment -0.01042337 -0.016809049 -0.14071643\n",
"Freedom_of_Expression 1.00000000 0.961643751 0.42397498\n",
"Civil_Society_Index 0.96164375 1.000000000 0.38741832\n",
"HDI 0.42397498 0.387418317 1.00000000\n",
"Life_Expectancy 0.29538471 0.253887855 0.88027648\n",
"Mean_Schooling_Years 0.47456356 0.445093459 0.90374489\n",
" Life_Expectancy Mean_Schooling_Years\n",
"Number_of_Flights 0.11127545 0.06741892\n",
"Served_Population -0.03534072 -0.11286588\n",
"Number_of_Events -0.04817178 -0.11128157\n",
"GDP_per_capita 0.62585773 0.79576076\n",
"Land_area 0.11663497 0.33794476\n",
"Unemployment -0.09027353 -0.22155575\n",
"Freedom_of_Expression 0.29538471 0.47456356\n",
"Civil_Society_Index 0.25388786 0.44509346\n",
"HDI 0.88027648 0.90374489\n",
"Life_Expectancy 1.00000000 0.65439475\n",
"Mean_Schooling_Years 0.65439475 1.00000000\n"
]
}
],
"source": [
"numeric_complete <- na.omit(numeric_vars)\n",
"cor_matrix <- cor(numeric_complete)\n",
"print(cor_matrix)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "8d22a613",
"metadata": {
"vscode": {
"languageId": "r"
}
},
"outputs": [
{
"data": {
"text/plain": [
"\n",
"Call:\n",
"glm.nb(formula = Number_of_Events ~ ., data = df_model, control = glm.control(maxit = 100), \n",
" init.theta = 0.4173593549, link = log)\n",
"\n",
"Coefficients:\n",
" Estimate Std. Error z value Pr(>|z|) \n",
"(Intercept) 5.271e+00 2.296e-01 22.952 < 2e-16 ***\n",
"Number_of_Flights 4.101e-03 6.156e-04 6.661 2.72e-11 ***\n",
"HDI -2.751e+00 3.066e-01 -8.973 < 2e-16 ***\n",
"Served_Population 3.587e-07 1.679e-08 21.357 < 2e-16 ***\n",
"Land_area -1.192e-07 7.361e-09 -16.190 < 2e-16 ***\n",
"Unemployment 4.779e-02 7.225e-03 6.616 3.70e-11 ***\n",
"Freedom_of_Expression 1.136e+00 1.185e-01 9.592 < 2e-16 ***\n",
"---\n",
"Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1\n",
"\n",
"(Dispersion parameter for Negative Binomial(0.4174) family taken to be 1)\n",
"\n",
" Null deviance: 4369.5 on 2619 degrees of freedom\n",
"Residual deviance: 3164.7 on 2613 degrees of freedom\n",
"AIC: 25106\n",
"\n",
"Number of Fisher Scoring iterations: 1\n",
"\n",
"\n",
" Theta: 0.4174 \n",
" Std. Err.: 0.0107 \n",
"\n",
" 2 x log-likelihood: -25090.2560 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df_model <- na.omit(df[, c(\"Number_of_Events\", \"Number_of_Flights\", \"HDI\",\n",
" \"Served_Population\", \"Land_area\", \n",
" \"Unemployment\", \"Freedom_of_Expression\")])\n",
"\n",
"nb_model <- glm.nb(Number_of_Events ~ ., data = df_model, control = glm.control(maxit = 100))\n",
"\n",
"summary(nb_model)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "32cdb5b8",
"metadata": {
"vscode": {
"languageId": "r"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"fitting null model for pseudo-r2\n"
]
},
{
"data": {
"text/html": [
"<style>\n",
".dl-inline {width: auto; margin:0; padding: 0}\n",
".dl-inline>dt, .dl-inline>dd {float: none; width: auto; display: inline-block}\n",
".dl-inline>dt::after {content: \":\\0020\"; padding-right: .5ex}\n",
".dl-inline>dt:not(:first-of-type) {padding-left: .5ex}\n",
"</style><dl class=dl-inline><dt>llh</dt><dd>-12545.1280487307</dd><dt>llhNull</dt><dd>-13056.4659219827</dd><dt>G2</dt><dd>1022.67574650416</dd><dt>McFadden</dt><dd>0.0391635743016152</dd><dt>r2ML</dt><dd>0.323169396916811</dd><dt>r2CU</dt><dd>0.323184565266037</dd></dl>\n"
],
"text/latex": [
"\\begin{description*}\n",
"\\item[llh] -12545.1280487307\n",
"\\item[llhNull] -13056.4659219827\n",
"\\item[G2] 1022.67574650416\n",
"\\item[McFadden] 0.0391635743016152\n",
"\\item[r2ML] 0.323169396916811\n",
"\\item[r2CU] 0.323184565266037\n",
"\\end{description*}\n"
],
"text/markdown": [
"llh\n",
": -12545.1280487307llhNull\n",
": -13056.4659219827G2\n",
": 1022.67574650416McFadden\n",
": 0.0391635743016152r2ML\n",
": 0.323169396916811r2CU\n",
": 0.323184565266037\n",
"\n"
],
"text/plain": [
" llh llhNull G2 McFadden r2ML \n",
"-1.254513e+04 -1.305647e+04 1.022676e+03 3.916357e-02 3.231694e-01 \n",
" r2CU \n",
" 3.231846e-01 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"pR2(nb_model)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "4.5.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}