final
This commit is contained in:
134
haversine.py
Normal file
134
haversine.py
Normal file
@@ -0,0 +1,134 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
|
||||
def assign_events_to_cities(
    city_df, event_df, distance_threshold_km, device,
    city_lat='Latitude', city_lon='Longitude', city_country='Country',
    event_lat='Latitude', event_lon='Longitude', event_country='Country',
    batch_size=10000
):
    """
    Count, for every city, the events whose nearest same-country city it is.

    Each event is matched to the closest city (great-circle distance) that has
    the same country value. The match is kept only if that distance is at most
    ``distance_threshold_km``. Events with no same-country city, or with
    missing (NaN) coordinates, are not assigned to any city.

    Args:
        city_df: DataFrame of cities; must contain the ``city_lat``,
            ``city_lon`` (degrees) and ``city_country`` columns.
        event_df: DataFrame of events; must contain the ``event_lat``,
            ``event_lon`` (degrees) and ``event_country`` columns.
        distance_threshold_km: Maximum event-to-city distance, in kilometres.
        device: Torch device (or device string) used for the distance math.
        city_lat, city_lon, city_country: Column names in ``city_df``.
        event_lat, event_lon, event_country: Column names in ``event_df``.
        batch_size: Number of events processed per distance matrix.

    Returns:
        A copy of ``city_df`` with an added integer ``'Number_of_Events'``
        column, aligned positionally with the rows of ``city_df``.
    """

    def to_rad_tensor(df, lat_col, lon_col):
        # Convert through NumPy with an explicit dtype so that empty or
        # object-typed frames still produce a (n, 2) float32 tensor.
        coords = df[[lat_col, lon_col]].to_numpy(dtype=np.float32)
        return torch.deg2rad(torch.tensor(coords, dtype=torch.float32, device=device))

    def haversine_distance_matrix(src_rad, dst_rad):
        # src columns are kept 2-D so they broadcast against the 1-D dst
        # vectors, yielding a (len(src), len(dst)) matrix.
        src_lat, src_lon = src_rad[:, 0:1], src_rad[:, 1:2]
        dst_lat, dst_lon = dst_rad[:, 0], dst_rad[:, 1]
        dlat = dst_lat - src_lat
        dlon = dst_lon - src_lon
        a = torch.sin(dlat / 2) ** 2 + torch.cos(src_lat) * torch.cos(dst_lat) * torch.sin(dlon / 2) ** 2
        # float32 rounding can push `a` marginally outside [0, 1], which would
        # make sqrt/arcsin return NaN for (near-)antipodal points.
        a = torch.clamp(a, 0.0, 1.0)
        c = 2 * torch.arcsin(torch.sqrt(a))
        return 6371 * c  # km

    n_events, n_cities = len(event_df), len(city_df)
    counts = torch.zeros(n_cities, dtype=torch.long, device=device)

    # With no events or no cities there is nothing to match; skipping the loop
    # also avoids reducing over an empty dimension.
    if n_events and n_cities:
        event_coords = to_rad_tensor(event_df, event_lat, event_lon)
        city_coords = to_rad_tensor(city_df, city_lat, city_lon)
        event_countries = np.asarray(event_df[event_country])
        city_countries = np.asarray(city_df[city_country])

        for start in tqdm(range(0, n_events, batch_size), desc="📍 Assigning events to cities"):
            end = min(start + batch_size, n_events)

            # The country mask is built per batch so memory stays bounded by
            # batch_size * n_cities instead of n_events * n_cities.
            same_country = torch.as_tensor(
                event_countries[start:end, None] == city_countries[None, :],
                dtype=torch.bool, device=device,
            )

            dist_matrix = haversine_distance_matrix(event_coords[start:end], city_coords)

            # Use +inf (not a finite sentinel) so an ineligible city can never
            # satisfy the threshold; NaN distances are excluded so one city
            # with missing coordinates does not poison the row minimum.
            ineligible = ~same_country | torch.isnan(dist_matrix)
            dist_matrix = dist_matrix.masked_fill(ineligible, float('inf'))

            min_dist, min_idx = torch.min(dist_matrix, dim=1)
            assigned = torch.isfinite(min_dist) & (min_dist <= distance_threshold_km)
            counts += torch.bincount(min_idx[assigned], minlength=n_cities)

    # Assigning a NumPy array is positional, so the counts line up with the
    # rows of city_df regardless of its index labels.
    result_df = city_df.copy()
    result_df['Number_of_Events'] = counts.cpu().numpy()
    return result_df
|
||||
|
||||
|
||||
def calculate_served_population(airports_df, cities_df, distance_threshold_km, device, batch_size=2000):
    """
    Assign each city's population to the airport with the highest score:

        score = Number_of_Flights / (distance_km + 1)

    Only airports in the same country as the city and within
    ``distance_threshold_km`` of it are considered. A city with no such
    airport contributes to nobody. Ties go to the airport that appears first
    in ``airports_df``.

    Args:
        airports_df: DataFrame with ``'Latitude'``, ``'Longitude'`` (degrees),
            ``'Country'`` and ``'Number_of_Flights'`` columns.
        cities_df: DataFrame with ``'Latitude'``, ``'Longitude'`` (degrees),
            ``'Country'`` and ``'Population'`` columns.
        distance_threshold_km: Maximum city-to-airport distance, in kilometres.
        device: Torch device (or device string) used for the computation.
        batch_size: Number of cities processed per distance matrix.

    Returns:
        A float32 NumPy array with one entry per row of ``airports_df`` holding
        the total population assigned to that airport.
    """

    def to_rad_tensor(df):
        # Explicit dtype so empty or object-typed frames still convert.
        coords = df[['Latitude', 'Longitude']].to_numpy(dtype=np.float32)
        return torch.deg2rad(torch.tensor(coords, dtype=torch.float32, device=device))

    def haversine_distance_matrix(src_rad, dst_rad):
        # (len(src), 1) columns broadcast against (len(dst),) vectors.
        src_lat, src_lon = src_rad[:, 0:1], src_rad[:, 1:2]
        dst_lat, dst_lon = dst_rad[:, 0], dst_rad[:, 1]
        dlat = dst_lat - src_lat
        dlon = dst_lon - src_lon
        a = torch.sin(dlat / 2) ** 2 + torch.cos(src_lat) * torch.cos(dst_lat) * torch.sin(dlon / 2) ** 2
        # Guard against float32 rounding pushing `a` outside asin's domain.
        a = torch.clamp(a, 0.0, 1.0)
        return 6371 * (2 * torch.asin(torch.sqrt(a)))  # km

    n_airports, n_cities = len(airports_df), len(cities_df)
    assigned_pop = torch.zeros(n_airports, dtype=torch.float32, device=device)

    # Nothing to assign; also avoids an argmax over an empty dimension.
    if n_airports == 0 or n_cities == 0:
        return assigned_pop.cpu().numpy()

    airport_coords = to_rad_tensor(airports_df)
    city_coords = to_rad_tensor(cities_df)
    city_pops = torch.tensor(
        cities_df['Population'].to_numpy(dtype=np.float32), dtype=torch.float32, device=device
    )
    airport_departures = torch.tensor(
        airports_df['Number_of_Flights'].to_numpy(dtype=np.float32), dtype=torch.float32, device=device
    )

    airport_countries = np.asarray(airports_df['Country'])
    city_countries = np.asarray(cities_df['Country'])

    on_cuda = torch.device(device).type == 'cuda'

    for start in tqdm(range(0, n_cities, batch_size), desc="📊 Assigning using weighted score"):
        end = min(start + batch_size, n_cities)

        # One (batch, n_airports) matrix per batch replaces the per-city
        # Python loop; every tensor involved lives on `device`.
        dists = haversine_distance_matrix(city_coords[start:end], airport_coords)
        same_country = torch.as_tensor(
            city_countries[start:end, None] == airport_countries[None, :],
            dtype=torch.bool, device=device,
        )
        eligible = same_country & (dists <= distance_threshold_km)

        # Ineligible airports get -inf so they can never win the argmax.
        scores = (airport_departures / (dists + 1)).masked_fill(~eligible, float('-inf'))
        best_airport = torch.argmax(scores, dim=1)

        # Cities without any eligible airport are dropped before accumulating.
        has_airport = eligible.any(dim=1)
        assigned_pop.index_add_(0, best_airport[has_airport], city_pops[start:end][has_airport])

        # Release cached blocks between batches only when a GPU is in use.
        if on_cuda:
            torch.cuda.empty_cache()

    return assigned_pop.cpu().numpy()
|
Reference in New Issue
Block a user