In [ ]:
!pip install fiona
Collecting fiona
  Downloading fiona-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (56 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.6/56.6 kB 3.5 MB/s eta 0:00:00
Requirement already satisfied: attrs>=19.2.0 in /usr/local/lib/python3.11/dist-packages (from fiona) (25.3.0)
Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from fiona) (2025.6.15)
Requirement already satisfied: click~=8.0 in /usr/local/lib/python3.11/dist-packages (from fiona) (8.2.1)
Collecting click-plugins>=1.0 (from fiona)
  Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting cligj>=0.5 (from fiona)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Downloading fiona-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 17.3/17.3 MB 54.6 MB/s eta 0:00:00
Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl (11 kB)
Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Installing collected packages: cligj, click-plugins, fiona
Successfully installed click-plugins-1.1.1.2 cligj-0.7.2 fiona-1.10.1
In [ ]:
# Exercise 1
# import the libraries we will need
import pandas as pd  # needed below for pd.read_csv
from fiona import listlayers
linkWorldMap="https://github.com/CienciaDeDatosEspacial/intro_geodataframe/raw/main/maps/worldMaps.gpkg"
airportslink="https://github.com/TartariaData/GDF_Analytics/raw/main/data/de-airports.csv"
layers= listlayers(linkWorldMap)
layers
airports= pd.read_csv(airportslink)
In [ ]:
# import geopandas
import geopandas as gpd
airports = gpd.GeoDataFrame(airports, geometry=gpd.points_from_xy(airports.longitude_deg, airports.latitude_deg), crs=4326)
# layers to read
countries = gpd.read_file(linkWorldMap, layer='countries')
rivers = gpd.read_file(linkWorldMap, layer='rivers')
cities = gpd.read_file(linkWorldMap, layer='cities')
indicators = gpd.read_file(linkWorldMap, layer='indicators')
# keep only Germany's data
Germany = countries[countries.COUNTRY == 'Germany']
Germany.head()
Out[ ]:
COUNTRY geometry
87 Germany MULTIPOLYGON (((7.36901 49.16878, 7.36403 49.1...
In [ ]:
# read the seaports data
import pandas as pd

portsFileLink="https://github.com/CienciaDeDatosEspacial/GeoDataFrame_Analytics/raw/main/data/UpdatedPub150.csv"
infoseaports=pd.read_csv(portsFileLink)

#columns available (so many)
infoseaports.columns.to_list()
Out[ ]:
['World Port Index Number',
 'Region Name',
 'Main Port Name',
 'Alternate Port Name',
 'UN/LOCODE',
 'Country Code',
 'World Water Body',
 'IHO S-130 Sea Area',
 'Sailing Direction or Publication',
 'Publication Link',
 'Standard Nautical Chart',
 'IHO S-57 Electronic Navigational Chart',
 'IHO S-101 Electronic Navigational Chart',
 'Digital Nautical Chart',
 'Tidal Range (m)',
 'Entrance Width (m)',
 'Channel Depth (m)',
 'Anchorage Depth (m)',
 'Cargo Pier Depth (m)',
 'Oil Terminal Depth (m)',
 'Liquified Natural Gas Terminal Depth (m)',
 'Maximum Vessel Length (m)',
 'Maximum Vessel Beam (m)',
 'Maximum Vessel Draft (m)',
 'Offshore Maximum Vessel Length (m)',
 'Offshore Maximum Vessel Beam (m)',
 'Offshore Maximum Vessel Draft (m)',
 'Harbor Size',
 'Harbor Type',
 'Harbor Use',
 'Shelter Afforded',
 'Entrance Restriction - Tide',
 'Entrance Restriction - Heavy Swell',
 'Entrance Restriction - Ice',
 'Entrance Restriction - Other',
 'Overhead Limits',
 'Underkeel Clearance Management System',
 'Good Holding Ground',
 'Turning Area',
 'Port Security',
 'Estimated Time of Arrival Message',
 'Quarantine - Pratique',
 'Quarantine - Sanitation',
 'Quarantine - Other',
 'Traffic Separation Scheme',
 'Vessel Traffic Service',
 'First Port of Entry',
 'US Representative',
 'Pilotage - Compulsory',
 'Pilotage - Available',
 'Pilotage - Local Assistance',
 'Pilotage - Advisable',
 'Tugs - Salvage',
 'Tugs - Assistance',
 'Communications - Telephone',
 'Communications - Telefax',
 'Communications - Radio',
 'Communications - Radiotelephone',
 'Communications - Airport',
 'Communications - Rail',
 'Search and Rescue',
 'NAVAREA',
 'Facilities - Wharves',
 'Facilities - Anchorage',
 'Facilities - Dangerous Cargo Anchorage',
 'Facilities - Med Mooring',
 'Facilities - Beach Mooring',
 'Facilities - Ice Mooring',
 'Facilities - Ro-Ro',
 'Facilities - Solid Bulk',
 'Facilities - Liquid Bulk',
 'Facilities - Container',
 'Facilities - Breakbulk',
 'Facilities - Oil Terminal',
 'Facilities - LNG Terminal',
 'Facilities - Other',
 'Medical Facilities',
 'Garbage Disposal',
 'Chemical Holding Tank Disposal',
 'Degaussing',
 'Dirty Ballast Disposal',
 'Cranes - Fixed',
 'Cranes - Mobile',
 'Cranes - Floating',
 'Cranes - Container',
 'Lifts - 100+ Tons',
 'Lifts - 50-100 Tons',
 'Lifts - 25-49 Tons',
 'Lifts - 0-24 Tons',
 'Services - Longshoremen',
 'Services - Electricity',
 'Services - Steam',
 'Services - Navigation Equipment',
 'Services - Electrical Repair',
 'Services - Ice Breaking',
 'Services - Diving',
 'Supplies - Provisions',
 'Supplies - Potable Water',
 'Supplies - Fuel Oil',
 'Supplies - Diesel Oil',
 'Supplies - Aviation Fuel',
 'Supplies - Deck',
 'Supplies - Engine',
 'Repairs',
 'Dry Dock',
 'Railway',
 'Latitude',
 'Longitude']

Let's do some preprocessing:

In [ ]:
#rename
infoseaports.rename(columns={'Main Port Name':'portName'},inplace=True)
# keep a few columns
infoseaports=infoseaports.loc[:,['portName', 'Country Code','Latitude', 'Longitude']]

# we have now
infoseaports.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3739 entries, 0 to 3738
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   portName      3739 non-null   object 
 1   Country Code  3739 non-null   object 
 2   Latitude      3739 non-null   float64
 3   Longitude     3739 non-null   float64
dtypes: float64(2), object(2)
memory usage: 117.0+ KB
In [ ]:
# some rows
infoseaports.head()
Out[ ]:
portName Country Code Latitude Longitude
0 Maurer United States 40.533333 -74.250000
1 Mangkasa Oil Terminal Indonesia -2.733333 121.066667
2 Iharana Madagascar -13.350000 50.000000
3 Andoany Madagascar -13.400000 48.300000
4 Chake Chake Tanzania -5.250000 39.766667
In [ ]:
# German seaports
# Build a GeoDataFrame from the points (unprojected)
seaports = gpd.GeoDataFrame(data=infoseaports.copy(),
                           geometry=gpd.points_from_xy(infoseaports.Longitude,
                                                       infoseaports.Latitude),
                          crs=4326)  # not projected

# Keep only the German ports
seaports_ger = seaports[seaports['Country Code'] == 'Germany'].copy()

# Reset the index
seaports_ger.reset_index(drop=True, inplace=True)

# Reproject
seaports_ger_25832 = seaports_ger.to_crs(25832)  # projected CRS for Germany

# First rows of the projected German seaports GeoDataFrame
seaports_ger_25832.head()
Out[ ]:
portName Country Code Latitude Longitude geometry
0 Kiel Germany 54.316667 10.133333 POINT (573722.969 6019347.392)
1 Butzfleth Germany 53.650000 9.516667 POINT (534150.597 5944705.652)
2 Emden Germany 53.333333 7.183333 POINT (379029.642 5910890.648)
3 Bremen Germany 53.133333 8.766667 POINT (484389.231 5887128.322)
4 Nordenham Germany 53.483333 8.483333 POINT (465714.877 5926163.772)
In [ ]:
# Build a subset of Germany's large airports
# subsetting
largeAirports=airports[airports['type']=='large_airport'] # can't use "airports.type": .type is a GeoDataFrame attribute, not this column
largeAirports.reset_index(drop=True, inplace=True)

# plotting (note the CRS mismatch: largeAirports is still in EPSG:4326 while
# the seaports are in EPSG:25832; the next cell fixes this)
base=largeAirports.plot(color='red',marker="^")
seaports_ger_25832.plot(ax=base,alpha=0.5,markersize=3)
Out[ ]:
<Axes: >
In [ ]:
# Build a GeoDataFrame from the points (unprojected)
seaports = gpd.GeoDataFrame(data=infoseaports.copy(),
                           geometry=gpd.points_from_xy(infoseaports.Longitude,
                                                       infoseaports.Latitude),
                          crs=4326)  # note it is not projected

# Keep only the German ports
seaports_ger = seaports[seaports['Country Code'] == 'Germany'].copy()

# Reset the index
seaports_ger.reset_index(drop=True, inplace=True)

# Reproject
seaports_ger_25832 = seaports_ger.to_crs(25832)  # projected CRS for Germany

# Make sure the airports are in the same CRS
airports_gdf = gpd.GeoDataFrame(airports,
                                geometry=gpd.points_from_xy(airports.longitude_deg, airports.latitude_deg),
                                crs=4326)

# Reproject the airports to the German CRS
airports_gdf_25832 = airports_gdf.to_crs(25832)

# Subset of large airports in Germany
largeAirports = airports_gdf_25832[airports_gdf_25832['type'] == 'large_airport'].copy()
largeAirports.reset_index(drop=True, inplace=True)

# Plot the seaports and the large airports in Germany
import matplotlib.pyplot as plt  # needed for plt.show() below

base = largeAirports.plot(color='red', marker="^")
seaports_ger_25832.plot(ax=base, alpha=0.5, markersize=3)
plt.show()
In [ ]:
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster

# Base map centered on Germany
m = folium.Map(location=[51.1657, 10.4515], zoom_start=6)

# Add the large airports
for idx, row in largeAirports.iterrows():
    folium.Marker(location=[row['latitude_deg'], row['longitude_deg']], popup=row['name'], icon=folium.Icon(color='red', icon='plane')).add_to(m)

# Add the seaports
for idx, row in seaports_ger_25832.iterrows():
    folium.Marker(location=[row['Latitude'], row['Longitude']], popup=row['portName'], icon=folium.Icon(color='blue', icon='anchor')).add_to(m)

# Save and show the map
m.save('map.html')
m
Out[ ]:
(interactive folium map)
In [ ]:
# First rows of the projected German seaports GeoDataFrame
seaports_ger_25832.head()
Out[ ]:
portName Country Code Latitude Longitude geometry
0 Kiel Germany 54.316667 10.133333 POINT (573722.969 6019347.392)
1 Butzfleth Germany 53.650000 9.516667 POINT (534150.597 5944705.652)
2 Emden Germany 53.333333 7.183333 POINT (379029.642 5910890.648)
3 Bremen Germany 53.133333 8.766667 POINT (484389.231 5887128.322)
4 Nordenham Germany 53.483333 8.483333 POINT (465714.877 5926163.772)
In [ ]:
largeAirports.head()
Out[ ]:
id ident type name latitude_deg longitude_deg elevation_ft continent country_name iso_country ... scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords score last_updated geometry
0 2212 EDDF large_airport Frankfurt Airport 50.030241 8.561096 364.0 EU Germany DE ... 1 EDDF FRA NaN https://www.frankfurt-airport.de/ https://en.wikipedia.org/wiki/Frankfurt_Airport EDAF, Frankfurt am Main, Frankfurt Main, Rhein... 1144675 2024-04-02T16:06:40+00:00 POINT (468564.822 5542085.319)
1 2218 EDDM large_airport Munich Airport 48.353802 11.786100 1487.0 EU Germany DE ... 1 EDDM MUC NaN http://www.munich-airport.com/ https://en.wikipedia.org/wiki/Munich_Airport Franz Josef Strauss Airport, Flughafen München... 1026675 2022-03-29T22:05:19+00:00 POINT (706396.099 5359376.354)
2 2217 EDDL large_airport Düsseldorf Airport 51.289501 6.766780 147.0 EU Germany DE ... 1 EDDL DUS NaN http://www.dus.com/dus_en/ https://en.wikipedia.org/wiki/D%C3%BCsseldorf_... NaN 1017675 2023-12-04T14:06:43+00:00 POINT (344281.592 5684387.872)
3 2214 EDDH large_airport Hamburg Helmut Schmidt Airport 53.630402 9.988230 53.0 EU Germany DE ... 1 EDDH HAM NaN https://www.hamburg-airport.de/en/ https://en.wikipedia.org/wiki/Hamburg_Airport Hamburg-Fuhlsbüttel Airport 1012575 2021-03-12T16:04:07+00:00 POINT (565349.504 5942855.095)
4 2216 EDDK large_airport Cologne Bonn Airport 50.865898 7.142740 302.0 EU Germany DE ... 1 EDDK CGN NaN http://www.koeln-bonn-airport.de/en/ https://en.wikipedia.org/wiki/Cologne_Bonn_Air... Köln 51475 2023-12-04T20:06:04+00:00 POINT (369306.134 5636555.728)

5 rows × 24 columns

In [ ]:
# distance between the first large airport (Frankfurt Airport) and the first seaport (Kiel)
largeAirports.geometry[0].distance(seaports_ger_25832.geometry[0])/1000  # in km
Out[ ]:
488.7098548169685
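As a sanity check, that figure can be reproduced geodesically on the WGS84 ellipsoid. A minimal sketch using pyproj (already a geopandas dependency), with the Frankfurt Airport and Kiel coordinates taken from the tables above:

# Sketch: geodesic cross-check of the planar EPSG:25832 distance
from pyproj import Geod

geod = Geod(ellps="WGS84")
# inv() returns forward azimuth, back azimuth, and distance in metres
_, _, dist_m = geod.inv(8.561096, 50.030241,    # Frankfurt Airport (lon, lat)
                        10.133333, 54.316667)   # Kiel (lon, lat)
dist_m / 1000  # roughly 488 km, close to the projected-CRS result above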
In [ ]:
# Distance matrix between seaports and large airports, with rows and columns sorted by name
distances_reordered = seaports_ger_25832.set_index('portName').geometry.apply(
    lambda g: largeAirports.set_index('name').geometry.distance(g) / 1000
).sort_index(axis=0).sort_index(axis=1)
distances_reordered.head()
Out[ ]:
name Berlin Brandenburg Airport Cologne Bonn Airport Düsseldorf Airport Frankfurt Airport Hamburg Helmut Schmidt Airport Hannover Airport Leipzig/Halle Airport Munich Airport Nuremberg Airport Stuttgart Airport
portName
Brake 354.778592 289.408259 255.698314 367.403245 105.191619 126.288439 332.251958 600.496340 463.014246 518.955116
Bremen 330.911344 275.736729 246.442367 345.405683 98.285498 97.087601 303.659694 572.546233 435.099551 495.151915
Bremerhaven 355.188793 312.609820 278.504627 389.616247 93.635224 140.334364 341.534133 618.301524 480.899723 540.464947
Brunsbuttel Canal Terminals 337.681912 364.073827 332.218896 432.297306 62.871259 164.016574 345.740454 643.765117 507.310293 579.434335
Brunsbuttel Elbahafen 335.832292 362.794067 331.166675 430.560771 61.040591 161.966988 343.606464 641.666145 505.226500 577.571266
In [ ]:
# Distance matrix between seaports and large airports (keeping the index names)
distanceMatrixKM_sea_air = seaports_ger_25832.set_index('portName').geometry.apply(
    lambda g: largeAirports.set_index('name').geometry.distance(g) / 1000
).sort_index(axis=0).sort_index(axis=1)

# Mean distance from each seaport to all the large airports (sorted)
mean_distances = distanceMatrixKM_sea_air.mean(axis=1).sort_values(ascending=True)
mean_distances.head()
Out[ ]:
0
portName
Bremen 320.032662
Oldenburg 331.704344
Elsfleth 334.086680
Hamburg 337.005726
Brake 341.348613

In [ ]:
# Compute more statistics
SomeStats = pd.DataFrame()
SomeStats['mean'] = distanceMatrixKM_sea_air.mean(axis=1)
SomeStats['min'] = distanceMatrixKM_sea_air.min(axis=1)
SomeStats['max'] = distanceMatrixKM_sea_air.max(axis=1)

# Look at some of them
SomeStats.head(10)
Out[ ]:
mean min max
portName
Brake 341.348613 105.191619 600.496340
Bremen 320.032662 97.087601 572.546233
Bremerhaven 355.108940 93.635224 618.301524
Brunsbuttel Canal Terminals 376.940997 62.871259 643.765117
Brunsbuttel Elbahafen 375.143176 61.040591 641.666145
Busum 403.365856 92.554962 674.223724
Butzfleth 349.225603 31.253741 610.146622
Cuxhaven 381.169587 87.875182 649.570969
Eckernforde 429.394014 95.442681 695.155599
Elsfleth 334.086680 110.319333 590.817263
In [ ]:
# Farthest airport from each seaport
farthest_airports = distanceMatrixKM_sea_air.idxmax(axis=1)

# Show the results
farthest_airports
Out[ ]:
0
portName
Brake Munich Airport
Bremen Munich Airport
Bremerhaven Munich Airport
Brunsbuttel Canal Terminals Munich Airport
Brunsbuttel Elbahafen Munich Airport
Busum Munich Airport
Butzfleth Munich Airport
Cuxhaven Munich Airport
Eckernforde Munich Airport
Elsfleth Munich Airport
Emden Munich Airport
Flensburg Munich Airport
Gluckstadt Munich Airport
Hamburg Munich Airport
Heiligenhafen Munich Airport
Husum Munich Airport
Itzehoe Munich Airport
Kappeln Munich Airport
Kiel Munich Airport
Leer Munich Airport
Lubeck Munich Airport
Lubeck-Travemunde Munich Airport
Neuhaus Munich Airport
Neustadt Munich Airport
Nordenham Munich Airport
Oldenburg Munich Airport
Orth Munich Airport
Papenburg Munich Airport
Rendsburg Munich Airport
Rostock Munich Airport
Sassnitz Stuttgart Airport
Stralsund Stuttgart Airport
Wilhelmshaven Munich Airport
Wismar Munich Airport
Wolgast Stuttgart Airport

In [ ]:
# Farthest seaport from each airport
farthest_seaports = distanceMatrixKM_sea_air.idxmax(axis=0)

# Show the results
farthest_seaports
Out[ ]:
0
name
Berlin Brandenburg Airport Emden
Cologne Bonn Airport Sassnitz
Düsseldorf Airport Sassnitz
Frankfurt Airport Sassnitz
Hamburg Helmut Schmidt Airport Sassnitz
Hannover Airport Sassnitz
Leipzig/Halle Airport Flensburg
Munich Airport Flensburg
Nuremberg Airport Flensburg
Stuttgart Airport Sassnitz

In [ ]:
# Closest airport to each seaport
closest_airports = distanceMatrixKM_sea_air.idxmin(axis=1)

# Show the results
closest_airports
Out[ ]:
0
portName
Brake Hamburg Helmut Schmidt Airport
Bremen Hannover Airport
Bremerhaven Hamburg Helmut Schmidt Airport
Brunsbuttel Canal Terminals Hamburg Helmut Schmidt Airport
Brunsbuttel Elbahafen Hamburg Helmut Schmidt Airport
Busum Hamburg Helmut Schmidt Airport
Butzfleth Hamburg Helmut Schmidt Airport
Cuxhaven Hamburg Helmut Schmidt Airport
Eckernforde Hamburg Helmut Schmidt Airport
Elsfleth Hamburg Helmut Schmidt Airport
Emden Hamburg Helmut Schmidt Airport
Flensburg Hamburg Helmut Schmidt Airport
Gluckstadt Hamburg Helmut Schmidt Airport
Hamburg Hamburg Helmut Schmidt Airport
Heiligenhafen Hamburg Helmut Schmidt Airport
Husum Hamburg Helmut Schmidt Airport
Itzehoe Hamburg Helmut Schmidt Airport
Kappeln Hamburg Helmut Schmidt Airport
Kiel Hamburg Helmut Schmidt Airport
Leer Hannover Airport
Lubeck Hamburg Helmut Schmidt Airport
Lubeck-Travemunde Hamburg Helmut Schmidt Airport
Neuhaus Hamburg Helmut Schmidt Airport
Neustadt Hamburg Helmut Schmidt Airport
Nordenham Hamburg Helmut Schmidt Airport
Oldenburg Hannover Airport
Orth Hamburg Helmut Schmidt Airport
Papenburg Hannover Airport
Rendsburg Hamburg Helmut Schmidt Airport
Rostock Hamburg Helmut Schmidt Airport
Sassnitz Berlin Brandenburg Airport
Stralsund Hamburg Helmut Schmidt Airport
Wilhelmshaven Hamburg Helmut Schmidt Airport
Wismar Hamburg Helmut Schmidt Airport
Wolgast Berlin Brandenburg Airport

In [ ]:
# Closest seaport to each airport
closest_seaports = distanceMatrixKM_sea_air.idxmin(axis=0)

# Show the results
closest_seaports
Out[ ]:
0
name
Berlin Brandenburg Airport Wolgast
Cologne Bonn Airport Papenburg
Düsseldorf Airport Papenburg
Frankfurt Airport Oldenburg
Hamburg Helmut Schmidt Airport Hamburg
Hannover Airport Bremen
Leipzig/Halle Airport Wismar
Munich Airport Bremen
Nuremberg Airport Bremen
Stuttgart Airport Bremen

In [ ]:
# map with lines between airports and seaports

import matplotlib.pyplot as plt


seaports_ger_4326 = seaports_ger_25832.to_crs(4326)
largeAirports_4326 = largeAirports.to_crs(4326)

# base map centered on Germany
m = folium.Map(location=[51.1657, 10.4515], zoom_start=6)

# Add markers for airports and seaports
for idx, row in largeAirports_4326.iterrows():
    folium.Marker(location=[row['latitude_deg'], row['longitude_deg']], popup=row['name'], icon=folium.Icon(color='red', icon='plane')).add_to(m)

for idx, row in seaports_ger_4326.iterrows():
    folium.Marker(location=[row['Latitude'], row['Longitude']], popup=row['portName'], icon=folium.Icon(color='blue', icon='anchor')).add_to(m)

# Add a line from each seaport to its closest airport
for index_port, row_port in seaports_ger_4326.iterrows():
    port_name = row_port['portName']
    closest_airport_name = closest_airports[port_name]
    closest_airport_row = largeAirports_4326[largeAirports_4326['name'] == closest_airport_name].iloc[0]

    # Port and airport coordinates
    port_coords = (row_port['Latitude'], row_port['Longitude'])
    airport_coords = (closest_airport_row['latitude_deg'], closest_airport_row['longitude_deg'])

    # Build the line
    line = folium.PolyLine(locations=[port_coords, airport_coords], color='green', weight=1)

    # Add the line to the map
    m.add_child(line)

# Show the map
m
m
Out[ ]:
(interactive folium map)
In [ ]:
# Exercise 2
In [ ]:
# First rows of the rivers GeoDataFrame
rivers.head()
Out[ ]:
NAME SYSTEM geometry
0 Aldan Lena MULTILINESTRING ((124.00678 56.47258, 123.2595...
1 Amazon Amazon MULTILINESTRING ((-61.2773 -3.60706, -60.68466...
2 Amu Darya None MULTILINESTRING ((73.98818 37.49952, 73.52595 ...
3 Amur None MULTILINESTRING ((122.63956 49.9973, 120.47874...
4 Angara None MULTILINESTRING ((105.07841 51.93053, 103.9295...
In [ ]:
# Filter a river that crosses Germany (for example, the 'Rhine')
rhine_river = rivers[rivers.NAME.str.contains('Rhine', case=False, na=False)]
rhine_river.head()
Out[ ]:
NAME SYSTEM geometry
61 Rhine None MULTILINESTRING ((6.25146 51.93364, 6.11321 52...
In [ ]:
# Distance from each airport to the 'Rhine'
# (caveat: rivers is still in EPSG:4326 while largeAirports is in EPSG:25832,
# so mixing the two CRSs makes these ~5,500 "km" figures meaningless;
# both layers should be reprojected to a common projected CRS first)
distances_to_rhine = rhine_river.iloc[0].geometry.distance(largeAirports.set_index('name').geometry) / 1000  # in km

# Show the distances
distances_to_rhine
Out[ ]:
geometry
name
Frankfurt Airport 5561.805472
Munich Airport 5405.677007
Düsseldorf Airport 5694.751921
Hamburg Helmut Schmidt Airport 5969.633262
Cologne Bonn Airport 5648.588879
Stuttgart Airport 5417.616635
Nuremberg Airport 5523.353923
Hannover Airport 5838.132539
Berlin Brandenburg Airport 5866.501357
Leipzig/Halle Airport 5747.390731

In [ ]:
# Distance matrix between rivers and the German airports (same CRS caveat as above)
distanceMatrixKM_riv_air = rivers.set_index('NAME').geometry.apply(
    lambda g: largeAirports.set_index('name').geometry.distance(g) / 1000
).sort_index(axis=0).sort_index(axis=1)

# Show the distance matrix
distanceMatrixKM_riv_air
Out[ ]:
name Berlin Brandenburg Airport Cologne Bonn Airport Düsseldorf Airport Frankfurt Airport Hamburg Helmut Schmidt Airport Hannover Airport Leipzig/Halle Airport Munich Airport Nuremberg Airport Stuttgart Airport
NAME
Aldan 5866.472563 5648.569178 5694.732844 5561.783386 5969.609855 5838.109269 5747.363366 5405.649066 5523.327607 5417.593152
Amazon 5866.562370 5648.646101 5694.808875 5561.863715 5969.692061 5838.191280 5747.451166 5405.737676 5523.413929 5417.675466
Amu Darya 5866.502129 5648.593553 5694.756861 5561.809130 5969.636354 5838.135690 5747.392120 5405.678148 5523.355765 5417.619695
Amur 5866.481693 5648.578888 5694.742590 5561.792955 5969.619344 5838.118766 5747.372599 5405.658258 5523.336916 5417.602636
Angara 5866.481318 5648.575737 5694.739245 5561.790532 5969.617318 5838.116699 5747.371778 5405.657616 5523.335768 5417.600634
... ... ... ... ... ... ... ... ... ... ...
Xingu 5866.562370 5648.646101 5694.808875 5561.863715 5969.692061 5838.191280 5747.451166 5405.737676 5523.413929 5417.675466
Yangtze 5866.504867 5648.600267 5694.763751 5561.815164 5969.641775 5838.141179 5747.395573 5405.681313 5523.359741 5417.625078
Yenisey 5866.472708 5648.565597 5694.729010 5561.780778 5969.607787 5838.107145 5747.362924 5405.648862 5523.326733 5417.591115
Yukon 5866.507684 5648.584199 5694.746492 5561.803666 5969.633051 5838.132162 5747.395319 5405.682295 5523.357235 5417.616516
Zambezi 5866.561299 5648.650610 5694.813762 5561.866767 5969.694305 5838.193609 5747.450977 5405.737132 5523.414387 5417.677664

98 rows × 10 columns

In [ ]:
# A specific row of the distance matrix, in this case the 'Rhine'
distanceMatrixKM_riv_air.loc['Rhine'].sort_values()
Out[ ]:
Rhine
name
Munich Airport 5405.677007
Stuttgart Airport 5417.616635
Nuremberg Airport 5523.353923
Frankfurt Airport 5561.805472
Cologne Bonn Airport 5648.588879
Düsseldorf Airport 5694.751921
Leipzig/Halle Airport 5747.390731
Hannover Airport 5838.132539
Berlin Brandenburg Airport 5866.501357
Hamburg Helmut Schmidt Airport 5969.633262

In [ ]:
# Install mapclassify
!pip install mapclassify

# Base map with the large airports
base = largeAirports.explore(color='red', marker_kwds=dict(radius=10))

# Add the Rhine to the map
rivers[rivers.NAME.str.contains('Rhine')].explore(m=base)
Collecting mapclassify
  Downloading mapclassify-2.9.0-py3-none-any.whl.metadata (3.1 kB)
Requirement already satisfied: networkx>=3.2 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (3.5)
Requirement already satisfied: numpy>=1.26 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (2.0.2)
Requirement already satisfied: pandas>=2.1 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (2.2.2)
Requirement already satisfied: scikit-learn>=1.4 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (1.6.1)
Requirement already satisfied: scipy>=1.12 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (1.15.3)
Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas>=2.1->mapclassify) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas>=2.1->mapclassify) (2025.2)
Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas>=2.1->mapclassify) (2025.2)
Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn>=1.4->mapclassify) (1.5.1)
Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn>=1.4->mapclassify) (3.6.0)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas>=2.1->mapclassify) (1.17.0)
Downloading mapclassify-2.9.0-py3-none-any.whl (286 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 286.7/286.7 kB 6.5 MB/s eta 0:00:00
Installing collected packages: mapclassify
Successfully installed mapclassify-2.9.0
Out[ ]:
(interactive folium map)
In [ ]:
# Keep only the rivers that run through Germany
riversGermany_clipped= gpd.clip(gdf=rivers,
                               mask=Germany)  # the German rivers in this data have no "SYSTEM" (it shows as "None"), hence the clipping
# Show the selected rivers
riversGermany_clipped
Out[ ]:
NAME SYSTEM geometry
14 Danube None MULTILINESTRING ((8.73734 47.99981, 9.56016 48...
61 Rhine None MULTILINESTRING ((6.42693 51.85182, 6.88766 51...
In [ ]:
# Rename
systems = riversGermany_clipped
systems
Out[ ]:
NAME SYSTEM geometry
14 Danube None MULTILINESTRING ((8.73734 47.99981, 9.56016 48...
61 Rhine None MULTILINESTRING ((6.42693 51.85182, 6.88766 51...
In [ ]:
# Format the river-systems GeoDataFrame
systems.reset_index(drop=False, inplace=True)
systems.drop(columns='NAME', inplace=True)

# Show the formatted result
systems
Out[ ]:
index SYSTEM geometry
0 14 None MULTILINESTRING ((8.73734 47.99981, 9.56016 48...
1 61 None MULTILINESTRING ((6.42693 51.85182, 6.88766 51...
In [ ]:
# Drop the leftover index column
systems.drop(columns=['index', 'NAME'], inplace=True, errors='ignore')
systems
Out[ ]:
SYSTEM geometry
0 None MULTILINESTRING ((8.73734 47.99981, 9.56016 48...
1 None MULTILINESTRING ((6.42693 51.85182, 6.88766 51...
In [ ]:
# Distance matrix between the river systems and the large airports (same CRS caveat as before)
distanceMatrixKM_sys_air = systems.set_index('SYSTEM').geometry.apply(
    lambda g: largeAirports.set_index('name').geometry.distance(g) / 1000
).sort_index(axis=0).sort_index(axis=1)

# Show the distance matrix
distanceMatrixKM_sys_air
Out[ ]:
name Berlin Brandenburg Airport Cologne Bonn Airport Düsseldorf Airport Frankfurt Airport Hamburg Helmut Schmidt Airport Hannover Airport Leipzig/Halle Airport Munich Airport Nuremberg Airport Stuttgart Airport
SYSTEM
None 5866.503444 5648.591429 5694.754503 5561.807903 5969.635626 5838.134910 5747.392892 5405.679138 5523.356138 5417.618995
None 5866.501502 5648.589047 5694.752091 5561.805635 5969.633421 5838.132698 5747.390879 5405.677154 5523.354074 5417.616794
In [ ]:
# Closest airport to each river system (minimum distance)
mins = distanceMatrixKM_sys_air.idxmin(axis="columns")  # same as axis=1
mins
Out[ ]:
0
SYSTEM
None Munich Airport
None Munich Airport

In [ ]:
# Get one of those values
mins.iloc[1]
Out[ ]:
'Munich Airport'
In [ ]:
# Base map with the river systems
base = systems.explore()

# Closest airports
largeAirports[largeAirports.name.isin(mins)].explore(
    m=base,
    color='red',
    marker_kwds=dict(radius=10),
    legend=True,
    name='Closest Airports'
)

# Airports that are not the closest
largeAirports[~largeAirports.name.isin(mins)].explore(
    m=base,
    color='blue',
    marker_kwds=dict(radius=5),
    legend=True,
    name='Not Closest Airports'
)
Out[ ]:
(interactive folium map)
In [ ]:
# Exercise 3
In [ ]:
# Convex hulls for a set of line maps.

# Let's build some convex hulls for our German river systems:
# one polygon per system
systems.convex_hull
Out[ ]:
0
0 POLYGON ((8.73734 47.99981, 8.3896 48.14701, 8...
1 POLYGON ((9.72328 47.53838, 7.83207 47.5805, 7...

In [ ]:
# Plot the polygons
systems.convex_hull.plot()
Out[ ]:
<Axes: >
In [ ]:
# Now, a GeoDataFrame for the hulls
systems_hulls=systems.convex_hull.to_frame()
systems_hulls['system']=['None', 'None']
systems_hulls.rename(columns={0:'geometry'},inplace=True)
systems_hulls=systems_hulls.set_geometry('geometry')
systems_hulls.crs="EPSG:4326" # the CRS of the source geometries
systems_hulls
Out[ ]:
geometry system
0 POLYGON ((8.73734 47.99981, 8.3896 48.14701, 8... None
1 POLYGON ((9.72328 47.53838, 7.83207 47.5805, 7... None
In [ ]:
# Define the distance matrix
distanceMatrixKM_sysHull_air = systems_hulls.set_index('system').geometry.apply(
    lambda g: largeAirports.set_index('name').geometry.distance(g)/1000
).sort_index(axis=0).sort_index(axis=1)

distanceMatrixKM_sysHull_air
Out[ ]:
name Berlin Brandenburg Airport Cologne Bonn Airport Düsseldorf Airport Frankfurt Airport Hamburg Helmut Schmidt Airport Hannover Airport Leipzig/Halle Airport Munich Airport Nuremberg Airport Stuttgart Airport
system
None 5866.503444 5648.591429 5694.754503 5561.807903 5969.635626 5838.134910 5747.392892 5405.679138 5523.356138 5417.618995
None 5866.501502 5648.589047 5694.752091 5561.805635 5969.633421 5838.132698 5747.390879 5405.677154 5523.354074 5417.616794
In [ ]:
# Plot the hulls and the airports: the closest ones in red, the rest in blue.
base=systems_hulls.explore()
largeAirports[largeAirports.name.isin(mins)].explore(m=base,color='red',marker_kwds=dict(radius=10))
largeAirports[~largeAirports.name.isin(mins)].explore(m=base,color='blue',marker_kwds=dict(radius=5))
Out[ ]:
(interactive folium map)
In [ ]:
# Exercise 4

# Recall
distanceMatrixKM_riv_air # DF with distances in kilometres between river systems and the German airports
Out[ ]:
name Berlin Brandenburg Airport Cologne Bonn Airport Düsseldorf Airport Frankfurt Airport Hamburg Helmut Schmidt Airport Hannover Airport Leipzig/Halle Airport Munich Airport Nuremberg Airport Stuttgart Airport
NAME
Aldan 5866.472563 5648.569178 5694.732844 5561.783386 5969.609855 5838.109269 5747.363366 5405.649066 5523.327607 5417.593152
Amazon 5866.562370 5648.646101 5694.808875 5561.863715 5969.692061 5838.191280 5747.451166 5405.737676 5523.413929 5417.675466
Amu Darya 5866.502129 5648.593553 5694.756861 5561.809130 5969.636354 5838.135690 5747.392120 5405.678148 5523.355765 5417.619695
Amur 5866.481693 5648.578888 5694.742590 5561.792955 5969.619344 5838.118766 5747.372599 5405.658258 5523.336916 5417.602636
Angara 5866.481318 5648.575737 5694.739245 5561.790532 5969.617318 5838.116699 5747.371778 5405.657616 5523.335768 5417.600634
... ... ... ... ... ... ... ... ... ... ...
Xingu 5866.562370 5648.646101 5694.808875 5561.863715 5969.692061 5838.191280 5747.451166 5405.737676 5523.413929 5417.675466
Yangtze 5866.504867 5648.600267 5694.763751 5561.815164 5969.641775 5838.141179 5747.395573 5405.681313 5523.359741 5417.625078
Yenisey 5866.472708 5648.565597 5694.729010 5561.780778 5969.607787 5838.107145 5747.362924 5405.648862 5523.326733 5417.591115
Yukon 5866.507684 5648.584199 5694.746492 5561.803666 5969.633051 5838.132162 5747.395319 5405.682295 5523.357235 5417.616516
Zambezi 5866.561299 5648.650610 5694.813762 5561.866767 5969.694305 5838.193609 5747.450977 5405.737132 5523.414387 5417.677664

98 rows × 10 columns

In [ ]:
# Pick any summary value, in this case the min

distanceMatrixKM_riv_air.loc['Amazon'].min() # the shortest (minimum) distance between the "Amazon" system and
# any of the airports listed in the DF columns
Out[ ]:
5405.737675775065
In [ ]:
distanceMatrixKM_riv_air
Out[ ]:
name Berlin Brandenburg Airport Cologne Bonn Airport Düsseldorf Airport Frankfurt Airport Hamburg Helmut Schmidt Airport Hannover Airport Leipzig/Halle Airport Munich Airport Nuremberg Airport Stuttgart Airport
NAME
Aldan 5866.472563 5648.569178 5694.732844 5561.783386 5969.609855 5838.109269 5747.363366 5405.649066 5523.327607 5417.593152
Amazon 5866.562370 5648.646101 5694.808875 5561.863715 5969.692061 5838.191280 5747.451166 5405.737676 5523.413929 5417.675466
Amu Darya 5866.502129 5648.593553 5694.756861 5561.809130 5969.636354 5838.135690 5747.392120 5405.678148 5523.355765 5417.619695
Amur 5866.481693 5648.578888 5694.742590 5561.792955 5969.619344 5838.118766 5747.372599 5405.658258 5523.336916 5417.602636
Angara 5866.481318 5648.575737 5694.739245 5561.790532 5969.617318 5838.116699 5747.371778 5405.657616 5523.335768 5417.600634
... ... ... ... ... ... ... ... ... ... ...
Xingu 5866.562370 5648.646101 5694.808875 5561.863715 5969.692061 5838.191280 5747.451166 5405.737676 5523.413929 5417.675466
Yangtze 5866.504867 5648.600267 5694.763751 5561.815164 5969.641775 5838.141179 5747.395573 5405.681313 5523.359741 5417.625078
Yenisey 5866.472708 5648.565597 5694.729010 5561.780778 5969.607787 5838.107145 5747.362924 5405.648862 5523.326733 5417.591115
Yukon 5866.507684 5648.584199 5694.746492 5561.803666 5969.633051 5838.132162 5747.395319 5405.682295 5523.357235 5417.616516
Zambezi 5866.561299 5648.650610 5694.813762 5561.866767 5969.694305 5838.193609 5747.450977 5405.737132 5523.414387 5417.677664

98 rows × 10 columns

In [ ]:
# I only want the German rivers

specific_rivers = ['Danube', 'Rhine']

# Filter the DF to keep only the specified rivers
distanceMatrixKM_riv_air = distanceMatrixKM_riv_air.loc[specific_rivers]
distanceMatrixKM_riv_air # show it
Out[ ]:
name Berlin Brandenburg Airport Cologne Bonn Airport Düsseldorf Airport Frankfurt Airport Hamburg Helmut Schmidt Airport Hannover Airport Leipzig/Halle Airport Munich Airport Nuremberg Airport Stuttgart Airport
NAME
Danube 5866.503444 5648.591429 5694.754503 5561.807903 5969.635626 5838.134910 5747.392892 5405.679138 5523.356138 5417.618995
Rhine 5866.501357 5648.588879 5694.751921 5561.805472 5969.633262 5838.132539 5747.390731 5405.677007 5523.353923 5417.616635
In [ ]:
# Pick any summary value, in this case the min

distanceMatrixKM_riv_air.loc['Rhine'].min() # the shortest (minimum) distance between the 'Rhine'
# and any of the airports listed in the DF columns
Out[ ]:
5405.67700688333
In [ ]:
# We use an arbitrary value to build the buffer:
minMts=distanceMatrixKM_riv_air.loc['Rhine'].min()*1 # a km figure, reused below as metres
# The buffer is a polygon, so
rivers[rivers.NAME=='Rhine'].buffer(distance = minMts)
/tmp/ipython-input-76-3227383231.py:4: UserWarning: Geometry is in a geographic CRS. Results from 'buffer' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.

  rivers[rivers.NAME=='Rhine'].buffer(distance = minMts)
Out[ ]:
0
61 POLYGON ((-5398.13956 16.90368, -5401.50576 10...

In [ ]:
rivers = rivers.to_crs(epsg=3395)  # World Mercator, metre-based

# Buffer around the Rhine in the projected CRS
bufferAroundRhine = rivers[rivers.NAME=='Rhine'].buffer(distance=minMts)

# Base map with the buffer coloured red
bufferAsBase = bufferAroundRhine.explore(color='red')

# Show the Rhine and its buffer on the base map, with the river in blue
rivers[rivers.NAME=='Rhine'].explore(m=bufferAsBase, color='blue', style_kwds={'weight': 2})
Out[ ]:
(interactive folium map)
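A caveat on the CRS choice: EPSG:3395 (World Mercator) inflates distances at German latitudes (the local scale factor is roughly 1/cos(latitude), about 1.6 at 52°N), so the buffer above is wider on the ground than minMts suggests. A minimal sketch of the same buffer in EPSG:25832, the metre-based CRS already used for Germany above:

# Sketch: the same buffer in a Germany-appropriate projected CRS
rhine_25832 = rivers[rivers.NAME == 'Rhine'].to_crs(25832)
bufferAroundRhine_25832 = rhine_25832.buffer(distance=minMts)  # metres on the ground

base = bufferAroundRhine_25832.explore(color='red')
rhine_25832.explore(m=base, color='blue', style_kwds={'weight': 2})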
In [ ]:
# Part 2
In [85]:
import seaborn as sea

sea.histplot(indicatorsMap.fragility, color='yellow')
Out[85]:
<Axes: xlabel='fragility', ylabel='Count'>
In [86]:
!pip install mapclassify
Requirement already satisfied: mapclassify in /usr/local/lib/python3.11/dist-packages (2.9.0)
Requirement already satisfied: networkx>=3.2 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (3.5)
Requirement already satisfied: numpy>=1.26 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (2.0.2)
Requirement already satisfied: pandas>=2.1 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (2.2.2)
Requirement already satisfied: scikit-learn>=1.4 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (1.6.1)
Requirement already satisfied: scipy>=1.12 in /usr/local/lib/python3.11/dist-packages (from mapclassify) (1.15.3)
Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas>=2.1->mapclassify) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas>=2.1->mapclassify) (2025.2)
Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas>=2.1->mapclassify) (2025.2)
Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn>=1.4->mapclassify) (1.5.1)
Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn>=1.4->mapclassify) (3.6.0)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas>=2.1->mapclassify) (1.17.0)
In [89]:
indicatorsMap.explore(
    column="fragility",
    scheme="fisherjenks",
    legend=True,
    tooltip=False,
    popup=['DEPARTAMENTO', 'PROVINCIA', 'DISTRITO'],  # on-click popup; these columns do not exist in indicatorsMap, hence the error below
    legend_kwds=dict(colorbar=False)
)
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
(... folium rendering frames omitted ...)

AssertionError: The field DEPARTAMENTO is not available in the data. Choose from: ('COUNTRY', 'Officialstatename', 'InternetccTLD', 'iso2', 'iso3', 'fragility_date', 'fragility', 'co2', 'co2_date', 'region', 'ForestRev_gdp', 'ForestRev_date', 'fragility_Qt', 'fragility_Qt_jc5', 'fragility_Qt_jc5_cat', '__folium_color').
Out[89]:
<folium.folium.Map at 0x7993cad77210>

Exercises Part II

Exercises 5, 6, 7, 8, and 9 for this part must read your data from GitHub links, and the code and results must be published using GitHub Pages. This is a different project, so those exercises will be published on a different webpage.

Exercise 5

  1. Get a polygons map of the lowest administrative unit possible.

  2. Get a table of variables for those units. At least 3 numerical variables.

  3. Preprocess both tables and get them ready for merging.

  4. Do the merging, making the changes needed so that you keep the most rows (a sketch follows below).
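A minimal sketch of steps 3 and 4, assuming a hypothetical polygons layer units and a variables table table that share a district-code column ubigeo (the file names and the key column are placeholders to adapt to your own data):

import pandas as pd
import geopandas as gpd

units = gpd.read_file('districts.gpkg')   # placeholder: your polygons map
table = pd.read_csv('indicators.csv')     # placeholder: your variables table

# preprocessing: make the merge keys comparable on both sides
units['ubigeo'] = units['ubigeo'].astype(str).str.zfill(6).str.strip()
table['ubigeo'] = table['ubigeo'].astype(str).str.zfill(6).str.strip()

# merging: a left join on the map side keeps every polygon (the most rows)
unitsMap = units.merge(table, on='ubigeo', how='left', validate='one_to_one')

# check how many polygons did not find a match
unitsMap.isna().sum()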

Mining one variable

In the session on Intro to GeoDF we did a lot on this. The main idea was simply to know the behavior of one variable, and plot it as a choropleth map.

In this case, spatial properties of the data were NOT used at all, for example:

  1. Descriptive stats would be the same in a simple data frame:
In [ ]:
# statistics
datadisMap.IDH2019.describe()
  2. A histogram would be the same in a simple data frame:
In [ ]:
import seaborn as sea

sea.histplot(datadisMap.IDH2019, color='yellow')
  3. Transform and Discretize: We also learned that we could rescale and discretize. But given this bell-shaped behavior, we just need to discretize, which I will simply do using the fisherjenks scheme:
In [ ]:
datadisMap.explore(
    column="IDH2019",
    scheme="fisherjenks",
    legend=True,
    tooltip=False,
    popup=['DEPARTAMEN', 'PROVINCIA', 'DISTRITO'],  # show popup (on-click)
    legend_kwds=dict(colorbar=False)
)

Spatial Properties: determining the neighborhood

We can compute the neighborhood for each object in a map using different options:

  1. The polygons that share borders:
In [ ]:
from libpysal.weights import Queen, Rook, KNN

# rook
w_rook = Rook.from_dataframe(datadisMap,use_index=False)
In [ ]:
w_rook.islands
  2. The polygons that share at least a point:
In [ ]:
# queen
w_queen = Queen.from_dataframe(datadisMap,use_index=False)
In [ ]:
w_queen.islands

Let me show the islands detected in the previous steps:

In [ ]:
datadisMap.iloc[w_queen.islands,:].explore()

The presence of islands will be problematic in more complex applications. An alternative is:

  3. Nearest neighbors:
In [ ]:
# k=8 nearest neighbors
w_knn8 = KNN.from_dataframe(datadisMap, k=8)
In [ ]:
w_knn8.islands

Let's understand the differences:

In [ ]:
# last districts in the GDF:
datadisMap.tail()
In [ ]:
# amount of neighbors of that district
len(w_rook.neighbors[1870]),len(w_queen.neighbors[1870])
In [ ]:
datadisMap.iloc[w_rook.neighbors[1870] ,]
In [ ]:
# see the neighbor
datadisMap.iloc[w_rook.neighbors[1870] ,].plot(facecolor="yellow")
In [ ]:
# see whole area
base=datadisMap.iloc[w_rook.neighbors[1870] ,].plot(facecolor="yellow",edgecolor='k')
datadisMap.loc[datadisMap.DISTRITO=='ANCON'].plot(ax=base,facecolor="red")

Let's do the same with queen neighbors:

In [ ]:
# details
datadisMap.iloc[w_queen.neighbors[1870] ,]
In [ ]:
# whole area
# see whole area
base=datadisMap.iloc[w_queen.neighbors[1870] ,].plot(facecolor="yellow",edgecolor='k')
datadisMap.loc[datadisMap.DISTRITO=='ANCON'].plot(ax=base,facecolor="red")

What about the eight closest ones?

In [ ]:
w_knn8.neighbors[1870]
In [ ]:
base=datadisMap.iloc[w_knn8.neighbors[1870] ,].plot(facecolor="yellow",edgecolor='k')
datadisMap.loc[datadisMap.DISTRITO=='ANCON'].plot(ax=base,facecolor="red")

Exercise 6

Compute the neighbors of the capital city of your country. Plot the results for each of the options.

Global spatial correlation

If a spatial unit's (a row's) value in a variable is correlated with the values of its neighbors, you know that proximity is interfering with the interpretation.

We need the neighborhood matrix (the weight matrix) to compute spatial correlation.

In [ ]:
pd.DataFrame(*w_knn8.full()) # 1 means both are neighbors

If we standardize by row, the neighbors in a row add up to 1:

In [ ]:
# needed for spatial correlation
w_knn8.transform = 'R'
In [ ]:
# after transformation
pd.DataFrame(*w_knn8.full()).sum(axis=1)

Spatial correlation is measured by the Moran's I statistic:
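For reference, with spatial weights $w_{ij}$ and deviations $z_i = x_i - \bar{x}$, the statistic is

$$I = \frac{n}{\sum_i \sum_j w_{ij}} \cdot \frac{\sum_i \sum_j w_{ij} z_i z_j}{\sum_i z_i^{2}}$$

Values above its expectation indicate that similar values cluster in space.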

In [ ]:
from esda.moran import Moran

moranIDH = Moran(datadisMap['IDH2019'], w_knn8)
moranIDH.I,moranIDH.p_sim

A significant Moran's I suggests spatial correlation. Let's see the Moran scatter plot:

In [ ]:
from splot.esda import moran_scatterplot

fig, ax = moran_scatterplot(moranIDH, aspect_equal=True)
ax.set_xlabel('IDH_std')
ax.set_ylabel('SpatialLag_IDH_std');

Exercise 7

  1. Compute the Moran's coefficient for one of your three numeric variables.

  2. Make a scatter plot for each variable.

Local Spatial Correlation

We can compute a Local Index of Spatial Association (LISA, the local Moran) for each map object. That will help us find spatial clusters (spots) and spatial outliers:

  • A hotSpot is a polygon whose value in the variable is high AND is surrounded by polygons that also have high values.

  • A coldSpot is a polygon whose value in the variable is low AND is surrounded by polygons that also have low values.

  • A coldOutlier is a polygon whose value in the variable is low BUT is surrounded by polygons with high values.

  • A hotOutlier is a polygon whose value in the variable is high BUT is surrounded by polygons with low values.
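These four categories follow from the local Moran statistic, which in its standard form is

$$I_i = \frac{z_i}{m_2} \sum_j w_{ij} z_j, \qquad m_2 = \frac{1}{n}\sum_i z_i^{2},$$

so a significant positive $I_i$ marks a spot (HH or LL) and a significant negative $I_i$ an outlier (LH or HL).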

It is also possible that no significant correlation is detected. Let's see those values:

In [ ]:
# A LISA for each district using IDH2019
from esda.moran import Moran_Local
lisaIDH = Moran_Local(y=datadisMap['IDH2019'], w=w_knn8,seed=2022)
In [ ]:
fig, ax = moran_scatterplot(lisaIDH,p=0.05)
ax.set_xlabel('IDH_std')
ax.set_ylabel('SpatialLag_IDH_std');

Each district falls in one quadrant. If the district is NOT grey, its LISA is significant. Let's represent that information in a map, using the lisaIDH object:

In [ ]:
# quadrant, # significance
lisaIDH.q, lisaIDH.p_sim
In [ ]:
# quadrant: 1 HH,  2 LH,  3 LL,  4 HL
pd.Series(lisaIDH.q).value_counts()

The info in lisaIDH.q cannot be used right away; we need to add whether the local spatial correlation is significant:

In [ ]:
datadisMap['IDH_quadrant']=[l if p <0.05 else 0 for l,p in zip(lisaIDH.q,lisaIDH.p_sim)  ]
datadisMap['IDH_quadrant'].value_counts()

Now, we recode:

In [ ]:
labels = [ '0 no_sig', '1 hotSpot', '2 coldOutlier', '3 coldSpot', '4 hotOutlier']

datadisMap['IDH_quadrant_names']=[labels[i] for i in datadisMap['IDH_quadrant']]

datadisMap['IDH_quadrant_names'].value_counts()
In [ ]:
# custom colors
from matplotlib import colors
myColMap = colors.ListedColormap([ 'white', 'pink', 'cyan', 'azure','red'])

# Set up figure and ax
f, ax = plt.subplots(1, figsize=(12,12))
# Plot unique values choropleth including
# a legend and with no boundary lines

plt.title('Spots and Outliers')

datadisMap.plot(column='IDH_quadrant_names',
                categorical=True,
                cmap=myColMap,
                linewidth=0.1,
                edgecolor='k',
                legend=True,
                legend_kwds={'loc': 'center left',
                             'bbox_to_anchor': (0.7, 0.6)},
                ax=ax)
# Remove axis
ax.set_axis_off()
# Display the map
plt.show()

Exercise 8

  1. Compute the Local Moran for the variables in your data that have significant spatial correlation.

  2. Create a new column for each of those variables, with a label ('0 no_sig', '1 hotSpot', '2 coldOutlier', '3 coldSpot', '4 hotOutlier').

  3. Prepare a map for each of the variables analyzed, showing the spots and outliers.

Mining several variables

Let me select some columns:

In [ ]:
selected_variables = ['Educ_sec_comp2019_pct',
                     'NBI2017_pct',
                     'Viv_sin_serv_hig2017_pct']
In [ ]:
# see distribution
sea.boxplot(datadisMap[selected_variables])

Let me check their monotonic relationships:

In [ ]:
datadisMap[selected_variables].corr()
In [ ]:
sea.pairplot(
    datadisMap[selected_variables], kind="reg", diag_kind="kde"
)

Here, we can reverse the values of Educ_sec_comp2019_pct. First let me standardize:

In [ ]:
from sklearn.preprocessing import StandardScaler


scaler = StandardScaler()
normalized_data = scaler.fit_transform(datadisMap[selected_variables])
sea.displot(pd.melt(pd.DataFrame(normalized_data,columns=selected_variables)),
            x="value", hue="variable",kind="kde",
            log_scale=(False,False))

Let me create new variables with the standardized values:

In [ ]:
# new names
selected_variables_new_std=[s+'_std' for s in selected_variables]

# add columns
datadisMap[selected_variables_new_std]=normalized_data

Now, it is easy to reverse:

In [ ]:
datadisMap['Educ_sec_NO_comp2019_pct_std']=-1*datadisMap.Educ_sec_comp2019_pct_std
In [ ]:
# as a result:
selected_variables_new_std = ['Educ_sec_NO_comp2019_pct_std',
                     'NBI2017_pct_std',
                     'Viv_sin_serv_hig2017_pct_std']
sea.pairplot(
    datadisMap[selected_variables_new_std], kind="reg", diag_kind="kde"
)

Conventional Clustering

Here, I will use the three variables to create clusters of districts. Let me explore how many clusters could be created:

In [ ]:
from scipy.cluster import hierarchy as hc


Z = hc.linkage(datadisMap[selected_variables_new_std], 'ward')
# calculate full dendrogram
plt.figure(figsize=(25, 10))
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('cases')
plt.ylabel('distance')
hc.dendrogram(
    Z,
    leaf_rotation=90.,  # rotates the x axis labels
    leaf_font_size=1,  # font size for the x axis labels
)
plt.show()

The dendrogram suggests three groups. Let me request six anyway.

Let me use a common hierarchical technique following an agglomerative approach:

In [ ]:
from sklearn.cluster import AgglomerativeClustering as agnes

import numpy as np
np.random.seed(12345)# Set seed for reproducibility

# Initialize the algorithm, requesting 6 clusters
model = agnes(linkage="ward", n_clusters=6).fit(datadisMap[selected_variables_new_std])

# Assign labels to main data table
datadisMap["hc_ag6"] = model.labels_
In [ ]:
# see distribution of districts
datadisMap["hc_ag6"].value_counts()

We could try to find the pattern that created the clusters:

In [ ]:
datadisMap.groupby("hc_ag6")[selected_variables_new_std].mean()

Let me show you the six groups of districts which have similar behavior in three variables:

In [ ]:
# Set up figure and ax
f, ax = plt.subplots(1, figsize=(9, 9))
# Plot unique values choropleth including
# a legend and with no boundary lines
datadisMap.plot(
    column="hc_ag6", categorical=True, legend=True, linewidth=0, ax=ax
)
# Remove axis
ax.set_axis_off()
# Display the map
plt.show()

Regionalization: Spatial Clustering

Spatial clustering or Regionalization will force the contiguity of the polygons to make a cluster.

In [ ]:
# modify the previous function call to specify a cluster model with a spatial constraint

model_queen = agnes(linkage="ward",
                    n_clusters=6,
                    connectivity=w_queen.sparse).fit(datadisMap[selected_variables_new_std])
# Fit algorithm to the data
datadisMap["hc_ag6_wQueen"] = model_queen.labels_

We knew this would happen because we have islands, so these results may not be satisfactory:

In [ ]:
# Set up figure and ax
f, ax = plt.subplots(1, figsize=(9, 9))
# Plot unique values choropleth including a legend and with no boundary lines
datadisMap.plot(
    column="hc_ag6_wQueen",
    categorical=True,
    legend=True,
    linewidth=0,
    ax=ax,
)
# Remove axis
ax.set_axis_off()
# Display the map
plt.show()

We still have the KNN alternative. Let's use that instead:

In [ ]:
model_wknn8 = agnes(linkage="ward",
                    n_clusters=6,
                    connectivity=w_knn8.sparse).fit(datadisMap[selected_variables_new_std])
datadisMap["hc_ag6_wknn8"] = model_wknn8.labels_
In [ ]:
# Set up figure and ax
f, ax = plt.subplots(1, figsize=(10, 12))
# Plot unique values choropleth including a legend and with no boundary lines
datadisMap.plot(
    column="hc_ag6_wknn8",
    categorical=True,
    legend=True,
    linewidth=0,
    ax=ax,
)
# Remove axis
ax.set_axis_off()
# Display the map
plt.show()

We could evaluate two aspects of these clustering results:

  • “Compactness” of cluster shape, using the isoperimetric quotient (IPQ). This compares the area of the region to the area of a circle with the same perimeter as the region. For this measure, more compact shapes have an IPQ closer to 1, whereas very elongated or spindly shapes will have IPQs closer to zero. For the clustering solutions, we would expect the IPQ to be very small indeed, since the perimeter of a cluster/region gets smaller the more boundaries that members share.
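In symbols, since a circle with perimeter $P$ encloses an area of $P^{2}/4\pi$, the quotient for a region with area $A_i$ and perimeter $P_i$ is

$$IPQ_i = \frac{A_i}{P_i^{2}/4\pi} = \frac{4\pi A_i}{P_i^{2}}$$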
In [ ]:
from esda import shape as shapestats
results={}
for cluster_type in ("hc_ag6_wknn8", "hc_ag6"):
    # compute the region polygons using a dissolve
    regions = datadisMap[[cluster_type, "geometry"]].to_crs(24892).dissolve(by=cluster_type)
    # compute the actual isoperimetric quotient for these regions
    ipqs = shapestats.isoperimetric_quotient(regions)
    # cast to a dataframe
    result = {cluster_type:ipqs}
    results.update(result)
# stack the series together along columns
pd.DataFrame(results)

An alternative could be convex_hull_ratio, simply the division of the area of the cluster by the area of its convex hull.
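In symbols, $CHR_i = A_i / A_{\mathrm{hull}(i)} \in (0, 1]$, with values closer to 1 for more convex, more compact regions.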

In [ ]:
from esda import shape as shapestats
results={}
for cluster_type in ("hc_ag6_wknn8", "hc_ag6"):
    # compute the region polygons using a dissolve
    regions = datadisMap[[cluster_type, "geometry"]].to_crs(24892).dissolve(by=cluster_type)
    # compute the actual convex hull quotient for these regions
    chullr = shapestats.convex_hull_ratio(regions)
    # cast to a dataframe
    result = {cluster_type:chullr}
    results.update(result)
# stack the series together along columns
pd.DataFrame(results)

In both cases, the spatial clusters do better.

  • Goodness of fit. Here we have two metrics:
    • metrics.calinski_harabasz_score
    • silhouette_score
In [ ]:
from sklearn import metrics

fit_scores = []
for cluster_type in ("hc_ag6_wknn8", "hc_ag6"):
    # compute the CH score
    ch_score = metrics.calinski_harabasz_score(
        # using scaled variables
        datadisMap[selected_variables_new_std],
        # using these labels
        datadisMap[cluster_type],
    )
    sil_score = metrics.silhouette_score(
        # using scaled variables
        datadisMap[selected_variables_new_std],
        # using these labels
        datadisMap[cluster_type],
    )
    # and append the cluster type with the CH score
    fit_scores.append((cluster_type, ch_score,sil_score))


# re-arrange the scores into a dataframe for display
pd.DataFrame(
    fit_scores, columns=["cluster type", "CH score", "SIL score"]
).set_index("cluster type")

Here, the conventional clustering beats the spatial ones, as bigger values are better in both metrics.

Exercise 9

Use your three variables to carry out the cluster/regional analysis.

Conventional Regression

This is a basic regression:

In [ ]:
from pysal.model import spreg

dep_var_name=['NBI2017_pct']
ind_vars_names=['Educ_sec_comp2019_pct','Viv_sin_serv_hig2017_pct']
labels=['Lack Basic Needs %','High-School completed %', 'No H-Hold sanitation %']

ols_model = spreg.OLS(
    # Dependent variable
    datadisMap[dep_var_name].values,
    # Independent variables
    datadisMap[ind_vars_names].values,
    # Dependent variable name
    name_y=labels[0],
    # Independent variable name
    name_x=labels[1:],
    name_ds='datadisMap')

print(ols_model.summary)

Could some spatial effect be at play that we have not noticed?

In [ ]:
# the dependent variable
moranNBI = Moran(datadisMap[dep_var_name], w_knn8)
moranNBI.I,moranNBI.p_sim
In [ ]:
# the error term
moranError = Moran(ols_model.u, w_knn8)
moranError.I,moranError.p_sim

We have a strong suggestion that one or both are at play here.

Spatial Regression

I. Spatial Lag Regression (the dependent variable)

In [ ]:
SAC_model = spreg.ML_Lag(
    # Dependent variable
    datadisMap[dep_var_name].values,
    # Independent variables
    datadisMap[ind_vars_names].values,
    w=w_knn8,
    # Dependent variable name
    name_y=labels[0],
    # Independent variable name
    name_x=labels[1:],
    name_w='KNN8',
    name_ds='datadisMap'
    )

print(SAC_model.summary)

Since you get this: Coefficient W_NBI2017_pct (ρ = 0.571, p = 0.000), you know that a 10% increase in deprivation in neighboring areas is associated with a 5.71% increase in local deprivation. Poverty is therefore highly "contagious" across nearby regions. A policy suggestion: anti-poverty programs should target entire clusters (not just individual areas) to break spillover cycles.
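For reference, the spatial lag model being estimated has the standard form

$$y = \rho W y + X\beta + \varepsilon,$$

so $\rho$ measures how strongly each unit's outcome responds to the weighted average outcome $Wy$ of its neighbors.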

  • Spatial Error Regression
In [ ]:
SER_model = spreg.ML_Error(
    # Dependent variable
    datadisMap[dep_var_name].values,
    # Independent variables
    datadisMap[ind_vars_names].values,
    w=w_knn8,
    # Dependent variable name
    name_y=labels[0],
    # Independent variable name
    name_x=labels[1:],
    name_w='KNN8',
    name_ds='datadisMap'
    )

print(SER_model.summary)

We find strong spatial spillovers (λ = 0.837): the unobserved deprivation factors (e.g., informal economies, cultural norms) are highly correlated across neighboring areas. Hidden drivers of poverty thus travel across space, so a deprivation shock in nearby regions carries over strongly into local deprivation through channels the model does not observe.
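For reference, the spatial error specification is

$$y = X\beta + u, \qquad u = \lambda W u + \varepsilon,$$

so $\lambda$ captures correlation among the unobserved components (the errors) of neighboring units, rather than among the outcomes themselves.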

What if?

In [ ]:
SAC_model = spreg.GM_Combo_Het(
    # Dependent variable
    datadisMap[dep_var_name].values,
    # Independent variables
    datadisMap[ind_vars_names].values,
    w=w_knn8,
    # Dependent variable name
    name_y=labels[0],
    # Independent variable name
    name_x=labels[1:],
    name_w='KNN8',
    name_ds='datadisMap'
)

# Print results
print(SAC_model.summary)

Neither the SAR nor the SAC is needed; the SER is what matters in this case.

Exercise 10

Use your three variables to carry out regression analysis (conventional and spatial).