'How to extracts the different land use areas from TIF file within different distances?

I have a TIF format raster data file of land use and a vector point file in SHP format, and want to write a Python script using python libraries like geopandas to create buffers with different distances for the vector points and automatically extracts the different land use areas from TIF file within different distances. How can I do it? Thank you very much!



Solution 1:[1]

  • your question noted that you wanted to extract (clip) data from a GeoTIFF on land usages
  • comment then states you want counts of usage pixels associated with buffered points. This answer does this
    • clip xarray to buffered point
    • run analysis on this clipped xarray to get counts
    • add usage columns back to geopandas data frame as columns
import rioxarray
from pathlib import Path
import geopandas as gpd
import shapely.geometry
import pandas as pd
import numpy as np

# geta raster..
url = "https://www.eea.europa.eu/data-and-maps/data/global-land-cover-250m/zipped-tif-files-250m-raster/zipped-tif-files-250m-raster/at_download/file"

f = Path.cwd().joinpath("land-cover.zip")
if not f.exists():
    r = requests.get(url, stream=True, headers={"User-Agent": "XY"})
    with open(f, "wb") as fd:
        for chunk in r.iter_content(chunk_size=128):
            fd.write(chunk)
    zfile = ZipFile(f)
    tif = [f for f in zfile.infolist() if "/" not in f.filename]
    zfile.extractall(path=f.stem, members=tif)

# open the raster
f = list(Path.cwd().joinpath(f.stem).glob("*.tif"))[0]
xds = rioxarray.open_rasterio(f)

# reduce size of tif for purpose of dev / testing turnaround speed
devmode = True
if devmode == True:
    _f = f.parent.joinpath("smaller/smaller.tif")
    if not _f.parent.exists():
        _f.parent.mkdir()
    if not _f.exists():
        box = shapely.geometry.box(
            *shapely.geometry.box(*xds.rio.bounds()).centroid.buffer(5 * 10**5).bounds
        )
        xa_cut = xds.rio.clip([box.__geo_interface__], drop=True)
        xa_cut.rio.to_raster(_f)
    xds = rioxarray.open_rasterio(_f)

# some points - cities within bounds of GeoTIFF
gdf = (
    gpd.read_file(gpd.datasets.get_path("naturalearth_cities"))
    .to_crs(xds.rio.crs)
    .clip(shapely.geometry.box(*xds.rio.bounds()))
).reset_index(drop=True)

# random distance for each point
np.random.seed(7)
gdf["radius"] = np.random.randint(5 * 10**4, 10**5, len(gdf))
gdf["geometry"] = gdf.apply(lambda r: r["geometry"].buffer(r["radius"]), axis=1)

def land_usage(g):
    xa = xds.rio.clip([g], drop=True)
    xa = xa.where(xa != 0, drop=True)
    counts = xa.groupby(xa).count().drop("spatial_ref").squeeze().to_dict()

    return {
        str(int(k)): v
        for k, v in zip(counts["coords"]["group"]["data"], counts["data"])
    }


# add counts of land use as colums to buffered points
gdf = gdf.join(gdf["geometry"].apply(land_usage).apply(pd.Series))

output

name radius 2 4 6 13 16 20 22 14 15 17 19
0 Budapest 99689 70923 2984 2083 2342 347318 1055 17074 nan nan nan nan
1 Bratislava 60742 19247 8750 1085 629 146807 2281 6355 nan nan nan nan
2 Vienna 88467 63181 66289 13330 837 237979 2525 8631 nan nan nan nan
3 Prague 50919 2301 20483 3799 15 98426 nan 5079 nan nan nan nan
4 Berlin 63927 1719 44207 18960 21410 78925 4171 18038 3337 251 14070 nan
5 København 84140 6460 6326 11066 1732 141932 133341 16274 665 683 11198 6
6 Warsaw 52583 1998 11428 9947 10557 77328 1700 8836 5251 109 11599 nan
7 Vilnius 78232 4826 41176 23952 17708 86706 3889 2637 4547 62 21022 nan

Solution 2:[2]

full code

from pathlib import Path
from zipfile import ZipFile
import requests
import rasterio, rasterio.mask, rasterio.plot
import geopandas as gpd
import shapely
import numpy as np
import random
import matplotlib.pyplot as plt

# geta raster..
url = "https://www.eea.europa.eu/data-and-maps/data/global-land-cover-250m/zipped-tif-files-250m-raster/zipped-tif-files-250m-raster/at_download/file"

f = Path.cwd().joinpath("land-cover.zip")
if not f.exists():
    r = requests.get(url, stream=True, headers={"User-Agent": "XY"})
    with open(f, "wb") as fd:
        for chunk in r.iter_content(chunk_size=128):
            fd.write(chunk)
    zfile = ZipFile(f)
    tif = [f for f in zfile.infolist() if "/" not in f.filename]
    zfile.extractall(path=f.stem, members=tif)

# open the raster
src = rasterio.open(list(Path.cwd().joinpath(f.stem).glob("*.tif"))[0])

# some points - cities within bounds of GeoTIFF
gdf = (
    gpd.read_file(gpd.datasets.get_path("naturalearth_cities"))
    .to_crs(src.crs)
    .clip(shapely.geometry.box(*src.bounds))
).sample(15)

# random distance for each point
gdf["geometry"] = gdf["geometry"].apply(lambda p: p.buffer(random.randint(10**5, 5*10**5)))

# shapes to mask raster
shapes = [f["geometry"] for f in gdf["geometry"].__geo_interface__["features"]]

# mask the raster, with points buffered to distance
out_image, out_transform = rasterio.mask.mask(src, shapes, crop=True)
out_meta = src.meta

# save and reload masked raster
out_meta.update({"driver": "GTiff",
                 "height": out_image.shape[1],
                 "width": out_image.shape[2],
                 "transform": out_transform})

f = Path.cwd().joinpath("land-cover/masked")
if not f.exists(): f.mkdir()
f = f.joinpath("land-maksed.tif")
with rasterio.open(f, "w", **out_meta) as dest:
    dest.write(out_image)
msk = rasterio.open(f)

# let's have a look at what we've done..
fig, ax = plt.subplots()

# transform rasterio plot to real world coords
extent=msk.bounds
ax = rasterio.plot.show(msk, extent=extent, ax=ax)

output

Different size circles as have used random buffer radius of random points.

enter image description here

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Rob Raymond
Solution 2 Rob Raymond