Source code for trackintel.io.from_geopandas

import warnings
import pandas as pd
import geopandas as gpd

from trackintel import Positionfixes, Staypoints, Triplegs, Locations, Trips, Tours


def read_positionfixes_gpd(
    gdf, tracked_at="tracked_at", user_id="user_id", geom_col=None, crs=None, tz=None, mapper=None
):
    """
    Read positionfixes from GeoDataFrames.

    Wraps the pd.rename function to simplify the import of GeoDataFrames.

    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame with valid point geometry, containing the positionfixes to import.

    tracked_at : str, default 'tracked_at'
        Name of the column storing the timestamps.

    user_id : str, default 'user_id'
        Name of the column storing the user_id.

    geom_col : str, optional
        Name of the column storing the geometry. If None, the geometry is
        assumed to be set on `gdf` already.

    crs : pyproj.crs or str, optional
        Coordinate reference system to set. Anything accepted by
        pyproj.CRS.from_user_input() is valid, such as an authority string
        (eg "EPSG:4326") or a WKT string.

    tz : str, optional
        pytz compatible timezone string. If None, UTC is assumed.

    mapper : dict, optional
        Further columns that should be renamed. Entries override the
        renames derived from `tracked_at` and `user_id`.

    Returns
    -------
    pfs : Positionfixes
        A GeoDataFrame containing the positionfixes.

    Examples
    --------
    >>> trackintel.read_positionfixes_gpd(gdf, user_id='User', geom_col='geom', tz='utc')
    """
    # Source column name -> trackintel column name.
    rename_map = {tracked_at: "tracked_at", user_id: "user_id"}
    if mapper is not None:
        rename_map.update(mapper)

    return Positionfixes(
        _trackintel_model(
            gdf,
            set_names=rename_map,
            geom_col=geom_col,
            crs=crs,
            tz_cols=["tracked_at"],
            tz=tz,
        )
    )
def read_staypoints_gpd(
    gdf,
    started_at="started_at",
    finished_at="finished_at",
    user_id="user_id",
    geom_col=None,
    crs=None,
    tz=None,
    mapper=None,
):
    """
    Read staypoints from GeoDataFrames.

    Wraps the pd.rename function to simplify the import of GeoDataFrames.

    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame with valid point geometry, containing the staypoints to import.

    started_at : str, default 'started_at'
        Name of the column storing the start time of the staypoints.

    finished_at : str, default 'finished_at'
        Name of the column storing the end time of the staypoints.

    user_id : str, default 'user_id'
        Name of the column storing the user_id.

    geom_col : str, optional
        Name of the column storing the geometry. If None, the geometry is
        assumed to be set on `gdf` already.

    crs : pyproj.crs or str, optional
        Coordinate reference system to set. Anything accepted by
        pyproj.CRS.from_user_input() is valid, such as an authority string
        (eg "EPSG:4326") or a WKT string.

    tz : str, optional
        pytz compatible timezone string. If None, UTC is assumed.

    mapper : dict, optional
        Further columns that should be renamed. Entries override the
        renames derived from the column-name arguments.

    Returns
    -------
    sp : Staypoints
        A GeoDataFrame containing the staypoints.

    Examples
    --------
    >>> trackintel.read_staypoints_gpd(gdf, started_at='start_time', finished_at='end_time', tz='utc')
    """
    # Source column name -> trackintel column name.
    rename_map = {
        started_at: "started_at",
        finished_at: "finished_at",
        user_id: "user_id",
    }
    if mapper is not None:
        rename_map.update(mapper)

    time_columns = ["started_at", "finished_at"]
    return Staypoints(
        _trackintel_model(
            gdf,
            set_names=rename_map,
            geom_col=geom_col,
            crs=crs,
            tz_cols=time_columns,
            tz=tz,
        )
    )
def read_triplegs_gpd(
    gdf,
    started_at="started_at",
    finished_at="finished_at",
    user_id="user_id",
    geom_col=None,
    crs=None,
    tz=None,
    mapper=None,
):
    """
    Read triplegs from GeoDataFrames.

    Wraps the pd.rename function to simplify the import of GeoDataFrames.

    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame with valid line geometry, containing the triplegs to import.

    started_at : str, default 'started_at'
        Name of the column storing the start time of the triplegs.

    finished_at : str, default 'finished_at'
        Name of the column storing the end time of the triplegs.

    user_id : str, default 'user_id'
        Name of the column storing the user_id.

    geom_col : str, optional
        Name of the column storing the geometry. If None, the geometry is
        assumed to be set on `gdf` already.

    crs : pyproj.crs or str, optional
        Coordinate reference system to set. Anything accepted by
        pyproj.CRS.from_user_input() is valid, such as an authority string
        (eg "EPSG:4326") or a WKT string.

    tz : str, optional
        pytz compatible timezone string. If None, UTC is assumed.

    mapper : dict, optional
        Further columns that should be renamed. Entries override the
        renames derived from the column-name arguments.

    Returns
    -------
    tpls : Triplegs
        A GeoDataFrame containing the triplegs.

    Examples
    --------
    >>> trackintel.read_triplegs_gpd(gdf, user_id='User', geom_col='geom', tz='utc')
    """
    # Source column name -> trackintel column name.
    rename_map = {
        started_at: "started_at",
        finished_at: "finished_at",
        user_id: "user_id",
    }
    if mapper is not None:
        rename_map.update(mapper)

    time_columns = ["started_at", "finished_at"]
    return Triplegs(
        _trackintel_model(
            gdf,
            set_names=rename_map,
            geom_col=geom_col,
            crs=crs,
            tz_cols=time_columns,
            tz=tz,
        )
    )
def read_trips_gpd(
    gdf,
    started_at="started_at",
    finished_at="finished_at",
    user_id="user_id",
    origin_staypoint_id="origin_staypoint_id",
    destination_staypoint_id="destination_staypoint_id",
    geom_col=None,
    crs=None,
    tz=None,
    mapper=None,
):
    """
    Read trips from GeoDataFrames/DataFrames.

    Wraps the pd.rename function to simplify the import of GeoDataFrames (DataFrames).

    Parameters
    ----------
    gdf : GeoDataFrame or DataFrame
        (Geo)DataFrame containing the trips to import.

    started_at : str, default 'started_at'
        Name of the column storing the start time of the trips.

    finished_at : str, default 'finished_at'
        Name of the column storing the end time of the trips.

    user_id : str, default 'user_id'
        Name of the column storing the user_id.

    origin_staypoint_id : str, default 'origin_staypoint_id'
        Name of the column storing the staypoint_id of the start of the trip.

    destination_staypoint_id : str, default 'destination_staypoint_id'
        Name of the column storing the staypoint_id of the end of the trip.

    geom_col : str, optional
        Name of the column storing the geometry. If None and `gdf` is a
        plain DataFrame, the trips are imported without geometry.

    crs : pyproj.crs or str, optional
        Coordinate reference system to set. Anything accepted by
        pyproj.CRS.from_user_input() is valid, such as an authority string
        (eg "EPSG:4326") or a WKT string. Not applied when a plain
        DataFrame is imported without `geom_col`.

    tz : str, optional
        pytz compatible timezone string. If None, UTC is assumed.

    mapper : dict, optional
        Further columns that should be renamed. Entries override the
        renames derived from the column-name arguments.

    Returns
    -------
    trips : Trips

    Examples
    --------
    >>> trackintel.read_trips_gpd(df, tz='utc')
    """
    # Source column name -> trackintel column name.
    rename_map = {
        started_at: "started_at",
        finished_at: "finished_at",
        user_id: "user_id",
        origin_staypoint_id: "origin_staypoint_id",
        destination_staypoint_id: "destination_staypoint_id",
    }
    if mapper is not None:
        rename_map.update(mapper)

    time_columns = ["started_at", "finished_at"]
    return Trips(
        _trackintel_model(
            gdf,
            set_names=rename_map,
            geom_col=geom_col,
            crs=crs,
            tz_cols=time_columns,
            tz=tz,
        )
    )
def read_locations_gpd(gdf, user_id="user_id", center="center", extent=None, crs=None, mapper=None):
    """
    Read locations from GeoDataFrames.

    Wraps the pd.rename function to simplify the import of GeoDataFrames.

    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame with valid point geometry, containing the locations to import.

    user_id : str, default 'user_id'
        Name of the column storing the user_id.

    center : str, default 'center'
        Name of the column storing the geometry (center of the location).
        After renaming, this column is set as the geometry.

    extent : str, optional
        Name of the column storing the additional geometry (extent of the
        location). When given, the column is converted to a GeoSeries.

    crs : pyproj.crs or str, optional
        Coordinate reference system to set. Anything accepted by
        pyproj.CRS.from_user_input() is valid, such as an authority string
        (eg "EPSG:4326") or a WKT string.

    mapper : dict, optional
        Further columns that should be renamed. Entries override the
        renames derived from the column-name arguments.

    Returns
    -------
    locs : Locations

    Examples
    --------
    >>> trackintel.read_locations_gpd(df, user_id='User', center='geometry')
    """
    has_extent = extent is not None

    # Source column name -> trackintel column name.
    rename_map = {user_id: "user_id", center: "center"}
    if has_extent:
        rename_map[extent] = "extent"
    if mapper is not None:
        rename_map.update(mapper)

    locs = _trackintel_model(gdf, set_names=rename_map, geom_col="center", crs=crs)

    if has_extent:
        # Only "center" is the active geometry; wrap the secondary geometry
        # column in a GeoSeries explicitly.
        locs["extent"] = gpd.GeoSeries(locs["extent"])

    return Locations(locs)
def read_tours_gpd(
    gdf,
    user_id="user_id",
    started_at="started_at",
    finished_at="finished_at",
    tz=None,
    mapper=None,
):
    """
    Read tours from GeoDataFrames.

    Wraps the pd.rename function to simplify the import of GeoDataFrames.

    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame containing the tours to import.

    user_id : str, default 'user_id'
        Name of the column storing the user_id.

    started_at : str, default 'started_at'
        Name of the column storing the start time of the tours.

    finished_at : str, default 'finished_at'
        Name of the column storing the end time of the tours.

    tz : str, optional
        pytz compatible timezone string. If None, UTC is assumed.

    mapper : dict, optional
        Further columns that should be renamed. Entries override the
        renames derived from the column-name arguments.

    Returns
    -------
    tours : Tours
    """
    # Source column name -> trackintel column name.
    rename_map = {
        user_id: "user_id",
        started_at: "started_at",
        finished_at: "finished_at",
    }
    if mapper is not None:
        rename_map.update(mapper)

    time_columns = ["started_at", "finished_at"]
    # No geometry column or crs is passed on for tours.
    model = _trackintel_model(gdf, rename_map, None, None, time_columns, tz)
    return Tours(model)
def _trackintel_model(gdf, set_names=None, geom_col=None, crs=None, tz_cols=None, tz=None): """Help function to assure the trackintel model on a GeoDataFrame. Parameters ---------- gdf : GeoDataFrame Input GeoDataFrame set_names : dict, optional Renaming dictionary for the columns of the GeoDataFrame. set_geometry : str, optional Set geometry of GeoDataFrame. crs : pyproj.crs or str, optional Set coordinate reference system. The value can be anything accepted by pyproj.CRS.from_user_input(), such as an authority string (eg "EPSG:4326") or a WKT string. tz_cols : list, optional List of timezone aware datetime columns. tz : str, optional pytz compatible timezone string. If None UTC will be assumed Returns ------- gdf : GeoDataFrame The input GeoDataFrame transformed to match the trackintel format. """ if set_names is not None: gdf = gdf.rename(columns=set_names) if tz_cols is not None: for col in tz_cols: if not isinstance(gdf[col].dtype, pd.DatetimeTZDtype): gdf[col] = _localize_timestamp(dt_series=gdf[col], pytz_tzinfo=tz, col_name=col) # If is not GeoDataFrame and no geom_col is set end early. # That allows us to handle DataFrames and GeoDataFrames in one function. if not isinstance(gdf, gpd.GeoDataFrame) and geom_col is None: return gdf if geom_col is not None: gdf = gdf.set_geometry(geom_col) else: try: gdf.geometry except AttributeError: raise AttributeError("GeoDataFrame has no geometry, set it with keyword argument.") if crs is not None: gdf = gdf.set_crs(crs) return gdf def _localize_timestamp(dt_series, pytz_tzinfo, col_name): """ Add timezone info to timestamp. Parameters ---------- dt_series : pandas.Series a pandas datetime series pytz_tzinfo : str pytz compatible timezone string. 
If none UTC will be assumed col_name : str Column name for informative warning message Returns ------- pd.Series a timezone aware pandas datetime series """ if pytz_tzinfo is None: warnings.warn(f"Assuming UTC timezone for column {col_name}") pytz_tzinfo = "utc" def localize(ts, tz): """Localize ts if tz is not set else leave it be""" ts = pd.Timestamp(ts) if ts.tz is not None: return ts return pd.Timestamp.tz_localize(ts, tz) # localize all datetimes without a timezone dt_series = dt_series.apply(localize, tz=pytz_tzinfo) # create a Timeseries (utc=False will create warning) dt_series = pd.to_datetime(dt_series, utc=True) # convert it back to right tz return dt_series.dt.tz_convert(pytz_tzinfo)