import geopandas as gpd
import pandas as pd
import trackintel as ti
from trackintel.model.util import (
TrackintelBase,
TrackintelDataFrame,
TrackintelGeoDataFrame,
_register_trackintel_accessor,
_shared_docs,
doc,
)
# [docs]  (Sphinx "view source" link marker left over from HTML extraction; not code)
@_register_trackintel_accessor("as_trips")
class Trips:
    """Trackintel class to treat (Geo)DataFrames as collections of trips.

    Calling ``Trips(...)`` never yields a ``Trips`` instance: the constructor
    hands back a ``TripsGeoDataFrame`` when geometry information is supplied
    and a ``TripsDataFrame`` otherwise.

    Required columns:
    ['user_id', 'started_at', 'finished_at', 'origin_staypoint_id', 'destination_staypoint_id']

    The index of the (Geo)DataFrame is treated as the unique identifier of the
    `Trips`. Trips have an optional geometry of type MultiPoint which describes
    the start and the end point of the trip.

    For several use cases, the following additional columns are required:
    ['origin_purpose', 'destination_purpose', 'modes', 'primary_mode', 'tour_id']

    Notes
    -----
    `Trips` are an aggregation level in transport planning that summarize all
    movement and all non-essential actions (e.g., waiting) between two relevant
    activities. The following assumptions are implemented:

    - If we do not record a person for more than `gap_threshold` minutes, we
      assume that the person performed an activity in the recording gap and
      split the trip at the gap.
    - Trips that start/end in a recording gap can have an unknown
      origin/destination staypoint id.
    - If the origin (or destination) staypoint is unknown (and a geometry
      column exists), the origin/destination geometry is set as the first
      coordinate of the first tripleg (or the last coordinate of the last
      tripleg).
    - There are no trips without a (recorded) tripleg.

    'started_at' and 'finished_at' are timezone aware pandas datetime objects.

    Examples
    --------
    >>> trips.generate_tours()
    """

    def __new__(cls, *args, **kwargs):
        # Geometry is considered present when the first positional argument or
        # the ``data`` keyword is a GeoDataFrame, or when a ``geometry``
        # keyword is passed at all.
        leading_arg = args[0] if args else None
        frame_candidates = (leading_arg, kwargs.get("data"))
        has_geometry = "geometry" in kwargs or any(
            isinstance(candidate, gpd.GeoDataFrame) for candidate in frame_candidates
        )
        target_class = TripsGeoDataFrame if has_geometry else TripsDataFrame
        return target_class(*args, **kwargs)
# Columns every trips (Geo)DataFrame must provide; checked by the validate methods
# and echoed verbatim in their error message.
_required_columns = [
    "user_id",
    "started_at",
    "finished_at",
    "origin_staypoint_id",
    "destination_staypoint_id",
]
class TripsDataFrame(TrackintelBase, TrackintelDataFrame):
    """Class to treat a DataFrame as collections of trips.

    Requires at least the following columns:
    ['user_id', 'started_at', 'finished_at', 'origin_staypoint_id', 'destination_staypoint_id']

    The 'index' of the DataFrame will be treated as unique identifier of the `Trips`.

    For several use cases, the following additional columns are required:
    ['origin_purpose', 'destination_purpose', 'modes', 'primary_mode', 'tour_id']

    Notes
    -----
    `Trips` are an aggregation level in transport planning that summarize all
    movement and all non-essential actions (e.g., waiting) between two relevant
    activities. The following assumptions are implemented:

    - If we do not record a person for more than `gap_threshold` minutes, we
      assume that the person performed an activity in the recording gap and
      split the trip at the gap.
    - Trips that start/end in a recording gap can have an unknown
      origin/destination staypoint id.
    - If the origin (or destination) staypoint is unknown (and a geometry
      column exists), the origin/destination geometry is set as the first
      coordinate of the first tripleg (or the last coordinate of the last
      tripleg).
    - There are no trips without a (recorded) tripleg.

    'started_at' and 'finished_at' are timezone aware pandas datetime objects.

    Examples
    --------
    >>> trips.generate_tours()
    """

    def __init__(self, *args, validate=True, **kwargs):
        """Build the frame and, unless ``validate`` is False, check the trips schema.

        Parameters
        ----------
        *args, **kwargs
            Forwarded unchanged to the parent constructor.
        validate : bool, default True
            When True, run :meth:`TripsDataFrame.validate` on the new object.
        """
        super().__init__(*args, **kwargs)
        if validate:
            # Call through the class so that subclasses overriding ``validate``
            # do not change which checks run at this level.
            TripsDataFrame.validate(self)

    @staticmethod
    def validate(obj):
        """Check that ``obj`` fulfils the trips schema.

        Parameters
        ----------
        obj : DataFrame-like
            Object exposing ``columns`` and column access via ``obj[name]``.

        Raises
        ------
        AttributeError
            If any of the required columns is missing.
        AssertionError
            If 'started_at' or 'finished_at' is not a timezone aware datetime column.
        """
        missing = [column for column in _required_columns if column not in obj.columns]
        if missing:
            # Column labels are not guaranteed to be strings (e.g. integer
            # labels); stringify them so building the message cannot itself
            # fail with a TypeError and hide the real problem.
            present = ", ".join(map(str, obj.columns))
            raise AttributeError(
                "To process a DataFrame as a collection of trips, it must have the properties"
                f" {_required_columns}, but it has [{present}]."
            )

        # check timestamp dtypes
        # Raised explicitly instead of using ``assert`` so the check is still
        # enforced under ``python -O``; the exception type stays AssertionError
        # for backward compatibility.
        for column in ("started_at", "finished_at"):
            dtype = obj[column].dtype
            if not isinstance(dtype, pd.DatetimeTZDtype):
                raise AssertionError(
                    f"dtype of {column} is {dtype} but has to be datetime64 and timezone aware"
                )

    # No docstring on purpose: documentation is supplied via the ``doc`` decorator.
    @doc(_shared_docs["write_csv"], first_arg="", long="trips", short="trips")
    def to_csv(self, filename, *args, **kwargs):
        ti.io.write_trips_csv(self, filename, *args, **kwargs)

    # No docstring on purpose: documentation is supplied via the ``doc`` decorator.
    @doc(_shared_docs["write_postgis"], first_arg="", long="trips", short="trips")
    def to_postgis(
        self, name, con, schema=None, if_exists="fail", index=True, index_label=None, chunksize=None, dtype=None
    ):
        ti.io.write_trips_postgis(self, name, con, schema, if_exists, index, index_label, chunksize, dtype)

    def temporal_tracking_quality(self, granularity="all"):
        """
        Calculate per-user temporal tracking quality (temporal coverage).

        Thin wrapper that passes this object and ``granularity`` on and returns
        the result unchanged.

        See :func:`trackintel.analysis.temporal_tracking_quality` for full documentation.
        """
        return ti.analysis.temporal_tracking_quality(self, granularity=granularity)

    def generate_tours(self, **kwargs):
        """
        Generate trackintel-tours from trips.

        Thin wrapper that passes this object as ``trips`` together with all
        keyword arguments and returns the result unchanged.

        See :func:`trackintel.preprocessing.generate_tours` for full documentation.
        """
        return ti.preprocessing.generate_tours(trips=self, **kwargs)
# added GeoDataFrame manually afterwards such that our methods always come first
class TripsGeoDataFrame(TrackintelGeoDataFrame, TripsDataFrame, gpd.GeoDataFrame):
    """Class to treat a GeoDataFrame as collections of trips.

    Requires at least the following columns:
    ['user_id', 'started_at', 'finished_at', 'origin_staypoint_id', 'destination_staypoint_id']

    The 'index' of the GeoDataFrame will be treated as unique identifier of the `Trips`.

    TripsGeoDataFrame must have a geometry of type MultiPoint which describes
    the start and the end point of the trip.

    For several use cases, the following additional columns are required:
    ['origin_purpose', 'destination_purpose', 'modes', 'primary_mode', 'tour_id']

    Notes
    -----
    `Trips` are an aggregation level in transport planning that summarize all
    movement and all non-essential actions (e.g., waiting) between two relevant
    activities. The following assumptions are implemented:

    - If we do not record a person for more than `gap_threshold` minutes, we
      assume that the person performed an activity in the recording gap and
      split the trip at the gap.
    - Trips that start/end in a recording gap can have an unknown
      origin/destination staypoint id.
    - If the origin (or destination) staypoint is unknown (and a geometry
      column exists), the origin/destination geometry is set as the first
      coordinate of the first tripleg (or the last coordinate of the last
      tripleg).
    - There are no trips without a (recorded) tripleg.

    'started_at' and 'finished_at' are timezone aware pandas datetime objects.

    Examples
    --------
    >>> trips.generate_tours()
    """

    # NOTE(review): presumably the non-geo class to fall back to when the
    # geometry is lost; confirm against TrackintelGeoDataFrame.
    fallback_class = TripsDataFrame

    def __init__(self, *args, validate=True, **kwargs):
        """Build the frame and, unless ``validate`` is False, check schema and geometry.

        Parameters
        ----------
        *args, **kwargs
            Forwarded unchanged to the parent constructor.
        validate : bool, default True
            When True, run :meth:`TripsGeoDataFrame.validate` on the new object.
        """
        # NOTE(review): when ``validate`` is True the column/dtype checks appear
        # to run twice (once if this call reaches TripsDataFrame.__init__, once
        # inside TripsGeoDataFrame.validate); harmless but redundant. Confirm
        # the parent classes before changing the forwarded flag.
        super().__init__(*args, validate=validate, **kwargs)
        if validate:
            TripsGeoDataFrame.validate(self)

    @staticmethod
    def validate(self):
        """Check that the object fulfils the trips schema and carries MultiPoint geometry.

        The parameter is named ``self`` although this is a static method; it is
        the object to validate.

        Raises
        ------
        AttributeError
            If any of the required columns is missing.
        AssertionError
            If a timestamp column is not timezone aware, or if any geometry is invalid.
        ValueError
            If the first geometry is not a MultiPoint.
        """
        TripsDataFrame.validate(self)

        # Raised explicitly instead of using ``assert`` so the check is still
        # enforced under ``python -O``; exception type and message are unchanged.
        if not self.geometry.is_valid.all():
            raise AssertionError(
                "Not all geometries are valid. Try x[~x.geometry.is_valid] where x is you GeoDataFrame"
            )

        # Only the first geometry is inspected. Skip the check for an empty
        # frame, where ``iloc[0]`` would raise an unrelated IndexError.
        if len(self) > 0 and self.geometry.iloc[0].geom_type != "MultiPoint":
            raise ValueError("The geometry must be a MultiPoint (only first checked).")