# Source code for trackintel.model.trips

import geopandas as gpd
import pandas as pd

import trackintel as ti
from trackintel.model.util import (
    TrackintelBase,
    TrackintelDataFrame,
    TrackintelGeoDataFrame,
    _register_trackintel_accessor,
    _shared_docs,
    doc,
)



# [docs]
@_register_trackintel_accessor("as_trips")
class Trips:
    """Trackintel class to treat (Geo)DataFrames as collections of trips.

    Calling ``Trips(...)`` never yields a ``Trips`` instance: the constructor hands back a TripsDataFrame, or a
    TripsGeoDataFrame when geometry information is present.

    Requires at least the following columns:
    ['user_id', 'started_at', 'finished_at', 'origin_staypoint_id', 'destination_staypoint_id']

    The 'index' of the (Geo)DataFrame will be treated as unique identifier of the `Trips`

    Trips have an optional geometry of type MultiPoint which describes the start and the end point of the trip

    For several usecases, the following additional columns are required:
    ['origin_purpose', 'destination_purpose', 'modes', 'primary_mode', 'tour_id']

    Notes
    -----
    `Trips` are an aggregation level in transport planning that summarize all movement and all non-essential actions
    (e.g., waiting) between two relevant activities. The following assumptions are implemented
        - If we do not record a person for more than `gap_threshold` minutes, we assume that the person performed an \
          activity in the recording gap and split the trip at the gap.
        - Trips that start/end in a recording gap can have an unknown origin/destination staypoint id.
        - If the origin (or destination) staypoint is unknown (and a geometry column exists), the origin/destination
          geometry is set as the first coordinate of the first tripleg (or the last coordinate of the last tripleg)
        - There are no trips without a (recorded) tripleg.

    'started_at' and 'finished_at' are timezone aware pandas datetime objects.

    Examples
    --------
    >>> trips.generate_tours()
    """

    def __new__(cls, *args, **kwargs):
        # Each entry is one way the caller can indicate geospatial input:
        # an explicit `geometry` keyword, or a GeoDataFrame passed either
        # as the first positional argument or as the `data` keyword.
        geo_signals = (
            "geometry" in kwargs,
            bool(args) and isinstance(args[0], gpd.GeoDataFrame),
            isinstance(kwargs.get("data"), gpd.GeoDataFrame),
        )
        frame_cls = TripsGeoDataFrame if any(geo_signals) else TripsDataFrame
        return frame_cls(*args, **kwargs)



# Columns that must be present for a (Geo)DataFrame to be accepted as trips;
# consulted by `TripsDataFrame.validate`.
_required_columns = [
    "user_id",
    "started_at",
    "finished_at",
    "origin_staypoint_id",
    "destination_staypoint_id",
]


class TripsDataFrame(TrackintelBase, TrackintelDataFrame):
    """Class to treat a DataFrame as collections of trips.

    Requires at least the following columns:
    ['user_id', 'started_at', 'finished_at', 'origin_staypoint_id', 'destination_staypoint_id']

    The 'index' of the DataFrame will be treated as unique identifier of the `Trips`

    For several usecases, the following additional columns are required:
    ['origin_purpose', 'destination_purpose', 'modes', 'primary_mode', 'tour_id']

    Notes
    -----
    `Trips` are an aggregation level in transport planning that summarize all movement and all non-essential actions
    (e.g., waiting) between two relevant activities. The following assumptions are implemented
        - If we do not record a person for more than `gap_threshold` minutes, we assume that the person performed an \
          activity in the recording gap and split the trip at the gap.
        - Trips that start/end in a recording gap can have an unknown origin/destination staypoint id.
        - If the origin (or destination) staypoint is unknown (and a geometry column exists), the origin/destination
          geometry is set as the first coordinate of the first tripleg (or the last coordinate of the last tripleg)
        - There are no trips without a (recorded) tripleg.

    'started_at' and 'finished_at' are timezone aware pandas datetime objects.

    Examples
    --------
    >>> trips.generate_tours()
    """

    def __init__(self, *args, validate=True, **kwargs):
        # `validate` is consumed here; everything else is forwarded to the
        # next constructor in the MRO.
        super().__init__(*args, **kwargs)
        if validate:
            # Named explicitly (not `self.validate`) so that exactly this
            # class's checks run here, even when `self` is a subclass instance.
            TripsDataFrame.validate(self)  # static call

    @staticmethod
    def validate(obj):
        """Check that `obj` has the columns and timestamp dtypes required for trips.

        Parameters
        ----------
        obj : DataFrame-like
            Object exposing `.columns` and column access via `obj[name]`.

        Raises
        ------
        AttributeError
            If any of the required columns is missing.
        AssertionError
            If 'started_at' or 'finished_at' is not a timezone aware datetime column.
        """
        if any(c not in obj.columns for c in _required_columns):
            # Column labels are not guaranteed to be strings (e.g. integer
            # labels); convert them so building the message cannot itself fail.
            raise AttributeError(
                "To process a DataFrame as a collection of trips, it must have the properties"
                f" {_required_columns}, but it has [{', '.join(map(str, obj.columns))}]."
            )

        # check timestamp dtypes
        # Raised explicitly instead of using `assert` so the check is still
        # enforced when Python runs with -O; the exception type is unchanged.
        for column in ("started_at", "finished_at"):
            dtype = obj[column].dtype
            if not isinstance(dtype, pd.DatetimeTZDtype):
                raise AssertionError(
                    f"dtype of {column} is {dtype} but has to be datetime64 and timezone aware"
                )

    # No inline docstring on purpose: the documentation is supplied through
    # the `doc` decorator from the shared "write_csv" template.
    @doc(_shared_docs["write_csv"], first_arg="", long="trips", short="trips")
    def to_csv(self, filename, *args, **kwargs):
        ti.io.write_trips_csv(self, filename, *args, **kwargs)

    # Documentation is supplied through the `doc` decorator from the shared
    # "write_postgis" template; arguments are forwarded positionally.
    @doc(_shared_docs["write_postgis"], first_arg="", long="trips", short="trips")
    def to_postgis(
        self, name, con, schema=None, if_exists="fail", index=True, index_label=None, chunksize=None, dtype=None
    ):
        ti.io.write_trips_postgis(self, name, con, schema, if_exists, index, index_label, chunksize, dtype)

    def temporal_tracking_quality(self, granularity="all"):
        """
        Calculate per-user temporal tracking quality (temporal coverage).

        See :func:`trackintel.analysis.temporal_tracking_quality` for full documentation.
        """
        return ti.analysis.temporal_tracking_quality(self, granularity=granularity)

    def generate_tours(self, **kwargs):
        """
        Generate trackintel-tours from trips

        See :func:`trackintel.preprocessing.generate_tours` for full documentation.
        """
        return ti.preprocessing.generate_tours(trips=self, **kwargs)


# added GeoDataFrame manually afterwards such that our methods always come first
class TripsGeoDataFrame(TrackintelGeoDataFrame, TripsDataFrame, gpd.GeoDataFrame):
    """Class to treat a GeoDataFrame as collections of trips.

    Requires at least the following columns:
    ['user_id', 'started_at', 'finished_at', 'origin_staypoint_id', 'destination_staypoint_id']

    The 'index' of the GeoDataFrame will be treated as unique identifier of the `Trips`

    TripsGeoDataFrame must have a geometry of type MultiPoint which describes the start and the end point of the trip.

    For several usecases, the following additional columns are required:
    ['origin_purpose', 'destination_purpose', 'modes', 'primary_mode', 'tour_id']

    Notes
    -----
    `Trips` are an aggregation level in transport planning that summarize all movement and all non-essential actions
    (e.g., waiting) between two relevant activities. The following assumptions are implemented
        - If we do not record a person for more than `gap_threshold` minutes, we assume that the person performed an \
          activity in the recording gap and split the trip at the gap.
        - Trips that start/end in a recording gap can have an unknown origin/destination staypoint id.
        - If the origin (or destination) staypoint is unknown (and a geometry column exists), the origin/destination
          geometry is set as the first coordinate of the first tripleg (or the last coordinate of the last tripleg)
        - There are no trips without a (recorded) tripleg.

    'started_at' and 'finished_at' are timezone aware pandas datetime objects.

    Examples
    --------
    >>> trips.generate_tours()
    """

    # NOTE(review): presumably the non-geo class that TrackintelGeoDataFrame
    # falls back to when no geometry is available - confirm in model.util.
    fallback_class = TripsDataFrame

    def __init__(self, *args, validate=True, **kwargs):
        super().__init__(*args, validate=validate, **kwargs)
        if validate:
            TripsGeoDataFrame.validate(self)

    @staticmethod
    def validate(self):
        """Run the trips column/dtype checks plus geometry checks.

        The parameter is named `self` although this is a static method; the name
        is kept so existing keyword callers keep working.

        Raises
        ------
        AttributeError
            If any of the required columns is missing.
        AssertionError
            If a timestamp column is not timezone aware or any geometry is invalid.
        ValueError
            If the first geometry is not a MultiPoint.
        """
        TripsDataFrame.validate(self)
        geometry = self.geometry
        # Raised explicitly instead of using `assert` so the check is still
        # enforced when Python runs with -O; the exception type is unchanged.
        if not geometry.is_valid.all():
            raise AssertionError(
                "Not all geometries are valid. Try x[~x.geometry.is_valid] where x is you GeoDataFrame"
            )
        # An empty frame has no first geometry to inspect; skip the type check
        # rather than failing with an IndexError from `iloc[0]`.
        if len(geometry) > 0 and geometry.iloc[0].geom_type != "MultiPoint":
            raise ValueError("The geometry must be a MultiPoint (only first checked).")