import ast
import geopandas as gpd
import pandas as pd
from geopandas.geodataframe import GeoDataFrame
from trackintel.io.from_geopandas import (
read_locations_gpd,
read_positionfixes_gpd,
read_staypoints_gpd,
read_tours_gpd,
read_triplegs_gpd,
read_trips_gpd,
)
from trackintel.io.util import _index_warning_default_none
from trackintel.model.util import doc, _shared_docs
[docs]
@_index_warning_default_none
def read_positionfixes_csv(*args, columns=None, tz=None, index_col=None, geom_col="geom", crs=None, **kwargs):
    """
    Read positionfixes from a csv file that stores longitude/latitude columns.

    The file is parsed with ``pd.read_csv``, the columns are renamed according to
    `columns`, `tracked_at` is converted with ``pd.to_datetime`` and the
    "longitude"/"latitude" columns are replaced by one POINT geometry column.

    Parameters
    ----------
    args
        Positional arguments forwarded to pd.read_csv().

    columns : dict, optional
        Renaming in the format {'old_name':'trackintel_standard_name'}, applied
        directly after reading. After renaming, the columns "user_id",
        "tracked_at", "latitude" and "longitude" are required.

    tz : str, optional
        pytz compatible timezone string. If None UTC is assumed.

    index_col : str, optional
        Column name to be used as index. If None the default index is assumed
        as unique identifier.

    geom_col : str, default "geom"
        Name given to the geometry column that is created from the coordinates.

    crs : pyproj.crs or str, optional
        Coordinate reference system to set. Anything accepted by
        pyproj.CRS.from_user_input() works, such as an authority string
        (eg 'EPSG:4326') or a WKT string.

    kwargs
        Additional keyword arguments forwarded to pd.read_csv().

    Returns
    -------
    pfs : Positionfixes

    Notes
    -----
    This reader is mainly useful for data in longitude/latitude format. If the
    data already contains a WKT column, it might be easier to use the GeoPandas
    import function :func:`trackintel.io.read_positionfixes_gpd`.

    Examples
    --------
    >>> trackintel.read_positionfixes_csv('data.csv')
    >>> trackintel.read_positionfixes_csv('data.csv', columns={'time':'tracked_at', 'User':'user_id'})
                       tracked_at  user_id                        geom
    id
    0   2008-10-23 02:53:04+00:00        0  POINT (116.31842 39.98470)
    1   2008-10-23 02:53:10+00:00        0  POINT (116.31845 39.98468)
    2   2008-10-23 02:53:15+00:00        0  POINT (116.31842 39.98469)
    3   2008-10-23 02:53:20+00:00        0  POINT (116.31839 39.98469)
    4   2008-10-23 02:53:25+00:00        0  POINT (116.31826 39.98465)
    """
    if columns is None:
        columns = {}

    table = pd.read_csv(*args, index_col=index_col, **kwargs).rename(columns=columns)
    table["tracked_at"] = pd.to_datetime(table["tracked_at"])

    # Build the point geometry (x = longitude, y = latitude) first, then discard
    # the raw coordinate columns that it replaces.
    table[geom_col] = gpd.points_from_xy(table["longitude"], table["latitude"])
    table = table.drop(columns=["longitude", "latitude"])

    return read_positionfixes_gpd(table, geom_col=geom_col, crs=crs, tz=tz)
[docs]
def write_positionfixes_csv(positionfixes, filename, *args, **kwargs):
    """
    Write positionfixes to a csv file with longitude/latitude columns.

    The geometry column is not written. Instead its x and y coordinates are
    stored in the columns "longitude" and "latitude" and the result is handed
    to ``to_csv``.

    Parameters
    ----------
    positionfixes : Positionfixes

    filename : str
        The file to write to.

    args
        Additional arguments passed to pd.DataFrame.to_csv().

    kwargs
        Additional keyword arguments passed to pd.DataFrame.to_csv().

    Notes
    -----
    "longitude" and "latitude" are extracted from the geometry column and the
    original geometry column is dropped. The input object itself is left
    unchanged because the columns are added to a copy. The index is always
    written (``index=True``).

    Examples
    --------
    >>> pfs.to_csv("export_pfs.csv")
    >>> ti.io.write_positionfixes_csv(pfs, "export_pfs.csv")
    """
    points = positionfixes.geometry

    # Work on a copy so the caller's positionfixes keep their geometry column.
    export = positionfixes.copy()
    export["longitude"] = points.x
    export["latitude"] = points.y

    flat = export.drop(columns=[export.geometry.name])
    flat.to_csv(filename, *args, index=True, **kwargs)
[docs]
@_index_warning_default_none
def read_triplegs_csv(*args, columns=None, tz=None, index_col=None, geom_col="geom", crs=None, **kwargs):
    """
    Read triplegs from csv file.

    Wraps the pandas read_csv function, parses the WKT of the tripleg geometry
    (LINESTRING) and builds a Triplegs instance. The columns `started_at` and
    `finished_at` are converted with ``pd.to_datetime``.

    Parameters
    ----------
    args
        Arguments as passed to pd.read_csv().

    columns : dict, optional
        The column names to rename in the format {'old_name':'trackintel_standard_name'}.
        The renaming is applied exactly once, directly after reading the file.
        The required columns for this function include: "user_id", "started_at",
        "finished_at" and "geom".

    tz : str, optional
        pytz compatible timezone string. If None UTC is assumed.

    index_col : str, optional
        Column name to be used as index. If None the default index is assumed
        as unique identifier.

    geom_col : str, default "geom"
        Name of the column containing the geometry as WKT.

    crs : pyproj.crs or str, optional
        Set coordinate reference system. The value can be anything accepted
        by pyproj.CRS.from_user_input(), such as an authority string
        (eg "EPSG:4326") or a WKT string.

    kwargs
        Additional keyword arguments passed to pd.read_csv().

    Returns
    -------
    tpls : Triplegs

    Examples
    --------
    >>> trackintel.read_triplegs_csv('data.csv')
    >>> trackintel.read_triplegs_csv('data.csv', columns={'start_time':'started_at', 'User':'user_id'})
        user_id                started_at               finished_at                                               geom
    id
    0         1 2015-11-27 08:00:00+00:00 2015-11-27 10:00:00+00:00  LINESTRING (8.54878 47.37652, 8.52770 47.39935...
    1         1 2015-11-27 12:00:00+00:00 2015-11-27 14:00:00+00:00  LINESTRING (8.56340 47.95600, 8.64560 47.23345...
    """
    columns = {} if columns is None else columns
    df = pd.read_csv(*args, index_col=index_col, **kwargs)
    df.rename(columns=columns, inplace=True)

    df["started_at"] = pd.to_datetime(df["started_at"])
    df["finished_at"] = pd.to_datetime(df["finished_at"])
    df[geom_col] = gpd.GeoSeries.from_wkt(df[geom_col])

    # `columns` has already been applied above, so it is deliberately not forwarded
    # as `mapper`: renaming a second time would undo swap-style mappings
    # ({'a': 'b', 'b': 'a'}) and is what none of the sibling csv readers do.
    return read_triplegs_gpd(df, geom_col=geom_col, crs=crs, tz=tz)
[docs]
@doc(
    _shared_docs["write_csv"],
    first_arg="\ntriplegs : Triplegs\n",
    long="triplegs",
    short="tpls",
)
def write_triplegs_csv(triplegs, filename, *args, **kwargs):
    # No inline docstring: the documentation is generated by the `doc` decorator
    # from the shared "write_csv" template.
    # Geometries are converted to WKT text first so they can be stored in a csv cell.
    wkt_table = triplegs.to_wkt(rounding_precision=-1, trim=False)
    # DataFrame.to_csv is called through the class; the index is always written.
    pd.DataFrame.to_csv(wkt_table, filename, *args, index=True, **kwargs)
[docs]
@_index_warning_default_none
def read_staypoints_csv(*args, columns=None, tz=None, index_col=None, geom_col="geom", crs=None, **kwargs):
    """
    Read staypoints from a csv file that stores the geometry as WKT.

    The file is parsed with ``pd.read_csv``, the columns are renamed according
    to `columns`, `started_at` and `finished_at` are converted with
    ``pd.to_datetime`` and the WKT in `geom_col` (Point) is parsed into
    geometries before a Staypoints instance is built.

    Parameters
    ----------
    args
        Positional arguments forwarded to pd.read_csv().

    columns : dict, optional
        Renaming in the format {'old_name':'trackintel_standard_name'}, applied
        directly after reading. After renaming, the columns "user_id",
        "started_at", "finished_at" and "geom" are required.

    tz : str, optional
        pytz compatible timezone string. If None UTC is assumed.

    index_col : str, optional
        Column name to be used as index. If None the default index is assumed
        as unique identifier.

    geom_col : str, default "geom"
        Name of the column containing the geometry as WKT.

    crs : pyproj.crs or str, optional
        Coordinate reference system to set. Anything accepted by
        pyproj.CRS.from_user_input() works, such as an authority string
        (eg "EPSG:4326") or a WKT string.

    kwargs
        Additional keyword arguments forwarded to pd.read_csv().

    Returns
    -------
    sp : Staypoints

    Examples
    --------
    >>> trackintel.read_staypoints_csv('data.csv')
    >>> trackintel.read_staypoints_csv('data.csv', columns={'start_time':'started_at', 'User':'user_id'})
        user_id                started_at               finished_at                      geom
    id
    0         1 2015-11-27 08:00:00+00:00 2015-11-27 10:00:00+00:00  POINT (8.52822 47.39519)
    1         1 2015-11-27 12:00:00+00:00 2015-11-27 14:00:00+00:00  POINT (8.54340 47.95600)
    """
    rename_map = columns if columns is not None else {}

    sp = pd.read_csv(*args, index_col=index_col, **kwargs)
    sp = sp.rename(columns=rename_map)

    for time_col in ("started_at", "finished_at"):
        sp[time_col] = pd.to_datetime(sp[time_col])

    # The csv holds the geometry as WKT text; turn it into geometry objects.
    sp[geom_col] = gpd.GeoSeries.from_wkt(sp[geom_col])

    return read_staypoints_gpd(sp, geom_col=geom_col, crs=crs, tz=tz)
[docs]
@doc(
    _shared_docs["write_csv"],
    first_arg="\nstaypoints : Staypoints\n",
    long="staypoints",
    short="sp",
)
def write_staypoints_csv(staypoints, filename, *args, **kwargs):
    # No inline docstring: the documentation is generated by the `doc` decorator
    # from the shared "write_csv" template.
    # Geometries are converted to WKT text first so they can be stored in a csv cell.
    wkt_table = staypoints.to_wkt(rounding_precision=-1, trim=False)
    # DataFrame.to_csv is called through the class; the index is always written.
    pd.DataFrame.to_csv(wkt_table, filename, *args, index=True, **kwargs)
[docs]
@_index_warning_default_none
def read_locations_csv(*args, columns=None, index_col=None, crs=None, **kwargs):
    """
    Read locations from a csv file that stores the geometries as WKT.

    The file is parsed with ``pd.read_csv``, the columns are renamed according
    to `columns` and the WKT of the location center (POINT) and, if the column
    is present, of the extent (POLYGON) is parsed into geometries before a
    Locations instance is built.

    Parameters
    ----------
    args
        Positional arguments forwarded to pd.read_csv().

    columns : dict, optional
        Renaming in the format {'old_name':'trackintel_standard_name'}, applied
        directly after reading. After renaming, the columns "user_id" and
        "center" are required; "extent" is optional.

    index_col : str, optional
        Column name to be used as index. If None the default index is assumed
        as unique identifier.

    crs : pyproj.crs or str, optional
        Coordinate reference system to set. Anything accepted by
        pyproj.CRS.from_user_input() works, such as an authority string
        (eg "EPSG:4326") or a WKT string.

    kwargs
        Additional keyword arguments forwarded to pd.read_csv().

    Returns
    -------
    locs : Locations

    Examples
    --------
    >>> trackintel.read_locations_csv('data.csv')
    >>> trackintel.read_locations_csv('data.csv', columns={'User':'user_id'})
        user_id                    center                                             extent
    id
    0         1  POINT (8.54878 47.37652)  POLYGON ((8.548779487999999 47.37651505, 8.527...
    1         1  POINT (8.56340 47.95600)  POLYGON ((8.5634 47.956, 8.6456 47.23345, 8.45...
    """
    if columns is None:
        columns = {}

    locs = pd.read_csv(*args, index_col=index_col, **kwargs).rename(columns=columns)

    # "center" is mandatory, "extent" is only parsed when the file provides it.
    wkt_columns = ["center"]
    if "extent" in locs.columns:
        wkt_columns.append("extent")
    for name in wkt_columns:
        locs[name] = gpd.GeoSeries.from_wkt(locs[name])

    return read_locations_gpd(locs, crs=crs)
[docs]
@doc(
    _shared_docs["write_csv"],
    first_arg="\nlocations : Locations\n",
    long="locations",
    short="locs",
)
def write_locations_csv(locations, filename, *args, **kwargs):
    # No inline docstring: the documentation is generated by the `doc` decorator
    # from the shared "write_csv" template.
    # Geometries are converted to WKT text first so they can be stored in a csv cell.
    wkt_table = locations.to_wkt(rounding_precision=-1, trim=False)
    # DataFrame.to_csv is called through the class; the index is always written.
    pd.DataFrame.to_csv(wkt_table, filename, *args, index=True, **kwargs)
[docs]
@_index_warning_default_none
def read_trips_csv(*args, columns=None, tz=None, index_col=None, geom_col=None, crs=None, **kwargs):
    """
    Read trips from a csv file.

    The file is parsed with ``pd.read_csv``, the columns are renamed according
    to `columns`, and `started_at` and `finished_at` are converted with
    ``pd.to_datetime``. If `geom_col` is given, its WKT content is parsed into
    geometries. A Trips instance is built from the result.

    Parameters
    ----------
    args
        Positional arguments forwarded to pd.read_csv().

    columns : dict, optional
        Renaming in the format {'old_name':'trackintel_standard_name'}, applied
        directly after reading. After renaming, the columns "user_id",
        "started_at", "finished_at", "origin_staypoint_id" and
        "destination_staypoint_id" are required.
        An optional column is "geom" of type MultiPoint, containing start and
        destination points of the trip.

    tz : str, optional
        pytz compatible timezone string. If None UTC is assumed.

    index_col : str, optional
        Column name to be used as index. If None the default index is assumed
        as unique identifier.

    geom_col : str, default None
        Name of the column containing the geometry as WKT.
        If None no geometry gets added.

    crs : pyproj.crs or str, optional
        Coordinate reference system to set. Anything accepted by
        pyproj.CRS.from_user_input() works, such as an authority string
        (eg "EPSG:4326") or a WKT string. Ignored if geom_col is None.

    kwargs
        Additional keyword arguments forwarded to pd.read_csv().

    Returns
    -------
    trips : Trips
        A TripsDataFrame containing the trips. TripsGeoDataFrame if geometry column exists.

    Notes
    -----
    Geometry is not mandatory for trackintel trips.

    Examples
    --------
    >>> trackintel.read_trips_csv('data.csv')
    >>> trackintel.read_trips_csv('data.csv', columns={'start_time':'started_at', 'User':'user_id'})
        user_id                started_at               finished_at  origin_staypoint_id  destination_staypoint_id\
    id
    0         1 2015-11-27 08:00:00+00:00 2015-11-27 08:15:00+00:00                    2                         5
    1         1 2015-11-27 08:20:22+00:00 2015-11-27 08:35:22+00:00                    5                         3
                                                     geom
    id
    0   MULTIPOINT (116.31842 39.98470, 116.29873 39.999729)
    1   MULTIPOINT (116.29873 39.98402, 116.32480 40.009269)
    """
    if columns is None:
        columns = {}

    table = pd.read_csv(*args, index_col=index_col, **kwargs)
    table = table.rename(columns=columns)

    for time_col in ("started_at", "finished_at"):
        table[time_col] = pd.to_datetime(table[time_col])

    # Geometry is optional for trips: WKT is only parsed when a column was named.
    has_geometry = geom_col is not None
    if has_geometry:
        table[geom_col] = gpd.GeoSeries.from_wkt(table[geom_col])

    return read_trips_gpd(table, geom_col=geom_col, crs=crs, tz=tz)
[docs]
@doc(_shared_docs["write_csv"], first_arg="\ntrips : Trips\n", long="trips", short="trips")
def write_trips_csv(trips, filename, *args, **kwargs):
    # No inline docstring: the documentation is generated by the `doc` decorator
    # from the shared "write_csv" template.
    # Only trips that carry a geometry (GeoDataFrame) need the WKT conversion;
    # plain trips are written as they are.
    table = trips.to_wkt(rounding_precision=-1, trim=False) if isinstance(trips, GeoDataFrame) else trips
    # static call necessary as TripsDataFrame has a to_csv method as well.
    pd.DataFrame.to_csv(table, filename, *args, index=True, **kwargs)
[docs]
@_index_warning_default_none
def read_tours_csv(*args, columns=None, index_col=None, tz=None, **kwargs):
    """
    Read tours from csv file.

    Wraps the pandas read_csv function and builds a Tours instance. The columns
    `started_at` and `finished_at` are converted with ``pd.to_datetime`` and the
    content of the trips column is parsed with ``ast.literal_eval``.

    Parameters
    ----------
    args
        Arguments as passed to pd.read_csv().

    columns : dict, optional
        The column names to rename in the format {'old_name':'trackintel_standard_name'}.

    index_col : str, optional
        column name to be used as index. If None the default index is assumed as unique identifier.

    tz : str, optional
        pytz compatible timezone string. If None UTC is assumed.

    kwargs
        Additional keyword arguments passed to pd.read_csv(). If `converters` is
        given, it is copied and extended with a parser for the trips column
        unless it already contains one; the passed dict itself is not modified.

    Returns
    -------
    tours : Tours

    Examples
    --------
    >>> trackintel.read_tours_csv('data.csv', columns={'uuid':'user_id'})
    """
    columns = {} if columns is None else columns

    # Copy the converters so a dict owned by the caller is never mutated, and treat
    # an explicit `converters=None` (the pandas default) like a missing argument.
    converters = dict(kwargs.pop("converters", None) or {})

    # read_csv applies converters to the headers found in the file, i.e. before the
    # rename below. The parser is therefore registered under the standard name and
    # under every original header that `columns` maps to "trips".
    trips_headers = ["trips"]
    if isinstance(columns, dict):
        trips_headers.extend(old for old, new in columns.items() if new == "trips")
    for header in trips_headers:
        # ast.literal_eval only evaluates Python literals, never arbitrary code.
        converters.setdefault(header, ast.literal_eval)

    tours = pd.read_csv(*args, index_col=index_col, converters=converters, **kwargs)
    tours.rename(columns=columns, inplace=True)

    tours["started_at"] = pd.to_datetime(tours["started_at"])
    tours["finished_at"] = pd.to_datetime(tours["finished_at"])

    return read_tours_gpd(tours, tz=tz)
[docs]
@doc(_shared_docs["write_csv"], first_arg="\ntours : Tours\n", long="tours", short="tours")
def write_tours_csv(tours, filename, *args, **kwargs):
    # No inline docstring: the documentation is generated by the `doc` decorator
    # from the shared "write_csv" template.
    # Tours are handed to DataFrame.to_csv unchanged (no WKT conversion step here);
    # the index is always written.
    pd.DataFrame.to_csv(tours, filename, *args, index=True, **kwargs)