{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "20eKtQoSMsrg" }, "source": [ "# 演習・交通手段分類(データの特徴をつかむ)\n", "\n", "このNotebookでは、GPSデータの基本的な特徴をいくつか抽出します。" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "executionInfo": { "elapsed": 1810, "status": "ok", "timestamp": 1684722788200, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "IuK8Hp3gM0V4" }, "outputs": [], "source": [ "from geopy import distance\n", "import numpy as np\n", "from math import radians, cos, sin, asin, sqrt, atan2\n", "import matplotlib.pyplot as plt\n", "import os\n", "import pandas as pd\n", "import seaborn as sns\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 112 }, "executionInfo": { "elapsed": 7438, "status": "ok", "timestamp": 1684722797536, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "ij9kI0nfMxNZ", "outputId": "d8d9ba36-885c-44d6-dc82-5c8c2c144512" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/x2/7ss5kcb12n1gtlpc8p2mtjtw0000gn/T/ipykernel_10032/953823710.py:3: DtypeWarning: Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.\n", " traj_df = pd.read_csv(f'./traj_{user}_labeled.csv', index_col=0)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
latitudelongitudeheightdays_totaldatetimerecord_dtusertrans_triptrans_mode
039.138159117.217108-3639805.9617482008-12-2323:04:552008-12-23 23:04:5510NaNNaN
139.138196117.217068-7239805.9617592008-12-2323:04:562008-12-23 23:04:5610NaNNaN
\n", "
" ], "text/plain": [ " latitude longitude height days_total date time \\\n", "0 39.138159 117.217108 -36 39805.961748 2008-12-23 23:04:55 \n", "1 39.138196 117.217068 -72 39805.961759 2008-12-23 23:04:56 \n", "\n", " record_dt user trans_trip trans_mode \n", "0 2008-12-23 23:04:55 10 NaN NaN \n", "1 2008-12-23 23:04:56 10 NaN NaN " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# load the GPS data which we cleaned in 00_clean_GPS_data.ipynb\n", "user = '010'\n", "traj_df = pd.read_csv(f'./traj_{user}_labeled.csv', index_col=0)\n", "# traj_df = pd.read_csv(f'./traj_{user}_labeled.csv', index_col=0)\n", "traj_df.head(2)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 224 }, "executionInfo": { "elapsed": 974, "status": "ok", "timestamp": 1684722798507, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "lRp1L1c2M-SI", "outputId": "74e4e1ec-bd1f-44e6-8350-4174c5c69773" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Size of observations: 534,140\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
latitudelongitudeheightdays_totaldatetimerecord_dtusertrans_triptrans_mode
039.845966116.47393813139713.9926622008-09-2223:49:262008-09-22 23:49:2610163.0train
139.846383116.47336313539713.9926742008-09-2223:49:272008-09-22 23:49:2710163.0train
239.846803116.47278814139713.9926852008-09-2223:49:282008-09-22 23:49:2810163.0train
339.847218116.47221314139713.9926972008-09-2223:49:292008-09-22 23:49:2910163.0train
439.847631116.47164114139713.9927082008-09-2223:49:302008-09-22 23:49:3010163.0train
\n", "
" ], "text/plain": [ " latitude longitude height days_total date time \\\n", "0 39.845966 116.473938 131 39713.992662 2008-09-22 23:49:26 \n", "1 39.846383 116.473363 135 39713.992674 2008-09-22 23:49:27 \n", "2 39.846803 116.472788 141 39713.992685 2008-09-22 23:49:28 \n", "3 39.847218 116.472213 141 39713.992697 2008-09-22 23:49:29 \n", "4 39.847631 116.471641 141 39713.992708 2008-09-22 23:49:30 \n", "\n", " record_dt user trans_trip trans_mode \n", "0 2008-09-22 23:49:26 10 163.0 train \n", "1 2008-09-22 23:49:27 10 163.0 train \n", "2 2008-09-22 23:49:28 10 163.0 train \n", "3 2008-09-22 23:49:29 10 163.0 train \n", "4 2008-09-22 23:49:30 10 163.0 train " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# exclude the rows containing no information about transportation mode\n", "traj_df = traj_df.dropna(subset=['trans_mode'], axis=0)\n", "print(\"Size of observations: {:,}\".format(traj_df.shape[0]))\n", "traj_df.head()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "sjXutU9TLXqr" }, "source": [ "## 一つ前の場所(ポイント)からの時間差を計算する\n", "" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "executionInfo": { "elapsed": 94381, "status": "ok", "timestamp": 1684722892880, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "_zo166ITLcNH" }, "outputs": [], "source": [ "# convert time values to timestamp\n", "traj_df['timestamp'] = traj_df['time'].apply(lambda x: pd.to_datetime(x))\n", "# sort values by trip id (trans_trip) and timestamp\n", "traj_df.sort_values(['trans_trip','timestamp'], inplace=True)\n", "# create the start and end time for each trajectory\n", "traj_df['time_delta'] = (traj_df.timestamp - traj_df.groupby(['trans_trip']).timestamp.shift(1))\n", "traj_df['dt_seconds'] = traj_df['time_delta'].dt.seconds" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "Z45zHJm7LXqs" }, "source": [ "Let's check the histogram of longest time between two points per trip." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 405 }, "executionInfo": { "elapsed": 21, "status": "ok", "timestamp": 1684722892884, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "STnQmbp7UIib", "outputId": "960f1251-8618-445d-d328-212df6fa43ce" }, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "0.0 1.0\n", "0.1 2.0\n", "0.2 2.0\n", "0.3 4.0\n", "0.4 48.0\n", "0.5 61.0\n", "0.6 82.0\n", "0.7 120.0\n", "0.8 317.0\n", "0.9 1076.0\n", "Name: dt_seconds, dtype: float64\n" ] } ], "source": [ "# plot the histogram of the maximum time differences between points per trip\n", "trip_dt = traj_df.groupby(['trans_trip']).dt_seconds.max()\n", "trip_dt.hist(bins=10, figsize=(3,2))\n", "plt.show()\n", "# Let's also print out the percentiles of the maximum time differences between points per trip\n", "print(trip_dt.quantile(np.arange(0, 1, 0.1)))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 300 }, "executionInfo": { "elapsed": 418, "status": "ok", "timestamp": 1684722893295, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "LUSy_33BULpd", "outputId": "a735134b-e05b-4604-f98a-195312b80e21" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meanmedianmaxmincount
trans_mode
airplane3699.0000003699.07394.04.02
bus2597.94117667.585896.02.034
car119.000000119.0119.0119.01
subway1916.59574559.085903.02.047
taxi7997.21875068.585474.02.096
train8911.3939391061.084677.02.099
walk602.0065793.085263.01.0152
\n", "
" ], "text/plain": [ " mean median max min count\n", "trans_mode \n", "airplane 3699.000000 3699.0 7394.0 4.0 2\n", "bus 2597.941176 67.5 85896.0 2.0 34\n", "car 119.000000 119.0 119.0 119.0 1\n", "subway 1916.595745 59.0 85903.0 2.0 47\n", "taxi 7997.218750 68.5 85474.0 2.0 96\n", "train 8911.393939 1061.0 84677.0 2.0 99\n", "walk 602.006579 3.0 85263.0 1.0 152" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# summary info on time differences between points, grouped by trip id (trans_trip)\n", "(traj_df.groupby(['trans_mode','trans_trip'])\n", " .time_delta\n", " .max()\n", " .dt.seconds\n", " .groupby(level=0).agg(['mean','median','max','min','count']))" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "pktLPM35LXqt" }, "source": [ "Since airplane and car trips are less, we remove these modes for further analysis." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "executionInfo": { "elapsed": 6, "status": "ok", "timestamp": 1684722893295, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "zUudCp3aLXqt" }, "outputs": [], "source": [ "traj_df = traj_df[~traj_df['trans_mode'].isin(['airplane','car'])]" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "executionInfo": { "elapsed": 3, "status": "aborted", "timestamp": 1684538406128, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "CMOrdGdpQ4kW" }, "source": [ "## 前のポイントからの距離\n", "" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "executionInfo": { "elapsed": 5, "status": "ok", "timestamp": 1684722893295, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "utMQx74qLXqt" }, "outputs": [], "source": [ "# store previous points' latitude and logitude by using `shift` function of pandas\n", "traj_df[['latitude_prev','longitude_prev']] = traj_df.groupby('trans_trip')[['latitude','longitude']].shift(1)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "executionInfo": { "elapsed": 5, "status": "ok", "timestamp": 1684722893295, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "XaqMlU-QLXqu" }, "outputs": [], "source": [ "def calc_distance(lat2, lon2, lat1, lon1):\n", " \"\"\"function to calculate distances between two coordinates\n", " \"\"\"\n", " if pd.isna(lat1) or pd.isna(lon1):\n", " return np.nan\n", " else:\n", " return distance.great_circle((lat2, lon2), (lat1, lon1)).km" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "executionInfo": { "elapsed": 22112, "status": "ok", "timestamp": 1684722915402, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "7Ueuw-E-LXqu" }, "outputs": [], "source": [ "# map the calculation function above to the dataframe\n", "traj_df['distance'] = traj_df.apply(lambda x:calc_distance(x[\"latitude\"], x[\"longitude\"], x['latitude_prev'], x['longitude_prev']), axis=1)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 211 }, "executionInfo": { "elapsed": 14, "status": "ok", "timestamp": 1684722915403, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "dy4YMm73LXqu", "outputId": "a589c093-2c41-49b5-9468-89ae40f39c62" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
latitudelongitudeheightdays_totaldatetimerecord_dtusertrans_triptrans_modetimestamptime_deltadt_secondslatitude_prevlongitude_prevdistance
139.894178116.318200-77739535.6212962008-03-2814:54:402008-03-28 14:54:40101.0train2023-06-07 14:54:40NaTNaNNaNNaNNaN
239.894505116.321132-77739535.6216902008-03-2814:55:142008-03-28 14:55:14101.0train2023-06-07 14:55:140 days 00:00:3434.039.894178116.31820.252764
\n", "
" ], "text/plain": [ " latitude longitude height days_total date time \\\n", "1 39.894178 116.318200 -777 39535.621296 2008-03-28 14:54:40 \n", "2 39.894505 116.321132 -777 39535.621690 2008-03-28 14:55:14 \n", "\n", " record_dt user trans_trip trans_mode timestamp \\\n", "1 2008-03-28 14:54:40 10 1.0 train 2023-06-07 14:54:40 \n", "2 2008-03-28 14:55:14 10 1.0 train 2023-06-07 14:55:14 \n", "\n", " time_delta dt_seconds latitude_prev longitude_prev distance \n", "1 NaT NaN NaN NaN NaN \n", "2 0 days 00:00:34 34.0 39.894178 116.3182 0.252764 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "traj_df.head(2)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "BWYkxVvALXqu" }, "source": [ "## 速度\n", "\n", "次に速度:距離(km)/時を計算します" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "executionInfo": { "elapsed": 11, "status": "ok", "timestamp": 1684722915403, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "jyDzJ-S5LXqu" }, "outputs": [], "source": [ "traj_df['speed'] = \\\n", " np.where(((traj_df['distance'].notnull())&(traj_df['dt_seconds']!=0)), traj_df['distance'] / \n", " (traj_df['dt_seconds'] / 3600), 0)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 238 }, "executionInfo": { "elapsed": 11, "status": "ok", "timestamp": 1684722915403, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "mWqmrTe7LXqv", "outputId": "42dbe098-6cdb-4baf-f0f0-aa7e5d268c8f" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meanminmaxstd
trans_mode
bus40.7318930.0219.97033629.834764
subway52.3692170.0217.57840425.219674
taxi30.3551160.0156.12930627.505678
train87.3057980.076412.488301129.458882
walk5.4537860.077.7003333.290184
\n", "
" ], "text/plain": [ " mean min max std\n", "trans_mode \n", "bus 40.731893 0.0 219.970336 29.834764\n", "subway 52.369217 0.0 217.578404 25.219674\n", "taxi 30.355116 0.0 156.129306 27.505678\n", "train 87.305798 0.0 76412.488301 129.458882\n", "walk 5.453786 0.0 77.700333 3.290184" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Let's check the descriptions of speed per transportation mode\n", "traj_df.groupby(['trans_mode']).speed.agg(['mean','min','max','std'])" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "41SG1QIJLXqv" }, "source": [ "## 加速度\n", "\n", "\n", "2点間の速度の差/2点間の時間の差でAccelation(加速度)を計算します。" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "executionInfo": { "elapsed": 10, "status": "ok", "timestamp": 1684722915403, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "2nPNnh-FLXqv" }, "outputs": [], "source": [ "# store previous points' speed\n", "traj_df['speed_prev'] = traj_df.groupby('trans_trip')['speed'].shift(1)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "executionInfo": { "elapsed": 11, "status": "ok", "timestamp": 1684722915404, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "mQC1AKdCLXqv" }, "outputs": [], "source": [ "def calc_accel(speed_1, speed_2, time_diff_sec):\n", " \"\"\"function to calculate acceleration between two points\n", " \"\"\"\n", " speed_delta = speed_2 - speed_1\n", " if (time_diff_sec == 0) or (speed_delta == 0):\n", " return 0\n", " return speed_delta / time_diff_sec" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "executionInfo": { "elapsed": 13370, "status": "ok", "timestamp": 1684722928763, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "WM1dBu4iLXqv" }, "outputs": [], "source": [ "# map the above calculation fuction to the dataframe\n", "traj_df['accel'] = traj_df.apply(lambda x: calc_accel(x['speed'], x['speed_prev'], x['dt_seconds']), axis=1)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 238 }, "executionInfo": { "elapsed": 18, "status": "ok", "timestamp": 1684722928765, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "2DC_ab1iLXqv", "outputId": "db875df1-9f64-49e0-ad56-101ba03aca5d" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countmeanstdmin25%50%75%max
trans_mode
bus35156.0-0.0721195.457844-166.371582-1.161253-3.753887e-061.108991178.138813
subway20338.0-0.0914526.851066-136.151441-1.2339282.384665e-011.791577126.904634
taxi56731.0-0.0617554.192712-97.929174-1.2724900.000000e+001.30082296.055705
train380835.0-0.0071124.349676-1122.655229-0.9730444.474713e-090.9877601293.744609
walk36954.0-0.0514433.310213-64.769768-1.1910702.163060e-031.20144559.242515
\n", "
" ], "text/plain": [ " count mean std min 25% 50% \\\n", "trans_mode \n", "bus 35156.0 -0.072119 5.457844 -166.371582 -1.161253 -3.753887e-06 \n", "subway 20338.0 -0.091452 6.851066 -136.151441 -1.233928 2.384665e-01 \n", "taxi 56731.0 -0.061755 4.192712 -97.929174 -1.272490 0.000000e+00 \n", "train 380835.0 -0.007112 4.349676 -1122.655229 -0.973044 4.474713e-09 \n", "walk 36954.0 -0.051443 3.310213 -64.769768 -1.191070 2.163060e-03 \n", "\n", " 75% max \n", "trans_mode \n", "bus 1.108991 178.138813 \n", "subway 1.791577 126.904634 \n", "taxi 1.300822 96.055705 \n", "train 0.987760 1293.744609 \n", "walk 1.201445 59.242515 " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# check basic statistics of acceleration per transportation mode\n", "traj_df.groupby('trans_mode')['accel'].describe()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "KJFXfVf_LXqv" }, "source": [ "## Angular velocity\n", "\n", "\n", "ここでは、2点間の角度の差/2点間の時間の差でAngular velocityを計算します。" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "FWmbirqcLXqv" }, "source": [ "そのためにまず、2点間の角度の差を計算します。" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "executionInfo": { "elapsed": 17, "status": "ok", "timestamp": 1684722928766, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "qvZqFpAJLXqv" }, "outputs": [], "source": [ "def calc_angle(lat1, lat2, lon1, lon2):\n", " \"\"\"function to calculate angle differences between two coordinates\n", " \"\"\"\n", " x = cos(lat2) * sin(lon2-lon1)\n", " y = cos(lat1) * sin(lat2) - sin(lat1) * cos(lat2) * cos(lon2-lon1)\n", " brng = atan2(x, y)\n", " brng = np.degrees(brng)\n", " brng = brng + 360 if brng < 0 else brng\n", " return brng" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "executionInfo": { "elapsed": 11619, "status": "ok", "timestamp": 1684722940368, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "EYcAluRvLXqw" }, "outputs": [], "source": [ "# apply the calculation function abvoe to the dataframe\n", "traj_df['angle'] = traj_df.apply(lambda x: calc_angle(x['latitude'], x['latitude_prev'], x['longitude'], x['longitude_prev']), axis=1)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 238 }, "executionInfo": { "elapsed": 17, "status": "ok", "timestamp": 1684722940370, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "RvG7TJaoLXqw", "outputId": "bd899425-8a3d-4254-ae8b-ea0fc465a870" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countmeanstdmin25%50%75%max
trans_mode
bus35156.0159.299021111.391769-0.068.399355172.127466255.799236359.999455
subway20338.0137.644859104.2305770.019.438924173.706695196.554703359.827198
taxi56731.0158.724686116.988570-0.029.485250178.534543226.851081359.929870
train380835.0179.733586103.0232250.098.886739179.800484269.325408359.999506
walk36954.0181.486218104.4979550.097.345459181.463673269.999640359.812308
\n", "
" ], "text/plain": [ " count mean std min 25% 50% \\\n", "trans_mode \n", "bus 35156.0 159.299021 111.391769 -0.0 68.399355 172.127466 \n", "subway 20338.0 137.644859 104.230577 0.0 19.438924 173.706695 \n", "taxi 56731.0 158.724686 116.988570 -0.0 29.485250 178.534543 \n", "train 380835.0 179.733586 103.023225 0.0 98.886739 179.800484 \n", "walk 36954.0 181.486218 104.497955 0.0 97.345459 181.463673 \n", "\n", " 75% max \n", "trans_mode \n", "bus 255.799236 359.999455 \n", "subway 196.554703 359.827198 \n", "taxi 226.851081 359.929870 \n", "train 269.325408 359.999506 \n", "walk 269.999640 359.812308 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# print out the description of angle differences between two points per transportation mode\n", "traj_df.groupby(['trans_mode'])['angle'].describe()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "fl9xTTX6LXqw" }, "source": [ "Angular velocityを求めます。" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "executionInfo": { "elapsed": 15, "status": "ok", "timestamp": 1684722940370, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "lzZ8H2c9LXqw" }, "outputs": [], "source": [ "def calc_angular_velocity(angle1, angle2, time_diff_sec):\n", " \"\"\"function to calculate angular velocity\n", " \"\"\"\n", " bear_delta = angle2 - angle1\n", " if time_diff_sec == 0:\n", " return 0\n", " return abs(bear_delta / time_diff_sec)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 9716, "status": "ok", "timestamp": 1684722950071, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "gm29z1UkLXqw", "outputId": "ccdebb91-e22f-4458-ed20-2bf2b0185c47" }, "outputs": [ { "data": { "text/plain": [ "count 529586.000000\n", "mean 10.510742\n", "std 45.292572\n", "min 0.000000\n", "25% 0.272122\n", "50% 0.870065\n", "75% 2.537062\n", "max 359.997255\n", "Name: angular_velocity, dtype: float64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# create a new column holding the previous points' angle difference\n", "traj_df['angle_prev'] = traj_df.groupby('trans_trip')['angle'].shift(1)\n", "# apply the calculation function to the dataframe\n", "traj_df['angular_velocity'] = traj_df.apply(lambda x: calc_angular_velocity(x['angle'],x['angle_prev'],x['dt_seconds']), axis=1)\n", "traj_df['angular_velocity'].describe()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "3vJKfwiQLXqw" }, "source": [ "## 特徴の可視化\n", "\n", "上で作ったいくつかの特徴を可視化してそれぞれがどのように交通手段と関連があるか確認します。" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "executionInfo": { "elapsed": 5, "status": "ok", "timestamp": 1684722950071, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "asreHU6RLXqw" }, "outputs": [], "source": [ "# define a color list for visualizations\n", "colors = ['#0C5DA5', '#00B945', '#FF9500', '#FF2C00', '#845B97', '#474747', '#9e9e9e']" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "_iK7MEVQLXqw" }, "source": [ "\n", "### トリップごとのAngular velocity とスピードの関係性" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 316 }, "executionInfo": { "elapsed": 674, "status": "ok", "timestamp": 1684722950741, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "vWq8O_j6LXqw", "outputId": "85f681d7-cd75-4a5e-9a2b-c9a4149bd7f5" }, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, ax = plt.subplots(figsize=(4, 3))\n", "markers = ['^', 'o', 's', 'v', 'd']\n", "# iterate over transportation modes\n", "for i, mode in enumerate(traj_df['trans_mode'].unique()):\n", " # create sub-dataframe holding the rows with the mode\n", " chunk = traj_df[traj_df['trans_mode']==mode]\n", " # create mean scores of speed and angular_velocity per trip\n", " trip_df = chunk.groupby('trans_trip')[['speed', 'angular_velocity']].mean()\n", " # create scatter plots of speed and angular_velocity per trip\n", " ax.scatter(trip_df['speed'], trip_df['angular_velocity'], facecolor = 'None', \n", " marker = markers[i], edgecolor=colors[i], label=mode, linewidth=0.8)\n", "ax.legend(frameon=False)\n", "ax.set_xlabel('speed (km/h)')\n", "ax.set_ylabel('angular velocity (deg/sec)')\n", "plt.show()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "_nu8VTP-LXqw" }, "source": [ "\n", "### 各交通手段ごとの加速度の箱ひげ図の確認" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 311 }, "executionInfo": { "elapsed": 940, "status": "ok", "timestamp": 1684722951666, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "MzohpSVaLXqw", "outputId": "3259f7f8-a063-4bad-ea61-e80d2a357193" }, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, ax = plt.subplots(figsize=(4, 3))\n", "markers = ['^', 'o', 's', 'v', 'd']\n", "accel_df = traj_df.groupby(['trans_mode', 'trans_trip'])[['accel']].mean().reset_index()\n", "sns.boxplot(data=accel_df, x=\"trans_mode\", y=\"accel\", \n", " # showfliers=True, \n", " notch=True, showcaps=False,\n", " showmeans=True,\n", " meanprops={'markerfacecolor':'none','markeredgecolor':'green'},\n", " flierprops={\"marker\": \"x\"},\n", " boxprops={\"facecolor\": (.4, .6, .8, .2)},\n", " medianprops={\"color\": \"coral\"}, \n", " ax = ax)\n", "ax.set_xlabel('transportation mode')\n", "plt.show()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "cCZOwk_hLXqx" }, "source": [ "## 次の分析のために作成した特徴量データを保存します。" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "executionInfo": { "elapsed": 7, "status": "ok", "timestamp": 1684722951667, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "712Q_mvpLXqx" }, "outputs": [], "source": [ "# drop the rows without dt_seconds values\n", "traj_df = traj_df[traj_df['dt_seconds'].notnull()]\n", "# drop the rows which has 0 seconds gaps from previous points\n", "traj_df = traj_df[~(traj_df['dt_seconds']==0)]" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "executionInfo": { "elapsed": 23769, "status": "ok", "timestamp": 1684722975429, "user": { "displayName": "Yuya Shibuya", "userId": "13278923316285788453" }, "user_tz": -540 }, "id": "be7v71oqLXqx" }, "outputs": [], "source": [ "# PLEASE REPLACE THE BELOW PATH WITH YOUR PATH\n", "path = f'traj_{user}_labeled_with_features.csv'\n", "traj_df.to_csv(path)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.16" } }, "nbformat": 4, "nbformat_minor": 4 }