Source code for preprocessing.clean_holo

import pandas as pd
import numpy as np
import os
import pickle as pk
import argparse
from src.const import DIR

data_dir = os.path.join(DIR, "../preprocessing/data")
processed_dir = os.path.join(DIR, "../preprocessing/processed_data")
final_dir = os.path.join(DIR, "../data")
lane_dir = os.path.join(DIR, "../preprocessing/lane")


def clean_data(filename: str):
    '''
    Down-sample a raw trajectory csv and repair per-vehicle frame continuity.

    The raw file is read from ``data_dir``.  An intermediate file
    ``holo_data_downsampled.csv`` is written to ``data_dir`` and the final
    result is written to ``processed_dir`` as
    ``holo_<filename[5:19]>_perfect_cleaned.csv``.

    :param filename: raw trajectory csv file name, relative to ``data_dir``
    :return: number of rows written to the cleaned csv file
    '''
    filepath = os.path.join(data_dir, filename)
    df = pd.read_csv(filepath)

    # Down sampling: a timestamp is kept when its 100-unit time bucket differs
    # from the bucket of the row right before it (the first row always counts).
    buckets = (df['Global_Time'] // 100).to_numpy()
    opens_bucket = np.ones(len(buckets), dtype=bool)
    opens_bucket[1:] = buckets[1:] != buckets[:-1]
    sampled_gt = df['Global_Time'][opens_bucket]
    df = df.loc[df['Global_Time'].isin(sampled_gt)].copy()

    # Swap the contents of the Global_X and Global_Y columns.
    df[["Global_Y", "Global_X"]] = df[["Global_X", "Global_Y"]].to_numpy()
    df = df.loc[df['Valid'] == True]  # noqa: E712 - comparison kept as in the raw pipeline
    down_sample_fn = os.path.join(data_dir, 'holo_data_downsampled.csv')
    df.to_csv(down_sample_fn, index=False)

    # Reading the file back yields a default 0..n-1 index, which the
    # df.at[i, ...] accesses below rely on.
    df = pd.read_csv(down_sample_fn)

    # Every row of a vehicle gets the largest width/length seen for that
    # (original) vehicle id.
    size_cols = ['v_Width', 'v_length']
    df[size_cols] = df.groupby('Vehicle_ID')[size_cols].transform('max')

    # Correct vehicle frames for continuity
    used_id = set(df['Vehicle_ID'])
    first_time = df['Global_Time'].min()  # loop invariant, computed once
    vehicles = dict()  # current vehicle id -> row index of its latest frame
    id_map = dict()  # map original lost frame veh to new id
    discard_id = set()  # veh_ids that need to be replaced
    id_cnt = 1
    last_LY = dict()  # Local_Y offset carried over section changes
    frame_cnt = dict()  # current vehicle id -> number of frames counted
    for i in range(len(df)):
        v_id = df.at[i, 'Vehicle_ID']
        global_time = df.at[i, 'Global_Time']

        if v_id in id_map:
            v_id = id_map[v_id]
        elif v_id in discard_id:
            # find a suitable id to replace the discarded id
            while id_cnt in used_id or id_cnt in discard_id:
                id_cnt += 1
            id_map[v_id] = id_cnt
            # Reserve the id; otherwise the next vehicle with a frame gap
            # would be handed the very same replacement id.
            used_id.add(id_cnt)
            v_id = id_cnt
        df.at[i, 'Vehicle_ID'] = v_id

        if v_id not in vehicles:
            # First frame of this (possibly re-numbered) vehicle.
            last_LY[v_id] = 0
            frame_cnt[v_id] = 1
            df.at[i, 'v_Acc'] = 0
            df.at[i, 'v_Vel'] = 0
            df.at[i, 'Frame_ID'] = (global_time - first_time) // 100 + 1
        else:
            prev = vehicles[v_id]
            if df.at[i, 'Section_ID'] != df.at[prev, 'Section_ID']:
                # Entering another section: remember the previous row's
                # Local_Y as the offset for the rows that follow.
                last_LY[v_id] = df.at[prev, 'Local_Y']
            df.at[i, 'Local_Y'] += last_LY[v_id]
            # Displacement between this row and the vehicle's previous row
            # (not divided by any time step).
            df.at[i, 'v_Vel'] = np.sqrt(
                (df.at[i, 'Global_Y'] - df.at[prev, 'Global_Y']) ** 2
                + (df.at[i, 'Global_X'] - df.at[prev, 'Global_X']) ** 2)
            df.at[i, 'v_Acc'] = df.at[i, 'v_Vel'] - df.at[prev, 'v_Vel']
            df.at[i, 'Frame_ID'] = df.at[prev, 'Frame_ID'] + 1
            frame_cnt[v_id] += 1
            if (global_time // 100) != (df.at[prev, 'Global_Time'] // 100) + 1:
                # Frame gap: rows carrying this id are re-numbered from the
                # next row on (only when the id is not already a mapped one).
                discard_id.add(v_id)

        # 3.75 is presumably the lane width in Local_X units - TODO confirm.
        df.at[i, 'Lane_ID'] = int(df.at[i, 'Local_X'] // 3.75)
        vehicles[v_id] = i

    df['Total_Frame'] = df['Vehicle_ID'].map(frame_cnt)

    # 3.28 is presumably a metres-to-feet factor - TODO confirm.
    df[['Local_X', 'Local_Y', 'Global_X', 'Global_Y', 'v_Vel', 'v_Acc', 'v_length', 'v_Width',
        'Space_Headway']] *= 3.28
    df['Frame_ID'] = (df['Frame_ID'] - df['Frame_ID'].min()) + 1
    df['Vehicle_ID'] += 1
    # Use .loc: df[mask][col] += 1 would only modify a temporary copy and
    # leave these columns out of step with the shifted Vehicle_ID.
    df.loc[df['Preceding'] != 0, 'Preceding'] += 1
    df.loc[df['Following'] != 0, 'Following'] += 1

    # Drop every vehicle that has at least one row with a negative Local_X.
    le0_ids = np.unique(df[df['Local_X'] < 0]['Vehicle_ID'])
    df = df[~df['Vehicle_ID'].isin(le0_ids)]

    saved_path = os.path.join(processed_dir, 'holo_{}_perfect_cleaned.csv'.format(filename[5:19]))
    print("save to {}".format(saved_path))
    df.to_csv(saved_path, index=False)
    return df.shape[0]
def csv2txt(filename: str):
    '''
    Export a cleaned trajectory csv as a whitespace-separated text file.

    Rows are ordered by vehicle and frame, vehicles with fewer than five
    total frames are dropped, and a fixed set of columns is written.

    :param filename: the file name of the csv file (inside ``processed_dir``)
    :return: None; the text file ``holo_trajectories.txt`` is written to ``final_dir``
    '''
    exported_columns = [
        'Vehicle_ID', 'Frame_ID', 'Total_Frame', 'Global_Time',
        'Local_X', 'Local_Y', 'Global_X', 'Global_Y',
        'v_length', 'v_Width', 'v_Class', 'v_Vel', 'v_Acc',
        'Lane_ID', 'Preceding', 'Following', 'Space_Headway', 'Time_Headway',
    ]
    # One printf-style specifier per exported column, in the same order.
    row_format = ('%4d %8d %8d %15d %8.3f %8.3f %14.3f %12.3f '
                  '%6.3f %6.3f %3d %8.3f %8.3f %d %6d %6d %8.3f %8.3f')

    table = pd.read_csv(os.path.join(processed_dir, filename))
    table = table.sort_values(by=['Vehicle_ID', 'Frame_ID'])
    long_enough = table["Total_Frame"] >= 5
    selected = table.loc[long_enough, exported_columns]

    save_file = os.path.join(final_dir, r'holo_trajectories.txt')
    print("save to {}".format(save_file))
    np.savetxt(save_file, selected.values, fmt=row_format)
def create_lane(filename: str):
    '''
    Build lane boundary and centerline files from the smoothed lane csv files.

    Reads ``<filename>0_corrected_smoothed.csv`` .. ``<filename>2_corrected_smoothed.csv``
    from ``data_dir``, pickles each extracted boundary to ``lane_dir`` as
    ``lane<k>.pk`` and writes ``boundariesHOLO.txt`` and ``centerlinesHOLO.txt``
    to ``final_dir``.

    :param filename: name of the lane file (common prefix of the three csv files)
    :return: None
    '''
    lane_df = {}
    for k in range(3):
        file_name = os.path.join(data_dir, filename) + str(k)
        df = pd.read_csv(file_name + '_corrected_smoothed.csv')
        lane_df[k] = df['Lane_Boundary_Left_Global']
    # The fourth boundary is the right boundary of the last file read.
    lane_df[3] = df['Lane_Boundary_Right_Global']
    lane_cnt = 4

    lanes = dict()
    for k in range(lane_cnt):
        # Size by this boundary's own column rather than by the last csv read.
        lane = np.zeros((len(lane_df[k]), 2))
        for i, raw in enumerate(lane_df[k]):
            # Each cell is the text of a 2-D array ("[[a b]\n [c d] ...]");
            # only its middle point is used, scaled by 3.28
            # (presumably metres to feet - TODO confirm).
            points = raw[2:-2].split(']\n [')
            middle = points[len(points) // 2]
            # Builtin float: the np.float alias was removed in NumPy 1.24.
            lane[i, :] = np.array(middle.split()).astype(float) * 3.28
        # Swap the two coordinate columns.
        lane = lane[:, [1, 0]]
        # Drop duplicate points while keeping the first occurrence order.
        first_seen = np.unique(lane, return_index=True, axis=0)[1]
        lane = lane[np.sort(first_seen)]
        print(len(lane))
        with open(os.path.join(lane_dir, ('lane' + str(k) + '.pk')), 'wb') as f:
            pk.dump(lane, f)
        lanes[k] = lane

    # Centerline of lane n: midpoint between each point of boundary n and a
    # nearby point of boundary n + 1, tracked with a forward-moving cursor j.
    centers = {}
    for n in range(lane_cnt - 1):
        near, far = lanes[n], lanes[n + 1]
        j = 0
        centers[n] = np.zeros(near.shape)
        for i in range(len(near)):
            dis1 = dis2 = dis3 = 1e9  # sentinel for "no such neighbour"
            if j > 0:
                dis1 = np.linalg.norm(near[i, :] - far[j - 1, :])
            dis2 = np.linalg.norm(near[i, :] - far[j, :])
            if j + 1 < len(far):
                dis3 = np.linalg.norm(near[i, :] - far[j + 1, :])
            k = j
            # Guard on j + 1 so the sentinel can never select a point past
            # the end of the neighbouring boundary.
            if dis3 <= dis2 and dis3 <= dis1 and j + 1 < len(far):
                k = j + 1
                j += 1
            elif dis1 < dis2 and dis1 < dis3:
                k = j - 1
            centers[n][i, :] = (near[i, :] + far[k, :]) / 2

    boundary_fn = os.path.join(final_dir, 'boundariesHOLO.txt')
    with open(boundary_fn, 'wb') as f:
        f.write(b'BOUNDARIES\n')
        f.write((str((lane_cnt - 1) * 2) + '\n').encode())
        for i in range(lane_cnt - 1):
            # Each lane contributes two boundaries: its own and the next one.
            f.write(('BOUNDARY ' + str(2 * i + 1) + '\n').encode())
            f.write((str(len(lanes[i])) + '\n').encode())
            np.savetxt(f, lanes[i], fmt=(' %.5f %.5f'))
            f.write(('BOUNDARY ' + str(2 * i + 2) + '\n').encode())
            f.write((str(len(lanes[i + 1])) + '\n').encode())
            np.savetxt(f, lanes[i + 1], fmt=(' %.5f %.5f'))
    print("boundariesHOLO.txt has been saved to {}".format(boundary_fn))

    centerline_fn = os.path.join(final_dir, 'centerlinesHOLO.txt')
    with open(centerline_fn, 'wb') as f:
        f.write(b'CENTERLINES\n')
        f.write((str(lane_cnt - 1) + '\n').encode())
        for i in range(lane_cnt - 1):
            f.write(('CENTERLINE\n').encode())
            f.write(('centerline' + str(i + 1) + '\n').encode())
            # centers[i] has the same number of rows as lanes[i].
            f.write((str(len(lanes[i])) + '\n').encode())
            np.savetxt(f, centers[i], fmt=(' %.5f %.5f'))
    print("centerlinesHOLO.txt has been saved to {}".format(centerline_fn))
if __name__ == "__main__":
    # Command-line entry point: clean the raw trajectories, export them as
    # text and, when a lane file prefix is supplied, build the lane files.
    arg_parser = argparse.ArgumentParser(description='validation settings')
    for option in ('--traj_path', '--lane_path'):
        arg_parser.add_argument(option, type=str, default=None)
    clean_args = arg_parser.parse_args()

    traj_path = clean_args.traj_path
    if traj_path is None:
        raise ValueError("You need to input a raw trajectory data path")

    clean_data(traj_path)
    # Same naming rule clean_data uses for the file it saves.
    processed_data_path = 'holo_{}_perfect_cleaned.csv'.format(traj_path[5:19])
    csv2txt(processed_data_path)

    lane_path = clean_args.lane_path
    if lane_path is not None:
        create_lane(lane_path)
    print("Finish data preprocessing")