Source code for envs.hyperparams

'''
default hyperparameters for training
these are built as args to allow for command-line options
these args are also saved along with parameters during training to
allow for rebuilding everything with the same settings
'''

import argparse
import numpy as np

from envs.utils import str2bool


def parse_args(arglist=None):
    '''
    Build the hyperparameter parser and parse ``arglist`` with it.

    Args:
        arglist: list of command line tokens to parse. When None, argparse
            falls back to reading ``sys.argv[1:]``.

    Returns:
        argparse.Namespace holding one attribute per option declared below.
        Options declared with a dash (``--l2-reg``, ``--max-kl``) are exposed
        with an underscore (``l2_reg``, ``max_kl``).
    '''
    parser = argparse.ArgumentParser()

    # decaying reward logistics
    parser.add_argument('--decay_reward', type=str2bool, default=False)
    parser.add_argument('--exp_dir', type=str, default='./data/experiments')
    parser.add_argument('--itrs_per_decay', type=int, default=25)

    # curriculum params
    parser.add_argument('--do_curriculum', type=str2bool, default=False)
    parser.add_argument('--n_envs_start', type=int, default=10)
    parser.add_argument('--n_envs_end', type=int, default=50)
    parser.add_argument('--n_envs_step', type=int, default=10)
    # if not the string 'NONE', inserted into first parampath for curriculum
    parser.add_argument('--load_params_init', type=str, default='NONE')

    # logistics
    parser.add_argument('--exp_name', type=str, default='NGSIM-gail')
    parser.add_argument('--params_filepath', type=str, default='')
    parser.add_argument(
        '--expert_filepath',
        type=str,
        default='./data/trajectories/ngsim_holo_new.h5')
    parser.add_argument('--vectorize', type=str2bool, default=True)
    parser.add_argument('--n_envs', type=int, default=1)
    parser.add_argument(
        '--normalize_clip_std_multiple', type=float, default=10.)

    # env
    parser.add_argument(
        '--ngsim_filename',
        type=str,
        default='trajdata_holo_trajectories.txt')
    parser.add_argument(
        '--h5_filename',
        type=str,
        default='trajdata_holo_trajectories.txt')
    parser.add_argument('--env_H', type=int, default=200)
    parser.add_argument('--env_primesteps', type=int, default=50)
    parser.add_argument('--env_action_repeat', type=int, default=1)
    parser.add_argument('--env_multiagent', type=str2bool, default=True)
    parser.add_argument('--env_reward', type=int, default=0)
    parser.add_argument(
        '--env_param',
        type=str,
        default="./data/experiments/NGSIM-gail/imitate/itr_650.npz")

    # reward handler
    parser.add_argument('--reward_handler_max_epochs', type=int, default=100)
    parser.add_argument(
        '--reward_handler_recognition_final_scale', type=float, default=.2)
    parser.add_argument(
        '--reward_handler_use_env_rewards', type=str2bool, default=True)
    parser.add_argument(
        '--reward_handler_critic_final_scale', type=float, default=1.)

    # policy
    # NOTE: the *_hidden_layer_dims options declare type=int so that values
    # passed on the command line are ints, like the tuple defaults; without
    # it argparse would hand back a list of strings.
    parser.add_argument('--use_infogail', type=str2bool, default=False)
    parser.add_argument(
        '--policy_mean_hidden_layer_dims',
        nargs='+',
        type=int,
        default=(128, 128, 64))
    parser.add_argument(
        '--policy_std_hidden_layer_dims',
        nargs='+',
        type=int,
        default=(128, 64))
    parser.add_argument('--policy_recurrent', type=str2bool, default=True)
    parser.add_argument(
        '--policy_param',
        type=str,
        default="./data/experiments/NGSIM-gail/imitate/model/policy.pkl")
    parser.add_argument('--recurrent_hidden_dim', type=int, default=64)

    # trpo policy update
    parser.add_argument(
        '--l2-reg',
        type=float,
        default=1e-3,
        metavar='G',
        help='l2 regularization regression (default: 1e-3)')
    parser.add_argument(
        '--max-kl',
        type=float,
        default=1e-2,
        metavar='G',
        help='max kl value (default: 1e-2)')
    parser.add_argument(
        '--damping',
        type=float,
        default=1e-2,
        metavar='G',
        help='damping (default: 1e-2)')

    # critic
    parser.add_argument(
        '--use_critic_replay_memory', type=str2bool, default=True)
    parser.add_argument('--n_critic_train_epochs', type=int, default=5)
    parser.add_argument('--critic_learning_rate', type=float, default=.0004)
    parser.add_argument('--critic_dropout_keep_prob', type=float, default=.8)
    parser.add_argument('--gradient_penalty', type=float, default=2.)
    parser.add_argument('--critic_grad_rescale', type=float, default=40.)
    parser.add_argument('--critic_batch_size', type=int, default=1000)
    parser.add_argument(
        '--critic_hidden_layer_dims',
        nargs='+',
        type=int,
        default=(128, 128, 64))
    parser.add_argument(
        '--critic_param',
        type=str,
        default="./data/experiments/NGSIM-gail/imitate/model/critic.pkl")

    # recognition
    parser.add_argument('--latent_dim', type=int, default=4)
    parser.add_argument('--n_recognition_train_epochs', type=int, default=30)
    parser.add_argument('--scheduler_k', type=int, default=20)
    parser.add_argument(
        '--recognition_learning_rate', type=float, default=.0005)
    parser.add_argument(
        '--recognition_hidden_layer_dims',
        nargs='+',
        type=int,
        default=(128, 64))

    # gail
    parser.add_argument('--batch_size', type=int, default=10000)
    parser.add_argument('--trpo_step_size', type=float, default=.01)
    parser.add_argument('--n_itr', type=int, default=2000)
    parser.add_argument('--max_path_length', type=int, default=1000)
    parser.add_argument('--discount', type=float, default=.95)

    # render
    parser.add_argument('--validator_render', type=str2bool, default=False)
    parser.add_argument('--render_every', type=int, default=25)
    parser.add_argument('--remove_ngsim_veh', type=str2bool, default=False)

    # parse and return; parse_args(None) reads sys.argv, so no branch needed
    return parser.parse_args(arglist)
def load_args(args_filepath):
    '''
    This function enables backward-compatible usage of saved args files by
    filling in missing values with default values.

    Args:
        args_filepath: path of a file readable by ``np.load`` whose 'args'
            entry wraps the saved args object.

    Returns:
        A namespace of current defaults (``parse_args(arglist=[])``) in which
        every option also present in the saved args takes the saved value.
        Saved options that no longer exist in the parser are dropped.
    '''
    # The saved args object is stored as a pickled object array, so pickle
    # loading has to be enabled explicitly (numpy >= 1.16.3 defaults to
    # allow_pickle=False and raises ValueError otherwise).
    # NOTE(security): unpickling executes arbitrary code; only load args
    # files that come from a trusted source.
    data = np.load(args_filepath, allow_pickle=True)
    try:
        orig = data['args'].item()
    finally:
        # npz archives keep a file handle open until closed; other return
        # types of np.load have no close(), hence the getattr guard
        close = getattr(data, 'close', None)
        if close is not None:
            close()

    new = parse_args(arglist=[])

    # replace all keys in both orig and new, in new, with orig values
    known = set(vars(new))
    for key, value in vars(orig).items():
        if key in known:
            setattr(new, key, value)
    return new