feature, good first issue
Description
This is a feature request. I was playing around with rl_coach, and it automatically generates a JSON file that contains all of an experiment's parameters. This is really helpful for repeatability and record keeping. I would love it if garage had something like this too. Here is an example of the parameters rl_coach records:
data_store:
None
name:
simple_rl_graph
environments:
0: <rl_coach.environments.gym_environment.GymEnvironment object at 0x7f5cb7e30710>
level_managers:
0: <rl_coach.level_manager.LevelManager object at 0x7f5cb7de0eb8>
_phase:
RunPhase.UNDEFINED
reset_required:
False
env_params:
"GymVectorEnvironment" {
"additional_simulator_parameters": {},
"custom_reward_threshold": null,
"default_input_filter": {
"_observation_filters": {},
"_reward_filters": {},
"i_am_a_reference_filter": false,
"name": "no_input_filter",
"__class__": "NoInputFilter"
},
"default_output_filter": {
"_action_filters": {},
"i_am_a_reference_filter": false,
"name": null,
"__class__": "NoOutputFilter"
},
"experiment_path": "./experiments/bob213/21_07_2020-14_29",
"frame_skip": 1,
"human_control": false,
"level": "CartPole-v0",
"max_over_num_frames": 1,
"observation_space_type": null,
"random_initialization_steps": 0,
"seed": null,
"target_success_rate": 1.0
}
agent_params:
"PolicyGradientsAgentParameters" {
"algorithm": {
"act_for_full_episodes": false,
"apply_gradients_every_x_episodes": 5,
"beta_entropy": 0,
"discount": 0.99,
"distributed_coach_synchronization_type": null,
"heatup_using_network_decisions": false,
"in_action_space": null,
"load_memory_from_file_path": null,
"n_step": -1,
"num_consecutive_playing_steps": {
"_num_steps": 1,
"__class__": "EnvironmentSteps"
},
"num_consecutive_training_steps": 1,
"num_steps_between_copying_online_weights_to_target": {
"_num_steps": 0,
"__class__": "TrainingSteps"
},
"num_steps_between_gradient_updates": 20000,
"override_episode_rewards_with_the_last_transition_reward": false,
"policy_gradient_rescaler": {
"_value_": 3,
"__objclass__": "<enum 'PolicyGradientRescaler'>",
"_name_": "FUTURE_RETURN_NORMALIZED_BY_TIMESTEP"
},
"rate_for_copying_weights_to_target": 1.0,
"share_statistics_between_workers": true,
"store_transitions_only_when_episodes_are_terminated": false,
"supports_parameter_noise": false,
"update_pre_network_filters_state_on_inference": true,
"update_pre_network_filters_state_on_train": false,
"use_accumulated_reward_as_measurement": false,
"__class__": "PolicyGradientAlgorithmParameters"
},
"current_episode": 0,
"exploration": {
"action_space": {
"_high": "array([1.])",
"_low": "array([0.])",
"_shape": "array([1])",
"default_action": 0,
"descriptions": {},
"num_dimensions": 1,
"num_elements": 1,
"__class__": "DiscreteActionSpace"
},
"__class__": "CategoricalParameters"
},
"full_name_id": "main_level/agent",
"input_filter": {
"_observation_filters": {},
"_reward_filters": {
"rescale": {
"name": null,
"rescale_factor": 0.005,
"supports_batching": false,
"__class__": "RewardRescaleFilter"
}
},
"i_am_a_reference_filter": false,
"name": "input_filter",
"__class__": "InputFilter"
},
"is_a_highest_level_agent": true,
"is_a_lowest_level_agent": true,
"is_batch_rl_training": false,
"memory": {
"load_memory_from_file_path": null,
"shared_memory": false,
"__class__": "SingleEpisodeBufferParameters"
},
"name": "agent",
"network_wrappers": {
"main": {
"adam_optimizer_beta1": 0.9,
"adam_optimizer_beta2": 0.99,
"async_training": true,
"batch_size": 32,
"clip_gradients": null,
"create_target_network": false,
"embedding_merger_type": {
"_value_": 0,
"__objclass__": "<enum 'EmbeddingMergerType'>",
"_name_": "Concat"
},
"force_cpu": false,
"framework": {
"_value_": "TensorFlow",
"__objclass__": "<enum 'Frameworks'>",
"_name_": "tensorflow"
},
"gradients_clipping_method": {
"_value_": 0,
"__objclass__": "<enum 'GradientClippingMethod'>",
"_name_": "ClipByGlobalNorm"
},
"heads_parameters": {
"0": {
"activation_function": "tanh",
"dense_layer": null,
"is_training": false,
"loss_weight": 1.0,
"name": "policy_head_params",
"num_output_head_copies": 1,
"parameterized_class_name": "PolicyHead",
"rescale_gradient_from_head_by_factor": 1.0,
"__class__": "PolicyHeadParameters"
}
},
"input_embedders_parameters": {
"observation": {
"activation_function": "relu",
"batchnorm": false,
"dense_layer": null,
"dropout_rate": 0.0,
"input_clipping": null,
"input_offset": {
"image": 0.0,
"tensor": 0.0,
"vector": 0.0
},
"input_rescaling": {
"image": 255.0,
"tensor": 1.0,
"vector": 1.0
},
"is_training": false,
"name": "embedder",
"scheme": {
"_value_": "Medium",
"__objclass__": "<enum 'EmbedderScheme'>",
"_name_": "Medium"
},
"__class__": "InputEmbedderParameters"
}
},
"l2_regularization": 0,
"learning_rate": 0.0005,
"learning_rate_decay_rate": 0,
"learning_rate_decay_steps": 0,
"middleware_parameters": {
"activation_function": "relu",
"batchnorm": false,
"dense_layer": null,
"dropout_rate": 0.0,
"is_training": false,
"name": "middleware_fc_embedder",
"num_streams": 1,
"parameterized_class_name": "FCMiddleware",
"scheme": {
"_value_": "Medium",
"__objclass__": "<enum 'MiddlewareScheme'>",
"_name_": "Medium"
},
"__class__": "FCMiddlewareParameters"
},
"optimizer_epsilon": 0.0001,
"optimizer_type": "Adam",
"replace_mse_with_huber_loss": false,
"rms_prop_optimizer_decay": 0.9,
"scale_down_gradients_by_number_of_workers_for_sync_training": true,
"sess": null,
"shared_optimizer": true,
"softmax_temperature": 1,
"tensorflow_support": true,
"use_separate_networks_per_head": false,
"__class__": "PolicyGradientNetworkParameters"
}
},
"output_filter": {
"_action_filters": {},
"i_am_a_reference_filter": false,
"name": "output_filter",
"__class__": "NoOutputFilter"
},
"pre_network_filter": {
"_observation_filters": {},
"_reward_filters": {},
"i_am_a_reference_filter": false,
"name": "pre_network_filter",
"__class__": "NoInputFilter"
},
"task_parameters": {
"apply_stop_condition": false,
"checkpoint_restore_path": null,
"checkpoint_save_dir": null,
"checkpoint_save_secs": null,
"evaluate_only": null,
"experiment_path": "./experiments/bob213/21_07_2020-14_29",
"export_onnx_graph": false,
"framework_type": {
"_value_": "TensorFlow",
"__objclass__": "<enum 'Frameworks'>",
"_name_": "tensorflow"
},
"num_gpu": 1,
"seed": null,
"task_index": 0,
"use_cpu": false,
"__class__": "TaskParameters"
},
"visualization": {
"add_rendered_image_to_env_response": false,
"dump_csv": true,
"dump_gifs": false,
"dump_in_episode_signals": false,
"dump_mp4": false,
"dump_parameters_documentation": true,
"dump_signals_to_csv_every_x_episodes": 5,
"max_fps_for_human_control": 10,
"native_rendering": false,
"print_networks_summary": false,
"render": false,
"tensorboard": false,
"video_dump_filters": {
"0": {
"run_phases": {
"0": {
"_value_": "Testing",
"__objclass__": "<enum 'RunPhase'>",
"_name_": "TEST"
}
},
"__class__": "SelectedPhaseOnlyDumpFilter"
},
"1": {
"max_reward_achieved": -Infinity,
"__class__": "MaxDumpFilter"
}
},
"__class__": "VisualizationParameters"
}
}
graph_creation_time:
1595366960.1503983
time_metric:
TimeTypes.EpisodeNumber
visualization_parameters:
"VisualizationParameters" {
"add_rendered_image_to_env_response": false,
"dump_csv": true,
"dump_gifs": false,
"dump_in_episode_signals": false,
"dump_mp4": false,
"dump_parameters_documentation": true,
"dump_signals_to_csv_every_x_episodes": 5,
"max_fps_for_human_control": 10,
"native_rendering": false,
"print_networks_summary": false,
"render": false,
"tensorboard": false,
"video_dump_filters": {
"0": {
"run_phases": {
"0": {
"_value_": "Testing",
"__objclass__": "<enum 'RunPhase'>",
"_name_": "TEST"
}
},
"__class__": "SelectedPhaseOnlyDumpFilter"
},
"1": {
"max_reward_achieved": -Infinity,
"__class__": "MaxDumpFilter"
}
}
}
checkpoint_saver:
<rl_coach.saver.SaverCollection object at 0x7f5cb7df30b8>
is_batch_rl:
False
sess:
<tensorflow.python.client.session.Session object at 0x7f5cb7e30160>
preset_validation_params:
"PresetValidationParameters" {
"max_episodes_to_achieve_reward": 550,
"min_reward_threshold": 130,
"num_workers": 1,
"read_csv_tries": 200,
"reward_test_level": null,
"test": true,
"test_using_a_trace_test": true,
"trace_max_env_steps": 5000,
"trace_test_levels": null
}
task_parameters:
"TaskParameters" {
"apply_stop_condition": false,
"checkpoint_restore_path": null,
"checkpoint_save_dir": null,
"checkpoint_save_secs": null,
"evaluate_only": null,
"experiment_path": "./experiments/bob213/21_07_2020-14_29",
"export_onnx_graph": false,
"framework_type": {
"_value_": "TensorFlow",
"__objclass__": "<enum 'Frameworks'>",
"_name_": "tensorflow"
},
"num_gpu": 1,
"seed": null,
"task_index": 0,
"use_cpu": false
}
top_level_manager:
<rl_coach.level_manager.LevelManager object at 0x7f5cb7de0eb8>
heatup_steps:
<rl_coach.core_types.EnvironmentSteps object at 0x7f5cb7e1dfd0>
last_checkpoint_saving_time:
1595366960.148359
checkpoint_id:
0
checkpoint_state_updater:
None
improve_steps:
<rl_coach.core_types.TrainingSteps object at 0x7f5cb7e1df28>
evaluation_steps:
<rl_coach.core_types.EnvironmentEpisodes object at 0x7f5cb7e1df98>
steps_between_evaluation_periods:
<rl_coach.core_types.EnvironmentEpisodes object at 0x7f5cb7e1df60>
total_steps_counters:
RunPhase.TRAIN: <rl_coach.core_types.TotalStepsCounter object at 0x7f5cb7e305c0>
RunPhase.HEATUP: <rl_coach.core_types.TotalStepsCounter object at 0x7f5cb7e30588>
RunPhase.TEST: <rl_coach.core_types.TotalStepsCounter object at 0x7f5cb7e305f8>
graph_logger:
<rl_coach.logger.Logger object at 0x7f5cb7e30630>