Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
e8bdf86
Add Oignons policy
vcharraut May 22, 2026
5bc6c02
Update weights
vcharraut May 24, 2026
5113bb9
Update obs functions and variables
vcharraut May 24, 2026
cbbf1ea
Merge branch 'update-weights' into vcha/update-obs-viz
vcharraut May 24, 2026
b978f2b
Update notebooks
vcharraut May 24, 2026
7aff842
Refactor configuration keys and update evaluation logic in WOSAC eval…
vcharraut May 25, 2026
0181a42
Refactor code structure for improved readability and maintainability
vcharraut May 25, 2026
0aff1bb
Update interactive replay speed options and default values
vcharraut May 25, 2026
8ea9985
Add progress tracking to Evaluator's HTML generation and adjust compr…
vcharraut May 25, 2026
7a19f07
Merge branch 'emerge/temp_training' into vcha/update-obs-viz
vcharraut May 25, 2026
a497f33
Enhance interactive replay with pool slot counts and improve visualiz…
vcharraut May 25, 2026
5e71db6
Remove unused traffic control scope constants from drive.h
vcharraut May 26, 2026
8d8cd81
Refactor code structure for improved readability and maintainability
vcharraut May 26, 2026
0b0b823
Merge branch 'emerge/temp_training' into vcha/update-obs-viz
eugenevinitsky May 26, 2026
0e0dcb8
Match temp_training branch
vcharraut May 26, 2026
4575aba
Merge branch 'emerge/temp_training' into vcha/update-obs-viz
vcharraut May 26, 2026
c3642d0
Merge branch 'emerge/temp_training' into vcha/update-obs-viz
vcharraut May 26, 2026
b01cc57
Merge branch 'vcha/update-obs-viz' into emerge/temp_training-oignons
vcharraut May 27, 2026
f0fad65
Add weights
vcharraut May 27, 2026
d3841f7
Enhance interactive replay metadata with target type and feature counts
vcharraut May 27, 2026
63564c0
Update drive configuration: add target waypoints, adjust scenario len…
vcharraut May 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 14 additions & 44 deletions notebooks/01_observations.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,16 @@
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from pufferlib.ocean.drive.drive import Drive\n",
"from pufferlib.ocean.drive import binding\n",
"import pufferlib.viz\n",
"from notebooks.notebook_utils import (\n",
" COEF_NAMES,\n",
" EGO_LABELS,\n",
" make_drive_env,\n",
" notebook_dims,\n",
" random_actions,\n",
" zero_actions,\n",
")\n",
"\n",
"# --- Environment configuration ---\n",
"NUM_AGENTS = 64\n",
Expand All @@ -34,44 +41,15 @@
"COLLISION_BEHAVIOR = 1\n",
"OFFROAD_BEHAVIOR = 1\n",
"SEED = 42\n",
"MAP_DIR = \"../pufferlib/resources/drive/binaries/carla\"\n",
"\n",
"# --- Observation dimensions (configurable) ---\n",
"# --- Observation dimensions ---\n",
"MAX_PARTNERS = 16\n",
"MAX_LANES = 32\n",
"MAX_BOUNDS = 32\n",
"MAX_TRAFFIC = 4\n",
"\n",
"# --- Derived from binding (compile-time) ---\n",
"EGO_DIM = binding.EGO_FEATURES_JERK\n",
"NUM_COEFS = binding.NUM_REWARD_COEFS\n",
"PARTNER_F = binding.PARTNER_FEATURES\n",
"ROAD_F = binding.ROAD_FEATURES\n",
"TRAFFIC_CONTROL_F = binding.TRAFFIC_CONTROL_FEATURES\n",
"NUM_TRAFFIC_CONTROL_TYPES = binding.NUM_TRAFFIC_CONTROL_TYPES\n",
"COEF_NAMES = [\n",
" \"goal_radius\",\n",
" \"collision\",\n",
" \"offroad\",\n",
" \"comfort\",\n",
" \"lane_align\",\n",
" \"lane_center\",\n",
" \"velocity\",\n",
" \"traffic_light\",\n",
" \"center_bias\",\n",
" \"vel_align\",\n",
" \"overspeed\",\n",
" \"timestep\",\n",
" \"reverse\",\n",
" \"throttle\",\n",
" \"steer\",\n",
" \"acc\",\n",
"]\n",
"\n",
"# --- Create environment ---\n",
"env = Drive(\n",
"env, obs, info = make_drive_env(\n",
" num_agents=NUM_AGENTS,\n",
" num_maps=1,\n",
" min_agents_per_env=NUM_AGENTS,\n",
" max_agents_per_env=NUM_AGENTS,\n",
" simulation_mode=SIMULATION_MODE,\n",
Expand All @@ -83,7 +61,6 @@
" reward_conditioning=REWARD_CONDITIONING,\n",
" reward_randomization=REWARD_RANDOMIZATION,\n",
" target_type=TARGET_TYPE,\n",
" map_dir=MAP_DIR,\n",
" collision_behavior=COLLISION_BEHAVIOR,\n",
" offroad_behavior=OFFROAD_BEHAVIOR,\n",
" obs_slots_lane_n=MAX_LANES,\n",
Expand All @@ -92,14 +69,9 @@
" obs_slots_traffic_controls_n=MAX_TRAFFIC,\n",
" seed=SEED,\n",
")\n",
"obs, info = env.reset(seed=SEED)\n",
"globals().update(notebook_dims(env))\n",
"\n",
"# --- Derived from env ---\n",
"MAX_TARGET = env.num_target_waypoints\n",
"TARGET_F = binding.STATIC_TARGET_FEATURES if TARGET_TYPE == \"static\" else binding.DYNAMIC_TARGET_FEATURES\n",
"TARGET_DIM = MAX_TARGET * TARGET_F\n",
"\n",
"print(f\"obs shape: {obs.shape}, dtype: {obs.dtype}\")\n",
"print(f\"env ready: {N} agents, obs={obs.shape}, act_shape={ACT_SHAPE}\")\n",
"print(f\"EGO_DIM={EGO_DIM}, NUM_COEFS={NUM_COEFS}, MAX_PARTNERS={MAX_PARTNERS}, PARTNER_F={PARTNER_F}\")\n",
"print(f\"MAX_LANES={MAX_LANES}, MAX_BOUNDS={MAX_BOUNDS}, ROAD_F={ROAD_F}\")\n",
"print(f\"MAX_TRAFFIC={MAX_TRAFFIC}, TRAFFIC_F={TRAFFIC_CONTROL_F}\")"
Expand All @@ -119,7 +91,7 @@
"outputs": [],
"source": [
"# Take first step so obs are populated\n",
"actions = np.zeros([env.num_agents, 1], dtype=np.int64)\n",
"actions = zero_actions(env)\n",
"\n",
"obs, rew, term, trunc, info = env.step(actions)\n",
"\n",
Expand Down Expand Up @@ -157,7 +129,6 @@
"source": [
"ego, target, partners, lanes, boundaries, traffic = pufferlib.viz.unpack_obs(\n",
" obs[:1],\n",
" dynamics_model=DYNAMICS_MODEL,\n",
" target_type=TARGET_TYPE,\n",
" reward_conditioning=REWARD_CONDITIONING,\n",
" num_target_waypoints=env.num_target_waypoints,\n",
Expand Down Expand Up @@ -398,7 +369,6 @@
"source": [
"img = pufferlib.viz.plot_observation(\n",
" obs[:1],\n",
" dynamics_model=DYNAMICS_MODEL,\n",
" target_type=TARGET_TYPE,\n",
" reward_conditioning=True,\n",
" num_target_waypoints=env.num_target_waypoints,\n",
Expand Down Expand Up @@ -472,7 +442,7 @@
"ego_history = np.zeros((N_STEPS, EGO_DIM))\n",
"\n",
"for t in range(N_STEPS):\n",
" actions = np.zeros([env.num_agents, 1], dtype=np.int64)\n",
" actions = zero_actions(env)\n",
" obs_t, _, _, _, _ = env.step(actions)\n",
" ego_history[t] = obs_t[0, :EGO_DIM]\n",
"\n",
Expand Down
72 changes: 22 additions & 50 deletions notebooks/02_rewards.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,18 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from pufferlib.ocean.drive.drive import Drive\n",
"from pufferlib.ocean.drive import binding\n",
"import pufferlib.viz\n",
"from notebooks.notebook_utils import (\n",
" COEF_NAMES,\n",
" EGO_LABELS,\n",
" make_drive_env,\n",
" notebook_dims,\n",
" random_actions,\n",
" zero_actions,\n",
")\n",
"\n",
"# --- Environment configuration ---\n",
"NUM_AGENTS = 64\n",
Expand All @@ -35,45 +41,15 @@
"COLLISION_BEHAVIOR = 1\n",
"OFFROAD_BEHAVIOR = 1\n",
"SEED = 42\n",
"MAP_DIR = \"../pufferlib/resources/drive/binaries/carla\"\n",
"\n",
"# --- Observation dimensions (configurable) ---\n",
"# --- Observation dimensions ---\n",
"MAX_PARTNERS = 16\n",
"MAX_LANES = 32\n",
"MAX_BOUNDS = 32\n",
"MAX_TRAFFIC = 10\n",
"MAX_STOP_SIGNS = 0\n",
"\n",
"# --- Derived from binding (compile-time) ---\n",
"EGO_DIM = binding.EGO_FEATURES_JERK\n",
"NUM_COEFS = binding.NUM_REWARD_COEFS\n",
"PARTNER_F = binding.PARTNER_FEATURES\n",
"ROAD_F = binding.ROAD_FEATURES\n",
"TRAFFIC_CONTROL_F = binding.TRAFFIC_CONTROL_FEATURES\n",
"NUM_TRAFFIC_CONTROL_TYPES = binding.NUM_TRAFFIC_CONTROL_TYPES\n",
"COEF_NAMES = [\n",
" \"goal_radius\",\n",
" \"collision\",\n",
" \"offroad\",\n",
" \"comfort\",\n",
" \"lane_align\",\n",
" \"lane_center\",\n",
" \"velocity\",\n",
" \"traffic_light\",\n",
" \"center_bias\",\n",
" \"vel_align\",\n",
" \"overspeed\",\n",
" \"timestep\",\n",
" \"reverse\",\n",
" \"throttle\",\n",
" \"steer\",\n",
" \"acc\",\n",
"]\n",
"\n",
"# --- Create environment ---\n",
"env = Drive(\n",
"env, obs, info = make_drive_env(\n",
" num_agents=NUM_AGENTS,\n",
" num_maps=1,\n",
" min_agents_per_env=NUM_AGENTS,\n",
" max_agents_per_env=NUM_AGENTS,\n",
" simulation_mode=SIMULATION_MODE,\n",
Expand All @@ -85,25 +61,20 @@
" reward_conditioning=REWARD_CONDITIONING,\n",
" reward_randomization=REWARD_RANDOMIZATION,\n",
" target_type=TARGET_TYPE,\n",
" map_dir=MAP_DIR,\n",
" collision_behavior=COLLISION_BEHAVIOR,\n",
" offroad_behavior=OFFROAD_BEHAVIOR,\n",
" obs_slots_lane_n=MAX_LANES,\n",
" obs_slots_boundary_n=MAX_BOUNDS,\n",
" obs_slots_partners_n=MAX_PARTNERS,\n",
" obs_slots_traffic_controls_n=MAX_TRAFFIC,\n",
" seed=SEED,\n",
")\n",
"obs, info = env.reset(seed=SEED)\n",
"\n",
"# --- Derived from env ---\n",
"MAX_TARGET = env.num_target_waypoints\n",
"TARGET_F = binding.STATIC_TARGET_FEATURES if TARGET_TYPE == \"static\" else binding.DYNAMIC_TARGET_FEATURES\n",
"TARGET_DIM = MAX_TARGET * TARGET_F\n",
"N_ACTIONS = 12\n",
"N = env.num_agents\n",
"ACT_SHAPE = (N, len(env.single_action_space.nvec))\n",
"globals().update(notebook_dims(env))\n",
"\n",
"print(f\"env ready: {N} agents, obs={obs.shape}, act_shape={ACT_SHAPE}\")"
"print(f\"env ready: {N} agents, obs={obs.shape}, act_shape={ACT_SHAPE}\")\n",
"print(f\"EGO_DIM={EGO_DIM}, NUM_COEFS={NUM_COEFS}, MAX_PARTNERS={MAX_PARTNERS}, PARTNER_F={PARTNER_F}\")\n",
"print(f\"MAX_LANES={MAX_LANES}, MAX_BOUNDS={MAX_BOUNDS}, ROAD_F={ROAD_F}\")\n",
"print(f\"MAX_TRAFFIC={MAX_TRAFFIC}, TRAFFIC_F={TRAFFIC_CONTROL_F}\")"
]
},
{
Expand All @@ -119,7 +90,7 @@
"metadata": {},
"outputs": [],
"source": [
"actions = np.zeros(ACT_SHAPE, dtype=np.int64)\n",
"actions = zero_actions(env)\n",
"obs, rew, term, trunc, info = env.step(actions)\n",
"\n",
"print(f\"reward shape: {rew.shape}\")\n",
Expand Down Expand Up @@ -155,7 +126,7 @@
"terms_history = np.zeros((N_STEPS, N))\n",
"\n",
"for t in range(N_STEPS):\n",
" actions = np.random.randint(0, N_ACTIONS, size=ACT_SHAPE)\n",
" actions = random_actions(env)\n",
" obs, rew, term, trunc, info = env.step(actions)\n",
" rewards_history[t] = rew\n",
" terms_history[t] = term\n",
Expand Down Expand Up @@ -228,7 +199,7 @@
"term_rewards, trunc_rewards = [], []\n",
"\n",
"for t in range(N_STEPS):\n",
" actions = np.random.randint(0, N_ACTIONS, size=ACT_SHAPE)\n",
" actions = random_actions(env)\n",
" obs, rew, term, trunc, info = env.step(actions)\n",
" for i in range(N):\n",
" if term[i]:\n",
Expand Down Expand Up @@ -279,11 +250,12 @@
"\n",
"for t in range(N_STEPS):\n",
" prev_obs = obs.copy()\n",
" actions = np.random.randint(0, N_ACTIONS, size=ACT_SHAPE)\n",
" actions = random_actions(env)\n",
" obs, rew, term, trunc, info = env.step(actions)\n",
" for i in range(N):\n",
" if rew[i] >= 0.5:\n",
" goal_dist = np.sqrt(prev_obs[i, 0] ** 2 + prev_obs[i, 1] ** 2)\n",
" target_start = EGO_DIM + NUM_COEFS\n",
" goal_dist = np.sqrt(prev_obs[i, target_start] ** 2 + prev_obs[i, target_start + 1] ** 2)\n",
" goal_events.append((t, i, rew[i], goal_dist))\n",
"\n",
"print(f\"Goal-like events (reward >= 0.5): {len(goal_events)}\")\n",
Expand Down
66 changes: 19 additions & 47 deletions notebooks/03_metrics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,16 @@
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from pufferlib.ocean.drive.drive import Drive\n",
"from pufferlib.ocean.drive import binding\n",
"import pufferlib.viz\n",
"from notebooks.notebook_utils import (\n",
" COEF_NAMES,\n",
" EGO_LABELS,\n",
" make_drive_env,\n",
" notebook_dims,\n",
" random_actions,\n",
" zero_actions,\n",
")\n",
"\n",
"# --- Environment configuration ---\n",
"NUM_AGENTS = 64\n",
Expand All @@ -34,45 +41,15 @@
"COLLISION_BEHAVIOR = 1\n",
"OFFROAD_BEHAVIOR = 1\n",
"SEED = 42\n",
"MAP_DIR = \"../pufferlib/resources/drive/binaries/carla\"\n",
"\n",
"# --- Observation dimensions (configurable) ---\n",
"# --- Observation dimensions ---\n",
"MAX_PARTNERS = 16\n",
"MAX_LANES = 32\n",
"MAX_BOUNDS = 32\n",
"MAX_TRAFFIC = 10\n",
"MAX_STOP_SIGNS = 0\n",
"\n",
"# --- Derived from binding (compile-time) ---\n",
"EGO_DIM = binding.EGO_FEATURES_JERK\n",
"NUM_COEFS = binding.NUM_REWARD_COEFS\n",
"PARTNER_F = binding.PARTNER_FEATURES\n",
"ROAD_F = binding.ROAD_FEATURES\n",
"TRAFFIC_CONTROL_F = binding.TRAFFIC_CONTROL_FEATURES\n",
"NUM_TRAFFIC_CONTROL_TYPES = binding.NUM_TRAFFIC_CONTROL_TYPES\n",
"COEF_NAMES = [\n",
" \"goal_radius\",\n",
" \"collision\",\n",
" \"offroad\",\n",
" \"comfort\",\n",
" \"lane_align\",\n",
" \"lane_center\",\n",
" \"velocity\",\n",
" \"traffic_light\",\n",
" \"center_bias\",\n",
" \"vel_align\",\n",
" \"overspeed\",\n",
" \"timestep\",\n",
" \"reverse\",\n",
" \"throttle\",\n",
" \"steer\",\n",
" \"acc\",\n",
"]\n",
"\n",
"# --- Create environment ---\n",
"env = Drive(\n",
"env, obs, info = make_drive_env(\n",
" num_agents=NUM_AGENTS,\n",
" num_maps=1,\n",
" min_agents_per_env=NUM_AGENTS,\n",
" max_agents_per_env=NUM_AGENTS,\n",
" simulation_mode=SIMULATION_MODE,\n",
Expand All @@ -84,25 +61,20 @@
" reward_conditioning=REWARD_CONDITIONING,\n",
" reward_randomization=REWARD_RANDOMIZATION,\n",
" target_type=TARGET_TYPE,\n",
" map_dir=MAP_DIR,\n",
" collision_behavior=COLLISION_BEHAVIOR,\n",
" offroad_behavior=OFFROAD_BEHAVIOR,\n",
" obs_slots_lane_n=MAX_LANES,\n",
" obs_slots_boundary_n=MAX_BOUNDS,\n",
" obs_slots_partners_n=MAX_PARTNERS,\n",
" obs_slots_traffic_controls_n=MAX_TRAFFIC,\n",
" seed=SEED,\n",
")\n",
"obs, info = env.reset(seed=SEED)\n",
"\n",
"# --- Derived from env ---\n",
"MAX_TARGET = env.num_target_waypoints\n",
"TARGET_F = binding.STATIC_TARGET_FEATURES if TARGET_TYPE == \"static\" else binding.DYNAMIC_TARGET_FEATURES\n",
"TARGET_DIM = MAX_TARGET * TARGET_F\n",
"N_ACTIONS = 12\n",
"N = env.num_agents\n",
"ACT_SHAPE = (N, len(env.single_action_space.nvec))\n",
"globals().update(notebook_dims(env))\n",
"\n",
"print(f\"env ready: {N} agents, act_shape={ACT_SHAPE}\")"
"print(f\"env ready: {N} agents, obs={obs.shape}, act_shape={ACT_SHAPE}\")\n",
"print(f\"EGO_DIM={EGO_DIM}, NUM_COEFS={NUM_COEFS}, MAX_PARTNERS={MAX_PARTNERS}, PARTNER_F={PARTNER_F}\")\n",
"print(f\"MAX_LANES={MAX_LANES}, MAX_BOUNDS={MAX_BOUNDS}, ROAD_F={ROAD_F}\")\n",
"print(f\"MAX_TRAFFIC={MAX_TRAFFIC}, TRAFFIC_F={TRAFFIC_CONTROL_F}\")"
]
},
{
Expand All @@ -119,7 +91,7 @@
"outputs": [],
"source": [
"for _ in range(10):\n",
" actions = np.random.randint(0, N_ACTIONS, size=ACT_SHAPE)\n",
" actions = random_actions(env)\n",
" obs, rew, term, trunc, info = env.step(actions)\n",
"\n",
"log = binding.vec_log(env.c_envs, N)\n",
Expand Down Expand Up @@ -152,7 +124,7 @@
"all_truncs = np.zeros((N_STEPS, N))\n",
"\n",
"for t in range(N_STEPS):\n",
" actions = np.random.randint(0, N_ACTIONS, size=ACT_SHAPE)\n",
" actions = random_actions(env)\n",
" obs, rew, term, trunc, info = env.step(actions)\n",
" all_rewards[t] = rew\n",
" all_terms[t] = term\n",
Expand Down Expand Up @@ -242,7 +214,7 @@
"xy_history = np.zeros((TRACK_STEPS, TRACK_AGENTS, 2))\n",
"\n",
"for t in range(TRACK_STEPS):\n",
" actions = np.random.randint(0, N_ACTIONS, size=ACT_SHAPE)\n",
" actions = random_actions(env)\n",
" env.step(actions)\n",
" states = env.get_global_agent_state()\n",
" for i in range(TRACK_AGENTS):\n",
Expand Down
Loading
Loading