Emerge-Lab · eugenevinitsky · May 22, 2026 · May 24, 2026 · May 24, 2026 · May 24, 2026
diff --git a/notebooks/01_observations.ipynb b/notebooks/01_observations.ipynb
@@ -16,9 +16,16 @@
    "source": [
     "import numpy as np\n",
     "import matplotlib.pyplot as plt\n",
-    "from pufferlib.ocean.drive.drive import Drive\n",
     "from pufferlib.ocean.drive import binding\n",
     "import pufferlib.viz\n",
+    "from notebooks.notebook_utils import (\n",
+    "    COEF_NAMES,\n",
+    "    EGO_LABELS,\n",
+    "    make_drive_env,\n",
+    "    notebook_dims,\n",
+    "    random_actions,\n",
+    "    zero_actions,\n",
+    ")\n",
     "\n",
     "# --- Environment configuration ---\n",
     "NUM_AGENTS = 64\n",
@@ -34,44 +41,15 @@
     "COLLISION_BEHAVIOR = 1\n",
     "OFFROAD_BEHAVIOR = 1\n",
     "SEED = 42\n",
-    "MAP_DIR = \"../pufferlib/resources/drive/binaries/carla\"\n",
     "\n",
-    "# --- Observation dimensions (configurable) ---\n",
+    "# --- Observation dimensions ---\n",
     "MAX_PARTNERS = 16\n",
     "MAX_LANES = 32\n",
     "MAX_BOUNDS = 32\n",
     "MAX_TRAFFIC = 4\n",
     "\n",
-    "# --- Derived from binding (compile-time) ---\n",
-    "EGO_DIM = binding.EGO_FEATURES_JERK\n",
-    "NUM_COEFS = binding.NUM_REWARD_COEFS\n",
-    "PARTNER_F = binding.PARTNER_FEATURES\n",
-    "ROAD_F = binding.ROAD_FEATURES\n",
-    "TRAFFIC_CONTROL_F = binding.TRAFFIC_CONTROL_FEATURES\n",
-    "NUM_TRAFFIC_CONTROL_TYPES = binding.NUM_TRAFFIC_CONTROL_TYPES\n",
-    "COEF_NAMES = [\n",
-    "    \"goal_radius\",\n",
-    "    \"collision\",\n",
-    "    \"offroad\",\n",
-    "    \"comfort\",\n",
-    "    \"lane_align\",\n",
-    "    \"lane_center\",\n",
-    "    \"velocity\",\n",
-    "    \"traffic_light\",\n",
-    "    \"center_bias\",\n",
-    "    \"vel_align\",\n",
-    "    \"overspeed\",\n",
-    "    \"timestep\",\n",
-    "    \"reverse\",\n",
-    "    \"throttle\",\n",
-    "    \"steer\",\n",
-    "    \"acc\",\n",
-    "]\n",
-    "\n",
-    "# --- Create environment ---\n",
-    "env = Drive(\n",
+    "env, obs, info = make_drive_env(\n",
     "    num_agents=NUM_AGENTS,\n",
-    "    num_maps=1,\n",
     "    min_agents_per_env=NUM_AGENTS,\n",
     "    max_agents_per_env=NUM_AGENTS,\n",
     "    simulation_mode=SIMULATION_MODE,\n",
@@ -83,7 +61,6 @@
     "    reward_conditioning=REWARD_CONDITIONING,\n",
     "    reward_randomization=REWARD_RANDOMIZATION,\n",
     "    target_type=TARGET_TYPE,\n",
-    "    map_dir=MAP_DIR,\n",
     "    collision_behavior=COLLISION_BEHAVIOR,\n",
     "    offroad_behavior=OFFROAD_BEHAVIOR,\n",
     "    obs_slots_lane_n=MAX_LANES,\n",
@@ -92,14 +69,9 @@
     "    obs_slots_traffic_controls_n=MAX_TRAFFIC,\n",
     "    seed=SEED,\n",
     ")\n",
-    "obs, info = env.reset(seed=SEED)\n",
+    "globals().update(notebook_dims(env))\n",
     "\n",
-    "# --- Derived from env ---\n",
-    "MAX_TARGET = env.num_target_waypoints\n",
-    "TARGET_F = binding.STATIC_TARGET_FEATURES if TARGET_TYPE == \"static\" else binding.DYNAMIC_TARGET_FEATURES\n",
-    "TARGET_DIM = MAX_TARGET * TARGET_F\n",
-    "\n",
-    "print(f\"obs shape: {obs.shape}, dtype: {obs.dtype}\")\n",
+    "print(f\"env ready: {N} agents, obs={obs.shape}, act_shape={ACT_SHAPE}\")\n",
     "print(f\"EGO_DIM={EGO_DIM}, NUM_COEFS={NUM_COEFS}, MAX_PARTNERS={MAX_PARTNERS}, PARTNER_F={PARTNER_F}\")\n",
     "print(f\"MAX_LANES={MAX_LANES}, MAX_BOUNDS={MAX_BOUNDS}, ROAD_F={ROAD_F}\")\n",
     "print(f\"MAX_TRAFFIC={MAX_TRAFFIC}, TRAFFIC_F={TRAFFIC_CONTROL_F}\")"
@@ -119,7 +91,7 @@
    "outputs": [],
    "source": [
     "# Take first step so obs are populated\n",
-    "actions = np.zeros([env.num_agents, 1], dtype=np.int64)\n",
+    "actions = zero_actions(env)\n",
     "\n",
     "obs, rew, term, trunc, info = env.step(actions)\n",
     "\n",
@@ -157,7 +129,6 @@
    "source": [
     "ego, target, partners, lanes, boundaries, traffic = pufferlib.viz.unpack_obs(\n",
     "    obs[:1],\n",
-    "    dynamics_model=DYNAMICS_MODEL,\n",
     "    target_type=TARGET_TYPE,\n",
     "    reward_conditioning=REWARD_CONDITIONING,\n",
     "    num_target_waypoints=env.num_target_waypoints,\n",
@@ -398,7 +369,6 @@
    "source": [
     "img = pufferlib.viz.plot_observation(\n",
     "    obs[:1],\n",
-    "    dynamics_model=DYNAMICS_MODEL,\n",
     "    target_type=TARGET_TYPE,\n",
-    "    reward_conditioning=True,\n",
+    "    reward_conditioning=REWARD_CONDITIONING,\n",
     "    num_target_waypoints=env.num_target_waypoints,\n",
@@ -472,7 +442,7 @@
     "ego_history = np.zeros((N_STEPS, EGO_DIM))\n",
     "\n",
     "for t in range(N_STEPS):\n",
-    "    actions = np.zeros([env.num_agents, 1], dtype=np.int64)\n",
+    "    actions = zero_actions(env)\n",
     "    obs_t, _, _, _, _ = env.step(actions)\n",
     "    ego_history[t] = obs_t[0, :EGO_DIM]\n",
     "\n",

diff --git a/notebooks/02_rewards.ipynb b/notebooks/02_rewards.ipynb
@@ -14,12 +14,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import os\n",
     "import numpy as np\n",
     "import matplotlib.pyplot as plt\n",
-    "from pufferlib.ocean.drive.drive import Drive\n",
     "from pufferlib.ocean.drive import binding\n",
     "import pufferlib.viz\n",
+    "from notebooks.notebook_utils import (\n",
+    "    COEF_NAMES,\n",
+    "    EGO_LABELS,\n",
+    "    make_drive_env,\n",
+    "    notebook_dims,\n",
+    "    random_actions,\n",
+    "    zero_actions,\n",
+    ")\n",
     "\n",
     "# --- Environment configuration ---\n",
     "NUM_AGENTS = 64\n",
@@ -35,45 +41,15 @@
     "COLLISION_BEHAVIOR = 1\n",
     "OFFROAD_BEHAVIOR = 1\n",
     "SEED = 42\n",
-    "MAP_DIR = \"../pufferlib/resources/drive/binaries/carla\"\n",
     "\n",
-    "# --- Observation dimensions (configurable) ---\n",
+    "# --- Observation dimensions ---\n",
     "MAX_PARTNERS = 16\n",
     "MAX_LANES = 32\n",
     "MAX_BOUNDS = 32\n",
     "MAX_TRAFFIC = 10\n",
-    "MAX_STOP_SIGNS = 0\n",
-    "\n",
-    "# --- Derived from binding (compile-time) ---\n",
-    "EGO_DIM = binding.EGO_FEATURES_JERK\n",
-    "NUM_COEFS = binding.NUM_REWARD_COEFS\n",
-    "PARTNER_F = binding.PARTNER_FEATURES\n",
-    "ROAD_F = binding.ROAD_FEATURES\n",
-    "TRAFFIC_CONTROL_F = binding.TRAFFIC_CONTROL_FEATURES\n",
-    "NUM_TRAFFIC_CONTROL_TYPES = binding.NUM_TRAFFIC_CONTROL_TYPES\n",
-    "COEF_NAMES = [\n",
-    "    \"goal_radius\",\n",
-    "    \"collision\",\n",
-    "    \"offroad\",\n",
-    "    \"comfort\",\n",
-    "    \"lane_align\",\n",
-    "    \"lane_center\",\n",
-    "    \"velocity\",\n",
-    "    \"traffic_light\",\n",
-    "    \"center_bias\",\n",
-    "    \"vel_align\",\n",
-    "    \"overspeed\",\n",
-    "    \"timestep\",\n",
-    "    \"reverse\",\n",
-    "    \"throttle\",\n",
-    "    \"steer\",\n",
-    "    \"acc\",\n",
-    "]\n",
     "\n",
-    "# --- Create environment ---\n",
-    "env = Drive(\n",
+    "env, obs, info = make_drive_env(\n",
     "    num_agents=NUM_AGENTS,\n",
-    "    num_maps=1,\n",
     "    min_agents_per_env=NUM_AGENTS,\n",
     "    max_agents_per_env=NUM_AGENTS,\n",
     "    simulation_mode=SIMULATION_MODE,\n",
@@ -85,25 +61,20 @@
     "    reward_conditioning=REWARD_CONDITIONING,\n",
     "    reward_randomization=REWARD_RANDOMIZATION,\n",
     "    target_type=TARGET_TYPE,\n",
-    "    map_dir=MAP_DIR,\n",
     "    collision_behavior=COLLISION_BEHAVIOR,\n",
     "    offroad_behavior=OFFROAD_BEHAVIOR,\n",
     "    obs_slots_lane_n=MAX_LANES,\n",
     "    obs_slots_boundary_n=MAX_BOUNDS,\n",
     "    obs_slots_partners_n=MAX_PARTNERS,\n",
+    "    obs_slots_traffic_controls_n=MAX_TRAFFIC,\n",
     "    seed=SEED,\n",
     ")\n",
-    "obs, info = env.reset(seed=SEED)\n",
-    "\n",
-    "# --- Derived from env ---\n",
-    "MAX_TARGET = env.num_target_waypoints\n",
-    "TARGET_F = binding.STATIC_TARGET_FEATURES if TARGET_TYPE == \"static\" else binding.DYNAMIC_TARGET_FEATURES\n",
-    "TARGET_DIM = MAX_TARGET * TARGET_F\n",
-    "N_ACTIONS = 12\n",
-    "N = env.num_agents\n",
-    "ACT_SHAPE = (N, len(env.single_action_space.nvec))\n",
+    "globals().update(notebook_dims(env))\n",
     "\n",
-    "print(f\"env ready: {N} agents, obs={obs.shape}, act_shape={ACT_SHAPE}\")"
+    "print(f\"env ready: {N} agents, obs={obs.shape}, act_shape={ACT_SHAPE}\")\n",
+    "print(f\"EGO_DIM={EGO_DIM}, NUM_COEFS={NUM_COEFS}, MAX_PARTNERS={MAX_PARTNERS}, PARTNER_F={PARTNER_F}\")\n",
+    "print(f\"MAX_LANES={MAX_LANES}, MAX_BOUNDS={MAX_BOUNDS}, ROAD_F={ROAD_F}\")\n",
+    "print(f\"MAX_TRAFFIC={MAX_TRAFFIC}, TRAFFIC_F={TRAFFIC_CONTROL_F}\")"
    ]
   },
   {
@@ -119,7 +90,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "actions = np.zeros(ACT_SHAPE, dtype=np.int64)\n",
+    "actions = zero_actions(env)\n",
     "obs, rew, term, trunc, info = env.step(actions)\n",
     "\n",
     "print(f\"reward shape: {rew.shape}\")\n",
@@ -155,7 +126,7 @@
     "terms_history = np.zeros((N_STEPS, N))\n",
     "\n",
     "for t in range(N_STEPS):\n",
-    "    actions = np.random.randint(0, N_ACTIONS, size=ACT_SHAPE)\n",
+    "    actions = random_actions(env)\n",
     "    obs, rew, term, trunc, info = env.step(actions)\n",
     "    rewards_history[t] = rew\n",
     "    terms_history[t] = term\n",
@@ -228,7 +199,7 @@
     "term_rewards, trunc_rewards = [], []\n",
     "\n",
     "for t in range(N_STEPS):\n",
-    "    actions = np.random.randint(0, N_ACTIONS, size=ACT_SHAPE)\n",
+    "    actions = random_actions(env)\n",
     "    obs, rew, term, trunc, info = env.step(actions)\n",
     "    for i in range(N):\n",
     "        if term[i]:\n",
@@ -279,11 +250,12 @@
     "\n",
+    "target_start = EGO_DIM + NUM_COEFS  # offset of the target block in obs (assumes ego + reward-coef slots precede it)\n",
     "for t in range(N_STEPS):\n",
     "    prev_obs = obs.copy()\n",
-    "    actions = np.random.randint(0, N_ACTIONS, size=ACT_SHAPE)\n",
+    "    actions = random_actions(env)\n",
     "    obs, rew, term, trunc, info = env.step(actions)\n",
     "    for i in range(N):\n",
     "        if rew[i] >= 0.5:\n",
-    "            goal_dist = np.sqrt(prev_obs[i, 0] ** 2 + prev_obs[i, 1] ** 2)\n",
+    "            goal_dist = np.hypot(prev_obs[i, target_start], prev_obs[i, target_start + 1])\n",
     "            goal_events.append((t, i, rew[i], goal_dist))\n",
     "\n",
     "print(f\"Goal-like events (reward >= 0.5): {len(goal_events)}\")\n",

diff --git a/notebooks/03_metrics.ipynb b/notebooks/03_metrics.ipynb
@@ -16,9 +16,16 @@
    "source": [
     "import numpy as np\n",
     "import matplotlib.pyplot as plt\n",
-    "from pufferlib.ocean.drive.drive import Drive\n",
     "from pufferlib.ocean.drive import binding\n",
     "import pufferlib.viz\n",
+    "from notebooks.notebook_utils import (\n",
+    "    COEF_NAMES,\n",
+    "    EGO_LABELS,\n",
+    "    make_drive_env,\n",
+    "    notebook_dims,\n",
+    "    random_actions,\n",
+    "    zero_actions,\n",
+    ")\n",
     "\n",
     "# --- Environment configuration ---\n",
     "NUM_AGENTS = 64\n",
@@ -34,45 +41,15 @@
     "COLLISION_BEHAVIOR = 1\n",
     "OFFROAD_BEHAVIOR = 1\n",
     "SEED = 42\n",
-    "MAP_DIR = \"../pufferlib/resources/drive/binaries/carla\"\n",
     "\n",
-    "# --- Observation dimensions (configurable) ---\n",
+    "# --- Observation dimensions ---\n",
     "MAX_PARTNERS = 16\n",
     "MAX_LANES = 32\n",
     "MAX_BOUNDS = 32\n",
     "MAX_TRAFFIC = 10\n",
-    "MAX_STOP_SIGNS = 0\n",
-    "\n",
-    "# --- Derived from binding (compile-time) ---\n",
-    "EGO_DIM = binding.EGO_FEATURES_JERK\n",
-    "NUM_COEFS = binding.NUM_REWARD_COEFS\n",
-    "PARTNER_F = binding.PARTNER_FEATURES\n",
-    "ROAD_F = binding.ROAD_FEATURES\n",
-    "TRAFFIC_CONTROL_F = binding.TRAFFIC_CONTROL_FEATURES\n",
-    "NUM_TRAFFIC_CONTROL_TYPES = binding.NUM_TRAFFIC_CONTROL_TYPES\n",
-    "COEF_NAMES = [\n",
-    "    \"goal_radius\",\n",
-    "    \"collision\",\n",
-    "    \"offroad\",\n",
-    "    \"comfort\",\n",
-    "    \"lane_align\",\n",
-    "    \"lane_center\",\n",
-    "    \"velocity\",\n",
-    "    \"traffic_light\",\n",
-    "    \"center_bias\",\n",
-    "    \"vel_align\",\n",
-    "    \"overspeed\",\n",
-    "    \"timestep\",\n",
-    "    \"reverse\",\n",
-    "    \"throttle\",\n",
-    "    \"steer\",\n",
-    "    \"acc\",\n",
-    "]\n",
     "\n",
-    "# --- Create environment ---\n",
-    "env = Drive(\n",
+    "env, obs, info = make_drive_env(\n",
     "    num_agents=NUM_AGENTS,\n",
-    "    num_maps=1,\n",
     "    min_agents_per_env=NUM_AGENTS,\n",
     "    max_agents_per_env=NUM_AGENTS,\n",
     "    simulation_mode=SIMULATION_MODE,\n",
@@ -84,25 +61,20 @@
     "    reward_conditioning=REWARD_CONDITIONING,\n",
     "    reward_randomization=REWARD_RANDOMIZATION,\n",
     "    target_type=TARGET_TYPE,\n",
-    "    map_dir=MAP_DIR,\n",
     "    collision_behavior=COLLISION_BEHAVIOR,\n",
     "    offroad_behavior=OFFROAD_BEHAVIOR,\n",
     "    obs_slots_lane_n=MAX_LANES,\n",
     "    obs_slots_boundary_n=MAX_BOUNDS,\n",
     "    obs_slots_partners_n=MAX_PARTNERS,\n",
+    "    obs_slots_traffic_controls_n=MAX_TRAFFIC,\n",
     "    seed=SEED,\n",
     ")\n",
-    "obs, info = env.reset(seed=SEED)\n",
-    "\n",
-    "# --- Derived from env ---\n",
-    "MAX_TARGET = env.num_target_waypoints\n",
-    "TARGET_F = binding.STATIC_TARGET_FEATURES if TARGET_TYPE == \"static\" else binding.DYNAMIC_TARGET_FEATURES\n",
-    "TARGET_DIM = MAX_TARGET * TARGET_F\n",
-    "N_ACTIONS = 12\n",
-    "N = env.num_agents\n",
-    "ACT_SHAPE = (N, len(env.single_action_space.nvec))\n",
+    "globals().update(notebook_dims(env))\n",
     "\n",
-    "print(f\"env ready: {N} agents, act_shape={ACT_SHAPE}\")"
+    "print(f\"env ready: {N} agents, obs={obs.shape}, act_shape={ACT_SHAPE}\")\n",
+    "print(f\"EGO_DIM={EGO_DIM}, NUM_COEFS={NUM_COEFS}, MAX_PARTNERS={MAX_PARTNERS}, PARTNER_F={PARTNER_F}\")\n",
+    "print(f\"MAX_LANES={MAX_LANES}, MAX_BOUNDS={MAX_BOUNDS}, ROAD_F={ROAD_F}\")\n",
+    "print(f\"MAX_TRAFFIC={MAX_TRAFFIC}, TRAFFIC_F={TRAFFIC_CONTROL_F}\")"
    ]
   },
   {
@@ -119,7 +91,7 @@
    "outputs": [],
    "source": [
     "for _ in range(10):\n",
-    "    actions = np.random.randint(0, N_ACTIONS, size=ACT_SHAPE)\n",
+    "    actions = random_actions(env)\n",
     "    obs, rew, term, trunc, info = env.step(actions)\n",
     "\n",
     "log = binding.vec_log(env.c_envs, N)\n",
@@ -152,7 +124,7 @@
     "all_truncs = np.zeros((N_STEPS, N))\n",
     "\n",
     "for t in range(N_STEPS):\n",
-    "    actions = np.random.randint(0, N_ACTIONS, size=ACT_SHAPE)\n",
+    "    actions = random_actions(env)\n",
     "    obs, rew, term, trunc, info = env.step(actions)\n",
     "    all_rewards[t] = rew\n",
     "    all_terms[t] = term\n",
@@ -242,7 +214,7 @@
     "xy_history = np.zeros((TRACK_STEPS, TRACK_AGENTS, 2))\n",
     "\n",
     "for t in range(TRACK_STEPS):\n",
-    "    actions = np.random.randint(0, N_ACTIONS, size=ACT_SHAPE)\n",
+    "    actions = random_actions(env)\n",
     "    env.step(actions)\n",
     "    states = env.get_global_agent_state()\n",
     "    for i in range(TRACK_AGENTS):\n",