diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fd48acc..9648fe4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/ambv/black - rev: stable + rev: 23.12.1 hooks: - id: black language_version: python3.8 diff --git a/minihack/envs/boxohack.py b/minihack/envs/boxohack.py index 4a3535a..bb4f3e6 100644 --- a/minihack/envs/boxohack.py +++ b/minihack/envs/boxohack.py @@ -6,9 +6,12 @@ from importlib_resources import files from nle import nethack from minihack.envs import register -from minihack import LevelGenerator, MiniHackNavigation +from minihack.level_generator import LevelGenerator +from minihack.navigation import MiniHackNavigation -LEVELS_PATH = files("minihack.dat").joinpath("boxoban-levels-master") +LEVELS_PATH = ( + files("minihack").joinpath("dat").joinpath("boxoban-levels-master") +) # The agent can only move towards 4 cardinal directions (instead of default 8) MOVE_ACTIONS = tuple(nethack.CompassCardinalDirection) @@ -43,11 +46,11 @@ def __init__(self, *args, **kwargs): self._flags = tuple(kwargs.pop("flags", [])) try: self._levels = load_boxoban_levels(cur_levels_path) - except FileNotFoundError: + except FileNotFoundError as e: raise ModuleNotFoundError( "To use Boxoban environments, please download maps using " "the minihack/scripts/download_boxoban_levels.py script." - ) + ) from e self._reward_shaping_coefficient = kwargs.pop( "reward_shaping_coefficient", 0 @@ -56,6 +59,7 @@ def __init__(self, *args, **kwargs): super().__init__( *args, des_file=self.get_lvl_gen().get_des(), **kwargs ) + self._goal_pos_set = None def get_env_map(self, level): info = {"fountains": [], "boulders": []} @@ -90,14 +94,18 @@ def get_lvl_gen(self): lvl_gen.set_start_pos(info["player"]) return lvl_gen - def reset(self, options=dict(wizkit_items=None)): + def reset(self, *args, **kwargs): self.update(self.get_lvl_gen().get_des()) - initial_obs = super().reset(options=options) + if "options" not in kwargs: + kwargs["options"] = dict(wizkit_items=None) + initial_obs = super().reset(*args, **kwargs) self._goal_pos_set = self._object_positions(self.last_observation, "{") return initial_obs def _is_episode_end(self, observation): # If every boulder is on a fountain, we're done + if self._goal_pos_set is None: + return self.StepStatus.RUNNING if self._goal_pos_set == self._object_positions(observation, "`"): return self.StepStatus.TASK_SUCCESSFUL else: diff --git a/minihack/scripts/download_boxoban_levels.py b/minihack/scripts/download_boxoban_levels.py index b18d56b..2802802 100644 --- a/minihack/scripts/download_boxoban_levels.py +++ b/minihack/scripts/download_boxoban_levels.py @@ -4,7 +4,7 @@ import zipfile from importlib_resources import files -DESTINATION_PATH = files("minihack.dat") +DESTINATION_PATH = files("minihack").joinpath("dat") BOXOBAN_REPO_URL = ( "https://github.com/deepmind/boxoban-levels/archive/refs/heads/master.zip" ) @@ -14,7 +14,7 @@ def download_boxoban_levels(): print("Downloading Boxoban levels...") os.system( f"wget -c --read-timeout=5 --tries=0 " - f'"{BOXOBAN_REPO_URL}" -P {DESTINATION_PATH}' + f"{BOXOBAN_REPO_URL!r} -P {DESTINATION_PATH}" ) print("Boxoban levels downloaded, unpacking...") diff --git a/minihack/tests/test_envs.py b/minihack/tests/test_envs.py index 29d1ed9..f27a7a3 100644 --- a/minihack/tests/test_envs.py +++ b/minihack/tests/test_envs.py @@ -6,6 +6,7 @@ import sys import tempfile +import shutil import gymnasium as gym import numpy as np import pytest @@ -14,6 +15,9 @@ import nle from nle import nethack +from minihack.scripts.download_boxoban_levels import download_boxoban_levels +from minihack.envs.boxohack import LEVELS_PATH + def get_minihack_env_ids(): specs = gym.envs.registry.keys() @@ -163,7 +167,8 @@ def test_rollout(self, env_name, rollout_len): assert os.path.exists( os.path.join( savedir, - "nle.%i.0.ttyrec%i.bz2" % (os.getpid(), nethack.TTYREC_VERSION), + "nle.%i.0.ttyrec%i.bz2" + % (os.getpid(), nethack.TTYREC_VERSION), ) ) assert os.path.exists( @@ -290,12 +295,16 @@ def test_reward(self, env): _, _ = env.reset() for _ in range(4): - _, reward, done, _, _ = env.step(env.unwrapped.actions.index(ord("j"))) + _, reward, done, _, _ = env.step( + env.unwrapped.actions.index(ord("j")) + ) assert reward == 0.0 assert not done for _ in range(3): - _, reward, done, _, _ = env.step(env.unwrapped.actions.index(ord("l"))) + _, reward, done, _, _ = env.step( + env.unwrapped.actions.index(ord("l")) + ) assert reward == 0.0 assert not done @@ -304,3 +313,32 @@ def test_reward(self, env): assert done assert reward == 1.0 + + +@pytest.mark.parametrize( + "env_name", + [ + "MiniHack-Boxoban-Unfiltered-v0", + "MiniHack-Boxoban-Medium-v0", + "MiniHack-Boxoban-Hard-v0", + ], +) +class TestBoxoban: + @pytest.fixture(autouse=True) + def setup_boxoban_levels(self): + self.levels_downloaded = False + if not os.path.isdir(LEVELS_PATH): + download_boxoban_levels() + self.levels_downloaded = True + + yield + + if self.levels_downloaded and os.path.isdir(LEVELS_PATH): + shutil.rmtree(LEVELS_PATH) + + def test_boxoban_init(self, env_name): + env = gym.make(env_name) + env.reset() + for _ in range(5): + env.step(env.action_space.sample()) + env.close()