diff --git a/predicators/approaches/bilevel_planning_approach.py b/predicators/approaches/bilevel_planning_approach.py index cb8ca38347..4f67894bad 100644 --- a/predicators/approaches/bilevel_planning_approach.py +++ b/predicators/approaches/bilevel_planning_approach.py @@ -72,6 +72,11 @@ def _solve(self, task: Task, timeout: int) -> Callable[[State], Action]: for act in nsrt_plan: logging.debug(act) + for act in nsrt_plan: + print(act) + import ipdb + ipdb.set_trace() + # Run full bilevel planning. else: option_plan, nsrt_plan, metrics = self._run_sesame_plan( diff --git a/predicators/approaches/documentation/grammar_search_invention_approach.md b/predicators/approaches/documentation/grammar_search_invention_approach.md index 7d629f0d78..c527e747c0 100644 --- a/predicators/approaches/documentation/grammar_search_invention_approach.md +++ b/predicators/approaches/documentation/grammar_search_invention_approach.md @@ -62,7 +62,7 @@ apple_coring__vlm_demos__456__2 | options.txt ### Running predicate invention using these image demos -To use the Gemini VLM, you need to set the `GOOGLE_API_KEY` environment variable in your terminal. You can make/get an API key [here](https://aistudio.google.com/app/apikey). +To use the Gemini VLM, you need to set the `GOOGLE_API_KEY` environment variable in your terminal. You can make/get an API key [here](https://aistudio.google.com/app/apikey). To use an OpenAI VLM, the `OPENAI_API_KEY` variable needs to be set. 
Example command: `python predicators/main.py --env apple_coring --seed 456 --approach grammar_search_invention --excluded_predicates all --num_train_tasks 1 --num_test_tasks 0 --offline_data_method saved_vlm_img_demos_folder --vlm_trajs_folder_name apple_coring__vlm_demos__456__1` diff --git a/predicators/approaches/spot_wrapper_approach.py b/predicators/approaches/spot_wrapper_approach.py index 85af72d148..ba82a42f92 100644 --- a/predicators/approaches/spot_wrapper_approach.py +++ b/predicators/approaches/spot_wrapper_approach.py @@ -18,6 +18,7 @@ from predicators import utils from predicators.approaches import BaseApproach, BaseApproachWrapper from predicators.envs.spot_env import get_detection_id_for_object, get_robot +from predicators.settings import CFG from predicators.spot_utils.skills.spot_find_objects import find_objects from predicators.spot_utils.skills.spot_stow_arm import stow_arm from predicators.spot_utils.utils import get_allowed_map_regions @@ -66,7 +67,7 @@ def _policy(state: State) -> Action: state.get(obj, "lost") > 0.5: lost_objects.add(obj) # Need to find the objects. - if lost_objects: + if lost_objects and len(CFG.spot_vlm_teleop_demo_folderpath) == 0: logging.info(f"[Spot Wrapper] Lost objects: {lost_objects}") # Reset the base approach policy. 
base_approach_policy = None diff --git a/predicators/envs/spot_env.py b/predicators/envs/spot_env.py index 911ff34dd4..af7a944e14 100644 --- a/predicators/envs/spot_env.py +++ b/predicators/envs/spot_env.py @@ -6,7 +6,7 @@ import time from dataclasses import dataclass from pathlib import Path -from typing import Callable, ClassVar, Collection, Dict, Iterator, List, \ +from typing import Any, Callable, ClassVar, Collection, Dict, Iterator, List, \ Optional, Sequence, Set, Tuple import matplotlib @@ -42,7 +42,7 @@ from predicators.spot_utils.spot_localization import SpotLocalizer from predicators.spot_utils.utils import _base_object_type, _broom_type, \ _container_type, _dustpan_type, _immovable_object_type, \ - _movable_object_type, _robot_type, _wrappers_type, \ + _movable_object_type, _robot_type, _table_type, _wrappers_type, \ construct_state_given_pbrspot, get_allowed_map_regions, \ get_graph_nav_dir, get_robot_gripper_open_percentage, get_spot_home_pose, \ load_spot_metadata, object_to_top_down_geom, update_pbrspot_given_state, \ @@ -84,6 +84,9 @@ class _SpotObservation: # A placeholder until all predicates have classifiers nonpercept_atoms: Set[GroundAtom] nonpercept_predicates: Set[Predicate] + # Object detections per camera in self.images. + object_detections_per_camera: Dict[str, List[Tuple[ObjectDetectionID, + SegmentedBoundingBox]]] @dataclass(frozen=True) @@ -140,10 +143,8 @@ def allclose(self, other: State) -> bool: def copy(self) -> State: state_copy = {o: self._copy_state_value(self.data[o]) for o in self} - sim_state_copy = { - "predicates": self._simulator_state_predicates.copy(), - "atoms": self._simulator_state_atoms.copy() - } + if self.simulator_state is not None: + sim_state_copy = self.simulator_state.copy() return _PartialPerceptionState(state_copy, simulator_state=sim_state_copy) @@ -758,7 +759,18 @@ def _build_realworld_observation( break if response == "n": break - + # Construct the detections per camera. 
+ obj_detections_per_camera: Dict[str, + List[Tuple[ObjectDetectionID, + SegmentedBoundingBox]]] = { + k: [] + for k in rgbds.keys() + } + for object_id, d in all_artifacts['language'][ + 'object_id_to_img_detections'].items(): + for camera_name, seg_bb in d.items(): + obj_detections_per_camera[camera_name].append( + (object_id, seg_bb)) # Prepare the non-percepts. nonpercept_preds = self.predicates - self.percept_predicates assert all(a.predicate in nonpercept_preds for a in ground_atoms) @@ -766,7 +778,8 @@ def _build_realworld_observation( objects_in_hand_view, objects_in_any_view_except_back, self._spot_object, gripper_open_percentage, - robot_pos, ground_atoms, nonpercept_preds) + robot_pos, ground_atoms, nonpercept_preds, + obj_detections_per_camera) return obs @@ -857,7 +870,8 @@ def _actively_construct_env_task(self) -> EnvironmentTask: # an initial observation. assert self._robot is not None assert self._localizer is not None - objects_in_view = self._actively_construct_initial_object_views() + objects_in_view, artifacts = self._actively_construct_initial_object_views( + ) rgbd_images = capture_images(self._robot, self._localizer) gripper_open_percentage = get_robot_gripper_open_percentage( self._robot) @@ -866,9 +880,20 @@ def _actively_construct_env_task(self) -> EnvironmentTask: nonpercept_atoms = self._get_initial_nonpercept_atoms() nonpercept_preds = self.predicates - self.percept_predicates assert all(a.predicate in nonpercept_preds for a in nonpercept_atoms) + # Construct the detections per camera. 
+ obj_detections_per_camera: Dict[str, List[Tuple[ + ObjectDetectionID, + SegmentedBoundingBox]]] = {k: [] + for k in rgbd_images.keys()} + for object_id, d in artifacts['language'][ + 'object_id_to_img_detections'].items(): + for camera_name, seg_bb in d.items(): + obj_detections_per_camera[camera_name].append( + (object_id, seg_bb)) obs = _SpotObservation(rgbd_images, objects_in_view, set(), set(), self._spot_object, gripper_open_percentage, - robot_pos, nonpercept_atoms, nonpercept_preds) + robot_pos, nonpercept_atoms, nonpercept_preds, + obj_detections_per_camera) goal_description = self._generate_goal_description() task = EnvironmentTask(obs, goal_description) # Save the task for future use. @@ -985,41 +1010,40 @@ def _load_task_from_json(self, json_file: Path) -> EnvironmentTask: # Prepare the non-percepts. nonpercept_atoms = self._get_initial_nonpercept_atoms() nonpercept_preds = self.predicates - self.percept_predicates - init_obs = _SpotObservation( - images, - objects_in_view, - set(), - set(), - robot, - gripper_open_percentage, - robot_pos, - nonpercept_atoms, - nonpercept_preds, - ) + init_obs = _SpotObservation(images, objects_in_view, set(), set(), + robot, gripper_open_percentage, robot_pos, + nonpercept_atoms, nonpercept_preds, {}) # The goal can remain the same. 
goal = base_env_task.goal_description return EnvironmentTask(init_obs, goal) def _actively_construct_initial_object_views( - self) -> Dict[Object, math_helpers.SE3Pose]: + self) -> Tuple[Dict[Object, math_helpers.SE3Pose], Dict[str, Any]]: assert self._robot is not None assert self._localizer is not None stow_arm(self._robot) - go_home(self._robot, self._localizer) + # go_home(self._robot, self._localizer) self._localizer.localize() detection_ids = self._detection_id_to_obj.keys() - detections = self._run_init_search_for_objects(set(detection_ids)) + detections, artifacts = self._run_init_search_for_objects( + set(detection_ids)) stow_arm(self._robot) obj_to_se3_pose = { self._detection_id_to_obj[det_id]: val for (det_id, val) in detections.items() } self._last_known_object_poses.update(obj_to_se3_pose) - return obj_to_se3_pose + # Move the robot into a good place to construct the initial state + # by running VLM predicates. + prompt = "Finished initial search for objects. Take control of the robot and move it into a good initial location for constructing the initial state of the task. Press 'Enter' when done!" 
+ _ = input(prompt) + assert self._lease_client is not None + self._lease_client.take() + return obj_to_se3_pose, artifacts def _run_init_search_for_objects( self, detection_ids: Set[ObjectDetectionID] - ) -> Dict[ObjectDetectionID, math_helpers.SE3Pose]: + ) -> Tuple[Dict[ObjectDetectionID, math_helpers.SE3Pose], Dict[str, Any]]: """Have the hand look down from high up at first.""" assert self._robot is not None assert self._localizer is not None @@ -1041,7 +1065,8 @@ def _run_init_search_for_objects( no_detections_outfile = outdir / f"no_detections_{time_str}.png" visualize_all_artifacts(artifacts, detections_outfile, no_detections_outfile) - return detections + self._lease_client + return detections, artifacts @property @abc.abstractmethod @@ -1075,11 +1100,8 @@ def _generate_goal_description(self) -> GoalDescription: ## Types _ALL_TYPES = { - _robot_type, - _base_object_type, - _movable_object_type, - _immovable_object_type, - _container_type, + _robot_type, _base_object_type, _movable_object_type, + _immovable_object_type, _container_type, _table_type } @@ -1483,6 +1505,24 @@ def _get_vlm_query_str(pred_name: str, objects: Sequence[Object]) -> str: _VLMOn = utils.create_vlm_predicate("VLMOn", [_movable_object_type, _base_object_type], lambda o: _get_vlm_query_str("OnTopOf", o)) +_VLMOnTable = utils.create_vlm_predicate( + "VLMOnTable", [_movable_object_type, _table_type], + lambda o: _get_vlm_query_str("OnTopTable", o)) +_VLMOnFloor = utils.create_vlm_predicate( + "VLMOnFloor", [_movable_object_type], + lambda o: _get_vlm_query_str("OnFloor", o)) +_TableClear = utils.create_vlm_predicate( + "TableClear", [_table_type], + lambda o: _get_vlm_query_str("ClearOfObjects", o)) +_CanBeUsedForErasing = utils.create_vlm_predicate( + "CanBeUsedForErasing", [_base_object_type], + lambda o: _get_vlm_query_str("CanBeUsedForErasing", o)) +_TableWiped = utils.create_vlm_predicate( + "TableWiped", [_table_type], + lambda o: _get_vlm_query_str("WipedOfMarkerScribbles", o)) 
+_TableClean = utils.create_vlm_predicate( + "TableClean", [_table_type], + lambda o: _get_vlm_query_str("CleanOfObjectsAndMarkings", o)) _Upright = utils.create_vlm_predicate( "Upright", [_movable_object_type], lambda o: _get_vlm_query_str("Upright", o)) @@ -1491,7 +1531,7 @@ def _get_vlm_query_str(pred_name: str, objects: Sequence[Object]) -> str: lambda o: _get_vlm_query_str("Toasted", o)) _VLMIn = utils.create_vlm_predicate( "VLMIn", [_movable_object_type, _immovable_object_type], - lambda o: _get_vlm_query_str("In", o)) + lambda o: _get_vlm_query_str("Inside", o)) _Open = utils.create_vlm_predicate("Open", [_movable_object_type], lambda o: _get_vlm_query_str("Open", o)) _Stained = utils.create_vlm_predicate( @@ -1513,8 +1553,14 @@ def _get_vlm_query_str(pred_name: str, objects: Sequence[Object]) -> str: } _VLM_PREDICATES = { _VLMOn, + _VLMOnTable, + _VLMOnFloor, + _TableClear, + _TableWiped, + _TableClean, _Upright, _Toasted, + _CanBeUsedForErasing, _VLMIn, _Open, _Stained, @@ -1996,7 +2042,7 @@ def _dry_simulate_move_to_view_hand( robot_pos=robot_pose, nonpercept_atoms=nonpercept_atoms, nonpercept_predicates=last_obs.nonpercept_predicates, - ) + object_detections_per_camera={}) return next_obs @@ -2028,7 +2074,7 @@ def _dry_simulate_move_to_reach_obj( robot_pos=robot_pose, nonpercept_atoms=nonpercept_atoms, nonpercept_predicates=last_obs.nonpercept_predicates, - ) + object_detections_per_camera={}) return next_obs @@ -2066,7 +2112,7 @@ def _dry_simulate_pick_from_top( robot_pos=robot_pose, nonpercept_atoms=nonpercept_atoms, nonpercept_predicates=last_obs.nonpercept_predicates, - ) + object_detections_per_camera={}) return next_obs @@ -2138,7 +2184,7 @@ def _dry_simulate_place_on_top( robot_pos=robot_pose, nonpercept_atoms=nonpercept_atoms, nonpercept_predicates=last_obs.nonpercept_predicates, - ) + object_detections_per_camera={}) return next_obs @@ -2182,7 +2228,7 @@ def _dry_simulate_drop_inside( robot_pos=robot_pose, nonpercept_atoms=nonpercept_atoms, 
nonpercept_predicates=last_obs.nonpercept_predicates, - ) + object_detections_per_camera={}) return next_obs @@ -2229,7 +2275,7 @@ def _dry_simulate_drag(last_obs: _SpotObservation, held_obj: Object, robot_pos=robot_pose, nonpercept_atoms=nonpercept_atoms, nonpercept_predicates=last_obs.nonpercept_predicates, - ) + object_detections_per_camera={}) return next_obs @@ -2284,7 +2330,7 @@ def _dry_simulate_prepare_container_for_sweeping( robot_pos=robot_pose, nonpercept_atoms=nonpercept_atoms, nonpercept_predicates=last_obs.nonpercept_predicates, - ) + object_detections_per_camera={}) return next_obs @@ -2362,7 +2408,7 @@ def _dry_simulate_sweep_into_container( robot_pos=robot_pose, nonpercept_atoms=nonpercept_atoms, nonpercept_predicates=last_obs.nonpercept_predicates, - ) + object_detections_per_camera={}) return next_obs @@ -2388,7 +2434,7 @@ def _dry_simulate_drop_not_placeable_object( robot_pos=robot_pose, nonpercept_atoms=nonpercept_atoms, nonpercept_predicates=last_obs.nonpercept_predicates, - ) + object_detections_per_camera={}) return next_obs @@ -2410,7 +2456,7 @@ def _dry_simulate_noop(last_obs: _SpotObservation, robot_pos=robot_pose, nonpercept_atoms=nonpercept_atoms, nonpercept_predicates=last_obs.nonpercept_predicates, - ) + object_detections_per_camera={}) return next_obs @@ -2439,21 +2485,22 @@ def _dry_simulate_pick_and_dump_container( robot_pos=obs.robot_pos, nonpercept_atoms=nonpercept_atoms, nonpercept_predicates=last_obs.nonpercept_predicates, - ) + object_detections_per_camera={}) return next_obs ############################################################################### -# VLM Generic Test Env # +# VLM No Teleop Test Env # ############################################################################### class SpotMinimalVLMPredicateEnv(SpotRearrangementEnv): """An abstract env that makes it easy to test the VLM-based predicate invention and evaluation pipeline on the real Spot robot. 
Importantly note that every env that inherits from this doesn't - require a map. Rather, it just works directly without a map. TODO: - see if this assumption is actually tenable, or if we need to remove - it? + require a map. Rather, it just works directly without a map. This + means that we don't get to use all the nice navigation skills that + we have on the robot. To use those, we need to use a variant of the + SpotRearrangementEnv from above. """ def __init__(self, use_gui: bool = True) -> None: #pylint:disable=super-init-not-called @@ -2469,7 +2516,10 @@ def __init__(self, use_gui: bool = True) -> None: #pylint:disable=super-init-no self._last_action: Optional[Action] = None # Create constant objects. self._spot_object = Object("robot", _robot_type) - op_to_name = {o.name: o for o in self._create_operators()} + op_to_name = {o.name: o + for o in self._create_operators() + } | {o.name: o + for o in _create_operators()} self._strips_operators = { op_to_name[o] for o in self.op_names_to_keep() @@ -2707,7 +2757,7 @@ def goal_predicates(self) -> Set[Predicate]: @classmethod def get_name(cls) -> str: - return "spot_vlm_cup_table_env" + return "spot_vlm_simple_cup_table_env" @property def _detection_id_to_obj(self) -> Dict[ObjectDetectionID, Object]: @@ -2764,6 +2814,135 @@ def _generate_goal_description(self) -> GoalDescription: return "get the cup onto the table!" 
+class SimpleTableWipingEnv(SpotMinimalVLMPredicateEnv): + """An environment to test the task of actually clearing objects from a + table and then wiping the table.""" + + @property + def predicates(self) -> Set[Predicate]: + preds = set(p for p in _ALL_PREDICATES | _VLM_PREDICATES if p.name in [ + "Holding", "HandEmpty", "NotHolding", "Inside", "VLMOnTable", + "VLMIn", "CanBeUsedForErasing", "TableClean", "TableWiped", + "TableClear", "VLMOnFloor" + ]) + return preds + + @property + def goal_predicates(self) -> Set[Predicate]: + return self.predicates + + @classmethod + def get_name(cls) -> str: + return "spot_vlm_simple_table_wiping_env" + + @property + def _detection_id_to_obj(self) -> Dict[ObjectDetectionID, Object]: + detection_id_to_obj: Dict[ObjectDetectionID, Object] = {} + objects = { + Object("clear_plastic_trash_can", _immovable_object_type), + Object("fluffy_toy_duster", _movable_object_type), + Object("apple", _movable_object_type), + Object("childrens_play_table", _table_type), + } + for o in objects: + detection_id = LanguageObjectDetectionID(o.name) + detection_id_to_obj[detection_id] = o + return detection_id_to_obj + + def _create_operators(self) -> Iterator[STRIPSOperator]: + # Pick object to clear table. 
+ robot = Variable("?robot", _robot_type) + obj = Variable("?object", _movable_object_type) + surface = Variable("?table", _table_type) + parameters = [robot, obj, surface] + preconds: Set[LiftedAtom] = { + LiftedAtom(_HandEmpty, [robot]), + LiftedAtom(_NotHolding, [robot, obj]), + LiftedAtom(_VLMOnTable, [obj, surface]), + } + add_effs: Set[LiftedAtom] = { + LiftedAtom(_Holding, [robot, obj]), + LiftedAtom(_TableClear, [surface]) + } + del_effs: Set[LiftedAtom] = { + LiftedAtom(_HandEmpty, [robot]), + LiftedAtom(_NotHolding, [robot, obj]), + LiftedAtom(_VLMOnTable, [obj, surface]), + } + ignore_effs: Set[Predicate] = set() + yield STRIPSOperator("TeleopPickToClearTable", parameters, preconds, + add_effs, del_effs, ignore_effs) + + # Picking from the floor. + robot = Variable("?robot", _robot_type) + obj = Variable("?object", _movable_object_type) + surface = Variable("?table", _table_type) + parameters = [robot, obj] + preconds: Set[LiftedAtom] = { + LiftedAtom(_HandEmpty, [robot]), + LiftedAtom(_NotHolding, [robot, obj]), + } + add_effs: Set[LiftedAtom] = { + LiftedAtom(_Holding, [robot, obj]), + } + del_effs: Set[LiftedAtom] = { + LiftedAtom(_HandEmpty, [robot]), + LiftedAtom(_NotHolding, [robot, obj]), + } + ignore_effs: Set[Predicate] = set() + yield STRIPSOperator("TeleopPickFromFloor", parameters, preconds, + add_effs, del_effs, ignore_effs) + + # Place object inside + robot = Variable("?robot", _robot_type) + obj = Variable("?object", _movable_object_type) + surf = Variable("?surf", _immovable_object_type) + parameters = [robot, obj, surf] + preconds = {LiftedAtom(_Holding, [robot, obj])} + add_effs = { + LiftedAtom(_HandEmpty, [robot]), + LiftedAtom(_NotHolding, [robot, obj]), + LiftedAtom(_VLMIn, [obj, surf]) + } + del_effs = {LiftedAtom(_Holding, [robot, obj])} + ignore_effs = set() + yield STRIPSOperator("TeleopPlaceInside", parameters, preconds, + add_effs, del_effs, ignore_effs) + + # Wipe surface + robot = Variable("?robot", _robot_type) + obj = 
Variable("?object", _movable_object_type) + surf = Variable("?surf", _table_type) + parameters = [robot, obj, surf] + preconds = { + LiftedAtom(_Holding, [robot, obj]), + LiftedAtom(_CanBeUsedForErasing, [obj]), + LiftedAtom(_TableClear, [surf]), + } + add_effs = { + LiftedAtom(_TableWiped, [surf]), + } + del_effs = {} + ignore_effs = set() + yield STRIPSOperator("TeleopWipe", parameters, preconds, add_effs, + del_effs, ignore_effs) + + def op_names_to_keep(self) -> Set[str]: + """Return the names of the operators we want to keep.""" + return { + "TeleopPickToClearTable", + "TeleopPlaceInside", + "TeleopWipe", + "MoveToReachObject", + "MoveToHandViewObject", + "TeleopPickFromFloor", + "PlaceObjectOnTop", + } + + def _generate_goal_description(self) -> GoalDescription: + return "clean up the table!" + + class DustpanSweepingTestEnv(SpotMinimalVLMPredicateEnv): """An environment to test a demo task of sweeping some wrappers into a dustpan.""" @@ -2826,8 +3005,8 @@ def _create_operators(self) -> Iterator[STRIPSOperator]: } del_effs = {LiftedAtom(_Holding, [robot, dustpan])} ignore_effs = set() - yield STRIPSOperator("PlaceNextTo", parameters, preconds, add_effs, - del_effs, ignore_effs) + yield STRIPSOperator("TeleopPlaceNextTo", parameters, preconds, + add_effs, del_effs, ignore_effs) # Pick(robot, broom) robot = Variable("?robot", _robot_type) @@ -2860,8 +3039,8 @@ def _create_operators(self) -> Iterator[STRIPSOperator]: add_effs = {LiftedAtom(_Inside, [mess, dustpan])} del_effs = set() ignore_effs = set() - yield STRIPSOperator("Sweep", parameters, preconds, add_effs, del_effs, - ignore_effs) + yield STRIPSOperator("TeleopSweep", parameters, preconds, add_effs, + del_effs, ignore_effs) # Place(robot, broom) robot = Variable("?robot", _robot_type) @@ -2874,14 +3053,14 @@ def _create_operators(self) -> Iterator[STRIPSOperator]: } del_effs = {LiftedAtom(_Holding, [robot, broom])} ignore_effs = set() - yield STRIPSOperator("PlaceOnFloor", parameters, preconds, 
add_effs, - del_effs, ignore_effs) + yield STRIPSOperator("TeleopPlaceOnFloor", parameters, preconds, + add_effs, del_effs, ignore_effs) def op_names_to_keep(self) -> Set[str]: """Return the names of the operators we want to keep.""" return { - "TeleopPick1", "PlaceNextTo", "TeleopPick2", "Sweep", - "PlaceOnFloor" + "TeleopPick1", "TeleopPlaceNextTo", "TeleopPick2", "TeleopSweep", + "TeleopPlaceOnFloor" } def _generate_goal_description(self) -> GoalDescription: @@ -2997,7 +3176,7 @@ def _get_dry_task(self, train_or_test: str, robot_pos=robot_pose, nonpercept_atoms=self._get_initial_nonpercept_atoms(), nonpercept_predicates=(self.predicates - self.percept_predicates), - ) + object_detections_per_camera={}) # Finish the task. goal_description = self._generate_goal_description() @@ -3421,7 +3600,7 @@ def _get_dry_task(self, train_or_test: str, robot_pos=robot_pose, nonpercept_atoms=self._get_initial_nonpercept_atoms(), nonpercept_predicates=(self.predicates - self.percept_predicates), - ) + object_detections_per_camera={}) # Finish the task. 
goal_description = self._generate_goal_description() @@ -3580,7 +3759,7 @@ def _detection_id_to_obj(self) -> Dict[ObjectDetectionID, Object]: red_block = Object("red_block", _movable_object_type) red_block_detection = LanguageObjectDetectionID( - "red block/orange block/yellow block") + "green block/red block/orange block/yellow block") detection_id_to_obj[red_block_detection] = red_block for obj, pose in get_known_immovable_objects().items(): @@ -3595,3 +3774,291 @@ def _generate_goal_description(self) -> GoalDescription: def _get_dry_task(self, train_or_test: str, task_idx: int) -> EnvironmentTask: raise NotImplementedError("Dry task generation not implemented.") + + +############################################################################### +# VLM Test Env with Map # +############################################################################### + + +class VLMCupEnv(SpotRearrangementEnv): + """A version of the SimpleVLMCupEnv, but with actual skills that the robot + can execute instead of relying on teleop.""" + + def __init__(self, use_gui: bool = True) -> None: + super().__init__(use_gui) + + op_to_name = {o.name: o for o in _create_operators()} + op_names_to_keep = { + "MoveToReachObject", + "MoveToHandViewObject", + "PickObjectFromTop", + } + self._strips_operators = {op_to_name[o] for o in op_names_to_keep} + # We add in a place operator that uses VLMOn instead + # of the typical 'OnTop' predicate. 
+ # PlaceObjectOnTop + robot = Variable("?robot", _robot_type) + held = Variable("?held", _movable_object_type) + surface = Variable("?surface", _immovable_object_type) + parameters = [robot, held, surface] + preconds = { + LiftedAtom(_Holding, [robot, held]), + LiftedAtom(_Reachable, [robot, surface]), + LiftedAtom(_NEq, [held, surface]), + LiftedAtom(_IsPlaceable, [held]), + LiftedAtom(_HasFlatTopSurface, [surface]), + LiftedAtom(_FitsInXY, [held, surface]), + } + add_effs = { + LiftedAtom(_VLMOn, [held, surface]), + LiftedAtom(_HandEmpty, [robot]), + LiftedAtom(_NotHolding, [robot, held]), + } + del_effs = { + LiftedAtom(_Holding, [robot, held]), + } + ignore_effs: Set[Predicate] = set() + self._strips_operators.add( + STRIPSOperator("PlaceObjectOnTop", parameters, preconds, add_effs, + del_effs, ignore_effs)) + + @property + def predicates(self) -> Set[Predicate]: + return set(p for p in _ALL_PREDICATES | _VLM_PREDICATES if p.name in [ + "Holding", "HandEmpty", "NotHolding", "Inside", "VLMOn", + "Reachable", "InHandView", "InView" + ]) + + @property + def goal_predicates(self) -> Set[Predicate]: + return self.predicates + + @classmethod + def get_name(cls) -> str: + return "spot_vlm_cup_table_env" + + @property + def _detection_id_to_obj(self) -> Dict[ObjectDetectionID, Object]: + detection_id_to_obj: Dict[ObjectDetectionID, Object] = {} + objects = { + Object("yellow_toy_cup", _movable_object_type), + Object("small_cardboard_box_with_black_tape", + _immovable_object_type), + } + for o in objects: + detection_id = LanguageObjectDetectionID(o.name) + detection_id_to_obj[detection_id] = o + + for obj, pose in get_known_immovable_objects().items(): + stat_detection_id = KnownStaticObjectDetectionID(obj.name, pose) + detection_id_to_obj[stat_detection_id] = obj + + return detection_id_to_obj + + def _generate_goal_description(self) -> GoalDescription: + return "get the cup onto the table!" 
+ + def _get_dry_task(self, train_or_test: str, + task_idx: int) -> EnvironmentTask: + raise NotImplementedError("Dry task generation not implemented.") + + +############################################################################### +# Table Wiping Env with Map # +############################################################################### + + +class VLMTableWipingEnv(SpotRearrangementEnv): + """A version of the SimpleTableWipingEnv, but with an actual map and some + skills that the robot can execute instead of relying on teleop. + + NOTE: for now, we mostly have teleop actions, but we intend to + replace them with actual skills in the future. + """ + + def __init__(self, use_gui: bool = True) -> None: + super().__init__(use_gui) + + # NOTE: temporary just to make data collection easier. + # Comment out this block below when actually running! + # op_to_name = {o.name: o for o in _create_operators()} + # op_names_to_keep = { + # "MoveToReachObject", + # "MoveToHandViewObject", + # } + # self._strips_operators = {op_to_name[o] for o in op_names_to_keep} + + # NOTE: temporary just to make data collection easier. + # Add teleop versions of MoveToReach and MoveToHandView! 
+ # MoveToReachObject + self._strips_operators = set() + robot = Variable("?robot", _robot_type) + obj = Variable("?object", _base_object_type) + parameters = [robot, obj] + preconds = { + LiftedAtom(_NotBlocked, [obj]), + LiftedAtom(_NotHolding, [robot, obj]), + } + add_effs = {LiftedAtom(_Reachable, [robot, obj])} + del_effs: Set[LiftedAtom] = set() + ignore_effs = { + _Reachable, _InHandView, _InView, _RobotReadyForSweeping + } + self._strips_operators.add( + STRIPSOperator("TeleopMoveToReachObject", parameters, preconds, + add_effs, del_effs, ignore_effs)) + + # MoveToHandViewObject + robot = Variable("?robot", _robot_type) + obj = Variable("?object", _movable_object_type) + parameters = [robot, obj] + preconds = { + LiftedAtom(_NotBlocked, [obj]), + LiftedAtom(_HandEmpty, [robot]) + } + add_effs = {LiftedAtom(_InHandView, [robot, obj])} + del_effs = set() + ignore_effs = { + _Reachable, _InHandView, _InView, _RobotReadyForSweeping + } + self._strips_operators.add( + STRIPSOperator("TeleopMoveToHandViewObject", parameters, preconds, + add_effs, del_effs, ignore_effs)) + + # We now add in specific operators for the table wiping task that + # are tied to specific teleop actions. + # Pick object to clear table. 
+ robot = Variable("?robot", _robot_type) + obj = Variable("?object", _movable_object_type) + surface = Variable("?table", _table_type) + parameters = [robot, obj, surface] + preconds: Set[LiftedAtom] = { + LiftedAtom(_InHandView, [robot, obj]), + LiftedAtom(_HandEmpty, [robot]), + LiftedAtom(_NotHolding, [robot, obj]), + LiftedAtom(_VLMOnTable, [obj, surface]), + } + add_effs: Set[LiftedAtom] = { + LiftedAtom(_Holding, [robot, obj]), + LiftedAtom(_TableClear, [surface]) + } + del_effs: Set[LiftedAtom] = { + LiftedAtom(_HandEmpty, [robot]), + LiftedAtom(_NotHolding, [robot, obj]), + LiftedAtom(_VLMOnTable, [obj, surface]), + LiftedAtom(_InHandView, [robot, obj]), + } + ignore_effs: Set[Predicate] = set() + self._strips_operators.add( + STRIPSOperator("TeleopPickToClearTable", parameters, preconds, + add_effs, del_effs, ignore_effs)) + + # Picking from the floor. + robot = Variable("?robot", _robot_type) + obj = Variable("?object", _movable_object_type) + surface = Variable("?table", _table_type) + parameters = [robot, obj] + preconds: Set[LiftedAtom] = { + LiftedAtom(_HandEmpty, [robot]), + LiftedAtom(_NotHolding, [robot, obj]), + LiftedAtom(_InHandView, [robot, obj]), + } + add_effs: Set[LiftedAtom] = { + LiftedAtom(_Holding, [robot, obj]), + } + del_effs: Set[LiftedAtom] = { + LiftedAtom(_HandEmpty, [robot]), + LiftedAtom(_NotHolding, [robot, obj]), + LiftedAtom(_InHandView, [robot, obj]), + } + ignore_effs: Set[Predicate] = set() + self._strips_operators.add( + STRIPSOperator("TeleopPickFromFloor", parameters, preconds, + add_effs, del_effs, ignore_effs)) + + # Place object inside + robot = Variable("?robot", _robot_type) + obj = Variable("?object", _movable_object_type) + surf = Variable("?surf", _immovable_object_type) + parameters = [robot, obj, surf] + preconds = { + LiftedAtom(_Holding, [robot, obj]), + LiftedAtom(_Reachable, [robot, surf]), + } + add_effs = { + LiftedAtom(_HandEmpty, [robot]), + LiftedAtom(_NotHolding, [robot, obj]), + LiftedAtom(_VLMIn, 
[obj, surf]) + } + del_effs = {LiftedAtom(_Holding, [robot, obj])} + ignore_effs = set() + self._strips_operators.add( + STRIPSOperator("TeleopPlaceInside", parameters, preconds, add_effs, + del_effs, ignore_effs)) + + # Wipe surface + robot = Variable("?robot", _robot_type) + obj = Variable("?object", _movable_object_type) + surf = Variable("?surf", _table_type) + parameters = [robot, obj, surf] + preconds = { + LiftedAtom(_Reachable, [robot, surf]), + LiftedAtom(_Holding, [robot, obj]), + LiftedAtom(_CanBeUsedForErasing, [obj]), + LiftedAtom(_TableClear, [surf]), + } + add_effs = { + LiftedAtom(_TableWiped, [surf]), + } + del_effs = {} + ignore_effs = set() + self._strips_operators.add( + STRIPSOperator("TeleopWipe", parameters, preconds, add_effs, + del_effs, ignore_effs)) + + @property + def predicates(self) -> Set[Predicate]: + preds = set(p for p in _ALL_PREDICATES | _VLM_PREDICATES if p.name in [ + "Holding", "HandEmpty", "NotHolding", "Inside", "VLMOnTable", + "VLMIn", "CanBeUsedForErasing", "TableClean", "TableWiped", + "TableClear", "VLMOnFloor", "InHandView", "Reachable" + ]) + return preds + + @property + def goal_predicates(self) -> Set[Predicate]: + return self.predicates + + @classmethod + def get_name(cls) -> str: + return "spot_vlm_table_wiping_env" + + @property + def _detection_id_to_obj(self) -> Dict[ObjectDetectionID, Object]: + detection_id_to_obj: Dict[ObjectDetectionID, Object] = {} + objects = { + Object("clear_plastic_trash_can", _immovable_object_type), + Object("fluffy_toy_duster", _movable_object_type), + } + for o in objects: + detection_id = LanguageObjectDetectionID(o.name) + detection_id_to_obj[detection_id] = o + + detection_id_to_obj[LanguageObjectDetectionID( + "coffee_table/surfboard")] = Object("table", _table_type) + detection_id_to_obj[LanguageObjectDetectionID( + "apple/red_ball")] = Object("apple", _movable_object_type) + + for obj, pose in get_known_immovable_objects().items(): + stat_detection_id = 
KnownStaticObjectDetectionID(obj.name, pose) + detection_id_to_obj[stat_detection_id] = obj + + return detection_id_to_obj + + def _generate_goal_description(self) -> GoalDescription: + return "clean up the table!" + + def _get_dry_task(self, train_or_test: str, + task_idx: int) -> EnvironmentTask: + raise NotImplementedError("Dry task generation not implemented.") diff --git a/predicators/execution_monitoring/expected_atoms_monitor.py b/predicators/execution_monitoring/expected_atoms_monitor.py index 4c3bd1b564..4d9b1f2c36 100644 --- a/predicators/execution_monitoring/expected_atoms_monitor.py +++ b/predicators/execution_monitoring/expected_atoms_monitor.py @@ -40,11 +40,20 @@ def step(self, state: State) -> bool: for a in (next_expected_atoms - next_expected_vlm_atoms) if not a.holds(state) } + # Query the VLM for the expected atom values by leveraging + # utils.abstract(). This is a bit ugly, but it's better than querying + # for the specific atoms we want to check - especially in the Spot env, + # we already have evaluated all the necessary VLM atoms and put them in + # the state. 
vlm_unsat_atoms = set() if len(next_expected_vlm_atoms) > 0: - vlm_unsat_atoms = next_expected_atoms - ( - utils.query_vlm_for_atom_vals(next_expected_vlm_atoms, - state)) # pragma: no cover + next_expected_vlm_preds = { + atom.predicate + for atom in next_expected_vlm_atoms + } + vlm_atom_vals = utils.abstract(state, next_expected_vlm_preds) + vlm_atoms_that_hold_in_state = next_expected_atoms & vlm_atom_vals + vlm_unsat_atoms = next_expected_vlm_atoms - vlm_atoms_that_hold_in_state unsat_atoms = non_vlm_unsat_atoms | vlm_unsat_atoms if not unsat_atoms: return False diff --git a/predicators/ground_truth_models/spot_env/nsrts.py b/predicators/ground_truth_models/spot_env/nsrts.py index 55dfbe571d..1c76ac9665 100644 --- a/predicators/ground_truth_models/spot_env/nsrts.py +++ b/predicators/ground_truth_models/spot_env/nsrts.py @@ -289,7 +289,8 @@ def get_env_names(cls) -> Set[str]: "spot_cube_env", "spot_soda_floor_env", "spot_soda_table_env", "spot_soda_bucket_env", "spot_soda_chair_env", "spot_main_sweep_env", "spot_ball_and_cup_sticky_table_env", - "spot_brush_shelf_env", "lis_spot_block_floor_env" + "spot_brush_shelf_env", "lis_spot_block_floor_env", + "spot_vlm_simple_table_wiping_env", "spot_vlm_table_wiping_env" } @staticmethod @@ -321,10 +322,7 @@ def get_nsrts(env_name: str, types: Dict[str, Type], "PrepareContainerForSweeping": _prepare_sweeping_sampler, "DropNotPlaceableObject": utils.null_sampler, "MoveToReadySweep": utils.null_sampler, - "TeleopPick1": utils.null_sampler, - "TeleopPlace1": utils.null_sampler, "PlaceNextTo": utils.null_sampler, - "TeleopPick2": utils.null_sampler, "Sweep": utils.null_sampler, "PlaceOnFloor": utils.null_sampler } @@ -337,7 +335,10 @@ def get_nsrts(env_name: str, types: Dict[str, Type], # similarly in the future. 
for strips_op in env.strips_operators: - sampler = operator_name_to_sampler[strips_op.name] + if "teleop" in strips_op.name.lower(): + sampler = utils.null_sampler + else: + sampler = operator_name_to_sampler[strips_op.name] option = options[strips_op.name] nsrt = strips_op.make_nsrt( option=option, diff --git a/predicators/ground_truth_models/spot_env/options.py b/predicators/ground_truth_models/spot_env/options.py index e5562c7ad3..75a9412138 100644 --- a/predicators/ground_truth_models/spot_env/options.py +++ b/predicators/ground_truth_models/spot_env/options.py @@ -963,12 +963,6 @@ def _teleop(robot: Robot, lease_client: LeaseClient) -> None: "PrepareContainerForSweeping": Box(-np.inf, np.inf, (3, )), # dx, dy, dyaw "DropNotPlaceableObject": Box(0, 1, (0, )), # empty "MoveToReadySweep": Box(0, 1, (0, )), # empty - "TeleopPick1": Box(0, 1, (0, )), # empty - "PlaceNextTo": Box(0, 1, (0, )), # empty - "TeleopPick2": Box(0, 1, (0, )), # empty - "TeleopPlace1": Box(0, 1, (0, )), # empty - "Sweep": Box(0, 1, (0, )), # empty - "PlaceOnFloor": Box(0, 1, (0, )) # empty } # NOTE: the policies MUST be unique because they output actions with extra info @@ -992,13 +986,6 @@ def _teleop(robot: Robot, lease_client: LeaseClient) -> None: "PrepareContainerForSweeping": _prepare_container_for_sweeping_policy, "DropNotPlaceableObject": _drop_not_placeable_object_policy, "MoveToReadySweep": _move_to_ready_sweep_policy, - "TeleopPick1": _create_teleop_policy_with_name("TeleopPick1"), - "PlaceNextTo": _create_teleop_policy_with_name("PlaceNextTo"), - "TeleopPlace": _create_teleop_policy_with_name("TeleopPlace"), - "TeleopPick2": _create_teleop_policy_with_name("TeleopPick2"), - "TeleopPlace1": _create_teleop_policy_with_name("TeleopPlace1"), - "Sweep": _create_teleop_policy_with_name("Sweep"), - "PlaceOnFloor": _create_teleop_policy_with_name("PlaceOnFloor") } @@ -1021,8 +1008,12 @@ def __init__(self, operator_name: str, types: List[Type]) -> None: 0, 1, (0, )) 
_OPERATOR_NAME_TO_POLICY[ "PickObjectFromTop"] = _sim_safe_pick_object_from_top_policy - params_space = _OPERATOR_NAME_TO_PARAM_SPACE[operator_name] - policy = _OPERATOR_NAME_TO_POLICY[operator_name] + if "teleop" in operator_name.lower(): + policy = _create_teleop_policy_with_name(operator_name) + params_space = Box(0, 1, (0, )) # null + else: + params_space = _OPERATOR_NAME_TO_PARAM_SPACE[operator_name] + policy = _OPERATOR_NAME_TO_POLICY[operator_name] super().__init__(operator_name, policy, types, params_space) def __reduce__(self) -> Tuple: @@ -1035,17 +1026,12 @@ class SpotEnvsGroundTruthOptionFactory(GroundTruthOptionFactory): @classmethod def get_env_names(cls) -> Set[str]: return { - "spot_vlm_dustpan_test_env", - "spot_vlm_cup_table_env", - "spot_cube_env", - "spot_soda_floor_env", - "spot_soda_table_env", - "spot_soda_bucket_env", - "spot_soda_chair_env", - "spot_main_sweep_env", - "spot_ball_and_cup_sticky_table_env", - "spot_brush_shelf_env", - "lis_spot_block_floor_env", + "spot_vlm_dustpan_test_env", "spot_vlm_cup_table_env", + "spot_cube_env", "spot_soda_floor_env", "spot_soda_table_env", + "spot_soda_bucket_env", "spot_soda_chair_env", + "spot_main_sweep_env", "spot_ball_and_cup_sticky_table_env", + "spot_brush_shelf_env", "lis_spot_block_floor_env", + "spot_vlm_simple_table_wiping_env", "spot_vlm_table_wiping_env" } @classmethod diff --git a/predicators/perception/spot_perceiver.py b/predicators/perception/spot_perceiver.py index 1a5f9114ca..2d80cc80de 100644 --- a/predicators/perception/spot_perceiver.py +++ b/predicators/perception/spot_perceiver.py @@ -3,7 +3,7 @@ import logging import time from pathlib import Path -from typing import Dict, List, Optional, Set +from typing import Dict, List, Optional, Set, Tuple import imageio.v2 as iio import numpy as np @@ -15,19 +15,103 @@ from predicators import utils from predicators.envs import BaseEnv, get_or_create_env from predicators.envs.spot_env import HANDEMPTY_GRIPPER_THRESHOLD, \ - SpotCubeEnv, 
SpotRearrangementEnv, _drafting_table_type, \ - _PartialPerceptionState, _SpotObservation, in_general_view_classifier + LanguageObjectDetectionID, ObjectDetectionID, RGBDImageWithContext, \ + SegmentedBoundingBox, SpotCubeEnv, SpotRearrangementEnv, \ + _drafting_table_type, _PartialPerceptionState, _SpotObservation, \ + in_general_view_classifier from predicators.perception.base_perceiver import BasePerceiver from predicators.settings import CFG from predicators.spot_utils.utils import _container_type, _dustpan_type, \ - _immovable_object_type, _movable_object_type, _robot_type, \ + _immovable_object_type, _movable_object_type, _robot_type, _table_type, \ _wrappers_type, get_allowed_map_regions, load_spot_metadata, \ object_to_top_down_geom from predicators.structs import Action, DefaultState, EnvironmentTask, \ GoalDescription, GroundAtom, Object, Observation, Predicate, \ SpotActionExtraInfo, State, Task, Video, VLMPredicate, _Option +# Helper functions. +CAMERA_NAME_TO_ANNOTATION = { + 'hand_color_image': "Hand Camera Image", + 'back_fisheye_image': "Back Camera Image", + 'frontleft_fisheye_image': "Front Left Camera Image", + 'frontright_fisheye_image': "Front Right Camera Image", + 'left_fisheye_image': "Left Camera Image", + 'right_fisheye_image': "Right Camera Image" +} + +def annotate_imgs_with_detections( + img_objects: Dict[str, RGBDImageWithContext], + object_detections_per_camera: Dict[str, List[Tuple[ObjectDetectionID, + SegmentedBoundingBox]]] +) -> List[PIL.Image.Image]: + """Annotate images via editing the pixesl directly to include object + detection bounding boxes and camera names.""" + img_names = [v.camera_name for _, v in img_objects.items()] + imgs = [v.rotated_rgb for _, v in img_objects.items()] + pil_imgs = [PIL.Image.fromarray(img) for img in imgs] # type: ignore + # Annotate images with detected objects (names + bounding box) + # and camera name. 
+ for i, camera_name in enumerate(img_names): + draw = ImageDraw.Draw(pil_imgs[i]) + # Annotate with camera name. + font = utils.get_scaled_default_font(draw, 4) + _ = utils.add_text_to_draw_img(draw, (0, 0), + CAMERA_NAME_TO_ANNOTATION[camera_name], + font) + # TODO: just commenting out for now to see if this helps labelling. + # # Annotate with object detections. + # detections = object_detections_per_camera[camera_name] + # for obj_id, seg_bb in detections: + # if isinstance(obj_id, LanguageObjectDetectionID): + # x0, y0, x1, y1 = seg_bb.bounding_box + # x0, x1 = sorted([x0, x1]) + # y0, y1 = sorted([y0, y1]) + # draw.rectangle([(x0, y0), (x1, y1)], outline='green', width=2) + # text = f"{obj_id.language_id}" + # font = utils.get_scaled_default_font(draw, 3) + # text_mask = font.getmask(text) # type: ignore + # text_width, text_height = text_mask.size + # text_bbox = [(x0, y0 - 1.5 * text_height), + # (x0 + text_width + 1, y0)] + # draw.rectangle(text_bbox, fill='green') + # draw.text((x0 + 1, y0 - 1.5 * text_height), + # text, + # fill='white', + # font=font) + annotated_imgs = list(pil_imgs) + return annotated_imgs + + +def save_annotated_imgs_for_vlm_demo(annotated_imgs: List[PIL.Image.Image], + save_dir: Path) -> None: + """Save annotated images useful images as part of creating demonstrations + for learning from teleop'ed Spot trajectories.""" + # If `save_dir` doesn't exist, create it. + save_dir.mkdir(parents=True, exist_ok=True) + # Collect all the names of folders within `save_dir`. + subfolders = [f for f in save_dir.iterdir() if f.is_dir()] + # Find the highest int that's a subfolder of `save_dir`. + prev_timestep = -1 + for folder in subfolders: + try: + folder_int = int(folder.name) + prev_timestep = max(prev_timestep, folder_int) + except ValueError: + # Skip folders that are not integers. + continue + # Add 1 to `prev_timestep` to get `curr_timestep`. 
+ curr_timestep = prev_timestep + 1 + curr_timestep_dir = save_dir / str(curr_timestep) + # Create the new folder for `curr_timestep`. + curr_timestep_dir.mkdir(parents=True, exist_ok=True) + # Save all the images as jpg files under the new folder. + for i, img in enumerate(annotated_imgs): + img_path = curr_timestep_dir / f"image_{i}.jpg" + img.save(img_path, format="JPEG") + + +# Main perceiver classes. class SpotPerceiver(BasePerceiver): """A perceiver specific to spot envs.""" @@ -57,6 +141,13 @@ def __init__(self) -> None: # Load static, hard-coded features of objects, like their shapes. meta = load_spot_metadata() self._static_object_features = meta.get("static-object-features", {}) + # Histories and other artefacts (for VLM labelling). + self._curr_state: Optional[State] = DefaultState + self._curr_state.simulator_state = {} + self._curr_annotated_imgs: List[PIL.Image.Image] = [] + self._state_history: List[State] = [] + self._executed_skill_history: List[Optional[_Option]] = [] + self._vlm_label_history: List[str] = [] @classmethod def get_name(cls) -> str: @@ -85,6 +176,15 @@ def reset(self, env_task: EnvironmentTask) -> Task: self._prev_action = None # already processed at the end of the cycle init_state = self._create_state() goal = self._create_goal(init_state, env_task.goal_description) + + # Reset run-specific things. + self._curr_state = DefaultState + self._curr_state.simulator_state = {} + self._state_history = [] + self._executed_skill_history = [] + self._vlm_label_history = [] + self._prev_action = None + return Task(init_state, goal) def update_perceiver_with_action(self, action: Action) -> None: @@ -96,6 +196,11 @@ def update_perceiver_with_action(self, action: Action) -> None: def step(self, observation: Observation) -> State: self._update_state_from_observation(observation) + # If we're trying to record VLM demos, then save the images. 
+ if len(CFG.spot_vlm_teleop_demo_folderpath) > 0: + save_annotated_imgs_for_vlm_demo( + self._curr_annotated_imgs, + Path(CFG.spot_vlm_teleop_demo_folderpath)) # Update the curr held item when applicable. assert self._curr_env is not None if self._prev_action is not None: @@ -166,7 +271,6 @@ def step(self, observation: Observation) -> State: logging.info("[Perceiver] An object was lost: " f"{prev_held_object} was lost!") self._lost_objects.add(prev_held_object) - return self._create_state() def _update_state_from_observation(self, observation: Observation) -> None: @@ -202,10 +306,13 @@ def _update_state_from_observation(self, observation: Observation) -> None: self._robot_pos = observation.robot_pos for obj in observation.objects_in_view: self._lost_objects.discard(obj) + self._curr_annotated_imgs = annotate_imgs_with_detections( + observation.images, observation.object_detections_per_camera) def _create_state(self) -> State: if self._waiting_for_observation: return DefaultState + assert self._curr_state is not None # Build the continuous part of the state. assert self._robot is not None state_dict = { @@ -284,10 +391,68 @@ def _create_state(self) -> State: # logging.info("Simulator state:") # logging.info(simulator_state) + # Add the images and histories into the simulator_state. + simulator_state["images"] = self._curr_annotated_imgs + # At the first timestep, these histories will be empty due to + # self.reset(). But at every timestep that isn't the first one, + # they will be non-empty. + simulator_state["state_history"] = list(self._state_history) + # We do this here so the call to `utils.abstract()` a few lines later + # has the skill that was just run. 
+ executed_skill = None + + if self._prev_action is not None: + assert self._prev_action.extra_info is not None + if self._prev_action.extra_info.action_name == "done": + # Just return the default state + return DefaultState + if self._prev_action.has_option(): + executed_skill = self._prev_action.get_option() + self._executed_skill_history.append( + executed_skill) # None in first timestep. + simulator_state["skill_history"] = list(self._executed_skill_history) + simulator_state["vlm_label_history"] = list(self._vlm_label_history) + + # Add to histories. + # A bit of extra work is required to build the VLM label history. + # We want to keep `utils.abstract()` as straightforward as possible, + # so we'll "rebuild" the VLM labels from the abstract state + # returned by `utils.abstract()`. And since we call this function, + # we might as well store the abstract state as a part of the simulator + # state so that we don't need to recompute it later in the approach or + # in planning. + assert self._curr_env is not None + preds = self._curr_env.predicates + state_copy = percept_state.copy() + state_copy.simulator_state = simulator_state + abstract_state = utils.abstract(state_copy, preds) + simulator_state["abstract_state"] = abstract_state + # Compute all the VLM atoms. `utils.abstract()` only returns the ones + # that are True. The remaining ones are the ones that are False. 
+ vlm_preds = set(pred for pred in preds + if isinstance(pred, VLMPredicate)) + vlm_atoms = set() + for pred in vlm_preds: + for choice in utils.get_object_combinations( + list(state_copy), pred.types): + vlm_atoms.add(GroundAtom(pred, choice)) + vlm_atoms_list = sorted(vlm_atoms) + reconstructed_all_vlm_responses = [] + for atom in vlm_atoms_list: + if atom in abstract_state: + truth_value = 'True' + else: + truth_value = 'False' + atom_label = f"* {atom.get_vlm_query_str()}: {truth_value}" + reconstructed_all_vlm_responses.append(atom_label) + str_vlm_response = '\n'.join(reconstructed_all_vlm_responses) + self._vlm_label_history.append(str_vlm_response) + # Now finish the state. state = _PartialPerceptionState(percept_state.data, simulator_state=simulator_state) - + self._curr_state = state + self._state_history.append(self._curr_state.copy()) return state def _create_goal(self, state: State, @@ -503,6 +668,35 @@ def _create_goal(self, state: State, GroundAtom(ContainerReadyForSweeping, [bucket, black_table]), GroundAtom(IsSweeper, [brush]) } + if goal_description == "get the cup onto the table!": + robot = Object("robot", _robot_type) + cup = Object("yellow_toy_cup", _movable_object_type) + table = Object("small_cardboard_box_with_black_tape", + _immovable_object_type) + HandEmpty = pred_name_to_pred["HandEmpty"] + VLMOn = pred_name_to_pred["VLMOn"] + goal = { + GroundAtom(HandEmpty, [robot]), + GroundAtom(VLMOn, [cup, table]) + } + return goal + if goal_description == "clean up the table!": + VLMIn = pred_name_to_pred["VLMIn"] + TableClean = pred_name_to_pred["TableClean"] + TableClear = pred_name_to_pred["TableClear"] + TableWiped = pred_name_to_pred["TableWiped"] + CanBeUsedForErasing = pred_name_to_pred["CanBeUsedForErasing"] + trash_can = Object("clear_plastic_trash_can", + _immovable_object_type) + apple = Object("apple", _movable_object_type) + table = Object("table", _table_type) + eraser = Object("fluffy_toy_duster", _movable_object_type) + robot = 
Object("robot", _robot_type) + goal = { + GroundAtom(VLMIn, [apple, trash_can]), + GroundAtom(TableWiped, [table]), + } + return goal raise NotImplementedError("Unrecognized goal description") def render_mental_images(self, observation: Observation, @@ -590,15 +784,6 @@ class SpotMinimalPerceiver(BasePerceiver): anything about. """ - camera_name_to_annotation = { - 'hand_color_image': "Hand Camera Image", - 'back_fisheye_image': "Back Camera Image", - 'frontleft_fisheye_image': "Front Left Camera Image", - 'frontright_fisheye_image': "Front Right Camera Image", - 'left_fisheye_image': "Left Camera Image", - 'right_fisheye_image': "Right Camera Image" - } - def render_mental_images(self, observation: Observation, env_task: EnvironmentTask) -> Video: raise NotImplementedError() @@ -636,9 +821,9 @@ def _create_goal(self, state: State, Inside = pred_name_to_pred["Inside"] Holding = pred_name_to_pred["Holding"] HandEmpty = pred_name_to_pred["HandEmpty"] - VLMOn = pred_name_to_pred["VLMOn"] if goal_description == "get the cup onto the table!": + VLMOn = pred_name_to_pred["VLMOn"] robot = Object("robot", _robot_type) cup = Object("yellow_toy_cup", _movable_object_type) table = Object("cardboard_table", _immovable_object_type) @@ -656,6 +841,23 @@ def _create_goal(self, state: State, GroundAtom(Holding, [robot, dustpan]) } return goal + if goal_description == "clean up the table!": + VLMIn = pred_name_to_pred["VLMIn"] + TableClean = pred_name_to_pred["TableClean"] + TableClear = pred_name_to_pred["TableClear"] + TableWiped = pred_name_to_pred["TableWiped"] + CanBeUsedForErasing = pred_name_to_pred["CanBeUsedForErasing"] + trash_can = Object("clear_plastic_trash_can", + _immovable_object_type) + apple = Object("apple", _movable_object_type) + table = Object("childrens_play_table", _table_type) + eraser = Object("neon_green_fluffy_eraser", _movable_object_type) + robot = Object("robot", _robot_type) + goal = { + GroundAtom(VLMIn, [apple, trash_can]), + GroundAtom(TableWiped, 
[table]), + } + return goal raise NotImplementedError("Unrecognized goal description") @@ -689,47 +891,20 @@ def step(self, observation: Observation) -> State: self._waiting_for_observation = False self._robot = observation.robot - img_objects = observation.rgbd_images # RGBDImage objects - img_names = [v.camera_name for _, v in img_objects.items()] - imgs = [v.rotated_rgb for _, v in img_objects.items()] - pil_imgs = [PIL.Image.fromarray(img) for img in imgs] # type: ignore - # Annotate images with detected objects (names + bounding box) - # and camera name. - object_detections_per_camera = observation.object_detections_per_camera - for i, camera_name in enumerate(img_names): - draw = ImageDraw.Draw(pil_imgs[i]) - # Annotate with camera name. - font = utils.get_scaled_default_font(draw, 4) - _ = utils.add_text_to_draw_img( - draw, (0, 0), self.camera_name_to_annotation[camera_name], - font) - # Annotate with object detections. - detections = object_detections_per_camera[camera_name] - for obj_id, seg_bb in detections: - x0, y0, x1, y1 = seg_bb.bounding_box - x0, x1 = sorted([x0, x1]) - y0, y1 = sorted([y0, y1]) - draw.rectangle([(x0, y0), (x1, y1)], outline='green', width=2) - text = f"{obj_id.language_id}" - font = utils.get_scaled_default_font(draw, 3) - text_mask = font.getmask(text) # type: ignore - text_width, text_height = text_mask.size - text_bbox = [(x0, y0 - 1.5 * text_height), - (x0 + text_width + 1, y0)] - draw.rectangle(text_bbox, fill='green') - draw.text((x0 + 1, y0 - 1.5 * text_height), - text, - fill='white', - font=font) - annotated_imgs = list(pil_imgs) + annotated_imgs = annotate_imgs_with_detections( + observation.rgbd_images, observation.object_detections_per_camera) self._gripper_open_percentage = observation.gripper_open_percentage + # If we're trying to record VLM demos, then save the images. 
+        if len(CFG.spot_vlm_teleop_demo_folderpath) > 0:
+            save_annotated_imgs_for_vlm_demo(
+                annotated_imgs, Path(CFG.spot_vlm_teleop_demo_folderpath))
         self._curr_state = self._create_state()
         if observation.executed_skill is not None:
             if "Pick" in observation.executed_skill.extra_info.action_name:
                 for obj in observation.executed_skill.extra_info.\
                     operator_objects:
-                    if not obj.is_instance(_robot_type):
+                    if obj.is_instance(_movable_object_type):
                         # Turn the held feature on
                         self._curr_state.set(obj, "held", 1.0)
             if "Place" in observation.executed_skill.extra_info.action_name:
@@ -842,7 +1017,7 @@ def _create_state(self) -> State:
                     "in_view": 0,
                     "is_sweeper": 0,
                 })
-            elif obj.type.name == "immovable":
+            elif obj.type.name in ["immovable", "table"]:
                 state_dict[obj].update({"flat_top_surface": 1})
             else:
                 raise ValueError(
diff --git a/predicators/planning.py b/predicators/planning.py
index c578c1dfd9..36e20bb3cd 100644
--- a/predicators/planning.py
+++ b/predicators/planning.py
@@ -1197,7 +1197,6 @@ def run_task_plan_once(
     init_atoms = utils.abstract(task.init, preds)
     goal = task.goal
     objects = set(task.init)
-    start_time = time.perf_counter()
 
     if CFG.sesame_task_planner == "astar":
diff --git a/predicators/settings.py b/predicators/settings.py
index 68fe316ac0..4310b6c28c 100644
--- a/predicators/settings.py
+++ b/predicators/settings.py
@@ -186,6 +186,7 @@ class GlobalSettings:
     spot_run_dry = False
     spot_use_perfect_samplers = False  # for debugging
     spot_sweep_env_goal_description = "get the objects into the bucket"
+    spot_vlm_teleop_demo_folderpath = ""
 
     # pddl blocks env parameters
     pddl_blocks_procedural_train_min_num_blocks = 3
@@ -435,7 +436,7 @@ class 
GlobalSettings: override_json_with_input = False # Only works with SpotEnv for now # parameters for vision language models - # gemini-1.5-pro-latest, gemini-1.5-pro-flash gpt-4-turbo, gpt-4o + # gemini-1.5-pro, gemini-1.5-flash, gpt-4-turbo, gpt-4o # NOTE: we need to create a dummy vlm so that tests on CI pass. vlm_model_name = "dummy" vlm_temperature = 0.0 diff --git a/predicators/spot_utils/README.md b/predicators/spot_utils/README.md index 45e743651e..6524673f7a 100644 --- a/predicators/spot_utils/README.md +++ b/predicators/spot_utils/README.md @@ -25,7 +25,7 @@ python predicators/main.py --env --approach "spot_wrapper[oracle]" -- python predicators/main.py --spot_robot_ip 192.168.80.3 --spot_graph_nav_map b45-621 --env lis_spot_block_floor_env --approach spot_wrapper[oracle] --bilevel_plan_without_sim True --seed 0 # an example to run LIS spot without a map -python predicators/main.py --env spot_vlm_cup_table_env --approach "spot_wrapper[oracle]" --seed 0 --num_train_tasks 0 --num_test_tasks 1 --spot_robot_ip 192.168.80.3 --perceiver spot_minimal_perceiver --bilevel_plan_without_sim True --vlm_test_time_atom_label_prompt_type img_option_diffs_label_history --vlm_model_name gpt-4o --execution_monitor expected_atoms +python predicators/main.py --env spot_vlm_simple_cup_table_env --approach "spot_wrapper[oracle]" --seed 0 --num_train_tasks 0 --num_test_tasks 1 --spot_robot_ip 192.168.80.3 --perceiver spot_minimal_perceiver --bilevel_plan_without_sim True --vlm_test_time_atom_label_prompt_type img_option_diffs_label_history --vlm_model_name gpt-4o --execution_monitor expected_atoms ``` ### Implement Your Task diff --git a/predicators/spot_utils/graph_nav_maps/b45-621/metadata.yaml b/predicators/spot_utils/graph_nav_maps/b45-621/metadata.yaml index cb37e2d951..404dcef608 100644 --- a/predicators/spot_utils/graph_nav_maps/b45-621/metadata.yaml +++ b/predicators/spot_utils/graph_nav_maps/b45-621/metadata.yaml @@ -46,4 +46,45 @@ static-object-features: length: 0.1 
width: 0.1 placeable: 1 - is_sweeper: 0 \ No newline at end of file + is_sweeper: 0 + yellow_toy_cup: + shape: 2 + height: 0.1 + length: 0.1 + width: 0.1 + placeable: 1 + is_sweeper: 0 + small_cardboard_box_with_black_tape: + shape: 1 # cuboid + height: 0.55 + length: 0.28 + width: 0.6 + flat_top_surface: 1 + apple: + shape: 2 + height: 0.1 + length: 0.1 + width: 0.1 + placeable: 1 + is_sweeper: 0 + clear_plastic_trash_can: + shape: 2 + height: 0.1 + length: 0.2 + width: 0.2 + placeable: 1 + is_sweeper: 0 + flat_top_surface: 0 + fluffy_toy_duster: + shape: 1 + height: 0.1 + length: 0.1 + width: 0.1 + placeable: 1 + is_sweeper: 0 + table: + shape: 1 + height: 0.1 + length: 0.3 + width: 0.5 + flat_top_surface: 1 \ No newline at end of file diff --git a/predicators/spot_utils/perception/object_detection.py b/predicators/spot_utils/perception/object_detection.py index 388cf61845..c2e158a9e7 100644 --- a/predicators/spot_utils/perception/object_detection.py +++ b/predicators/spot_utils/perception/object_detection.py @@ -244,6 +244,7 @@ def _get_detection_score(img_detections: Dict[str, SegmentedBoundingBox], f"{obj_id} because it's out of bounds. " + \ f"(pose = {pose_xy})") continue + # Pose extraction succeeded. detections[obj_id] = pose break diff --git a/predicators/spot_utils/skills/spot_find_objects.py b/predicators/spot_utils/skills/spot_find_objects.py index ce8db3826d..43c42eb7d1 100644 --- a/predicators/spot_utils/skills/spot_find_objects.py +++ b/predicators/spot_utils/skills/spot_find_objects.py @@ -97,12 +97,12 @@ def _find_objects_with_choreographed_moves( return all_detections, all_artifacts # Fail. Analyze the RGBDs if you want (by uncommenting here). 
     # import imageio.v2 as iio
     # for i, rgbds in enumerate(all_rgbds):
     #     for camera, rgbd in rgbds.items():
     #         path = f"init_search_for_objects_angle{i}_{camera}.png"
     #         iio.imsave(path, rgbd.rgb)
     #         print(f"Wrote out to {path}.")
 
     remaining_object_ids = set(object_ids) - set(all_detections)
     raise RuntimeError(f"Could not find objects: {remaining_object_ids}")
diff --git a/predicators/spot_utils/spot_record_option_demo.py b/predicators/spot_utils/spot_record_option_demo.py
new file mode 100644
index 0000000000..6eed642a40
--- /dev/null
+++ b/predicators/spot_utils/spot_record_option_demo.py
@@ -0,0 +1,49 @@
+"""Script to make it easy to record demonstrations from the Spot."""
+
+from predicators.perception.spot_perceiver import save_annotated_imgs_for_vlm_demo, annotate_imgs_with_detections
+from predicators import utils
+from predicators.settings import CFG
+from predicators.spot_utils.utils import verify_estop
+from predicators.spot_utils.perception.spot_cameras import capture_images_without_context
+from pathlib import Path
+
+def main():
+    # Run this file alone to test manually.
+    # Make sure to pass in --spot_robot_ip.
+
+    # pylint: disable=ungrouped-imports
+    import numpy as np
+    from bosdyn.client import create_standard_sdk
+    from bosdyn.client.lease import LeaseClient
+    from bosdyn.client.util import authenticate
+
+    # Put inside a function to avoid variable scoping issues.
+    args = utils.parse_args(env_required=False,
+                            seed_required=False,
+                            approach_required=False)
+    utils.update_config(args)
+
+    # Get constants.
+    hostname = CFG.spot_robot_ip
+
+    # Instantiate a robot.
+    sdk = create_standard_sdk('MoveHandSkillTestClient')
+    robot = sdk.create_robot(hostname)
+    authenticate(robot)
+    verify_estop(robot)
+    lease_client = robot.ensure_client(LeaseClient.default_service_name)
+    lease_client.take()
+
+    assert len(CFG.spot_vlm_teleop_demo_folderpath) > 0, "Please set the spot_vlm_teleop_demo_folderpath!"
+
+    # Pull all the images from the spot cameras and annotate them
+    # with the camera names.
+    # NOTE: we currently don't run any object detection, though we could.
+    rgbd_images = capture_images_without_context(robot)
+    annotated_imgs = annotate_imgs_with_detections(rgbd_images, {})
+    # Save the images properly.
+    save_annotated_imgs_for_vlm_demo(annotated_imgs, Path(CFG.spot_vlm_teleop_demo_folderpath))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/predicators/spot_utils/utils.py b/predicators/spot_utils/utils.py
index d9e046a013..b3dc3109a6 100644
--- a/predicators/spot_utils/utils.py
+++ b/predicators/spot_utils/utils.py
@@ -96,6 +96,9 @@ class _Spot3DShape(Enum):
 _wrappers_type = Type("wrappers", list(_movable_object_type.feature_names),
                       parent=_movable_object_type)
+_table_type = Type("table",
+                   list(_immovable_object_type.feature_names),
+                   parent=_immovable_object_type)
 
 
 def get_collision_geoms_for_nav(state: State) -> List[_Geom2D]:
diff --git a/predicators/utils.py b/predicators/utils.py
index 2e7c59c383..92d08f1471 100644
--- a/predicators/utils.py
+++ b/predicators/utils.py
@@ -2660,13 +2660,14 @@ def query_vlm_for_atom_vals(
     logging.info(f"VLM output: \n{vlm_output_str}")
     # Parse out stuff.
     if len(label_history) > 0:  # pragma: no cover
-        truth_values = re.findall(r'\* (.*): (True|False)', vlm_output_str)
+        truth_values = re.findall(r'\* (.*): (True|False|Unknown)',
+                                  vlm_output_str)
         for i, (atom_query,
                 pred_label) in enumerate(zip(atom_queries_list, truth_values)):
             pred, label = pred_label
             assert pred in atom_query
             label = label.lower()
-            if label == "true":
+            if "true" in label:
                 true_atoms.add(vlm_atoms[i])
     else:
         all_vlm_responses = vlm_output_str.strip().split("\n")
@@ -2683,7 +2684,7 @@
             # value = curr_vlm_output_line[len(atom_query + ":"):
             #                              period_idx].lower().strip()
             value = curr_vlm_output_line.split(': ')[-1].strip('.').lower()
-            if value == "true":
+            if "true" in value:
                 true_atoms.add(vlm_atoms[i])
     return true_atoms
@@ -2699,7 +2700,11 @@
     try:
         if state.simulator_state is not None and "abstract_state" in \
                 state.simulator_state:  # pragma: no cover
-            return state.simulator_state["abstract_state"]
+            return {
+                atom
+                for atom in state.simulator_state["abstract_state"]
+                if atom.predicate in preds
+            }
     except (AttributeError, TypeError):
         pass
     # Start by pulling out all VLM predicates.