diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..020ab0f --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,37 @@ +name: Continuous integration + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: psf/black@stable + tests: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.10'] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install + run: | + python3 -m venv .env + source .env/bin/activate + make install + - name: Unit tests + run: | + source .env/bin/activate + make test diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..c8e4e55 --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,37 @@ +name: Release + +on: + push: + branches: + - main +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions-ecosystem/action-regex-match@v2 + id: regex-match + with: + text: ${{ github.event.head_commit.message }} + regex: '^Release ([^ ]+)' + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Release + if: ${{ steps.regex-match.outputs.match != '' }} + uses: softprops/action-gh-release@v1 + with: + tag_name: ${{ steps.regex-match.outputs.group1 }} + - name: Build and publish + if: ${{ steps.regex-match.outputs.match != '' }} + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 
0000000..39b9f67 --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +install: ## [Local development] Upgrade pip, install requirements, install package. + python -m pip install -U pip + python -m pip install -e . + +install-dev: ## [Local development] Install requirements + python -m pip install -r requirements.txt + +black: ## [Local development] Auto-format python code using black + python -m black . + +test: ## [Local development] Run unit tests + python -m pytest -x -s -v tests + +.PHONY: help + +help: # Run `make help` to get help on the make commands + @grep -E '^[0-9a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' \ No newline at end of file diff --git a/README.md b/README.md index 7a64e92..22df311 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@

- Paper | Project Page + Paper | Project Page

## Requirements @@ -21,25 +21,28 @@ Holodeck is based on [AI2-THOR](https://ai2thor.allenai.org/ithor/documentation/ ## Installation After cloning the repo, you can install the required dependencies using the following commands: ``` -conda create --name holodeck python=3.9.16 +conda create --name holodeck python=3.10 conda activate holodeck pip install -r requirements.txt -pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+6f165fdaf3cf2d03728f931f39261d14a67414d0 +pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+8524eadda94df0ab2dbb2ef5a577e4d37c712897 ``` ## Data -Download the data from [google drive](https://drive.google.com/file/d/1MQbFbNfTz94x8Pxfkgbohz4l46O5e3G1/view?usp=sharing) and extract it to the `data/` folder, or use the following command to download from S3: -``` -wget https://holodeck-ai2.s3.amazonaws.com/data.zip -unzip data.zip +Download the data by running the following commands: +```bash +python -m objathor.dataset.download_holodeck_metadata --version 2023_09_23 +python -m objathor.dataset.download_assets --version 2023_09_23 +python -m objathor.dataset.download_annotations --version 2023_09_23 +python -m objathor.dataset.download_features --version 2023_09_23 ``` +By default these will save to `~/.objathor-assets/...`; you can change this directory by specifying the `--path` argument. If you change the `--path`, you'll need to set the `OBJATHOR_ASSETS_BASE_DIR` environment variable to the path where the assets are stored when you use Holodeck. ## Usage You can use the following command to generate a new environment. ``` -python main.py --query "a living room" --openai_api_key +python holodeck/main.py --query "a living room" --openai_api_key ``` -To be noticed, our system uses `gpt-4-1106-preview`, so please ensure you have access to it. +Our system uses `gpt-4o-2024-05-13`, **so please ensure you have access to it.** **Note:** To yield better layouts, use `DFS` as the solver. 
If you pull the repo before `12/28/2023`, you must set the [argument](https://github.com/allenai/Holodeck/blob/386b0a868def29175436dc3b1ed85b6309eb3cad/main.py#L78) `--use_milp` to `False` to use `DFS`. diff --git a/ai2holodeck/__init__.py b/ai2holodeck/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ai2holodeck/constants.py b/ai2holodeck/constants.py new file mode 100644 index 0000000..20c453f --- /dev/null +++ b/ai2holodeck/constants.py @@ -0,0 +1,35 @@ +import os +from pathlib import Path + +ABS_PATH_OF_HOLODECK = os.path.abspath(os.path.dirname(Path(__file__))) + +ASSETS_VERSION = os.environ.get("ASSETS_VERSION", "2023_09_23") +HD_BASE_VERSION = os.environ.get("HD_BASE_VERSION", "2023_09_23") + +OBJATHOR_ASSETS_BASE_DIR = os.environ.get( + "OBJATHOR_ASSETS_BASE_DIR", os.path.expanduser(f"~/.objathor-assets") +) + +OBJATHOR_VERSIONED_DIR = os.path.join(OBJATHOR_ASSETS_BASE_DIR, ASSETS_VERSION) +OBJATHOR_ASSETS_DIR = os.path.join(OBJATHOR_VERSIONED_DIR, "assets") +OBJATHOR_FEATURES_DIR = os.path.join(OBJATHOR_VERSIONED_DIR, "features") +OBJATHOR_ANNOTATIONS_PATH = os.path.join(OBJATHOR_VERSIONED_DIR, "annotations.json.gz") + +HOLODECK_BASE_DATA_DIR = os.path.join( + OBJATHOR_ASSETS_BASE_DIR, "holodeck", HD_BASE_VERSION +) + +HOLODECK_THOR_FEATURES_DIR = os.path.join(HOLODECK_BASE_DATA_DIR, "thor_object_data") +HOLODECK_THOR_ANNOTATIONS_PATH = os.path.join( + HOLODECK_BASE_DATA_DIR, "thor_object_data", "annotations.json.gz" +) + +if ASSETS_VERSION > "2023_09_23": + THOR_COMMIT_ID = "8524eadda94df0ab2dbb2ef5a577e4d37c712897" +else: + THOR_COMMIT_ID = "3213d486cd09bcbafce33561997355983bdf8d1a" + +# LLM_MODEL_NAME = "gpt-4-1106-preview" +LLM_MODEL_NAME = "gpt-4o-2024-05-13" + +DEBUGGING = os.environ.get("DEBUGGING", "0").lower() in ["1", "true", "True", "t", "T"] diff --git a/ai2holodeck/generation/__init__.py b/ai2holodeck/generation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modules/ceiling_objects.py 
b/ai2holodeck/generation/ceiling_objects.py similarity index 54% rename from modules/ceiling_objects.py rename to ai2holodeck/generation/ceiling_objects.py index 3703d5a..9337152 100644 --- a/modules/ceiling_objects.py +++ b/ai2holodeck/generation/ceiling_objects.py @@ -1,36 +1,54 @@ -import re import copy +import re + import torch -from colorama import Fore import torch.nn.functional as F -import modules.prompts as prompts -from langchain import PromptTemplate +from colorama import Fore +from langchain import PromptTemplate, OpenAI from shapely.geometry import Polygon - -class CeilingObjectGenerator(): - def __init__(self, llm, object_retriever): - self.json_template = {"assetId": None, "id": None, "kinematic": True, - "position": {}, "rotation": {}, "material": None, "roomId": None} +import ai2holodeck.generation.prompts as prompts +from ai2holodeck.generation.objaverse_retriever import ObjathorRetriever +from ai2holodeck.generation.utils import get_bbox_dims, get_annotations + + +class CeilingObjectGenerator: + def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): + self.json_template = { + "assetId": None, + "id": None, + "kinematic": True, + "position": {}, + "rotation": {}, + "material": None, + "roomId": None, + } self.llm = llm self.object_retriever = object_retriever self.database = object_retriever.database - self.ceiling_template = PromptTemplate(input_variables=["input", "rooms", "additional_requirements"], - template=prompts.ceiling_selection_prompt) - + self.ceiling_template = PromptTemplate( + input_variables=["input", "rooms", "additional_requirements"], + template=prompts.ceiling_selection_prompt, + ) def generate_ceiling_objects(self, scene, additional_requirements_ceiling="N/A"): room_types = [room["roomType"] for room in scene["rooms"]] room_types_str = str(room_types).replace("'", "")[1:-1] - ceiling_prompt = self.ceiling_template.format(input=scene["query"], - rooms=room_types_str, - 
additional_requirements=additional_requirements_ceiling) + ceiling_prompt = self.ceiling_template.format( + input=scene["query"], + rooms=room_types_str, + additional_requirements=additional_requirements_ceiling, + ) - if "raw_ceiling_plan" not in scene: raw_ceiling_plan = self.llm(ceiling_prompt) - else: raw_ceiling_plan = scene["raw_ceiling_plan"] + if "raw_ceiling_plan" not in scene: + raw_ceiling_plan = self.llm(ceiling_prompt) + else: + raw_ceiling_plan = scene["raw_ceiling_plan"] print(f"\nUser: {ceiling_prompt}\n") - print(f"{Fore.GREEN}AI: Here is the ceiling plan:\n{raw_ceiling_plan}{Fore.RESET}") + print( + f"{Fore.GREEN}AI: Here is the ceiling plan:\n{raw_ceiling_plan}{Fore.RESET}" + ) ceiling_objects = [] parsed_ceiling_plan = self.parse_ceiling_plan(raw_ceiling_plan) @@ -38,77 +56,88 @@ def generate_ceiling_objects(self, scene, additional_requirements_ceiling="N/A") room = self.get_room_by_type(scene["rooms"], room_type) if room is None: - print("Room type {} not found in scene.".format(room_type)) + print(f"Room type {room_type} not found in scene.") continue - + ceiling_object_id = self.select_ceiling_object(ceiling_object_description) - if ceiling_object_id is None: continue + if ceiling_object_id is None: + continue # Temporary solution: place at the center of the room - dimension = self.database[ceiling_object_id]['assetMetadata']['boundingBox'] + dimension = get_bbox_dims(self.database[ceiling_object_id]) floor_polygon = Polygon(room["vertices"]) x = floor_polygon.centroid.x z = floor_polygon.centroid.y y = scene["wall_height"] - dimension["y"] / 2 - + ceiling_object = copy.deepcopy(self.json_template) ceiling_object["assetId"] = ceiling_object_id ceiling_object["id"] = f"ceiling ({room_type})" ceiling_object["position"] = {"x": x, "y": y, "z": z} ceiling_object["rotation"] = {"x": 0, "y": 0, "z": 0} ceiling_object["roomId"] = room["id"] - ceiling_object["object_name"] = self.database[ceiling_object_id]["annotations"]["category"] + 
ceiling_object["object_name"] = get_annotations( + self.database[ceiling_object_id] + )["category"] ceiling_objects.append(ceiling_object) return raw_ceiling_plan, ceiling_objects - def parse_ceiling_plan(self, raw_ceiling_plan): plans = [plan.lower() for plan in raw_ceiling_plan.split("\n") if "|" in plan] parsed_plans = {} for plan in plans: # remove index - pattern = re.compile(r'^\d+\.\s*') - plan = pattern.sub('', plan) - if plan[-1] == ".": plan = plan[:-1] # remove the last period + pattern = re.compile(r"^\d+\.\s*") + plan = pattern.sub("", plan) + if plan[-1] == ".": + plan = plan[:-1] # remove the last period room_type, ceiling_object_description = plan.split("|") room_type = room_type.strip() ceiling_object_description = ceiling_object_description.strip() - if room_type not in parsed_plans: # only consider one type of ceiling object for each room + if ( + room_type not in parsed_plans + ): # only consider one type of ceiling object for each room parsed_plans[room_type] = ceiling_object_description return parsed_plans - def get_room_by_type(self, rooms, room_type): for room in rooms: if room["roomType"] == room_type: return room return None - def select_ceiling_object(self, description): - candidates = self.object_retriever.retrieve([f"a 3D model of {description}"], threshold=29) - ceiling_candiates = [candidate for candidate in candidates if self.database[candidate[0]]["annotations"]["onCeiling"] == True] + candidates = self.object_retriever.retrieve( + [f"a 3D model of {description}"], threshold=29 + ) + ceiling_candiates = [ + candidate + for candidate in candidates + if get_annotations(self.database[candidate[0]])["onCeiling"] == True + ] valid_ceiling_candiates = [] for candidate in ceiling_candiates: - dimension = self.database[candidate[0]]['assetMetadata']['boundingBox'] - if dimension["y"] <= 1.0: valid_ceiling_candiates.append(candidate) + dimension = get_bbox_dims(self.database[candidate[0]]) + if dimension["y"] <= 1.0: + 
valid_ceiling_candiates.append(candidate) if len(valid_ceiling_candiates) == 0: print("No ceiling object found for description: {}".format(description)) return None - + selected_ceiling_object_id = self.random_select(valid_ceiling_candiates)[0] return selected_ceiling_object_id - def random_select(self, candidates): scores = [candidate[1] for candidate in candidates] scores_tensor = torch.Tensor(scores) - probas = F.softmax(scores_tensor, dim=0) # TODO: consider using normalized scores + probas = F.softmax( + scores_tensor, dim=0 + ) # TODO: consider using normalized scores selected_index = torch.multinomial(probas, 1).item() selected_candidate = candidates[selected_index] - return selected_candidate \ No newline at end of file + return selected_candidate diff --git a/modules/doors.py b/ai2holodeck/generation/doors.py similarity index 50% rename from modules/doors.py rename to ai2holodeck/generation/doors.py index 00ddcc8..0636a3e 100644 --- a/modules/doors.py +++ b/ai2holodeck/generation/doors.py @@ -1,24 +1,38 @@ import copy -import json -import torch -import pickle +import os import random + +import compress_json +import compress_pickle import numpy as np +import torch from PIL import Image -from tqdm import tqdm from colorama import Fore -import modules.prompts as prompts -from langchain import PromptTemplate - - -class DoorGenerator(): - def __init__(self, clip_model, clip_preprocess, clip_tokenizer, llm): - self.json_template = {"assetId": None, "id": None, "openable": False, - "openness": 0, "room0": None, "room1": None, - "wall0": None, "wall1": None, "holePolygon": [], - "assetPosition": {}} - - self.door_data = json.load(open("data/doors/door-database.json", "r")) +from langchain import PromptTemplate, OpenAI +from tqdm import tqdm + +import ai2holodeck.generation.prompts as prompts +from ai2holodeck.constants import HOLODECK_BASE_DATA_DIR + + +class DoorGenerator: + def __init__(self, clip_model, clip_preprocess, clip_tokenizer, llm: OpenAI): + 
self.json_template = { + "assetId": None, + "id": None, + "openable": False, + "openness": 0, + "room0": None, + "room1": None, + "wall0": None, + "wall1": None, + "holePolygon": [], + "assetPosition": {}, + } + + self.door_data = compress_json.load( + os.path.join(HOLODECK_BASE_DATA_DIR, "doors/door-database.json") + ) self.door_ids = list(self.door_data.keys()) self.used_assets = [] @@ -28,25 +42,42 @@ def __init__(self, clip_model, clip_preprocess, clip_tokenizer, llm): self.load_features() self.llm = llm - self.doorway_template = PromptTemplate(input_variables=["input", "rooms", "room_sizes", "room_pairs", "additional_requirements"], - template=prompts.doorway_prompt) - + self.doorway_template = PromptTemplate( + input_variables=[ + "input", + "rooms", + "room_sizes", + "room_pairs", + "additional_requirements", + ], + template=prompts.doorway_prompt, + ) def load_features(self): try: - self.door_feature_clip = pickle.load(open("data/doors/door_feature_clip.p", "rb")) + self.door_feature_clip = compress_pickle.load( + os.path.join(HOLODECK_BASE_DATA_DIR, "doors/door_feature_clip.pkl") + ) except: print("Precompute image features for doors...") self.door_feature_clip = [] for door_id in tqdm(self.door_ids): - image = self.preprocess(Image.open(f"data/doors/images/{door_id}.png")).unsqueeze(0) + image = self.preprocess( + Image.open( + os.path.join( + HOLODECK_BASE_DATA_DIR, f"doors/images/{door_id}.png" + ) + ) + ).unsqueeze(0) with torch.no_grad(): image_features = self.clip_model.encode_image(image) image_features /= image_features.norm(dim=-1, keepdim=True) self.door_feature_clip.append(image_features) self.door_feature_clip = torch.vstack(self.door_feature_clip) - pickle.dump(self.door_feature_clip, open("data/doors/door_feature_clip.p", "wb")) - + compress_pickle.dump( + self.door_feature_clip, + os.path.join(HOLODECK_BASE_DATA_DIR, "doors/door_feature_clip.pkl"), + ) def generate_doors(self, scene, additional_requirements_door): # get room pairs @@ -56,70 
+87,103 @@ def generate_doors(self, scene, additional_requirements_door): room_sizes_str = self.get_room_size_str(scene) room_pairs_str = str(room_pairs).replace("'", "")[1:-1] - doorway_prompt = self.doorway_template.format(input=scene["query"], - rooms=room_types_str, - room_sizes=room_sizes_str, - room_pairs=room_pairs_str, - additional_requirements=additional_requirements_door) - + doorway_prompt = self.doorway_template.format( + input=scene["query"], + rooms=room_types_str, + room_sizes=room_sizes_str, + room_pairs=room_pairs_str, + additional_requirements=additional_requirements_door, + ) + # generate raw doorway plan if not exist - if "raw_doorway_plan" not in scene: raw_doorway_plan = self.llm(doorway_prompt) - else: raw_doorway_plan = scene["raw_doorway_plan"] + if "raw_doorway_plan" not in scene: + raw_doorway_plan = self.llm(doorway_prompt) + else: + raw_doorway_plan = scene["raw_doorway_plan"] print(f"\nUser: {doorway_prompt}\n") - print(f"{Fore.GREEN}AI: Here is the doorway plan:\n{raw_doorway_plan}{Fore.RESET}") + print( + f"{Fore.GREEN}AI: Here is the doorway plan:\n{raw_doorway_plan}{Fore.RESET}" + ) rooms = scene["rooms"] walls = scene["walls"] doors = [] open_room_pairs = [] plans = [plan.lower() for plan in raw_doorway_plan.split("\n") if "|" in plan] - room_types = [room["roomType"] for room in rooms] + ['exterior'] + room_types = [room["roomType"] for room in rooms] + ["exterior"] for i, plan in enumerate(plans): # TODO: rewrite the parsing logic current_door = copy.deepcopy(self.json_template) parsed_plan = self.parse_door_plan(plan) - if parsed_plan == None: continue + if parsed_plan == None: + continue - if parsed_plan["room_type0"] not in room_types or parsed_plan["room_type1"] not in room_types: - print(f"{Fore.RED}{parsed_plan['room_type0']} or {parsed_plan['room_type1']} not exist{Fore.RESET}") + if ( + parsed_plan["room_type0"] not in room_types + or parsed_plan["room_type1"] not in room_types + ): + print( + 
f"{Fore.RED}{parsed_plan['room_type0']} or {parsed_plan['room_type1']} not exist{Fore.RESET}" + ) continue current_door["room0"] = parsed_plan["room_type0"] current_door["room1"] = parsed_plan["room_type1"] - current_door["id"] = f"door|{i}|{parsed_plan['room_type0']}|{parsed_plan['room_type1']}" + current_door["id"] = ( + f"door|{i}|{parsed_plan['room_type0']}|{parsed_plan['room_type1']}" + ) if parsed_plan["connection_type"] == "open": - open_room_pairs.append((parsed_plan["room_type0"], parsed_plan["room_type1"])) + open_room_pairs.append( + (parsed_plan["room_type0"], parsed_plan["room_type1"]) + ) continue # get connection exterior = False - if parsed_plan["room_type0"] == "exterior" or parsed_plan["room_type1"] == "exterior": - connection = self.get_connection_exterior(parsed_plan["room_type0"], parsed_plan["room_type1"], walls) + if ( + parsed_plan["room_type0"] == "exterior" + or parsed_plan["room_type1"] == "exterior" + ): + connection = self.get_connection_exterior( + parsed_plan["room_type0"], parsed_plan["room_type1"], walls + ) exterior = True else: - connection = self.get_connection(parsed_plan["room_type0"], parsed_plan["room_type1"], walls) - - if connection == None: continue + connection = self.get_connection( + parsed_plan["room_type0"], parsed_plan["room_type1"], walls + ) + + if connection == None: + continue # get wall information current_door["wall0"] = connection["wall0"] current_door["wall1"] = connection["wall1"] # get door asset - if exterior: parsed_plan["connection_type"] = "doorway" # force to use doorway for exterior - door_id = self.select_door(parsed_plan["connection_type"], parsed_plan["size"], parsed_plan["style"]) + if exterior: + parsed_plan["connection_type"] = ( + "doorway" # force to use doorway for exterior + ) + door_id = self.select_door( + parsed_plan["connection_type"], + parsed_plan["size"], + parsed_plan["style"], + ) current_door["assetId"] = door_id - + if parsed_plan["connection_type"] == "doorway" and not exterior: 
current_door["openable"] = True current_door["openness"] = 1 - + # get polygon - door_dimension = self.door_data[door_id]['boundingBox'] - door_polygon = self.get_door_polygon(connection["segment"], door_dimension, parsed_plan["connection_type"]) + door_dimension = self.door_data[door_id]["boundingBox"] + door_polygon = self.get_door_polygon( + connection["segment"], door_dimension, parsed_plan["connection_type"] + ) if door_polygon != None: polygon, position, door_boxes, door_segment = door_polygon @@ -128,36 +192,44 @@ def generate_doors(self, scene, additional_requirements_door): current_door["doorBoxes"] = door_boxes current_door["doorSegment"] = door_segment doors.append(current_door) - + # check if there is any room has no door connected_rooms = [] for door in doors: connected_rooms.append(door["room0"]) connected_rooms.append(door["room1"]) - + for pair in open_room_pairs: connected_rooms.append(pair[0]) connected_rooms.append(pair[1]) - + unconnected_rooms = [] for room in rooms: - if room["roomType"] not in connected_rooms: unconnected_rooms.append(room["roomType"]) - + if room["roomType"] not in connected_rooms: + unconnected_rooms.append(room["roomType"]) + if len(unconnected_rooms) > 0: for room in unconnected_rooms: - if room in connected_rooms: continue + if room in connected_rooms: + continue current_door = copy.deepcopy(self.json_template) - current_walls = [wall for wall in walls if wall["roomId"] == room and "exterior" not in wall["id"] and len(wall["connected_rooms"]) != 0] + current_walls = [ + wall + for wall in walls + if wall["roomId"] == room + and "exterior" not in wall["id"] + and len(wall["connected_rooms"]) != 0 + ] widest_wall = max(current_walls, key=lambda x: x["width"]) - room_to_connect = widest_wall['connected_rooms'][0]['roomId'] + room_to_connect = widest_wall["connected_rooms"][0]["roomId"] current_door["room0"] = room current_door["room1"] = room_to_connect current_door["id"] = f"door|{i}|{room}|{room_to_connect}" - 
wall_to_connect = widest_wall['connected_rooms'][0]['wallId'] + wall_to_connect = widest_wall["connected_rooms"][0]["wallId"] current_door["wall0"] = widest_wall["id"] current_door["wall1"] = wall_to_connect @@ -166,10 +238,14 @@ def generate_doors(self, scene, additional_requirements_door): current_door["assetId"] = door_id # get polygon - door_dimension = self.door_data[door_id]['boundingBox'] - door_type = self.door_data[door_id]['type'] - - door_polygon = self.get_door_polygon(widest_wall["connected_rooms"][0]["intersection"], door_dimension, door_type) + door_dimension = self.door_data[door_id]["boundingBox"] + door_type = self.door_data[door_id]["type"] + + door_polygon = self.get_door_polygon( + widest_wall["connected_rooms"][0]["intersection"], + door_dimension, + door_type, + ) if door_polygon != None: polygon, position, door_boxes, door_segment = door_polygon @@ -181,14 +257,13 @@ def generate_doors(self, scene, additional_requirements_door): connected_rooms.append(room) connected_rooms.append(room_to_connect) - + return raw_doorway_plan, doors, room_pairs, open_room_pairs - def get_room(self, rooms, room_type): for room in rooms: - if room_type == room["roomType"]: return room - + if room_type == room["roomType"]: + return room def parse_door_plan(self, plan): try: @@ -198,19 +273,18 @@ def parse_door_plan(self, plan): "room_type1": room_type1.strip(), "connection_type": connection_type.strip(), "size": size.strip(), - "style": style.strip() + "style": style.strip(), } except: print(f"{Fore.RED}Invalid door plan:{Fore.RESET}", plan) return None - def get_door_polygon(self, segment, door_dimension, connection_type): door_width = door_dimension["x"] door_height = door_dimension["y"] - start = np.array([segment[0]['x'], segment[0]['z']]) - end = np.array([segment[1]['x'], segment[1]['z']]) + start = np.array([segment[0]["x"], segment[0]["z"]]) + end = np.array([segment[1]["x"], segment[1]["z"]]) original_vector = end - start original_length = 
np.linalg.norm(original_vector) @@ -219,23 +293,29 @@ def get_door_polygon(self, segment, door_dimension, connection_type): if door_width >= original_length: print(f"{Fore.RED}The wall is too narrow to install a door.{Fore.RESET}") return None - + else: door_start = random.uniform(0, original_length - door_width) door_end = door_start + door_width - polygon = [{"x": door_start, "y": 0, "z": 0}, - {"x": door_end, "y": door_height, "z": 0}] - - door_segment = [list(start + normalized_vector * door_start), list(start + normalized_vector * door_end)] + polygon = [ + {"x": door_start, "y": 0, "z": 0}, + {"x": door_end, "y": door_height, "z": 0}, + ] + + door_segment = [ + list(start + normalized_vector * door_start), + list(start + normalized_vector * door_end), + ] door_boxes = self.create_rectangles(door_segment, connection_type) - - position = {"x": (polygon[0]["x"] + polygon[1]["x"]) / 2, - "y": (polygon[0]["y"] + polygon[1]["y"]) / 2, - "z": (polygon[0]["z"] + polygon[1]["z"]) / 2} - + + position = { + "x": (polygon[0]["x"] + polygon[1]["x"]) / 2, + "y": (polygon[0]["y"] + polygon[1]["y"]) / 2, + "z": (polygon[0]["z"] + polygon[1]["z"]) / 2, + } + return polygon, position, door_boxes, door_segment - def get_connection(self, room0_id, room1_id, walls): room0_walls = [wall for wall in walls if wall["roomId"] == room0_id] @@ -245,41 +325,59 @@ def get_connection(self, room0_id, room1_id, walls): if len(connections) != 0: for connection in connections: if connection["roomId"] == room1_id: - valid_connections.append({"wall0": wall["id"], - "wall1": connection["wallId"], - "segment": connection["intersection"]}) - + valid_connections.append( + { + "wall0": wall["id"], + "wall1": connection["wallId"], + "segment": connection["intersection"], + } + ) + if len(valid_connections) == 0: - print(f"{Fore.RED}There is no wall between {room0_id} and {room1_id}{Fore.RESET}") + print( + f"{Fore.RED}There is no wall between {room0_id} and {room1_id}{Fore.RESET}" + ) return None elif 
len(valid_connections) == 1: connection = valid_connections[0] - - else: # handle the case when there are multiple ways - print(f"{Fore.RED}There are multiple ways between {room0_id} and {room1_id}{Fore.RESET}") + + else: # handle the case when there are multiple ways + print( + f"{Fore.RED}There are multiple ways between {room0_id} and {room1_id}{Fore.RESET}" + ) longest_segment_length = 0 connection = None for current_connection in valid_connections: current_segment = current_connection["segment"] - current_segment_length = np.linalg.norm(np.array([current_segment[0]["x"], current_segment[0]["z"]]) - np.array([current_segment[1]["x"], current_segment[1]["z"]])) + current_segment_length = np.linalg.norm( + np.array([current_segment[0]["x"], current_segment[0]["z"]]) + - np.array([current_segment[1]["x"], current_segment[1]["z"]]) + ) if current_segment_length > longest_segment_length: connection = current_connection longest_segment_length = current_segment_length return connection - def get_connection_exterior(self, room0_id, room1_id, walls): room_id = room0_id if room0_id != "exterior" else room1_id - interior_walls = [wall["id"] for wall in walls if wall["roomId"] == room_id and "exterior" not in wall["id"]] - exterior_walls = [wall["id"] for wall in walls if wall["roomId"] == room_id and "exterior" in wall["id"]] + interior_walls = [ + wall["id"] + for wall in walls + if wall["roomId"] == room_id and "exterior" not in wall["id"] + ] + exterior_walls = [ + wall["id"] + for wall in walls + if wall["roomId"] == room_id and "exterior" in wall["id"] + ] wall_pairs = [] for interior_wall in interior_walls: for exterior_wall in exterior_walls: if interior_wall in exterior_wall: wall_pairs.append({"wall0": exterior_wall, "wall1": interior_wall}) - + valid_connections = [] for wall_pair in wall_pairs: wall0 = wall_pair["wall0"] @@ -288,52 +386,70 @@ def get_connection_exterior(self, room0_id, room1_id, walls): if wall["id"] == wall0: wall0_segment = wall["segment"] 
break - segment = [{"x": wall0_segment[0][0], "y": 0.0, "z": wall0_segment[0][1]}, - {"x": wall0_segment[1][0], "y": 0.0, "z": wall0_segment[1][1]}] + segment = [ + {"x": wall0_segment[0][0], "y": 0.0, "z": wall0_segment[0][1]}, + {"x": wall0_segment[1][0], "y": 0.0, "z": wall0_segment[1][1]}, + ] - valid_connections.append({"wall0": wall0, "wall1": wall1, "segment": segment}) - - if len(valid_connections) == 0: return None + valid_connections.append( + {"wall0": wall0, "wall1": wall1, "segment": segment} + ) + + if len(valid_connections) == 0: + return None - elif len(valid_connections) == 1: return valid_connections[0] + elif len(valid_connections) == 1: + return valid_connections[0] else: - print(f"{Fore.RED}There are multiple ways between {room0_id} and {room1_id}{Fore.RESET}") + print( + f"{Fore.RED}There are multiple ways between {room0_id} and {room1_id}{Fore.RESET}" + ) longest_segment_length = 0 connection = None for current_connection in valid_connections: current_segment = current_connection["segment"] - current_segment_length = np.linalg.norm(np.array([current_segment[0]["x"], current_segment[0]["z"]]) - np.array([current_segment[1]["x"], current_segment[1]["z"]])) + current_segment_length = np.linalg.norm( + np.array([current_segment[0]["x"], current_segment[0]["z"]]) + - np.array([current_segment[1]["x"], current_segment[1]["z"]]) + ) if current_segment_length > longest_segment_length: connection = current_connection longest_segment_length = current_segment_length return connection - def select_door(self, door_type, door_size, query): with torch.no_grad(): - query_feature_clip = self.clip_model.encode_text(self.clip_tokenizer([query])) + query_feature_clip = self.clip_model.encode_text( + self.clip_tokenizer([query]) + ) query_feature_clip /= query_feature_clip.norm(dim=-1, keepdim=True) - + clip_similarity = query_feature_clip @ self.door_feature_clip.T sorted_indices = torch.argsort(clip_similarity, descending=True)[0] valid_door_ids = [] for ind in 
sorted_indices: door_id = self.door_ids[ind] - if self.door_data[door_id]["type"] == door_type and self.door_data[door_id]["size"] == door_size: + if ( + self.door_data[door_id]["type"] == door_type + and self.door_data[door_id]["size"] == door_size + ): valid_door_ids.append(door_id) top_door_id = valid_door_ids[0] - valid_door_ids = [door_id for door_id in valid_door_ids if door_id not in self.used_assets] - if len(valid_door_ids) == 0: valid_door_ids = [top_door_id] - + valid_door_ids = [ + door_id for door_id in valid_door_ids if door_id not in self.used_assets + ] + if len(valid_door_ids) == 0: + valid_door_ids = [top_door_id] + return valid_door_ids[0] - def create_rectangles(self, segment, connection_type): box_width = 1.0 - if connection_type == "doorframe": box_width = 1.0 + if connection_type == "doorframe": + box_width = 1.0 # Convert to numpy arrays for easier calculations pt1 = np.array(segment[0]) @@ -348,21 +464,37 @@ def create_rectangles(self, segment, connection_type): perp_vec *= box_width # Calculate the four points for each rectangle - top_rectangle = [list(pt1 + perp_vec), list(pt2 + perp_vec), list(pt2), list(pt1)] - bottom_rectangle = [list(pt1), list(pt2), list(pt2 - perp_vec), list(pt1 - perp_vec)] + top_rectangle = [ + list(pt1 + perp_vec), + list(pt2 + perp_vec), + list(pt2), + list(pt1), + ] + bottom_rectangle = [ + list(pt1), + list(pt2), + list(pt2 - perp_vec), + list(pt1 - perp_vec), + ] return top_rectangle, bottom_rectangle - - + def get_room_pairs_str(self, rooms, walls): - room_pairs = [(wall["roomId"], wall["connected_rooms"][0]["roomId"]) for wall in walls if len(wall["connected_rooms"]) == 1 and wall["width"] >= 2.0] + room_pairs = [ + (wall["roomId"], wall["connected_rooms"][0]["roomId"]) + for wall in walls + if len(wall["connected_rooms"]) == 1 and wall["width"] >= 2.0 + ] for wall in walls: if "exterior" in wall["id"]: room_pairs.append(("exterior", wall["roomId"])) room_pairs_no_dup = [] for pair in room_pairs: - if pair 
def get_room_size(self, room):
    """Return the axis-aligned extent of a room's floor polygon.

    Args:
        room: mapping with a ``"floorPolygon"`` entry, an iterable of
            points that each expose ``"x"`` and ``"z"`` coordinates.

    Returns:
        ``(x_extent, z_extent)``: the width of the polygon's bounding box
        along x and along z, in the same units as the coordinates.
    """
    xs, zs = [], []
    for vertex in room["floorPolygon"]:
        xs.append(vertex["x"])
        zs.append(vertex["z"])
    x_extent = max(xs) - min(xs)
    z_extent = max(zs) - min(zs)
    return (x_extent, z_extent)
self.door_data[door_id]["size"] == "single" + ] + double_doors = [ + door_id + for door_id in self.door_ids + if self.door_data[door_id]["size"] == "double" + ] + + if wall_width < 2.0: + return random.choice(single_doors) + else: + return random.choice(double_doors + single_doors) diff --git a/modules/empty_house.json b/ai2holodeck/generation/empty_house.json similarity index 100% rename from modules/empty_house.json rename to ai2holodeck/generation/empty_house.json diff --git a/ai2holodeck/generation/floor_objects.py b/ai2holodeck/generation/floor_objects.py new file mode 100644 index 0000000..0532cc7 --- /dev/null +++ b/ai2holodeck/generation/floor_objects.py @@ -0,0 +1,1696 @@ +import copy +import datetime +import json +import math +import multiprocessing +import random +import re +import time + +import editdistance +import matplotlib.pyplot as plt +import numpy as np +from langchain import PromptTemplate, OpenAI +from rtree import index +from scipy.interpolate import interp1d +from shapely.geometry import Polygon, Point, box, LineString + +import ai2holodeck.generation.prompts as prompts +from ai2holodeck.generation.milp_utils import * +from ai2holodeck.generation.objaverse_retriever import ObjathorRetriever +from ai2holodeck.generation.utils import get_bbox_dims + + +class FloorObjectGenerator: + def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): + self.json_template = { + "assetId": None, + "id": None, + "kinematic": True, + "position": {}, + "rotation": {}, + "material": None, + "roomId": None, + } + self.llm = llm + self.object_retriever = object_retriever + self.database = object_retriever.database + self.constraint_prompt = PromptTemplate( + input_variables=["room_type", "room_size", "objects"], + template=prompts.object_constraints_prompt, + ) + self.baseline_prompt = PromptTemplate( + input_variables=["room_type", "room_size", "objects"], + template=prompts.floor_baseline_prompt, + ) + self.grid_density = 20 + self.add_window = False 
def generate_objects_per_room(self, args):
    """Compute floor-object placements for a single room.

    Args:
        args: 6-tuple ``(room, doors, windows, open_walls,
            selected_objects, use_constraint)``.  ``room`` must provide
            ``"id"``, ``"roomType"`` and ``"vertices"``;
            ``selected_objects[room_type]["floor"]`` is an iterable of
            ``(object_name, asset_id)`` pairs.  When ``use_constraint`` is
            true the constraint solver is used, otherwise the LLM is asked
            for absolute positions directly.

    Returns:
        A list of placement dicts based on ``self.json_template``.

    Raises:
        ValueError: if ``use_constraint`` is true and
            ``self.constraint_type`` is not ``"llm"``, ``"middle"`` or
            ``"edge"`` (previously this printed a message and then failed
            on an unbound local).
    """
    room, doors, windows, open_walls, selected_objects, use_constraint = args

    selected_floor_objects = selected_objects[room["roomType"]]["floor"]
    object_name2id = {
        object_name: asset_id for object_name, asset_id in selected_floor_objects
    }

    room_id = room["id"]
    room_type = room["roomType"]
    room_x, room_z = self.get_room_size(room)

    room_size = f"{room_x} cm x {room_z} cm"
    grid_size = max(room_x // self.grid_density, room_z // self.grid_density)

    object_names = list(object_name2id.keys())

    if use_constraint:
        # get constraints
        constraint_prompt = self.constraint_prompt.format(
            room_type=room_type,
            room_size=room_size,
            objects=", ".join(object_names),
        )

        if self.constraint_type == "llm":
            constraint_plan = self.llm(constraint_prompt)
        elif self.constraint_type in ("middle", "edge"):
            # Synthetic plan: every object gets the same global constraint.
            constraint_plan = "".join(
                f"{object_name} | {self.constraint_type}\n"
                for object_name in object_names
            )
        else:
            raise ValueError(
                f"constraint type {self.constraint_type!r} not supported; "
                "expected 'llm', 'middle' or 'edge'"
            )

        print(f"plan for {room_type}: {constraint_plan}")
        constraints = self.parse_constraints(constraint_plan, object_names)

        # get objects list (only objects that received a constraint entry)
        object2dimension = {
            object_name: get_bbox_dims(self.database[object_id])
            for object_name, object_id in object_name2id.items()
        }

        objects_list = [
            (
                object_name,
                (
                    object2dimension[object_name]["x"] * 100 + self.size_buffer,
                    object2dimension[object_name]["z"] * 100 + self.size_buffer,
                ),
            )
            for object_name in constraints
        ]

        # get initial state: doors/windows/open walls already occupy space
        room_vertices = [(x * 100, y * 100) for (x, y) in room["vertices"]]
        room_poly = Polygon(room_vertices)
        initial_state = self.get_door_window_placements(
            doors, windows, room_vertices, open_walls, self.add_window
        )

        # solve
        solver = DFS_Solver_Floor(
            grid_size=grid_size, max_duration=30, constraint_bouns=1
        )
        solution = solver.get_solution(
            room_poly,
            objects_list,
            constraints,
            initial_state,
            use_milp=self.use_milp,
        )
        return self.solution2placement(solution, object_name2id, room_id)

    baseline_prompt = self.baseline_prompt.format(
        room_type=room_type,
        room_size=room_size,
        objects=", ".join(object_names),
    )
    room_origin = [
        min(v[0] for v in room["vertices"]),
        min(v[1] for v in room["vertices"]),
    ]

    # Ask the LLM until its reply contains a parseable fenced JSON block.
    # NOTE(review): the retry is unbounded, as in the original code.
    while True:
        completion_text = self.llm(baseline_prompt)
        try:
            completion_text = re.findall(
                r"```(.*?)```", completion_text, re.DOTALL
            )[0]
            completion_text = re.sub(
                r"^json", "", completion_text, flags=re.MULTILINE
            )
            all_data = json.loads(completion_text)
        except (IndexError, json.JSONDecodeError):
            # IndexError: no fenced block at all; JSONDecodeError: bad JSON.
            continue
        break

    print(f"completion text for {room_type}: {completion_text}")
    placements = []
    for data in all_data:
        object_name = data["object_name"]
        if object_name not in object_name2id:
            # Unknown object: stop here and return what was placed so far.
            break
        object_id = object_name2id[object_name]

        dimension = get_bbox_dims(self.database[object_id])
        placement = self.json_template.copy()
        placement["id"] = f"{object_name} ({room_id})"
        placement["object_name"] = object_name
        placement["assetId"] = object_id
        placement["roomId"] = room_id
        placement["position"] = {
            "x": room_origin[0] + (data["position"]["X"] / 100),
            "y": dimension["y"] / 2,
            "z": room_origin[1] + (data["position"]["Y"] / 100),
        }
        placement["rotation"] = {"x": 0, "y": data["rotation"], "z": 0}
        placements.append(placement)

    return placements
def get_room_size(self, room):
    """Return the room's bounding-box size scaled by 100 as integers.

    The floor-polygon extents are multiplied by 100 *before* being
    converted to ``int`` so fractional extents are kept (an extent of 3.5
    yields 350).  The previous code truncated first, turning 3.5 into 300.

    Args:
        room: mapping with a ``"floorPolygon"`` entry whose points expose
            ``"x"`` and ``"z"`` coordinates (presumably metres, giving a
            result in centimetres -- confirm against the scene schema).

    Returns:
        ``(x_size, z_size)`` as a tuple of two ``int`` values.
    """
    floor_polygon = room["floorPolygon"]
    x_values = [point["x"] for point in floor_polygon]
    z_values = [point["z"] for point in floor_polygon]
    # round() guards against float noise such as 2.3 * 100 == 229.999...
    return (
        int(round((max(x_values) - min(x_values)) * 100)),
        int(round((max(z_values) - min(z_values)) * 100)),
    )
re.compile(r"^(\d+[\.\)]\s*|- )") + plan = pattern.sub("", plan) + if plan[-1] == ".": + plan = plan[:-1] + + object_name = ( + plan.split("|")[0].replace("*", "").strip() + ) # remove * in object name + + if object_name not in object_names: + continue + + object2constraints[object_name] = [] + + constraints = plan.split("|")[1:] + for constraint in constraints: + constraint = constraint.strip() + constraint_name = constraint.split(",")[0].strip() + + if constraint_name == "n/a": + continue + + try: + constraint_type = constraint_name2type[constraint_name] + except: + _, new_constraint_name = min( + [ + (editdistance.eval(cn, constraint_name), cn) + for cn in constraint_name2type + ] + ) + print( + f"constraint type {constraint_name} not found, using {new_constraint_name} instead." + ) + constraint_name = new_constraint_name + constraint_type = constraint_name2type[constraint_name] + + if constraint_type == "global": + object2constraints[object_name].append( + {"type": constraint_type, "constraint": constraint_name} + ) + elif constraint_type in [ + "relative", + "direction", + "alignment", + "distance", + ]: + try: + target = constraint.split(",")[1].strip() + except: + print(f"wrong format of constraint: {constraint}") + continue + + if target in object2constraints: + if constraint_name == "around": + object2constraints[object_name].append( + { + "type": "distance", + "constraint": "near", + "target": target, + } + ) + object2constraints[object_name].append( + { + "type": "direction", + "constraint": "face to", + "target": target, + } + ) + elif constraint_name == "in front of": + object2constraints[object_name].append( + { + "type": "relative", + "constraint": "in front of", + "target": target, + } + ) + object2constraints[object_name].append( + { + "type": "alignment", + "constraint": "center aligned", + "target": target, + } + ) + else: + object2constraints[object_name].append( + { + "type": constraint_type, + "constraint": constraint_name, + "target": target, 
def order_objects_by_size(self, selected_floor_objects):
    """Sort floor objects by bounding-box footprint, largest first.

    Args:
        selected_floor_objects: iterable of ``(object_name, asset_id)``
            pairs; each ``asset_id`` is looked up in ``self.database``.

    Returns:
        A list of ``[object_name, asset_id]`` lists ordered by descending
        ``x * z`` of the asset's bounding-box dimensions.  Entries with
        equal footprint keep their input order.
    """

    def footprint_area(entry):
        dims = get_bbox_dims(self.database[entry[1]])
        return dims["x"] * dims["z"]

    ranked = sorted(selected_floor_objects, key=footprint_area, reverse=True)
    return [[object_name, asset_id] for object_name, asset_id in ranked]
0.5, + "distance": 1.8, + } + + self.edge_bouns = 0.0 # worth more than one constraint + + def get_solution( + self, bounds, objects_list, constraints, initial_state, use_milp=False + ): + self.start_time = time.time() + if use_milp: + # iterate through the constraints list + # for each constraint type "distance", add the same constraint to the target object + new_constraints = constraints.copy() + for object_name, object_constraints in constraints.items(): + for constraint in object_constraints: + if constraint["type"] == "distance": + target_object_name = constraint["target"] + if target_object_name in constraints.keys(): + # if there is already a distance constraint of target object_name, continue + if any( + constraint["type"] == "distance" + and constraint["target"] == object_name + for constraint in constraints[target_object_name] + ): + continue + new_constraint = constraint.copy() + new_constraint["target"] = object_name + new_constraints[target_object_name].append(new_constraint) + # iterate through the constraints list + # for each constraint type "left of" or "right of", add the same constraint to the target object + # for object_name, object_constraints in constraints.items(): + # for constraint in object_constraints: if constraint["type"] == "relative": + # if constraint["constraint"] == "left of": + constraints = new_constraints + + try: + self.milp_dfs(bounds, objects_list, constraints, initial_state, 10) + except SolutionFound as e: + print(f"Time taken: {time.time() - self.start_time}") + + else: + grid_points = self.create_grids(bounds) + grid_points = self.remove_points(grid_points, initial_state) + try: + self.dfs( + bounds, objects_list, constraints, grid_points, initial_state, 30 + ) + except SolutionFound as e: + print(f"Time taken: {time.time() - self.start_time}") + + print(f"Number of solutions found: {len(self.solutions)}") + max_solution = self.get_max_solution(self.solutions) + + if not use_milp and self.vistualize: + 
def get_max_solution(self, solutions):
    """Return the solution with the highest total placement score.

    Args:
        solutions: list of dicts mapping an object name to a placement
            sequence whose last element is that placement's score.

    Returns:
        The first solution with the maximal summed score, or an empty dict
        when ``solutions`` is empty (the search found nothing).  Previously
        an empty list made ``np.argmax`` raise ``ValueError``.
    """
    if not solutions:
        return {}
    path_weights = [
        sum(placement[-1] for placement in solution.values())
        for solution in solutions
    ]
    return solutions[int(np.argmax(path_weights))]
== "global" + ), + None, + ) + + if global_constraint is None: + global_constraint = {"type": "global", "constraint": "edge"} + + if global_constraint["constraint"] == "edge": + candidate_solutions = copy.deepcopy( + edge_solutions + ) # edge is hard constraint + else: + if len(constraints) > 1: + candidate_solutions = ( + solutions + edge_solutions + ) # edge is soft constraint + else: + candidate_solutions = copy.deepcopy(solutions) # the first object + + candidate_solutions = self.filter_collision( + placed_objects, candidate_solutions + ) # filter again after global constraint + + if candidate_solutions == []: + return candidate_solutions + random.shuffle(candidate_solutions) + placement2score = { + tuple(solution[:3]): solution[-1] for solution in candidate_solutions + } + + # add a bias to edge solutions + for solution in candidate_solutions: + if solution in edge_solutions and len(constraints) >= 1: + placement2score[tuple(solution[:3])] += self.edge_bouns + + for constraint in constraints: + if "target" not in constraint: + continue + + func = self.func_dict.get(constraint["type"]) + valid_solutions = func( + constraint["constraint"], + placed_objects[constraint["target"]], + candidate_solutions, + ) + + weight = self.constraint_type2weight[constraint["type"]] + if constraint["type"] == "distance": + for solution in valid_solutions: + bouns = solution[-1] + placement2score[tuple(solution[:3])] += bouns * weight + else: + for solution in valid_solutions: + placement2score[tuple(solution[:3])] += ( + self.constraint_bouns * weight + ) + + # normalize the scores + for placement in placement2score: + placement2score[placement] /= max(len(constraints), 1) + + sorted_placements = sorted( + placement2score, key=placement2score.get, reverse=True + ) + sorted_solutions = [ + list(placement) + [placement2score[placement]] + for placement in sorted_placements + ] + + return sorted_solutions + + def create_grids(self, room_poly): + # get the min and max bounds of the 
def remove_points(self, grid_points, objects_dict):
    """Return the grid points not contained in any placed object's polygon.

    Args:
        grid_points: iterable of ``(x, y)`` points.
        objects_dict: mapping whose values are 4-item placements; the third
            item holds the polygon coordinates of the object.

    Returns:
        A new list with the points of ``grid_points`` (input order kept)
        for which no object polygon ``contains`` the point.
    """
    footprints = [Polygon(coords) for _, _, coords, _ in objects_dict.values()]

    # r-tree over the bounding boxes so each point is only tested against
    # polygons whose bounds it touches.
    spatial_index = index.Index()
    for position, footprint in enumerate(footprints):
        spatial_index.insert(position, footprint.bounds)

    def is_free(grid_point):
        probe = Point(grid_point)
        return all(
            not footprints[hit].contains(probe)
            for hit in spatial_index.intersection(probe.bounds)
        )

    return [grid_point for grid_point in grid_points if is_free(grid_point)]
def filter_facing_wall(self, room_poly, solutions, obj_dim, min_front_clearance=30):
    """Drop candidates whose front face is too close to the room boundary.

    The front-centre point is the candidate's centre shifted by half the
    object width in the direction given by its rotation (0: +y, 90: +x,
    180: -y, 270: -x).

    Args:
        room_poly: room polygon; only ``room_poly.boundary.distance`` is
            used.
        solutions: iterable of candidates shaped ``[center, rotation, ...]``
            with ``rotation`` in ``{0, 90, 180, 270}``.
        obj_dim: ``(length, width)``; only the width (index 1) is read.
        min_front_clearance: minimum allowed distance between the
            front-centre point and the room boundary, in the same units as
            the candidate coordinates.  Defaults to 30, the value that was
            previously hard-coded.

    Returns:
        A new list with the candidates that keep the required clearance,
        in their original order.

    Raises:
        KeyError: if a candidate's rotation is not one of the four
            supported values.
    """
    half_width = obj_dim[1] / 2
    front_offsets = {
        0: (0, half_width),
        90: (half_width, 0),
        180: (0, -half_width),
        270: (-half_width, 0),
    }

    kept = []
    for solution in solutions:
        center_x, center_y = solution[0]
        offset_x, offset_y = front_offsets[solution[1]]
        front_center = Point(center_x + offset_x, center_y + offset_y)
        if room_poly.boundary.distance(front_center) >= min_front_clearance:
            kept.append(solution)
    return kept
def place_corner(self, room_poly, solutions, obj_dim):
    """Keep edge placements that also have one side near the room boundary.

    Candidates are first passed through ``self.place_edge``.  For each
    survivor, the two points half an object length to either side of the
    centre (direction chosen by rotation) are measured against
    ``room_poly.boundary``; the candidate is kept when the nearer of the
    two is closer than ``self.grid_size``.

    Args:
        room_poly: room polygon exposing ``boundary.distance``.
        solutions: candidate placements, forwarded to ``place_edge``.
        obj_dim: ``(length, width)`` of the object.

    Returns:
        A list of the kept candidates; each has ``self.constraint_bouns``
        added to its last element (the candidates are mutated in place).
    """
    length, _width = obj_dim
    half = length / 2

    # rotation -> offsets from the centre to the two side midpoints
    side_offsets = {
        0: ((-half, 0), (half, 0)),
        90: ((0, half), (0, -half)),
        180: ((half, 0), (-half, 0)),
        270: ((0, -half), (0, half)),
    }

    kept = []
    for candidate in self.place_edge(room_poly, solutions, obj_dim):
        (center_x, center_y), rotation = candidate[:2]
        first_offset, second_offset = side_offsets[rotation]

        first_side = Point(center_x + first_offset[0], center_y + first_offset[1])
        second_side = Point(center_x + second_offset[0], center_y + second_offset[1])

        first_distance = room_poly.boundary.distance(first_side)
        second_distance = room_poly.boundary.distance(second_side)

        if min(first_distance, second_distance) < self.grid_size:
            candidate[-1] += self.constraint_bouns
            kept.append(candidate)

    return kept
def place_distance(self, distance_type, target_object, solutions):
    """Score candidates by their distance to an already placed object.

    Each candidate's last element is overwritten with a score obtained by
    linear interpolation over the observed distance range:

    * ``"near"``: 1 at the smallest distance, falling to 0 at a distance of
      80 (or at the largest distance when every candidate is closer than
      80); 0 everywhere if no candidate is closer than 80.
    * ``"far"``: 0 at the smallest distance, 1 at the largest.

    When all candidates are equally distant there is nothing to
    interpolate; they all receive 1.0 for ``"near"`` with distance below
    80 and 0.0 otherwise.  (The previous code produced NaN scores there.)

    Args:
        distance_type: ``"near"`` or ``"far"``.
        target_object: placement whose third item holds the target's
            polygon coordinates.
        solutions: candidate placements; the third item of each holds its
            polygon coordinates.  Candidates are mutated in place.

    Returns:
        A new list containing every candidate, or ``[]`` when ``solutions``
        is empty.

    Raises:
        ValueError: if ``distance_type`` is neither ``"near"`` nor
            ``"far"``.
    """
    if distance_type not in ("near", "far"):
        raise ValueError(f"unsupported distance constraint: {distance_type!r}")
    if not solutions:
        return []

    target_poly = Polygon(target_object[2])
    distances = [
        target_poly.distance(Polygon(solution[2])) for solution in solutions
    ]
    min_distance = min(distances)
    max_distance = max(distances)

    if min_distance == max_distance:
        # Degenerate range: interpolation knots would share one x value.
        near_hit = distance_type == "near" and min_distance < 80
        constant_score = 1.0 if near_hit else 0.0
        for solution in solutions:
            solution[-1] = constant_score
        return list(solutions)

    if distance_type == "near":
        if min_distance < 80:
            points = [(min_distance, 1), (80, 0), (max_distance, 0)]
        else:
            points = [(min_distance, 0), (max_distance, 0)]
    else:
        points = [(min_distance, 0), (max_distance, 1)]

    # Collapse a repeated knot (max_distance == 80) and order knots by x.
    knots = sorted(dict(points).items())
    score_of = interp1d(
        [knot[0] for knot in knots],
        [knot[1] for knot in knots],
        kind="linear",
        fill_value="extrapolate",
    )

    for solution, distance in zip(solutions, distances):
        solution[-1] = float(score_of(distance))

    return list(solutions)
def place_face_same(self, target_object, solutions):
    """Keep candidates whose rotation equals the target object's rotation.

    Args:
        target_object: placement whose second item is the target rotation.
        solutions: candidate placements; the second item is the rotation
            and the last item is the running score.

    Returns:
        A list of the matching candidates in input order.  Each match has
        ``self.constraint_bouns`` added to its last element (mutated in
        place); non-matching candidates are left untouched.
    """
    wanted_rotation = target_object[1]
    matching = [
        candidate for candidate in solutions if candidate[1] == wanted_rotation
    ]
    for candidate in matching:
        candidate[-1] += self.constraint_bouns
    return matching
< eps + or abs(sol_center[1] - target_center[1]) < eps + ): + solution[-1] += self.constraint_bouns + valid_solutions.append(solution) + return valid_solutions + + def visualize_grid(self, room_poly, grid_points, solutions): + plt.rcParams["font.family"] = "Times New Roman" + plt.rcParams["font.size"] = 22 + + # create a new figure + fig, ax = plt.subplots() + + # draw the room + x, y = room_poly.exterior.xy + ax.plot(x, y, "-", label="Room", color="black", linewidth=2) + + # draw the grid points + grid_x = [point[0] for point in grid_points] + grid_y = [point[1] for point in grid_points] + ax.plot(grid_x, grid_y, "o", markersize=2, color="grey") + + # draw the solutions + for object_name, solution in solutions.items(): + center, rotation, box_coords = solution[:3] + center_x, center_y = center + + # create a polygon for the solution + obj_poly = Polygon(box_coords) + x, y = obj_poly.exterior.xy + ax.plot(x, y, "-", linewidth=2, color="black") + + # ax.text(center_x, center_y, object_name, fontsize=18, ha='center') + + # set arrow direction based on rotation + if rotation == 0: + ax.arrow(center_x, center_y, 0, 25, head_width=10, fc="black") + elif rotation == 90: + ax.arrow(center_x, center_y, 25, 0, head_width=10, fc="black") + elif rotation == 180: + ax.arrow(center_x, center_y, 0, -25, head_width=10, fc="black") + elif rotation == 270: + ax.arrow(center_x, center_y, -25, 0, head_width=10, fc="black") + # axis off + ax.axis("off") + ax.set_aspect("equal", "box") # to keep the ratios equal along x and y axis + create_time = ( + str(datetime.datetime.now()) + .replace(" ", "-") + .replace(":", "-") + .replace(".", "-") + ) + plt.savefig(f"{create_time}.pdf", bbox_inches="tight", dpi=300) + plt.show() + + def milp_dfs( + self, room_poly, all_objects_list, constraints, placed_objects, branch_factor=1 + ): + if len(all_objects_list) == 0: + self.solutions.append(placed_objects) + return placed_objects + + if time.time() - self.start_time > self.max_duration: + 
print(f"Time limit reached.") + raise SolutionFound(self.solutions) + + def milp_solve(soft_constraints_list, hard_constraints_list, verbose=False): + problem = cp.Problem( + cp.Maximize(sum(soft_constraints_list)), hard_constraints_list + ) + if verbose: + print("solving milp using GUROBI ...") + problem.solve(solver=cp.GUROBI, reoptimize=True, verbose=False) + return problem.value + + def parse_object_properties(object_properties): + x, y = object_properties[0] + rotation = int(object_properties[1] or 0) + # set rotation to the closest 90 degree + rotation = int(round(rotation / 90) * 90) + assert rotation in [0, 90, 180, 270] + object_bbox = object_properties[2] + min_x = min([point[0] for point in object_bbox]) + max_x = max([point[0] for point in object_bbox]) + min_y = min([point[1] for point in object_bbox]) + max_y = max([point[1] for point in object_bbox]) + object_dim = ( + (max_x - min_x, max_y - min_y) + if rotation == 0 or rotation == 180 + else (max_y - min_y, max_x - min_x) + ) + return x, y, rotation, object_dim + + def find_object_dim(target_object_name, objects_list, placed_objects): + target_object_dim = None + for object_name_1, object_dim_1 in objects_list: + if object_name_1 == target_object_name: + target_object_dim = object_dim_1 + return target_object_dim + + if not None: + for object_name_1, object_properties in placed_objects.items(): + if object_name_1 == target_object_name: + x, y, rotation, target_object_dim = parse_object_properties( + object_properties + ) + return target_object_dim + return None + + found_a_solution = False + # randomly select a set of objects from all_objects_list + # start with the largest object + more objects --> gradually reduce the number of objects + for branch_idx in range(branch_factor): + # sample a set of objects from a list that contains the first object + + k = random.randint(0, min(5, len(all_objects_list) - 1)) + objects_list = [all_objects_list[0]] + random.sample( + all_objects_list[1:], k + ) + + 
hard_constraints_list = [] + soft_constraints_list = [0] + + # formulate the milp problem + # object_name, object_dim = objects_list[0] + # x, y, rotate_180, rotate_90 + variables_dict = { + object[0]: [ + cp.Variable(), + cp.Variable(), + cp.Variable(boolean=True), + cp.Variable(boolean=True), + ] + for object in objects_list + } + # add placed objects into variables dict even though they are not variables + for object, object_properties in placed_objects.items(): + x, y = object_properties[0] + rotation = int(object_properties[1]) + variables_dict[object] = [ + x, + y, + rotation == 180, + rotation == 90 or rotation == 270, + ] + + # Initialize a list of variables, each variable represents the coordinate for each object + room_min_x, room_min_y, room_max_x, room_max_y = room_poly.bounds + # Add boundary constraints to all objects + for object_name, object_dim in objects_list: + hard_constraints_list.extend( + create_boundary_constraints( + variables_dict[object_name], + object_dim, + (room_min_x, room_min_y, room_max_x, room_max_y), + ) + ) + # Add pariwise collision constraints + for object_name_1, object_dim_1 in objects_list: + for object_name_2, object_dim_2 in objects_list: + if object_name_1 == object_name_2: + continue + # collision constraints should be hard constraints + hard_constraints_list.extend( + create_nooverlap_constraints( + variables_dict[object_name_1], + variables_dict[object_name_2], + object_dim_1, + object_dim_2, + ) + ) + + # Add pariwise collision constraints with placed objects + for object_name_1, object_dim_1 in objects_list: + for object_name_2, object_properties_2 in placed_objects.items(): + # bbox is a list of four points + x, y, rotation, object_dim_2 = parse_object_properties( + object_properties_2 + ) + + hard_constraints_list.extend( + create_nooverlap_constraints( + variables_dict[object_name_1], + [x, y, rotation == 180, rotation == 90 or rotation == 270], + object_dim_1, + object_dim_2, + ) + ) + + # default constraints / 
heuristics? + for object_name, object_dim in objects_list: + # encourage dispersement of assets + all_other_objects_list = [ + x[0] for x in objects_list if x[0] != object_name + ] + list(placed_objects.keys()) + for target_object_name in all_other_objects_list: + hard_constraints, soft_constraints = create_distance_constraints( + variables_dict[object_name], + variables_dict[target_object_name], + upper_bound=[room_max_x - room_min_x, room_max_y - room_min_y], + type="far", + ) + assert len(soft_constraints) == 1 + # soft_constraints[0] *= 0.001 + hard_constraints_list.extend(hard_constraints) + soft_constraints_list.extend(soft_constraints) + + # use cvxpy to solve for the hard constraints + for object_name, object_dim in objects_list: + + # by default - add soft edge constraints although this might make the solver take a longer time + if not any( + constraint["type"] == "global" + for constraint in constraints[object_name] + ): + hard_constraints, soft_constraints = create_edge_constraints( + variables_dict[object_name], + object_dim, + room_dim=(room_min_x, room_min_y, room_max_x, room_max_y), + hard=False, + ) + soft_constraints[0] *= 100 + hard_constraints_list.extend(hard_constraints) + soft_constraints_list.extend(soft_constraints) + + for constraint in constraints[object_name]: + if constraint["type"] == "global": + if constraint["constraint"] == "edge": # hard constraints + hard_constraints, soft_constraints = ( + create_edge_constraints( + variables_dict[object_name], + object_dim, + room_dim=( + room_min_x, + room_min_y, + room_max_x, + room_max_y, + ), + hard=True, + ) + ) + hard_constraints_list.extend(hard_constraints) + soft_constraints_list.extend(soft_constraints) + + if constraint["type"] == "direction": + assert constraint["constraint"] == "face to" + target_object_name = constraint["target"] + target_object_dim = find_object_dim( + target_object_name, objects_list, placed_objects + ) + if target_object_dim: + hard_constraints_list.extend( + 
create_directional_constraints( + variables_dict[object_name], + variables_dict[target_object_name], + object_dim, + target_object_dim, + ) + ) + + if constraint["type"] == "alignment": + assert constraint["constraint"] == "center aligned" + target_object_name = constraint["target"] + target_object_dim = find_object_dim( + target_object_name, objects_list, placed_objects + ) + if target_object_dim: + hard_constraints_list.extend( + create_alignment_constraints( + variables_dict[object_name], + variables_dict[target_object_name], + object_dim, + target_object_dim, + ) + ) + + if constraint["type"] == "distance": + target_object_name = constraint["target"] + target_object_dim = find_object_dim( + target_object_name, objects_list, placed_objects + ) + if target_object_dim: + hard_constraints, soft_constraints = ( + create_distance_constraints( + variables_dict[object_name], + variables_dict[target_object_name], + upper_bound=[ + room_max_x - room_min_x, + room_max_y - room_min_y, + ], + type=constraint["constraint"], + ) + ) + hard_constraints_list.extend(hard_constraints) + soft_constraints_list.extend(soft_constraints) + assert len(soft_constraints) == 1 + # higher weighting + soft_constraints[0] *= 0.01 + + if constraint["type"] == "relative": + target_object_name = constraint["target"] + target_object_dim = find_object_dim( + target_object_name, objects_list, placed_objects + ) + if target_object_dim: + hard_constraints_list.extend( + create_relative_constraints( + variables_dict[object_name], + variables_dict[target_object_name], + object_dim, + target_object_dim, + constraint["constraint"], + ) + ) + + result = milp_solve( + soft_constraints_list, hard_constraints_list, verbose=False + ) + if result is None or math.isnan(result) or math.isinf(result): + continue + + found_a_solution = True + print(result, [x[0] for x in objects_list]) + + # we fonud a valid solution + # convert the placements to the same format as the dfs solver + placed_objects_updated = 
copy.deepcopy(placed_objects) + for object_name, object_dim in objects_list: + # (x, y), rotation, bbox, score + x = variables_dict[object_name][0].value.item() + y = variables_dict[object_name][1].value.item() + rotate_180 = variables_dict[object_name][2].value + rotate_90 = variables_dict[object_name][3].value + if not rotate_180: + rotate_180 = 0 + if not rotate_90: + rotate_90 = 0 + + # bbox has taken into account of the rotation + if rotate_90: + bbox = [ + (x - object_dim[1] / 2, y - object_dim[0] / 2), + (x + object_dim[1] / 2, y - object_dim[0] / 2), + (x + object_dim[1] / 2, y + object_dim[0] / 2), + (x - object_dim[1] / 2, y + object_dim[0] / 2), + ] + else: + bbox = [ + (x - object_dim[0] / 2, y - object_dim[1] / 2), + (x + object_dim[0] / 2, y - object_dim[1] / 2), + (x + object_dim[0] / 2, y + object_dim[1] / 2), + (x - object_dim[0] / 2, y + object_dim[1] / 2), + ] + + placed_objects_updated[object_name] = [ + (x, y), + rotate_180 * 180 + rotate_90 * 90, + bbox, + len(constraints[object_name]), + ] + + # remove all elemnts in objects_list from all_objects_list + self.milp_dfs( + room_poly, + [x for x in all_objects_list if x not in objects_list], + constraints, + placed_objects_updated, + branch_factor=1, + ) + + if not found_a_solution and len(placed_objects) != 0: + self.solutions.append(placed_objects) + + def test_dfs_placement(self): + room_vertices = ((0, 0), (0, 500), (500, 500), (500, 0)) + room_poly = Polygon(room_vertices) + grid_points = self.create_grids(room_poly) + objects = {"door": ((50, 50), 0, ((0, 0), (100, 0), (100, 100), (0, 100)), 1)} + grid_points = self.remove_points(grid_points, objects) + # self.visualize_grid(room_poly, grid_points, objects) + + object_dim = (200, 100) + solutions = self.get_all_solutions(room_poly, grid_points, object_dim) + solutions = self.filter_collision(objects, solutions) + solutions = self.place_edge(room_poly, solutions, object_dim) + + # for i, solution in enumerate(solutions): + # 
objects[f"sofa-{i}"] = solution + # self.visualize_grid(room_poly, grid_points, objects) + + random.seed(0) + objects["sofa"] = random.choice(solutions) + # self.visualize_grid(room_poly, grid_points, objects) + object_1_dim = (100, 50) + + solutions_1 = self.get_all_solutions(room_poly, grid_points, object_1_dim) + solutions_1 = self.filter_collision(objects, solutions_1) + + # random.seed(42) + # for i, solution in enumerate(random.sample(solutions_1, 25)): + # objects[f"coffee table-{i}"] = solution + + # objects[f"coffee table"] = [(300, 350), 0, ((350.0, 325.0), (350.0, 375.0), (250.0, 375.0), (250.0, 325.0), (350.0, 325.0)), 1.0] + # self.visualize_grid(room_poly, grid_points, objects) + + solutions_1 = self.place_face_to(objects["sofa"], solutions_1) + solutions_1 = self.place_relative("in front of", objects["sofa"], solutions_1) + solutions_1 = self.place_alignment_center( + "center alignment", objects["sofa"], solutions_1 + ) + solutions_1 = self.place_distance("near", objects["sofa"], solutions_1) + objects[f"coffee table"] = solutions_1[-1] + self.visualize_grid(room_poly, grid_points, objects) + + def test_milp_placement(self, simple=False, use_milp=True): + room_vertices = ((0, 0), (0, 600), (800, 600), (800, 0)) + room_poly = Polygon(room_vertices) + grid_points = self.create_grids(room_poly) + + if not simple: + constraints = { + "sofa-0": [{"type": "global", "constraint": "edge"}], + "sofa-1": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": "near", "target": "sofa-0"}, + { + "type": "alignment", + "constraint": "center aligned", + "target": "sofa-0", + }, + ], + "tv stand-0": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": "far", "target": "sofa-1"}, + { + "type": "alignment", + "constraint": "center aligned", + "target": "sofa-1", + }, + ], + "coffee table-0": [ + {"type": "global", "constraint": "middle"}, + {"type": "distance", "constraint": "near", "target": "sofa-0"}, + { + 
"type": "relative", + "constraint": "in front of", + "target": "sofa-0", + }, + { + "type": "alignment", + "constraint": "center aligned", + "target": "sofa-0", + }, + { + "type": "alignment", + "constraint": "center aligned", + "target": "sofa-0", + }, + { + "type": "direction", + "constraint": "face to", + "target": "tv stand-0", + }, + ], + "coffee table-1": [ + {"type": "global", "constraint": "middle"}, + {"type": "distance", "constraint": "near", "target": "sofa-1"}, + { + "type": "relative", + "constraint": "in front of", + "target": "sofa-1", + }, + { + "type": "alignment", + "constraint": "center aligned", + "target": "sofa-1", + }, + { + "type": "alignment", + "constraint": "center aligned", + "target": "sofa-1", + }, + { + "type": "direction", + "constraint": "face to", + "target": "tv stand-0", + }, + ], + "side table-0": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": "near", "target": "sofa-0"}, + {"type": "relative", "constraint": "side of", "target": "sofa-0"}, + ], + "side table-1": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": "near", "target": "sofa-1"}, + {"type": "relative", "constraint": "side of", "target": "sofa-1"}, + ], + "armchair-0": [ + {"type": "global", "constraint": "middle"}, + { + "type": "distance", + "constraint": "near", + "target": "coffee table-0", + }, + { + "type": "direction", + "constraint": "face to", + "target": "coffee table-0", + }, + { + "type": "direction", + "constraint": "face to", + "target": "coffee table-0", + }, + ], + "armchair-1": [ + {"type": "global", "constraint": "middle"}, + { + "type": "distance", + "constraint": "near", + "target": "coffee table-1", + }, + { + "type": "direction", + "constraint": "face to", + "target": "coffee table-1", + }, + { + "type": "direction", + "constraint": "face to", + "target": "coffee table-1", + }, + ], + "bookshelf-0": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": 
"far", "target": "tv stand-0"}, + ], + "bookshelf-1": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": "far", "target": "bookshelf-0"}, + { + "type": "alignment", + "constraint": "center aligned", + "target": "bookshelf-0", + }, + ], + } + + initial_state = { + "door-0": ( + (586.7550200520433, 550.0), + 0, + [ + (640.8300346432603, 500.0), + (532.6800054608262, 500.0), + (532.6800054608262, 600.0), + (640.8300346432603, 600.0), + ], + 1, + ) + } + + objects = [ + ("sofa-0", (301.6667297651499, 106.48952360032415)), + ("sofa-1", (301.6667297651499, 106.48952360032415)), + ("tv stand-0", (201.0964714933229, 59.39910836195032)), + ("coffee table-0", (69.15754261308616, 126.69169450358964)), + ("coffee table-1", (69.15754261308616, 126.69169450358964)), + ("side table-0", (61.74632023132328, 61.74453745262855)), + ("side table-1", (61.74632023132328, 61.74453745262855)), + ("armchair-0", (79.0368498902692, 89.4893987892571)), + ("armchair-1", (79.0368498902692, 89.4893987892571)), + ("bookshelf-0", (67.94689517917222, 43.8934937031396)), + ("bookshelf-1", (67.94689517917222, 43.8934937031396)), + ] + solution = self.get_solution( + room_poly, objects, constraints, initial_state, use_milp=use_milp + ) + else: + constraints = { + "dining table": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": "far", "target": "door"}, + {"type": "distance", "constraint": "near", "target": "chair"}, + ], + "chair": [ + { + "type": "relative", + "constraint": "side of", + "target": "dining table", + } + ], + } + initial_state = { + "door": ((50, 50), 0, ((0, 0), (100, 0), (100, 100), (0, 100)), 1) + } + objects = [("dining table", (100, 50)), ("chair", (50, 50))] + solution = self.get_solution( + room_poly, objects, constraints, initial_state, use_milp=use_milp + ) + + print("milp solution:", len(solution)) + for object_name, object_properties in solution.items(): + print(object_name, object_properties) + # if 
object_properties[2] == 90 or object_properties[2] == 270: + self.visualize_grid(room_poly, grid_points, solution) + + +if __name__ == "__main__": + solver = DFS_Solver_Floor(max_duration=30, grid_size=50) + solver.test_dfs_placement() + solver.test_milp_placement(simple=False, use_milp=True) diff --git a/ai2holodeck/generation/holodeck.py b/ai2holodeck/generation/holodeck.py new file mode 100644 index 0000000..0bc0a7d --- /dev/null +++ b/ai2holodeck/generation/holodeck.py @@ -0,0 +1,506 @@ +import datetime +import os +from typing import Optional, Dict, Any, Tuple + +import compress_json +import open_clip +from langchain.llms import OpenAI +from sentence_transformers import SentenceTransformer +from tqdm import tqdm + +from ai2holodeck.constants import ( + HOLODECK_BASE_DATA_DIR, + OBJATHOR_VERSIONED_DIR, + OBJATHOR_ASSETS_DIR, + OBJATHOR_FEATURES_DIR, + OBJATHOR_ANNOTATIONS_PATH, + HOLODECK_THOR_FEATURES_DIR, + HOLODECK_THOR_ANNOTATIONS_PATH, + LLM_MODEL_NAME, +) +from ai2holodeck.generation.ceiling_objects import CeilingObjectGenerator +from ai2holodeck.generation.doors import DoorGenerator +from ai2holodeck.generation.floor_objects import FloorObjectGenerator +from ai2holodeck.generation.layers import map_asset2layer +from ai2holodeck.generation.lights import generate_lights +from ai2holodeck.generation.objaverse_retriever import ObjathorRetriever +from ai2holodeck.generation.object_selector import ObjectSelector +from ai2holodeck.generation.rooms import FloorPlanGenerator +from ai2holodeck.generation.skybox import getSkybox +from ai2holodeck.generation.small_objects import SmallObjectGenerator +from ai2holodeck.generation.utils import get_top_down_frame, room_video +from ai2holodeck.generation.wall_objects import WallObjectGenerator +from ai2holodeck.generation.walls import WallGenerator +from ai2holodeck.generation.windows import WindowGenerator + + +def confirm_paths_exist(): + for p in [ + OBJATHOR_VERSIONED_DIR, + OBJATHOR_ASSETS_DIR, + 
OBJATHOR_FEATURES_DIR, + OBJATHOR_ANNOTATIONS_PATH, + HOLODECK_BASE_DATA_DIR, + HOLODECK_THOR_FEATURES_DIR, + HOLODECK_THOR_ANNOTATIONS_PATH, + ]: + if not os.path.exists(p): + raise FileNotFoundError( + f"Path {p} does not exist, this must exist for Holodeck generation to succeed." + f" Please see the Holodeck README file at https://github.com/allenai/Holodeck/blob/main/README.md" + f" for instruction on how to set up the required data directories." + ) + + +class Holodeck: + def __init__( + self, + openai_api_key: str, + openai_org: Optional[str], + objaverse_asset_dir: str, + single_room, + ): + confirm_paths_exist() + + if openai_org is not None: + os.environ["OPENAI_ORG"] = openai_org + + # initialize llm + self.llm = OpenAI( + model_name=LLM_MODEL_NAME, + max_tokens=2048, + openai_api_key=openai_api_key, + ) + + # initialize CLIP + ( + self.clip_model, + _, + self.clip_preprocess, + ) = open_clip.create_model_and_transforms( + "ViT-L-14", pretrained="laion2b_s32b_b82k" + ) + self.clip_tokenizer = open_clip.get_tokenizer("ViT-L-14") + + # initialize sentence transformer + self.sbert_model = SentenceTransformer("all-mpnet-base-v2", device="cpu") + + # objaverse version and asset dir + self.objaverse_asset_dir = objaverse_asset_dir + + # initialize generation + self.retrieval_threshold = 28 + self.object_retriever = ObjathorRetriever( + clip_model=self.clip_model, + clip_preprocess=self.clip_preprocess, + clip_tokenizer=self.clip_tokenizer, + sbert_model=self.sbert_model, + retrieval_threshold=self.retrieval_threshold, + ) + self.floor_generator = FloorPlanGenerator( + self.clip_model, self.clip_preprocess, self.clip_tokenizer, self.llm + ) + self.wall_generator = WallGenerator(self.llm) + self.door_generator = DoorGenerator( + self.clip_model, self.clip_preprocess, self.clip_tokenizer, self.llm + ) + self.window_generator = WindowGenerator(self.llm) + self.object_selector = ObjectSelector( + object_retriever=self.object_retriever, llm=self.llm + ) + 
self.floor_object_generator = FloorObjectGenerator( + object_retriever=self.object_retriever, llm=self.llm + ) + self.wall_object_generator = WallObjectGenerator( + object_retriever=self.object_retriever, llm=self.llm + ) + self.ceiling_generator = CeilingObjectGenerator( + object_retriever=self.object_retriever, llm=self.llm + ) + self.small_object_generator = SmallObjectGenerator( + object_retriever=self.object_retriever, llm=self.llm + ) + + # additional requirements + single_room_requirements = "I only need one room" + + if single_room: + self.additional_requirements_room = single_room_requirements + else: + self.additional_requirements_room = "N/A" + + self.additional_requirements_door = "N/A" + self.additional_requirements_window = ( + "Only one wall of each room should have windows" + ) + self.additional_requirements_object = "N/A" + self.additional_requirements_ceiling = "N/A" + + def get_empty_scene(self): + return compress_json.load("generation/empty_house.json") + + def empty_house(self, scene): + scene["rooms"] = [] + scene["walls"] = [] + scene["doors"] = [] + scene["windows"] = [] + scene["objects"] = [] + scene["proceduralParameters"]["lights"] = [] + return scene + + def generate_rooms(self, scene, additional_requirements_room, used_assets=[]): + self.floor_generator.used_assets = used_assets + rooms = self.floor_generator.generate_rooms(scene, additional_requirements_room) + scene["rooms"] = rooms + return scene + + def generate_walls(self, scene): + wall_height, walls = self.wall_generator.generate_walls(scene) + scene["wall_height"] = wall_height + scene["walls"] = walls + return scene + + def generate_doors(self, scene, additional_requirements_door="N/A", used_assets=[]): + self.door_generator.used_assets = used_assets + + # generate doors + ( + raw_doorway_plan, + doors, + room_pairs, + open_room_pairs, + ) = self.door_generator.generate_doors(scene, additional_requirements_door) + scene["raw_doorway_plan"] = raw_doorway_plan + scene["doors"] = 
doors + scene["room_pairs"] = room_pairs + scene["open_room_pairs"] = open_room_pairs + + # update walls + updated_walls, open_walls = self.wall_generator.update_walls( + scene["walls"], open_room_pairs + ) + scene["walls"] = updated_walls + scene["open_walls"] = open_walls + return scene + + def generate_windows( + self, + scene, + additional_requirements_window="I want to install windows to only one wall of each room", + used_assets=[], + ): + self.window_generator.used_assets = used_assets + raw_window_plan, walls, windows = self.window_generator.generate_windows( + scene, additional_requirements_window + ) + scene["raw_window_plan"] = raw_window_plan + scene["windows"] = windows + scene["walls"] = walls + return scene + + def select_objects(self, scene, additional_requirements_object, used_assets=[]): + self.object_selector.used_assets = used_assets + object_selection_plan, selected_objects = self.object_selector.select_objects( + scene, additional_requirements_object + ) + scene["object_selection_plan"] = object_selection_plan + scene["selected_objects"] = selected_objects + return scene + + def generate_ceiling_objects(self, scene, additional_requirements_ceiling="N/A"): + ( + raw_ceiling_plan, + ceiling_objects, + ) = self.ceiling_generator.generate_ceiling_objects( + scene, additional_requirements_ceiling + ) + scene["ceiling_objects"] = ceiling_objects + scene["raw_ceiling_plan"] = raw_ceiling_plan + return scene + + def generate_small_objects(self, scene, used_assets=[]): + self.small_object_generator.used_assets = used_assets + controller = self.small_object_generator.start_controller( + scene, self.objaverse_asset_dir + ) + event = controller.reset() + receptacle_ids = [ + obj["objectId"] + for obj in event.metadata["objects"] + if obj["receptacle"] and "___" not in obj["objectId"] + ] + if "Floor" in receptacle_ids: + receptacle_ids.remove("Floor") + + try: + ( + small_objects, + receptacle2small_objects, + ) = 
self.small_object_generator.generate_small_objects( + scene, controller, receptacle_ids + ) + scene["small_objects"] = small_objects + scene["receptacle2small_objects"] = receptacle2small_objects + except: + scene["small_objects"] = [] + print("Failed to generate small objects") + + controller.stop() # stop controller to avoid memory leak + return scene + + def change_ceiling_material(self, scene): + first_wall_material = scene["rooms"][0]["wallMaterial"] + scene["proceduralParameters"]["ceilingMaterial"] = first_wall_material + return scene + + def generate_scene( + self, + scene, + query: str, + save_dir: str, + used_assets=[], + add_ceiling=False, + generate_image=True, + generate_video=False, + add_time=True, + use_constraint=True, + random_selection=False, + use_milp=False, + ) -> Tuple[Dict[str, Any], str]: + # initialize scene + query = query.replace("_", " ") + scene["query"] = query + + # empty house + scene = self.empty_house(scene) + + # generate rooms + scene = self.generate_rooms( + scene, + additional_requirements_room=self.additional_requirements_room, + used_assets=used_assets, + ) + + # generate walls + scene = self.generate_walls(scene) + + # generate doors + scene = self.generate_doors( + scene, + additional_requirements_door=self.additional_requirements_door, + used_assets=used_assets, + ) + + # generate windows + scene = self.generate_windows( + scene, + additional_requirements_window=self.additional_requirements_window, + used_assets=used_assets, + ) + + # select objects + self.object_selector.random_selection = random_selection + scene = self.select_objects( + scene, + additional_requirements_object=self.additional_requirements_object, + used_assets=used_assets, + ) + + # generate floor objects + self.floor_object_generator.use_milp = use_milp + scene["floor_objects"] = self.floor_object_generator.generate_objects( + scene, use_constraint=use_constraint + ) + + # generate wall objects + scene["wall_objects"] = 
self.wall_object_generator.generate_wall_objects( + scene, use_constraint=use_constraint + ) + + # combine floor and wall objects + scene["objects"] = scene["floor_objects"] + scene["wall_objects"] + + # generate small objects + scene = self.generate_small_objects(scene, used_assets=used_assets) + scene["objects"] += scene["small_objects"] + + # generate ceiling objects + if add_ceiling: + scene = self.generate_ceiling_objects( + scene, + additional_requirements_ceiling=self.additional_requirements_ceiling, + ) + scene["objects"] += scene["ceiling_objects"] + + # generate lights + lights = generate_lights(scene) + scene["proceduralParameters"]["lights"] = lights + + # assign layers + scene = map_asset2layer(scene) + + # assign skybox + scene = getSkybox(scene) + + # change ceiling material + scene = self.change_ceiling_material(scene) + + # create folder + query_name = query.replace(" ", "_").replace("'", "")[:30] + create_time = ( + str(datetime.datetime.now()) + .replace(" ", "-") + .replace(":", "-") + .replace(".", "-") + ) + + if add_time: + folder_name = f"{query_name}-{create_time}" # query name + time + else: + folder_name = query_name # query name only + + save_dir = os.path.abspath(os.path.join(save_dir, folder_name)) + os.makedirs(save_dir, exist_ok=True) + compress_json.dump( + scene, + os.path.join(save_dir, f"{query_name}.json"), + json_kwargs=dict(indent=4), + ) + + # save top down image + if generate_image: + top_image = get_top_down_frame(scene, self.objaverse_asset_dir, 1024, 1024) + top_image.show() + top_image.save(os.path.join(save_dir, f"{query_name}.png")) + + # save video + if generate_video: + scene["objects"] = ( + scene["floor_objects"] + scene["wall_objects"] + scene["small_objects"] + ) + final_video = room_video(scene, self.objaverse_asset_dir, 1024, 1024) + final_video.write_videofile( + os.path.join(save_dir, f"{query_name}.mp4"), fps=30 + ) + + return scene, save_dir + + def generate_variants( + self, + query, + original_scene, + 
save_dir=os.path.join(HOLODECK_BASE_DATA_DIR, "scenes"), + number_of_variants=5, + used_assets=[], + ): + self.object_selector.reuse_selection = ( + False # force the selector to retrieve different assets + ) + + # create the list of used assets + used_assets += [ + obj["assetId"] + for obj in original_scene["objects"] + + original_scene["windows"] + + original_scene["doors"] + ] + used_assets += [ + room["floorMaterial"]["name"] for room in original_scene["rooms"] + ] + used_assets += [wall["material"]["name"] for wall in original_scene["walls"]] + used_assets = list(set(used_assets)) + + variant_scenes = [] + for i in tqdm(range(number_of_variants)): + variant_scene, _ = self.generate_scene( + original_scene.copy(), + query, + save_dir, + used_assets, + generate_image=True, + generate_video=False, + add_time=True, + ) + variant_scenes.append(variant_scene) + used_assets += [ + obj["assetId"] + for obj in variant_scene["objects"] + + variant_scene["windows"] + + variant_scene["doors"] + ] + used_assets += [ + room["floorMaterial"]["name"] for room in variant_scene["rooms"] + ] + used_assets += [wall["material"]["name"] for wall in variant_scene["walls"]] + used_assets = list(set(used_assets)) + return variant_scenes + + def ablate_placement( + self, + scene, + query, + save_dir, + used_assets=[], + add_ceiling=False, + generate_image=True, + generate_video=False, + add_time=True, + use_constraint=False, + constraint_type="llm", + ): + # place floor objects + if use_constraint: + self.floor_object_generator.constraint_type = ( + constraint_type # ablate the constraint types + ) + scene["floor_objects"] = self.floor_object_generator.generate_objects( + scene, use_constraint=use_constraint + ) + if len(scene["floor_objects"]) == 0: + print("No object is placed, skip this scene") + return None # if no object is placed, return None + # place wall objects + if use_constraint: + self.wall_object_generator.constraint_type = constraint_type + scene["wall_objects"] = 
self.wall_object_generator.generate_wall_objects( + scene, use_constraint=use_constraint + ) + + # combine floor and wall objects + scene["objects"] = scene["floor_objects"] + scene["wall_objects"] + + # generate small objects + scene = self.generate_small_objects(scene, used_assets=used_assets) + scene["objects"] += scene["small_objects"] + + # assign layers + scene = map_asset2layer(scene) + + # take the first 30 characters of the query as the folder name + query_name = query.replace(" ", "_").replace("'", "")[:30] + create_time = ( + str(datetime.datetime.now()) + .replace(" ", "-") + .replace(":", "-") + .replace(".", "-") + ) + + if add_time: + folder_name = f"{query_name}-{create_time}" # query name + time + else: + folder_name = query_name # query name only + + os.makedirs(f"{save_dir}/{folder_name}", exist_ok=True) + compress_json.dump( + scene, + f"{save_dir}/{folder_name}/{query_name}.json", + json_kwargs=dict(indent=4), + ) + + # save top down image + if generate_image: + top_image = get_top_down_frame(scene, self.objaverse_asset_dir, 1024, 1024) + top_image.show() + top_image.save(f"{save_dir}/{folder_name}/{query_name}.png") + + return scene diff --git a/modules/layers.py b/ai2holodeck/generation/layers.py similarity index 60% rename from modules/layers.py rename to ai2holodeck/generation/layers.py index 04b54bd..6327cb2 100644 --- a/modules/layers.py +++ b/ai2holodeck/generation/layers.py @@ -33,8 +33,8 @@ def color_rooms(room): if room == len(adjacency_list.keys()): return True for color in range(4): # Use color range 0-3 - if assign_color(list(adjacency_list.keys())[room], f'Procedural{color}'): - colors[list(adjacency_list.keys())[room]] = f'Procedural{color}' + if assign_color(list(adjacency_list.keys())[room], f"Procedural{color}"): + colors[list(adjacency_list.keys())[room]] = f"Procedural{color}" if color_rooms(room + 1): return True colors[list(adjacency_list.keys())[room]] = -1 @@ -48,47 +48,55 @@ def color_rooms(room): def 
map_asset2layer(scene): - room2layer = get_room2layer(scene['room_pairs'], scene['open_room_pairs']) - all_layers = ['Procedural0', 'Procedural1', 'Procedural2', 'Procedural3'] - + room2layer = get_room2layer(scene["room_pairs"], scene["open_room_pairs"]) + all_layers = ["Procedural0", "Procedural1", "Procedural2", "Procedural3"] + if len(scene["rooms"]) == 1: print("Only one room in the scene. Assigning the room to Procedural0.") room2layer = {scene["rooms"][0]["id"]: "Procedural0"} # Check if all rooms are assigned a layer - for room in scene['rooms']: - if room['id'] not in room2layer: - room2layer[room['id']] = 'Procedural0' + for room in scene["rooms"]: + if room["id"] not in room2layer: + room2layer[room["id"]] = "Procedural0" # Assign layer to each room - for room in scene['rooms']: - room['layer'] = room2layer[room['id']] + for room in scene["rooms"]: + room["layer"] = room2layer[room["id"]] # Assign layer to each wall - for wall in scene['walls']: - wall['layer'] = room2layer[wall['roomId']] - + for wall in scene["walls"]: + wall["layer"] = room2layer[wall["roomId"]] + # Assign layer to each object # TODO: consider small children objects - for obj in scene['objects']: - obj['layer'] = room2layer[obj['roomId']] - + for obj in scene["objects"]: + obj["layer"] = room2layer[obj["roomId"]] + # Assign layer to each window - for window in scene['windows']: - window['layer'] = room2layer[window['roomId']] + for window in scene["windows"]: + window["layer"] = room2layer[window["roomId"]] # Assign layer to each light - for light in scene['proceduralParameters']['lights']: - try: light['layer'] = room2layer[light['roomId']] - except: continue - - light['cullingMaskOff'] = [layer for layer in all_layers if layer != light['layer']] + for light in scene["proceduralParameters"]["lights"]: + try: + light["layer"] = room2layer[light["roomId"]] + except: + continue + + light["cullingMaskOff"] = [ + layer for layer in all_layers if layer != light["layer"] + ] return scene - 
+ if __name__ == "__main__": - room_pairs = [('Living Room', 'Bedroom'), ('Living Room', 'Kitchen'), - ('Kitchen', 'Bathroom'), ('Bedroom', 'Bathroom')] - open_room_pairs = [('Living Room', 'Kitchen'), ('Living Room', 'Bedroom')] - room2layer = get_room2layer(room_pairs, open_room_pairs) \ No newline at end of file + room_pairs = [ + ("Living Room", "Bedroom"), + ("Living Room", "Kitchen"), + ("Kitchen", "Bathroom"), + ("Bedroom", "Bathroom"), + ] + open_room_pairs = [("Living Room", "Kitchen"), ("Living Room", "Bedroom")] + room2layer = get_room2layer(room_pairs, open_room_pairs) diff --git a/modules/lights.py b/ai2holodeck/generation/lights.py similarity index 90% rename from modules/lights.py rename to ai2holodeck/generation/lights.py index 2cad73c..39779ef 100644 --- a/modules/lights.py +++ b/ai2holodeck/generation/lights.py @@ -1,5 +1,6 @@ -from shapely import Polygon from procthor.utils.types import RGB, Light, LightShadow, Vector3 +from shapely import Polygon + def generate_lights(scene): lights = [ @@ -31,10 +32,10 @@ def generate_lights(scene): light_height = scene["wall_height"] - 0.2 try: for object in scene["ceiling_objects"]: - if object["roomId"] == room_id: light_height = object["position"]["y"] - 0.2 + if object["roomId"] == room_id: + light_height = object["position"]["y"] - 0.2 except: light_height = scene["wall_height"] - 0.2 - lights.append( Light( @@ -52,8 +53,8 @@ def generate_lights(scene): nearPlane=0.2, resolution="FromQualitySettings", ), - roomId=room_id + roomId=room_id, ) ) - return lights \ No newline at end of file + return lights diff --git a/modules/milp_utils.py b/ai2holodeck/generation/milp_utils.py similarity index 70% rename from modules/milp_utils.py rename to ai2holodeck/generation/milp_utils.py index c975e50..3ac03f3 100644 --- a/modules/milp_utils.py +++ b/ai2holodeck/generation/milp_utils.py @@ -12,8 +12,12 @@ def create_boundary_constraints(c, object_dim, bbox): cx, cy, rotate_90 = c[0], c[1], c[3] # Half-sizes considering 
rotation - half_width = cp.multiply(0.5 * x_size, 1 - rotate_90) + cp.multiply(0.5 * y_size, rotate_90) - half_height = cp.multiply(0.5 * y_size, 1 - rotate_90) + cp.multiply(0.5 * x_size, rotate_90) + half_width = cp.multiply(0.5 * x_size, 1 - rotate_90) + cp.multiply( + 0.5 * y_size, rotate_90 + ) + half_height = cp.multiply(0.5 * y_size, 1 - rotate_90) + cp.multiply( + 0.5 * x_size, rotate_90 + ) # Constraints constraints = [ @@ -41,11 +45,19 @@ def create_directional_constraints(c1, c2, object_dim_1, object_dim_2): rotate_90_2 = c2[3] # Half-sizes considering rotation - half_width1 = cp.multiply(0.5 * x_size1, 1 - rotate_90_1) + cp.multiply(0.5 * y_size1, rotate_90_1) - half_height1 = cp.multiply(0.5 * y_size1, 1 - rotate_90_1) + cp.multiply(0.5 * x_size1, rotate_90_1) - - half_width2 = cp.multiply(0.5 * x_size2, 1 - rotate_90_2) + cp.multiply(0.5 * y_size2, rotate_90_2) - half_height2 = cp.multiply(0.5 * y_size2, 1 - rotate_90_2) + cp.multiply(0.5 * x_size2, rotate_90_2) + half_width1 = cp.multiply(0.5 * x_size1, 1 - rotate_90_1) + cp.multiply( + 0.5 * y_size1, rotate_90_1 + ) + half_height1 = cp.multiply(0.5 * y_size1, 1 - rotate_90_1) + cp.multiply( + 0.5 * x_size1, rotate_90_1 + ) + + half_width2 = cp.multiply(0.5 * x_size2, 1 - rotate_90_2) + cp.multiply( + 0.5 * y_size2, rotate_90_2 + ) + half_height2 = cp.multiply(0.5 * y_size2, 1 - rotate_90_2) + cp.multiply( + 0.5 * x_size2, rotate_90_2 + ) # Binary variables to determine the relative positions left_of = cp.Variable(boolean=True) @@ -66,7 +78,6 @@ def create_directional_constraints(c1, c2, object_dim_1, object_dim_2): cy1 - cy2 >= EPSILON + half_height1 + half_height2 - M * (1 - above), # Ensure that at least one of the binary variables must be True left_of + right_of + above + below >= 1, - # make sure the object does not face the wall # when "left of" is true, the object should not face left rotate_90_1 + rotate_180_1 <= 1 + M * (1 - left_of), @@ -95,11 +106,19 @@ def create_nooverlap_constraints(c1, 
c2, object_dim_1, object_dim_2): rotate_90_2 = c2[3] # Half-sizes considering rotation - half_width1 = cp.multiply(0.5 * x_size1, 1 - rotate_90_1) + cp.multiply(0.5 * y_size1, rotate_90_1) - half_height1 = cp.multiply(0.5 * y_size1, 1 - rotate_90_1) + cp.multiply(0.5 * x_size1, rotate_90_1) - - half_width2 = cp.multiply(0.5 * x_size2, 1 - rotate_90_2) + cp.multiply(0.5 * y_size2, rotate_90_2) - half_height2 = cp.multiply(0.5 * y_size2, 1 - rotate_90_2) + cp.multiply(0.5 * x_size2, rotate_90_2) + half_width1 = cp.multiply(0.5 * x_size1, 1 - rotate_90_1) + cp.multiply( + 0.5 * y_size1, rotate_90_1 + ) + half_height1 = cp.multiply(0.5 * y_size1, 1 - rotate_90_1) + cp.multiply( + 0.5 * x_size1, rotate_90_1 + ) + + half_width2 = cp.multiply(0.5 * x_size2, 1 - rotate_90_2) + cp.multiply( + 0.5 * y_size2, rotate_90_2 + ) + half_height2 = cp.multiply(0.5 * y_size2, 1 - rotate_90_2) + cp.multiply( + 0.5 * x_size2, rotate_90_2 + ) # Binary variables to determine the relative positions left_of = cp.Variable(boolean=True) @@ -110,7 +129,6 @@ def create_nooverlap_constraints(c1, c2, object_dim_1, object_dim_2): # Constraints constraints = [ # Constraints for binary variable activation - # object 1 is left of object 2 cx2 - cx1 >= EPSILON + half_width1 + half_width2 - M * (1 - left_of), # object 1 is right of object 2 @@ -119,7 +137,6 @@ def create_nooverlap_constraints(c1, c2, object_dim_1, object_dim_2): cy2 - cy1 >= EPSILON + half_height1 + half_height2 - M * (1 - below), # object 1 is above object 2 cy1 - cy2 >= EPSILON + half_height1 + half_height2 - M * (1 - above), - # Ensure that at least one of the binary variables must be True left_of + right_of + above + below >= 1, ] @@ -152,7 +169,6 @@ def create_alignment_constraints(c1, c2, object_dim_1, object_dim_2): cx1 - cx2 <= M * (1 - x_aligned), cy2 - cy1 <= M * (1 - y_aligned), cy1 - cy2 <= M * (1 - y_aligned), - # Ensure that at least one of the binary variables must be True x_aligned + y_aligned >= 1, ] @@ -168,8 +184,12 
@@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg room_min_x, room_min_y, room_max_x, room_max_y = room_dim # Half-sizes considering rotation - half_width1 = cp.multiply(0.5 * x_size, 1 - rotate_90) + cp.multiply(0.5 * y_size, rotate_90) - half_height1 = cp.multiply(0.5 * y_size, 1 - rotate_90) + cp.multiply(0.5 * x_size, rotate_90) + half_width1 = cp.multiply(0.5 * x_size, 1 - rotate_90) + cp.multiply( + 0.5 * y_size, rotate_90 + ) + half_height1 = cp.multiply(0.5 * y_size, 1 - rotate_90) + cp.multiply( + 0.5 * x_size, rotate_90 + ) a = room_min_x + half_width1 b = room_max_x - half_width1 @@ -183,7 +203,6 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg by_c = cp.Variable(boolean=True) by_d = cp.Variable(boolean=True) - # Constraints that link the binary variables with the conditions # Constraints if hard: @@ -192,7 +211,6 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg # half_width1 - half_height1 >= -M * (1 - x_size_longer), # If x_size_longer is false (0), then half_width1 must not be greater than half_height1 # half_width1 - half_height1 <= M * x_size_longer, - # Constraints for binary variable activation x - a <= M * (1 - bx_a), a - x <= M * (1 - bx_a), @@ -202,7 +220,6 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg c - y <= M * (1 - by_c), y - d <= M * (1 - by_d), d - y <= M * (1 - by_d), - # make sure the object does not face the wall # 0: facing up, 90: facing right ... 
# when bx_a is true, the left edge is aligned with the left wall @@ -215,7 +232,6 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg (1 - rotate_90) + rotate_180 <= 1 + M * (1 - by_c), # when by_d is true, the top edge is aligned with the top wall (1 - rotate_90) + (1 - rotate_180) <= 1 + M * (1 - by_d), - # Only one of the binary variables needs to be true (logical OR) bx_a + bx_b + by_c + by_d >= 1, ] @@ -228,12 +244,10 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg half_width1 - half_height1 >= -M * (1 - x_size_longer), # If x_size_longer is false (0), then half_width1 must not be greater than half_height1 half_width1 - half_height1 <= M * x_size_longer, - by_c + by_d >= 1 - M * (1 - x_size_longer), bx_a + bx_b >= 1 - M * x_size_longer, ] - return hard_constraints, [bx_a + bx_b + by_c + by_d] else: hard_constraints = [ @@ -246,7 +260,6 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg c - y <= M * (1 - by_c), y - d <= M * (1 - by_d), d - y <= M * (1 - by_d), - # when bx_a is true, the left edge is aligned with the left wall rotate_90 + rotate_180 <= 1 + M * (1 - bx_a), # when bx_b is true, the right edge is aligned with the right wall @@ -261,7 +274,7 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg return hard_constraints, [bx_a + bx_b + by_c + by_d] -def create_abs_constraints(X, Y, a, constraint_type='geq'): +def create_abs_constraints(X, Y, a, constraint_type="geq"): """ Create a constraint for |X - Y| <= a or |X - Y| >= a. @@ -272,31 +285,31 @@ def create_abs_constraints(X, Y, a, constraint_type='geq'): :return: A list of one or two cvxpy constraints. 
""" constraints = [] - - if constraint_type == 'leq': + + if constraint_type == "leq": # For |X - Y| <= a, we need two inequalities: constraints.append(X - Y <= a) constraints.append(Y - X <= a) - - elif constraint_type == 'geq': + + elif constraint_type == "geq": # For |X - Y| >= a, we introduce an auxiliary boolean variable to handle the OR condition z = cp.Variable(boolean=True) # Now we create two constraints that together represent the OR condition # If z is True (1), then the first constraint (X - Y >= a) must be satisfied. # If z is False (0), then the second constraint (Y - X >= a) must be satisfied. - constraints.append((X - Y) - M*z >= a - M) - constraints.append((Y - X) - M*(1 - z) >= a - M) - + constraints.append((X - Y) - M * z >= a - M) + constraints.append((Y - X) - M * (1 - z) >= a - M) + else: raise ValueError("Invalid constraint_type. Use 'leq' or 'geq'.") - + return constraints -def create_distance_constraints(c1, c2, upper_bound, type='near'): +def create_distance_constraints(c1, c2, upper_bound, type="near"): X1, Y1 = c1[0], c1[1] X2, Y2 = c2[0], c2[1] - if type == 'near': + if type == "near": # Auxiliary variables for the absolute differences abs_diff_x = cp.Variable() abs_diff_y = cp.Variable() @@ -310,14 +323,14 @@ def create_distance_constraints(c1, c2, upper_bound, type='near'): abs_diff_x >= 0, abs_diff_y >= 0, abs_diff_x <= upper_bound[0], - abs_diff_y <= upper_bound[1] + abs_diff_y <= upper_bound[1], ] - + # L1 distance is the sum of the absolute differences l1_distance = abs_diff_x + abs_diff_y soft_constraints = [-l1_distance] - elif type == 'far': + elif type == "far": x_lower_bound = cp.Variable() y_lower_bound = cp.Variable() # Maximize L1 distance @@ -336,7 +349,7 @@ def create_distance_constraints(c1, c2, upper_bound, type='near'): soft_constraints = [x_lower_bound + y_lower_bound] else: raise ValueError("type must be 'near' or 'far'") - + # Return the objective and the constraints together return hard_constraints, 
soft_constraints @@ -345,11 +358,7 @@ def create_if_and_constraints(A, B): # A and B are binary conditions # A and B are true if and only if z is true z = cp.Variable(boolean=True) # New binary variable for the AND condition - constraints = [ - z <= A, - z <= B, - z >= A + B - 1 - ] + constraints = [z <= A, z <= B, z >= A + B - 1] return constraints, z @@ -373,187 +382,229 @@ def create_relative_constraints(c1, c2, object_dim_1, object_dim_2, constraint_t # half_xwidth1 = cp.multiply(0.5 * x_size1, 1 - rotate_90_1) + cp.multiply(0.5 * y_size1, rotate_90_1) # half_yheight1 = cp.multiply(0.5 * y_size1, 1 - rotate_90_1) + cp.multiply(0.5 * x_size1, rotate_90_1) - half_xwidth2 = cp.multiply(0.5 * x_size2, 1 - rotate_90_2) + cp.multiply(0.5 * y_size2, rotate_90_2) - half_yheight2 = cp.multiply(0.5 * y_size2, 1 - rotate_90_2) + cp.multiply(0.5 * x_size2, rotate_90_2) + half_xwidth2 = cp.multiply(0.5 * x_size2, 1 - rotate_90_2) + cp.multiply( + 0.5 * y_size2, rotate_90_2 + ) + half_yheight2 = cp.multiply(0.5 * y_size2, 1 - rotate_90_2) + cp.multiply( + 0.5 * x_size2, rotate_90_2 + ) hard_constraints = [] soft_constraints = [] - if constraint_type == 'left of': + if constraint_type == "left of": # if rotate_90_2 == 0 and rotate_180_2 == 0, face up - constraints, z = create_if_and_constraints(1-rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) # constraints activated by z being true - hard_constraints.extend([ - cx1 <= cx2 + M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 <= cx2 + M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) ## if rotate_90_2 == 1 and rotate_180_2 == 0, face right - constraints, z = create_if_and_constraints(rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(rotate_90_2, 1 - 
rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cy1 >= cy2 - M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cy1 >= cy2 - M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) # if rotate_90_2 == 0 and rotate_180_2 == 1, face down - constraints, z = create_if_and_constraints(1-rotate_90_2, rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cx1 >= cx2 - M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 >= cx2 - M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) # if rotate_90_2 == 1 and rotate_180_2 == 1, face left constraints, z = create_if_and_constraints(rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cy1 <= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cy1 <= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) - if constraint_type == 'right of': + if constraint_type == "right of": # if rotate_90_2 == 0 and rotate_180_2 == 0, face up - constraints, z = create_if_and_constraints(1-rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) # constraints activated by z being true - hard_constraints.extend([ - cx1 >= cx2 - M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 >= cx2 - M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - 
M * (1 - z), + ] + ) ## if rotate_90_2 == 1 and rotate_180_2 == 0, face right - constraints, z = create_if_and_constraints(rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cy1 <= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cy1 <= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) # if rotate_90_2 == 0 and rotate_180_2 == 1, face down - constraints, z = create_if_and_constraints(1-rotate_90_2, rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cx1 <= cx2 + M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 <= cx2 + M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) # if rotate_90_2 == 1 and rotate_180_2 == 1, face left constraints, z = create_if_and_constraints(rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cy1 >= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cy1 >= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) - if constraint_type == 'side of': + if constraint_type == "side of": # if rotate_90_2 == 0 and rotate_180_2 == 0, face up - constraints, z = create_if_and_constraints(1-rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) # constraints activated by z being true - hard_constraints.extend([ - # cx1 >= cx2 - M * (1 - z), - cy1 <= cy2 
+ half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + # cx1 >= cx2 - M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) ## if rotate_90_2 == 1 and rotate_180_2 == 0, face right - constraints, z = create_if_and_constraints(rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - # cy1 <= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + # cy1 <= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) # if rotate_90_2 == 0 and rotate_180_2 == 1, face down - constraints, z = create_if_and_constraints(1-rotate_90_2, rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - # cx1 <= cx2 + M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + # cx1 <= cx2 + M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) # if rotate_90_2 == 1 and rotate_180_2 == 1, face left constraints, z = create_if_and_constraints(rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - # cy1 >= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + # cy1 >= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) - if constraint_type == 'in front of': + if constraint_type == "in front of": # if rotate_90_2 == 0 and rotate_180_2 == 0, face up - constraints, z = create_if_and_constraints(1-rotate_90_2, 
1-rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) # constraints activated by z being true - hard_constraints.extend([ - cy1 >= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cy1 >= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) ## if rotate_90_2 == 1 and rotate_180_2 == 0, face right - constraints, z = create_if_and_constraints(rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cx1 >= cx2 - M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 >= cx2 - M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) # if rotate_90_2 == 0 and rotate_180_2 == 1, face down - constraints, z = create_if_and_constraints(1-rotate_90_2, rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cy1 <= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - - ]) + hard_constraints.extend( + [ + cy1 <= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) # if rotate_90_2 == 1 and rotate_180_2 == 1, face left constraints, z = create_if_and_constraints(rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cx1 <= cx2 + M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 <= cx2 + M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - 
half_yheight2 - M * (1 - z), + ] + ) - if constraint_type == 'behind': + if constraint_type == "behind": # if rotate_90_2 == 0 and rotate_180_2 == 0, face up - constraints, z = create_if_and_constraints(1-rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) # constraints activated by z being true - hard_constraints.extend([ - cy1 <= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cy1 <= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) ## if rotate_90_2 == 1 and rotate_180_2 == 0, face right - constraints, z = create_if_and_constraints(rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cx1 <= cx2 + M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 <= cx2 + M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) # if rotate_90_2 == 0 and rotate_180_2 == 1, face down - constraints, z = create_if_and_constraints(1-rotate_90_2, rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cy1 >= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - - ]) + hard_constraints.extend( + [ + cy1 >= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) # if rotate_90_2 == 1 and rotate_180_2 == 1, face left constraints, z = create_if_and_constraints(rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cx1 >= cx2 - M * (1 - 
z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 >= cx2 - M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) - return hard_constraints \ No newline at end of file + return hard_constraints diff --git a/ai2holodeck/generation/objaverse_retriever.py b/ai2holodeck/generation/objaverse_retriever.py new file mode 100644 index 0000000..3637afe --- /dev/null +++ b/ai2holodeck/generation/objaverse_retriever.py @@ -0,0 +1,146 @@ +import os + +import compress_json +import compress_pickle +import numpy as np +import torch +import torch.nn.functional as F + +from ai2holodeck.constants import ( + OBJATHOR_ANNOTATIONS_PATH, + HOLODECK_THOR_ANNOTATIONS_PATH, + OBJATHOR_FEATURES_DIR, + HOLODECK_THOR_FEATURES_DIR, +) +from ai2holodeck.generation.utils import get_bbox_dims + + +class ObjathorRetriever: + def __init__( + self, + clip_model, + clip_preprocess, + clip_tokenizer, + sbert_model, + retrieval_threshold, + ): + objathor_annotations = compress_json.load(OBJATHOR_ANNOTATIONS_PATH) + thor_annotations = compress_json.load(HOLODECK_THOR_ANNOTATIONS_PATH) + self.database = {**objathor_annotations, **thor_annotations} + + objathor_clip_features_dict = compress_pickle.load( + os.path.join(OBJATHOR_FEATURES_DIR, f"clip_features.pkl") + ) # clip features + objathor_sbert_features_dict = compress_pickle.load( + os.path.join(OBJATHOR_FEATURES_DIR, f"sbert_features.pkl") + ) # sbert features + assert ( + objathor_clip_features_dict["uids"] == objathor_sbert_features_dict["uids"] + ) + + objathor_uids = objathor_clip_features_dict["uids"] + objathor_clip_features = objathor_clip_features_dict["img_features"].astype( + np.float32 + ) + objathor_sbert_features = objathor_sbert_features_dict["text_features"].astype( + np.float32 + ) + + thor_clip_features_dict = compress_pickle.load( + os.path.join(HOLODECK_THOR_FEATURES_DIR, "clip_features.pkl") + ) 
# clip features + thor_sbert_features_dict = compress_pickle.load( + os.path.join(HOLODECK_THOR_FEATURES_DIR, "sbert_features.pkl") + ) # clip features + assert thor_clip_features_dict["uids"] == thor_sbert_features_dict["uids"] + + thor_uids = thor_clip_features_dict["uids"] + thor_clip_features = thor_clip_features_dict["img_features"].astype(np.float32) + thor_sbert_features = thor_sbert_features_dict["text_features"].astype( + np.float32 + ) + + self.clip_features = torch.from_numpy( + np.concatenate([objathor_clip_features, thor_clip_features], axis=0) + ) + self.clip_features = F.normalize(self.clip_features, p=2, dim=-1) + + self.sbert_features = torch.from_numpy( + np.concatenate([objathor_sbert_features, thor_sbert_features], axis=0) + ) + + self.asset_ids = objathor_uids + thor_uids + + self.clip_model = clip_model + self.clip_preprocess = clip_preprocess + self.clip_tokenizer = clip_tokenizer + self.sbert_model = sbert_model + + self.retrieval_threshold = retrieval_threshold + + self.use_text = True + + def retrieve(self, queries, threshold=28): + with torch.no_grad(): + query_feature_clip = self.clip_model.encode_text( + self.clip_tokenizer(queries) + ) + + query_feature_clip = F.normalize(query_feature_clip, p=2, dim=-1) + + clip_similarities = 100 * torch.einsum( + "ij, lkj -> ilk", query_feature_clip, self.clip_features + ) + clip_similarities = torch.max(clip_similarities, dim=-1).values + + query_feature_sbert = self.sbert_model.encode( + queries, convert_to_tensor=True, show_progress_bar=False + ) + sbert_similarities = query_feature_sbert @ self.sbert_features.T + + if self.use_text: + similarities = clip_similarities + sbert_similarities + else: + similarities = clip_similarities + + threshold_indices = torch.where(clip_similarities > threshold) + + unsorted_results = [] + for query_index, asset_index in zip(*threshold_indices): + score = similarities[query_index, asset_index].item() + unsorted_results.append((self.asset_ids[asset_index], 
score)) + + # Sorting the results in descending order by score + results = sorted(unsorted_results, key=lambda x: x[1], reverse=True) + + return results + + def compute_size_difference(self, target_size, candidates): + candidate_sizes = [] + for uid, _ in candidates: + size = get_bbox_dims(self.database[uid]) + size_list = [size["x"] * 100, size["y"] * 100, size["z"] * 100] + size_list.sort() + candidate_sizes.append(size_list) + + candidate_sizes = torch.tensor(candidate_sizes) + + target_size_list = list(target_size) + target_size_list.sort() + target_size = torch.tensor(target_size_list) + + size_difference = abs(candidate_sizes - target_size).mean(axis=1) / 100 + size_difference = size_difference.tolist() + + candidates_with_size_difference = [] + for i, (uid, score) in enumerate(candidates): + candidates_with_size_difference.append( + (uid, score - size_difference[i] * 10) + ) + + # sort the candidates by score + candidates_with_size_difference = sorted( + candidates_with_size_difference, key=lambda x: x[1], reverse=True + ) + + return candidates_with_size_difference diff --git a/ai2holodeck/generation/object_selector.py b/ai2holodeck/generation/object_selector.py new file mode 100644 index 0000000..fd038ed --- /dev/null +++ b/ai2holodeck/generation/object_selector.py @@ -0,0 +1,983 @@ +import ast +import copy +import json +import multiprocessing +import random +import re +import traceback +from typing import Dict, List + +import torch +import torch.nn.functional as F +from colorama import Fore +from langchain import PromptTemplate, OpenAI +from shapely import Polygon + +import ai2holodeck.generation.prompts as prompts +from ai2holodeck.generation.floor_objects import DFS_Solver_Floor +from ai2holodeck.generation.objaverse_retriever import ObjathorRetriever +from ai2holodeck.generation.utils import get_bbox_dims, get_annotations +from ai2holodeck.generation.wall_objects import DFS_Solver_Wall + +EXPECTED_OBJECT_ATTRIBUTES = [ + "description", + "location", + 
"size", + "quantity", + "variance_type", + "objects_on_top", +] + + +class ObjectSelector: + def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): + # object retriever + self.object_retriever = object_retriever + self.database = object_retriever.database + + # language model and prompt templates + self.llm = llm + self.object_selection_template_1 = prompts.object_selection_prompt_new_1 + self.object_selection_template_2 = PromptTemplate( + input_variables=[ + "object_selection_prompt_new_1", + "object_selection_1", + "room", + ], + template=prompts.object_selection_prompt_new_2, + ) + + # hyperparameters + self.floor_capacity_ratio = 0.4 + self.wall_capacity_ratio = 0.5 + self.object_size_tolerance = 0.8 + self.similarity_threshold_floor = 31 # need to be tuned + self.similarity_threshold_wall = 31 # need to be tuned + self.thin_threshold = 3 + self.used_assets = [] + self.consider_size = True + self.size_buffer = 10 + + self.random_selection = False + self.reuse_selection = False + self.multiprocessing = True + + def select_objects(self, scene, additional_requirements="N/A"): + rooms_types = [room["roomType"] for room in scene["rooms"]] + room2area = { + room["roomType"]: self.get_room_area(room) for room in scene["rooms"] + } + room2size = { + room["roomType"]: self.get_room_size(room, scene["wall_height"]) + for room in scene["rooms"] + } + room2perimeter = { + room["roomType"]: self.get_room_perimeter(room) for room in scene["rooms"] + } + room2vertices = { + room["roomType"]: [(x * 100, y * 100) for (x, y) in room["vertices"]] + for room in scene["rooms"] + } + + room2floor_capacity = { + room_type: [room_area * self.floor_capacity_ratio, 0] + for room_type, room_area in room2area.items() + } + room2floor_capacity = self.update_floor_capacity(room2floor_capacity, scene) + room2wall_capacity = { + room_type: [room_perimeter * self.wall_capacity_ratio, 0] + for room_type, room_perimeter in room2perimeter.items() + } + selected_objects = { + 
room["roomType"]: {"floor": [], "wall": []} for room in scene["rooms"] + } + + if "object_selection_plan" in scene: + object_selection_plan = scene["object_selection_plan"] + if self.reuse_selection: + selected_objects = scene["selected_objects"] + else: + for room_type in rooms_types: + floor_objects, _, wall_objects, _ = self.get_objects_by_room( + object_selection_plan[room_type], + scene, + room2size[room_type], + room2floor_capacity[room_type], + room2wall_capacity[room_type], + room2vertices[room_type], + ) + selected_objects[room_type]["floor"] = floor_objects + selected_objects[room_type]["wall"] = wall_objects + else: + object_selection_plan = {room["roomType"]: [] for room in scene["rooms"]} + packed_args = [ + ( + room_type, + scene, + additional_requirements, + room2size, + room2floor_capacity, + room2wall_capacity, + room2vertices, + ) + for room_type in rooms_types + ] + + if self.multiprocessing: + pool = multiprocessing.Pool(processes=4) + results = pool.map(self.plan_room, packed_args) + pool.close() + pool.join() + else: + results = [self.plan_room(args) for args in packed_args] + + for room_type, result in results: + selected_objects[room_type]["floor"] = result["floor"] + selected_objects[room_type]["wall"] = result["wall"] + object_selection_plan[room_type] = result["plan"] + + print( + f"\n{Fore.GREEN}AI: Here is the object selection plan:\n{object_selection_plan}{Fore.RESET}" + ) + return object_selection_plan, selected_objects + + def plan_room(self, args): + ( + room_type, + scene, + additional_requirements, + room2size, + room2floor_capacity, + room2wall_capacity, + room2vertices, + ) = args + print(f"\n{Fore.GREEN}AI: Selecting objects for {room_type}...{Fore.RESET}\n") + + result = {} + room_size_str = f"{int(room2size[room_type][0])*100}cm in length, {int(room2size[room_type][1])*100}cm in width, {int(room2size[room_type][2])*100}cm in height" + + prompt_1 = ( + self.object_selection_template_1.replace("INPUT", scene["query"]) + 
.replace("ROOM_TYPE", room_type) + .replace("ROOM_SIZE", room_size_str) + .replace("REQUIREMENTS", additional_requirements) + ) + + output_1 = self.llm(prompt_1).lower() + plan_1 = self.extract_json(output_1) + + if plan_1 is None: + print(f"Error while extracting the JSON for {room_type}.") + return result + + ( + floor_objects, + floor_capacity, + wall_objects, + wall_capacity, + ) = self.get_objects_by_room( + plan_1, + scene, + room2size[room_type], + room2floor_capacity[room_type], + room2wall_capacity[room_type], + room2vertices[room_type], + ) + + required_floor_capacity_percentage = 0.8 + if floor_capacity[1] / floor_capacity[0] >= required_floor_capacity_percentage: + result["floor"] = floor_objects + result["wall"] = wall_objects + result["plan"] = plan_1 + else: + print( + f"{Fore.RED}AI: The used floor capacity of {room_type} is {floor_capacity[1]:.2g}m^2," + f" which is less than {100*required_floor_capacity_percentage:.0f}% of the total floor capacity" + f" {floor_capacity[0]:.2g}m^2." 
+ f"{Fore.RESET}" + ) + prompt_2 = self.object_selection_template_2.format( + object_selection_prompt_new_1=prompt_1, + object_selection_1=output_1, + room=room_type, + ) + output_2 = self.llm(prompt_2).lower() + plan_2 = self.extract_json(output_2) + + if plan_2 is None: + print( + f"{Fore.RED}AI: Replanning failed, will use original plan.{Fore.RESET}" + ) + plan_2 = plan_1 + + new_plan = copy.deepcopy(plan_1) + for object in plan_2: + new_plan[object] = plan_2[object] + + floor_objects, _, wall_objects, _ = self.get_objects_by_room( + new_plan, + scene, + room2size[room_type], + room2floor_capacity[room_type], + room2wall_capacity[room_type], + room2vertices[room_type], + ) + + result["floor"] = floor_objects + result["wall"] = wall_objects + result["plan"] = new_plan + + return room_type, result + + def _recursively_normalize_attribute_keys(self, obj): + if isinstance(obj, Dict): + return { + key.strip() + .lower() + .replace(" ", "_"): self._recursively_normalize_attribute_keys(value) + for key, value in obj.items() + } + elif isinstance(obj, List): + return [self._recursively_normalize_attribute_keys(value) for value in obj] + elif isinstance(obj, (str, int, float, bool)): + return obj + else: + print( + f"Unexpected type {type(obj)} in {obj} while normalizing attribute keys." + f" Returning the object as is." 
+ ) + return obj + + def extract_json(self, input_string): + # Using regex to identify the JSON structure in the string + json_match = re.search(r"{.*}", input_string, re.DOTALL) + if json_match: + extracted_json = json_match.group(0) + + # Convert the extracted JSON string into a Python dictionary + json_dict = None + try: + json_dict = json.loads(extracted_json) + except: + try: + json_dict = ast.literal_eval(extracted_json) + except: + pass + + if json_dict is None: + print( + f"{Fore.RED}[ERROR] while parsing the JSON for:\n{input_string}{Fore.RESET}", + flush=True, + ) + return None + + json_dict = self._recursively_normalize_attribute_keys(json_dict) + try: + json_dict = self.check_dict(json_dict) + except Exception as e: + print( + f"{Fore.RED}[ERROR] Dictionary check failed for:" + f"\n{json_dict}" + f"\nFailure reason:{traceback.format_exception_only(e)}" + f"{Fore.RESET}", + flush=True, + ) + + return json_dict + + else: + print(f"No valid JSON found in:\n{input_string}", flush=True) + return None + + def check_dict(self, dict): + valid = True + + for key, value in dict.items(): + if not isinstance(key, str): + valid = False + break + + if not isinstance(value, Dict): + valid = False + break + + for attribute in EXPECTED_OBJECT_ATTRIBUTES: + if attribute not in value: + valid = False + break + + if not isinstance(value["description"], str): + valid = False + break + + if value.get("location") not in ["floor", "wall"]: + dict[key]["location"] = "floor" + + if ( + not isinstance(value["size"], list) + or len(value["size"]) != 3 + or not all(isinstance(i, int) for i in value["size"]) + ): + dict[key]["size"] = None + + if not isinstance(value["quantity"], int): + dict[key]["quantity"] = 1 + + if not isinstance(value.get("variance_type"), str) or value[ + "variance_type" + ] not in ["same", "varied"]: + dict[key]["variance_type"] = "same" + + if not isinstance(value.get("objects_on_top"), list): + dict[key]["objects_on_top"] = [] + + for i, child in 
enumerate(value["objects_on_top"]): + if not isinstance(child, Dict): + valid = False + break + + for attribute in ["object_name", "quantity"]: + if attribute not in child: + valid = False + break + + if not isinstance(child["object_name"], str): + valid = False + break + + if not isinstance(child["quantity"], int): + dict[key]["objects_on_top"][i]["quantity"] = 1 + + if not isinstance(child.get("variance_type"), str) or child[ + "variance_type" + ] not in ["same", "varied"]: + dict[key]["objects_on_top"][i]["variance_type"] = "same" + + if not valid: + return None + else: + return dict + + def get_objects_by_room( + self, parsed_plan, scene, room_size, floor_capacity, wall_capacity, vertices + ): + # get the floor and wall objects + floor_object_list = [] + wall_object_list = [] + for object_name, object_info in parsed_plan.items(): + object_info["object_name"] = object_name + if object_info["location"] == "floor": + floor_object_list.append(object_info) + else: + wall_object_list.append(object_info) + + floor_objects, floor_capacity = self.get_floor_objects( + floor_object_list, floor_capacity, room_size, vertices, scene + ) + wall_objects, wall_capacity = self.get_wall_objects( + wall_object_list, wall_capacity, room_size, vertices, scene + ) + + return floor_objects, floor_capacity, wall_objects, wall_capacity + + def get_room_size(self, room, wall_height): + floor_polygon = room["floorPolygon"] + x_values = [point["x"] for point in floor_polygon] + z_values = [point["z"] for point in floor_polygon] + x_dim = max(x_values) - min(x_values) + z_dim = max(z_values) - min(z_values) + + if x_dim > z_dim: + return (x_dim, wall_height, z_dim) + else: + return (z_dim, wall_height, x_dim) + + def get_room_area(self, room): + room_vertices = room["vertices"] + room_polygon = Polygon(room_vertices) + return room_polygon.area + + def get_room_perimeter(self, room): + room_vertices = room["vertices"] + room_polygon = Polygon(room_vertices) + return room_polygon.length + + 
def get_floor_objects( + self, floor_object_list, floor_capacity, room_size, room_vertices, scene + ): + selected_floor_objects_all = [] + for floor_object in floor_object_list: + object_type = floor_object["object_name"] + object_description = floor_object["description"] + object_size = floor_object["size"] + quantity = min(floor_object["quantity"], 10) + + if "variance_type" not in floor_object: + print( + f'[WARNING] variance_type not found in the the object:\n{floor_object}, will set this to be "same".' + ) + variance_type = floor_object.get("variance_type", "same") + + candidates = self.object_retriever.retrieve( + [f"a 3D model of {object_type}, {object_description}"], + self.similarity_threshold_floor, + ) + + candidates = [ + candidate + for candidate, annotation in zip( + candidates, + [ + get_annotations(self.database[candidate[0]]) + for candidate in candidates + ], + ) + if annotation["onFloor"] # only select objects on the floor + and ( + not annotation["onCeiling"] + ) # only select objects not on the ceiling + and all( # ignore doors and windows and frames + k not in annotation["category"].lower() + for k in ["door", "window", "frame"] + ) + ] + + # check if the object is too big + candidates = self.check_object_size(candidates, room_size) + + # check if object can be placed on the floor + candidates = self.check_floor_placement( + candidates[:20], room_vertices, scene + ) + + # No candidates found + if len(candidates) == 0: + print( + "No candidates found for {} {}".format( + object_type, object_description + ) + ) + continue + + # remove used assets + top_one_candidate = candidates[0] + if len(candidates) > 1: + candidates = [ + candidate + for candidate in candidates + if candidate[0] not in self.used_assets + ] + if len(candidates) == 0: + candidates = [top_one_candidate] + + # consider object size difference + if object_size is not None and self.consider_size: + candidates = self.object_retriever.compute_size_difference( + object_size, 
candidates + ) + + candidates = candidates[:10] # only select top 10 candidates + + selected_asset_ids = [] + if variance_type == "same": + selected_candidate = self.random_select(candidates) + selected_asset_id = selected_candidate[0] + selected_asset_ids = [selected_asset_id] * quantity + + elif variance_type == "varied": + for i in range(quantity): + selected_candidate = self.random_select(candidates) + selected_asset_id = selected_candidate[0] + selected_asset_ids.append(selected_asset_id) + if len(candidates) > 1: + candidates.remove(selected_candidate) + + for i in range(quantity): + selected_asset_id = selected_asset_ids[i] + object_name = f"{object_type}-{i}" + selected_floor_objects_all.append((object_name, selected_asset_id)) + + # reselect objects if they exceed floor capacity, consider the diversity of objects + selected_floor_objects = [] + while True: + if len(selected_floor_objects_all) == 0: + break + current_selected_asset_ids = [] + current_number_of_objects = len(selected_floor_objects) + for object_name, selected_asset_id in selected_floor_objects_all: + if selected_asset_id not in current_selected_asset_ids: + selected_asset_size = get_bbox_dims( + self.database[selected_asset_id] + ) + selected_asset_capacity = ( + selected_asset_size["x"] * selected_asset_size["z"] + ) + if ( + floor_capacity[1] + selected_asset_capacity > floor_capacity[0] + and len(selected_floor_objects) > 0 + ): + print( + f"{object_type} {object_description} exceeds floor capacity" + ) + else: + current_selected_asset_ids.append(selected_asset_id) + selected_floor_objects.append((object_name, selected_asset_id)) + selected_floor_objects_all.remove( + (object_name, selected_asset_id) + ) + floor_capacity = ( + floor_capacity[0], + floor_capacity[1] + selected_asset_capacity, + ) + if len(selected_floor_objects) == current_number_of_objects: + print("No more objects can be added") + break + + # sort objects by object type + object_type2objects = {} + for object_name, 
selected_asset_id in selected_floor_objects: + object_type = object_name.split("-")[0] + if object_type not in object_type2objects: + object_type2objects[object_type] = [] + object_type2objects[object_type].append((object_name, selected_asset_id)) + + selected_floor_objects_ordered = [] + for object_type in object_type2objects: + selected_floor_objects_ordered += sorted(object_type2objects[object_type]) + + return selected_floor_objects_ordered, floor_capacity + + def get_wall_objects( + self, wall_object_list, wall_capacity, room_size, room_vertices, scene + ): + selected_wall_objects_all = [] + for wall_object in wall_object_list: + object_type = wall_object["object_name"] + object_description = wall_object["description"] + object_size = wall_object["size"] + quantity = min(wall_object["quantity"], 10) + variance_type = wall_object["variance_type"] + + candidates = self.object_retriever.retrieve( + [f"a 3D model of {object_type}, {object_description}"], + self.similarity_threshold_wall, + ) + + # check on wall objects + candidates = [ + candidate + for candidate in candidates + if get_annotations(self.database[candidate[0]])["onWall"] == True + ] # only select objects on the wall + + # ignore doors and windows + candidates = [ + candidate + for candidate in candidates + if "door" + not in get_annotations(self.database[candidate[0]])["category"].lower() + ] + candidates = [ + candidate + for candidate in candidates + if "window" + not in get_annotations(self.database[candidate[0]])["category"].lower() + ] + + # check if the object is too big + candidates = self.check_object_size(candidates, room_size) + + # check thin objects + candidates = self.check_thin_object(candidates) + + # check if object can be placed on the wall + candidates = self.check_wall_placement( + candidates[:20], room_vertices, scene + ) + + if len(candidates) == 0: + print( + "No candidates found for {} {}".format( + object_type, object_description + ) + ) + continue + + # remove used assets + 
top_one_candidate = candidates[0] + if len(candidates) > 1: + candidates = [ + candidate + for candidate in candidates + if candidate[0] not in self.used_assets + ] + if len(candidates) == 0: + candidates = [top_one_candidate] + + # consider object size difference + if object_size is not None and self.consider_size: + candidates = self.object_retriever.compute_size_difference( + object_size, candidates + ) + + candidates = candidates[:10] # only select top 10 candidates + + selected_asset_ids = [] + if variance_type == "same": + selected_candidate = self.random_select(candidates) + selected_asset_id = selected_candidate[0] + selected_asset_ids = [selected_asset_id] * quantity + + elif variance_type == "varied": + for i in range(quantity): + selected_candidate = self.random_select(candidates) + selected_asset_id = selected_candidate[0] + selected_asset_ids.append(selected_asset_id) + if len(candidates) > 1: + candidates.remove(selected_candidate) + + for i in range(quantity): + selected_asset_id = selected_asset_ids[i] + object_name = f"{object_type}-{i}" + selected_wall_objects_all.append((object_name, selected_asset_id)) + + # reselect objects if they exceed wall capacity, consider the diversity of objects + selected_wall_objects = [] + while True: + if len(selected_wall_objects_all) == 0: + break + current_selected_asset_ids = [] + current_number_of_objects = len(selected_wall_objects) + for object_name, selected_asset_id in selected_wall_objects_all: + if selected_asset_id not in current_selected_asset_ids: + selected_asset_size = get_bbox_dims( + self.database[selected_asset_id] + ) + selected_asset_capacity = selected_asset_size["x"] + if ( + wall_capacity[1] + selected_asset_capacity > wall_capacity[0] + and len(selected_wall_objects) > 0 + ): + print( + f"{object_type} {object_description} exceeds wall capacity" + ) + else: + current_selected_asset_ids.append(selected_asset_id) + selected_wall_objects.append((object_name, selected_asset_id)) + 
selected_wall_objects_all.remove( + (object_name, selected_asset_id) + ) + wall_capacity = ( + wall_capacity[0], + wall_capacity[1] + selected_asset_capacity, + ) + if len(selected_wall_objects) == current_number_of_objects: + print("No more objects can be added") + break + + # sort objects by object type + object_type2objects = {} + for object_name, selected_asset_id in selected_wall_objects: + object_type = object_name.split("-")[0] + if object_type not in object_type2objects: + object_type2objects[object_type] = [] + object_type2objects[object_type].append((object_name, selected_asset_id)) + + selected_wall_objects_ordered = [] + for object_type in object_type2objects: + selected_wall_objects_ordered += sorted(object_type2objects[object_type]) + + return selected_wall_objects_ordered, wall_capacity + + def check_object_size(self, candidates, room_size): + valid_candidates = [] + for candidate in candidates: + dimension = get_bbox_dims(self.database[candidate[0]]) + size = [dimension["x"], dimension["y"], dimension["z"]] + if size[2] > size[0]: + size = [size[2], size[1], size[0]] # make sure that x > z + + if size[0] > room_size[0] * self.object_size_tolerance: + continue + if size[1] > room_size[1] * self.object_size_tolerance: + continue + if size[2] > room_size[2] * self.object_size_tolerance: + continue + if size[0] * size[2] > room_size[0] * room_size[2] * 0.5: + continue # TODO: consider using the floor area instead of the room area + + valid_candidates.append(candidate) + + return valid_candidates + + def check_thin_object(self, candidates): + valid_candidates = [] + for candidate in candidates: + dimension = get_bbox_dims(self.database[candidate[0]]) + size = [dimension["x"], dimension["y"], dimension["z"]] + if size[2] > min(size[0], size[1]) * self.thin_threshold: + continue + valid_candidates.append(candidate) + return valid_candidates + + def random_select(self, candidates): + if self.random_selection: + selected_candidate = random.choice(candidates) 
+ else: + scores = [candidate[1] for candidate in candidates] + scores_tensor = torch.Tensor(scores) + probas = F.softmax( + scores_tensor, dim=0 + ) # TODO: consider using normalized scores + selected_index = torch.multinomial(probas, 1).item() + selected_candidate = candidates[selected_index] + return selected_candidate + + def update_floor_capacity(self, room2floor_capacity, scene): + for room in scene["rooms"]: + room_vertices = room["vertices"] + room_poly = Polygon(room_vertices) + for door in scene["doors"]: + for door_vertices in door["doorBoxes"]: + door_poly = Polygon(door_vertices) + door_center = door_poly.centroid + door_area = door_poly.area + if room_poly.contains(door_center): + room2floor_capacity[room["id"]][1] += door_area * 0.6 + + if scene["open_walls"] != []: + for open_wall_vertices in scene["open_walls"]["openWallBoxes"]: + open_wall_poly = Polygon(open_wall_vertices) + open_wall_center = open_wall_poly.centroid + if room_poly.contains(open_wall_center): + room2floor_capacity[room["id"]][1] += open_wall_poly.area * 0.6 + + return room2floor_capacity + + def update_wall_capacity(self, room2wall_capacity, scene): + for room in scene["rooms"]: + room_vertices = room["vertices"] + room_poly = Polygon(room_vertices) + for window in scene["windows"]: + for window_vertices in window["windowBoxes"]: + window_poly = Polygon(window_vertices) + window_center = window_poly.centroid + window_x = window_poly.bounds[2] - window_poly.bounds[0] + window_y = window_poly.bounds[3] - window_poly.bounds[1] + window_width = max(window_x, window_y) + if room_poly.contains(window_center): + room2wall_capacity[room["id"]][1] += window_width * 0.6 + + if scene["open_walls"] != []: + for open_wall_vertices in scene["open_walls"]["openWallBoxes"]: + open_wall_poly = Polygon(open_wall_vertices) + open_wall_center = open_wall_poly.centroid + open_wall_x = open_wall_poly.bounds[2] - open_wall_poly.bounds[0] + open_wall_y = open_wall_poly.bounds[3] - 
open_wall_poly.bounds[1] + open_wall_width = max(open_wall_x, open_wall_y) + if room_poly.contains(open_wall_center): + room2wall_capacity[room["id"]][1] += open_wall_width * 0.6 + + return room2wall_capacity + + def check_floor_placement(self, candidates, room_vertices, scene): + room_x = max([vertex[0] for vertex in room_vertices]) - min( + [vertex[0] for vertex in room_vertices] + ) + room_z = max([vertex[1] for vertex in room_vertices]) - min( + [vertex[1] for vertex in room_vertices] + ) + grid_size = int(max(room_x // 20, room_z // 20)) + + solver = DFS_Solver_Floor(grid_size=grid_size) + + room_poly = Polygon(room_vertices) + initial_state = self.get_initial_state_floor( + room_vertices, scene, add_window=False + ) + + grid_points = solver.create_grids(room_poly) + grid_points = solver.remove_points(grid_points, initial_state) + + valid_candidates = [] + for candidate in candidates: + object_size = get_bbox_dims(self.database[candidate[0]]) + object_dim = ( + object_size["x"] * 100 + self.size_buffer, + object_size["z"] * 100 + self.size_buffer, + ) + + solutions = solver.get_all_solutions(room_poly, grid_points, object_dim) + solutions = solver.filter_collision(initial_state, solutions) + solutions = solver.place_edge(room_poly, solutions, object_dim) + + if solutions != []: + valid_candidates.append(candidate) + else: + print( + f"Floor Object {candidate[0]} (size: {object_dim}) cannot be placed in room" + ) + continue + + return valid_candidates + + def check_wall_placement(self, candidates, room_vertices, scene): + room_x = max([vertex[0] for vertex in room_vertices]) - min( + [vertex[0] for vertex in room_vertices] + ) + room_z = max([vertex[1] for vertex in room_vertices]) - min( + [vertex[1] for vertex in room_vertices] + ) + grid_size = int(max(room_x // 20, room_z // 20)) + + solver = DFS_Solver_Wall(grid_size=grid_size) + + room_poly = Polygon(room_vertices) + initial_state = self.get_initial_state_wall(room_vertices, scene) + grid_points = 
solver.create_grids(room_poly) + + valid_candidates = [] + for candidate in candidates: + object_size = get_bbox_dims(self.database[candidate[0]]) + object_dim = ( + object_size["x"] * 100, + object_size["y"] * 100, + object_size["z"] * 100, + ) + + solutions = solver.get_all_solutions( + room_poly, grid_points, object_dim, height=0 + ) + solutions = solver.filter_collision(initial_state, solutions) + + if solutions != []: + valid_candidates.append(candidate) + else: + print( + f"Wall Object {candidate[0]} (size: {object_dim}) cannot be placed in room" + ) + continue + + return valid_candidates + + def get_initial_state_floor(self, room_vertices, scene, add_window=True): + doors, windows, open_walls = ( + scene["doors"], + scene["windows"], + scene["open_walls"], + ) + room_poly = Polygon(room_vertices) + + initial_state = {} + i = 0 + for door in doors: + door_boxes = door["doorBoxes"] + for door_box in door_boxes: + door_vertices = [(x * 100, z * 100) for (x, z) in door_box] + door_poly = Polygon(door_vertices) + door_center = door_poly.centroid + if room_poly.contains(door_center): + initial_state[f"door-{i}"] = ( + (door_center.x, door_center.y), + 0, + door_vertices, + 1, + ) + i += 1 + + if add_window: + for window in windows: + window_boxes = window["windowBoxes"] + for window_box in window_boxes: + window_vertices = [(x * 100, z * 100) for (x, z) in window_box] + window_poly = Polygon(window_vertices) + window_center = window_poly.centroid + if room_poly.contains(window_center): + initial_state[f"window-{i}"] = ( + (window_center.x, window_center.y), + 0, + window_vertices, + 1, + ) + i += 1 + + if open_walls != []: + for open_wall_box in open_walls["openWallBoxes"]: + open_wall_vertices = [(x * 100, z * 100) for (x, z) in open_wall_box] + open_wall_poly = Polygon(open_wall_vertices) + open_wall_center = open_wall_poly.centroid + if room_poly.contains(open_wall_center): + initial_state[f"open-{i}"] = ( + (open_wall_center.x, open_wall_center.y), + 0, + 
open_wall_vertices, + 1, + ) + i += 1 + + return initial_state + + def get_initial_state_wall(self, room_vertices, scene): + doors, windows, open_walls = ( + scene["doors"], + scene["windows"], + scene["open_walls"], + ) + room_poly = Polygon(room_vertices) + initial_state = {} + i = 0 + for door in doors: + door_boxes = door["doorBoxes"] + for door_box in door_boxes: + door_vertices = [(x * 100, z * 100) for (x, z) in door_box] + door_poly = Polygon(door_vertices) + door_center = door_poly.centroid + if room_poly.contains(door_center): + door_height = door["assetPosition"]["y"] * 100 * 2 + x_min, z_min, x_max, z_max = door_poly.bounds + initial_state[f"door-{i}"] = ( + (x_min, 0, z_min), + (x_max, door_height, z_max), + 0, + door_vertices, + 1, + ) + i += 1 + + for window in windows: + window_boxes = window["windowBoxes"] + for window_box in window_boxes: + window_vertices = [(x * 100, z * 100) for (x, z) in window_box] + window_poly = Polygon(window_vertices) + window_center = window_poly.centroid + if room_poly.contains(window_center): + y_min = window["holePolygon"][0]["y"] * 100 + y_max = window["holePolygon"][1]["y"] * 100 + x_min, z_min, x_max, z_max = window_poly.bounds + initial_state[f"window-{i}"] = ( + (x_min, y_min, z_min), + (x_max, y_max, z_max), + 0, + window_vertices, + 1, + ) + i += 1 + + if len(open_walls) != 0: + open_wall_boxes = open_walls["openWallBoxes"] + for open_wall_box in open_wall_boxes: + open_wall_vertices = [(x * 100, z * 100) for (x, z) in open_wall_box] + open_wall_poly = Polygon(open_wall_vertices) + open_wall_center = open_wall_poly.centroid + if room_poly.contains(open_wall_center): + x_min, z_min, x_max, z_max = open_wall_poly.bounds + initial_state[f"open-{i}"] = ( + (x_min, 0, z_min), + (x_max, scene["wall_height"] * 100, z_max), + 0, + open_wall_vertices, + 1, + ) + i += 1 + + return initial_state diff --git a/modules/prompts.py b/ai2holodeck/generation/prompts.py similarity index 92% rename from modules/prompts.py rename 
to ai2holodeck/generation/prompts.py index 328eb16..8db9aae 100644 --- a/modules/prompts.py +++ b/ai2holodeck/generation/prompts.py @@ -52,7 +52,7 @@ Provide a concise response, omitting any additional text at the beginning or end. """ -object_selection_prompt = """Assist me in selecting large, floor-based objects to furnish each room, excluding mats, carpets, and rugs. Provide a comprehensive description since I will use it to retrieve object. If multiple identical items are to be placed in the room, please indicate the quantity. +object_selection_prompt = """Assist me in selecting large, floor-based objects to furnish each room, excluding mats, carpets, and rugs. Provide a comprehensive description since I will use it to retrieve object. If multiple identical items are to be placed in the room, please indicate the quantity. Present your recommendations in this format: room type | object category | object description | quantity For example: @@ -113,7 +113,7 @@ Please first use natural language to explain your high-level design strategy, and then follow the desired format *strictly* (do not add any additional text at the beginning or end) to provide the constraints for each object.""" -wall_object_selection_prompt = """Assist me in selecting wall-based objects to furnish each room. +wall_object_selection_prompt = """Assist me in selecting wall-based objects to furnish each room. Present your recommendations in this format: room type | object category | object description | quantity For example: living room | painting | abstract painting | 2 @@ -136,7 +136,7 @@ Please do not add additional text at the beginning or in the end.""" -ceiling_selection_prompt = """Assist me in selecting ceiling objects (light/fan) to furnish each room. +ceiling_selection_prompt = """Assist me in selecting ceiling objects (light/fan) to furnish each room. 
Present your recommendations in this format: room type | ceiling object description For example: living room | modern, 3-light, semi-flush mount ceiling light @@ -147,7 +147,7 @@ Your response should be precise, without additional text at the beginning or end. """ -small_object_selection_prompt = """As an experienced room designer, you are tasked to bring life into the room by strategically placing more *small* objects. Those objects should only be arranged *on top of* large objects which serve as receptacles. +small_object_selection_prompt = """As an experienced room designer, you are tasked to bring life into the room by strategically placing more *small* objects. Those objects should only be arranged *on top of* large objects which serve as receptacles. The output should be formatted as follows: receptacle | small object-1, quantity, variance type | small object-2, quantity, variance type | ... Here, the variance type specifies whether the small objects are same or varied. There's no restriction on the number of small objects you can select for each receptacle. An example of this format is as follows: sofa-0 (living room) | remote control for TV, 1, same | book, 2, varied | gray fabric pillow, 2, varied @@ -174,7 +174,7 @@ # Your response should be precise, without additional text at the beginning or end. -object_selection_prompt_1 = """You are an experienced room designer, please assist me in selecting *large* floor and wall objects to furnish each room. I want the objects that can be directly placed on the floor or wall, *not* the small objects that need to be placed on the large objects. +object_selection_prompt_1 = """You are an experienced room designer, please assist me in selecting *large* floor and wall objects to furnish each room. I want the objects that can be directly placed on the floor or wall, *not* the small objects that need to be placed on the large objects. 
You must provide a comprehensive description for each object since I will use it to retrieve object. If multiple identical items are to be placed in the room, please indicate the quantity and variance type (same or varied). Present your recommendations in this format: room type | location | object category | object description | quantity, variance type For example: @@ -203,8 +203,8 @@ Agent: """ -object_selection_prompt_new_1 = """You are an experienced room designer, please assist me in selecting large *floor*/*wall* objects and small objects on top of them to furnish the room. You need to select appropriate objects to satisfy the customer's requirements. -You must provide a description and desired size for each object since I will use it to retrieve object. If multiple identical items are to be placed in the room, please indicate the quantity and variance type (same or varied). +object_selection_prompt_new_1 = """You are an experienced room designer, please assist me in selecting large *floor*/*wall* objects and small objects on top of them to furnish the room. You need to select appropriate objects to satisfy the customer's requirements. +You must provide a description and desired size for each object since I will use it to retrieve object. If multiple items are to be placed in the room with the same description, please indicate the quantity and variance_type ("same" if they should be identical, otherwise "varied"). 
Present your recommendations in JSON format: { object_name:{ diff --git a/modules/rooms.py b/ai2holodeck/generation/rooms.py similarity index 59% rename from modules/rooms.py rename to ai2holodeck/generation/rooms.py index dc1b492..9520cba 100644 --- a/modules/rooms.py +++ b/ai2holodeck/generation/rooms.py @@ -1,49 +1,63 @@ import ast import copy import math -import json +import os +from difflib import SequenceMatcher + +import compress_json +import compress_pickle +import matplotlib.colors as mcolors +import matplotlib.patches as patches +import matplotlib.pyplot as plt import torch -import pickle -import numpy as np from PIL import Image -from tqdm import tqdm from colorama import Fore -import matplotlib.pyplot as plt -import matplotlib.patches as patches -import modules.prompts as prompts -import matplotlib.colors as mcolors -from difflib import SequenceMatcher -from langchain import PromptTemplate +from langchain import PromptTemplate, OpenAI from shapely.geometry import LineString, Point, Polygon +from tqdm import tqdm - -class FloorPlanGenerator(): - def __init__(self, clip_model, clip_process, clip_tokenizer, llm): - self.json_template = {"ceilings": [], "children": [], "vertices": None, - "floorMaterial": {"name": None, "color": None}, - "floorPolygon": [], "id": None, "roomType": None} - self.material_selector = MaterialSelector(clip_model, clip_process, clip_tokenizer) - self.floor_plan_template = PromptTemplate(input_variables=["input", "additional_requirements"], template=prompts.floor_plan_prompt) +import ai2holodeck.generation.prompts as prompts +from ai2holodeck.constants import HOLODECK_BASE_DATA_DIR, DEBUGGING + + +class FloorPlanGenerator: + def __init__(self, clip_model, clip_process, clip_tokenizer, llm: OpenAI): + self.json_template = { + "ceilings": [], + "children": [], + "vertices": None, + "floorMaterial": {"name": None, "color": None}, + "floorPolygon": [], + "id": None, + "roomType": None, + } + self.material_selector = MaterialSelector( 
+ clip_model, clip_process, clip_tokenizer + ) + self.floor_plan_template = PromptTemplate( + input_variables=["input", "additional_requirements"], + template=prompts.floor_plan_prompt, + ) self.llm = llm self.used_assets = [] - def generate_rooms(self, scene, additional_requirements="N/A", visualize=False): # get floor plan if not provided - floor_plan_prompt = self.floor_plan_template.format(input=scene["query"], additional_requirements=additional_requirements) + floor_plan_prompt = self.floor_plan_template.format( + input=scene["query"], additional_requirements=additional_requirements + ) if "raw_floor_plan" not in scene: raw_floor_plan = self.llm(floor_plan_prompt) scene["raw_floor_plan"] = raw_floor_plan else: raw_floor_plan = scene["raw_floor_plan"] - + print(f"User: {floor_plan_prompt}\n") print(f"{Fore.GREEN}AI: Here is the floor plan:\n{raw_floor_plan}{Fore.RESET}") - + rooms = self.get_plan(scene["query"], scene["raw_floor_plan"], visualize) return rooms - def get_plan(self, query, raw_plan, visualize=False): parsed_plan = self.parse_raw_plan(raw_plan) @@ -53,16 +67,20 @@ def get_plan(self, query, raw_plan, visualize=False): all_designs.append(room["floor_design"]) all_designs.append(room["wall_design"]) design2material = self.select_materials(all_designs, topk=5) - + # assign materials for i in range(len(parsed_plan)): - parsed_plan[i]["floorMaterial"] = design2material[parsed_plan[i]["floor_design"]] - parsed_plan[i]["wallMaterial"] = design2material[parsed_plan[i]["wall_design"]] + parsed_plan[i]["floorMaterial"] = design2material[ + parsed_plan[i]["floor_design"] + ] + parsed_plan[i]["wallMaterial"] = design2material[ + parsed_plan[i]["wall_design"] + ] - if visualize: self.visualize_floor_plan(query, parsed_plan) + if visualize: + self.visualize_floor_plan(query, parsed_plan) return parsed_plan - def parse_raw_plan(self, raw_plan): parsed_plan = [] @@ -70,9 +88,10 @@ def parse_raw_plan(self, raw_plan): plans = [plan.lower() for plan in 
raw_plan.split("\n") if "|" in plan] for i, plan in enumerate(plans): room_type, floor_design, wall_design, vertices = plan.split("|") - room_type = room_type.strip().replace("'", "") # remove single quote + room_type = room_type.strip().replace("'", "") # remove single quote - if room_type in room_types: room_type += f"-{i}" + if room_type in room_types: + room_type += f"-{i}" room_types.append(room_type) floor_design = floor_design.strip() @@ -84,7 +103,9 @@ def parse_raw_plan(self, raw_plan): current_plan = copy.deepcopy(self.json_template) current_plan["id"] = room_type current_plan["roomType"] = room_type - current_plan["vertices"], current_plan["floorPolygon"] = self.vertices2xyz(vertices) + current_plan["vertices"], current_plan["floorPolygon"] = self.vertices2xyz( + vertices + ) current_plan["floor_design"] = floor_design current_plan["wall_design"] = wall_design parsed_plan.append(current_plan) @@ -98,46 +119,75 @@ def parse_raw_plan(self, raw_plan): for room in parsed_plan: full_vertices = self.get_full_vertices(room["vertices"], all_vertices) full_vertices = list(set(map(tuple, full_vertices))) - room["full_vertices"], room["floorPolygon"] = self.vertices2xyz(full_vertices) - + room["full_vertices"], room["floorPolygon"] = self.vertices2xyz( + full_vertices + ) + valid, msg = self.check_validity(parsed_plan) - if not valid: print(f"{Fore.RED}AI: {msg}{Fore.RESET}"); raise ValueError(msg) - else: print(f"{Fore.GREEN}AI: {msg}{Fore.RESET}"); return parsed_plan - + if not valid: + print(f"{Fore.RED}AI: {msg}{Fore.RESET}") + + if DEBUGGING: + import matplotlib.pyplot as plt + import numpy as np + + colors = plt.cm.rainbow(np.linspace(0, 1, len(parsed_plan))) + for room in parsed_plan: + for i in range(len(room["vertices"])): + a = room["vertices"][i] + b = room["vertices"][(i + 1) % len(room["vertices"])] + plt.plot([a[0], b[0]], [a[1], b[1]], color=colors[i]) + plt.show() + + raise ValueError(msg) + else: + print(f"{Fore.GREEN}AI: {msg}{Fore.RESET}") + 
return parsed_plan def vertices2xyz(self, vertices): sort_vertices = self.sort_vertices(vertices) - xyz_vertices = [{"x": vertex[0], "y": 0, "z": vertex[1]} for vertex in sort_vertices] + xyz_vertices = [ + {"x": vertex[0], "y": 0, "z": vertex[1]} for vertex in sort_vertices + ] return sort_vertices, xyz_vertices - def xyz2vertices(self, xyz_vertices): vertices = [(vertex["x"], vertex["z"]) for vertex in xyz_vertices] return vertices - def sort_vertices(self, vertices): # Calculate the centroid of the polygon cx = sum(x for x, y in vertices) / max(len(vertices), 1) cy = sum(y for x, y in vertices) / max(len(vertices), 1) # Sort the vertices in clockwise order - vertices_clockwise = sorted(vertices, key=lambda v: (-math.atan2(v[1]-cy, v[0]-cx)) % (2*math.pi)) + vertices_clockwise = sorted( + vertices, key=lambda v: (-math.atan2(v[1] - cy, v[0] - cx)) % (2 * math.pi) + ) # Find the vertex with the smallest x value min_vertex = min(vertices_clockwise, key=lambda v: v[0]) # Rotate the vertices so the vertex with the smallest x value is first min_index = vertices_clockwise.index(min_vertex) - vertices_clockwise = vertices_clockwise[min_index:] + vertices_clockwise[:min_index] + vertices_clockwise = ( + vertices_clockwise[min_index:] + vertices_clockwise[:min_index] + ) return vertices_clockwise - def get_full_vertices(self, original_vertices, all_vertices): # Create line segments from the original vertices - lines = [LineString([original_vertices[i], original_vertices[(i+1)%len(original_vertices)]]) for i in range(len(original_vertices))] + lines = [ + LineString( + [ + original_vertices[i], + original_vertices[(i + 1) % len(original_vertices)], + ] + ) + for i in range(len(original_vertices)) + ] # Check each vertex against each line segment full_vertices = [] @@ -149,19 +199,25 @@ def get_full_vertices(self, original_vertices, all_vertices): return full_vertices - def select_materials(self, designs, topk): - candidate_materials = 
self.material_selector.match_material(designs, topk=topk)[0] + candidate_materials = self.material_selector.match_material(designs, topk=topk)[ + 0 + ] candidate_colors = self.material_selector.select_color(designs, topk=topk)[0] - + # remove used materials top_materials = [[materials[0]] for materials in candidate_materials] - candidate_materials = [[material for material in materials if material not in self.used_assets] for materials in candidate_materials] + candidate_materials = [ + [material for material in materials if material not in self.used_assets] + for materials in candidate_materials + ] selected_materials = [] for i in range(len(designs)): - if len(candidate_materials[i]) == 0: selected_materials.append(top_materials[i][0]) - else: selected_materials.append(candidate_materials[i][0]) + if len(candidate_materials[i]) == 0: + selected_materials.append(top_materials[i][0]) + else: + selected_materials.append(candidate_materials[i][0]) selected_colors = [candidates[0] for candidates in candidate_colors] @@ -170,62 +226,81 @@ def select_materials(self, designs, topk): design2materials[design]["name"] = selected_materials[i] # design2materials[design]["color"] = self.color2rgb(selected_colors[i]) return design2materials - def color2rgb(self, color_name): rgb = mcolors.to_rgb(color_name) return {"r": rgb[0], "g": rgb[1], "b": rgb[2]} - def parsed2raw(self, rooms): raw_plan = "" for room in rooms: - raw_plan += " | ".join([room["roomType"], room["floor_design"], room["wall_design"], str(room["vertices"])]) + raw_plan += " | ".join( + [ + room["roomType"], + room["floor_design"], + room["wall_design"], + str(room["vertices"]), + ] + ) raw_plan += "\n" return raw_plan - def check_interior_angles(self, vertices): n = len(vertices) for i in range(n): a, b, c = vertices[i], vertices[(i + 1) % n], vertices[(i + 2) % n] - angle = abs(math.degrees(math.atan2(c[1]-b[1], c[0]-b[0]) - math.atan2(a[1]-b[1], a[0]-b[0]))) + angle = abs( + math.degrees( + math.atan2(c[1] - 
b[1], c[0] - b[0]) + - math.atan2(a[1] - b[1], a[0] - b[0]) + ) + ) if angle < 90 or angle > 270: return False return True - def check_validity(self, rooms): room_polygons = [Polygon(room["vertices"]) for room in rooms] # check interior angles for room in rooms: if not self.check_interior_angles(room["vertices"]): - return False, "All interior angles of the room must be greater than or equal to 90 degrees." - - if len(room_polygons) == 1: + return ( + False, + "All interior angles of the room must be greater than or equal to 90 degrees.", + ) + + if len(room_polygons) == 1: return True, "The floor plan is valid. (Only one room)" - + # check overlap, connectivity and vertex inside another room for i in range(len(room_polygons)): has_neighbor = False for j in range(len(room_polygons)): if i != j: - if room_polygons[i].equals(room_polygons[j]) or room_polygons[i].contains(room_polygons[j]) or room_polygons[j].contains(room_polygons[i]): + if ( + room_polygons[i].equals(room_polygons[j]) + or room_polygons[i].contains(room_polygons[j]) + or room_polygons[j].contains(room_polygons[i]) + ): return False, "Room polygons must not overlap." intersection = room_polygons[i].intersection(room_polygons[j]) if isinstance(intersection, LineString): has_neighbor = True for vertex in rooms[j]["vertices"]: if Polygon(rooms[i]["vertices"]).contains(Point(vertex)): - return False, "No vertex of a room can be inside another room." + return ( + False, + "No vertex of a room can be inside another room.", + ) if not has_neighbor: - return False, "Each room polygon must share an edge with at least one other room polygon." + return ( + False, + "Each room polygon must share an edge with at least one other room polygon.", + ) return True, "The floor plan is valid." 
- - def visualize_floor_plan(self, query, parsed_plan): plt.rcParams["font.family"] = "Times New Roman" plt.rcParams["font.size"] = 22 @@ -242,7 +317,9 @@ def midpoint(p1, p2): for i, room in enumerate(parsed_plan): coordinates = room["vertices"] - polygon = patches.Polygon(coordinates, closed=True, edgecolor='black', linewidth=2) + polygon = patches.Polygon( + coordinates, closed=True, edgecolor="black", linewidth=2 + ) polygon.set_facecolor(colors[i % len(colors)]) ax.add_patch(polygon) @@ -255,7 +332,7 @@ def midpoint(p1, p2): # ax.text(room_x, room_y, room["roomType"], ha='center', va='center') # Add points to the corners - ax.scatter(x, y, s=100, color='black') # s is the size of the point + ax.scatter(x, y, s=100, color="black") # s is the size of the point # # Display width and length # for i in range(len(coordinates)): @@ -264,21 +341,25 @@ def midpoint(p1, p2): # ax.text(*midpoint(p1, p2), label, ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', edgecolor='black', boxstyle='round4')) # Set aspect of the plot to be equal, so squares appear as squares - ax.set_aspect('equal') + ax.set_aspect("equal") ax.autoscale_view() # Turn off the axis - ax.axis('off') + ax.axis("off") folder_name = query.replace(" ", "_") - plt.savefig(f"{folder_name}.pdf", bbox_inches='tight', dpi=300) + plt.savefig(f"{folder_name}.pdf", bbox_inches="tight", dpi=300) plt.show() - -class MaterialSelector(): + +class MaterialSelector: def __init__(self, clip_model, clip_preprocess, clip_tokenizer): - materials = json.load(open("data/materials/material-database.json", "r")) - self.selected_materials = materials["Wall"] + materials["Wood"] + materials["Fabric"] + materials = compress_json.load( + os.path.join(HOLODECK_BASE_DATA_DIR, "materials/material-database.json") + ) + self.selected_materials = ( + materials["Wall"] + materials["Wood"] + materials["Fabric"] + ) self.colors = list(mcolors.CSS4_COLORS.keys()) self.clip_model = clip_model @@ -287,41 +368,78 @@ def 
__init__(self, clip_model, clip_preprocess, clip_tokenizer): self.load_features() - - def load_features(self): + def load_features(self): try: - self.material_feature_clip = pickle.load(open("data/materials/material_feature_clip.p", "rb")) + self.material_feature_clip = compress_pickle.load( + os.path.join( + HOLODECK_BASE_DATA_DIR, "materials/material_feature_clip.pkl" + ) + ) except: print("Precompute image features for materials...") self.material_feature_clip = [] for material in tqdm(self.selected_materials): - image = self.preprocess(Image.open(f"data/materials/images/{material}.png")).unsqueeze(0) + image = self.preprocess( + Image.open( + os.path.join( + HOLODECK_BASE_DATA_DIR, f"materials/images/{material}.png" + ) + ) + ).unsqueeze(0) with torch.no_grad(): image_features = self.clip_model.encode_image(image) image_features /= image_features.norm(dim=-1, keepdim=True) self.material_feature_clip.append(image_features) self.material_feature_clip = torch.vstack(self.material_feature_clip) - pickle.dump(self.material_feature_clip, open("data/materials/material_feature_clip.p", "wb")) - + compress_pickle.dump( + self.material_feature_clip, + os.path.join( + HOLODECK_BASE_DATA_DIR, "materials/material_feature_clip.pkl" + ), + ) + try: - self.color_feature_clip = pickle.load(open("data/materials/color_feature_clip.p", "rb")) + self.color_feature_clip = compress_pickle.load( + os.path.join(HOLODECK_BASE_DATA_DIR, "materials/color_feature_clip.pkl") + ) except: print("Precompute text features for colors...") with torch.no_grad(): - self.color_feature_clip = self.clip_model.encode_text(self.clip_tokenizer(self.colors)) - self.color_feature_clip /= self.color_feature_clip.norm(dim=-1, keepdim=True) - pickle.dump(self.color_feature_clip, open("data/materials/color_feature_clip.p", "wb")) - + self.color_feature_clip = self.clip_model.encode_text( + self.clip_tokenizer(self.colors) + ) + self.color_feature_clip /= self.color_feature_clip.norm( + dim=-1, keepdim=True + ) 
+ + compress_pickle.dump( + self.color_feature_clip, + os.path.join( + HOLODECK_BASE_DATA_DIR, "materials/color_feature_clip.pkl" + ), + ) def match_material(self, queries, topk=5): with torch.no_grad(): - query_feature_clip = self.clip_model.encode_text(self.clip_tokenizer(queries)) + query_feature_clip = self.clip_model.encode_text( + self.clip_tokenizer(queries) + ) query_feature_clip /= query_feature_clip.norm(dim=-1, keepdim=True) - + clip_similarity = query_feature_clip @ self.material_feature_clip.T - string_similarity = torch.tensor([[self.string_match(query, material) for material in self.selected_materials] for query in queries]) - - joint_similarity = string_similarity + clip_similarity # use visual embedding only seems to be better + string_similarity = torch.tensor( + [ + [ + self.string_match(query, material) + for material in self.selected_materials + ] + for query in queries + ] + ) + + joint_similarity = ( + string_similarity + clip_similarity + ) # use visual embedding only seems to be better results = [] scores = [] @@ -331,10 +449,11 @@ def match_material(self, queries, topk=5): scores.append([sim[ind] for ind in indices]) return results, scores - def select_color(self, queries, topk=5): with torch.no_grad(): - query_feature_clip = self.clip_model.encode_text(self.clip_tokenizer(queries)) + query_feature_clip = self.clip_model.encode_text( + self.clip_tokenizer(queries) + ) query_feature_clip /= query_feature_clip.norm(dim=-1, keepdim=True) clip_similarity = query_feature_clip @ self.color_feature_clip.T @@ -345,7 +464,6 @@ def select_color(self, queries, topk=5): results.append([self.colors[ind] for ind in indices]) scores.append([sim[ind] for ind in indices]) return results, scores - def string_match(self, a, b): - return SequenceMatcher(None, a, b).ratio() \ No newline at end of file + return SequenceMatcher(None, a, b).ratio() diff --git a/modules/skybox.py b/ai2holodeck/generation/skybox.py similarity index 76% rename from modules/skybox.py 
rename to ai2holodeck/generation/skybox.py index 6d00667..2194dd9 100644 --- a/modules/skybox.py +++ b/ai2holodeck/generation/skybox.py @@ -1,12 +1,35 @@ import random + from procthor.utils.types import RGB, Vector3 -skyboxes = ["Sky1", "Sky2", "SkyAlbany", "SkyAlbanyHill", "SkyDalyCity", "SkyEmeryville", "SkyGarden", "SkyTropical", - "SkyGasworks", "SkyMosconeCenter", "SkyMountain", "SkyOakland", "SkySeaStacks", "SkySFCityHall", "Sky2Dusk", - "SkySFDowntown", "SkySFGarden", "SkySnow", "SkyNeighborhood", "SkySouthLakeUnion", "SkySunset", "SkyTreeFarm"] +skyboxes = [ + "Sky1", + "Sky2", + "SkyAlbany", + "SkyAlbanyHill", + "SkyDalyCity", + "SkyEmeryville", + "SkyGarden", + "SkyTropical", + "SkyGasworks", + "SkyMosconeCenter", + "SkyMountain", + "SkyOakland", + "SkySeaStacks", + "SkySFCityHall", + "Sky2Dusk", + "SkySFDowntown", + "SkySFGarden", + "SkySnow", + "SkyNeighborhood", + "SkySouthLakeUnion", + "SkySunset", + "SkyTreeFarm", +] # timeOfDays = ["Midday", "GoldenHour", "BlueHour", "Midnight"] timeOfDays = ["Midday", "GoldenHour", "BlueHour"] + def getSkybox(scene): skybox = random.choice(skyboxes) time_of_day = random.choice(timeOfDays) @@ -39,4 +62,4 @@ def getSkybox(scene): directional_light["rgb"] = RGB(r=0.93, g=0.965, b=1.0) directional_light["rotation"] = Vector3(x=41, y=-50, z=0) - return scene \ No newline at end of file + return scene diff --git a/modules/small_objects.py b/ai2holodeck/generation/small_objects.py similarity index 54% rename from modules/small_objects.py rename to ai2holodeck/generation/small_objects.py index b7783a7..c964562 100644 --- a/modules/small_objects.py +++ b/ai2holodeck/generation/small_objects.py @@ -1,39 +1,58 @@ import copy -import torch -import random import multiprocessing +import random + +import torch import torch.nn.functional as F from ai2thor.controller import Controller +from ai2thor.hooks.procedural_asset_hook import ProceduralAssetHookRunner +from langchain import OpenAI from procthor.constants import FLOOR_Y from 
procthor.utils.types import Vector3 -from ai2thor.hooks.procedural_asset_hook import ProceduralAssetHookRunner +from ai2holodeck.constants import THOR_COMMIT_ID +from ai2holodeck.generation.objaverse_retriever import ObjathorRetriever +from ai2holodeck.generation.utils import ( + get_bbox_dims, + get_annotations, + get_secondary_properties, +) -class SmallObjectGenerator(): - def __init__(self, llm, object_retriever, objaverse_version): + +class SmallObjectGenerator: + def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): self.llm = llm self.object_retriever = object_retriever self.database = object_retriever.database - self.objaverse_version = objaverse_version # set kinematic to false for small objects - self.json_template = {"assetId": None, "id": None, "kinematic": False, - "position": {}, "rotation": {}, "material": None, "roomId": None} + self.json_template = { + "assetId": None, + "id": None, + "kinematic": False, + "position": {}, + "rotation": {}, + "material": None, + "roomId": None, + } self.clip_threshold = 30 self.used_assets = [] self.reuse_assets = True - def generate_small_objects(self, scene, controller, receptacle_ids): object_selection_plan = scene["object_selection_plan"] receptacle2asset_id = self.get_receptacle2asset_id(scene, receptacle_ids) # receptacle2rotation = self.get_receptacle2rotation(scene, receptacle_ids) # receptacle2position = self.get_receptacle2position(scene, receptacle_ids) - - if "receptacle2small_objects" in scene and self.reuse_assets: receptacle2small_objects = scene["receptacle2small_objects"] - else: receptacle2small_objects = self.select_small_objects(object_selection_plan, receptacle_ids, receptacle2asset_id) + + if "receptacle2small_objects" in scene and self.reuse_assets: + receptacle2small_objects = scene["receptacle2small_objects"] + else: + receptacle2small_objects = self.select_small_objects( + object_selection_plan, receptacle_ids, receptacle2asset_id + ) results = [] # Place the objects @@ 
-41,43 +60,54 @@ def generate_small_objects(self, scene, controller, receptacle_ids): placements = [] for object_name, asset_id, _ in small_objects: thin, rotation = self.check_thin_asset(asset_id) - small, y_rotation = self.check_small_asset(asset_id) # check if the object is small and rotate around y axis randomly + small, y_rotation = self.check_small_asset( + asset_id + ) # check if the object is small and rotate around y axis randomly obj = self.place_object(controller, asset_id, receptacle, rotation) - if obj != None: # If the object is successfully placed + if obj != None: # If the object is successfully placed placement = self.json_template.copy() placement["assetId"] = asset_id placement["id"] = f"{object_name}|{receptacle}" placement["position"] = obj["position"] - asset_height = self.database[asset_id]['assetMetadata']['boundingBox']["y"] + asset_height = get_bbox_dims(self.database[asset_id])["y"] - if obj["position"]["y"] + asset_height > scene["wall_height"]: continue # if the object is too high, skip it + if obj["position"]["y"] + asset_height > scene["wall_height"]: + continue # if the object is too high, skip it - placement["position"]["y"] = obj["position"]["y"] + (asset_height / 2) + 0.001 # add half of the height to the y position and a small offset + placement["position"]["y"] = ( + obj["position"]["y"] + (asset_height / 2) + 0.001 + ) # add half of the height to the y position and a small offset placement["rotation"] = obj["rotation"] placement["roomId"] = receptacle.split("(")[1].split(")")[0] # temporary solution fix position and rotation for thin objects - if thin: placement = self.fix_placement_for_thin_assets(placement) + if thin: + placement = self.fix_placement_for_thin_assets(placement) - if small: placement["rotation"]["y"] = y_rotation # temporary solution for random rotation around y axis for small objects + if small: + placement["rotation"][ + "y" + ] = y_rotation # temporary solution for random rotation around y axis for small 
objects # else: placement["rotation"]["y"] = receptacle2rotation[receptacle]["y"] - if not small and not thin: placement["kinematic"] = True # set kinematic to true for non-small objects + if not small and not thin: + placement["kinematic"] = ( + True # set kinematic to true for non-small objects + ) - if "breakable" in self.database[asset_id]["objectMetadata"].keys(): - if self.database[asset_id]["objectMetadata"]["breakable"] == True: placement["kinematic"] = True + if "CanBreak" in get_secondary_properties(self.database[asset_id]): + placement["kinematic"] = True placements.append(placement) - + # TODO: check collision between small objects on the same receptacle valid_placements = self.check_collision(placements) results.extend(valid_placements) controller.stop() return results, receptacle2small_objects - def get_receptacle2asset_id(self, scene, receptacle_ids): receptacle2asset_id = {} @@ -87,7 +117,6 @@ def get_receptacle2asset_id(self, scene, receptacle_ids): # if receptacle_id not in receptacle2asset_id and "___" in receptacle_id: # receptacle2asset_id[receptacle_id] = receptacle2asset_id[receptacle_id.split("___")[0]] return receptacle2asset_id - def get_receptacle2rotation(self, scene, receptacle_ids): receptacle2rotation = {} @@ -98,7 +127,6 @@ def get_receptacle2rotation(self, scene, receptacle_ids): # receptacle2rotation[receptacle_id] = receptacle2rotation[receptacle_id.split("___")[0]] return receptacle2rotation - def get_receptacle2position(self, scene, receptacle_ids): receptacle2rotation = {} for object in scene["objects"]: @@ -108,8 +136,9 @@ def get_receptacle2position(self, scene, receptacle_ids): # receptacle2rotation[receptacle_id] = receptacle2rotation[receptacle_id.split("___")[0]] return receptacle2rotation - - def select_small_objects(self, object_selection_plan, recpetacle_ids, receptacle2asset_id): + def select_small_objects( + self, object_selection_plan, recpetacle_ids, receptacle2asset_id + ): children_plans = [] for room_type, 
objects in object_selection_plan.items(): for object_name, object_info in objects.items(): @@ -118,20 +147,26 @@ def select_small_objects(self, object_selection_plan, recpetacle_ids, receptacle child_plan["room_type"] = room_type child_plan["parent"] = object_name children_plans.append(child_plan) - + receptacle2small_object_plans = {} for receptacle_id in recpetacle_ids: small_object_plans = [] for child_plan in children_plans: - if child_plan["room_type"] in receptacle_id and child_plan["parent"] in receptacle_id: + if ( + child_plan["room_type"] in receptacle_id + and child_plan["parent"] in receptacle_id + ): small_object_plans.append(child_plan) if len(small_object_plans) > 0: receptacle2small_object_plans[receptacle_id] = small_object_plans - + receptacle2small_objects = {} - packed_args = [(receptacle, small_objects, receptacle2asset_id) for receptacle, small_objects in receptacle2small_object_plans.items()] + packed_args = [ + (receptacle, small_objects, receptacle2asset_id) + for receptacle, small_objects in receptacle2small_object_plans.items() + ] pool = multiprocessing.Pool(processes=4) results = pool.map(self.select_small_objects_per_receptacle, packed_args) pool.close() @@ -141,44 +176,65 @@ def select_small_objects(self, object_selection_plan, recpetacle_ids, receptacle receptacle2small_objects[result[0]] = result[1] return receptacle2small_objects - def select_small_objects_per_receptacle(self, args): receptacle, small_objects, receptacle2asset_id = args results = [] - receptacle_dimensions = self.database[receptacle2asset_id[receptacle]]['assetMetadata']['boundingBox'] + receptacle_dimensions = get_bbox_dims( + self.database[receptacle2asset_id[receptacle]] + ) receptacle_size = [receptacle_dimensions["x"], receptacle_dimensions["z"]] receptacle_area = receptacle_size[0] * receptacle_size[1] capacity = 0 num_objects = 0 sorted(receptacle_size) for small_object in small_objects: - object_name, quantity, variance_type = small_object["object_name"], 
small_object["quantity"], small_object["variance_type"] - quantity = min(quantity, 5) # maximum 5 objects per receptacle + object_name, quantity, variance_type = ( + small_object["object_name"], + small_object["quantity"], + small_object["variance_type"], + ) + quantity = min(quantity, 5) # maximum 5 objects per receptacle print(f"Selecting {quantity} {object_name} for {receptacle}") # Select the object - candidates = self.object_retriever.retrieve([f"a 3D model of {object_name}"], self.clip_threshold) - candidates = [candidate for candidate in candidates - if self.database[candidate[0]]["annotations"]["onObject"] == True] # Only select objects that can be placed on other objects - - valid_candidates = [] # Only select objects with high confidence + candidates = self.object_retriever.retrieve( + [f"a 3D model of {object_name}"], self.clip_threshold + ) + candidates = [ + candidate + for candidate in candidates + if get_annotations(self.database[candidate[0]])["onObject"] == True + ] # Only select objects that can be placed on other objects + + valid_candidates = [] # Only select objects with high confidence for candidate in candidates: - candidate_dimensions = self.database[candidate[0]]['assetMetadata']['boundingBox'] + candidate_dimensions = get_bbox_dims(self.database[candidate[0]]) candidate_size = [candidate_dimensions["x"], candidate_dimensions["z"]] sorted(candidate_size) - if candidate_size[0] < receptacle_size[0] * 0.9 and candidate_size[1] < receptacle_size[1] * 0.9: # if the object is smaller than the receptacle, threshold is 90% + if ( + candidate_size[0] < receptacle_size[0] * 0.9 + and candidate_size[1] < receptacle_size[1] * 0.9 + ): # if the object is smaller than the receptacle, threshold is 90% valid_candidates.append(candidate) - - if len(valid_candidates) == 0: print(f"No valid candidate for {object_name}."); continue + + if len(valid_candidates) == 0: + print(f"No valid candidate for {object_name}.") + continue # remove used assets 
top_one_candidate = valid_candidates[0] - if len(valid_candidates) > 1: valid_candidates = [candidate for candidate in valid_candidates if candidate[0] not in self.used_assets] - if len(valid_candidates) == 0: valid_candidates = [top_one_candidate] - - valid_candidates = valid_candidates[:5] # only select top 5 candidates + if len(valid_candidates) > 1: + valid_candidates = [ + candidate + for candidate in valid_candidates + if candidate[0] not in self.used_assets + ] + if len(valid_candidates) == 0: + valid_candidates = [top_one_candidate] + + valid_candidates = valid_candidates[:5] # only select top 5 candidates selected_asset_ids = [] if variance_type == "same": @@ -191,33 +247,45 @@ def select_small_objects_per_receptacle(self, args): selected_candidate = self.random_select(valid_candidates) selected_asset_id = selected_candidate[0] selected_asset_ids.append(selected_asset_id) - if len(valid_candidates) > 1: valid_candidates.remove(selected_candidate) - + if len(valid_candidates) > 1: + valid_candidates.remove(selected_candidate) + for i in range(quantity): - small_object_dimensions = self.database[selected_asset_ids[i]]['assetMetadata']['boundingBox'] - small_object_sizes = [small_object_dimensions["x"], small_object_dimensions["y"], small_object_dimensions["z"]] + small_object_dimensions = get_bbox_dims( + self.database[selected_asset_ids[i]] + ) + small_object_sizes = [ + small_object_dimensions["x"], + small_object_dimensions["y"], + small_object_dimensions["z"], + ] sorted(small_object_sizes) # small_object_area = small_object_dimensions["x"] * small_object_dimensions["z"] # take the maximum 2 dimensions and multiply them small_object_area = small_object_sizes[1] * small_object_sizes[2] * 0.8 capacity += small_object_area num_objects += 1 - if capacity > receptacle_area * 0.9 and num_objects > 1: print(f"Warning: {receptacle} is overfilled."); break - if num_objects > 15: print(f"Warning: {receptacle} has too many objects."); break - else: 
results.append((f"{object_name}-{i}", selected_asset_ids[i])) - + if capacity > receptacle_area * 0.9 and num_objects > 1: + print(f"Warning: {receptacle} is overfilled.") + break + if num_objects > 15: + print(f"Warning: {receptacle} has too many objects.") + break + else: + results.append((f"{object_name}-{i}", selected_asset_ids[i])) + ordered_small_objects = [] for object_name, asset_id in results: - dimensions = self.database[asset_id]['assetMetadata']['boundingBox'] + dimensions = get_bbox_dims(self.database[asset_id]) size = max(dimensions["x"], dimensions["z"]) ordered_small_objects.append((object_name, asset_id, size)) ordered_small_objects.sort(key=lambda x: x[2], reverse=True) return receptacle, ordered_small_objects - def start_controller(self, scene, objaverse_dir): controller = Controller( + commit_id=THOR_COMMIT_ID, agentMode="default", makeAgentsVisible=False, visibilityDistance=1.5, @@ -232,7 +300,6 @@ def start_controller(self, scene, objaverse_dir): ), ) return controller - def place_object(self, controller, object_id, receptacle_id, rotation=[0, 0, 0]): generated_id = f"small|{object_id}" @@ -244,8 +311,8 @@ def place_object(self, controller, object_id, receptacle_id, rotation=[0, 0, 0]) position=Vector3(x=0, y=FLOOR_Y - 20, z=0), rotation=Vector3(x=0, y=0, z=0), renderImage=False, - ) - + ) + # Place the object in the receptacle # Question: Can I spawn multiple objects at once? 
event = controller.step( @@ -257,10 +324,12 @@ def place_object(self, controller, object_id, receptacle_id, rotation=[0, 0, 0]) allowFloor=False, renderImage=False, allowMoveable=True, - numPlacementAttempts=10, # TODO: need to find a better way to determine the number of placement attempts + numPlacementAttempts=10, # TODO: need to find a better way to determine the number of placement attempts ) - obj = next(obj for obj in event.metadata["objects"] if obj["objectId"] == generated_id) + obj = next( + obj for obj in event.metadata["objects"] if obj["objectId"] == generated_id + ) center_position = obj["axisAlignedBoundingBox"]["center"].copy() if event and center_position["y"] > FLOOR_Y: @@ -270,123 +339,163 @@ def place_object(self, controller, object_id, receptacle_id, rotation=[0, 0, 0]) action="DisableObject", objectId=generated_id, renderImage=False, - ) + ) return None - def check_thin_asset(self, asset_id): - dimensions = self.database[asset_id]["assetMetadata"]["boundingBox"] - twod_size = (dimensions["x"]*100, dimensions["z"]*100) - threshold = 5 # 3cm is the threshold for thin objects # TODO: need a better way to determine thin threshold + dimensions = get_bbox_dims(self.database[asset_id]) + twod_size = (dimensions["x"] * 100, dimensions["z"] * 100) + threshold = 5 # 3cm is the threshold for thin objects # TODO: need a better way to determine thin threshold rotations = [0, 0, 0] if twod_size[0] < threshold: - rotations = [0, 90, 0] # asset is thin in x direction + rotations = [0, 90, 0] # asset is thin in x direction return True, rotations - elif twod_size[1] < threshold: - rotations = [90, 0, 0] # asset is thin in z direction + elif twod_size[1] < threshold: + rotations = [90, 0, 0] # asset is thin in z direction return True, rotations else: return False, rotations - def fix_placement_for_thin_assets(self, placement): asset_id = placement["assetId"] - dimensions = self.database[asset_id]["assetMetadata"]["boundingBox"] - threshold = 0.03 # 0.03 meter is 
the threshold for thin objects + dimensions = get_bbox_dims(self.database[asset_id]) + threshold = 0.03 # 0.03 meter is the threshold for thin objects orginal_rotation = placement["rotation"] orginal_position = placement["position"] - bottom_center_position = {"x": orginal_position["x"], - "y": orginal_position["y"] - dimensions["y"]/2, - "z": orginal_position["z"]} + bottom_center_position = { + "x": orginal_position["x"], + "y": orginal_position["y"] - dimensions["y"] / 2, + "z": orginal_position["z"], + } if dimensions["x"] <= threshold: # asset is thin in x direction, need to rotate in z direction - placement["rotation"] = {"x": orginal_rotation["x"], - "y": orginal_rotation["y"], - "z": orginal_rotation["z"] + 90} - placement["position"] = {"x": bottom_center_position["x"], - "y": bottom_center_position["y"] + dimensions["x"]/2, - "z": bottom_center_position["z"]} + placement["rotation"] = { + "x": orginal_rotation["x"], + "y": orginal_rotation["y"], + "z": orginal_rotation["z"] + 90, + } + placement["position"] = { + "x": bottom_center_position["x"], + "y": bottom_center_position["y"] + dimensions["x"] / 2, + "z": bottom_center_position["z"], + } elif dimensions["z"] <= threshold: # asset is thin in z direction, need to rotate in x direction - placement["rotation"] = {"x": orginal_rotation["x"] + 90, - "y": orginal_rotation["y"], - "z": orginal_rotation["z"]} - placement["position"] = {"x": bottom_center_position["x"], - "y": bottom_center_position["y"] + dimensions["z"]/2, - "z": bottom_center_position["z"]} + placement["rotation"] = { + "x": orginal_rotation["x"] + 90, + "y": orginal_rotation["y"], + "z": orginal_rotation["z"], + } + placement["position"] = { + "x": bottom_center_position["x"], + "y": bottom_center_position["y"] + dimensions["z"] / 2, + "z": bottom_center_position["z"], + } return placement - def check_small_asset(self, asset_id): - dimensions = self.database[asset_id]["assetMetadata"]["boundingBox"] - size = (dimensions["x"]*100, 
dimensions["y"]*100, dimensions["z"]*100) - threshold = 25 * 25 # 25cm * 25cm is the threshold for small objects - - if size[0] * size[2] <= threshold and size[0] <= 25 and size[1] <= 25 and size[2] <= 25: + dimensions = get_bbox_dims(self.database[asset_id]) + size = (dimensions["x"] * 100, dimensions["y"] * 100, dimensions["z"] * 100) + threshold = 25 * 25 # 25cm * 25cm is the threshold for small objects + + if ( + size[0] * size[2] <= threshold + and size[0] <= 25 + and size[1] <= 25 + and size[2] <= 25 + ): return True, random.randint(0, 360) else: return False, 0 - def random_select(self, candidates): scores = [candidate[1] for candidate in candidates] scores_tensor = torch.Tensor(scores) - probas = F.softmax(scores_tensor, dim=0) # TODO: consider using normalized scores + probas = F.softmax( + scores_tensor, dim=0 + ) # TODO: consider using normalized scores selected_index = torch.multinomial(probas, 1).item() selected_candidate = candidates[selected_index] return selected_candidate - def check_collision(self, placements): - static_placements = [placement for placement in placements if placement["kinematic"] == True] + static_placements = [ + placement for placement in placements if placement["kinematic"] == True + ] if len(static_placements) <= 1: return placements else: colliding_pairs = [] for i, placement_1 in enumerate(static_placements[:-1]): - for placement_2 in static_placements[i+1:]: + for placement_2 in static_placements[i + 1 :]: box1 = self.get_bounding_box(placement_1) box2 = self.get_bounding_box(placement_2) if self.intersect_3d(box1, box2): colliding_pairs.append((placement_1["id"], placement_2["id"])) - id2assetId = {placement["id"]: placement["assetId"] for placement in placements} + id2assetId = { + placement["id"]: placement["assetId"] for placement in placements + } if len(colliding_pairs) != 0: remove_ids = [] - colliding_ids = list(set([pair[0] for pair in colliding_pairs] + [pair[1] for pair in colliding_pairs])) + colliding_ids = 
list( + set( + [pair[0] for pair in colliding_pairs] + + [pair[1] for pair in colliding_pairs] + ) + ) # order by size from small to large - colliding_ids = sorted(colliding_ids, key=lambda x: self.database[id2assetId[x]]["assetMetadata"]["boundingBox"]["x"] * self.database[id2assetId[x]]["assetMetadata"]["boundingBox"]["z"]) + colliding_ids = sorted( + colliding_ids, + key=lambda x: get_bbox_dims(self.database[id2assetId[x]])["x"] + * get_bbox_dims(self.database[id2assetId[x]])["z"], + ) for object_id in colliding_ids: remove_ids.append(object_id) - colliding_pairs = [pair for pair in colliding_pairs if object_id not in pair] - if len(colliding_pairs) == 0: break - valid_placements = [placement for placement in placements if placement["id"] not in remove_ids] + colliding_pairs = [ + pair for pair in colliding_pairs if object_id not in pair + ] + if len(colliding_pairs) == 0: + break + valid_placements = [ + placement + for placement in placements + if placement["id"] not in remove_ids + ] return valid_placements else: return placements - def get_bounding_box(self, placement): asset_id = placement["assetId"] - dimensions = self.database[asset_id]["assetMetadata"]["boundingBox"] - size = (dimensions["x"]*100, dimensions["y"]*100, dimensions["z"]*100) + dimensions = get_bbox_dims(self.database[asset_id]) + size = (dimensions["x"] * 100, dimensions["y"] * 100, dimensions["z"] * 100) position = placement["position"] - box = {"min": [position["x"]*100 - size[0]/2, position["y"]*100 - size[1]/2, position["z"]*100 - size[2]/2], - "max": [position["x"]*100 + size[0]/2, position["y"]*100 + size[1]/2, position["z"]*100 + size[2]/2]} + box = { + "min": [ + position["x"] * 100 - size[0] / 2, + position["y"] * 100 - size[1] / 2, + position["z"] * 100 - size[2] / 2, + ], + "max": [ + position["x"] * 100 + size[0] / 2, + position["y"] * 100 + size[1] / 2, + position["z"] * 100 + size[2] / 2, + ], + } return box - def intersect_3d(self, box1, box2): # box1 and box2 are 
dictionaries with 'min' and 'max' keys, # which are tuples representing the minimum and maximum corners of the 3D box. for i in range(3): - if box1['max'][i] < box2['min'][i] or box1['min'][i] > box2['max'][i]: + if box1["max"][i] < box2["min"][i] or box1["min"][i] > box2["max"][i]: return False - return True \ No newline at end of file + return True diff --git a/modules/utils.py b/ai2holodeck/generation/utils.py similarity index 60% rename from modules/utils.py rename to ai2holodeck/generation/utils.py index 5840ec4..de9029b 100644 --- a/modules/utils.py +++ b/ai2holodeck/generation/utils.py @@ -1,12 +1,22 @@ import copy -import json +import os +from argparse import ArgumentParser +from typing import Dict, Any + +import compress_json import numpy as np from PIL import Image -from tqdm import tqdm -from argparse import ArgumentParser from ai2thor.controller import Controller from ai2thor.hooks.procedural_asset_hook import ProceduralAssetHookRunner -from moviepy.editor import TextClip, CompositeVideoClip, concatenate_videoclips, ImageSequenceClip +from moviepy.editor import ( + TextClip, + CompositeVideoClip, + concatenate_videoclips, + ImageSequenceClip, +) +from tqdm import tqdm + +from ai2holodeck.constants import HOLODECK_BASE_DATA_DIR, THOR_COMMIT_ID def all_edges_white(img): @@ -32,6 +42,7 @@ def all_edges_white(img): def get_top_down_frame(scene, objaverse_asset_dir, width=1024, height=1024): controller = Controller( + commit_id=THOR_COMMIT_ID, agentMode="default", makeAgentsVisible=False, visibilityDistance=1.5, @@ -43,7 +54,7 @@ def get_top_down_frame(scene, objaverse_asset_dir, width=1024, height=1024): asset_directory=objaverse_asset_dir, asset_symlink=True, verbose=True, - ) + ), ) # Setup the top-down camera @@ -56,15 +67,19 @@ def get_top_down_frame(scene, objaverse_asset_dir, width=1024, height=1024): pose["position"]["y"] = bounds["y"] del pose["orthographicSize"] - try: wall_height = wall_height = max([point["y"] for point in 
scene["walls"][0]["polygon"]]) - except: wall_height = 2.5 + try: + wall_height = wall_height = max( + [point["y"] for point in scene["walls"][0]["polygon"]] + ) + except: + wall_height = 2.5 - for i in range(20): + for i in range(20): pose["orthographic"] = False - + pose["farClippingPlane"] = pose["position"]["y"] + 10 pose["nearClippingPlane"] = pose["position"]["y"] - wall_height - + # add the camera to the scene event = controller.step( action="AddThirdPartyCamera", @@ -75,8 +90,9 @@ def get_top_down_frame(scene, objaverse_asset_dir, width=1024, height=1024): top_down_frame = event.third_party_camera_frames[-1] # check if the edge of the frame is white - if all_edges_white(top_down_frame): break - + if all_edges_white(top_down_frame): + break + pose["position"]["y"] += 0.75 controller.stop() @@ -87,6 +103,7 @@ def get_top_down_frame(scene, objaverse_asset_dir, width=1024, height=1024): def get_top_down_frame_ithor(scene, objaverse_asset_dir, width=1024, height=1024): controller = Controller( + commit_id=THOR_COMMIT_ID, agentMode="default", makeAgentsVisible=False, visibilityDistance=1.5, @@ -98,20 +115,20 @@ def get_top_down_frame_ithor(scene, objaverse_asset_dir, width=1024, height=1024 asset_directory=objaverse_asset_dir, asset_symlink=True, verbose=True, - ) + ), ) - + controller.reset(scene) - + event = controller.step(action="GetMapViewCameraProperties") pose = copy.deepcopy(event.metadata["actionReturn"]) - + event = controller.step( - action="AddThirdPartyCamera", - **pose, - skyboxColor="white", - raise_for_failure=True, - ) + action="AddThirdPartyCamera", + **pose, + skyboxColor="white", + raise_for_failure=True, + ) controller.stop() @@ -121,37 +138,32 @@ def get_top_down_frame_ithor(scene, objaverse_asset_dir, width=1024, height=1024 def main(save_path): - scene = json.load(open(save_path + f"scene.json", "r")) + scene = compress_json.load(save_path + f"scene.json", "r") image = get_top_down_frame(scene) image.save(f"test1.png") - with 
open(save_path + f"scene.json", "w") as f: - json.dump(scene, f, indent=4) + + compress_json.dump(scene, save_path + f"scene.json", json_kwargs=dict(indent=4)) def visualize_asset(asset_id, version): - empty_house = json.load(open("empty_house.json", "r")) - empty_house["objects"] = [{ + empty_house = compress_json.load("empty_house.json") + empty_house["objects"] = [ + { "assetId": asset_id, "id": "test_asset", "kinematic": True, - "position": { - "x": 0, - "y": 0, - "z": 0 - }, - "rotation": { - "x": 0, - "y": 0, - "z": 0 - }, - "material": None - }] + "position": {"x": 0, "y": 0, "z": 0}, + "rotation": {"x": 0, "y": 0, "z": 0}, + "material": None, + } + ] image = get_top_down_frame(empty_house, version) image.show() def get_room_images(scene, objaverse_asset_dir, width=1024, height=1024): controller = Controller( + commit_id=THOR_COMMIT_ID, agentMode="default", makeAgentsVisible=False, visibilityDistance=1.5, @@ -173,42 +185,59 @@ def get_room_images(scene, objaverse_asset_dir, width=1024, height=1024): room_name = room["roomType"] camera_height = wall_height - 0.2 - room_vertices = [[point["x"], point["z"]] for point in room['floorPolygon']] + room_vertices = [[point["x"], point["z"]] for point in room["floorPolygon"]] room_center = np.mean(room_vertices, axis=0) floor_center = np.array([room_center[0], 0, room_center[1]]) camera_center = np.array([room_center[0], camera_height, room_center[1]]) - corners = np.array([[point[0], camera_height, point[1]] for point in room_vertices]) + corners = np.array( + [[point[0], camera_height, point[1]] for point in room_vertices] + ) farest_corner = np.argmax(np.linalg.norm(corners - camera_center, axis=1)) vector_1 = floor_center - camera_center vector_2 = farest_corner - camera_center - x_angle = 90 - np.arccos(np.dot(vector_1, vector_2) / (np.linalg.norm(vector_1) * np.linalg.norm(vector_2))) * 180 / np.pi + x_angle = ( + 90 + - np.arccos( + np.dot(vector_1, vector_2) + / (np.linalg.norm(vector_1) * 
np.linalg.norm(vector_2)) + ) + * 180 + / np.pi + ) if not controller.last_event.third_party_camera_frames: controller.step( action="AddThirdPartyCamera", - position=dict(x=camera_center[0], y=camera_center[1], z=camera_center[2]), + position=dict( + x=camera_center[0], y=camera_center[1], z=camera_center[2] + ), rotation=dict(x=0, y=0, z=0), ) - + images = [] for angle in tqdm(range(0, 360, 90)): controller.step( action="UpdateThirdPartyCamera", - rotation=dict(x=x_angle, y=angle+45, z=0), - position=dict(x=camera_center[0], y=camera_center[1], z=camera_center[2]), + rotation=dict(x=x_angle, y=angle + 45, z=0), + position=dict( + x=camera_center[0], y=camera_center[1], z=camera_center[2] + ), ) - images.append(Image.fromarray(controller.last_event.third_party_camera_frames[0])) - + images.append( + Image.fromarray(controller.last_event.third_party_camera_frames[0]) + ) + room_images[room_name] = images - + controller.stop() return room_images def ithor_video(scene, objaverse_asset_dir, width, height, scene_type): controller = Controller( + commit_id=THOR_COMMIT_ID, agentMode="default", makeAgentsVisible=False, visibilityDistance=2, @@ -232,27 +261,39 @@ def ithor_video(scene, objaverse_asset_dir, width, height, scene_type): if not controller.last_event.third_party_camera_frames: controller.step( action="AddThirdPartyCamera", - position=dict(x=pose["position"]["x"], y=camera_height, z=pose["position"]["z"]), + position=dict( + x=pose["position"]["x"], y=camera_height, z=pose["position"]["z"] + ), rotation=dict(x=0, y=0, z=0), ) - + images = [] for angle in tqdm(range(0, 360, 1)): controller.step( action="UpdateThirdPartyCamera", rotation=dict(x=45, y=angle, z=0), - position=dict(x=pose["position"]["x"], y=camera_height, z=pose["position"]["z"]), + position=dict( + x=pose["position"]["x"], y=camera_height, z=pose["position"]["z"] + ), ) images.append(controller.last_event.third_party_camera_frames[0]) imsn = ImageSequenceClip(images, fps=30) # Create text clips - 
txt_clip_query = (TextClip(f"Query: {scene_type}", fontsize=30, color='white', font='Arial-Bold') - .set_pos(('center', 'top')).set_duration(imsn.duration)) - txt_clip_room = (TextClip(f"Room Type: {scene_type}", fontsize=30, color='white', font='Arial-Bold') - .set_pos(('center', 'bottom')).set_duration(imsn.duration)) + txt_clip_query = ( + TextClip(f"Query: {scene_type}", fontsize=30, color="white", font="Arial-Bold") + .set_pos(("center", "top")) + .set_duration(imsn.duration) + ) + txt_clip_room = ( + TextClip( + f"Room Type: {scene_type}", fontsize=30, color="white", font="Arial-Bold" + ) + .set_pos(("center", "bottom")) + .set_duration(imsn.duration) + ) # Overlay the text clip on the first video clip video = CompositeVideoClip([imsn, txt_clip_query, txt_clip_room]) @@ -264,23 +305,24 @@ def ithor_video(scene, objaverse_asset_dir, width, height, scene_type): def room_video(scene, objaverse_asset_dir, width, height): def add_line_breaks(text, max_line_length): - words = text.split(' ') + words = text.split(" ") lines = [] current_line = [] for word in words: - if len(' '.join(current_line + [word])) <= max_line_length: + if len(" ".join(current_line + [word])) <= max_line_length: current_line.append(word) else: - lines.append(' '.join(current_line)) + lines.append(" ".join(current_line)) current_line = [word] - lines.append(' '.join(current_line)) + lines.append(" ".join(current_line)) - return '\n'.join(lines) + return "\n".join(lines) """Saves a top-down video of the house.""" controller = Controller( + commit_id=THOR_COMMIT_ID, agentMode="default", makeAgentsVisible=False, visibilityDistance=2, @@ -294,9 +336,11 @@ def add_line_breaks(text, max_line_length): verbose=True, ), ) - - try: query = scene["query"] - except: query = scene["rooms"][0]['roomType'] + + try: + query = scene["query"] + except: + query = scene["rooms"][0]["roomType"] wall_height = max([point["y"] for point in scene["walls"][0]["polygon"]]) @@ -307,23 +351,37 @@ def 
add_line_breaks(text, max_line_length): camera_height = wall_height - 0.2 print("camera height: ", camera_height) - room_vertices = [[point["x"], point["z"]] for point in room['floorPolygon']] + room_vertices = [[point["x"], point["z"]] for point in room["floorPolygon"]] room_center = np.mean(room_vertices, axis=0) floor_center = np.array([room_center[0], 0, room_center[1]]) camera_center = np.array([room_center[0], camera_height, room_center[1]]) - corners = np.array([[point["x"], point["y"], point["z"]] for point in room['floorPolygon']]) - farest_corner = corners[np.argmax(np.linalg.norm(corners - camera_center, axis=1))] + corners = np.array( + [[point["x"], point["y"], point["z"]] for point in room["floorPolygon"]] + ) + farest_corner = corners[ + np.argmax(np.linalg.norm(corners - camera_center, axis=1)) + ] vector_1 = floor_center - camera_center vector_2 = farest_corner - camera_center - x_angle = 90 - np.arccos(np.dot(vector_1, vector_2) / (np.linalg.norm(vector_1) * np.linalg.norm(vector_2))) * 180 / np.pi + x_angle = ( + 90 + - np.arccos( + np.dot(vector_1, vector_2) + / (np.linalg.norm(vector_1) * np.linalg.norm(vector_2)) + ) + * 180 + / np.pi + ) images = [] if not controller.last_event.third_party_camera_frames: controller.step( action="AddThirdPartyCamera", - position=dict(x=camera_center[0], y=camera_center[1], z=camera_center[2]), + position=dict( + x=camera_center[0], y=camera_center[1], z=camera_center[2] + ), rotation=dict(x=0, y=0, z=0), ) @@ -331,17 +389,29 @@ def add_line_breaks(text, max_line_length): controller.step( action="UpdateThirdPartyCamera", rotation=dict(x=x_angle, y=angle, z=0), - position=dict(x=camera_center[0], y=camera_center[1], z=camera_center[2]), + position=dict( + x=camera_center[0], y=camera_center[1], z=camera_center[2] + ), ) images.append(controller.last_event.third_party_camera_frames[0]) imsn = ImageSequenceClip(images, fps=30) - + # Create text clips - txt_clip_query = (TextClip(f"Query: {text_query}", 
fontsize=30, color='white', font='Arial-Bold') - .set_pos(('center', 'top')).set_duration(imsn.duration)) - txt_clip_room = (TextClip(f"Room Type: {room_name}", fontsize=30, color='white', font='Arial-Bold') - .set_pos(('center', 'bottom')).set_duration(imsn.duration)) + txt_clip_query = ( + TextClip( + f"Query: {text_query}", fontsize=30, color="white", font="Arial-Bold" + ) + .set_pos(("center", "top")) + .set_duration(imsn.duration) + ) + txt_clip_room = ( + TextClip( + f"Room Type: {room_name}", fontsize=30, color="white", font="Arial-Bold" + ) + .set_pos(("center", "bottom")) + .set_duration(imsn.duration) + ) # Overlay the text clip on the first video clip video = CompositeVideoClip([imsn, txt_clip_query, txt_clip_room]) @@ -356,28 +426,84 @@ def add_line_breaks(text, max_line_length): return final_video +def get_asset_metadata(obj_data: Dict[str, Any]): + if "assetMetadata" in obj_data: + return obj_data["assetMetadata"] + elif "thor_metadata" in obj_data: + return obj_data["thor_metadata"]["assetMetadata"] + else: + raise ValueError("Can not find assetMetadata in obj_data") + + +def get_annotations(obj_data: Dict[str, Any]): + if "annotations" in obj_data: + return obj_data["annotations"] + else: + # The assert here is just double-checking that a field that should exist does. 
+ assert "onFloor" in obj_data, f"Can not find annotations in obj_data {obj_data}" + + return obj_data + + +def get_bbox_dims(obj_data: Dict[str, Any]): + am = get_asset_metadata(obj_data) + + bbox_info = am["boundingBox"] + + if "x" in bbox_info: + return bbox_info + + if "size" in bbox_info: + return bbox_info["size"] + + mins = bbox_info["min"] + maxs = bbox_info["max"] + + return {k: maxs[k] - mins[k] for k in ["x", "y", "z"]} + + +def get_secondary_properties(obj_data: Dict[str, Any]): + am = get_asset_metadata(obj_data) + return am["secondaryProperties"] + + if __name__ == "__main__": parser = ArgumentParser() - parser.add_argument("--mode", help = "Mode to run (top_down_frame, room_video, room_image).", default = "top_down_frame") - parser.add_argument("--objaverse_asset_dir", help = "Directory to load assets from.", default = "./data/objaverse_holodeck/09_23_combine_scale/processed_2023_09_23_combine_scale") - parser.add_argument("--scene", help = "Scene to load.", default = "data/scenes/a_living_room/a_living_room.json") + parser.add_argument( + "--mode", + help="Mode to run (top_down_frame, top_down_video, room_image).", + default="top_down_frame", + ) + parser.add_argument( + "--objaverse_asset_dir", + help="Directory to load assets from.", + default="./objaverse/processed_2023_09_23_combine_scale", + ) + parser.add_argument( + "--scene", + help="Scene to load.", + default=os.path.join( + HOLODECK_BASE_DATA_DIR, "scenes/a_living_room/a_living_room.json" + ), + ) args = parser.parse_args() - scene = json.load(open(args.scene, "r")) + scene = compress_json.load(args.scene) - if "query" not in scene: scene["query"] = args.scene.split("/")[-1].split(".")[0] + if "query" not in scene: + scene["query"] = args.scene.split("/")[-1].split(".")[0] if args.mode == "top_down_frame": image = get_top_down_frame(scene, args.objaverse_asset_dir) image.show() - + elif args.mode == "room_video": video = room_video(scene, args.objaverse_asset_dir, 1024, 1024) 
video.write_videofile(args.scene.replace(".json", ".mp4"), fps=30) - + elif args.mode == "room_image": room_images = get_room_images(scene, args.objaverse_asset_dir, 1024, 1024) save_folder = "/".join(args.scene.split("/")[:-1]) for room_name, images in room_images.items(): for i, image in enumerate(images): - image.save(f"{save_folder}/{room_name}_{i}.png") \ No newline at end of file + image.save(f"{save_folder}/{room_name}_{i}.png") diff --git a/modules/wall_objects.py b/ai2holodeck/generation/wall_objects.py similarity index 53% rename from modules/wall_objects.py rename to ai2holodeck/generation/wall_objects.py index 0e15448..a5ab129 100644 --- a/modules/wall_objects.py +++ b/ai2holodeck/generation/wall_objects.py @@ -1,30 +1,47 @@ -import re -import time import copy -import random -import numpy as np import multiprocessing +import random +import re +import time + import matplotlib.pyplot as plt -import modules.prompts as prompts -from langchain import PromptTemplate -from shapely.ops import substring +import numpy as np +from langchain import PromptTemplate, OpenAI from shapely.geometry import Polygon, box, Point, LineString +from shapely.ops import substring - -class WallObjectGenerator(): - def __init__(self, llm, object_retriever): - self.json_template = {"assetId": None, "id": None, "kinematic": True, - "position": {}, "rotation": {}, "material": None, "roomId": None} +import ai2holodeck.generation.prompts as prompts +from ai2holodeck.generation.objaverse_retriever import ObjathorRetriever +from ai2holodeck.generation.utils import get_bbox_dims + + +class WallObjectGenerator: + def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): + self.json_template = { + "assetId": None, + "id": None, + "kinematic": True, + "position": {}, + "rotation": {}, + "material": None, + "roomId": None, + } self.llm = llm self.object_retriever = object_retriever self.database = object_retriever.database - self.constraint_prompt_template = 
PromptTemplate(input_variables=["room_type", "wall_height", "floor_objects", "wall_objects"], - template=prompts.wall_object_constraints_prompt) + self.constraint_prompt_template = PromptTemplate( + input_variables=[ + "room_type", + "wall_height", + "floor_objects", + "wall_objects", + ], + template=prompts.wall_object_constraints_prompt, + ) self.grid_size = 25 self.default_height = 150 self.constraint_type = "llm" - def generate_wall_objects(self, scene, use_constraint=True): doors = scene["doors"] windows = scene["windows"] @@ -34,7 +51,19 @@ def generate_wall_objects(self, scene, use_constraint=True): wall_objects = [] selected_objects = scene["selected_objects"] - packed_args = [(room, scene, doors, windows, open_walls, wall_height, selected_objects, use_constraint) for room in scene["rooms"]] + packed_args = [ + ( + room, + scene, + doors, + windows, + open_walls, + wall_height, + selected_objects, + use_constraint, + ) + for room in scene["rooms"] + ] pool = multiprocessing.Pool(processes=4) all_placements = pool.map(self.generate_wall_objects_per_room, packed_args) pool.close() @@ -44,73 +73,122 @@ def generate_wall_objects(self, scene, use_constraint=True): wall_objects += placements return wall_objects - def generate_wall_objects_per_room(self, args): - room, scene, doors, windows, open_walls, wall_height, selected_objects, use_constraint = args + ( + room, + scene, + doors, + windows, + open_walls, + wall_height, + selected_objects, + use_constraint, + ) = args selected_wall_objects = selected_objects[room["roomType"]]["wall"] selected_wall_objects = self.order_objects_by_size(selected_wall_objects) - wall_object_name2id = {object_name: asset_id for object_name, asset_id in selected_wall_objects} + wall_object_name2id = { + object_name: asset_id for object_name, asset_id in selected_wall_objects + } room_id = room["id"] room_type = room["roomType"] wall_object_names = list(wall_object_name2id.keys()) - - floor_object_name2id = {object["object_name"]: 
object["assetId"] for object in scene["floor_objects"] if object["roomId"] == room["id"]} + + floor_object_name2id = { + object["object_name"]: object["assetId"] + for object in scene["floor_objects"] + if object["roomId"] == room["id"] + } floor_object_names = list(floor_object_name2id.keys()) - + # get constraints - constraints_prompt = self.constraint_prompt_template.format(room_type=room_type, - wall_height=int(wall_height*100), - floor_objects=", ".join(floor_object_names), - wall_objects=", ".join(wall_object_names)) + constraints_prompt = self.constraint_prompt_template.format( + room_type=room_type, + wall_height=int(wall_height * 100), + floor_objects=", ".join(floor_object_names), + wall_objects=", ".join(wall_object_names), + ) if self.constraint_type == "llm" and use_constraint: constraint_plan = self.llm(constraints_prompt) else: constraint_plan = "" for object_name in wall_object_names: - random_height = random.randint(0, int(wall_height*100)) + random_height = random.randint(0, int(wall_height * 100)) constraint_plan += f"{object_name} | N/A | {random_height} \n" print(f"\nwall object constraint plan for {room_type}:\n{constraint_plan}") - constraints = self.parse_wall_object_constraints(constraint_plan, wall_object_names, floor_object_names) - + constraints = self.parse_wall_object_constraints( + constraint_plan, wall_object_names, floor_object_names + ) + # get wall objects - wall_object2dimension = {object_name: self.database[object_id]['assetMetadata']['boundingBox'] for object_name, object_id in wall_object_name2id.items()} - wall_objects_list = [(object_name, (wall_object2dimension[object_name]['x'] * 100, wall_object2dimension[object_name]['y'] * 100, wall_object2dimension[object_name]['z'] * 100)) for object_name in constraints] - + wall_object2dimension = { + object_name: get_bbox_dims(self.database[object_id]) + for object_name, object_id in wall_object_name2id.items() + } + wall_objects_list = [ + ( + object_name, + ( + 
wall_object2dimension[object_name]["x"] * 100, + wall_object2dimension[object_name]["y"] * 100, + wall_object2dimension[object_name]["z"] * 100, + ), + ) + for object_name in constraints + ] + # update constraints with max height - wall_object2max_height = {object_name: min(scene["wall_height"] * 100 - wall_object2dimension[object_name]["y"] * 100 - 20, constraints[object_name]["height"]) for object_name in constraints} + wall_object2max_height = { + object_name: min( + scene["wall_height"] * 100 + - wall_object2dimension[object_name]["y"] * 100 + - 20, + constraints[object_name]["height"], + ) + for object_name in constraints + } for object_name in constraints: - constraints[object_name]["height"] = max(wall_object2max_height[object_name], 0) # avoid negative height - + constraints[object_name]["height"] = max( + wall_object2max_height[object_name], 0 + ) # avoid negative height + # get initial state room_vertices = [(x * 100, y * 100) for (x, y) in room["vertices"]] room_poly = Polygon(room_vertices) - initial_state = self.get_initial_state(scene, doors, windows, room_vertices, open_walls) + initial_state = self.get_initial_state( + scene, doors, windows, room_vertices, open_walls + ) # solve room_x, room_z = self.get_room_size(room) grid_size = max(room_x // 20, room_z // 20) - solver = DFS_Solver_Wall(grid_size=grid_size, max_duration=5, constraint_bouns=100) - solutions = solver.get_solution(room_poly, wall_objects_list, constraints, initial_state) - + solver = DFS_Solver_Wall( + grid_size=grid_size, max_duration=5, constraint_bouns=100 + ) + solutions = solver.get_solution( + room_poly, wall_objects_list, constraints, initial_state + ) + placements = self.solution2placement(solutions, wall_object_name2id, room_id) return placements - - def parse_wall_object_constraints(self, constraint_text, wall_object_names, floor_object_names): + def parse_wall_object_constraints( + self, constraint_text, wall_object_names, floor_object_names + ): object2constraints = {} - 
lines = [line.lower() for line in constraint_text.split('\n') if "|" in line] + lines = [line.lower() for line in constraint_text.split("\n") if "|" in line] for line in lines: # remove index - pattern = re.compile(r'^\d+\.\s*') - line = pattern.sub('', line) - if line[-1] == ".": line = line[:-1] # remove the last period + pattern = re.compile(r"^\d+\.\s*") + line = pattern.sub("", line) + if line[-1] == ".": + line = line[:-1] # remove the last period try: object_name, location, height = line.split("|") object_name = object_name.replace("*", "").strip() @@ -119,37 +197,51 @@ def parse_wall_object_constraints(self, constraint_text, wall_object_names, floo except: print(f"Warning: cannot parse {line}.") continue - - if object_name not in wall_object_names: continue - try: target_floor_object_name = location.split(", ")[-1] - except: print(f"Warning: cannot parse {location}."); target_floor_object_name = None + if object_name not in wall_object_names: + continue + + try: + target_floor_object_name = location.split(", ")[-1] + except: + print(f"Warning: cannot parse {location}.") + target_floor_object_name = None + + try: + height = int(height) + except: + height = self.default_height - try: height = int(height) - except: height = self.default_height - if target_floor_object_name in floor_object_names: - object2constraints[object_name] = {"target_floor_object_name": target_floor_object_name, "height": height} + object2constraints[object_name] = { + "target_floor_object_name": target_floor_object_name, + "height": height, + } else: - object2constraints[object_name] = {"target_floor_object_name": None, "height": height} - + object2constraints[object_name] = { + "target_floor_object_name": None, + "height": height, + } + return object2constraints - def get_room_size(self, room): floor_polygon = room["floorPolygon"] - x_values = [point['x'] for point in floor_polygon] - z_values = [point['z'] for point in floor_polygon] - return (int(max(x_values) - min(x_values)) * 100, 
int(max(z_values) - min(z_values)) * 100) - + x_values = [point["x"] for point in floor_polygon] + z_values = [point["z"] for point in floor_polygon] + return ( + int(max(x_values) - min(x_values)) * 100, + int(max(z_values) - min(z_values)) * 100, + ) def check_wall_object_size(self, room_size, object_size): if object_size["x"] * 100 > max(room_size) * 0.5: - print(f"Warning: object size {object_size} is too large for room size {room_size}.") + print( + f"Warning: object size {object_size} is too large for room size {room_size}." + ) return False else: return True - def get_initial_state(self, scene, doors, windows, room_vertices, open_walls): room_poly = Polygon(room_vertices) @@ -164,9 +256,15 @@ def get_initial_state(self, scene, doors, windows, room_vertices, open_walls): if room_poly.contains(door_center): door_height = door["assetPosition"]["y"] * 100 * 2 x_min, z_min, x_max, z_max = door_poly.bounds - initial_state[f"door-{i}"] = ((x_min, 0, z_min), (x_max, door_height, z_max), 0, door_vertices, 1) + initial_state[f"door-{i}"] = ( + (x_min, 0, z_min), + (x_max, door_height, z_max), + 0, + door_vertices, + 1, + ) i += 1 - + for window in windows: window_boxes = window["windowBoxes"] for window_box in window_boxes: @@ -177,9 +275,15 @@ def get_initial_state(self, scene, doors, windows, room_vertices, open_walls): y_min = window["holePolygon"][0]["y"] * 100 y_max = window["holePolygon"][1]["y"] * 100 x_min, z_min, x_max, z_max = window_poly.bounds - initial_state[f"window-{i}"] = ((x_min, y_min, z_min), (x_max, y_max, z_max), 0, window_vertices, 1) + initial_state[f"window-{i}"] = ( + (x_min, y_min, z_min), + (x_max, y_max, z_max), + 0, + window_vertices, + 1, + ) i += 1 - + if len(open_walls) != 0: open_wall_boxes = open_walls["openWallBoxes"] for open_wall_box in open_wall_boxes: @@ -188,58 +292,80 @@ def get_initial_state(self, scene, doors, windows, room_vertices, open_walls): open_wall_center = open_wall_poly.centroid if 
room_poly.contains(open_wall_center): x_min, z_min, x_max, z_max = open_wall_poly.bounds - initial_state[f"open-{i}"] = ((x_min, 0, z_min), (x_max, scene["wall_height"] * 100, z_max), 0, open_wall_vertices, 1) + initial_state[f"open-{i}"] = ( + (x_min, 0, z_min), + (x_max, scene["wall_height"] * 100, z_max), + 0, + open_wall_vertices, + 1, + ) i += 1 - + for object in scene["floor_objects"]: - try: object_vertices = object["vertices"] - except: continue - + try: + object_vertices = object["vertices"] + except: + continue + object_poly = Polygon(object_vertices) object_center = object_poly.centroid if room_poly.contains(object_center): - object_height = object["position"]["y"] * 100 * 2 # the height should be twice the value of the y coordinate + object_height = ( + object["position"]["y"] * 100 * 2 + ) # the height should be twice the value of the y coordinate x_min, z_min, x_max, z_max = object_poly.bounds - initial_state[object["object_name"]] = ((x_min, 0, z_min), (x_max, object_height, z_max), object["rotation"]["y"], object_vertices, 1) + initial_state[object["object_name"]] = ( + (x_min, 0, z_min), + (x_max, object_height, z_max), + object["rotation"]["y"], + object_vertices, + 1, + ) return initial_state - def solution2placement(self, solutions, wall_object_name2id, room_id): placements = [] for object_name, solution in solutions.items(): - if object_name not in wall_object_name2id: continue + if object_name not in wall_object_name2id: + continue placement = self.json_template.copy() placement["assetId"] = wall_object_name2id[object_name] placement["id"] = f"{object_name} ({room_id})" position_x = (solution[0][0] + solution[1][0]) / 200 position_y = (solution[0][1] + solution[1][1]) / 200 position_z = (solution[0][2] + solution[1][2]) / 200 - + placement["position"] = {"x": position_x, "y": position_y, "z": position_z} placement["rotation"] = {"x": 0, "y": solution[2], "z": 0} - + # move the object a little bit to avoid collision - if 
placement["rotation"]["y"] == 0: placement["position"]["z"] += 0.01 - elif placement["rotation"]["y"] == 90: placement["position"]["x"] += 0.01 - elif placement["rotation"]["y"]== 180: placement["position"]["z"] -= 0.01 - elif placement["rotation"]["y"] == 270: placement["position"]["x"] -= 0.01 + if placement["rotation"]["y"] == 0: + placement["position"]["z"] += 0.01 + elif placement["rotation"]["y"] == 90: + placement["position"]["x"] += 0.01 + elif placement["rotation"]["y"] == 180: + placement["position"]["z"] -= 0.01 + elif placement["rotation"]["y"] == 270: + placement["position"]["x"] -= 0.01 placement["roomId"] = room_id placement["vertices"] = list(solution[3]) placement["object_name"] = object_name placements.append(placement) return placements - def order_objects_by_size(self, selected_wall_objects): ordered_wall_objects = [] for object_name, asset_id in selected_wall_objects: - dimensions = self.database[asset_id]['assetMetadata']['boundingBox'] + dimensions = get_bbox_dims(self.database[asset_id]) size = dimensions["x"] ordered_wall_objects.append([object_name, asset_id, size]) ordered_wall_objects.sort(key=lambda x: x[2], reverse=True) - ordered_wall_objects_no_size = [[object_name, asset_id] for object_name, asset_id, size in ordered_wall_objects] + ordered_wall_objects_no_size = [ + [object_name, asset_id] + for object_name, asset_id, size in ordered_wall_objects + ] return ordered_wall_objects_no_size @@ -249,7 +375,7 @@ def __init__(self, solution): pass -class DFS_Solver_Wall(): +class DFS_Solver_Wall: def __init__(self, grid_size, random_seed=0, max_duration=5, constraint_bouns=100): self.grid_size = grid_size self.random_seed = random_seed @@ -259,21 +385,22 @@ def __init__(self, grid_size, random_seed=0, max_duration=5, constraint_bouns=10 self.solutions = [] self.visualize = False - def get_solution(self, room_poly, wall_objects_list, constraints, initial_state): grid_points = self.create_grids(room_poly) self.start_time = time.time() try: - 
self.dfs(room_poly, wall_objects_list, constraints, grid_points, initial_state) + self.dfs( + room_poly, wall_objects_list, constraints, grid_points, initial_state + ) except SolutionFound as e: print(f"Time taken: {time.time() - self.start_time}") - + max_solution = self.get_max_solution(self.solutions) - - if self.visualize: self.visualize_grid(room_poly, grid_points, max_solution) - return max_solution + if self.visualize: + self.visualize_grid(room_poly, grid_points, max_solution) + return max_solution def get_max_solution(self, solutions): path_weights = [] @@ -282,19 +409,22 @@ def get_max_solution(self, solutions): max_index = np.argmax(path_weights) return solutions[max_index] - - def dfs(self, room_poly, wall_objects_list, constraints, grid_points, placed_objects): + def dfs( + self, room_poly, wall_objects_list, constraints, grid_points, placed_objects + ): if len(wall_objects_list) == 0: self.solutions.append(placed_objects) return placed_objects - + if time.time() - self.start_time > self.max_duration: print(f"Time limit reached.") raise SolutionFound(self.solutions) - + object_name, object_dim = wall_objects_list[0] - placements = self.get_possible_placements(room_poly, object_dim, constraints[object_name], grid_points, placed_objects) - + placements = self.get_possible_placements( + room_poly, object_dim, constraints[object_name], grid_points, placed_objects + ) + if len(placements) == 0: self.solutions.append(placed_objects) @@ -303,23 +433,39 @@ def dfs(self, room_poly, wall_objects_list, constraints, grid_points, placed_obj placed_objects_updated = copy.deepcopy(placed_objects) placed_objects_updated[object_name] = placement - sub_paths = self.dfs(room_poly, wall_objects_list[1:], constraints, grid_points, placed_objects_updated) + sub_paths = self.dfs( + room_poly, + wall_objects_list[1:], + constraints, + grid_points, + placed_objects_updated, + ) paths.extend(sub_paths) return paths - - def get_possible_placements(self, room_poly, object_dim, 
constraint, grid_points, placed_objects): - all_solutions = self.filter_collision(placed_objects, self.get_all_solutions(room_poly, grid_points, object_dim, constraint["height"])) + def get_possible_placements( + self, room_poly, object_dim, constraint, grid_points, placed_objects + ): + all_solutions = self.filter_collision( + placed_objects, + self.get_all_solutions( + room_poly, grid_points, object_dim, constraint["height"] + ), + ) random.shuffle(all_solutions) target_floor_object_name = constraint["target_floor_object_name"] - if target_floor_object_name is not None and target_floor_object_name in placed_objects: - all_solutions = self.score_solution_by_distance(all_solutions, placed_objects[target_floor_object_name]) + if ( + target_floor_object_name is not None + and target_floor_object_name in placed_objects + ): + all_solutions = self.score_solution_by_distance( + all_solutions, placed_objects[target_floor_object_name] + ) # order solutions by distance to target floor object all_solutions = sorted(all_solutions, key=lambda x: x[-1], reverse=True) return all_solutions - def create_grids(self, room_poly): # Get the coordinates of the polygon poly_coords = list(room_poly.exterior.coords) @@ -332,12 +478,13 @@ def create_grids(self, room_poly): # Create points along the edge at intervals of grid size for j in range(0, int(line_length), self.grid_size): - point_on_line = substring(line, j, j) # Get a point at distance j from the start of the line + point_on_line = substring( + line, j, j + ) # Get a point at distance j from the start of the line if point_on_line: grid_points.append((point_on_line.x, point_on_line.y)) - + return grid_points - def get_all_solutions(self, room_poly, grid_points, object_dim, height): obj_length, obj_height, obj_width = object_dim @@ -347,42 +494,68 @@ def get_all_solutions(self, room_poly, grid_points, object_dim, height): 0: ((-obj_half_length, 0), (obj_half_length, obj_width)), 90: ((0, -obj_half_length), (obj_width, 
obj_half_length)), 180: ((-obj_half_length, -obj_width), (obj_half_length, 0)), - 270: ((-obj_width, -obj_half_length), (0, obj_half_length)) + 270: ((-obj_width, -obj_half_length), (0, obj_half_length)), } solutions = [] for rotation in [0, 90, 180, 270]: for point in grid_points: center_x, center_y = point - lower_left_adjustment, upper_right_adjustment = rotation_adjustments[rotation] - lower_left = (center_x + lower_left_adjustment[0], center_y + lower_left_adjustment[1]) - upper_right = (center_x + upper_right_adjustment[0], center_y + upper_right_adjustment[1]) + lower_left_adjustment, upper_right_adjustment = rotation_adjustments[ + rotation + ] + lower_left = ( + center_x + lower_left_adjustment[0], + center_y + lower_left_adjustment[1], + ) + upper_right = ( + center_x + upper_right_adjustment[0], + center_y + upper_right_adjustment[1], + ) obj_box = box(*lower_left, *upper_right) if room_poly.contains(obj_box): object_coords = obj_box.exterior.coords[:] - coordinates_on_edge = [coord for coord in object_coords if room_poly.boundary.contains(Point(coord))] + coordinates_on_edge = [ + coord + for coord in object_coords + if room_poly.boundary.contains(Point(coord)) + ] coordinates_on_edge = list(set(coordinates_on_edge)) if len(coordinates_on_edge) >= 2: vertex_min = (lower_left[0], height, lower_left[1]) - vertex_max = (upper_right[0], height + obj_height, upper_right[1]) + vertex_max = ( + upper_right[0], + height + obj_height, + upper_right[1], + ) + + solutions.append( + [ + vertex_min, + vertex_max, + rotation, + tuple(obj_box.exterior.coords[:]), + 1, + ] + ) - solutions.append([vertex_min, vertex_max, rotation, tuple(obj_box.exterior.coords[:]), 1]) - return solutions - def filter_collision(self, placed_objects, solutions): def intersect_3d(box1, box2): # box1 and box2 are dictionaries with 'min' and 'max' keys, # which are tuples representing the minimum and maximum corners of the 3D box. 
for i in range(3): - if box1['max'][i] < box2['min'][i] or box1['min'][i] > box2['max'][i]: + if box1["max"][i] < box2["min"][i] or box1["min"][i] > box2["max"][i]: return False return True valid_solutions = [] - boxes = [{"min": vertex_min, "max": vertex_max} for vertex_min, vertex_max, rotation, box_coords, path_weight in placed_objects.values()] + boxes = [ + {"min": vertex_min, "max": vertex_max} + for vertex_min, vertex_max, rotation, box_coords, path_weight in placed_objects.values() + ] for solution in solutions: for box in boxes: @@ -390,58 +563,70 @@ def intersect_3d(box1, box2): break else: valid_solutions.append(solution) - + return valid_solutions - def score_solution_by_distance(self, solutions, target_object): distances = [] scored_solutions = [] for solution in solutions: - center_x, center_y, center_z = (solution[0][0]+solution[1][0])/2, (solution[0][1]+solution[1][1])/2, (solution[0][2]+solution[1][2])/2 - target_x, target_y, target_z = (target_object[0][0]+target_object[1][0])/2, (target_object[0][1]+target_object[1][1])/2, (target_object[0][2]+target_object[1][2])/2 - distance = np.sqrt((center_x - target_x)**2 + (center_y - target_y)**2 + (center_z - target_z)**2) + center_x, center_y, center_z = ( + (solution[0][0] + solution[1][0]) / 2, + (solution[0][1] + solution[1][1]) / 2, + (solution[0][2] + solution[1][2]) / 2, + ) + target_x, target_y, target_z = ( + (target_object[0][0] + target_object[1][0]) / 2, + (target_object[0][1] + target_object[1][1]) / 2, + (target_object[0][2] + target_object[1][2]) / 2, + ) + distance = np.sqrt( + (center_x - target_x) ** 2 + + (center_y - target_y) ** 2 + + (center_z - target_z) ** 2 + ) distances.append(distance) scored_solution = solution.copy() - scored_solution[-1] = solution[-1] + self.constraint_bouns * (1/distance) + scored_solution[-1] = solution[-1] + self.constraint_bouns * (1 / distance) scored_solutions.append(scored_solution) return scored_solutions - - + def visualize_grid(self, room_poly, 
grid_points, solutions): # create a new figure fig, ax = plt.subplots() # draw the room x, y = room_poly.exterior.xy - ax.plot(x, y, 'b-', label='Room') + ax.plot(x, y, "b-", label="Room") # draw the grid points grid_x = [point[0] for point in grid_points] grid_y = [point[1] for point in grid_points] - ax.plot(grid_x, grid_y, 'ro', markersize=2) + ax.plot(grid_x, grid_y, "ro", markersize=2) # draw the solutions for object_name, solution in solutions.items(): vertex_min, vertex_max, rotation, box_coords = solution[:-1] - center_x, center_y = (vertex_min[0]+vertex_max[0])/2, (vertex_min[2]+vertex_max[2])/2 + center_x, center_y = (vertex_min[0] + vertex_max[0]) / 2, ( + vertex_min[2] + vertex_max[2] + ) / 2 # create a polygon for the solution obj_poly = Polygon(box_coords) x, y = obj_poly.exterior.xy - ax.plot(x, y, 'g-', linewidth=2) + ax.plot(x, y, "g-", linewidth=2) - ax.text(center_x, center_y, object_name, fontsize=12, ha='center') + ax.text(center_x, center_y, object_name, fontsize=12, ha="center") # set arrow direction based on rotation if rotation == 0: - ax.arrow(center_x, center_y, 0, 25, head_width=10, fc='g') + ax.arrow(center_x, center_y, 0, 25, head_width=10, fc="g") elif rotation == 90: - ax.arrow(center_x, center_y, 25, 0, head_width=10, fc='g') + ax.arrow(center_x, center_y, 25, 0, head_width=10, fc="g") elif rotation == 180: - ax.arrow(center_x, center_y, 0, -25, head_width=10, fc='g') + ax.arrow(center_x, center_y, 0, -25, head_width=10, fc="g") elif rotation == 270: - ax.arrow(center_x, center_y, -25, 0, head_width=10, fc='g') + ax.arrow(center_x, center_y, -25, 0, head_width=10, fc="g") - ax.set_aspect('equal', 'box') # to keep the ratios equal along x and y axis - plt.show() \ No newline at end of file + ax.set_aspect("equal", "box") # to keep the ratios equal along x and y axis + plt.show() diff --git a/modules/walls.py b/ai2holodeck/generation/walls.py similarity index 58% rename from modules/walls.py rename to ai2holodeck/generation/walls.py 
index 2119538..04e164d 100644 --- a/modules/walls.py +++ b/ai2holodeck/generation/walls.py @@ -1,34 +1,42 @@ import copy import random + import numpy as np from colorama import Fore -import modules.prompts as prompts -from langchain import PromptTemplate +from langchain import PromptTemplate, OpenAI from shapely.geometry import LineString, Polygon, Point +import ai2holodeck.generation.prompts as prompts + -class WallGenerator(): - def __init__(self, llm): - self.json_template = {"id": None, "roomId": None, - "material": {"name": None, "color": None}, - "polygon": []} +class WallGenerator: + def __init__(self, llm: OpenAI): + self.json_template = { + "id": None, + "roomId": None, + "material": {"name": None, "color": None}, + "polygon": [], + } self.llm = llm - self.wall_height_template = PromptTemplate(input_variables=["input"], template=prompts.wall_height_prompt) + self.wall_height_template = PromptTemplate( + input_variables=["input"], template=prompts.wall_height_prompt + ) self.used_assets = [] - def generate_walls(self, scene): # get wall height - if "wall_height" not in scene: wall_height = self.get_wall_height(scene) - else: wall_height = scene["wall_height"] + if "wall_height" not in scene: + wall_height = self.get_wall_height(scene) + else: + wall_height = scene["wall_height"] walls = [] rooms = scene["rooms"] for room in rooms: roomId = room["id"] material = room["wallMaterial"] - full_vertices = room['full_vertices'] - + full_vertices = room["full_vertices"] + for j in range(len(full_vertices)): wall = copy.deepcopy(self.json_template) wall["roomId"] = roomId @@ -36,32 +44,42 @@ def generate_walls(self, scene): # generate the wall polygon wall_endpoint1 = full_vertices[j] - wall_endpoint2 = full_vertices[(j+1) % len(full_vertices)] - wall["polygon"] = self.generate_wall_polygon(wall_endpoint1, wall_endpoint2, wall_height) + wall_endpoint2 = full_vertices[(j + 1) % len(full_vertices)] + wall["polygon"] = self.generate_wall_polygon( + wall_endpoint1, 
wall_endpoint2, wall_height + ) # add the room connection information - wall["connected_rooms"] = self.get_connected_rooms(wall["polygon"], rooms, wall["roomId"]) - + wall["connected_rooms"] = self.get_connected_rooms( + wall["polygon"], rooms, wall["roomId"] + ) + # add the wall direction and width - wall_width, wall_direction = self.get_wall_direction(wall_endpoint1, wall_endpoint2, full_vertices) + wall_width, wall_direction = self.get_wall_direction( + wall_endpoint1, wall_endpoint2, full_vertices + ) wall["width"] = wall_width wall["height"] = wall_height wall["direction"] = wall_direction wall["segment"] = [wall_endpoint1, wall_endpoint2] wall["id"] = f"wall|{roomId}|{wall_direction}|{j}" walls.append(wall) - + # update wall connection information for wall in walls: if wall["connected_rooms"] != []: for connection in wall["connected_rooms"]: connect_room_id = connection["roomId"] - candidate_walls = [wall for wall in walls if wall["roomId"] == connect_room_id] + candidate_walls = [ + wall for wall in walls if wall["roomId"] == connect_room_id + ] line1 = connection["line1"] for candidate_wall in candidate_walls: - if line1[0] in candidate_wall["polygon"] and line1[1] in candidate_wall["polygon"]: + if ( + line1[0] in candidate_wall["polygon"] + and line1[1] in candidate_wall["polygon"] + ): connection["wallId"] = candidate_wall["id"] - # add exterior walls updated_walls = [] @@ -76,9 +94,8 @@ def generate_walls(self, scene): updated_walls.append(exterior_wall) updated_walls.append(wall) walls = updated_walls - + return wall_height, walls - def get_wall_height(self, scene): # get wall height @@ -86,11 +103,17 @@ def get_wall_height(self, scene): if "wall_height" not in scene: wall_height = self.llm(wall_height_prompt).split("\n")[0].strip() - - try: wall_height = float(wall_height) - except: wall_height = round(random.uniform(2.5, 4.5), 1) # if failed, random height between 2.5 and 4.5 - scene["wall_height"] = min(max(wall_height, 2.0), 4.5) # limit the wall 
height between 2.0 and 4.5 + try: + wall_height = float(wall_height) + except: + wall_height = round( + random.uniform(2.5, 4.5), 1 + ) # if failed, random height between 2.5 and 4.5 + + scene["wall_height"] = min( + max(wall_height, 2.0), 4.5 + ) # limit the wall height between 2.0 and 4.5 wall_height = scene["wall_height"] print(f"\nUser: {wall_height_prompt}\n") @@ -98,31 +121,35 @@ def get_wall_height(self, scene): return wall_height - def generate_wall_polygon(self, point, next_point, wall_height): wall_polygon = [] # add the base point - wall_polygon.append({'x': point[0], 'y': 0, 'z': point[1]}) + wall_polygon.append({"x": point[0], "y": 0, "z": point[1]}) # add the top point (with the same x and z, but y = wall_height) - wall_polygon.append({'x': point[0], 'y': wall_height, 'z': point[1]}) + wall_polygon.append({"x": point[0], "y": wall_height, "z": point[1]}) # add the top point of the next base point - wall_polygon.append({'x': next_point[0], 'y': wall_height, 'z': next_point[1]}) + wall_polygon.append({"x": next_point[0], "y": wall_height, "z": next_point[1]}) # add the next base point - wall_polygon.append({'x': next_point[0], 'y': 0, 'z': next_point[1]}) + wall_polygon.append({"x": next_point[0], "y": 0, "z": next_point[1]}) return wall_polygon - - + def get_connected_rooms(self, wall_polygon, rooms, roomId): connected_rooms = [] - vertices0 = [(vertex['x'], vertex['z']) for vertex in wall_polygon if vertex['y'] == 0] + vertices0 = [ + (vertex["x"], vertex["z"]) for vertex in wall_polygon if vertex["y"] == 0 + ] lines0 = [LineString([vertices0[0], vertices0[1]])] for room in rooms: - if room["id"] == roomId: continue # do not consider the room itself + if room["id"] == roomId: + continue # do not consider the room itself room_polygon = room["floorPolygon"] - vertices1 = [(vertex['x'], vertex['z']) for vertex in room_polygon] - lines1 = [LineString([vertices1[i], vertices1[(i+1)%len(vertices1)]]) for i in range(len(vertices1))] + vertices1 = 
[(vertex["x"], vertex["z"]) for vertex in room_polygon] + lines1 = [ + LineString([vertices1[i], vertices1[(i + 1) % len(vertices1)]]) + for i in range(len(vertices1)) + ] shared_segments = self.check_connected(lines0, lines1) @@ -130,9 +157,8 @@ def get_connected_rooms(self, wall_polygon, rooms, roomId): connected_room = shared_segments[0] connected_room["roomId"] = room["id"] connected_rooms.append(connected_room) - - return connected_rooms + return connected_rooms def check_connected(self, lines0, lines1): shared_segments = [] @@ -140,29 +166,38 @@ def check_connected(self, lines0, lines1): for line1 in lines1: if line0.intersects(line1): intersection = line0.intersection(line1) - if intersection.geom_type == 'LineString': - shared_segments.append({ - "intersection": [ - {"x": intersection.xy[0][0], "y": 0, "z": intersection.xy[1][0]}, - {"x": intersection.xy[0][1], "y": 0, "z": intersection.xy[1][1]} - ], - "line0": [ - {"x": line0.xy[0][0], "y": 0, "z": line0.xy[1][0]}, - {"x": line0.xy[0][1], "y": 0, "z": line0.xy[1][1]} - ], - "line1": [ - {"x": line1.xy[0][0], "y": 0, "z": line1.xy[1][0]}, - {"x": line1.xy[0][1], "y": 0, "z": line1.xy[1][1]} - ], - }) - + if intersection.geom_type == "LineString": + shared_segments.append( + { + "intersection": [ + { + "x": intersection.xy[0][0], + "y": 0, + "z": intersection.xy[1][0], + }, + { + "x": intersection.xy[0][1], + "y": 0, + "z": intersection.xy[1][1], + }, + ], + "line0": [ + {"x": line0.xy[0][0], "y": 0, "z": line0.xy[1][0]}, + {"x": line0.xy[0][1], "y": 0, "z": line0.xy[1][1]}, + ], + "line1": [ + {"x": line1.xy[0][0], "y": 0, "z": line1.xy[1][0]}, + {"x": line1.xy[0][1], "y": 0, "z": line1.xy[1][1]}, + ], + } + ) + # Return shared line segments, if any if shared_segments: return shared_segments # If no shared line segments, return None return None - def update_walls(self, original_walls, open_room_pairs): # update walls since there could be open connections @@ -175,7 +210,10 @@ def update_walls(self, 
original_walls, open_room_pairs): updated_walls.append(wall) else: room1_id = connection[0]["roomId"] - if (room0_id, room1_id) in open_room_pairs or (room1_id, room0_id) in open_room_pairs: + if (room0_id, room1_id) in open_room_pairs or ( + room1_id, + room0_id, + ) in open_room_pairs: deleted_wallIds.append(wall["id"]) else: updated_walls.append(wall) @@ -188,7 +226,10 @@ def update_walls(self, original_walls, open_room_pairs): open_wall_segments_no_overlap = [] for segment in open_wall_segments: - if segment not in open_wall_segments_no_overlap and segment[::-1] not in open_wall_segments_no_overlap: + if ( + segment not in open_wall_segments_no_overlap + and segment[::-1] not in open_wall_segments_no_overlap + ): open_wall_segments_no_overlap.append(segment) open_wall_rectangles = [] @@ -196,18 +237,23 @@ def update_walls(self, original_walls, open_room_pairs): top_rectangle, bottom_rectangle = self.create_rectangles(segment) open_wall_rectangles.append(top_rectangle) open_wall_rectangles.append(bottom_rectangle) - - open_walls = {"segments": open_wall_segments_no_overlap, "openWallBoxes": open_wall_rectangles} + + open_walls = { + "segments": open_wall_segments_no_overlap, + "openWallBoxes": open_wall_rectangles, + } return updated_walls, open_walls - def get_wall_direction(self, wall_endpoint1, wall_endpoint2, room_vertices): wall_width = np.linalg.norm(np.array(wall_endpoint1) - np.array(wall_endpoint2)) - + wall_direction = None room_polygon = Polygon(room_vertices) - wall_center = [(wall_endpoint1[0] + wall_endpoint2[0])/2, (wall_endpoint1[1] + wall_endpoint2[1])/2] + wall_center = [ + (wall_endpoint1[0] + wall_endpoint2[0]) / 2, + (wall_endpoint1[1] + wall_endpoint2[1]) / 2, + ] if wall_endpoint1[1] == wall_endpoint2[1]: extend_point_1 = [wall_center[0], wall_center[1] + 0.01] @@ -217,7 +263,7 @@ def get_wall_direction(self, wall_endpoint1, wall_endpoint2, room_vertices): wall_direction = "south" elif room_polygon.contains(Point(extend_point_2)): 
wall_direction = "north" - + elif wall_endpoint1[0] == wall_endpoint2[0]: extend_point_1 = [wall_center[0] + 0.01, wall_center[1]] extend_point_2 = [wall_center[0] - 0.01, wall_center[1]] @@ -229,7 +275,6 @@ def get_wall_direction(self, wall_endpoint1, wall_endpoint2, room_vertices): return wall_width, wall_direction - def create_rectangles(self, segment): # Convert to numpy arrays for easier calculations pt1 = np.array(segment[0]) @@ -241,10 +286,22 @@ def create_rectangles(self, segment): # Calculate a perpendicular vector with length 1 perp_vec = np.array([-vec[1], vec[0]], dtype=np.float32) perp_vec /= np.linalg.norm(perp_vec) - perp_vec *= 0.5 # 0.5 is the hyperparameter for the width of the open connection + perp_vec *= ( + 0.5 # 0.5 is the hyperparameter for the width of the open connection + ) # Calculate the four points for each rectangle - top_rectangle = [list(pt1 + perp_vec), list(pt2 + perp_vec), list(pt2), list(pt1)] - bottom_rectangle = [list(pt1), list(pt2), list(pt2 - perp_vec), list(pt1 - perp_vec)] - - return top_rectangle, bottom_rectangle \ No newline at end of file + top_rectangle = [ + list(pt1 + perp_vec), + list(pt2 + perp_vec), + list(pt2), + list(pt1), + ] + bottom_rectangle = [ + list(pt1), + list(pt2), + list(pt2 - perp_vec), + list(pt1 - perp_vec), + ] + + return top_rectangle, bottom_rectangle diff --git a/ai2holodeck/generation/windows.py b/ai2holodeck/generation/windows.py new file mode 100644 index 0000000..3ff385f --- /dev/null +++ b/ai2holodeck/generation/windows.py @@ -0,0 +1,429 @@ +import ast +import copy +import os +import random +import re + +import compress_json +import numpy as np +from colorama import Fore +from langchain import PromptTemplate, OpenAI + +import ai2holodeck.generation.prompts as prompts +from ai2holodeck.constants import HOLODECK_BASE_DATA_DIR + + +class WindowGenerator: + def __init__(self, llm: OpenAI): + self.json_template = { + "assetId": None, + "id": None, + "room0": None, + "room1": None, + "wall0": 
None, + "wall1": None, + "holePolygon": [], + "assetPosition": {}, + "roomId": None, + } + + self.window_data = compress_json.load( + os.path.join(HOLODECK_BASE_DATA_DIR, "windows/window-database.json") + ) + self.window_ids = list(self.window_data.keys()) + self.hole_offset = 0.05 # make the hole smaller than windows + self.llm = llm + self.window_template = PromptTemplate( + input_variables=[ + "input", + "walls", + "wall_height", + "additional_requirements", + ], + template=prompts.window_prompt, + ) + self.used_assets = [] + + def generate_windows(self, scene, additional_requirements_window): + # get organized walls + organized_walls, available_wall_str = self.get_wall_for_windows(scene) + window_prompt = self.window_template.format( + input=scene["query"], + walls=available_wall_str, + wall_height=int(scene["wall_height"] * 100), + additional_requirements=additional_requirements_window, + ) + + if "raw_window_plan" not in scene: + raw_window_plan = self.llm(window_prompt) + else: + raw_window_plan = scene["raw_window_plan"] + + print(f"\nUser: {window_prompt}\n") + print( + f"{Fore.GREEN}AI: Here is the window plan:\n{raw_window_plan}{Fore.RESET}" + ) + + walls = scene["walls"] + windows = [] + window_ids = [] + rows = [row.lower() for row in raw_window_plan.split("\n") if "|" in row] + room_with_windows = [] + for row in rows: + # parse window plan + parsed_plan = self.parse_raw_plan(row) + if parsed_plan is None: + continue + + # get room id + room_id = parsed_plan["room_id"] + + # only one wall with windows per room + if room_id not in room_with_windows: + room_with_windows.append(room_id) + else: + print(f"Warning: room {room_id} already has windows") + continue + + # get wall id + try: + wall_id = organized_walls[room_id][parsed_plan["wall_direction"]][ + "wall_id" + ] + except: + print("Warning: no available wall for {}".format(row)) + continue + + for wall in walls: + if wall["id"] == wall_id: + wall_info = wall + + # select window + window_id = 
self.select_window( + parsed_plan["window_type"], parsed_plan["window_size"] + ) + ( + window_polygons, + window_positions, + window_segments, + window_boxes, + new_wall_ids, + updated_walls, + ) = self.get_window_polygon( + window_id, + parsed_plan["window_height"], + parsed_plan["quantity"], + wall_info, + walls, + ) + walls = updated_walls # update walls + + if window_polygons == []: + print("Warning: no windows generated for {}".format(row)) + continue + + # generate window json + for i in range(len(window_polygons)): + current_wall_id = new_wall_ids[i] + current_window = copy.deepcopy(self.json_template) + current_window["wall0"] = current_wall_id + current_window["wall1"] = current_wall_id + "|exterior" + current_window["room0"] = room_id + current_window["room1"] = room_id + current_window["roomId"] = room_id + current_window["assetId"] = window_id + current_window["id"] = f"window|{current_wall_id}|{i}" + current_window["holePolygon"] = window_polygons[i] + current_window["assetPosition"] = window_positions[i] + current_window["windowSegment"] = window_segments[i] + current_window["windowBoxes"] = window_boxes[i] + + # sometimes the same window is generated twice and causes errors + if current_window["id"] not in window_ids: + window_ids.append(current_window["id"]) + windows.append(current_window) + else: + print( + "Warning: duplicated window id: {}".format(current_window["id"]) + ) + + return raw_window_plan, walls, windows + + def parse_raw_plan(self, plan): + try: + pattern = re.compile(r"^(\d+[\.\)]\s*|- )") + plan = pattern.sub("", plan) + if plan[-1] == ".": + plan = plan[:-1] # remove the last period + ( + room_id, + wall_direction, + window_type, + window_size, + quantity, + window_height, + ) = plan.split("|") + return { + "room_id": room_id.strip(), + "wall_direction": wall_direction.strip().lower(), + "window_type": window_type.strip().lower(), + "window_size": ast.literal_eval(window_size.strip()), + "quantity": int(quantity.strip()), + 
"window_height": float(window_height.strip()), + } + except: + print("Error: could not parse window plan: {}".format(plan)) + return None + + def get_room(self, rooms, room_type): + for room in rooms: + if room_type == room["roomType"]: + return room + + def get_wall_for_windows(self, scene): + walls_with_door = [] + for door in scene["doors"]: + walls_with_door.append(door["wall0"]) + walls_with_door.append(door["wall1"]) + + available_walls = [] + + for wall in scene["walls"]: + if "connect_exterior" in wall and wall["id"] not in walls_with_door: + available_walls.append(wall) + + organized_walls = {} + for wall in available_walls: + room_id = wall["roomId"] + wall_direction = wall["direction"] + + wall_width = wall["width"] + if wall_width < 2.0: + continue + + if room_id not in organized_walls: + organized_walls[room_id] = {} + + if wall_direction not in organized_walls[room_id]: + organized_walls[room_id][wall_direction] = { + "wall_id": wall["id"], + "wall_width": wall_width, + } + else: + if wall_width > organized_walls[room_id][wall_direction]["wall_width"]: + organized_walls[room_id][wall_direction] = { + "wall_id": wall["id"], + "wall_width": wall_width, + } + + available_wall_str = "" + for room_id in organized_walls: + current_str = "{}: ".format(room_id) + for wall_direction in organized_walls[room_id]: + current_str += "{}, {} cm; ".format( + wall_direction, + int(organized_walls[room_id][wall_direction]["wall_width"] * 100), + ) + available_wall_str += current_str + "\n" + + return organized_walls, available_wall_str + + def select_window(self, window_type, window_size): + candidate_window_ids = [ + window_id + for window_id in self.window_ids + if self.window_data[window_id]["type"] == window_type + ] + size_differences = [ + np.linalg.norm( + np.array(window_size) - np.array(self.window_data[window_id]["size"]) + ) + for window_id in candidate_window_ids + ] + sorted_window_ids = [ + x for _, x in sorted(zip(size_differences, candidate_window_ids)) 
+ ] + + top_window_ids = sorted_window_ids[0] + sorted_window_ids = [ + window_id + for window_id in sorted_window_ids + if window_id not in self.used_assets + ] + + if len(sorted_window_ids) == 0: + selected_window_id = top_window_ids + else: + selected_window_id = sorted_window_ids[0] + + return selected_window_id + + def get_window_polygon(self, window_id, window_height, quantity, wall_info, walls): + window_x = self.window_data[window_id]["boundingBox"]["x"] - self.hole_offset + window_y = self.window_data[window_id]["boundingBox"]["y"] - self.hole_offset + + wall_width = wall_info["width"] + wall_height = wall_info["height"] + wall_segment = wall_info["segment"] + + window_height = min(window_height / 100.0, wall_height - window_y) + + quantity = min(quantity, int(wall_width / window_x)) + + wall_start = np.array(wall_segment[0]) + wall_end = np.array(wall_segment[1]) + original_vector = wall_end - wall_start + original_length = np.linalg.norm(original_vector) + normalized_vector = original_vector / original_length + subwall_length = original_length / quantity + + if quantity == 0: + return [], [], [], [], [], walls + + elif quantity == 1: + window_start = random.uniform(0, wall_width - window_x) + window_end = window_start + window_x + polygon = [ + {"x": window_start, "y": window_height, "z": 0}, + {"x": window_end, "y": window_height + window_y, "z": 0}, + ] + position = { + "x": (polygon[0]["x"] + polygon[1]["x"]) / 2, + "y": (polygon[0]["y"] + polygon[1]["y"]) / 2, + "z": (polygon[0]["z"] + polygon[1]["z"]) / 2, + } + window_segment = [ + list(wall_start + normalized_vector * window_start), + list(wall_start + normalized_vector * window_end), + ] + window_boxes = self.create_rectangles(window_segment) + + return ( + [polygon], + [position], + [window_segment], + [window_boxes], + [wall_info["id"]], + walls, + ) + + else: + # split walls into subwalls + segments = [] + for i in range(quantity): + segment_start = wall_start + i * subwall_length * 
normalized_vector + segment_end = wall_start + (i + 1) * subwall_length * normalized_vector + segments.append((segment_start, segment_end)) + + # update walls + updated_walls = [] + new_wall_ids = [] + for wall in walls: + if wall_info["id"] not in wall["id"]: + updated_walls.append(wall) + + for i in range(len(segments)): + # generate new subwall json + current_wall = copy.deepcopy(wall_info) + current_wall["id"] = f"{wall_info['id']}|{i}" + current_wall["segment"] = [ + segments[i][0].tolist(), + segments[i][1].tolist(), + ] + current_wall["width"] = subwall_length + current_wall["polygon"] = self.generate_wall_polygon( + segments[i][0].tolist(), segments[i][1].tolist(), wall_height + ) + current_wall["connect_exterior"] = current_wall["id"] + "|exterior" + + # add exterior wall + current_wall_exterior = copy.deepcopy(current_wall) + current_wall_exterior["id"] = current_wall["id"] + "|exterior" + current_wall_exterior["material"] = {"name": "Walldrywall4Tiled"} + current_wall_exterior["polygon"] = current_wall["polygon"][::-1] + current_wall_exterior["segment"] = current_wall["segment"][::-1] + current_wall_exterior.pop("connect_exterior") + + updated_walls.append(current_wall) + updated_walls.append(current_wall_exterior) + new_wall_ids.append(current_wall["id"]) + + # generate window polygons + window_polygons = [] + window_positions = [] + window_segments = [] + window_boxes = [] + for i in range(len(segments)): + window_start = random.uniform( + 0, subwall_length - window_x + ) # TODO: consider the same start point for all windows + window_end = window_start + window_x + polygon = [ + {"x": window_start, "y": window_height, "z": 0}, + {"x": window_end, "y": window_height + window_y, "z": 0}, + ] + position = { + "x": (polygon[0]["x"] + polygon[1]["x"]) / 2, + "y": (polygon[0]["y"] + polygon[1]["y"]) / 2, + "z": (polygon[0]["z"] + polygon[1]["z"]) / 2, + } + + window_segment = [ + list(segments[i][0] + normalized_vector * window_start), + list(segments[i][0] 
+ normalized_vector * window_end), + ] + window_box = self.create_rectangles(window_segment) + window_polygons.append(polygon) + window_positions.append(position) + window_segments.append(window_segment) + window_boxes.append(window_box) + + return ( + window_polygons, + window_positions, + window_segments, + window_boxes, + new_wall_ids, + updated_walls, + ) + + def generate_wall_polygon(self, point, next_point, wall_height): + wall_polygon = [] + # add the base point + wall_polygon.append({"x": point[0], "y": 0, "z": point[1]}) + # add the top point (with the same x and z, but y = wall_height) + wall_polygon.append({"x": point[0], "y": wall_height, "z": point[1]}) + # add the top point of the next base point + wall_polygon.append({"x": next_point[0], "y": wall_height, "z": next_point[1]}) + # add the next base point + wall_polygon.append({"x": next_point[0], "y": 0, "z": next_point[1]}) + return wall_polygon + + def create_rectangles(self, segment): + # Convert to numpy arrays for easier calculations + pt1 = np.array(segment[0]) + pt2 = np.array(segment[1]) + + # Calculate the vector for the segment + vec = pt2 - pt1 + + # Calculate a perpendicular vector with length 1 + perp_vec = np.array([-vec[1], vec[0]]) + perp_vec /= np.linalg.norm(perp_vec) + perp_vec *= 0.1 # 0.1 is the width of the window + + # Calculate the four points for each rectangle + top_rectangle = [ + list(pt1 + perp_vec), + list(pt2 + perp_vec), + list(pt2), + list(pt1), + ] + bottom_rectangle = [ + list(pt1), + list(pt2), + list(pt2 - perp_vec), + list(pt1 - perp_vec), + ] + + return top_rectangle, bottom_rectangle diff --git a/ai2holodeck/main.py b/ai2holodeck/main.py new file mode 100644 index 0000000..1696073 --- /dev/null +++ b/ai2holodeck/main.py @@ -0,0 +1,214 @@ +import ast +import os +import traceback +from argparse import ArgumentParser + +import compress_json +from tqdm import tqdm + +from ai2holodeck.constants import HOLODECK_BASE_DATA_DIR, OBJATHOR_ASSETS_DIR +from 
ai2holodeck.generation.holodeck import Holodeck + + +def str2bool(v: str): + v = v.lower().strip() + if v in ("yes", "true", "t", "y", "1"): + return True + elif v in ("no", "false", "f", "n", "0"): + return False + else: + raise ValueError(f"{v} cannot be converted to a bool") + + +def generate_single_scene(args): + folder_name = args.query.replace(" ", "_").replace("'", "") + + scene = None + if args.original_scene is not None: + print(f"Loading original scene from {args.original_scene}.") + try: + scene = compress_json.load(args.original_scene) + except: + print( + f"[ERROR] Could not load original scene from given path {args.original_scene}." + ) + raise + else: + path = os.path.join( + HOLODECK_BASE_DATA_DIR, f"scenes/{folder_name}/{folder_name}.json" + ) + if os.path.exists(path): + print(f"Loading existing scene from {path}.") + try: + scene = compress_json.load(path) + except: + print( + f"[ERROR] The path {path} exists but could not be loaded. Please delete" + f" this file and try again." + ) + raise + + if scene is None: + print("Generating from an empty scene.") + scene = args.model.get_empty_scene() + + try: + _, save_dir = args.model.generate_scene( + scene=scene, + query=args.query, + save_dir=args.save_dir, + used_assets=args.used_assets, + generate_image=ast.literal_eval(args.generate_image), + generate_video=ast.literal_eval(args.generate_video), + add_ceiling=ast.literal_eval(args.add_ceiling), + add_time=ast.literal_eval(args.add_time), + use_constraint=ast.literal_eval(args.use_constraint), + use_milp=ast.literal_eval(args.use_milp), + random_selection=ast.literal_eval(args.random_selection), + ) + except: + print( + f"[ERROR] Could not generate scene from {args.query}. Traceback:\n{traceback.format_exc()}" + ) + return + + print( + f"Generation complete for {args.query}. Scene saved and any other data saved to {save_dir}." 
+ ) + + +def generate_multi_scenes(args): + with open(args.query_file, "r") as f: + queries = f.readlines() + queries = [query.strip() for query in queries] + + for query in tqdm(queries): + args.query = query + generate_single_scene(args) + + +def generate_variants(args): + try: + original_scene = compress_json.load(args.original_scene) + except: + raise Exception(f"Could not load original scene from {args.original_scene}.") + + try: + args.model.generate_variants( + query=args.query, + original_scene=original_scene, + save_dir=args.save_dir, + number_of_variants=int(args.number_of_variants), + used_assets=args.used_assets, + ) + except: + print(f"Could not generate variants from {args.query}.") + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument( + "--mode", + help="Mode to run in (generate_single_scene, generate_multi_scenes or generate_variants).", + default="generate_single_scene", + ) + parser.add_argument( + "--query", help="Query to generate scene from.", default="a living room" + ) + parser.add_argument( + "--query_file", help="File to load queries from.", default="./data/queries.txt" + ) + parser.add_argument( + "--number_of_variants", help="Number of variants to generate.", default=5 + ) + parser.add_argument( + "--original_scene", + help="Original scene to generate variants from.", + default=None, + ) + parser.add_argument( + "--openai_api_key", + help="OpenAI API key. If none given, will attempt to read this from the OPENAI_API_KEY env variable.", + default=None, + ) + parser.add_argument( + "--openai_org", + help="OpenAI ORG string. 
If none given, will attempt to read this from the OPENAI_ORG env variable.", + default=None, + ) + parser.add_argument( + "--save_dir", help="Directory to save scene to.", default="./data/scenes" + ) + parser.add_argument( + "--generate_image", + help="Whether to generate an image of the scene.", + default="True", + ) + parser.add_argument( + "--generate_video", + help="Whether to generate a video of the scene.", + default="False", + ) + parser.add_argument( + "--add_ceiling", help="Whether to add a ceiling to the scene.", default="False" + ) + parser.add_argument( + "--add_time", help="Whether to add the time to the scene name.", default="True" + ) + parser.add_argument( + "--use_constraint", help="Whether to use constraints.", default="True" + ) + parser.add_argument( + "--use_milp", + help="Whether to use mixed integer linear programming for the constraint satisfaction solver.", + default="False", + ) + parser.add_argument( + "--random_selection", + help="Whether to more random object selection, set to False will be more precise, True will be more diverse", + default="False", + ) + parser.add_argument( + "--used_assets", + help="a list of assets which we want to exclude from the scene", + default=[], + ) + parser.add_argument( + "--single_room", + help="Whether to generate a single room scene.", + default="False", + ) + + args = parser.parse_args() + + if args.openai_api_key is None: + args.openai_api_key = os.environ.get("OPENAI_API_KEY") + + if args.openai_org is None: + args.openai_org = os.environ.get("OPENAI_ORG") + + args.model = Holodeck( + openai_api_key=args.openai_api_key, + openai_org=args.openai_org, + objaverse_asset_dir=OBJATHOR_ASSETS_DIR, + single_room=ast.literal_eval(args.single_room), + ) + + if args.used_assets != [] and args.used_assets.endswith(".txt"): + with open(args.used_assets, "r") as f: + args.used_assets = f.readlines() + args.used_assets = [asset.strip() for asset in args.used_assets] + else: + args.used_assets = [] + + if 
args.mode == "generate_single_scene": + generate_single_scene(args) + + elif args.mode == "generate_multi_scenes": + generate_multi_scenes(args) + + elif args.mode == "generate_variants": + generate_variants(args) + + else: + raise Exception(f"Mode {args.mode} not supported.") diff --git a/connect_to_unity.py b/connect_to_unity.py index e9c8e2b..0512613 100644 --- a/connect_to_unity.py +++ b/connect_to_unity.py @@ -1,33 +1,52 @@ -import json +import os +from argparse import ArgumentParser + import ai2thor +import compress_json from ai2thor.controller import Controller from ai2thor.hooks.procedural_asset_hook import ProceduralAssetHookRunner -from argparse import ArgumentParser + +from ai2holodeck.constants import ( + HOLODECK_BASE_DATA_DIR, + THOR_COMMIT_ID, + OBJATHOR_ASSETS_DIR, +) parser = ArgumentParser() -parser.add_argument("--scene", help = "the directory of the scene to be generated", default = "./data/scenes/a_living_room/a_living_room.json") -parser.add_argument("--asset_dir", help = "the directory of the assets to be used", default = "./data/objaverse_holodeck/09_23_combine_scale/processed_2023_09_23_combine_scale") +parser.add_argument( + "--scene", + help="the directory of the scene to be generated", + default=os.path.join( + HOLODECK_BASE_DATA_DIR, "/scenes/a_living_room/a_living_room.json" + ), +) +parser.add_argument( + "--asset_dir", + help="the directory of the assets to be used", + default=OBJATHOR_ASSETS_DIR, +) args = parser.parse_args() -scene = json.load(open(args.scene, "r")) +scene = compress_json.load(args.scene) controller = Controller( - start_unity=False, - port=8200, - scene="Procedural", - gridSize=0.25, - width=300, - height=300, - server_class=ai2thor.wsgi_server.WsgiServer, - makeAgentsVisible=False, - visibilityScheme='Distance', - action_hook_runner=ProceduralAssetHookRunner( + commit_id=THOR_COMMIT_ID, + start_unity=False, + port=8200, + scene="Procedural", + gridSize=0.25, + width=300, + height=300, + 
server_class=ai2thor.wsgi_server.WsgiServer, + makeAgentsVisible=False, + visibilityScheme="Distance", + action_hook_runner=ProceduralAssetHookRunner( asset_directory=args.asset_dir, asset_symlink=True, verbose=True, - ) - ) + ), +) controller.step(action="CreateHouse", house=scene) -print("controller reset") \ No newline at end of file +print("controller reset") diff --git a/main.py b/main.py index 0766521..410cf9e 100644 --- a/main.py +++ b/main.py @@ -2,7 +2,8 @@ import json from tqdm import tqdm from argparse import ArgumentParser -from modules.holodeck import Holodeck + +from ai2holodeck.generation.holodeck import Holodeck def generate_single_scene(args): @@ -12,8 +13,12 @@ def generate_single_scene(args): scene = json.load(open(args.original_scene, "r")) print(f"Loading exist scene from {args.original_scene}.") else: - scene = json.load(open(f"data/scenes/{folder_name}/{folder_name}.json", "r")) - print(f"Loading exist scene from data/scenes/{folder_name}/{folder_name}.json.") + scene = json.load( + open(f"data/scenes/{folder_name}/{folder_name}.json", "r") + ) + print( + f"Loading exist scene from data/scenes/{folder_name}/{folder_name}.json." 
+ ) except: scene = args.model.get_empty_scene() print("Generating from an empty scene.") @@ -29,7 +34,7 @@ def generate_single_scene(args): add_time=ast.literal_eval(args.add_time), use_constraint=ast.literal_eval(args.use_constraint), use_milp=ast.literal_eval(args.use_milp), - random_selection=ast.literal_eval(args.random_selection) + random_selection=ast.literal_eval(args.random_selection), ) @@ -37,15 +42,17 @@ def generate_multi_scenes(args): with open(args.query_file, "r") as f: queries = f.readlines() queries = [query.strip() for query in queries] - + for query in tqdm(queries): args.query = query generate_single_scene(args) def generate_variants(args): - try: original_scene = json.load(open(args.original_scene, "r")) - except: raise Exception(f"Could not load original scene from {args.original_scene}.") + try: + original_scene = json.load(open(args.original_scene, "r")) + except: + raise Exception(f"Could not load original scene from {args.original_scene}.") try: args.model.generate_variants( query=args.query, @@ -60,28 +67,87 @@ def generate_variants(args): if __name__ == "__main__": parser = ArgumentParser() - parser.add_argument("--mode", help = "Mode to run in (generate_single_scene, generate_multi_scenes or generate_variants).", default = "generate_single_scene") - parser.add_argument("--query", help = "Query to generate scene from.", default = "a living room") - parser.add_argument("--query_file", help = "File to load queries from.", default = "./data/queries.txt") - parser.add_argument("--number_of_variants", help = "Number of variants to generate.", default = 5) - parser.add_argument("--original_scene", help = "Original scene to generate variants from.", default = None) - parser.add_argument("--openai_api_key", help = "OpenAI API key.", default = None) - parser.add_argument("--objaverse_version", help = "Version of objaverse to use.", default = "09_23_combine_scale") - parser.add_argument("--asset_dir", help = "Directory to load assets from.", 
default = "./data/objaverse_holodeck/09_23_combine_scale/processed_2023_09_23_combine_scale") - parser.add_argument("--save_dir", help = "Directory to save scene to.", default = "./data/scenes") - parser.add_argument("--generate_image", help = "Whether to generate an image of the scene.", default = "True") - parser.add_argument("--generate_video", help = "Whether to generate a video of the scene.", default = "False") - parser.add_argument("--add_ceiling", help = "Whether to add a ceiling to the scene.", default = "False") - parser.add_argument("--add_time", help = "Whether to add the time to the scene name.", default = "True") - parser.add_argument("--use_constraint", help = "Whether to use constraints.", default = "True") - parser.add_argument("--use_milp", help = "Whether to use mixed integer linear programming for the constraint satisfaction solver.", default = "False") - parser.add_argument("--random_selection", help = "Whether to more random object selection, set to False will be more precise, True will be more diverse", default = "False") - parser.add_argument("--used_assets", help = "a list of assets which we want to exclude from the scene", default = []) - parser.add_argument("--single_room", help = "Whether to generate a single room scene.", default = "False") - + parser.add_argument( + "--mode", + help="Mode to run in (generate_single_scene, generate_multi_scenes or generate_variants).", + default="generate_single_scene", + ) + parser.add_argument( + "--query", help="Query to generate scene from.", default="a living room" + ) + parser.add_argument( + "--query_file", help="File to load queries from.", default="./data/queries.txt" + ) + parser.add_argument( + "--number_of_variants", help="Number of variants to generate.", default=5 + ) + parser.add_argument( + "--original_scene", + help="Original scene to generate variants from.", + default=None, + ) + parser.add_argument("--openai_api_key", help="OpenAI API key.", default=None) + parser.add_argument( + 
"--objaverse_version", + help="Version of objaverse to use.", + default="09_23_combine_scale", + ) + parser.add_argument( + "--asset_dir", + help="Directory to load assets from.", + default="./data/objaverse_holodeck/09_23_combine_scale/processed_2023_09_23_combine_scale", + ) + parser.add_argument( + "--save_dir", help="Directory to save scene to.", default="./data/scenes" + ) + parser.add_argument( + "--generate_image", + help="Whether to generate an image of the scene.", + default="True", + ) + parser.add_argument( + "--generate_video", + help="Whether to generate a video of the scene.", + default="False", + ) + parser.add_argument( + "--add_ceiling", help="Whether to add a ceiling to the scene.", default="False" + ) + parser.add_argument( + "--add_time", help="Whether to add the time to the scene name.", default="True" + ) + parser.add_argument( + "--use_constraint", help="Whether to use constraints.", default="True" + ) + parser.add_argument( + "--use_milp", + help="Whether to use mixed integer linear programming for the constraint satisfaction solver.", + default="False", + ) + parser.add_argument( + "--random_selection", + help="Whether to more random object selection, set to False will be more precise, True will be more diverse", + default="False", + ) + parser.add_argument( + "--used_assets", + help="a list of assets which we want to exclude from the scene", + default=[], + ) + parser.add_argument( + "--single_room", + help="Whether to generate a single room scene.", + default="False", + ) + args = parser.parse_args() - args.model = Holodeck(args.openai_api_key, args.objaverse_version, args.asset_dir, ast.literal_eval(args.single_room)) + args.model = Holodeck( + args.openai_api_key, + args.objaverse_version, + args.asset_dir, + ast.literal_eval(args.single_room), + ) if args.used_assets != [] and args.used_assets.endswith(".txt"): with open(args.used_assets, "r") as f: @@ -89,12 +155,12 @@ def generate_variants(args): args.used_assets = [asset.strip() for 
asset in args.used_assets] else: args.used_assets = [] - + if args.mode == "generate_single_scene": generate_single_scene(args) - + elif args.mode == "generate_multi_scenes": generate_multi_scenes(args) - + elif args.mode == "generate_variants": - generate_variants(args) \ No newline at end of file + generate_variants(args) diff --git a/modules/floor_objects.py b/modules/floor_objects.py deleted file mode 100644 index 22acf07..0000000 --- a/modules/floor_objects.py +++ /dev/null @@ -1,1217 +0,0 @@ -import json -import math -import re -import time -import copy -import cvxpy as cp -import random -import datetime -import numpy as np -import multiprocessing -from rtree import index -import matplotlib.pyplot as plt -import modules.prompts as prompts -from langchain import PromptTemplate -from scipy.interpolate import interp1d -from shapely.geometry import Polygon, Point, box, LineString -from modules.milp_utils import * - - -class FloorObjectGenerator(): - def __init__(self, llm, object_retriever): - self.json_template = {"assetId": None, "id": None, "kinematic": True, - "position": {}, "rotation": {}, "material": None, "roomId": None} - self.llm = llm - self.object_retriever = object_retriever - self.database = object_retriever.database - self.constraint_prompt = PromptTemplate(input_variables=["room_type", "room_size", "objects"], - template=prompts.object_constraints_prompt) - self.baseline_prompt = PromptTemplate(input_variables=["room_type", "room_size", "objects"], - template=prompts.floor_baseline_prompt) - self.grid_density = 20 - self.add_window = False - self.size_buffer = 10 # add 10 cm buffer to object size - - self.constraint_type = "llm" - self.use_milp = False - self.multiprocessing = False - - - def generate_objects(self, scene, use_constraint=True): - rooms = scene["rooms"] - doors = scene["doors"] - windows = scene["windows"] - open_walls = scene["open_walls"] - selected_objects = scene["selected_objects"] - results = [] - - packed_args = [(room, 
doors, windows, open_walls, selected_objects, use_constraint) for room in rooms] - if self.multiprocessing: - pool = multiprocessing.Pool(processes=4) - all_placements = pool.map(self.generate_objects_per_room, packed_args) - pool.close() - pool.join() - else: - all_placements = [self.generate_objects_per_room(args) for args in packed_args] - - for placements in all_placements: - results += placements - - return results - - - def generate_objects_per_room(self, args): - room, doors, windows, open_walls, selected_objects, use_constraint = args - - selected_floor_objects = selected_objects[room["roomType"]]["floor"] - object_name2id = {object_name: asset_id for object_name, asset_id in selected_floor_objects} - - room_id = room["id"] - room_type = room["roomType"] - room_x, room_z = self.get_room_size(room) - - room_size = f"{room_x} cm x {room_z} cm" - grid_size = max(room_x // self.grid_density, room_z // self.grid_density) - - object_names = list(object_name2id.keys()) - - if use_constraint: - # get constraints - constraint_prompt = self.constraint_prompt.format(room_type=room_type, - room_size=room_size, - objects=", ".join(object_names)) - - if self.constraint_type == "llm": - constraint_plan = self.llm(constraint_prompt) - elif self.constraint_type in ["middle", "edge"]: - constraint_plan = "" - for object_name in object_names: - constraint_plan += f"{object_name} | {self.constraint_type}\n" - else: - print("Error: constraint type not supported!") - - print(f"plan for {room_type}: {constraint_plan}") - constraints = self.parse_constraints(constraint_plan, object_names) - - # get objects list - object2dimension = {object_name: self.database[object_id]['assetMetadata']['boundingBox'] - for object_name, object_id in object_name2id.items()} - - objects_list = [(object_name, (object2dimension[object_name]['x'] * 100 + self.size_buffer, object2dimension[object_name]['z'] * 100 + self.size_buffer)) for object_name in constraints] - - # get initial state - 
room_vertices = [(x * 100, y * 100) for (x, y) in room["vertices"]] - room_poly = Polygon(room_vertices) - initial_state = self.get_door_window_placements(doors, windows, room_vertices, open_walls, self.add_window) - - # solve - solver = DFS_Solver_Floor(grid_size=grid_size, max_duration=30, constraint_bouns=1) - solution = solver.get_solution(room_poly, objects_list, constraints, initial_state, use_milp=self.use_milp) - placements = self.solution2placement(solution, object_name2id, room_id) - else: - object_information = "" - for object_name in object_names: - object_id = object_name2id[object_name] - dimension = self.database[object_name2id[object_name]]['assetMetadata']['boundingBox'] - size_x = int(dimension["x"] * 100) - size_z = int(dimension["z"] * 100) - object_information += f"{object_name}: {size_x} cm x {size_z} cm\n" - - baseline_prompt = self.baseline_prompt.format(room_type=room_type, - room_size=room_size, - objects=", ".join(object_names)) - room_origin = [min(v[0] for v in room['vertices']), min(v[1] for v in room['vertices'])] - all_is_placed = False - while not all_is_placed: - completion_text = self.llm(baseline_prompt) - try: - completion_text = re.findall(r'```(.*?)```', completion_text, re.DOTALL)[0] - completion_text = re.sub(r'^json', '', completion_text, flags=re.MULTILINE) - all_data = json.loads(completion_text) - except json.JSONDecodeError: - continue - print(f"completion text for {room_type}: {completion_text}") - placements = list() - all_is_placed = True - for data in all_data: - object_name = data['object_name'] - try: - object_id = object_name2id[object_name] - except KeyError: - all_is_placed = False - break - - dimension = self.database[object_name2id[object_name]]['assetMetadata']['boundingBox'] - placement = self.json_template.copy() - placement["id"] = f"{object_name} ({room_id})" - placement["object_name"] = object_name - placement["assetId"] = object_id - placement["roomId"] = room_id - placement["position"] = {"x": 
room_origin[0] + (data['position']["X"]/100), - "y": dimension["y"] / 2, - "z": room_origin[1] + (data["position"]["Y"]/100)} - placement["rotation"] = {"x": 0, "y": data["rotation"], "z": 0} - placements.append(placement) - break # only one iteration - - return placements - - - def get_door_window_placements(self, doors, windows, room_vertices, open_walls, add_window=True): - room_poly = Polygon(room_vertices) - door_window_placements = {} - i = 0 - for door in doors: - door_boxes = door["doorBoxes"] - for door_box in door_boxes: - door_vertices = [(x * 100, z * 100) for (x, z) in door_box] - door_poly = Polygon(door_vertices) - door_center = door_poly.centroid - if room_poly.contains(door_center): - door_window_placements[f"door-{i}"] = ((door_center.x, door_center.y), 0, door_vertices, 1) - i += 1 - - if add_window: - for window in windows: - window_boxes = window["windowBoxes"] - for window_box in window_boxes: - window_vertices = [(x * 100, z * 100) for (x, z) in window_box] - window_poly = Polygon(window_vertices) - window_center = window_poly.centroid - if room_poly.contains(window_center): - door_window_placements[f"window-{i}"] = ((window_center.x, window_center.y), 0, window_vertices, 1) - i += 1 - - if open_walls != []: - for open_wall_box in open_walls["openWallBoxes"]: - open_wall_vertices = [(x * 100, z * 100) for (x, z) in open_wall_box] - open_wall_poly = Polygon(open_wall_vertices) - open_wall_center = open_wall_poly.centroid - if room_poly.contains(open_wall_center): - door_window_placements[f"open-{i}"] = ((open_wall_center.x, open_wall_center.y), 0, open_wall_vertices, 1) - i += 1 - - return door_window_placements - - - def get_room_size(self, room): - floor_polygon = room["floorPolygon"] - x_values = [point['x'] for point in floor_polygon] - z_values = [point['z'] for point in floor_polygon] - return (int(max(x_values) - min(x_values)) * 100, int(max(z_values) - min(z_values)) * 100) - - - def solution2placement(self, solutions, object_name2id, 
room_id): - placements = [] - for object_name, solution in solutions.items(): - if "door" in object_name or "window" in object_name or "open" in object_name: continue - dimension = self.database[object_name2id[object_name]]['assetMetadata']['boundingBox'] - placement = self.json_template.copy() - placement["assetId"] = object_name2id[object_name] - placement["id"] = f"{object_name} ({room_id})" - placement["position"] = {"x": solution[0][0] / 100, "y": dimension["y"] / 2, "z": solution[0][1] / 100} - placement["rotation"] = {"x": 0, "y": solution[1], "z": 0} - placement["roomId"] = room_id - placement["vertices"] = list(solution[2]) - placement["object_name"] = object_name - placements.append(placement) - return placements - - - def parse_constraints(self, constraint_text, object_names): - constraint_name2type = { - "edge": "global", - "middle": "global", - "in front of": "relative", - "behind": "relative", - "left of": "relative", - "right of": "relative", - "side of": "relative", - "around": "relative", - "face to": "direction", - "face same as": "direction", - "aligned": "alignment", - "center alignment": "alignment", - "center aligned": "alignment", - "aligned center": "alignment", - "edge alignment": "alignment", - "near": "distance", - "far": "distance" - } - - object2constraints = {} - plans = [plan.lower() for plan in constraint_text.split('\n') if "|" in plan] - - for plan in plans: - # remove index - pattern = re.compile(r'^(\d+[\.\)]\s*|- )') - plan = pattern.sub('', plan) - if plan[-1] == ".": plan = plan[:-1] - - object_name = plan.split("|")[0].replace("*", "").strip() # remove * in object name - - if object_name not in object_names: continue - - object2constraints[object_name] = [] - - constraints = plan.split("|")[1:] - for constraint in constraints: - constraint = constraint.strip() - constraint_name = constraint.split(",")[0].strip() - - if constraint_name == "n/a": continue - - try: constraint_type = constraint_name2type[constraint_name] - 
except: print(f"constraint type {constraint_name} not found"); continue - - if constraint_type == "global": - object2constraints[object_name].append({"type": constraint_type, "constraint": constraint_name}) - elif constraint_type in ["relative", "direction", "alignment", "distance"]: - try: target = constraint.split(",")[1].strip() - except: print(f"wrong format of constraint: {constraint}"); continue - - if target in object2constraints: - if constraint_name == "around": - object2constraints[object_name].append({"type": "distance", "constraint": "near", "target": target}) - object2constraints[object_name].append({"type": "direction", "constraint": "face to", "target": target}) - elif constraint_name == "in front of": - object2constraints[object_name].append({"type": "relative", "constraint": "in front of", "target": target}) - object2constraints[object_name].append({"type": "alignment", "constraint": "center aligned", "target": target}) - else: - object2constraints[object_name].append({"type": constraint_type, "constraint": constraint_name, "target": target}) - else: - print(f"target object {target} not found in the existing constraint plan") - continue - else: - print(f"constraint type {constraint_type} not found") - continue - - # clean the constraints - object2constraints_cleaned = {} - for object_name, constraints in object2constraints.items(): - constraints_cleaned = [] - constraint_types = [] - for constraint in constraints: - if constraint["type"] not in constraint_types: - constraint_types.append(constraint["type"]) - constraints_cleaned.append(constraint) - object2constraints_cleaned[object_name] = constraints_cleaned - - return object2constraints - - - def order_objects_by_size(self, selected_floor_objects): - ordered_floor_objects = [] - for object_name, asset_id in selected_floor_objects: - dimensions = self.database[asset_id]['assetMetadata']['boundingBox'] - size = dimensions["x"] * dimensions["z"] - ordered_floor_objects.append([object_name, 
asset_id, size]) - ordered_floor_objects.sort(key=lambda x: x[2], reverse=True) - ordered_floor_objects_no_size = [[object_name, asset_id] for object_name, asset_id, size in ordered_floor_objects] - return ordered_floor_objects_no_size - - -class SolutionFound(Exception): - def __init__(self, solution): - self.solution = solution - - -class DFS_Solver_Floor(): - def __init__(self, grid_size, random_seed=0, max_duration=5, constraint_bouns=0.2): - self.grid_size = grid_size - self.random_seed = random_seed - self.max_duration = max_duration # maximum allowed time in seconds - self.constraint_bouns = constraint_bouns - self.start_time = None - self.solutions = [] - self.vistualize = False - - # Define the functions in a dictionary to avoid if-else conditions - self.func_dict = { - "global": { - "edge": self.place_edge - }, - "relative": self.place_relative, - "direction": self.place_face, - "alignment": self.place_alignment_center, - "distance": self.place_distance - } - - self.constraint_type2weight = { - "global": 1.0, - "relative": 0.5, - "direction": 0.5, - "alignment": 0.5, - "distance": 1.8, - } - - self.edge_bouns = 0.0 # worth more than one constraint - - - def get_solution(self, bounds, objects_list, constraints, initial_state, use_milp=False): - self.start_time = time.time() - if use_milp: - # iterate through the constraints list - # for each constraint type "distance", add the same constraint to the target object - new_constraints = constraints.copy() - for object_name, object_constraints in constraints.items(): - for constraint in object_constraints: - if constraint["type"] == "distance": - target_object_name = constraint["target"] - if target_object_name in constraints.keys(): - # if there is already a distance constraint of target object_name, continue - if any(constraint["type"] == "distance" and constraint["target"] == object_name for constraint in constraints[target_object_name]): continue - new_constraint = constraint.copy() - 
new_constraint["target"] = object_name - new_constraints[target_object_name].append(new_constraint) - # iterate through the constraints list - # for each constraint type "left of" or "right of", add the same constraint to the target object - #for object_name, object_constraints in constraints.items(): - # for constraint in object_constraints: if constraint["type"] == "relative": - # if constraint["constraint"] == "left of": - constraints = new_constraints - - try: - self.milp_dfs(bounds, objects_list, constraints, initial_state, 10) - except SolutionFound as e: - print(f"Time taken: {time.time() - self.start_time}") - - else: - grid_points = self.create_grids(bounds) - grid_points = self.remove_points(grid_points, initial_state) - try: - self.dfs(bounds, objects_list, constraints, grid_points, initial_state, 30) - except SolutionFound as e: - print(f"Time taken: {time.time() - self.start_time}") - - print(f"Number of solutions found: {len(self.solutions)}") - max_solution = self.get_max_solution(self.solutions) - - if not use_milp and self.vistualize: - self.visualize_grid(bounds, grid_points, max_solution) - - return max_solution - - - def get_max_solution(self, solutions): - path_weights = [] - for solution in solutions: - path_weights.append(sum([obj[-1] for obj in solution.values()])) - max_index = np.argmax(path_weights) - return solutions[max_index] - - - def dfs(self, room_poly, objects_list, constraints, grid_points, placed_objects, branch_factor): - if len(objects_list) == 0: - self.solutions.append(placed_objects) - return placed_objects - - if time.time() - self.start_time > self.max_duration: - print(f"Time limit reached.") - raise SolutionFound(self.solutions) - - object_name, object_dim = objects_list[0] - placements = self.get_possible_placements(room_poly, object_dim, constraints[object_name], grid_points, placed_objects) - - if len(placements) == 0 and len(placed_objects) != 0: - self.solutions.append(placed_objects) - - paths = [] - if 
branch_factor > 1: random.shuffle(placements) # shuffle the placements of the first object - - for placement in placements[:branch_factor]: - placed_objects_updated = copy.deepcopy(placed_objects) - placed_objects_updated[object_name] = placement - grid_points_updated = self.remove_points(grid_points, placed_objects_updated) - - sub_paths = self.dfs(room_poly, objects_list[1:], constraints, grid_points_updated, placed_objects_updated, 1) - paths.extend(sub_paths) - - return paths - - - def get_possible_placements(self, room_poly, object_dim, constraints, grid_points, placed_objects): - solutions = self.filter_collision(placed_objects, self.get_all_solutions(room_poly, grid_points, object_dim)) - solutions = self.filter_facing_wall(room_poly, solutions, object_dim) - edge_solutions = self.place_edge(room_poly, copy.deepcopy(solutions), object_dim) - - if len(edge_solutions) == 0: return edge_solutions - - global_constraint = next((constraint for constraint in constraints if constraint["type"] == "global"), None) - - if global_constraint is None: global_constraint = {"type": "global", "constraint": "edge"} - - if global_constraint["constraint"] == "edge": - candidate_solutions = copy.deepcopy(edge_solutions) # edge is hard constraint - else: - if len(constraints) > 1: candidate_solutions = solutions + edge_solutions # edge is soft constraint - else: candidate_solutions = copy.deepcopy(solutions) # the first object - - candidate_solutions = self.filter_collision(placed_objects, candidate_solutions) # filter again after global constraint - - if candidate_solutions == []: return candidate_solutions - random.shuffle(candidate_solutions) - placement2score = {tuple(solution[:3]): solution[-1] for solution in candidate_solutions} - - # add a bias to edge solutions - for solution in candidate_solutions: - if solution in edge_solutions and len(constraints) >= 1: - placement2score[tuple(solution[:3])] += self.edge_bouns - - for constraint in constraints: - if "target" not in 
constraint: continue - - func = self.func_dict.get(constraint["type"]) - valid_solutions = func(constraint["constraint"], placed_objects[constraint["target"]], candidate_solutions) - - weight = self.constraint_type2weight[constraint["type"]] - if constraint["type"] == "distance": - for solution in valid_solutions: - bouns = solution[-1] - placement2score[tuple(solution[:3])] += bouns * weight - else: - for solution in valid_solutions: - placement2score[tuple(solution[:3])] += self.constraint_bouns * weight - - # normalize the scores - for placement in placement2score: placement2score[placement] /= max(len(constraints), 1) - - sorted_placements = sorted(placement2score, key=placement2score.get, reverse=True) - sorted_solutions = [list(placement) + [placement2score[placement]] for placement in sorted_placements] - - return sorted_solutions - - - def create_grids(self, room_poly): - # get the min and max bounds of the room - min_x, min_z, max_x, max_z = room_poly.bounds - - # create grid points - grid_points = [] - for x in range(int(min_x), int(max_x), self.grid_size): - for y in range(int(min_z), int(max_z), self.grid_size): - point = Point(x, y) - if room_poly.contains(point): - grid_points.append((x, y)) - - return grid_points - - - def remove_points(self, grid_points, objects_dict): - # Create an r-tree index - idx = index.Index() - - # Populate the index with bounding boxes of the objects - for i, (_, _, obj, _) in enumerate(objects_dict.values()): - idx.insert(i, Polygon(obj).bounds) - - # Create Shapely Polygon objects only once - polygons = [Polygon(obj) for _, _, obj, _ in objects_dict.values()] - - valid_points = [] - - for point in grid_points: - p = Point(point) - # Get a list of potential candidates - candidates = [polygons[i] for i in idx.intersection(p.bounds)] - # Check if point is in any of the candidate polygons - if not any(candidate.contains(p) for candidate in candidates): - valid_points.append(point) - - return valid_points - - - def 
get_all_solutions(self, room_poly, grid_points, object_dim): - obj_length, obj_width = object_dim - obj_half_length, obj_half_width = obj_length / 2, obj_width / 2 - - rotation_adjustments = { - 0: ((-obj_half_length, -obj_half_width), (obj_half_length, obj_half_width)), - 90: ((-obj_half_width, -obj_half_length), (obj_half_width, obj_half_length)), - 180: ((-obj_half_length, obj_half_width), (obj_half_length, -obj_half_width)), - 270: ((obj_half_width, -obj_half_length), (-obj_half_width, obj_half_length)), - } - - solutions = [] - for rotation in [0, 90, 180, 270]: - for point in grid_points: - center_x, center_y = point - lower_left_adjustment, upper_right_adjustment = rotation_adjustments[rotation] - lower_left = (center_x + lower_left_adjustment[0], center_y + lower_left_adjustment[1]) - upper_right = (center_x + upper_right_adjustment[0], center_y + upper_right_adjustment[1]) - obj_box = box(*lower_left, *upper_right) - - if room_poly.contains(obj_box): - solutions.append([point, rotation, tuple(obj_box.exterior.coords[:]), 1]) - - return solutions - - - def filter_collision(self, objects_dict, solutions): - valid_solutions = [] - object_polygons = [Polygon(obj_coords) for _, _, obj_coords, _ in list(objects_dict.values())] - for solution in solutions: - sol_obj_coords = solution[2] - sol_obj = Polygon(sol_obj_coords) - if not any(sol_obj.intersects(obj) for obj in object_polygons): - valid_solutions.append(solution) - return valid_solutions - - - def filter_facing_wall(self, room_poly, solutions, obj_dim): - valid_solutions = [] - obj_width = obj_dim[1] - obj_half_width = obj_width / 2 - - front_center_adjustments = { - 0: (0, obj_half_width), - 90: (obj_half_width, 0), - 180: (0, -obj_half_width), - 270: (-obj_half_width, 0), - } - - valid_solutions = [] - for solution in solutions: - center_x, center_y = solution[0] - rotation = solution[1] - - front_center_adjustment = front_center_adjustments[rotation] - front_center_x, front_center_y = center_x + 
front_center_adjustment[0], center_y + front_center_adjustment[1] - - front_center_distance = room_poly.boundary.distance(Point(front_center_x, front_center_y)) - - if front_center_distance >= 30: # TODO: make this a parameter - valid_solutions.append(solution) - - return valid_solutions - - - def place_edge(self, room_poly, solutions, obj_dim): - valid_solutions = [] - obj_width = obj_dim[1] - obj_half_width = obj_width / 2 - - back_center_adjustments = { - 0: (0, -obj_half_width), - 90: (-obj_half_width, 0), - 180: (0, obj_half_width), - 270: (obj_half_width, 0), - } - - for solution in solutions: - center_x, center_y = solution[0] - rotation = solution[1] - - back_center_adjustment = back_center_adjustments[rotation] - back_center_x, back_center_y = center_x + back_center_adjustment[0], center_y + back_center_adjustment[1] - - back_center_distance = room_poly.boundary.distance(Point(back_center_x, back_center_y)) - center_distance = room_poly.boundary.distance(Point(center_x, center_y)) - - if back_center_distance <= self.grid_size and back_center_distance < center_distance: - solution[-1] += self.constraint_bouns - # valid_solutions.append(solution) # those are still valid solutions, but we need to move the object to the edge - - # move the object to the edge - center2back_vector = np.array([back_center_x - center_x, back_center_y - center_y]) - center2back_vector /= np.linalg.norm(center2back_vector) - offset = center2back_vector * (back_center_distance + 4.5) # add a small distance to avoid the object cross the wall - solution[0] = (center_x + offset[0], center_y + offset[1]) - solution[2] = ((solution[2][0][0] + offset[0], solution[2][0][1] + offset[1]), \ - (solution[2][1][0] + offset[0], solution[2][1][1] + offset[1]), \ - (solution[2][2][0] + offset[0], solution[2][2][1] + offset[1]), \ - (solution[2][3][0] + offset[0], solution[2][3][1] + offset[1])) - valid_solutions.append(solution) - - return valid_solutions - - - def place_corner(self, room_poly, 
solutions, obj_dim): - obj_length, obj_width = obj_dim - obj_half_length, _ = obj_length / 2, obj_width / 2 - - rotation_center_adjustments = { - 0: ((-obj_half_length, 0), (obj_half_length, 0)), - 90: ((0, obj_half_length), (0, -obj_half_length)), - 180: ((obj_half_length, 0), (-obj_half_length, 0)), - 270: ((0, -obj_half_length), (0, obj_half_length)) - } - - edge_solutions = self.place_edge(room_poly, solutions, obj_dim) - - valid_solutions = [] - - for solution in edge_solutions: - (center_x, center_y), rotation = solution[:2] - (dx_left, dy_left), (dx_right, dy_right) = rotation_center_adjustments[rotation] - - left_center_x, left_center_y = center_x + dx_left, center_y + dy_left - right_center_x, right_center_y = center_x + dx_right, center_y + dy_right - - left_center_distance = room_poly.boundary.distance(Point(left_center_x, left_center_y)) - right_center_distance = room_poly.boundary.distance(Point(right_center_x, right_center_y)) - - if min(left_center_distance, right_center_distance) < self.grid_size: - solution[-1] += self.constraint_bouns - valid_solutions.append(solution) - - return valid_solutions - - - def place_relative(self, place_type, target_object, solutions): - valid_solutions = [] - _, target_rotation, target_coords, _ = target_object - target_polygon = Polygon(target_coords) - - min_x, min_y, max_x, max_y = target_polygon.bounds - mean_x = (min_x + max_x) / 2 - mean_y = (min_y + max_y) / 2 - - comparison_dict = { - 'left of': { - 0: lambda sol_center: sol_center[0] < min_x and min_y <= sol_center[1] <= max_y, - 90: lambda sol_center: sol_center[1] > max_y and min_x <= sol_center[0] <= max_x, - 180: lambda sol_center: sol_center[0] > max_x and min_y <= sol_center[1] <= max_y, - 270: lambda sol_center: sol_center[1] < min_y and min_x <= sol_center[0] <= max_x, - }, - 'right of': { - 0: lambda sol_center: sol_center[0] > max_x and min_y <= sol_center[1] <= max_y, - 90: lambda sol_center: sol_center[1] < min_y and min_x <= sol_center[0] <= 
max_x, - 180: lambda sol_center: sol_center[0] < min_x and min_y <= sol_center[1] <= max_y, - 270: lambda sol_center: sol_center[1] > max_y and min_x <= sol_center[0] <= max_x, - }, - 'in front of': { - 0: lambda sol_center: sol_center[1] > max_y and mean_x - self.grid_size < sol_center[0] < mean_x + self.grid_size, # in front of and centered - 90: lambda sol_center: sol_center[0] > max_x and mean_y - self.grid_size < sol_center[1] < mean_y + self.grid_size, - 180: lambda sol_center: sol_center[1] < min_y and mean_x - self.grid_size < sol_center[0] < mean_x + self.grid_size, - 270: lambda sol_center: sol_center[0] < min_x and mean_y - self.grid_size < sol_center[1] < mean_y + self.grid_size, - }, - 'behind': { - 0: lambda sol_center: sol_center[1] < min_y and min_x <= sol_center[0] <= max_x, - 90: lambda sol_center: sol_center[0] < min_x and min_y <= sol_center[1] <= max_y, - 180: lambda sol_center: sol_center[1] > max_y and min_x <= sol_center[0] <= max_x, - 270: lambda sol_center: sol_center[0] > max_x and min_y <= sol_center[1] <= max_y, - }, - "side of": { - 0: lambda sol_center: min_y <= sol_center[1] <= max_y, - 90: lambda sol_center: min_x <= sol_center[0] <= max_x, - 180: lambda sol_center: min_y <= sol_center[1] <= max_y, - 270: lambda sol_center: min_x <= sol_center[0] <= max_x - } - } - - compare_func = comparison_dict.get(place_type).get(target_rotation) - - for solution in solutions: - sol_center = solution[0] - - if compare_func(sol_center): - solution[-1] += self.constraint_bouns - valid_solutions.append(solution) - - return valid_solutions - - - def place_distance(self, distance_type, target_object, solutions): - target_coords = target_object[2] - target_poly = Polygon(target_coords) - distances = [] - valid_solutions = [] - for solution in solutions: - sol_coords = solution[2] - sol_poly = Polygon(sol_coords) - distance = target_poly.distance(sol_poly) - distances.append(distance) - - solution[-1] = distance - valid_solutions.append(solution) - - 
min_distance = min(distances) - max_distance = max(distances) - - if distance_type == "near": - if min_distance < 80: - points = [(min_distance, 1), (80, 0), (max_distance, 0)] - else: - points = [(min_distance, 0), (max_distance, 0)] - - elif distance_type == "far": - points = [(min_distance, 0), (max_distance, 1)] - - x = [point[0] for point in points] - y = [point[1] for point in points] - - f = interp1d(x, y, kind='linear', fill_value='extrapolate') - - for solution in valid_solutions: - distance = solution[-1] - solution[-1] = float(f(distance)) - - return valid_solutions - - - def place_face(self, face_type, target_object, solutions): - if face_type == "face to": - return self.place_face_to(target_object, solutions) - - elif face_type == "face same as": - return self.place_face_same(target_object, solutions) - - elif face_type == "face opposite to": - return self.place_face_opposite(target_object, solutions) - - - def place_face_to(self, target_object, solutions): - # Define unit vectors for each rotation - unit_vectors = { - 0: np.array([0., 1.]), # Facing up - 90: np.array([1., 0.]), # Facing right - 180: np.array([0., -1.]), # Facing down - 270: np.array([-1., 0.]) # Facing left - } - - target_coords = target_object[2] - target_poly = Polygon(target_coords) - - valid_solutions = [] - - for solution in solutions: - sol_center = solution[0] - sol_rotation = solution[1] - - # Define an arbitrarily large point in the direction of the solution's rotation - far_point = sol_center + 1e6 * unit_vectors[sol_rotation] - - # Create a half-line from the solution's center to the far point - half_line = LineString([sol_center, far_point]) - - # Check if the half-line intersects with the target polygon - if half_line.intersects(target_poly): - solution[-1] += self.constraint_bouns - valid_solutions.append(solution) - - return valid_solutions - - - def place_face_same(self, target_object, solutions): - target_rotation = target_object[1] - valid_solutions = [] - - for 
solution in solutions: - sol_rotation = solution[1] - if sol_rotation == target_rotation: - solution[-1] += self.constraint_bouns - valid_solutions.append(solution) - - return valid_solutions - - - def place_face_opposite(self, target_object, solutions): - target_rotation = (target_object[1] + 180) % 360 - valid_solutions = [] - - for solution in solutions: - sol_rotation = solution[1] - if sol_rotation == target_rotation: - solution[-1] += self.constraint_bouns - valid_solutions.append(solution) - - return valid_solutions - - - def place_alignment_center(self, alignment_type, target_object, solutions): - target_center = target_object[0] - valid_solutions = [] - eps = 5 - for solution in solutions: - sol_center = solution[0] - if abs(sol_center[0] - target_center[0]) < eps or abs(sol_center[1] - target_center[1]) < eps: - solution[-1] += self.constraint_bouns - valid_solutions.append(solution) - return valid_solutions - - - def visualize_grid(self, room_poly, grid_points, solutions): - plt.rcParams["font.family"] = "Times New Roman" - plt.rcParams["font.size"] = 22 - - # create a new figure - fig, ax = plt.subplots() - - # draw the room - x, y = room_poly.exterior.xy - ax.plot(x, y, '-', label='Room', color='black', linewidth=2) - - # draw the grid points - grid_x = [point[0] for point in grid_points] - grid_y = [point[1] for point in grid_points] - ax.plot(grid_x, grid_y, 'o', markersize=2, color="grey") - - # draw the solutions - for object_name, solution in solutions.items(): - center, rotation, box_coords = solution[:3] - center_x, center_y = center - - # create a polygon for the solution - obj_poly = Polygon(box_coords) - x, y = obj_poly.exterior.xy - ax.plot(x, y, '-', linewidth=2, color='black') - - # ax.text(center_x, center_y, object_name, fontsize=18, ha='center') - - # set arrow direction based on rotation - if rotation == 0: - ax.arrow(center_x, center_y, 0, 25, head_width=10, fc='black') - elif rotation == 90: - ax.arrow(center_x, center_y, 25, 0, 
head_width=10, fc='black') - elif rotation == 180: - ax.arrow(center_x, center_y, 0, -25, head_width=10, fc='black') - elif rotation == 270: - ax.arrow(center_x, center_y, -25, 0, head_width=10, fc='black') - # axis off - ax.axis('off') - ax.set_aspect('equal', 'box') # to keep the ratios equal along x and y axis - create_time = str(datetime.datetime.now()).replace(" ", "-").replace(":", "-").replace(".", "-") - plt.savefig(f"{create_time}.pdf", bbox_inches='tight', dpi=300) - plt.show() - - - def milp_dfs(self, room_poly, all_objects_list, constraints, placed_objects, branch_factor=1): - if len(all_objects_list) == 0: - self.solutions.append(placed_objects) - return placed_objects - - if time.time() - self.start_time > self.max_duration: - print(f"Time limit reached.") - raise SolutionFound(self.solutions) - - def milp_solve(soft_constraints_list, hard_constraints_list, verbose=False): - problem = cp.Problem(cp.Maximize(sum(soft_constraints_list)), hard_constraints_list) - if verbose: - print('solving milp using GUROBI ...') - problem.solve(solver=cp.GUROBI, reoptimize=True, verbose=False) - return problem.value - - def parse_object_properties(object_properties): - x, y = object_properties[0] - rotation = int(object_properties[1] or 0) - # set rotation to the closest 90 degree - rotation = int(round(rotation / 90) * 90) - assert rotation in [0, 90, 180, 270] - object_bbox = object_properties[2] - min_x = min([point[0] for point in object_bbox]) - max_x = max([point[0] for point in object_bbox]) - min_y = min([point[1] for point in object_bbox]) - max_y = max([point[1] for point in object_bbox]) - object_dim = (max_x - min_x, max_y - min_y) if rotation == 0 or rotation == 180 else (max_y - min_y, max_x - min_x) - return x, y, rotation, object_dim - - def find_object_dim(target_object_name, objects_list, placed_objects): - target_object_dim = None - for object_name_1, object_dim_1 in objects_list: - if object_name_1 == target_object_name: - target_object_dim = 
object_dim_1 - return target_object_dim - - if not None: - for object_name_1, object_properties in placed_objects.items(): - if object_name_1 == target_object_name: - x, y, rotation, target_object_dim = parse_object_properties(object_properties) - return target_object_dim - return None - - - found_a_solution = False - # randomly select a set of objects from all_objects_list - # start with the largest object + more objects --> gradually reduce the number of objects - for branch_idx in range(branch_factor): - # sample a set of objects from a list that contains the first object - - k = random.randint(0, min(5, len(all_objects_list)-1)) - objects_list = [all_objects_list[0]] + random.sample(all_objects_list[1:], k) - - hard_constraints_list = [] - soft_constraints_list = [0] - - # formulate the milp problem - # object_name, object_dim = objects_list[0] - # x, y, rotate_180, rotate_90 - variables_dict = {object[0]: [cp.Variable(), cp.Variable(), cp.Variable(boolean=True), cp.Variable(boolean=True)] for object in objects_list} - # add placed objects into variables dict even though they are not variables - for object, object_properties in placed_objects.items(): - x, y = object_properties[0] - rotation = int(object_properties[1]) - variables_dict[object] = [x, y, rotation == 180, rotation == 90 or rotation == 270] - - # Initialize a list of variables, each variable represents the coordinate for each object - room_min_x, room_min_y, room_max_x, room_max_y = room_poly.bounds - # Add boundary constraints to all objects - for object_name, object_dim in objects_list: - hard_constraints_list.extend(create_boundary_constraints(variables_dict[object_name], - object_dim, - (room_min_x, room_min_y, room_max_x, room_max_y))) - # Add pariwise collision constraints - for object_name_1, object_dim_1 in objects_list: - for object_name_2, object_dim_2 in objects_list: - if object_name_1 == object_name_2: continue - # collision constraints should be hard constraints - 
hard_constraints_list.extend(create_nooverlap_constraints(variables_dict[object_name_1], - variables_dict[object_name_2], - object_dim_1, - object_dim_2)) - - # Add pariwise collision constraints with placed objects - for object_name_1, object_dim_1 in objects_list: - for object_name_2, object_properties_2 in placed_objects.items(): - # bbox is a list of four points - x, y, rotation, object_dim_2 = parse_object_properties(object_properties_2) - - hard_constraints_list.extend(create_nooverlap_constraints(variables_dict[object_name_1], - [x, y, rotation == 180, rotation == 90 or rotation == 270], - object_dim_1, object_dim_2)) - - # default constraints / heuristics? - for object_name, object_dim in objects_list: - # encourage dispersement of assets - all_other_objects_list = [x[0] for x in objects_list if x[0] != object_name] + list(placed_objects.keys()) - for target_object_name in all_other_objects_list: - hard_constraints, soft_constraints = create_distance_constraints(variables_dict[object_name], - variables_dict[target_object_name], - upper_bound=[room_max_x-room_min_x, room_max_y-room_min_y], - type='far') - assert len(soft_constraints) == 1 - # soft_constraints[0] *= 0.001 - hard_constraints_list.extend(hard_constraints) - soft_constraints_list.extend(soft_constraints) - - - # use cvxpy to solve for the hard constraints - for object_name, object_dim in objects_list: - - # by default - add soft edge constraints although this might make the solver take a longer time - if not any(constraint['type'] == 'global' for constraint in constraints[object_name]): - hard_constraints, soft_constraints = create_edge_constraints(variables_dict[object_name], - object_dim, - room_dim=(room_min_x, room_min_y, room_max_x, room_max_y), - hard=False) - soft_constraints[0] *= 100 - hard_constraints_list.extend(hard_constraints) - soft_constraints_list.extend(soft_constraints) - - - - for constraint in constraints[object_name]: - if constraint['type'] == 'global': - if 
constraint['constraint'] == 'edge': # hard constraints - hard_constraints, soft_constraints = create_edge_constraints(variables_dict[object_name], - object_dim, - room_dim=(room_min_x, room_min_y, room_max_x, room_max_y), - hard=True) - hard_constraints_list.extend(hard_constraints) - soft_constraints_list.extend(soft_constraints) - - if constraint['type'] == 'direction': - assert constraint['constraint'] == 'face to' - target_object_name = constraint['target'] - target_object_dim = find_object_dim(target_object_name, objects_list, placed_objects) - if target_object_dim: - hard_constraints_list.extend(create_directional_constraints(variables_dict[object_name], - variables_dict[target_object_name], - object_dim, - target_object_dim)) - - if constraint['type'] == 'alignment': - assert constraint['constraint'] == 'center aligned' - target_object_name = constraint['target'] - target_object_dim = find_object_dim(target_object_name, objects_list, placed_objects) - if target_object_dim: - hard_constraints_list.extend(create_alignment_constraints(variables_dict[object_name], - variables_dict[target_object_name], - object_dim, - target_object_dim)) - - if constraint['type'] == 'distance': - target_object_name = constraint['target'] - target_object_dim = find_object_dim(target_object_name, objects_list, placed_objects) - if target_object_dim: - hard_constraints, soft_constraints = create_distance_constraints(variables_dict[object_name], - variables_dict[target_object_name], - upper_bound=[room_max_x-room_min_x, room_max_y-room_min_y], - type=constraint['constraint']) - hard_constraints_list.extend(hard_constraints) - soft_constraints_list.extend(soft_constraints) - assert len(soft_constraints) == 1 - # higher weighting - soft_constraints[0] *= 0.01 - - if constraint['type'] == 'relative': - target_object_name = constraint['target'] - target_object_dim = find_object_dim(target_object_name, objects_list, placed_objects) - if target_object_dim: - 
hard_constraints_list.extend(create_relative_constraints(variables_dict[object_name], - variables_dict[target_object_name], - object_dim, - target_object_dim, - constraint['constraint'])) - - result = milp_solve(soft_constraints_list, hard_constraints_list, verbose=False) - if result is None or math.isnan(result) or math.isinf(result): - continue - - found_a_solution = True - print(result, [x[0] for x in objects_list]) - - # we fonud a valid solution - # convert the placements to the same format as the dfs solver - placed_objects_updated = copy.deepcopy(placed_objects) - for object_name, object_dim in objects_list: - # (x, y), rotation, bbox, score - x = variables_dict[object_name][0].value.item() - y = variables_dict[object_name][1].value.item() - rotate_180 = variables_dict[object_name][2].value - rotate_90 = variables_dict[object_name][3].value - if not rotate_180: rotate_180 = 0 - if not rotate_90: rotate_90 = 0 - - # bbox has taken into account of the rotation - if rotate_90: - bbox = [(x - object_dim[1]/2, y - object_dim[0]/2), - (x + object_dim[1]/2, y - object_dim[0]/2), - (x + object_dim[1]/2, y + object_dim[0]/2), - (x - object_dim[1]/2, y + object_dim[0]/2)] - else: - bbox = [(x - object_dim[0]/2, y - object_dim[1]/2), - (x + object_dim[0]/2, y - object_dim[1]/2), - (x + object_dim[0]/2, y + object_dim[1]/2), - (x - object_dim[0]/2, y + object_dim[1]/2)] - - placed_objects_updated[object_name] = [(x,y), rotate_180 * 180 + rotate_90 * 90, bbox, - len(constraints[object_name])] - - # remove all elemnts in objects_list from all_objects_list - self.milp_dfs(room_poly, [x for x in all_objects_list if x not in objects_list], constraints, placed_objects_updated, branch_factor=1) - - if not found_a_solution and len(placed_objects) != 0: - self.solutions.append(placed_objects) - - - def test_dfs_placement(self): - room_vertices = ((0, 0), (0, 500), (500, 500), (500, 0)) - room_poly = Polygon(room_vertices) - grid_points = self.create_grids(room_poly) - objects = 
{"door": ((50, 50), 0, ((0, 0), (100, 0), (100, 100), (0, 100)), 1)} - grid_points = self.remove_points(grid_points, objects) - # self.visualize_grid(room_poly, grid_points, objects) - - object_dim = (200, 100) - solutions = self.get_all_solutions(room_poly, grid_points, object_dim) - solutions = self.filter_collision(objects, solutions) - solutions = self.place_edge(room_poly, solutions, object_dim) - - # for i, solution in enumerate(solutions): - # objects[f"sofa-{i}"] = solution - # self.visualize_grid(room_poly, grid_points, objects) - - random.seed(0) - objects["sofa"] = random.choice(solutions) - # self.visualize_grid(room_poly, grid_points, objects) - object_1_dim = (100, 50) - - solutions_1 = self.get_all_solutions(room_poly, grid_points, object_1_dim) - solutions_1 = self.filter_collision(objects, solutions_1) - - # random.seed(42) - # for i, solution in enumerate(random.sample(solutions_1, 25)): - # objects[f"coffee table-{i}"] = solution - - # objects[f"coffee table"] = [(300, 350), 0, ((350.0, 325.0), (350.0, 375.0), (250.0, 375.0), (250.0, 325.0), (350.0, 325.0)), 1.0] - # self.visualize_grid(room_poly, grid_points, objects) - - solutions_1 = self.place_face_to(objects["sofa"], solutions_1) - solutions_1 = self.place_relative("in front of", objects["sofa"], solutions_1) - solutions_1 = self.place_alignment_center("center alignment", objects["sofa"], solutions_1) - solutions_1 = self.place_distance("near", objects["sofa"], solutions_1) - objects[f"coffee table"] = solutions_1[-1] - self.visualize_grid(room_poly, grid_points, objects) - - - def test_milp_placement(self, simple=False, use_milp=True): - room_vertices = ((0, 0), (0, 600), (800, 600), (800, 0)) - room_poly = Polygon(room_vertices) - grid_points = self.create_grids(room_poly) - - if not simple: - constraints = {'sofa-0': [{'type': 'global', 'constraint': 'edge'}], - 'sofa-1': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'near', 'target': 'sofa-0'}, - 
{'type': 'alignment', 'constraint': 'center aligned', 'target': 'sofa-0'}], - 'tv stand-0': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'far', 'target': 'sofa-1'}, - {'type': 'alignment', 'constraint': 'center aligned', 'target': 'sofa-1'}], - 'coffee table-0': [{'type': 'global', 'constraint': 'middle'}, - {'type': 'distance', 'constraint': 'near', 'target': 'sofa-0'}, - {'type': 'relative', 'constraint': 'in front of', 'target': 'sofa-0'}, - {'type': 'alignment', 'constraint': 'center aligned', 'target': 'sofa-0'}, - {'type': 'alignment', 'constraint': 'center aligned', 'target': 'sofa-0'}, - {'type': 'direction', 'constraint': 'face to', 'target': 'tv stand-0'}], - 'coffee table-1': [{'type': 'global', 'constraint': 'middle'}, - {'type': 'distance', 'constraint': 'near', 'target': 'sofa-1'}, - {'type': 'relative', 'constraint': 'in front of', 'target': 'sofa-1'}, - {'type': 'alignment', 'constraint': 'center aligned', 'target': 'sofa-1'}, - {'type': 'alignment', 'constraint': 'center aligned', 'target': 'sofa-1'}, - {'type': 'direction', 'constraint': 'face to', 'target': 'tv stand-0'}], - 'side table-0': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'near', 'target': 'sofa-0'}, - {'type': 'relative', 'constraint': 'side of', 'target': 'sofa-0'}], - 'side table-1': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'near', 'target': 'sofa-1'}, - {'type': 'relative', 'constraint': 'side of', 'target': 'sofa-1'}], - 'armchair-0': [{'type': 'global', 'constraint': 'middle'}, - {'type': 'distance', 'constraint': 'near', 'target': 'coffee table-0'}, - {'type': 'direction', 'constraint': 'face to', 'target': 'coffee table-0'}, - {'type': 'direction', 'constraint': 'face to', 'target': 'coffee table-0'}], - 'armchair-1': [{'type': 'global', 'constraint': 'middle'}, - {'type': 'distance', 'constraint': 'near', 'target': 'coffee table-1'}, - {'type': 'direction', 
'constraint': 'face to', 'target': 'coffee table-1'}, - {'type': 'direction', 'constraint': 'face to', 'target': 'coffee table-1'}], - 'bookshelf-0': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'far', 'target': 'tv stand-0'}], - 'bookshelf-1': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'far', 'target': 'bookshelf-0'}, - {'type': 'alignment', 'constraint': 'center aligned', 'target': 'bookshelf-0'}]} - - initial_state = {'door-0': ((586.7550200520433, 550.0), 0, [(640.8300346432603, 500.0), (532.6800054608262, 500.0), (532.6800054608262, 600.0), (640.8300346432603, 600.0)], 1)} - - objects = [('sofa-0', (301.6667297651499, 106.48952360032415)), - ('sofa-1', (301.6667297651499, 106.48952360032415)), - ('tv stand-0', (201.0964714933229, 59.39910836195032)), - ('coffee table-0', (69.15754261308616, 126.69169450358964)), - ('coffee table-1', (69.15754261308616, 126.69169450358964)), - ('side table-0', (61.74632023132328, 61.74453745262855)), - ('side table-1', (61.74632023132328, 61.74453745262855)), - ('armchair-0', (79.0368498902692, 89.4893987892571)), - ('armchair-1', (79.0368498902692, 89.4893987892571)), - ('bookshelf-0', (67.94689517917222, 43.8934937031396)), - ('bookshelf-1', (67.94689517917222, 43.8934937031396))] - solution = self.get_solution(room_poly, objects, constraints, initial_state, use_milp=use_milp) - else: - constraints = {'dining table': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'far', 'target': 'door'}, - {'type': 'distance', 'constraint': 'near', 'target': 'chair'}], - 'chair': [{'type': 'relative', 'constraint': 'side of', 'target': 'dining table'}] - } - initial_state = {"door": ((50, 50), 0, ((0, 0), (100, 0), (100, 100), (0, 100)), 1)} - objects = [("dining table", (100, 50)), ("chair", (50, 50))] - solution = self.get_solution(room_poly, objects, constraints, initial_state, use_milp=use_milp) - - print('milp solution:', 
len(solution)) - for object_name, object_properties in solution.items(): - print(object_name, object_properties) - # if object_properties[2] == 90 or object_properties[2] == 270: - self.visualize_grid(room_poly, grid_points, solution) - - -if __name__ == "__main__": - solver = DFS_Solver_Floor(max_duration=30, grid_size=50) - solver.test_dfs_placement() - solver.test_milp_placement(simple=False, use_milp=True) diff --git a/modules/holodeck.py b/modules/holodeck.py deleted file mode 100644 index 5cc8833..0000000 --- a/modules/holodeck.py +++ /dev/null @@ -1,304 +0,0 @@ -import os -import json -import datetime -import open_clip -from tqdm import tqdm -from langchain.llms import OpenAI -from sentence_transformers import SentenceTransformer -from modules.rooms import FloorPlanGenerator -from modules.walls import WallGenerator -from modules.doors import DoorGenerator -from modules.windows import WindowGenerator -from modules.object_selector import ObjectSelector -from modules.floor_objects import FloorObjectGenerator -from modules.wall_objects import WallObjectGenerator -from modules.ceiling_objects import CeilingObjectGenerator -from modules.small_objects import SmallObjectGenerator -from modules.lights import generate_lights -from modules.skybox import getSkybox -from modules.layers import map_asset2layer -from modules.objaverse_retriever import ObjaverseRetriever -from modules.utils import get_top_down_frame, room_video - - -class Holodeck(): - def __init__(self, openai_api_key, objaverse_version, objaverse_asset_dir, single_room): - os.environ["OPENAI_API_KEY"] = openai_api_key - - # initialize llm - self.llm = OpenAI(model_name="gpt-4-1106-preview", max_tokens=2048) - self.llm_fast = OpenAI(model_name="gpt-3.5-turbo", max_tokens=2048) - - # initialize CLIP - self.clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='laion2b_s32b_b82k') - self.clip_tokenizer = open_clip.get_tokenizer('ViT-L-14') - - # initialize sentence 
transformer - self.sbert_model = SentenceTransformer('all-mpnet-base-v2') - - # objaverse version and asset dir - self.objaverse_version = objaverse_version - self.objaverse_asset_dir = objaverse_asset_dir - - # initialize modules - self.retrieval_threshold = 28 - self.object_retriever = ObjaverseRetriever(self.clip_model, self.clip_preprocess, self.clip_tokenizer, self.sbert_model, self.objaverse_version, self.retrieval_threshold) - self.floor_generator = FloorPlanGenerator(self.clip_model, self.clip_preprocess, self.clip_tokenizer, self.llm) - self.wall_generator = WallGenerator(self.llm) - self.door_generator = DoorGenerator(self.clip_model, self.clip_preprocess, self.clip_tokenizer, self.llm) - self.window_generator = WindowGenerator(self.llm) - self.object_selector = ObjectSelector(self.object_retriever, self.llm) - self.floor_object_generator = FloorObjectGenerator(self.llm, self.object_retriever) - self.wall_object_generator = WallObjectGenerator(self.llm, self.object_retriever) - self.ceiling_generator = CeilingObjectGenerator(self.llm, self.object_retriever) - self.small_object_generator = SmallObjectGenerator(self.llm, self.object_retriever, self.objaverse_version) - - # additional requirements - single_room_requirements = "I only need one room" - - if single_room: self.additional_requirements_room = single_room_requirements - else: self.additional_requirements_room = "N/A" - - self.additional_requirements_door = "N/A" - self.additional_requirements_window = "Only one wall of each room should have windows" - self.additional_requirements_object = "N/A" - self.additional_requirements_ceiling = "N/A" - - - def get_empty_scene(self): - with open("modules/empty_house.json", "r") as f: - scene = json.load(f) - return scene - - - def empty_house(self, scene): - scene["rooms"] = [] - scene["walls"] = [] - scene["doors"] = [] - scene["windows"] = [] - scene["objects"] = [] - scene["proceduralParameters"]["lights"] = [] - return scene - - - def generate_rooms(self, 
scene, additional_requirements_room, used_assets=[]): - self.floor_generator.used_assets = used_assets - rooms = self.floor_generator.generate_rooms(scene, additional_requirements_room) - scene["rooms"] = rooms - return scene - - - def generate_walls(self, scene): - wall_height, walls = self.wall_generator.generate_walls(scene) - scene["wall_height"] = wall_height - scene["walls"] = walls - return scene - - - def generate_doors(self, scene, additional_requirements_door="N/A", used_assets=[]): - self.door_generator.used_assets = used_assets - - # generate doors - raw_doorway_plan, doors, room_pairs, open_room_pairs = self.door_generator.generate_doors(scene, additional_requirements_door) - scene["raw_doorway_plan"] = raw_doorway_plan - scene["doors"] = doors - scene["room_pairs"] = room_pairs - scene["open_room_pairs"] = open_room_pairs - - # update walls - updated_walls, open_walls = self.wall_generator.update_walls(scene["walls"], open_room_pairs) - scene["walls"] = updated_walls - scene["open_walls"] = open_walls - return scene - - - def generate_windows(self, scene, additional_requirements_window="I want to install windows to only one wall of each room", used_assets=[]): - self.window_generator.used_assets = used_assets - raw_window_plan, walls, windows = self.window_generator.generate_windows(scene, additional_requirements_window) - scene["raw_window_plan"] = raw_window_plan - scene["windows"] = windows - scene["walls"] = walls - return scene - - - def select_objects(self, scene, additional_requirements_object, used_assets=[]): - self.object_selector.used_assets = used_assets - object_selection_plan, selected_objects = self.object_selector.select_objects(scene, additional_requirements_object) - scene["object_selection_plan"] = object_selection_plan - scene["selected_objects"] = selected_objects - return scene - - - def generate_ceiling_objects(self, scene, additional_requirements_ceiling="N/A"): - raw_ceiling_plan, ceiling_objects = 
self.ceiling_generator.generate_ceiling_objects(scene, additional_requirements_ceiling) - scene["ceiling_objects"] = ceiling_objects - scene["raw_ceiling_plan"] = raw_ceiling_plan - return scene - - - def generate_small_objects(self, scene, used_assets=[]): - self.small_object_generator.used_assets = used_assets - controller = self.small_object_generator.start_controller(scene, self.objaverse_asset_dir) - event = controller.reset() - receptacle_ids = [obj["objectId"] for obj in event.metadata["objects"] if obj["receptacle"] and "___" not in obj["objectId"]] - if "Floor" in receptacle_ids: receptacle_ids.remove("Floor") - - try: - small_objects, receptacle2small_objects = self.small_object_generator.generate_small_objects(scene, controller, receptacle_ids) - scene["small_objects"] = small_objects - scene["receptacle2small_objects"] = receptacle2small_objects - except: - scene["small_objects"] = [] - print("Failed to generate small objects") - - controller.stop() # stop controller to avoid memory leak - return scene - - - def change_ceiling_material(self, scene): - first_wall_material = scene["rooms"][0]["wallMaterial"] - scene["proceduralParameters"]["ceilingMaterial"] = first_wall_material - return scene - - - def generate_scene(self, scene, query, save_dir, used_assets=[], add_ceiling=False, generate_image=True, generate_video=False, add_time=True, use_constraint=True, random_selection=False, use_milp=False): - # initialize scene - query = query.replace("_", " ") - scene["query"] = query - - # empty house - scene = self.empty_house(scene) - - # generate rooms - scene = self.generate_rooms(scene, additional_requirements_room=self.additional_requirements_room, used_assets=used_assets) - - # generate walls - scene = self.generate_walls(scene) - - # generate doors - scene = self.generate_doors(scene, additional_requirements_door=self.additional_requirements_door, used_assets=used_assets) - - # generate windows - scene = self.generate_windows(scene, 
additional_requirements_window=self.additional_requirements_window, used_assets=used_assets) - - # select objects - self.object_selector.random_selection = random_selection - scene = self.select_objects(scene, additional_requirements_object=self.additional_requirements_object, used_assets=used_assets) - - # generate floor objects - self.floor_object_generator.use_milp = use_milp - scene["floor_objects"] = self.floor_object_generator.generate_objects(scene, use_constraint=use_constraint) - - # generate wall objects - scene["wall_objects"] = self.wall_object_generator.generate_wall_objects(scene, use_constraint=use_constraint) - - # combine floor and wall objects - scene["objects"] = scene["floor_objects"] + scene["wall_objects"] - - # generate small objects - scene = self.generate_small_objects(scene, used_assets=used_assets) - scene["objects"] += scene["small_objects"] - - # generate ceiling objects - if add_ceiling: - scene = self.generate_ceiling_objects(scene, additional_requirements_ceiling=self.additional_requirements_ceiling) - scene["objects"] += scene["ceiling_objects"] - - # generate lights - lights = generate_lights(scene) - scene["proceduralParameters"]["lights"] = lights - - # assign layers - scene = map_asset2layer(scene) - - # assign skybox - scene = getSkybox(scene) - - # change ceiling material - scene = self.change_ceiling_material(scene) - - # create folder - query_name = query.replace(" ", "_").replace("'", "")[:30] - create_time = str(datetime.datetime.now()).replace(" ", "-").replace(":", "-").replace(".", "-") - - if add_time: folder_name = f"{query_name}-{create_time}" # query name + time - else: folder_name = query_name # query name only - - os.makedirs(f"{save_dir}/{folder_name}", exist_ok=True) - with open(f"{save_dir}/{folder_name}/{query_name}.json", "w") as f: - json.dump(scene, f, indent=4) - - # save top down image - if generate_image: - top_image = get_top_down_frame(scene, self.objaverse_asset_dir, 1024, 1024) - top_image.show() - 
top_image.save(f"{save_dir}/{folder_name}/{query_name}.png") - - # save video - if generate_video: - scene["objects"] = scene["floor_objects"] + scene["wall_objects"] + scene["small_objects"] - final_video = room_video(scene, self.objaverse_asset_dir, 1024, 1024) - final_video.write_videofile(f"{save_dir}/{folder_name}/{query_name}.mp4", fps=30) - - return scene - - - def generate_variants(self, query, original_scene, save_dir="data/scenes", number_of_variants=5, used_assets=[]): - self.object_selector.reuse_selection = False # force the selector to retrieve different assets - - # create the list of used assets - used_assets += [obj["assetId"] for obj in original_scene["objects"] + original_scene["windows"] + original_scene["doors"]] - used_assets += [room["floorMaterial"]["name"] for room in original_scene["rooms"]] - used_assets += [wall["material"]["name"] for wall in original_scene["walls"]] - used_assets = list(set(used_assets)) - - variant_scenes = [] - for i in tqdm(range(number_of_variants)): - variant_scene = self.generate_scene(original_scene.copy(), query, save_dir, used_assets, generate_image=True, generate_video=False, add_time=True) - variant_scenes.append(variant_scene) - used_assets += [obj["assetId"] for obj in variant_scene["objects"] + variant_scene["windows"] + variant_scene["doors"]] - used_assets += [room["floorMaterial"]["name"] for room in variant_scene["rooms"]] - used_assets += [wall["material"]["name"] for wall in variant_scene["walls"]] - used_assets = list(set(used_assets)) - return variant_scenes - - - def ablate_placement(self, scene, query, save_dir, used_assets=[], add_ceiling=False, generate_image=True, generate_video=False, add_time=True, use_constraint=False, constraint_type="llm"): - # place floor objects - if use_constraint: self.floor_object_generator.constraint_type = constraint_type # ablate the constraint types - scene["floor_objects"] = self.floor_object_generator.generate_objects(scene, use_constraint=use_constraint) - if 
len(scene["floor_objects"]) == 0: - print("No object is placed, skip this scene") - return None # if no object is placed, return None - # place wall objects - if use_constraint: self.wall_object_generator.constraint_type = constraint_type - scene["wall_objects"] = self.wall_object_generator.generate_wall_objects(scene, use_constraint=use_constraint) - - # combine floor and wall objects - scene["objects"] = scene["floor_objects"] + scene["wall_objects"] - - # generate small objects - scene = self.generate_small_objects(scene, used_assets=used_assets) - scene["objects"] += scene["small_objects"] - - # assign layers - scene = map_asset2layer(scene) - - # take the first 30 characters of the query as the folder name - query_name = query.replace(" ", "_").replace("'", "")[:30] - create_time = str(datetime.datetime.now()).replace(" ", "-").replace(":", "-").replace(".", "-") - - if add_time: folder_name = f"{query_name}-{create_time}" # query name + time - else: folder_name = query_name # query name only - - os.makedirs(f"{save_dir}/{folder_name}", exist_ok=True) - with open(f"{save_dir}/{folder_name}/{query_name}.json", "w") as f: - json.dump(scene, f, indent=4) - - # save top down image - if generate_image: - top_image = get_top_down_frame(scene, self.objaverse_asset_dir, 1024, 1024) - top_image.show() - top_image.save(f"{save_dir}/{folder_name}/{query_name}.png") - - return scene \ No newline at end of file diff --git a/modules/objaverse_retriever.py b/modules/objaverse_retriever.py deleted file mode 100644 index 286a262..0000000 --- a/modules/objaverse_retriever.py +++ /dev/null @@ -1,74 +0,0 @@ -import json -import torch -import pickle - -class ObjaverseRetriever(): - def __init__(self, clip_model, clip_preprocess, clip_tokenizer, sbert_model, version, retrieval_threshold): - self.database = json.load(open(f"./data/objaverse_holodeck/{version}/objaverse_holodeck_database.json", "r")) - self.asset_ids = list(self.database.keys()) - - self.clip_model = clip_model - 
self.clip_preprocess = clip_preprocess - self.clip_tokenizer = clip_tokenizer - self.sbert_model = sbert_model - - self.clip_features = pickle.load(open(f"data/objaverse_holodeck/{version}/objaverse_holodeck_features_clip_3.p", "rb")).float() # clip features - self.sbert_features = pickle.load(open(f"data/objaverse_holodeck/{version}/objaverse_holodeck_description_features_sbert.p", "rb")).float() # sbert features - self.retrieval_threshold = retrieval_threshold - - self.use_text = True - - - def retrieve(self, queries, threshold=28): - with torch.no_grad(): - query_feature_clip = self.clip_model.encode_text(self.clip_tokenizer(queries)) - query_feature_clip /= query_feature_clip.norm(dim=-1, keepdim=True) - - clip_similarities = query_feature_clip @ self.clip_features.T * 100 - clip_similarities = clip_similarities.reshape((len(queries), len(self.asset_ids), 3)) - clip_similarities = torch.max(clip_similarities, dim=2).values - - query_feature_sbert = self.sbert_model.encode(queries, convert_to_tensor=True, show_progress_bar=False) - sbert_similarities = query_feature_sbert @ self.sbert_features.T - - if self.use_text: similarities = clip_similarities + sbert_similarities - else: similarities = clip_similarities - - threshold_indices = torch.where(clip_similarities > threshold) - - unsorted_results = [] - for query_index, asset_index in zip(*threshold_indices): - score = similarities[query_index, asset_index].item() - unsorted_results.append((self.asset_ids[asset_index], score)) - - # Sorting the results in descending order by score - results = sorted(unsorted_results, key=lambda x: x[1], reverse=True) - - return results - - - def compute_size_difference(self, target_size, candidates): - candidate_sizes = [] - for uid, _ in candidates: - size = self.database[uid]['assetMetadata']['boundingBox'] - size_list = [size['x'] * 100, size['y'] * 100, size['z'] * 100] - size_list.sort() - candidate_sizes.append(size_list) - - candidate_sizes = torch.tensor(candidate_sizes) 
- - target_size_list = list(target_size) - target_size_list.sort() - target_size = torch.tensor(target_size_list) - - size_difference = abs(candidate_sizes - target_size).mean(axis=1)/100 - size_difference = size_difference.tolist() - - candidates_with_size_difference = [] - for i, (uid, score) in enumerate(candidates): - candidates_with_size_difference.append((uid, score - size_difference[i] * 10)) - - # sort the candidates by score - candidates_with_size_difference = sorted(candidates_with_size_difference, key=lambda x: x[1], reverse=True) - - return candidates_with_size_difference \ No newline at end of file diff --git a/modules/object_selector.py b/modules/object_selector.py deleted file mode 100644 index c3f3f25..0000000 --- a/modules/object_selector.py +++ /dev/null @@ -1,625 +0,0 @@ -import re -import copy -import json -import torch -import random -import multiprocessing -from typing import Dict -from colorama import Fore -from shapely import Polygon -import torch.nn.functional as F -import modules.prompts as prompts -from langchain import PromptTemplate -from modules.floor_objects import DFS_Solver_Floor -from modules.wall_objects import DFS_Solver_Wall - - -class ObjectSelector: - def __init__(self, object_retriever, llm): - # object retriever - self.object_retriever = object_retriever - self.database = object_retriever.database - - # language model and prompt templates - self.llm = llm - self.object_selection_template_1 = prompts.object_selection_prompt_new_1 - self.object_selection_template_2 = PromptTemplate(input_variables=["object_selection_prompt_new_1", "object_selection_1", "room"], template=prompts.object_selection_prompt_new_2) - - # hyperparameters - self.floor_capacity_ratio = 0.4 - self.wall_capacity_ratio = 0.5 - self.object_size_tolerance = 0.8 - self.similarity_threshold_floor = 31 # need to be tuned - self.similarity_threshold_wall = 31 # need to be tuned - self.thin_threshold = 3 - self.used_assets = [] - self.consider_size = True - 
self.size_buffer = 10 - - self.random_selection = False - self.reuse_selection = False - self.multiprocessing = True - - - def select_objects(self, scene, additional_requirements="N/A"): - rooms_types = [room["roomType"] for room in scene["rooms"]] - room2area = {room["roomType"]: self.get_room_area(room) for room in scene["rooms"]} - room2size = {room["roomType"]: self.get_room_size(room, scene["wall_height"]) for room in scene["rooms"]} - room2perimeter = {room["roomType"]: self.get_room_perimeter(room) for room in scene["rooms"]} - room2vertices = {room["roomType"]: [(x * 100, y * 100) for (x, y) in room["vertices"]] for room in scene["rooms"]} - - room2floor_capacity = {room_type: [room_area * self.floor_capacity_ratio, 0] for room_type, room_area in room2area.items()} - room2floor_capacity = self.update_floor_capacity(room2floor_capacity, scene) - room2wall_capacity = {room_type: [room_perimeter * self.wall_capacity_ratio, 0] for room_type, room_perimeter in room2perimeter.items()} - selected_objects = {room["roomType"]: {"floor": [], "wall": []} for room in scene["rooms"]} - - if "object_selection_plan" in scene: - object_selection_plan = scene["object_selection_plan"] - if self.reuse_selection: - selected_objects = scene["selected_objects"] - else: - for room_type in rooms_types: - floor_objects, _, wall_objects, _ = self.get_objects_by_room(object_selection_plan[room_type], scene, room2size[room_type], room2floor_capacity[room_type], room2wall_capacity[room_type], room2vertices[room_type]) - selected_objects[room_type]["floor"] = floor_objects - selected_objects[room_type]["wall"] = wall_objects - else: - object_selection_plan = {room["roomType"]: [] for room in scene["rooms"]} - packed_args = [(room_type, scene, additional_requirements, room2size, room2floor_capacity, room2wall_capacity, room2vertices) for room_type in rooms_types] - - if self.multiprocessing: - pool = multiprocessing.Pool(processes=4) - results = pool.map(self.plan_room, packed_args) - 
pool.close() - pool.join() - else: - results = [self.plan_room(args) for args in packed_args] - - for room_type, result in results: - selected_objects[room_type]["floor"] = result["floor"] - selected_objects[room_type]["wall"] = result["wall"] - object_selection_plan[room_type] = result["plan"] - - print(f"\n{Fore.GREEN}AI: Here is the object selection plan:\n{object_selection_plan}{Fore.RESET}") - return object_selection_plan, selected_objects - - - def plan_room(self, args): - room_type, scene, additional_requirements, room2size, room2floor_capacity, room2wall_capacity, room2vertices = args - print(f"\n{Fore.GREEN}AI: Selecting objects for {room_type}...{Fore.RESET}\n") - - result = {} - room_size_str = f"{int(room2size[room_type][0])*100}cm in length, {int(room2size[room_type][1])*100}cm in width, {int(room2size[room_type][2])*100}cm in height" - - prompt_1 = self.object_selection_template_1.replace("INPUT", scene["query"]).replace("ROOM_TYPE", room_type).replace("ROOM_SIZE", room_size_str).replace("REQUIREMENTS", additional_requirements) - # print(f"\nUser: {prompt_1}\n") - - output_1 = self.llm(prompt_1).lower() - plan_1 = self.extract_json(output_1) - - if plan_1 is None: - print(f"Error while extracting the JSON for {room_type}.") - return result - - floor_objects, floor_capacity, wall_objects, wall_capacity = self.get_objects_by_room(plan_1, scene, room2size[room_type], room2floor_capacity[room_type], room2wall_capacity[room_type], room2vertices[room_type]) - - if floor_capacity[1] / floor_capacity[0] >= 0.8: - result["floor"] = floor_objects - result["wall"] = wall_objects - result["plan"] = plan_1 - else: - print(f"{Fore.RED}AI: The floor capacity of {room_type} is {floor_capacity[1]}m^2, which is less than 70% of the total floor capacity {floor_capacity[0]}m^2.{Fore.RESET}") - prompt_2 = self.object_selection_template_2.format(object_selection_prompt_new_1=prompt_1, object_selection_1=output_1, room=room_type) - output_2 = self.llm(prompt_2).lower() - 
plan_2 = self.extract_json(output_2) - - new_plan = copy.deepcopy(plan_1) - for object in plan_2: new_plan[object] = plan_2[object] - - floor_objects, _, wall_objects, _ = self.get_objects_by_room(new_plan, scene, room2size[room_type], room2floor_capacity[room_type], room2wall_capacity[room_type], room2vertices[room_type]) - result["floor"] = floor_objects - result["wall"] = wall_objects - result["plan"] = new_plan - - return room_type, result - - - def extract_json(self, input_string): - # Using regex to identify the JSON structure in the string - json_match = re.search(r'{.*}', input_string, re.DOTALL) - if json_match: - extracted_json = json_match.group(0) - try: - # Convert the extracted JSON string into a Python dictionary - json_dict = json.loads(extracted_json) - json_dict = self.check_dict(json_dict) - return json_dict - except json.JSONDecodeError: - print(input_string) - print("Error while decoding the JSON.") - return None - else: - print("No valid JSON found.") - return None - - - def check_dict(self, dict): - valid = True - attributes = ["description", "location", "size", "quantity", "variance_type", "objects_on_top"] - for key, value in dict.items(): - if not isinstance(key, str): valid = False; break - - if not isinstance(value, Dict): valid = False; break - - for attribute in attributes: - if attribute not in value: valid = False; break - - if not isinstance(value["description"], str): valid = False; break - - if value["location"] not in ["floor", "wall"]: dict[key]["location"] = "floor" - - if not isinstance(value["size"], list) or len(value["size"]) != 3 or not all(isinstance(i, int) for i in value["size"]): dict[key]["size"] = None - - if not isinstance(value["quantity"], int): dict[key]["quantity"] = 1 - - if not isinstance(value["variance_type"], str) or value["variance_type"] not in ["same", "varied"]: dict[key]["variance_type"] = "same" - - if not isinstance(value["objects_on_top"], list): dict[key]["objects_on_top"] = [] - - for i, child in 
enumerate(value["objects_on_top"]): - if not isinstance(child, Dict): valid = False; break - - for attribute in ["object_name", "quantity", "variance_type"]: - if attribute not in child: valid = False; break - - if not isinstance(child["object_name"], str): valid = False; break - - if not isinstance(child["quantity"], int): dict[key]["objects_on_top"][i]["quantity"] = 1 - - if not isinstance(child["variance_type"], str) or child["variance_type"] not in ["same", "varied"]: dict[key]["objects_on_top"][i]["variance_type"] = "same" - - if not valid: return None - else: return dict - - - def get_objects_by_room(self, parsed_plan, scene, room_size, floor_capacity, wall_capacity, vertices): - # get the floor and wall objects - floor_object_list = [] - wall_object_list = [] - for object_name, object_info in parsed_plan.items(): - object_info["object_name"] = object_name - if object_info["location"] == "floor": floor_object_list.append(object_info) - else: wall_object_list.append(object_info) - - floor_objects, floor_capacity = self.get_floor_objects(floor_object_list, floor_capacity, room_size, vertices, scene) - wall_objects, wall_capacity = self.get_wall_objects(wall_object_list, wall_capacity, room_size, vertices, scene) - - return floor_objects, floor_capacity, wall_objects, wall_capacity - - - def get_room_size(self, room, wall_height): - floor_polygon = room["floorPolygon"] - x_values = [point['x'] for point in floor_polygon] - z_values = [point['z'] for point in floor_polygon] - x_dim = max(x_values) - min(x_values) - z_dim = max(z_values) - min(z_values) - - if x_dim > z_dim: return (x_dim, wall_height, z_dim) - else: return (z_dim, wall_height, x_dim) - - - def get_room_area(self, room): - room_vertices = room["vertices"] - room_polygon = Polygon(room_vertices) - return room_polygon.area - - - def get_room_perimeter(self, room): - room_vertices = room["vertices"] - room_polygon = Polygon(room_vertices) - return room_polygon.length - - - def get_floor_objects(self, 
floor_object_list, floor_capacity, room_size, room_vertices, scene): - selected_floor_objects_all = [] - for floor_object in floor_object_list: - object_type = floor_object["object_name"] - object_description = floor_object["description"] - object_size = floor_object["size"] - quantity = min(floor_object["quantity"], 10) - variance_type = floor_object["variance_type"] - - candidates = self.object_retriever.retrieve([f"a 3D model of {object_type}, {object_description}"], self.similarity_threshold_floor) - - # check on floor objects - candidates = [candidate for candidate in candidates if self.database[candidate[0]]["annotations"]["onFloor"] == True] # only select objects on the floor - candidates = [candidate for candidate in candidates if self.database[candidate[0]]["annotations"]["onCeiling"] == False] # only select objects not on the ceiling - - # ignore doors and windows and frames - candidates = [candidate for candidate in candidates if "door" not in self.database[candidate[0]]["annotations"]["category"].lower()] - candidates = [candidate for candidate in candidates if "window" not in self.database[candidate[0]]["annotations"]["category"].lower()] - candidates = [candidate for candidate in candidates if "frame" not in self.database[candidate[0]]["annotations"]["category"].lower()] - - # check if the object is too big - candidates = self.check_object_size(candidates, room_size) - - # check if object can be placed on the floor - candidates = self.check_floor_placement(candidates[:20], room_vertices, scene) - - # No candidates found - if len(candidates) == 0: print("No candidates found for {} {}".format(object_type, object_description)); continue - - # remove used assets - top_one_candidate = candidates[0] - if len(candidates) > 1: candidates = [candidate for candidate in candidates if candidate[0] not in self.used_assets] - if len(candidates) == 0: candidates = [top_one_candidate] - - # consider object size difference - if object_size is not None and 
self.consider_size: - candidates = self.object_retriever.compute_size_difference(object_size, candidates) - - candidates = candidates[:10] # only select top 10 candidates - - selected_asset_ids = [] - if variance_type == "same": - selected_candidate = self.random_select(candidates) - selected_asset_id = selected_candidate[0] - selected_asset_ids = [selected_asset_id] * quantity - - elif variance_type == "varied": - for i in range(quantity): - selected_candidate = self.random_select(candidates) - selected_asset_id = selected_candidate[0] - selected_asset_ids.append(selected_asset_id) - if len(candidates) > 1: candidates.remove(selected_candidate) - - for i in range(quantity): - selected_asset_id = selected_asset_ids[i] - object_name = f"{object_type}-{i}" - selected_floor_objects_all.append((object_name, selected_asset_id)) - - # reselect objects if they exceed floor capacity, consider the diversity of objects - selected_floor_objects = [] - while True: - if len(selected_floor_objects_all) == 0: break - current_selected_asset_ids = [] - current_number_of_objects = len(selected_floor_objects) - for object_name, selected_asset_id in selected_floor_objects_all: - if selected_asset_id not in current_selected_asset_ids: - selected_asset_size = self.database[selected_asset_id]["assetMetadata"]["boundingBox"] - selected_asset_capacity = selected_asset_size["x"] * selected_asset_size["z"] - if floor_capacity[1] + selected_asset_capacity > floor_capacity[0] and len(selected_floor_objects) > 0: - print(f"{object_type} {object_description} exceeds floor capacity") - else: - current_selected_asset_ids.append(selected_asset_id) - selected_floor_objects.append((object_name, selected_asset_id)) - selected_floor_objects_all.remove((object_name, selected_asset_id)) - floor_capacity = (floor_capacity[0], floor_capacity[1] + selected_asset_capacity) - if len(selected_floor_objects) == current_number_of_objects: print("No more objects can be added"); break - - # sort objects by object 
type - object_type2objects = {} - for object_name, selected_asset_id in selected_floor_objects: - object_type = object_name.split("-")[0] - if object_type not in object_type2objects: object_type2objects[object_type] = [] - object_type2objects[object_type].append((object_name, selected_asset_id)) - - selected_floor_objects_ordered = [] - for object_type in object_type2objects: - selected_floor_objects_ordered += sorted(object_type2objects[object_type]) - - return selected_floor_objects_ordered, floor_capacity - - - def get_wall_objects(self, wall_object_list, wall_capacity, room_size, room_vertices, scene): - selected_wall_objects_all = [] - for wall_object in wall_object_list: - object_type = wall_object["object_name"] - object_description = wall_object["description"] - object_size = wall_object["size"] - quantity = min(wall_object["quantity"], 10) - variance_type = wall_object["variance_type"] - - candidates = self.object_retriever.retrieve([f"a 3D model of {object_type}, {object_description}"], self.similarity_threshold_wall) - - # check on wall objects - candidates = [candidate for candidate in candidates if self.database[candidate[0]]["annotations"]["onWall"] == True] # only select objects on the wall - - # ignore doors and windows - candidates = [candidate for candidate in candidates if "door" not in self.database[candidate[0]]["annotations"]["category"].lower()] - candidates = [candidate for candidate in candidates if "window" not in self.database[candidate[0]]["annotations"]["category"].lower()] - - # check if the object is too big - candidates = self.check_object_size(candidates, room_size) - - # check thin objects - candidates = self.check_thin_object(candidates) - - # check if object can be placed on the wall - candidates = self.check_wall_placement(candidates[:20], room_vertices, scene) - - if len(candidates) == 0: print("No candidates found for {} {}".format(object_type, object_description)); continue - - # remove used assets - top_one_candidate = 
candidates[0] - if len(candidates) > 1: candidates = [candidate for candidate in candidates if candidate[0] not in self.used_assets] - if len(candidates) == 0: candidates = [top_one_candidate] - - # consider object size difference - if object_size is not None and self.consider_size: - candidates = self.object_retriever.compute_size_difference(object_size, candidates) - - candidates = candidates[:10] # only select top 10 candidates - - selected_asset_ids = [] - if variance_type == "same": - selected_candidate = self.random_select(candidates) - selected_asset_id = selected_candidate[0] - selected_asset_ids = [selected_asset_id] * quantity - - elif variance_type == "varied": - for i in range(quantity): - selected_candidate = self.random_select(candidates) - selected_asset_id = selected_candidate[0] - selected_asset_ids.append(selected_asset_id) - if len(candidates) > 1: candidates.remove(selected_candidate) - - for i in range(quantity): - selected_asset_id = selected_asset_ids[i] - object_name = f"{object_type}-{i}" - selected_wall_objects_all.append((object_name, selected_asset_id)) - - # reselect objects if they exceed wall capacity, consider the diversity of objects - selected_wall_objects = [] - while True: - if len(selected_wall_objects_all) == 0: break - current_selected_asset_ids = [] - current_number_of_objects = len(selected_wall_objects) - for object_name, selected_asset_id in selected_wall_objects_all: - if selected_asset_id not in current_selected_asset_ids: - selected_asset_size = self.database[selected_asset_id]["assetMetadata"]["boundingBox"] - selected_asset_capacity = selected_asset_size["x"] - if wall_capacity[1] + selected_asset_capacity > wall_capacity[0] and len(selected_wall_objects) > 0: - print(f"{object_type} {object_description} exceeds wall capacity") - else: - current_selected_asset_ids.append(selected_asset_id) - selected_wall_objects.append((object_name, selected_asset_id)) - selected_wall_objects_all.remove((object_name, 
selected_asset_id)) - wall_capacity = (wall_capacity[0], wall_capacity[1] + selected_asset_capacity) - if len(selected_wall_objects) == current_number_of_objects: print("No more objects can be added"); break - - # sort objects by object type - object_type2objects = {} - for object_name, selected_asset_id in selected_wall_objects: - object_type = object_name.split("-")[0] - if object_type not in object_type2objects: object_type2objects[object_type] = [] - object_type2objects[object_type].append((object_name, selected_asset_id)) - - selected_wall_objects_ordered = [] - for object_type in object_type2objects: - selected_wall_objects_ordered += sorted(object_type2objects[object_type]) - - return selected_wall_objects_ordered, wall_capacity - - - def check_object_size(self, candidates, room_size): - valid_candidates = [] - for candidate in candidates: - dimension = self.database[candidate[0]]["assetMetadata"]["boundingBox"] - size = [dimension["x"], dimension["y"], dimension["z"]] - if size[2] > size[0]: size = [size[2], size[1], size[0]] # make sure that x > z - - if size[0] > room_size[0] * self.object_size_tolerance: continue - if size[1] > room_size[1] * self.object_size_tolerance: continue - if size[2] > room_size[2] * self.object_size_tolerance: continue - if size[0] * size[2] > room_size[0] * room_size[2] * 0.5: continue # TODO: consider using the floor area instead of the room area - - valid_candidates.append(candidate) - - return valid_candidates - - - def check_thin_object(self, candidates): - valid_candidates = [] - for candidate in candidates: - dimension = self.database[candidate[0]]["assetMetadata"]["boundingBox"] - size = [dimension["x"], dimension["y"], dimension["z"]] - if size[2] > min(size[0], size[1]) * self.thin_threshold: continue - valid_candidates.append(candidate) - return valid_candidates - - - def random_select(self, candidates): - if self.random_selection: - selected_candidate = random.choice(candidates) - else: - scores = [candidate[1] for 
candidate in candidates] - scores_tensor = torch.Tensor(scores) - probas = F.softmax(scores_tensor, dim=0) # TODO: consider using normalized scores - selected_index = torch.multinomial(probas, 1).item() - selected_candidate = candidates[selected_index] - return selected_candidate - - - def update_floor_capacity(self, room2floor_capacity, scene): - for room in scene["rooms"]: - room_vertices = room["vertices"] - room_poly = Polygon(room_vertices) - for door in scene["doors"]: - for door_vertices in door["doorBoxes"]: - door_poly = Polygon(door_vertices) - door_center = door_poly.centroid - door_area = door_poly.area - if room_poly.contains(door_center): - room2floor_capacity[room["id"]][1] += door_area * 0.6 - - if scene["open_walls"] != []: - for open_wall_vertices in scene["open_walls"]["openWallBoxes"]: - open_wall_poly = Polygon(open_wall_vertices) - open_wall_center = open_wall_poly.centroid - if room_poly.contains(open_wall_center): - room2floor_capacity[room["id"]][1] += open_wall_poly.area * 0.6 - - return room2floor_capacity - - - def update_wall_capacity(self, room2wall_capacity, scene): - for room in scene["rooms"]: - room_vertices = room["vertices"] - room_poly = Polygon(room_vertices) - for window in scene["windows"]: - for window_vertices in window["windowBoxes"]: - window_poly = Polygon(window_vertices) - window_center = window_poly.centroid - window_x = window_poly.bounds[2] - window_poly.bounds[0] - window_y = window_poly.bounds[3] - window_poly.bounds[1] - window_width = max(window_x, window_y) - if room_poly.contains(window_center): - room2wall_capacity[room["id"]][1] += window_width * 0.6 - - if scene["open_walls"] != []: - for open_wall_vertices in scene["open_walls"]["openWallBoxes"]: - open_wall_poly = Polygon(open_wall_vertices) - open_wall_center = open_wall_poly.centroid - open_wall_x = open_wall_poly.bounds[2] - open_wall_poly.bounds[0] - open_wall_y = open_wall_poly.bounds[3] - open_wall_poly.bounds[1] - open_wall_width = max(open_wall_x, 
open_wall_y) - if room_poly.contains(open_wall_center): - room2wall_capacity[room["id"]][1] += open_wall_width * 0.6 - - return room2wall_capacity - - - def check_floor_placement(self, candidates, room_vertices, scene): - room_x = max([vertex[0] for vertex in room_vertices]) - min([vertex[0] for vertex in room_vertices]) - room_z = max([vertex[1] for vertex in room_vertices]) - min([vertex[1] for vertex in room_vertices]) - grid_size = int(max(room_x // 20, room_z // 20)) - - solver = DFS_Solver_Floor(grid_size=grid_size) - - room_poly = Polygon(room_vertices) - initial_state = self.get_initial_state_floor(room_vertices, scene, add_window=False) - - grid_points = solver.create_grids(room_poly) - grid_points = solver.remove_points(grid_points, initial_state) - - valid_candidates = [] - for candidate in candidates: - object_size = self.database[candidate[0]]["assetMetadata"]["boundingBox"] - object_dim = (object_size["x"]*100 + self.size_buffer, object_size["z"]*100 + self.size_buffer) - - solutions = solver.get_all_solutions(room_poly, grid_points, object_dim) - solutions = solver.filter_collision(initial_state, solutions) - solutions = solver.place_edge(room_poly, solutions, object_dim) - - if solutions != []: valid_candidates.append(candidate) - else: print(f"Floor Object {candidate[0]} (size: {object_dim}) cannot be placed in room"); continue - - return valid_candidates - - - def check_wall_placement(self, candidates, room_vertices, scene): - room_x = max([vertex[0] for vertex in room_vertices]) - min([vertex[0] for vertex in room_vertices]) - room_z = max([vertex[1] for vertex in room_vertices]) - min([vertex[1] for vertex in room_vertices]) - grid_size = int(max(room_x // 20, room_z // 20)) - - solver = DFS_Solver_Wall(grid_size=grid_size) - - room_poly = Polygon(room_vertices) - initial_state = self.get_initial_state_wall(room_vertices, scene) - grid_points = solver.create_grids(room_poly) - - valid_candidates = [] - for candidate in candidates: - object_size 
= self.database[candidate[0]]["assetMetadata"]["boundingBox"] - object_dim = (object_size["x"]*100, object_size["y"]*100, object_size["z"]*100) - - solutions = solver.get_all_solutions(room_poly, grid_points, object_dim, height=0) - solutions = solver.filter_collision(initial_state, solutions) - - if solutions != []: valid_candidates.append(candidate) - else: print(f"Wall Object {candidate[0]} (size: {object_dim}) cannot be placed in room"); continue - - return valid_candidates - - - def get_initial_state_floor(self, room_vertices, scene, add_window=True): - doors, windows, open_walls = scene["doors"], scene["windows"], scene["open_walls"] - room_poly = Polygon(room_vertices) - - initial_state = {} - i = 0 - for door in doors: - door_boxes = door["doorBoxes"] - for door_box in door_boxes: - door_vertices = [(x * 100, z * 100) for (x, z) in door_box] - door_poly = Polygon(door_vertices) - door_center = door_poly.centroid - if room_poly.contains(door_center): - initial_state[f"door-{i}"] = ((door_center.x, door_center.y), 0, door_vertices, 1) - i += 1 - - if add_window: - for window in windows: - window_boxes = window["windowBoxes"] - for window_box in window_boxes: - window_vertices = [(x * 100, z * 100) for (x, z) in window_box] - window_poly = Polygon(window_vertices) - window_center = window_poly.centroid - if room_poly.contains(window_center): - initial_state[f"window-{i}"] = ((window_center.x, window_center.y), 0, window_vertices, 1) - i += 1 - - if open_walls != []: - for open_wall_box in open_walls["openWallBoxes"]: - open_wall_vertices = [(x * 100, z * 100) for (x, z) in open_wall_box] - open_wall_poly = Polygon(open_wall_vertices) - open_wall_center = open_wall_poly.centroid - if room_poly.contains(open_wall_center): - initial_state[f"open-{i}"] = ((open_wall_center.x, open_wall_center.y), 0, open_wall_vertices, 1) - i += 1 - - return initial_state - - - def get_initial_state_wall(self, room_vertices, scene): - doors, windows, open_walls = scene["doors"], 
scene["windows"], scene["open_walls"] - room_poly = Polygon(room_vertices) - initial_state = {} - i = 0 - for door in doors: - door_boxes = door["doorBoxes"] - for door_box in door_boxes: - door_vertices = [(x * 100, z * 100) for (x, z) in door_box] - door_poly = Polygon(door_vertices) - door_center = door_poly.centroid - if room_poly.contains(door_center): - door_height = door["assetPosition"]["y"] * 100 * 2 - x_min, z_min, x_max, z_max = door_poly.bounds - initial_state[f"door-{i}"] = ((x_min, 0, z_min), (x_max, door_height, z_max), 0, door_vertices, 1) - i += 1 - - for window in windows: - window_boxes = window["windowBoxes"] - for window_box in window_boxes: - window_vertices = [(x * 100, z * 100) for (x, z) in window_box] - window_poly = Polygon(window_vertices) - window_center = window_poly.centroid - if room_poly.contains(window_center): - y_min = window["holePolygon"][0]["y"] * 100 - y_max = window["holePolygon"][1]["y"] * 100 - x_min, z_min, x_max, z_max = window_poly.bounds - initial_state[f"window-{i}"] = ((x_min, y_min, z_min), (x_max, y_max, z_max), 0, window_vertices, 1) - i += 1 - - if len(open_walls) != 0: - open_wall_boxes = open_walls["openWallBoxes"] - for open_wall_box in open_wall_boxes: - open_wall_vertices = [(x * 100, z * 100) for (x, z) in open_wall_box] - open_wall_poly = Polygon(open_wall_vertices) - open_wall_center = open_wall_poly.centroid - if room_poly.contains(open_wall_center): - x_min, z_min, x_max, z_max = open_wall_poly.bounds - initial_state[f"open-{i}"] = ((x_min, 0, z_min), (x_max, scene["wall_height"] * 100, z_max), 0, open_wall_vertices, 1) - i += 1 - - return initial_state \ No newline at end of file diff --git a/modules/windows.py b/modules/windows.py deleted file mode 100644 index 7c346b9..0000000 --- a/modules/windows.py +++ /dev/null @@ -1,297 +0,0 @@ -import re -import ast -import copy -import json -import random -import numpy as np -from colorama import Fore -import modules.prompts as prompts -from langchain import 
PromptTemplate - -class WindowGenerator(): - def __init__(self, llm): - self.json_template = {"assetId": None, "id": None, "room0": None, "room1": None, - "wall0": None, "wall1": None, "holePolygon": [], - "assetPosition": {}, "roomId": None} - - self.window_data = json.load(open("data/windows/window-database.json", "r")) - self.window_ids = list(self.window_data.keys()) - self.hole_offset = 0.05 # make the hole smaller than windows - self.llm = llm - self.window_template = PromptTemplate(input_variables=["input", "walls", "wall_height", "additional_requirements"], - template=prompts.window_prompt) - self.used_assets = [] - - - def generate_windows(self, scene, additional_requirements_window): - # get organized walls - organized_walls, available_wall_str = self.get_wall_for_windows(scene) - window_prompt = self.window_template.format(input=scene["query"], - walls=available_wall_str, - wall_height=int(scene["wall_height"] * 100), - additional_requirements=additional_requirements_window) - - if "raw_window_plan" not in scene: raw_window_plan = self.llm(window_prompt) - else: raw_window_plan = scene["raw_window_plan"] - - print(f"\nUser: {window_prompt}\n") - print(f"{Fore.GREEN}AI: Here is the window plan:\n{raw_window_plan}{Fore.RESET}") - - walls = scene["walls"] - windows = [] - window_ids = [] - rows = [row.lower() for row in raw_window_plan.split("\n") if "|" in row] - room_with_windows = [] - for row in rows: - # parse window plan - parsed_plan = self.parse_raw_plan(row) - if parsed_plan is None: continue - - # get room id - room_id = parsed_plan["room_id"] - - # only one wall with windows per room - if room_id not in room_with_windows: room_with_windows.append(room_id) - else: print(f"Warning: room {room_id} already has windows"); continue - - # get wall id - try: wall_id = organized_walls[room_id][parsed_plan["wall_direction"]]["wall_id"] - except: print("Warning: no available wall for {}".format(row)); continue - - for wall in walls: - if wall["id"] == 
wall_id: - wall_info = wall - - # select window - window_id = self.select_window(parsed_plan["window_type"], parsed_plan["window_size"]) - window_polygons, window_positions, window_segments, window_boxes, new_wall_ids, updated_walls = self.get_window_polygon(window_id, parsed_plan["window_height"], parsed_plan["quantity"], wall_info, walls) - walls = updated_walls # update walls - - if window_polygons == []: print("Warning: no windows generated for {}".format(row)); continue - - # generate window json - for i in range(len(window_polygons)): - current_wall_id = new_wall_ids[i] - current_window = copy.deepcopy(self.json_template) - current_window["wall0"] = current_wall_id - current_window["wall1"] = current_wall_id + "|exterior" - current_window["room0"] = room_id - current_window["room1"] = room_id - current_window["roomId"] = room_id - current_window["assetId"] = window_id - current_window["id"] = f"window|{current_wall_id}|{i}" - current_window["holePolygon"] = window_polygons[i] - current_window["assetPosition"] = window_positions[i] - current_window["windowSegment"] = window_segments[i] - current_window["windowBoxes"] = window_boxes[i] - - # sometimes the same window is generated twice and causes errors - if current_window["id"] not in window_ids: - window_ids.append(current_window["id"]) - windows.append(current_window) - else: - print("Warning: duplicated window id: {}".format(current_window["id"])) - - return raw_window_plan, walls, windows - - - def parse_raw_plan(self, plan): - try: - pattern = re.compile(r'^(\d+[\.\)]\s*|- )') - plan = pattern.sub('', plan) - if plan[-1] == ".": plan = plan[:-1] # remove the last period - room_id, wall_direction, window_type, window_size, quantity, window_height = plan.split("|") - return {"room_id": room_id.strip(), - "wall_direction": wall_direction.strip().lower(), - "window_type": window_type.strip().lower(), - "window_size": ast.literal_eval(window_size.strip()), - "quantity": int(quantity.strip()), - 
"window_height": float(window_height.strip())} - except: - print("Error: could not parse window plan: {}".format(plan)) - return None - - - def get_room(self, rooms, room_type): - for room in rooms: - if room_type == room["roomType"]: return room - - - def get_wall_for_windows(self, scene): - walls_with_door = [] - for door in scene["doors"]: - walls_with_door.append(door["wall0"]) - walls_with_door.append(door["wall1"]) - - available_walls = [] - - for wall in scene["walls"]: - if "connect_exterior" in wall and wall["id"] not in walls_with_door: - available_walls.append(wall) - - organized_walls = {} - for wall in available_walls: - room_id = wall['roomId'] - wall_direction = wall['direction'] - - wall_width = wall['width'] - if wall_width < 2.0: continue - - if room_id not in organized_walls: organized_walls[room_id] = {} - - if wall_direction not in organized_walls[room_id]: - organized_walls[room_id][wall_direction] = {"wall_id": wall["id"], "wall_width": wall_width} - else: - if wall_width > organized_walls[room_id][wall_direction]["wall_width"]: - organized_walls[room_id][wall_direction] = {"wall_id": wall["id"], "wall_width": wall_width} - - available_wall_str = "" - for room_id in organized_walls: - current_str = "{}: ".format(room_id) - for wall_direction in organized_walls[room_id]: - current_str += "{}, {} cm; ".format(wall_direction, int(organized_walls[room_id][wall_direction]["wall_width"] * 100)) - available_wall_str += current_str + "\n" - - return organized_walls, available_wall_str - - - def select_window(self, window_type, window_size): - candidate_window_ids = [window_id for window_id in self.window_ids if self.window_data[window_id]["type"] == window_type] - size_differences = [np.linalg.norm(np.array(window_size) - np.array(self.window_data[window_id]["size"])) for window_id in candidate_window_ids] - sorted_window_ids = [x for _, x in sorted(zip(size_differences, candidate_window_ids))] - - top_window_ids = sorted_window_ids[0] - 
sorted_window_ids = [window_id for window_id in sorted_window_ids if window_id not in self.used_assets] - - if len(sorted_window_ids) == 0: selected_window_id = top_window_ids - else: selected_window_id = sorted_window_ids[0] - - return selected_window_id - - - def get_window_polygon(self, window_id, window_height, quantity, wall_info, walls): - window_x = self.window_data[window_id]["boundingBox"]["x"] - self.hole_offset - window_y = self.window_data[window_id]["boundingBox"]["y"] - self.hole_offset - - wall_width = wall_info["width"] - wall_height = wall_info["height"] - wall_segment = wall_info["segment"] - - window_height = min(window_height / 100.0, wall_height - window_y) - - quantity = min(quantity, int(wall_width / window_x)) - - wall_start = np.array(wall_segment[0]) - wall_end = np.array(wall_segment[1]) - original_vector = wall_end - wall_start - original_length = np.linalg.norm(original_vector) - normalized_vector = original_vector / original_length - subwall_length = original_length / quantity - - if quantity == 0: - return [], [], [], [], [], walls - - elif quantity == 1: - window_start = random.uniform(0, wall_width - window_x) - window_end = window_start + window_x - polygon = [{"x": window_start, "y": window_height, "z": 0}, - {"x": window_end, "y": window_height + window_y, "z": 0}] - position = {"x": (polygon[0]["x"] + polygon[1]["x"]) / 2, - "y": (polygon[0]["y"] + polygon[1]["y"]) / 2, - "z": (polygon[0]["z"] + polygon[1]["z"]) / 2} - window_segment = [list(wall_start + normalized_vector * window_start), list(wall_start + normalized_vector * window_end)] - window_boxes = self.create_rectangles(window_segment) - - return [polygon], [position], [window_segment], [window_boxes], [wall_info["id"]], walls - - else: - # split walls into subwalls - segments = [] - for i in range(quantity): - segment_start = wall_start + i * subwall_length * normalized_vector - segment_end = wall_start + (i+1) * subwall_length * normalized_vector - 
segments.append((segment_start, segment_end)) - - # update walls - updated_walls = [] - new_wall_ids = [] - for wall in walls: - if wall_info["id"] not in wall["id"]: - updated_walls.append(wall) - - for i in range(len(segments)): - # generate new subwall json - current_wall = copy.deepcopy(wall_info) - current_wall["id"] = f"{wall_info['id']}|{i}" - current_wall["segment"] = [segments[i][0].tolist(), segments[i][1].tolist()] - current_wall["width"] = subwall_length - current_wall["polygon"] = self.generate_wall_polygon(segments[i][0].tolist(), segments[i][1].tolist(), wall_height) - current_wall["connect_exterior"] = current_wall["id"] + "|exterior" - - # add exterior wall - current_wall_exterior = copy.deepcopy(current_wall) - current_wall_exterior["id"] = current_wall["id"] + "|exterior" - current_wall_exterior["material"] = {"name": "Walldrywall4Tiled"} - current_wall_exterior["polygon"] = current_wall["polygon"][::-1] - current_wall_exterior["segment"] = current_wall["segment"][::-1] - current_wall_exterior.pop("connect_exterior") - - updated_walls.append(current_wall) - updated_walls.append(current_wall_exterior) - new_wall_ids.append(current_wall["id"]) - - # generate window polygons - window_polygons = [] - window_positions = [] - window_segments = [] - window_boxes = [] - for i in range(len(segments)): - window_start = random.uniform(0, subwall_length - window_x) # TODO: consider the same start point for all windows - window_end = window_start + window_x - polygon = [{"x": window_start, "y": window_height, "z": 0}, - {"x": window_end, "y": window_height + window_y, "z": 0}] - position = {"x": (polygon[0]["x"] + polygon[1]["x"]) / 2, - "y": (polygon[0]["y"] + polygon[1]["y"]) / 2, - "z": (polygon[0]["z"] + polygon[1]["z"]) / 2} - - window_segment = [list(segments[i][0] + normalized_vector * window_start), list(segments[i][0] + normalized_vector * window_end)] - window_box = self.create_rectangles(window_segment) - window_polygons.append(polygon) - 
window_positions.append(position) - window_segments.append(window_segment) - window_boxes.append(window_box) - - return window_polygons, window_positions, window_segments, window_boxes, new_wall_ids, updated_walls - - - def generate_wall_polygon(self, point, next_point, wall_height): - wall_polygon = [] - # add the base point - wall_polygon.append({'x': point[0], 'y': 0, 'z': point[1]}) - # add the top point (with the same x and z, but y = wall_height) - wall_polygon.append({'x': point[0], 'y': wall_height, 'z': point[1]}) - # add the top point of the next base point - wall_polygon.append({'x': next_point[0], 'y': wall_height, 'z': next_point[1]}) - # add the next base point - wall_polygon.append({'x': next_point[0], 'y': 0, 'z': next_point[1]}) - return wall_polygon - - - def create_rectangles(self, segment): - # Convert to numpy arrays for easier calculations - pt1 = np.array(segment[0]) - pt2 = np.array(segment[1]) - - # Calculate the vector for the segment - vec = pt2 - pt1 - - # Calculate a perpendicular vector with length 1 - perp_vec = np.array([-vec[1], vec[0]]) - perp_vec /= np.linalg.norm(perp_vec) - perp_vec *= 0.1 # 0.1 is the width of the window - - # Calculate the four points for each rectangle - top_rectangle = [list(pt1 + perp_vec), list(pt2 + perp_vec), list(pt2), list(pt1)] - bottom_rectangle = [list(pt1), list(pt2), list(pt2 - perp_vec), list(pt1 - perp_vec)] - - return top_rectangle, bottom_rectangle \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 9292698..40c8290 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,3 +18,10 @@ langchain==0.0.171 torch==1.13.1 torchvision==0.14.1 gurobipy==10.0.3 +Werkzeug==2.0.1 +Flask==2.0.1 +compress-pickle +compress-json +black +objathor +editdistance diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..49fc3a2 --- /dev/null +++ b/setup.py @@ -0,0 +1,55 @@ +import os +from pathlib import Path + +from setuptools import setup, find_packages + +if 
__name__ == "__main__": + with Path(Path(__file__).parent, "README.md").open(encoding="utf-8") as file: + long_description = file.read() + + def _read_reqs(relpath): + fullpath = os.path.join(os.path.dirname(__file__), relpath) + with open(fullpath) as f: + return [ + s.strip() + for s in f.readlines() + if (s.strip() and not s.startswith("#")) + ] + + REQUIREMENTS = _read_reqs("requirements.txt") + + setup( + name="ai2holodeck", + packages=find_packages(), + include_package_data=True, + version="0.0.2", + license="Apache 2.0", + description='Holodeck: a framework for "Language Guided Generation of 3D Embodied AI Environments".', + long_description=long_description, + long_description_content_type="text/markdown", + author="Allen Institute for AI", + author_email="lucaw@allenai.org", + url="https://github.com/allenai/Holodeck", + data_files=[(".", ["README.md"])], + keywords=[ + "procedural generation", + "home environments", + "unity", + "3D assets", + "annotation", + "3D", + "ai2thor", + ], + install_requires=REQUIREMENTS, + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + ], + package_data={ + "objathor": ["generation/*/*.json", "generation/*.json"], + }, + )