initial commit

kakaobrain · Jan 15, 2024 · 621d72f · 621d72f
1 parent 2352f8d
commit 621d72f
Show file tree

Hide file tree

Showing 211 changed files with 156,575 additions and 0 deletions.
diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
@@ -0,0 +1,15 @@
+# Formatting gate: runs black in check-only mode on every pull request.
+name: Run black
+on:
+  - pull_request
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      # The venv package is installed on the runner before the black action runs.
+      - name: Install venv
+        run: sudo apt-get -y install python3.10-venv
+      - uses: psf/black@stable
+        with:
+          # --check only reports files that would be reformatted; -l88 sets the line length.
+          options: '--check --verbose -l88'
+          # Space-separated list of paths handed to black.
+          src: './sgm ./scripts ./main.py'
diff --git a/.github/workflows/test-build.yaml b/.github/workflows/test-build.yaml
@@ -0,0 +1,27 @@
+# Checks that the package installs for every Python / requirements-file combination.
+name: Build package
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+jobs:
+  build:
+    name: Build
+    runs-on: ubuntu-latest
+    strategy:
+      # Let every matrix cell finish so one failing combination does not hide the others.
+      fail-fast: false
+      matrix:
+        # Versions stay quoted so "3.10" is not read as the float 3.1.
+        python-version: ["3.8", "3.10"]
+        # Each entry names a file under requirements/ (see the install step below).
+        requirements-file: ["pt2", "pt13"]
+    steps:
+      # checkout@v3 and setup-python@v4 are the versions the repository's other
+      # workflows already use; the v2 releases run on the deprecated Node 12 runtime.
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements/${{ matrix.requirements-file }}.txt
+          pip install .
diff --git a/.github/workflows/test-inference.yml b/.github/workflows/test-inference.yml
@@ -0,0 +1,34 @@
+# Runs the inference test suite on a self-hosted runner and publishes the pytest report.
+name: Test inference
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+jobs:
+  test:
+    name: "Test inference"
+    # This action is designed only to run on the Stability research cluster at this time, so many assumptions are made about the environment
+    if: github.repository == 'stability-ai/generative-models'
+    runs-on: [self-hosted, slurm, g40]
+    steps:
+      - uses: actions/checkout@v3
+      - name: "Symlink checkpoints"
+        # The path is handed to the shell through an environment variable and quoted,
+        # so spaces or shell metacharacters in it cannot split or alter the command.
+        env:
+          SGM_CHECKPOINTS_PATH: ${{ vars.SGM_CHECKPOINTS_PATH }}
+        run: ln -s "$SGM_CHECKPOINTS_PATH" checkpoints
+      - name: "Setup python"
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: "Install Hatch"
+        run: pip install hatch
+      - name: "Run inference tests"
+        # Writes a JUnit XML report that the next step reads.
+        run: hatch run ci:test-inference --junit-xml test-results.xml
+      - name: Surface failing tests
+        # always() keeps this step running when the test step above has failed.
+        if: always()
+        # NOTE(review): @main tracks a moving branch; consider pinning to a release tag or commit SHA.
+        uses: pmeier/pytest-results-action@main
+        with:
+          path: test-results.xml
+          summary: true
+          display-options: fEX
+          fail-on-empty: true
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+# Third-party code tracked as a git submodule and checked out under thirdparty/carvekit.
+[submodule "thirdparty/carvekit"]
+	path = thirdparty/carvekit
+	url = https://github.com/OPHoperHPO/image-background-remove-tool.git
diff --git a/.project-root b/.project-root
@@ -0,0 +1,2 @@
+# this file is required for inferring the project root directory
+# do not delete
diff --git a/3drec/configs/nvsadapter.yaml b/3drec/configs/nvsadapter.yaml
@@ -0,0 +1,160 @@
+# Experiment identity and output location.
+name: "nvsadapter"
+# The tag is assembled through ${...} interpolation from other keys of this config:
+# the random-camera height, the input image path (passed through `basename` and
+# `rmspace`, presumably custom resolvers registered by the framework — confirm),
+# and random_camera.progressive_until.
+tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}_prog${data.random_camera.progressive_until}"
+exp_root_dir: "./outputs"
+seed: 0
+
+# Data module: one reference image plus randomly sampled training cameras.
+data_type: "single-image-datamodule"
+data: # threestudio/data/image.py -> SingleImageDataModuleConfig
+  image_path: ./load/images/hamburger_rgba.png
+  # Three resolutions paired with two milestones — presumably the step counts at
+  # which the next resolution takes over; confirm against the data module.
+  height: [128, 128, 512]
+  width: [128, 128, 512]
+  resolution_milestones: [2000, 5000]
+  default_elevation_deg: 0.0
+  default_azimuth_deg: 0.0
+  default_camera_distance: 1.5
+  default_fovy_deg: 49.1
+  # Both flags are derived from the loss weights under system.loss.
+  requires_depth: ${cmaxgt0orcmaxgt0:${system.loss.lambda_depth},${system.loss.lambda_depth_rel}}
+  requires_normal: ${cmaxgt0:${system.loss.lambda_normal}}
+  random_camera: # threestudio/data/uncond.py -> RandomCameraDataModuleConfig
+    height: [64, 64, 256]
+    width: [64, 64, 256]
+    batch_size: [16, 16, 8]  # nvs-adapter: must be a multiple of system.guidance.num_query
+    resolution_milestones: ${data.resolution_milestones}  # nvs-adapter
+    eval_height: 512
+    eval_width: 512
+    eval_batch_size: 1
+    elevation_range: [-10, 80]
+    azimuth_range: [-180, 180]
+    camera_distance_range: [1.5, 1.5]
+    fovy_range: [49.1, 49.1]  # objaverse dataset has a fixed fovy
+    progressive_until: 0
+    camera_perturb: 0.0
+    center_perturb: 0.0
+    up_perturb: 0.0
+    light_position_perturb: 1.0
+    light_distance_range: [7.5, 10.0]
+    eval_elevation_deg: 15
+    # Evaluation camera distance and fovy reuse the defaults declared above.
+    eval_camera_distance: ${data.default_camera_distance}
+    eval_fovy_deg: ${data.default_fovy_deg}
+    light_sample_strategy: "dreamfusion"
+    # Lowercase boolean, matching the other booleans in this file.
+    batch_uniform_azimuth: false
+    n_val_views: 30
+    n_test_views: 120
+
+# System definition: geometry, material, renderer, guidance, losses and optimizer.
+system_type: "nvsadapter-system"
+system:
+  # Geometry: an implicit volume with a hash-grid position encoding feeding a small MLP.
+  geometry_type: "implicit-volume"
+  geometry:
+    radius: 1.0
+    normal_type: "analytic"
+
+    # the density initialization proposed in the DreamFusion paper
+    # does not work very well
+    # density_bias: "blob_dreamfusion"
+    # density_activation: exp
+    # density_blob_scale: 5.
+    # density_blob_std: 0.2
+
+    # use Magic3D density initialization instead
+    density_bias: "blob_magic3d"
+    density_activation: softplus
+    density_blob_scale: 10.
+    density_blob_std: 0.5
+
+    # coarse to fine hash grid encoding
+    # to ensure smooth analytic normals
+    pos_encoding_config:
+      otype: HashGrid
+      n_levels: 16
+      n_features_per_level: 2
+      log2_hashmap_size: 19
+      base_resolution: 16
+      per_level_scale: 1.447269237440378 # max resolution 4096
+    mlp_network_config:
+      otype: "VanillaMLP"
+      activation: "ReLU"
+      output_activation: "none"
+      n_neurons: 64
+      n_hidden_layers: 2
+
+  # Material model and its settings.
+  material_type: "diffuse-with-point-light-material"
+  material:
+    # 100000 exceeds trainer.max_steps (8000) in this file, so this threshold is never
+    # reached during a run — presumably keeping ambient-only shading throughout; confirm.
+    ambient_only_steps: 100000
+    textureless_prob: 0.05
+    albedo_activation: sigmoid
+
+  # Alternative background, kept for reference:
+  # background_type: "neural-environment-map-background"
+  # background:
+  #   color_activation: sigmoid
+
+  background_type: "solid-color-background" # unused
+
+  # Volume renderer; its radius is shared with the geometry section.
+  renderer_type: "nerf-volume-renderer"
+  renderer:
+    radius: ${system.geometry.radius}
+    num_samples_per_ray: 512
+    # The two flags below are derived from the matching loss weights via `gt0`
+    # (presumably "greater than zero" — confirm against the resolver definition).
+    return_comp_normal: ${gt0:${system.loss.lambda_normal_smooth}}
+    return_normal_perturb: ${gt0:${system.loss.lambda_3d_normal_smooth}}
+
+  # Placeholder prompt processor with empty model name and prompt.
+  # NOTE(review): the inline comment mentions Zero123 although this config is for
+  # nvsadapter — presumably carried over from another config; confirm.
+  prompt_processor_type: "dummy-prompt-processor" # Zero123 doesn't use prompts
+  prompt_processor:
+    pretrained_model_name_or_path: ""
+    prompt: ""
+
+  # Guidance model: checkpoint and config paths plus the conditioning view.
+  guidance_type: "nvsadapter-guidance"
+  guidance:
+    # Relative paths — presumably resolved against the launch directory; confirm.
+    pretrained_model_name_or_path: "../checkpoints/base_query_4_step_200000.ckpt"
+    pretrained_config: "../checkpoints/base_query_4.yaml"
+    # The conditioning image and camera reuse the values declared under `data`.
+    cond_image_path: ${data.image_path}
+    cond_elevation_deg: ${data.default_elevation_deg}
+    cond_azimuth_deg: ${data.default_azimuth_deg}
+    cond_camera_distance: ${data.default_camera_distance}
+    cond_fovy_deg: ${data.default_fovy_deg}
+    guidance_scale: 3.0
+    min_step_percent: 0.02
+    max_step_percent: [0, 0.98, 0.5, 5000] # (start_iter, start_val, end_val, end_iter)
+    # Constant alternative to the schedule above:
+    # max_step_percent: 0.98
+    # data.random_camera.batch_size entries are expected to be multiples of this value.
+    num_query: 4
+
+  # Step-based frequency settings; both are 0 in this config
+  # (presumably 0 disables the corresponding behaviour — confirm against the system code).
+  freq:
+    ref_only_steps: 0
+    guidance_eval: 0
+
+  # Optional experiment loggers; wandb is switched off in this config.
+  loggers:
+    wandb:
+      enable: false
+      project: "threestudio"
+      # YAML null instead of the Python spelling `None`, which YAML loads as the
+      # string "None". NOTE(review): the consumer is not visible here; presumably a
+      # null name leaves the run name unset — confirm.
+      name: null
+
+  # Loss term weights. Several other settings are derived from them:
+  # data.requires_depth / data.requires_normal read lambda_depth, lambda_depth_rel and
+  # lambda_normal; renderer.return_comp_normal / return_normal_perturb read the two
+  # *_normal_smooth weights.
+  loss:
+    lambda_sds: 0.1
+    lambda_rgb: 500.
+    lambda_mask: 50.
+    lambda_depth: 0. # 0.05
+    lambda_depth_rel: 0. # [0, 0, 0.05, 100]
+    lambda_normal: 0. # [0, 0, 0.05, 100]
+    lambda_normal_smooth: 8.0
+    lambda_3d_normal_smooth: 8.0
+    lambda_orient: 1.0
+    lambda_sparsity: 1.0 # should be tweaked for every model
+    lambda_opaque: 0.1
+
+  # Optimizer selection; `args` are presumably forwarded to the optimizer constructor — confirm.
+  optimizer:
+    name: AdamW
+    args:
+      lr: 0.01
+      betas: [0.9, 0.99]
+      eps: 1.e-8
+
+# Training loop settings (presumably forwarded to the Lightning Trainer — confirm).
+trainer:
+  max_steps: 8000
+  log_every_n_steps: 1
+  num_sanity_val_steps: 0
+  # Same interval as checkpoint.every_n_train_steps below.
+  val_check_interval: 200
+  enable_progress_bar: true
+  precision: 16-mixed
+
+# Checkpoint saving policy. The interval matches trainer.val_check_interval (200 steps).
+checkpoint:
+  save_last: true # save at each validation time
+  # NOTE(review): presumably -1 keeps every saved checkpoint (Lightning ModelCheckpoint
+  # convention) — confirm against the checkpoint callback in use.
+  save_top_k: -1
+  # The trailing comment records an alternative value: saving only at the final step.
+  every_n_train_steps: 200 # ${trainer.max_steps}