From df195f40003b6e237c1fa883f9f04ab361876f5b Mon Sep 17 00:00:00 2001 From: FeiXue <49382877+feixue94@users.noreply.github.com> Date: Fri, 5 Jul 2024 02:35:26 +0100 Subject: [PATCH] add SFD2 (CVPR 2023) and IMP (CVPR 2023) (#40) --- .gitmodules | 3 +++ README.md | 2 ++ common/config.yaml | 36 ++++++++++++++++++++++++++------ hloc/extract_features.py | 39 ++++++++++++++++++++++++---------- hloc/extractors/sfd2.py | 43 ++++++++++++++++++++++++++++++++++++++ hloc/match_features.py | 7 +++++++ hloc/matchers/imp.py | 45 ++++++++++++++++++++++++++++++++++++++++ third_party/pram | 1 + 8 files changed, 159 insertions(+), 17 deletions(-) create mode 100644 hloc/extractors/sfd2.py create mode 100644 hloc/matchers/imp.py create mode 160000 third_party/pram diff --git a/.gitmodules b/.gitmodules index d46b303..f128b21 100644 --- a/.gitmodules +++ b/.gitmodules @@ -61,3 +61,6 @@ [submodule "third_party/RoMa"] path = third_party/RoMa url = https://github.com/Vincentqyw/RoMa.git +[submodule "third_party/pram"] + path = third_party/pram + url = https://github.com/feixue94/pram.git diff --git a/README.md b/README.md index 3a47298..94437e2 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,8 @@ The tool currently supports various popular image matching algorithms, namely: - [ ] [DUSt3R](https://github.com/naver/dust3r), arXiv 2023 - [x] [LightGlue](https://github.com/cvg/LightGlue), ICCV 2023 - [x] [DarkFeat](https://github.com/THU-LYJ-Lab/DarkFeat), AAAI 2023 +- [x] [SFD2](https://github.com/feixue94/sfd2), CVPR 2023 +- [x] [IMP](https://github.com/feixue94/imp-release), CVPR 2023 - [ ] [ASTR](https://github.com/ASTR2023/ASTR), CVPR 2023 - [ ] [SEM](https://github.com/SEM2023/SEM), CVPR 2023 - [ ] [DeepLSD](https://github.com/cvg/DeepLSD), CVPR 2023 diff --git a/common/config.yaml b/common/config.yaml index f4f435d..9bb904c 100644 --- a/common/config.yaml +++ b/common/config.yaml @@ -32,18 +32,18 @@ matcher_zoo: enable: true matcher: duster dense: true - info: + info: name: DUSt3R #dispaly name source: "CVPR 2024" github: https://github.com/naver/dust3r paper: https://arxiv.org/abs/2312.14132 project: https://dust3r.europe.naverlabs.com - display: true + display: true GIM(dkm): enable: true matcher: gim(dkm) dense: true - info: + info: name: GIM(DKM) #dispaly name source: "ICLR 2024" github: https://github.com/xuelunshen/gim @@ -53,7 +53,7 @@ matcher_zoo: RoMa: matcher: roma dense: true - info: + info: name: RoMa #dispaly name source: "CVPR 2024" github: https://github.com/Parskatt/RoMa @@ -63,7 +63,7 @@ matcher_zoo: dkm: matcher: dkm dense: true - info: + info: name: DKM #dispaly name source: "CVPR 2023" github: https://github.com/Parskatt/DKM @@ -73,7 +73,7 @@ matcher_zoo: loftr: matcher: loftr dense: true - info: + info: name: LoFTR #dispaly name source: "CVPR 2021" github: https://github.com/zju3dv/LoFTR @@ -363,3 +363,27 @@ matcher_zoo: paper: https://arxiv.org/abs/2104.03362 project: null display: true + + sfd2+imp: + matcher: imp + feature: sfd2 + dense: false + info: + name: SFD2+IMP #dispaly name + source: "CVPR 2023" + github: https://github.com/feixue94/imp-release + paper: https://arxiv.org/pdf/2304.14837 + project: https://feixue94.github.io/ + display: true + + sfd2+mnn: + matcher: NN-mutual + feature: sfd2 + dense: false + info: + name: SFD2+MNN #dispaly name + source: "CVPR 2023" + github: https://github.com/feixue94/sfd2 + paper: https://arxiv.org/abs/2304.14845 + project: https://feixue94.github.io/ + display: true \ No newline at end of file diff --git a/hloc/extract_features.py b/hloc/extract_features.py index cd0f2ba..8c65220 100644 --- a/hloc/extract_features.py +++ b/hloc/extract_features.py @@ -16,7 +16,6 @@ from .utils.parsers import parse_image_lists from .utils.io import read_image, list_h5_names - """ A set of standard configurations that can be directly selected from the command line using their name. Each is a dictionary with the following entries: @@ -290,6 +289,24 @@ "dfactor": 8, }, }, + "sfd2": { + "output": "feats-sfd2-n4096-r1600", + "model": { + "name": "sfd2", + "max_keypoints": 4096, + }, + "preprocessing": { + "grayscale": False, + "force_resize": True, + "resize_max": 1600, + "width": 640, + "height": 480, + 'conf_th': 0.001, + 'multiscale': False, + 'scales': [1.0], + + }, + }, # Global descriptors "dir": { "output": "global-feats-dir", @@ -316,13 +333,13 @@ def resize_image(image, size, interp): if interp.startswith("cv2_"): - interp = getattr(cv2, "INTER_" + interp[len("cv2_") :].upper()) + interp = getattr(cv2, "INTER_" + interp[len("cv2_"):].upper()) h, w = image.shape[:2] if interp == cv2.INTER_AREA and (w < size[0] or h < size[1]): interp = cv2.INTER_LINEAR resized = cv2.resize(image, size, interpolation=interp) elif interp.startswith("pil_"): - interp = getattr(PIL.Image, interp[len("pil_") :].upper()) + interp = getattr(PIL.Image, interp[len("pil_"):].upper()) resized = PIL.Image.fromarray(image.astype(np.uint8)) resized = resized.resize(size, resample=interp) resized = np.asarray(resized, dtype=image.dtype) @@ -376,7 +393,7 @@ def __getitem__(self, idx): size = image.shape[:2][::-1] if self.conf.resize_max and ( - self.conf.force_resize or max(size) > self.conf.resize_max + self.conf.force_resize or max(size) > self.conf.resize_max ): scale = self.conf.resize_max / max(size) size_new = tuple(int(round(x * scale)) for x in size) @@ -467,13 +484,13 @@ def preprocess(image: np.ndarray, conf: SimpleNamespace): @torch.no_grad() def main( - conf: Dict, - image_dir: Path, - export_dir: Optional[Path] = None, - as_half: bool = True, - image_list: Optional[Union[Path, List[str]]] = None, - feature_path: Optional[Path] = None, - overwrite: bool = False, + conf: Dict, + image_dir: Path, + export_dir: Optional[Path] = None, + as_half: bool = True, + image_list: Optional[Union[Path, List[str]]] = None, + feature_path: Optional[Path] = None, + overwrite: bool = False, ) -> Path: logger.info( "Extracting local features with configuration:" diff --git a/hloc/extractors/sfd2.py b/hloc/extractors/sfd2.py new file mode 100644 index 0000000..4039b38 --- /dev/null +++ b/hloc/extractors/sfd2.py @@ -0,0 +1,43 @@ +# -*- coding: UTF-8 -*- +import sys +from pathlib import Path +import torchvision.transforms as tvf + +import torch + +from ..utils.base_model import BaseModel +from .. import logger + +pram_path = Path(__file__).parent / "../../third_party/pram" +sys.path.append(str(pram_path)) + +from nets.sfd2 import load_sfd2, extract_sfd2_return + + +class SFD2(BaseModel): + default_conf = { + "max_keypoints": 4096, + "model_name": 'sfd2_20230511_210205_resnet4x.79.pth', + "conf_th": 0.001, + + } + required_inputs = ["image"] + + def _init(self, conf): + self.conf = {**self.default_conf, **conf} + self.norm_rgb = tvf.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + model_fn = pram_path / 'weights' / self.conf['model_name'] + self.net = load_sfd2(weight_path=model_fn).cuda().eval() + + logger.info(f"Load SFD2 model done.") + + def _forward(self, data): + pred = self.net.extract_local_global(data={'image': self.norm_rgb(data['image'])}, config=self.conf) + out = { + "keypoints": pred["keypoints"][0][None], + "scores": pred["scores"][0][None], + "descriptors": pred["descriptors"][0][None], + } + return out diff --git a/hloc/match_features.py b/hloc/match_features.py index 28ba3a2..3065d84 100644 --- a/hloc/match_features.py +++ b/hloc/match_features.py @@ -162,6 +162,13 @@ "match_threshold": 0.2, }, }, + "imp": { + "output": "matches-imp", + "model": { + "name": "imp", + "match_threshold": 0.2, + }, + }, } diff --git a/hloc/matchers/imp.py b/hloc/matchers/imp.py new file mode 100644 index 0000000..7e4ef0a --- /dev/null +++ b/hloc/matchers/imp.py @@ -0,0 +1,45 @@ +# -*- coding: UTF-8 -*- +import sys +from pathlib import Path + +import torch + +from ..utils.base_model import BaseModel +from .. import logger + +pram_path = Path(__file__).parent / "../../third_party/pram" +sys.path.append(str(pram_path)) + +from nets.gml import GML + + +class IMP(BaseModel): + default_conf = { + "match_threshold": 0.2, + "features": "sfd2", + "model_name": "imp_gml.920.pth", + 'sinkhorn_iterations': 20, + } + required_inputs = [ + "image0", + "keypoints0", + "scores0", + "descriptors0", + "image1", + "keypoints1", + "scores1", + "descriptors1", + ] + + def _init(self, conf): + self.conf = {**self.default_conf, **conf} + weight_path = pram_path / "weights" / self.conf["model_name"] + self.net = GML(self.conf).eval().cuda() + self.net.load_state_dict(torch.load(weight_path)['model'], strict=True) + logger.info(f"Load IMP model done.") + + def _forward(self, data): + data['descriptors0'] = data['descriptors0'].transpose(2, 1).float() + data['descriptors1'] = data['descriptors1'].transpose(2, 1).float() + + return self.net.produce_matches(data, p=0.2) diff --git a/third_party/pram b/third_party/pram new file mode 160000 index 0000000..7135cb9 --- /dev/null +++ b/third_party/pram @@ -0,0 +1 @@ +Subproject commit 7135cb9590df9c1d0de13e5ebc18bdd1a2384833