diff --git a/docs/en/installation.md b/docs/en/installation.md index 4140d9ad40..250a90a3a0 100644 --- a/docs/en/installation.md +++ b/docs/en/installation.md @@ -130,7 +130,7 @@ If everything goes fine, you will be able to get the following visualization res ![image](https://user-images.githubusercontent.com/87690686/187824033-2cce0f55-034a-4127-82e2-52744178bc32.jpg) -Option (B). If you install mmpose with pip, open you python interpreter and copy & paste the following codes. +Option (B). If you install mmpose with pip, open your Python interpreter and copy & paste the following code. ```python from mmpose.apis import inference_topdown, init_model @@ -170,13 +170,13 @@ When installing PyTorch, you need to specify the version of CUDA. If you are not Please make sure the GPU driver satisfies the minimum version requirements. See [this table](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-major-component-versions__table-cuda-toolkit-driver-versions) for more information. -Installing CUDA runtime libraries is enough if you follow our best practices, because no CUDA code will be compiled locally. However if you hope to compile MMCV from source or develop other CUDA operators, you need to install the complete CUDA toolkit from NVIDIA's [website](https://developer.nvidia.com/cuda-downloads), and its version should match the CUDA version of PyTorch. i.e., the specified version of cudatoolkit in `conda install` command. +Installing CUDA runtime libraries is enough if you follow our best practices, because no CUDA code will be compiled locally. However, if you hope to compile MMCV from source or develop other CUDA operators, you need to install the complete CUDA toolkit from NVIDIA's [website](https://developer.nvidia.com/cuda-downloads), and its version should match the CUDA version of PyTorch, i.e., the specified version of cudatoolkit in the `conda install` command. 
### Install MMEngine without MIM To install MMEngine with pip instead of MIM, please follow [MMEngine installation guides](https://mmengine.readthedocs.io/zh_CN/latest/get_started/installation.html). -For example, you can install MMEngine by the following command. +For example, you can install MMEngine using the following command. ```shell pip install mmengine @@ -184,11 +184,11 @@ pip install mmengine ### Install MMCV without MIM -MMCV contains C++ and CUDA extensions, thus depending on PyTorch in a complex way. MIM solves such dependencies automatically and makes the installation easier. However, it is not a must. +MMCV contains C++ and CUDA extensions; thus, it depends on PyTorch in a complex way. MIM solves such dependencies automatically and makes the installation easier. However, it is not a must. To install MMCV with pip instead of MIM, please follow [MMCV installation guides](https://mmcv.readthedocs.io/en/2.x/get_started/installation.html). This requires manually specifying a find-url based on PyTorch version and its CUDA version. -For example, the following command install mmcv built for PyTorch 1.10.x and CUDA 11.3. +For example, the following command installs mmcv built for PyTorch 1.10.x and CUDA 11.3. ```shell pip install 'mmcv>=2.0.1' -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10/index.html @@ -239,7 +239,7 @@ We provide a [Dockerfile](https://github.com/open-mmlab/mmpose/blob/master/docke ```shell # build an image with PyTorch 1.8.0, CUDA 10.1, CUDNN 7. -# If you prefer other versions, just modified the Dockerfile +# If you prefer other versions, just modify the Dockerfile docker build -t mmpose docker/ ``` @@ -254,10 +254,10 @@ docker run --gpus all --shm-size=8g -it -v {DATA_DIR}:/mmpose/data mmpose `{DATA_DIR}` is your local folder containing all the datasets for mmpose. ```{note} -If you encounter the error message like `permission denied`, please add `sudo` at the start of the command and try it again. 
+If you encounter an error message like `permission denied`, please add `sudo` at the start of the command and try it again. ``` -## Trouble shooting +## Troubleshooting If you have some issues during the installation, please first view the [FAQ](./faq.md) page. You may [open an issue](https://github.com/open-mmlab/mmpose/issues/new/choose) on GitHub if no solution is found. diff --git a/mmpose/models/heads/hybrid_heads/rtmo_head.py b/mmpose/models/heads/hybrid_heads/rtmo_head.py index c364c20e98..60adade39c 100644 --- a/mmpose/models/heads/hybrid_heads/rtmo_head.py +++ b/mmpose/models/heads/hybrid_heads/rtmo_head.py @@ -1026,7 +1026,7 @@ def switch_to_deploy(self, test_cfg: Optional[Dict]): featmaps = [] for s in self.featmap_strides: featmaps.append( - torch.rand(1, 1, input_size[0] // s, input_size[1] // s)) + torch.rand(1, 1, input_size[1] // s, input_size[0] // s)) featmap_sizes = [fmap.shape[2:] for fmap in featmaps] self.mlvl_priors = self.prior_generator.grid_priors( diff --git a/tests/test_models/test_heads/test_hybrid_heads/test_rtmo_head.py b/tests/test_models/test_heads/test_hybrid_heads/test_rtmo_head.py new file mode 100644 index 0000000000..26339ff184 --- /dev/null +++ b/tests/test_models/test_heads/test_hybrid_heads/test_rtmo_head.py @@ -0,0 +1,144 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import unittest +from unittest import TestCase + +import torch + +from mmpose.models.heads.hybrid_heads.rtmo_head import RTMOHead + + +class TestRTMOHeadSwitchToDeploy(TestCase): + """Tests for RTMOHead.switch_to_deploy. + + switch_to_deploy precomputes a prior anchor grid from dummy feature maps + sized according to test_cfg['input_size']. mmpose's input_size follows + (W, H) order, while PyTorch tensor spatial dimensions are (H, W). + Swapping the two is harmless for square inputs (W == H) but causes the + prior grid to cover the wrong coordinate range for non-square models, + shifting every detection to a wrong position at inference time. 
+ """ + + def _get_head(self): + return RTMOHead( + num_keypoints=17, + featmap_strides=(16, 32), + head_module_cfg=dict( + num_classes=1, + in_channels=64, + cls_feat_channels=64, + channels_per_group=36, + pose_vec_channels=64, + widen_factor=1.0, + stacked_convs=2, + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='SiLU', inplace=True)), + assigner=dict( + type='SimOTAAssigner', + dynamic_k_indicator='oks', + oks_calculator=dict( + type='PoseOKS', + metainfo='configs/_base_/datasets/coco.py'), + use_keypoints_for_center=True), + prior_generator=dict( + type='MlvlPointGenerator', + centralize_points=True, + strides=[16, 32]), + dcc_cfg=dict( + in_channels=64, + feat_channels=32, + num_bins=(64, 64), + spe_channels=32, + gau_cfg=dict( + s=64, + expansion_factor=2, + dropout_rate=0.0, + drop_path=0.0, + act_fn='SiLU', + pos_enc='add')), + loss_cls=dict( + type='VariFocalLoss', + reduction='sum', + use_target_weight=True, + loss_weight=1.0), + loss_bbox=dict( + type='IoULoss', + mode='square', + eps=1e-16, + reduction='sum', + loss_weight=5.0), + loss_oks=dict( + type='OKSLoss', + reduction='none', + metainfo='configs/_base_/datasets/coco.py', + loss_weight=30.0), + loss_vis=dict( + type='BCELoss', + use_target_weight=True, + reduction='mean', + loss_weight=1.0)) + + def _get_feats(self, input_size, batch_size=2, in_channels=64): + """Return a list of dummy feature tensors matching ``input_size``.""" + W, H = input_size + return [ + torch.rand(batch_size, in_channels, H // 16, W // 16), + torch.rand(batch_size, in_channels, H // 32, W // 32), + ] + + def test_switch_to_deploy_square_input_size(self): + """Square input (W == H) should produce consistent x/y prior ranges.""" + W = H = 640 + head = self._get_head() + head.switch_to_deploy(test_cfg=dict(input_size=(W, H))) + + x_max = head.flatten_priors[:, 0].max().item() + y_max = head.flatten_priors[:, 1].max().item() + + # For a square input both axes cover the same range + 
self.assertLessEqual(x_max, W) + self.assertLessEqual(y_max, H) + # Priors must actually reach near the image boundary + self.assertGreater(x_max, 0) + self.assertGreater(y_max, 0) + + def test_switch_to_deploy_non_square_input_size(self): + """Non-square input (W != H) must keep x priors within [0, W] and + y priors within [0, H]. + + mmpose's input_size is (W, H), but torch.rand takes spatial dims as + (H, W). If the indices are passed without swapping, the feature maps + get shape (W/stride, H/stride) instead of (H/stride, W/stride). + MlvlPointGenerator then produces x priors up to H and y priors up to + W — exactly backwards. For input_size=(640, 384) this means x only + reaches ~384 and y reaches ~640, causing all detections to appear + in the wrong region of the image. + """ + W, H = 640, 384 + head = self._get_head() + head.switch_to_deploy(test_cfg=dict(input_size=(W, H))) + + # Priors are (x, y, stride) triples. + # Column 0 is the x (width) axis; column 1 is the y (height) axis. + x_max = head.flatten_priors[:, 0].max().item() + y_max = head.flatten_priors[:, 1].max().item() + + self.assertLessEqual( + x_max, W, + f'x priors exceed input width {W}: got {x_max:.0f}') + self.assertLessEqual( + y_max, H, + f'y priors exceed input height {H}: got {y_max:.0f}') + + # Key assertion: x priors must span close to W (640), not only H + # (384). If input_size[0] and input_size[1] are swapped when calling + # torch.rand inside switch_to_deploy, x_max will be ~384 instead. + self.assertGreater( + x_max, H, + f'x priors only reach {x_max:.0f}; expected close to W={W}. ' + f'Probable cause: input_size passed to torch.rand in (H, W) ' + f'order instead of the required (W, H) — swap the indices in ' + f'switch_to_deploy.') + + +if __name__ == '__main__': + unittest.main()