modelscope使用华为昇腾910B npu不正确RuntimeError: module must have its parameters and buffers on device npu:0 (device_ids[0]) but found one of them on device: npu:7 #1266

maqinghui · 2025-03-17T03:40:25Z

maqinghui
Mar 17, 2025

目前我用modelscope在npu 华为昇腾910B 上面运行的时候报错：下面是报错信息：
Traceback (most recent call last): File "/home/ziguang/unis_ai_arrange/program/face/facedet.py", line 31, in <module> result2 = face_detection(image) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/pipelines/cv/face_detection_pipeline.py", line 81, in __call__ return super().__call__(input, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/pipelines/base.py", line 227, in __call__ output = self._process_single(input, *args, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/pipelines/base.py", line 262, in _process_single out = self.forward(out, **forward_params) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/pipelines/cv/face_detection_pipeline.py", line 97, in forward return self.detector(**input) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/models/base/base_torch_model.py", line 36, in __call__ return self.postprocess(self.forward(*args, **kwargs)) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/models/cv/face_detection/scrfd/scrfd_detect.py", line 74, in forward result = self.detector( File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl return forward_call(*args, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/mmcv/parallel/data_parallel.py", line 51, in forward return super().forward(*inputs, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py", line 171, in forward raise RuntimeError("module must have its parameters and buffers " RuntimeError: module must have its parameters and buffers on device npu:0 (device_ids[0]) but found one of them on 
device: npu:7 [ERROR] 2025-03-14-18:48:44 (PID:91070, Device:7, RankID:-1) ERR99999 UNKNOWN application exception

我的代码很简单：
`import torch
import torch_npu
torch.npu.set_device("npu:7")
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.outputs import OutputKeys
import os
from PIL import Image
import numpy as np
import time
import json
import uuid

from backbones import get_model

from modelscope.models import Model

# Build each pipeline once. The original constructed the mtcnn pipeline twice
# and rebound the same name, which only repeats the model setup.
face_detection2 = pipeline(Tasks.face_detection, 'damo/cv_manual_face-detection_mtcnn', device="npu:7")

# NOTE(review): the reported RuntimeError is raised from this pipeline
# (scrfd_detect.py -> mmcv/torch DataParallel). DataParallel requires the
# module's parameters on device_ids[0], which the traceback shows is npu:0,
# while the model was placed on npu:7. Possible workaround -- TODO confirm:
# export ASCEND_RT_VISIBLE_DEVICES=7 before starting Python and use
# device="npu:0" everywhere in this script.
face_detection = pipeline(Tasks.face_detection, model= 'damo/cv_manual_face-detection_tinymog', device="npu:7")

face_recognition_pipe = pipeline(Tasks.face_recognition, model='damo/cv_ir101_facerecognition_cfglint', device="npu:7")

face_quality_assessment_func = pipeline(Tasks.face_quality_assessment, model= 'damo/cv_manual_face-quality-assessment_fqa', device="npu:7")

image = '/home/ziguang/unis_ai_arrange/program/face/IMG_20231123_180222.jpg'

# Face localisation with the MTCNN detector.
print("===============人脸位置cv_manual_face-detection_mtcnn============")
result = face_detection2(image)
print(result)

# Face localisation with the TinyMog detector (the call that fails in the
# traceback above).
print("===============人脸位置cv_manual_face-detection_tinymog============")
result2 = face_detection(image)
print(result2)

# Face embedding extraction.
print("============人脸向量 cv_ir101_facerecognition_cfglint===============")
imageResult = face_recognition_pipe(image)
print(imageResult)

# Face quality score.
print("============人脸质量 cv_manual_face-quality-assessment_fqa===============")
face_quality_score = face_quality_assessment_func(image)
print(face_quality_score)
目前环境上面 npu-info 是正常的，print(torch.npu.is_available()) 打印也是 True。我参照网站 https://developer.aliyun.com/article/1420243 修改了 modelscope/utils/device.py 文件，修改后的文件如下：
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from contextlib import contextmanager

from modelscope.utils.constant import Devices, Frameworks
from modelscope.utils.logger import get_logger

logger = get_logger()

def verify_device(device_name):
    """Parse and validate a device string.

    Accepted forms (case-insensitive) are ``cpu``, ``cuda``, ``gpu``, ``npu``
    and the indexed variants ``cuda:X``, ``gpu:X``, ``npu:X``, where X is the
    device ordinal.

    Args:
        device_name (str): Device string to validate.

    Returns:
        tuple: ``(device_type, device_id)``. ``cuda`` is normalised to
        ``Devices.gpu``. For gpu and npu devices a missing ordinal defaults
        to 0; for cpu the ordinal stays ``None`` unless one was given.

    Raises:
        AssertionError: If ``device_name`` is ``None`` or empty, contains
            more than one ``:``, or names an unsupported device type.
        ValueError: If the part after ``:`` is not an integer.
    """
    err_msg = ('device should be either cpu, cuda, gpu, npu, gpu:X, cuda:X '
               'or npu:X where X is the ordinal for the device.')
    assert device_name is not None and device_name != '', err_msg
    device_name = device_name.lower()
    eles = device_name.split(':')
    assert len(eles) <= 2, err_msg
    assert eles[0] in ['cpu', 'cuda', 'gpu', 'npu'], err_msg

    device_type = eles[0]
    device_id = int(eles[1]) if len(eles) > 1 else None

    if device_type == 'cuda':
        device_type = Devices.gpu

    # Accelerators need a concrete ordinal: without this default a bare
    # 'npu' would later be formatted as the invalid device string 'npu:None'.
    if device_id is None and (device_type == 'npu'
                              or device_type == Devices.gpu):
        device_id = 0
    return device_type, device_id

@contextmanager
def device_placement(framework, device_name='gpu:0'):
    """Context manager that selects the device used inside the ``with`` body.

    Args:
        framework (str): ``Frameworks.tf`` or ``Frameworks.torch``; any other
            value makes this a no-op context.
        device_name (str): Device string accepted by ``verify_device``, e.g.
            ``cpu``, ``gpu:0``, ``cuda:1`` or ``npu:7``.

    Returns:
        Context manager

    Examples:

        >>> # Requests for using model on cuda:0 for gpu
        >>> with device_placement('pytorch', device_name='gpu:0'):
        >>>     model = Model.from_pretrained(...)
    """
    device_type, device_id = verify_device(device_name)

    if framework == Frameworks.tf:
        import tensorflow as tf
        if device_type == Devices.gpu and not tf.test.is_gpu_available():
            logger.debug(
                'tensorflow: cuda is not available, using cpu instead.')
            # Fall back to CPU only when the GPU really is unavailable.
            device_type = Devices.cpu
        if device_type == Devices.cpu:
            with tf.device('/CPU:0'):
                yield
        elif device_type == Devices.gpu:
            with tf.device(f'/device:gpu:{device_id}'):
                yield
        else:
            # No TensorFlow placement is defined here for other device types
            # (e.g. npu). A @contextmanager generator must still yield exactly
            # once, otherwise entering the context raises RuntimeError.
            yield

    elif framework == Frameworks.torch:
        import torch
        if device_type == Devices.gpu:
            if torch.cuda.is_available():
                torch.cuda.set_device(f'cuda:{device_id}')
            else:
                logger.debug(
                    'pytorch: cuda is not available, using cpu instead.')
        elif device_type == "npu":
            # Imported only on the NPU path so that CUDA/CPU-only
            # installations without torch_npu keep working; torch.npu is
            # provided by the torch_npu extension.
            import torch_npu  # noqa: F401
            npu_id = 0 if device_id is None else device_id
            torch.npu.set_device(f'npu:{npu_id}')
        yield
    else:
        yield

def create_device(device_name):
    """Create a ``torch.device`` from a device string.

    For an NPU request the chosen card is also made the current NPU device
    via ``torch_npu.npu.set_device``. A gpu/cuda request falls back to CPU
    (with an info log) when CUDA is not available.

    Args:
        device_name (str):  cpu, gpu, gpu:0, cuda:0, npu:0 etc.

    Returns:
        torch.device: The npu, cuda or cpu device that was selected.
    """
    import torch
    device_type, device_id = verify_device(device_name)

    if device_type == "npu":
        # Imported lazily so hosts without torch_npu can still create
        # cpu/cuda devices.
        import torch_npu
        # Guard against a missing ordinal, which would yield 'npu:None'.
        npu_id = 0 if device_id is None else device_id
        torch_npu.npu.set_device(f"npu:{npu_id}")
        return torch.device(f"npu:{npu_id}")

    if device_type == Devices.gpu:
        if torch.cuda.is_available():
            return torch.device(f'cuda:{device_id}')
        logger.info('cuda is not available, using cpu instead.')

    return torch.device('cpu')

def get_device():
    """Return the default torch device for the current process.

    Returns:
        torch.device: ``cpu`` when CUDA is unavailable. Otherwise
        ``cuda:<LOCAL_RANK>`` when torch.distributed is available and
        initialised and the ``LOCAL_RANK`` environment variable is set,
        else ``cuda:0``.
    """
    import torch
    from torch import distributed as dist

    if not torch.cuda.is_available():
        return torch.device('cpu')

    # In an initialised distributed run each process uses the GPU whose
    # index is given by LOCAL_RANK.
    if (dist.is_available() and dist.is_initialized()
            and 'LOCAL_RANK' in os.environ):
        return torch.device(f"cuda:{os.environ['LOCAL_RANK']}")

    return torch.device('cuda:0')
`
麻烦帮忙看一下哪里不对？

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

ModelScope

modelscope使用华为昇腾910B npu不正确RuntimeError: module must have its parameters and buffers on device npu:0 (device_ids[0]) but found one of them on device: npu:7 #1266

Uh oh!

{{title}}

Uh oh!

Replies: 0 comments

Select a reply

Uh oh!

ModelScope

modelscope使用华为昇腾910B npu不正确RuntimeError: module must have its parameters and buffers on device npu:0 (device_ids[0]) but found one of them on device: npu:7 #1266

Uh oh!

maqinghui Mar 17, 2025

from backbones import get_model

from backbones import get_model

face_detection2 = pipeline(Tasks.face_detection, 'damo/cv_manual_face-detection_mtcnn', device="npu:7")

Replies: 0 comments

maqinghui
Mar 17, 2025