You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
modelscope使用华为昇腾910B npu不正确RuntimeError: module must have its parameters and buffers on device npu:0 (device_ids[0]) but found one of them on device: npu:7
#1266
目前我用modelscope在npu 华为昇腾910B 上面运行的时候报错: 下面是报错信息: Traceback (most recent call last): File "/home/ziguang/unis_ai_arrange/program/face/facedet.py", line 31, in <module> result2 = face_detection(image) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/pipelines/cv/face_detection_pipeline.py", line 81, in __call__ return super().__call__(input, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/pipelines/base.py", line 227, in __call__ output = self._process_single(input, *args, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/pipelines/base.py", line 262, in _process_single out = self.forward(out, **forward_params) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/pipelines/cv/face_detection_pipeline.py", line 97, in forward return self.detector(**input) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/models/base/base_torch_model.py", line 36, in __call__ return self.postprocess(self.forward(*args, **kwargs)) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/models/cv/face_detection/scrfd/scrfd_detect.py", line 74, in forward result = self.detector( File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl return forward_call(*args, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/mmcv/parallel/data_parallel.py", line 51, in forward return super().forward(*inputs, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py", line 171, in forward raise RuntimeError("module must have its parameters and buffers " RuntimeError: module must have its parameters and buffers on device 
npu:0 (device_ids[0]) but found one of them on device: npu:7 [ERROR] 2025-03-14-18:48:44 (PID:91070, Device:7, RankID:-1) ERR99999 UNKNOWN application exception
我的代码很简单:
`import torch
import torch_npu
torch.npu.set_device("npu:7")
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.outputs import OutputKeys
import os
from PIL import Image
import numpy as np
import time
import json
import uuid
print("============人脸质量 cv_manual_face-quality-assessment_fqa===============")
face_quality_score = face_quality_assessment_func(image)
print(face_quality_score) 目前环境 上面 npu-info 是正常的,print(torch.npu.is_available()) 打印也是true。 我参照 网站 https://developer.aliyun.com/article/1420243 修改了 modelscope/utils/device.py 文件文件修改如下:# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from contextlib import contextmanager
from modelscope.utils.constant import Devices, Frameworks
from modelscope.utils.logger import get_logger
logger = get_logger()
def verify_device(device_name):
    """ Verify device is valid, device should be either cpu, cuda, gpu, npu,
    cuda:X, gpu:X or npu:X.

    Args:
        device_name (str): device str, should be either cpu, cuda, gpu, npu,
            gpu:X, cuda:X or npu:X where X is the ordinal of the device.
            Matching is case-insensitive.

    Return:
        device info (tuple): device_type and device_id. ``cuda`` is reported
            as ``Devices.gpu``. For gpu and npu a missing device_id defaults
            to 0; for cpu a missing device_id stays None.

    Raises:
        AssertionError: if device_name is None/empty, has more than one ':'
            or names an unsupported device type.
        ValueError: if the ordinal after ':' is not an integer.
    """
    err_msg = ('device should be either cpu, cuda, gpu, npu, gpu:X, cuda:X '
               'or npu:X where X is the ordinal for gpu or npu device.')
    assert device_name is not None and device_name != '', err_msg
    device_name = device_name.lower()
    eles = device_name.split(':')
    assert len(eles) <= 2, err_msg
    assert eles[0] in ['cpu', 'cuda', 'gpu', 'npu'], err_msg
    device_type = eles[0]
    device_id = None
    if len(eles) > 1:
        device_id = int(eles[1])
    if device_type == 'cuda':
        device_type = Devices.gpu
    # Accelerators need a concrete ordinal: without this default a bare
    # 'npu' would later be formatted as the invalid device string 'npu:None'.
    if device_id is None and (device_type == 'npu'
                              or device_type == Devices.gpu):
        device_id = 0
    return device_type, device_id
@contextmanager
def device_placement(framework, device_name='gpu:0'):
    """ Device placement function, allow user to specify which device to place model or tensor

    Args:
        framework (str): tensorflow or pytorch.
        device_name (str): gpu, npu or cpu to use, if you want to specify
            certain device, use gpu:$gpu_id, cuda:$gpu_id or npu:$npu_id.

    Returns:
        Context manager

    Examples:
        >>> # Requests for using model on cuda:0 for gpu
        >>> with device_placement('pytorch', device_name='gpu:0'):
        >>>     model = Model.from_pretrained(...)
    """
    device_type, device_id = verify_device(device_name)

    if framework == Frameworks.tf:
        import tensorflow as tf
        if device_type == Devices.gpu and not tf.test.is_gpu_available():
            logger.debug(
                'tensorflow: cuda is not available, using cpu instead.')
            device_type = Devices.cpu
        if device_type == Devices.cpu:
            with tf.device('/CPU:0'):
                yield
        elif device_type == Devices.gpu:
            with tf.device(f'/device:gpu:{device_id}'):
                yield
        else:
            # Any other accepted device type (e.g. npu) has no tensorflow
            # placement here. Still yield exactly once, otherwise
            # @contextmanager fails with "generator didn't yield".
            yield
    elif framework == Frameworks.torch:
        import torch
        if device_type == Devices.gpu:
            if torch.cuda.is_available():
                torch.cuda.set_device(f'cuda:{device_id}')
            else:
                logger.debug(
                    'pytorch: cuda is not available, using cpu instead.')
        elif device_type == 'npu':
            # Import lazily: torch_npu only exists on Ascend hosts, so a
            # module-level/unconditional import would break cpu and gpu
            # users. The import is needed for its side effect of making
            # torch.npu available.
            import torch_npu  # noqa: F401
            npu_id = 0 if device_id is None else device_id
            torch.npu.set_device(f'npu:{npu_id}')
        yield
    else:
        yield
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
Uh oh!
There was an error while loading. Please reload this page.
-
目前我用modelscope在npu 华为昇腾910B 上面运行的时候报错: 下面是报错信息:
Traceback (most recent call last): File "/home/ziguang/unis_ai_arrange/program/face/facedet.py", line 31, in <module> result2 = face_detection(image) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/pipelines/cv/face_detection_pipeline.py", line 81, in __call__ return super().__call__(input, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/pipelines/base.py", line 227, in __call__ output = self._process_single(input, *args, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/pipelines/base.py", line 262, in _process_single out = self.forward(out, **forward_params) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/pipelines/cv/face_detection_pipeline.py", line 97, in forward return self.detector(**input) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/models/base/base_torch_model.py", line 36, in __call__ return self.postprocess(self.forward(*args, **kwargs)) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/modelscope/models/cv/face_detection/scrfd/scrfd_detect.py", line 74, in forward result = self.detector( File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl return forward_call(*args, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/mmcv/parallel/data_parallel.py", line 51, in forward return super().forward(*inputs, **kwargs) File "/root/anaconda3/envs/modelscope/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py", line 171, in forward raise RuntimeError("module must have its parameters and buffers " RuntimeError: module must have its parameters and buffers on device npu:0 (device_ids[0]) but found one of them on 
device: npu:7 [ERROR] 2025-03-14-18:48:44 (PID:91070, Device:7, RankID:-1) ERR99999 UNKNOWN application exception
我的代码很简单:
`import torch
import torch_npu
torch.npu.set_device("npu:7")
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.outputs import OutputKeys
import os
from PIL import Image
import numpy as np
import time
import json
import uuid
from backbones import get_model
from backbones import get_model
from modelscope.models import Model
# Build one pipeline per model; every pipeline is requested on NPU card 7.
# (The mtcnn pipeline used to be constructed twice in a row, which only
# loaded the same model a second time.)
face_detection2 = pipeline(Tasks.face_detection, 'damo/cv_manual_face-detection_mtcnn', device="npu:7")
face_detection = pipeline(Tasks.face_detection, model='damo/cv_manual_face-detection_tinymog', device="npu:7")
face_recognition_pipe = pipeline(Tasks.face_recognition, model='damo/cv_ir101_facerecognition_cfglint', device="npu:7")
face_quality_assessment_func = pipeline(Tasks.face_quality_assessment, model='damo/cv_manual_face-quality-assessment_fqa', device="npu:7")

# Single test image that is fed to every pipeline below.
image = '/home/ziguang/unis_ai_arrange/program/face/IMG_20231123_180222.jpg'

print("===============人脸位置cv_manual_face-detection_mtcnn============")
result = face_detection2(image)
print(result)

# This is the call that fails in the reported traceback (facedet.py line 31).
print("===============人脸位置cv_manual_face-detection_tinymog============")
result2 = face_detection(image)
print(result2)

print("============人脸向量 cv_ir101_facerecognition_cfglint===============")
imageResult = face_recognition_pipe(image)
print(imageResult)

print("============人脸质量 cv_manual_face-quality-assessment_fqa===============")
face_quality_score = face_quality_assessment_func(image)
print(face_quality_score)
目前环境中 npu-info 显示正常,print(torch.npu.is_available()) 的输出也是 True。我参照 https://developer.aliyun.com/article/1420243 这篇文章修改了 modelscope/utils/device.py 文件,修改后的内容如下:
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from contextlib import contextmanager
from modelscope.utils.constant import Devices, Frameworks
from modelscope.utils.logger import get_logger
logger = get_logger()
def verify_device(device_name):
""" Verify device is valid, device should be either cpu, cuda, gpu, cuda:X or gpu:X.
@contextmanager
def device_placement(framework, device_name='gpu:0'):
""" Device placement function, allow user to specify which device to place model or tensor
Args:
framework (str): tensorflow or pytorch.
device (str): gpu or cpu to use, if you want to specify certain gpu,
use gpu:$gpu_id or cuda:$gpu_id.
def create_device(device_name):
""" create torch device
def get_device():
    """ Pick the torch device for the current process.

    Only CUDA and CPU are considered:
        * ``cpu`` when CUDA is not available;
        * ``cuda:$LOCAL_RANK`` when CUDA is available, torch.distributed is
          available and initialized, and ``LOCAL_RANK`` is set in the
          environment;
        * ``cuda:0`` in every other CUDA case.

    Return:
        torch.device: the selected device.
    """
    import torch
    from torch import distributed as dist

    if not torch.cuda.is_available():
        return torch.device('cpu')

    # Inside an initialized distributed run each process binds to the card
    # named by its LOCAL_RANK.
    in_distributed_run = (
        dist.is_available() and dist.is_initialized()
        and 'LOCAL_RANK' in os.environ)
    if in_distributed_run:
        return torch.device(f"cuda:{os.environ['LOCAL_RANK']}")
    return torch.device('cuda:0')
`
麻烦帮忙看一下哪里不对?
Beta Was this translation helpful? Give feedback.
All reactions