349 Normal file
View File

@ -0,0 +1,349 @@
# coding=utf-8
import torch
import torchvision
import torch.nn as nn
import os
import time
import numpy as np
import math
import random
import cv2.cv2 as cv2
def autopad(k, p=None):  # kernel, padding
    """Return the padding to use for a convolution with kernel size ``k``.

    Args:
        k: Kernel size, either an ``int`` or an iterable of ints (one per dim).
        p: Explicit padding. When not ``None`` it is returned unchanged.

    Returns:
        ``p`` if given, otherwise ``k // 2`` (element-wise, as a list, when
        ``k`` is not an int) -- i.e. 'same' padding for odd kernels.
    """
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
class Conv(nn.Module):
    """Standard convolution block: Conv2d -> BatchNorm2d -> activation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        """Build the block.

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Kernel size.
            s: Stride.
            p: Padding; ``None`` lets ``autopad`` derive it from ``k``.
            g: Convolution groups.
            act: ``True`` for SiLU, an ``nn.Module`` to use that module,
                anything else for no activation (identity).
        """
        super(Conv, self).__init__()
        # The convolution carries no bias; it is followed by batch norm.
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        """Apply convolution, batch norm and activation."""
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        """Apply convolution and activation only, skipping the batch-norm layer."""
        return self.act(self.conv(x))
class Ensemble(torch.nn.ModuleList):
    """A list of models whose first outputs are concatenated along dim 1."""

    def __init__(self):
        super(Ensemble, self).__init__()

    def forward(self, x, augment=False):
        """Run every member on ``x`` and concatenate their first outputs.

        Args:
            x: Input passed unchanged to each member.
            augment: Forwarded as the second positional argument to each member.

        Returns:
            ``(y, None)`` where ``y`` is the concatenation (dim 1) of element
            ``[0]`` of every member's output -- inference output, train output.
        """
        y = [module(x, augment)[0] for module in self]
        # y = torch.stack(y).max(0)[0]  # max ensemble
        # y = torch.stack(y).mean(0)  # mean ensemble
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output
class YOLOV5(object):
def __init__(self, conf_thres=0.25,
# NOTE(review): the remainder of this signature is not visible here. The body reads
# iou_thres, classes, imgsz and weights, so those are presumably further parameters -- confirm
# their order and defaults against the original file.
# Hyper-parameter settings
self.conf_thres = conf_thres # confidence threshold
self.iou_thres = iou_thres # IoU threshold
self.classes = classes # number of classes (per the original comment; TODO confirm it is not a class filter)
self.imgsz = imgsz # normalized (network input) size
# Load model
self.device = torch.device('cpu')
self.model = self.attempt_load(weights, map_location=self.device) # load FP32 model
self.stride = int(self.model.stride.max()) # model stride
self.imgsz = self.check_img_size(imgsz, s=self.stride) # check img_size; overwrites the value stored above
def attempt_load(self, weights, map_location=None):
    """Load one checkpoint or an ensemble of checkpoints.

    Args:
        weights: A checkpoint path, or a list of paths
            (``weights=[a, b, c]``, ``weights=[a]`` or ``weights=a``).
        map_location: Forwarded to ``torch.load``.

    Returns:
        The single loaded model when exactly one checkpoint was given,
        otherwise an ``Ensemble`` that also carries the ``names`` and
        ``stride`` attributes of its last member.
    """
    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        # SECURITY: torch.load unpickles the file; only load checkpoints from trusted sources.
        ckpt = torch.load(w, map_location=map_location)  # load
        # Prefer the 'ema' entry when the checkpoint has a truthy one, else 'model'.
        model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval())  # FP32 model

    # Compatibility updates
    for m in model.modules():
        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
            m.inplace = True  # pytorch 1.7.0 compatibility
        elif type(m) is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if len(model) == 1:
        return model[-1]  # return model

    print('Ensemble created with %s\n' % weights)
    for k in ['names', 'stride']:
        setattr(model, k, getattr(model[-1], k))
    return model  # return ensemble
def make_divisible(self, x, divisor):
    """Return the smallest multiple of ``divisor`` that is >= ``x``."""
    return math.ceil(x / divisor) * divisor
def check_img_size(self, img_size, s=32):
    """Verify that ``img_size`` is a multiple of stride ``s``.

    Args:
        img_size: Requested image size.
        s: Stride; converted with ``int`` before use.

    Returns:
        ``img_size`` rounded up to the next multiple of ``s``. A warning is
        printed when the value had to be changed.
    """
    new_size = self.make_divisible(img_size, int(s))  # ceil gs-multiple
    if new_size != img_size:
        print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
    return new_size
def letterbox(self, img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True,
              stride=32):
    """Resize and pad an image while meeting stride-multiple constraints.

    Args:
        img: Image array; ``img.shape[:2]`` is read as (height, width).
        new_shape: Target (height, width), or a single int for a square.
        color: Border colour used for the padding.
        auto: Pad only up to the minimum rectangle (padding modulo ``stride``).
        scaleFill: Stretch to ``new_shape`` with no padding (used when ``auto`` is false).
        scaleup: Allow enlarging; when false the image is only ever scaled down.
        stride: Stride used for the ``auto`` padding.

    Returns:
        ``(img, ratio, (dw, dh))``: the padded image, the (width, height)
        scale ratios, and the padding applied on each side.
    """
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 offsets send an odd padding pixel to the bottom/right side.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)
def box_iou(self, box1, box2):
    """Return intersection-over-union (Jaccard index) of boxes.

    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])

    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
def xywh2xyxy(self, x):
    """Convert boxes from [x, y, w, h] to [x1, y1, x2, y2].

    ``xy1`` is the top-left corner and ``xy2`` the bottom-right corner.

    Args:
        x: ``torch.Tensor`` or array-like whose last dimension holds
            (center x, center y, width, height). Any number of leading
            dimensions is accepted (``nx4`` as well as a single box).

    Returns:
        A copy of ``x`` (tensor clone or ``np.copy``) with the first four
        entries of the last dimension replaced by the corner coordinates.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    # Ellipsis indexing keeps the nx4 behaviour and also handles other ranks.
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
    return y
def non_max_suppression(self, prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False,
                        multi_label=False, labels=()):
    """Runs Non-Maximum Suppression (NMS) on inference results

    Args:
        prediction: Tensor indexed as [image, box, 5 + nc]; entries 0-3 are
            (center x, center y, width, height), entry 4 is objectness and
            the remaining ``nc`` entries are class scores.
        conf_thres: Confidence threshold for candidates and final scores.
        iou_thres: IoU threshold passed to ``torchvision.ops.nms``.
        classes: Optional list of class indices to keep.
        agnostic: When true, NMS is run across classes (no per-class offset).
        multi_label: Allow several labels per box (only effective when nc > 1).
        labels: Optional per-image apriori labels appended before NMS.

    Returns:
         list of detections, on (n,6) tensor per image [xyxy, conf, cls]
    """
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_det = 300  # maximum number of detections per image
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            l = labels[xi]
            v = torch.zeros((len(l), nc + 5), device=x.device)
            v[:, :4] = l[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = self.xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS: offsetting boxes by class index * max_wh keeps classes from suppressing each other
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = self.box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded

    return output
def clip_coords(self, boxes, img_shape):
    """Clip xyxy bounding boxes to the image shape, in place.

    Args:
        boxes: Tensor whose columns 0-3 are (x1, y1, x2, y2); modified in place.
        img_shape: Image shape as (height, width, ...).
    """
    boxes[:, 0].clamp_(0, img_shape[1])  # x1
    boxes[:, 1].clamp_(0, img_shape[0])  # y1
    boxes[:, 2].clamp_(0, img_shape[1])  # x2
    boxes[:, 3].clamp_(0, img_shape[0])  # y2
def scale_coords(self, img1_shape, coords, img0_shape, ratio_pad=None):
    """Rescale coords (xyxy) from ``img1_shape`` to ``img0_shape``, in place.

    Args:
        img1_shape: (height, width) the coordinates currently refer to.
        coords: Tensor whose columns 0-3 are (x1, y1, x2, y2); modified in place.
        img0_shape: (height, width, ...) of the target image.
        ratio_pad: Optional ``((gain, ...), (pad_x, pad_y))``. When ``None``
            the gain and padding are computed from the two shapes.

    Returns:
        ``coords`` after removing the padding, dividing by the gain and
        clipping to ``img0_shape``.
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        # Only read ratio_pad when one was supplied; indexing None would raise.
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    self.clip_coords(coords, img0_shape)
    return coords
def plot_one_box(self, x, img, color=None, label=None, line_thickness=3):
    """Draw one bounding box, and optionally its label, on ``img`` in place.

    Args:
        x: Box as (x1, y1, x2, y2); each value is converted with ``int``.
        img: Image to draw on.
        color: Box colour; a random colour is chosen when falsy.
        label: Optional text drawn on a filled background above the box.
        line_thickness: Line thickness; when falsy it is derived from the image size.
    """
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        # Re-use c2 as the opposite corner of the label background, placed above the box.
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
def infer(self, image, agnostic_nms=False, draw_flag=False):
    """Run detection on one image and return the detected boxes.

    Args:
        image: Image array; the channel axis is reversed before inference
            (BGR to RGB per the conversion step below).
        agnostic_nms: Forwarded to ``non_max_suppression`` as ``agnostic``.
        draw_flag: When true, boxes and labels are drawn onto ``image`` in place.

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer boxes in the coordinates of ``image``.
    """
    # Padded resize
    img = self.letterbox(image, self.imgsz, stride=self.stride)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(self.device)
    img = img.float()  # uint8 to fp32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference only: no autograd graph is needed for the forward pass or NMS.
    with torch.no_grad():
        # Inference
        pred = self.model(img, augment=False)[0]
        # Apply NMS
        pred = self.non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=None, agnostic=agnostic_nms)

    # Process detections
    result = []
    for i, det in enumerate(pred):  # detections per image
        # Rescale boxes from img_size to im0 size
        det[:, :4] = self.scale_coords(img.shape[2:], det[:, :4], image.shape).round()
        for *xyxy, conf, cls in reversed(det):
            x1, y1, x2, y2 = int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])
            result.append([x1, y1, x2, y2])

        if draw_flag:
            # Get names and colors
            names = self.model.module.names if hasattr(self.model, 'module') else self.model.names
            colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
            for *xyxy, conf, cls in reversed(det):
                label = f'{names[int(cls)]} {conf:.2f}'
                self.plot_one_box(xyxy, image, label=label, color=colors[int(cls)], line_thickness=3)

    # Post-processing
    return result

View File

@ -1,5 +1,5 @@
import time
from mem_top import mem_top
from pymouse import PyMouse
import cv2
import mediapipe as mp
import math
@ -12,8 +12,9 @@ import util
class Identify:
def __init__(self, v):
self.v = v
def __init__(self, value, array):
self.value = value
self.array = array
self.left_hand_flag = False
self.right_hand_flag = False
self.result = 0
@ -32,7 +33,8 @@ class Identify:
self.last_control_flag = 0
self.page_up_count = 0
self.page_down_count = 0
self.step = 0
self.step_up = 0
self.step_down = 0
self.last_wrist_point = (0, 0)
self.now_time = 0
self.lase_time = 0
@ -44,6 +46,8 @@ class Identify:
self.mp_face =
self.face_detection = self.mp_face.FaceDetection(min_detection_confidence=0.5)
def begin(self):
capture = cv2.VideoCapture(0)
@ -68,6 +72,10 @@ class Identify:
if flag:
self.flag = flag
now_time = time.time()
self.array[0] = self.position_x
self.array[1] = self.position_y
self.array[2] = self.image_width
self.array[3] = self.image_height
if now_time - last_time < 1:
last_time = now_time
@ -81,13 +89,16 @@ class Identify:
# if cv2.waitKey(1) & 0xFF == ord('q'):
# break
# control_flag = flag
self.v.value = self.flag
self.value.value = self.flag
# print("self.v.value = " + str(self.flag))
print("final_control_flag = " + str(self.flag))
self.flag = 0
# def face_detect(self):
# results = self.face_detection.process(self.rgb_image)
def deal_with_image(self):
self.image = cv2.flip(self.image, 1)
self.rgb_image = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB)
@ -103,10 +114,11 @@ class Identify:
self.image_height, self.image_width, c = self.image.shape
cx, cy = int(hand_landmarks.x * self.image_width), int(hand_landmarks.y * self.image_height)
self.left_hand_points.append((cx, cy))
self.is_identify = True
self.image, self.identify_results.multi_hand_landmarks[i], self.mp_hands.HAND_CONNECTIONS)
# self.mp_drawing.draw_landmarks(
# self.image, self.identify_results.multi_hand_landmarks[i], self.mp_hands.HAND_CONNECTIONS)
if self.identify_results.multi_handedness[i].classification[0].score > 0.5:
self.left_hand_flag = True
self.is_identify = True
for hand_landmarks in self.identify_results.multi_hand_landmarks[i].landmark:
@ -115,9 +127,11 @@ class Identify:
cx, cy = int(hand_landmarks.x * self.image_width), int(hand_landmarks.y * self.image_height)
self.right_hand_points.append((cx, cy))
self.is_identify = True
self.image, self.identify_results.multi_hand_landmarks[i], self.mp_hands.HAND_CONNECTIONS)
# self.mp_drawing.draw_landmarks(
# self.image, self.identify_results.multi_hand_landmarks[i], self.mp_hands.HAND_CONNECTIONS)
if self.identify_results.multi_handedness[i].classification[0].score > 0.5:
self.right_hand_flag = True
self.is_identify = True
def hand_angle(self):
@ -171,6 +185,12 @@ class Identify:
not self.is_finger_straight[3] and not self.is_finger_straight[4]
def judge_one(self):
    """Report whether only finger 1 is straight, recording its tip position.

    Side effect: ``position_x`` / ``position_y`` are set from point 8 of the
    left hand when ``left_hand_flag`` is set, otherwise from point 8 of the
    right hand when ``right_hand_flag`` is set.
    # NOTE(review): point 8 is presumably the index fingertip landmark -- confirm.

    Returns:
        True when ``is_finger_straight[1]`` holds and entries 2, 3 and 4 do not.
    """
    if self.left_hand_flag:
        self.position_x = self.left_hand_points[8][0]
        self.position_y = self.left_hand_points[8][1]
    elif self.right_hand_flag:
        self.position_x = self.right_hand_points[8][0]
        self.position_y = self.right_hand_points[8][1]
    return self.is_finger_straight[1] and not self.is_finger_straight[2] and \
        not self.is_finger_straight[3] and not self.is_finger_straight[4]
@ -190,51 +210,6 @@ class Identify:
return self.is_finger_straight[1] and self.is_finger_straight[2] and \
self.is_finger_straight[3] and self.is_finger_straight[4]
# def judge_five(self):
# self.hand_angle()
# return util.Util.is_straight(self.angle_list[1]) and util.Util.is_straight(
# self.angle_list[2]) and util.Util.is_straight(self.angle_list[3]) and util.Util.is_straight(
# self.angle_list[4])
# def judge_open(self):
# self.hand_angle()
# # angle_ = vector_2d_angle(
# # ((int(points[0][0]) - int(points[5][0])), (int(points[0][1]) - int(points[5][1]))),
# # ((int(points[5][0]) - int(points[8][0])), (int(points[5][1]) - int(points[8][1])))
# # )
# return not util.Util.is_straight(self.angle_list[1]) and util.Util.is_straight(
# self.angle_list[2]) and util.Util.is_straight(self.angle_list[3]) and util.Util.is_straight(
# self.angle_list[4])
# def judge_up(self):
# self.hand_angle()
# angle_ = util.Util.vector_2d_angle(
# ((int(self.hand_points[0][0]) - int(self.hand_points[5][0])),
# (int(self.hand_points[0][1]) - int(self.hand_points[5][1]))),
# ((int(self.hand_points[5][0]) - int(self.hand_points[8][0])),
# (int(self.hand_points[5][1]) - int(self.hand_points[8][1])))
# )
# return util.Util.is_straight(
# self.angle_list[1] and not util.Util.is_straight(self.angle_list[2]) and not util.Util.is_straight(
# self.angle_list[3]) and not util.Util.is_straight(self.angle_list[4])) and angle_ <= 40
# def judge_down(self):
# self.hand_angle()
# return util.Util.is_straight(self.angle_list[1]) and util.Util.is_straight(
# self.angle_list[2]) and not util.Util.is_straight(self.angle_list[3]) and not util.Util.is_straight(
# self.angle_list[4])
# def judge_end(self):
# self.hand_angle()
# return not util.Util.is_straight(self.angle_list[1]) and not util.Util.is_straight(
# self.angle_list[2]) and not util.Util.is_straight(self.angle_list[3]) and not util.Util.is_straight(
# self.angle_list[4])
# def judge_one(self):
# self.hand_angle()
# return util.Util.is_straight(self.angle_list[1]) and not util.Util.is_straight(
# self.angle_list[2]) and not util.Util.is_straight(self.angle_list[3]) and not util.Util.is_straight(
# self.angle_list[4])
def judge_step_one(self, is_left):
if is_left:
if self.judge_five() and self.left_hand_points[8][0] < self.left_hand_points[0][0] and \
@ -265,19 +240,30 @@ class Identify:
return True
return False
def judge_step_three(self):
    """Report whether point 20 lies above point 0 on a detected hand.

    "Above" means a smaller y value. Each hand is inspected only when its
    flag is set, so an absent hand's (possibly empty) point list is never
    indexed.
    # NOTE(review): points 20 and 0 are presumably the little-finger tip and the wrist -- confirm.

    Returns:
        True if the condition holds for the left or the right hand, else False.
    """
    if self.left_hand_flag and self.left_hand_points[20][1] < self.left_hand_points[0][1]:
        return True
    if self.right_hand_flag and self.right_hand_points[20][1] < self.right_hand_points[0][1]:
        return True
    return False
def judge_page_up(self):
if not self.right_hand_flag:
return False
if self.step == 0:
if self.step_up == 0:
self.lase_time = time.time()
if self.step == 0 and self.judge_step_one(False):
self.step = 1
elif self.step == 1 and self.judge_step_two(False):
self.step = 3
elif self.step == 2 and self.judge_zero():
self.step = 3
elif self.step == 3:
self.step = 0
if self.step_up == 0 and self.judge_step_three():
self.step_up = 4
if self.step_up == 4 and self.judge_step_one(False):
self.step_up = 1
elif self.step_up == 1 and self.judge_step_two(False):
self.step_up = 3
elif self.step_up == 2 and self.judge_zero():
self.step_up = 3
elif self.step_up == 3:
self.step_up = 0
now_time = time.time()
if now_time - self.lase_time < 3:
self.lase_time = now_time
@ -290,16 +276,21 @@ class Identify:
def judge_page_down(self):
if not self.left_hand_flag:
return False
if self.step == 0:
if self.step_down == 0:
self.lase_time = time.time()
if self.step == 0 and self.judge_step_one(True):
self.step = 1
elif self.step == 1 and self.judge_step_two(True):
self.step = 3
elif self.step == 2 and self.judge_zero():
self.step = 3
elif self.step == 3:
self.step = 0
if self.step_down == 0 and self.judge_step_three():
self.step_down = 4
print("step = 1")
if self.step_down == 4 and self.judge_step_one(True):
self.step_down = 1
print("step = 2")
elif self.step_down == 1 and self.judge_step_two(True):
self.step_down = 3
print("step = 3")
elif self.step_down == 2 and self.judge_zero():
self.step_down = 3
elif self.step_down == 3:
self.step_down = 0
now_time = time.time()
if now_time - self.lase_time < 3:
self.lase_time = now_time
@ -309,16 +300,24 @@ class Identify:
return False
return False
def judge_end(self):
    """Return True when both hands are flagged and ``judge_zero()`` holds."""
    # judge_zero() is only evaluated when both hand flags are set.
    return bool(self.left_hand_flag and self.right_hand_flag and self.judge_zero())
def judge_control(self):
if self.is_identify:
if self.judge_one():
if self.judge_two():
return 1
elif self.judge_page_up():
return 2
elif self.judge_page_down():
# print("down!down!down!down!down!down!down!down!down!down!down!down!down!down!down!down!down!down!")
return 3
elif self.judge_zero():
elif self.judge_one():
return 4
elif self.judge_zero():
return 5

View File

@ -1,29 +1,12 @@
import multiprocessing
import tkinter
import cv2
import identify
from identify import *
from tkinter.filedialog import *
import threading
# import pymouse
from tkinter import *
from tkinter import ttk
import tkinter.filedialog as dir
import queue
from tkinter.messagebox import showinfo
import win32com
from win32com.client import Dispatch
from pptx import Presentation
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import win32con
import win32api
import time
import os
import string
window = tkinter.Tk()
@ -72,10 +55,14 @@ def control_ppt_end():
def control_open_pencil():
win32api.keybd_event(17, 0, 0, 0) # 代表按下f键
win32api.keybd_event(80, 0, 0, 0) # 代表按下f键
win32api.keybd_event(82, 0, 0, 0) # 代表按下f键
win32api.keybd_event(17, 0, win32con.KEYEVENTF_KEYUP, 0) # 释放f键
win32api.keybd_event(80, 0, win32con.KEYEVENTF_KEYUP, 0) # 释放f键
win32api.keybd_event(82, 0, win32con.KEYEVENTF_KEYUP, 0) # 释放f键
def control_writing(m, position_x, position_y):
    """Move the pointer ``m`` to (``position_x``, ``position_y``) via ``m.move``."""
    m.move(position_x, position_y)
def control_draw():
@ -93,17 +80,33 @@ def control_draw():
def control_thread(v, flag):
def control_thread(value, array):
last_time = 0.0
step = 0
mouse = PyMouse()
while 1:
# print("step = " + str(step))
# print("control_thread")
now_time = time.time()
# time.sleep(0.1)
# print(end - start)
control_flag = v.value
# print("***********array************")
# print(array)
# print("***********array************")
control_flag = value.value
if step == 2:
if control_flag == 4:
# print("inininininininininininininininininininininininin")
position_x = int(mouse.screen_size()[0] / array[2] * array[0])
position_y = int(mouse.screen_size()[1] / array[3] * array[1])
control_writing(mouse, position_x, position_y)
elif control_flag == 5:
step = 1
last_time = now_time
# print("step to 11111111111111111")
if now_time - last_time < 1:
last_time = now_time
@ -113,6 +116,7 @@ def control_thread(v, flag):
# time.sleep(0.05)
# start = time.time()
print("control_flag = " + str(control_flag))
# print("main.step = " + str(step))
if step == 0 and control_flag == 1:
# print("control_flag == 1")
@ -121,11 +125,15 @@ def control_thread(v, flag):
if control_flag == 2:
# print("control_flag == 2")
if control_flag == 3:
elif control_flag == 3:
# print("control_flag == 3")
if control_flag == 4:
elif control_flag == 4:
step = 2
elif control_flag == 5:
# print("end!end!end!end!end!end!end!end!end!end!end!")
# print("control_flag == 4")
step = 0
# if control_flag == 5:
@ -134,23 +142,32 @@ def control_thread(v, flag):
# control_draw()
def identify_thread(v, flag):
identify = Identify(v)
def identify_thread(value, array):
identify = Identify(value, array)
def show_thread(value, array):
    """Placeholder process target; ignores both arguments and returns 0."""
    return 0
def open_file():
file_path = askopenfilename(title=u'选择文件', initialdir=(os.path.expanduser('H:/')))
file_path = askopenfilename(title=u'选择文件')
# file_path = askopenfilename(title=u'选择文件', initialdir=(os.path.expanduser('H:/')))
v = multiprocessing.Value('i', 0)
p1 = multiprocessing.Process(target=identify_thread, args=(v, 0))
p2 = multiprocessing.Process(target=control_thread, args=(v, 0))
value = multiprocessing.Value('i', 0)
array = multiprocessing.Array('i', 4)
# array2 = multiprocessing.Array()
p1 = multiprocessing.Process(target=identify_thread, args=(value, array))
p2 = multiprocessing.Process(target=control_thread, args=(value, array))
# p3 = multiprocessing.Process(target=show_thread, args=(value, array))
# p3.start()
# identify_t = threading.Thread(target=identify_thread)
# # print("control_flag1 = " + str(control_flag))
# print("control_flag1 = " + str(control_flag))
# control_t = threading.Thread(target=control_thread)
# # print("control_flag2 = " + str(control_flag))
# print("control_flag2 = " + str(control_flag))
# identify_t.setDaemon(True)
# control_t.setDaemon(True)
# identify_t.start()
@ -161,7 +178,7 @@ def open_ppt(file_path):
ppt = Dispatch('PowerPoint.Application')
ppt.Visible = 1 # 后台运行
ppt.DisplayAlerts = 0 # 不显示,不警告
# pptSel = ppt.Presentations.Open(file_path)
if __name__ == '__main__':

@ -0,0 +1,469 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Common modules
"""
import logging
import math
import warnings
from copy import copy
from pathlib import Path
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp
from utils.datasets import exif_transpose, letterbox
from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, save_one_box, \
scale_coords, xyxy2xywh
from utils.plots import Annotator, colors
from utils.torch_utils import time_sync
LOGGER = logging.getLogger(__name__)
def autopad(k, p=None):  # kernel, padding
    """Return the padding to use for a convolution with kernel size ``k``.

    Args:
        k: Kernel size, either an ``int`` or an iterable of ints.
        p: Explicit padding. When not ``None`` it is returned unchanged.

    Returns:
        ``p`` if given, otherwise ``k // 2`` (as a list of per-dimension
        values when ``k`` is not an int) -- pad to 'same'.
    """
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
class Conv(nn.Module):
    """Standard convolution: Conv2d -> BatchNorm2d -> activation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        """Build the block.

        ``act=True`` selects SiLU, an ``nn.Module`` instance is used as given,
        and any other value disables the activation (identity).
        """
        super().__init__()  # required before sub-modules can be assigned
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        """Apply convolution, batch norm and activation."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Apply convolution and activation only, skipping the batch-norm layer."""
        return self.act(self.conv(x))
class DWConv(Conv):
    """Depth-wise convolution: a ``Conv`` whose group count is gcd(c1, c2)."""

    def __init__(self, c1, c2, k=1, s=1, act=True):  # ch_in, ch_out, kernel, stride, activation
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
class TransformerLayer(nn.Module):
    """Transformer layer (LayerNorm layers removed for better performance)."""

    def __init__(self, c, num_heads):
        """Create bias-free q/k/v projections, multi-head attention and a two-layer MLP.

        Args:
            c: Embedding dimension used by every sub-layer.
            num_heads: Number of attention heads.
        """
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        """Apply attention and the MLP, each with a residual connection."""
        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x  # [0] is the attention output
        x = self.fc2(self.fc1(x)) + x
        return x
class TransformerBlock(nn.Module):
    """Vision Transformer block operating on a 4-D feature map."""

    def __init__(self, c1, c2, num_heads, num_layers):
        """Build the block.

        Args:
            c1: Input channels.
            c2: Output channels; a ``Conv`` adapts the input when ``c1 != c2``.
            num_heads: Attention heads per layer.
            num_layers: Number of stacked ``TransformerLayer`` modules.
        """
        super().__init__()
        self.conv = None
        if c1 != c2:
            self.conv = Conv(c1, c2)
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
        self.c2 = c2

    def forward(self, x):
        """Flatten the spatial dims into a sequence, run the layers, restore the 4-D shape."""
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        # (b, c, w, h) -> (w*h, b, c): sequence-first layout for the attention layers
        p = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3)
        return self.tr(p + self.linear(p)).unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h)
class Bottleneck(nn.Module):
    """Standard bottleneck: 1x1 conv, 3x3 conv, optional residual add."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        # The residual is only possible when input and output channel counts match.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Return ``x + cv2(cv1(x))`` when the shortcut is active, else ``cv2(cv1(x))``."""
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class BottleneckCSP(nn.Module):
    """CSP Bottleneck."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        """Run the bottleneck branch and the plain branch, then fuse them."""
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
class C3(nn.Module):
    """CSP Bottleneck with 3 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        """Concatenate ``m(cv1(x))`` with ``cv2(x)`` on dim 1 and apply ``cv3``."""
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
class C3TR(C3):
    """C3 module with TransformerBlock()."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        # Replace the bottleneck stack built by C3 with a 4-head transformer block of n layers.
        self.m = TransformerBlock(c_, c_, 4, n)
class C3SPP(C3):
    """C3 module with SPP()."""

    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        # Replace the bottleneck stack built by C3 with an SPP layer using kernel sizes k.
        self.m = SPP(c_, c_, k)
class C3Ghost(C3):
    """C3 module with GhostBottleneck()."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
        # Replace the bottleneck stack built by C3 with n ghost bottlenecks.
        self.m = nn.Sequential(*[GhostBottleneck(c_, c_) for _ in range(n)])
class SPP(nn.Module):
    """Spatial Pyramid Pooling (SPP) layer."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        """Build one stride-1 max-pool per kernel size in ``k`` between two 1x1 convs."""
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        """Concatenate ``cv1(x)`` with each pooled copy on dim 1, then apply ``cv2``."""
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
class SPPF(nn.Module):
    """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""

    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        """Apply the same max-pool three times in sequence and fuse all four maps."""
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            y1 = self.m(x)
            y2 = self.m(y1)
            return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
class Focus(nn.Module):
    """Focus wh information into c-space."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        # The four interleaved sub-images are stacked on the channel axis, hence c1 * 4.
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        """Sample every second pixel at the four phase offsets, concatenate, convolve."""
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
        # return self.conv(self.contract(x))
class GhostConv(nn.Module):
    """Ghost Convolution."""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super().__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)  # 5x5 conv with groups == channels

    def forward(self, x):
        """Return ``cv1(x)`` concatenated on dim 1 with ``cv2`` applied to it."""
        y = self.cv1(x)
        return torch.cat([y, self.cv2(y)], 1)
class GhostBottleneck(nn.Module):
    """Ghost Bottleneck."""

    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super().__init__()
        c_ = c2 // 2
        self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1),  # pw
                                  DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
                                  GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
        # A convolutional shortcut is only built for stride 2; otherwise the input passes through.
        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                      Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

    def forward(self, x):
        """Return the sum of the main branch and the shortcut branch."""
        return self.conv(x) + self.shortcut(x)
class Contract(nn.Module):
    """Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)."""

    def __init__(self, gain=2):
        super().__init__()  # required so the module can be called
        self.gain = gain

    def forward(self, x):
        """Move each ``gain x gain`` spatial patch into the channel dimension.

        Height and width are expected to be divisible by ``gain``; otherwise
        the ``view`` call raises.
        """
        b, c, h, w = x.size()  # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
        s = self.gain
        x = x.view(b, c, h // s, s, w // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(b, c * s * s, h // s, w // s)  # x(1,256,40,40)
class Expand(nn.Module):
    """Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)."""

    def __init__(self, gain=2):
        super().__init__()  # required so the module can be called
        self.gain = gain

    def forward(self, x):
        """Spread groups of ``gain ** 2`` channels over ``gain x gain`` spatial patches.

        The channel count is expected to be divisible by ``gain ** 2``;
        otherwise the ``view`` call raises.
        """
        b, c, h, w = x.size()  # assert C / s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(b, s, s, c // s ** 2, h, w)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(b, c // s ** 2, h * s, w * s)  # x(1,16,160,160)
class Concat(nn.Module):
    """Concatenate a list of tensors along dimension."""

    def __init__(self, dimension=1):
        super().__init__()  # required so the module can be called
        self.d = dimension

    def forward(self, x):
        """Return ``torch.cat(x, self.d)`` for a sequence of tensors ``x``."""
        return torch.cat(x, self.d)
class AutoShape(nn.Module):
    """YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs.

    Includes preprocessing, inference and NMS. ``forward`` reads ``self.stride`` and
    ``self.names``, which are not set in ``__init__``; they are presumably copied onto
    the wrapper by the code that constructs it — TODO confirm against the caller.
    """

    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class
    multi_label = False  # NMS multiple labels per box
    max_det = 1000  # maximum number of detections per image

    def __init__(self, model):
        # nn.Module must be initialised before the wrapped model can be assigned.
        super().__init__()
        self.model = model.eval()

    def autoshape(self):
        """Log that the model is already wrapped and return ``self`` unchanged."""
        import logging  # local import keeps this block self-contained

        logging.getLogger(__name__).info('AutoShape already enabled, skipping... ')
        return self

    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        m = self.model.model[-1]  # Detect()
        m.stride = fn(m.stride)
        m.grid = list(map(fn, m.grid))
        if isinstance(m.anchor_grid, list):
            m.anchor_grid = list(map(fn, m.anchor_grid))
        return self

    @torch.no_grad()  # inference-only wrapper: gradients are never needed here
    def forward(self, imgs, size=640, augment=False, profile=False):
        """Run inference on one image or a list of images from various sources.

        For height=640, width=1280, RGB images example inputs are:
          file:       imgs = 'data/images/zidane.jpg'  # str or PosixPath
          URI:             = 'https://ultralytics.com/images/zidane.jpg'
          OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
          PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
          numpy:           = np.zeros((640,1280,3))  # HWC
          torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
          multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        Args:
            imgs: A single input or a list of inputs as described above. When a list is
                passed, its entries are replaced in place by the decoded numpy images.
            size: Target size of the longest image side used to compute the inference shape.
            augment: Forwarded to the wrapped model.
            profile: Forwarded to the wrapped model.

        Returns:
            The raw model output when ``imgs`` is a ``torch.Tensor``; otherwise a
            ``Detections`` object built from the NMS results.
        """
        t = [time_sync()]
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, (str, Path)):  # filename or uri
                im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                im = np.asarray(exif_transpose(im))
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
            # One filename per image; Detections indexes files[i] when showing/saving.
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_sync())  # end of pre-process

        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_sync())  # end of inference

            # Post-process
            y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes,
                                    multi_label=self.multi_label, max_det=self.max_det)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_sync())  # end of NMS; Detections needs all four timestamps
            return Detections(imgs, y, files, t, self.names, x.shape)
class Detections:
    """YOLOv5 detections class for inference results.

    Holds the input images, the per-image prediction tensors and derived box formats
    (``xyxy``, ``xywh`` and their normalized variants), and offers helpers to print,
    show, save, crop, render and export them.
    """

    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        """Store predictions and derive the alternative box representations.

        Args:
            imgs: List of images as numpy arrays.
            pred: List of tensors, pred[0] = (xyxy, conf, cls).
            files: List of image filenames, one per image.
            times: Sequence of four timestamps; consecutive differences give the
                per-stage timings. ``None`` yields zero timings.
            names: Class names, indexed by class id.
            shape: Inference BCHW shape.
        """
        d = pred[0].device  # device
        # Per-image divisor (w, h, w, h, 1, 1): normalizes the box columns, leaves conf/cls untouched.
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # raw timestamps, kept so tolist() can rebuild per-image results
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        if times is None:
            # The signature allows times=None; report zero timings instead of failing on indexing.
            self.t = (0.0, 0.0, 0.0)
        else:
            self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
        self.s = shape  # inference BCHW shape

    @staticmethod
    def _log(message):
        """Emit ``message`` at INFO level on this module's logger."""
        import logging  # local import keeps this block self-contained

        logging.getLogger(__name__).info(message)

    def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
        """Summarize and optionally annotate, show, save, crop or render each image.

        Returns:
            The list of crop dicts when ``crop`` is true, otherwise ``None``.
        """
        crops = []
        for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
            s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
                            crops.append({'box': box, 'conf': conf, 'cls': cls, 'label': label,
                                          'im': save_one_box(box, im, file=file, save=save)})
                        else:  # all others
                            annotator.box_label(box, label, color=colors(cls))
                    im = annotator.im
            else:
                s += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if pprint:
                self._log(s.rstrip(', '))
            if show:
                im.show(self.files[i])  # show
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    self._log(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.imgs[i] = np.asarray(im)
        if crop:
            if save:
                self._log(f'Saved results to {save_dir}\n')
            return crops

    def print(self):
        """Log the per-image summaries followed by the per-stage timings."""
        self.display(pprint=True)  # print results
        self._log(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
                  self.t)

    def show(self):
        """Display every annotated image."""
        self.display(show=True)  # show results

    def save(self, save_dir='runs/detect/exp'):
        """Save annotated images; the default directory is auto-incremented."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(save=True, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir='runs/detect/exp'):
        """Return the list of crop dicts, optionally saving each crop under ``save_dir``."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None
        return self.display(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self):
        """Draw boxes onto the stored images and return them."""
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        """Return a shallow copy whose box attributes are pandas DataFrames.

        Example: ``print(results.pandas().xyxy[0])``.
        """
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            # Each row: first five values, then integer class id and its name.
            a = [[row[:5] + [int(row[5]), self.names[int(row[5])]] for row in det.tolist()]
                 for det in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        """Return a list of single-image Detections objects.

        Example: ``for result in results.tolist():``.
        """
        # Arguments are passed in signature order (files and times included) so that
        # names and shape are not shifted into the wrong slots.
        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s)
             for i in range(self.n)]
        for d in x:
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n
class Classify(nn.Module):
    """Classification head, i.e. x(b,c1,20,20) to x(b,c2).

    Args:
        c1: Input channels (total across inputs when a list is passed to ``forward``).
        c2: Output channels.
        k: Convolution kernel size.
        s: Convolution stride.
        p: Padding; ``None`` lets ``autopad`` choose it from ``k``.
        g: Convolution groups.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        # nn.Module must be initialised before any submodule can be assigned to self.
        super().__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        """Pool each input to 1x1, concatenate on channels, convolve and flatten to x(b,c2)."""
        inputs = x if isinstance(x, list) else [x]
        z = torch.cat([self.aap(y) for y in inputs], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)

models/ Normal file
View File

@ -0,0 +1,119 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""Experimental modules."""
import numpy as np
import torch
import torch.nn as nn
from models.common import Conv
from utils.downloads import attempt_download
class CrossConv(nn.Module):
    """Cross Convolution Downsample: a (1, k) convolution followed by a (k, 1) convolution.

    Args:
        c1: Input channels.
        c2: Output channels.
        k: Kernel length of each 1-D convolution.
        s: Stride applied along the kernel axis of each convolution.
        g: Groups of the second convolution.
        e: Expansion ratio used to derive the hidden channel count from ``c2``.
        shortcut: Add the input to the output; only honoured when ``c1 == c2``.
    """

    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
        # nn.Module must be initialised before any submodule can be assigned to self.
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, (1, k), (1, s))
        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Apply both convolutions, adding the residual input when ``self.add`` is set."""
        y = self.cv2(self.cv1(x))
        return x + y if self.add else y
class Sum(nn.Module):
    """Weighted sum of 2 or more layers.

    Args:
        n: Number of inputs.
        weight: When true, inputs after the first are scaled by learnable weights
            (``sigmoid(w) * 2``); otherwise all inputs are summed as-is.
    """

    def __init__(self, n, weight=False):  # n: number of inputs
        # nn.Module must be initialised before a Parameter can be registered.
        super().__init__()
        self.weight = weight  # apply weights boolean
        self.iter = range(n - 1)  # iter object
        if weight:
            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights

    def forward(self, x):
        """Sum the tensors in ``x``; the first input is never weighted."""
        y = x[0]  # no weight
        if self.weight:
            w = torch.sigmoid(self.w) * 2
            for i in self.iter:
                y = y + x[i + 1] * w[i]
        else:
            # Exactly one of the two branches runs, so no input is added twice.
            for i in self.iter:
                y = y + x[i + 1]
        return y
class MixConv2d(nn.Module):
    """Mixed Depth-wise Conv: parallel convolutions with different kernel sizes, concatenated on channels.

    Args:
        c1: Input channels.
        c2: Total output channels across all branches.
        k: Kernel size of each branch; ``len(k)`` is the number of branches.
        s: Stride shared by all branches.
        equal_ch: When true, split ``c2`` (near-)equally between branches; otherwise
            solve for channel counts giving equal ``weight.numel()`` per branch.
    """

    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        # nn.Module must be initialised before any submodule can be assigned to self.
        super().__init__()
        groups = len(k)
        if equal_ch:  # equal c_ per group
            i = torch.linspace(0, groups - 1E-6, c2).floor()  # c2 indices
            c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
        else:  # equal weight.numel() per group
            b = [c2] + [0] * groups
            a = np.eye(groups + 1, groups, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k) ** 2
            a[0] = 1
            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b

        self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        """Return ``x + act(bn(cat(branches)))``.

        The residual add requires the branch output to match ``x`` in shape.
        """
        return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
class Ensemble(nn.ModuleList):
    """Ensemble of models whose first outputs are concatenated along dim 1."""

    def __init__(self):
        super().__init__()

    def forward(self, x, augment=False, profile=False, visualize=False):
        """Run every member on ``x`` and concatenate the first element of each result.

        Returns:
            A ``(y, None)`` tuple: the concatenated inference output and a ``None``
            placeholder for the train output.
        """
        y = [module(x, augment, profile, visualize)[0] for module in self]
        # y = torch.stack(y).max(0)[0]  # max ensemble
        # y = torch.stack(y).mean(0)  # mean ensemble
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output
def attempt_load(weights, map_location=None, inplace=True, fuse=True):
    """Load an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a.

    Args:
        weights: A checkpoint path or a list of checkpoint paths.
        map_location: Forwarded to ``torch.load``.
        inplace: Value assigned to ``inplace`` on activation, ``Detect`` and ``Model`` modules.
        fuse: When true, call ``.fuse()`` on each loaded model before ``.eval()``.

    Returns:
        The single loaded model when one checkpoint is given, otherwise an ``Ensemble``
        carrying ``names`` from the last model and the ``stride`` of the model with the
        largest stride.
    """
    from models.yolo import Detect, Model

    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        # NOTE(security): torch.load unpickles the checkpoint; only load weights from trusted sources.
        ckpt = torch.load(attempt_download(w), map_location=map_location)  # load
        net = ckpt['ema' if ckpt.get('ema') else 'model'].float()  # FP32 model
        # Append each checkpoint exactly once, fused or not.
        model.append((net.fuse() if fuse else net).eval())

    # Compatibility updates
    for m in model.modules():
        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]:
            m.inplace = inplace  # pytorch 1.7.0 compatibility
            if type(m) is Detect:
                if not isinstance(m.anchor_grid, list):  # new Detect Layer compatibility
                    delattr(m, 'anchor_grid')
                    setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
        elif type(m) is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if len(model) == 1:
        return model[-1]  # return model

    print(f'Ensemble created with {weights}\n')
    for k in ['names']:
        setattr(model, k, getattr(model[-1], k))
    model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride  # max stride
    return model  # return ensemble

Some files were not shown because too many files have changed in this diff Show More