...
 
Commits (3)
    https://gitcode.net/qq_41221411/yolov8v4/-/commit/edf820ebecee96d40820e9615edf756d57fde2b8  Add notes  2023-10-09T21:37:56+08:00  byczyz byc187032@
    https://gitcode.net/qq_41221411/yolov8v4/-/commit/2eed74e675e6d742cd95fa3006430df4aeeff59b  Merge branch 'master' of https://gitcode.net/qq_41221411/yolov8v4  2023-10-10T16:01:53+08:00  byczyz byc187032@
    https://gitcode.net/qq_41221411/yolov8v4/-/commit/6f4585c37176de206c3e90218c53419de36c987d  Add notes  2023-10-12T16:45:17+08:00  byczyz byc187032@
......@@ -19,13 +19,13 @@ def listPathAllfiles(dirname):
if __name__ == '__main__':
annos_path = r"F:\Deepfashion2\validation\annos" # 改成需要路径
image_path = r"F:\Deepfashion2\validation\images" # 改成需要路径
labels_path = r"F:\Deepfashion2\validation\labels" # 改成需要路径
annos_path = r"E:\BaiduNetdiskDownload\Deepfashion2\train_divided\train\annos" # 改成需要路径
image_path = r"E:\BaiduNetdiskDownload\Deepfashion2\train_divided\train\image" # 改成需要路径
#labels_path = r"F:\Deepfashion2\validation\labels" # 改成需要路径
save_image_path="F:/Deepfashion2/validation/four_small_detect/images/"#图片保存位置
save_detect_path="F:/Deepfashion2/validation/four_small_detect/labels1/"#检测保存位置
save_seg_path="F:/Deepfashion2/validation/four_small_detect/labels2"#分割保存位置
save_image_path="E:/BaiduNetdiskDownload/Deepfashion2/train_divided/four_small_datasets/images/"#图片保存位置
save_detect_path="E:/BaiduNetdiskDownload/Deepfashion2/train_divided/four_small_datasets/labels1/"#检测保存位置
save_seg_path="E:/BaiduNetdiskDownload/Deepfashion2/train_divided/four_small_datasets/labels2"#分割保存位置
num_images = len(os.listdir(annos_path))
......@@ -49,7 +49,7 @@ if __name__ == '__main__':
box = temp[i]['segmentation']
category_id = int(temp[i]['category_id'] - 1)
if count[category_id]<nums_set:
print(count[category_id])
#print(count[category_id])
count[category_id]=count[category_id]+1
list_nums=[]
list_nums.append(str(category_id))
......
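The hunk header above references a listPathAllfiles(dirname) helper whose body is not shown in this diff. A plausible implementation (an assumption for illustration, not code taken from the repository) simply walks the directory tree and collects every file path:

import os

def listPathAllfiles(dirname):
    # recursively collect the full path of every file under dirname
    result = []
    for maindir, subdir, file_name_list in os.walk(dirname):
        for filename in file_name_list:
            result.append(os.path.join(maindir, filename))
    return result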
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'miandemo2.ui'
#
# Created by: PyQt5 UI code generator 5.15.9
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_MainWindow(object):
def setupUi(self, MainWindow):
MainWindow.setObjectName("MainWindow")
MainWindow.resize(1121, 867)
self.centralwidget = QtWidgets.QWidget(MainWindow)
self.centralwidget.setObjectName("centralwidget")
self.btndeal = QtWidgets.QGroupBox(self.centralwidget)
self.btndeal.setGeometry(QtCore.QRect(30, 50, 161, 591))
self.btndeal.setObjectName("btndeal")
self.layoutWidget = QtWidgets.QWidget(self.btndeal)
self.layoutWidget.setGeometry(QtCore.QRect(10, 20, 141, 551))
self.layoutWidget.setObjectName("layoutWidget")
self.verticalLayout = QtWidgets.QVBoxLayout(self.layoutWidget)
self.verticalLayout.setContentsMargins(0, 0, 0, 0)
self.verticalLayout.setObjectName("verticalLayout")
self.file_deal_btn = QtWidgets.QPushButton(self.layoutWidget)
self.file_deal_btn.setObjectName("file_deal_btn")
self.verticalLayout.addWidget(self.file_deal_btn)
self.xml_to_txt_btn = QtWidgets.QPushButton(self.layoutWidget)
self.xml_to_txt_btn.setObjectName("xml_to_txt_btn")
self.verticalLayout.addWidget(self.xml_to_txt_btn)
self.data_spilt_btn = QtWidgets.QPushButton(self.layoutWidget)
self.data_spilt_btn.setObjectName("data_spilt_btn")
self.verticalLayout.addWidget(self.data_spilt_btn)
self.open_came_btn = QtWidgets.QPushButton(self.layoutWidget)
self.open_came_btn.setObjectName("open_came_btn")
self.verticalLayout.addWidget(self.open_came_btn)
self.open_video_btn = QtWidgets.QPushButton(self.layoutWidget)
self.open_video_btn.setObjectName("open_video_btn")
self.verticalLayout.addWidget(self.open_video_btn)
self.open_file_btn = QtWidgets.QPushButton(self.layoutWidget)
self.open_file_btn.setObjectName("open_file_btn")
self.verticalLayout.addWidget(self.open_file_btn)
self.data_stronger = QtWidgets.QPushButton(self.layoutWidget)
self.data_stronger.setObjectName("data_stronger")
self.verticalLayout.addWidget(self.data_stronger)
self.defect_show_group = QtWidgets.QGroupBox(self.centralwidget)
self.defect_show_group.setGeometry(QtCore.QRect(190, 50, 431, 591))
self.defect_show_group.setTitle("")
self.defect_show_group.setObjectName("defect_show_group")
self.defect_show_label = QtWidgets.QLabel(self.defect_show_group)
self.defect_show_label.setGeometry(QtCore.QRect(0, 0, 431, 591))
self.defect_show_label.setText("")
self.defect_show_label.setObjectName("defect_show_label")
self.defect_result_show_group = QtWidgets.QGroupBox(self.centralwidget)
self.defect_result_show_group.setGeometry(QtCore.QRect(620, 50, 481, 591))
self.defect_result_show_group.setTitle("")
self.defect_result_show_group.setObjectName("defect_result_show_group")
self.defect_result_show_label = QtWidgets.QLabel(self.defect_result_show_group)
self.defect_result_show_label.setGeometry(QtCore.QRect(0, 0, 481, 591))
self.defect_result_show_label.setObjectName("defect_result_show_label")
self.data_deal_info_show_group = QtWidgets.QGroupBox(self.centralwidget)
self.data_deal_info_show_group.setGeometry(QtCore.QRect(30, 630, 1071, 201))
self.data_deal_info_show_group.setTitle("")
self.data_deal_info_show_group.setObjectName("data_deal_info_show_group")
self.data_deal_info_show_label = QtWidgets.QLabel(self.data_deal_info_show_group)
self.data_deal_info_show_label.setGeometry(QtCore.QRect(10, 20, 1051, 151))
self.data_deal_info_show_label.setObjectName("data_deal_info_show_label")
MainWindow.setCentralWidget(self.centralwidget)
self.menubar = QtWidgets.QMenuBar(MainWindow)
self.menubar.setGeometry(QtCore.QRect(0, 0, 1121, 26))
self.menubar.setObjectName("menubar")
MainWindow.setMenuBar(self.menubar)
self.statusbar = QtWidgets.QStatusBar(MainWindow)
self.statusbar.setObjectName("statusbar")
MainWindow.setStatusBar(self.statusbar)
self.retranslateUi(MainWindow)
QtCore.QMetaObject.connectSlotsByName(MainWindow)
def retranslateUi(self, MainWindow):
_translate = QtCore.QCoreApplication.translate
MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
self.btndeal.setTitle(_translate("MainWindow", "文件处理按钮"))  # "File-processing buttons"
self.file_deal_btn.setText(_translate("MainWindow", "minie处理"))  # "minie processing"
self.xml_to_txt_btn.setText(_translate("MainWindow", "xml_to_txt"))
self.data_spilt_btn.setText(_translate("MainWindow", "数据划分"))  # "Dataset split"
self.open_came_btn.setText(_translate("MainWindow", "打开摄像头"))  # "Open camera"
self.open_video_btn.setText(_translate("MainWindow", "打开视频"))  # "Open video"
self.open_file_btn.setText(_translate("MainWindow", "打开文件"))  # "Open file"
self.data_stronger.setText(_translate("MainWindow", "数据增强"))  # "Data augmentation"
self.defect_result_show_label.setText(_translate("MainWindow", "检测结果展示图"))  # "Detection result display"
self.data_deal_info_show_label.setText(_translate("MainWindow", "TextLabel"))
#
import shutil
import sys
import os
import cv2
import torch
import numpy as np
from PyQt5.QtWidgets import *
from PyQt5 import QtWidgets, QtCore, QtGui
from YOLOUI import Ui_MainWindow
from ultralytics.yolo.utils import ROOT, ops
from ultralytics.nn.tasks import attempt_load_weights
from ultralytics import YOLO
import supervision as sv
model = YOLO("yolov8-fire.pt")
confidence_threshold = 0.3
# custom bounding-box annotator style
box_annotator = sv.BoxAnnotator(
thickness=2,
text_thickness=2,
text_scale=1
)
def open_frame():  # NOTE: this standalone copy references self.cap / self.timer, so it only runs as a method of the main-window class
ret, image = self.cap.read()
if ret:
image1 = image.copy()  # back up the original frame; the copy is used to draw the detection results
result = model(image1, agnostic_nms=True)[0]  # run inference and take the first result
detections = sv.Detections.from_yolov8(result)  # convert the result into supervision Detections
xyxy = detections.xyxy
print(xyxy)
labels = [  # label text: class name plus confidence, shown on each box
f"{model.model.names[class_id]} {confidence:{confidence_threshold}}"
for _, _, confidence, class_id, _
in detections
]
realtime_confidence = [  # per-frame confidence record, used for transmission
f"{confidence:{confidence_threshold}}"
for _, _, confidence, _, _
in detections
]
frame = box_annotator.annotate(
scene=image1,
detections=detections,
labels=labels
)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# prepare the images for display
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# build QImage objects from the numpy arrays
video_frame = QtGui.QImage(image.data, image.shape[1], image.shape[0],
QtGui.QImage.Format_RGB888)
image_frame = QtGui.QImage(frame.data, frame.shape[1], frame.shape[0],
QtGui.QImage.Format_RGB888)
else:
self.cap.release()
self.timer.stop()
if __name__ == "__main__":
main()
\ No newline at end of file
import shutil
import sys
import os
import time
import json
import cv2
import torch
import numpy as np
import requests
from PyQt5.QtWidgets import *
from PyQt5 import QtWidgets, QtCore, QtGui
from YOLOUI import Ui_MainWindow
from ultralytics.yolo.utils import ROOT, ops
from ultralytics.nn.tasks import attempt_load_weights
from ultralytics import YOLO
import supervision as sv
model = YOLO("yolov8n.pt")
confidence_threshold = 0.7
# custom bounding-box annotator style
box_annotator = sv.BoxAnnotator(
thickness=2,
text_thickness=2,
text_scale=1
)
# GUI main-window class
class My_Ui(QMainWindow, Ui_MainWindow):
def __init__(self, parent=None):
super().__init__(parent)
self.setupUi(self)
self.cap = cv2.VideoCapture()  # video-capture handle (camera or video file)
self.timer = QtCore.QTimer()  # timer that drives frame grabbing
self.open_came_btn.clicked.connect(self.open_came_btn_function)  # "open camera" button
self.open_file_btn.clicked.connect(self.open_file_btn_function)  # "open file" button
self.open_video_btn.clicked.connect(self.open_video_btn_function)  # "open video" button
def open_file_btn_function(self):  # open an image and run detection (not implemented yet)
pass
def open_video_btn_function(self):  # open a video file and start detection
video_path, _ = QtWidgets.QFileDialog.getOpenFileName(self, '选择视频文件', '', '*.mp4')  # "Select a video file"
if video_path == '':
return
self.cap.open(video_path)
self.timer.start(30)  # a 30 ms interval, roughly the video frame rate (1000/30 fps)
self.timer.timeout.connect(self.open_frame)  # note: reconnecting on every click stacks connections; connecting once in __init__ avoids duplicate calls
def open_came_btn_function(self):  # open the camera and start detection
flag = self.cap.open(0)  # open camera index 0
if flag is False:
QtWidgets.QMessageBox.warning(self, "Warning", "打开摄像头失败",  # "Failed to open the camera"
buttons=QtWidgets.QMessageBox.Ok,
defaultButton=QtWidgets.QMessageBox.Ok)
else:
self.data_deal_info_show_label.setText("摄像头打开成功")  # "Camera opened successfully"
# start the timer so that detection runs on every tick
self.timer.start(30)  # a 30 ms interval, roughly the video frame rate (1000/30 fps)
self.timer.timeout.connect(self.open_frame)
# open_frame: frame-grab and detection callback driven by the timer
def open_frame(self):
ret, image = self.cap.read()
if ret:
image1=image.copy()  # back up the original frame; the copy is used to draw the detection results
result=model(image1, agnostic_nms=True)[0]  # run inference and take the first result
detections = sv.Detections.from_yolov8(result)  # convert the result into supervision Detections
# print(detections)  # uncomment to inspect the Detections format
# filter the detections that meet the confidence threshold
xyxy= detections.xyxy
confidence=detections.confidence
class_id=detections.class_id
new_xyxy=[]
new_confidence=[]
new_class_id=[]
for index in range(len(confidence)):
if float(confidence[index])>confidence_threshold:
new_xyxy.append(xyxy[index])
new_confidence.append(confidence[index])
new_class_id.append(class_id[index])
else:
continue
detections.xyxy=np.array(new_xyxy)
detections.confidence=np.array(new_confidence)
detections.class_id=np.array(new_class_id)
# print(detections)  # the filtered results have now been reassembled
labels = [  # label text: class name plus confidence, shown on each box
f"{model.model.names[class_id]} {confidence:{confidence_threshold}}"
for _, _, confidence, class_id, _
in detections
]
frame = box_annotator.annotate(
scene=image1,
detections=detections,
labels=labels
)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# prepare the images for display
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# build QImage objects from the numpy arrays
video_frame = QtGui.QImage(image.data, image.shape[1], image.shape[0],
QtGui.QImage.Format_RGB888)
image_frame = QtGui.QImage(frame.data, frame.shape[1], frame.shape[0],
QtGui.QImage.Format_RGB888)
self.defect_show_label.setPixmap(QtGui.QPixmap(video_frame))
self.defect_result_show_label.setPixmap(QtGui.QPixmap(image_frame))
self.defect_show_label.setScaledContents(True)  # scale the image to fit the label
self.defect_result_show_label.setScaledContents(True)  # scale the image to fit the label
else:
self.cap.release()
self.timer.stop()
def deal_open_frame(self):  # backup copy of the original open_frame implementation
pass
def test_open_frame(self):  # experimental variant of open_frame used for testing the alert workflow
ret, image = self.cap.read()
if ret:
image1 = image.copy()  # back up the original frame; the copy is used to draw the detection results
result = model(image1, agnostic_nms=True)[0]  # run inference and take the first result
detections = sv.Detections.from_yolov8(result)  # convert the result into supervision Detections
labels = [  # label text: class name plus confidence, shown on each box
f"{model.model.names[class_id]} {confidence:{confidence_threshold}}"
for _, _, confidence, class_id, _
in detections
]
realtime_confidence = [  # per-frame confidence record, used for alert transmission
f"{confidence:{confidence_threshold}}"
for _, _, confidence, _, _
in detections
]
frame = box_annotator.annotate(
scene=image1,
detections=detections,
labels=labels
)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# prepare the images for display
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
for index in range(len(realtime_confidence)):
if float(realtime_confidence[index]) > 0.3:
# a fire has been detected: send an alert
# 1. read the database to check the timestamp of the last alert
# 2. build the JSON payload to transmit
# 3. send the data
# 4. update the timestamp
'''
timestamp = time.time()
JSON_data = {
"timestamp": timestamp,
"image_frame": frame  # NOTE: a numpy array is not JSON-serializable; it would need to be encoded (e.g. base64) first
}
# encode the dict as a JSON string
json_data = json.dumps(JSON_data)
# send a POST request with the JSON payload attached
response = requests.post(url, data=json_data, headers={'Content-Type': 'application/json'})
# decode the JSON in the server response if needed
response_data = json.loads(response.text)'''
else:
continue
else:
self.cap.release()
self.timer.stop()
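# A minimal sketch of the alert step outlined in test_open_frame above, assuming a
# hypothetical endpoint URL and a receiver that accepts a base64-encoded JPEG;
# neither the URL nor the payload schema is defined in this repository.
def send_fire_alert(frame, alert_url="http://example.com/api/fire-alert"):
    import base64
    ok, buf = cv2.imencode(".jpg", frame)  # compress the frame to JPEG bytes
    if not ok:
        return None
    payload = {
        "timestamp": time.time(),
        "image_frame": base64.b64encode(buf.tobytes()).decode("ascii"),
    }
    try:
        # POST the JSON payload and return the decoded response, if any
        response = requests.post(alert_url, json=payload, timeout=5)
        return response.json()
    except (requests.RequestException, ValueError):
        return None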
if __name__ == '__main__':
app = QApplication(sys.argv)
ui = My_Ui()
ui.show()
sys.exit(app.exec())
\ No newline at end of file
......@@ -802,3 +802,4 @@ class SwinV2_CSPB(nn.Module):
y1 = self.m(x1)
y2 = self.cv2(x1)
return self.cv3(torch.cat((y1, y2), dim=1))
......@@ -19,6 +19,67 @@ from .utils import bias_init_with_prob, linear_init_
__all__ = 'Detect', 'Segment', 'Pose', 'Classify', 'RTDETRDecoder','RTDETRDecoderViT'
class Detect_DyHead(nn.Module):
"""YOLOv8 Detect head with DyHead for detection models."""
dynamic = False # force grid reconstruction
export = False # export mode
shape = None
anchors = torch.empty(0) # init
strides = torch.empty(0) # init
def __init__(self, nc=80, hidc=256, block_num=2, ch=()): # detection layer
super().__init__()
self.nc = nc # number of classes
self.nl = len(ch) # number of detection layers
self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
self.no = nc + self.reg_max * 4 # number of outputs per anchor
self.stride = torch.zeros(self.nl) # strides computed during build
c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc) # channels
self.conv = nn.ModuleList(nn.Sequential(Conv(x, hidc, 1)) for x in ch)
self.dyhead = nn.Sequential(*[DyHeadBlock(hidc) for i in range(block_num)])
self.cv2 = nn.ModuleList(
nn.Sequential(Conv(hidc, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for _ in ch)
self.cv3 = nn.ModuleList(nn.Sequential(Conv(hidc, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for _ in ch)
self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
def forward(self, x):
"""Concatenates and returns predicted bounding boxes and class probabilities."""
for i in range(self.nl):
x[i] = self.conv[i](x[i])
x = self.dyhead(x)
shape = x[0].shape # BCHW
for i in range(self.nl):
x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
if self.training:
return x
elif self.dynamic or self.shape != shape:
self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
self.shape = shape
x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
if self.export and self.format in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'): # avoid TF FlexSplitV ops
box = x_cat[:, :self.reg_max * 4]
cls = x_cat[:, self.reg_max * 4:]
else:
box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
y = torch.cat((dbox, cls.sigmoid()), 1)
return y if self.export else (y, x)
def bias_init(self):
"""Initialize Detect() biases, WARNING: requires stride availability."""
m = self # self.model[-1] # Detect() module
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
# ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency
for a, b, s in zip(m.cv2, m.cv3, m.stride): # from
a[-1].bias.data[:] = 1.0 # box
b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (.01 objects, 80 classes, 640 img)
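# A minimal usage sketch (an assumption for illustration, not code from this file;
# it presumes Conv, DFL, DyHeadBlock, make_anchors and dist2bbox are importable from
# the surrounding ultralytics modules and that the neck emits 256/512/512 channels):
#   head = Detect_DyHead(nc=80, hidc=256, block_num=2, ch=(256, 512, 512))
#   head.stride = torch.tensor([8., 16., 32.])
#   feats = [torch.randn(1, c, s, s) for c, s in zip((256, 512, 512), (80, 40, 20))]
#   head.train()
#   preds = head(feats)  # training mode: a list of 3 tensors, each [1, 4*reg_max + nc, H, W]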
class Detect(nn.Module):
"""YOLOv8 Detect head for detection models."""
dynamic = False # force grid reconstruction
......@@ -368,16 +429,25 @@ import torch.nn as nn
# ...your other imports...
class PatchEmbed(nn.Module):
def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
""" 2D Image to Patch Embedding """
def __init__(self, in_chans=256, embed_dim=768, patch_size=16):
super().__init__()
self.img_size = img_size
self.patch_size = patch_size
self.num_patches = (img_size // patch_size) * (img_size // patch_size)
self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
self.num_patches = None # It will be dynamically updated
def forward(self, x):
x = self.proj(x).flatten(2).transpose(1, 2)
return x
""" x: [B, C, H, W] """
B, C, H, W = x.shape
# assert H % self.patch_size == 0 and W % self.patch_size == 0, f"Image dimensions ({H}*{W}) should be divisible by the patch size ({self.patch_size})"
# [B, embed_dim, H//patch_size, W//patch_size]
x = self.proj(x)
# [B, embed_dim, -1]
x = x.flatten(2)
# [B, -1, embed_dim] -> [B, num_patches, embed_dim]
x = x.transpose(1, 2)
self.num_patches = x.shape[1]
return x, self.num_patches
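# Minimal usage sketch (assumption: the embedding receives a P5-style feature map
# of shape [B, 256, 16, 16], matching the defaults above):
#   pe = PatchEmbed(in_chans=256, embed_dim=768, patch_size=16)
#   tokens, num_patches = pe(torch.zeros(2, 256, 16, 16))
#   # tokens: [2, 1, 768]; num_patches == (16 // 16) * (16 // 16) == 1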
class RTDETRDecoderViT(RTDETRDecoder):
......@@ -386,14 +456,16 @@ class RTDETRDecoderViT(RTDETRDecoder):
embed_dim = self.hidden_dim
self.patch_embed = PatchEmbed(embed_dim=embed_dim)
self.pos_embed = nn.Parameter(torch.zeros(1, self.patch_embed.num_patches + 1, embed_dim))
# Use a dummy tensor to get num_patches
_, num_patches = self.patch_embed(torch.zeros(256, 256, 16, 16)) # Use the expected image size
self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
def _get_encoder_input(self, x):
# Original feature extraction
feats, shapes = super(RTDETRDecoderViT, self)._get_encoder_input(x)
# ViT Patch Embedding
vit_feats = self.patch_embed(x)
# ViT Patch Embedding (using last tensor in x list for ViT embedding)
vit_feats = self.patch_embed(x[-1])
vit_feats = vit_feats + self.pos_embed[:, :vit_feats.size(1), :]
# Concatenate original features with ViT embeddings
......