Modify_json_ImagePath
import json
import os
import shutil
from pathlib import Path
from typing import Union


def modify_image_paths(
    base_dir: Union[str, Path],
    image_relative_path: str = "../images",
    backup: bool = True,
    verbose: bool = True,
) -> None:
    """Batch-rewrite the imagePath field in JSON annotation files.

    Args:
        base_dir: Root directory to process.
        image_relative_path: New relative path to the images (relative to the
            JSON file). The special value "@filename" sets imagePath to the
            JSON file's own stem plus ".jpg".
        backup: Create a backup copy of each file before editing (default True).
        verbose: Print per-file progress (default True).
    """
    base_dir = Path(base_dir)
    if not base_dir.exists():
        raise FileNotFoundError(f"Directory does not exist: {base_dir}")

    backup_dir = base_dir.parent / f"{base_dir.name}_backup"
    if backup:
        backup_dir.mkdir(exist_ok=True)

    json_files = list(base_dir.glob("**/*.json"))
    total_files = len(json_files)

    for idx, json_path in enumerate(json_files, 1):
        try:
            if backup:
                backup_path = backup_dir / json_path.relative_to(base_dir)
                backup_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(json_path, backup_path)

            with open(json_path, 'r+', encoding='utf-8') as f:
                data = json.load(f)
                if image_relative_path == "@filename":
                    new_path = f"{json_path.stem}.jpg"
                else:
                    new_path = Path(image_relative_path) / Path(data["imagePath"]).name
                data["imagePath"] = str(new_path).replace("\\", "/")
                # Rewrite the file in place: rewind, dump, then truncate the leftover tail.
                f.seek(0)
                json.dump(data, f, indent=4, ensure_ascii=False)
                f.truncate()

            if verbose:
                print(f"[{idx}/{total_files}] ✅ Updated: {json_path} → {data['imagePath']}")
        except KeyError:
            print(f"⚠️ File is missing the imagePath field: {json_path}")
        except json.JSONDecodeError as e:
            print(f"❌ Failed to parse JSON: {json_path}\nDetails: {e}")
        except Exception as e:
            print(f"⛔ Unexpected error: {type(e).__name__} - {e}")


if __name__ == "__main__":
    INPUT_DIR = "/home/liweijia/Data/Mango_1000_Split/val/annotations"
    NEW_RELATIVE_PATH = "@filename"
    modify_image_paths(
        base_dir=INPUT_DIR,
        image_relative_path=NEW_RELATIVE_PATH,
        backup=True,
        verbose=True,
    )
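A quick way to verify the rewrite, sketched under the same directory layout as the __main__ block above and assuming the "@filename" mode was used (so every imagePath should equal the JSON file's own stem plus ".jpg"):

import json
from pathlib import Path

for p in Path("/home/liweijia/Data/Mango_1000_Split/val/annotations").glob("*.json"):
    with open(p, encoding="utf-8") as f:
        assert json.load(f)["imagePath"] == f"{p.stem}.jpg", p
print("all imagePath fields match their JSON stems")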
Coco_format
import os
import json
import shutil
from PIL import Image
data_images_dir = '/home/liweijia/Data/Mango_1000_Split/test/images'
data_annotations_dir = '/home/liweijia/Data/Mango_1000_Split/test/annotations'
output_dir = '/home/liweijia/Data/Mango_1000_Aug_COCO'
output_images_dir = os.path.join(output_dir, 'test/images')
os.makedirs(output_images_dir, exist_ok=True)
coco_data = {
    "images": [],
    "annotations": [],
    "categories": []
}
category_map = {}
category_id = 1
annotation_id = 1
image_id = 1
for json_file in os.listdir(data_annotations_dir):
    if not json_file.endswith('.json'):
        continue
    json_path = os.path.join(data_annotations_dir, json_file)
    with open(json_path, 'r') as f:
        data = json.load(f)

    image_filename = data.get('imagePath', json_file.replace('.json', '.jpg'))
    image_path = os.path.join(data_images_dir, image_filename)
    if not os.path.exists(image_path):
        print(f"Skipping {image_filename}: image file not found")
        continue

    with Image.open(image_path) as img:
        actual_width, actual_height = img.size

    # If the recorded size disagrees with the actual image, rescale the polygons.
    json_width = data.get('imageWidth', actual_width)
    json_height = data.get('imageHeight', actual_height)
    if (json_width, json_height) != (actual_width, actual_height):
        scale_x = actual_width / json_width
        scale_y = actual_height / json_height
        for shape in data['shapes']:
            shape['points'] = [[x * scale_x, y * scale_y] for (x, y) in shape['points']]

    coco_data['images'].append({
        "id": image_id,
        "file_name": image_filename,
        "width": actual_width,
        "height": actual_height
    })
    shutil.copy(image_path, os.path.join(output_images_dir, image_filename))

    for shape in data['shapes']:
        label = shape['label']
        if label not in category_map:
            category_map[label] = category_id
            category_id += 1
        cat_id = category_map[label]

        points = shape['points']
        x_coords = [p[0] for p in points]
        y_coords = [p[1] for p in points]
        x_min, y_min = min(x_coords), min(y_coords)
        width = max(x_coords) - x_min
        height = max(y_coords) - y_min

        annotation = {
            "id": annotation_id,
            "image_id": image_id,
            "category_id": cat_id,
            # COCO polygon segmentation: one flat [x1, y1, x2, y2, ...] list per polygon.
            "segmentation": [sum(points, [])],
            "bbox": [x_min, y_min, width, height],
            "area": width * height,
            "iscrowd": 0
        }
        coco_data['annotations'].append(annotation)
        annotation_id += 1
    image_id += 1
for label, cat_id in category_map.items():
    coco_data['categories'].append({
        "id": cat_id,
        "name": label,
        "supercategory": "none"
    })
os.makedirs(output_dir, exist_ok=True)
with open(os.path.join(output_dir, 'annotations.json'), 'w') as f:
    json.dump(coco_data, f, indent=2)
print(f"COCO conversion finished: {image_id - 1} images converted")
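A minimal sanity check on the generated file using the standard pycocotools API, assuming pycocotools is installed; loading the JSON through COCO() also validates its index structure:

from pycocotools.coco import COCO

coco = COCO('/home/liweijia/Data/Mango_1000_Aug_COCO/annotations.json')
print("images:", len(coco.getImgIds()))
print("annotations:", len(coco.getAnnIds()))
print("categories:", [c['name'] for c in coco.loadCats(coco.getCatIds())])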
Data_Augmentation
"""
Mango Dataset Augmentation with Original Preservation
Created: 2025-03-03
Author: AI Assistant
"""
import os
import json
import cv2
import shutil
import albumentations as A
import numpy as np
from datetime import datetime
from tqdm import tqdm
class Config:
    image_dir = "/home/liweijia/Data/Mango_1000_Split/train/images"
    json_dir = "/home/liweijia/Data/Mango_1000_Split/train/annotations"
    dest_dir = "/home/liweijia/Data/Mango_1000_Aug/train"
    augmentation_times = 7      # augmented copies per original image
    min_polygon_area = 30       # drop polygons smaller than this (px^2)
    epsilon = 1e-6              # snap near-zero coordinates to 0.0
    transform = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.4),
        A.RandomRotate90(p=0.3),
        A.ShiftScaleRotate(
            shift_limit=0.03,
            rotate_limit=15,
            interpolation=cv2.INTER_NEAREST,
            border_mode=cv2.BORDER_REPLICATE,
            p=0.4),
        A.RandomBrightnessContrast(p=0.3),
        A.GaussianBlur(blur_limit=(3, 5), p=0.3),  # kernel sizes must be odd for cv2.GaussianBlur
        A.CLAHE(p=0.2),
        A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.05, hue=0.02, p=0.5),
        A.ToGray(p=0.1),
        A.ChannelShuffle(p=0.05)
    ], keypoint_params=A.KeypointParams(
        format='xy',
        remove_invisible=False,
        angle_in_degrees=True))
def validate_polygon(points: list, min_area: float) -> bool:
    """Validate a polygon's area via the shoelace formula."""
    if len(points) < 3:
        return False
    x = np.array([p[0] for p in points])
    y = np.array([p[1] for p in points])
    area = 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))
    return area >= min_area


def safe_convert(value):
    """Convert numpy scalar types to native Python types."""
    if isinstance(value, np.generic):
        return value.item()
    return value


def process_annotation(original_ann: dict, keypoints: list, new_size: tuple) -> dict:
    """Rebuild the annotation for the transformed image (with type conversion and clipping)."""
    # Validate the annotation structure once, before iterating over shapes.
    if "shapes" not in original_ann:
        print("⚠️ Invalid annotation structure: missing 'shapes' field")
        return None
    if not isinstance(original_ann["shapes"], list):
        print("⚠️ Invalid annotation structure: 'shapes' is not a list")
        return None

    new_shapes = []
    idx = 0
    new_size = (int(new_size[0]), int(new_size[1]))
    for shape in original_ann["shapes"]:
        num_points = len(shape["points"])
        new_points = keypoints[idx:idx + num_points]
        idx += num_points

        valid_points = []
        for p in new_points:
            x = np.clip(p[0], 0, new_size[0] - 1)
            y = np.clip(p[1], 0, new_size[1] - 1)
            x = safe_convert(x)
            y = safe_convert(y)
            x = x if abs(x) > Config.epsilon else 0.0
            y = y if abs(y) > Config.epsilon else 0.0
            valid_points.append([x, y])

        if validate_polygon(valid_points, Config.min_polygon_area):
            new_shapes.append({
                "label": shape["label"],
                "points": valid_points,
                "group_id": safe_convert(shape.get("group_id")),
                "shape_type": "polygon",
                "flags": shape.get("flags", {})
            })

    return {
        "version": original_ann["version"],
        "flags": original_ann["flags"],
        "imagePath": original_ann["imagePath"],
        "imageData": None,
        "imageHeight": new_size[1],
        "imageWidth": new_size[0],
        "shapes": new_shapes
    }
def main():
    os.makedirs(os.path.join(Config.dest_dir, "images"), exist_ok=True)
    os.makedirs(os.path.join(Config.dest_dir, "labels", "json"), exist_ok=True)

    image_files = [f for f in os.listdir(Config.image_dir)
                   if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

    for image_file in tqdm(image_files):
        image_path = os.path.join(Config.image_dir, image_file)
        json_base = os.path.splitext(image_file)[0]
        json_path = os.path.join(Config.json_dir, json_base + ".json")

        if not os.path.exists(json_path):
            print(f"⚠️ Missing annotation file: {json_path}, skipping this image")
            continue
        if os.path.getsize(json_path) == 0:
            print(f"⛔ Empty annotation file: {json_path}, skipping")
            continue

        # Check the imread result before converting: cv2.imread returns None on failure,
        # and cv2.cvtColor would raise on a None input.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Could not read image file: {image_path}")
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        try:
            with open(json_path, 'r', encoding='utf-8-sig') as f:
                raw_content = f.read().strip()
                if not raw_content:
                    raise ValueError("File is empty")
                original_ann = json.loads(raw_content)
        except json.JSONDecodeError as e:
            print(f"❌ Malformed JSON: {json_path}\nDetails: {e}\nContext: {raw_content[:200]}...")
            continue
        except Exception as e:
            print(f"❌ Unexpected error: {json_path}\nType: {type(e).__name__}\nMessage: {e}")
            continue

        original_keypoints = []
        for shape in original_ann["shapes"]:
            original_keypoints.extend([[p[0], p[1]] for p in shape["points"]])

        # aug_idx == 0 writes the untouched original; 1..N are augmented copies.
        for aug_idx in range(Config.augmentation_times + 1):
            try:
                if aug_idx == 0:
                    transformed_image = image
                    transformed_keypoints = original_keypoints
                else:
                    augmented = Config.transform(image=image, keypoints=original_keypoints)
                    transformed_image = augmented["image"]
                    transformed_keypoints = augmented["keypoints"]

                new_ann = process_annotation(
                    original_ann=original_ann,
                    keypoints=transformed_keypoints,
                    new_size=(transformed_image.shape[1], transformed_image.shape[0]))
                if new_ann is None:
                    continue

                suffix = f"_{aug_idx}" if aug_idx > 0 else ""
                new_filename = os.path.splitext(image_file)[0] + suffix + ".jpg"
                cv2.imwrite(
                    os.path.join(Config.dest_dir, "images", new_filename),
                    cv2.cvtColor(transformed_image, cv2.COLOR_RGB2BGR))

                new_ann["imagePath"] = new_filename
                with open(os.path.join(Config.dest_dir, "labels", "json",
                                       new_filename.replace(".jpg", ".json")), "w") as f:
                    json.dump(new_ann, f, indent=2)
            except Exception as e:
                print(f"Error processing {image_file} augmentation {aug_idx}: {e}")
                continue


if __name__ == "__main__":
    main()
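A quick visual spot check, sketched under the same output layout as Config.dest_dir (the sample stem "example_1" is hypothetical): overlay each saved polygon on its augmented image and write the result next to it, to confirm the keypoints still align after the transform.

import os
import json
import cv2
import numpy as np

aug_dir = "/home/liweijia/Data/Mango_1000_Aug/train"
name = "example_1"  # hypothetical stem of one augmented sample
img = cv2.imread(os.path.join(aug_dir, "images", name + ".jpg"))
with open(os.path.join(aug_dir, "labels", "json", name + ".json")) as f:
    ann = json.load(f)
for shape in ann["shapes"]:
    pts = np.array(shape["points"], dtype=np.int32).reshape(-1, 1, 2)
    cv2.polylines(img, [pts], isClosed=True, color=(0, 255, 0), thickness=2)
cv2.imwrite(os.path.join(aug_dir, name + "_overlay.jpg"), img)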
Data_split
import os
import random
import shutil
import time
data_images_dir = '/home/liweijia/Data/Mango_1000/images'
data_annotations_dir = '/home/liweijia/Data/Mango_1000/labels/json'
output_dir = '/home/liweijia/Data/Mango_1000_Split'
split_ratios = {'train': 0.7, 'val': 0.15, 'test': 0.15}
random_seed = int(time.time() * 1000) % 2 ** 32
random.seed(random_seed)
splits = ['train', 'val', 'test']
for split in splits:
    os.makedirs(os.path.join(output_dir, split, 'images'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, split, 'annotations'), exist_ok=True)
all_json_files = [f for f in os.listdir(data_annotations_dir) if f.endswith('.json')]
random.shuffle(all_json_files)
total = len(all_json_files)
train_end = int(total * split_ratios['train'])
val_end = train_end + int(total * split_ratios['val'])
split_data = {
    'train': all_json_files[:train_end],
    'val': all_json_files[train_end:val_end],
    'test': all_json_files[val_end:]
}
for split, file_list in split_data.items():
    for json_file in file_list:
        base_name = os.path.splitext(json_file)[0]
        image_file = base_name + '.jpg'
        src_image_path = os.path.join(data_images_dir, image_file)
        src_json_path = os.path.join(data_annotations_dir, json_file)
        dst_image_path = os.path.join(output_dir, split, 'images', image_file)
        dst_json_path = os.path.join(output_dir, split, 'annotations', json_file)
        if not os.path.exists(src_image_path):
            print(f"Warning: image file {src_image_path} not found, skipping")
            continue
        if not os.path.exists(src_json_path):
            print(f"Warning: annotation file {src_json_path} not found, skipping")
            continue
        shutil.copy(src_image_path, dst_image_path)
        shutil.copy(src_json_path, dst_json_path)

print("Data split complete ✅")
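A minimal count check after the split, assuming the same output_dir; within each split the image and annotation counts should match, and the three splits should sum to the original total (up to rounding in the 0.7/0.15/0.15 ratios):

import os

output_dir = '/home/liweijia/Data/Mango_1000_Split'
for split in ('train', 'val', 'test'):
    n_img = len(os.listdir(os.path.join(output_dir, split, 'images')))
    n_ann = len(os.listdir(os.path.join(output_dir, split, 'annotations')))
    print(f"{split}: {n_img} images, {n_ann} annotations")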
Modify_Image_size
from PIL import Image
import os
import json


def resize_images(image_dir, json_file_path, expected_size):
    modified_images = []
    with open(json_file_path, 'r') as json_file:
        data = json.load(json_file)
    if "images" not in data:
        print("Error: JSON file does not contain 'images' key.")
        return

    for filename in os.listdir(image_dir):
        if filename.endswith('.jpg') or filename.endswith('.png'):
            image_path = os.path.join(image_dir, filename)
            image = Image.open(image_path)
            current_size = image.size
            print(f"Current size of {filename}: {current_size}")

            # Resize if either the file on disk or its COCO record disagrees
            # with the expected (width, height).
            should_resize = False
            for annotation in data["images"]:
                if annotation['file_name'] == filename:
                    if (current_size[0] != expected_size[0] or
                            current_size[1] != expected_size[1] or
                            annotation['width'] != expected_size[0] or
                            annotation['height'] != expected_size[1]):
                        should_resize = True
                    break

            if should_resize:
                image = image.resize(expected_size, Image.BILINEAR)
                image.save(image_path)
                modified_images.append(filename)
                print(f"Resized image: {filename} from {current_size} to {expected_size}")
                for annotation in data["images"]:
                    if annotation['file_name'] == filename:
                        annotation['width'] = expected_size[0]
                        annotation['height'] = expected_size[1]
                        print(f"Updated annotation for {filename}: "
                              f"width={expected_size[0]}, height={expected_size[1]}")

    with open(json_file_path, 'w') as json_file:
        json.dump(data, json_file, indent=4)
    print(f"Updated JSON file: {json_file_path}")

    if not modified_images:
        print("No images were modified.")
    else:
        print(f"Total modified images: {len(modified_images)}")
image_directory = '/home/liweijia/SparseInst/MangoGroup_Augmented_Data_split/val/images'
json_file_path = '/home/liweijia/SparseInst/MangoGroup_Augmented_Data_split/val/annotations/instances_val.json'
expected_dimensions = (3072, 4096)
resize_images(image_directory, json_file_path, expected_dimensions)
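A brief consistency check that can be appended after the call above (it reuses the same image_directory and json_file_path variables); it verifies that every COCO "images" record now matches the file on disk:

with open(json_file_path) as f:
    records = json.load(f)["images"]
for rec in records:
    with Image.open(os.path.join(image_directory, rec['file_name'])) as im:
        assert im.size == (rec['width'], rec['height']), rec['file_name']
print(f"All {len(records)} records match their images on disk.")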
RegisterDataSet
from detectron2.data import DatasetCatalog, MetadataCatalog
import torch


def register_coco_dataset(name, json_file, image_root):
    """Register a COCO-format dataset.

    Args:
        name (str): Dataset name.
        json_file (str): Path to the COCO-format JSON file.
        image_root (str): Path to the image folder.
    """
    from detectron2.data.datasets.coco import load_coco_json
    DatasetCatalog.register(name, lambda: load_coco_json(json_file, image_root, name))
    metadata = MetadataCatalog.get(name)
    metadata.set(thing_classes=["Branch", "Mango"])
    metadata.evaluator_type = "coco"


def register_datasets():
    """Register all datasets."""
    register_coco_dataset(
        "my_dataset_train",
        "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/train/annotations.json",
        "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/train/images")
    register_coco_dataset(
        "my_dataset_test",
        "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/test/annotations.json",
        "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/test/images")
    register_coco_dataset(
        "my_dataset_val",
        "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/val/annotations.json",
        "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/val/images")
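A minimal usage sketch, assuming the module above has been imported: after register_datasets() runs, the detectron2 catalogs can be queried to confirm the registration took effect (DatasetCatalog.get triggers the lazy load_coco_json call).

from detectron2.data import DatasetCatalog, MetadataCatalog

register_datasets()
dataset_dicts = DatasetCatalog.get("my_dataset_train")
print("train samples:", len(dataset_dicts))
print("classes:", MetadataCatalog.get("my_dataset_train").thing_classes)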
Calculate_Image
import os
image_dir = "/home/liweijia/Data/Mango_1000/images"
image_files = [f for f in os.listdir(image_dir) if f.endswith((".jpg", ".png", ".jpeg"))]
print(f"文件夹中共有 {len(image_files)} 张图片。")
Calculate_categories_of_image
import os
import json
from collections import defaultdict


def get_classes_for_image(image_id, annotations, categories):
    category_ids = {annotation['category_id'] for annotation in annotations
                    if annotation['image_id'] == image_id}
    class_names = {cat['name'] for cat in categories if cat['id'] in category_ids}
    return class_names


def check_class_consistency(data_dir, annotations_file):
    with open(annotations_file, 'r') as f:
        annotations_data = json.load(f)

    categories = annotations_data['categories']
    annotations = annotations_data['annotations']
    class_counts = defaultdict(int)

    for image_info in annotations_data['images']:
        image_id = image_info['id']
        image_name = image_info['file_name']
        image_path = os.path.join(data_dir, image_name)
        classes = get_classes_for_image(image_id, annotations, categories)
        num_classes = len(classes)
        class_counts[num_classes] += 1
        print(f"Image: {image_path}, Number of classes: {num_classes}")

    print("\nClass counts:")
    for num_classes, count in class_counts.items():
        print(f"Number of classes: {num_classes}, Count: {count}")

    if len(class_counts) > 1:
        print("\nWarning: There are inconsistent class counts in the dataset.")
    else:
        print("\nAll images have the same number of classes.")
data_directory = '/home/liweijia/SparseInst/MangoGroup_Augmented_Data_split/val/images'
annotations_file = '/home/liweijia/SparseInst/MangoGroup_Augmented_Data_split/val/annotations/instances_val.json'
check_class_consistency(data_directory, annotations_file)
Warmup_Cosine
from torch.optim.lr_scheduler import CosineAnnealingLR, _LRScheduler
import math

from detectron2.solver import build_lr_scheduler


class WarmupCosineAnnealingLR(_LRScheduler):
    def __init__(self, optimizer, max_iters, warmup_iters, warmup_factor, last_epoch=-1):
        self.max_iters = max_iters
        self.warmup_iters = warmup_iters
        self.warmup_factor = warmup_factor
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        if self.last_epoch < self.warmup_iters:
            # Linear warmup: ramp from base_lr * warmup_factor up to base_lr.
            alpha = self.last_epoch / self.warmup_iters
            return [base_lr * self.warmup_factor * (1 - alpha) + alpha * base_lr
                    for base_lr in self.base_lrs]
        else:
            # Cosine annealing from base_lr down to 0 over the remaining iterations.
            return [base_lr * (1 + math.cos(math.pi * (self.last_epoch - self.warmup_iters)
                                            / (self.max_iters - self.warmup_iters))) / 2
                    for base_lr in self.base_lrs]


def build_warmup_cosine_scheduler(cfg, optimizer):
    return WarmupCosineAnnealingLR(
        optimizer,
        max_iters=cfg.SOLVER.MAX_ITER,
        warmup_iters=cfg.SOLVER.WARMUP_ITERS,
        warmup_factor=cfg.SOLVER.WARMUP_FACTOR,
    )
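A quick way to sanity-check the schedule, sketched with a dummy optimizer instead of a detectron2 cfg (the iteration counts here are illustrative): the printed learning rate should ramp up linearly over the first 100 steps and then decay along a cosine toward zero.

import torch

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=0.01)
scheduler = WarmupCosineAnnealingLR(optimizer, max_iters=1000,
                                    warmup_iters=100, warmup_factor=0.001)

for it in range(1000):
    optimizer.step()
    scheduler.step()
    if it in (0, 50, 99, 100, 500, 999):
        print(f"iter {it}: lr = {scheduler.get_last_lr()[0]:.6f}")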