Modify_json_ImagePath
import json
import os
import shutil
from pathlib import Path
from typing import Union


def modify_image_paths(
    base_dir: Union[str, Path],
    image_relative_path: str = "../images",
    backup: bool = True,
    verbose: bool = True,
) -> None:
    """Batch-rewrite the imagePath field in JSON annotation files.

    Args:
        base_dir: Root directory to process.
        image_relative_path: New relative path to the images (relative to the
            JSON file). The special value "@filename" sets imagePath to the
            JSON file's own stem plus ".jpg".
        backup: Create a backup copy of each file before editing (default True).
        verbose: Print per-file progress (default True).
    """
    base_dir = Path(base_dir)
    if not base_dir.exists():
        raise FileNotFoundError(f"Directory does not exist: {base_dir}")

    backup_dir = base_dir.parent / f"{base_dir.name}_backup"
    if backup:
        backup_dir.mkdir(exist_ok=True)

    json_files = list(base_dir.glob("**/*.json"))
    total_files = len(json_files)

    for idx, json_path in enumerate(json_files, 1):
        try:
            if backup:
                backup_path = backup_dir / json_path.relative_to(base_dir)
                backup_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(json_path, backup_path)

            with open(json_path, 'r+', encoding='utf-8') as f:
                data = json.load(f)
                if image_relative_path == "@filename":
                    new_path = f"{json_path.stem}.jpg"
                else:
                    new_path = Path(image_relative_path) / Path(data["imagePath"]).name
                data["imagePath"] = str(new_path).replace("\\", "/")
                # Rewrite the file in place: rewind, dump, then truncate the leftover tail.
                f.seek(0)
                json.dump(data, f, indent=4, ensure_ascii=False)
                f.truncate()

            if verbose:
                print(f"[{idx}/{total_files}] ✅ Updated: {json_path} → {data['imagePath']}")
        except KeyError:
            print(f"⚠️ File is missing the imagePath field: {json_path}")
        except json.JSONDecodeError as e:
            print(f"❌ Failed to parse JSON: {json_path}\nDetails: {e}")
        except Exception as e:
            print(f"⛔ Unexpected error: {type(e).__name__} - {e}")


if __name__ == "__main__":
    INPUT_DIR = "/home/liweijia/Data/Mango_1000_Split/val/annotations"
    NEW_RELATIVE_PATH = "@filename"
    modify_image_paths(
        base_dir=INPUT_DIR,
        image_relative_path=NEW_RELATIVE_PATH,
        backup=True,
        verbose=True,
    )
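A quick way to verify the rewrite, sketched under the same directory layout as the __main__ block above and assuming the "@filename" mode was used (so every imagePath should equal the JSON file's own stem plus ".jpg"):

import json
from pathlib import Path

for p in Path("/home/liweijia/Data/Mango_1000_Split/val/annotations").glob("*.json"):
    with open(p, encoding="utf-8") as f:
        assert json.load(f)["imagePath"] == f"{p.stem}.jpg", p
print("all imagePath fields match their JSON stems")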
Coco_format
import os
import json
import shutil
from PIL import Image
data_images_dir = '/home/liweijia/Data/Mango_1000_Split/test/images'
data_annotations_dir = '/home/liweijia/Data/Mango_1000_Split/test/annotations'
output_dir = '/home/liweijia/Data/Mango_1000_Aug_COCO'
output_images_dir = os.path.join(output_dir, 'test/images')
os.makedirs(output_images_dir, exist_ok=True)
coco_data = {
    "images": [],
    "annotations": [],
    "categories": []
}
category_map = {}
category_id = 1
annotation_id = 1
image_id = 1
for json_file in os.listdir(data_annotations_dir):
    if not json_file.endswith('.json'):
        continue
    json_path = os.path.join(data_annotations_dir, json_file)
    with open(json_path, 'r') as f:
        data = json.load(f)

    image_filename = data.get('imagePath', json_file.replace('.json', '.jpg'))
    image_path = os.path.join(data_images_dir, image_filename)
    if not os.path.exists(image_path):
        print(f"Skipping {image_filename}: image file not found")
        continue

    with Image.open(image_path) as img:
        actual_width, actual_height = img.size

    # If the recorded size disagrees with the actual image, rescale the polygons.
    json_width = data.get('imageWidth', actual_width)
    json_height = data.get('imageHeight', actual_height)
    if (json_width, json_height) != (actual_width, actual_height):
        scale_x = actual_width / json_width
        scale_y = actual_height / json_height
        for shape in data['shapes']:
            shape['points'] = [[x * scale_x, y * scale_y] for (x, y) in shape['points']]

    coco_data['images'].append({
        "id": image_id,
        "file_name": image_filename,
        "width": actual_width,
        "height": actual_height
    })
    shutil.copy(image_path, os.path.join(output_images_dir, image_filename))

    for shape in data['shapes']:
        label = shape['label']
        if label not in category_map:
            category_map[label] = category_id
            category_id += 1
        cat_id = category_map[label]

        points = shape['points']
        x_coords = [p[0] for p in points]
        y_coords = [p[1] for p in points]
        x_min, y_min = min(x_coords), min(y_coords)
        width = max(x_coords) - x_min
        height = max(y_coords) - y_min

        annotation = {
            "id": annotation_id,
            "image_id": image_id,
            "category_id": cat_id,
            # COCO polygon segmentation: one flat [x1, y1, x2, y2, ...] list per polygon.
            "segmentation": [sum(points, [])],
            "bbox": [x_min, y_min, width, height],
            "area": width * height,
            "iscrowd": 0
        }
        coco_data['annotations'].append(annotation)
        annotation_id += 1
    image_id += 1
for label, cat_id in category_map.items():
    coco_data['categories'].append({
        "id": cat_id,
        "name": label,
        "supercategory": "none"
    })
os.makedirs(output_dir, exist_ok=True)
with open(os.path.join(output_dir, 'annotations.json'), 'w') as f:
    json.dump(coco_data, f, indent=2)
print(f"COCO conversion finished: {image_id - 1} images converted")
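A minimal sanity check on the generated file using the standard pycocotools API, assuming pycocotools is installed; loading the JSON through COCO() also validates its index structure:

from pycocotools.coco import COCO

coco = COCO('/home/liweijia/Data/Mango_1000_Aug_COCO/annotations.json')
print("images:", len(coco.getImgIds()))
print("annotations:", len(coco.getAnnIds()))
print("categories:", [c['name'] for c in coco.loadCats(coco.getCatIds())])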
Data_Augmentation
"""
Mango Dataset Augmentation with Original Preservation
Created: 2025-03-03
Author: AI Assistant
"""
import os
import json
import cv2
import shutil
import albumentations as A
import numpy as np
from datetime import datetime
from tqdm import tqdm
class Config:
    image_dir = "/home/liweijia/Data/Mango_1000_Split/train/images"
    json_dir = "/home/liweijia/Data/Mango_1000_Split/train/annotations"
    dest_dir = "/home/liweijia/Data/Mango_1000_Aug/train"
    augmentation_times = 7      # augmented copies per original image
    min_polygon_area = 30       # drop polygons smaller than this (px^2)
    epsilon = 1e-6              # snap near-zero coordinates to 0.0
    transform = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.4),
        A.RandomRotate90(p=0.3),
        A.ShiftScaleRotate(
            shift_limit=0.03,
            rotate_limit=15,
            interpolation=cv2.INTER_NEAREST,
            border_mode=cv2.BORDER_REPLICATE,
            p=0.4),
        A.RandomBrightnessContrast(p=0.3),
        A.GaussianBlur(blur_limit=(3, 5), p=0.3),  # kernel sizes must be odd for cv2.GaussianBlur
        A.CLAHE(p=0.2),
        A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.05, hue=0.02, p=0.5),
        A.ToGray(p=0.1),
        A.ChannelShuffle(p=0.05)
    ], keypoint_params=A.KeypointParams(
        format='xy',
        remove_invisible=False,
        angle_in_degrees=True))
def validate_polygon(points: list, min_area: float) -> bool:
    """Validate a polygon's area via the shoelace formula."""
    if len(points) < 3:
        return False
    x = np.array([p[0] for p in points])
    y = np.array([p[1] for p in points])
    area = 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))
    return area >= min_area


def safe_convert(value):
    """Convert numpy scalar types to native Python types."""
    if isinstance(value, np.generic):
        return value.item()
    return value


def process_annotation(original_ann: dict, keypoints: list, new_size: tuple) -> dict:
    """Rebuild the annotation for the transformed image (with type conversion and clipping)."""
    # Validate the annotation structure once, before iterating over shapes.
    if "shapes" not in original_ann:
        print("⚠️ Invalid annotation structure: missing 'shapes' field")
        return None
    if not isinstance(original_ann["shapes"], list):
        print("⚠️ Invalid annotation structure: 'shapes' is not a list")
        return None

    new_shapes = []
    idx = 0
    new_size = (int(new_size[0]), int(new_size[1]))
    for shape in original_ann["shapes"]:
        num_points = len(shape["points"])
        new_points = keypoints[idx:idx + num_points]
        idx += num_points

        valid_points = []
        for p in new_points:
            x = np.clip(p[0], 0, new_size[0] - 1)
            y = np.clip(p[1], 0, new_size[1] - 1)
            x = safe_convert(x)
            y = safe_convert(y)
            x = x if abs(x) > Config.epsilon else 0.0
            y = y if abs(y) > Config.epsilon else 0.0
            valid_points.append([x, y])

        if validate_polygon(valid_points, Config.min_polygon_area):
            new_shapes.append({
                "label": shape["label"],
                "points": valid_points,
                "group_id": safe_convert(shape.get("group_id")),
                "shape_type": "polygon",
                "flags": shape.get("flags", {})
            })

    return {
        "version": original_ann["version"],
        "flags": original_ann["flags"],
        "imagePath": original_ann["imagePath"],
        "imageData": None,
        "imageHeight": new_size[1],
        "imageWidth": new_size[0],
        "shapes": new_shapes
    }
def main():
    os.makedirs(os.path.join(Config.dest_dir, "images"), exist_ok=True)
    os.makedirs(os.path.join(Config.dest_dir, "labels", "json"), exist_ok=True)

    image_files = [f for f in os.listdir(Config.image_dir)
                   if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

    for image_file in tqdm(image_files):
        image_path = os.path.join(Config.image_dir, image_file)
        json_base = os.path.splitext(image_file)[0]
        json_path = os.path.join(Config.json_dir, json_base + ".json")

        if not os.path.exists(json_path):
            print(f"⚠️ Missing annotation file: {json_path}, skipping this image")
            continue
        if os.path.getsize(json_path) == 0:
            print(f"⛔ Empty annotation file: {json_path}, skipping")
            continue

        # Check the imread result before converting: cv2.imread returns None on failure,
        # and cv2.cvtColor would raise on a None input.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Could not read image file: {image_path}")
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        try:
            with open(json_path, 'r', encoding='utf-8-sig') as f:
                raw_content = f.read().strip()
                if not raw_content:
                    raise ValueError("File is empty")
                original_ann = json.loads(raw_content)
        except json.JSONDecodeError as e:
            print(f"❌ Malformed JSON: {json_path}\nDetails: {e}\nContext: {raw_content[:200]}...")
            continue
        except Exception as e:
            print(f"❌ Unexpected error: {json_path}\nType: {type(e).__name__}\nMessage: {e}")
            continue

        original_keypoints = []
        for shape in original_ann["shapes"]:
            original_keypoints.extend([[p[0], p[1]] for p in shape["points"]])

        # aug_idx == 0 writes the untouched original; 1..N are augmented copies.
        for aug_idx in range(Config.augmentation_times + 1):
            try:
                if aug_idx == 0:
                    transformed_image = image
                    transformed_keypoints = original_keypoints
                else:
                    augmented = Config.transform(image=image, keypoints=original_keypoints)
                    transformed_image = augmented["image"]
                    transformed_keypoints = augmented["keypoints"]

                new_ann = process_annotation(
                    original_ann=original_ann,
                    keypoints=transformed_keypoints,
                    new_size=(transformed_image.shape[1], transformed_image.shape[0]))
                if new_ann is None:
                    continue

                suffix = f"_{aug_idx}" if aug_idx > 0 else ""
                new_filename = os.path.splitext(image_file)[0] + suffix + ".jpg"
                cv2.imwrite(
                    os.path.join(Config.dest_dir, "images", new_filename),
                    cv2.cvtColor(transformed_image, cv2.COLOR_RGB2BGR))

                new_ann["imagePath"] = new_filename
                with open(os.path.join(Config.dest_dir, "labels", "json",
                                       new_filename.replace(".jpg", ".json")), "w") as f:
                    json.dump(new_ann, f, indent=2)
            except Exception as e:
                print(f"Error processing {image_file} augmentation {aug_idx}: {e}")
                continue


if __name__ == "__main__":
    main()
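A quick visual spot check, sketched under the same output layout as Config.dest_dir (the sample stem "example_1" is hypothetical): overlay each saved polygon on its augmented image and write the result next to it, to confirm the keypoints still align after the transform.

import os
import json
import cv2
import numpy as np

aug_dir = "/home/liweijia/Data/Mango_1000_Aug/train"
name = "example_1"  # hypothetical stem of one augmented sample
img = cv2.imread(os.path.join(aug_dir, "images", name + ".jpg"))
with open(os.path.join(aug_dir, "labels", "json", name + ".json")) as f:
    ann = json.load(f)
for shape in ann["shapes"]:
    pts = np.array(shape["points"], dtype=np.int32).reshape(-1, 1, 2)
    cv2.polylines(img, [pts], isClosed=True, color=(0, 255, 0), thickness=2)
cv2.imwrite(os.path.join(aug_dir, name + "_overlay.jpg"), img)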
Data_split
import os
import random
import shutil
import time
data_images_dir = '/home/liweijia/Data/Mango_1000/images'
data_annotations_dir = '/home/liweijia/Data/Mango_1000/labels/json'
output_dir = '/home/liweijia/Data/Mango_1000_Split'
split_ratios = {'train': 0.7, 'val': 0.15, 'test': 0.15}
random_seed = int(time.time() * 1000) % 2 ** 32
random.seed(random_seed)
splits = ['train', 'val', 'test']
for split in splits:
    os.makedirs(os.path.join(output_dir, split, 'images'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, split, 'annotations'), exist_ok=True)
all_json_files = [f for f in os.listdir(data_annotations_dir) if f.endswith('.json')]
random.shuffle(all_json_files)
total = len(all_json_files)
train_end = int(total * split_ratios['train'])
val_end = train_end + int(total * split_ratios['val'])
split_data = {
    'train': all_json_files[:train_end],
    'val': all_json_files[train_end:val_end],
    'test': all_json_files[val_end:]
}
for split, file_list in split_data.items():
    for json_file in file_list:
        base_name = os.path.splitext(json_file)[0]
        image_file = base_name + '.jpg'
        src_image_path = os.path.join(data_images_dir, image_file)
        src_json_path = os.path.join(data_annotations_dir, json_file)
        dst_image_path = os.path.join(output_dir, split, 'images', image_file)
        dst_json_path = os.path.join(output_dir, split, 'annotations', json_file)
        if not os.path.exists(src_image_path):
            print(f"Warning: image file {src_image_path} not found, skipping")
            continue
        if not os.path.exists(src_json_path):
            print(f"Warning: annotation file {src_json_path} not found, skipping")
            continue
        shutil.copy(src_image_path, dst_image_path)
        shutil.copy(src_json_path, dst_json_path)

print("Data split complete ✅")
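A minimal count check after the split, assuming the same output_dir; within each split the image and annotation counts should match, and the three splits should sum to the original total (up to rounding in the 0.7/0.15/0.15 ratios):

import os

output_dir = '/home/liweijia/Data/Mango_1000_Split'
for split in ('train', 'val', 'test'):
    n_img = len(os.listdir(os.path.join(output_dir, split, 'images')))
    n_ann = len(os.listdir(os.path.join(output_dir, split, 'annotations')))
    print(f"{split}: {n_img} images, {n_ann} annotations")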
Modify_Image_size
from PIL import Image
import os
import json


def resize_images(image_dir, json_file_path, expected_size):
    modified_images = []
    with open(json_file_path, 'r') as json_file:
        data = json.load(json_file)
    if "images" not in data:
        print("Error: JSON file does not contain 'images' key.")
        return

    for filename in os.listdir(image_dir):
        if filename.endswith('.jpg') or filename.endswith('.png'):
            image_path = os.path.join(image_dir, filename)
            image = Image.open(image_path)
            current_size = image.size
            print(f"Current size of {filename}: {current_size}")

            # Resize if either the file on disk or its COCO record disagrees
            # with the expected (width, height).
            should_resize = False
            for annotation in data["images"]:
                if annotation['file_name'] == filename:
                    if (current_size[0] != expected_size[0] or
                            current_size[1] != expected_size[1] or
                            annotation['width'] != expected_size[0] or
                            annotation['height'] != expected_size[1]):
                        should_resize = True
                    break

            if should_resize:
                image = image.resize(expected_size, Image.BILINEAR)
                image.save(image_path)
                modified_images.append(filename)
                print(f"Resized image: {filename} from {current_size} to {expected_size}")
                for annotation in data["images"]:
                    if annotation['file_name'] == filename:
                        annotation['width'] = expected_size[0]
                        annotation['height'] = expected_size[1]
                        print(f"Updated annotation for {filename}: "
                              f"width={expected_size[0]}, height={expected_size[1]}")

    with open(json_file_path, 'w') as json_file:
        json.dump(data, json_file, indent=4)
    print(f"Updated JSON file: {json_file_path}")

    if not modified_images:
        print("No images were modified.")
    else:
        print(f"Total modified images: {len(modified_images)}")
image_directory = '/home/liweijia/SparseInst/MangoGroup_Augmented_Data_split/val/images'
json_file_path = '/home/liweijia/SparseInst/MangoGroup_Augmented_Data_split/val/annotations/instances_val.json'
expected_dimensions = (3072, 4096)
resize_images(image_directory, json_file_path, expected_dimensions)
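A brief consistency check that can be appended after the call above (it reuses the same image_directory and json_file_path variables); it verifies that every COCO "images" record now matches the file on disk:

with open(json_file_path) as f:
    records = json.load(f)["images"]
for rec in records:
    with Image.open(os.path.join(image_directory, rec['file_name'])) as im:
        assert im.size == (rec['width'], rec['height']), rec['file_name']
print(f"All {len(records)} records match their images on disk.")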
RegisterDataSet
from detectron2.data import DatasetCatalog, MetadataCatalog
import torch


def register_coco_dataset(name, json_file, image_root):
    """Register a COCO-format dataset.

    Args:
        name (str): Dataset name.
        json_file (str): Path to the COCO-format JSON file.
        image_root (str): Path to the image folder.
    """
    from detectron2.data.datasets.coco import load_coco_json
    DatasetCatalog.register(name, lambda: load_coco_json(json_file, image_root, name))
    metadata = MetadataCatalog.get(name)
    metadata.set(thing_classes=["Branch", "Mango"])
    metadata.evaluator_type = "coco"


def register_datasets():
    """Register all datasets."""
    register_coco_dataset(
        "my_dataset_train",
        "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/train/annotations.json",
        "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/train/images")
    register_coco_dataset(
        "my_dataset_test",
        "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/test/annotations.json",
        "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/test/images")
    register_coco_dataset(
        "my_dataset_val",
        "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/val/annotations.json",
        "/home/liweijia/Data/Mango_Group_Aug_V2_COCO/val/images")
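A minimal usage sketch, assuming the module above has been imported: after register_datasets() runs, the detectron2 catalogs can be queried to confirm the registration took effect (DatasetCatalog.get triggers the lazy load_coco_json call).

from detectron2.data import DatasetCatalog, MetadataCatalog

register_datasets()
dataset_dicts = DatasetCatalog.get("my_dataset_train")
print("train samples:", len(dataset_dicts))
print("classes:", MetadataCatalog.get("my_dataset_train").thing_classes)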
Calculate_Image
import os
image_dir = "/home/liweijia/Data/Mango_1000/images"
image_files = [f for f in os.listdir(image_dir) if f.endswith((".jpg", ".png", ".jpeg"))]
print(f"文件夹中共有 {len(image_files)} 张图片。")
Calculate_categories_of_image
import os
import json
from collections import defaultdict


def get_classes_for_image(image_id, annotations, categories):
    category_ids = {annotation['category_id'] for annotation in annotations
                    if annotation['image_id'] == image_id}
    class_names = {cat['name'] for cat in categories if cat['id'] in category_ids}
    return class_names


def check_class_consistency(data_dir, annotations_file):
    with open(annotations_file, 'r') as f:
        annotations_data = json.load(f)

    categories = annotations_data['categories']
    annotations = annotations_data['annotations']
    class_counts = defaultdict(int)

    for image_info in annotations_data['images']:
        image_id = image_info['id']
        image_name = image_info['file_name']
        image_path = os.path.join(data_dir, image_name)
        classes = get_classes_for_image(image_id, annotations, categories)
        num_classes = len(classes)
        class_counts[num_classes] += 1
        print(f"Image: {image_path}, Number of classes: {num_classes}")

    print("\nClass counts:")
    for num_classes, count in class_counts.items():
        print(f"Number of classes: {num_classes}, Count: {count}")

    if len(class_counts) > 1:
        print("\nWarning: There are inconsistent class counts in the dataset.")
    else:
        print("\nAll images have the same number of classes.")
data_directory = '/home/liweijia/SparseInst/MangoGroup_Augmented_Data_split/val/images'
annotations_file = '/home/liweijia/SparseInst/MangoGroup_Augmented_Data_split/val/annotations/instances_val.json'
check_class_consistency(data_directory, annotations_file)
Warmup_Cosine
from torch.optim.lr_scheduler import CosineAnnealingLR, _LRScheduler
import math

from detectron2.solver import build_lr_scheduler


class WarmupCosineAnnealingLR(_LRScheduler):
    def __init__(self, optimizer, max_iters, warmup_iters, warmup_factor, last_epoch=-1):
        self.max_iters = max_iters
        self.warmup_iters = warmup_iters
        self.warmup_factor = warmup_factor
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        if self.last_epoch < self.warmup_iters:
            # Linear warmup: ramp from base_lr * warmup_factor up to base_lr.
            alpha = self.last_epoch / self.warmup_iters
            return [base_lr * self.warmup_factor * (1 - alpha) + alpha * base_lr
                    for base_lr in self.base_lrs]
        else:
            # Cosine annealing from base_lr down to 0 over the remaining iterations.
            return [base_lr * (1 + math.cos(math.pi * (self.last_epoch - self.warmup_iters)
                                            / (self.max_iters - self.warmup_iters))) / 2
                    for base_lr in self.base_lrs]


def build_warmup_cosine_scheduler(cfg, optimizer):
    return WarmupCosineAnnealingLR(
        optimizer,
        max_iters=cfg.SOLVER.MAX_ITER,
        warmup_iters=cfg.SOLVER.WARMUP_ITERS,
        warmup_factor=cfg.SOLVER.WARMUP_FACTOR,
    )
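A quick way to sanity-check the schedule, sketched with a dummy optimizer instead of a detectron2 cfg (the iteration counts here are illustrative): the printed learning rate should ramp up linearly over the first 100 steps and then decay along a cosine toward zero.

import torch

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=0.01)
scheduler = WarmupCosineAnnealingLR(optimizer, max_iters=1000,
                                    warmup_iters=100, warmup_factor=0.001)

for it in range(1000):
    optimizer.step()
    scheduler.step()
    if it in (0, 50, 99, 100, 500, 999):
        print(f"iter {it}: lr = {scheduler.get_last_lr()[0]:.6f}")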