1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
| import torch import torchvision from torchvision.transforms import functional as F from PIL import Image import matplotlib.pyplot as plt import numpy as np
# Build torchvision's Mask R-CNN (ResNet-50 + FPN) and request pretrained weights.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=`; confirm the installed version before switching.
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Inference mode for layers that behave differently during training.
model.eval()
def preprocess_image(image_path):
    """Load an image file and return it as a one-image batch on ``device``.

    The file at ``image_path`` is opened with PIL, converted to RGB, run
    through torchvision's ``ToTensor`` transform, given a leading batch
    dimension with ``unsqueeze(0)`` and moved to the module-level ``device``.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The image tensor with an extra leading dimension of size 1.
    """
    rgb_image = Image.open(image_path).convert("RGB")
    to_tensor = torchvision.transforms.ToTensor()
    batch = to_tensor(rgb_image).unsqueeze(0)
    return batch.to(device)
def process_output(output, score_threshold=0.8):
    """Keep only the detections whose score exceeds ``score_threshold``.

    Args:
        output: Mapping with ``'boxes'``, ``'labels'``, ``'masks'`` and
            ``'scores'`` tensors; the first dimension of each indexes the
            detections.
        score_threshold: A detection is kept when its score is strictly
            greater than this value. Defaults to 0.8, the cut-off this
            function previously hard-coded.

    Returns:
        A ``(boxes, labels, masks)`` tuple of NumPy arrays containing only
        the kept detections, in their original order.
    """
    scores = output['scores'].detach().cpu().numpy()

    # Boolean selector shared by all three arrays so they stay aligned.
    keep = scores > score_threshold

    boxes = output['boxes'].detach().cpu().numpy()[keep]
    labels = output['labels'].detach().cpu().numpy()[keep]
    masks = output['masks'].detach().cpu().numpy()[keep]
    return boxes, labels, masks
# Image to analyse, loaded as a one-image batch on the model's device.
image_path = 'pic.jpg'
img_tensor = preprocess_image(image_path)

# Forward pass only, so gradient tracking is switched off.
with torch.no_grad():
    predictions = model(img_tensor)

# The batch holds a single image, so only the first prediction is used.
boxes, labels, masks = process_output(predictions[0])
# Class names indexed by the integer label the detector emits. Index 0 is
# the background entry and 'N/A' fills label ids that have no category.
COCO_CATEGORIES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
    'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A',
    'backpack', 'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase',
    'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana',
    'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A',
    'dining table', 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
    'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase', 'scissors',
    'teddy bear', 'hair drier', 'toothbrush',
]
# Report the category name behind every label that survived filtering.
for label in labels:
    class_name = COCO_CATEGORIES[label]
    print(f"Label {label} represents: {class_name}")
plt.figure(figsize=(10, 10))

# Black RGB canvas with the same height and width as the input image
# (img_tensor is the one-image batch built by preprocess_image).
height, width = img_tensor.shape[-2:]
overlay = np.zeros((height, width, 3), dtype=np.uint8)

# Cut-off applied to each soft mask. torchvision documents Mask R-CNN masks
# as per-pixel values in the 0-1 range, so the comparison must happen on
# that scale. The previous code multiplied the mask by 255 before comparing
# with 0.99, which painted almost every pixel with a nonzero value.
# NOTE(review): 0.5 is the customary cut-off; 0.99 is kept as configured.
threshold = 0.99

for instance_masks in masks:
    # Each detection gets its own random colour.
    color = np.random.rand(3) * 255
    # Channel 0 holds the mask for this detection.
    mask = instance_masks[0]
    binary_mask = mask > threshold
    overlay[binary_mask] = color

plt.imshow(overlay)
plt.show()
# Blend the coloured mask overlay onto the input picture (40% overlay).
original_image = F.to_pil_image(img_tensor[0])
overlay_image = Image.fromarray(overlay.astype(np.uint8))
combined = Image.blend(original_image, overlay_image, alpha=0.4)

plt.imshow(combined)

# Outline every kept detection: a box is (x0, y0, x1, y1), so the rectangle
# is anchored at (x0, y0) with width x1 - x0 and height y1 - y0.
axes = plt.gca()
for x0, y0, x1, y1 in boxes:
    outline = plt.Rectangle(
        (x0, y0),
        x1 - x0,
        y1 - y0,
        fill=False,
        edgecolor='red',
        linewidth=2,
    )
    axes.add_patch(outline)

plt.axis('off')
plt.show()
|