Tenho uma imagem JPG que contém nomes de marcas de dispositivos móveis:
Agora quero detectar o primeiro caractere de cada palavra pelo script python.
Escrevi o seguinte script python para isso:
import cv2
import numpy as np
from tkinter import Tk, Canvas, Frame, Scrollbar, BOTH, VERTICAL, HORIZONTAL
from PIL import Image, ImageTk
# Function to draw rectangles around shapes and display using Tkinter
def draw_rectangles(image_path):
# Create a Tkinter window to display the image
root = Tk()
root.title("Image with Left-Most Rectangles Only")
# Load the image
image = cv2.imread(image_path)
# Convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Apply adaptive thresholding to get better separation of text
thresh = cv2.adaptiveThreshold(
gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
)
# Find contours in the binary image
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Dictionary to store contours grouped by Y-coordinate ranges
contours_by_y = {}
# Sort contours by X-coordinate to ensure we pick the left-most character first
sorted_contours = sorted(contours, key=lambda c: cv2.boundingRect(c)[0])
# Group contours by their Y coordinate to keep only the left-most rectangle per Y range
for contour in sorted_contours:
x, y, w, h = cv2.boundingRect(contour)
if w > 15 and h > 15: # Adjust the size filter to remove small artifacts
aspect_ratio = w / float(h)
# Ensure the aspect ratio is within the typical range of letters
if 0.2 < aspect_ratio < 5:
y_range = y // 20 # Group by a smaller Y coordinate range for better separation
# Check if the current rectangle is more left-most in X within its Y range
if y_range not in contours_by_y:
contours_by_y[y_range] = (x, y, w, h) # Store the first contour found in this range
else:
# Compare and keep the left-most (smallest X) rectangle
current_x, _, _, _ = contours_by_y[y_range]
# Check distance between new contour and the existing one to avoid close detection
if x < current_x and (x - current_x) > 20: # Distance threshold to filter out close contours
contours_by_y[y_range] = (x, y, w, h)
# Draw only the left-most rectangles
for (x, y, w, h) in contours_by_y.values():
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2) # Red color in BGR
# Convert the image to RGB (OpenCV uses BGR by default)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Convert the image to a format Tkinter can use
image_pil = Image.fromarray(image_rgb)
image_tk = ImageTk.PhotoImage(image_pil)
# Create a frame for the Canvas and scrollbars
frame = Frame(root)
frame.pack(fill=BOTH, expand=True)
# Create a Canvas widget to display the image
canvas = Canvas(frame, width=image_tk.width(), height=image_tk.height())
canvas.pack(side="left", fill="both", expand=True)
# Add scrollbars to the Canvas
v_scrollbar = Scrollbar(frame, orient=VERTICAL, command=canvas.yview)
v_scrollbar.pack(side="right", fill="y")
h_scrollbar = Scrollbar(frame, orient=HORIZONTAL, command=canvas.xview)
h_scrollbar.pack(side="bottom", fill="x")
canvas.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set)
canvas.create_image(0, 0, anchor="nw", image=image_tk)
canvas.config(scrollregion=canvas.bbox("all"))
# Keep a reference to the image to prevent garbage collection
canvas.image = image_tk
root.mainloop()
# Path to your image
image_path = r"E:\Desktop\mobile_brands\ORG_027081-Recovered.jpg"
# Call the function
draw_rectangles(image_path)
Mas não sei por que não está funcionando bem. A precisão desse script é de 90%. Por exemplo, na imagem acima, ele detecta o caractere "a" em "Samsung"
onde está o problema do meu script?
Como posso consertar esse problema?
talvez pela coordenada Y e X não consiga detectar as caixas mais à esquerda na imagem.
Note que não quero usar OCR