Example Code for MicroPython-YOLO11

Last revision 2026/01/22

This example demonstrates how to use the ESP32-P4 to detect and classify objects in an image using YOLO11. The script draws a bounding box and a class/confidence label for each detected object and saves the annotated image as a new file.

Hardware Preparation

Software Preparation

Usage steps

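  1. Upload the yolo.jpg and myufont.py files to the ESP32-P4, and make sure the font file text_full_16px_2312.v3.bmf is on the SD card (the script loads it from /sd).
  2. Run the script; it prints the class name and confidence of each detected object.
  3. Click the Stop/Restart Backend Process button to view the marked image (yolo_marked.jpg) on the MicroPython device.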

yolo.jpg

Sample Code

myufont.py

import struct

class CustomBMFont:
    """Reader for a BMF bitmap font file.

    File layout as parsed by this class:

    * bytes 0-1  : magic ``b'BM'``
    * byte  2    : format version
    * byte  3    : map mode
    * bytes 4-6  : big-endian offset of the first glyph bitmap
    * byte  7    : font size in pixels
    * byte  8    : number of bytes per glyph bitmap
    * byte 16 up to the bitmap offset: index table of 2-byte big-endian
      character codes (looked up with a binary search, so the table is
      assumed to be sorted in ascending order)

    The file stays open for the lifetime of the object; call ``close()`` or
    use the instance as a context manager to release it.
    """

    # Size of the fixed header that precedes the character index table.
    _HEADER_SIZE = 16

    def __init__(self, font_file):
        """Open *font_file* and parse its header.

        Raises:
            ValueError: if the file is too short or does not start with
                the ``b'BM'`` magic.
        """
        self.font_file = font_file
        self.font = open(font_file, 'rb')

        try:
            # Read file header
            header = self.font.read(self._HEADER_SIZE)
            if len(header) < self._HEADER_SIZE or header[0:2] != b'BM':
                raise ValueError("Invalid BMF font file")

            self.version = header[2]
            self.map_mode = header[3]
            # The bitmap offset is a 3-byte big-endian value; pad it to 4 bytes.
            self.start_bitmap = struct.unpack('>I', b'\x00' + header[4:7])[0]
            self.font_size = header[7]
            self.bitmap_size = header[8]
        except Exception:
            # Do not leak the file handle when the header is rejected.
            self.font.close()
            raise

        # Number of 2-byte entries in the character index table
        self.index_table_size = (self.start_bitmap - self._HEADER_SIZE) // 2

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _find_char_index(self, char_code):
        """Binary search the index table; return the entry position or -1."""
        low = 0
        high = self.index_table_size - 1

        while low <= high:
            mid = (low + high) // 2
            self.font.seek(self._HEADER_SIZE + mid * 2)
            mid_code = struct.unpack('>H', self.font.read(2))[0]

            if char_code == mid_code:
                return mid
            elif char_code < mid_code:
                high = mid - 1
            else:
                low = mid + 1
        return -1  # Character not found

    def get_char_bitmap(self, char):
        """Return the glyph bitmap of *char* as a bytearray.

        A character that is not in the index table yields a fully set
        bitmap of ``bitmap_size`` bytes, so the placeholder block has the
        same dimensions as a real glyph of this font.
        """
        char_code = ord(char)
        index = self._find_char_index(char_code)

        if index == -1:
            return bytearray(b'\xff' * self.bitmap_size)

        # Glyphs are stored back to back in index-table order.
        self.font.seek(self.start_bitmap + index * self.bitmap_size)
        return bytearray(self.font.read(self.bitmap_size))

    def close(self):
        """Close the underlying font file."""
        self.font.close()
        
def is_chinese(ch):
    """Check if a character is a Chinese character (CJK ideograph).

    Covers the CJK Unified Ideographs block (U+4E00-U+9FFF), Extension A
    (U+3400-U+4DBF) and Extension B (U+20000-U+2A6DF).

    Extension B lies outside the Basic Multilingual Plane, so its bounds
    need the 8-digit ``\\U`` escape: a ``\\u`` escape consumes exactly four
    hex digits, which would turn the bound into a two-character string and
    make the test match unrelated punctuation and symbols instead.
    """
    return (
        '\u4e00' <= ch <= '\u9fff'
        or '\u3400' <= ch <= '\u4dbf'
        or '\U00020000' <= ch <= '\U0002a6df'
    )
    
def display_text(lcd, font, text, x_start, y_start, color, bg_color=None, spacing=0, line_spacing=0, max_width=800):
    """
    Display text using custom font (supports automatic line wrapping and mixed Chinese-English display)

    Parameters:
    lcd - LCD object (must have a point(x, y, color) method; its width
          attribute is read only when max_width is None)
    font - CustomBMFont instance
    text - Text to be displayed
    x_start, y_start - Starting coordinates
    color - Text color
    bg_color - Background color (None means transparent)
    spacing - Character spacing
    line_spacing - Line spacing
    max_width - Maximum line width (pixels), measured from x_start;
                None means "up to the right edge of the LCD"

    '\\n' starts a new line at x_start; '\\r' advances the cursor by two
    full-width cells without drawing anything.
    """
    font_size = font.font_size
    bytes_per_row = (font_size + 7) // 8  # Number of bytes per bitmap row
    line_height = font_size + line_spacing
    x, y = x_start, y_start

    # If maximum width is not specified, use screen width minus starting x coordinate
    if max_width is None:
        max_width = lcd.width - x_start
    right_limit = x_start + max_width

    for char in text:
        # Handle newline character
        if char == '\n':
            y += line_height
            x = x_start
            continue
        if char == '\r':
            x += 2 * font_size
            continue

        # Full width for Chinese characters, half width for everything else.
        # Computed once per character: it also bounds the column loop below.
        char_width = font_size if is_chinese(char) else font_size // 2

        # Wrap before drawing a character that would cross the right limit
        if x + char_width > right_limit:
            y += line_height
            x = x_start

        # Get character bitmap
        bitmap = font.get_char_bitmap(char)
        bitmap_len = len(bitmap)

        # Draw character (rows are stored MSB-first, bytes_per_row bytes each)
        for row in range(font_size):
            row_base = row * bytes_per_row
            py = y + row
            for col in range(char_width):
                byte_idx = row_base + col // 8
                bit_mask = 0x80 >> (col % 8)

                # A bitmap shorter than expected is treated as unset pixels
                if byte_idx < bitmap_len and (bitmap[byte_idx] & bit_mask):
                    lcd.point(x + col, py, color)
                elif bg_color is not None:
                    lcd.point(x + col, py, bg_color)

        # Move to next character position
        x += char_width + spacing

Application Code

from espdl import CocoDetector
from jpeg import Decoder, Encoder
from myufont import CustomBMFont
from machine import Pin, SDCard
import os

# Mount the SD card; the label font is loaded from it further down.
sd = SDCard(slot=0, width=4, sck=43, cmd=44, data=(39, 40, 41, 42))
os.mount(sd, '/sd')
decoder = Decoder()
encoder = Encoder(width=405, height=540, pixel_format="RGB888")
object_detector = CocoDetector(width=405, height=540)  # Same 405x540 frame size as the encoder

# MS COCO object class names (80 entries), indexed by the detector's
# 'category' value.
# NOTE(review): assumes CocoDetector reports categories in the standard
# 80-class COCO order used by YOLO models - confirm against the espdl module.
MSCOCO_CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed",
    "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair dryer", "toothbrush"
]

font = CustomBMFont('/sd/text_full_16px_2312.v3.bmf')  # Load custom bitmap font used for the labels

# Load and decode the input image
with open("yolo.jpg", "rb") as f:    # Close the file as soon as it has been read
    img = f.read()
framebuffer = decoder.decode(img)    # Decoded pixels (treated as RGB888 by the drawing code below)
framebuffer = bytearray(framebuffer) # Mutable copy so the annotations can be drawn in place

# Run object detection (COCO dataset)
results = object_detector.run(framebuffer)

# Function to draw bounding boxes and labels on the image
def draw_rectangle(buffer, width, height, x, y, w, h, font, label, color=(255, 0, 0)):
    """
    Draw a rectangular bounding box and its text label on an RGB888 image buffer.

    Every pixel write is clipped to the image, so a box or label that
    extends past an edge is truncated instead of spilling into other rows.

    :param buffer: Image buffer (mutable, 3 bytes per pixel in R, G, B order)
    :param width: Total width of the image
    :param height: Total height of the image
    :param x: X-coordinate of the top-left corner of the bounding box
    :param y: Y-coordinate of the top-left corner of the bounding box
    :param w: Width of the bounding box
    :param h: Height of the bounding box
    :param font: Font object providing ``font_size`` and ``get_char_bitmap(char)``
    :param label: Text label; drawn above the box, or just inside its top
                  edge when there is no room above
    :param color: Bounding box and label color (RGB tuple, default: red)
    """
    red, green, blue = color[0], color[1], color[2]

    # Helper function: set one pixel, silently skipping out-of-image coordinates
    def put_pixel(px, py):
        if 0 <= px < width and 0 <= py < height:
            offset = (py * width + px) * 3  # 3 bytes per RGB pixel
            buffer[offset] = red
            buffer[offset + 1] = green
            buffer[offset + 2] = blue

    # Helper function: check if a character is a CJK ideograph
    def is_chinese(ch):
        # Extension B is outside the BMP and therefore needs 8-digit \U escapes.
        return (
            ('\u4e00' <= ch <= '\u9fff')             # Main Chinese character range
            or ('\u3400' <= ch <= '\u4dbf')          # Extended Chinese character range A
            or ('\U00020000' <= ch <= '\U0002a6df')  # Extended Chinese character range B
        )

    # Helper function: render text into the image buffer
    def render_text(text, x_start, y_start, spacing=0, line_spacing=0, max_width=width):
        font_size = font.font_size
        bytes_per_row = (font_size + 7) // 8  # Bytes per glyph row (rounded up)
        cx, cy = x_start, y_start

        for char in text:
            # Handle line breaks
            if char == '\n':
                cy += font_size + line_spacing
                cx = x_start
                continue
            # Handle carriage returns (shift X position)
            if char == '\r':
                cx += 2 * font_size
                continue

            # Full width for Chinese, half width for everything else
            char_width = font_size if is_chinese(char) else font_size // 2

            # Wrap text if it exceeds max width
            if max_width is not None and (cx + char_width) > (x_start + max_width):
                cy += font_size + line_spacing
                cx = x_start

            glyph = font.get_char_bitmap(char)
            glyph_len = len(glyph)

            # Draw each set bit of the glyph (rows are stored MSB first)
            for row in range(font_size):
                row_base = row * bytes_per_row
                for col in range(char_width):
                    byte_index = row_base + (col // 8)
                    if byte_index < glyph_len and (glyph[byte_index] & (0x80 >> (col % 8))):
                        put_pixel(cx + col, cy + row)

            # Move to the next character position
            cx += char_width + spacing

    right = x + w
    bottom = y + h

    # Top and bottom edges (inclusive of both corners)
    for i in range(x, right + 1):
        put_pixel(i, y)
        put_pixel(i, bottom)

    # Left and right edges
    for j in range(y, bottom + 1):
        put_pixel(x, j)
        put_pixel(right, j)

    # Place the label one text line plus a 4-pixel gap above the box; if that
    # would start above the image, draw it just inside the top edge instead.
    label_y = y - (font.font_size + 4)
    if label_y < 0:
        label_y = y + 1
    render_text(label, x, label_y)

# Draw bounding boxes and labels for all detected objects
for obj in results:
    # Extract bounding box coordinates (top-left: (x1,y1), bottom-right: (x2,y2))
    x1, y1, x2, y2 = obj['box']
    # Look up the class name; fall back to the raw index rather than crashing
    # (or silently wrapping on a negative index) if it is outside the table.
    category = obj['category']
    if 0 <= category < len(MSCOCO_CLASSES):
        class_name = MSCOCO_CLASSES[category]
    else:
        class_name = f"class {category}"
    # Create label: "Object Class: Confidence%"
    confidence = int(obj['score'] * 100)  # Convert confidence to percentage
    label = f"{class_name}: {confidence}%"
    # Draw bounding box and label (red color by default)
    draw_rectangle(framebuffer, 405, 540, x1, y1, x2 - x1, y2 - y1, font, label)
    # Print label to serial monitor for debugging
    print(label)

# All labels are drawn; release the font file handle.
font.close()

# Re-encode the annotated image to JPEG format and save
marked_img = encoder.encode(framebuffer)
with open("yolo_marked.jpg", "wb") as f:
    f.write(marked_img)

Was this article helpful?

TOP