Example Code for MicroPython-YOLO11
This example demonstrates how to use the ESP32-P4 to detect objects in an image using YOLO11. The script draws a bounding box and a class/confidence label for each detected object on the image and saves the annotated image as a new file (yolo_marked.jpg).
Hardware Preparation
Software Preparation
Usage steps
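- Upload the yolo.jpg and myufont.py files to the ESP32-P4, and copy the font file text_full_16px_2312.v3.bmf to the root of the SD card (the script mounts the card at /sd and loads the font from there).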
- Run the script. It prints the class name and confidence of each detected object and writes the annotated image to yolo_marked.jpg.
- Click the Stop/Restart Backend Process button, then open the marked image (yolo_marked.jpg) on the MicroPython device to view it.

Sample Code
myufont.py
import struct
class CustomBMFont:
    """Reader for a "BM" bitmap font file.

    File layout, as parsed by this class:

    * a 16-byte header: magic ``BM``, one version byte, one map-mode byte,
      a 3-byte big-endian offset to the glyph data, one font-size byte and
      one bytes-per-glyph byte (the remaining header bytes are not read);
    * an index table of 2-byte big-endian character codes that fills the
      space between the header and the glyph data (the lookup is a binary
      search, so the table is assumed to be sorted ascending);
    * the glyph bitmaps, ``bitmap_size`` bytes each, in index-table order.

    The file stays open for the lifetime of the object; call ``close()``
    or use the instance as a context manager to release it.
    """

    HEADER_SIZE = 16

    def __init__(self, font_file):
        """Open *font_file* and parse its header.

        Raises:
            ValueError: if the file is shorter than the header or does not
                start with the ``BM`` magic. The file is closed before the
                exception propagates.
        """
        self.font_file = font_file
        self.font = open(font_file, 'rb')
        try:
            header = self.font.read(self.HEADER_SIZE)
            if len(header) < self.HEADER_SIZE or header[0:2] != b'BM':
                raise ValueError("Invalid BMF font file")
            self.version = header[2]
            self.map_mode = header[3]
            # The offset is stored in 3 bytes; pad to 4 so it can be
            # unpacked as an unsigned 32-bit big-endian integer.
            self.start_bitmap = struct.unpack('>I', b'\x00' + header[4:7])[0]
            self.font_size = header[7]
            self.bitmap_size = header[8]
            # Number of 2-byte entries between the header and the glyph data.
            self.index_table_size = (self.start_bitmap - self.HEADER_SIZE) // 2
        except Exception:
            # Do not leak the handle when the file turns out to be unusable.
            self.font.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _find_char_index(self, char_code):
        """Binary-search the index table; return the entry position or -1."""
        low = 0
        high = self.index_table_size - 1
        while low <= high:
            mid = (low + high) // 2
            self.font.seek(self.HEADER_SIZE + mid * 2)
            mid_code = struct.unpack('>H', self.font.read(2))[0]
            if char_code == mid_code:
                return mid
            if char_code < mid_code:
                high = mid - 1
            else:
                low = mid + 1
        return -1  # Character not found

    def get_char_bitmap(self, char):
        """Return the glyph bitmap of *char* as a bytearray.

        When the character is not in the index table, a 16-byte block of
        0xFF is returned so that missing glyphs show up as a filled box.
        NOTE(review): that placeholder is always 16 bytes, regardless of
        ``bitmap_size`` - confirm this is the intended size for the fonts
        in use.
        """
        index = self._find_char_index(ord(char))
        if index == -1:
            return bytearray([0xFF] * 16)
        self.font.seek(self.start_bitmap + index * self.bitmap_size)
        return bytearray(self.font.read(self.bitmap_size))

    def close(self):
        """Close the underlying font file."""
        self.font.close()
def is_chinese(ch):
    """Return True if *ch* is a CJK ideograph.

    Covers CJK Unified Ideographs (U+4E00-U+9FFF), Extension A
    (U+3400-U+4DBF) and Extension B (U+20000-U+2A6DF).

    Code points above U+FFFF need the 8-digit ``\\U`` escape. The 4-digit
    ``\\u`` form followed by an extra digit yields a two-character string,
    which made the previous Extension B test match U+2001-U+2A6D (arrows,
    general punctuation, symbols) instead.
    """
    return (
        '\u4e00' <= ch <= '\u9fff'
        or '\u3400' <= ch <= '\u4dbf'
        or '\U00020000' <= ch <= '\U0002a6df'
    )
def display_text(lcd, font, text, x_start, y_start, color, bg_color=None, spacing=0, line_spacing=0, max_width=800):
    """
    Render a string on an LCD with a bitmap font, wrapping long lines.

    Chinese characters advance by a full font cell and all other characters
    by half a cell. A newline starts a new line at x_start, and a carriage
    return moves the pen right by two font cells.

    Parameters:
    lcd - LCD object; must provide point(x, y, color), and a width
          attribute when max_width is None
    font - CustomBMFont instance
    text - Text to be displayed
    x_start, y_start - Top-left corner of the first character
    color - Colour for set glyph bits
    bg_color - Colour for clear glyph bits (None leaves them untouched)
    spacing - Extra pixels between characters
    line_spacing - Extra pixels between lines
    max_width - Line width in pixels measured from x_start; None means
                "up to the right edge of the LCD" (lcd.width - x_start)
    """
    cell = font.font_size
    row_stride = (cell + 7) // 8  # bytes that hold one glyph row
    if max_width is None:
        max_width = lcd.width - x_start

    pen_x = x_start
    pen_y = y_start
    for ch in text:
        # Control characters only move the pen.
        if ch == '\n':
            pen_y += cell + line_spacing
            pen_x = x_start
            continue
        if ch == '\r':
            pen_x += 2 * cell
            continue

        # Full cell for Chinese, half cell for everything else.
        advance = cell if is_chinese(ch) else cell // 2

        # Start a new line when this glyph would cross the right limit.
        if pen_x + advance > x_start + max_width:
            pen_y += cell + line_spacing
            pen_x = x_start

        glyph = font.get_char_bitmap(ch)
        glyph_len = len(glyph)
        for row in range(cell):
            row_base = row * row_stride
            for col in range(advance):
                idx = row_base + col // 8
                # Bits are stored most-significant first within each byte.
                if idx < glyph_len and (glyph[idx] & (0x80 >> (col % 8))):
                    lcd.point(pen_x + col, pen_y + row, color)
                elif bg_color is not None:
                    lcd.point(pen_x + col, pen_y + row, bg_color)

        pen_x += advance + spacing
Application Code
from espdl import CocoDetector
from jpeg import Decoder, Encoder
from myufont import CustomBMFont
from machine import Pin, SDCard
import os
# Image geometry shared by the JPEG encoder and the detector.
IMAGE_WIDTH = 405
IMAGE_HEIGHT = 540

# Mount the SD card (slot 0, four data lines) at /sd.
sd = SDCard(slot=0, width=4, sck=43, cmd=44, data=(39, 40, 41, 42))
os.mount(sd, '/sd')

# JPEG codec objects and the COCO object detector.
decoder = Decoder()
encoder = Encoder(width=IMAGE_WIDTH, height=IMAGE_HEIGHT, pixel_format="RGB888")
object_detector = CocoDetector(width=IMAGE_WIDTH, height=IMAGE_HEIGHT)
# MS COCO class names in the standard 80-class order, so that a category
# index reported by the detector can be used directly as a list index.
# NOTE(review): assumes the detector reports standard COCO-80 indices.
MSCOCO_CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed",
    "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair dryer", "toothbrush",
]
# Load the bitmap font used for the labels from the SD card.
font = CustomBMFont('/sd/text_full_16px_2312.v3.bmf')

# Read the source JPEG; the with-block closes the file as soon as the
# bytes are in memory instead of leaving the handle open.
with open("yolo.jpg", "rb") as image_file:
    img = image_file.read()

# Decode the JPEG and copy the result into a bytearray so that pixels can
# be modified in place (the drawing code treats it as 3 bytes per pixel).
framebuffer = bytearray(decoder.decode(img))

# Run object detection on the decoded frame.
results = object_detector.run(framebuffer)
# Draw one bounding box and its label on the image buffer
def draw_rectangle(buffer, width, height, x, y, w, h, font, label, color=(255, 0, 0)):
    """
    Draw a one-pixel rectangular outline and a text label into an RGB888 buffer.

    Every pixel write is clipped to the image, so a box or label that lies
    partly (or entirely) outside the image never touches memory outside
    its own row or outside the buffer.

    :param buffer: Mutable image buffer, 3 bytes per pixel (R, G, B), row-major
    :param width: Total width of the image in pixels (also the row stride)
    :param height: Total height of the image in pixels
    :param x: X-coordinate of the top-left corner of the bounding box
    :param y: Y-coordinate of the top-left corner of the bounding box
    :param w: Width of the bounding box
    :param h: Height of the bounding box
    :param font: Font object providing ``font_size`` and ``get_char_bitmap(char)``
    :param label: Text drawn above the bounding box
    :param color: Outline and label colour (RGB tuple, default: red)
    """
    red, green, blue = color[0], color[1], color[2]

    def set_pixel(px, py):
        """Write one pixel; coordinates outside the image are ignored."""
        if 0 <= px < width and 0 <= py < height:
            offset = (py * width + px) * 3  # 3 bytes per RGB pixel
            buffer[offset] = red
            buffer[offset + 1] = green
            buffer[offset + 2] = blue

    def is_chinese(ch):
        """Return True for CJK unified ideographs (main block, Ext. A, Ext. B)."""
        # Extension B lies above U+FFFF and needs the 8-digit \U escape.
        return (
            '\u4e00' <= ch <= '\u9fff'
            or '\u3400' <= ch <= '\u4dbf'
            or '\U00020000' <= ch <= '\U0002a6df'
        )

    def render_text(text, x_start, y_start, spacing=0, line_spacing=0, max_width=None):
        """Draw *text* with its top-left corner at (x_start, y_start).

        Lines wrap at ``x_start + max_width``; by default that is the right
        edge of the image.
        """
        font_size = font.font_size
        bytes_per_row = (font_size + 7) // 8  # one glyph row, rounded up to bytes
        if max_width is None:
            max_width = width - x_start
        pen_x, pen_y = x_start, y_start
        for char in text:
            if char == '\n':
                pen_y += font_size + line_spacing
                pen_x = x_start
                continue
            if char == '\r':
                pen_x += 2 * font_size
                continue
            # Full width for Chinese characters, half width otherwise.
            char_width = font_size if is_chinese(char) else font_size // 2
            if pen_x + char_width > x_start + max_width:
                pen_y += font_size + line_spacing
                pen_x = x_start
            char_bitmap = font.get_char_bitmap(char)
            bitmap_len = len(char_bitmap)
            for row in range(font_size):
                row_base = row * bytes_per_row
                for col in range(char_width):
                    byte_index = row_base + col // 8
                    # Bits are stored most-significant first.
                    if byte_index < bitmap_len and (char_bitmap[byte_index] & (0x80 >> (col % 8))):
                        set_pixel(pen_x + col, pen_y + row)
            pen_x += char_width + spacing

    # Horizontal edges; the +1 includes the corner pixel at x + w.
    for i in range(x, x + w + 1):
        set_pixel(i, y)
        set_pixel(i, y + h)
    # Vertical edges; the +1 includes the corner pixel at y + h.
    for j in range(y, y + h + 1):
        set_pixel(x, j)
        set_pixel(x + w, j)

    # Place the label one text line (plus a 4-pixel gap) above the box, but
    # keep it inside the image when the box touches the top edge.
    label_y = max(0, y - (font.font_size + 4))
    render_text(label, x, label_y)
# Annotate every detection on the frame and echo its label to the console
for detection in results:
    # The box is given as two corners: (left, top) and (right, bottom)
    left, top, right, bottom = detection['box']
    # Label text: class name plus the score as a whole-number percentage
    class_name = MSCOCO_CLASSES[detection['category']]
    confidence = int(detection['score'] * 100)
    label = f"{class_name}: {confidence}%"
    # draw_rectangle expects a corner plus width and height (default colour: red)
    box_width = right - left
    box_height = bottom - top
    draw_rectangle(framebuffer, 405, 540, left, top, box_width, box_height, font, label)
    print(label)

# Encode the annotated frame as JPEG and write it out
marked_img = encoder.encode(framebuffer)
with open("yolo_marked.jpg", "wb") as out_file:
    out_file.write(marked_img)
Was this article helpful?
