Notes on "My PaddlePaddle Learning Journey" VII——End-to-End License Plate Recognition

车牌识别项目笔记（基于PaddlePaddle）¶

前言¶

本文记录了使用PaddlePaddle实现端到端车牌识别的完整流程，从数据采集、预处理、模型构建到训练与预测。项目采用CNN-RNN（卷积网络+GRU）架构，结合CTC（Connectionist Temporal Classification）损失函数，直接识别整块车牌上的字符序列，无需先进行字符分割。

1. 数据采集与预处理¶

1.1 车牌图像下载¶

使用Python爬虫从百度图片下载车牌图像，代码如下：

import re
import uuid
import requests
import os

class DownloadImages:
    """Crawl Baidu image-search result pages for a keyword and save the images.

    Images are written to ``save_path`` under random UUID-based ``.jpg`` names.
    """

    def __init__(self, download_max, key_word):
        """Store the crawl configuration.

        Args:
            download_max: Number of images to save before stopping.
            key_word: Search keyword sent to Baidu image search.
        """
        # Number of images successfully saved so far.
        self.download_sum = 0
        self.download_max = download_max
        self.key_word = key_word
        # Directory that receives the downloaded files.
        self.save_path = '../images/download/'

    def start_download(self):
        """Fetch result pages until ``download_max`` images have been saved.

        Stops early when a result page contains no image URLs; otherwise the
        loop could never terminate once the search results are exhausted.
        Errors raised by the search request itself propagate to the caller.
        """
        os.makedirs(self.save_path, exist_ok=True)
        pn = 0
        while self.download_sum < self.download_max:
            # Passing the query via ``params`` lets requests URL-encode the
            # keyword, so keywords containing '&' or '#' cannot break the URL.
            result = requests.get(
                'http://image.baidu.com/search/flip',
                params={
                    'tn': 'baiduimage',
                    'ie': 'utf-8',
                    'word': self.key_word,
                    'pn': pn,
                },
                timeout=50,
            )
            if not self.downloadImages(result.text):
                # No image URLs on this page: nothing more to download.
                break
            pn += 100
        print('下载完成')

    def downloadImages(self, html):
        """Download every image referenced by ``"objURL"`` entries in *html*.

        Downloading is best-effort: a failure on one image is reported and the
        next image is tried. Stops as soon as ``download_max`` is reached.

        Args:
            html: Text of one search result page.

        Returns:
            The number of image URLs found in *html* (0 means the page had no
            results), regardless of how many downloads succeeded.
        """
        img_urls = re.findall(r'"objURL":"(.*?)",', html, re.S)
        for img_url in img_urls:
            if self.download_sum >= self.download_max:
                # Do not overshoot the requested number of images.
                break
            try:
                pic = requests.get(img_url, timeout=50)
                # Avoid saving an HTTP error page as if it were an image.
                pic.raise_for_status()
                pic_name = os.path.join(self.save_path, str(uuid.uuid1()) + '.jpg')
                with open(pic_name, 'wb') as f:
                    f.write(pic.content)
                self.download_sum += 1
            except Exception as e:
                # Deliberately broad: one bad URL must not abort the crawl.
                print(f'下载失败：{e}')
        return len(img_urls)

if __name__ == '__main__':
    # Script entry point: download up to 100 images for the keyword "车牌"
    # (license plate).
    downloader = DownloadImages(download_max=100, key_word='车牌')
    downloader.start_download()

1.2 车牌图像预处理¶

无效图像过滤：手动删除无车牌的图像。
图像命名：将有效图像重命名为车牌字符（如辽B2723L）。
裁剪与灰度化：使用形态学操作（膨胀、腐蚀）定位车牌区域，裁剪后转为灰度图，并将尺寸统一调整为180×80（宽×高，与预测代码中的cv2.resize(img, (180, 80))一致）。

2. 数据读取与准备¶

2.1 生成训练/测试列表¶

class CreateDataList:
    """Writes the ``<image name>\\t<label>`` list file for a dataset directory."""

    def createDataList(self, data_path, isTrain):
        """(Re)generate the list file inside *data_path*.

        Each regular file in *data_path* produces one line
        ``<file name>\\t<label>``, where the label is the part of the file
        name before the first ``.``. Entries are written in sorted order.
        An existing list file is overwritten, and an empty directory yields
        an empty list file.

        Args:
            data_path: Directory containing the image files.
            isTrain: If true write ``trainer.list``, otherwise ``test.list``.
        """
        list_name = 'trainer.list' if isTrain else 'test.list'
        list_path = os.path.join(data_path, list_name)
        # Snapshot the directory before the list file is rewritten. Skip the
        # list file itself and anything that is not a regular file, so that
        # sub-directories never end up as "images".
        images = sorted(
            name for name in os.listdir(data_path)
            if name != list_name
            and os.path.isfile(os.path.join(data_path, name))
        )
        # NOTE(review): the file is written with the platform default
        # encoding; whatever reads it must use the same one -- confirm if
        # labels contain non-ASCII characters.
        # Mode 'w' truncates any previous list, so no explicit remove is needed.
        with open(list_path, 'w') as f:
            f.writelines(f'{img}\t{img.split(".")[0]}\n' for img in images)

# Generate the list files for the training set and the test set.
createDataList = CreateDataList()
createDataList.createDataList(data_path='../data/train_data/', isTrain=True)
createDataList.createDataList(data_path='../data/test_data/', isTrain=False)

2.2 读取数据为向量¶

def get_file_list(image_file_list):
    """Read a list file and return its ``(image name, label)`` pairs.

    Each non-blank line must have the form ``<image name>\\t<label>``.
    Blank lines (for example a trailing empty line) are skipped.

    Args:
        image_file_list: Path of the list file to read.

    Returns:
        A list of ``(img, label)`` string tuples in file order.

    Raises:
        ValueError: If a non-blank line does not consist of exactly two
            tab-separated fields.
    """
    path_list = []
    # NOTE(review): read with the platform default encoding, which must match
    # whatever wrote the list file -- confirm if labels are non-ASCII.
    with open(image_file_list) as f:
        for line in f:
            entry = line.strip()
            if not entry:
                # A blank line would otherwise fail the two-field unpack below.
                continue
            img, label = entry.split('\t')
            path_list.append((img, label))
    return path_list

# Load the (image name, label) pairs for the training and test sets.
train_list = get_file_list(image_file_list='../data/train_data/trainer.list')
test_list = get_file_list(image_file_list='../data/test_data/test.list')

3. 神经网络定义（CNN+GRU+CTC）¶

3.1 网络结构¶

class Model(object):
    """CNN + GRU network with a CTC cost for plate character recognition.

    After construction the instance exposes ``image`` (input layer),
    ``output`` (per-step class scores) and, unless ``is_infer`` is true,
    ``label`` and ``cost``.
    """

    def __init__(self, num_classes, shape, is_infer=False):
        """Build the network.

        Args:
            num_classes: Number of distinct characters; the output layer has
                ``num_classes + 1`` units.
            shape: Two-element image shape; the input vector size is
                ``shape[0] * shape[1]``.
            is_infer: If true, the label input and the cost layer are not
                created.
        """
        # Stored on the instance because the layer-building helpers below
        # need them; reading them as bare names there raised NameError.
        self.num_classes = num_classes
        self.is_infer = is_infer
        self.image_vector_size = shape[0] * shape[1]
        self.__declare_input_layers__()
        self.__build_nn__()

    def __declare_input_layers__(self):
        """Declare the image input and, when training, the label input."""
        self.image = paddle.layer.data(
            name='image',
            type=paddle.data_type.dense_vector(self.image_vector_size)
        )
        if not self.is_infer:
            self.label = paddle.layer.data(
                name='label',
                type=paddle.data_type.integer_value_sequence(self.num_classes)
            )

    def __build_nn__(self):
        """Stack the conv group, the GRU and the output layer; add the cost when training."""
        # CNN feature extraction.
        conv = paddle.networks.img_conv_group(
            input=self.image,
            num_channels=1,
            conv_filter_size=3,
            conv_num_filter=[16, 32, 64, 128],
            pool_size=2,
            pool_stride=2
        )
        # RNN sequence modelling.
        # NOTE(review): the conv output is fed to the GRU directly; confirm
        # whether a feature-map-to-sequence step is required here.
        gru = paddle.networks.simple_gru(
            input=conv, size=128, act=paddle.activation.Relu()
        )
        # CTC output: one extra unit in addition to the character classes.
        self.output = paddle.layer.fc(
            input=gru, size=self.num_classes + 1, act=paddle.activation.Softmax()
        )
        if not self.is_infer:
            # The cost needs the label layer, which only exists in training
            # mode; building it during inference raised AttributeError.
            # NOTE(review): confirm against the warp_ctc documentation whether
            # its input should be un-normalised (linear) activations and which
            # index it uses for the blank label.
            self.cost = paddle.layer.warp_ctc(self.output, self.label)

4. 训练与测试¶

4.1 训练配置¶

paddle.init(use_gpu=True, trainer_count=1)
model = Model(num_classes=len(char_dict), shape=(180, 80))
# Create the trainable parameters from the cost layer; the model object is not
# relied upon to carry a ``parameters`` attribute.
parameters = paddle.parameters.create(model.cost)
optimizer = paddle.optimizer.Momentum(momentum=0)
# paddle.trainer.SGD receives the optimizer through ``update_equation``.
trainer = paddle.trainer.SGD(
    cost=model.cost, parameters=parameters, update_equation=optimizer
)


def event_handler(event):
    """Print the cost after each finished batch; ignore all other events."""
    # Only EndIteration events carry a ``cost``; reading it on other event
    # types would raise AttributeError.
    if isinstance(event, paddle.event.EndIteration):
        print(f'Pass {event.pass_id}, Cost {event.cost}')


# Start training.
# NOTE(review): paddle.batch expects a reader (a callable yielding samples),
# while train_list is a plain list of (image name, label) tuples -- a reader
# that loads the images and encodes the labels is presumably needed; confirm
# against the full project.
trainer.train(
    reader=paddle.batch(train_list, batch_size=16),
    num_passes=5000,
    event_handler=event_handler
)

4.2 模型保存与测试¶

import gzip

# Save the model parameters. The archive is gzip-compressed so that it matches
# its ``.tar.gz`` name and can be read back with ``gzip.open``, which is how
# infer() loads it.
with gzip.open('model.tar.gz', 'wb') as f:
    trainer.save_parameter_to_tar(f)

# Evaluate the model on the test set.
# NOTE(review): as with training, paddle.batch expects a reader callable rather
# than the plain test_list -- confirm against the full project.
test_result = trainer.test(reader=paddle.batch(test_list, batch_size=16))
print(f'测试结果：{test_result}')

5. 预测¶

def infer(img_path, model_path):
    """Recognise the plate characters in one image.

    Args:
        img_path: Path of the (already cropped) plate image.
        model_path: Path of the gzip-compressed parameter archive.

    Returns:
        Whatever ``ctc_greedy_decoder`` returns for the first inference result.

    Raises:
        IOError: If the image cannot be read.
    """
    # Local import so the function does not depend on a file-level import.
    import gzip

    # Load the model parameters; the context manager closes the archive.
    with gzip.open(model_path) as f:
        parameters = paddle.parameters.Parameters.from_tar(f)
    # Plate cropping is omitted here; the image is read as grayscale.
    img = cv2.imread(img_path, 0)
    if img is None:
        # cv2.imread signals an unreadable file by returning None; fail with
        # a clear message instead of a cryptic error inside cv2.resize.
        raise IOError(f'cannot read image: {img_path}')
    img = cv2.resize(img, (180, 80)) / 255.
    # Run inference. The input is a list of samples, each a tuple of fields;
    # the single 'image' field is a flat dense vector.
    inferer = paddle.inference.Inference(output_layer=model.output, parameters=parameters)
    result = inferer.infer([(img.flatten().astype('float32'),)])
    # CTC decoding (argmax per step, then collapse repeats).
    decoded = ctc_greedy_decoder(result[0])
    return decoded

# Run a prediction on a sample image and print the decoded plate.
print(infer(img_path='../test_img.jpg', model_path='model.tar.gz'))

6. 项目代码¶

GitHub地址：https://github.com/yeyupiaoling/LearnPaddle

关键技巧¶

CTC损失函数：无需标签对齐，直接处理序列输出。
数据增强：增加训练数据量可提升模型泛化能力。
灰度化与尺寸统一：减少特征维度，加速训练。

此方案先用形态学方法定位并裁剪车牌区域，再由端到端（免字符分割）的识别网络直接输出车牌字符，适用于简单场景。如需更高精度，可引入目标检测模型（如YOLO）定位车牌后再识别。