通用图纸识别调研

2020-04-26

Word count: 1.2k | Reading time≈ 6 min

最近在做图纸识别的项目调研。对于文字识别来说，各种云服务商已经提供了很详尽的API和各种开发语言的SDK，开箱即用，非常方便。但是对于开发者来说，具体的识别过程还是个黑匣子，而且是收费的。这里调研了几个开源的文字识别项目，目前还在调研过程中。

云服务商API

百度云、腾讯云、阿里云、华为云都提供了印刷文字的识别服务。阿里云的在项目中用到过，文档写得非常详尽，可参照之前写的《身份证信息识别》一文。

Tesseract-OCR

谷歌公司产品，开源。经过测试，识别计算机生成的图片（如截图）中的文字，准确率不错。使用起来也很简单：实例化一个Tesseract对象，为已经训练好的LSTM模型设置数据路径，然后调用doOCR方法——它接收一个文件参数，返回识别出的字符串。这里用Spring Boot写了一个demo测试了一下识别效果。

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Maven build descriptor for the Tesseract OCR demo web application. -->
    <modelVersion>4.0.0</modelVersion>
    <!-- Inherit dependency and plugin management from the Spring Boot parent POM. -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.2.6.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <!-- Coordinates of this demo project. -->
    <groupId>com.hhzhu</groupId>
    <artifactId>demo</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>demo</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <!-- Java language level used for compilation. -->
        <java.version>1.8</java.version>
    </properties>

    <dependencies>
        <!-- Spring MVC support: the controller relies on @Controller, @RequestMapping and MultipartFile. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- Thymeleaf template engine: the result page uses th:text / th:src attributes. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>

        <!-- Tess4J provides the net.sourceforge.tess4j.Tesseract class used by the controller for OCR. -->
        <!-- The version is pinned explicitly here, unlike the Spring Boot starters above. -->
        <!-- https://mvnrepository.com/artifact/net.sourceforge.tess4j/tess4j -->
        <dependency>
            <groupId>net.sourceforge.tess4j</groupId>
            <artifactId>tess4j</artifactId>
            <version>4.4.1</version>
        </dependency>

        <!-- Test-scoped Spring Boot test starter; the JUnit vintage engine is excluded. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Spring Boot Maven plugin. -->
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>

</project>

controller

package com.hhzhu.controller;

import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.mvc.support.RedirectAttributes;
import org.springframework.web.servlet.view.RedirectView;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Description:
 * Controller layer for testing Google's Tesseract OCR engine.
 *
 * <p>The outer class is only a holder; the Spring MVC controller itself is the static nested
 * {@link FileUploadController}.
 *
 * @author HhZhu
 * @date Created on 2020/4/25 15:38
 */
public class TestController {
    /**
     * Serves the upload form, runs OCR on the uploaded image and shows the recognised text.
     */
    @Controller
    public static class FileUploadController {
        /**
         * Directory the uploaded image is written to. The result template requests the image
         * back by file name, so this is expected to be a statically served directory.
         * NOTE(review): machine-specific absolute path; move to configuration before reuse.
         */
        private static final String UPLOAD_DIR =
                "C://Users//Victor//Desktop//demo//src//main//resources//static//";

        /**
         * Directory holding the trained Tesseract language data.
         * NOTE(review): machine-specific absolute path; move to configuration before reuse.
         */
        private static final String TESSDATA_DIR =
                "C://Users//Victor//Desktop//demo//DataScience//testdata//";

        /**
         * Shows the upload form.
         *
         * @return the name of the upload view
         */
        @RequestMapping("/")
        public String index() {
            return "upload";
        }

        /**
         * Stores the uploaded file, runs OCR on it and redirects to the result page.
         *
         * <p>The stored copy is fed directly to Tesseract, so no second copy is left behind in
         * the process working directory.
         *
         * @param file               the uploaded image
         * @param redirectAttributes receives the flash attributes {@code file} and {@code text}
         * @param model              unused; kept so the handler signature stays unchanged
         * @return a redirect to the {@code result} view
         * @throws IOException              if the upload cannot be read or stored
         * @throws TesseractException       if OCR fails
         * @throws IllegalArgumentException if the upload carries no usable file name
         */
        @RequestMapping(value = "/upload", method = RequestMethod.POST)
        public RedirectView singleFileUpload(@RequestParam("file") MultipartFile file,
                                             RedirectAttributes redirectAttributes, Model model) throws IOException, TesseractException {
            // The client controls the original file name: keep only its last path segment so
            // that names such as "../../x" cannot escape the upload directory.
            Path path = Paths.get(UPLOAD_DIR + safeFileName(file.getOriginalFilename()));
            Files.write(path, file.getBytes());

            Tesseract tesseract = new Tesseract();
            tesseract.setDatapath(TESSDATA_DIR);
            String text = tesseract.doOCR(path.toFile());

            // The result template reads file.getOriginalFilename(), so the upload itself is
            // passed along together with the recognised text.
            redirectAttributes.addFlashAttribute("file", file);
            redirectAttributes.addFlashAttribute("text", text);
            return new RedirectView("result");
        }

        /**
         * Shows the OCR result page.
         *
         * @return the name of the result view
         */
        @RequestMapping("/result")
        public String result() {
            return "result";
        }

        /**
         * Copies an upload into a regular file in the current working directory.
         *
         * <p>The caller owns the returned file and is responsible for deleting it.
         *
         * @param file the uploaded file
         * @return a file named after the last path segment of the upload's original file name
         * @throws IOException              if the upload cannot be read or the file cannot be written
         * @throws IllegalArgumentException if the upload carries no usable file name
         */
        public static File convert(MultipartFile file) throws IOException {
            File convFile = new File(safeFileName(file.getOriginalFilename()));
            // try-with-resources closes the stream even when the write fails.
            try (FileOutputStream fos = new FileOutputStream(convFile)) {
                fos.write(file.getBytes());
            }
            return convFile;
        }

        /**
         * Reduces a client-supplied file name to its last path segment.
         *
         * @param originalName the file name as sent by the client; may contain path information
         * @return the part after the last {@code /} or {@code \}, trimmed
         * @throws IllegalArgumentException if the name is null, empty, {@code .} or {@code ..}
         */
        private static String safeFileName(String originalName) {
            if (originalName == null) {
                throw new IllegalArgumentException("Uploaded file has no name");
            }
            // Handle both separators regardless of the server OS; some clients send full paths.
            int lastSeparator = Math.max(originalName.lastIndexOf('/'), originalName.lastIndexOf('\\'));
            String baseName = originalName.substring(lastSeparator + 1).trim();
            if (baseName.isEmpty() || ".".equals(baseName) || "..".equals(baseName)) {
                throw new IllegalArgumentException("Invalid upload file name: " + originalName);
            }
            return baseName;
        }
    }
}

static

# upload页面
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>test</title>
</head>
<body>
    <h1>Upload a file for OCR</h1>
    <!-- Posts the chosen file as multipart part "file" to the /upload handler. -->
    <form method="post" action="/upload" enctype="multipart/form-data">
        <label for="file">Image to recognise:</label>
        <!-- "required" stops the browser from submitting the form without a file. -->
        <input type="file" id="file" name="file" required /><br/><br/>
        <input type="submit" value="Submit" />
    </form>

</body>
</html>

# result页面
<!DOCTYPE html>
<html lang="en" xmlns:th="http://www.thymeleaf.org">
<head>
    <meta charset="UTF-8">
    <title>result</title>
</head>
<body>
    <h1>Extracted Content:</h1>
    <!-- "text" is the recognised text passed as a flash attribute by the upload handler. -->
    <h2><span th:text="${text}"></span></h2>
    <p>From the image:</p>
    <!-- Rendered only when the "file" flash attribute exists, so opening /result directly does not fail. -->
    <!-- The link expression encodes the file name as a path segment and honours the context path. -->
    <img th:if="${file != null}" th:src="@{/{name}(name=${file.getOriginalFilename()})}" alt="Uploaded image"/>

</body>
</html>

这里的testdata可以到github下载，大概有1.6个G的样子(monkey找个脚本加速一下吧，不然要疯的)，然后用两张图片作为测试，下面是识别结果的对比：

微信截图_20200426135853

微信截图_20200426140021

可以看出适应性比较差：对于排版标准的图片，识别很精准；但是对于存在畸变（变形）的图片，识别准确率较差。

Chineseocr-lite

还有很多项目正在调研和测试中……

Copyright： Copyright is owned by the author. For commercial reprints, please contact the author for authorization. For non-commercial reprints, please indicate the source.