diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/codestyle.yml similarity index 69% rename from .github/workflows/pre-commit.yml rename to .github/workflows/codestyle.yml index 55dd8360e1..f49912f42a 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/codestyle.yml @@ -1,4 +1,4 @@ -name: pre-commit +name: PaddleOCR Code Style Check on: pull_request: @@ -6,11 +6,13 @@ on: branches: ['main', 'release/*'] jobs: - pre-commit: + check-code-style: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 + - uses: actions/checkout@v4 + with: + ref: ${{ github.ref }} + - uses: actions/setup-python@v5 with: python-version: '3.10' # Install Dependencies for Python diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml new file mode 100644 index 0000000000..04333130f7 --- /dev/null +++ b/.github/workflows/tests.yaml @@ -0,0 +1,30 @@ +name: PaddleOCR PR Tests + +on: + push: + pull_request: + branches: ["main", "release/*"] + +permissions: + contents: read + +jobs: + test-pr: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install "paddlepaddle==2.5" requests + pip install -e . 
+ - name: Test with pytest + run: | + pytest tests/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5cc16ba237..a94ef86abd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,3 +35,16 @@ repos: hooks: - id: black files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ + +# Flake8 +- repo: https://github.com/pycqa/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + args: + - --count + - --select=E9,F63,F7,F82 + - --show-source + - --statistics + exclude: ^benchmark/|^test_tipc/ + diff --git a/README.md b/README.md old mode 100755 new mode 100644 index 2e8f9b6623..9dbde9c8ce --- a/README.md +++ b/README.md @@ -14,8 +14,7 @@

## 简介 - -PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力开发者训练出更好的模型,并应用落地。 +PaddleOCR 旨在打造一套丰富、领先、且实用的 OCR 工具库,助力开发者训练出更好的模型,并应用落地。
@@ -25,10 +24,13 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
+## 🚀 Community +PaddleOCR is overseen by a [PMC](https://github.com/PaddlePaddle/PaddleOCR/issues/12122). Issues and PRs will be reviewed on a best-effort basis. For a complete overview of the PaddlePaddle community, please visit [community](https://github.com/PaddlePaddle/community). + ## 📣 近期更新 -- **📖直播和OCR实战打卡营预告**:《OCR零代码产线重构信息处理新范式》,详细解读四大OCR场景任务以及全新开发范式,并启动车牌检测实战打卡营。直播时间:5月16日(周四)19:00。直播报名:https://www.wjx.top/vm/YgdyYKX.aspx?udsid=881730 -- **🔥2024.5.10 上线星河零代码产线(OCR 相关)**:全面覆盖了以下四大OCR核心任务,提供极便捷的Badcase分析和实用的在线体验: - - [通用OCR](https://aistudio.baidu.com/community/app/91660) (PP-OCRv4)。 +- **📖直播和 OCR 实战打卡营预告**:《OCR 零代码产线重构信息处理新范式》,详细解读四大 OCR 场景任务以及全新开发范式,并启动车牌检测实战打卡营。直播时间:5 月 16 日(周四)19:00。直播报名:https://www.wjx.top/vm/YgdyYKX.aspx?udsid=881730 +- **🔥2024.5.10 上线星河零代码产线(OCR 相关)**:全面覆盖了以下四大 OCR 核心任务,提供极便捷的 Badcase 分析和实用的在线体验: + - [通用 OCR](https://aistudio.baidu.com/community/app/91660) (PP-OCRv4)。 - [通用表格识别](https://aistudio.baidu.com/community/app/91661) (SLANet)。 - [通用图像信息抽取](https://aistudio.baidu.com/community/app/91662) (PP-ChatOCRv2-common)。 - [文档场景信息抽取](https://aistudio.baidu.com/community/app/70303) (PP-ChatOCRv2-doc)。 @@ -36,25 +38,25 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 同时采用了 **[全新的场景任务开发范式](https://aistudio.baidu.com/pipeline/mine)** ,将模型统一汇聚,实现训练部署的零代码开发,并支持在线服务化部署和导出离线服务化部署包。 - **🔥2023.8.7 发布 PaddleOCR [release/2.7](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.7)** - - 发布[PP-OCRv4](./doc/doc_ch/PP-OCRv4_introduction.md),提供mobile和server两种模型 - - PP-OCRv4-mobile:速度可比情况下,中文场景效果相比于PP-OCRv3再提升4.5%,英文场景提升10%,80语种多语言模型平均识别准确率提升8%以上 - - PP-OCRv4-server:发布了目前精度最高的OCR模型,中英文场景上检测模型精度提升4.9%, 识别模型精度提升2% - 可参考[快速开始](./doc/doc_ch/quickstart.md) 一行命令快速使用,同时也可在飞桨AI套件(PaddleX)中的[通用OCR产业方案](https://aistudio.baidu.com/aistudio/modelsdetail?modelId=286)中低代码完成模型训练、推理、高性能部署全流程 -- 🔨**2022.11 新增实现[4种前沿算法](doc/doc_ch/algorithm_overview.md)**:文本检测 [DRRG](doc/doc_ch/algorithm_det_drrg.md), 文本识别 [RFL](doc/doc_ch/algorithm_rec_rfl.md), 文本超分[Text 
Telescope](doc/doc_ch/algorithm_sr_telescope.md),公式识别[CAN](doc/doc_ch/algorithm_rec_can.md) -- **2022.10 优化[JS版PP-OCRv3模型](./deploy/paddlejs/README_ch.md)**:模型大小仅4.3M,预测速度提升8倍,配套web demo开箱即用 -- **💥 直播回放:PaddleOCR研发团队详解PP-StructureV2优化策略**。微信扫描[下方二维码](#开源社区),关注公众号并填写问卷后进入官方交流群,获取直播回放链接与20G重磅OCR学习大礼包(内含PDF转Word应用程序、10种垂类模型、《动手学OCR》电子书等) + - 发布[PP-OCRv4](./doc/doc_ch/PP-OCRv4_introduction.md),提供 mobile 和 server 两种模型 + - PP-OCRv4-mobile:速度可比情况下,中文场景效果相比于 PP-OCRv3 再提升 4.5%,英文场景提升 10%,80 语种多语言模型平均识别准确率提升 8%以上 + - PP-OCRv4-server:发布了目前精度最高的 OCR 模型,中英文场景上检测模型精度提升 4.9%, 识别模型精度提升 2% + 可参考[快速开始](./doc/doc_ch/quickstart.md) 一行命令快速使用,同时也可在飞桨 AI 套件(PaddleX)中的[通用 OCR 产业方案](https://aistudio.baidu.com/aistudio/modelsdetail?modelId=286)中低代码完成模型训练、推理、高性能部署全流程 +- 🔨**2022.11 新增实现[4 种前沿算法](doc/doc_ch/algorithm_overview.md)**:文本检测 [DRRG](doc/doc_ch/algorithm_det_drrg.md), 文本识别 [RFL](doc/doc_ch/algorithm_rec_rfl.md), 文本超分[Text Telescope](doc/doc_ch/algorithm_sr_telescope.md),公式识别[CAN](doc/doc_ch/algorithm_rec_can.md) +- **2022.10 优化[JS 版 PP-OCRv3 模型](./deploy/paddlejs/README_ch.md)**:模型大小仅 4.3M,预测速度提升 8 倍,配套 web demo 开箱即用 +- **💥 直播回放:PaddleOCR 研发团队详解 PP-StructureV2 优化策略**。微信扫描[下方二维码](#开源社区),关注公众号并填写问卷后进入官方交流群,获取直播回放链接与 20G 重磅 OCR 学习大礼包(内含 PDF 转 Word 应用程序、10 种垂类模型、《动手学 OCR》电子书等) - **🔥2022.8.24 发布 PaddleOCR [release/2.6](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.6)** - - 发布[PP-StructureV2](./ppstructure/README_ch.md),系统功能性能全面升级,适配中文场景,新增支持[版面复原](./ppstructure/recovery/README_ch.md),支持**一行命令完成PDF转Word**; - - [版面分析](./ppstructure/layout/README_ch.md)模型优化:模型存储减少95%,速度提升11倍,平均CPU耗时仅需41ms; - - [表格识别](./ppstructure/table/README_ch.md)模型优化:设计3大优化策略,预测耗时不变情况下,模型精度提升6%; - - [关键信息抽取](./ppstructure/kie/README_ch.md)模型优化:设计视觉无关模型结构,语义实体识别精度提升2.8%,关系抽取精度提升9.1%。 -- 🔥**2022.8 发布 [OCR场景应用集合](./applications)**:包含数码管、液晶屏、车牌、高精度SVTR模型、手写体识别等**9个垂类模型**,覆盖通用,制造、金融、交通行业的主要OCR垂类应用。 + - 
发布[PP-StructureV2](./ppstructure/README_ch.md),系统功能性能全面升级,适配中文场景,新增支持[版面复原](./ppstructure/recovery/README_ch.md),支持**一行命令完成 PDF 转 Word**; + - [版面分析](./ppstructure/layout/README_ch.md)模型优化:模型存储减少 95%,速度提升 11 倍,平均 CPU 耗时仅需 41ms; + - [表格识别](./ppstructure/table/README_ch.md)模型优化:设计 3 大优化策略,预测耗时不变情况下,模型精度提升 6%; + - [关键信息抽取](./ppstructure/kie/README_ch.md)模型优化:设计视觉无关模型结构,语义实体识别精度提升 2.8%,关系抽取精度提升 9.1%。 +- 🔥**2022.8 发布 [OCR 场景应用集合](./applications)**:包含数码管、液晶屏、车牌、高精度 SVTR 模型、手写体识别等**9 个垂类模型**,覆盖通用,制造、金融、交通行业的主要 OCR 垂类应用。 > [更多](./doc/doc_ch/update.md) ## 🌟 特性 -支持多种OCR相关前沿算法,在此基础上打造产业级特色模型[PP-OCR](./doc/doc_ch/ppocr_introduction.md)、[PP-Structure](./ppstructure/README_ch.md)和[PP-ChatOCRv2](https://aistudio.baidu.com/community/app/70303),并打通数据生产、模型训练、压缩、预测部署全流程。 +支持多种 OCR 相关前沿算法,在此基础上打造产业级特色模型[PP-OCR](./doc/doc_ch/ppocr_introduction.md)、[PP-Structure](./ppstructure/README_ch.md)和[PP-ChatOCRv2](https://aistudio.baidu.com/community/app/70303),并打通数据生产、模型训练、压缩、预测部署全流程。
@@ -72,39 +74,26 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 - PP-ChatOCRv2-doc 在线体验地址:https://aistudio.baidu.com/community/app/70303 - 一行命令快速使用:[快速开始(中英文/多语言/文档分析)](./doc/doc_ch/quickstart.md) -- 移动端demo体验:[安装包DEMO下载地址](https://ai.baidu.com/easyedge/app/openSource?from=paddlelite)(基于EasyEdge和Paddle-Lite, 支持iOS和Android系统) +- 移动端 demo 体验:[安装包 DEMO 下载地址](https://ai.baidu.com/easyedge/app/openSource?from=paddlelite)(基于 EasyEdge 和 Paddle-Lite, 支持 iOS 和 Android 系统) - ## 📖 技术交流合作 - 飞桨低代码开发工具 PaddleX 官方交流频道:https://aistudio.baidu.com/community/channel/610 - -## 📚《动手学OCR》电子书 -- [《动手学OCR》电子书](./doc/doc_ch/ocr_book.md) - - -## 🚀 开源共建 -- **👫 加入社区**:感谢大家长久以来对 PaddleOCR 的支持和关注,与广大开发者共同构建一个专业、和谐、相互帮助的开源社区是 PaddleOCR 的目标。我们非常欢迎各位开发者参与到飞桨社区的开源建设中,加入开源、共建飞桨。**为感谢社区开发者在 PaddleOCR release2.7 中做出的代码贡献,我们将为贡献者制作与邮寄[开源贡献证书](https://github.com/PaddlePaddle/community/blob/master/contributors/certificate-inspection.md),烦请[填写问卷](https://paddle.wjx.cn/vm/wFNr6w7.aspx)提供必要的邮寄信息。** -- **🤩 社区活动**:飞桨开源社区长期运营与发布各类丰富的活动与开发任务,在 PaddleOCR 社区,你可以关注以下社区活动,并选择自己感兴趣的内容参与开源共建: - - **🎁 飞桨套件快乐开源常规赛 | [传送门](https://github.com/PaddlePaddle/PaddleOCR/issues/10223)**:OCR 社区常规赛升级版,以建设更好用的 OCR 套件为目标,包括但不限于学术前沿模型训练与推理、打磨优化 OCR 工具与应用项目开发等,任何有利于社区意见流动和问题解决的行为都热切希望大家的参与。让我们共同成长为飞桨套件的重要 Contributor 🎉🎉🎉。 - - **💡 新需求征集 | [传送门](https://github.com/PaddlePaddle/PaddleOCR/issues/10334)**:你在日常研究和实践深度学习过程中,有哪些你期望的 feature 亟待实现?请按照格式描述你想实现的 feature 和你提出的初步实现思路,我们会定期沟通与讨论这些需求,并将其纳入未来的版本规划中。 - - **💬 PP-SIG 技术研讨会 | [传送门](https://github.com/PaddlePaddle/community/tree/master/ppsigs)**:PP-SIG 是飞桨社区开发者由于相同的兴趣汇聚在一起形成的虚拟组织,通过定期召开技术研讨会的方式,分享行业前沿动态、探讨社区需求与技术开发细节、发起社区联合贡献任务。PaddleOCR 希望可以通过 AI 的力量助力任何一位有梦想的开发者实现自己的想法,享受创造价值带来的愉悦。 -- **📑 项目合作**:如果你有企业中明确的 OCR 垂类应用需求,我们推荐你使用训压推一站式全流程高效率开发平台 PaddleX,助力 AI 技术快速落地。PaddleX 还支持联创开发,利润分成!欢迎广大的个人开发者和企业开发者参与进来,共创繁荣的 AI 技术生态! 
+## 📚《动手学 OCR》电子书 +- [《动手学 OCR》电子书](./doc/doc_ch/ocr_book.md) - - -## 🛠️ PP-OCR系列模型列表(更新中) +## 🛠️ PP-OCR 系列模型列表(更新中) | 模型简介 | 模型名称 | 推荐场景 | 检测模型 | 方向分类器 | 识别模型 | | ------------------------------------- | ----------------------- | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -| 中英文超轻量PP-OCRv4模型(15.8M) | ch_PP-OCRv4_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_train.tar) | -| 中英文超轻量PP-OCRv3模型(16.2M) | ch_PP-OCRv3_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) | -| 英文超轻量PP-OCRv3模型(13.4M) | en_PP-OCRv3_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_distill_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / 
[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar) | +| 中英文超轻量 PP-OCRv4 模型(15.8M) | ch_PP-OCRv4_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_train.tar) | +| 中英文超轻量 PP-OCRv3 模型(16.2M) | ch_PP-OCRv3_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) | +| 英文超轻量 PP-OCRv3 模型(13.4M) | en_PP-OCRv3_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_distill_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) / 
[训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar) | -- 超轻量OCR系列更多模型下载(包括多语言),可以参考[PP-OCR系列模型下载](./doc/doc_ch/models_list.md),文档分析相关模型参考[PP-Structure系列模型下载](./ppstructure/docs/models_list.md) +- 超轻量 OCR 系列更多模型下载(包括多语言),可以参考[PP-OCR 系列模型下载](./doc/doc_ch/models_list.md),文档分析相关模型参考[PP-Structure 系列模型下载](./ppstructure/docs/models_list.md) -### PaddleOCR场景应用模型 +### PaddleOCR 场景应用模型 | 行业 | 类别 | 亮点 | 文档说明 | 模型下载 | | ---- | ------------ | ---------------------------------- | ------------------------------------------------------------ | --------------------------------------------- | @@ -112,14 +101,12 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 | 金融 | 通用表单识别 | 多模态通用表单结构化提取 | [多模态表单识别](./applications/多模态表单识别.md) | [下载链接](./applications/README.md#模型下载) | | 交通 | 车牌识别 | 多角度图像处理、轻量模型、端侧部署 | [轻量级车牌识别](./applications/轻量级车牌识别.md) | [下载链接](./applications/README.md#模型下载) | -- 更多制造、金融、交通行业的主要OCR垂类应用模型(如电表、液晶屏、高精度SVTR模型等),可参考[场景应用模型下载](./applications) - - +- 更多制造、金融、交通行业的主要 OCR 垂类应用模型(如电表、液晶屏、高精度 SVTR 模型等),可参考[场景应用模型下载](./applications) ## 📖 文档教程 - [运行环境准备](./doc/doc_ch/environment.md) -- [PP-OCR文本检测识别🔥](./doc/doc_ch/ppocr_introduction.md) +- [PP-OCR 文本检测识别🔥](./doc/doc_ch/ppocr_introduction.md) - [快速开始](./doc/doc_ch/quickstart.md) - [模型库](./doc/doc_ch/models_list.md) - [模型训练](./doc/doc_ch/training.md) @@ -131,14 +118,14 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 - [模型裁剪](./deploy/slim/prune/README.md) - [知识蒸馏](./doc/doc_ch/knowledge_distillation.md) - [推理部署](./deploy/README_ch.md) - - [基于Python预测引擎推理](./doc/doc_ch/inference_ppocr.md) - - [基于C++预测引擎推理](./deploy/cpp_infer/readme_ch.md) + - [基于 Python 预测引擎推理](./doc/doc_ch/inference_ppocr.md) + - [基于 C++预测引擎推理](./deploy/cpp_infer/readme_ch.md) - [服务化部署](./deploy/pdserving/README_CN.md) - [端侧部署](./deploy/lite/readme.md) - - [Paddle2ONNX模型转化与预测](./deploy/paddle2onnx/readme.md) + - [Paddle2ONNX 模型转化与预测](./deploy/paddle2onnx/readme.md) - [云上飞桨部署工具](./deploy/paddlecloud/README.md) - [Benchmark](./doc/doc_ch/benchmark.md) -- 
[PP-Structure文档分析🔥](./ppstructure/README_ch.md) +- [PP-Structure 文档分析🔥](./ppstructure/README_ch.md) - [快速开始](./ppstructure/docs/quickstart.md) - [模型库](./ppstructure/docs/models_list.md) - [模型训练](./doc/doc_ch/training.md) @@ -146,36 +133,36 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 - [表格识别](./ppstructure/table/README_ch.md) - [关键信息提取](./ppstructure/kie/README_ch.md) - [推理部署](./deploy/README_ch.md) - - [基于Python预测引擎推理](./ppstructure/docs/inference.md) - - [基于C++预测引擎推理](./deploy/cpp_infer/readme_ch.md) + - [基于 Python 预测引擎推理](./ppstructure/docs/inference.md) + - [基于 C++预测引擎推理](./deploy/cpp_infer/readme_ch.md) - [服务化部署](./deploy/hubserving/readme.md) - [前沿算法与模型🚀](./doc/doc_ch/algorithm_overview.md) - [文本检测算法](./doc/doc_ch/algorithm_overview.md) - [文本识别算法](./doc/doc_ch/algorithm_overview.md) - - [端到端OCR算法](./doc/doc_ch/algorithm_overview.md) + - [端到端 OCR 算法](./doc/doc_ch/algorithm_overview.md) - [表格识别算法](./doc/doc_ch/algorithm_overview.md) - [关键信息抽取算法](./doc/doc_ch/algorithm_overview.md) - - [使用PaddleOCR架构添加新算法](./doc/doc_ch/add_new_algorithm.md) + - [使用 PaddleOCR 架构添加新算法](./doc/doc_ch/add_new_algorithm.md) - [场景应用](./applications) - 数据标注与合成 - - [半自动标注工具PPOCRLabel](https://github.com/PFCCLab/PPOCRLabel/blob/main/README_ch.md) - - [数据合成工具Style-Text](https://github.com/PFCCLab/StyleText/blob/main/README_ch.md) + - [半自动标注工具 PPOCRLabel](https://github.com/PFCCLab/PPOCRLabel/blob/main/README_ch.md) + - [数据合成工具 Style-Text](https://github.com/PFCCLab/StyleText/blob/main/README_ch.md) - [其它数据标注工具](./doc/doc_ch/data_annotation.md) - [其它数据合成工具](./doc/doc_ch/data_synthesis.md) - 数据集 - - [通用中英文OCR数据集](doc/doc_ch/dataset/datasets.md) - - [手写中文OCR数据集](doc/doc_ch/dataset/handwritten_datasets.md) - - [垂类多语言OCR数据集](doc/doc_ch/dataset/vertical_and_multilingual_datasets.md) + - [通用中英文 OCR 数据集](doc/doc_ch/dataset/datasets.md) + - [手写中文 OCR 数据集](doc/doc_ch/dataset/handwritten_datasets.md) + - [垂类多语言 OCR 数据集](doc/doc_ch/dataset/vertical_and_multilingual_datasets.md) - 
[版面分析数据集](doc/doc_ch/dataset/layout_datasets.md) - [表格识别数据集](doc/doc_ch/dataset/table_datasets.md) - [关键信息提取数据集](doc/doc_ch/dataset/kie_datasets.md) - [代码组织结构](./doc/doc_ch/tree.md) - [效果展示](#效果展示) -- [《动手学OCR》电子书📚](./doc/doc_ch/ocr_book.md) +- [《动手学 OCR》电子书📚](./doc/doc_ch/ocr_book.md) - [开源社区](#开源社区) - FAQ - [通用问题](./doc/doc_ch/FAQ.md) - - [PaddleOCR实战问题](./doc/doc_ch/FAQ.md) + - [PaddleOCR 实战问题](./doc/doc_ch/FAQ.md) - [参考文献](./doc/doc_ch/reference.md) - [许可证书](#许可证书) diff --git a/README_ch.md b/README_ch.md deleted file mode 100755 index 187cca5ab1..0000000000 --- a/README_ch.md +++ /dev/null @@ -1,254 +0,0 @@ -[English](README.md) | 简体中文 | [हिन्दी](./doc/doc_i18n/README_हिन्द.md) | [日本語](./doc/doc_i18n/README_日本語.md) | [한국인](./doc/doc_i18n/README_한국어.md) | [Pу́сский язы́к](./doc/doc_i18n/README_Ру́сский_язы́к.md) - -

- -

-

- - - - - - - -

- -## 简介 - -PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力开发者训练出更好的模型,并应用落地。 - -
- -
- -
- -
- -## 📣 近期更新 - -- **🔥2023.3.10 PaddleOCR集成了高性能、全场景模型部署方案FastDeploy,欢迎参考[指南](https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/deploy/fastdeploy)试用(注意使用dygraph分支)。** -- 📚**2022.12 发布[《OCR产业范例20讲》电子书](./applications/README.md)**,新增蒙古文、身份证、液晶屏缺陷等**7个场景应用范例** -- 🔨**2022.11 新增实现[4种前沿算法](doc/doc_ch/algorithm_overview.md)**:文本检测 [DRRG](doc/doc_ch/algorithm_det_drrg.md), 文本识别 [RFL](doc/doc_ch/algorithm_rec_rfl.md), 文本超分[Text Telescope](doc/doc_ch/algorithm_sr_telescope.md),公式识别[CAN](doc/doc_ch/algorithm_rec_can.md) -- **2022.10 优化[JS版PP-OCRv3模型](./deploy/paddlejs/README_ch.md)**:模型大小仅4.3M,预测速度提升8倍,配套web demo开箱即用 -- **💥 直播回放:PaddleOCR研发团队详解PP-StructureV2优化策略**。微信扫描[下方二维码](#开源社区),关注公众号并填写问卷后进入官方交流群,获取直播回放链接与20G重磅OCR学习大礼包(内含PDF转Word应用程序、10种垂类模型、《动手学OCR》电子书等) - -- **🔥2022.8.24 发布 PaddleOCR [release/2.6](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.6)** - - 发布[PP-StructureV2](./ppstructure/README_ch.md),系统功能性能全面升级,适配中文场景,新增支持[版面复原](./ppstructure/recovery/README_ch.md),支持**一行命令完成PDF转Word**; - - [版面分析](./ppstructure/layout/README_ch.md)模型优化:模型存储减少95%,速度提升11倍,平均CPU耗时仅需41ms; - - [表格识别](./ppstructure/table/README_ch.md)模型优化:设计3大优化策略,预测耗时不变情况下,模型精度提升6%; - - [关键信息抽取](./ppstructure/kie/README_ch.md)模型优化:设计视觉无关模型结构,语义实体识别精度提升2.8%,关系抽取精度提升9.1%。 -- **2022.8 发布 [OCR场景应用集合](./applications)**:包含数码管、液晶屏、车牌、高精度SVTR模型、手写体识别等**9个垂类模型**,覆盖通用,制造、金融、交通行业的主要OCR垂类应用。 -- **2022.8 新增实现[8种前沿算法](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_overview.md)** - - 文本检测:[FCENet](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_det_fcenet.md), [DB++](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_det_db.md) - - 文本识别:[ViTSTR](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_vitstr.md), [ABINet](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_abinet.md), 
[VisionLAN](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_visionlan.md), [SPIN](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_spin.md), [RobustScanner](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_robustscanner.md) - - 表格识别:[TableMaster](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_table_master.md) - -- **2022.5.9 发布 PaddleOCR [release/2.5](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.5)** - - 发布[PP-OCRv3](./doc/doc_ch/ppocr_introduction.md#pp-ocrv3),速度可比情况下,中文场景效果相比于PP-OCRv2再提升5%,英文场景提升11%,80语种多语言模型平均识别准确率提升5%以上; - - 发布半自动标注工具[PPOCRLabelv2](https://github.com/PFCCLab/PPOCRLabel):新增表格文字图像、图像关键信息抽取任务和不规则文字图像的标注功能; - - 发布OCR产业落地工具集:打通22种训练部署软硬件环境与方式,覆盖企业90%的训练部署环境需求; - - 发布交互式OCR开源电子书[《动手学OCR》](./doc/doc_ch/ocr_book.md),覆盖OCR全栈技术的前沿理论与代码实践,并配套教学视频。 - -> [更多](./doc/doc_ch/update.md) - -## 🌟 特性 - -支持多种OCR相关前沿算法,在此基础上打造产业级特色模型[PP-OCR](./doc/doc_ch/ppocr_introduction.md)和[PP-Structure](./ppstructure/README_ch.md),并打通数据生产、模型训练、压缩、预测部署全流程。 - -
- -
- -> 上述内容的使用方法建议从文档教程中的快速开始体验 - - -## ⚡ 快速开始 - -- 在线网站体验:超轻量PP-OCR mobile模型体验地址:https://www.paddlepaddle.org.cn/hub/scene/ocr -- 移动端demo体验:[安装包DEMO下载地址](https://ai.baidu.com/easyedge/app/openSource?from=paddlelite)(基于EasyEdge和Paddle-Lite, 支持iOS和Android系统) -- 一行命令快速使用:[快速开始(中英文/多语言/文档分析)](./doc/doc_ch/quickstart.md) - - -## 📚《动手学OCR》电子书 -- [《动手学OCR》电子书](./doc/doc_ch/ocr_book.md) - - - -## 👫 开源社区 -- **📑项目合作:** 如果您是企业开发者且有明确的OCR垂类应用需求,填写[问卷](https://paddle.wjx.cn/vj/QwF7GKw.aspx)后可免费与官方团队展开不同层次的合作。 -- **👫加入社区:** **微信扫描二维码并填写问卷之后,加入交流群领取20G重磅OCR学习大礼包** - - **包括《动手学OCR》电子书** ,配套讲解视频和notebook项目;**PaddleOCR历次发版直播课回放链接**; - - **OCR场景应用模型集合:** 包含数码管、液晶屏、车牌、高精度SVTR模型、手写体识别等垂类模型,覆盖通用,制造、金融、交通行业的主要OCR垂类应用。 - - PDF2Word应用程序;OCR社区优秀开发者项目分享视频。 -- **🏅️社区项目**:[社区项目](./doc/doc_ch/thirdparty.md)文档中包含了社区用户**使用PaddleOCR开发的各种工具、应用**以及**为PaddleOCR贡献的功能、优化的文档与代码**等,是官方为社区开发者打造的荣誉墙,也是帮助优质项目宣传的广播站。 -- **🎁社区常规赛**:社区常规赛是面向OCR开发者的积分赛事,覆盖文档、代码、模型和应用四大类型,以季度为单位评选并发放奖励,赛题详情与报名方法可参考[链接](https://github.com/PaddlePaddle/PaddleOCR/issues/4982)。 - -
- -

PaddleOCR官方交流群二维码

-
- - -## 🛠️ PP-OCR系列模型列表(更新中) - -| 模型简介 | 模型名称 | 推荐场景 | 检测模型 | 方向分类器 | 识别模型 | -| ------------------------------------- | ----------------------- | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -| 中英文超轻量PP-OCRv3模型(16.2M) | ch_PP-OCRv3_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) | -| 英文超轻量PP-OCRv3模型(13.4M) | en_PP-OCRv3_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_distill_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar) | - -- 超轻量OCR系列更多模型下载(包括多语言),可以参考[PP-OCR系列模型下载](./doc/doc_ch/models_list.md),文档分析相关模型参考[PP-Structure系列模型下载](./ppstructure/docs/models_list.md) - -### PaddleOCR场景应用模型 - -| 行业 | 类别 | 亮点 | 文档说明 | 模型下载 | -| ---- | ------------ | ---------------------------------- | ------------------------------------------------------------ | --------------------------------------------- | -| 制造 | 数码管识别 | 数码管数据合成、漏识别调优 | [光功率计数码管字符识别](./applications/光功率计数码管字符识别/光功率计数码管字符识别.md) | 
[下载链接](./applications/README.md#模型下载) | -| 金融 | 通用表单识别 | 多模态通用表单结构化提取 | [多模态表单识别](./applications/多模态表单识别.md) | [下载链接](./applications/README.md#模型下载) | -| 交通 | 车牌识别 | 多角度图像处理、轻量模型、端侧部署 | [轻量级车牌识别](./applications/轻量级车牌识别.md) | [下载链接](./applications/README.md#模型下载) | - -- 更多制造、金融、交通行业的主要OCR垂类应用模型(如电表、液晶屏、高精度SVTR模型等),可参考[场景应用模型下载](./applications) - - - -## 📖 文档教程 - -- [运行环境准备](./doc/doc_ch/environment.md) -- [PP-OCR文本检测识别🔥](./doc/doc_ch/ppocr_introduction.md) - - [快速开始](./doc/doc_ch/quickstart.md) - - [模型库](./doc/doc_ch/models_list.md) - - [模型训练](./doc/doc_ch/training.md) - - [文本检测](./doc/doc_ch/detection.md) - - [文本识别](./doc/doc_ch/recognition.md) - - [文本方向分类器](./doc/doc_ch/angle_class.md) - - 模型压缩 - - [模型量化](./deploy/slim/quantization/README.md) - - [模型裁剪](./deploy/slim/prune/README.md) - - [知识蒸馏](./doc/doc_ch/knowledge_distillation.md) - - [推理部署](./deploy/README_ch.md) - - [基于Python预测引擎推理](./doc/doc_ch/inference_ppocr.md) - - [基于C++预测引擎推理](./deploy/cpp_infer/readme_ch.md) - - [服务化部署](./deploy/pdserving/README_CN.md) - - [端侧部署](./deploy/lite/readme.md) - - [Paddle2ONNX模型转化与预测](./deploy/paddle2onnx/readme.md) - - [云上飞桨部署工具](./deploy/paddlecloud/README.md) - - [Benchmark](./doc/doc_ch/benchmark.md) -- [PP-Structure文档分析🔥](./ppstructure/README_ch.md) - - [快速开始](./ppstructure/docs/quickstart.md) - - [模型库](./ppstructure/docs/models_list.md) - - [模型训练](./doc/doc_ch/training.md) - - [版面分析](./ppstructure/layout/README_ch.md) - - [表格识别](./ppstructure/table/README_ch.md) - - [关键信息提取](./ppstructure/kie/README_ch.md) - - [推理部署](./deploy/README_ch.md) - - [基于Python预测引擎推理](./ppstructure/docs/inference.md) - - [基于C++预测引擎推理](./deploy/cpp_infer/readme_ch.md) - - [服务化部署](./deploy/hubserving/readme.md) -- [前沿算法与模型🚀](./doc/doc_ch/algorithm_overview.md) - - [文本检测算法](./doc/doc_ch/algorithm_overview.md) - - [文本识别算法](./doc/doc_ch/algorithm_overview.md) - - [端到端OCR算法](./doc/doc_ch/algorithm_overview.md) - - [表格识别算法](./doc/doc_ch/algorithm_overview.md) - - 
[关键信息抽取算法](./doc/doc_ch/algorithm_overview.md) - - [使用PaddleOCR架构添加新算法](./doc/doc_ch/add_new_algorithm.md) -- [场景应用](./applications) -- 数据标注与合成 - - [半自动标注工具PPOCRLabel](https://github.com/PFCCLab/PPOCRLabel/blob/main/README_ch.md) - - [数据合成工具Style-Text](https://github.com/PFCCLab/StyleText/blob/main/README_ch.md) - - [其它数据标注工具](./doc/doc_ch/data_annotation.md) - - [其它数据合成工具](./doc/doc_ch/data_synthesis.md) -- 数据集 - - [通用中英文OCR数据集](doc/doc_ch/dataset/datasets.md) - - [手写中文OCR数据集](doc/doc_ch/dataset/handwritten_datasets.md) - - [垂类多语言OCR数据集](doc/doc_ch/dataset/vertical_and_multilingual_datasets.md) - - [版面分析数据集](doc/doc_ch/dataset/layout_datasets.md) - - [表格识别数据集](doc/doc_ch/dataset/table_datasets.md) - - [关键信息提取数据集](doc/doc_ch/dataset/kie_datasets.md) -- [代码组织结构](./doc/doc_ch/tree.md) -- [效果展示](#效果展示) -- [《动手学OCR》电子书📚](./doc/doc_ch/ocr_book.md) -- [开源社区](#开源社区) -- FAQ - - [通用问题](./doc/doc_ch/FAQ.md) - - [PaddleOCR实战问题](./doc/doc_ch/FAQ.md) -- [参考文献](./doc/doc_ch/reference.md) -- [许可证书](#许可证书) - - - - -## 👀 效果展示 [more](./doc/doc_ch/visualization.md) - -
-PP-OCRv3 中文模型 - -
- - - -
- -
- - -
-PP-OCRv3 英文模型 - -
- - -
- -
- - -
-PP-OCRv3 多语言模型 - -
- - -
- -
- -
-PP-Structure 文档分析 - -- 版面分析+表格识别 -
- -
- -- SER(语义实体识别) -
- -
- -
- -
- -
- -
- -- RE(关系提取) -
- -
- -
- -
- -
- -
- -
- - - -## 许可证书 -本项目的发布受Apache 2.0 license许可认证。 diff --git a/README_en.md b/README_en.md index 47869d2eb2..be65512932 100644 --- a/README_en.md +++ b/README_en.md @@ -1,4 +1,4 @@ -English | [简体中文](README_ch.md) | [हिन्दी](./doc/doc_i18n/README_हिन्द.md) | [日本語](./doc/doc_i18n/README_日本語.md) | [한국인](./doc/doc_i18n/README_한국어.md) | [Pу́сский язы́к](./doc/doc_i18n/README_Ру́сский_язы́к.md) +English | [简体中文](README.md) | [हिन्दी](./doc/doc_i18n/README_हिन्द.md) | [日本語](./doc/doc_i18n/README_日本語.md) | [한국인](./doc/doc_i18n/README_한국어.md) | [Pу́сский язы́к](./doc/doc_i18n/README_Ру́сский_язы́к.md)

@@ -25,6 +25,9 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools

+## 🚀 Community +PaddleOCR is overseen by a [PMC](https://github.com/PaddlePaddle/PaddleOCR/issues/12122). Issues and PRs will be reviewed on a best-effort basis. For a complete overview of the PaddlePaddle community, please visit [community](https://github.com/PaddlePaddle/community). + ## 📣 Recent updates - **🔥2023.8.7 Release PaddleOCR[release/2.7](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.7)** - Release [PP-OCRv4](./doc/doc_ch/PP-OCRv4_introduction.md), support mobile version and server version @@ -56,7 +59,6 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools ## 🌟 Features - PaddleOCR support a variety of cutting-edge algorithms related to OCR, and developed industrial featured models/solution [PP-OCR](./doc/doc_en/ppocr_introduction_en.md)、 [PP-Structure](./ppstructure/README.md) and [PP-ChatOCR](https://aistudio.baidu.com/aistudio/projectdetail/6488689) on this basis, and get through the whole process of data production, model training, compression, inference and deployment.
@@ -67,7 +69,6 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel ## ⚡ Quick Experience - - Web online experience - PP-OCRv4 online experience:https://aistudio.baidu.com/aistudio/projectdetail/6611435 - PP-ChatOCR online experience:https://aistudio.baidu.com/aistudio/projectdetail/6488689 @@ -77,38 +78,14 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel - PP-ChatOCR:https://aistudio.baidu.com/aistudio/modelsdetail?modelId=332 - Mobile demo experience:[Installation DEMO](https://ai.baidu.com/easyedge/app/openSource?from=paddlelite)(Based on EasyEdge and Paddle-Lite, support iOS and Android systems) - - ## 📖 Technical exchange and cooperation -- ([PaddleX](http://10.136.157.23:8080/paddle/paddleX))provides a one-stop full-process high-efficiency development platform for flying paddle ecological model training, pressure, and push. Its mission is to help AI technology quickly land, and its vision is to make everyone an AI Developer! +- PaddleX provides a one-stop full-process high-efficiency development platform for flying paddle ecological model training, pressure, and push. Its mission is to help AI technology quickly land, and its vision is to make everyone an AI Developer! - PaddleX currently covers areas such as image classification, object detection, image segmentation, 3D, OCR, and time series prediction, and has built-in 36 basic single models, such as RP-DETR, PP-YOLOE, PP-HGNet, PP-LCNet, PP- LiteSeg, etc.; integrated 12 practical industrial solutions, such as PP-OCRv4, PP-ChatOCR, PP-ShiTu, PP-TS, vehicle-mounted road waste detection, identification of prohibited wildlife products, etc. - PaddleX provides two AI development modes: "Toolbox" and "Developer". The toolbox mode can tune key hyperparameters without code, and the developer mode can perform single-model training, push and multi-model serial inference with low code, and supports both cloud and local terminals. 
- PaddleX also supports joint innovation and development, profit sharing! At present, PaddleX is rapidly iterating, and welcomes the participation of individual developers and enterprise developers to create a prosperous AI technology ecosystem! -Scan the QR code below on WeChat to add operation students, and reply [paddlex], operation students will invite you to join the official communication group for more efficient questions and answers. - -
- -

[PaddleX] technology exchange group QR code

-
- - ## 📚 E-book: *Dive Into OCR* -- [Dive Into OCR ](./doc/doc_en/ocr_book_en.md) - - - -## 👫 Community - -- For international developers, we regard [PaddleOCR Discussions](https://github.com/PaddlePaddle/PaddleOCR/discussions) as our international community platform. All ideas and questions can be discussed here in English. - -- For Chinese develops, Scan the QR code below with your Wechat, you can join the official technical discussion group. For richer community content, please refer to [中文README](README_ch.md), looking forward to your participation. - -
- -
- - +- [Dive Into OCR](./doc/doc_en/ocr_book_en.md) ## 🛠️ PP-OCR Series Model List(Update on September 8th) @@ -122,7 +99,6 @@ Scan the QR code below on WeChat to add operation students, and reply [paddlex], - For a new language request, please refer to [Guideline for new language_requests](#language_requests). - For structural document analysis models, please refer to [PP-Structure models](./ppstructure/docs/models_list_en.md). - ## 📖 Tutorials - [Environment Preparation](./doc/doc_en/environment_en.md) - [PP-OCR 🔥](./doc/doc_en/ppocr_introduction_en.md) @@ -182,8 +158,6 @@ Scan the QR code below on WeChat to add operation students, and reply [paddlex], - [References](./doc/doc_en/reference_en.md) - [License](#LICENSE) - - ## 👀 Visualization [more](./doc/doc_en/visualization_en.md)
@@ -244,10 +218,6 @@ Scan the QR code below on WeChat to add operation students, and reply [paddlex],
- -
- - ## 🇺🇳 Guideline for New Language Requests If you want to request a new language support, a PR with 1 following files are needed: @@ -259,7 +229,5 @@ If your language has unique elements, please tell me in advance within any way, More details, please refer to [Multilingual OCR Development Plan](https://github.com/PaddlePaddle/PaddleOCR/issues/1048). - - ## 📄 License This project is released under Apache 2.0 license diff --git a/benchmark/PaddleOCR_DBNet/data_loader/modules/augment.py b/benchmark/PaddleOCR_DBNet/data_loader/modules/augment.py index d2edd93d7a..bd0e483c8b 100644 --- a/benchmark/PaddleOCR_DBNet/data_loader/modules/augment.py +++ b/benchmark/PaddleOCR_DBNet/data_loader/modules/augment.py @@ -25,7 +25,7 @@ def __call__(self, data: dict): return data data["img"] = ( random_noise(data["img"], mode="gaussian", clip=True) * 255 - ).astype(im.dtype) + ).astype(data["img"].dtype) return data diff --git a/configs/rec/PP-OCRv4/en_PP-OCRv4_rec.yml b/configs/rec/PP-OCRv4/en_PP-OCRv4_rec.yml index 9537f7a106..d4a718b3d5 100644 --- a/configs/rec/PP-OCRv4/en_PP-OCRv4_rec.yml +++ b/configs/rec/PP-OCRv4/en_PP-OCRv4_rec.yml @@ -10,7 +10,7 @@ Global: - 0 - 2000 cal_metric_during_train: true - pretrained_model: refactor + pretrained_model: null checkpoints: null save_inference_dir: null use_visualdl: false diff --git a/deploy/hubserving/kie_ser/module.py b/deploy/hubserving/kie_ser/module.py index 2c046be07b..fa95a91f76 100644 --- a/deploy/hubserving/kie_ser/module.py +++ b/deploy/hubserving/kie_ser/module.py @@ -142,7 +142,7 @@ def serving_method(self, images, **kwargs): if __name__ == "__main__": - ocr = OCRSystem() + ocr = KIESer() ocr._initialize() image_path = [ "./doc/imgs/11.jpg", diff --git a/deploy/hubserving/kie_ser_re/module.py b/deploy/hubserving/kie_ser_re/module.py index 4f2bc4479c..5e30a51a7e 100644 --- a/deploy/hubserving/kie_ser_re/module.py +++ b/deploy/hubserving/kie_ser_re/module.py @@ -144,7 +144,7 @@ def serving_method(self, images, **kwargs): if 
__name__ == "__main__": - ocr = OCRSystem() + ocr = KIESerRE() ocr._initialize() image_path = [ "./doc/imgs/11.jpg", diff --git a/doc/doc_ch/ppocr_introduction.md b/doc/doc_ch/ppocr_introduction.md index bd62087c8b..6963e0526f 100644 --- a/doc/doc_ch/ppocr_introduction.md +++ b/doc/doc_ch/ppocr_introduction.md @@ -11,8 +11,8 @@ - [5.2 模型训练、压缩、推理部署](#52) - [6. 模型库](#6) - + ## 1. 简介 PP-OCR是PaddleOCR自研的实用的超轻量OCR系统。在实现[前沿算法](algorithm.md)的基础上,考虑精度与速度的平衡,进行**模型瘦身**和**深度优化**,使其尽可能满足产业落地需求。 @@ -109,7 +109,7 @@ PP-OCRv3系统pipeline如下: ### 5.2 模型训练、压缩、推理部署 -更多教程,包括模型训练、模型压缩、推理部署等,请参考[文档教程](../../README_ch.md#文档教程)。 +更多教程,包括模型训练、模型压缩、推理部署等,请参考[文档教程](../../README.md#文档教程)。 ## 6. 模型库 diff --git "a/doc/doc_i18n/README_\320\240\321\203\314\201\321\201\321\201\320\272\320\270\320\271_\321\217\320\267\321\213\314\201\320\272.md" "b/doc/doc_i18n/README_\320\240\321\203\314\201\321\201\321\201\320\272\320\270\320\271_\321\217\320\267\321\213\314\201\320\272.md" index 78fe0b6114..11fe5d4513 100644 --- "a/doc/doc_i18n/README_\320\240\321\203\314\201\321\201\321\201\320\272\320\270\320\271_\321\217\320\267\321\213\314\201\320\272.md" +++ "b/doc/doc_i18n/README_\320\240\321\203\314\201\321\201\321\201\320\272\320\270\320\271_\321\217\320\267\321\213\314\201\320\272.md" @@ -1,4 +1,4 @@ -[English](../../README.md) | [简体中文](../../README_ch.md) | [हिन्दी](./README_हिन्द.md) | [日本語](./README_日本語.md) | [한국인](./README_한국어.md) | Pу́сский язы́к +[English](../../README_en.md) | [简体中文](../../README.md) | [हिन्दी](./README_हिन्द.md) | [日本語](./README_日本語.md) | [한국인](./README_한국어.md) | Pу́сский язы́к

diff --git "a/doc/doc_i18n/README_\340\244\271\340\244\277\340\244\250\340\245\215\340\244\246.md" "b/doc/doc_i18n/README_\340\244\271\340\244\277\340\244\250\340\245\215\340\244\246.md" index 0288ea8e76..c493327dc0 100644 --- "a/doc/doc_i18n/README_\340\244\271\340\244\277\340\244\250\340\245\215\340\244\246.md" +++ "b/doc/doc_i18n/README_\340\244\271\340\244\277\340\244\250\340\245\215\340\244\246.md" @@ -1,4 +1,4 @@ -[English](../../README.md) | [简体中文](../../README_ch.md) | हिन्दी | [日本語](./README_日本語.md) | [한국인](./README_한국어.md) | [Pу́сский язы́к](./README_Ру́сский_язы́к.md) +[English](../../README_en.md) | [简体中文](../../README.md) | हिन्दी | [日本語](./README_日本語.md) | [한국인](./README_한국어.md) | [Pу́сский язы́к](./README_Ру́сский_язы́к.md)

diff --git "a/doc/doc_i18n/README_\346\227\245\346\234\254\350\252\236.md" "b/doc/doc_i18n/README_\346\227\245\346\234\254\350\252\236.md" index 0b9ddc9f58..215c825c7b 100644 --- "a/doc/doc_i18n/README_\346\227\245\346\234\254\350\252\236.md" +++ "b/doc/doc_i18n/README_\346\227\245\346\234\254\350\252\236.md" @@ -1,4 +1,4 @@ -[English](../../README.md) | [简体中文](../../README_ch.md) | [हिन्दी](./README_हिन्द.md) | 日本語 | [한국인](./README_한국어.md) | [Pу́сский язы́к](./README_Ру́сский_язы́к.md) +[English](../../README_en.md) | [简体中文](../../README.md) | [हिन्दी](./README_हिन्द.md) | 日本語 | [한국인](./README_한국어.md) | [Pу́сский язы́к](./README_Ру́сский_язы́к.md)

diff --git "a/doc/doc_i18n/README_\355\225\234\352\265\255\354\226\264.md" "b/doc/doc_i18n/README_\355\225\234\352\265\255\354\226\264.md" index ccf9849ec9..30ff079abe 100644 --- "a/doc/doc_i18n/README_\355\225\234\352\265\255\354\226\264.md" +++ "b/doc/doc_i18n/README_\355\225\234\352\265\255\354\226\264.md" @@ -1,4 +1,4 @@ -[English](../../README.md) | [简体中文](../../README_ch.md) | [हिन्दी](./README_हिन्द.md) | [日本語](./README_日本語.md) | 한국인 | [Pу́сский язы́к](./README_Ру́сский_язы́к.md) +[English](../../README_en.md) | [简体中文](../../README.md) | [हिन्दी](./README_हिन्द.md) | [日本語](./README_日本語.md) | 한국인 | [Pу́сский язы́к](./README_Ру́сский_язы́к.md)

diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index 39e413b2d2..58c6610f8c 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -841,11 +841,11 @@ def __call__(self, data): return data def xyxyxyxy2xywh(self, boxes): - new_bboxes = np.zeros([len(bboxes), 4]) - new_bboxes[:, 0] = bboxes[:, 0::2].min() # x1 - new_bboxes[:, 1] = bboxes[:, 1::2].min() # y1 - new_bboxes[:, 2] = bboxes[:, 0::2].max() - new_bboxes[:, 0] # w - new_bboxes[:, 3] = bboxes[:, 1::2].max() - new_bboxes[:, 1] # h + new_bboxes = np.zeros([len(boxes), 4]) + new_bboxes[:, 0] = boxes[:, 0::2].min() # x1 + new_bboxes[:, 1] = boxes[:, 1::2].min() # y1 + new_bboxes[:, 2] = boxes[:, 0::2].max() - new_bboxes[:, 0] # w + new_bboxes[:, 3] = boxes[:, 1::2].max() - new_bboxes[:, 1] # h return new_bboxes def xyxy2xywh(self, bboxes): diff --git a/ppocr/losses/distillation_loss.py b/ppocr/losses/distillation_loss.py index 4d0f751c5f..6313f8d8ea 100644 --- a/ppocr/losses/distillation_loss.py +++ b/ppocr/losses/distillation_loss.py @@ -1184,7 +1184,9 @@ def forward(self, predicts, batch): loss = super().forward(out1, out2, ctc_label) if isinstance(loss, dict): for key in loss: - loss_dict["{}_{}_{}".format(self.name, model_name, idx)] = loss[key] + loss_dict[ + "{}_{}_{}".format(self.name, self.model_name_pairs, idx) + ] = loss[key] else: loss_dict["{}_{}".format(self.name, idx)] = loss return loss_dict diff --git a/ppocr/metrics/vqa_token_re_metric.py b/ppocr/metrics/vqa_token_re_metric.py index d39917000f..8c85be5763 100644 --- a/ppocr/metrics/vqa_token_re_metric.py +++ b/ppocr/metrics/vqa_token_re_metric.py @@ -19,7 +19,7 @@ import numpy as np import paddle -__all__ = ["KIEMetric"] +__all__ = ["VQAReTokenMetric"] class VQAReTokenMetric(object): diff --git a/ppocr/metrics/vqa_token_ser_metric.py b/ppocr/metrics/vqa_token_ser_metric.py index b6033c3ae5..3afcb0518b 100644 --- a/ppocr/metrics/vqa_token_ser_metric.py +++ 
b/ppocr/metrics/vqa_token_ser_metric.py @@ -19,7 +19,7 @@ import numpy as np import paddle -__all__ = ["KIEMetric"] +__all__ = ["VQASerTokenMetric"] class VQASerTokenMetric(object): diff --git a/ppocr/modeling/backbones/rec_efficientb3_pren.py b/ppocr/modeling/backbones/rec_efficientb3_pren.py index d153ad6d87..916a090e23 100644 --- a/ppocr/modeling/backbones/rec_efficientb3_pren.py +++ b/ppocr/modeling/backbones/rec_efficientb3_pren.py @@ -27,7 +27,7 @@ import paddle.nn as nn import paddle.nn.functional as F -__all__ = ["EfficientNetb3"] +__all__ = ["EfficientNetb3_PREN"] GlobalParams = collections.namedtuple( "GlobalParams", diff --git a/ppocr/modeling/heads/rec_aster_head.py b/ppocr/modeling/heads/rec_aster_head.py index ba0acaeebe..dbef77f68b 100644 --- a/ppocr/modeling/heads/rec_aster_head.py +++ b/ppocr/modeling/heads/rec_aster_head.py @@ -132,7 +132,7 @@ def sample(self, x): # Decoder state = paddle.zeros([1, batch_size, self.sDim]) - predicted_ids, predicted_scores = [], [] + predicted_ids, predicted_scores, predicted = [], [], None for i in range(self.max_len_labels): if i == 0: y_prev = paddle.full(shape=[batch_size], fill_value=self.num_classes) diff --git a/ppocr/utils/dict/bengali_dict.txt b/ppocr/utils/dict/bengali_dict.txt new file mode 100644 index 0000000000..89ffadfa5f --- /dev/null +++ b/ppocr/utils/dict/bengali_dict.txt @@ -0,0 +1,74 @@ +হ +থ +শ +৫ +ক +ও +য +০ +গ +দ +ড় +খ +য় +ঋ +ন +অ +৪ +এ +ব +ঠ +ঢ +৭ +৯ +ধ +ঙ +ট +ঝ +ৎ +ণ +ত +র +২ +চ +ঌ +ড +৬ +ঔ +প +ভ +ম +ঢ় +ঈ +৮ +ঘ +১ +ষ +৩ +ফ +ছ +ল +জ +আ +। +ঊ +ই +স +ঐ +উ +ঞ +া +্ +ু +ী +ে +ং +ি +় +ঁ +ৃ +ো +ূ +ৈ +ৌ +ঃ diff --git a/ppocr/utils/dict/gujarati_dict.txt b/ppocr/utils/dict/gujarati_dict.txt new file mode 100644 index 0000000000..08c8bad67d --- /dev/null +++ b/ppocr/utils/dict/gujarati_dict.txt @@ -0,0 +1,48 @@ +અ +આ +ઇ +ઈ +ઉ +ઊ +ઋ +ઌ +એ +ઐ +ઓ +ઔ +અં +અઃ +ક +ખ +ગ +ઘ +ઙ +ચ +છ +જ +ઝ +ઞ +ટ +ઠ +ડ +ઢ +ણ +ત +થ +દ +ધ +ન +પ +ફ +બ +ભ +મ +ય +ર +લ +ળ +વ +શ +ષ +સ +હ \ No newline at end of file diff --git 
a/ppocr/utils/dict/kazakh_dict.txt b/ppocr/utils/dict/kazakh_dict.txt new file mode 100644 index 0000000000..15bac40bec --- /dev/null +++ b/ppocr/utils/dict/kazakh_dict.txt @@ -0,0 +1,42 @@ +А +Ә +Б +В +Г +Ғ +Д +Е +Ё +Ж +З +И +Й +К +Қ +Л +М +Н +Ң +О +Ө +П +Р +С +Т +У +Ұ +Ү +Ф +Х +Һ +Ц +Ч +Ш +Щ +Ъ +Ы +І +Ь +Э +Ю +Я \ No newline at end of file diff --git a/ppocr/utils/loggers/wandb_logger.py b/ppocr/utils/loggers/wandb_logger.py index 83596d86d1..3b528b3fa9 100644 --- a/ppocr/utils/loggers/wandb_logger.py +++ b/ppocr/utils/loggers/wandb_logger.py @@ -1,5 +1,8 @@ import os from .base_logger import BaseLogger +from ppocr.utils.logging import get_logger + +logger = get_logger() class WandbLogger(BaseLogger): @@ -11,7 +14,7 @@ def __init__( entity=None, save_dir=None, config=None, - **kwargs + **kwargs, ): try: import wandb diff --git a/tests/test_paddleocr_api.py b/tests/test_paddleocr_api.py new file mode 100644 index 0000000000..0af794d2b8 --- /dev/null +++ b/tests/test_paddleocr_api.py @@ -0,0 +1,116 @@ +from typing import Any + +import pytest +from paddleocr import PaddleOCR, PPStructure + + +# Test image paths +IMAGE_PATHS_OCR = ["./doc/imgs_en/254.jpg", "./doc/imgs_en/img_10.jpg"] +IMAGE_PATHS_STRUCTURE = [ + "./ppstructure/docs/table/layout.jpg", + "./ppstructure/docs/table/1.png", +] + + +@pytest.fixture(params=["en", "ch"]) +def ocr_engine(request: Any) -> PaddleOCR: + """ + Initialize PaddleOCR engine with different languages. + + Args: + request: pytest fixture request object. + + Returns: + An instance of PaddleOCR. + """ + return PaddleOCR(lang=request.param) + + +def test_ocr_initialization(ocr_engine: PaddleOCR) -> None: + """ + Test PaddleOCR initialization. + + Args: + ocr_engine: An instance of PaddleOCR. + """ + assert ocr_engine is not None + + +@pytest.mark.parametrize("image_path", IMAGE_PATHS_OCR) +def test_ocr_function(ocr_engine: PaddleOCR, image_path: str) -> None: + """ + Test PaddleOCR OCR functionality with different images. 
+ + Args: + ocr_engine: An instance of PaddleOCR. + image_path: Path to the image to be processed. + """ + result = ocr_engine.ocr(image_path) + assert result is not None + assert isinstance(result, list) + + +@pytest.mark.parametrize("image_path", IMAGE_PATHS_OCR) +def test_ocr_det_only(ocr_engine: PaddleOCR, image_path: str) -> None: + """ + Test PaddleOCR OCR functionality with detection only. + + Args: + ocr_engine: An instance of PaddleOCR. + image_path: Path to the image to be processed. + """ + result = ocr_engine.ocr(image_path, det=True, rec=False) + assert result is not None + assert isinstance(result, list) + + +@pytest.mark.parametrize("image_path", IMAGE_PATHS_OCR) +def test_ocr_rec_only(ocr_engine: PaddleOCR, image_path: str) -> None: + """ + Test PaddleOCR OCR functionality with recognition only. + + Args: + ocr_engine: An instance of PaddleOCR. + image_path: Path to the image to be processed. + """ + result = ocr_engine.ocr(image_path, det=False, rec=True) + assert result is not None + assert isinstance(result, list) + + +@pytest.fixture(params=["en", "ch"]) +def structure_engine(request: Any) -> PPStructure: + """ + Initialize PPStructure engine with different languages. + + Args: + request: pytest fixture request object. + + Returns: + An instance of PPStructure. + """ + return PPStructure(lang=request.param) + + +def test_structure_initialization(structure_engine: PPStructure) -> None: + """ + Test PPStructure initialization. + + Args: + structure_engine: An instance of PPStructure. + """ + assert structure_engine is not None + + +@pytest.mark.parametrize("image_path", IMAGE_PATHS_STRUCTURE) +def test_structure_function(structure_engine: PPStructure, image_path: str) -> None: + """ + Test PPStructure structure analysis functionality with different images. + + Args: + structure_engine: An instance of PPStructure. + image_path: Path to the image to be processed. 
+ """ + result = structure_engine(image_path) + assert result is not None + assert isinstance(result, list)