diff --git a/.classpath b/.classpath index 67ea658..054cdd5 100644 --- a/.classpath +++ b/.classpath @@ -1,11 +1,16 @@ - + + + + + + diff --git a/README.md b/README.md index 52d2f61..1e18844 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ 软件主要检查、比较学生提交的电子档文本相似度,能对程序语言(如java、c等)、中英文文档(如实验报告等)之间的文本相似度进行比较分析,输出相似度高的文档,进而辅助发现学生之间互相抄袭的行为。 ## 需求 -jdk1.6及以上版本 +jdk11 ## 安装 直接下载或clone项目源代码版,或下载软件的发布版[releases](https://github.com/fanghon/antiplag/releases)。 @@ -13,11 +13,12 @@ jdk1.6及以上版本 ![程序主界面](./maingui.png) ## 原理 -系统采用的主要技术是自然语言处理(nlp)中的文本相似度计算。程序类文本的相似度比较基于两个开放系统: -* 一个是基于网络服务的[MOSS系统](http://theory.stanford.edu/~aiken/moss/)(斯坦福大学开放的支持多种编程语言代码相似度比较的系统); -* 另一个是本地执行的[sim系统](https://dickgrune.com/Programs/similarity_tester/)(支持java、c等语言的文本相似度比较)。 +系统采用的主要技术是自然语言处理(nlp)中的文本相似度计算。程序类文本的相似度比较基于3个开放系统: +* 一是基于网络服务的[MOSS系统](http://theory.stanford.edu/~aiken/moss/)(斯坦福大学开放的支持多种编程语言代码相似度比较的系统); +* 二是本地执行的[sim系统](https://dickgrune.com/Programs/similarity_tester/)(支持java、c等语言的文本相似度比较)。 +* 三是本地执行的[jplag系统](https://github.com/jplag/jplag/)(支持java、c/c++、python等语言的文本相似度比较)。 -本系统在它们基础上进行了二次开发和封装,针对moss系统,开发出了客户端存取模块,实现了代码文件提交、结果获取和解析、结果排序等功能;针对sim,则将其集成到系统中,在moss因网络故障等原因不可用时,可作为替代产品使用。 +本系统在它们基础上进行了二次开发和封装,针对moss系统,开发出了客户端存取模块,实现了代码文件提交、结果获取和解析、结果排序等功能;针对sim和jplag,则将其集成到系统中,在moss因网络故障等原因不可用时,可作为替代产品使用。 中英文文档作业相似度的比较则基于[shinglecloud算法](https://www.kom.tu-darmstadt.de/de/research-results/0/1/shinglecloud/)(一种基于文本指纹的、语言无关的相似度快速计算方法),文档主要处理过程如下: 1. 使用tika读取不同格式(txt、doc、docx等)的文档,并将其转换成能统一处理的文本; @@ -26,7 +27,7 @@ jdk1.6及以上版本 4. 根据相似度排序,输出比较结果。 ## TODO -1. 将jplag整合进系统。 +1. 将jplag整合进系统。已实现。 2. 支持存储以往作业文档,支持基于数据库的作业查重。 2. 开发web版作业查重软件。 diff --git a/bin/.gitignore b/bin/.gitignore new file mode 100644 index 0000000..6debac6 --- /dev/null +++ b/bin/.gitignore @@ -0,0 +1,2 @@ +/utils/ +/gui/ diff --git a/bin/gui/plag/edu/CompareResultFrame$1.class b/bin/gui/plag/edu/CompareResultFrame$1.class index a47eebf..adeb780 100644 Binary files a/bin/gui/plag/edu/CompareResultFrame$1.class and b/bin/gui/plag/edu/CompareResultFrame$1.class differ diff --git a/bin/gui/plag/edu/CompareResultFrame$2.class b/bin/gui/plag/edu/CompareResultFrame$2.class index ea0e579..0d7f285 100644 Binary files a/bin/gui/plag/edu/CompareResultFrame$2.class and b/bin/gui/plag/edu/CompareResultFrame$2.class differ diff --git a/bin/gui/plag/edu/CompareResultFrame$3.class b/bin/gui/plag/edu/CompareResultFrame$3.class index 08edd41..43bf49d 100644 Binary files a/bin/gui/plag/edu/CompareResultFrame$3.class and b/bin/gui/plag/edu/CompareResultFrame$3.class differ diff --git a/bin/gui/plag/edu/CompareResultFrame.class b/bin/gui/plag/edu/CompareResultFrame.class index e912979..96c69c7 100644 Binary files a/bin/gui/plag/edu/CompareResultFrame.class and b/bin/gui/plag/edu/CompareResultFrame.class differ diff --git a/bin/gui/plag/edu/FileConvertFrame$1.class b/bin/gui/plag/edu/FileConvertFrame$1.class index 78202d1..800b218 100644 Binary files a/bin/gui/plag/edu/FileConvertFrame$1.class and b/bin/gui/plag/edu/FileConvertFrame$1.class differ diff --git a/bin/gui/plag/edu/FileConvertFrame$2.class b/bin/gui/plag/edu/FileConvertFrame$2.class index b7ac180..b57b418 100644 Binary files a/bin/gui/plag/edu/FileConvertFrame$2.class and b/bin/gui/plag/edu/FileConvertFrame$2.class differ diff --git a/bin/gui/plag/edu/FileConvertFrame$3.class b/bin/gui/plag/edu/FileConvertFrame$3.class index 34c0312..b79e429 100644 Binary files a/bin/gui/plag/edu/FileConvertFrame$3.class and b/bin/gui/plag/edu/FileConvertFrame$3.class differ diff --git a/bin/gui/plag/edu/FileConvertFrame$4.class b/bin/gui/plag/edu/FileConvertFrame$4.class index 2956418..de6604c 100644 Binary files a/bin/gui/plag/edu/FileConvertFrame$4.class and b/bin/gui/plag/edu/FileConvertFrame$4.class differ diff --git a/bin/gui/plag/edu/PlagGUI$1.class b/bin/gui/plag/edu/PlagGUI$1.class index 36c5653..a4f5b55 100644 Binary files a/bin/gui/plag/edu/PlagGUI$1.class and b/bin/gui/plag/edu/PlagGUI$1.class differ diff --git a/bin/gui/plag/edu/PlagGUI$2.class b/bin/gui/plag/edu/PlagGUI$2.class index 0a1337c..643b0b2 100644 Binary files a/bin/gui/plag/edu/PlagGUI$2.class and b/bin/gui/plag/edu/PlagGUI$2.class differ diff --git a/bin/gui/plag/edu/PlagGUI$3.class b/bin/gui/plag/edu/PlagGUI$3.class index dad6f9e..080dcbb 100644 Binary files a/bin/gui/plag/edu/PlagGUI$3.class and b/bin/gui/plag/edu/PlagGUI$3.class differ diff --git a/bin/gui/plag/edu/PlagGUI$4.class b/bin/gui/plag/edu/PlagGUI$4.class index ce62236..e9667d2 100644 Binary files a/bin/gui/plag/edu/PlagGUI$4.class and b/bin/gui/plag/edu/PlagGUI$4.class differ diff --git a/bin/gui/plag/edu/PlagGUI$5.class b/bin/gui/plag/edu/PlagGUI$5.class index 64e87bb..0351610 100644 Binary files a/bin/gui/plag/edu/PlagGUI$5.class and b/bin/gui/plag/edu/PlagGUI$5.class differ diff --git a/bin/gui/plag/edu/PlagGUI$6.class b/bin/gui/plag/edu/PlagGUI$6.class index fed49d6..5a668b4 100644 Binary files a/bin/gui/plag/edu/PlagGUI$6.class and b/bin/gui/plag/edu/PlagGUI$6.class differ diff --git a/bin/gui/plag/edu/PlagGUI$7.class b/bin/gui/plag/edu/PlagGUI$7.class index dfe50db..5e9aa59 100644 Binary files a/bin/gui/plag/edu/PlagGUI$7.class and b/bin/gui/plag/edu/PlagGUI$7.class differ diff --git a/bin/gui/plag/edu/PlagGUI$8.class b/bin/gui/plag/edu/PlagGUI$8.class index a777b5e..f86431d 100644 Binary files a/bin/gui/plag/edu/PlagGUI$8.class and b/bin/gui/plag/edu/PlagGUI$8.class differ diff --git a/bin/gui/plag/edu/PlagGUI$9.class b/bin/gui/plag/edu/PlagGUI$9.class index 382c79b..fcecb43 100644 Binary files a/bin/gui/plag/edu/PlagGUI$9.class and b/bin/gui/plag/edu/PlagGUI$9.class differ diff --git a/bin/gui/plag/edu/PlagGUI.class b/bin/gui/plag/edu/PlagGUI.class index cd20c71..a0499f2 100644 Binary files a/bin/gui/plag/edu/PlagGUI.class and b/bin/gui/plag/edu/PlagGUI.class differ diff --git a/bin/moss/plag/edu/DataBase.class b/bin/moss/plag/edu/DataBase.class index 1b73382..86b23a0 100644 Binary files a/bin/moss/plag/edu/DataBase.class and b/bin/moss/plag/edu/DataBase.class differ diff --git a/bin/moss/plag/edu/Http.class b/bin/moss/plag/edu/Http.class index 0519794..ce759bc 100644 Binary files a/bin/moss/plag/edu/Http.class and b/bin/moss/plag/edu/Http.class differ diff --git a/bin/moss/plag/edu/Moss.class b/bin/moss/plag/edu/Moss.class index c873708..9a56fbc 100644 Binary files a/bin/moss/plag/edu/Moss.class and b/bin/moss/plag/edu/Moss.class differ diff --git a/bin/moss/plag/edu/Text.class b/bin/moss/plag/edu/Text.class index e34153a..72da7b9 100644 Binary files a/bin/moss/plag/edu/Text.class and b/bin/moss/plag/edu/Text.class differ diff --git a/bin/preprocess/plag/edu/IKAnalyzer.class b/bin/preprocess/plag/edu/IKAnalyzer.class index 116b508..1d3e3cf 100644 Binary files a/bin/preprocess/plag/edu/IKAnalyzer.class and b/bin/preprocess/plag/edu/IKAnalyzer.class differ diff --git a/bin/preprocess/plag/edu/TextExtractor.class b/bin/preprocess/plag/edu/TextExtractor.class index 976c29e..0297362 100644 Binary files a/bin/preprocess/plag/edu/TextExtractor.class and b/bin/preprocess/plag/edu/TextExtractor.class differ diff --git a/bin/shingle/plag/edu/ShingleSim.class b/bin/shingle/plag/edu/ShingleSim.class index de2d154..d882a38 100644 Binary files a/bin/shingle/plag/edu/ShingleSim.class and b/bin/shingle/plag/edu/ShingleSim.class differ diff --git a/bin/utils/edu/AntFile.class b/bin/utils/edu/AntFile.class index c7ed241..311eb1c 100644 Binary files a/bin/utils/edu/AntFile.class and b/bin/utils/edu/AntFile.class differ diff --git a/bin/utils/edu/FileIO.class b/bin/utils/edu/FileIO.class index c1cac01..749b5a8 100644 Binary files a/bin/utils/edu/FileIO.class and b/bin/utils/edu/FileIO.class differ diff --git a/bin/utils/edu/MossClient.class b/bin/utils/edu/MossClient.class index ef991fa..c8ce582 100644 Binary files a/bin/utils/edu/MossClient.class and b/bin/utils/edu/MossClient.class differ diff --git a/bin/utils/edu/StreamGobbler.class b/bin/utils/edu/StreamGobbler.class index 72d9b47..3e662e4 100644 Binary files a/bin/utils/edu/StreamGobbler.class and b/bin/utils/edu/StreamGobbler.class differ diff --git a/bin/utils/edu/WinCMD.class b/bin/utils/edu/WinCMD.class index 2c1d79b..a0c58dc 100644 Binary files a/bin/utils/edu/WinCMD.class and b/bin/utils/edu/WinCMD.class differ diff --git a/lib/jplag-2.12.1-SNAPSHOT-jar-with-dependencies.jar b/lib/jplag-2.12.1-SNAPSHOT-jar-with-dependencies.jar new file mode 100644 index 0000000..0bb8131 Binary files /dev/null and b/lib/jplag-2.12.1-SNAPSHOT-jar-with-dependencies.jar differ diff --git a/mossout.txt b/mossout.txt index e6e4821..e68458d 100644 --- a/mossout.txt +++ b/mossout.txt @@ -1,35 +1,6 @@ -Uploading .\testdata\wpsdoc\bixinghui.doc...done -Uploading .\testdata\wpsdoc\chengxi.doc...done -Uploading .\testdata\wpsdoc\chenxiaofeng.doc...done -Uploading .\testdata\wpsdoc\chenyufan.doc...done -Uploading .\testdata\wpsdoc\gaoming.doc.docx...done -Uploading .\testdata\wpsdoc\gezhongqi.doc...done -Uploading .\testdata\wpsdoc\huangkaiming.doc...done -Uploading .\testdata\wpsdoc\huangzhi.doc...done -Uploading .\testdata\wpsdoc\jihua.docx...done -Uploading .\testdata\wpsdoc\lichenguang.doc...done -Uploading .\testdata\wpsdoc\litao.docx...done -Uploading .\testdata\wpsdoc\majunxian.doc...done -Uploading .\testdata\wpsdoc\nijinhua.doc...done -Uploading .\testdata\wpsdoc\shaohaohao.doc...done -Uploading .\testdata\wpsdoc\shaoyuanxu.doc...done -Uploading .\testdata\wpsdoc\shenjie.doc...done -Uploading .\testdata\wpsdoc\sunshangxing.docx...done -Uploading .\testdata\wpsdoc\tangwenyuan.doc...done -Uploading .\testdata\wpsdoc\wangjingxuan.doc...done -Uploading .\testdata\wpsdoc\wangpeng.doc...done -Uploading .\testdata\wpsdoc\wangwei.docx...done -Uploading .\testdata\wpsdoc\wuhang.doc...done -Uploading .\testdata\wpsdoc\xutianxiu.doc...done -Uploading .\testdata\wpsdoc\yanghao.docx...done -Uploading .\testdata\wpsdoc\yangweichao.doc...done -Uploading .\testdata\wpsdoc\yankai.docx...done -Uploading .\testdata\wpsdoc\zhangsheng.doc...done -Uploading .\testdata\wpsdoc\zhangshuyang.doc...done -Uploading .\testdata\wpsdoc\zhaoxingyi.doc...done -Uploading .\testdata\wpsdoc\zhenglinpeng.doc...done -Uploading .\testdata\wpsdoc\zhengxianyang.doc...done -Uploading .\testdata\wpsdoc\zhuangyu.doc...done -Uploading .\testdata\wpsdoc\zhuchengpeng.docx...done +Uploading .\testdata\python\demo.py...done +Uploading .\testdata\python\demo1.py...done +Uploading .\testdata\python\lprcmd.py...done +Uploading .\testdata\python\lprcmd2.py...done Query submitted. Waiting for the server's response. -http://moss.stanford.edu/results/306585337 +http://moss.stanford.edu/results/116291522 diff --git a/out.txt b/out.txt index 441df32..e69de29 100644 --- a/out.txt +++ b/out.txt @@ -1,2 +0,0 @@ -1 12.066752% chengxi.doc zhuchengpeng.docx -from fh Sun Sep 22 10:06:33 CST 2019 \ No newline at end of file diff --git a/src/gui/plag/edu/PlagGUI.java b/src/gui/plag/edu/PlagGUI.java index 6f20b5a..0fa0a4a 100644 --- a/src/gui/plag/edu/PlagGUI.java +++ b/src/gui/plag/edu/PlagGUI.java @@ -1,6 +1,7 @@ package gui.plag.edu; import java.awt.BorderLayout; +import java.awt.Desktop; import java.awt.EventQueue; import javax.swing.JFrame; @@ -23,6 +24,9 @@ import java.awt.event.ActionListener; import java.awt.event.ActionEvent; import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; import javax.swing.event.ChangeListener; import javax.swing.event.ChangeEvent; @@ -47,6 +51,8 @@ public class PlagGUI extends JFrame { private JComboBox combMethod; private JComboBox combLang; + + WinCMD cmd; /** * Launch the application. */ @@ -166,7 +172,7 @@ public void stateChanged(ChangeEvent arg0) { panel_1.add(label_1); combLang = new JComboBox(); - combLang.setModel(new DefaultComboBoxModel(new String[] {"java", "c", "csharp", "javascript"})); + combLang.setModel(new DefaultComboBoxModel(new String[] {"java", "c", "python", "csharp", "javascript"})); combLang.setBounds(220, 51, 75, 21); panel_1.add(combLang); @@ -182,6 +188,7 @@ public void itemStateChanged(ItemEvent arg0) { combLang.removeAllItems(); combLang.addItem("java"); combLang.addItem("c"); + combLang.addItem("python"); combLang.addItem("csharp"); combLang.addItem("javascript"); @@ -189,10 +196,16 @@ public void itemStateChanged(ItemEvent arg0) { combLang.removeAllItems(); combLang.addItem("java"); combLang.addItem("c"); + }else if("jplag".equals(method)) { + combLang.removeAllItems(); + combLang.addItem("java"); + combLang.addItem("c/c++"); + combLang.addItem("python3"); + combLang.addItem("text"); } } }); - combMethod.setModel(new DefaultComboBoxModel(new String[] {"moss", "sim"})); + combMethod.setModel(new DefaultComboBoxModel(new String[] {"moss", "jplag", "sim"})); combMethod.setBounds(80, 51, 70, 21); panel_1.add(combMethod); @@ -233,14 +246,14 @@ public void actionPerformed(ActionEvent arg0) { } } - WinCMD cmd = new WinCMD(); + cmd = new WinCMD(); int res = cmd.exec(methodtype, lang, value, f.getAbsolutePath()); if(res==0){ - JOptionPane.showMessageDialog(PlagGUI.this, "ִϣ鿴"); + JOptionPane.showMessageDialog(PlagGUI.this, "ִϣ鿴ΪգԳԵƶֵ"); }else if(res<0){ JOptionPane.showMessageDialog(PlagGUI.this, "ִʧܣ"); }else if(res>0){ - JOptionPane.showMessageDialog(PlagGUI.this, "ִϣδֵַҪĽ"); + JOptionPane.showMessageDialog(PlagGUI.this, "ִϣδֵַҪĽԳԵƶֵ"); } } @@ -264,7 +277,37 @@ public void actionPerformed(ActionEvent arg0) { button_1.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent arg0) { //鿴 + String methodtype = (String)combMethod.getSelectedItem(); + String lang = (String)combLang.getSelectedItem(); + CompareResultFrame crf = new CompareResultFrame(); + if(radBntProgram.isSelected()) { + if("jplag".equals(methodtype)) { + File rf = new File("jplagresult/matches_avg.csv"); + crf.setResfile(rf); + + rf = new File("jplagresult/index.html"); + try { //Ĭʾҳ + Desktop.getDesktop().browse(rf.toURI()); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + }else if("moss".equals(methodtype)) { + try { //Ĭʾҳ + String url = cmd.getMoss().getUrl(); + if(url!=null) { + Desktop.getDesktop().browse(new URI(url)); + } + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + + } + crf.setVisible(true); } }); diff --git a/src/utils/edu/WinCMD.java b/src/utils/edu/WinCMD.java index 215f11d..d19ce50 100644 --- a/src/utils/edu/WinCMD.java +++ b/src/utils/edu/WinCMD.java @@ -11,11 +11,17 @@ import java.io.*; import data.plag.edu.SimData; +import jplag.ExitException; +import jplag.JPlag; +import jplag.Program; +import jplag.options.CommandLineOptions; import moss.plag.edu.*; public class WinCMD { - String outfile = "out.txt"; + String outfile = "out.txt"; String mossoutfile = "mossout.txt"; + Moss moss = null; + public static void main(String args[]) { /* * if (args.length < 1) { @@ -65,6 +71,12 @@ public static void main(String args[]) { } + } + public Moss getMoss() { + return moss; + } + public void setMoss(Moss moss) { + this.moss = moss; } //outļ public void clearOut(File f){ @@ -95,6 +107,8 @@ public int exec(String methodtype,String lang,int threshold,String files){ res = execMossJava(lang,threshold, files, lists); }else if("sim".equals(methodtype)){ res = this.execSim(lang, threshold, files, lists); + }else if("jplag".equals(methodtype)) { + res = this.execJplag(lang, threshold, files, lists); } return res; } @@ -117,6 +131,56 @@ String pathconvert(String path){ return res; } + + //JplagķԴбȽ,ɹ0ʧܷ-1 + public int execJplag(String lang,float threshold,String files,List lists){ + int res = -1; + String INPUT_FILE_FOLDER_NAME=files ; //ļĿ¼ + String jplagResultsFolderName="./jplagresult/"; //ĿĿ¼ + float MINIMUM_FILE_SIMILARITY = threshold ; + String EXCLUDE_FILES = null ; + ArrayList args = new ArrayList(); + + args.add("-l"); + if(!"java".equals(lang)) { + args.add(lang); //ͲӴ˲ʹĬֵΪjava19 + }else { + args.add("java19"); + } + args.add("-s"); //ݹѯļĿ¼µĿ¼ + args.add("-r"); //ָŵ· + args.add(jplagResultsFolderName); + args.add("-m"); //ƶȼ޲ֵ + args.add((int) (MINIMUM_FILE_SIMILARITY) + "%"); + if (EXCLUDE_FILES!=null) { // ñųļ + args.add("-x"); + args.add(EXCLUDE_FILES); + } + args.add(INPUT_FILE_FOLDER_NAME); + String[] toPass = new String[args.size()]; + toPass = args.toArray(toPass); + // System.out.println(toPass.toString()); + // JPlag.main(toPass); + try { + CommandLineOptions options = new CommandLineOptions(toPass, null); + Program program = new Program(options); + + System.out.println("jplag initialize ok "+program.get_commandLine()); + program.run(); + res = 0; //ִгɹ + } + catch(ExitException ex) { + System.out.println("Error: "+ex.getReport()); + + } + + return res ; + } + + + + + //javaͻִmoss,langԣthresholdƶֵfilesȽļڵĿ¼ listsȽϽɹ0ʧܷ-1 //޷1 public int execMossJava(String lang,float threshold,String files,List lists){ @@ -128,7 +192,7 @@ public int execMossJava(String lang,float threshold,String files,List l File dir = new File(files); res = mc.sendMoss(dir,lang); if(res==0){ //ϴɹ - Moss moss = new Moss(); + moss = new Moss(); res = moss.analyMoss(mossoutfile,threshold, lists); if(res==0 && lists.size()>0){ //Ч FileIO.saveFile(new File(outfile), lists,2,"from stanford:"+moss.getUrl()); //out.txtļ @@ -170,7 +234,7 @@ public int execMoss(String lang,float threshold,String files,List lists // File file = new File("mossout.txt"); // analySim(file,lang,lists); if(res==0){ //ϴִгɹ - Moss moss = new Moss(); + moss = new Moss(); res = moss.analyMoss(mossoutfile,threshold, lists); if(res==0 && lists.size()>0){ //Ч,ע⣺ûг޵ֵsizeҲΪ0 FileIO.saveFile(new File(outfile), lists,2,"from stanford:"+moss.getUrl()); //out.txtļ diff --git a/testdata/doc/dongxiao-2.doc b/testdata/doccn/dongxiao-2.doc similarity index 100% rename from testdata/doc/dongxiao-2.doc rename to testdata/doccn/dongxiao-2.doc diff --git a/testdata/doc/gechunlong-2.doc b/testdata/doccn/gechunlong-2.doc similarity index 100% rename from testdata/doc/gechunlong-2.doc rename to testdata/doccn/gechunlong-2.doc diff --git a/testdata/doc/gumingzhu-2.doc b/testdata/doccn/gumingzhu-2.doc similarity index 100% rename from testdata/doc/gumingzhu-2.doc rename to testdata/doccn/gumingzhu-2.doc diff --git a/testdata/doc/guozhiquan -2.doc b/testdata/doccn/guozhiquan -2.doc similarity index 100% rename from testdata/doc/guozhiquan -2.doc rename to testdata/doccn/guozhiquan -2.doc diff --git a/testdata/doc/hanchao_2.doc b/testdata/doccn/hanchao_2.doc similarity index 100% rename from testdata/doc/hanchao_2.doc rename to testdata/doccn/hanchao_2.doc diff --git a/testdata/doc/heliwen_2.doc b/testdata/doccn/heliwen_2.doc similarity index 100% rename from testdata/doc/heliwen_2.doc rename to testdata/doccn/heliwen_2.doc diff --git a/testdata/doc/jiangMing_2.doc b/testdata/doccn/jiangMing_2.doc similarity index 100% rename from testdata/doc/jiangMing_2.doc rename to testdata/doccn/jiangMing_2.doc diff --git a/testdata/doc/jiangfeng-2.doc b/testdata/doccn/jiangfeng-2.doc similarity index 100% rename from testdata/doc/jiangfeng-2.doc rename to testdata/doccn/jiangfeng-2.doc diff --git a/testdata/doc/lijie-2.doc b/testdata/doccn/lijie-2.doc similarity index 100% rename from testdata/doc/lijie-2.doc rename to testdata/doccn/lijie-2.doc diff --git a/testdata/doc/liuchuanyang-2.doc b/testdata/doccn/liuchuanyang-2.doc similarity index 100% rename from testdata/doc/liuchuanyang-2.doc rename to testdata/doccn/liuchuanyang-2.doc diff --git a/testdata/doc/liufan_2.doc b/testdata/doccn/liufan_2.doc similarity index 100% rename from testdata/doc/liufan_2.doc rename to testdata/doccn/liufan_2.doc diff --git a/testdata/doc/luxiang-2.doc b/testdata/doccn/luxiang-2.doc similarity index 100% rename from testdata/doc/luxiang-2.doc rename to testdata/doccn/luxiang-2.doc diff --git a/testdata/doc/majiaji-2.doc b/testdata/doccn/majiaji-2.doc similarity index 100% rename from testdata/doc/majiaji-2.doc rename to testdata/doccn/majiaji-2.doc diff --git "a/testdata/doc/makai\342\200\2242.doc" "b/testdata/doccn/makai\342\200\2242.doc" similarity index 100% rename from "testdata/doc/makai\342\200\2242.doc" rename to "testdata/doccn/makai\342\200\2242.doc" diff --git a/testdata/doc/meitao-2.doc b/testdata/doccn/meitao-2.doc similarity index 100% rename from testdata/doc/meitao-2.doc rename to testdata/doccn/meitao-2.doc diff --git a/testdata/doc/sunxiaolei-2.doc b/testdata/doccn/sunxiaolei-2.doc similarity index 100% rename from testdata/doc/sunxiaolei-2.doc rename to testdata/doccn/sunxiaolei-2.doc diff --git a/testdata/doc/tangwenming-2.doc b/testdata/doccn/tangwenming-2.doc similarity index 100% rename from testdata/doc/tangwenming-2.doc rename to testdata/doccn/tangwenming-2.doc diff --git a/testdata/doc/tangwenpeng-2.doc b/testdata/doccn/tangwenpeng-2.doc similarity index 100% rename from testdata/doc/tangwenpeng-2.doc rename to testdata/doccn/tangwenpeng-2.doc diff --git a/testdata/doc/wangchen-2.doc b/testdata/doccn/wangchen-2.doc similarity index 100% rename from testdata/doc/wangchen-2.doc rename to testdata/doccn/wangchen-2.doc diff --git a/testdata/doc/wangchunming_2.doc b/testdata/doccn/wangchunming_2.doc similarity index 100% rename from testdata/doc/wangchunming_2.doc rename to testdata/doccn/wangchunming_2.doc diff --git a/testdata/doc/wangjiafa-2.doc b/testdata/doccn/wangjiafa-2.doc similarity index 100% rename from testdata/doc/wangjiafa-2.doc rename to testdata/doccn/wangjiafa-2.doc diff --git a/testdata/doc/wangmeng-2.doc b/testdata/doccn/wangmeng-2.doc similarity index 100% rename from testdata/doc/wangmeng-2.doc rename to testdata/doccn/wangmeng-2.doc diff --git a/testdata/doc/wangqi-2.doc b/testdata/doccn/wangqi-2.doc similarity index 100% rename from testdata/doc/wangqi-2.doc rename to testdata/doccn/wangqi-2.doc diff --git a/testdata/doc/wangxuan_2.doc.doc b/testdata/doccn/wangxuan_2.doc.doc similarity index 100% rename from testdata/doc/wangxuan_2.doc.doc rename to testdata/doccn/wangxuan_2.doc.doc diff --git a/testdata/doc/weixiao-2.doc b/testdata/doccn/weixiao-2.doc similarity index 100% rename from testdata/doc/weixiao-2.doc rename to testdata/doccn/weixiao-2.doc diff --git a/testdata/doc/wuchangqing-2.doc b/testdata/doccn/wuchangqing-2.doc similarity index 100% rename from testdata/doc/wuchangqing-2.doc rename to testdata/doccn/wuchangqing-2.doc diff --git a/testdata/doc/wuliangchao-2.doc b/testdata/doccn/wuliangchao-2.doc similarity index 100% rename from testdata/doc/wuliangchao-2.doc rename to testdata/doccn/wuliangchao-2.doc diff --git a/testdata/doc/xiaqi_2.doc b/testdata/doccn/xiaqi_2.doc similarity index 100% rename from testdata/doc/xiaqi_2.doc rename to testdata/doccn/xiaqi_2.doc diff --git a/testdata/doc/xuqiwei-2.doc b/testdata/doccn/xuqiwei-2.doc similarity index 100% rename from testdata/doc/xuqiwei-2.doc rename to testdata/doccn/xuqiwei-2.doc diff --git a/testdata/doc/xuzhiwen_2.doc b/testdata/doccn/xuzhiwen_2.doc similarity index 100% rename from testdata/doc/xuzhiwen_2.doc rename to testdata/doccn/xuzhiwen_2.doc diff --git a/testdata/doc/yinpeiyan_2.doc b/testdata/doccn/yinpeiyan_2.doc similarity index 100% rename from testdata/doc/yinpeiyan_2.doc rename to testdata/doccn/yinpeiyan_2.doc diff --git a/testdata/doc/yinxu-2.doc b/testdata/doccn/yinxu-2.doc similarity index 100% rename from testdata/doc/yinxu-2.doc rename to testdata/doccn/yinxu-2.doc diff --git a/testdata/doc/zhongcongming_2.doc b/testdata/doccn/zhongcongming_2.doc similarity index 100% rename from testdata/doc/zhongcongming_2.doc rename to testdata/doccn/zhongcongming_2.doc diff --git a/testdata/doc/zhucuiyun_2.doc b/testdata/doccn/zhucuiyun_2.doc similarity index 100% rename from testdata/doc/zhucuiyun_2.doc rename to testdata/doccn/zhucuiyun_2.doc diff --git a/testdata/doc/zhuguoqing_2.doc b/testdata/doccn/zhuguoqing_2.doc similarity index 100% rename from testdata/doc/zhuguoqing_2.doc rename to testdata/doccn/zhuguoqing_2.doc diff --git a/testdata/docen/1.txt b/testdata/docen/1.txt new file mode 100644 index 0000000..7de0ae5 --- /dev/null +++ b/testdata/docen/1.txt @@ -0,0 +1,8 @@ +My Hobbies and Interests +From Monday until Friday most people are busyworking or studying, but in the evenings and off weekends they are free to relax and enjoy themselves. Some watch television or go to the movies;others participate in sports.It depends on individual interests. There are many different ways to spend our spare time. + +Almost everyone has some kind of hobby. It may be anything from collecting stamps to making model airplanes.Some hobbies are worth a lot of money; others are valuable only to their owners. + +I know a man Who has a coin collection worth several thousand yuan. A short time ago he bought a rare ten-yuan piece worth 250 yuan. He was very happy about the purchase and thought the price was reasonable, on the other hand, my son collects match boxes. He has almost 600 of them but I doubt if they are wortfi any money. However, to my son they are extremely valuable. Nothing makes him happier than to find a new match-box for his collection. + +That's what a hobby means, i guess. It is something we like to do in our spare time simply for the fun of. it. The value in money is not important, but the pleasure it gives us is. \ No newline at end of file diff --git a/testdata/docen/2.txt b/testdata/docen/2.txt new file mode 100644 index 0000000..b797c66 --- /dev/null +++ b/testdata/docen/2.txt @@ -0,0 +1,5 @@ +From Monday until Friday most people are busyworking or studying, but in the evenings and off weekends they are free to relax and enjoy themselves. Some watch television or go to the movies;others participate in sports.It depends on individual interests. There are many different ways to spend our spare time. + +Almost everyone has some kind of hobby. It may be anything from collecting stamps to making model airplanes.Some hobbies are worth a lot of money; others are valuable only to their owners. + +I know a man Who has a coin collection worth several thousand yuan. A short time ago he bought a rare ten-yuan piece worth 250 yuan. He was very happy about the purchase and thought the price was reasonable, on the other hand, my son collects match boxes. He has almost 600 of them but I doubt if they are wortfi any money. However, to my son they are extremely valuable. Nothing makes him happier than to find a new match-box for his collection. \ No newline at end of file diff --git a/testdata/abctograde/gaoxinjian.java b/testdata/docen/gaoxinjian.txt similarity index 100% rename from testdata/abctograde/gaoxinjian.java rename to testdata/docen/gaoxinjian.txt diff --git a/testdata/abctograde/jinghanyuan.java b/testdata/docen/jinghanyuan.txt similarity index 100% rename from testdata/abctograde/jinghanyuan.java rename to testdata/docen/jinghanyuan.txt diff --git a/testdata/abctograde/liuchenyi.java b/testdata/docen/liuchenyi.txt similarity index 100% rename from testdata/abctograde/liuchenyi.java rename to testdata/docen/liuchenyi.txt diff --git a/testdata/abctograde/TianYang.java b/testdata/javaabctograde/TianYang.java similarity index 100% rename from testdata/abctograde/TianYang.java rename to testdata/javaabctograde/TianYang.java diff --git a/testdata/javaabctograde/gaoxinjian.java b/testdata/javaabctograde/gaoxinjian.java new file mode 100644 index 0000000..7c3395d --- /dev/null +++ b/testdata/javaabctograde/gaoxinjian.java @@ -0,0 +1,26 @@ + public class gaoxinjian{ +public static void main(String[] args){ +char grade='b'; + char a; + char b; + char c; + char d; +if(grade=='a') +{ +System.out.println("90~100"); +}else{ +if(grade=='b') +{ +System.out.println("70~90"); +}else{ +if(grade=='c'){ +System.out.println("60~70"); +}else{ +if(grade=='D'){ +System.out.println("0~60"); +} +} +} +} +} +} \ No newline at end of file diff --git a/testdata/javaabctograde/jinghanyuan.java b/testdata/javaabctograde/jinghanyuan.java new file mode 100644 index 0000000..bc5f4c6 --- /dev/null +++ b/testdata/javaabctograde/jinghanyuan.java @@ -0,0 +1,25 @@ +public class jinghanyuan{ +public static void main(String[] args){ +//A,B,C,DΧ + +char A; +char B; +char C; +char D; +char K='B'; +if(K=='A'){ + System.out.println("90~100"); + } +if(K=='B'){ + System.out.println("70~90"); + } + +if(K=='C'){ + System.out.println("60~70"); + } +if(K=='D'){ + System.out.println("0~60"); + } + + }//main + }//class \ No newline at end of file diff --git a/testdata/javaabctograde/liuchenyi.java b/testdata/javaabctograde/liuchenyi.java new file mode 100644 index 0000000..c79921d --- /dev/null +++ b/testdata/javaabctograde/liuchenyi.java @@ -0,0 +1,19 @@ +//ɼȼ A B C DΧ 90 A 70-90 B 60-70 C <60 D +public class liuchenyi { + + public static void main(String[] args){ + char degree = 'A'; + if (degree=='A'){ + System.out.println("ķ90"); + } else if (degree=='B'){ + System.out.println("ķ70-90֮"); + } else if(degree=='C'){ + System.out.println("ķ60-70֮"); + } else { + System.out.println("ķ60"); + + } + + } + + } \ No newline at end of file diff --git a/testdata/abctograde/wangdongyue.java b/testdata/javaabctograde/wangdongyue.java similarity index 100% rename from testdata/abctograde/wangdongyue.java rename to testdata/javaabctograde/wangdongyue.java diff --git a/testdata/abctograde/yangfan.java b/testdata/javaabctograde/yangfan.java similarity index 100% rename from testdata/abctograde/yangfan.java rename to testdata/javaabctograde/yangfan.java diff --git a/testdata/abctograde/yangkaiyue.java b/testdata/javaabctograde/yangkaiyue.java similarity index 100% rename from testdata/abctograde/yangkaiyue.java rename to testdata/javaabctograde/yangkaiyue.java diff --git a/testdata/abctograde/zhongyue.java b/testdata/javaabctograde/zhongyue.java similarity index 100% rename from testdata/abctograde/zhongyue.java rename to testdata/javaabctograde/zhongyue.java diff --git a/testdata/abctograde/zhouton.java b/testdata/javaabctograde/zhouton.java similarity index 100% rename from testdata/abctograde/zhouton.java rename to testdata/javaabctograde/zhouton.java diff --git a/testdata/python/demo.py b/testdata/python/demo.py new file mode 100644 index 0000000..5fae002 --- /dev/null +++ b/testdata/python/demo.py @@ -0,0 +1,68 @@ +import sys +from imp import reload + +reload(sys) +# sys.setdefaultencoding("utf-8") + + + + +import time + +def SpeedTest(image_path): + grr = cv2.imread(image_path) + model = pr.LPR("model/cascade.xml", "model/model12.h5", "model/ocr_plate_all_gru.h5") + model.SimpleRecognizePlateByE2E(grr) + t0 = time.time() + for x in range(5): + model.SimpleRecognizePlateByE2E(grr) + t = (time.time() - t0)/5.0 + print("Image size :" + str(grr.shape[1])+"x"+str(grr.shape[0]) + " need " + str(round(t*1000,2))+"ms") + + + +from PIL import ImageFont +from PIL import Image +from PIL import ImageDraw +fontC = ImageFont.truetype("./Font/platech.ttf", 14, 0) + +def drawRectBox(image,rect,addText): + cv2.rectangle(image, (int(rect[0]), int(rect[1])), (int(rect[0] + rect[2]), int(rect[1] + rect[3])), (0,0, 255), 2,cv2.LINE_AA) + cv2.rectangle(image, (int(rect[0]-1), int(rect[1])-16), (int(rect[0] + 115), int(rect[1])), (0, 0, 255), -1, + cv2.LINE_AA) + img = Image.fromarray(image) + draw = ImageDraw.Draw(img) + draw.text((int(rect[0]+1), int(rect[1]-16)), addText, (255, 255, 255), font=fontC) + imagex = np.array(img) + return imagex + + + + + +import HyperLPRLite as pr +import cv2 +import numpy as np + +# SpeedTest("images_rec/2.jpg") # 约200ms ,412kb,936wx1104h,96dpi + +SpeedTest("anpimages/outdoorA-1.jpg") # 约2s.3.27M,4160x3120,72dpi + +grr = cv2.imread("anpimages/outdoorA-1.jpg") +model = pr.LPR("model/cascade.xml","model/model12.h5","model/ocr_plate_all_gru.h5") +for pstr,confidence,rect in model.SimpleRecognizePlateByE2E(grr): + if confidence>0.7: + image = drawRectBox(grr, rect, pstr+" "+str(round(confidence,3))) + print("plate_str:") + print(pstr) + print("plate_confidence") + print(confidence) + +cv2.namedWindow("image",0); +cv2.resizeWindow("image", 640, 480); +cv2.imshow("image",image) +cv2.waitKey(0) + + + +SpeedTest("images_rec/2.jpg") diff --git a/testdata/python/demo1.py b/testdata/python/demo1.py new file mode 100644 index 0000000..9e6df3e --- /dev/null +++ b/testdata/python/demo1.py @@ -0,0 +1,9 @@ +# coding=utf-8 +#导入包 +from hyperlpr import * +#导入OpenCV库 +import cv2 +#读入图片 +image = cv2.imread("anpimages/outdoorAA-1.jpg") +#识别结果 +print(HyperLPR_PlateRecogntion(image)) \ No newline at end of file diff --git a/testdata/python/lprcmd.py b/testdata/python/lprcmd.py new file mode 100644 index 0000000..10aebd3 --- /dev/null +++ b/testdata/python/lprcmd.py @@ -0,0 +1,130 @@ +""" +Author: fanghong +edited: 2019.4.23 +""" +# coding=utf-8 + +import sys +import os +from hyperlpr_py3 import pipline as pp +import cv2 +import numpy as np +import time +from PIL import Image, ImageDraw, ImageFont + +Sheng = ["京", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "皖", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂", + "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁", "新"] + +plateSheng = {"京":"JING","津":"JINA","沪":"HU","渝":"YUA","蒙":"MENG","新":"XIN","藏":"ZANG","宁":"NING", + "桂":"GUIA","黑":"HEI","吉":"JIB","辽":"LIAO","晋":"JINB","冀":"JIA","青":"QING","鲁":"LU", + "豫":"YUB","苏":"SU","皖":"WAN","浙":"ZHE","闽":"MIN","赣":"GANA","湘":"XIANG","鄂":"E", + "粤":"YUE","琼":"QIONG","甘":"GANB","陕":"SHAN","贵":"GUIB","云":"YUN","川":"CHUAN"} +plateTypeName = ["蓝", "黄", "绿", "白", "黑 "] +fontC = ImageFont.truetype("Font/platech.ttf", 20, 0) # 加载中文字体,20表示字体大小,0表示unicode编码 +# 画车牌定位框及识别出来的车牌字符,返回标记过的图片 +def drawPred(frame, label, left, top, right, bottom): + # 画车牌定位边框.左上点,右下点,红色,边框粗细:2 + cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2) + # 画车牌字符 + img = Image.fromarray(frame) + draw = ImageDraw.Draw(img) + draw.text((left + 1, top - 38), label, (0, 0, 255), font=fontC) # 车牌框上方红色汉字 + imagex = np.array(img) + return imagex + +# 判断车牌字符是否有效 +def isValidPlate(plate,confidence): + # 置信度大于0.8,长度等于7或8(绿牌) , 车牌第一个字符应是省名 + if confidence > 0.8 and (len(plate) == 7 or len(plate) == 8) and plate[0] in Sheng: + return True + return False + +# 在输入图片中定位并识别车牌字符,返回绘制的图片及检测结果 +def SimpleRecognizePlate(image): + # t0 = time.time() + # 粗定位 + images = pp.detect.detectPlateRough( + image, image.shape[0], top_bottom_padding_rate=0.02) + # t1 = time.time()-t0 + # print("初定位时间:", t1) + + res_set = [] + + # 循环遍历发现的每个车牌 + for j, plate in enumerate(images): + plate, rect, origin_plate = plate + # 调整车牌到统一大小 + plate = cv2.resize(plate, (136, 36 * 2)) + # cv2.imshow("test", plate); + # cv2.waitKey(0) + # 判断车牌颜色 + plate_type = pp.td.SimplePredict(plate) + plate_color = plateTypeName[plate_type] + + if (plate_type > 0) and (plate_type < 5): + plate = cv2.bitwise_not(plate) + + + # 精定位,倾斜校正 + # t2 = time.time() + image_rgb = pp.fm.findContoursAndDrawBoundingBox(plate) + # cv2.imshow("test", image_rgb); + # cv2.waitKey(0); + # print("精定位时间:", time.time() - t2) + # 车牌左右边界修正 + # t3 = time.time() + image_rgb = pp.fv.finemappingVertical(image_rgb) + # print("左右修正时间:", time.time() - t3) + + # e2e 车牌字符识别 + # t4 = time.time() + e2e_plate, e2e_confidence = pp.e2e.recognizeOne(image_rgb) + # print("e2e识别时间:", time.time() - t4) + # t5 = time.time() - t0 + # print(e2e_plate, e2e_confidence, t5, "s") + if isValidPlate(e2e_plate, e2e_confidence): # 判断是否是有效车牌 + # 在原图中绘制定位框及车牌信息,传入定位框左上点和右下点xy坐标 + image = drawPred(image, e2e_plate, int(rect[0]),int(rect[1]),int(rect[0]+rect[2]),int(rect[1]+rect[3])) + # 设置检测结果 + res_set.append([e2e_plate, # 结果车牌号 + plate_color, # 车牌颜色 + e2e_confidence, # 车牌字符置信度 + (rect[0], rect[1])]) # 车牌定位框左上点坐标 + return image, res_set + +test_dir = "./test-imgs" # 图片读入路径 +fw = open("./test-results/No14007mresults.txt", 'w+') # 以覆盖写方式打开结果文件,如果不存在,则新建一个 +# 循环遍历文件夹下所有的文件 +for f in os.listdir(test_dir): + try: + path = os.path.join(test_dir, f) # 生成完整文件路径 + # t0 = time.time(); + image = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1) # 读取图片文件 + # print("img load time:",time.time()-t0) + h = 1024 # 720 ,image.shape[0] ,指定缩放高度 + scale = image.shape[1] / float(image.shape[0]) # 原图宽高比 + w = int(scale * h) + image = cv2.resize(image, (w, h)) # 将原图像缩放到指定高度,保持原图像高宽比 + + t0 = time.time() + framedrawed, res = SimpleRecognizePlate(image) # 针对缩放后的图片,检测识别车牌;返回的是缩放后的图片 + tlabel = '%.0f ms' % ((time.time() - t0) * 1000) + # 输出车牌检测信息 + info = f + "\n" # 输出信息,文件名+换行符 + # 循环遍历检测结果,将车牌省名替换为相应拼音 + # print(res) + for r in res: + py = plateSheng[r[0][0]] # 获取结果中车牌的第一个字符省名,获取省名对应的拼音 + plate = r[0].replace(r[0][0], py) # 将省名替换为拼音 + info = info + plate + "\n" # 拼接结果字符串 + + fw.write(info) # 写入检测信息到结果文本文件 + # cv2.imwrite("./test-results/" + f, framedrawed.astype(np.uint8)) # 保存图片 + print(info[:-1]) # 屏幕输出结果 + print(tlabel) # 输出处理时间 + except Exception as e: + print(e) # 输出异常信息,调试用,发布时应注释掉 + continue # 出现异常则继续循环读取 + +fw.close() +cv2.destroyAllWindows() \ No newline at end of file diff --git a/testdata/python/lprcmd2.py b/testdata/python/lprcmd2.py new file mode 100644 index 0000000..9488943 --- /dev/null +++ b/testdata/python/lprcmd2.py @@ -0,0 +1,30 @@ +""" +Author: fanghong +edited: 2019.4.23 +""" +# coding=utf-8 + +import sys +import os +import time +import HyperLPRLite as pr +import cv2 +import numpy as np + + +model = pr.LPR("model/cascade.xml","model/model12.h5","model/ocr_plate_all_gru.h5") +imgdir = "./anpimages" +name_list = os.listdir(imgdir) # 列出文件夹下所有的目录与文件 +for i in range(0, len(name_list)): + path = os.path.join(imgdir, name_list[i]) + print(path) + # grr = cv2.imread(path) + # 读图片文件,支持中文文件名 + grr = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1) + t0 = time.time() + for pstr, confidence, rect in model.SimpleRecognizePlateByE2E(grr): + if confidence > 0.7: + print(pstr, round(confidence, 3), round(time.time()-t0, 4), "s") + print("----------------------------------") + +