-
Notifications
You must be signed in to change notification settings - Fork 62
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
30 changed files
with
633 additions
and
91 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,5 @@ | ||
/preprocess/ | ||
/gui/ | ||
/jplag/ | ||
/utils/ | ||
/shingle/ |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
package jplag.doc; | ||
|
||
|
||
public class DocToken extends jplag.Token { | ||
|
||
private static final long serialVersionUID = 3800987170521573780L; | ||
|
||
|
||
public static int getSerial(String text, Parser parser) { | ||
text = text.toLowerCase(); | ||
Integer obj = (Integer) parser.tokenStructure.table.get(text); | ||
if(obj == null) { | ||
obj = new Integer(parser.tokenStructure.serial); | ||
if(parser.tokenStructure.serial == Integer.MAX_VALUE) | ||
parser.outOfSerials(); | ||
else | ||
parser.tokenStructure.serial++; | ||
parser.tokenStructure.table.put(text, obj); | ||
if(parser.tokenStructure.reverseMapping != null) | ||
parser.tokenStructure.reverseMapping = null; | ||
} | ||
return obj.intValue(); | ||
} | ||
|
||
// throw away this method soon: | ||
|
||
public static String type2string(int i, TokenStructure tokenStructure) { | ||
if(tokenStructure.reverseMapping == null) | ||
tokenStructure.createReverseMapping(); | ||
return tokenStructure.reverseMapping[i]; | ||
} | ||
|
||
// ///////////////////// END OF STATIC MEMBERS | ||
|
||
private int line, column, length; | ||
private String text; | ||
|
||
public DocToken(int type, String file, Parser parser) { | ||
super(type, file, -1, -1, -1); | ||
} | ||
|
||
public DocToken(String text, String file, int line, int column, | ||
int length, Parser parser) { | ||
super(-1, file, line, column, length); | ||
this.type = getSerial(text, parser); | ||
this.text = text.toLowerCase(); | ||
} | ||
|
||
public int getLine() { | ||
return line; | ||
} | ||
|
||
public int getColumn() { | ||
return column; | ||
} | ||
|
||
public int getLength() { | ||
return length; | ||
} | ||
|
||
public void setLine(int line) { | ||
this.line = line; | ||
} | ||
|
||
public void setColumn(int column) { | ||
this.column = column; | ||
} | ||
|
||
public void setLength(int length) { | ||
this.length = length; | ||
} | ||
|
||
public String getText() { | ||
return this.text; | ||
} | ||
|
||
public static int numberOfTokens(TokenStructure tokenStructure) { | ||
return tokenStructure.table.size(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
|
||
package jplag.doc; | ||
|
||
import java.io.File; | ||
|
||
import jplag.ProgramI; | ||
|
||
/** | ||
* @Changed by fanghong 2019.12.1 | ||
* | ||
*/ | ||
public class Language implements jplag.Language { | ||
|
||
private ProgramI program; | ||
|
||
private jplag.doc.Parser parser = new jplag.doc.Parser(); | ||
|
||
public Language(ProgramI program) { | ||
this.program = program; | ||
this.parser.setProgram(this.program); | ||
} | ||
|
||
public int errorsCount() { | ||
return this.parser.errorsCount(); | ||
} | ||
|
||
public String[] suffixes() { | ||
String[] res = { ".txt", ".doc", ".docx", ".pdf", ".html" }; | ||
return res; | ||
} | ||
|
||
public String name() { | ||
return "Doc Parser"; | ||
} | ||
|
||
public String getShortName() { | ||
return "doc"; | ||
} | ||
|
||
public int min_token_match() { | ||
return 12; | ||
} | ||
|
||
public jplag.Structure parse(File dir, String[] files) { | ||
return this.parser.parse(dir, files); | ||
} | ||
|
||
public boolean errors() { | ||
return this.parser.getErrors(); | ||
} | ||
|
||
public boolean supportsColumns() { | ||
return true; | ||
} | ||
|
||
public boolean isPreformated() { | ||
return false; | ||
} | ||
|
||
public boolean usesIndex() { | ||
return false; | ||
} | ||
|
||
public int noOfTokens() { | ||
return parser.tokenStructure.serial; | ||
// return jplag.text.TextToken.numberOfTokens(); // always returns 1 .... | ||
} | ||
|
||
public String type2string(int type) { | ||
return jplag.text.TextToken.type2string(type); | ||
} | ||
} |
Oops, something went wrong.