Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Ahmedfir committed Oct 31, 2022
0 parents commit 69fa98d
Show file tree
Hide file tree
Showing 32 changed files with 5,385 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Project exclude paths
/target/
81 changes: 81 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the java-n-gram-line-level artifact. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<!-- Artifact coordinates; no packaging element is declared. -->
<groupId>org.example</groupId>
<artifactId>java-n-gram-line-level</artifactId>
<version>1.0-SNAPSHOT</version>


<!-- Compile-scope libraries: the listenablefuture artifact and three tuna artifacts
     from lu.jimenez.research. assertj-core is the only test-scoped dependency. -->
<dependencies>
<!-- https://mvnrepository.com/artifact/com.google.guava/listenablefuture -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>listenablefuture</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>lu.jimenez.research</groupId>
<artifactId>tuna-tokenizer</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>lu.jimenez.research</groupId>
<artifactId>tuna-modelling</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>lu.jimenez.research</groupId>
<artifactId>tuna-gitUtils</artifactId>
<version>1.0</version>
</dependency>
<!-- test -->
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<!-- use 2.9.1 for Java 7 projects -->
<version>3.23.1</version>
<scope>test</scope>
</dependency>

</dependencies>

<!-- Source and target levels are both set to Java 8. -->
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>

<build>
<plugins>

<!-- Bound to the package phase: runs the assembly plugin's "single" goal with the
     jar-with-dependencies descriptor and records Main as the manifest main class.
     NOTE(review): no plugin version is pinned here; confirm whether one is managed elsewhere. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<archive>
<manifest>
<mainClass>
Main
</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</execution>
</executions>
</plugin>

</plugins>
</build>

</project>
Empty file added src/Readme.md
Empty file.
24 changes: 24 additions & 0 deletions src/main/java/Main.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import cli.CliRequest;
import modelling.exception.TrainingFailedException;

import java.io.IOException;
import java.util.concurrent.ExecutionException;

/**
 * Command-line entry point of the application.
 */
public class Main {

    /**
     * Hands the raw arguments to {@link CliRequest#parseArgs}, then calls
     * {@code train()} followed by {@code rank()} on what comes back.
     *
     * <p>Arguments are {@code key=value} pairs, for example:
     * <pre>
     * repo=repo_path
     * in=file_absolute_path      (may be repeated)
     * out=output_file_absolute_path
     * </pre>
     * NOTE(review): the exact spelling of each key (for instance a leading dash)
     * is decided by the parser in the {@code cli} package - confirm there.
     *
     * @param args the command-line arguments, forwarded unchanged to the parser
     * @throws IOException              propagated from the parse/train/rank chain
     * @throws ExecutionException       propagated from the parse/train/rank chain
     * @throws InterruptedException     propagated from the parse/train/rank chain
     * @throws TrainingFailedException  propagated from the parse/train/rank chain
     */
    public static void main(String... args)
            throws IOException, ExecutionException, InterruptedException, TrainingFailedException {
        CliRequest.parseArgs(args).train().rank();
    }
}
31 changes: 31 additions & 0 deletions src/main/java/cli/CliArgPrefix.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package cli;

// @see {}
/**
 * The prefixes recognised at the start of a command-line argument.
 *
 * <p>Each constant carries the literal text an argument must begin with
 * (for example {@code -repo=}); {@link #startsWithPrefix(String)} maps a raw
 * argument back to its constant.
 */
enum CliArgPrefix {
    LEVEL("-level="),
    TOKENIZER("-tokenizer="),
    THRESHOLD("-threshold="),
    SIZE("-n="),
    FILE_INCLUDE_REQUEST("-in="),
    EXCLUDE_FILES_WITH_WORD_IN_PATH("-ex_w_in_path="),
    INCLUDE_FILES_WITH_WORD_IN_PATH("-inc_w_in_path="),
    // NOTE(review): unlike every other prefix this one has no trailing '='.
    // Left as-is because the code that strips the prefix is not visible here;
    // confirm whether the omission is intentional.
    INCLUDE_FILE_NEIGHBOURS_WITH_WORD_IN_PATH("-inc_neighbours_w_in_path"),
    REPO("-repo="),
    OUTPUT_FILE("-out=");

    /** Literal text a command-line argument must start with to match this constant. */
    final String argPrefix;

    CliArgPrefix(String argPrefix) {
        this.argPrefix = argPrefix;
    }

    /**
     * Finds the constant whose prefix the given argument starts with.
     *
     * <p>Constants are tried in declaration order and the first match wins.
     *
     * @param arg a raw command-line argument such as {@code -in=/path/File.java}
     * @return the matching constant, never {@code null}
     * @throws IllegalArgumentException if {@code arg} is {@code null} or starts
     *                                  with none of the known prefixes; the message
     *                                  lists the supported prefixes
     */
    static CliArgPrefix startsWithPrefix(String arg) {
        if (arg == null) {
            // Fail with the same exception type as an unknown argument instead of an NPE.
            throw new IllegalArgumentException(
                    "Command-line argument must not be null; supported prefixes: " + supportedPrefixes());
        }
        for (CliArgPrefix cap : CliArgPrefix.values()) {
            if (arg.startsWith(cap.argPrefix)) {
                return cap;
            }
        }
        throw new IllegalArgumentException(
                "Unknown argument '" + arg + "'; supported prefixes: " + supportedPrefixes());
    }

    /** Joins every known prefix with ", " for use in error messages. */
    private static String supportedPrefixes() {
        StringBuilder joined = new StringBuilder();
        for (CliArgPrefix cap : CliArgPrefix.values()) {
            if (joined.length() > 0) {
                joined.append(", ");
            }
            joined.append(cap.argPrefix);
        }
        return joined.toString();
    }

}
38 changes: 38 additions & 0 deletions src/main/java/cli/CliArgTokenizer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package cli;

import tokenizer.line.AbstractLineTokenizer;
import tokenizer.line.UTFLineTokenizer;
import tokenizer.line.java.JavaLemmeLineTokenizer;

/**
 * Tokenizer choices selectable by a short command-line parameter:
 * {@code "JP"} or {@code "UTF8"}.
 */
public enum CliArgTokenizer {
    JavaParser("JP"),
    UTF8("UTF8");

    /** The exact (case-sensitive) text that selects this constant. */
    private final String cliParam;

    CliArgTokenizer(String param) {
        this.cliParam = param;
    }

    /** Builds the exception reported for a parameter that selects no tokenizer. */
    private static IllegalArgumentException unknownTokenizer(String param) {
        return new IllegalArgumentException("Unknown tokenizer : "+param);
    }

    /**
     * Looks up the constant whose parameter text equals {@code param}.
     *
     * @param param the text to look up
     * @return the matching constant
     * @throws IllegalArgumentException if no constant matches (including a {@code null} input)
     */
    private static CliArgTokenizer parse(String param) {
        final CliArgTokenizer[] candidates = CliArgTokenizer.values();
        for (int i = 0; i < candidates.length; i++) {
            // Receiver is the non-null constant text, so a null param simply fails to match.
            if (candidates[i].cliParam.equals(param)) {
                return candidates[i];
            }
        }
        throw unknownTokenizer(param);
    }

    /**
     * Creates a new tokenizer instance for the given parameter text.
     *
     * @param param {@code "UTF8"} for a {@link UTFLineTokenizer} or {@code "JP"}
     *              for a {@link JavaLemmeLineTokenizer}
     * @return a freshly constructed tokenizer
     * @throws IllegalArgumentException if {@code param} matches no constant
     */
    static AbstractLineTokenizer newTokenizer(String param) {
        final CliArgTokenizer selected = parse(param);
        if (selected == UTF8) {
            return new UTFLineTokenizer();
        }
        if (selected == JavaParser) {
            return new JavaLemmeLineTokenizer();
        }
        // Reached only if a constant is added without a branch above.
        throw unknownTokenizer(param);
    }
}
Loading

0 comments on commit 69fa98d

Please sign in to comment.