Skip to content

Commit

Permalink
Merge pull request #53 from codefuse-ai/xxh_dev
Browse files Browse the repository at this point in the history
Add COREF for java language extractor source code.
  • Loading branch information
xiexie authored Jun 18, 2024
2 parents 952b99c + dbe1215 commit 0cf2f65
Show file tree
Hide file tree
Showing 456 changed files with 49,145 additions and 0 deletions.
9 changes: 9 additions & 0 deletions language/java/extractor/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Introduction
The codefuse-query java extractor transforms the source code of Java projects into standardized coref-java data, which is utilized for further analysis by codefuse-query.

# Quick Start
1. Set `JAVA_HOME`. Run `echo $JAVA_HOME` to check the current value; if the output is empty, `JAVA_HOME` has not been configured yet.
2. Build. Execute `mvn clean install`.
3. Run. Execute `java -jar target/java-extractor-0.2.0-jar-with-dependencies.jar ${YOUR_JAVA_REPO} ./db` (the jar name follows the `artifactId` and `version` declared in `pom.xml`).

After execution, a file named coref_java_src.db will be generated in the ./db directory.
9 changes: 9 additions & 0 deletions language/java/extractor/README_cn.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# 介绍
codefuse-query java extractor 将 java 项目的源码转化为 coref-java 标准化数据,用于codefuse-query的进一步分析。

# 快速开始
1. 设置 JAVA_HOME。`echo $JAVA_HOME` 如果显示为空,则还没有设置好。
2. 构建。 `mvn clean install`
3. 运行。 `java -jar target/java-extractor-0.2.0-jar-with-dependencies.jar ${YOUR_JAVA_REPO} ./db`(jar 文件名由 `pom.xml` 中的 `artifactId` 和 `version` 决定)

执行完成后,会在 ./db 目录下生成 coref_java_src.db 文件。
258 changes: 258 additions & 0 deletions language/java/extractor/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.alipay.tool</groupId>
<artifactId>java-extractor</artifactId>
<version>0.2.0</version>

<packaging>jar</packaging>

<name>coref-java-src-extractor</name>
<url>http://maven.apache.org</url>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<kotlin.version.coref>1.5.21</kotlin.version.coref>
</properties>
<dependencies>
<dependency>
<groupId>org.mybatis</groupId>
<artifactId>mybatis</artifactId>
<version>3.5.7</version>
</dependency>
<dependency>
<groupId>javax.annotation</groupId>
<artifactId>javax.annotation-api</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.mybatis.dynamic-sql</groupId>
<artifactId>mybatis-dynamic-sql</artifactId>
<version>1.3.0</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.15</version>
</dependency>
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
<version>59.1</version>
</dependency>

<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.12.0</version>
</dependency>

<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.20</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>me.tongfei</groupId>
<artifactId>progressbar</artifactId>
<version>0.9.2</version>
</dependency>

<dependency>
<groupId>org.jetbrains</groupId>
<artifactId>annotations</artifactId>
<version>22.0.0</version>
</dependency>
<dependency>
<groupId>uk.com.robust-it</groupId>
<artifactId>cloning</artifactId>
<version>1.9.12</version>
</dependency>

<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.8</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>30.1.1-jre</version>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-all</artifactId>
<version>1.3</version>
</dependency>
<dependency>
<groupId>com.google.re2j</groupId>
<artifactId>re2j</artifactId>
<version>1.6</version>
</dependency>
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-compiler-embeddable</artifactId>
<version>${kotlin.version.coref}</version>
</dependency>
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-reflect</artifactId>
<version>${kotlin.version.coref}</version>
</dependency>
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-script-runtime</artifactId>
<version>${kotlin.version.coref}</version>
</dependency>
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-stdlib</artifactId>
<version>${kotlin.version.coref}</version>
</dependency>

<dependency>
<groupId>net.java.dev.jna</groupId>
<artifactId>jna</artifactId>
<version>4.1.0</version>
</dependency>

<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
<version>3.36.0.2</version>
</dependency>

<dependency>
<groupId>tk.mybatis</groupId>
<artifactId>mapper</artifactId>
<version>4.1.5</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>5.9.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>info.picocli</groupId>
<artifactId>picocli</artifactId>
<version>4.6.1</version>
</dependency>
<!--
Log4j 2.14.1 is affected by CVE-2021-44228 (Log4Shell) and the follow-up
CVE-2021-45046, CVE-2021-45105 and CVE-2021-44832. 2.17.1 is the patched
release that still runs on Java 8. Keep all three log4j artifacts on the
same version.
-->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.17.1</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.17.1</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>2.17.1</version>
</dependency>

<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.8.0</version>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.2</version>
</dependency>
<dependency>
<groupId>com.aliyun.oss</groupId>
<artifactId>aliyun-sdk-oss</artifactId>
<version>3.10.2</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.18</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.72_noneautotype</version>
</dependency>
<dependency>
<groupId>org.ini4j</groupId>
<artifactId>ini4j</artifactId>
<version>0.5.4</version>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.mybatis.generator</groupId>
<artifactId>mybatis-generator-maven-plugin</artifactId>
<version>1.4.0</version>
<configuration>
<verbose>false</verbose>
<overwrite>true</overwrite>
</configuration>
<dependencies>
<!-- Same sqlite-jdbc version as the project's own dependency (3.36.0.2), so code generation and runtime use one driver. -->
<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
<version>3.36.0.2</version>
</dependency>
<dependency>
<groupId>tk.mybatis</groupId>
<artifactId>mapper</artifactId>
<version>4.1.5</version>
</dependency>
</dependencies>
<executions>
<execution>
<id>Generate MyBatis Artifacts</id>
<goals>
<goal>generate</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<archive>
<manifest>
<mainClass>com.alipay.codequery.Extractor</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package com.alipay.codequery;

import com.alipay.codequery.project.ProjectUtil;
import com.alipay.codequery.util.PathUtil;
import lombok.Getter;
import org.jetbrains.annotations.NotNull;

import java.io.File;
import java.util.*;


/**
 * Extractor configuration: the user-supplied inputs (source paths, classpath, Java home)
 * plus the files discovered from those inputs.
 *
 * <p>The discovered-file collections are filled by {@link #prepareFile()} and exposed
 * through Lombok-generated getters.
 */
public class Configuration implements Cloneable {

    /** Fixed value {@code "init"}. */
    public final String commitId = "init";
    /** Fixed value {@code "Not Specified"}. */
    public final String repository = "Not Specified";
    /** Source paths to scan; the first entry is the one returned by {@link #getSourcePath()}. */
    public List<String> sourcepath = new ArrayList<>();
    /** Java home setting; {@code null} until assigned. */
    public String javaHome = null;
    /** Classpath entries handed to {@code ProjectUtil.getClassPaths} in {@link #prepareFile()}. */
    public List<String> classpath = new ArrayList<>();

    /** Files collected by {@link #withKotlinSrc(File)}. */
    @Getter
    private final @NotNull Collection<File> kotlinFiles = new HashSet<>();
    /** Non-directory entries collected by {@link #withJavaSrc(File)}. */
    @Getter
    private final @NotNull Collection<File> javaFiles = new HashSet<>();
    /** Directory entries collected by {@link #withJavaSrc(File)}. */
    @Getter
    private final @NotNull Set<File> javaDirs = new HashSet<>();
    /** One {@link File} per path returned by {@code ProjectUtil.getClassPaths}. */
    @Getter
    private final List<File> classFiles = new ArrayList<>();

    /**
     * Returns the root source path.
     *
     * <p>Note: several source paths may be configured, but only the first one is used as
     * the root.
     *
     * @return the first element of {@link #sourcepath}
     * @throws IndexOutOfBoundsException if {@link #sourcepath} is empty
     */
    public String getSourcePath() {
        return sourcepath.get(0);
    }

    /**
     * Rebuilds every discovered-file collection from the current {@link #classpath} and
     * {@link #sourcepath} values, discarding whatever was collected before.
     */
    public void prepareFile() {
        clearExistedFiles();

        // Classpath: wrap each resolved path in a File.
        ProjectUtil.getClassPaths(classpath)
                .stream()
                .forEach(entry -> classFiles.add(new File(entry)));

        // Sources: scan each root (as an absolute path) for Java, then Kotlin.
        for (String path : sourcepath) {
            File root = new File(path).getAbsoluteFile();
            withJavaSrc(root);
            withKotlinSrc(root);
        }
    }

    /**
     * Traverses {@code root} with a {@code PathUtil.TraverseBuilder} configured for the
     * {@code .kt} suffix and adds every result to the Kotlin file collection.
     *
     * @param root location the traversal starts from
     */
    public void withKotlinSrc(File root) {
        ArrayList<File> found = new ArrayList<>();

        new PathUtil.TraverseBuilder()
                .withSymbol(false)
                .withDirInResult(false)
                .withSuffix(".kt")
                .traverse(root, found);

        kotlinFiles.addAll(found);
    }


    /**
     * Traverses {@code root} with a {@code PathUtil.TraverseBuilder} configured for the
     * {@code .java} suffix, then sorts the results: directories go to the Java directory
     * set, everything else to the Java file collection.
     *
     * @param root location the traversal starts from
     */
    public void withJavaSrc(File root) {
        ArrayList<File> found = new ArrayList<>();

        new PathUtil.TraverseBuilder()
                .withSymbol(false)
                .withDirInResult(true)
                .withSuffix(".java")
                .traverse(root, found);

        for (File entry : found) {
            Collection<File> bucket = entry.isDirectory() ? javaDirs : javaFiles;
            bucket.add(entry);
        }
    }

    /** Empties all four discovered-file collections. */
    private void clearExistedFiles() {
        classFiles.clear();
        kotlinFiles.clear();
        javaDirs.clear();
        javaFiles.clear();
    }

}
Loading

0 comments on commit 0cf2f65

Please sign in to comment.