Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
octaviospain committed May 22, 2017
2 parents 911a14f + a6300ea commit f54cd8a
Show file tree
Hide file tree
Showing 15 changed files with 556 additions and 56 deletions.
Binary file removed lib/src0_82.jar
Binary file not shown.
23 changes: 10 additions & 13 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,6 @@
<testSourceDirectory>src/test/java</testSourceDirectory>
<testOutputDirectory>target/test-classes</testOutputDirectory>
<resources>

<resource>
<directory>src</directory>
<excludes>
<exclude>**/*.java</exclude>
</excludes>
</resource>

<resource>
<directory>src/main/resources</directory>
</resource>
Expand Down Expand Up @@ -58,6 +50,13 @@

<dependencies>

<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>21.0</version>
<scope>compile</scope>
</dependency>

<dependency>
<groupId>org.junit.vintage</groupId>
<artifactId>junit-vintage-engine</artifactId>
Expand Down Expand Up @@ -94,11 +93,9 @@
</dependency>

<dependency>
<groupId>org.htmlparser</groupId>
<artifactId>htmlparser</artifactId>
<scope>system</scope>
<version>1.2</version>
<systemPath>${project.basedir}/lib/src0_82.jar</systemPath>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
</dependencies>
</project>
File renamed without changes.
1 change: 1 addition & 0 deletions run-gui.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mvn package && mvn exec:java -D exec.mainClass="cuni.software.ViewRunner"
14 changes: 7 additions & 7 deletions src/main/java/cuni/software/Article.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.google.common.collect.*;

import java.time.*;
import java.util.*;

public class Article {
Expand All @@ -10,26 +11,25 @@ public class Article {

private int id;
private String uri;
private Set<String> tags;
private Multiset<String> terms;
private LocalDate pubDate;

public Article(String uri) {
public Article(String uri) {
id = ++ articleSequenceId;
this.uri = uri;
tags = new HashSet<>();
terms = HashMultiset.create();
}

public String getUri() {
return uri;
}

public void addTags(Set<String> newTags) {
tags.addAll(newTags);
public LocalDate getPubDate() {
return pubDate;
}

public Set<String> getTags() {
return tags;
public void setPubDate(LocalDate pubDate) {
this.pubDate = pubDate;
}

public void addTerms(Collection<String> newTerms) {
Expand Down
120 changes: 120 additions & 0 deletions src/main/java/cuni/software/Controller.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package cuni.software;

import javafx.application.*;
import javafx.beans.property.*;
import javafx.collections.*;
import javafx.fxml.*;
import javafx.scene.control.*;
import javafx.scene.control.SpinnerValueFactory.*;
import javafx.stage.*;
import javafx.stage.FileChooser.*;
import javafx.util.converter.*;

import java.io.*;
import java.util.*;
import java.util.stream.*;

/**
 * JavaFX controller of the article finder window.
 *
 * <p>It loads RSS feeds from a user-selected text file on a background thread, hands the
 * parsed articles to the {@link SearchEngine}, and shows the articles related to the typed
 * query as clickable links. Progress and error messages are appended to the text area.
 *
 * @author Octavio Calleya
 */
public class Controller {

    @FXML
    private TextField searchField;
    @FXML
    private ListView<Hyperlink> listView;
    @FXML
    private Button searchButton;
    @FXML
    private Button loadButton;
    @FXML
    private TextArea textArea;
    @FXML
    private Spinner<Double> similaritySpinner;

    private HostServices hostServices;
    /** {@code true} while feeds are being imported on the background thread. */
    private BooleanProperty searchingProperty = new SimpleBooleanProperty(false);
    private SearchEngine searchEngine = new SearchEngine();
    private List<RssFeed> loadedRssFeeds;

    /**
     * Wires the button actions and bindings and configures the similarity spinner.
     */
    @FXML
    public void initialize() {
        // The search button must be unusable while feeds are being imported OR while there
        // is no query. The previous binding (not busy AND empty) left it enabled during import.
        searchButton.disableProperty().bind(searchingProperty.or(searchField.textProperty().isEmpty()));
        loadButton.disableProperty().bind(searchingProperty);
        loadButton.setOnAction(e -> loadRssFromFile());
        searchButton.setOnAction(e -> performSearch());

        DoubleSpinnerValueFactory doubleSpinnerValueFactory = new DoubleSpinnerValueFactory(1.550, 1.575);
        doubleSpinnerValueFactory.setAmountToStepBy(0.00025);
        doubleSpinnerValueFactory.setConverter(new DoubleStringConverter());
        similaritySpinner.setValueFactory(doubleSpinnerValueFactory);
    }

    /**
     * Runs the search for the current query on a background thread and shows the results.
     */
    private void performSearch() {
        // Controls may only be touched on the JavaFX Application Thread, so their values
        // are captured here (this method runs from a button handler) and not in the worker.
        String query = searchField.getText();
        Double similarityValue = similaritySpinner.getValue();

        new Thread(() -> {
            try {
                List<Article> relatedArticles = searchEngine.findRelatedArticles(query, similarityValue);
                log("Found " + relatedArticles.size() + " articles");
                addArticlesToList(relatedArticles);
            }
            catch (RuntimeException e) {
                // Without this the worker would die silently and the user would see nothing.
                log("Error during search: " + e.getMessage());
            }
        }).start();
    }

    /**
     * Replaces the content of the list view with one hyperlink per article.
     * The update is scheduled on the JavaFX Application Thread.
     *
     * @param articles the articles to show
     */
    private void addArticlesToList(List<Article> articles) {
        Platform.runLater(() -> {
            ObservableList<Hyperlink> links = FXCollections.observableArrayList();
            for (Article article : articles) {
                Hyperlink link = new Hyperlink(article.getUri());
                link.setOnAction(e -> hostServices.showDocument(article.getUri()));
                links.add(link);
            }
            listView.setItems(links);
        });
    }

    /**
     * Asks the user for a text file with feeds and, if one was chosen, imports it on a
     * background thread.
     */
    private void loadRssFromFile() {
        FileChooser chooser = new FileChooser();
        chooser.setTitle("Select file...");
        chooser.getExtensionFilters().add(new ExtensionFilter("Text Files", "*.txt"));
        File feedsFile = chooser.showOpenDialog(searchField.getScene().getWindow());
        if (feedsFile != null) {
            // Mark the UI as busy before the worker starts so the state is never set after
            // the worker has already begun reporting.
            searchingProperty.setValue(true);
            log("Importing feeds...");
            new Thread(() -> loadTask(feedsFile)).start();
        }
    }

    /**
     * Parses the feeds file, logs what was loaded and feeds the search engine.
     * Intended to run off the JavaFX Application Thread; the busy flag is always reset
     * when it finishes, whether it succeeded or not.
     *
     * @param feedsFile the file selected by the user
     */
    private void loadTask(File feedsFile) {
        try {
            loadedRssFeeds = RssFeedParse.fromFile(feedsFile);
            printParsedFeeds();
            addToSearchEngine();
        }
        catch (IOException e) {
            log("Error loading rss feeds from file: " + e.getMessage());
        }
        finally {
            Platform.runLater(() -> searchingProperty.setValue(false));
        }
    }

    /**
     * Adds the articles of every loaded feed to the search engine.
     */
    private void addToSearchEngine() {
        List<Article> allArticles = loadedRssFeeds.stream()
                .flatMap(feed -> feed.getArticles().stream())
                .collect(Collectors.toList());
        searchEngine.addArticles(allArticles);
    }

    /**
     * Logs the URL of every loaded feed followed by its articles.
     */
    private void printParsedFeeds() {
        loadedRssFeeds.forEach(feed -> {
            log("\t\t" + feed.getUrl().toString() + "\n\tLoaded articles:");
            feed.getArticles().forEach(article -> log(article.toString()));
        });
    }

    /**
     * Sets the host services used to open an article link when it is clicked.
     *
     * @param hostServices the application's host services
     */
    public void setHostServices(HostServices hostServices) {
        this.hostServices = hostServices;
    }

    /**
     * Appends a line to the text area. Safe to call from any thread because the append is
     * scheduled on the JavaFX Application Thread.
     *
     * @param message the text to append; a newline is added after it
     */
    public void log(String message) {
        Platform.runLater(() -> textArea.appendText(message + "\n"));
    }
}
72 changes: 66 additions & 6 deletions src/main/java/cuni/software/CustomArticleFinderRunner.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.slf4j.*;

import java.io.*;
import java.time.*;
import java.util.*;
import java.util.stream.*;

Expand All @@ -16,30 +17,89 @@ public static void main(String[] args) {
System.out.println("****\tCustom Article Finder\t****");
String query = "";

if (args.length == 0) {
LOG.error("The first argument should be the file with RSS feeds.");
return;
}

List<RssFeed> loadedRssFeeds;
try {
loadedRssFeeds = RssFeedParse.fromFile(new File(args[0]));
} catch (IOException e) {
LOG.error("The first argument should be the file with RSS feeds.", e);
return;
}

SearchEngine searchEngine;
try {
List<RssFeed> loadedRssFeeds = RssFeedParse.fromFile(new File(args[0]));
System.out.println("\n\t\tLoaded rss feeds:");
loadedRssFeeds.forEach(feed -> {
System.out.println("\n\tLoaded articles:");
System.out.println("\t\t" + feed.getUrl().toString() + "\n\tLoaded articles:");
feed.getArticles().forEach(System.out::println);
});

List<Article> allArticles = loadedRssFeeds.stream()
.flatMap(feed -> feed.getArticles().stream())
.collect(Collectors.toList());

SearchEngine searchEngine = new SearchEngine(allArticles);
searchEngine = new SearchEngine();
searchEngine.addArticles(allArticles);
}
catch (Exception e) {
LOG.error("Error during loading of articles:", e);
return;
}

try {
menu();
query = input.nextLine();

while (! query.equalsIgnoreCase("exit")) {
System.out.println("\n\tEnter the query in order to find related articles. Type 'exit' to quit.");
query = input.nextLine();

searchEngine.findRelatedArticles(query).forEach(article -> System.out.println(article.getUri()));
if (query.equalsIgnoreCase("menu")) {
menu();
} else if (query.startsWith("search query ")) {
try {
List<Article> relatedArticles = searchEngine.findRelatedArticles(query.substring(13), 1.6);
System.out.println("Found " + relatedArticles.size() + " articles");
relatedArticles.forEach(article -> System.out.println(article.getUri()));
}
catch (Exception e) {
LOG.error("Error during finding:", e);
}
} else if (query.startsWith("search date ")) {
String[] dates = query.substring(12).split("/");
try {
LocalDate date = LocalDate.of(Integer.parseInt(dates[2]), Integer.parseInt(dates[1]), Integer.parseInt(dates[0]));
try {
//searchEngine.findRelatedArticles(date).forEach(article -> System.out.println(article.getUri()));
}
catch (Exception e) {
LOG.error("Error during finding:", e);
}
}
catch (Exception e) {
LOG.error("Error during parsing of date:", e);
}
} else {
System.out.println("The command '" + query + "' is not valid.");
menu();
}

query = input.nextLine();
}
}
catch (Exception exception) {
LOG.error("Error: " + exception.getMessage());
exception.printStackTrace();
}
}

/**
 * Prints the list of commands accepted by the interactive prompt to standard output.
 */
private static void menu() {
    String[] helpLines = {
        "\n\tYou can type one of the following commands:",
        "'search query YOUR_QUERY' to find articles related to given query.",
        "'search date DD/MM/YYYY' to find articles since given date.",
        "'menu' to show the list of possible commands.",
        "'exit' to quit the program."
    };
    for (String helpLine : helpLines) {
        System.out.println(helpLine);
    }
}
}
74 changes: 74 additions & 0 deletions src/main/java/cuni/software/Parser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package cuni.software;

import com.google.common.collect.*;
import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.*;

import java.io.*;

/**
 * Extracts the terms of an article from its web page.
 *
 * <p>The parser keeps no state between calls: every call to {@link #parseLink(String)}
 * returns a new multiset holding only the terms of that page.
 */
public class Parser {

    /**
     * Downloads the page at the given link and collects the terms found in its
     * attribute-less {@code <p>} elements. A term is a lower-cased word, stripped of
     * leading and trailing characters that are neither letters nor digits, that is longer
     * than two characters.
     *
     * @param link the URL of the article.
     * @return the terms of the article with their number of occurrences; empty if the page
     *         could not be retrieved.
     */
    public Multiset<String> parseLink(String link) {
        // A fresh multiset per call: a shared field would mix the terms of every page
        // parsed so far into each result and hand the same mutable object to all callers.
        Multiset<String> articleTerms = HashMultiset.create();
        try {
            Document doc = Jsoup.connect(link).get();
            Elements paragraphs = doc.getElementsByTag("p");
            for (Element paragraph : paragraphs) {
                // Only paragraphs without attributes are treated as actual article content.
                if (paragraph.attributes().size() == 0) {
                    retrieveItems(paragraph, articleTerms);
                }
            }
        } catch (IOException e) {
            // Best effort: report which link failed and return what we have (nothing).
            System.err.println("Could not retrieve article from '" + link + "'");
            e.printStackTrace();
        }
        return articleTerms;
    }

    /**
     * Normalizes a word: trims it, lower-cases it independently of the default locale, and
     * removes leading and trailing characters that are neither letters nor digits.
     *
     * @param word the raw word.
     * @return the normalized word, possibly empty.
     */
    private String formatWord(String word) {
        String normalized = word.trim().toLowerCase(java.util.Locale.ROOT);

        // Move 'end' back past trailing non-alphanumeric characters.
        int end = normalized.length();
        while (end > 0 && !Character.isLetterOrDigit(normalized.charAt(end - 1))) {
            end--;
        }

        // Move 'start' forward past leading non-alphanumeric characters.
        int start = 0;
        while (start < end && !Character.isLetterOrDigit(normalized.charAt(start))) {
            start++;
        }

        return normalized.substring(start, end);
    }

    /**
     * Splits the text of the element on spaces and adds every normalized word longer than
     * two characters to the given multiset.
     *
     * @param e the element whose text is read.
     * @param articleTerms the multiset the terms are added to.
     */
    private void retrieveItems(Element e, Multiset<String> articleTerms) {
        for (String rawWord : e.text().split(" ")) {
            String term = formatWord(rawWord);
            if (term.length() > 2) {
                articleTerms.add(term);
            }
        }
    }
}
Loading

0 comments on commit f54cd8a

Please sign in to comment.