Skip to content

Commit

Permalink
Merge pull request #2 from beeldengeluid/master
Browse files Browse the repository at this point in the history
Some wikidata adapters
  • Loading branch information
menzowindhouwer authored Nov 30, 2018
2 parents 2b99f02 + cade80b commit 38a3df9
Show file tree
Hide file tree
Showing 5 changed files with 246 additions and 2 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ hs_err_pid*

# Netbeans
nbactions.xml
/target/
/target/
bin/.project
12 changes: 11 additions & 1 deletion conf/termennetwerk.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,18 @@
<nde:api>https://clavas.clarin.eu/clavas/public/api</nde:api>
<nde:conceptScheme>http://hdl.handle.net/11459/CLAVAS_810f8d2a-6723-3ba6-2e57-41d6d3844816</nde:conceptScheme>
</nde:dataset>
<!-- Free-text search over Wikidata entities, served by the WikiData recipe.
     The recipe appends "/w/api.php?action=wbsearchentities..." to nde:api.
     nde:conceptScheme is read by the recipe but only echoed in its debug output; it is empty here. -->
<nde:dataset id="wikidata" recipe="nl.knaw.huc.di.nde.recipe.WikiData">
<nde:label xml:lang="nl">Wikidata: wikidata entities</nde:label>
<nde:api>https://www.wikidata.org</nde:api>
<nde:conceptScheme></nde:conceptScheme>
</nde:dataset>
<!-- Wikidata search restricted to entities whose page links to Property:P1741,
     served by the WikiDataGTAAConcepts recipe (one extra "action=parse" request per search hit).
     nde:conceptScheme is read by the recipe but only echoed in its debug output; it is empty here. -->
<nde:dataset id="wikidatagtaa" recipe="nl.knaw.huc.di.nde.recipe.WikiDataGTAAConcepts">
<nde:label xml:lang="nl">Wikidata/GTAA: wikidata entities that are linked to the GTAA</nde:label>
<nde:api>https://www.wikidata.org</nde:api>
<nde:conceptScheme></nde:conceptScheme>
</nde:dataset>
<nde:dataset id="gtaa" recipe="nl.knaw.huc.di.nde.recipe.OpenSKOS">
<nde:label xml:lang="nl">GTAA: Onderwerpen</nde:label>
<nde:label xml:lang="nl">GTAA Onderwerpen</nde:label>
<nde:api>http://openskos.beeldengeluid.nl/api</nde:api>
<nde:conceptScheme>http://data.beeldengeluid.nl/gtaa/Onderwerpen</nde:conceptScheme>
</nde:dataset>
Expand Down
8 changes: 8 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,14 @@
<version>1.0-SNAPSHOT</version>
<type>jar</type>
</dependency>
<!-- https://mvnrepository.com/artifact/org.json/json -->
<!-- Provides org.json.JSONObject / JSONArray, used by the WikiData and WikiDataGTAAConcepts recipes to parse API responses. -->
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20180813</version>
</dependency>


<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
Expand Down
101 changes: 101 additions & 0 deletions src/main/java/nl/knaw/huc/di/nde/recipe/WikiData.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package nl.knaw.huc.di.nde.recipe;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.transform.stream.StreamSource;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import nl.knaw.huc.di.nde.Registry;
import nl.knaw.huc.di.nde.TermDTO;
import nl.mpi.tla.util.Saxon;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;
import org.json.*;

/**
 * Recipe that searches Wikidata for entities matching a term, using the
 * {@code wbsearchentities} action of the MediaWiki API exposed under the
 * dataset's configured {@code nde:api} base URL.
 */
public class WikiData implements RecipeInterface {

    /** Namespace prefixes used for the XPath lookups on the dataset configuration. */
    final static public Map<String,String> NAMESPACES = new LinkedHashMap<>();

    static {
        NAMESPACES.putAll(Registry.NAMESPACES);
        NAMESPACES.put("wikidata", "https://www.wikidata.org/wiki/");
    }

    private static final Logger LOGGER = Logger.getLogger(WikiData.class.getName());

    /**
     * Reads the whole stream as UTF-8 text and closes it.
     *
     * @param inputStream the stream to drain
     * @return the stream content, or an empty string when the stream is empty
     * @throws IOException if reading the stream failed part-way
     */
    private static String streamToString(InputStream inputStream) throws IOException {
        try (Scanner scanner = new Scanner(inputStream, "UTF-8")) {
            scanner.useDelimiter("\\Z");
            // An empty body has no token at all; next() would throw NoSuchElementException.
            String text = scanner.hasNext() ? scanner.next() : "";
            // Scanner swallows read errors; surface them instead of returning truncated text.
            IOException failure = scanner.ioException();
            if (failure != null) {
                throw failure;
            }
            return text;
        }
    }

    /**
     * Performs an HTTP GET on the given URL and returns the response body.
     * Redirects are not followed.
     *
     * @param url the URL to fetch
     * @return the response body as text, or {@code null} when the request failed
     */
    public static String jsonGetRequest(URL url) {
        try {
            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            // No setDoOutput(true): this request has no body, and some
            // HttpURLConnection implementations turn such a GET into a POST.
            connection.setInstanceFollowRedirects(false);
            connection.setRequestMethod("GET");
            connection.setRequestProperty("Content-Type", "application/json");
            connection.setRequestProperty("charset", "utf-8");
            connection.connect();
            try (InputStream inStream = connection.getInputStream()) {
                return streamToString(inStream);
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "GET " + url + " failed", ex);
            return null;
        }
    }

    /**
     * Converts one {@code wbsearchentities} hit into a term.
     *
     * @param termObject a single element of the response's {@code search} array
     * @return the term, with the hit's label used as both preferred and alternative label
     * @throws URISyntaxException if the hit's {@code concepturi} is not a valid URI
     * @throws JSONException if {@code concepturi} or {@code label} is missing
     */
    private static TermDTO toTerm(JSONObject termObject) throws URISyntaxException {
        TermDTO term = new TermDTO();
        term.uri = new URI(termObject.getString("concepturi"));

        //TODO: retrieve the whole concept information from its URI and get all labels
        String label = termObject.getString("label");
        term.prefLabel = new ArrayList<>();
        term.prefLabel.add(label);

        term.altLabel = new ArrayList<>();
        term.altLabel.add(label);
        return term;
    }

    /**
     * Searches Wikidata for items matching the given text.
     *
     * @param config the dataset configuration; {@code nde:api} supplies the API base URL
     * @param match the text to search for; {@code null} yields no results
     * @return the matching terms; empty when nothing matched or the lookup failed
     */
    @Override
    public List<TermDTO> fetchMatchingTerms(XdmItem config, String match) {
        List<TermDTO> terms = new ArrayList<>();
        if (match == null) {
            return terms;
        }
        try {
            System.err.println("DBG: Lets cook some WikiData!");
            String api = Saxon.xpath2string(config, "nde:api", null, WikiData.NAMESPACES);
            String cs = Saxon.xpath2string(config, "nde:conceptScheme", null, WikiData.NAMESPACES);
            System.err.println("DBG: Ingredients:");
            System.err.println("DBG: - instance["+Saxon.xpath2string(config, "(nde:label)[1]", null, WikiData.NAMESPACES)+"]");
            System.err.println("DBG: - api["+api+"]");
            System.err.println("DBG: - conceptScheme["+cs+"]");
            System.err.println("DBG: - match["+match+"]");

            // https://www.wikidata.org/w/api.php?action=wbsearchentities&search=andre%20van%20duin&format=json&language=en&type=item&continue=0
            // URLEncoder emits '+' for spaces (a literal '+' becomes %2B), so every '+' left is a space.
            String query = java.net.URLEncoder.encode(match, "UTF-8").replace("+", "%20");
            URL url = new URL(api + "/w/api.php?action=wbsearchentities&search=" + query + "&language=en&format=json&type=item&continue=0");
            System.err.println("DBG: = url["+url+"]");

            // Fetch once and reuse the body for both the debug output and the parse.
            String json = jsonGetRequest(url);
            System.err.println("DBG: " + json);
            if (json == null) {
                return terms;
            }

            // An error response carries no "search" array; treat it as "no matches".
            JSONArray termsArray = new JSONObject(json).optJSONArray("search");
            if (termsArray == null) {
                return terms;
            }
            for (int i = 0; i < termsArray.length(); i++) {
                try {
                    terms.add(toTerm(termsArray.getJSONObject(i)));
                } catch (JSONException | URISyntaxException ex) {
                    // One malformed hit must not discard the remaining results.
                    LOGGER.log(Level.WARNING, "Skipping malformed search result " + i, ex);
                }
            }
        } catch (SaxonApiException | IOException | JSONException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
        return terms;
    }

}
124 changes: 124 additions & 0 deletions src/main/java/nl/knaw/huc/di/nde/recipe/WikiDataGTAAConcepts.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
package nl.knaw.huc.di.nde.recipe;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.transform.stream.StreamSource;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import nl.knaw.huc.di.nde.Registry;
import nl.knaw.huc.di.nde.TermDTO;
import nl.mpi.tla.util.Saxon;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;
import org.json.*;

//TODO: Get this to inherit nicely from the WikiData recipe
/**
 * Recipe that searches Wikidata like the plain WikiData recipe, but keeps only
 * the entities whose page links to {@code Property:P1741}, which this recipe
 * uses as the marker for "linked to the GTAA".
 */
public class WikiDataGTAAConcepts implements RecipeInterface {

    /** Namespace prefixes used for the XPath lookups on the dataset configuration. */
    final static public Map<String,String> NAMESPACES = new LinkedHashMap<>();

    static {
        NAMESPACES.putAll(Registry.NAMESPACES);
        NAMESPACES.put("wikidata", "https://www.wikidata.org/wiki/");
    }

    private static final Logger LOGGER = Logger.getLogger(WikiDataGTAAConcepts.class.getName());

    /** Title of the page link that marks an entity as having a GTAA identifier. */
    private static final String GTAA_PROPERTY_LINK = "Property:P1741";

    /**
     * Reads the whole stream as UTF-8 text and closes it.
     *
     * @param inputStream the stream to drain
     * @return the stream content, or an empty string when the stream is empty
     * @throws IOException if reading the stream failed part-way
     */
    private static String streamToString(InputStream inputStream) throws IOException {
        try (Scanner scanner = new Scanner(inputStream, "UTF-8")) {
            scanner.useDelimiter("\\Z");
            // An empty body has no token at all; next() would throw NoSuchElementException.
            String text = scanner.hasNext() ? scanner.next() : "";
            // Scanner swallows read errors; surface them instead of returning truncated text.
            IOException failure = scanner.ioException();
            if (failure != null) {
                throw failure;
            }
            return text;
        }
    }

    /**
     * Performs an HTTP GET on the given URL and returns the response body.
     * Redirects are not followed.
     *
     * @param url the URL to fetch
     * @return the response body as text, or {@code null} when the request failed
     */
    public static String jsonGetRequest(URL url) {
        try {
            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            // No setDoOutput(true): this request has no body, and some
            // HttpURLConnection implementations turn such a GET into a POST.
            connection.setInstanceFollowRedirects(false);
            connection.setRequestMethod("GET");
            connection.setRequestProperty("Content-Type", "application/json");
            connection.setRequestProperty("charset", "utf-8");
            connection.connect();
            try (InputStream inStream = connection.getInputStream()) {
                return streamToString(inStream);
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "GET " + url + " failed", ex);
            return null;
        }
    }

    /**
     * Converts one {@code wbsearchentities} hit into a term.
     *
     * @param termObject a single element of the response's {@code search} array
     * @return the term, with the hit's label used as both preferred and alternative label
     * @throws URISyntaxException if the hit's {@code concepturi} is not a valid URI
     * @throws JSONException if {@code concepturi} or {@code label} is missing
     */
    private static TermDTO toTerm(JSONObject termObject) throws URISyntaxException {
        TermDTO term = new TermDTO();
        term.uri = new URI(termObject.getString("concepturi"));

        //TODO: try to get all the labels, not just the one returned by the query
        String label = termObject.getString("label");
        term.prefLabel = new ArrayList<>();
        term.prefLabel.add(label);

        term.altLabel = new ArrayList<>();
        term.altLabel.add(label);
        return term;
    }

    /**
     * Checks whether the page with the given id links to {@code Property:P1741},
     * using {@code action=parse&prop=links}.
     *
     * @param api the API base URL
     * @param pageId the page id taken from the search hit
     * @return {@code true} when the link is present; {@code false} when it is
     *         absent or the links could not be retrieved
     * @throws MalformedURLException if {@code api} does not form a valid URL
     */
    private static boolean hasGtaaLink(String api, int pageId) throws MalformedURLException {
        URL linksUrl = new URL(api + "/w/api.php?action=parse&pageid=" + pageId + "&format=json&prop=links");
        System.err.println("DBG: Links URL" + linksUrl);
        String json = jsonGetRequest(linksUrl);
        if (json == null) {
            return false;
        }

        JSONObject parseObject = new JSONObject(json).optJSONObject("parse");
        JSONArray linksArray = parseObject == null ? null : parseObject.optJSONArray("links");
        if (linksArray == null) {
            return false;
        }
        for (int j = 0; j < linksArray.length(); j++) {
            JSONObject linkObject = linksArray.optJSONObject(j);
            String target = linkObject == null ? "" : linkObject.optString("*");
            System.err.println("Processing link" + target);
            // Stop at the first hit so a repeated link cannot add the term twice.
            if (GTAA_PROPERTY_LINK.equals(target)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Searches Wikidata for items matching the given text and keeps those that
     * link to {@code Property:P1741}.
     *
     * @param config the dataset configuration; {@code nde:api} supplies the API base URL
     * @param match the text to search for; {@code null} yields no results
     * @return the matching terms; empty when nothing matched or the lookup failed
     */
    @Override
    public List<TermDTO> fetchMatchingTerms(XdmItem config, String match) {
        List<TermDTO> terms = new ArrayList<>();
        if (match == null) {
            return terms;
        }
        try {
            System.err.println("DBG: Lets cook some WikiData!");
            String api = Saxon.xpath2string(config, "nde:api", null, WikiDataGTAAConcepts.NAMESPACES);
            String cs = Saxon.xpath2string(config, "nde:conceptScheme", null, WikiDataGTAAConcepts.NAMESPACES);
            System.err.println("DBG: Ingredients:");
            System.err.println("DBG: - instance["+Saxon.xpath2string(config, "(nde:label)[1]", null, WikiDataGTAAConcepts.NAMESPACES)+"]");
            System.err.println("DBG: - api["+api+"]");
            System.err.println("DBG: - conceptScheme["+cs+"]");
            System.err.println("DBG: - match["+match+"]");

            // https://www.wikidata.org/w/api.php?action=wbsearchentities&search=andre%20van%20duin&format=json&language=en&type=item&continue=0&props=www.wikidata.org/wiki/Property:P1741
            // URLEncoder emits '+' for spaces (a literal '+' becomes %2B), so every '+' left is a space.
            String query = java.net.URLEncoder.encode(match, "UTF-8").replace("+", "%20");
            URL url = new URL(api + "/w/api.php?action=wbsearchentities&search=" + query + "&language=en&format=json&type=item&continue=0");
            System.err.println("DBG: = url["+url+"]");

            // Fetch once and reuse the body for both the debug output and the parse.
            String json = jsonGetRequest(url);
            System.err.println("DBG: " + json);
            if (json == null) {
                return terms;
            }

            // An error response carries no "search" array; treat it as "no matches".
            JSONArray termsArray = new JSONObject(json).optJSONArray("search");
            if (termsArray == null) {
                return terms;
            }
            for (int i = 0; i < termsArray.length(); i++) {
                try {
                    JSONObject termObject = termsArray.getJSONObject(i);
                    TermDTO term = toTerm(termObject);

                    //check if is GTAA using prop=links
                    if (hasGtaaLink(api, termObject.getInt("pageid"))) {
                        System.err.println("DBG: adding term");
                        terms.add(term);
                    }
                } catch (JSONException | URISyntaxException ex) {
                    // One malformed hit must not discard the remaining results.
                    LOGGER.log(Level.WARNING, "Skipping malformed search result " + i, ex);
                }
            }
        } catch (SaxonApiException | IOException | JSONException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
        return terms;
    }

}

0 comments on commit 38a3df9

Please sign in to comment.