Skip to content

Commit

Permalink
Parse OSM wiki data from wikibase-rdf.ttl dump
Browse files Browse the repository at this point in the history
Signed-off-by: Taylor Smock <tsmock@meta.com>
  • Loading branch information
tsmock committed Nov 3, 2023
1 parent a7b7158 commit 912d607
Show file tree
Hide file tree
Showing 11 changed files with 875 additions and 33 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
cache: 'pip'
- uses: actions/setup-node@v3
with:
node-version: 18
node-version: 20
- run: pip install rdflib requests
- run: python3 osm_wikidata.py
- run: npm install
- run: npm run build
- run: exit $(git diff --numstat | wc -l)
22 changes: 19 additions & 3 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
cache: 'pip'
- uses: actions/setup-node@v3
with:
node-version: 18
node-version: 20
- run: pip install rdflib requests
- run: python3 osm_wikidata.py
- run: npm install
- run: npm run build

Expand All @@ -20,12 +25,23 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
cache: 'pip'
- uses: actions/setup-node@v3
with:
node-version: 18
registry-url: 'https://registry.npmjs.org/'
node-version: 20
- run: pip install rdflib requests
- run: python3 osm_wikidata.py
- run: npm install
- run: npm run build
- run: npm publish
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}

publish-webjar-test-echo:
needs: publish-npm-registry
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- run: echo curl "https://www.webjars.org/deploy?webJarType=npm&nameOrUrlish=tag2link&version=$(cat package.json | jq -r '.version')"
4 changes: 2 additions & 2 deletions .github/workflows/scheduled_update.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
with:
node-version: 18
node-version: 20
- run: npm install
- run: npm run build
- run: |
git config --global user.name 'Github tag2link Action'
git config --global user.email 'tsmock@users.noreply.github.com'
git commit -am "Automated update"
git push
git push
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/build.js
/build.js.map
/package-lock.json
61 changes: 36 additions & 25 deletions build.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env node
import * as child_process from "child_process";
import * as fs from "fs";
import { isDeepStrictEqual } from "util";
import { existsSync, readFileSync, writeFileSync } from "fs";
import { execFileSync } from "child_process";

interface SparkQLBinding {
type: string;
Expand Down Expand Up @@ -36,19 +36,28 @@ interface IndexData {
rank: string;
}

function writeWikidataSophoxRules(): [Array<IndexData>, boolean] {
const fromWikidata = sparql(
"https://query.wikidata.org/sparql",
"tag2link.wikidata.sparql"
async function parse_osm_wikidata(): Promise<SparkQL> {
const text = readFileSync("wikidata.json", "utf8");
const osm_wikidata = JSON.parse(text);
const bindings = osm_wikidata.map((x) =>
Object.fromEntries(Object.entries(x).map(([k, v]) => [k, { value: v }])),
);
const fromSophox = sparql(
"https://sophox.org/sparql",
"tag2link.sophox.sparql"
return { head: [], results: { bindings: bindings } };
}

async function writeWikidataSophoxRules(): Promise<
[Array<IndexData>, boolean]
> {
const osm_wikidata = await parse_osm_wikidata();
const fromWikidata = await sparql(
"https://query.wikidata.org/sparql",
"tag2link.wikidata.sparql",
);
// We used to also query sparql("https://sophox.org/sparql", "tag2link.sophox.sparql"), but that source was extremely outdated, so it is no longer used.

const data = [
...fromWikidata.results.bindings,
...fromSophox.results.bindings,
...osm_wikidata.results.bindings,
].map(
(i) =>
({
Expand All @@ -60,7 +69,7 @@ function writeWikidataSophoxRules(): [Array<IndexData>, boolean] {
"http://wikiba.se/ontology#NormalRank": "normal",
"http://wikiba.se/ontology#DeprecatedRank": "deprecated",
}[i.rank.value],
} as IndexData)
}) as IndexData,
);

data.sort((link1, link2) => {
Expand All @@ -71,7 +80,7 @@ function writeWikidataSophoxRules(): [Array<IndexData>, boolean] {
preferred: "1",
normal: "2",
deprecated: "3",
}[x.rank] || "9"),
})[x.rank] || "9",
(x) => x.url,
(x) => x.source,
];
Expand All @@ -81,29 +90,31 @@ function writeWikidataSophoxRules(): [Array<IndexData>, boolean] {
.find((i) => i !== 0) || 0
);
});
const original = fs.existsSync("index.json") ? JSON.parse(fs.readFileSync("index.json").toString()) : {};
const original = existsSync("index.json")
? JSON.parse(readFileSync("index.json").toString())
: {};
const changed = !isDeepStrictEqual(original, data);
if (changed) {
console.log(`Writing ${data.length} rules to index.json`);
fs.writeFileSync("index.json", JSON.stringify(data, undefined, 2));
writeFileSync("index.json", JSON.stringify(data, undefined, 2));
} else {
console.log(`index.json did not need to be updated`);
}
return [data, changed];
}

/**
 * Stamps `package.json` with a date-based version and writes it back to disk.
 *
 * The version is the first 10 characters of `now.toISOString()` (the UTC
 * calendar date) with every dash replaced by a dot, e.g. `2023.11.03`.
 *
 * NOTE(review): the doubled `readFileSync` / `writeFileSync` lines below are
 * the before/after pair of the same statement (`fs.` namespace call vs. the
 * named import); only one of each pair belongs in the real file.
 *
 * @param now Timestamp from which the version string is derived.
 * @returns The object parsed from `package.json`, with `version` updated.
 */
function updatePackageVersion(now: Date): PackageJson {
const packageJson = JSON.parse(fs.readFileSync("package.json").toString());
const packageJson = JSON.parse(readFileSync("package.json").toString());
// "2023-11-03T12:34:56.000Z" -> "2023-11-03" -> "2023.11.03"
packageJson.version = now.toISOString().substring(0, 10).replace(/-/g, ".");
console.log(`Updating package version to ${packageJson.version}`);
// Serialized with 2-space indentation.
fs.writeFileSync("package.json", JSON.stringify(packageJson, undefined, 2));
writeFileSync("package.json", JSON.stringify(packageJson, undefined, 2));
return packageJson;
}

function updateTag2Link(
tag2linkPackage: PackageJson,
data: Array<IndexData>,
now: Date
now: Date,
): void {
const packageAuthor = tag2linkPackage.author.match(/(.*) <(.*)>/);
if (packageAuthor == null) {
Expand All @@ -125,11 +136,11 @@ function updateTag2Link(
})),
};
console.log(`Updating taginfo.json`);
fs.writeFileSync("taginfo.json", JSON.stringify(taginfo, undefined, 2));
writeFileSync("taginfo.json", JSON.stringify(taginfo, undefined, 2));
}

function main(): void {
const [data, indexChanged] = writeWikidataSophoxRules();
async function main(): Promise<void> {
const [data, indexChanged] = await writeWikidataSophoxRules();

if (indexChanged) {
const now = new Date();
Expand All @@ -139,7 +150,7 @@ function main(): void {
}
}

function sparql(url, filename): SparkQL {
async function sparql(url: string, filename: string): Promise<SparkQL> {
return JSON.parse(
curl(
"--request",
Expand All @@ -148,13 +159,13 @@ function sparql(url, filename): SparkQL {
"Accept:application/json",
"--data-urlencode",
"query@" + filename,
url
)
url,
),
);
}

/**
 * Runs the `curl` executable synchronously with `--silent` followed by the
 * given arguments, and returns the result of `execFileSync` converted to a
 * string.
 *
 * NOTE(review): the two `return` lines are the before/after pair of the same
 * statement (`child_process.` namespace call vs. the named `execFileSync`
 * import); only the first one can ever execute.
 *
 * @param args Extra command-line arguments appended after `--silent`.
 * @returns The output of the `curl` invocation as a string.
 */
function curl(...args: string[]): string {
return child_process.execFileSync("curl", ["--silent", ...args]).toString();
return execFileSync("curl", ["--silent", ...args]).toString();
}

main();
main().catch((error) => console.log(error));
Loading

0 comments on commit 912d607

Please sign in to comment.