Skip to content

Commit

Permalink
🎦
Browse files Browse the repository at this point in the history
  • Loading branch information
transitive-bullshit committed Oct 23, 2023
1 parent 0890d65 commit b045c00
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 26 deletions.
7 changes: 4 additions & 3 deletions src/lib/rt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,10 @@ export async function scrapeRottenTomatoesInfoByUrlImpl(
movie.genres = schema.genre
}

if (schema.url) {
movie.rtUrl = schema.url
}
// TODO: setting movie.rtUrl from schema.url is disabled for now; revisit
// if (schema.url) {
// movie.rtUrl = schema.url
// }

if (schema.description) {
movie.plot = schema.description
Expand Down
3 changes: 3 additions & 0 deletions src/lib/wikidata.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,12 @@ export async function fetchAllWikidataMovies({
}

offset += limit

// NOTE: the commented-out approach below has a hard offset limit of 10k imposed by Wikidata
// // find all films which have both an IMDB id and a rotten tomatoes id
// const url = wdk.cirrusSearchPages({
// search: query,
// // TODO: this doesn't support children of Q11424 films (like unfinished films)
// haswbstatement: ['P31=Q11424', 'P345', 'P1258'],
// limit,
// offset
Expand Down
7 changes: 4 additions & 3 deletions src/populate-imdb-movies.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,19 +156,20 @@ async function main() {
console.log()
console.log(`batch ${batchNum} done`, {
numMovies,
numIMDBMoviesDownloaded
numIMDBMoviesDownloaded,
numIMDBMoviesDownloadedTotal
})

++batchNum
} while (batchNum < numBatches)

await imdbMoviesDb.close()

console.log()
console.log('done', {
numMoviesTotal,
numIMDBMoviesDownloadedTotal
})

await imdbMoviesDb.close()
}

main()
43 changes: 23 additions & 20 deletions src/populate-rt-movies.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,26 +61,29 @@ async function main() {
return null
}

const rtUrls = Array.from(
new Set(
[
movie.rtUrl,
rtMovies[movie.tmdbId]?.rtUrl,
movie.imdbId && wikidataMovies[movie.imdbId]?.rtUrl,
movie.imdbId && omdbMovies[movie.imdbId]?.tomatoURL
]
.filter(Boolean)
.map((url) => url.trim().replace(/\/+$/g, '').trim())
)
)

// console.log(
// `${batchNum}:${index}`,
// movie.tmdbId,
// movie.title,
// 'rtUrls',
// Array.from(rtUrls)
// )
const tempUrls = [
movie.rtUrl,
rtMovies[movie.tmdbId]?.rtUrl,
movie.imdbId && wikidataMovies[movie.imdbId]?.rtUrl,
movie.imdbId && omdbMovies[movie.imdbId]?.tomatoURL
]
.filter(Boolean)
.map((url) => url.trim().replace(/\/+$/g, '').trim())

const rtUrlsTemp = new Set<string>()
const rtUrls: string[] = []
for (const tempUrl of tempUrls) {
if (!rtUrlsTemp.has(tempUrl)) {
rtUrlsTemp.add(tempUrl)
rtUrls.push(tempUrl)
}
}

if (rtUrls.length > 1) {
console.log(`${batchNum}:${index}`, movie.tmdbId, movie.title, {
rtUrls
})
}

let numErrors = 0

Expand Down

0 comments on commit b045c00

Please sign in to comment.