Skip to content

Commit

Permalink
🔸
Browse files Browse the repository at this point in the history
  • Loading branch information
transitive-bullshit committed Oct 22, 2023
1 parent 2d14003 commit 4e2992f
Show file tree
Hide file tree
Showing 8 changed files with 342 additions and 223 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"isobject": "^4.0.0",
"json-diff": "^1.0.0",
"keyv": "^4.5.2",
"level": "^8.0.0",
"lodash.omit": "^4.5.0",
"lqip-modern": "^2.0.0",
"make-dir": "^3.1.0",
Expand Down
96 changes: 95 additions & 1 deletion pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/lib/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export const imdbRatingsPath = `${dataDir}/title.ratings.tsv`

// local caches
export const imdbMoviesPath = `${outDir}/imdb-movies.json`
export const imdbMoviesDbPath = `${outDir}/imdb-movies-db`
export const rtMoviesPath = `${outDir}/rt-movies.json`
export const wikidataMoviesPath = `${outDir}/wikidata-movies.json`
export const flickMetrixMoviesPath = `${outDir}/flick-metrix-movies.json`
Expand Down
159 changes: 128 additions & 31 deletions src/lib/imdb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import util from 'node:util'

import * as movier from 'movier'
import { parse as parseCSV } from 'csv-parse'
import { Level } from 'level'
import pThrottle from 'p-throttle'

import * as types from '../types'
Expand All @@ -11,13 +12,9 @@ import * as config from './config'
/**
* Rate-limit HTTP requests to IMDB. Note that each call to
* `movier.getTitleDetailsByIMDBId` includes multiple HTTP GET requests.
*
* We're using a modified version of `movier` which removes many of these
* additional requests which fetch data we're not interested in. Otherwise, we
* would need to use a stricter rate-limit here (originally max 1 per 1000ms).
*/
const throttle = pThrottle({
// NOTE(review): `limit` is listed twice (4, then 10). This looks like the
// removed and added lines of a diff rendered together — confirm which value
// is intended; a duplicate key in an object literal is an error in TypeScript.
limit: 4,
limit: 10,
// presumably the window, in milliseconds, that `limit` applies to — verify
// against the p-throttle documentation
interval: 2000
})

Expand All @@ -37,36 +34,30 @@ export const getTitleDetailsByIMDBId = throttle((titleId: string) =>
mainRate: true,
allRates: true,
runtime: true
// directors: false,
// writers: false,
// producers: false,
// casts: false,
// posterImage: false,
// allImages: false,
// goofs: false,
// quotes: false,
// taglines: false,
// productionCompanies: false,
// awards: false,
// awardsSummary: false,
// dates: false,
// allReleaseDates: false
}
})
)

export async function loadIMDBMoviesFromCache(): Promise<types.IMDBMovies> {
let imdbMovies: types.IMDBMovies = {}

try {
console.log(`loading IMDB movies from cache (${config.imdbMoviesPath})`)

imdbMovies = JSON.parse(
await fs.readFile(config.imdbMoviesPath, { encoding: 'utf-8' })
)

console.warn(
`loaded ${Object.keys(imdbMovies).length} IMDB movies from cache (${
config.imdbMoviesPath
})`
)
} catch (err) {
console.warn(
`warn: unable to load IMDB movie cache (${config.imdbMoviesPath})`,
err.toString()
)
console.warn(
"You can safely ignore this warning if you haven't run `populate-imdb-movies.ts`."
)
}

return imdbMovies
/**
 * Creates a `Level` database handle for the IMDB movies store located at
 * `config.imdbMoviesDbPath`, with values encoded as JSON, and waits for it to
 * be opened before handing it back to the caller.
 *
 * @returns the opened database, keyed by string with `types.imdb.Movie` values
 */
export async function loadIMDBMoviesDB() {
  const moviesDb = new Level<string, types.imdb.Movie>(
    config.imdbMoviesDbPath,
    { valueEncoding: 'json' }
  )

  await moviesDb.open()

  return moviesDb
}

export async function loadIMDBRatingsFromDataDump(): Promise<types.IMDBRatings> {
Expand Down Expand Up @@ -111,3 +102,109 @@ export async function loadIMDBRatingsFromDataDump(): Promise<types.IMDBRatings>

return imdbRatings
}

/**
 * Augments a normalized TMDB movie with additional metadata from IMDB.
 *
 * In most cases, we prefer the IMDB data over TMDB equivalents.
 *
 * When an `imdbMovie` record is supplied, this function also filters out
 * movies whose (merged) genres include `short` by returning `null`.
 *
 * Note that `movie` is mutated in place; the returned object (when non-null)
 * is the same instance that was passed in.
 *
 * @param movie - Movie to augment. Returned untouched if it has no `imdbId`.
 * @param imdbRatings - Optional ratings lookup keyed by IMDB id.
 * @param imdbMovie - Optional detailed IMDB record for this movie.
 * @returns The augmented movie, or `null` if the movie should be ignored.
 */
export function populateMovieWithIMDBInfo(
  movie: types.Movie,
  {
    imdbRatings,
    imdbMovie
  }: { imdbRatings?: types.IMDBRatings; imdbMovie?: types.imdb.Movie }
): types.Movie | null {
  if (!movie.imdbId) {
    return movie
  }

  const imdbRating = imdbRatings ? imdbRatings[movie.imdbId] : null
  let hasIMDBRating = false

  if (imdbMovie) {
    if (imdbMovie.genres?.length) {
      const genres = imdbMovie.genres.map((genre) => genre.toLowerCase())

      // merge with the existing genres and ensure the result is unique
      movie.genres = Array.from(new Set(movie.genres.concat(genres)))
    }

    if (imdbMovie.keywords?.length) {
      movie.keywords = imdbMovie.keywords
    }

    if (imdbMovie.countriesOfOrigin?.length) {
      movie.countriesOfOrigin = imdbMovie.countriesOfOrigin
    }

    if (imdbMovie.languages?.length) {
      movie.languages = imdbMovie.languages
    }

    if (imdbMovie.ageCategoryTitle) {
      movie.mpaaRating = imdbMovie.ageCategoryTitle
    }

    // Normalize whitespace once so that the truncation check and the
    // "... read all" clean-up below both see the same string. Previously the
    // check used the trimmed plot but the `$`-anchored regex ran on the
    // untrimmed one, so trailing whitespace left the marker in place.
    // Trimming also keeps a whitespace-only IMDB plot from clobbering a
    // usable TMDB plot.
    const imdbPlot = imdbMovie.plot?.trim()
    if (imdbPlot) {
      const isTruncated = imdbPlot.endsWith('Read all')

      // ignore truncated IMDB plots when we already have a plot; otherwise
      // favor the IMDB plot over the TMDB plot
      if (!(movie.plot && isTruncated)) {
        movie.plot = imdbPlot.replace(/\.\.\. read all$/i, '...')
      }
    }

    if (imdbMovie.boxOffice) {
      if (imdbMovie.boxOffice.budget > 0) {
        movie.budget = `${imdbMovie.boxOffice.budget}`
      }

      if (imdbMovie.boxOffice.worldwide > 0) {
        movie.revenue = `${imdbMovie.boxOffice.worldwide}`
      }
    }

    if (imdbMovie.mainRate?.rateSource?.toLowerCase() === 'imdb') {
      hasIMDBRating = true
      movie.imdbRating = imdbMovie.mainRate.rate
      movie.imdbVotes = imdbMovie.mainRate.votesCount
    }

    const metacriticRate = imdbMovie.allRates?.find(
      (rate) => rate.rateSource?.toLowerCase() === 'metacritics'
    )
    if (metacriticRate) {
      movie.metacriticRating = metacriticRate.rate
      movie.metacriticVotes = metacriticRate.votesCount
    }

    movie.imdbType = imdbMovie.mainType

    const genres = new Set(movie.genres)
    if (genres.has('short')) {
      if (imdbMovie.mainType === 'movie') {
        movie.imdbType = 'short'
      }

      // ignore IMDB-labeled short films
      return null
    }
  }

  if (imdbRating) {
    // if we have IMDB ratings from two sources, take the one with more votes,
    // which is likely to be more recent. A missing vote count on the existing
    // rating is treated as zero votes; comparing against `undefined` would
    // always be false and silently discard the ratings dump.
    if (!hasIMDBRating || imdbRating.numVotes > (movie.imdbVotes ?? 0)) {
      movie.imdbRating = imdbRating.rating
      movie.imdbVotes = imdbRating.numVotes
    }
  }

  return movie
}
Loading

0 comments on commit 4e2992f

Please sign in to comment.