Skip to content

Commit

Permalink
fix: better vector similarity for job matching
Browse files (browse the repository at this point in the history)
  • Loading branch information
thomasdavis committed Dec 4, 2024
1 parent 4cb4eab commit 2b2c67c
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 14 deletions.
46 changes: 33 additions & 13 deletions apps/registry/pages/api/jobs.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,31 @@ export default async function handler(req, res) {
.eq('username', username);

const resume = JSON.parse(data[0].resume);

// Generate a natural language description of the resume
const resumeCompletion = await openai.chat.completions.create({
model: 'gpt-3.5-turbo-16k',
messages: [
{
role: 'system',
content:
"You are a professional resume analyzer. Create a detailed professional summary that describes this candidate's background, skills, and experience in natural language. Focus on their expertise, achievements, and what makes them unique. Write it in a style similar to job descriptions to optimize for semantic matching. Do not include the candidates name. Be terse as most job descriptions are.",
},
{
role: 'user',
content: JSON.stringify(resume),
},
],
temperature: 0.85,
});

const resumeDescription = resumeCompletion.choices[0].message.content;

console.log({ resumeDescription });

const completion = await openai.embeddings.create({
model: 'text-embedding-3-large',
input: JSON.stringify({
skills: resume.skills,
work: resume.work,
summary: resume.summary,
education: resume.education,
awards: resume.awards,
basics: resume.basics,
interests: resume.interests,
}),
input: resumeDescription,
});

const desiredLength = 3072;
Expand All @@ -47,8 +61,8 @@ export default async function handler(req, res) {

const { data: documents } = await supabase.rpc('match_jobs_v5', {
query_embedding: embedding,
match_threshold: 0.14, // Choose an appropriate threshold for your data
match_count: 60, // Choose the number of matches
match_threshold: 0.02, // Choose an appropriate threshold for your data
match_count: 200, // Choose the number of matches
});

console.log({ documents });
Expand All @@ -58,8 +72,14 @@ export default async function handler(req, res) {
const jobIds = documents ? sortedDocuments.map((doc) => doc.id) : [];

const { data: jobs } = await supabase.from('jobs').select().in('id', jobIds);
// sort jobs in the same order as jobIds by id
const sortedJobs = jobIds.map((id) => jobs.find((job) => job.id === id));
// sort jobs in the same order as jobIds by id and add similarity scores
const sortedJobs = jobIds.map((id, index) => {
const job = jobs.find((job) => job.id === id);
return {
...job,
similarity: documents[index].similarity,
};
});

const filteredJobs = sortedJobs.filter(
(job) =>
Expand Down
2 changes: 1 addition & 1 deletion apps/registry/scripts/jobs/hackernews.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const supabaseKey = process.env.SUPABASE_KEY;
const supabase = createClient(supabaseUrl, supabaseKey);

const HN_API = 'https://hn.algolia.com/api/v1/items/';
const WHO_IS_HIRING_ITEM_ID = 41425910;
const WHO_IS_HIRING_ITEM_ID = 42297424;

async function main() {
const response = await axios.get(`${HN_API}${WHO_IS_HIRING_ITEM_ID}`);
Expand Down

0 comments on commit 2b2c67c

Please sign in to comment.