diff --git a/master/tasks.go b/master/tasks.go index 5c3c8ba58..231c81ba9 100644 --- a/master/tasks.go +++ b/master/tasks.go @@ -310,6 +310,7 @@ func (t *FindItemNeighborsTask) run(j *task.JobsAllocator) error { t.taskMonitor.Add(TaskFindItemNeighbors, len(dataset.ItemLabels)) // inverse document frequency of labels for i := range labeledItems { + labeledItems[i] = lo.Uniq(labeledItems[i]) if dataset.ItemCount() == len(labeledItems[i]) { labelIDF[i] = 1 } else { @@ -634,6 +635,7 @@ func (t *FindUserNeighborsTask) run(j *task.JobsAllocator) error { t.taskMonitor.Add(TaskFindUserNeighbors, len(dataset.UserLabels)) // inverse document frequency of labels for i := range labeledUsers { + labeledUsers[i] = lo.Uniq(labeledUsers[i]) if dataset.UserCount() == len(labeledUsers[i]) { labelIDF[i] = 1 } else { diff --git a/master/tasks_test.go b/master/tasks_test.go index b7674972e..be1e974ee 100644 --- a/master/tasks_test.go +++ b/master/tasks_test.go @@ -266,8 +266,8 @@ func TestMaster_FindItemNeighborsIVF_ZeroIDF(t *testing.T) { // create dataset err := m.DataClient.BatchInsertItems(ctx, []data.Item{ - {"0", false, []string{"*"}, time.Now(), []string{"a"}, ""}, - {"1", false, []string{"*"}, time.Now(), []string{"a"}, ""}, + {"0", false, []string{"*"}, time.Now(), []string{"a", "a"}, ""}, + {"1", false, []string{"*"}, time.Now(), []string{"a", "a"}, ""}, }) assert.NoError(t, err) err = m.DataClient.BatchInsertFeedback(ctx, []data.Feedback{ @@ -486,8 +486,8 @@ func TestMaster_FindUserNeighborsIVF_ZeroIDF(t *testing.T) { // create dataset err := m.DataClient.BatchInsertUsers(ctx, []data.User{ - {"0", []string{"a"}, nil, ""}, - {"1", []string{"a"}, nil, ""}, + {"0", []string{"a", "a"}, nil, ""}, + {"1", []string{"a", "a"}, nil, ""}, }) assert.NoError(t, err) err = m.DataClient.BatchInsertFeedback(ctx, []data.Feedback{