Skip to content

Commit

Permalink
optimize fallback item-based recommender (#350)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhenghaoz authored Dec 28, 2021
1 parent f824565 commit 220a034
Show file tree
Hide file tree
Showing 11 changed files with 84 additions and 12 deletions.
5 changes: 5 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ type RecommendConfig struct {
SearchTrials int `toml:"search_trials"`
RefreshRecommendPeriod int `toml:"refresh_recommend_period"`
FallbackRecommend []string `toml:"fallback_recommend"`
NumFeedbackFallbackItemBased int `toml:"num_feedback_fallback_item_based"`
ExploreRecommend map[string]float64 `toml:"explore_recommend"`
ItemNeighborType string `toml:"item_neighbor_type"`
UserNeighborType string `toml:"user_neighbor_type"`
Expand Down Expand Up @@ -167,6 +168,7 @@ func (config *RecommendConfig) LoadDefaultIfNil() *RecommendConfig {
SearchTrials: 10,
RefreshRecommendPeriod: 5,
FallbackRecommend: []string{"latest"},
NumFeedbackFallbackItemBased: 10,
ItemNeighborType: "auto",
UserNeighborType: "auto",
EnableLatestRecommend: false,
Expand Down Expand Up @@ -278,6 +280,9 @@ func (config *Config) FillDefault(meta toml.MetaData) {
if !meta.IsDefined("recommend", "fallback_recommend") {
config.Recommend.FallbackRecommend = defaultRecommendConfig.FallbackRecommend
}
if !meta.IsDefined("recommend", "num_feedback_fallback_item_based") {
config.Recommend.NumFeedbackFallbackItemBased = defaultRecommendConfig.NumFeedbackFallbackItemBased
}
if !meta.IsDefined("recommend", "item_neighbor_type") {
config.Recommend.ItemNeighborType = defaultRecommendConfig.ItemNeighborType
}
Expand Down
3 changes: 3 additions & 0 deletions config/config.toml.template
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ refresh_recommend_period = 1
# Recommenders are used in order. The default value is ["latest"].
fallback_recommend = ["item_based", "latest"]

# The number of most recent feedback records used by the fallback item-based recommender. The default value is 10.
num_feedback_fallback_item_based = 20

# The type of neighbors for items. There are three types:
# similar: Neighbors are found by number of common labels.
# related: Neighbors are found by number of common users.
Expand Down
1 change: 1 addition & 0 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func TestLoadConfig(t *testing.T) {
assert.Equal(t, 1, config.Recommend.RefreshRecommendPeriod)
assert.Equal(t, []string{"item_based", "latest"}, config.Recommend.FallbackRecommend)
assert.Equal(t, map[string]float64{"popular": 0.1, "latest": 0.2}, config.Recommend.ExploreRecommend)
assert.Equal(t, 20, config.Recommend.NumFeedbackFallbackItemBased)
value, exist := config.Recommend.GetExploreRecommend("popular")
assert.Equal(t, true, exist)
assert.Equal(t, 0.1, value)
Expand Down
2 changes: 1 addition & 1 deletion master/tasks.go
Original file line number Diff line number Diff line change
Expand Up @@ -1060,7 +1060,7 @@ func (m *Master) LoadDataFromDatabase(database data.Database, posFeedbackTypes,
}
}
for _, items := range popularItems {
sort.Sort(cache.Scores(items))
cache.SortScores(items)
}

m.taskMonitor.Finish(TaskLoadDataset)
Expand Down
6 changes: 6 additions & 0 deletions misc/database_test/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ services:
ports:
- 6379:6379

pika:
image: zhenghaoz/pika
restart: unless-stopped
ports:
- 9221:9221

mysql:
image: mysql:8.0
ports:
Expand Down
8 changes: 7 additions & 1 deletion server/rest.go
Original file line number Diff line number Diff line change
Expand Up @@ -840,9 +840,15 @@ func (s *RestServer) RecommendItemBased(ctx *recommendContext) error {
return errors.Trace(err)
}
start := time.Now()
// truncate user feedback
data.SortFeedbacks(ctx.userFeedback)
userFeedback := ctx.userFeedback
if s.GorseConfig.Recommend.NumFeedbackFallbackItemBased < len(userFeedback) {
userFeedback = userFeedback[:s.GorseConfig.Recommend.NumFeedbackFallbackItemBased]
}
// collect candidates
candidates := make(map[string]float32)
for _, feedback := range ctx.userFeedback {
for _, feedback := range userFeedback {
// load similar items
similarItems, err := s.CacheClient.GetCategoryScores(cache.ItemNeighbors, feedback.ItemId, ctx.category, 0, s.GorseConfig.Database.CacheSize)
if err != nil {
Expand Down
20 changes: 15 additions & 5 deletions server/rest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1072,17 +1072,19 @@ func TestServer_GetRecommends(t *testing.T) {

func TestServer_GetRecommends_Fallback_ItemBasedSimilar(t *testing.T) {
s := newMockServer(t)
s.GorseConfig.Recommend.NumFeedbackFallbackItemBased = 4
defer s.Close(t)
// insert recommendation
err := s.CacheClient.SetScores(cache.OfflineRecommend, "0",
[]cache.Scored{{"1", 99}, {"2", 98}, {"3", 97}, {"4", 96}})
assert.NoError(t, err)
// insert feedback
feedback := []data.Feedback{
{FeedbackKey: data.FeedbackKey{FeedbackType: "a", UserId: "0", ItemId: "1"}},
{FeedbackKey: data.FeedbackKey{FeedbackType: "a", UserId: "0", ItemId: "2"}},
{FeedbackKey: data.FeedbackKey{FeedbackType: "a", UserId: "0", ItemId: "3"}},
{FeedbackKey: data.FeedbackKey{FeedbackType: "a", UserId: "0", ItemId: "4"}},
{FeedbackKey: data.FeedbackKey{FeedbackType: "a", UserId: "0", ItemId: "1"}, Timestamp: time.Date(2010, 1, 1, 1, 1, 1, 1, time.UTC)},
{FeedbackKey: data.FeedbackKey{FeedbackType: "a", UserId: "0", ItemId: "2"}, Timestamp: time.Date(2009, 1, 1, 1, 1, 1, 1, time.UTC)},
{FeedbackKey: data.FeedbackKey{FeedbackType: "a", UserId: "0", ItemId: "3"}, Timestamp: time.Date(2008, 1, 1, 1, 1, 1, 1, time.UTC)},
{FeedbackKey: data.FeedbackKey{FeedbackType: "a", UserId: "0", ItemId: "4"}, Timestamp: time.Date(2007, 1, 1, 1, 1, 1, 1, time.UTC)},
{FeedbackKey: data.FeedbackKey{FeedbackType: "a", UserId: "0", ItemId: "5"}, Timestamp: time.Date(2006, 1, 1, 1, 1, 1, 1, time.UTC)},
}
apitest.New().
Handler(s.handler).
Expand All @@ -1091,7 +1093,7 @@ func TestServer_GetRecommends_Fallback_ItemBasedSimilar(t *testing.T) {
JSON(feedback).
Expect(t).
Status(http.StatusOK).
Body(`{"RowAffected": 4}`).
Body(`{"RowAffected": 5}`).
End()

// insert similar items
Expand Down Expand Up @@ -1121,6 +1123,14 @@ func TestServer_GetRecommends_Fallback_ItemBasedSimilar(t *testing.T) {
{"9", 1},
})
assert.NoError(t, err)
err = s.CacheClient.SetScores(cache.ItemNeighbors, "5", []cache.Scored{
{"1", 1},
{"6", 1},
{"7", 100000},
{"8", 100},
{"9", 1},
})
assert.NoError(t, err)

// insert similar items of category *
err = s.CacheClient.SetCategoryScores(cache.ItemNeighbors, "1", "*", []cache.Scored{
Expand Down
15 changes: 10 additions & 5 deletions storage/cache/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package cache
import (
"github.com/go-redis/redis/v8"
"github.com/juju/errors"
"sort"
"strings"
"time"
)
Expand Down Expand Up @@ -89,21 +90,25 @@ func RemoveScores(items []Scored) []string {
return ids
}

// Scores is slice of Scored.
type Scores []Scored
// SortScores reorders scores in place so that the highest score comes first
// and the lowest score comes last.
func SortScores(scores []Scored) {
	descending := scoresSorter(scores)
	sort.Sort(descending)
}

type scoresSorter []Scored

// Len is the number of elements in the collection.
func (s Scores) Len() int {
func (s scoresSorter) Len() int {
return len(s)
}

// Less reports whether the element with index i must sort before the element with index j, i.e. whether it has the higher score.
func (s Scores) Less(i, j int) bool {
func (s scoresSorter) Less(i, j int) bool {
return s[i].Score > s[j].Score
}

// Swap swaps the elements with indexes i and j.
func (s Scores) Swap(i, j int) {
func (s scoresSorter) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}

Expand Down
2 changes: 2 additions & 0 deletions storage/cache/database_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ func TestScored(t *testing.T) {
scored := []Scored{{Id: "2", Score: 2}, {Id: "4", Score: 4}, {Id: "6", Score: 6}}
assert.Equal(t, scored, CreateScoredItems(itemIds, scores))
assert.Equal(t, itemIds, RemoveScores(scored))
SortScores(scored)
assert.Equal(t, []Scored{{Id: "6", Score: 6}, {Id: "4", Score: 4}, {Id: "2", Score: 2}}, scored)
}

func TestKey(t *testing.T) {
Expand Down
20 changes: 20 additions & 0 deletions storage/data/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"go.mongodb.org/mongo-driver/mongo"
"go.mongodb.org/mongo-driver/mongo/options"
"go.mongodb.org/mongo-driver/x/mongo/driver/connstring"
"sort"
"strings"
"time"
)
Expand Down Expand Up @@ -82,6 +83,25 @@ type Feedback struct {
Comment string
}

// SortFeedbacks reorders feedback in place by timestamp, placing the most
// recent entry first and the oldest entry last.
func SortFeedbacks(feedback []Feedback) {
	newestFirst := feedbackSorter(feedback)
	sort.Sort(newestFirst)
}

// feedbackSorter adapts a feedback slice to sort.Interface, ordering entries
// by timestamp from newest to oldest.
type feedbackSorter []Feedback

// Len returns the number of feedback entries.
func (fs feedbackSorter) Len() int {
	return len(fs)
}

// Less reports whether entry i is strictly newer than entry j, so that newer
// feedback sorts first.
func (fs feedbackSorter) Less(i, j int) bool {
	return fs[j].Timestamp.Before(fs[i].Timestamp)
}

// Swap exchanges the entries at indexes i and j.
func (fs feedbackSorter) Swap(i, j int) {
	fs[j], fs[i] = fs[i], fs[j]
}

// Measurement stores a statistical value.
type Measurement struct {
Name string
Expand Down
14 changes: 14 additions & 0 deletions storage/data/database_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -633,3 +633,17 @@ func isClickHouse(db Database) bool {
return sqlDB.driver == ClickHouse
}
}

// TestSortFeedbacks checks that SortFeedbacks orders feedback from the newest
// timestamp to the oldest.
func TestSortFeedbacks(t *testing.T) {
	oldest := Feedback{FeedbackKey: FeedbackKey{"star", "1", "1"}, Timestamp: time.Date(2000, 10, 1, 0, 0, 0, 0, time.UTC)}
	middle := Feedback{FeedbackKey: FeedbackKey{"like", "1", "1"}, Timestamp: time.Date(2001, 10, 1, 0, 0, 0, 0, time.UTC)}
	newest := Feedback{FeedbackKey: FeedbackKey{"read", "1", "1"}, Timestamp: time.Date(2002, 10, 1, 0, 0, 0, 0, time.UTC)}

	// Start in ascending order so the sort has to reverse the slice.
	feedback := []Feedback{oldest, middle, newest}
	SortFeedbacks(feedback)

	expected := []Feedback{newest, middle, oldest}
	assert.Equal(t, expected, feedback)
}

0 comments on commit 220a034

Please sign in to comment.