Slow 'Find neighbors of users' task #854

erfansahaf · 2024-08-18T09:29:45Z

Please answer these questions before submitting your issue. Thanks!

Gorse version
0.4.15

Describe the bug
I have a Gorse instance deployed on a Kubernetes cluster with master, server, proxy, and worker instances. We have about 1.5 million users with approximately 20 labels per user. The problem is that the 'Find neighbors of users' task is very slow: the instance has been up for more than 3 days, but this task is still not progressing.

Expected behavior
I want this task to be fast. If I need to scale a service, which one should it be?

Additional context

Cache Store: Redis

Database: MySQL

Configs:

# Storage back ends for this deployment: a Redis cache store and a MySQL
# data store, matching the "Cache Store: Redis" / "Database: MySQL" notes
# in the report. Host, user, password and database name appear to be
# redacted placeholders.
[database]
    cache_store = "redis://host:6379"

    data_store = "mysql://user:pass@tcp(host)/database_name"
    # All three prefixes are empty strings.
    # NOTE(review): presumably this means no table-name prefix is applied; confirm against the Gorse docs.
    table_prefix = ""
    cache_table_prefix = ""
    data_table_prefix = ""

    # Master node settings: two listeners (port 8086 and http_port 8088),
    # both bound to all interfaces (0.0.0.0).
    [master]
    port = 8086
    host = "0.0.0.0"
    http_port = 8088
    http_host = "0.0.0.0"

    # Both CORS lists are empty.
    http_cors_domains = []
    http_cors_methods = []

    # NOTE(review): presumably the number of parallel jobs the master may use.
    # Worth checking first for the slow neighbor task reported here, if that
    # task runs on the master; confirm against the Gorse docs.
    n_jobs = 20

    meta_timeout = "10s"

    # Dashboard credentials and the admin API key are all empty strings.
    dashboard_user_name = ""

    dashboard_password = ""
    admin_api_key = ""

    # Server node settings.
    [server]

    # NOTE(review): presumably the default number of items returned per request; confirm.
    default_n = 10
    
    # Empty API key.
    api_key = ""

    clock_error = "5s"

    # Automatic insertion of unknown users and items is turned off.
    auto_insert_user = false

    auto_insert_item = false

    # This is the server-level cache_expire (10s); [recommend] sets its own
    # cache_expire separately.
    cache_expire = "10s"

    # Top-level recommendation settings. Two of the three values below differ
    # markedly from the defaults quoted in their own comments.
    [recommend]

    # The cache size for recommended/popular/latest items. The default value is 10.
    # Set to 500 here, 50x the stated default.
    cache_size = 500

    # Recommended cache expire time. The default value is 72h.
    # Set to 10m here, far shorter than the stated default.
    # NOTE(review): such a short expiry may cause cached recommendations to be
    # regenerated very often; confirm whether this adds to the reported load.
    cache_expire = "10m"

    # The time-to-live (days) of active users, 0 means disabled. Recommendation won't be cached for inactive users. The default value is 0.
    active_user_ttl = 20

    # Which feedback types count as positive/read events, and how long
    # feedback and items are kept (TTL values are in days).
    [recommend.data_source]

    # The feedback types for positive events.
    positive_feedback_types = ["open", "watch","like"]

    # The feedback types for read events.
    read_feedback_types = ["read"]

    # The time-to-live (days) of positive feedback, 0 means disabled. The default value is 0.
    positive_feedback_ttl = 90

    # The time-to-live (days) of items, 0 means disabled. The default value is 0.
    item_ttl = 60

    # Popular-items settings.
    [recommend.popular]

    # The time window of popular items. The default value is 4320h.
    # 720h is 30 days; the stated default of 4320h is 180 days.
    popular_window = "720h"

    # User-neighbor settings.
    # NOTE(review): presumably this table configures the "Find neighbors of
    # users" task that the report describes as slow; confirm against the Gorse docs.
    [recommend.user_neighbors]

    # NOTE(review): the meaning of "similar" is not shown in this file; with
    # ~20 labels per user (per the report) it may select label-based
    # similarity. Confirm.
    neighbor_type = "similar"

    # An index is enabled for the neighbor search.
    enable_index = true

    # NOTE(review): presumably the same meaning as the identically named keys
    # documented under [recommend.collaborative] (minimal recall / maximal
    # number of fit epochs for the index); confirm.
    index_recall = 0.8

    index_fit_epoch = 3

    # Item-neighbor settings; the values are identical to those in
    # [recommend.user_neighbors].
    [recommend.item_neighbors]

    # NOTE(review): the meaning of "similar" is not shown in this file; confirm
    # against the Gorse docs.
    neighbor_type = "similar"

    # An index is enabled for the neighbor search.
    enable_index = true

    # NOTE(review): presumably the same meaning as the identically named keys
    # documented under [recommend.collaborative] (minimal recall / maximal
    # number of fit epochs for the index); confirm.
    index_recall = 0.8

    index_fit_epoch = 3

    # Collaborative-filtering settings: approximate (vector index) search
    # plus model fit/search scheduling.
    [recommend.collaborative]

    # The vector index for approximate recommendation is enabled.
    enable_index = true

    # Minimal recall for approximate collaborative filtering recommend. The default value is 0.9.
    index_recall = 0.9

    # Maximal number of fit epochs for approximate collaborative filtering recommend vector index. The default value is 3.
    index_fit_epoch = 3
    # NOTE(review): presumably the model is refitted every 60m and a
    # hyper-parameter search runs every 360m; confirm against the Gorse docs.
    model_fit_period = "60m"
    model_search_period = "360m"
    # NOTE(review): presumably epochs per search trial; confirm.
    model_search_epoch = 100

    # NOTE(review): presumably the number of trials per model search; confirm.
    model_search_trials = 10

    # Model-size search is turned off.
    enable_model_size_search = false

    # Replacement settings; replacement is enabled.
    # NOTE(review): presumably controls whether items a user has already
    # interacted with may re-enter recommendations, with a per-feedback-kind
    # decay factor; confirm against the Gorse docs.
    [recommend.replacement]

    enable_replacement = true

    # Decay applied for positive feedback (0.8) versus read feedback (0.2).
    positive_replacement_decay = 0.8

    read_replacement_decay = 0.2

    # Offline recommendation settings: check/refresh periods and which
    # recommenders are enabled (all six switches below are on).
    [recommend.offline]
    # NOTE(review): presumably recommendations are checked every 1m and
    # refreshed every 4h; confirm against the Gorse docs.
    check_recommend_period = "1m"
    refresh_recommend_period = "4h"
    enable_latest_recommend = true
    enable_popular_recommend = true
    # The user-based recommender is enabled.
    # NOTE(review): it presumably depends on the user-neighbor results this
    # report is about; confirm.
    enable_user_based_recommend = true
    enable_item_based_recommend = true
    enable_collaborative_recommend = true
    enable_click_through_prediction = true

    # Exploration weights: 0.2 for popular items, 0.1 for latest items.
    explore_recommend = { popular = 0.2, latest = 0.1 }

    # Online recommendation settings.
    [recommend.online]
    # Fallback recommenders, listed as item_based first, then popular.
    # NOTE(review): presumably tried in this order; confirm.
    fallback_recommend = ["item_based", "popular"]

    # NOTE(review): presumably the number of recent feedback entries used by
    # the item_based fallback; confirm against the Gorse docs.
    num_feedback_fallback_item_based = 10

    # Tracing settings. Tracing is disabled (enable_tracing = false).
    # NOTE(review): the remaining keys are presumably unused while it is off; confirm.
    [tracing]

    enable_tracing = false

    # Exporter type and the endpoint it would send traces to.
    exporter = "jaeger"

    collector_endpoint = "http://localhost:14268/api/traces"

    # Sampler mode and ratio.
    sampler = "always"

    ratio = 1

    # Experimental features. Deep learning is disabled.
    # NOTE(review): the batch size is presumably unused while it is off; confirm.
    [experimental]
    enable_deep_learning = false
    deep_learning_batch_size = 128

The text was updated successfully, but these errors were encountered:

erfansahaf added the bug Something isn't working label Aug 18, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Slow 'Find neighbors of users' task #854

Slow 'Find neighbors of users' task #854

erfansahaf commented Aug 18, 2024

Slow 'Find neighbors of users' task #854

Slow 'Find neighbors of users' task #854

Comments

erfansahaf commented Aug 18, 2024