Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Slow 'Find neighbors of users' task #854

Open
erfansahaf opened this issue Aug 18, 2024 · 0 comments
Open

Slow 'Find neighbors of users' task #854

erfansahaf opened this issue Aug 18, 2024 · 0 comments
Labels
bug Something isn't working

Comments

@erfansahaf
Copy link

Please answer these questions before submitting your issue. Thanks!

Gorse version
0.4.15

Describe the bug
I have a Gorse instance deployed on a Kubernetes cluster with master, server, proxy, and worker instances. We have about 1.5 million users with approximately 20 labels per user. The issue is that the "Find neighbors of users" task is very slow: the instance has been up for more than 3 days, but this task has made no progress.

image

Expected behavior
I want this task to be fast. If I need to scale a service, which one should it be?

Additional context

Cache Store: Redis

Database: MySQL

Configs:

[database]
    # Cache store: Redis connection URI.
    cache_store = "redis://host:6379"
    # Data store: MySQL DSN.
    data_store = "mysql://user:pass@tcp(host)/database_name"
    # All three table-name prefixes are left empty.
    table_prefix = ""
    cache_table_prefix = ""
    data_table_prefix = ""

    [master]
    host = "0.0.0.0"
    port = 8086
    http_host = "0.0.0.0"
    http_port = 8088
    # Empty lists: no CORS domains or methods are configured.
    http_cors_domains = []
    http_cors_methods = []
    n_jobs = 20
    meta_timeout = "10s"
    # Dashboard credentials and the admin API key are blank in this paste.
    dashboard_user_name = ""
    dashboard_password = ""
    admin_api_key = ""

    [server]
    default_n = 10
    # API key is blank in this paste.
    api_key = ""
    clock_error = "5s"
    cache_expire = "10s"
    # Both auto-insert switches are off.
    auto_insert_user = false
    auto_insert_item = false

    [recommend]
    # Cache size for recommended/popular/latest items (default: 10).
    cache_size = 500
    # Expiry time of the recommendation cache (default: 72h).
    cache_expire = "10m"
    # Active-user time-to-live in days; 0 disables it (default: 0).
    # Recommendations are not cached for inactive users.
    active_user_ttl = 20

    [recommend.data_source]
    # Feedback types counted as positive events.
    positive_feedback_types = ["open", "watch", "like"]
    # Feedback types counted as read events.
    read_feedback_types = ["read"]
    # Positive-feedback time-to-live in days; 0 disables it (default: 0).
    positive_feedback_ttl = 90
    # Item time-to-live in days; 0 disables it (default: 0).
    item_ttl = 60

    [recommend.popular]
    # Time window used for popular items (default: 4320h).
    popular_window = "720h"

    [recommend.user_neighbors]
    neighbor_type = "similar"
    # The index is enabled, with a recall of 0.8 and 3 fit epochs.
    enable_index = true
    index_recall = 0.8
    index_fit_epoch = 3

    [recommend.item_neighbors]
    neighbor_type = "similar"
    # The index is enabled, with a recall of 0.8 and 3 fit epochs.
    enable_index = true
    index_recall = 0.8
    index_fit_epoch = 3

    [recommend.collaborative]
    enable_index = true
    # Minimum recall for approximate collaborative-filtering recommendation (default: 0.9).
    index_recall = 0.9
    # Maximum number of fit epochs for the approximate collaborative-filtering
    # vector index (default: 3).
    index_fit_epoch = 3
    # Model fit and model search settings.
    model_fit_period = "60m"
    model_search_period = "360m"
    model_search_epoch = 100
    model_search_trials = 10
    enable_model_size_search = false

    [recommend.replacement]
    # Replacement is enabled, with separate decay values for positive and read feedback.
    enable_replacement = true
    positive_replacement_decay = 0.8
    read_replacement_decay = 0.2

    [recommend.offline]
    # Check and refresh periods.
    check_recommend_period = "1m"
    refresh_recommend_period = "4h"
    # Every offline recommender listed here is switched on.
    enable_latest_recommend = true
    enable_popular_recommend = true
    enable_user_based_recommend = true
    enable_item_based_recommend = true
    enable_collaborative_recommend = true
    enable_click_through_prediction = true
    explore_recommend = { popular = 0.2, latest = 0.1 }

    [recommend.online]
    # Fallback recommenders, listed as "item_based" then "popular".
    fallback_recommend = ["item_based", "popular"]
    num_feedback_fallback_item_based = 10

    [tracing]
    # Tracing is switched off; the exporter settings below are still present.
    enable_tracing = false
    exporter = "jaeger"
    collector_endpoint = "http://localhost:14268/api/traces"
    sampler = "always"
    ratio = 1

    [experimental]
    # Deep learning is switched off; a batch size is still configured.
    enable_deep_learning = false
    deep_learning_batch_size = 128
@erfansahaf erfansahaf added the bug Something isn't working label Aug 18, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

1 participant