Skip to content

Commit

Permalink
Merge pull request #257 from D10S0VSkY-OSS/256-deploy-record-stuck
Browse files Browse the repository at this point in the history
🐛fix: deploy stack locked when worker crash
  • Loading branch information
D10S0VSkY-OSS authored Mar 30, 2024
2 parents cc92fb1 + 5ba9a36 commit ef10319
Show file tree
Hide file tree
Showing 15 changed files with 133 additions and 16 deletions.
2 changes: 1 addition & 1 deletion play-with-sld/kubernetes/k8s/sld-api-backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ spec:
subdomain: primary
containers:
- name: api-backend
image: d10s0vsky/sld-api:v3.6.1
image: d10s0vsky/sld-api:v3.6.2
imagePullPolicy: Always
command: ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"]
ports:
Expand Down
2 changes: 1 addition & 1 deletion play-with-sld/kubernetes/k8s/sld-dashboard.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ spec:
subdomain: primary
containers:
- name: sld-dashboard
image: d10s0vsky/sld-dashboard:v3.6.1
image: d10s0vsky/sld-dashboard:v3.6.2
env:
- name: PATH
value: "/home/sld/.asdf/shims:/home/sld/.asdf/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
Expand Down
2 changes: 1 addition & 1 deletion play-with-sld/kubernetes/k8s/sld-worker-default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ spec:
subdomain: primary
containers:
- name: stack-deploy-worker-default
image: d10s0vsky/sld-api:v3.6.1
image: d10s0vsky/sld-api:v3.6.2
imagePullPolicy: Always
env:
- name: TF_WARN_OUTPUT_ERRORS
Expand Down
2 changes: 1 addition & 1 deletion play-with-sld/kubernetes/k8s/sld-worker-squad1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ spec:
subdomain: primary
containers:
- name: stack-deploy-worker-squad1
image: d10s0vsky/sld-api:v3.6.1
image: d10s0vsky/sld-api:v3.6.2
imagePullPolicy: Always
env:
- name: TF_WARN_OUTPUT_ERRORS
Expand Down
2 changes: 1 addition & 1 deletion play-with-sld/kubernetes/k8s/sld-worker-squad2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ spec:
subdomain: primary
containers:
- name: stack-deploy-worker-squad2
image: d10s0vsky/sld-api:v3.6.1
image: d10s0vsky/sld-api:v3.6.2
imagePullPolicy: Always
env:
- name: TF_WARN_OUTPUT_ERRORS
Expand Down
2 changes: 1 addition & 1 deletion sld-api-backend/src/deploy/api/container/deploy/delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ async def delete_infra_by_id(
try:
# Check deploy state
if not check_deploy_state(deploy_data.task_id):
raise ValueError("Deploy state running, cannot upgrade")
raise ValueError("The deployment task is locked and cannot be upgraded. If you wish to proceed with the change, you can force the deletion of the task.")
# Delete deploy db by id
crud_deploys.delete_deploy_by_id(db=db, deploy_id=deploy_id, squad=squad)
# push task destroy to queue and return task_id
Expand Down
2 changes: 1 addition & 1 deletion sld-api-backend/src/deploy/api/container/deploy/destroy.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ async def destroy_infra(
try:
# Check deploy state
if not check_deploy_state(deploy_data.task_id):
raise ValueError("Deploy state running, cannot upgrade")
raise ValueError("The deployment task is locked and cannot be upgraded. If you wish to proceed with the change, you can force the deletion of the task.")
# push task destroy to queue and return task_id
pipeline_destroy = async_destroy(DeployParams(
git_repo=git_repo,
Expand Down
2 changes: 1 addition & 1 deletion sld-api-backend/src/deploy/api/container/deploy/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ async def deploy_by_id(
check_cron_schedule(deploy_update.destroy_time)
# Check deploy state
if not check_deploy_state(deploy_data.task_id):
raise ValueError("Deploy state running, cannot upgrade")
raise ValueError("The deployment task is locked and cannot be upgraded. If you wish to proceed with the change, you can force the deletion of the task.")
# push task Deploy Update to queue and return task_id
pipeline_deploy = async_deploy(DeployParams(
git_repo=git_repo,
Expand Down
2 changes: 1 addition & 1 deletion sld-api-backend/src/deploy/api/container/plan/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ async def get_plan_by_id_deploy(
try:
# Check deploy state
if not check_deploy_state(deploy_data.task_id):
raise ValueError("Deploy state running, cannot upgrade")
raise ValueError("The deployment task is locked and cannot be upgraded. If you wish to proceed with the change, you can force the deletion of the task.")
# push task Deploy to queue and return task_id
pipeline_plan = async_plan(
git_repo,
Expand Down
2 changes: 1 addition & 1 deletion sld-api-backend/src/deploy/api/container/plan/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ async def update_plan_by_id(
check_cron_schedule(deploy_update.destroy_time)
# Check deploy state
if not check_deploy_state(deploy_data.task_id):
raise ValueError("Deploy state running, cannot upgrade")
raise ValueError("The deployment task is locked and cannot be upgraded. If you wish to proceed with the change, you can force the deletion of the task.")
# push task Deploy to queue and return task_id
pipeline_plan = async_plan(DeployParams(
git_repo=git_repo,
Expand Down
13 changes: 9 additions & 4 deletions sld-api-backend/src/tasks/api/container/delete.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
from config.celery_config import celery_app
from fastapi import Depends
from sqlalchemy.orm import Session
from fastapi import Depends, HTTPException

from src.shared.security import deps
from src.users.domain.entities import users as schemas_users
from src.tasks.infrastructure.repositories import delete_celery_task_meta_by_task_id


async def get_task_by_id(
task_id: str,
db: Session = Depends(deps.get_db),
current_user: schemas_users.User = Depends(deps.get_current_active_user),
):
result = celery_app.control.revoke(task_id, terminate=True)
return {"result": f"REVOKE {task_id}"}
try:
delete_celery_task_meta_by_task_id(db=db, task_id=task_id)
return {"result": f"REVOKE {task_id}"}
except Exception as err:
raise HTTPException(status_code=500, detail=str(err))
18 changes: 17 additions & 1 deletion sld-api-backend/src/tasks/infrastructure/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime

from config.database import Base
from sqlalchemy import Column, DateTime, Integer, String
from sqlalchemy import Column, DateTime, Integer, String, Text, LargeBinary


class Tasks(Base):
Expand All @@ -14,3 +14,19 @@ class Tasks(Base):
squad = Column(String(50), nullable=False)
action = Column(String(50), nullable=False)
created_at = Column(DateTime, default=datetime.datetime.now())


# ORM mapping for the "celery_taskmeta" table; task_id is unique, so there is
# at most one row per task id.
# NOTE(review): presumably this mirrors the table written by Celery's database
# result backend rather than a table owned by this project - confirm before
# changing any column definition.
class CeleryTaskMeta(Base):
__tablename__ = "celery_taskmeta"
# Auto-incrementing surrogate primary key.
id = Column(Integer, primary_key=True, autoincrement=True)
# Task identifier (unique, up to 155 characters).
task_id = Column(String(155), unique=True)
# Status string of the task (up to 50 characters).
status = Column(String(50))
# Raw bytes; NOTE(review): presumably the serialized task result - confirm.
result = Column(LargeBinary)
# Timestamp column; NOTE(review): presumably when the task finished - confirm.
date_done = Column(DateTime)
# Free-form text; by its name, the traceback of a failed task.
traceback = Column(Text)
# Name string (up to 155 characters); NOTE(review): presumably the task name.
name = Column(String(155))
# Raw bytes; NOTE(review): presumably the serialized positional arguments.
args = Column(LargeBinary)
# Raw bytes; NOTE(review): presumably the serialized keyword arguments.
kwargs = Column(LargeBinary)
# Worker string (up to 155 characters).
worker = Column(String(155))
# Integer counter; by its name, the number of retries.
retries = Column(Integer)
# Queue string (up to 155 characters).
queue = Column(String(155))
14 changes: 14 additions & 0 deletions sld-api-backend/src/tasks/infrastructure/repositories.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,17 @@ def get_tasks_by_deploy_id(db: Session, deploy_id: int):
return db.query(models.Tasks).filter(models.Tasks.deploy_id == deploy_id).all()
except Exception as err:
raise err


def delete_celery_task_meta_by_task_id(db: Session, task_id: str):
    """Delete the ``celery_taskmeta`` row stored for ``task_id``.

    Args:
        db: SQLAlchemy session used for the lookup, the delete and the commit.
        task_id: Value matched against ``CeleryTaskMeta.task_id``.

    Returns:
        True when a matching row was found and deleted, False when no row
        matched.

    Raises:
        Exception: Any error raised by the session is propagated unchanged
            after the pending transaction has been rolled back.
    """
    try:
        db_task_meta = (
            db.query(models.CeleryTaskMeta)
            .filter(models.CeleryTaskMeta.task_id == task_id)
            .first()
        )
        if db_task_meta is None:
            return False
        db.delete(db_task_meta)
        db.commit()
        return True
    except Exception:
        # A failed flush/commit leaves the session in a broken transaction;
        # roll it back so the session stays usable, then re-raise with the
        # original traceback intact.
        db.rollback()
        raise

24 changes: 23 additions & 1 deletion sld-dashboard/app/home/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,29 @@ def destroy_deploy_console(deploy_id):
except Exception:
return render_template("page-500.html"), 500


@blueprint.route("/task/<task_id>")
@login_required
def unlock_task(task_id):
    """Force-remove a locked task through the API, then return to the deploy list.

    Sends ``DELETE tasks/id/<task_id>`` with the current user's bearer token,
    flashes a success message on HTTP 200 or the API's ``detail`` message
    otherwise, and redirects to the ``deploys-list`` template.

    A ``TemplateNotFound`` renders the 404 page; a ``TypeError`` raised
    anywhere in the flow redirects to the logout route.
    """
    try:
        token = decrypt(r.get(current_user.id))
        # Make sure the token has not expired before calling the API
        check_unauthorized_token(token)
        auth_headers = {"Authorization": f"Bearer {token}"}
        response = request_url(
            verb="DELETE", uri=f"tasks/id/{task_id}", headers=auth_headers
        )
        if response.get("status_code") != 200:
            flash(response["json"]["detail"], "error")
        else:
            flash("Delete task id locked")
        deploys_list_url = url_for(
            "home_blueprint.route_template", template="deploys-list"
        )
        return redirect(deploys_list_url)
    except TemplateNotFound:
        return render_template("page-404.html"), 404
    except TypeError:
        return redirect(url_for("base_blueprint.logout"))

@blueprint.route("/deploys/unlock/<int:deploy_id>")
@login_required
def unlock_deploy(deploy_id):
Expand Down
60 changes: 60 additions & 0 deletions sld-dashboard/app/home/templates/deploys-list.html
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,16 @@ <h2 class="h4">All Deploys</h2>
<span class="fas fa-skull-crossbones mr-2"></span>
Delete
</a>
<!-- Task Unlock -->
<a
title="Force remove task locked {{deploy.task_id}}"
class="dropdown-item"
data-toggle="modal"
data-target="#TaskModalCenter-{{deploy.task_id}}"
>
<span class="fas fa-tasks mr-2"></span>
Task
</a>
</div>
{% else %}
<div class="dropdown-menu">
Expand Down Expand Up @@ -300,6 +310,16 @@ <h2 class="h4">All Deploys</h2>
<span class="fas fa-unlock mr-2"></span>
Unlock
</a>
<!-- Task Unlock -->
<a
title="Force remove task locked {{deploy.task_id}}"
class="dropdown-item"
data-toggle="modal"
data-target="#TaskModalCenter-{{deploy.task_id}}"
>
<span class="fas fa-tasks mr-2"></span>
Task
</a>
</span>
{% endif %}
</div>
Expand All @@ -321,6 +341,7 @@ <h2 class="h4">All Deploys</h2>
>
<span class="far fa-file-code mr-0"></span>
</a>

</span>
</div>
</td>
Expand Down Expand Up @@ -365,6 +386,45 @@ <h5 class="modal-title" id="UnlockModalLongTitle-{{deploy.id}}">
</div>
</div>
</div>
<!-- Modal Task-->
<div class="modal fade" id="TaskModalCenter-{{deploy.task_id}}">
<div class="modal-dialog modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title" id="TaskModalLongTitle-{{deploy.task_id}}">
Unlock Deploy
{{deploy.name}}
</h5>
<button
type="button"
class="close"
data-dismiss="modal"
aria-label="Close"
>
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<p>
<strong>
Force removal of the locked task {{deploy.task_id}}.
The lock protects the state from being written by multiple users at the same time.

</strong>
</p>
<p>
<strong>Are you sure?</strong>
</p>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-primary" data-dismiss="modal">Cancel</button>
<form action="{{ url_for('.unlock_task', task_id=deploy.task_id)}}">
<input type="submit" value="Unlock" class="btn btn-danger">
</form>
</div>
</div>
</div>
</div>
<!-- Modal Output-->
<div
class="modal fade"
Expand Down

0 comments on commit ef10319

Please sign in to comment.