From c6a2d1487408215d4c891b323fd68466bee36ccd Mon Sep 17 00:00:00 2001
From: Shreyas Mocherla <vmgng@umsystem.edu>
Date: Tue, 6 Aug 2024 15:28:33 +0530
Subject: [PATCH] Added manifests and updated app.py

---
 app.py                                |  14 ++-
 k8s-manifests/clusterrole.yaml        |  41 ++++++++
 k8s-manifests/clusterrolebinding.yaml |  13 +++
 k8s-manifests/deploy.yaml             |  72 ++++++++++++++
 k8s-manifests/env.yaml                |  20 ++++
 k8s-manifests/hf.yaml                 |  10 ++
 k8s-manifests/secret.yaml             |   8 ++
 k8s-manifests/svc-template.yaml       |  44 +++++++++
 k8s-manifests/svc.yaml                |  32 +++++++
 k8s-manifests/tgi.yaml                | 132 ++++++++++++++++++++++++++
 k8s-manifests/tokens.yaml             |  10 ++
 11 files changed, 391 insertions(+), 5 deletions(-)
 create mode 100644 k8s-manifests/clusterrole.yaml
 create mode 100644 k8s-manifests/clusterrolebinding.yaml
 create mode 100644 k8s-manifests/deploy.yaml
 create mode 100644 k8s-manifests/env.yaml
 create mode 100644 k8s-manifests/hf.yaml
 create mode 100644 k8s-manifests/secret.yaml
 create mode 100644 k8s-manifests/svc-template.yaml
 create mode 100644 k8s-manifests/svc.yaml
 create mode 100644 k8s-manifests/tgi.yaml
 create mode 100644 k8s-manifests/tokens.yaml

diff --git a/app.py b/app.py
index 8cce554..93c545e 100644
--- a/app.py
+++ b/app.py
@@ -30,6 +30,9 @@
 import yaml
 from yaml.loader import SafeLoader
 
+from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
+from urllib3.exceptions import ProtocolError
+
 st.set_page_config(layout="wide", page_title="InSightful")
 
 def authenticate():
@@ -144,9 +147,10 @@ def __init__(self, llm, embeddings, collection_name, db_client):
         self.collection_name = collection_name
         self.db_client = db_client
 
+    @retry(retry=retry_if_exception_type(ProtocolError), stop=stop_after_attempt(5), wait=wait_fixed(2))
     def load_documents(self, doc, num_docs=250):
         documents = []
-        for data in datasets.load_dataset(doc, split=f"train[:{num_docs}]").to_list():
+        for data in datasets.load_dataset(doc, split=f"train[:{num_docs}]", num_proc=10).to_list():
             documents.append(
                 Document(
                     page_content=data["text"],
@@ -249,7 +253,7 @@ def setup_tools(_model, _client, _chroma_embedding_function, _embedder):
     #    embedder=_embedder,
     #)
 
-    if os.getenv("USE_RERANKER", "False") == "True":
+    if os.getenv("USE_RERANKER", "False").strip().lower() in ("1", "true"):  # env values are str
         retriever = create_reranker_retriever(
             name="slack_conversations_retriever",
             model=_model,
@@ -327,7 +331,7 @@ def main():
     st.session_state["chat_history"] = chat_history
 
 if __name__ == "__main__":
-    authenticator = authenticate()
-    if st.session_state['authentication_status']:
-        authenticator.logout()
+    # NOTE(review): the login gate is bypassed in this revision -- neither
+    # authenticate() nor authenticator.logout() is called, so main() runs for
+    # every visitor. Restore the gate before exposing the app publicly.
         main()
diff --git a/k8s-manifests/clusterrole.yaml b/k8s-manifests/clusterrole.yaml
new file mode 100644
index 0000000..f2e51eb
--- /dev/null
+++ b/k8s-manifests/clusterrole.yaml
@@ -0,0 +1,41 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: restricted-view
+rules:
+- apiGroups: [""]
+  resources: ["configmaps", "pods", "services", "endpoints", "persistentvolumeclaims", "replicationcontrollers", "replicationcontrollers/scale"]
+  verbs: ["get", "list", "watch"]
+# Secrets are deliberately absent from this role: RBAC is deny-by-default,
+# so omitting the resource withholds access. (A rule with an empty verbs
+# list fails API validation, which requires at least one verb per rule.)
+- apiGroups: ["apps"]
+  resources: ["daemonsets", "deployments", "replicasets", "statefulsets"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["autoscaling"]
+  resources: ["horizontalpodautoscalers"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["batch"]
+  resources: ["cronjobs", "jobs"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["extensions"]
+  resources: ["daemonsets", "deployments", "replicasets", "replicationcontrollers/scale", "statefulsets"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["policy"]
+  resources: ["poddisruptionbudgets"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["networking.k8s.io"]
+  resources: ["networkpolicies"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["storage.k8s.io"]
+  resources: ["storageclasses", "volumeattachments"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["admissionregistration.k8s.io"]
+  resources: ["mutatingwebhookconfigurations", "validatingwebhookconfigurations"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["apiextensions.k8s.io"]
+  resources: ["customresourcedefinitions"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["apiregistration.k8s.io"]
+  resources: ["apiservices"]
+  verbs: ["get", "list", "watch"]
\ No newline at end of file
diff --git a/k8s-manifests/clusterrolebinding.yaml b/k8s-manifests/clusterrolebinding.yaml
new file mode 100644
index 0000000..96d986b
--- /dev/null
+++ b/k8s-manifests/clusterrolebinding.yaml
@@ -0,0 +1,13 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding  # no namespace in metadata: the grant applies cluster-wide
+metadata:
+  name: restricted-view-binding
+subjects:
+- kind: User
+  name: sameer # Replace with the actual username
+  apiGroup: rbac.authorization.k8s.io
+roleRef:
+  kind: ClusterRole
+  name: restricted-view  # the ClusterRole declared in clusterrole.yaml
+  apiGroup: rbac.authorization.k8s.io
+
diff --git a/k8s-manifests/deploy.yaml b/k8s-manifests/deploy.yaml
new file mode 100644
index 0000000..d16899f
--- /dev/null
+++ b/k8s-manifests/deploy.yaml
@@ -0,0 +1,72 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  labels:
+    app: insightful
+  name: insightful
+  namespace: ai-stack
+spec:
+  progressDeadlineSeconds: 600
+  replicas: 1
+  revisionHistoryLimit: 10
+  selector:
+    matchLabels:
+      app: insightful
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      labels:
+        app: insightful
+    spec:
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: insightful-env
+        env:  # each `key` below must name an entry of the `tokens` Secret
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            secretKeyRef:
+              name: tokens
+              key: hf-token
+        - name: STACK_OVERFLOW_API_KEY
+          valueFrom:
+            secretKeyRef:
+              name: tokens
+              key: stack-exchange-token
+        - name: TAVILY_API_KEY
+          valueFrom:
+            secretKeyRef:
+              name: tokens
+              key: tavily-token
+        image: ghcr.io/infracloudio/insightful:latest
+        imagePullPolicy: Always
+        name: insightful
+        ports:
+        - containerPort: 8501
+          protocol: TCP
+        - containerPort: 3150
+          protocol: TCP
+        resources:
+          limits:
+            nvidia.com/gpu: "1"
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /opt/secrets
+          name: secrets
+          readOnly: true
+        - name: empty
+          mountPath: /tmp/
+      dnsPolicy: ClusterFirst
+      restartPolicy: Always
+      schedulerName: default-scheduler
+      securityContext: {}
+      terminationGracePeriodSeconds: 30
+      volumes:
+      - name: secrets
+        secret:
+          defaultMode: 420
+          secretName: gh-pat
+      - name: empty
+        emptyDir: {}
diff --git a/k8s-manifests/env.yaml b/k8s-manifests/env.yaml
new file mode 100644
index 0000000..e7b95d2
--- /dev/null
+++ b/k8s-manifests/env.yaml
@@ -0,0 +1,20 @@
+apiVersion: v1
+kind: ConfigMap  # consumed by the insightful Deployment through envFrom/configMapRef
+metadata:
+  name: insightful-env
+  namespace: ai-stack
+data:
+  TGI_HOST: "192.168.0.203"  # same address as the ai-stack-tgi LoadBalancer ingress IP in svc-template.yaml
+  TGI_PORT: "80"
+  TEI_HOST: "192.168.0.202"  # NOTE(review): hard-coded address; confirm it is stable in the target cluster
+  TEI_PORT: "80"
+  RERANKER_HOST: "192.168.0.205"
+  RERANKER_PORT: "80"
+  VECTORDB_HOST: "192.168.0.204"
+  VECTORDB_PORT: "8000"
+  STOP_TOKEN: "<|endoftext|>"
+  PORTKEY_PROVIDER: "llm_provider_name"  # placeholder value -- replace before enabling Portkey
+  PORTKEY_CUSTOM_HOST: "llm_provider_host_ip_and_port"  # placeholder value
+  USE_PORTKEY: "0"
+  USE_RERANKER: "1"  # NOTE(review): app.py reads this with os.getenv; verify its parsing treats "1" as enabled
+
diff --git a/k8s-manifests/hf.yaml b/k8s-manifests/hf.yaml
new file mode 100644
index 0000000..a695a1c
--- /dev/null
+++ b/k8s-manifests/hf.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: tokens
+  namespace: ai-stack
+type: Opaque
+stringData:  # plain-text values belong here; `data:` only accepts base64
+  hf-token: "<HUGGINGFACE_API_TOKEN>"  # placeholder -- supply real values out of band, never commit them
+  tavily-token: "<TAVILY_API_KEY>"
+  stack-exchange-token: "<STACK_EXCHANGE_API_KEY>"
\ No newline at end of file
diff --git a/k8s-manifests/secret.yaml b/k8s-manifests/secret.yaml
new file mode 100644
index 0000000..1d8f25d
--- /dev/null
+++ b/k8s-manifests/secret.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  creationTimestamp: null
+  name: gh-pat
+  namespace: ai-stack
+stringData:  # plain text avoids base64 slips such as an encoded trailing newline
+  gh-pat: "<GITHUB_PERSONAL_ACCESS_TOKEN>"  # placeholder -- supply out of band, never commit the token
diff --git a/k8s-manifests/svc-template.yaml b/k8s-manifests/svc-template.yaml
new file mode 100644
index 0000000..4c0ed64
--- /dev/null
+++ b/k8s-manifests/svc-template.yaml
@@ -0,0 +1,44 @@
+apiVersion: v1
+kind: Service
+metadata:
+  annotations:
+    meta.helm.sh/release-name: ai-stack
+    meta.helm.sh/release-namespace: ai-stack
+    metallb.universe.tf/ip-allocated-from-pool: ip-pool
+  creationTimestamp: "2024-07-27T04:34:16Z"  # NOTE(review): looks server-assigned (exported object)
+  labels:
+    app.kubernetes.io/instance: ai-stack
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/version: 2.2.0
+    helm.sh/chart: tgi-0.1.5
+  name: ai-stack-tgi
+  namespace: ai-stack
+  resourceVersion: "7773298"  # NOTE(review): looks server-assigned; confirm this file is reference-only
+  uid: d66fb7ac-ea08-48dc-9599-06ebea87e28b  # NOTE(review): looks server-assigned; confirm before applying
+spec:
+  allocateLoadBalancerNodePorts: true
+  clusterIP: 10.233.37.109
+  clusterIPs:
+  - 10.233.37.109
+  externalTrafficPolicy: Cluster
+  internalTrafficPolicy: Cluster
+  ipFamilies:
+  - IPv4
+  ipFamilyPolicy: SingleStack
+  ports:
+  - name: http
+    nodePort: 30175
+    port: 80
+    protocol: TCP
+    targetPort: http
+  selector:
+    app.kubernetes.io/instance: ai-stack
+    app.kubernetes.io/name: tgi
+  sessionAffinity: None
+  type: LoadBalancer
+status:
+  loadBalancer:
+    ingress:
+    - ip: 192.168.0.203
+      ipMode: VIP
diff --git a/k8s-manifests/svc.yaml b/k8s-manifests/svc.yaml
new file mode 100644
index 0000000..d242f8d
--- /dev/null
+++ b/k8s-manifests/svc.yaml
@@ -0,0 +1,32 @@
+apiVersion: v1
+kind: Service  # exposes the insightful pods through a LoadBalancer
+metadata:
+  labels:
+    app: insightful
+  name: insightful
+  namespace: ai-stack
+spec:
+  allocateLoadBalancerNodePorts: true
+  clusterIP: 10.233.39.226  # NOTE(review): pinned cluster IP, presumably exported from a live cluster -- confirm it is portable
+  clusterIPs:
+  - 10.233.39.226
+  externalTrafficPolicy: Cluster
+  internalTrafficPolicy: Cluster
+  ipFamilies:
+  - IPv4
+  ipFamilyPolicy: SingleStack
+  ports:
+  - name: http
+    nodePort: 30150
+    port: 80
+    protocol: TCP
+    targetPort: 8501  # same number as the first containerPort in deploy.yaml
+  selector:
+    app: insightful  # same label the insightful Deployment puts on its pod template
+  sessionAffinity: None
+  type: LoadBalancer
+status:  # NOTE(review): status looks copied from a running object -- confirm it is not meant as desired state
+  loadBalancer:
+    ingress:
+    - ip: 192.168.0.206
+      ipMode: VIP
diff --git a/k8s-manifests/tgi.yaml b/k8s-manifests/tgi.yaml
new file mode 100644
index 0000000..c46f6cd
--- /dev/null
+++ b/k8s-manifests/tgi.yaml
@@ -0,0 +1,132 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  annotations:
+    deployment.kubernetes.io/revision: "9"
+    meta.helm.sh/release-name: ai-stack
+    meta.helm.sh/release-namespace: ai-stack
+  creationTimestamp: "2024-07-27T04:34:16Z"
+  generation: 9
+  labels:
+    app.kubernetes.io/instance: ai-stack
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/version: 2.2.0
+    helm.sh/chart: tgi-0.1.5
+  name: ai-stack-tgi
+  namespace: ai-stack
+  resourceVersion: "10815846"
+  uid: f826cd3e-8d41-4bb8-83a7-abaa41e37d53
+spec:
+  progressDeadlineSeconds: 600
+  replicas: 1
+  revisionHistoryLimit: 10
+  selector:
+    matchLabels:
+      app.kubernetes.io/instance: ai-stack
+      app.kubernetes.io/name: tgi
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      annotations:
+        kubectl.kubernetes.io/restartedAt: "2024-08-02T16:52:08+05:30"
+      creationTimestamp: null
+      labels:
+        app.kubernetes.io/instance: ai-stack
+        app.kubernetes.io/managed-by: Helm
+        app.kubernetes.io/name: tgi
+        app.kubernetes.io/version: 2.2.0
+        helm.sh/chart: tgi-0.1.5
+    spec:
+      containers:
+      - command:
+        - text-generation-launcher
+        env:
+        - name: MAX_INPUT_TOKENS
+          value: "6144"
+        - name: MAX_TOTAL_TOKENS
+          value: "8192"
+        - name: HF_API_TOKEN
+          valueFrom:
+            secretKeyRef:
+              key: HF_API_TOKEN
+              name: hf-api-token
+        - name: HF_HUB_OFFLINE
+          value: "1"
+        - name: MODEL_ID
+          value: Qwen/Qwen2-7B-Instruct
+        image: ghcr.io/huggingface/text-generation-inference:2.2.0
+        imagePullPolicy: IfNotPresent
+        livenessProbe:
+          failureThreshold: 3
+          httpGet:
+            path: /health
+            port: http
+            scheme: HTTP
+          initialDelaySeconds: 30
+          periodSeconds: 30
+          successThreshold: 1
+          timeoutSeconds: 2
+        name: tgi
+        ports:
+        - containerPort: 80
+          name: http
+          protocol: TCP
+        readinessProbe:
+          failureThreshold: 4
+          httpGet:
+            path: /health
+            port: http
+            scheme: HTTP
+          initialDelaySeconds: 30
+          periodSeconds: 15
+          successThreshold: 1
+          timeoutSeconds: 2
+        resources:
+          limits:
+            nvidia.com/gpu: "1"
+          requests:
+            nvidia.com/gpu: "1"
+        securityContext: {}
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /data
+          name: hf-cache
+      dnsPolicy: ClusterFirst
+      nodeSelector:
+        kubernetes.io/hostname: infracloud03
+      restartPolicy: Always
+      schedulerName: default-scheduler
+      securityContext: {}
+      serviceAccount: ai-stack-tgi
+      serviceAccountName: ai-stack-tgi
+      terminationGracePeriodSeconds: 30
+      volumes:
+      - name: hf-cache  # mounted at /data by the tgi container
+        persistentVolumeClaim:
+          claimName: hf-cache
+      - emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+        name: shm  # NOTE(review): no volumeMount in this manifest references shm -- confirm whether a mount (e.g. /dev/shm) was intended
+status:
+  availableReplicas: 1
+  conditions:
+  - lastTransitionTime: "2024-07-27T04:34:16Z"
+    lastUpdateTime: "2024-08-02T11:22:55Z"
+    message: ReplicaSet "ai-stack-tgi-554994c8" has successfully progressed.
+    reason: NewReplicaSetAvailable
+    status: "True"
+    type: Progressing
+  - lastTransitionTime: "2024-08-06T06:11:09Z"
+    lastUpdateTime: "2024-08-06T06:11:09Z"
+    message: Deployment has minimum availability.
+    reason: MinimumReplicasAvailable
+    status: "True"
+    type: Available
+  observedGeneration: 9
+  readyReplicas: 1
+  replicas: 1
+  updatedReplicas: 1
diff --git a/k8s-manifests/tokens.yaml b/k8s-manifests/tokens.yaml
new file mode 100644
index 0000000..cd120c8
--- /dev/null
+++ b/k8s-manifests/tokens.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: tokens
+  namespace: ai-stack
+type: Opaque
+stringData:  # plain-text placeholders; the API server stores them base64-encoded
+  hf-token: "<HUGGINGFACE_API_TOKEN>"  # placeholder -- supply real values out of band, never commit them
+  tavily-token: "<TAVILY_API_KEY>"
+  stack-exchange-token: "<STACK_EXCHANGE_API_KEY>"
\ No newline at end of file