From c6a2d1487408215d4c891b323fd68466bee36ccd Mon Sep 17 00:00:00 2001
From: Shreyas Mocherla
Date: Tue, 6 Aug 2024 15:28:33 +0530
Subject: [PATCH] Added manifests and updated app.py

---
Review notes (ignored by `git am`; not part of the commit message):
 * app.py: USE_RERANKER was compared against the boolean True, which a string
   from os.getenv() never equals; it is now parsed as a flag ("1"/"true"/"yes"),
   matching the "1" set in k8s-manifests/env.yaml.
 * clusterrole.yaml: dropped the `secrets` rule with `verbs: []` (rejected by
   API validation; RBAC already denies anything not granted).
 * deploy.yaml: secretKeyRef keys now match the keys defined in Secret `tokens`.
 * hf.yaml / tokens.yaml / secret.yaml: credentials replaced by placeholders
   under `stringData`. The previously committed tokens must be rotated.
 * NOTE(review): authentication is commented out in app.py's __main__ block;
   confirm this is intended before deploying behind a LoadBalancer.
 * NOTE(review): svc*.yaml and tgi.yaml still carry server-populated fields
   (status, uid, resourceVersion, clusterIP); strip them before re-applying.
 * Leading whitespace of context lines was reconstructed; `index` blob ids are
   those of the original patch. Use `git apply --ignore-whitespace` if needed.

 app.py                                |  14 ++-
 k8s-manifests/clusterrole.yaml        |  41 ++++++++
 k8s-manifests/clusterrolebinding.yaml |  13 +++
 k8s-manifests/deploy.yaml             |  72 ++++++++++++++
 k8s-manifests/env.yaml                |  20 ++++
 k8s-manifests/hf.yaml                 |  10 ++
 k8s-manifests/secret.yaml             |   8 ++
 k8s-manifests/svc-template.yaml       |  44 +++++++++
 k8s-manifests/svc.yaml                |  32 +++++++
 k8s-manifests/tgi.yaml                | 132 ++++++++++++++++++++++++++
 k8s-manifests/tokens.yaml             |  10 ++
 11 files changed, 391 insertions(+), 5 deletions(-)
 create mode 100644 k8s-manifests/clusterrole.yaml
 create mode 100644 k8s-manifests/clusterrolebinding.yaml
 create mode 100644 k8s-manifests/deploy.yaml
 create mode 100644 k8s-manifests/env.yaml
 create mode 100644 k8s-manifests/hf.yaml
 create mode 100644 k8s-manifests/secret.yaml
 create mode 100644 k8s-manifests/svc-template.yaml
 create mode 100644 k8s-manifests/svc.yaml
 create mode 100644 k8s-manifests/tgi.yaml
 create mode 100644 k8s-manifests/tokens.yaml

diff --git a/app.py b/app.py
index 8cce554..93c545e 100644
--- a/app.py
+++ b/app.py
@@ -30,6 +30,9 @@
 import yaml
 from yaml.loader import SafeLoader
 
+from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
+from urllib3.exceptions import ProtocolError
+
 st.set_page_config(layout="wide", page_title="InSightful")
 
 def authenticate():
@@ -144,9 +147,10 @@ def __init__(self, llm, embeddings, collection_name, db_client):
         self.collection_name = collection_name
         self.db_client = db_client
 
+    @retry(retry=retry_if_exception_type(ProtocolError), stop=stop_after_attempt(5), wait=wait_fixed(2))
     def load_documents(self, doc, num_docs=250):
         documents = []
-        for data in datasets.load_dataset(doc, split=f"train[:{num_docs}]").to_list():
+        for data in datasets.load_dataset(doc, split=f"train[:{num_docs}]", num_proc=10).to_list():
             documents.append(
                 Document(
                     page_content=data["text"],
@@ -249,7 +253,7 @@ def setup_tools(_model, _client, _chroma_embedding_function, _embedder):
     #    embedder=_embedder,
     #)
 
-    if os.getenv("USE_RERANKER", "False") == "True":
+    if os.getenv("USE_RERANKER", "False").strip().lower() in ("1", "true", "yes"):
         retriever = create_reranker_retriever(
             name="slack_conversations_retriever",
             model=_model,
@@ -327,7 +331,7 @@ def main():
     st.session_state["chat_history"] = chat_history
 
 if __name__ == "__main__":
-    authenticator = authenticate()
-    if st.session_state['authentication_status']:
-        authenticator.logout()
+    #authenticator = authenticate()
+    #if st.session_state['authentication_status']:
+    #    authenticator.logout()
         main()
diff --git a/k8s-manifests/clusterrole.yaml b/k8s-manifests/clusterrole.yaml
new file mode 100644
index 0000000..f2e51eb
--- /dev/null
+++ b/k8s-manifests/clusterrole.yaml
@@ -0,0 +1,41 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: restricted-view
+rules:
+- apiGroups: [""]
+  resources: ["configmaps", "pods", "services", "endpoints", "persistentvolumeclaims", "replicationcontrollers", "replicationcontrollers/scale"]
+  verbs: ["get", "list", "watch"]
+# No rule for "secrets": RBAC is deny-by-default, so omitting the resource
+# already withholds access. A rule with an empty `verbs` list is rejected by
+# API validation ("verbs must contain at least one value").
+- apiGroups: ["apps"]
+  resources: ["daemonsets", "deployments", "replicasets", "statefulsets"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["autoscaling"]
+  resources: ["horizontalpodautoscalers"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["batch"]
+  resources: ["cronjobs", "jobs"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["extensions"]
+  resources: ["daemonsets", "deployments", "replicasets", "replicationcontrollers/scale", "statefulsets"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["policy"]
+  resources: ["poddisruptionbudgets"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["networking.k8s.io"]
+  resources: ["networkpolicies"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["storage.k8s.io"]
+  resources: ["storageclasses", "volumeattachments"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["admissionregistration.k8s.io"]
+  resources: ["mutatingwebhookconfigurations", "validatingwebhookconfigurations"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["apiextensions.k8s.io"]
+  resources: ["customresourcedefinitions"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["apiregistration.k8s.io"]
+  resources: ["apiservices"]
+  verbs: ["get", "list", "watch"]
\ No newline at end of file
diff --git a/k8s-manifests/clusterrolebinding.yaml b/k8s-manifests/clusterrolebinding.yaml
new file mode 100644
index 0000000..96d986b
--- /dev/null
+++ b/k8s-manifests/clusterrolebinding.yaml
@@ -0,0 +1,13 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: restricted-view-binding
+subjects:
+- kind: User
+  name: sameer # Replace with the actual username
+  apiGroup: rbac.authorization.k8s.io
+roleRef:
+  kind: ClusterRole
+  name: restricted-view
+  apiGroup: rbac.authorization.k8s.io
+
diff --git a/k8s-manifests/deploy.yaml b/k8s-manifests/deploy.yaml
new file mode 100644
index 0000000..d16899f
--- /dev/null
+++ b/k8s-manifests/deploy.yaml
@@ -0,0 +1,72 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  labels:
+    app: insightful
+  name: insightful
+  namespace: ai-stack
+spec:
+  progressDeadlineSeconds: 600
+  replicas: 1
+  revisionHistoryLimit: 10
+  selector:
+    matchLabels:
+      app: insightful
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      labels:
+        app: insightful
+    spec:
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: insightful-env
+        env:
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            secretKeyRef:
+              name: tokens
+              key: hf-token
+        - name: STACK_OVERFLOW_API_KEY
+          valueFrom:
+            secretKeyRef:
+              name: tokens
+              key: stack-exchange-token
+        - name: TAVILY_API_KEY
+          valueFrom:
+            secretKeyRef:
+              name: tokens
+              key: tavily-token
+        image: ghcr.io/infracloudio/insightful:latest
+        imagePullPolicy: Always
+        name: insightful
+        ports:
+        - containerPort: 8501
+          protocol: TCP
+        - containerPort: 3150
+          protocol: TCP
+        resources:
+          limits:
+            nvidia.com/gpu: "1"
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /opt/secrets
+          name: secrets
+          readOnly: true
+        - name: empty
+          mountPath: /tmp/
+      dnsPolicy: ClusterFirst
+      restartPolicy: Always
+      schedulerName: default-scheduler
+      securityContext: {}
+      terminationGracePeriodSeconds: 30
+      volumes:
+      - name: secrets
+        secret:
+          defaultMode: 420
+          secretName: gh-pat
+      - name: empty
+        emptyDir: {}
diff --git a/k8s-manifests/env.yaml b/k8s-manifests/env.yaml
new file mode 100644
index 0000000..e7b95d2
--- /dev/null
+++ b/k8s-manifests/env.yaml
@@ -0,0 +1,20 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: insightful-env
+  namespace: ai-stack
+data:
+  TGI_HOST: "192.168.0.203"
+  TGI_PORT: "80"
+  TEI_HOST: "192.168.0.202"
+  TEI_PORT: "80"
+  RERANKER_HOST: "192.168.0.205"
+  RERANKER_PORT: "80"
+  VECTORDB_HOST: "192.168.0.204"
+  VECTORDB_PORT: "8000"
+  STOP_TOKEN: "<|endoftext|>"
+  PORTKEY_PROVIDER: "llm_provider_name"
+  PORTKEY_CUSTOM_HOST: "llm_provider_host_ip_and_port"
+  USE_PORTKEY: "0"
+  USE_RERANKER: "1"
+
diff --git a/k8s-manifests/hf.yaml b/k8s-manifests/hf.yaml
new file mode 100644
index 0000000..a695a1c
--- /dev/null
+++ b/k8s-manifests/hf.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: tokens
+  namespace: ai-stack
+type: Opaque
+stringData:
+  hf-token: "REPLACE_WITH_HUGGINGFACE_TOKEN"
+  tavily-token: "REPLACE_WITH_TAVILY_API_KEY"
+  stack-exchange-token: "REPLACE_WITH_STACK_EXCHANGE_KEY"
\ No newline at end of file
diff --git a/k8s-manifests/secret.yaml b/k8s-manifests/secret.yaml
new file mode 100644
index 0000000..1d8f25d
--- /dev/null
+++ b/k8s-manifests/secret.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  creationTimestamp: null
+  name: gh-pat
+  namespace: ai-stack
+stringData:
+  gh-pat: "REPLACE_WITH_GITHUB_PAT"
diff --git a/k8s-manifests/svc-template.yaml b/k8s-manifests/svc-template.yaml
new file mode 100644
index 0000000..4c0ed64
--- /dev/null
+++ b/k8s-manifests/svc-template.yaml
@@ -0,0 +1,44 @@
+apiVersion: v1
+kind: Service
+metadata:
+  annotations:
+    meta.helm.sh/release-name: ai-stack
+    meta.helm.sh/release-namespace: ai-stack
+    metallb.universe.tf/ip-allocated-from-pool: ip-pool
+  creationTimestamp: "2024-07-27T04:34:16Z"
+  labels:
+    app.kubernetes.io/instance: ai-stack
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/version: 2.2.0
+    helm.sh/chart: tgi-0.1.5
+  name: ai-stack-tgi
+  namespace: ai-stack
+  resourceVersion: "7773298"
+  uid: d66fb7ac-ea08-48dc-9599-06ebea87e28b
+spec:
+  allocateLoadBalancerNodePorts: true
+  clusterIP: 10.233.37.109
+  clusterIPs:
+  - 10.233.37.109
+  externalTrafficPolicy: Cluster
+  internalTrafficPolicy: Cluster
+  ipFamilies:
+  - IPv4
+  ipFamilyPolicy: SingleStack
+  ports:
+  - name: http
+    nodePort: 30175
+    port: 80
+    protocol: TCP
+    targetPort: http
+  selector:
+    app.kubernetes.io/instance: ai-stack
+    app.kubernetes.io/name: tgi
+  sessionAffinity: None
+  type: LoadBalancer
+status:
+  loadBalancer:
+    ingress:
+    - ip: 192.168.0.203
+      ipMode: VIP
diff --git a/k8s-manifests/svc.yaml b/k8s-manifests/svc.yaml
new file mode 100644
index 0000000..d242f8d
--- /dev/null
+++ b/k8s-manifests/svc.yaml
@@ -0,0 +1,32 @@
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app: insightful
+  name: insightful
+  namespace: ai-stack
+spec:
+  allocateLoadBalancerNodePorts: true
+  clusterIP: 10.233.39.226
+  clusterIPs:
+  - 10.233.39.226
+  externalTrafficPolicy: Cluster
+  internalTrafficPolicy: Cluster
+  ipFamilies:
+  - IPv4
+  ipFamilyPolicy: SingleStack
+  ports:
+  - name: http
+    nodePort: 30150
+    port: 80
+    protocol: TCP
+    targetPort: 8501
+  selector:
+    app: insightful
+  sessionAffinity: None
+  type: LoadBalancer
+status:
+  loadBalancer:
+    ingress:
+    - ip: 192.168.0.206
+      ipMode: VIP
diff --git a/k8s-manifests/tgi.yaml b/k8s-manifests/tgi.yaml
new file mode 100644
index 0000000..c46f6cd
--- /dev/null
+++ b/k8s-manifests/tgi.yaml
@@ -0,0 +1,132 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  annotations:
+    deployment.kubernetes.io/revision: "9"
+    meta.helm.sh/release-name: ai-stack
+    meta.helm.sh/release-namespace: ai-stack
+  creationTimestamp: "2024-07-27T04:34:16Z"
+  generation: 9
+  labels:
+    app.kubernetes.io/instance: ai-stack
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/version: 2.2.0
+    helm.sh/chart: tgi-0.1.5
+  name: ai-stack-tgi
+  namespace: ai-stack
+  resourceVersion: "10815846"
+  uid: f826cd3e-8d41-4bb8-83a7-abaa41e37d53
+spec:
+  progressDeadlineSeconds: 600
+  replicas: 1
+  revisionHistoryLimit: 10
+  selector:
+    matchLabels:
+      app.kubernetes.io/instance: ai-stack
+      app.kubernetes.io/name: tgi
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      annotations:
+        kubectl.kubernetes.io/restartedAt: "2024-08-02T16:52:08+05:30"
+      creationTimestamp: null
+      labels:
+        app.kubernetes.io/instance: ai-stack
+        app.kubernetes.io/managed-by: Helm
+        app.kubernetes.io/name: tgi
+        app.kubernetes.io/version: 2.2.0
+        helm.sh/chart: tgi-0.1.5
+    spec:
+      containers:
+      - command:
+        - text-generation-launcher
+        env:
+        - name: MAX_INPUT_TOKENS
+          value: "6144"
+        - name: MAX_TOTAL_TOKENS
+          value: "8192"
+        - name: HF_API_TOKEN
+          valueFrom:
+            secretKeyRef:
+              key: HF_API_TOKEN
+              name: hf-api-token
+        - name: HF_HUB_OFFLINE
+          value: "1"
+        - name: MODEL_ID
+          value: Qwen/Qwen2-7B-Instruct
+        image: ghcr.io/huggingface/text-generation-inference:2.2.0
+        imagePullPolicy: IfNotPresent
+        livenessProbe:
+          failureThreshold: 3
+          httpGet:
+            path: /health
+            port: http
+            scheme: HTTP
+          initialDelaySeconds: 30
+          periodSeconds: 30
+          successThreshold: 1
+          timeoutSeconds: 2
+        name: tgi
+        ports:
+        - containerPort: 80
+          name: http
+          protocol: TCP
+        readinessProbe:
+          failureThreshold: 4
+          httpGet:
+            path: /health
+            port: http
+            scheme: HTTP
+          initialDelaySeconds: 30
+          periodSeconds: 15
+          successThreshold: 1
+          timeoutSeconds: 2
+        resources:
+          limits:
+            nvidia.com/gpu: "1"
+          requests:
+            nvidia.com/gpu: "1"
+        securityContext: {}
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /data
+          name: hf-cache
+      dnsPolicy: ClusterFirst
+      nodeSelector:
+        kubernetes.io/hostname: infracloud03
+      restartPolicy: Always
+      schedulerName: default-scheduler
+      securityContext: {}
+      serviceAccount: ai-stack-tgi
+      serviceAccountName: ai-stack-tgi
+      terminationGracePeriodSeconds: 30
+      volumes:
+      - name: hf-cache
+        persistentVolumeClaim:
+          claimName: hf-cache
+      - emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+        name: shm
+status:
+  availableReplicas: 1
+  conditions:
+  - lastTransitionTime: "2024-07-27T04:34:16Z"
+    lastUpdateTime: "2024-08-02T11:22:55Z"
+    message: ReplicaSet "ai-stack-tgi-554994c8" has successfully progressed.
+    reason: NewReplicaSetAvailable
+    status: "True"
+    type: Progressing
+  - lastTransitionTime: "2024-08-06T06:11:09Z"
+    lastUpdateTime: "2024-08-06T06:11:09Z"
+    message: Deployment has minimum availability.
+    reason: MinimumReplicasAvailable
+    status: "True"
+    type: Available
+  observedGeneration: 9
+  readyReplicas: 1
+  replicas: 1
+  updatedReplicas: 1
diff --git a/k8s-manifests/tokens.yaml b/k8s-manifests/tokens.yaml
new file mode 100644
index 0000000..cd120c8
--- /dev/null
+++ b/k8s-manifests/tokens.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: tokens
+  namespace: ai-stack
+type: Opaque
+stringData:
+  hf-token: "REPLACE_WITH_HUGGINGFACE_TOKEN"
+  tavily-token: "REPLACE_WITH_TAVILY_API_KEY"
+  stack-exchange-token: "REPLACE_WITH_STACK_EXCHANGE_KEY"
\ No newline at end of file