首页 > 其他分享 > OPEA Deployment

OPEA Deployment

时间:2024-09-14 14:53:07浏览次数:10  
标签:kubectl Deployment chatqna TCP opea CHATQNA OPEA PORT

RKE2 deployment K8s

# Point kubectl at the RKE2-managed cluster and keep a user-owned copy of
# the kubeconfig so plain (non-sudo) kubectl invocations work.
export KUBECONFIG=/etc/rancher/rke2/rke2.yaml
mkdir -p "$HOME/.kube"
sudo cp /etc/rancher/rke2/rke2.yaml "$HOME/.kube/config"
sudo chown "$USER:$USER" "$HOME/.kube/config"

# Install the latest stable kubectl release.
# REF: https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/
stable=$(curl -L -s https://dl.k8s.io/release/stable.txt)
curl -LO "https://dl.k8s.io/release/${stable}/bin/linux/amd64/kubectl"
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
# sudo chown root:root /usr/local/bin/kubectl
source <(kubectl completion bash)

# Discover this host's outbound IP and the NodePort of the ChatQnA UI service.
IPADDR=$(ip route get 1 | head -n 1 | grep -o 'src\s[.0-9a-z]\+' | awk '{print $2}')
uiport=$(kubectl -n opea-chatqna get svc -l "app.kubernetes.io/name"=chatqna-ui -o jsonpath='{.items[0].spec.ports[0].nodePort}')
echo "login windows and open a browser with http://$IPADDR:$uiport"

check ConfigMap

# Inspect the ConfigMaps generated by the ChatQnA helm chart.
NS=opea-chatqna
kubectl -n "$NS" get cm -l app.kubernetes.io/name=data-prep
for name in data-prep llm-uservice retriever-usvc; do
  kubectl -n "$NS" get cm -l "app.kubernetes.io/name=${name}" -o yaml
done

# For tei / teirerank, dump only the data section of each ConfigMap.
for name in tei teirerank; do
  kubectl -n "$NS" get cm -l "app.kubernetes.io/name=${name}" -o json | jq .items[0].data
done
# kubectl -n opea-chatqna  get cm -l app.kubernetes.io/name=tgi
 
# SECURITY FIX: the previous revision hardcoded a live "hf_..." Hugging Face
# API token in plain text. Never commit secrets; read the token from the
# environment instead (aborts with a clear message when HF_TOKEN is unset).
TOKEN="${HF_TOKEN:?set HF_TOKEN to your Hugging Face API token}"

# Inject the token into each ConfigMap that the serving pods read their
# environment from, then print the patched ConfigMap for verification.
# (Was three copy-pasted label/patch/get stanzas; same commands, same order.)
for name in retriever-usvc tei teirerank; do
  label="app.kubernetes.io/name=${name}"
  cm=$(kubectl -n opea-chatqna get cm -l "$label" -o name)
  kubectl -n opea-chatqna patch "$cm" --type='json' -p '[
{"op":"add","path":"/data/HUGGINGFACEHUB_API_TOKEN","value":"'"$TOKEN"'"}]'
  kubectl -n opea-chatqna get cm -l "$label" -o yaml
done
 
DATAP="/root"
MIRROR=https://hf-mirror.com/
label=app.kubernetes.io/name=tgi
# REF: https://github.com/opea-project/GenAIInfra/blob/main/helm-charts/common/tgi/templates/configmap.yaml#L14C18-L14C36

# Helper: add one key/value entry to the tgi ConfigMap via a JSON patch.
# Uses $cm from the enclosing scope. Entries marked "X" below were noted by
# the author as not taking effect; kept for the record.
patch_tgi_cm() {
  local key=$1 value=$2
  kubectl -n opea-chatqna patch "$cm" --type='json' -p '[
{"op":"add","path":"/data/'"$key"'","value":"'"$value"'"}]'
}

# CM name for the tgi chart.
cm=$(kubectl -n opea-chatqna get cm -l $label -o name)
# Data path, X
patch_tgi_cm global.modelUseHostPath "$DATAP"
# Data path, X
patch_tgi_cm modelUseHostPath "$DATAP"
# Token, X
patch_tgi_cm HUGGINGFACEHUB_API_TOKEN "$TOKEN"
# Token, X
patch_tgi_cm global.HUGGINGFACEHUB_API_TOKEN "$TOKEN"
# Token
patch_tgi_cm HF_TOKEN "$TOKEN"
# Mirror
patch_tgi_cm HF_ENDPOINT "$MIRROR"
# Mirror, X
patch_tgi_cm global.HF_ENDPOINT "$MIRROR"
kubectl -n opea-chatqna get cm -l $label -o yaml
 
 
 
# BUG FIX: the original used --field-selector="status.containerStatuses.readt!=true",
# which both misspells "ready" and references a field kubectl cannot select on —
# pod field selectors support only a fixed set (status.phase, spec.nodeName, ...).
# List pods that are not (yet) Running via a supported field selector:
kubectl get pods -n opea-chatqna \
    --field-selector="status.phase!=Running" \
    -o custom-columns="POD:metadata.name"


# Pods with at least one not-ready container: readiness lives under
# status.containerStatuses, so filter client-side with jsonpath.
kubectl get pod -o jsonpath='{range .items[?(@.status.containerStatuses[*].ready!=true)]}{.metadata.name}{"\n"}{end}' -n opea-chatqna
 
 
# Per-service debugging for the four model-serving components.
components=(retriever-usvc tei teirerank tgi)

# Show where each container pulls its environment from (envFrom refs).
for name in "${components[@]}"; do
  kubectl -n opea-chatqna get pods -l "app.kubernetes.io/name=${name}" -o json | jq .items[0].spec.containers[0].envFrom
done


# Recreate the pods so they pick up the patched ConfigMaps.
for name in "${components[@]}"; do
  kubectl -n opea-chatqna delete pods -l "app.kubernetes.io/name=${name}"
done


# Inspect events / container state for each component.
for name in "${components[@]}"; do
  kubectl -n opea-chatqna describe pods -l "app.kubernetes.io/name=${name}"
done


# Full logs for each component (--tail -1 means "all lines").
for name in "${components[@]}"; do
  kubectl -n opea-chatqna logs -l "app.kubernetes.io/name=${name}" --tail -1
done


# Block until the tgi pod reports Ready (model download can take minutes).
kubectl -n opea-chatqna wait --for=condition=ready pod -l app.kubernetes.io/name=tgi --timeout=3m

Port forward

# --- On the cluster node: expose the ChatQnA UI ---
# Forward local port 5174 to the chatqna-ui service (blocking; Ctrl-C to stop).
kubectl -n opea-chatqna port-forward $(kubectl -n opea-chatqna get svc -l "app.kubernetes.io/name"=chatqna-ui -o name) 5174
 
kubectl cluster-info
 
# Grab the UI service ClusterIP; copy the printed value to the edge node below.
clusterip=$(kubectl -n opea-chatqna get svc -l "app.kubernetes.io/name"=chatqna-ui -o jsonpath='{.items[0].spec.clusterIP}')
echo "copy the var to edge node: clusterip=$clusterip"
   
# login edge node
sudo apt install socat -y
  
IPADDR=$(ip route get 1 | head -n 1 | grep -o 'src\s[.0-9a-z]\+' | awk '{print $2}')
echo "login windows and open a browser with http://$IPADDR:8080"
   
# Placeholder: paste the clusterip value printed on the cluster node here.
clusterip=$clusterip
sudo ufw allow 8080/tcp
# Relay edge-node port 8080 to the UI service ClusterIP (blocking).
socat TCP-LISTEN:8080,fork TCP:${clusterip}:5174
 
 
# login the orchestrator
echo "in private network, we can use X11 forward, please run this command:"
echo "google-chrome --new-window http://$IPADDR:8080/ --user-data-dir=/tmp/chromedpdata --no-first-run"
 
# https://unix.stackexchange.com/questions/10428/simple-way-to-create-a-tunnel-from-one-local-port-to-another
# echo "ssh -g -L $IPADDR:8080:localhost:8000 -f -N user@$IPADDR.com"

 

question

It takes 12 minutes to saw a piece of wood into 4 pieces. How many minutes does it take to saw it into 7 pieces?

install HuggingFace CLI

# Install the Hugging Face CLI for the current user.
sudo apt install python3-pip
pip install huggingface_hub
 
# To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
huggingface-cli login
 
HUGGINGFACE_TOKEN=""
 
# BUG FIX: pip --user installs console scripts (e.g. huggingface-cli) under
# "$(python3 -m site --user-base)/bin", not user-base itself — the original
# PATH entry omitted "/bin". Also replaced legacy backticks with $( ).
echo "PATH=$(python3 -m site --user-base)/bin:\$PATH" >> ~/.bashrc

index

https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA

architecture

https://github.com/opea-project/GenAIComps

RAG API workflow

REF: https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker/xeon

# 1 get the embedding of input
# Resolve the TEI (text-embeddings-inference) service address inside the cluster.
label='app.kubernetes.io/name=tei'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
# Example questions; the second assignment overrides the first (both kept as samples).
input="What is Deep Learning?"
input="What is the revenue of Nike in 2023?"
# `-x ""` disables any configured HTTP proxy for this in-cluster call.
curl -x "" http://${clusterip}:${port}/embed \
    -X POST \
    -d '{"inputs":"'"$input"'"}' \
    -H 'Content-Type: application/json'
# Capture the embedding (first element of the returned array) for step 2.
your_embedding=$(curl -x "" http://${clusterip}:${port}/embed \
    -X POST \
    -d '{"inputs":"'"$input"'"}' \
    -H 'Content-Type: application/json' |jq .[0] -c)
 
# Same flow against the OPEA embedding micro-service wrapper.
label='app.kubernetes.io/name=embedding-usvc'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
curl -x "" http://${clusterip}:${port}/v1/embeddings\
  -X POST \
  -d '{"text":"hello"}' \
  -H 'Content-Type: application/json'
 
# 2 get the retriever docs
# Resolve the retriever micro-service (queries the Redis vector store).
label='app.kubernetes.io/name=retriever-usvc'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
# export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
echo "clusterip=$clusterip"
echo "port=$port"
# text='Just a test'
# Reuse the question from step 1; $your_embedding was captured there as well.
text=$input
curl -x "" http://${clusterip}:${port}/v1/retrieval \
  -X POST \
  -d "{\"text\":\"${text}\",\"embedding\":${your_embedding}}" \
  -H 'Content-Type: application/json'
# Keep the matched documents for the reranking step (step 3).
retrieved_docs=$(curl -x "" http://${clusterip}:${port}/v1/retrieval \
  -X POST \
  -d "{\"text\":\"${text}\",\"embedding\":${your_embedding}}" \
  -H 'Content-Type: application/json' | jq -c .retrieved_docs)
 
# 3 reranking the docs
label='app.kubernetes.io/name=reranking-usvc'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
# query="What is Deep Learning?"
query=$input
# Smoke test with a fixed example payload.
curl -x "" http://${clusterip}:${port}/v1/reranking\
  -X POST \
  -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
  -H 'Content-Type: application/json'
# Rerank the documents retrieved in step 2; keep only the top-ranked document.
reranking_docs=$(curl -x "" http://${clusterip}:${port}/v1/reranking \
  -X POST \
  -d '{"initial_query":"'"$query"'", "retrieved_docs": '"$retrieved_docs"'}' \
  -H 'Content-Type: application/json' | jq -c .documents[0])
 
# reranking_docs=$(tr -d '"' <<< "${reranking_docs}" |sed 's/\"/ /g')
# Strip escaped then literal double quotes so the text can be spliced into
# the TGI prompt JSON in step 4.b. NOTE(review): crude sanitization — other
# escape sequences (e.g. \n, stray backslashes) survive; confirm acceptable.
reranking_docs=$(sed 's/\\"/ /g' <<< "${reranking_docs}")
reranking_docs=$(tr -d '"' <<< "${reranking_docs}")
 
# 4.a llm
# Query the LLM micro-service directly with an OpenAI-style completions payload.
label='app.kubernetes.io/name=llm-uservice'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
curl -x "" http://${clusterip}:${port}/v1/completions \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'
  
# End-to-end check: the chatqna megaservice runs the whole RAG pipeline.
label='app.kubernetes.io/name=chatqna'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
curl -x "" http://${clusterip}:${port}/v1/chatqna -H "Content-Type: application/json" -d '{
     "messages": "What is the revenue of Nike in 2023?"
     }'
# 4.b tgi
# Query the text-generation-inference backend directly with a hand-built prompt.
label='app.kubernetes.io/name=tgi'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
 
# your question
query=${input}
# inputs template. Splices the sanitized reranked document from step 3 and the
# question into the assistant prompt.
inputs="### You are a helpful, respectful and honest assistant to help the user with questions. Please refer to the search results obtained from the local knowledge base. But be careful to not incorporate the information that you think is not relevant to the question. If you don't know the answer to a question, please don't share false information. ### Search results: ${reranking_docs} ### Question: ${query} \n\n### Answer:"
 
curl -x "" http://${clusterip}:${port}/generate \
  -X POST \
  -d '{"inputs":"'"${inputs}"'","parameters":{"max_new_tokens":1024, "do_sample": true}}' \
  -H 'Content-Type: application/json'
 
# 5 data-prep
# Ingest a document into the knowledge base via the data-prep micro-service.
label='app.kubernetes.io/name=data-prep'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
 
# Sample corpus: Nike's 10-K filing (used by the revenue question above).
wget https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/langchain/redis/data/nke-10k-2023.pdf
curl -x "" -X POST "http://${clusterip}:${port}/v1/dataprep" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./nke-10k-2023.pdf"
 
# List the files currently ingested.
curl -x "" -X POST "http://${clusterip}:${port}/v1/dataprep/get_file" \
     -H "Content-Type: application/json"
 
# Remove one ingested file by its stored path.
curl -x "" -X POST "http://${clusterip}:${port}/v1/dataprep/delete_file" \
     -d '{"file_path": "https://opea.dev.txt"}' \
     -H "Content-Type: application/json"

 

get image info

edge@iot-edge-xr12-5:~/kubeconf$ kubectl -n opea-chatqna get pod -o json | jq .items[].spec.containers[].image
"opea/chatqna:latest"
"amr-registry.caas.intel.com/nex-hybrid-ai/chatqna-conversation-ui:itep-build"
"opea/dataprep-redis:latest"
"opea/embedding-tei:latest"
"opea/llm-tgi:latest"
"redis/redis-stack:7.2.0-v9"
"opea/reranking-tei:latest"
"opea/retriever-redis:latest"
"ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
"ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
"ghcr.io/huggingface/text-generation-inference:2.1.0"

edge@iot-edge-xr12-5:~/kubeconf$ kubectl -n opea-chatqna get pod -o json | jq .items[] | less

edge@iot-edge-xr12-5:~/kubeconf$ kubectl -n opea-chatqna get pod -o json | jq .items[].metadata.name
"b-21365218-da18-5fad-b4be-0d843c751579-chatqna-686f56fc87-rbnn4"
"b-21365218-da18-5fad-b4be-0d843c751579-chatqna-ui-f7b644757rqgb"
"b-21365218-da18-5fad-b4be-0d843c751579-data-prep-5cf8f8dc475f9r"
"b-21365218-da18-5fad-b4be-0d843c751579-embedding-usvc-85875bjcn"
"b-21365218-da18-5fad-b4be-0d843c751579-llm-uservice-6cf647w8rrz"
"b-21365218-da18-5fad-b4be-0d843c751579-redis-vector-db-7b4frbxj"
"b-21365218-da18-5fad-b4be-0d843c751579-reranking-usvc-58f87zbrw"
"b-21365218-da18-5fad-b4be-0d843c751579-retriever-usvc-557dv4v5r"
"b-21365218-da18-5fad-b4be-0d843c751579-tei-6bc85bc8db-7j9vf"
"b-21365218-da18-5fad-b4be-0d843c751579-teirerank-8cb97f9b-pbl45"
"b-21365218-da18-5fad-b4be-0d843c751579-tgi-75b687bdd8-fz97w"

 

The environment variables work correctly in the OPEA team's reference deployment

kubectl exec chatqna-tgi-cfd44f9c4-szhqt -- env
PATH=/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
HOSTNAME=chatqna-tgi-cfd44f9c4-szhqt
NUMBA_CACHE_DIR=/tmp
TRANSFORMERS_CACHE=/tmp/transformers_cache
no_proxy=bj.intel.com,.bj.intel.com,10.0.0.0/8,192.168.0.0/16,localhost,127.0.0.0/8
CUDA_GRAPHS=0
HF_HOME=/tmp/.cache/huggingface
HF_TOKEN=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
HF_ENDPOINT=https://hf-mirror.com
http_proxy=http://child-prc.intel.com:913
PORT=2080
HABANA_LOGS=/tmp/habana_logs
https_proxy=http://child-prc.intel.com:913
MODEL_ID=Intel/neural-chat-7b-v3-3
CHATQNA_TEIRERANK_SERVICE_HOST=10.97.193.140
CHATQNA_TEIRERANK_PORT=tcp://10.97.193.140:80
CHATQNA_TEIRERANK_PORT_80_TCP_PORT=80
CHATQNA_PORT_8888_TCP_PROTO=tcp
KUBERNETES_SERVICE_PORT_HTTPS=443
CHATQNA_LLM_USERVICE_SERVICE_HOST=10.110.202.82
CHATQNA_RERANKING_USVC_SERVICE_HOST=10.106.114.139
CHATQNA_RERANKING_USVC_PORT_8000_TCP=tcp://10.106.114.139:8000
CHATQNA_TGI_PORT_80_TCP=tcp://10.106.73.168:80
CHATQNA_TGI_SERVICE_PORT=80
CHATQNA_DATA_PREP_PORT=tcp://10.98.141.218:6007
CHATQNA_TEI_PORT_80_TCP_PORT=80
CHATQNA_SERVICE_PORT=8888
CHATQNA_PORT_8888_TCP_PORT=8888
CHATQNA_PORT_8888_TCP_ADDR=10.96.134.150
CHATQNA_TEI_SERVICE_HOST=10.97.141.126
CHATQNA_RETRIEVER_USVC_SERVICE_HOST=10.98.19.124
CHATQNA_RETRIEVER_USVC_PORT_7000_TCP_ADDR=10.98.19.124
CHATQNA_REDIS_VECTOR_DB_PORT_6379_TCP_ADDR=10.103.0.129
CHATQNA_REDIS_VECTOR_DB_PORT_8001_TCP_PORT=8001
CHATQNA_TEIRERANK_PORT_80_TCP=tcp://10.97.193.140:80
CHATQNA_TGI_PORT_80_TCP_PROTO=tcp
CHATQNA_SERVICE_PORT_CHATQNA=8888
KUBERNETES_SERVICE_HOST=10.96.0.1
KUBERNETES_PORT=tcp://10.96.0.1:443
CHATQNA_LLM_USERVICE_PORT_9000_TCP_ADDR=10.110.202.82
CHATQNA_REDIS_VECTOR_DB_SERVICE_PORT_REDIS_INSIGHT=8001
CHATQNA_RETRIEVER_USVC_SERVICE_PORT=7000
CHATQNA_REDIS_VECTOR_DB_PORT_8001_TCP_ADDR=10.103.0.129
CHATQNA_TEIRERANK_PORT_80_TCP_PROTO=tcp
CHATQNA_PORT_8888_TCP=tcp://10.96.134.150:8888
CHATQNA_EMBEDDING_USVC_PORT=tcp://10.100.45.231:6000
CHATQNA_EMBEDDING_USVC_PORT_6000_TCP_PROTO=tcp
CHATQNA_LLM_USERVICE_PORT_9000_TCP_PORT=9000
CHATQNA_RERANKING_USVC_PORT_8000_TCP_PROTO=tcp
CHATQNA_EMBEDDING_USVC_SERVICE_PORT_EMBEDDING_USVC=6000
CHATQNA_RETRIEVER_USVC_PORT_7000_TCP_PROTO=tcp
CHATQNA_RERANKING_USVC_PORT_8000_TCP_PORT=8000
CHATQNA_TEIRERANK_SERVICE_PORT_TEIRERANK=80
CHATQNA_LLM_USERVICE_PORT=tcp://10.110.202.82:9000
CHATQNA_REDIS_VECTOR_DB_PORT_8001_TCP_PROTO=tcp
CHATQNA_DATA_PREP_SERVICE_PORT=6007
CHATQNA_DATA_PREP_PORT_6007_TCP_ADDR=10.98.141.218
CHATQNA_PORT=tcp://10.96.134.150:8888
CHATQNA_EMBEDDING_USVC_SERVICE_HOST=10.100.45.231
CHATQNA_EMBEDDING_USVC_PORT_6000_TCP_ADDR=10.100.45.231
CHATQNA_REDIS_VECTOR_DB_PORT_6379_TCP=tcp://10.103.0.129:6379
CHATQNA_TGI_SERVICE_HOST=10.106.73.168
CHATQNA_TEI_SERVICE_PORT=80
CHATQNA_TEI_PORT_80_TCP_ADDR=10.97.141.126
CHATQNA_SERVICE_HOST=10.96.134.150
KUBERNETES_PORT_443_TCP_PROTO=tcp
CHATQNA_RERANKING_USVC_SERVICE_PORT=8000
CHATQNA_DATA_PREP_PORT_6007_TCP=tcp://10.98.141.218:6007
CHATQNA_EMBEDDING_USVC_PORT_6000_TCP=tcp://10.100.45.231:6000
CHATQNA_REDIS_VECTOR_DB_SERVICE_PORT=6379
CHATQNA_DATA_PREP_PORT_6007_TCP_PORT=6007
CHATQNA_LLM_USERVICE_PORT_9000_TCP=tcp://10.110.202.82:9000
CHATQNA_LLM_USERVICE_PORT_9000_TCP_PROTO=tcp
CHATQNA_REDIS_VECTOR_DB_PORT_6379_TCP_PROTO=tcp
CHATQNA_REDIS_VECTOR_DB_PORT_6379_TCP_PORT=6379
CHATQNA_DATA_PREP_SERVICE_PORT_DATA_PREP=6007
CHATQNA_TEI_PORT_80_TCP=tcp://10.97.141.126:80
CHATQNA_TEI_PORT_80_TCP_PROTO=tcp
KUBERNETES_PORT_443_TCP_ADDR=10.96.0.1
CHATQNA_RERANKING_USVC_PORT_8000_TCP_ADDR=10.106.114.139
CHATQNA_DATA_PREP_SERVICE_HOST=10.98.141.218
CHATQNA_TEI_SERVICE_PORT_TEI=80
CHATQNA_REDIS_VECTOR_DB_SERVICE_HOST=10.103.0.129
CHATQNA_TGI_PORT_80_TCP_PORT=80
CHATQNA_TGI_PORT_80_TCP_ADDR=10.106.73.168
CHATQNA_DATA_PREP_PORT_6007_TCP_PROTO=tcp
CHATQNA_EMBEDDING_USVC_PORT_6000_TCP_PORT=6000
CHATQNA_LLM_USERVICE_SERVICE_PORT=9000
CHATQNA_RERANKING_USVC_PORT=tcp://10.106.114.139:8000
CHATQNA_REDIS_VECTOR_DB_PORT_8001_TCP=tcp://10.103.0.129:8001
CHATQNA_RETRIEVER_USVC_PORT=tcp://10.98.19.124:7000
CHATQNA_RETRIEVER_USVC_PORT_7000_TCP=tcp://10.98.19.124:7000
CHATQNA_RERANKING_USVC_SERVICE_PORT_RERANKING_USVC=8000
KUBERNETES_SERVICE_PORT=443
KUBERNETES_PORT_443_TCP=tcp://10.96.0.1:443
KUBERNETES_PORT_443_TCP_PORT=443
CHATQNA_LLM_USERVICE_SERVICE_PORT_LLM_USERVICE=9000
CHATQNA_RETRIEVER_USVC_SERVICE_PORT_RETRIEVER_USVC=7000
CHATQNA_REDIS_VECTOR_DB_SERVICE_PORT_REDIS_SERVICE=6379
CHATQNA_TEIRERANK_SERVICE_PORT=80
CHATQNA_TGI_SERVICE_PORT_TGI=80
CHATQNA_TGI_PORT=tcp://10.106.73.168:80
CHATQNA_TEI_PORT=tcp://10.97.141.126:80
CHATQNA_EMBEDDING_USVC_SERVICE_PORT=6000
CHATQNA_RETRIEVER_USVC_PORT_7000_TCP_PORT=7000
CHATQNA_REDIS_VECTOR_DB_PORT=tcp://10.103.0.129:6379
CHATQNA_TEIRERANK_PORT_80_TCP_ADDR=10.97.193.140
NVARCH=x86_64
NVIDIA_REQUIRE_CUDA=cuda>=12.1 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526
NV_CUDA_CUDART_VERSION=12.1.55-1
NV_CUDA_COMPAT_PACKAGE=cuda-compat-12-1
CUDA_VERSION=12.1.0
LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
NVIDIA_VISIBLE_DEVICES=all
NVIDIA_DRIVER_CAPABILITIES=compute,utility
CONDA_PREFIX=/opt/conda
HUGGINGFACE_HUB_CACHE=/data
HF_HUB_ENABLE_HF_TRANSFER=1
LD_PRELOAD=/opt/conda/lib/python3.10/site-packages/nvidia/nccl/lib/libnccl.so.2
HOME=/root

TGI input example 

{ "inputs": "### You are a helpful, respectful and honest assistant to help the user with questions. Please refer to the search results obtained from the local knowledge base. But be careful to not incorporate the information that you think is not relevant to the question. If you don't know the answer to a question, please don't share false information. ### Search results:
  discounts, largely due to strategic pricing actions and product mix.\n• Selling and administrative expense increased 15% due to higher operating overhead and demand creation expense. The increase in operating overhead expense\nwas primarily due to higher wage-related costs and higher NIKE Direct variable costs, in part due to new store additions.  ……. (~ 1500 characters)
  ### Question: What is the revenue of Nike in last 10 years before 2023? Give me detail \n\n### Answer:",
  "parameters": {
    "details": false,
    "do_sample": false,
    "max_new_tokens": 1024,
    "repetition_penalty": 1.03,
    "return_full_text": false,
    "stop": [],
    "temperature": 0.01,
    "top_k": 10,
    "top_p": 0.95
  },
  "stream": true}

download file

https://huggingface.co/BAAI/bge-base-en-v1.5/resolve/main/onnx/model.onnx in tei

https://huggingface.co/BAAI/bge-reranker-base/resolve/main/tokenizer.json in teirerank

/api/models/Intel/neural-chat-7b-v3-3 in tgi

 

REF:

https://github.com/intel-innersource/frameworks.edge.one-intel-edge.maestro-app.application-catalog-examples/tree/main/opea/chatqna

 

标签:kubectl,Deployment,chatqna,TCP,opea,CHATQNA,OPEA,PORT
From: https://www.cnblogs.com/shaohef/p/18413965

相关文章

  • 使用kube-prometheus部署k8s监控---超详细(Kubernetes Deployment of Kubernetes Moni
    ......
  • ReplicaSet、Deployment功能是怎么实现的?
    在Kubernetes中,ReplicaSet和Deployment是用于管理Pod副本的两种重要对象。它们各自的功能和实现方式如下:ReplicaSet功能确保副本数量:ReplicaSet的主要功能是确保指定数量的Pod副本始终在运行。如果某个Pod意外崩溃或被删除,ReplicaSet会自动创建新的Pod来替代它......
  • deployment和statefulset区别
    在Kubernetes中,Deployment和StatefulSet都是用于管理Pod的控制器,但它们适用于不同的场景和需求。以下是二者的主要区别:1.用例Deployment:主要用于无状态应用(statelessapplications),如Web服务器、API服务等。适合需要快速扩展和更新的场景。StatefulSet:主要用于有状态......
  • Why I‘m getting 404 Resource Not Found to my newly Azure OpenAI deployment?
    题意:为什么我新部署的AzureOpenAI服务会出现404资源未找到的错误?问题背景:I'vegonethroughthis quickstart andIcreatedmyAzureOpenAIresource+createdamodeldeploymentwhichisinstatesucceedded.Ialsoplayarounditin AzureOpenAIStudio-Mi......
  • D20 kubernetes 工作负载资源对象-Deployment
    1、Deployment简介 Deployment是kubernetes中最常用的工作负载资源,具有以下特点和功能-副本管理:确保指定数量的pod副本在集群中运行。如果pod副本数小于期望值,则会自动创建pod;如果pod的副本数多余期望值,则删除多余的pod-滚动更新:采用滚动更新策略,逐步进行新旧版本pod的替换......
  • 2019-2020 ICPC Northwestern European Regional Programming Contest (NWERC 2019):GH
    前言目前打过的最逆天的一场,前半场CF评测机度假去了,全场Inqueue,两小时左右评测机终于回来了,Standings遍地开花,听取WA声一片。昨天就有好几道题是因为没及时看题所以没做,赛后还和队友商量说应该先把所有题大致看一遍,结果今天不长记性,还没看H和J,就去写思路不一定对+实现起来难得......
  • Deployment
    一、Deployment介绍Kubernetes的Deployment是一种高层次的API对象,用于声明式地管理Pod的副本、升级、回滚等操作。它为应用的部署和维护提供了强大的功能。Deployment是Replicat的一个升级版本,具备动态升级和回滚功能。Deployment是kubernetes中最常用的资源对象,为Repli......
  • 在K8S中,Deployment⽀持扩容吗?它与HPA有什么区别?
    在Kubernetes(K8S)中,Deployment支持扩容,并且是用于管理应用的推荐方式之一。Deployment提供了一种声明式的方式来更新应用实例,并支持滚动更新、回滚等功能。下面详细介绍Deployment的扩容功能及其与HorizontalPodAutoscaler(HPA)的区别。1.Deployment的扩容功能在......
  • deployment.yaml 文件解读
     apiVersion:apps/v1kind:DeploymentapiVersion:资源的版本,这里使用的是apps/v1,表示这是一个Deployment的最新版本。kind:资源类型,这里是Deployment,表示这是一个Deployment资源。metadata:name:project_name-project_envnamespace:dtmtask-project_en......
  • 在K8S中,Deployment的升级过程是什么?
    在Kubernetes中,Deployment提供了一种非常强大的方式来更新应用,同时保持应用的可用性。以下是使用Deployment进行应用升级的过程,包括滚动更新、回滚、暂停和恢复更新等操作。1.升级过程准备工作:确保你有一个现有的Deployment。准备新的容器镜像或者其他需要更改的配置......