RKE2 K8s deployment
export KUBECONFIG=/etc/rancher/rke2/rke2.yaml
mkdir -p ~/.kube/
sudo cp /etc/rancher/rke2/rke2.yaml ~/.kube/config
sudo chown $USER:$USER ~/.kube/config

# https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
# sudo chown root:root /usr/local/bin/kubectl
source <(kubectl completion bash)

IPADDR=$(ip route get 1 | head -n 1 | grep -o 'src\s[.0-9a-z]\+' | awk '{print $2}')
uiport=$(kubectl -n opea-chatqna get svc -l "app.kubernetes.io/name"=chatqna-ui -o jsonpath='{.items[0].spec.ports[0].nodePort}')
echo "log in to the Windows client and open a browser at http://$IPADDR:$uiport"
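Before touching the OPEA resources, a quick sanity check that kubectl can actually reach the RKE2 cluster (nothing ChatQnA-specific is assumed here, only the kubeconfig set up above):

kubectl get nodes -o wide
# list anything that is not Running
kubectl get pods -A --field-selector=status.phase!=Running
kubectl -n opea-chatqna get pods,svc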
check ConfigMap
kubectl -n opea-chatqna get cm -l app.kubernetes.io/name=data-prep
kubectl -n opea-chatqna get cm -l app.kubernetes.io/name=data-prep -o yaml
kubectl -n opea-chatqna get cm -l app.kubernetes.io/name=llm-uservice -o yaml
kubectl -n opea-chatqna get cm -l app.kubernetes.io/name=retriever-usvc -o yaml
kubectl -n opea-chatqna get cm -l app.kubernetes.io/name=tei -o json | jq .items[0].data
kubectl -n opea-chatqna get cm -l app.kubernetes.io/name=teirerank -o json | jq .items[0].data
# kubectl -n opea-chatqna get cm -l app.kubernetes.io/name=tgi

TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx   # replace with your own Hugging Face token

label=app.kubernetes.io/name=retriever-usvc
cm=$(kubectl -n opea-chatqna get cm -l $label -o name)
kubectl -n opea-chatqna patch $cm --type='json' -p '[{"op":"add","path":"/data/HUGGINGFACEHUB_API_TOKEN","value":"'"$TOKEN"'"}]'
kubectl -n opea-chatqna get cm -l $label -o yaml

label=app.kubernetes.io/name=tei
cm=$(kubectl -n opea-chatqna get cm -l $label -o name)
kubectl -n opea-chatqna patch $cm --type='json' -p '[{"op":"add","path":"/data/HUGGINGFACEHUB_API_TOKEN","value":"'"$TOKEN"'"}]'
kubectl -n opea-chatqna get cm -l $label -o yaml

label=app.kubernetes.io/name=teirerank
cm=$(kubectl -n opea-chatqna get cm -l $label -o name)
kubectl -n opea-chatqna patch $cm --type='json' -p '[{"op":"add","path":"/data/HUGGINGFACEHUB_API_TOKEN","value":"'"$TOKEN"'"}]'
kubectl -n opea-chatqna get cm -l $label -o yaml

DATAP="/root"
MIRROR=https://hf-mirror.com/
label=app.kubernetes.io/name=tgi
# REF: https://github.com/opea-project/GenAIInfra/blob/main/helm-charts/common/tgi/templates/configmap.yaml#L14C18-L14C36
# CM
cm=$(kubectl -n opea-chatqna get cm -l $label -o name)
# Data path, X
kubectl -n opea-chatqna patch $cm --type='json' -p '[{"op":"add","path":"/data/global.modelUseHostPath","value":"'"$DATAP"'"}]'
# Data path, X
kubectl -n opea-chatqna patch $cm --type='json' -p '[{"op":"add","path":"/data/modelUseHostPath","value":"'"$DATAP"'"}]'
# Token, X
kubectl -n opea-chatqna patch $cm --type='json' -p '[{"op":"add","path":"/data/HUGGINGFACEHUB_API_TOKEN","value":"'"$TOKEN"'"}]'
# Token, X
kubectl -n opea-chatqna patch $cm --type='json' -p '[{"op":"add","path":"/data/global.HUGGINGFACEHUB_API_TOKEN","value":"'"$TOKEN"'"}]'
# Token
kubectl -n opea-chatqna patch $cm --type='json' -p '[{"op":"add","path":"/data/HF_TOKEN","value":"'"$TOKEN"'"}]'
# Mirror
kubectl -n opea-chatqna patch $cm --type='json' -p '[{"op":"add","path":"/data/HF_ENDPOINT","value":"'"$MIRROR"'"}]'
# Mirror, X
kubectl -n opea-chatqna patch $cm --type='json' -p '[{"op":"add","path":"/data/global.HF_ENDPOINT","value":"'"$MIRROR"'"}]'
kubectl -n opea-chatqna get cm -l $label -o yaml

# not-ready pods
kubectl get pods -n opea-chatqna \
  --field-selector="status.containerStatuses.ready!=true" \
  -o custom-columns="POD:metadata.name"
kubectl get pod -o jsonpath='{range .items[?(@.status.containerStatuses[*].ready!=true)]}{.metadata.name}{"\n"}{end}' -n opea-chatqna

kubectl -n opea-chatqna get pods -l app.kubernetes.io/name=retriever-usvc -o json | jq .items[0].spec.containers[0].envFrom
kubectl -n opea-chatqna get pods -l app.kubernetes.io/name=tei -o json | jq .items[0].spec.containers[0].envFrom
kubectl -n opea-chatqna get pods -l app.kubernetes.io/name=teirerank -o json | jq .items[0].spec.containers[0].envFrom
kubectl -n opea-chatqna get pods -l app.kubernetes.io/name=tgi -o json | jq .items[0].spec.containers[0].envFrom

kubectl -n opea-chatqna delete pods -l app.kubernetes.io/name=retriever-usvc
kubectl -n opea-chatqna delete pods -l app.kubernetes.io/name=tei
kubectl -n opea-chatqna delete pods -l app.kubernetes.io/name=teirerank
kubectl -n opea-chatqna delete pods -l app.kubernetes.io/name=tgi

kubectl -n opea-chatqna describe pods -l app.kubernetes.io/name=retriever-usvc
kubectl -n opea-chatqna describe pods -l app.kubernetes.io/name=tei
kubectl -n opea-chatqna describe pods -l app.kubernetes.io/name=teirerank
kubectl -n opea-chatqna describe pods -l app.kubernetes.io/name=tgi

kubectl -n opea-chatqna logs -l app.kubernetes.io/name=retriever-usvc --tail=-1
kubectl -n opea-chatqna logs -l app.kubernetes.io/name=tei --tail=-1
kubectl -n opea-chatqna logs -l app.kubernetes.io/name=teirerank --tail=-1
kubectl -n opea-chatqna logs -l app.kubernetes.io/name=tgi --tail=-1

kubectl -n opea-chatqna wait --for=condition=ready pod -l app.kubernetes.io/name=tgi --timeout=3m
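The three token patches above differ only in the label, so they can also be applied in one loop. This is just a convenience sketch that reuses the $TOKEN and the label names already set above:

for name in retriever-usvc tei teirerank; do
  cm=$(kubectl -n opea-chatqna get cm -l app.kubernetes.io/name=$name -o name)
  kubectl -n opea-chatqna patch $cm --type='json' \
    -p '[{"op":"add","path":"/data/HUGGINGFACEHUB_API_TOKEN","value":"'"$TOKEN"'"}]'
done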
Port forward
kubectl -n opea-chatqna port-forward $(kubectl -n opea-chatqna get svc -l "app.kubernetes.io/name"=chatqna-ui -o name) 5174
kubectl cluster-info
clusterip=$(kubectl -n opea-chatqna get svc -l "app.kubernetes.io/name"=chatqna-ui -o jsonpath='{.items[0].spec.clusterIP}')
echo "copy the var to the edge node: clusterip=$clusterip"

# log in to the edge node
sudo apt install socat -y
IPADDR=$(ip route get 1 | head -n 1 | grep -o 'src\s[.0-9a-z]\+' | awk '{print $2}')
echo "log in to the Windows client and open a browser at http://$IPADDR:8080"
clusterip=$clusterip   # paste the clusterip value printed on the K8s node
sudo ufw allow 8080/tcp
socat TCP-LISTEN:8080,fork TCP:${clusterip}:5174

# log in to the orchestrator
echo "inside the private network we can use X11 forwarding; run this command:"
echo "google-chrome --new-window http://$IPADDR:8080/ --user-data-dir=/tmp/chromedpdata --no-first-run"
# https://unix.stackexchange.com/questions/10428/simple-way-to-create-a-tunnel-from-one-local-port-to-another
# echo "ssh -g -L $IPADDR:8080:localhost:8000 -f -N user@$IPADDR.com"
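Before opening a browser, the tunnel can be probed from the edge node itself; a 200 (or a redirect code) means socat is forwarding to the UI service. This assumes curl is available on the edge node:

curl -s -o /dev/null -w "%{http_code}\n" http://localhost:8080/
# and from another machine on the same network
curl -s -o /dev/null -w "%{http_code}\n" http://$IPADDR:8080/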
question
It takes 12 minutes to saw a piece of wood into 4 pieces. How many minutes does it take to saw it into 7 pieces?
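(For reference, the expected answer is 24 minutes: 4 pieces means 3 cuts, so one cut takes 12 / 3 = 4 minutes, and 7 pieces needs 6 cuts, 6 × 4 = 24. A good answer from the pipeline should show this reasoning rather than the naive 21.)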
install HuggingFace CLI
sudo apt install python3-pip
pip install huggingface_hub
# To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
HUGGINGFACE_TOKEN=""
huggingface-cli login
# pip installs the CLI under the user base, so put its bin directory on PATH
echo "PATH=$(python3 -m site --user-base)/bin:\$PATH" >> ~/.bashrc
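The login can also be done non-interactively once the token variable above is filled in; `huggingface-cli whoami` then confirms both that the CLI is on PATH and that the token is valid (requires a reasonably recent huggingface_hub):

huggingface-cli login --token $HUGGINGFACE_TOKEN
huggingface-cli whoami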
index
https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA
https://github.com/opea-project/GenAIComps
RAG API workflow
REF: https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker/xeon
# 1 get the embedding of input
label='app.kubernetes.io/name=tei'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
input="What is Deep Learning?"
input="What is the revenue of Nike in 2023?"
curl -x "" http://${clusterip}:${port}/embed \
  -X POST \
  -d '{"inputs":"'"$input"'"}' \
  -H 'Content-Type: application/json'
your_embedding=$(curl -x "" http://${clusterip}:${port}/embed \
  -X POST \
  -d '{"inputs":"'"$input"'"}' \
  -H 'Content-Type: application/json' | jq .[0] -c)

label='app.kubernetes.io/name=embedding-usvc'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
curl -x "" http://${clusterip}:${port}/v1/embeddings \
  -X POST \
  -d '{"text":"hello"}' \
  -H 'Content-Type: application/json'

# 2 get the retrieved docs
label='app.kubernetes.io/name=retriever-usvc'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
# export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
echo "clusterip=$clusterip"
echo "port=$port"
# text='Just a test'
text=$input
curl -x "" http://${clusterip}:${port}/v1/retrieval \
  -X POST \
  -d "{\"text\":\"${text}\",\"embedding\":${your_embedding}}" \
  -H 'Content-Type: application/json'
retrieved_docs=$(curl -x "" http://${clusterip}:${port}/v1/retrieval \
  -X POST \
  -d "{\"text\":\"${text}\",\"embedding\":${your_embedding}}" \
  -H 'Content-Type: application/json' | jq -c .retrieved_docs)

# 3 rerank the docs
label='app.kubernetes.io/name=reranking-usvc'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
# query="What is Deep Learning?"
query=$input
curl -x "" http://${clusterip}:${port}/v1/reranking \
  -X POST \
  -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
  -H 'Content-Type: application/json'
reranking_docs=$(curl -x "" http://${clusterip}:${port}/v1/reranking \
  -X POST \
  -d '{"initial_query":"'"$query"'", "retrieved_docs": '"$retrieved_docs"'}' \
  -H 'Content-Type: application/json' | jq -c .documents[0])
# reranking_docs=$(tr -d '"' <<< "${reranking_docs}" | sed 's/\"/ /g')
reranking_docs=$(sed 's/\\"/ /g' <<< "${reranking_docs}")
reranking_docs=$(tr -d '"' <<< "${reranking_docs}")

# 4.a llm
label='app.kubernetes.io/name=llm-uservice'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
curl -x "" http://${clusterip}:${port}/v1/completions \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'

label='app.kubernetes.io/name=chatqna'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
curl -x "" http://${clusterip}:${port}/v1/chatqna \
  -H "Content-Type: application/json" \
  -d '{ "messages": "What is the revenue of Nike in 2023?" }'

# 4.b tgi
label='app.kubernetes.io/name=tgi'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
# your question
query=${input}
# inputs template
inputs="### You are a helpful, respectful and honest assistant to help the user with questions. Please refer to the search results obtained from the local knowledge base. But be careful to not incorporate the information that you think is not relevant to the question. If you don't know the answer to a question, please don't share false information. ### Search results: ${reranking_docs} ### Question: ${query} \n\n### Answer:"
curl -x "" http://${clusterip}:${port}/generate \
  -X POST \
  -d '{"inputs":"'"${inputs}"'","parameters":{"max_new_tokens":1024, "do_sample": true}}' \
  -H 'Content-Type: application/json'

# 5 data-prep
label='app.kubernetes.io/name=data-prep'
clusterip=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
wget https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/langchain/redis/data/nke-10k-2023.pdf
curl -x "" -X POST "http://${clusterip}:${port}/v1/dataprep" \
  -H "Content-Type: multipart/form-data" \
  -F "files=@./nke-10k-2023.pdf"
curl -x "" -X POST "http://${clusterip}:${port}/v1/dataprep/get_file" \
  -H "Content-Type: application/json"
curl -x "" -X POST "http://${clusterip}:${port}/v1/dataprep/delete_file" \
  -d '{"file_path": "https://opea.dev.txt"}' \
  -H "Content-Type: application/json"
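Every step above repeats the same clusterIP/port lookup, so a small helper keeps the curl calls shorter. This is only a sketch; it assumes the same opea-chatqna namespace and app.kubernetes.io/name labels used throughout this section:

svc_addr() {
  # print "<clusterIP>:<port>" for the service labeled app.kubernetes.io/name=<arg>
  local label="app.kubernetes.io/name=$1"
  local ip port
  ip=$(kubectl -n opea-chatqna get svc -l "$label" -o jsonpath='{.items[0].spec.clusterIP}')
  port=$(kubectl -n opea-chatqna get svc -l "$label" -o jsonpath='{.items[0].spec.ports[0].port}')
  echo "${ip}:${port}"
}
# example: step 1 again, via the helper
curl -x "" "http://$(svc_addr tei)/embed" \
  -X POST \
  -d '{"inputs":"What is Deep Learning?"}' \
  -H 'Content-Type: application/json'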
get image info
edge@iot-edge-xr12-5:~/kubeconf$ kubectl -n opea-chatqna get pod -o json | jq .items[].spec.containers[].image
"opea/chatqna:latest"
"amr-registry.caas.intel.com/nex-hybrid-ai/chatqna-conversation-ui:itep-build"
"opea/dataprep-redis:latest"
"opea/embedding-tei:latest"
"opea/llm-tgi:latest"
"redis/redis-stack:7.2.0-v9"
"opea/reranking-tei:latest"
"opea/retriever-redis:latest"
"ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
"ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
"ghcr.io/huggingface/text-generation-inference:2.1.0"
edge@iot-edge-xr12-5:~/kubeconf$ kubectl -n opea-chatqna get pod -o json | jq .items[] | less
edge@iot-edge-xr12-5:~/kubeconf$ kubectl -n opea-chatqna get pod -o json | jq .items[].metadata.name
"b-21365218-da18-5fad-b4be-0d843c751579-chatqna-686f56fc87-rbnn4"
"b-21365218-da18-5fad-b4be-0d843c751579-chatqna-ui-f7b644757rqgb"
"b-21365218-da18-5fad-b4be-0d843c751579-data-prep-5cf8f8dc475f9r"
"b-21365218-da18-5fad-b4be-0d843c751579-embedding-usvc-85875bjcn"
"b-21365218-da18-5fad-b4be-0d843c751579-llm-uservice-6cf647w8rrz"
"b-21365218-da18-5fad-b4be-0d843c751579-redis-vector-db-7b4frbxj"
"b-21365218-da18-5fad-b4be-0d843c751579-reranking-usvc-58f87zbrw"
"b-21365218-da18-5fad-b4be-0d843c751579-retriever-usvc-557dv4v5r"
"b-21365218-da18-5fad-b4be-0d843c751579-tei-6bc85bc8db-7j9vf"
"b-21365218-da18-5fad-b4be-0d843c751579-teirerank-8cb97f9b-pbl45"
"b-21365218-da18-5fad-b4be-0d843c751579-tgi-75b687bdd8-fz97w"
The environment variables below, captured from the OPEA team's working deployment, are a known-good reference:
kubectl exec chatqna-tgi-cfd44f9c4-szhqt -- env
PATH=/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
HOSTNAME=chatqna-tgi-cfd44f9c4-szhqt
NUMBA_CACHE_DIR=/tmp
TRANSFORMERS_CACHE=/tmp/transformers_cache
no_proxy=bj.intel.com,.bj.intel.com,10.0.0.0/8,192.168.0.0/16,localhost,127.0.0.0/8
CUDA_GRAPHS=0
HF_HOME=/tmp/.cache/huggingface
HF_TOKEN=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
HF_ENDPOINT=https://hf-mirror.com
http_proxy=http://child-prc.intel.com:913
PORT=2080
HABANA_LOGS=/tmp/habana_logs
https_proxy=http://child-prc.intel.com:913
MODEL_ID=Intel/neural-chat-7b-v3-3
CHATQNA_TEIRERANK_SERVICE_HOST=10.97.193.140
CHATQNA_TEIRERANK_PORT=tcp://10.97.193.140:80
CHATQNA_TEIRERANK_PORT_80_TCP_PORT=80
CHATQNA_PORT_8888_TCP_PROTO=tcp
KUBERNETES_SERVICE_PORT_HTTPS=443
CHATQNA_LLM_USERVICE_SERVICE_HOST=10.110.202.82
CHATQNA_RERANKING_USVC_SERVICE_HOST=10.106.114.139
CHATQNA_RERANKING_USVC_PORT_8000_TCP=tcp://10.106.114.139:8000
CHATQNA_TGI_PORT_80_TCP=tcp://10.106.73.168:80
CHATQNA_TGI_SERVICE_PORT=80
CHATQNA_DATA_PREP_PORT=tcp://10.98.141.218:6007
CHATQNA_TEI_PORT_80_TCP_PORT=80
CHATQNA_SERVICE_PORT=8888
CHATQNA_PORT_8888_TCP_PORT=8888
CHATQNA_PORT_8888_TCP_ADDR=10.96.134.150
CHATQNA_TEI_SERVICE_HOST=10.97.141.126
CHATQNA_RETRIEVER_USVC_SERVICE_HOST=10.98.19.124
CHATQNA_RETRIEVER_USVC_PORT_7000_TCP_ADDR=10.98.19.124
CHATQNA_REDIS_VECTOR_DB_PORT_6379_TCP_ADDR=10.103.0.129
CHATQNA_REDIS_VECTOR_DB_PORT_8001_TCP_PORT=8001
CHATQNA_TEIRERANK_PORT_80_TCP=tcp://10.97.193.140:80
CHATQNA_TGI_PORT_80_TCP_PROTO=tcp
CHATQNA_SERVICE_PORT_CHATQNA=8888
KUBERNETES_SERVICE_HOST=10.96.0.1
KUBERNETES_PORT=tcp://10.96.0.1:443
CHATQNA_LLM_USERVICE_PORT_9000_TCP_ADDR=10.110.202.82
CHATQNA_REDIS_VECTOR_DB_SERVICE_PORT_REDIS_INSIGHT=8001
CHATQNA_RETRIEVER_USVC_SERVICE_PORT=7000
CHATQNA_REDIS_VECTOR_DB_PORT_8001_TCP_ADDR=10.103.0.129
CHATQNA_TEIRERANK_PORT_80_TCP_PROTO=tcp
CHATQNA_PORT_8888_TCP=tcp://10.96.134.150:8888
CHATQNA_EMBEDDING_USVC_PORT=tcp://10.100.45.231:6000
CHATQNA_EMBEDDING_USVC_PORT_6000_TCP_PROTO=tcp
CHATQNA_LLM_USERVICE_PORT_9000_TCP_PORT=9000
CHATQNA_RERANKING_USVC_PORT_8000_TCP_PROTO=tcp
CHATQNA_EMBEDDING_USVC_SERVICE_PORT_EMBEDDING_USVC=6000
CHATQNA_RETRIEVER_USVC_PORT_7000_TCP_PROTO=tcp
CHATQNA_RERANKING_USVC_PORT_8000_TCP_PORT=8000
CHATQNA_TEIRERANK_SERVICE_PORT_TEIRERANK=80
CHATQNA_LLM_USERVICE_PORT=tcp://10.110.202.82:9000
CHATQNA_REDIS_VECTOR_DB_PORT_8001_TCP_PROTO=tcp
CHATQNA_DATA_PREP_SERVICE_PORT=6007
CHATQNA_DATA_PREP_PORT_6007_TCP_ADDR=10.98.141.218
CHATQNA_PORT=tcp://10.96.134.150:8888
CHATQNA_EMBEDDING_USVC_SERVICE_HOST=10.100.45.231
CHATQNA_EMBEDDING_USVC_PORT_6000_TCP_ADDR=10.100.45.231
CHATQNA_REDIS_VECTOR_DB_PORT_6379_TCP=tcp://10.103.0.129:6379
CHATQNA_TGI_SERVICE_HOST=10.106.73.168
CHATQNA_TEI_SERVICE_PORT=80
CHATQNA_TEI_PORT_80_TCP_ADDR=10.97.141.126
CHATQNA_SERVICE_HOST=10.96.134.150
KUBERNETES_PORT_443_TCP_PROTO=tcp
CHATQNA_RERANKING_USVC_SERVICE_PORT=8000
CHATQNA_DATA_PREP_PORT_6007_TCP=tcp://10.98.141.218:6007
CHATQNA_EMBEDDING_USVC_PORT_6000_TCP=tcp://10.100.45.231:6000
CHATQNA_REDIS_VECTOR_DB_SERVICE_PORT=6379
CHATQNA_DATA_PREP_PORT_6007_TCP_PORT=6007
CHATQNA_LLM_USERVICE_PORT_9000_TCP=tcp://10.110.202.82:9000
CHATQNA_LLM_USERVICE_PORT_9000_TCP_PROTO=tcp
CHATQNA_REDIS_VECTOR_DB_PORT_6379_TCP_PROTO=tcp
CHATQNA_REDIS_VECTOR_DB_PORT_6379_TCP_PORT=6379
CHATQNA_DATA_PREP_SERVICE_PORT_DATA_PREP=6007
CHATQNA_TEI_PORT_80_TCP=tcp://10.97.141.126:80
CHATQNA_TEI_PORT_80_TCP_PROTO=tcp
KUBERNETES_PORT_443_TCP_ADDR=10.96.0.1
CHATQNA_RERANKING_USVC_PORT_8000_TCP_ADDR=10.106.114.139
CHATQNA_DATA_PREP_SERVICE_HOST=10.98.141.218
CHATQNA_TEI_SERVICE_PORT_TEI=80
CHATQNA_REDIS_VECTOR_DB_SERVICE_HOST=10.103.0.129
CHATQNA_TGI_PORT_80_TCP_PORT=80
CHATQNA_TGI_PORT_80_TCP_ADDR=10.106.73.168
CHATQNA_DATA_PREP_PORT_6007_TCP_PROTO=tcp
CHATQNA_EMBEDDING_USVC_PORT_6000_TCP_PORT=6000
CHATQNA_LLM_USERVICE_SERVICE_PORT=9000
CHATQNA_RERANKING_USVC_PORT=tcp://10.106.114.139:8000
CHATQNA_REDIS_VECTOR_DB_PORT_8001_TCP=tcp://10.103.0.129:8001
CHATQNA_RETRIEVER_USVC_PORT=tcp://10.98.19.124:7000
CHATQNA_RETRIEVER_USVC_PORT_7000_TCP=tcp://10.98.19.124:7000
CHATQNA_RERANKING_USVC_SERVICE_PORT_RERANKING_USVC=8000
KUBERNETES_SERVICE_PORT=443
KUBERNETES_PORT_443_TCP=tcp://10.96.0.1:443
KUBERNETES_PORT_443_TCP_PORT=443
CHATQNA_LLM_USERVICE_SERVICE_PORT_LLM_USERVICE=9000
CHATQNA_RETRIEVER_USVC_SERVICE_PORT_RETRIEVER_USVC=7000
CHATQNA_REDIS_VECTOR_DB_SERVICE_PORT_REDIS_SERVICE=6379
CHATQNA_TEIRERANK_SERVICE_PORT=80
CHATQNA_TGI_SERVICE_PORT_TGI=80
CHATQNA_TGI_PORT=tcp://10.106.73.168:80
CHATQNA_TEI_PORT=tcp://10.97.141.126:80
CHATQNA_EMBEDDING_USVC_SERVICE_PORT=6000
CHATQNA_RETRIEVER_USVC_PORT_7000_TCP_PORT=7000
CHATQNA_REDIS_VECTOR_DB_PORT=tcp://10.103.0.129:6379
CHATQNA_TEIRERANK_PORT_80_TCP_ADDR=10.97.193.140
NVARCH=x86_64
NVIDIA_REQUIRE_CUDA=cuda>=12.1 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526
NV_CUDA_CUDART_VERSION=12.1.55-1
NV_CUDA_COMPAT_PACKAGE=cuda-compat-12-1
CUDA_VERSION=12.1.0
LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
NVIDIA_VISIBLE_DEVICES=all
NVIDIA_DRIVER_CAPABILITIES=compute,utility
CONDA_PREFIX=/opt/conda
HUGGINGFACE_HUB_CACHE=/data
HF_HUB_ENABLE_HF_TRANSFER=1
LD_PRELOAD=/opt/conda/lib/python3.10/site-packages/nvidia/nccl/lib/libnccl.so.2
HOME=/root
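To compare your own deployment against this reference, dump just the interesting variables from the tgi pod the same way (the label is the one used throughout this post; the grep pattern is only a suggestion):

pod=$(kubectl -n opea-chatqna get pod -l app.kubernetes.io/name=tgi -o name | head -n 1)
kubectl -n opea-chatqna exec $pod -- env | grep -E 'HF_|HUGGING|MODEL_ID|proxy|PORT' | sort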
TGI input example
{ "inputs": "### You are a helpful, respectful and honest assistant to help the user with questions. Please refer to the search results obtained from the local knowledge base. But be careful to not incorporate the information that you think is not relevant to the question. If you don't know the answer to a question, please don't share false information. ### Search results: discounts, largely due to strategic pricing actions and product mix.\n• Selling and administrative expense increased 15% due to higher operating overhead and demand creation expense. The increase in operating overhead expense\nwas primarily due to higher wage-related costs and higher NIKE Direct variable costs, in part due to new store additions. ……. (~ 1500 characters) ### Question: What is the revenue of Nike in last 10 years before 2023? Give me detail \n\n### Answer:", "parameters": { "details": false, "do_sample": false, "max_new_tokens": 1024, "repetition_penalty": 1.03, "return_full_text": false, "stop": [], "temperature": 0.01, "top_k": 10, "top_p": 0.95 }, "stream": true}
download file
https://huggingface.co/BAAI/bge-base-en-v1.5/resolve/main/onnx/model.onnx in tei
https://huggingface.co/BAAI/bge-reranker-base/resolve/main/tokenizer.json in teirerank
/api/models/Intel/neural-chat-7b-v3-3 in tgi
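These are the artifacts each pod pulls on first start. They can be pre-fetched into the Hugging Face cache through the mirror so the pods come up faster; a sketch, assuming huggingface_hub (and its CLI) is installed as above and that the cache directory is the one mounted into the pods:

export HF_ENDPOINT=https://hf-mirror.com
huggingface-cli download BAAI/bge-base-en-v1.5 onnx/model.onnx
huggingface-cli download BAAI/bge-reranker-base tokenizer.json
huggingface-cli download Intel/neural-chat-7b-v3-3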