Unverified commit 753c2ff9, authored by XuPeng-SH, committed by GitHub

Mishards k8s fix (#1958)

* [skip ci](shards): update kubernetes related
Signed-off-by: peng.xu <peng.xu@zilliz.com>

* [skip ci](shards): add pending buff in kubernetes provider
Signed-off-by: peng.xu <peng.xu@zilliz.com>

* [skip ci](shards): some bug fix in k8s provider
Signed-off-by: peng.xu <peng.xu@zilliz.com>

* [skip ci](shards): update log
Signed-off-by: peng.xu <peng.xu@zilliz.com>

* [skip ci](shards): update k8s config
Signed-off-by: peng.xu <peng.xu@zilliz.com>

* [skip ci](shards): update Makefile
Signed-off-by: peng.xu <peng.xu@zilliz.com>
Parent 3e1b2ab4
@@ -27,7 +27,7 @@ cluster_status:
kubectl get pods -n milvus -o wide
probe_cluster:
@echo
$(shell kubectl get service -n milvus | grep milvus-proxy-servers | awk {'print $$4,$$5'} | awk -F"[: ]" {'print "docker run --rm --name probe --net=host milvusdb/mishards /bin/bash -c \"python all_in_one/probe_test.py --port="$$2" --host="$$1"\""'})
$(shell kubectl get service -n mishards | grep mishards-proxy-servers | awk {'print $$4,$$5'} | awk -F"[: ]" {'print "docker run --rm --name probe --net=host milvusdb/mishards /bin/bash -c \"python all_in_one/probe_test.py --port="$$2" --host="$$1"\""'})
probe:
docker run --rm --name probe --net=host milvusdb/mishards /bin/bash -c "python all_in_one/probe_test.py --port=${PORT} --host=${HOST}"
clean_coverage:
......
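The rewritten probe_cluster target above greps kubectl get service for the proxy service, pulls the EXTERNAL-IP and PORT(S) columns with awk, and runs a one-off probe container against them. A rough Python equivalent of that pipeline, shown only to make the field positions explicit (the helper name and the column layout of kubectl's default output are assumptions, not part of the commit):

```python
import subprocess

def probe_proxy(namespace="mishards", svc="mishards-proxy-servers"):
    # Locate the proxy service line, as the Makefile's grep does.
    out = subprocess.check_output(["kubectl", "get", "service", "-n", namespace], text=True)
    for line in out.splitlines():
        if svc not in line:
            continue
        fields = line.split()
        host = fields[3]                 # EXTERNAL-IP column (awk $4)
        port = fields[4].split(":")[0]   # PORT(S) looks like "19530:31234/TCP" (awk $5)
        subprocess.run([
            "docker", "run", "--rm", "--name", "probe", "--net=host",
            "milvusdb/mishards", "/bin/bash", "-c",
            f"python all_in_one/probe_test.py --port={port} --host={host}",
        ], check=False)
```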
@@ -11,7 +11,10 @@ import copy
import threading
import queue
import enum
from kubernetes import client, config, watch
from functools import partial
from collections import defaultdict
from kubernetes import client, config as kconfig, watch
from mishards.topology import StatusType
logger = logging.getLogger(__name__)
@@ -33,8 +36,8 @@ class K8SMixin:
self.namespace = open(INCLUSTER_NAMESPACE_PATH).read()
if not self.v1:
config.load_incluster_config(
) if self.in_cluster else config.load_kube_config()
kconfig.load_incluster_config(
) if self.in_cluster else kconfig.load_kube_config()
self.v1 = client.CoreV1Api()
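The import rename above (config to kconfig) keeps the kubernetes client's config module from shadowing mishards' own config objects; both cluster-internal and local loading then go through the alias. A minimal sketch of the pattern, assuming only the official kubernetes Python client:

```python
from kubernetes import client, config as kconfig

def make_core_v1(in_cluster: bool) -> client.CoreV1Api:
    """Load the right credentials, then build a CoreV1Api handle."""
    if in_cluster:
        kconfig.load_incluster_config()   # service-account token inside the pod
    else:
        kconfig.load_kube_config()        # local ~/.kube/config during development
    return client.CoreV1Api()
```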
@@ -133,6 +136,7 @@ class K8SEventListener(threading.Thread, K8SMixin):
class EventHandler(threading.Thread):
PENDING_THRESHOLD = 3
def __init__(self, mgr, message_queue, namespace, pod_patt, **kwargs):
threading.Thread.__init__(self)
self.mgr = mgr
@@ -141,6 +145,25 @@ class EventHandler(threading.Thread):
self.terminate = False
self.pod_patt = re.compile(pod_patt)
self.namespace = namespace
self.pending_add = defaultdict(int)
self.pending_delete = defaultdict(int)
def record_pending_add(self, pod, true_cb=None):
self.pending_add[pod] += 1
self.pending_delete.pop(pod, None)
if self.pending_add[pod] >= self.PENDING_THRESHOLD:
true_cb and true_cb()
return True
return False
def record_pending_delete(self, pod, true_cb=None):
self.pending_delete[pod] += 1
self.pending_add.pop(pod, None)
if self.pending_delete[pod] >= 1:
true_cb and true_cb()
return True
return False
def stop(self):
self.terminate = True
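The new pending counters give pod registration some hysteresis: a pod has to be seen ready PENDING_THRESHOLD (3) consecutive times before the add callback fires, a single not-ready sighting is enough to fire the delete callback, and each kind of event resets the opposite counter. A self-contained sketch of that buffer, with the callbacks left as assumptions:

```python
from collections import defaultdict

PENDING_THRESHOLD = 3

class PendingBuffer:
    """Debounce adds; act on deletes immediately (mirrors EventHandler)."""
    def __init__(self, add_cb, delete_cb):
        self.add_cb, self.delete_cb = add_cb, delete_cb
        self.pending_add = defaultdict(int)
        self.pending_delete = defaultdict(int)

    def seen_ready(self, pod):
        self.pending_add[pod] += 1
        self.pending_delete.pop(pod, None)   # a ready event cancels pending deletes
        if self.pending_add[pod] >= PENDING_THRESHOLD:
            self.add_cb(pod)

    def seen_not_ready(self, pod):
        self.pending_delete[pod] += 1
        self.pending_add.pop(pod, None)      # a not-ready event cancels pending adds
        self.delete_cb(pod)                  # threshold is effectively 1 for deletes
```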
@@ -165,37 +188,47 @@ class EventHandler(threading.Thread):
if try_cnt <= 0 and not pod:
if not event['start_up']:
logger.error('Pod {} is started but cannot read pod'.format(
logger.warning('Pod {} is started but cannot read pod'.format(
event['pod']))
return
elif try_cnt <= 0 and not pod.status.pod_ip:
logger.warning('NoPodIPFoundError')
return
logger.info('Register POD {} with IP {}'.format(
pod.metadata.name, pod.status.pod_ip))
self.mgr.add_pod(name=pod.metadata.name, ip=pod.status.pod_ip)
self.record_pending_add(pod.metadata.name,
true_cb=partial(self.mgr.add_pod, pod.metadata.name, pod.status.pod_ip))
def on_pod_killing(self, event, **kwargs):
logger.info('Unregister POD {}'.format(event['pod']))
self.mgr.delete_pod(name=event['pod'])
self.record_pending_delete(event['pod'],
true_cb=partial(self.mgr.delete_pod, event['pod']))
def on_pod_heartbeat(self, event, **kwargs):
names = self.mgr.readonly_topo.group_names
names = set(copy.deepcopy(list(self.mgr.readonly_topo.group_names)))
running_names = set()
pods_with_event = set()
for each_event in event['events']:
pods_with_event.add(each_event['pod'])
if each_event['ready']:
self.mgr.add_pod(name=each_event['pod'], ip=each_event['ip'])
running_names.add(each_event['pod'])
self.record_pending_add(each_event['pod'],
true_cb=partial(self.mgr.add_pod, each_event['pod'], each_event['ip']))
else:
self.mgr.delete_pod(name=each_event['pod'])
self.record_pending_delete(each_event['pod'],
true_cb=partial(self.mgr.delete_pod, each_event['pod']))
to_delete = names - running_names
for name in to_delete:
self.mgr.delete_pod(name)
pods_no_event = names - pods_with_event
for name in pods_no_event:
self.record_pending_delete(name,
true_cb=partial(self.mgr.delete_pod, name))
logger.info(self.mgr.readonly_topo.group_names)
latest = self.mgr.readonly_topo.group_names
deleted = names - latest
added = latest - names
if deleted:
logger.info('Deleted Pods: {}'.format(list(deleted)))
if added:
logger.info('Added Pods: {}'.format(list(added)))
logger.debug('All Pods: {}'.format(list(latest)))
def handle_event(self, event):
if event['eType'] == EventType.PodHeartBeat:
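The reworked on_pod_heartbeat reconciles by set difference: pods reporting ready become pending adds, pods reporting not-ready become pending deletes, and known groups that sent no event at all are treated as pending deletes too; comparing the group names before and after yields the Added/Deleted log lines. In outline (the event shape follows the diff; the buffer object is the sketch above):

```python
def reconcile(known_groups: set, events: list, buffer: PendingBuffer):
    # events: [{'pod': str, 'ip': str, 'ready': bool}, ...] per heartbeat
    pods_with_event = {e['pod'] for e in events}
    for e in events:
        if e['ready']:
            buffer.seen_ready(e['pod'])      # debounced add
        else:
            buffer.seen_not_ready(e['pod'])  # immediate delete
    for name in known_groups - pods_with_event:
        buffer.seen_not_ready(name)          # silence counts as not-ready
```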
@@ -237,15 +270,15 @@ class KubernetesProviderSettings:
class KubernetesProvider(object):
name = 'kubernetes'
def __init__(self, plugin_config, readonly_topo, **kwargs):
self.namespace = plugin_config.DISCOVERY_KUBERNETES_NAMESPACE
self.pod_patt = plugin_config.DISCOVERY_KUBERNETES_POD_PATT
self.label_selector = plugin_config.DISCOVERY_KUBERNETES_LABEL_SELECTOR
self.in_cluster = plugin_config.DISCOVERY_KUBERNETES_IN_CLUSTER.lower()
def __init__(self, config, readonly_topo, **kwargs):
self.namespace = config.DISCOVERY_KUBERNETES_NAMESPACE
self.pod_patt = config.DISCOVERY_KUBERNETES_POD_PATT
self.label_selector = config.DISCOVERY_KUBERNETES_LABEL_SELECTOR
self.in_cluster = config.DISCOVERY_KUBERNETES_IN_CLUSTER.lower()
self.in_cluster = self.in_cluster == 'true'
self.poll_interval = plugin_config.DISCOVERY_KUBERNETES_POLL_INTERVAL
self.poll_interval = config.DISCOVERY_KUBERNETES_POLL_INTERVAL
self.poll_interval = int(self.poll_interval) if self.poll_interval else 5
self.port = plugin_config.DISCOVERY_KUBERNETES_PORT
self.port = config.DISCOVERY_KUBERNETES_PORT
self.port = int(self.port) if self.port else 19530
self.kwargs = kwargs
self.queue = queue.Queue()
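Besides the plugin_config to config rename, the constructor coerces the string-typed settings itself: IN_CLUSTER is compared against 'true' after lowercasing, and the poll interval and port default to 5 seconds and 19530 when unset. The same coercion, condensed (the helper name is an assumption):

```python
def coerce_settings(config):
    in_cluster = (config.DISCOVERY_KUBERNETES_IN_CLUSTER or "").lower() == "true"
    poll_interval = int(config.DISCOVERY_KUBERNETES_POLL_INTERVAL or 5)   # seconds
    port = int(config.DISCOVERY_KUBERNETES_PORT or 19530)                 # Milvus gRPC port
    return in_cluster, poll_interval, port
```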
@@ -255,8 +288,8 @@ class KubernetesProvider(object):
if not self.namespace:
self.namespace = open(incluster_namespace_path).read()
config.load_incluster_config(
) if self.in_cluster else config.load_kube_config()
kconfig.load_incluster_config(
) if self.in_cluster else kconfig.load_kube_config()
self.v1 = client.CoreV1Api()
self.listener = K8SEventListener(message_queue=self.queue,
@@ -281,6 +314,8 @@ class KubernetesProvider(object):
**kwargs)
def add_pod(self, name, ip):
logger.debug('Register POD {} with IP {}'.format(
name, ip))
ok = True
status = StatusType.OK
try:
@@ -292,8 +327,8 @@ class KubernetesProvider(object):
ok = False
logger.error('Connection error to: {}'.format(addr))
if ok and status == StatusType.OK:
logger.info('KubernetesProvider Add Group \"{}\" Of 1 Address: {}'.format(name, uri))
# if ok and status == StatusType.OK:
# logger.info('KubernetesProvider Add Group \"{}\" Of 1 Address: {}'.format(name, uri))
return ok
def delete_pod(self, name):
@@ -306,6 +341,7 @@ class KubernetesProvider(object):
self.event_handler.start()
self.pod_heartbeater.start()
return True
def stop(self):
self.listener.stop()
......
kind: Service
apiVersion: v1
metadata:
name: milvus-mysql
namespace: milvus
name: mishards-mysql
namespace: mishards
spec:
type: ClusterIP
selector:
app: milvus
app: mishards
tier: mysql
ports:
- protocol: TCP
@@ -19,22 +19,22 @@ spec:
apiVersion: apps/v1
kind: Deployment
metadata:
name: milvus-mysql
namespace: milvus
name: mishards-mysql
namespace: mishards
spec:
selector:
matchLabels:
app: milvus
app: mishards
tier: mysql
replicas: 1
template:
metadata:
labels:
app: milvus
app: mishards
tier: mysql
spec:
containers:
- name: milvus-mysql
- name: mishards-mysql
image: mysql:5.7
imagePullPolicy: IfNotPresent
# lifecycle:
@@ -51,17 +51,17 @@ spec:
- name: mysql-port
containerPort: 3306
volumeMounts:
- name: milvus-mysql-disk
- name: mishards-mysql-disk
mountPath: /data
subPath: mysql
- name: milvus-mysql-configmap
- name: mishards-mysql-configmap
mountPath: /etc/mysql/mysql.conf.d/mysqld.cnf
subPath: milvus_mysql_config.yml
subPath: mishards_mysql_config.yml
volumes:
- name: milvus-mysql-disk
- name: mishards-mysql-disk
persistentVolumeClaim:
claimName: milvus-mysql-disk
- name: milvus-mysql-configmap
claimName: mishards-mysql-disk
- name: mishards-mysql-configmap
configMap:
name: milvus-mysql-configmap
name: mishards-mysql-configmap
apiVersion: v1
kind: ConfigMap
metadata:
name: milvus-mysql-configmap
namespace: milvus
name: mishards-mysql-configmap
namespace: mishards
data:
milvus_mysql_config.yml: |
mishards_mysql_config.yml: |
[mysqld]
pid-file = /var/run/mysqld/mysqld.pid
socket = /var/run/mysqld/mysqld.sock
@@ -26,71 +26,79 @@ data:
apiVersion: v1
kind: ConfigMap
metadata:
name: milvus-proxy-configmap
namespace: milvus
name: mishards-proxy-configmap
namespace: mishards
data:
milvus_proxy_config.yml: |
mishards_proxy_config.yml: |
DEBUG=True
TESTING=False
WOSERVER=tcp://milvus-wo-servers:19530
WOSERVER=tcp://mishards-wo-servers:19530
SERVER_PORT=19530
DISCOVERY_CLASS_NAME=kubernetes
DISCOVERY_KUBERNETES_NAMESPACE=milvus
DISCOVERY_KUBERNETES_NAMESPACE=mishards
DISCOVERY_KUBERNETES_POD_PATT=.*-ro-servers-.*
DISCOVERY_KUBERNETES_LABEL_SELECTOR=tier=ro-servers
DISCOVERY_KUBERNETES_POLL_INTERVAL=10
DISCOVERY_KUBERNETES_IN_CLUSTER=True
SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:milvusroot@milvus-mysql:3306/milvus?charset=utf8mb4
SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:milvusroot@mishards-mysql:3306/milvus?charset=utf8mb4
SQLALCHEMY_POOL_SIZE=50
SQLALCHEMY_POOL_RECYCLE=7200
LOG_PATH=/var/log/milvus
TIMEZONE=Asia/Shanghai
---
apiVersion: v1
kind: ConfigMap
metadata:
name: milvus-roserver-configmap
namespace: milvus
name: mishards-roserver-configmap
namespace: mishards
data:
config.yml: |
version: 0.3
server_config:
address: 0.0.0.0
port: 19530
mode: cluster_readonly
deploy_mode: cluster_readonly
web_port: 19121
db_config:
backend_url: mysql://root:milvusroot@mishards-mysql:3306/milvus
auto_flush_interval: 1
storage_config:
primary_path: /var/milvus
backend_url: mysql://root:milvusroot@milvus-mysql:3306/milvus
insert_buffer_size: 2
metric_config:
enable_monitor: off # true is on, false is off
enable_monitor: false
cache_config:
cpu_cache_capacity: 12 # memory pool to hold index data, unit: GB
cpu_cache_free_percent: 0.85
insert_cache_immediately: false
# gpu_cache_capacity: 4
# gpu_cache_free_percent: 0.85
# gpu_ids:
# - 0
cpu_cache_capacity: 6 # memory pool to hold index data, unit: GB
insert_buffer_size: 1
cache_insert_data: false
gpu_resource_config:
enabled: false
cache_capacity: 1
search_resources:
- gpu0
build_index_resources:
- gpu0
engine_config:
use_blas_threshold: 800
use_blas_threshold: 800
gpu_search_threshold: 1000
resource_config:
search_resources:
- gpu0
wal_config:
enable: false
log.conf: |
* GLOBAL:
FORMAT = "%datetime | %level | %logger | %msg"
FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-global.log"
FILENAME = "/var/lib/milvus/logs/milvus-ro-%datetime{%H:%m}-global.log"
ENABLED = true
TO_FILE = true
TO_STANDARD_OUTPUT = true
@@ -98,12 +106,12 @@ data:
PERFORMANCE_TRACKING = false
MAX_LOG_FILE_SIZE = 2097152 ## Throw log files away after 2MB
* DEBUG:
FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-debug.log"
FILENAME = "/var/lib/milvus/logs/milvus-ro-%datetime{%H:%m}-debug.log"
ENABLED = true
* WARNING:
FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-warning.log"
FILENAME = "/var/lib/milvus/logs/milvus-ro-%datetime{%H:%m}-warning.log"
* TRACE:
FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-trace.log"
FILENAME = "/var/lib/milvus/logs/milvus-ro-%datetime{%H:%m}-trace.log"
* VERBOSE:
FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg"
TO_FILE = true
@@ -111,54 +119,61 @@ data:
## Error logs
* ERROR:
ENABLED = true
FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-error.log"
FILENAME = "/var/lib/milvus/logs/milvus-ro-%datetime{%H:%m}-error.log"
* FATAL:
ENABLED = true
FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-fatal.log"
FILENAME = "/var/lib/milvus/logs/milvus-ro-%datetime{%H:%m}-fatal.log"
---
apiVersion: v1
kind: ConfigMap
metadata:
name: milvus-woserver-configmap
namespace: milvus
name: mishards-woserver-configmap
namespace: mishards
data:
config.yml: |
version: 0.3
server_config:
address: 0.0.0.0
port: 19530
mode: cluster_writable
deploy_mode: cluster_writable
web_port: 19121
db_config:
backend_url: mysql://root:milvusroot@mishards-mysql:3306/milvus
auto_flush_interval: 1
storage_config:
primary_path: /var/milvus
backend_url: mysql://root:milvusroot@milvus-mysql:3306/milvus
insert_buffer_size: 2
metric_config:
enable_monitor: off # true is on, false is off
enable_monitor: false
cache_config:
cpu_cache_capacity: 2 # memory pool to hold index data, unit: GB
cpu_cache_free_percent: 0.85
insert_cache_immediately: false
# gpu_cache_capacity: 4
# gpu_cache_free_percent: 0.85
# gpu_ids:
# - 0
cpu_cache_capacity: 2 # memory pool to hold index data, unit: GB
insert_buffer_size: 2
cache_insert_data: false
engine_config:
use_blas_threshold: 800
gpu_resource_config:
enabled: false
cache_capacity: 1
search_resources:
- gpu0
build_index_resources:
- gpu0
resource_config:
search_resources:
- gpu0
engine_config:
use_blas_threshold: 800
gpu_search_threshold: 1000
wal_config:
enable: false
log.conf: |
* GLOBAL:
FORMAT = "%datetime | %level | %logger | %msg"
FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-global.log"
FILENAME = "/var/lib/milvus/logs/milvus-wo-%datetime{%H:%m}-global.log"
ENABLED = true
TO_FILE = true
TO_STANDARD_OUTPUT = true
@@ -166,12 +181,12 @@ data:
PERFORMANCE_TRACKING = false
MAX_LOG_FILE_SIZE = 2097152 ## Throw log files away after 2MB
* DEBUG:
FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-debug.log"
FILENAME = "/var/lib/milvus/logs/milvus-wo-%datetime{%H:%m}-debug.log"
ENABLED = true
* WARNING:
FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-warning.log"
FILENAME = "/var/lib/milvus/logs/milvus-wo-%datetime{%H:%m}-warning.log"
* TRACE:
FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-trace.log"
FILENAME = "/var/lib/milvus/logs/milvus-wo-%datetime{%H:%m}-trace.log"
* VERBOSE:
FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg"
TO_FILE = true
@@ -179,7 +194,7 @@ data:
## Error logs
* ERROR:
ENABLED = true
FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-error.log"
FILENAME = "/var/lib/milvus/logs/milvus-wo-%datetime{%H:%m}-error.log"
* FATAL:
ENABLED = true
FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-fatal.log"
FILENAME = "/var/lib/milvus/logs/milvus-wo-%datetime{%H:%m}-fatal.log"
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: milvus-db-disk
namespace: milvus
name: mishards-db-disk
namespace: mishards
spec:
accessModes:
- ReadWriteMany
@@ -16,8 +16,8 @@ spec:
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: milvus-log-disk
namespace: milvus
name: mishards-log-disk
namespace: mishards
spec:
accessModes:
- ReadWriteMany
@@ -31,8 +31,8 @@ spec:
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: milvus-mysql-disk
namespace: milvus
name: mishards-mysql-disk
namespace: mishards
spec:
accessModes:
- ReadWriteMany
@@ -46,8 +46,8 @@ spec:
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: milvus-redis-disk
namespace: milvus
name: mishards-redis-disk
namespace: mishards
spec:
accessModes:
- ReadWriteOnce
......
kind: Service
apiVersion: v1
metadata:
name: milvus-proxy-servers
namespace: milvus
name: mishards-proxy-servers
namespace: mishards
spec:
type: LoadBalancer
selector:
app: milvus
app: mishards
tier: proxy
ports:
- name: tcp
@@ -19,29 +19,29 @@ spec:
apiVersion: apps/v1
kind: Deployment
metadata:
name: milvus-proxy
namespace: milvus
name: mishards-proxy
namespace: mishards
spec:
selector:
matchLabels:
app: milvus
app: mishards
tier: proxy
replicas: 1
template:
metadata:
labels:
app: milvus
app: mishards
tier: proxy
spec:
containers:
- name: milvus-proxy
image: milvusdb/mishards:0.1.0-rc0
- name: mishards-proxy
image: milvusdb/mishards
imagePullPolicy: Always
command: ["python", "mishards/main.py"]
resources:
limits:
memory: "3Gi"
cpu: "4"
memory: "2Gi"
cpu: "2"
requests:
memory: "2Gi"
ports:
@@ -71,18 +71,18 @@ spec:
- name: SD_ROSERVER_POD_PATT
value: ".*-ro-servers-.*"
volumeMounts:
- name: milvus-proxy-configmap
- name: mishards-proxy-configmap
mountPath: /source/mishards/.env
subPath: milvus_proxy_config.yml
- name: milvus-log-disk
subPath: mishards_proxy_config.yml
- name: mishards-log-disk
mountPath: /var/log/milvus
subPath: proxylog
# imagePullSecrets:
# - name: regcred
volumes:
- name: milvus-proxy-configmap
- name: mishards-proxy-configmap
configMap:
name: milvus-proxy-configmap
- name: milvus-log-disk
name: mishards-proxy-configmap
- name: mishards-log-disk
persistentVolumeClaim:
claimName: milvus-log-disk
claimName: mishards-log-disk
@@ -16,7 +16,7 @@ metadata:
subjects:
- kind: ServiceAccount
name: default
namespace: milvus
namespace: mishards
roleRef:
kind: ClusterRole
name: pods-list
......
kind: Service
apiVersion: v1
metadata:
name: milvus-ro-servers
namespace: milvus
name: mishards-ro-servers
namespace: mishards
spec:
type: ClusterIP
selector:
app: milvus
app: mishards
tier: ro-servers
ports:
- protocol: TCP
@@ -18,40 +18,43 @@ spec:
apiVersion: apps/v1beta1
kind: StatefulSet
metadata:
name: milvus-ro-servers
namespace: milvus
name: mishards-ro-servers
namespace: mishards
spec:
serviceName: "milvus-ro-servers"
serviceName: "mishards-ro-servers"
replicas: 1
template:
metadata:
labels:
app: milvus
app: mishards
tier: ro-servers
spec:
terminationGracePeriodSeconds: 11
containers:
- name: milvus-ro-server
image: milvusdb/milvus:0.5.0-d102119-ede20b
- name: mishards-ro-server
image: milvusdb/milvus:0.8.0-gpu-d041520-464400
imagePullPolicy: Always
ports:
- containerPort: 19530
resources:
limits:
memory: "16Gi"
cpu: "8.0"
memory: "8Gi"
cpu: "6.0"
requests:
memory: "14Gi"
memory: "8Gi"
volumeMounts:
- name: milvus-db-disk
- name: mishards-db-disk
mountPath: /var/milvus
subPath: dbdata
- name: milvus-roserver-configmap
mountPath: /opt/milvus/conf/server_config.yaml
- name: mishards-roserver-configmap
mountPath: /var/lib/milvus/conf/server_config.yaml
subPath: config.yml
- name: milvus-roserver-configmap
mountPath: /opt/milvus/conf/log_config.conf
- name: mishards-roserver-configmap
mountPath: /var/lib/milvus/conf/log_config.conf
subPath: log.conf
- name: mishards-log-disk
mountPath: /var/lib/milvus/logs
subPath: rolog
# imagePullSecrets:
# - name: regcred
# tolerations:
@@ -60,9 +63,12 @@ spec:
# value: "performance"
# effect: "NoSchedule"
volumes:
- name: milvus-roserver-configmap
- name: mishards-roserver-configmap
configMap:
name: milvus-roserver-configmap
- name: milvus-db-disk
name: mishards-roserver-configmap
- name: mishards-db-disk
persistentVolumeClaim:
claimName: milvus-db-disk
claimName: mishards-db-disk
- name: mishards-log-disk
persistentVolumeClaim:
claimName: mishards-log-disk
kind: Service
apiVersion: v1
metadata:
name: milvus-wo-servers
namespace: milvus
name: mishards-wo-servers
namespace: mishards
spec:
type: ClusterIP
selector:
app: milvus
app: mishards
tier: wo-servers
ports:
- protocol: TCP
@@ -18,42 +18,45 @@ spec:
apiVersion: apps/v1beta1
kind: Deployment
metadata:
name: milvus-wo-servers
namespace: milvus
name: mishards-wo-servers
namespace: mishards
spec:
selector:
matchLabels:
app: milvus
app: mishards
tier: wo-servers
replicas: 1
template:
metadata:
labels:
app: milvus
app: mishards
tier: wo-servers
spec:
containers:
- name: milvus-wo-server
image: milvusdb/milvus:0.5.0-d102119-ede20b
- name: mishards-wo-server
image: milvusdb/milvus:0.8.0-gpu-d041520-464400
imagePullPolicy: Always
ports:
- containerPort: 19530
resources:
limits:
memory: "5Gi"
cpu: "1.0"
memory: "4Gi"
cpu: "2.0"
requests:
memory: "4Gi"
volumeMounts:
- name: milvus-db-disk
- name: mishards-db-disk
mountPath: /var/milvus
subPath: dbdata
- name: milvus-woserver-configmap
mountPath: /opt/milvus/conf/server_config.yaml
- name: mishards-woserver-configmap
mountPath: /var/lib/milvus/conf/server_config.yaml
subPath: config.yml
- name: milvus-woserver-configmap
mountPath: /opt/milvus/conf/log_config.conf
- name: mishards-woserver-configmap
mountPath: /var/lib/milvus/conf/log_config.conf
subPath: log.conf
- name: mishards-log-disk
mountPath: /var/lib/milvus/logs
subPath: wslog
# imagePullSecrets:
# - name: regcred
# tolerations:
@@ -62,9 +65,12 @@ spec:
# value: "performance"
# effect: "NoSchedule"
volumes:
- name: milvus-woserver-configmap
- name: mishards-woserver-configmap
configMap:
name: milvus-woserver-configmap
- name: milvus-db-disk
name: mishards-woserver-configmap
- name: mishards-db-disk
persistentVolumeClaim:
claimName: mishards-db-disk
- name: mishards-log-disk
persistentVolumeClaim:
claimName: milvus-db-disk
claimName: mishards-log-disk
@@ -42,29 +42,29 @@ function PrintPodStatusMessage() {
timeout=60
function setUpMysql () {
mysqlUserName=$(kubectl describe configmap -n milvus milvus-roserver-configmap |
mysqlUserName=$(kubectl describe configmap -n mishards mishards-roserver-configmap |
grep backend_url |
awk '{print $2}' |
awk '{split($0, level1, ":");
split(level1[2], level2, "/");
print level2[3]}')
mysqlPassword=$(kubectl describe configmap -n milvus milvus-roserver-configmap |
mysqlPassword=$(kubectl describe configmap -n mishards mishards-roserver-configmap |
grep backend_url |
awk '{print $2}' |
awk '{split($0, level1, ":");
split(level1[3], level3, "@");
print level3[1]}')
mysqlDBName=$(kubectl describe configmap -n milvus milvus-roserver-configmap |
mysqlDBName=$(kubectl describe configmap -n mishards mishards-roserver-configmap |
grep backend_url |
awk '{print $2}' |
awk '{split($0, level1, ":");
split(level1[4], level4, "/");
print level4[2]}')
mysqlContainer=$(kubectl get pods -n milvus | grep milvus-mysql | awk '{print $1}')
mysqlContainer=$(kubectl get pods -n mishards | grep mishards-mysql | awk '{print $1}')
kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "CREATE DATABASE IF NOT EXISTS $mysqlDBName;"
kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "CREATE DATABASE IF NOT EXISTS $mysqlDBName;"
checkDBExists=$(kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "SELECT schema_name FROM information_schema.schemata WHERE schema_name = '$mysqlDBName';" | grep -o $mysqlDBName | wc -l)
checkDBExists=$(kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "SELECT schema_name FROM information_schema.schemata WHERE schema_name = '$mysqlDBName';" | grep -o $mysqlDBName | wc -l)
counter=0
while [ $checkDBExists -lt 1 ]; do
sleep 1
@@ -73,12 +73,12 @@ function setUpMysql () {
echo "Creating MySQL database $mysqlDBName timeout"
return 1
fi
checkDBExists=$(kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "SELECT schema_name FROM information_schema.schemata WHERE schema_name = '$mysqlDBName';" | grep -o $mysqlDBName | wc -l)
checkDBExists=$(kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "SELECT schema_name FROM information_schema.schemata WHERE schema_name = '$mysqlDBName';" | grep -o $mysqlDBName | wc -l)
done;
kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "GRANT ALL PRIVILEGES ON $mysqlDBName.* TO '$mysqlUserName'@'%';"
kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "FLUSH PRIVILEGES;"
checkGrant=$(kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "SHOW GRANTS for $mysqlUserName;" | grep -o "GRANT ALL PRIVILEGES ON \`$mysqlDBName\`\.\*" | wc -l)
kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "GRANT ALL PRIVILEGES ON $mysqlDBName.* TO '$mysqlUserName'@'%';"
kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "FLUSH PRIVILEGES;"
checkGrant=$(kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "SHOW GRANTS for $mysqlUserName;" | grep -o "GRANT ALL PRIVILEGES ON \`$mysqlDBName\`\.\*" | wc -l)
counter=0
while [ $checkGrant -lt 1 ]; do
sleep 1
@@ -87,17 +87,17 @@ function setUpMysql () {
echo "Granting all privileges on $mysqlDBName to $mysqlUserName timeout"
return 1
fi
checkGrant=$(kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "SHOW GRANTS for $mysqlUserName;" | grep -o "GRANT ALL PRIVILEGES ON \`$mysqlDBName\`\.\*" | wc -l)
checkGrant=$(kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "SHOW GRANTS for $mysqlUserName;" | grep -o "GRANT ALL PRIVILEGES ON \`$mysqlDBName\`\.\*" | wc -l)
done;
}
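setUpMysql recovers the MySQL user, password, and database name by repeatedly splitting the roserver configmap's backend_url (e.g. mysql://root:milvusroot@mishards-mysql:3306/milvus) with awk. The same extraction in Python, included only to make the awk field arithmetic explicit:

```python
from urllib.parse import urlparse

def parse_backend_url(url="mysql://root:milvusroot@mishards-mysql:3306/milvus"):
    parts = urlparse(url)
    return {
        "user": parts.username,        # level2[3] in the awk version
        "password": parts.password,    # level3[1]
        "host": parts.hostname,
        "db": parts.path.lstrip("/"),  # level4[2]
    }
```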
function checkStatefulSevers() {
stateful_replicas=$(kubectl describe statefulset -n milvus milvus-ro-servers | grep "Replicas:" | awk '{print $2}')
stateful_running_pods=$(kubectl describe statefulset -n milvus milvus-ro-servers | grep "Pods Status:" | awk '{print $3}')
stateful_replicas=$(kubectl describe statefulset -n mishards mishards-ro-servers | grep "Replicas:" | awk '{print $3}')
stateful_running_pods=$(kubectl describe statefulset -n mishards mishards-ro-servers | grep "Pods Status:" | awk '{print $3}')
counter=0
prev=$stateful_running_pods
PrintPodStatusMessage "Running milvus-ro-servers Pods: $stateful_running_pods/$stateful_replicas"
PrintPodStatusMessage "Running mishards-ro-servers Pods: $stateful_running_pods/$stateful_replicas"
while [ $stateful_replicas != $stateful_running_pods ]; do
echo -e "${YELLOW}Wait another 1 sec --- ${counter}${ENDC}"
sleep 1;
@@ -107,9 +107,9 @@ function checkStatefulSevers() {
return 1;
fi
stateful_running_pods=$(kubectl describe statefulset -n milvus milvus-ro-servers | grep "Pods Status:" | awk '{print $3}')
stateful_running_pods=$(kubectl describe statefulset -n mishards mishards-ro-servers | grep "Pods Status:" | awk '{print $3}')
if [ $stateful_running_pods -ne $prev ]; then
PrintPodStatusMessage "Running milvus-ro-servers Pods: $stateful_running_pods/$stateful_replicas"
PrintPodStatusMessage "Running mishards-ro-servers Pods: $stateful_running_pods/$stateful_replicas"
fi
prev=$stateful_running_pods
done;
@@ -118,8 +118,8 @@ function checkStatefulSevers() {
function checkDeployment() {
deployment_name=$1
replicas=$(kubectl describe deployment -n milvus $deployment_name | grep "Replicas:" | awk '{print $2}')
running=$(kubectl get pods -n milvus | grep $deployment_name | grep Running | wc -l)
replicas=$(kubectl describe deployment -n mishards $deployment_name | grep "Replicas:" | awk '{print $2}')
running=$(kubectl get pods -n mishards | grep $deployment_name | grep Running | wc -l)
counter=0
prev=$running
@@ -133,7 +133,7 @@ function checkDeployment() {
return 1
fi
running=$(kubectl get pods -n milvus | grep "$deployment_name" | grep Running | wc -l)
running=$(kubectl get pods -n mishards | grep "$deployment_name" | grep Running | wc -l)
if [ $running -ne $prev ]; then
PrintPodStatusMessage "Running $deployment_name Pods: $running/$replicas"
fi
@@ -143,12 +143,12 @@ function checkDeployment() {
function startDependencies() {
kubectl apply -f milvus_data_pvc.yaml
kubectl apply -f milvus_configmap.yaml
kubectl apply -f milvus_auxiliary.yaml
kubectl apply -f mishards_data_pvc.yaml
kubectl apply -f mishards_configmap.yaml
kubectl apply -f mishards_auxiliary.yaml
counter=0
while [ $(kubectl get pvc -n milvus | grep Bound | wc -l) != 4 ]; do
while [ $(kubectl get pvc -n mishards | grep Bound | wc -l) != 4 ]; do
sleep 1;
let counter=counter+1
if [ $counter == $timeout ]; then
@@ -156,7 +156,7 @@ function startDependencies() {
return 1
fi
done
checkDeployment "milvus-mysql"
checkDeployment "mishards-mysql"
}
function startApps() {
@@ -165,19 +165,19 @@ function startApps() {
echo -e "${GREEN}${BOLD}Checking required resouces...${NORMAL}${ENDC}"
while [ $counter -lt $timeout ]; do
sleep 1;
if [ $(kubectl get pvc -n milvus 2>/dev/null | grep Bound | wc -l) != 4 ]; then
if [ $(kubectl get pvc -n mishards 2>/dev/null | grep Bound | wc -l) != 4 ]; then
echo -e "${YELLOW}No pvc. Wait another sec... $counter${ENDC}";
errmsg='No pvc';
let counter=counter+1;
continue
fi
if [ $(kubectl get configmap -n milvus 2>/dev/null | grep milvus | wc -l) != 4 ]; then
if [ $(kubectl get configmap -n mishards 2>/dev/null | grep mishards | wc -l) != 4 ]; then
echo -e "${YELLOW}No configmap. Wait another sec... $counter${ENDC}";
errmsg='No configmap';
let counter=counter+1;
continue
fi
if [ $(kubectl get ep -n milvus 2>/dev/null | grep milvus-mysql | awk '{print $2}') == "<none>" ]; then
if [ $(kubectl get ep -n mishards 2>/dev/null | grep mishards-mysql | awk '{print $2}') == "<none>" ]; then
echo -e "${YELLOW}No mysql. Wait another sec... $counter${ENDC}";
errmsg='No mysql';
let counter=counter+1;
@@ -205,30 +205,30 @@ function startApps() {
fi
echo -e "${GREEN}${BOLD}Start servers ...${NORMAL}${ENDC}"
kubectl apply -f milvus_stateful_servers.yaml
kubectl apply -f milvus_write_servers.yaml
kubectl apply -f mishards_stateful_servers.yaml
kubectl apply -f mishards_write_servers.yaml
checkStatefulSevers
if [ $? -ne 0 ]; then
echo -e "${RED}${BOLD}Starting milvus-ro-servers timeout${NORMAL}${ENDC}"
echo -e "${RED}${BOLD}Starting mishards-ro-servers timeout${NORMAL}${ENDC}"
exit 1
fi
checkDeployment "milvus-wo-servers"
checkDeployment "mishards-wo-servers"
if [ $? -ne 0 ]; then
echo -e "${RED}${BOLD}Starting milvus-wo-servers timeout${NORMAL}${ENDC}"
echo -e "${RED}${BOLD}Starting mishards-wo-servers timeout${NORMAL}${ENDC}"
exit 1
fi
echo -e "${GREEN}${BOLD}Start rolebinding ...${NORMAL}${ENDC}"
kubectl apply -f milvus_rbac.yaml
kubectl apply -f mishards_rbac.yaml
echo -e "${GREEN}${BOLD}Start proxies ...${NORMAL}${ENDC}"
kubectl apply -f milvus_proxy.yaml
kubectl apply -f mishards_proxy.yaml
checkDeployment "milvus-proxy"
checkDeployment "mishards-proxy"
if [ $? -ne 0 ]; then
echo -e "${RED}${BOLD}Starting milvus-proxy timeout${NORMAL}${ENDC}"
echo -e "${RED}${BOLD}Starting mishards-proxy timeout${NORMAL}${ENDC}"
exit 1
fi
@@ -244,10 +244,10 @@ function startApps() {
function removeApps () {
# kubectl delete -f milvus_flower.yaml 2>/dev/null
kubectl delete -f milvus_proxy.yaml 2>/dev/null
kubectl delete -f milvus_stateful_servers.yaml 2>/dev/null
kubectl delete -f milvus_write_servers.yaml 2>/dev/null
kubectl delete -f milvus_rbac.yaml 2>/dev/null
kubectl delete -f mishards_proxy.yaml 2>/dev/null
kubectl delete -f mishards_stateful_servers.yaml 2>/dev/null
kubectl delete -f mishards_write_servers.yaml 2>/dev/null
kubectl delete -f mishards_rbac.yaml 2>/dev/null
# kubectl delete -f milvus_monitor.yaml 2>/dev/null
}
@@ -263,9 +263,9 @@ function scaleDeployment() {
;;
esac
cur=$(kubectl get deployment -n milvus $deployment_name |grep $deployment_name |awk '{split($2, status, "/"); print status[2];}')
cur=$(kubectl get deployment -n mishards $deployment_name |grep $deployment_name |awk '{split($2, status, "/"); print status[2];}')
echo -e "${GREEN}Current Running ${BOLD}$cur ${GREEN}${deployment_name}, Scaling to ${BOLD}$des ...${ENDC}";
scalecmd="kubectl scale deployment -n milvus ${deployment_name} --replicas=${des}"
scalecmd="kubectl scale deployment -n mishards ${deployment_name} --replicas=${des}"
${scalecmd}
if [ $? -ne 0 ]; then
echo -e "${RED}${BOLD}Scale Error: ${GREEN}${scalecmd}${ENDC}"
@@ -276,7 +276,7 @@ function scaleDeployment() {
if [ $? -ne 0 ]; then
echo -e "${RED}${BOLD}Scale ${deployment_name} timeout${NORMAL}${ENDC}"
scalecmd="kubectl scale deployment -n milvus ${deployment_name} --replicas=${cur}"
scalecmd="kubectl scale deployment -n mishards ${deployment_name} --replicas=${cur}"
${scalecmd}
if [ $? -ne 0 ]; then
echo -e "${RED}${BOLD}Scale Rollback Error: ${GREEN}${scalecmd}${ENDC}"
@@ -298,9 +298,9 @@ function scaleROServers() {
;;
esac
cur=$(kubectl get statefulset -n milvus milvus-ro-servers |tail -n 1 |awk '{split($2, status, "/"); print status[2];}')
cur=$(kubectl get statefulset -n mishards mishards-ro-servers |tail -n 1 |awk '{split($2, status, "/"); print status[2];}')
echo -e "${GREEN}Current Running ${BOLD}$cur ${GREEN}Readonly Servers, Scaling to ${BOLD}$des ...${ENDC}";
scalecmd="kubectl scale sts milvus-ro-servers -n milvus --replicas=${des}"
scalecmd="kubectl scale sts mishards-ro-servers -n mishards --replicas=${des}"
${scalecmd}
if [ $? -ne 0 ]; then
echo -e "${RED}${BOLD}Scale Error: ${GREEN}${scalecmd}${ENDC}"
@@ -309,8 +309,8 @@ function scaleROServers() {
checkStatefulSevers
if [ $? -ne 0 ]; then
echo -e "${RED}${BOLD}Scale milvus-ro-servers timeout${NORMAL}${ENDC}"
scalecmd="kubectl scale sts milvus-ro-servers -n milvus --replicas=${cur}"
echo -e "${RED}${BOLD}Scale mishards-ro-servers timeout${NORMAL}${ENDC}"
scalecmd="kubectl scale sts mishards-ro-servers -n mishards --replicas=${cur}"
${scalecmd}
if [ $? -ne 0 ]; then
echo -e "${RED}${BOLD}Scale Rollback Error: ${GREEN}${scalecmd}${ENDC}"
@@ -358,7 +358,7 @@ scale-ro-server)
;;
scale-proxy)
scaleDeployment "milvus-proxy" $1 $2
scaleDeployment "mishards-proxy" $1 $2
;;
-h|--help|*)
......
@@ -35,7 +35,8 @@ class TopoGroup:
self.cv = threading.Condition()
def on_duplicate(self, topo_object):
logger.warning('Duplicated topo_object \"{}\" into group \"{}\"'.format(topo_object, self.name))
pass
# logger.warning('Duplicated topo_object \"{}\" into group \"{}\"'.format(topo_object, self.name))
def on_added(self, topo_object):
return True
@@ -85,15 +86,15 @@ class Topology:
self.cv = threading.Condition()
def on_duplicated_group(self, group):
logger.warning('Duplicated group \"{}\" found!'.format(group))
# logger.warning('Duplicated group \"{}\" found!'.format(group))
return StatusType.DUPLICATED
def on_pre_add_group(self, group):
logger.debug('Pre add group \"{}\"'.format(group))
# logger.debug('Pre add group \"{}\"'.format(group))
return StatusType.OK
def on_post_add_group(self, group):
logger.debug('Post add group \"{}\"'.format(group))
# logger.debug('Post add group \"{}\"'.format(group))
return StatusType.OK
def get_group(self, name):
@@ -116,13 +117,16 @@ class Topology:
return self.on_post_add_group(group)
def on_delete_not_existed_group(self, group):
logger.warning('Deleting non-existed group \"{}\"'.format(group))
# logger.warning('Deleting non-existed group \"{}\"'.format(group))
pass
def on_pre_delete_group(self, group):
logger.debug('Pre delete group \"{}\"'.format(group))
pass
# logger.debug('Pre delete group \"{}\"'.format(group))
def on_post_delete_group(self, group):
logger.debug('Post delete group \"{}\"'.format(group))
pass
# logger.debug('Post delete group \"{}\"'.format(group))
def _delete_group_no_lock(self, group):
logger.info('Deleting group \"{}\"'.format(group))
@@ -132,7 +136,7 @@
def delete_group(self, group):
self.on_pre_delete_group(group)
with self.cv:
deleted_group = self._delete_group_lock(group)
deleted_group = self._delete_group_no_lock(group)
if not deleted_group:
return self.on_delete_not_existed_group(group)
return self.on_post_delete_group(group)
......
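The final hunk fixes a latent NameError: delete_group called self._delete_group_lock, which is never defined; the method that exists is _delete_group_no_lock, and it has to run while the condition variable is held. A reduced sketch of the corrected flow (hook bodies stubbed out, matching the now-silenced logging):

```python
import threading

class Topology:
    def __init__(self):
        self.topo_groups = {}
        self.cv = threading.Condition()

    # Hook stubs; the real class logs (or now stays silent) in these.
    def on_pre_delete_group(self, group): pass
    def on_post_delete_group(self, group): pass
    def on_delete_not_existed_group(self, group): pass

    def _delete_group_no_lock(self, group):
        # Caller must hold self.cv.
        return self.topo_groups.pop(group, None)

    def delete_group(self, group):
        self.on_pre_delete_group(group)
        with self.cv:
            deleted_group = self._delete_group_no_lock(group)
            if not deleted_group:
                return self.on_delete_not_existed_group(group)
            return self.on_post_delete_group(group)
```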