diff --git a/CHANGES.md b/CHANGES.md index 58cd48a33f80d3653a0967de5eea7884b4674779..c04f34d7086c3adcc1aff07b0cec674f5d40f8ee 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -127,6 +127,7 @@ Release Notes. #### Documentation * Add FAQ about `Elasticsearch exception type=version_conflict_engine_exception since 8.7.0` +* Add Self Observability service discovery (k8s). All issues and pull requests are [here](https://github.com/apache/skywalking/milestone/90?closed=1) diff --git a/docs/en/setup/backend/backend-telemetry.md b/docs/en/setup/backend/backend-telemetry.md index 709a484fc7deca03ad1530af972b15785da60577..a958c54436b4f9471f03b43371cd805972da17b9 100644 --- a/docs/en/setup/backend/backend-telemetry.md +++ b/docs/en/setup/backend/backend-telemetry.md @@ -19,7 +19,7 @@ telemetry: but you can set one of `prometheus` to enable them, for more information, refer to the details below. ## Self Observability - +### Static IP or hostname SkyWalking supports to collect telemetry data into OAP backend directly. Users could check them out through UI or GraphQL API then. @@ -90,6 +90,63 @@ staticConfig: service: oap-server ... ``` +### Service discovery (k8s) +If you deploy an oap-server cluster on k8s, the oap-server instance (pod) would not have a static IP or hostname. We can leverage [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/getting-started/#kubernetes) to discover the oap-server instance and scrape & transfer the metrics to OAP [OpenTelemetry receiver](backend-receivers.md#opentelemetry-receiver). + +For how to install SkyWalking on k8s, refer to [Apache SkyWalking Kubernetes](https://github.com/apache/skywalking-kubernetes). + +Set this up with the following steps: + +1.
Set up oap-server +- Set the metrics port + ``` + prometheus-port: 1234 + ``` +- Set environment variables + ``` + SW_TELEMETRY=prometheus + SW_OTEL_RECEIVER=default + SW_OTEL_RECEIVER_ENABLED_OC_RULES=oap + ``` + + Here is an example of installing with Apache SkyWalking Kubernetes: + ``` + helm -n istio-system install skywalking skywalking \ + --set elasticsearch.replicas=1 \ + --set elasticsearch.minimumMasterNodes=1 \ + --set elasticsearch.imageTag=7.5.1 \ + --set oap.replicas=2 \ + --set ui.image.repository=$HUB/skywalking-ui \ + --set ui.image.tag=$TAG \ + --set oap.image.tag=$TAG \ + --set oap.image.repository=$HUB/skywalking-oap \ + --set oap.storageType=elasticsearch7 \ + --set oap.ports.prometheus-port=1234 \ # <<< Expose self observability metrics port + --set oap.env.SW_TELEMETRY=prometheus \ + --set oap.env.SW_OTEL_RECEIVER=default \ # <<< Enable Otel receiver + --set oap.env.SW_OTEL_RECEIVER_ENABLED_OC_RULES=oap # <<< Add oap analyzer for Otel metrics + ``` +2. Set up OpenTelemetry Collector and configure a scrape job: +``` yaml +- job_name: 'skywalking' + metrics_path: '/metrics' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_container_name, __meta_kubernetes_pod_container_port_name] + action: keep + regex: oap;prometheus-port + - source_labels: [] + target_label: service + replacement: oap-server + - source_labels: [__meta_kubernetes_pod_name] + target_label: host_name + regex: (.+) + replacement: $$1 +``` + For the full OpenTelemetry Collector configuration example and the recommended version, refer to [otel-collector-oap.yaml](otel-collector-oap.yaml).
+ + ___ diff --git a/docs/en/setup/backend/otel-collector-oap.yaml b/docs/en/setup/backend/otel-collector-oap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e18dffc72cf942745fe0bf5d2676f53fcbc1d55 --- /dev/null +++ b/docs/en/setup/backend/otel-collector-oap.yaml @@ -0,0 +1,180 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v1 +kind: ConfigMap +metadata: + name: otel-collector-conf + labels: + app: opentelemetry + component: otel-collector-conf + namespace: istio-system +data: + otel-collector-config: | + receivers: + prometheus: + config: + global: + scrape_interval: 10s + evaluation_interval: 30s + scrape_configs: + - job_name: 'skywalking' + metrics_path: '/metrics' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_container_name, __meta_kubernetes_pod_container_port_name] + action: keep + regex: oap;prometheus-port + - source_labels: [] + target_label: service + replacement: oap-server + - source_labels: [__meta_kubernetes_pod_name] + target_label: host_name + regex: (.+) + replacement: $$1 + processors: + batch: + extensions: + health_check: {} + zpages: {} + exporters: + opencensus: + endpoint: "skywalking-oap:11800" # The OAP Server address + insecure: true + logging: + logLevel: debug + service: + extensions: [health_check, zpages] + pipelines: + metrics: + receivers: [prometheus] + processors: [batch] + exporters: [opencensus,logging] +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-collector + labels: + app: opentelemetry + component: otel-collector + namespace: istio-system +spec: + ports: + - name: otlp # Default endpoint for OpenTelemetry receiver. + port: 55680 + protocol: TCP + targetPort: 55680 + - name: metrics # Default endpoint for querying metrics. 
+ port: 8888 + selector: + component: otel-collector +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: otel-collector + labels: + app: opentelemetry + component: otel-collector + namespace: istio-system +spec: + selector: + matchLabels: + app: opentelemetry + component: otel-collector + minReadySeconds: 5 + progressDeadlineSeconds: 120 + replicas: 1 #TODO - adjust this to your own requirements + template: + metadata: + labels: + app: opentelemetry + component: otel-collector + spec: + containers: + - command: + - "/otelcol" + - "--config=/conf/otel-collector-config.yaml" + - "--log-level=DEBUG" +# Memory Ballast size should be max 1/3 to 1/2 of memory. + - "--mem-ballast-size-mib=683" + image: otel/opentelemetry-collector:0.29.0 + name: otel-collector + resources: + limits: + cpu: 1 + memory: 2Gi + requests: + cpu: 200m + memory: 400Mi + ports: + - containerPort: 55679 # Default endpoint for ZPages. + - containerPort: 55680 # Default endpoint for OpenTelemetry receiver. + - containerPort: 8888 # Default endpoint for querying metrics. + volumeMounts: + - name: otel-collector-config-vol + mountPath: /conf + livenessProbe: + httpGet: + path: / + port: 13133 # Health Check extension default port. + readinessProbe: + httpGet: + path: / + port: 13133 # Health Check extension default port. 
+ volumes: + - configMap: + name: otel-collector-conf + items: + - key: otel-collector-config + path: otel-collector-config.yaml + name: otel-collector-config-vol +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: otel-collector +rules: +- apiGroups: [""] + resources: + - services + - endpoints + - pods + verbs: ["get", "list", "watch"] +- apiGroups: + - extensions + resources: + - ingresses + verbs: ["get", "list", "watch"] +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app: opentelemetry + component: otel-collector + name: otel-collector +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: otel-collector +subjects: +- kind: ServiceAccount + name: default + namespace: istio-system \ No newline at end of file