未验证 提交 cb16e524 编写于 作者: Z Zhenxu Ke 提交者: GitHub

Enhance the LAL to allow easily skipping logs with malformed formats (#6477)

上级 207cef79
...@@ -34,13 +34,7 @@ filter { ...@@ -34,13 +34,7 @@ filter {
if (log.service == "TestingService") { // Don't waste resources on TestingServices if (log.service == "TestingService") { // Don't waste resources on TestingServices
abort {} // all remaining components won't be executed at all abort {} // all remaining components won't be executed at all
} }
text { // ... parsers, extractors, sinks
if (!regexp("(?<timestamp>\\d{8}) (?<thread>\\w+) (?<level>\\w+) (?<traceId>\\w+) (?<msg>.+)")) {
// if the logs don't match this regexp, skip it
abort {}
}
}
// ... extractors, sinks
} }
``` ```
...@@ -55,15 +49,35 @@ types of parsers at the moment, namely `json`, `yaml`, and `text`. ...@@ -55,15 +49,35 @@ types of parsers at the moment, namely `json`, `yaml`, and `text`.
When a piece of log is parsed, there is a corresponding property available, called `parsed`, injected by LAL. When a piece of log is parsed, there is a corresponding property available, called `parsed`, injected by LAL.
Property `parsed` is typically a map, containing all the fields parsed from the raw logs, for example, if the parser Property `parsed` is typically a map, containing all the fields parsed from the raw logs, for example, if the parser
is `json` / `yaml`, `parsed` is a map containing all the key-values in the `json` / `yaml`, if the parser is `text` is `json` / `yaml`, `parsed` is a map containing all the key-values in the `json` / `yaml`, if the parser is `text`
, `parsed` is a map containing all the captured groups and their values (for `regexp` and `grok`). See examples below. , `parsed` is a map containing all the captured groups and their values (for `regexp` and `grok`).
All parsers share the following options:
| Option | Type | Description | Default Value |
| ------ | ---- | ----------- | ------------- |
| `abortOnFailure` | `boolean` | Whether the filter chain should abort if the parser fails to parse / match the logs | `true` |
See examples below.
#### `json` #### `json`
<!-- TODO: is structured in the reported (gRPC) `LogData`, not much to do --> ```groovy
filter {
json {
abortOnFailure true // this is optional because it's the default behaviour
}
}
```
#### `yaml` #### `yaml`
<!-- TODO: is structured in the reported (gRPC) `LogData`, not much to do --> ```groovy
filter {
yaml {
abortOnFailure true // this is optional because it's the default behaviour
}
}
```
#### `text` #### `text`
...@@ -78,8 +92,9 @@ all the captured groups can be used later in the extractors or sinks. ...@@ -78,8 +92,9 @@ all the captured groups can be used later in the extractors or sinks.
```groovy ```groovy
filter { filter {
text { text {
regexp "(?<timestamp>\\d{8}) (?<thread>\\w+) (?<level>\\w+) (?<traceId>\\w+) (?<msg>.+)" abortOnFailure true // this is optional because it's default behaviour
// this is just a demo pattern // this is just a demo pattern
regexp "(?<timestamp>\\d{8}) (?<thread>\\w+) (?<level>\\w+) (?<traceId>\\w+) (?<msg>.+)"
} }
extractor { extractor {
tag level: parsed.level tag level: parsed.level
...@@ -91,9 +106,10 @@ filter { ...@@ -91,9 +106,10 @@ filter {
} }
``` ```
- `grok` - `grok` (TODO)
<!-- TODO: grok Java library has poor performance, need to benchmark it, the idea is basically the same with `regexp` above --> Because the grok Java library has performance issues, we need to investigate and benchmark it first. Contributions are
welcome.
### Extractor ### Extractor
......
...@@ -98,11 +98,17 @@ public class FilterSpec extends AbstractSpec { ...@@ -98,11 +98,17 @@ public class FilterSpec extends AbstractSpec {
cl.call(); cl.call();
final LogData.Builder logData = BINDING.get().log(); final LogData.Builder logData = BINDING.get().log();
final Map<String, Object> parsed = jsonParser.create().fromJson( try {
logData.getBody().getJson().getJson(), parsedType final Map<String, Object> parsed = jsonParser.create().fromJson(
); logData.getBody().getJson().getJson(), parsedType
);
BINDING.get().parsed(parsed);
BINDING.get().parsed(parsed);
} catch (final Exception e) {
if (jsonParser.abortOnFailure()) {
BINDING.get().abort();
}
}
} }
@SuppressWarnings({"unused", "unchecked"}) @SuppressWarnings({"unused", "unchecked"})
...@@ -114,11 +120,17 @@ public class FilterSpec extends AbstractSpec { ...@@ -114,11 +120,17 @@ public class FilterSpec extends AbstractSpec {
cl.call(); cl.call();
final LogData.Builder logData = BINDING.get().log(); final LogData.Builder logData = BINDING.get().log();
final Map<String, Object> parsed = (Map<String, Object>) yamlParser.create().load( try {
logData.getBody().getYaml().getYaml() final Map<String, Object> parsed = (Map<String, Object>) yamlParser.create().load(
); logData.getBody().getYaml().getYaml()
);
BINDING.get().parsed(parsed);
BINDING.get().parsed(parsed);
} catch (final Exception e) {
if (yamlParser.abortOnFailure()) {
BINDING.get().abort();
}
}
} }
@SuppressWarnings("unused") @SuppressWarnings("unused")
......
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser;
import lombok.Getter;
import lombok.Setter;
import lombok.experimental.Accessors;
import org.apache.skywalking.oap.log.analyzer.dsl.spec.AbstractSpec;
import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig;
import org.apache.skywalking.oap.server.library.module.ModuleManager;
/**
 * Common base for the parser specs, holding the options that every parser shares.
 *
 * <p>At the moment the only shared option is {@code abortOnFailure}. Its accessors are generated by Lombok
 * ({@code @Getter} / {@code @Setter}) under {@code @Accessors(fluent = true)}, i.e. they are named after the
 * field itself rather than using the {@code get} / {@code set} prefixes.
 */
@Accessors(fluent = true)
public class AbstractParserSpec extends AbstractSpec {
/**
 * Whether the filter chain should abort when parsing the logs fails. Defaults to {@code true}.
 *
 * <p>Failing to parse the logs means that either the parser throws an exception or the logs do not match
 * the desired pattern.
 */
@Getter
@Setter
private boolean abortOnFailure = true;
/**
 * Creates the spec by passing both arguments straight through to the {@link AbstractSpec} constructor.
 *
 * @param moduleManager the module manager handed to the parent spec
 * @param moduleConfig  the log analyzer module configuration handed to the parent spec
 */
public AbstractParserSpec(final ModuleManager moduleManager,
final LogAnalyzerModuleConfig moduleConfig) {
super(moduleManager, moduleConfig);
}
}
...@@ -20,21 +20,26 @@ package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser; ...@@ -20,21 +20,26 @@ package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser;
import com.google.gson.Gson; import com.google.gson.Gson;
import com.google.gson.GsonBuilder; import com.google.gson.GsonBuilder;
import org.apache.skywalking.oap.log.analyzer.dsl.spec.AbstractSpec;
import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig; import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig;
import org.apache.skywalking.oap.server.library.module.ModuleManager; import org.apache.skywalking.oap.server.library.module.ModuleManager;
public class JsonParserSpec extends AbstractSpec { public class JsonParserSpec extends AbstractParserSpec {
private final GsonBuilder gsonBuilder; private final GsonBuilder gsonBuilder;
private final Gson gson;
public JsonParserSpec(final ModuleManager moduleManager, public JsonParserSpec(final ModuleManager moduleManager,
final LogAnalyzerModuleConfig moduleConfig) { final LogAnalyzerModuleConfig moduleConfig) {
super(moduleManager, moduleConfig); super(moduleManager, moduleConfig);
gsonBuilder = new GsonBuilder(); gsonBuilder = new GsonBuilder();
// We just create a gson instance in advance for now (for the sake of performance),
// when we want to provide some extra options, we'll move this into method "create" then.
gson = gsonBuilder.create();
} }
public Gson create() { public Gson create() {
return gsonBuilder.create(); return gson;
} }
} }
...@@ -21,11 +21,10 @@ package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser; ...@@ -21,11 +21,10 @@ package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.skywalking.apm.network.logging.v3.LogData; import org.apache.skywalking.apm.network.logging.v3.LogData;
import org.apache.skywalking.oap.log.analyzer.dsl.spec.AbstractSpec;
import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig; import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig;
import org.apache.skywalking.oap.server.library.module.ModuleManager; import org.apache.skywalking.oap.server.library.module.ModuleManager;
public class TextParserSpec extends AbstractSpec { public class TextParserSpec extends AbstractParserSpec {
public TextParserSpec(final ModuleManager moduleManager, public TextParserSpec(final ModuleManager moduleManager,
final LogAnalyzerModuleConfig moduleConfig) { final LogAnalyzerModuleConfig moduleConfig) {
super(moduleManager, moduleConfig); super(moduleManager, moduleConfig);
...@@ -45,6 +44,8 @@ public class TextParserSpec extends AbstractSpec { ...@@ -45,6 +44,8 @@ public class TextParserSpec extends AbstractSpec {
final boolean matched = matcher.find(); final boolean matched = matcher.find();
if (matched) { if (matched) {
BINDING.get().parsed(matcher); BINDING.get().parsed(matcher);
} else if (abortOnFailure()) {
BINDING.get().abort();
} }
return matched; return matched;
} }
......
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser; package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser;
import org.apache.skywalking.oap.log.analyzer.dsl.spec.AbstractSpec;
import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig; import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig;
import org.apache.skywalking.oap.server.library.module.ModuleManager; import org.apache.skywalking.oap.server.library.module.ModuleManager;
import org.yaml.snakeyaml.DumperOptions; import org.yaml.snakeyaml.DumperOptions;
...@@ -27,7 +26,7 @@ import org.yaml.snakeyaml.Yaml; ...@@ -27,7 +26,7 @@ import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.SafeConstructor; import org.yaml.snakeyaml.constructor.SafeConstructor;
import org.yaml.snakeyaml.representer.Representer; import org.yaml.snakeyaml.representer.Representer;
public class YamlParserSpec extends AbstractSpec { public class YamlParserSpec extends AbstractParserSpec {
private final LoaderOptions loaderOptions; private final LoaderOptions loaderOptions;
public YamlParserSpec(final ModuleManager moduleManager, public YamlParserSpec(final ModuleManager moduleManager,
......
...@@ -18,6 +18,7 @@ rules: ...@@ -18,6 +18,7 @@ rules:
dsl: | dsl: |
filter { filter {
text { text {
abortOnFailure false // for test purpose, we want to persist all logs
regexp $/(?s)(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.\d{3}) \[TID:(?<tid>.+?)] \[(?<thread>.+?)] (?<level>\w{4,}) (?<logger>.{1,36}) (?<msg>.+)/$ regexp $/(?s)(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.\d{3}) \[TID:(?<tid>.+?)] \[(?<thread>.+?)] (?<level>\w{4,}) (?<logger>.{1,36}) (?<msg>.+)/$
} }
extractor { extractor {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册