Skip to content

Commit 529253e

Browse files
committed
feat: Handle JSON-like structured logs better
Updates the OTEL pipeline to handle structured logs better. If the body content is an OTEL map, the body map is merged into the log attributes map. If the body is a string containing a JSON object, the JSON is parsed into an OTEL map and its fields are then merged into the log attributes map. Replacing the Body field doesn't work, since the ClickHouse exporter schema defines Body as a string, so any parsed-out object ends up being turned back into a string. Stored as log attributes instead, the fields at least provide a lighter-weight means of grouping and filtering in the UI. Ref: HDX-1453
1 parent 19431d3 commit 529253e

File tree

13 files changed

+295
-0
lines changed

13 files changed

+295
-0
lines changed

Diff for: docker/otel-collector/config.yaml

+9
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,15 @@ receivers:
2727
processors:
2828
transform:
2929
log_statements:
30+
- context: log
31+
error_mode: ignore
32+
statements:
33+
# JSON parsing: Extends log attributes with the fields from structured log body content, either as an OTEL map or
34+
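# as a string containing a JSON object (matched greedily from the first "{" to the last "}" in the body). Nested keys are flattened into dot-separated attribute names.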
35+
- set(log.cache, ExtractPatterns(log.body, "(?P<0>(\\{.*\\}))")) where IsString(log.body)
36+
- merge_maps(log.attributes, ParseJSON(log.cache["0"]), "upsert") where IsMap(log.cache)
37+
- flatten(log.attributes) where IsMap(log.cache)
38+
- merge_maps(log.attributes, log.body, "upsert") where IsMap(log.body)
3039
- context: log
3140
error_mode: ignore
3241
conditions:

Diff for: smoke-tests/otel-collector/README.md

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# OpenTelemetry Collector Smoke Tests
2+
3+
This directory contains smoke tests for validating the OpenTelemetry Collector functionality in HyperDX.
4+
5+
## Prerequisites
6+
7+
Before running the tests, ensure you have the following tools installed:
8+
9+
- [Bats](https://github.com/bats-core/bats-core) - Bash Automated Testing System
10+
- [Docker](https://www.docker.com/) and Docker Compose
11+
- [curl](https://curl.se/) - Command line tool for transferring data
12+
- [ClickHouse client](https://clickhouse.com/docs/en/integrations/sql-clients/clickhouse-client) - Command-line client for ClickHouse
13+
14+
## Running the Tests
15+
16+
To run all the tests:
17+
18+
```bash
19+
cd smoke-tests/otel-collector
20+
bats *.bats
21+
```
22+
23+
To run a specific test file:
24+
25+
```bash
26+
bats hdx-1453-auto-parse-json.bats
27+
```
28+
29+
## Test Structure
30+
31+
- `*.bats` - Test files written in Bats
32+
- `data/` - Test data used by the tests
33+
- `test_helpers/` - Utility functions for the tests
34+
- `docker-compose.yaml` - Docker Compose configuration for the test environment
35+
36+
## Debugging
37+
38+
If you need to debug the tests, you can set the `SKIP_CLEANUP` environment variable to prevent the Docker containers from being torn down after the tests complete:
39+
40+
```bash
41+
SKIP_CLEANUP=1 bats hdx-1453-auto-parse-json.bats
42+
```
43+
44+
or
45+
46+
```bash
47+
SKIP_CLEANUP=true bats hdx-1453-auto-parse-json.bats
48+
```
49+
50+
With `SKIP_CLEANUP` enabled, the test containers will remain running after the tests complete, allowing you to inspect logs, connect to the containers, and debug issues.
51+
52+
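To manually clean up the containers after debugging, run the following from the `smoke-tests/otel-collector` directory (where `docker-compose.yaml` lives):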
53+
54+
```bash
55+
docker compose down
56+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SELECT ResourceAttributes, LogAttributes FROM otel_logs WHERE ResourceAttributes['suite-id'] = 'auto-parse' AND ResourceAttributes['test-id'] = 'default' ORDER BY TimestampTime FORMAT CSV
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"{'suite-id':'auto-parse','test-id':'default'}","{}"
2+
"{'suite-id':'auto-parse','test-id':'default'}","{}"
3+
"{'suite-id':'auto-parse','test-id':'default'}","{}"
4+
"{'suite-id':'auto-parse','test-id':'default'}","{}"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
{
2+
"resourceLogs": [
3+
{
4+
"resource": {
5+
"attributes": [
6+
{
7+
"key": "suite-id",
8+
"value": {
9+
"stringValue": "auto-parse"
10+
}
11+
},
12+
{
13+
"key": "test-id",
14+
"value": {
15+
"stringValue": "default"
16+
}
17+
}
18+
]
19+
},
20+
"scopeLogs": [
21+
{
22+
"scope": {},
23+
"logRecords": [
24+
{
25+
"timeUnixNano": "1901999580000000000",
26+
"body": {
27+
"stringValue": "[note] this is very much not JSON even though it starts with an array char"
28+
}
29+
},
30+
{
31+
"timeUnixNano": "1901999580000000001",
32+
"body": {
33+
"stringValue": "{note} this is very much not JSON even though it starts with an object char"
34+
}
35+
},
36+
{
37+
"timeUnixNano": "1901999580000000002",
38+
"body": {
39+
"stringValue": "NOTE: this is very much not JSON"
40+
}
41+
},
42+
{
43+
"timeUnixNano": "1901999580000000003",
44+
"body": {
45+
"stringValue": "this has some {Key {Value { '{' } } invalid JSON in it"
46+
}
47+
}
48+
]
49+
}
50+
]
51+
}
52+
]
53+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SELECT ResourceAttributes, LogAttributes FROM otel_logs WHERE ResourceAttributes['suite-id'] = 'auto-parse' AND ResourceAttributes['test-id'] = 'json-string' ORDER BY TimestampTime FORMAT CSV
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"{'suite-id':'auto-parse','test-id':'json-string'}","{'attr.intValue':'1','found':'false','message':'this should be parsed into a map'}"
2+
"{'suite-id':'auto-parse','test-id':'json-string'}","{'bodyAttr':'12345','message':'this has an existing user attribute that should be preserved.','userAttr':'true'}"
3+
"{'suite-id':'auto-parse','test-id':'json-string'}","{'found':'true','position':'trailing'}"
4+
"{'suite-id':'auto-parse','test-id':'json-string'}","{'found':'true','position':'leading'}"
5+
"{'suite-id':'auto-parse','test-id':'json-string'}","{'found':'true','position':'wrapped'}"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
{
2+
"resourceLogs": [
3+
{
4+
"resource": {
5+
"attributes": [
6+
{
7+
"key": "suite-id",
8+
"value": {
9+
"stringValue": "auto-parse"
10+
}
11+
},
12+
{
13+
"key": "test-id",
14+
"value": {
15+
"stringValue": "json-string"
16+
}
17+
}
18+
]
19+
},
20+
"scopeLogs": [
21+
{
22+
"scope": {},
23+
"logRecords": [
24+
{
25+
"timeUnixNano": "1901999580000000000",
26+
"body": {
27+
"stringValue": "{\"attr\":{\"intValue\": 1},\"found\":false,\"message\":\"this should be parsed into a map\"}"
28+
}
29+
},
30+
{
31+
"timeUnixNano": "1901999580000000001",
32+
"attributes": [
33+
{
34+
"key": "userAttr",
35+
"value": {
36+
"boolValue": true
37+
}
38+
}
39+
],
40+
"body": {
41+
"stringValue": "{\"bodyAttr\":12345,\"message\":\"this has an existing user attribute that should be preserved.\"}"
42+
}
43+
},
44+
{
45+
"timeUnixNano": "1901999580000000002",
46+
"body": {
47+
"stringValue": "should find the trailing JSON object {\"found\":true,\"position\":\"trailing\"}"
48+
}
49+
},
50+
{
51+
"timeUnixNano": "1901999580000000003",
52+
"body": {
53+
"stringValue": "{\"found\":true,\"position\":\"leading\"} should find the leading JSON object "
54+
}
55+
},
56+
{
57+
"timeUnixNano": "1901999580000000004",
58+
"body": {
59+
"stringValue": "should find a wrapped JSON object {\"found\":true,\"position\":\"wrapped\"} between text"
60+
}
61+
}
62+
]
63+
}
64+
]
65+
}
66+
]
67+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SELECT ResourceAttributes, LogAttributes FROM otel_logs WHERE ResourceAttributes['suite-id'] = 'auto-parse' AND ResourceAttributes['test-id'] = 'otel-map' ORDER BY TimestampTime FORMAT CSV
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"{'suite-id':'auto-parse','test-id':'otel-map'}","{'account-id':'550e8400-e29b-41d4-a716-446655440000','message':'data sent as OTEL map should also extend the log attributes','user-id':'1234'}"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
{
2+
"resourceLogs": [
3+
{
4+
"resource": {
5+
"attributes": [
6+
{
7+
"key": "suite-id",
8+
"value": {
9+
"stringValue": "auto-parse"
10+
}
11+
},
12+
{
13+
"key": "test-id",
14+
"value": {
15+
"stringValue": "otel-map"
16+
}
17+
}
18+
]
19+
},
20+
"scopeLogs": [
21+
{
22+
"scope": {},
23+
"logRecords": [
24+
{
25+
"timeUnixNano": "1901999580000000000",
26+
"body": {
27+
"kvlistValue": {
28+
"values": [
29+
{
30+
"key": "message",
31+
"value": {
32+
"stringValue": "data sent as OTEL map should also extend the log attributes"
33+
}
34+
},
35+
{
36+
"key": "user-id",
37+
"value": {
38+
"stringValue": "1234"
39+
}
40+
},
41+
{
42+
"key": "account-id",
43+
"value": {
44+
"stringValue": "550e8400-e29b-41d4-a716-446655440000"
45+
}
46+
}
47+
]
48+
}
49+
}
50+
}
51+
]
52+
}
53+
]
54+
}
55+
]
56+
}
+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/usr/bin/env bats
2+
3+
load 'test_helpers/utilities.bash'
4+
load 'test_helpers/assertions.bash'
5+
6+
# Bats file-level setup hook: prepares the Docker Compose test environment.
# Calls validate_env (loaded from test_helpers; its checks are not visible here),
# builds and starts the compose services in the background, then waits via
# wait_for_ready for "otel-collector" at http://localhost:4318.
setup_file() {
7+
validate_env
8+
docker compose up --build --detach
9+
wait_for_ready "otel-collector" "http://localhost:4318"
10+
}
11+
12+
# Bats file-level teardown hook: delegates to attempt_env_cleanup (loaded from
# test_helpers/utilities.bash) to decide whether the containers are torn down.
teardown_file() {
13+
attempt_env_cleanup
14+
}
15+
16+
# Emits the data/auto-parse/json-string fixture to http://localhost:4318, pauses one second,
# then runs assert_test_data for the same fixture (helper defined in test_helpers; presumably
# compares the stored rows with the fixture's expected output -- confirm against the helper).
# NOTE(review): the fixed `sleep 1` presumably gives the pipeline time to persist the logs;
# confirm it is long enough to avoid flaky runs.
@test "JSON string body content should be parsed and stored as log attributes" {
17+
emit_otel_data "http://localhost:4318" "data/auto-parse/json-string"
18+
sleep 1
19+
assert_test_data "data/auto-parse/json-string"
20+
}
21+
22+
# Emits the data/auto-parse/otel-map fixture to http://localhost:4318, pauses one second,
# then runs assert_test_data for the same fixture (helper defined in test_helpers; presumably
# compares the stored rows with the fixture's expected output -- confirm against the helper).
# NOTE(review): the fixed `sleep 1` presumably gives the pipeline time to persist the logs;
# confirm it is long enough to avoid flaky runs.
@test "OTEL map content should be stored as log attributes" {
23+
emit_otel_data "http://localhost:4318" "data/auto-parse/otel-map"
24+
sleep 1
25+
assert_test_data "data/auto-parse/otel-map"
26+
}
27+
28+
# Emits the data/auto-parse/default fixture (bodies that are not valid JSON) to
# http://localhost:4318, pauses one second, then runs assert_test_data for the same fixture
# (helper defined in test_helpers; presumably compares the stored rows with the fixture's
# expected output -- confirm against the helper).
# NOTE(review): the fixed `sleep 1` presumably gives the pipeline time to persist the logs;
# confirm it is long enough to avoid flaky runs.
@test "all other content should skip storing values in log attributes" {
29+
emit_otel_data "http://localhost:4318" "data/auto-parse/default"
30+
sleep 1
31+
assert_test_data "data/auto-parse/default"
32+
}

Diff for: smoke-tests/otel-collector/test_helpers/utilities.bash

+9
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,12 @@ emit_otel_data() {
7272
fi
7373
return 0
7474
}
75+
76+
# Tear down the Docker Compose test environment unless the caller opted out.
#
# Environment:
#   SKIP_CLEANUP - when "1", or "true" in any letter case, the containers are
#                  left running and a notice is written to file descriptor 3.
# Returns:
#   0 when cleanup is skipped; otherwise the exit status of `docker compose down`.
attempt_env_cleanup() {
77+
# Honor SKIP_CLEANUP ("1" or case-insensitive "true") so the test containers can be kept running
78+
if [[ "${SKIP_CLEANUP}" == "1" ]] || [[ "$(echo "${SKIP_CLEANUP}" | tr '[:upper:]' '[:lower:]')" == "true" ]]; then
79+
echo "🔍 SKIP_CLEANUP is set, skipping container cleanup" >&3
80+
return 0
81+
fi
82+
docker compose down
83+
}

0 commit comments

Comments
 (0)