fix: revert jsmn parser to cJSON, keep coalescing

The jsmn zero-alloc parser had token-navigation bugs that caused
all book updates to fail silently. Restore cJSON-based parsing
while preserving the coalescing architecture (accumulate dirty
symbols, evaluate once per burst).
This commit is contained in:
nicolas 2026-05-24 19:34:43 -03:00
parent 7c9b7f7ae6
commit 7afd4977ca
1 changed file with 77 additions and 172 deletions

View File

@ -11,9 +11,6 @@
#include "ws_client.h" #include "ws_client.h"
#include "http_client.h" #include "http_client.h"
#include "cJSON.h" #include "cJSON.h"
#define JSMN_STATIC
#include "jsmn.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
@ -510,100 +507,30 @@ int ws_client_unsubscribe(ws_client_t *client, uint32_t conn_idx,
} }
/* /*
* Parse a KuCoin level2Depth5 book update JSON using jsmn (zero-alloc). * Parse a KuCoin level2Depth5 book update JSON (cJSON) and update the
* Topic format: /spotMarket/level2Depth5:{symbol} * in-memory order book. Topic format: /spotMarket/level2Depth5:{symbol}
* Extracts timestamp/sequence, bids, asks (each [price, size] pair). * Extracts timestamp/sequence, bids, asks (each [price, size] pair).
* Returns symbol index on success, -1 on failure. * Returns symbol index on success, -1 on failure.
* The caller is responsible for calling evaluate_symbol afterwards * The caller is responsible for calling evaluate_symbol afterwards
* (coalesced per-symbol batching is done in ws_client_read). * (coalesced per-symbol batching is done in ws_client_read).
*/ */
#define JSMN_BOOK_TOKENS 128 static int16_t parse_book_update(cJSON *root, ws_client_t *client) {
cJSON *type = cJSON_GetObjectItem(root, "type");
if (!cJSON_IsString(type) || strcmp(type->valuestring, "message") != 0)
return -1;
static bool jsmn_eq(const char *json, const jsmntok_t *tok, const char *str) { cJSON *topic = cJSON_GetObjectItem(root, "topic");
size_t len = strlen(str); cJSON *data = cJSON_GetObjectItem(root, "data");
return tok->type == JSMN_STRING if (!cJSON_IsString(topic) || !cJSON_IsObject(data))
&& (size_t)(tok->end - tok->start) == len return -1;
&& memcmp(json + tok->start, str, len) == 0;
}
static int16_t parse_book_update(ws_client_t *client, const char *payload, const char *topic_str = topic->valuestring;
size_t payload_len) { const char *sym_start = strstr(topic_str, "level2Depth5:");
jsmn_parser parser;
jsmntok_t tokens[JSMN_BOOK_TOKENS];
jsmn_init(&parser);
int ntokens = jsmn_parse(&parser, payload, payload_len,
tokens, JSMN_BOOK_TOKENS);
if (ntokens <= 0 || tokens[0].type != JSMN_OBJECT) return -1;
/* Walk top-level keys: "type", "topic", "data" */
const char *topic_start = NULL;
int topic_len = 0;
const jsmntok_t *data_obj = NULL;
int n = tokens[0].size;
int pos = 1;
for (int i = 0; i < n && pos < ntokens; i++) {
const jsmntok_t *key = &tokens[pos];
const jsmntok_t *val = &tokens[pos + 1];
if (jsmn_eq(payload, key, "type")) {
if (!jsmn_eq(payload, val, "message") && val->type != JSMN_STRING)
return -1;
} else if (jsmn_eq(payload, key, "topic")) {
topic_start = payload + val->start;
topic_len = val->end - val->start;
} else if (jsmn_eq(payload, key, "data") && val->type == JSMN_OBJECT) {
data_obj = val;
}
/* Skip value and its children */
if (val->type == JSMN_OBJECT) {
pos += 2; /* key + val */
int children = val->size * 2;
for (int c = 0; c < children && pos < ntokens; c++) {
const jsmntok_t *ck = &tokens[pos];
pos++;
if (ck->type == JSMN_OBJECT || ck->type == JSMN_ARRAY) {
int grand = ck->size;
if (ck->type == JSMN_OBJECT) grand *= 2;
for (int g = 0; g < grand && pos < ntokens; g++) pos++;
}
}
} else if (val->type == JSMN_ARRAY) {
pos += 2;
int children = val->size;
for (int c = 0; c < children && pos < ntokens; c++) {
const jsmntok_t *ca = &tokens[pos];
pos++;
if (ca->type == JSMN_OBJECT || ca->type == JSMN_ARRAY) {
int grand = ca->size;
if (ca->type == JSMN_OBJECT) grand *= 2;
for (int g = 0; g < grand && pos < ntokens; g++) pos++;
}
}
} else {
pos += 2;
}
}
if (!topic_start || !data_obj) return -1;
/* Extract symbol from topic: /spotMarket/level2Depth5:{symbol} */
const char *sym_start = NULL;
const char *marker = "/level2Depth5:";
for (int i = 0; i <= topic_len - 14; i++) {
if (memcmp(topic_start + i, marker, 14) == 0) {
sym_start = topic_start + i + 14;
break;
}
}
if (!sym_start) return -1; if (!sym_start) return -1;
sym_start += 13;
char symbol[SYMBOL_NAME_LEN] = {0}; char symbol[SYMBOL_NAME_LEN] = {0};
int sym_len = (int)(topic_start + topic_len - sym_start); strncpy(symbol, sym_start, SYMBOL_NAME_LEN - 1);
if (sym_len >= SYMBOL_NAME_LEN) sym_len = SYMBOL_NAME_LEN - 1;
memcpy(symbol, sym_start, (size_t)sym_len);
char *comma = strchr(symbol, ','); char *comma = strchr(symbol, ',');
if (comma) *comma = '\0'; if (comma) *comma = '\0';
@ -612,85 +539,67 @@ static int16_t parse_book_update(ws_client_t *client, const char *payload,
order_book_t *book = &client->books[sym_idx]; order_book_t *book = &client->books[sym_idx];
/* Walk "data" object keys: bids, asks, timestamp/sequence/time */ cJSON *ts_val = cJSON_GetObjectItem(data, "timestamp");
int nd = data_obj->size; cJSON *seq_val = cJSON_GetObjectItem(data, "sequence");
int dpos = pos; cJSON *seqNum_val = cJSON_GetObjectItem(data, "sequenceNum");
if (cJSON_IsNumber(ts_val)) book->ts_ms = (int64_t)ts_val->valuedouble;
if (!book->ts_ms && cJSON_IsNumber(seq_val))
book->ts_ms = (int64_t)seq_val->valuedouble;
if (!book->ts_ms) {
cJSON *time_val = cJSON_GetObjectItem(data, "time");
if (cJSON_IsNumber(time_val))
book->ts_ms = (int64_t)time_val->valuedouble;
}
if (cJSON_IsNumber(seq_val))
book->sequence = (int64_t)seq_val->valuedouble;
else if (cJSON_IsNumber(seqNum_val))
book->sequence = (int64_t)seqNum_val->valuedouble;
/* Reset book bid/ask counts before filling */ cJSON *bids = cJSON_GetObjectItem(data, "bids");
book->bid_count = 0; cJSON *asks = cJSON_GetObjectItem(data, "asks");
book->ask_count = 0;
for (int i = 0; i < nd && dpos < ntokens; i++) { if (cJSON_IsArray(bids)) {
const jsmntok_t *dkey = &tokens[dpos]; int count = 0;
const jsmntok_t *dval = &tokens[dpos + 1]; cJSON *bid;
cJSON_ArrayForEach(bid, bids) {
if (jsmn_eq(payload, dkey, "bids") && dval->type == JSMN_ARRAY) { if (count >= MAX_BOOK_LEVELS) break;
int n_bids = dval->size; if (cJSON_IsArray(bid) && cJSON_GetArraySize(bid) >= 2) {
int bpos = dpos + 2; cJSON *price = cJSON_GetArrayItem(bid, 0);
int count = 0; cJSON *size = cJSON_GetArrayItem(bid, 1);
for (int b = 0; b < n_bids && count < MAX_BOOK_LEVELS && bpos < ntokens; b++) { double p = cJSON_IsNumber(price) ? price->valuedouble :
const jsmntok_t *entry = &tokens[bpos]; cJSON_IsString(price) ? atof(price->valuestring) : 0.0;
if (entry->type == JSMN_ARRAY && entry->size >= 2) { double s = cJSON_IsNumber(size) ? size->valuedouble :
const jsmntok_t *p = &tokens[bpos + 1]; cJSON_IsString(size) ? atof(size->valuestring) : 0.0;
const jsmntok_t *s = &tokens[bpos + 2]; if (p > 0 && s > 0) {
double price = atof(payload + p->start); book->bids[count][0] = p;
double size = atof(payload + s->start); book->bids[count][1] = s;
if (price > 0 && size > 0) { count++;
book->bids[count][0] = price;
book->bids[count][1] = size;
count++;
}
} }
/* Skip entry array + its children */
bpos += 1 + entry->size;
}
book->bid_count = (uint8_t)count;
dpos = bpos;
} else if (jsmn_eq(payload, dkey, "asks") && dval->type == JSMN_ARRAY) {
int n_asks = dval->size;
int apos = dpos + 2;
int count = 0;
for (int a = 0; a < n_asks && count < MAX_BOOK_LEVELS && apos < ntokens; a++) {
const jsmntok_t *entry = &tokens[apos];
if (entry->type == JSMN_ARRAY && entry->size >= 2) {
const jsmntok_t *p = &tokens[apos + 1];
const jsmntok_t *s = &tokens[apos + 2];
double price = atof(payload + p->start);
double size = atof(payload + s->start);
if (price > 0 && size > 0) {
book->asks[count][0] = price;
book->asks[count][1] = size;
count++;
}
}
apos += 1 + entry->size;
}
book->ask_count = (uint8_t)count;
dpos = apos;
} else if (jsmn_eq(payload, dkey, "timestamp") ||
jsmn_eq(payload, dkey, "time")) {
if (!book->ts_ms)
book->ts_ms = (int64_t)atof(payload + dval->start);
dpos += 2;
} else if (jsmn_eq(payload, dkey, "sequence") ||
jsmn_eq(payload, dkey, "sequenceNum")) {
book->sequence = (int64_t)atof(payload + dval->start);
if (!book->ts_ms)
book->ts_ms = book->sequence;
dpos += 2;
} else {
/* Skip unknown fields */
if (dval->type == JSMN_OBJECT) {
dpos += 2 + dval->size * 2;
} else if (dval->type == JSMN_ARRAY) {
dpos += 2;
for (int c = 0; c < dval->size && dpos < ntokens; c++) {
dpos += 1 + tokens[dpos].size;
}
} else {
dpos += 2;
} }
} }
book->bid_count = (uint8_t)count;
}
if (cJSON_IsArray(asks)) {
int count = 0;
cJSON *ask;
cJSON_ArrayForEach(ask, asks) {
if (count >= MAX_BOOK_LEVELS) break;
if (cJSON_IsArray(ask) && cJSON_GetArraySize(ask) >= 2) {
cJSON *price = cJSON_GetArrayItem(ask, 0);
cJSON *size = cJSON_GetArrayItem(ask, 1);
double p = cJSON_IsNumber(price) ? price->valuedouble :
cJSON_IsString(price) ? atof(price->valuestring) : 0.0;
double s = cJSON_IsNumber(size) ? size->valuedouble :
cJSON_IsString(size) ? atof(size->valuestring) : 0.0;
if (p > 0 && s > 0) {
book->asks[count][0] = p;
book->asks[count][1] = s;
count++;
}
}
}
book->ask_count = (uint8_t)count;
} }
book->symbol_idx = (uint16_t)sym_idx; book->symbol_idx = (uint16_t)sym_idx;
@ -754,17 +663,6 @@ int16_t ws_client_process_frame(ws_client_t *client, uint32_t conn_idx) {
} }
if (opcode == 0x1) { if (opcode == 0x1) {
/* Quick check: book updates start with {"type":"message" */
if (payload_len > 18 &&
memcmp(payload, "{\"type\":\"message\"", 16) == 0) {
int16_t sym_idx = parse_book_update(client,
(const char *)payload, payload_len);
conn->frame_payload_len = 0;
conn->frame_finished = false;
return sym_idx;
}
/* Other JSON messages: use cJSON (welcome, ack, error) */
cJSON *msg_root = cJSON_ParseWithLength((const char *)payload, payload_len); cJSON *msg_root = cJSON_ParseWithLength((const char *)payload, payload_len);
if (!msg_root) { if (!msg_root) {
static int parse_fails = 0; static int parse_fails = 0;
@ -777,6 +675,8 @@ int16_t ws_client_process_frame(ws_client_t *client, uint32_t conn_idx) {
} }
cJSON *msg_type = cJSON_GetObjectItem(msg_root, "type"); cJSON *msg_type = cJSON_GetObjectItem(msg_root, "type");
int16_t sym_idx = -1;
if (cJSON_IsString(msg_type)) { if (cJSON_IsString(msg_type)) {
if (strcmp(msg_type->valuestring, "welcome") == 0) { if (strcmp(msg_type->valuestring, "welcome") == 0) {
log_write("[WS] Welcome message received\n"); log_write("[WS] Welcome message received\n");
@ -784,6 +684,8 @@ int16_t ws_client_process_frame(ws_client_t *client, uint32_t conn_idx) {
static int ack_count = 0; static int ack_count = 0;
if (++ack_count <= 5) log_write("[WS] Ack #%d: %.*s\n", ack_count, if (++ack_count <= 5) log_write("[WS] Ack #%d: %.*s\n", ack_count,
(int)(payload_len > 200 ? 200 : payload_len), (const char *)payload); (int)(payload_len > 200 ? 200 : payload_len), (const char *)payload);
} else if (strcmp(msg_type->valuestring, "message") == 0) {
sym_idx = parse_book_update(msg_root, client);
} else if (strcmp(msg_type->valuestring, "error") == 0) { } else if (strcmp(msg_type->valuestring, "error") == 0) {
log_write("[WS] Error message: %.*s\n", log_write("[WS] Error message: %.*s\n",
(int)(payload_len > 200 ? 200 : payload_len), (const char *)payload); (int)(payload_len > 200 ? 200 : payload_len), (const char *)payload);
@ -791,6 +693,9 @@ int16_t ws_client_process_frame(ws_client_t *client, uint32_t conn_idx) {
} }
cJSON_Delete(msg_root); cJSON_Delete(msg_root);
conn->frame_payload_len = 0;
conn->frame_finished = false;
return sym_idx;
} }
conn->frame_payload_len = 0; conn->frame_payload_len = 0;