From 7a9c0444c055bbe0205c8d06e97587e208f1cfaf Mon Sep 17 00:00:00 2001
From: PGross
Date: Wed, 18 Dec 2024 22:37:19 +0100
Subject: [PATCH 01/11] added result serializer

---
 CMakeLists.txt                       |   1 +
 src/httpserver_extension.cpp         |  17 +--
 src/include/httpserver_extension.hpp |   2 +-
 src/include/result_serializer.hpp    | 106 ++++++++++++++
 src/result_serializer.cpp            | 203 +++++++++++++++++++++++
 5 files changed, 313 insertions(+), 16 deletions(-)
 create mode 100644 src/include/result_serializer.hpp
 create mode 100644 src/result_serializer.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 435c3dc..54c5cd4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,6 +22,7 @@ add_custom_command(
 
 set(EXTENSION_SOURCES
     src/httpserver_extension.cpp
+    src/result_serializer.cpp
     ${CMAKE_CURRENT_BINARY_DIR}/playground.hpp
 )
 
diff --git a/src/httpserver_extension.cpp b/src/httpserver_extension.cpp
index 65a9aa0..2d377b0 100644
--- a/src/httpserver_extension.cpp
+++ b/src/httpserver_extension.cpp
@@ -95,21 +95,8 @@ static std::string ConvertResultToJSON(MaterializedQueryResult &result, ReqStats
     }
     yyjson_mut_obj_add_val(doc, root, "meta", meta_array);
 
-    // Add data
-    auto data_array = yyjson_mut_arr(doc);
-    for (idx_t row = 0; row < result.RowCount(); ++row) {
-        auto row_array = yyjson_mut_arr(doc);
-        for (idx_t col = 0; col < result.ColumnCount(); ++col) {
-            Value value = result.GetValue(col, row);
-            if (value.IsNull()) {
-                yyjson_mut_arr_append(row_array, yyjson_mut_null(doc));
-            } else {
-                std::string value_str = value.ToString();
-                yyjson_mut_arr_append(row_array, yyjson_mut_strncpy(doc, value_str.c_str(), value_str.length()));
-            }
-        }
-        yyjson_mut_arr_append(data_array, row_array);
-    }
+    ResultSerializer serializer;
+    auto data_array = serializer.Serialize(result);
     yyjson_mut_obj_add_val(doc, root, "data", data_array);
 
     // Add row count
diff --git a/src/include/httpserver_extension.hpp b/src/include/httpserver_extension.hpp
index 432d1c0..85f08c5 100644
--- a/src/include/httpserver_extension.hpp
+++ b/src/include/httpserver_extension.hpp
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "duckdb.hpp"
-#include "duckdb/common/file_system.hpp"
+#include "result_serializer.hpp"
 
 namespace duckdb {
 
diff --git a/src/include/result_serializer.hpp b/src/include/result_serializer.hpp
new file mode 100644
index 0000000..2c48f21
--- /dev/null
+++ b/src/include/result_serializer.hpp
@@ -0,0 +1,106 @@
+#pragma once
+
+#include "duckdb/common/extra_type_info.hpp"
+#include "duckdb/common/types/uuid.hpp"
+#include "duckdb/main/query_result.hpp"
+#include "yyjson.hpp"
+
+#include <iostream>
+
+using namespace duckdb_yyjson;
+
+namespace duckdb {
+
+class SerializationResult {
+public:
+  virtual ~SerializationResult() = default;
+  virtual bool IsSuccess() = 0;
+  virtual string WithSuccessField() = 0;
+  virtual string Raw() = 0;
+
+  void Print() {
+    std::cerr << WithSuccessField() << std::endl;
+  }
+
+  template <class TARGET>
+  TARGET &Cast() {
+    DynamicCastCheck<TARGET>(this);
+    return reinterpret_cast<TARGET &>(*this);
+  }
+};
+
+class SerializationSuccess final : public SerializationResult {
+public:
+  explicit SerializationSuccess(string serialized) : serialized(std::move(serialized)) {
+  }
+
+  bool IsSuccess() override {
+    return true;
+  }
+
+  string Raw() override {
+    return serialized;
+  }
+
+  string WithSuccessField() override {
+    return R"({"success": true, "data": )" + serialized + "}";
+  }
+
+private:
+  string serialized;
+};
+
+class SerializationError final : public SerializationResult {
+public:
+  explicit SerializationError(string message) : message(std::move(message)) {
+  }
+
+  bool IsSuccess() override {
+    return false;
+  }
+
+  string Raw() override {
+    return message;
+  }
+
+  string WithSuccessField() override {
+    return R"({"success": false, "message": ")" + message + "\"}";
+  }
+
+private:
+  string message;
+};
+
+class ResultSerializer {
+public:
+  explicit ResultSerializer(const bool _set_invalid_values_to_null = false)
+      : set_invalid_values_to_null(_set_invalid_values_to_null) {
+    doc = yyjson_mut_doc_new(nullptr);
+    root = yyjson_mut_arr(doc);
+    if (!root) {
+      throw InternalException("Could not create yyjson array");
+    }
+    yyjson_mut_doc_set_root(doc, root);
+  }
+  unique_ptr result;
+
+  ~ResultSerializer() {
+    yyjson_mut_doc_free(doc);
+  }
+
+  void SerializeChunk(const DataChunk &chunk, vector<string> &names, vector<LogicalType> &types);
+
+  yyjson_mut_val *Serialize(QueryResult &query_result);
+
+private:
+  void SerializeValue(yyjson_mut_val *parent, const Value &value, optional_ptr<string> name,
+                      const LogicalType &type);
+
+  yyjson_mut_doc *doc;
+  yyjson_mut_val *root;
+  bool set_invalid_values_to_null;
+};
+} // namespace duckdb
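The SerializationResult hierarchy above has no caller yet in this patch. For orientation, a minimal sketch of how such a wrapper would be consumed; the producer function is hypothetical, only the class and method names come from the header above:

    // Hypothetical consumer; RunSerialization() is illustrative and not part of the patch.
    unique_ptr<SerializationResult> res = RunSerialization();
    if (!res->IsSuccess()) {
        auto &err = res->Cast<SerializationError>();       // checked downcast, as defined above
        std::cerr << err.WithSuccessField() << std::endl;  // {"success": false, "message": "..."}
    }
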
supported"); + } + + val = yyjson_mut_real(doc, real_val); + break; + } + // Data + time + case LogicalTypeId::DATE: + case LogicalTypeId::TIME: + case LogicalTypeId::TIMESTAMP_SEC: + case LogicalTypeId::TIMESTAMP_MS: + case LogicalTypeId::TIMESTAMP: + case LogicalTypeId::TIMESTAMP_NS: + case LogicalTypeId::TIMESTAMP_TZ: + case LogicalTypeId::TIME_TZ: + // Enum + case LogicalTypeId::ENUM: + // Strings + case LogicalTypeId::CHAR: + case LogicalTypeId::VARCHAR: + case LogicalTypeId::STRING_LITERAL: + val = yyjson_mut_strcpy(doc, value.GetValue().c_str()); + break; + case LogicalTypeId::UUID: { + const auto uuid_int = value.GetValue(); + const auto uuid = UUID::ToString(uuid_int); + val = yyjson_mut_strcpy(doc, uuid.c_str()); + break; + } + // Weird special types that are jus serialized to string + case LogicalTypeId::INTERVAL: + // TODO perhaps base64 encode blob? + case LogicalTypeId::BLOB: + case LogicalTypeId::BIT: + val = yyjson_mut_strcpy(doc, value.ToString().c_str()); + break; + case LogicalTypeId::UNION: { + auto &union_val = UnionValue::GetValue(value); + SerializeValue(parent, union_val, name, union_val.type()); + return; + } + case LogicalTypeId::ARRAY: + case LogicalTypeId::LIST: { + const auto get_children = LogicalTypeId::LIST == type.id() ? ListValue::GetChildren : ArrayValue::GetChildren; + auto &children = get_children(value); + val = yyjson_mut_arr(doc); + for (auto &child : children) { + SerializeValue(val, child, nullptr, child.type()); + } + break; + } + case LogicalTypeId::STRUCT: { + const auto &children = StructValue::GetChildren(value); + const auto &type_info = value.type().AuxInfo()->Cast(); + + auto all_keys_are_empty = true; + for (uint64_t idx = 0; idx < children.size(); ++idx) { + if (!type_info.child_types[idx].first.empty()) { + all_keys_are_empty = false; + break; + } + } + + // Unnamed struct -> just create tuples + if (all_keys_are_empty) { + val = yyjson_mut_arr(doc); + for (auto &child : children) { + SerializeValue(val, child, nullptr, child.type()); + } + } else { + val = yyjson_mut_obj(doc); + for (uint64_t idx = 0; idx < children.size(); ++idx) { + string struct_name = type_info.child_types[idx].first; + SerializeValue(val, children[idx], struct_name, type_info.child_types[idx].second); + } + } + + break; + } + // Not implemented types + case LogicalTypeId::MAP: { + auto &children = ListValue::GetChildren(value); + val = yyjson_mut_obj(doc); + for (auto &item : children) { + auto &key_value = StructValue::GetChildren(item); + D_ASSERT(key_value.size() == 2); + auto key_str = key_value[0].GetValue(); + SerializeValue(val, key_value[1], key_str, key_value[1].type()); + } + break; + } + // Unsupported types + case LogicalTypeId::TABLE: + case LogicalTypeId::UHUGEINT: + case LogicalTypeId::HUGEINT: + case LogicalTypeId::POINTER: + case LogicalTypeId::VALIDITY: + case LogicalTypeId::AGGREGATE_STATE: + case LogicalTypeId::LAMBDA: + case LogicalTypeId::USER: + case LogicalTypeId::ANY: + case LogicalTypeId::UNKNOWN: + case LogicalTypeId::INVALID: + if (set_invalid_values_to_null) { + goto null_handle; + } + throw InvalidTypeException("Type " + type.ToString() + " not supported"); + } + + D_ASSERT(val); + if (!name) { + if (!yyjson_mut_arr_append(parent, val)) { + throw InternalException("Could not add value to yyjson array"); + } + } else { + yyjson_mut_val *key = yyjson_mut_strcpy(doc, name->c_str()); + D_ASSERT(key); + if (!yyjson_mut_obj_add(parent, key, val)) { + throw InternalException("Could not add value to yyjson object"); + } + if 
(!yyjson_mut_arr_add_val(parent, val)) { + throw InternalException("Could not add value to yyjson object"); + } + } +} \ No newline at end of file From e9aea60d78b4997c55746b1de382e62d12d139ce Mon Sep 17 00:00:00 2001 From: PGross Date: Wed, 18 Dec 2024 23:46:14 +0100 Subject: [PATCH 02/11] fixed some bugs for exotic types --- src/httpserver_extension.cpp | 2 +- src/result_serializer.cpp | 40 +++++++++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/src/httpserver_extension.cpp b/src/httpserver_extension.cpp index 2d377b0..8367af9 100644 --- a/src/httpserver_extension.cpp +++ b/src/httpserver_extension.cpp @@ -100,7 +100,7 @@ static std::string ConvertResultToJSON(MaterializedQueryResult &result, ReqStats yyjson_mut_obj_add_val(doc, root, "data", data_array); // Add row count - yyjson_mut_obj_add_int(doc, root, "rows", result.RowCount()); + yyjson_mut_obj_add_uint(doc, root, "rows", result.RowCount()); //"statistics":{"elapsed":0.00031403,"rows_read":1,"bytes_read":0}} auto stat_obj = yyjson_mut_obj_add_obj(doc, root, "statistics"); yyjson_mut_obj_add_real(doc, stat_obj, "elapsed", req_stats.elapsed_sec); diff --git a/src/result_serializer.cpp b/src/result_serializer.cpp index b9b5e18..824e5fd 100644 --- a/src/result_serializer.cpp +++ b/src/result_serializer.cpp @@ -68,6 +68,19 @@ void duckdb::ResultSerializer::SerializeValue( // NOLINT(*-no-recursion) case LogicalTypeId::UBIGINT: val = yyjson_mut_uint(doc, value.GetValue()); break; + + // format to big numbers as strings + case LogicalTypeId::UHUGEINT: { + const uhugeint_t uHugeIntNumber = value.GetValue(); + val = yyjson_mut_strcpy(doc, uHugeIntNumber.ToString().c_str()); + break; + } + case LogicalTypeId::HUGEINT: { + const hugeint_t hugeIntNumber = value.GetValue(); + val = yyjson_mut_strcpy(doc, hugeIntNumber.ToString().c_str()); + break; + } + case LogicalTypeId::FLOAT: case LogicalTypeId::DOUBLE: case LogicalTypeId::DECIMAL: { @@ -75,12 +88,17 @@ void duckdb::ResultSerializer::SerializeValue( // NOLINT(*-no-recursion) if (std::isnan(real_val) || std::isinf(real_val)) { if (set_invalid_values_to_null) { goto null_handle; + } else { + const auto castedValue = value.DefaultCastAs(LogicalTypeId::VARCHAR).GetValue(); + val = yyjson_mut_strcpy(doc, castedValue.c_str()); + break; } - throw InvalidTypeException("NaN, Infinity, -Infinity are not supported"); + } else { + val = yyjson_mut_real(doc, real_val); + break; + } - val = yyjson_mut_real(doc, real_val); - break; } // Data + time case LogicalTypeId::DATE: @@ -99,6 +117,10 @@ void duckdb::ResultSerializer::SerializeValue( // NOLINT(*-no-recursion) case LogicalTypeId::STRING_LITERAL: val = yyjson_mut_strcpy(doc, value.GetValue().c_str()); break; + case LogicalTypeId::VARINT: + val = yyjson_mut_strcpy(doc, value.DefaultCastAs(LogicalTypeId::VARCHAR).GetValue().c_str()); + break; + // UUID case LogicalTypeId::UUID: { const auto uuid_int = value.GetValue(); const auto uuid = UUID::ToString(uuid_int); @@ -167,10 +189,9 @@ void duckdb::ResultSerializer::SerializeValue( // NOLINT(*-no-recursion) } break; } - // Unsupported types + + // Unsupported types case LogicalTypeId::TABLE: - case LogicalTypeId::UHUGEINT: - case LogicalTypeId::HUGEINT: case LogicalTypeId::POINTER: case LogicalTypeId::VALIDITY: case LogicalTypeId::AGGREGATE_STATE: @@ -185,7 +206,9 @@ void duckdb::ResultSerializer::SerializeValue( // NOLINT(*-no-recursion) throw InvalidTypeException("Type " + type.ToString() + " not supported"); } - D_ASSERT(val); + if (!val) { + throw 
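Worth noting before the follow-up fixes: the serializer's single constructor flag decides how values without a JSON representation are handled. A minimal sketch of the two modes (names from the patch above; behavior per the FLOAT/DOUBLE/DECIMAL and unsupported-type branches):

    ResultSerializer strict;         // default: NaN/Inf and unsupported types throw InvalidTypeException
    ResultSerializer lenient(true);  // set_invalid_values_to_null: the same values become JSON null
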
InternalException("Could not serialize value of type " + type.ToString()); + } if (!name) { if (!yyjson_mut_arr_append(parent, val)) { throw InternalException("Could not add value to yyjson array"); @@ -196,8 +219,5 @@ void duckdb::ResultSerializer::SerializeValue( // NOLINT(*-no-recursion) if (!yyjson_mut_obj_add(parent, key, val)) { throw InternalException("Could not add value to yyjson object"); } - if (!yyjson_mut_arr_add_val(parent, val)) { - throw InternalException("Could not add value to yyjson object"); - } } } \ No newline at end of file From 053b45e21bb5644594ec58b02ed879e7c3256ad8 Mon Sep 17 00:00:00 2001 From: PGross Date: Wed, 18 Dec 2024 23:53:11 +0100 Subject: [PATCH 03/11] use internal function for type names --- src/httpserver_extension.cpp | 32 +++----------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/src/httpserver_extension.cpp b/src/httpserver_extension.cpp index 8367af9..67fb58d 100644 --- a/src/httpserver_extension.cpp +++ b/src/httpserver_extension.cpp @@ -40,37 +40,11 @@ struct HttpServerState { static HttpServerState global_state; -std::string GetColumnType(MaterializedQueryResult &result, idx_t column) { +std::string GetColumnTypeName(MaterializedQueryResult &result, idx_t column) { if (result.RowCount() == 0) { return "String"; } - switch (result.types[column].id()) { - case LogicalTypeId::FLOAT: - return "Float"; - case LogicalTypeId::DOUBLE: - return "Double"; - case LogicalTypeId::INTEGER: - return "Int32"; - case LogicalTypeId::BIGINT: - return "Int64"; - case LogicalTypeId::UINTEGER: - return "UInt32"; - case LogicalTypeId::UBIGINT: - return "UInt64"; - case LogicalTypeId::VARCHAR: - return "String"; - case LogicalTypeId::TIME: - return "DateTime"; - case LogicalTypeId::DATE: - return "Date"; - case LogicalTypeId::TIMESTAMP: - return "DateTime"; - case LogicalTypeId::BOOLEAN: - return "Int8"; - default: - return "String"; - } - return "String"; + return result.types[column].ToString(); } struct ReqStats { @@ -90,7 +64,7 @@ static std::string ConvertResultToJSON(MaterializedQueryResult &result, ReqStats auto column_obj = yyjson_mut_obj(doc); yyjson_mut_obj_add_str(doc, column_obj, "name", result.ColumnName(col).c_str()); yyjson_mut_arr_append(meta_array, column_obj); - std::string tp(GetColumnType(result, col)); + std::string tp(GetColumnTypeName(result, col)); yyjson_mut_obj_add_strcpy(doc, column_obj, "type", tp.c_str()); } yyjson_mut_obj_add_val(doc, root, "meta", meta_array); From 3546724a207efff0c9f10397ed06ea65e2e15e1d Mon Sep 17 00:00:00 2001 From: PGross Date: Thu, 19 Dec 2024 01:11:51 +0100 Subject: [PATCH 04/11] added missing cmath include --- src/result_serializer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/result_serializer.cpp b/src/result_serializer.cpp index 824e5fd..c38f94d 100644 --- a/src/result_serializer.cpp +++ b/src/result_serializer.cpp @@ -1,4 +1,5 @@ #include "result_serializer.hpp" +#include // ReSharper disable once CppPassValueParameterByConstReference From 6ed45076d0ee9ebde6449f62af90a9ad4ed83f5e Mon Sep 17 00:00:00 2001 From: Niclas Haderer Date: Mon, 23 Dec 2024 13:46:43 +0100 Subject: [PATCH 05/11] Added e2e tests --- http_api/__init__.py | 0 http_api/client.py | 38 ++++ http_api/conftest.py | 34 ++++ http_api/const.py | 11 ++ http_api/responses/all_types_compact.py | 233 ++++++++++++++++++++++++ http_api/test_json_compact_all_types.py | 10 + requirements.txt | 2 + 7 files changed, 328 insertions(+) create mode 100644 http_api/__init__.py create mode 100644 http_api/client.py 
create mode 100644 http_api/conftest.py create mode 100644 http_api/const.py create mode 100644 http_api/responses/all_types_compact.py create mode 100644 http_api/test_json_compact_all_types.py create mode 100644 requirements.txt diff --git a/http_api/__init__.py b/http_api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/http_api/client.py b/http_api/client.py new file mode 100644 index 0000000..9b5d6ac --- /dev/null +++ b/http_api/client.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from enum import Enum + +import httpx +from httpx import BasicAuth + + +class ResponseFormat(Enum): + ND_JSON = "JSONEachRow" + JSON = "JSON" + COMPACT_JSON = "JSONCompact" + + +class Client: + def __init__(self, url: str, basic_auth: str | None = None, token_auth: str | None = None): + assert basic_auth is not None or token_auth is not None, "Set either basic_auth xor token_auth" + assert not (basic_auth is not None and token_auth is not None), "Set either basic_auth xor token_auth" + + self._url = url + self._basic_auth = basic_auth + self._token_auth = token_auth + + def execute_query(self, sql: str, response_format: ResponseFormat): + headers = {"format": response_format.value} + + if self._token_auth: + headers["X-API-Key"] = self._token_auth + + auth = None + if self._basic_auth: + username, password = self._basic_auth.split(":") + auth = BasicAuth(username, password) + + with httpx.Client() as client: + response = client.get(self._url, params={"q": sql}, headers=headers, auth=auth) + response.raise_for_status() + return response.json() diff --git a/http_api/conftest.py b/http_api/conftest.py new file mode 100644 index 0000000..548306f --- /dev/null +++ b/http_api/conftest.py @@ -0,0 +1,34 @@ +import subprocess + +import pytest + +from .client import Client +from .const import DEBUG_SHELL, HOST, PORT, API_KEY + + + +@pytest.fixture +def http_duck_with_token(): + process = subprocess.Popen( + [ + DEBUG_SHELL, + ], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=1, + ) + + # Load the extension + process.stdin.write("LOAD httpserver;\n") + cmd = f"SELECT httpserve_start('{HOST}', {PORT}, '{API_KEY}');\n" + process.stdin.write(cmd) + yield + + process.kill() + + +@pytest.fixture +def token_client(): + return Client(f"http://{HOST}:{PORT}", token_auth=API_KEY) diff --git a/http_api/const.py b/http_api/const.py new file mode 100644 index 0000000..5a4c0b5 --- /dev/null +++ b/http_api/const.py @@ -0,0 +1,11 @@ +import os + +PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +DEBUG_SHELL = f"{PROJECT_DIR}/build/debug/duckdb" +RELEASE_SHELL = f"{PROJECT_DIR}/build/release/duckdb" + +HOST = "localhost" +PORT = 9999 +API_KEY = "my_api_key" +BASIC_AUTH = "admin:admin" diff --git a/http_api/responses/all_types_compact.py b/http_api/responses/all_types_compact.py new file mode 100644 index 0000000..3724067 --- /dev/null +++ b/http_api/responses/all_types_compact.py @@ -0,0 +1,233 @@ +ALL_TYPES_COMPACT = { + "meta": [ + {"name": "bool", "type": "BOOLEAN"}, + {"name": "tinyint", "type": "TINYINT"}, + {"name": "smallint", "type": "SMALLINT"}, + {"name": "int", "type": "INTEGER"}, + {"name": "bigint", "type": "BIGINT"}, + {"name": "hugeint", "type": "HUGEINT"}, + {"name": "uhugeint", "type": "UHUGEINT"}, + {"name": "utinyint", "type": "UTINYINT"}, + {"name": "usmallint", "type": "USMALLINT"}, + {"name": "uint", "type": "UINTEGER"}, + {"name": "ubigint", "type": "UBIGINT"}, + {"name": "varint", "type": "VARINT"}, 
+ {"name": "date", "type": "DATE"}, + {"name": "time", "type": "TIME"}, + {"name": "timestamp", "type": "TIMESTAMP"}, + {"name": "timestamp_s", "type": "TIMESTAMP_S"}, + {"name": "timestamp_ms", "type": "TIMESTAMP_MS"}, + {"name": "timestamp_ns", "type": "TIMESTAMP_NS"}, + {"name": "time_tz", "type": "TIME WITH TIME ZONE"}, + {"name": "timestamp_tz", "type": "TIMESTAMP WITH TIME ZONE"}, + {"name": "float", "type": "FLOAT"}, + {"name": "double", "type": "DOUBLE"}, + {"name": "dec_4_1", "type": "DECIMAL(4,1)"}, + {"name": "dec_9_4", "type": "DECIMAL(9,4)"}, + {"name": "dec_18_6", "type": "DECIMAL(18,6)"}, + {"name": "dec38_10", "type": "DECIMAL(38,10)"}, + {"name": "uuid", "type": "UUID"}, + {"name": "interval", "type": "INTERVAL"}, + {"name": "varchar", "type": "VARCHAR"}, + {"name": "blob", "type": "BLOB"}, + {"name": "bit", "type": "BIT"}, + {"name": "small_enum", "type": "ENUM('DUCK_DUCK_ENUM', 'GOOSE')"}, + { + "name": "medium_enum", + "type": "ENUM('enum_0', 'enum_1', 'enum_2', 'enum_3', 'enum_4', 'enum_5', 'enum_6', 'enum_7', 'enum_8', 'enum_9', 'enum_10', 'enum_11', 'enum_12', 'enum_13', 'enum_14', 'enum_15', 'enum_16', 'enum_17', 'enum_18', 'enum_19', 'enum_20', 'enum_21', 'enum_22', 'enum_23', 'enum_24', 'enum_25', 'enum_26', 'enum_27', 'enum_28', 'enum_29', 'enum_30', 'enum_31', 'enum_32', 'enum_33', 'enum_34', 'enum_35', 'enum_36', 'enum_37', 'enum_38', 'enum_39', 'enum_40', 'enum_41', 'enum_42', 'enum_43', 'enum_44', 'enum_45', 'enum_46', 'enum_47', 'enum_48', 'enum_49', 'enum_50', 'enum_51', 'enum_52', 'enum_53', 'enum_54', 'enum_55', 'enum_56', 'enum_57', 'enum_58', 'enum_59', 'enum_60', 'enum_61', 'enum_62', 'enum_63', 'enum_64', 'enum_65', 'enum_66', 'enum_67', 'enum_68', 'enum_69', 'enum_70', 'enum_71', 'enum_72', 'enum_73', 'enum_74', 'enum_75', 'enum_76', 'enum_77', 'enum_78', 'enum_79', 'enum_80', 'enum_81', 'enum_82', 'enum_83', 'enum_84', 'enum_85', 'enum_86', 'enum_87', 'enum_88', 'enum_89', 'enum_90', 'enum_91', 'enum_92', 'enum_93', 'enum_94', 'enum_95', 'enum_96', 'enum_97', 'enum_98', 'enum_99', 'enum_100', 'enum_101', 'enum_102', 'enum_103', 'enum_104', 'enum_105', 'enum_106', 'enum_107', 'enum_108', 'enum_109', 'enum_110', 'enum_111', 'enum_112', 'enum_113', 'enum_114', 'enum_115', 'enum_116', 'enum_117', 'enum_118', 'enum_119', 'enum_120', 'enum_121', 'enum_122', 'enum_123', 'enum_124', 'enum_125', 'enum_126', 'enum_127', 'enum_128', 'enum_129', 'enum_130', 'enum_131', 'enum_132', 'enum_133', 'enum_134', 'enum_135', 'enum_136', 'enum_137', 'enum_138', 'enum_139', 'enum_140', 'enum_141', 'enum_142', 'enum_143', 'enum_144', 'enum_145', 'enum_146', 'enum_147', 'enum_148', 'enum_149', 'enum_150', 'enum_151', 'enum_152', 'enum_153', 'enum_154', 'enum_155', 'enum_156', 'enum_157', 'enum_158', 'enum_159', 'enum_160', 'enum_161', 'enum_162', 'enum_163', 'enum_164', 'enum_165', 'enum_166', 'enum_167', 'enum_168', 'enum_169', 'enum_170', 'enum_171', 'enum_172', 'enum_173', 'enum_174', 'enum_175', 'enum_176', 'enum_177', 'enum_178', 'enum_179', 'enum_180', 'enum_181', 'enum_182', 'enum_183', 'enum_184', 'enum_185', 'enum_186', 'enum_187', 'enum_188', 'enum_189', 'enum_190', 'enum_191', 'enum_192', 'enum_193', 'enum_194', 'enum_195', 'enum_196', 'enum_197', 'enum_198', 'enum_199', 'enum_200', 'enum_201', 'enum_202', 'enum_203', 'enum_204', 'enum_205', 'enum_206', 'enum_207', 'enum_208', 'enum_209', 'enum_210', 'enum_211', 'enum_212', 'enum_213', 'enum_214', 'enum_215', 'enum_216', 'enum_217', 'enum_218', 'enum_219', 'enum_220', 'enum_221', 'enum_222', 'enum_223', 
'enum_224', 'enum_225', 'enum_226', 'enum_227', 'enum_228', 'enum_229', 'enum_230', 'enum_231', 'enum_232', 'enum_233', 'enum_234', 'enum_235', 'enum_236', 'enum_237', 'enum_238', 'enum_239', 'enum_240', 'enum_241', 'enum_242', 'enum_243', 'enum_244', 'enum_245', 'enum_246', 'enum_247', 'enum_248', 'enum_249', 'enum_250', 'enum_251', 'enum_252', 'enum_253', 'enum_254', 'enum_255', 'enum_256', 'enum_257', 'enum_258', 'enum_259', 'enum_260', 'enum_261', 'enum_262', 'enum_263', 'enum_264', 'enum_265', 'enum_266', 'enum_267', 'enum_268', 'enum_269', 'enum_270', 'enum_271', 'enum_272', 'enum_273', 'enum_274', 'enum_275', 'enum_276', 'enum_277', 'enum_278', 'enum_279', 'enum_280', 'enum_281', 'enum_282', 'enum_283', 'enum_284', 'enum_285', 'enum_286', 'enum_287', 'enum_288', 'enum_289', 'enum_290', 'enum_291', 'enum_292', 'enum_293', 'enum_294', 'enum_295', 'enum_296', 'enum_297', 'enum_298', 'enum_299')", + }, + {"name": "large_enum", "type": "ENUM('enum_0', 'enum_69999')"}, + {"name": "int_array", "type": "INTEGER[]"}, + {"name": "double_array", "type": "DOUBLE[]"}, + {"name": "date_array", "type": "DATE[]"}, + {"name": "timestamp_array", "type": "TIMESTAMP[]"}, + {"name": "timestamptz_array", "type": "TIMESTAMP WITH TIME ZONE[]"}, + {"name": "varchar_array", "type": "VARCHAR[]"}, + {"name": "nested_int_array", "type": "INTEGER[][]"}, + {"name": "struct", "type": "STRUCT(a INTEGER, b VARCHAR)"}, + {"name": "struct_of_arrays", "type": "STRUCT(a INTEGER[], b VARCHAR[])"}, + {"name": "array_of_structs", "type": "STRUCT(a INTEGER, b VARCHAR)[]"}, + {"name": "map", "type": "MAP(VARCHAR, VARCHAR)"}, + {"name": "union", "type": 'UNION("name" VARCHAR, age SMALLINT)'}, + {"name": "fixed_int_array", "type": "INTEGER[3]"}, + {"name": "fixed_varchar_array", "type": "VARCHAR[3]"}, + {"name": "fixed_nested_int_array", "type": "INTEGER[3][3]"}, + {"name": "fixed_nested_varchar_array", "type": "VARCHAR[3][3]"}, + {"name": "fixed_struct_array", "type": "STRUCT(a INTEGER, b VARCHAR)[3]"}, + {"name": "struct_of_fixed_array", "type": "STRUCT(a INTEGER[3], b VARCHAR[3])"}, + {"name": "fixed_array_of_int_list", "type": "INTEGER[][3]"}, + {"name": "list_of_fixed_int_array", "type": "INTEGER[3][]"}, + ], + "data": [ + [ + False, + -128, + -32768, + -2147483648, + -9223372036854775808, + "-170141183460469231731687303715884105728", + "0", + 0, + 0, + 0, + 0, + "-179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368", + "5877642-06-25 (BC)", + "00:00:00", + "290309-12-22 (BC) 00:00:00", + "290309-12-22 (BC) 00:00:00", + "290309-12-22 (BC) 00:00:00", + "1677-09-22 00:00:00", + "00:00:00+15:59:59", + "290309-12-22 (BC) 00:00:00+00", + -3.4028234663852886e38, + -1.7976931348623157e308, + -999.9, + -99999.9999, + -1000000000000.0, + -1e28, + "00000000-0000-0000-0000-000000000000", + "00:00:00", + "🦆🦆🦆🦆🦆🦆", + "thisisalongblob\\x00withnullbytes", + "0010001001011100010101011010111", + "DUCK_DUCK_ENUM", + "enum_0", + "enum_0", + [], + [], + [], + [], + [], + [], + [], + {"a": None, "b": None}, + {"a": None, "b": None}, + [], + {}, + "Frank", + [None, 2, 3], + ["a", None, "c"], + [[None, 2, 3], None, [None, 2, 3]], + [["a", None, "c"], None, ["a", None, "c"]], + [{"a": None, "b": None}, {"a": 42, "b": "🦆🦆🦆🦆🦆🦆"}, {"a": None, "b": None}], + {"a": [None, 2, 3], "b": ["a", None, 
"c"]}, + [[], [42, 999, None, None, -42], []], + [[None, 2, 3], [4, 5, 6], [None, 2, 3]], + ], + [ + True, + 127, + 32767, + 2147483647, + 9223372036854775807, + "170141183460469231731687303715884105727", + "340282366920938463463374607431768211455", + 255, + 65535, + 4294967295, + 18446744073709551615, + "179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368", + "5881580-07-10", + "24:00:00", + "294247-01-10 04:00:54.775806", + "294247-01-10 04:00:54", + "294247-01-10 04:00:54.775", + "2262-04-11 23:47:16.854775806", + "24:00:00-15:59:59", + "294247-01-10 04:00:54.775806+00", + 3.4028234663852886e38, + 1.7976931348623157e308, + 999.9, + 99999.9999, + 1000000000000.0, + 1e28, + "ffffffff-ffff-ffff-ffff-ffffffffffff", + "83 years 3 months 999 days 00:16:39.999999", + "goo", + "\\x00\\x00\\x00a", + "10101", + "GOOSE", + "enum_299", + "enum_69999", + [42, 999, None, None, -42], + [42.0, "nan", "inf", "-inf", None, -42.0], + ["1970-01-01", "infinity", "-infinity", None, "2022-05-12"], + ["1970-01-01 00:00:00", "infinity", "-infinity", None, "2022-05-12 16:23:45"], + ["1970-01-01 00:00:00+00", "infinity", "-infinity", None, "2022-05-12 23:23:45+00"], + ["🦆🦆🦆🦆🦆🦆", "goose", None, ""], + [[], [42, 999, None, None, -42], None, [], [42, 999, None, None, -42]], + {"a": 42, "b": "🦆🦆🦆🦆🦆🦆"}, + {"a": [42, 999, None, None, -42], "b": ["🦆🦆🦆🦆🦆🦆", "goose", None, ""]}, + [{"a": None, "b": None}, {"a": 42, "b": "🦆🦆🦆🦆🦆🦆"}, None], + {"key1": "🦆🦆🦆🦆🦆🦆", "key2": "goose"}, + 5, + [4, 5, 6], + ["d", "e", "f"], + [[4, 5, 6], [None, 2, 3], [4, 5, 6]], + [["d", "e", "f"], ["a", None, "c"], ["d", "e", "f"]], + [{"a": 42, "b": "🦆🦆🦆🦆🦆🦆"}, {"a": None, "b": None}, {"a": 42, "b": "🦆🦆🦆🦆🦆🦆"}], + {"a": [4, 5, 6], "b": ["d", "e", "f"]}, + [[42, 999, None, None, -42], [], [42, 999, None, None, -42]], + [[4, 5, 6], [None, 2, 3], [4, 5, 6]], + ], + [ + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + ], + ], + "rows": 3, + "statistics": {"elapsed": 0.06300000101327896, "rows_read": 0, "bytes_read": 0}, +} diff --git a/http_api/test_json_compact_all_types.py b/http_api/test_json_compact_all_types.py new file mode 100644 index 0000000..3e62681 --- /dev/null +++ b/http_api/test_json_compact_all_types.py @@ -0,0 +1,10 @@ +from .client import Client, ResponseFormat +from .responses.all_types_compact import ALL_TYPES_COMPACT + + +def test_json_compact_all_types(http_duck_with_token, token_client: Client): + res = token_client.execute_query("FROM test_all_types()", response_format=ResponseFormat.COMPACT_JSON) + + assert res["meta"] == ALL_TYPES_COMPACT["meta"] + assert res["data"] == ALL_TYPES_COMPACT["data"] + assert res["rows"] == ALL_TYPES_COMPACT["rows"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a0effd9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +httpx==0.28.1 +pytest==8.3.4 \ No newline at end of file From f6fc570837e78978ef52d4c15ce83b15c7c474ee Mon Sep 17 
00:00:00 2001
From: Niclas Haderer
Date: Mon, 23 Dec 2024 14:50:13 +0100
Subject: [PATCH 06/11] Cleanup

---
 src/httpserver_extension.cpp            |  30 +-
 src/include/httpserver_extension.hpp    |   1 -
 src/include/result_serializer.hpp       | 116 ++---
 src/result_serializer.cpp               | 462 ++++++++++--------
 {http_api => test_http_api}/__init__.py |   0
 {http_api => test_http_api}/client.py   |  20 +-
 {http_api => test_http_api}/conftest.py |  11 +-
 {http_api => test_http_api}/const.py    |   0
 .../responses/all_types_compact.py      |   0
 .../test_json_compact_all_types.py      |   4 +-
 10 files changed, 324 insertions(+), 320 deletions(-)
 rename {http_api => test_http_api}/__init__.py (100%)
 rename {http_api => test_http_api}/client.py (71%)
 rename {http_api => test_http_api}/conftest.py (75%)
 rename {http_api => test_http_api}/const.py (100%)
 rename {http_api => test_http_api}/responses/all_types_compact.py (100%)
 rename {http_api => test_http_api}/test_json_compact_all_types.py (58%)

diff --git a/src/httpserver_extension.cpp b/src/httpserver_extension.cpp
index 67fb58d..9e91104 100644
--- a/src/httpserver_extension.cpp
+++ b/src/httpserver_extension.cpp
@@ -1,28 +1,24 @@
 #define DUCKDB_EXTENSION_MAIN
+#define CPPHTTPLIB_OPENSSL_SUPPORT
+
+#include
+#include
+#include
 
 #include "httpserver_extension.hpp"
 #include "duckdb.hpp"
 #include "duckdb/common/exception.hpp"
-#include "duckdb/common/string_util.hpp"
 #include "duckdb/function/scalar_function.hpp"
 #include "duckdb/main/extension_util.hpp"
-#include "duckdb/common/atomic.hpp"
-#include "duckdb/common/exception/http_exception.hpp"
 #include "duckdb/common/allocator.hpp"
-#include
-#include
-#include
-#include
+#include "result_serializer.hpp"
+#include "httplib.hpp"
+#include "yyjson.hpp"
+#include "playground.hpp"
 
 #ifndef _WIN32
 #include
 #endif
 
-#define CPPHTTPLIB_OPENSSL_SUPPORT
-#include "httplib.hpp"
-#include "yyjson.hpp"
-
-#include "playground.hpp"
-
 using namespace duckdb_yyjson; // NOLINT
 
 namespace duckdb {
@@ -62,7 +58,7 @@ static std::string ConvertResultToJSON(MaterializedQueryResult &result, ReqStats
     auto meta_array = yyjson_mut_arr(doc);
     for (idx_t col = 0; col < result.ColumnCount(); ++col) {
         auto column_obj = yyjson_mut_obj(doc);
-        yyjson_mut_obj_add_str(doc, column_obj, "name", result.ColumnName(col).c_str());
+        yyjson_mut_obj_add_strcpy(doc, column_obj, "name", result.ColumnName(col).c_str());
         yyjson_mut_arr_append(meta_array, column_obj);
         std::string tp(GetColumnTypeName(result, col));
         yyjson_mut_obj_add_strcpy(doc, column_obj, "type", tp.c_str());
@@ -70,7 +66,7 @@ static std::string ConvertResultToJSON(MaterializedQueryResult &result, ReqStats
     yyjson_mut_obj_add_val(doc, root, "meta", meta_array);
 
     ResultSerializer serializer;
-    auto data_array = serializer.Serialize(result);
+    auto data_array = serializer.Serialize(result, true);
     yyjson_mut_obj_add_val(doc, root, "data", data_array);
 
     // Add row count
@@ -286,9 +282,9 @@ void HttpServerStart(DatabaseInstance& db, string_t host, int32_t port, string_t
     global_state.is_running = true;
     global_state.auth_token = auth.GetString();
 
-    // Custom basepath, defaults to root /
+    // Custom basepath, defaults to root /
     const char* base_path_env = std::getenv("DUCKDB_HTTPSERVER_BASEPATH");
-    std::string base_path = "/";
+    std::string base_path = "/";
     if (base_path_env && base_path_env[0] == '/' && strlen(base_path_env) > 1) {
         base_path = std::string(base_path_env);
 
diff --git a/src/include/httpserver_extension.hpp b/src/include/httpserver_extension.hpp
index 85f08c5..fff923d 100644
--- a/src/include/httpserver_extension.hpp
+++ b/src/include/httpserver_extension.hpp
@@ -1,7 +1,6 @@
 #pragma once
 
 #include "duckdb.hpp"
-#include "result_serializer.hpp"
 
 namespace duckdb {
 
diff --git a/src/include/result_serializer.hpp b/src/include/result_serializer.hpp
index 2c48f21..5195c89 100644
--- a/src/include/result_serializer.hpp
+++ b/src/include/result_serializer.hpp
@@ -1,106 +1,54 @@
 #pragma once
 
-#include "duckdb/common/extra_type_info.hpp"
-#include "duckdb/common/types/uuid.hpp"
 #include "duckdb/main/query_result.hpp"
 #include "yyjson.hpp"
 
-#include <iostream>
-
 using namespace duckdb_yyjson;
 
 namespace duckdb {
 
-class SerializationResult {
-public:
-  virtual ~SerializationResult() = default;
-  virtual bool IsSuccess() = 0;
-  virtual string WithSuccessField() = 0;
-  virtual string Raw() = 0;
-
-  void Print() {
-    std::cerr << WithSuccessField() << std::endl;
-  }
-
-  template <class TARGET>
-  TARGET &Cast() {
-    DynamicCastCheck<TARGET>(this);
-    return reinterpret_cast<TARGET &>(*this);
-  }
-};
-
-class SerializationSuccess final : public SerializationResult {
-public:
-  explicit SerializationSuccess(string serialized) : serialized(std::move(serialized)) {
-  }
-
-  bool IsSuccess() override {
-    return true;
-  }
-
-  string Raw() override {
-    return serialized;
-  }
-
-  string WithSuccessField() override {
-    return R"({"success": true, "data": )" + serialized + "}";
-  }
-
-private:
-  string serialized;
-};
-
-class SerializationError final : public SerializationResult {
+class ResultSerializer {
 public:
-  explicit SerializationError(string message) : message(std::move(message)) {
-  }
+  explicit ResultSerializer(const bool _set_invalid_values_to_null = false)
+      : set_invalid_values_to_null(_set_invalid_values_to_null) {
+    doc = yyjson_mut_doc_new(nullptr);
+    root = yyjson_mut_arr(doc);
+    if (!root) {
+      throw SerializationException("Could not create yyjson array");
+    }
+    yyjson_mut_doc_set_root(doc, root);
+  }
 
-  bool IsSuccess() override {
-    return false;
-  }
+  ~ResultSerializer() { yyjson_mut_doc_free(doc); }
 
-  string Raw() override {
-    return message;
-  }
+  void SerializeChunk(const DataChunk &chunk, vector<string> &names,
+                      vector<LogicalType> &types, bool values_as_array);
 
-  string WithSuccessField() override {
-    return R"({"success": false, "message": ")" + message + "\"}";
-  }
+  yyjson_mut_val *Serialize(QueryResult &query_result, bool values_as_array);
 
-private:
-  string message;
-};
+  yyjson_mut_val *SerializeRowAsArray(const DataChunk &chunk, idx_t row_idx,
+                                      vector<LogicalType> &types);
 
-class ResultSerializer {
-public:
-  explicit ResultSerializer(const bool _set_invalid_values_to_null = false)
-      : set_invalid_values_to_null(_set_invalid_values_to_null) {
-    doc = yyjson_mut_doc_new(nullptr);
-    root = yyjson_mut_arr(doc);
-    if (!root) {
-      throw InternalException("Could not create yyjson array");
-    }
-    yyjson_mut_doc_set_root(doc, root);
-  }
-  unique_ptr result;
+  yyjson_mut_val *SerializeRowAsObject(const DataChunk &chunk, idx_t row_idx,
+                                       vector<string> &names,
+                                       vector<LogicalType> &types);
 
-  ~ResultSerializer() {
-    yyjson_mut_doc_free(doc);
+  static std::string YY_ToString(yyjson_mut_doc *val) {
+    auto data = yyjson_mut_write(val, 0, nullptr);
+    if (!data) {
+      throw SerializationException("Could not render yyjson document");
+    }
+    std::string json_output(data);
+    free(data);
+    return json_output;
   }
 
-  void SerializeChunk(const DataChunk &chunk, vector<string> &names, vector<LogicalType> &types);
-
-  yyjson_mut_val *Serialize(QueryResult &query_result);
-
 private:
+  void SerializeValue(yyjson_mut_val *parent, const Value &value,
+                      optional_ptr<string> name, const LogicalType &type);
 
-  void SerializeValue(yyjson_mut_val *parent, const Value &value, optional_ptr<string> name,
-                      const LogicalType &type);
-
   yyjson_mut_doc *doc;
   yyjson_mut_val *root;
   bool set_invalid_values_to_null;
 };
 } // namespace duckdb
diff --git a/src/result_serializer.cpp b/src/result_serializer.cpp
index c38f94d..d5e02eb 100644
--- a/src/result_serializer.cpp
+++ b/src/result_serializer.cpp
@@ -1,224 +1,264 @@
 #include "result_serializer.hpp"
+#include "duckdb/common/extra_type_info.hpp"
+#include "duckdb/common/types/uuid.hpp"
 #include <cmath>
 
-
 // ReSharper disable once CppPassValueParameterByConstReference
-yyjson_mut_val *duckdb::ResultSerializer::Serialize(QueryResult &query_result) {
-  auto chunk = query_result.Fetch();
-  auto names = query_result.names;
-  auto types = query_result.types;
-  while (chunk) {
-    SerializeChunk(*chunk, names, types);
-    chunk = query_result.Fetch();
-  }
-
-  return root;
+yyjson_mut_val *duckdb::ResultSerializer::Serialize(QueryResult &query_result,
+                                                    bool values_as_array) {
+  auto chunk = query_result.Fetch();
+  auto names = query_result.names;
+  auto types = query_result.types;
+  while (chunk) {
+    SerializeChunk(*chunk, names, types, values_as_array);
+    chunk = query_result.Fetch();
+  }
+
+  return root;
+}
+
+void duckdb::ResultSerializer::SerializeChunk(const DataChunk &chunk,
+                                              vector<string> &names,
+                                              vector<LogicalType> &types,
+                                              bool values_as_array) {
+  const auto row_count = chunk.size();
+
+  for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
+
+    // Which itself contains an object
+    yyjson_mut_val *obj;
+
+    if (values_as_array) {
+      obj = SerializeRowAsArray(chunk, row_idx, types);
+    } else {
+      obj = SerializeRowAsObject(chunk, row_idx, names, types);
+    }
+
+    if (!yyjson_mut_arr_append(root, obj)) {
+      throw SerializationException("Could not add object to yyjson array");
+    }
+  }
 }
 
-void duckdb::ResultSerializer::SerializeChunk(
-    const DataChunk &chunk, vector<string> &names, vector<LogicalType> &types
-) {
-  const auto column_count = chunk.ColumnCount();
-  const auto row_count = chunk.size();
-
-  for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
-
-    // Which itself contains an object
-    // ReSharper disable once CppLocalVariableMayBeConst
-    auto obj = yyjson_mut_arr(doc);
-
-    for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
-
-      auto value = chunk.GetValue(col_idx, row_idx);
-      auto &type = types[col_idx];
-      SerializeValue(obj, value, nullptr, type);
-    }
-    if (!yyjson_mut_arr_append(root, obj)) {
-      throw InternalException("Could not add object to yyjson array");
-    }
-  }
+yyjson_mut_val *duckdb::ResultSerializer::SerializeRowAsArray(
+    const DataChunk &chunk, idx_t row_idx, vector<LogicalType> &types) {
+  const auto column_count = chunk.ColumnCount();
+  auto obj = yyjson_mut_arr(doc);
+
+  for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
+    auto value = chunk.GetValue(col_idx, row_idx);
+    auto &type = types[col_idx];
+    SerializeValue(obj, value, nullptr, type);
+  }
+
+  return obj;
+}
+
+yyjson_mut_val *duckdb::ResultSerializer::SerializeRowAsObject(
+    const DataChunk &chunk, idx_t row_idx, vector<string> &names,
+    vector<LogicalType> &types) {
+  const auto column_count = chunk.ColumnCount();
+  auto obj = yyjson_mut_obj(doc);
+
+  for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
+    auto value = chunk.GetValue(col_idx, row_idx);
+    auto &type = types[col_idx];
+    SerializeValue(obj, value, names[col_idx], type);
+  }
+
+  return obj;
 }
 
 void duckdb::ResultSerializer::SerializeValue( // NOLINT(*-no-recursion)
-    yyjson_mut_val *parent, const Value &value, optional_ptr<string> name, const LogicalType &type
-) {
-  yyjson_mut_val *val = nullptr;
-
-  if (value.IsNull()) {
-    goto null_handle;
-  }
-
-  switch (type.id()) {
-  case LogicalTypeId::SQLNULL:
-  null_handle:
-    val = yyjson_mut_null(doc);
-    break;
-  case LogicalTypeId::BOOLEAN:
-    val = yyjson_mut_bool(doc, value.GetValue<bool>());
-    break;
-  case LogicalTypeId::TINYINT:
-  case LogicalTypeId::SMALLINT:
-  case LogicalTypeId::INTEGER:
-  case LogicalTypeId::BIGINT:
-  case LogicalTypeId::INTEGER_LITERAL:
-    val = yyjson_mut_int(doc, value.GetValue<int64_t>());
-    break;
-  case LogicalTypeId::UTINYINT:
-  case LogicalTypeId::USMALLINT:
-  case LogicalTypeId::UINTEGER:
-  case LogicalTypeId::UBIGINT:
-    val = yyjson_mut_uint(doc, value.GetValue<uint64_t>());
-    break;
-
-  // format big numbers as strings
-  case LogicalTypeId::UHUGEINT: {
-    const uhugeint_t uHugeIntNumber = value.GetValue<uhugeint_t>();
-    val = yyjson_mut_strcpy(doc, uHugeIntNumber.ToString().c_str());
-    break;
-  }
-  case LogicalTypeId::HUGEINT: {
-    const hugeint_t hugeIntNumber = value.GetValue<hugeint_t>();
-    val = yyjson_mut_strcpy(doc, hugeIntNumber.ToString().c_str());
-    break;
-  }
-
-  case LogicalTypeId::FLOAT:
-  case LogicalTypeId::DOUBLE:
-  case LogicalTypeId::DECIMAL: {
-    const auto real_val = value.GetValue<double>();
-    if (std::isnan(real_val) || std::isinf(real_val)) {
-      if (set_invalid_values_to_null) {
-        goto null_handle;
-      } else {
-        const auto castedValue = value.DefaultCastAs(LogicalTypeId::VARCHAR).GetValue<string>();
-        val = yyjson_mut_strcpy(doc, castedValue.c_str());
-        break;
-      }
-    } else {
-      val = yyjson_mut_real(doc, real_val);
-      break;
-
-    }
-
-  }
-  // Date + time
-  case LogicalTypeId::DATE:
-  case LogicalTypeId::TIME:
-  case LogicalTypeId::TIMESTAMP_SEC:
-  case LogicalTypeId::TIMESTAMP_MS:
-  case LogicalTypeId::TIMESTAMP:
-  case LogicalTypeId::TIMESTAMP_NS:
-  case LogicalTypeId::TIMESTAMP_TZ:
-  case LogicalTypeId::TIME_TZ:
-  // Enum
-  case LogicalTypeId::ENUM:
-  // Strings
-  case LogicalTypeId::CHAR:
-  case LogicalTypeId::VARCHAR:
-  case LogicalTypeId::STRING_LITERAL:
-    val = yyjson_mut_strcpy(doc, value.GetValue<string>().c_str());
-    break;
-  case LogicalTypeId::VARINT:
-    val = yyjson_mut_strcpy(doc, value.DefaultCastAs(LogicalTypeId::VARCHAR).GetValue<string>().c_str());
-    break;
-  // UUID
-  case LogicalTypeId::UUID: {
-    const auto uuid_int = value.GetValue<hugeint_t>();
-    const auto uuid = UUID::ToString(uuid_int);
-    val = yyjson_mut_strcpy(doc, uuid.c_str());
-    break;
-  }
-  // Weird special types that are just serialized to string
-  case LogicalTypeId::INTERVAL:
-  // TODO perhaps base64 encode blob?
-  case LogicalTypeId::BLOB:
-  case LogicalTypeId::BIT:
-    val = yyjson_mut_strcpy(doc, value.ToString().c_str());
-    break;
-  case LogicalTypeId::UNION: {
-    auto &union_val = UnionValue::GetValue(value);
-    SerializeValue(parent, union_val, name, union_val.type());
-    return;
-  }
-  case LogicalTypeId::ARRAY:
-  case LogicalTypeId::LIST: {
-    const auto get_children = LogicalTypeId::LIST == type.id() ? ListValue::GetChildren : ArrayValue::GetChildren;
-    auto &children = get_children(value);
-    val = yyjson_mut_arr(doc);
-    for (auto &child : children) {
-      SerializeValue(val, child, nullptr, child.type());
-    }
-    break;
-  }
-  case LogicalTypeId::STRUCT: {
-    const auto &children = StructValue::GetChildren(value);
-    const auto &type_info = value.type().AuxInfo()->Cast<StructTypeInfo>();
-
-    auto all_keys_are_empty = true;
-    for (uint64_t idx = 0; idx < children.size(); ++idx) {
-      if (!type_info.child_types[idx].first.empty()) {
-        all_keys_are_empty = false;
-        break;
-      }
-    }
-
-    // Unnamed struct -> just create tuples
-    if (all_keys_are_empty) {
-      val = yyjson_mut_arr(doc);
-      for (auto &child : children) {
-        SerializeValue(val, child, nullptr, child.type());
-      }
-    } else {
-      val = yyjson_mut_obj(doc);
-      for (uint64_t idx = 0; idx < children.size(); ++idx) {
-        string struct_name = type_info.child_types[idx].first;
-        SerializeValue(val, children[idx], struct_name, type_info.child_types[idx].second);
-      }
-    }
-
-    break;
-  }
-  // Not implemented types
-  case LogicalTypeId::MAP: {
-    auto &children = ListValue::GetChildren(value);
-    val = yyjson_mut_obj(doc);
-    for (auto &item : children) {
-      auto &key_value = StructValue::GetChildren(item);
-      D_ASSERT(key_value.size() == 2);
-      auto key_str = key_value[0].GetValue<string>();
-      SerializeValue(val, key_value[1], key_str, key_value[1].type());
-    }
-    break;
-  }
-
-  // Unsupported types
-  case LogicalTypeId::TABLE:
-  case LogicalTypeId::POINTER:
-  case LogicalTypeId::VALIDITY:
-  case LogicalTypeId::AGGREGATE_STATE:
-  case LogicalTypeId::LAMBDA:
-  case LogicalTypeId::USER:
-  case LogicalTypeId::ANY:
-  case LogicalTypeId::UNKNOWN:
-  case LogicalTypeId::INVALID:
-    if (set_invalid_values_to_null) {
-      goto null_handle;
-    }
-    throw InvalidTypeException("Type " + type.ToString() + " not supported");
-  }
-
-  if (!val) {
-    throw InternalException("Could not serialize value of type " + type.ToString());
-  }
-  if (!name) {
-    if (!yyjson_mut_arr_append(parent, val)) {
-      throw InternalException("Could not add value to yyjson array");
-    }
-  } else {
-    yyjson_mut_val *key = yyjson_mut_strcpy(doc, name->c_str());
-    D_ASSERT(key);
-    if (!yyjson_mut_obj_add(parent, key, val)) {
-      throw InternalException("Could not add value to yyjson object");
-    }
-  }
-}
\ No newline at end of file
+    yyjson_mut_val *parent, const Value &value, optional_ptr<string> name,
+    const LogicalType &type) {
+  yyjson_mut_val *val = nullptr;
+
+  if (value.IsNull()) {
+    goto null_handle;
+  }
+
+  switch (type.id()) {
+  case LogicalTypeId::SQLNULL:
+  null_handle:
+    val = yyjson_mut_null(doc);
+    break;
+  case LogicalTypeId::BOOLEAN:
+    val = yyjson_mut_bool(doc, value.GetValue<bool>());
+    break;
+  case LogicalTypeId::TINYINT:
+  case LogicalTypeId::SMALLINT:
+  case LogicalTypeId::INTEGER:
+  case LogicalTypeId::BIGINT:
+  case LogicalTypeId::INTEGER_LITERAL:
+    val = yyjson_mut_int(doc, value.GetValue<int64_t>());
+    break;
+  case LogicalTypeId::UTINYINT:
+  case LogicalTypeId::USMALLINT:
+  case LogicalTypeId::UINTEGER:
+  case LogicalTypeId::UBIGINT:
+    val = yyjson_mut_uint(doc, value.GetValue<uint64_t>());
+    break;
+
+  // format big numbers as strings
+  case LogicalTypeId::UHUGEINT: {
+    const uhugeint_t uHugeIntNumber = value.GetValue<uhugeint_t>();
+    val = yyjson_mut_strcpy(doc, uHugeIntNumber.ToString().c_str());
+    break;
+  }
+  case LogicalTypeId::HUGEINT: {
+    const hugeint_t hugeIntNumber = value.GetValue<hugeint_t>();
+    val = yyjson_mut_strcpy(doc, hugeIntNumber.ToString().c_str());
+    break;
+  }
+
+  case LogicalTypeId::FLOAT:
+  case LogicalTypeId::DOUBLE:
+  case LogicalTypeId::DECIMAL: {
+    const auto real_val = value.GetValue<double>();
+    if (std::isnan(real_val) || std::isinf(real_val)) {
+      if (set_invalid_values_to_null) {
+        goto null_handle;
+      } else {
+        const auto castedValue =
+            value.DefaultCastAs(LogicalTypeId::VARCHAR).GetValue<string>();
+        val = yyjson_mut_strcpy(doc, castedValue.c_str());
+        break;
+      }
+    } else {
+      val = yyjson_mut_real(doc, real_val);
+      break;
+    }
+  }
+  // Date + time
+  case LogicalTypeId::DATE:
+  case LogicalTypeId::TIME:
+  case LogicalTypeId::TIMESTAMP_SEC:
+  case LogicalTypeId::TIMESTAMP_MS:
+  case LogicalTypeId::TIMESTAMP:
+  case LogicalTypeId::TIMESTAMP_NS:
+  case LogicalTypeId::TIMESTAMP_TZ:
+  case LogicalTypeId::TIME_TZ:
+  // Enum
+  case LogicalTypeId::ENUM:
+  // Strings
+  case LogicalTypeId::CHAR:
+  case LogicalTypeId::VARCHAR:
+  case LogicalTypeId::STRING_LITERAL:
+    val = yyjson_mut_strcpy(doc, value.GetValue<string>().c_str());
+    break;
+  case LogicalTypeId::VARINT:
+    val = yyjson_mut_strcpy(
+        doc,
+        value.DefaultCastAs(LogicalTypeId::VARCHAR).GetValue<string>().c_str());
+    break;
+  // UUID
+  case LogicalTypeId::UUID: {
+    const auto uuid_int = value.GetValue<hugeint_t>();
+    const auto uuid = UUID::ToString(uuid_int);
+    val = yyjson_mut_strcpy(doc, uuid.c_str());
+    break;
+  }
+  // Weird special types that are just serialized to string
+  case LogicalTypeId::INTERVAL:
+  // TODO perhaps base64 encode blob?
+  case LogicalTypeId::BLOB:
+  case LogicalTypeId::BIT:
+    val = yyjson_mut_strcpy(doc, value.ToString().c_str());
+    break;
+  case LogicalTypeId::UNION: {
+    auto &union_val = UnionValue::GetValue(value);
+    SerializeValue(parent, union_val, name, union_val.type());
+    return;
+  }
+  case LogicalTypeId::ARRAY:
+  case LogicalTypeId::LIST: {
+    const auto get_children = LogicalTypeId::LIST == type.id()
+                                  ? ListValue::GetChildren
+                                  : ArrayValue::GetChildren;
+    auto &children = get_children(value);
+    val = yyjson_mut_arr(doc);
+    for (auto &child : children) {
+      SerializeValue(val, child, nullptr, child.type());
+    }
+    break;
+  }
+  case LogicalTypeId::STRUCT: {
+    const auto &children = StructValue::GetChildren(value);
+    const auto &type_info = value.type().AuxInfo()->Cast<StructTypeInfo>();
+
+    auto all_keys_are_empty = true;
+    for (uint64_t idx = 0; idx < children.size(); ++idx) {
+      if (!type_info.child_types[idx].first.empty()) {
+        all_keys_are_empty = false;
+        break;
+      }
+    }
+
+    // Unnamed struct -> just create tuples
+    if (all_keys_are_empty) {
+      val = yyjson_mut_arr(doc);
+      for (auto &child : children) {
+        SerializeValue(val, child, nullptr, child.type());
+      }
+    } else {
+      val = yyjson_mut_obj(doc);
+      for (uint64_t idx = 0; idx < children.size(); ++idx) {
+        string struct_name = type_info.child_types[idx].first;
+        SerializeValue(val, children[idx], struct_name,
+                       type_info.child_types[idx].second);
+      }
+    }
+
+    break;
+  }
+  // Not implemented types
+  case LogicalTypeId::MAP: {
+    auto &children = ListValue::GetChildren(value);
+    val = yyjson_mut_obj(doc);
+    for (auto &item : children) {
+      auto &key_value = StructValue::GetChildren(item);
+      D_ASSERT(key_value.size() == 2);
+      auto key_str = key_value[0].GetValue<string>();
+      SerializeValue(val, key_value[1], key_str, key_value[1].type());
+    }
+    break;
+  }
+
+  // Unsupported types
+  case LogicalTypeId::TABLE:
+  case LogicalTypeId::POINTER:
+  case LogicalTypeId::VALIDITY:
+  case LogicalTypeId::AGGREGATE_STATE:
+  case LogicalTypeId::LAMBDA:
+  case LogicalTypeId::USER:
+  case LogicalTypeId::ANY:
+  case LogicalTypeId::UNKNOWN:
+  case LogicalTypeId::INVALID:
+    if (set_invalid_values_to_null) {
+      goto null_handle;
+    }
+    throw InvalidTypeException("Type " + type.ToString() + " not supported");
+  }
+
+  if (!val) {
+    throw SerializationException("Could not serialize value of type " +
+                                 type.ToString());
+  }
+  if (!name) {
+    if (!yyjson_mut_arr_append(parent, val)) {
+      throw SerializationException("Could not add value to yyjson array");
+    }
+  } else {
+    yyjson_mut_val *key = yyjson_mut_strcpy(doc, name->c_str());
+    if (!key) {
+      throw SerializationException("Could not create yyjson key");
+    }
+
+    if (!yyjson_mut_obj_add(parent, key, val)) {
+      throw SerializationException("Could not add value to yyjson object");
+    }
+  }
+}
diff --git a/http_api/__init__.py b/test_http_api/__init__.py
similarity index 100%
rename from http_api/__init__.py
rename to test_http_api/__init__.py
diff --git a/http_api/client.py b/test_http_api/client.py
similarity index 71%
rename from http_api/client.py
rename to test_http_api/client.py
index 9b5d6ac..2c20263 100644
--- a/http_api/client.py
+++ b/test_http_api/client.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import time
 from enum import Enum
 
 import httpx
@@ -21,7 +22,7 @@ def __init__(self, url: str, basic_auth: str | None = None, token_auth: str | No
         self._basic_auth = basic_auth
         self._token_auth = token_auth
 
-    def execute_query(self, sql: str, response_format: ResponseFormat):
+    def execute_query(self, sql: str, response_format: ResponseFormat) -> dict:
         headers = {"format": response_format.value}
 
         if self._token_auth:
@@ -36,3 +37,20 @@ def execute_query(self, sql: str, response_format: ResponseFormat):
             response = client.get(self._url, params={"q": sql}, headers=headers, auth=auth)
             response.raise_for_status()
             return response.json()
+
+
+    def ping(self) -> None:
+        with httpx.Client() as client:
+            response = client.get(f"{self._url}/ping")
+            response.raise_for_status()
+
+    def on_ready(self, timeout = 5) -> None:
+        end_time = time.time() + timeout
+        while time.time() < end_time:
+            try:
+                self.ping()
+                return
+            except Exception:
+                pass
+
+        raise TimeoutError("Server is not ready")
\ No newline at end of file
diff --git a/http_api/conftest.py b/test_http_api/conftest.py
similarity index 75%
rename from http_api/conftest.py
rename to test_http_api/conftest.py
index 548306f..7b5ed91 100644
--- a/http_api/conftest.py
+++ b/test_http_api/conftest.py
@@ -1,4 +1,5 @@
 import subprocess
+from typing import Iterator
 
 import pytest
 
@@ -6,9 +7,8 @@
 from .const import DEBUG_SHELL, HOST, PORT, API_KEY
 
 
-
 @pytest.fixture
-def http_duck_with_token():
+def http_duck_with_token() -> Iterator[Client]:
     process = subprocess.Popen(
         [
             DEBUG_SHELL,
         ],
@@ -17,14 +17,17 @@ def http_duck_with_token():
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         text=True,
-        bufsize=1,
+        bufsize=2**16
     )
 
     # Load the extension
     process.stdin.write("LOAD httpserver;\n")
    cmd = f"SELECT httpserve_start('{HOST}', {PORT}, '{API_KEY}');\n"
     process.stdin.write(cmd)
-    yield
+
+    client = Client(f"http://{HOST}:{PORT}", token_auth=API_KEY)
+    client.on_ready()
+    yield client
 
     process.kill()
diff --git a/http_api/const.py b/test_http_api/const.py
similarity index 100%
rename from http_api/const.py
rename to test_http_api/const.py
diff --git a/http_api/responses/all_types_compact.py b/test_http_api/responses/all_types_compact.py
similarity index 100%
rename from http_api/responses/all_types_compact.py
rename to test_http_api/responses/all_types_compact.py
diff --git a/http_api/test_json_compact_all_types.py b/test_http_api/test_json_compact_all_types.py
similarity index 58%
rename from http_api/test_json_compact_all_types.py
rename to test_http_api/test_json_compact_all_types.py
index 3e62681..b2783e9 100644
--- a/http_api/test_json_compact_all_types.py
+++ b/test_http_api/test_json_compact_all_types.py
@@ -2,8 +2,8 @@
 from .responses.all_types_compact import ALL_TYPES_COMPACT
 
 
-def test_json_compact_all_types(http_duck_with_token, token_client: Client):
-    res = token_client.execute_query("FROM test_all_types()", response_format=ResponseFormat.COMPACT_JSON)
+def test_json_compact_all_types(http_duck_with_token: Client):
+    res = http_duck_with_token.execute_query("FROM test_all_types()", response_format=ResponseFormat.COMPACT_JSON)
 
     assert res["meta"] == ALL_TYPES_COMPACT["meta"]
     assert res["data"] == ALL_TYPES_COMPACT["data"]
From f10d81a2af9b9a95253512c100bc0b2bb315b9e5 Mon Sep 17 00:00:00 2001
From: Niclas Haderer
Date: Mon, 23 Dec 2024 14:52:26 +0100
Subject: [PATCH 07/11] Moved up clang-format and tidy config so IDE and make
 format picks up on the right formatting rules

---
 .clang-format                     |   1 +
 .clang-tidy                       |   1 +
 CMakeLists.txt                    |  26 +-
 src/include/result_serializer.hpp |  73 +++--
 src/result_serializer.cpp         | 464 +++++++++++++++----------------
 5 files changed, 274 insertions(+), 291 deletions(-)
 create mode 120000 .clang-format
 create mode 120000 .clang-tidy

diff --git a/.clang-format b/.clang-format
new file mode 120000
index 0000000..9a13bb6
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1 @@
+duckdb/.clang-format
\ No newline at end of file
diff --git a/.clang-tidy b/.clang-tidy
new file mode 120000
index 0000000..b438d44
--- /dev/null
+++ b/.clang-tidy
@@ -0,0 +1 @@
+duckdb/.clang-tidy
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 54c5cd4..ed43a9c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,25 +6,20 @@ set(EXTENSION_NAME ${TARGET_NAME}_extension)
 set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)
 
 project(${TARGET_NAME})
-include_directories(
-    src/include
-    ${CMAKE_CURRENT_BINARY_DIR}
-    duckdb/third_party/httplib
-    duckdb/parquet/include
-)
+include_directories(src/include ${CMAKE_CURRENT_BINARY_DIR}
+                    duckdb/third_party/httplib duckdb/parquet/include)
 
 # Embed ./src/assets/index.html as a C++ header
 add_custom_command(
   OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/playground.hpp
-  COMMAND ${CMAKE_COMMAND} -P ${PROJECT_SOURCE_DIR}/embed.cmake ${PROJECT_SOURCE_DIR}/src/assets/index.html ${CMAKE_CURRENT_BINARY_DIR}/playground.hpp playgroundContent
-  DEPENDS ${PROJECT_SOURCE_DIR}/src/assets/index.html
-)
+  COMMAND
+    ${CMAKE_COMMAND} -P ${PROJECT_SOURCE_DIR}/embed.cmake
+    ${PROJECT_SOURCE_DIR}/src/assets/index.html
+    ${CMAKE_CURRENT_BINARY_DIR}/playground.hpp playgroundContent
+  DEPENDS ${PROJECT_SOURCE_DIR}/src/assets/index.html)
 
-set(EXTENSION_SOURCES
-    src/httpserver_extension.cpp
-    src/result_serializer.cpp
-    ${CMAKE_CURRENT_BINARY_DIR}/playground.hpp
-)
+set(EXTENSION_SOURCES src/httpserver_extension.cpp src/result_serializer.cpp
+                      ${CMAKE_CURRENT_BINARY_DIR}/playground.hpp)
 
 if(MINGW)
   set(OPENSSL_USE_STATIC_LIBS TRUE)
@@ -37,7 +32,8 @@ build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
 build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})
 
 include_directories(${OPENSSL_INCLUDE_DIR})
-target_link_libraries(${LOADABLE_EXTENSION_NAME} duckdb_mbedtls ${OPENSSL_LIBRARIES})
+target_link_libraries(${LOADABLE_EXTENSION_NAME} duckdb_mbedtls
+                      ${OPENSSL_LIBRARIES})
 target_link_libraries(${EXTENSION_NAME} duckdb_mbedtls ${OPENSSL_LIBRARIES})
 
 if(MINGW)
diff --git a/src/include/result_serializer.hpp b/src/include/result_serializer.hpp
index 5195c89..6e03040 100644
--- a/src/include/result_serializer.hpp
+++ b/src/include/result_serializer.hpp
@@ -9,46 +9,45 @@ namespace duckdb {
 
 class ResultSerializer {
 public:
-  explicit ResultSerializer(const bool _set_invalid_values_to_null = false)
-      : set_invalid_values_to_null(_set_invalid_values_to_null) {
-    doc = yyjson_mut_doc_new(nullptr);
-    root = yyjson_mut_arr(doc);
-    if (!root) {
-      throw SerializationException("Could not create yyjson array");
-    }
-    yyjson_mut_doc_set_root(doc, root);
-  }
-
-  ~ResultSerializer() { yyjson_mut_doc_free(doc); }
-
-  void SerializeChunk(const DataChunk &chunk, vector<string> &names,
-                      vector<LogicalType> &types, bool values_as_array);
-
-  yyjson_mut_val *Serialize(QueryResult &query_result, bool values_as_array);
-
-  yyjson_mut_val *SerializeRowAsArray(const DataChunk &chunk, idx_t row_idx,
-                                      vector<LogicalType> &types);
-
-  yyjson_mut_val *SerializeRowAsObject(const DataChunk &chunk, idx_t row_idx,
-                                       vector<string> &names,
-                                       vector<LogicalType> &types);
-
-  static std::string YY_ToString(yyjson_mut_doc *val) {
-    auto data = yyjson_mut_write(val, 0, nullptr);
-    if (!data) {
-      throw SerializationException("Could not render yyjson document");
+	explicit ResultSerializer(const bool _set_invalid_values_to_null = false)
+	    : set_invalid_values_to_null(_set_invalid_values_to_null) {
+		doc = yyjson_mut_doc_new(nullptr);
+		root = yyjson_mut_arr(doc);
+		if (!root) {
+			throw SerializationException("Could not create yyjson array");
+		}
+		yyjson_mut_doc_set_root(doc, root);
+	}
+
+	~ResultSerializer() {
+		yyjson_mut_doc_free(doc);
+	}
+
+	void SerializeChunk(const DataChunk &chunk, vector<string> &names, vector<LogicalType> &types,
+	                    bool values_as_array);
+
+	yyjson_mut_val *Serialize(QueryResult &query_result, bool values_as_array);
+
+	yyjson_mut_val *SerializeRowAsArray(const DataChunk &chunk, idx_t row_idx, vector<LogicalType> &types);
+
+	yyjson_mut_val *SerializeRowAsObject(const DataChunk &chunk, idx_t row_idx, vector<string> &names,
+	                                     vector<LogicalType> &types);
+
+	static std::string YY_ToString(yyjson_mut_doc *val) {
+		auto data = yyjson_mut_write(val, 0, nullptr);
+		if (!data) {
+			throw SerializationException("Could not render yyjson document");
+		}
+		std::string json_output(data);
+		free(data);
+		return json_output;
 	}
-    std::string json_output(data);
-    free(data);
-    return json_output;
-  }
 
 private:
-  void SerializeValue(yyjson_mut_val *parent, const Value &value,
-                      optional_ptr<string> name, const LogicalType &type);
+	void SerializeValue(yyjson_mut_val *parent, const Value &value, optional_ptr<string> name, const LogicalType &type);
 
-  yyjson_mut_doc *doc;
-  yyjson_mut_val *root;
-  bool set_invalid_values_to_null;
+	yyjson_mut_doc *doc;
+	yyjson_mut_val *root;
+	bool set_invalid_values_to_null;
 };
 } // namespace duckdb
diff --git a/src/result_serializer.cpp b/src/result_serializer.cpp
index d5e02eb..6091a07 100644
--- a/src/result_serializer.cpp
+++ b/src/result_serializer.cpp
@@ -4,261 +4,247 @@
 #include <cmath>
 
 // ReSharper disable once CppPassValueParameterByConstReference
-yyjson_mut_val *duckdb::ResultSerializer::Serialize(QueryResult &query_result,
-                                                    bool values_as_array) {
-  auto chunk = query_result.Fetch();
-  auto names = query_result.names;
-  auto types = query_result.types;
-  while (chunk) {
-    SerializeChunk(*chunk, names, types, values_as_array);
-    chunk = query_result.Fetch();
-  }
-
-  return root;
+yyjson_mut_val *duckdb::ResultSerializer::Serialize(QueryResult &query_result, bool values_as_array) {
+	auto chunk = query_result.Fetch();
+	auto names = query_result.names;
+	auto types = query_result.types;
+	while (chunk) {
+		SerializeChunk(*chunk, names, types, values_as_array);
+		chunk = query_result.Fetch();
+	}
+
+	return root;
 }
 
-void duckdb::ResultSerializer::SerializeChunk(const DataChunk &chunk,
-                                              vector<string> &names,
-                                              vector<LogicalType> &types,
+void duckdb::ResultSerializer::SerializeChunk(const DataChunk &chunk, vector<string> &names, vector<LogicalType> &types,
                                               bool values_as_array) {
-  const auto row_count = chunk.size();
+	const auto row_count = chunk.size();
 
-  for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
+	for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
 
-    // Which itself contains an object
-    yyjson_mut_val *obj;
+		// Which itself contains an object
+		yyjson_mut_val *obj;
 
-    if (values_as_array) {
-      obj = SerializeRowAsArray(chunk, row_idx, types);
-    } else {
-      obj = SerializeRowAsObject(chunk, row_idx, names, types);
-    }
+		if (values_as_array) {
+			obj = SerializeRowAsArray(chunk, row_idx, types);
+		} else {
+			obj = SerializeRowAsObject(chunk, row_idx, names, types);
+		}
 
-    if (!yyjson_mut_arr_append(root, obj)) {
-      throw SerializationException("Could not add object to yyjson array");
-    }
-  }
+		if (!yyjson_mut_arr_append(root, obj)) {
+			throw SerializationException("Could not add object to yyjson array");
+		}
+	}
 }
 
+yyjson_mut_val *duckdb::ResultSerializer::SerializeRowAsArray(const DataChunk &chunk, idx_t row_idx,
+                                                              vector<LogicalType> &types) {
+	const auto column_count = chunk.ColumnCount();
+	auto obj = yyjson_mut_arr(doc);
+	for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
+		auto value = chunk.GetValue(col_idx, row_idx);
+		auto &type = types[col_idx];
+		SerializeValue(obj, value, nullptr, type);
+	}
 
-yyjson_mut_val *duckdb::ResultSerializer::SerializeRowAsArray(
-    const DataChunk &chunk, idx_t row_idx, vector<LogicalType> &types) {
-  const auto column_count = chunk.ColumnCount();
-  auto obj = yyjson_mut_arr(doc);
-
-  for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
-    auto value = chunk.GetValue(col_idx, row_idx);
-    auto &type = types[col_idx];
-    SerializeValue(obj, value, nullptr, type);
-  }
-
-  return obj;
+	return obj;
 }
 
-yyjson_mut_val *duckdb::ResultSerializer::SerializeRowAsObject(
-    const DataChunk &chunk, idx_t row_idx, vector<string> &names,
-    vector<LogicalType> &types) {
-  const auto column_count = chunk.ColumnCount();
-  auto obj = yyjson_mut_obj(doc);
+yyjson_mut_val *duckdb::ResultSerializer::SerializeRowAsObject(const DataChunk &chunk, idx_t row_idx,
+                                                               vector<string> &names, vector<LogicalType> &types) {
+	const auto column_count = chunk.ColumnCount();
+	auto obj = yyjson_mut_obj(doc);
 
-  for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
-    auto value = chunk.GetValue(col_idx, row_idx);
-    auto &type = types[col_idx];
-    SerializeValue(obj, value, names[col_idx], type);
-  }
+	for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
+		auto value = chunk.GetValue(col_idx, row_idx);
+		auto &type = types[col_idx];
+		SerializeValue(obj, value, names[col_idx], type);
+	}
 
-  return obj;
+	return obj;
 }
 
 void duckdb::ResultSerializer::SerializeValue( // NOLINT(*-no-recursion)
-    yyjson_mut_val *parent, const Value &value, optional_ptr<string> name,
-    const LogicalType &type) {
-  yyjson_mut_val *val = nullptr;
-
-  if (value.IsNull()) {
-    goto null_handle;
-  }
-
-  switch (type.id()) {
-  case LogicalTypeId::SQLNULL:
-  null_handle:
-    val = yyjson_mut_null(doc);
-    break;
-  case LogicalTypeId::BOOLEAN:
-    val = yyjson_mut_bool(doc, value.GetValue<bool>());
-    break;
-  case LogicalTypeId::TINYINT:
-  case LogicalTypeId::SMALLINT:
-  case LogicalTypeId::INTEGER:
-  case LogicalTypeId::BIGINT:
-  case LogicalTypeId::INTEGER_LITERAL:
-    val = yyjson_mut_int(doc, value.GetValue<int64_t>());
-    break;
-  case LogicalTypeId::UTINYINT:
-  case
LogicalTypeId::USMALLINT: - case LogicalTypeId::UINTEGER: - case LogicalTypeId::UBIGINT: - val = yyjson_mut_uint(doc, value.GetValue()); - break; - - // format to big numbers as strings - case LogicalTypeId::UHUGEINT: { - const uhugeint_t uHugeIntNumber = value.GetValue(); - val = yyjson_mut_strcpy(doc, uHugeIntNumber.ToString().c_str()); - break; - } - case LogicalTypeId::HUGEINT: { - const hugeint_t hugeIntNumber = value.GetValue(); - val = yyjson_mut_strcpy(doc, hugeIntNumber.ToString().c_str()); - break; - } - - case LogicalTypeId::FLOAT: - case LogicalTypeId::DOUBLE: - case LogicalTypeId::DECIMAL: { - const auto real_val = value.GetValue(); - if (std::isnan(real_val) || std::isinf(real_val)) { - if (set_invalid_values_to_null) { - goto null_handle; - } else { - const auto castedValue = - value.DefaultCastAs(LogicalTypeId::VARCHAR).GetValue(); - val = yyjson_mut_strcpy(doc, castedValue.c_str()); - break; - } - } else { - val = yyjson_mut_real(doc, real_val); - break; - } - } - // Data + time - case LogicalTypeId::DATE: - case LogicalTypeId::TIME: - case LogicalTypeId::TIMESTAMP_SEC: - case LogicalTypeId::TIMESTAMP_MS: - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_NS: - case LogicalTypeId::TIMESTAMP_TZ: - case LogicalTypeId::TIME_TZ: - // Enum - case LogicalTypeId::ENUM: - // Strings - case LogicalTypeId::CHAR: - case LogicalTypeId::VARCHAR: - case LogicalTypeId::STRING_LITERAL: - val = yyjson_mut_strcpy(doc, value.GetValue().c_str()); - break; - case LogicalTypeId::VARINT: - val = yyjson_mut_strcpy( - doc, - value.DefaultCastAs(LogicalTypeId::VARCHAR).GetValue().c_str()); - break; - // UUID - case LogicalTypeId::UUID: { - const auto uuid_int = value.GetValue(); - const auto uuid = UUID::ToString(uuid_int); - val = yyjson_mut_strcpy(doc, uuid.c_str()); - break; - } - // Weird special types that are just serialized to string - case LogicalTypeId::INTERVAL: - // TODO perhaps base64 encode blob? - case LogicalTypeId::BLOB: - case LogicalTypeId::BIT: - val = yyjson_mut_strcpy(doc, value.ToString().c_str()); - break; - case LogicalTypeId::UNION: { - auto &union_val = UnionValue::GetValue(value); - SerializeValue(parent, union_val, name, union_val.type()); - return; - } - case LogicalTypeId::ARRAY: - case LogicalTypeId::LIST: { - const auto get_children = LogicalTypeId::LIST == type.id() - ? 
ListValue::GetChildren - : ArrayValue::GetChildren; - auto &children = get_children(value); - val = yyjson_mut_arr(doc); - for (auto &child : children) { - SerializeValue(val, child, nullptr, child.type()); - } - break; - } - case LogicalTypeId::STRUCT: { - const auto &children = StructValue::GetChildren(value); - const auto &type_info = value.type().AuxInfo()->Cast(); - - auto all_keys_are_empty = true; - for (uint64_t idx = 0; idx < children.size(); ++idx) { - if (!type_info.child_types[idx].first.empty()) { - all_keys_are_empty = false; - break; - } - } - - // Unnamed struct -> just create tuples - if (all_keys_are_empty) { - val = yyjson_mut_arr(doc); - for (auto &child : children) { - SerializeValue(val, child, nullptr, child.type()); - } - } else { - val = yyjson_mut_obj(doc); - for (uint64_t idx = 0; idx < children.size(); ++idx) { - string struct_name = type_info.child_types[idx].first; - SerializeValue(val, children[idx], struct_name, - type_info.child_types[idx].second); - } - } - - break; - } - // Not implemented types - case LogicalTypeId::MAP: { - auto &children = ListValue::GetChildren(value); - val = yyjson_mut_obj(doc); - for (auto &item : children) { - auto &key_value = StructValue::GetChildren(item); - D_ASSERT(key_value.size() == 2); - auto key_str = key_value[0].GetValue(); - SerializeValue(val, key_value[1], key_str, key_value[1].type()); - } - break; - } - - // Unsupported types - case LogicalTypeId::TABLE: - case LogicalTypeId::POINTER: - case LogicalTypeId::VALIDITY: - case LogicalTypeId::AGGREGATE_STATE: - case LogicalTypeId::LAMBDA: - case LogicalTypeId::USER: - case LogicalTypeId::ANY: - case LogicalTypeId::UNKNOWN: - case LogicalTypeId::INVALID: - if (set_invalid_values_to_null) { - goto null_handle; - } - throw InvalidTypeException("Type " + type.ToString() + " not supported"); - } - - if (!val) { - throw SerializationException("Could not serialize value of type " + - type.ToString()); - } - if (!name) { - if (!yyjson_mut_arr_append(parent, val)) { - throw SerializationException("Could not add value to yyjson array"); - } - } else { - yyjson_mut_val *key = yyjson_mut_strcpy(doc, name->c_str()); - if (!key) { - throw SerializationException("Could not create yyjson key"); - } - - if (!yyjson_mut_obj_add(parent, key, val)) { - throw SerializationException("Could not add value to yyjson object"); - } - } + yyjson_mut_val *parent, const Value &value, optional_ptr name, const LogicalType &type) { + yyjson_mut_val *val = nullptr; + + if (value.IsNull()) { + goto null_handle; + } + + switch (type.id()) { + case LogicalTypeId::SQLNULL: + null_handle: + val = yyjson_mut_null(doc); + break; + case LogicalTypeId::BOOLEAN: + val = yyjson_mut_bool(doc, value.GetValue()); + break; + case LogicalTypeId::TINYINT: + case LogicalTypeId::SMALLINT: + case LogicalTypeId::INTEGER: + case LogicalTypeId::BIGINT: + case LogicalTypeId::INTEGER_LITERAL: + val = yyjson_mut_int(doc, value.GetValue()); + break; + case LogicalTypeId::UTINYINT: + case LogicalTypeId::USMALLINT: + case LogicalTypeId::UINTEGER: + case LogicalTypeId::UBIGINT: + val = yyjson_mut_uint(doc, value.GetValue()); + break; + + // format to big numbers as strings + case LogicalTypeId::UHUGEINT: { + const uhugeint_t uHugeIntNumber = value.GetValue(); + val = yyjson_mut_strcpy(doc, uHugeIntNumber.ToString().c_str()); + break; + } + case LogicalTypeId::HUGEINT: { + const hugeint_t hugeIntNumber = value.GetValue(); + val = yyjson_mut_strcpy(doc, hugeIntNumber.ToString().c_str()); + break; + } + + case LogicalTypeId::FLOAT: + 
case LogicalTypeId::DOUBLE: + case LogicalTypeId::DECIMAL: { + const auto real_val = value.GetValue(); + if (std::isnan(real_val) || std::isinf(real_val)) { + if (set_invalid_values_to_null) { + goto null_handle; + } else { + const auto castedValue = value.DefaultCastAs(LogicalTypeId::VARCHAR).GetValue(); + val = yyjson_mut_strcpy(doc, castedValue.c_str()); + break; + } + } else { + val = yyjson_mut_real(doc, real_val); + break; + } + } + // Date + time + case LogicalTypeId::DATE: + case LogicalTypeId::TIME: + case LogicalTypeId::TIMESTAMP_SEC: + case LogicalTypeId::TIMESTAMP_MS: + case LogicalTypeId::TIMESTAMP: + case LogicalTypeId::TIMESTAMP_NS: + case LogicalTypeId::TIMESTAMP_TZ: + case LogicalTypeId::TIME_TZ: + // Enum + case LogicalTypeId::ENUM: + // Strings + case LogicalTypeId::CHAR: + case LogicalTypeId::VARCHAR: + case LogicalTypeId::STRING_LITERAL: + val = yyjson_mut_strcpy(doc, value.GetValue().c_str()); + break; + case LogicalTypeId::VARINT: + val = yyjson_mut_strcpy(doc, value.DefaultCastAs(LogicalTypeId::VARCHAR).GetValue().c_str()); + break; + // UUID + case LogicalTypeId::UUID: { + const auto uuid_int = value.GetValue(); + const auto uuid = UUID::ToString(uuid_int); + val = yyjson_mut_strcpy(doc, uuid.c_str()); + break; + } + // Weird special types that are just serialized to string + case LogicalTypeId::INTERVAL: + // TODO perhaps base64 encode blob? + case LogicalTypeId::BLOB: + case LogicalTypeId::BIT: + val = yyjson_mut_strcpy(doc, value.ToString().c_str()); + break; + case LogicalTypeId::UNION: { + auto &union_val = UnionValue::GetValue(value); + SerializeValue(parent, union_val, name, union_val.type()); + return; + } + case LogicalTypeId::ARRAY: + case LogicalTypeId::LIST: { + const auto get_children = LogicalTypeId::LIST == type.id() ?
ListValue::GetChildren : ArrayValue::GetChildren; + auto &children = get_children(value); + val = yyjson_mut_arr(doc); + for (auto &child : children) { + SerializeValue(val, child, nullptr, child.type()); + } + break; + } + case LogicalTypeId::STRUCT: { + const auto &children = StructValue::GetChildren(value); + const auto &type_info = value.type().AuxInfo()->Cast(); + + auto all_keys_are_empty = true; + for (uint64_t idx = 0; idx < children.size(); ++idx) { + if (!type_info.child_types[idx].first.empty()) { + all_keys_are_empty = false; + break; + } + } + + // Unnamed struct -> just create tuples + if (all_keys_are_empty) { + val = yyjson_mut_arr(doc); + for (auto &child : children) { + SerializeValue(val, child, nullptr, child.type()); + } + } else { + val = yyjson_mut_obj(doc); + for (uint64_t idx = 0; idx < children.size(); ++idx) { + string struct_name = type_info.child_types[idx].first; + SerializeValue(val, children[idx], struct_name, type_info.child_types[idx].second); + } + } + + break; + } + // Not implemented types + case LogicalTypeId::MAP: { + auto &children = ListValue::GetChildren(value); + val = yyjson_mut_obj(doc); + for (auto &item : children) { + auto &key_value = StructValue::GetChildren(item); + D_ASSERT(key_value.size() == 2); + auto key_str = key_value[0].GetValue(); + SerializeValue(val, key_value[1], key_str, key_value[1].type()); + } + break; + } + + // Unsupported types + case LogicalTypeId::TABLE: + case LogicalTypeId::POINTER: + case LogicalTypeId::VALIDITY: + case LogicalTypeId::AGGREGATE_STATE: + case LogicalTypeId::LAMBDA: + case LogicalTypeId::USER: + case LogicalTypeId::ANY: + case LogicalTypeId::UNKNOWN: + case LogicalTypeId::INVALID: + if (set_invalid_values_to_null) { + goto null_handle; + } + throw InvalidTypeException("Type " + type.ToString() + " not supported"); + } + + if (!val) { + throw SerializationException("Could not serialize value of type " + type.ToString()); + } + if (!name) { + if (!yyjson_mut_arr_append(parent, val)) { + throw SerializationException("Could not add value to yyjson array"); + } + } else { + yyjson_mut_val *key = yyjson_mut_strcpy(doc, name->c_str()); + if (!key) { + throw SerializationException("Could not create yyjson key"); + } + + if (!yyjson_mut_obj_add(parent, key, val)) { + throw SerializationException("Could not add value to yyjson object"); + } + } } From 038ad5060b30c346f3a264c73bf8607179178548 Mon Sep 17 00:00:00 2001 From: Niclas Haderer Date: Mon, 23 Dec 2024 15:41:13 +0100 Subject: [PATCH 08/11] Moved most logic into new compact serializer --- src/httpserver_extension.cpp | 55 ++--------------- src/include/query_stats.hpp | 12 ++++ src/include/result_serializer.hpp | 37 +++++------ .../result_serializer_compact_json.hpp | 61 +++++++++++++++++++ src/result_serializer.cpp | 53 ++++++++-------- 5 files changed, 122 insertions(+), 96 deletions(-) create mode 100644 src/include/query_stats.hpp create mode 100644 src/include/result_serializer_compact_json.hpp diff --git a/src/httpserver_extension.cpp b/src/httpserver_extension.cpp index 9e91104..fd57131 100644 --- a/src/httpserver_extension.cpp +++ b/src/httpserver_extension.cpp @@ -5,12 +5,14 @@ #include #include #include "httpserver_extension.hpp" +#include "query_stats.hpp" #include "duckdb.hpp" #include "duckdb/common/exception.hpp" #include "duckdb/function/scalar_function.hpp" #include "duckdb/main/extension_util.hpp" #include "duckdb/common/allocator.hpp" #include "result_serializer.hpp" +#include "result_serializer_compact_json.hpp" #include "httplib.hpp" 
#include "yyjson.hpp" #include "playground.hpp" @@ -19,10 +21,10 @@ #include #endif -using namespace duckdb_yyjson; // NOLINT - namespace duckdb { +using namespace duckdb_yyjson; // NOLINT(*-build-using-namespace) + struct HttpServerState { std::unique_ptr server; std::unique_ptr server_thread; @@ -43,52 +45,6 @@ std::string GetColumnTypeName(MaterializedQueryResult &result, idx_t column) { return result.types[column].ToString(); } -struct ReqStats { - float elapsed_sec; - int64_t read_bytes; - int64_t read_rows; -}; - -// Convert the query result to JSON format -static std::string ConvertResultToJSON(MaterializedQueryResult &result, ReqStats &req_stats) { - auto doc = yyjson_mut_doc_new(nullptr); - auto root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - // Add meta information - auto meta_array = yyjson_mut_arr(doc); - for (idx_t col = 0; col < result.ColumnCount(); ++col) { - auto column_obj = yyjson_mut_obj(doc); - yyjson_mut_obj_add_strcpy(doc, column_obj, "name", result.ColumnName(col).c_str()); - yyjson_mut_arr_append(meta_array, column_obj); - std::string tp(GetColumnTypeName(result, col)); - yyjson_mut_obj_add_strcpy(doc, column_obj, "type", tp.c_str()); - } - yyjson_mut_obj_add_val(doc, root, "meta", meta_array); - - ResultSerializer serializer; - auto data_array = serializer.Serialize(result, true); - yyjson_mut_obj_add_val(doc, root, "data", data_array); - - // Add row count - yyjson_mut_obj_add_uint(doc, root, "rows", result.RowCount()); - //"statistics":{"elapsed":0.00031403,"rows_read":1,"bytes_read":0}} - auto stat_obj = yyjson_mut_obj_add_obj(doc, root, "statistics"); - yyjson_mut_obj_add_real(doc, stat_obj, "elapsed", req_stats.elapsed_sec); - yyjson_mut_obj_add_int(doc, stat_obj, "rows_read", req_stats.read_rows); - yyjson_mut_obj_add_int(doc, stat_obj, "bytes_read", req_stats.read_bytes); - // Write to string - auto data = yyjson_mut_write(doc, 0, nullptr); - if (!data) { - yyjson_mut_doc_free(doc); - throw InternalException("Failed to render the result as JSON, yyjson failed"); - } - - std::string json_output(data); - free(data); - yyjson_mut_doc_free(doc); - return json_output; -} - // New: Base64 decoding function std::string base64_decode(const std::string &in) { std::string out; @@ -257,7 +213,8 @@ void HandleHttpRequest(const duckdb_httplib_openssl::Request& req, duckdb_httpli std::string json_output = ConvertResultToNDJSON(*result); res.set_content(json_output, "application/x-ndjson"); } else if (format == "JSONCompact") { - std::string json_output = ConvertResultToJSON(*result, stats); + ResultSerializerCompactJson serializer; + std::string json_output = serializer.Serialize(*result, stats); res.set_content(json_output, "application/json"); } else { // Default to NDJSON for DuckDB's own queries diff --git a/src/include/query_stats.hpp b/src/include/query_stats.hpp new file mode 100644 index 0000000..acf4aac --- /dev/null +++ b/src/include/query_stats.hpp @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace duckdb { + +struct ReqStats { + float elapsed_sec; + uint64_t read_bytes; + uint64_t read_rows; +}; + +} // namespace duckdb diff --git a/src/include/result_serializer.hpp b/src/include/result_serializer.hpp index 6e03040..7217f28 100644 --- a/src/include/result_serializer.hpp +++ b/src/include/result_serializer.hpp @@ -3,38 +3,22 @@ #include "duckdb/main/query_result.hpp" #include "yyjson.hpp" -using namespace duckdb_yyjson; - namespace duckdb { +using namespace duckdb_yyjson; // NOLINT(*-build-using-namespace) class ResultSerializer { 
public: explicit ResultSerializer(const bool _set_invalid_values_to_null = false) : set_invalid_values_to_null(_set_invalid_values_to_null) { doc = yyjson_mut_doc_new(nullptr); - root = yyjson_mut_arr(doc); - if (!root) { - throw SerializationException("Could not create yyjson array"); - } - yyjson_mut_doc_set_root(doc, root); } - ~ResultSerializer() { + virtual ~ResultSerializer() { yyjson_mut_doc_free(doc); } - void SerializeChunk(const DataChunk &chunk, vector &names, vector &types, - bool values_as_array); - - yyjson_mut_val *Serialize(QueryResult &query_result, bool values_as_array); - - yyjson_mut_val *SerializeRowAsArray(const DataChunk &chunk, idx_t row_idx, vector &types); - - yyjson_mut_val *SerializeRowAsObject(const DataChunk &chunk, idx_t row_idx, vector &names, - vector &types); - - static std::string YY_ToString(yyjson_mut_doc *val) { - auto data = yyjson_mut_write(val, 0, nullptr); + std::string YY_ToString() { + auto data = yyjson_mut_write(doc, 0, nullptr); if (!data) { throw SerializationException("Could not render yyjson document"); } @@ -43,11 +27,20 @@ class ResultSerializer { return json_output; } -private: +protected: + void SerializeInternal(QueryResult &query_result, yyjson_mut_val *append_root, bool values_as_array); + + void SerializeChunk(const DataChunk &chunk, vector &names, vector &types, + yyjson_mut_val *append_root, bool values_as_array); + + yyjson_mut_val *SerializeRowAsArray(const DataChunk &chunk, idx_t row_idx, vector &types); + + yyjson_mut_val *SerializeRowAsObject(const DataChunk &chunk, idx_t row_idx, vector &names, + vector &types); + void SerializeValue(yyjson_mut_val *parent, const Value &value, optional_ptr name, const LogicalType &type); yyjson_mut_doc *doc; - yyjson_mut_val *root; bool set_invalid_values_to_null; }; } // namespace duckdb diff --git a/src/include/result_serializer_compact_json.hpp b/src/include/result_serializer_compact_json.hpp new file mode 100644 index 0000000..7d4410c --- /dev/null +++ b/src/include/result_serializer_compact_json.hpp @@ -0,0 +1,61 @@ +#pragma once +#include "query_stats.hpp" +#include "result_serializer.hpp" + +namespace duckdb { + +class ResultSerializerCompactJson final : public ResultSerializer { +public: + explicit ResultSerializerCompactJson(const bool _set_invalid_values_to_null = false) + : ResultSerializer(_set_invalid_values_to_null) { + root = yyjson_mut_obj(doc); + D_ASSERT(root); + yyjson_mut_doc_set_root(doc, root); + } + + std::string Serialize(MaterializedQueryResult &query_result, const ReqStats &stats) { + // Metadata about the query result + yyjson_mut_val *yy_meta = GetMeta(query_result); + yyjson_mut_obj_add_val(doc, root, "meta", yy_meta); + + // Actual query data + yyjson_mut_val *yy_data_array = yyjson_mut_arr(doc); + SerializeInternal(query_result, yy_data_array, true); + yyjson_mut_obj_add_val(doc, root, "data", yy_data_array); + + // Number of rows + yyjson_mut_obj_add_uint(doc, root, "rows", query_result.RowCount()); + + // Query statistics + yyjson_mut_val *yy_stats = GetStats(stats); + yyjson_mut_obj_add_val(doc, root, "statistics", yy_stats); + + return YY_ToString(); + } + +private: + yyjson_mut_val *GetMeta(QueryResult &query_result) { + auto meta_array = yyjson_mut_arr(doc); + for (idx_t col = 0; col < query_result.ColumnCount(); ++col) { + auto column_obj = yyjson_mut_obj(doc); + yyjson_mut_obj_add_strcpy(doc, column_obj, "name", query_result.ColumnName(col).c_str()); + yyjson_mut_arr_append(meta_array, column_obj); + // @paul Did you find out if result.RowCount() == 0 
is needed? + std::string tp(query_result.types[col].ToString()); + yyjson_mut_obj_add_strcpy(doc, column_obj, "type", tp.c_str()); + } + + return meta_array; + } + + yyjson_mut_val *GetStats(const ReqStats &stats) { + auto stat_obj = yyjson_mut_obj(doc); + yyjson_mut_obj_add_real(doc, stat_obj, "elapsed", stats.elapsed_sec); + yyjson_mut_obj_add_uint(doc, stat_obj, "rows_read", stats.read_rows); + yyjson_mut_obj_add_uint(doc, stat_obj, "bytes_read", stats.read_bytes); + return stat_obj; + } + + yyjson_mut_val *root; +}; +} // namespace duckdb diff --git a/src/result_serializer.cpp b/src/result_serializer.cpp index 6091a07..5150b6c 100644 --- a/src/result_serializer.cpp +++ b/src/result_serializer.cpp @@ -1,23 +1,33 @@ #include "result_serializer.hpp" + #include "duckdb/common/extra_type_info.hpp" #include "duckdb/common/types/uuid.hpp" + #include -// ReSharper disable once CppPassValueParameterByConstReference -yyjson_mut_val *duckdb::ResultSerializer::Serialize(QueryResult &query_result, bool values_as_array) { +namespace duckdb { + +#define YY_APPEND_FAIL(success) \ + if (!success) { \ + throw SerializationException("Failed to append in %s at line %d", __FILE__, __LINE__); \ + } + +void ResultSerializer::SerializeInternal(QueryResult &query_result, yyjson_mut_val *append_root, + const bool values_as_array) { auto chunk = query_result.Fetch(); auto names = query_result.names; auto types = query_result.types; + while (chunk) { - SerializeChunk(*chunk, names, types, values_as_array); + SerializeChunk(*chunk, names, types, append_root, values_as_array); chunk = query_result.Fetch(); } - - return root; } -void duckdb::ResultSerializer::SerializeChunk(const DataChunk &chunk, vector &names, vector &types, - bool values_as_array) { +void ResultSerializer::SerializeChunk(const DataChunk &chunk, vector &names, vector &types, + yyjson_mut_val *append_root, const bool values_as_array) { + D_ASSERT(yyjson_mut_is_arr(append_root)); + const auto row_count = chunk.size(); for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { @@ -31,14 +41,12 @@ void duckdb::ResultSerializer::SerializeChunk(const DataChunk &chunk, vector &types) { +yyjson_mut_val *ResultSerializer::SerializeRowAsArray(const DataChunk &chunk, const idx_t row_idx, + vector &types) { const auto column_count = chunk.ColumnCount(); auto obj = yyjson_mut_arr(doc); @@ -51,8 +59,8 @@ yyjson_mut_val *duckdb::ResultSerializer::SerializeRowAsArray(const DataChunk &c return obj; } -yyjson_mut_val *duckdb::ResultSerializer::SerializeRowAsObject(const DataChunk &chunk, idx_t row_idx, - vector &names, vector &types) { +yyjson_mut_val *ResultSerializer::SerializeRowAsObject(const DataChunk &chunk, const idx_t row_idx, + vector &names, vector &types) { const auto column_count = chunk.ColumnCount(); auto obj = yyjson_mut_obj(doc); @@ -65,7 +73,7 @@ yyjson_mut_val *duckdb::ResultSerializer::SerializeRowAsObject(const DataChunk & return obj; } -void duckdb::ResultSerializer::SerializeValue( // NOLINT(*-no-recursion) +void ResultSerializer::SerializeValue( // NOLINT(*-no-recursion) yyjson_mut_val *parent, const Value &value, optional_ptr name, const LogicalType &type) { yyjson_mut_val *val = nullptr; @@ -234,17 +242,12 @@ void duckdb::ResultSerializer::SerializeValue( // NOLINT(*-no-recursion) throw SerializationException("Could not serialize value of type " + type.ToString()); } if (!name) { - if (!yyjson_mut_arr_append(parent, val)) { - throw SerializationException("Could not add value to yyjson array"); - } + YY_APPEND_FAIL(yyjson_mut_arr_append(parent, val)); } else
{ yyjson_mut_val *key = yyjson_mut_strcpy(doc, name->c_str()); - if (!key) { - throw SerializationException("Could not create yyjson key"); - } - - if (!yyjson_mut_obj_add(parent, key, val)) { - throw SerializationException("Could not add value to yyjson object"); - } + D_ASSERT(key); + YY_APPEND_FAIL(yyjson_mut_obj_add(parent, key, val)); } } + +} // namespace duckdb From 6712cec7937ad3b63a30580675401fc9ffe86932 Mon Sep 17 00:00:00 2001 From: Niclas Haderer Date: Mon, 23 Dec 2024 15:46:08 +0100 Subject: [PATCH 09/11] Added some docs for contributing --- docs/README.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/docs/README.md b/docs/README.md index 532468b..7a89d7f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -203,6 +203,43 @@ Check out this flocking macro from fellow _Italo-Amsterdammer_ @carlopi @ DuckDB
+## Development + +### Cloning the Repository + +Clone the repository and all of its submodules: + +```bash +git clone +git submodule update --init --recursive +``` + +### Setting up CLion +**Opening the project:** +Configuring CLion with the extension template requires a little work. First, make sure that the DuckDB submodule is available. +Then open `./duckdb/CMakeLists.txt` (not the top-level `CMakeLists.txt` file from this repo) as a project in CLion. +Now, to fix the project root, go to `Tools -> CMake -> Change Project Root` ([docs](https://www.jetbrains.com/help/clion/change-project-root-directory.html)) and set it to the root directory of this repo. + +**Debugging:** +To set up debugging in CLion, two steps are required. First, in `CLion -> Settings / Preferences -> Build, Execution, Deploy -> CMake` you will need to add the desired builds (e.g. Debug, Release, RelDebug). There are different ways to configure this, but the easiest is to leave everything empty except the `build path`, which needs to be set to `../build/{build type}`. On a clean repository you will first need to run `make {build type}` once to initialize the CMake build directory; after that, you can (re)build from CLion using the build target just created. Alternatively, you can create CLion CMake profiles matching the CMake variables described in the Makefile, and then you don't need to invoke the Makefile at all. + +The second step is to configure the unittest runner as a run/debug configuration. To do this, go to `Run -> Edit Configurations` and click `+ -> CMake Application`. The target and executable should be `unittest`. This will run all the DuckDB tests. To run only the extension-specific tests, add `--test-dir ../../.. [sql]` to the `Program Arguments`. Note that it is recommended to use the `unittest` executable for testing/development within CLion; the actual DuckDB CLI currently does not work reliably as a run target in CLion. + + +### Testing + +To run the E2E tests, install the necessary packages: + +```bash +pip install -r requirements.txt +``` + +Then run the test suite: + +```bash +pytest test_http_api +``` + ##### :black_joker: Disclaimers [^1]: DuckDB ® is a trademark of DuckDB Foundation. All rights reserved by their respective owners.
[^1] From 816606ff26dd0d5cd36d02fa8900fcde3117a3d4 Mon Sep 17 00:00:00 2001 From: Niclas Haderer Date: Mon, 23 Dec 2024 15:46:48 +0100 Subject: [PATCH 10/11] Ignore pycache --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index b9f264b..5496fe3 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ duckdb_unittest_tempdir/ testext test/python/__pycache__/ .Rhistory +__pycache__ +venv \ No newline at end of file From 6a0a3d6209f69c2dd42838d270f4b18acb054bd1 Mon Sep 17 00:00:00 2001 From: Niclas Haderer Date: Mon, 23 Dec 2024 15:51:39 +0100 Subject: [PATCH 11/11] Cleanup --- src/httpserver_extension.cpp | 7 ------- test_http_api/client.py | 1 - test_http_api/conftest.py | 5 ----- 3 files changed, 13 deletions(-) diff --git a/src/httpserver_extension.cpp b/src/httpserver_extension.cpp index fd57131..6bda310 100644 --- a/src/httpserver_extension.cpp +++ b/src/httpserver_extension.cpp @@ -38,13 +38,6 @@ struct HttpServerState { static HttpServerState global_state; -std::string GetColumnTypeName(MaterializedQueryResult &result, idx_t column) { - if (result.RowCount() == 0) { - return "String"; - } - return result.types[column].ToString(); -} - // New: Base64 decoding function std::string base64_decode(const std::string &in) { std::string out; diff --git a/test_http_api/client.py b/test_http_api/client.py index 2c20263..669cd9f 100644 --- a/test_http_api/client.py +++ b/test_http_api/client.py @@ -9,7 +9,6 @@ class ResponseFormat(Enum): ND_JSON = "JSONEachRow" - JSON = "JSON" COMPACT_JSON = "JSONCompact" diff --git a/test_http_api/conftest.py b/test_http_api/conftest.py index 7b5ed91..eb7552a 100644 --- a/test_http_api/conftest.py +++ b/test_http_api/conftest.py @@ -30,8 +30,3 @@ def http_duck_with_token() -> Iterator[Client]: yield client process.kill() - - -@pytest.fixture -def token_client(): - return Client(f"http://{HOST}:{PORT}", token_auth=API_KEY)
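
To see how the pieces introduced by this series fit together, here is a minimal usage sketch (an illustration, not part of the patches): it drives the `JSONCompact` format through the `Client` from `test_http_api/client.py`. The host, port, and API key are hypothetical placeholders; in the test suite they come from `test_http_api/const.py`, and the server itself is started by the `http_duck_with_token` fixture.

```python
from test_http_api.client import Client, ResponseFormat

# Placeholder endpoint and token; the conftest fixture normally starts the
# server via `SELECT httpserve_start(...)` and supplies these values.
client = Client("http://localhost:9999", token_auth="my_api_key")
client.on_ready()  # polls GET /ping until the server responds or times out

res = client.execute_query("SELECT 42 AS answer",
                           response_format=ResponseFormat.COMPACT_JSON)

# JSONCompact responses are objects with "meta", "data", "rows" and
# "statistics" keys, as produced by ResultSerializerCompactJson;
# "data" holds one array per row.
assert res["meta"] == [{"name": "answer", "type": "INTEGER"}]
assert res["data"] == [[42]]
assert res["rows"] == 1
```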