-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix query on unspecified property #191
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
#include <duckpgq_extension.hpp> | ||
#include "duckpgq/core/functions/table/match.hpp" | ||
|
||
#include "duckdb/common/string_util.hpp" | ||
#include "duckdb/common/case_insensitive_map.hpp" | ||
#include "duckdb/parser/tableref/matchref.hpp" | ||
#include "duckdb/parser/tableref/subqueryref.hpp" | ||
#include "duckdb/parser/tableref/joinref.hpp" | ||
|
@@ -33,6 +35,55 @@ namespace duckpgq { | |
|
||
namespace core { | ||
|
||
namespace { | ||
|
||
// Get fully-qualified column names for the property graph [tbl], and insert | ||
// into set [col_names]. | ||
void PopulateFullyQualifiedColName( | ||
const vector<shared_ptr<PropertyGraphTable>> &tbls, | ||
const case_insensitive_map_t<vector<string>> &tbl_name_to_aliases, | ||
case_insensitive_set_t &col_names) { | ||
for (const auto &cur_tbl : tbls) { | ||
for (const auto &cur_col : cur_tbl->column_names) { | ||
// It's legal to query by `<col>` instead of `<table>.<col>`. | ||
col_names.insert(cur_col); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm wondering if this will also be sufficient for property graphs on attached databases. Then we also need to add the catalog and schema ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that's a good point, and it's not hard to get the catalog and schema name since they're already part of However after some trials, I found it's failing due to another bind error: Checking the variable, only |
||
|
||
const string &tbl_name = cur_tbl->table_name; | ||
auto iter = tbl_name_to_aliases.find(tbl_name); | ||
// Prefer to use table alias specified in the statement, otherwise use | ||
// table name. | ||
if (iter == tbl_name_to_aliases.end()) { | ||
col_names.insert(StringUtil::Format("%s.%s", tbl_name, cur_col)); | ||
} else { | ||
const auto &all_aliases = iter->second; | ||
for (const auto &cur_alias : all_aliases) { | ||
col_names.insert(StringUtil::Format("%s.%s", cur_alias, cur_col)); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
// Get fully-qualified column names from property graph. | ||
case_insensitive_set_t GetFullyQualifiedColFromPg( | ||
const CreatePropertyGraphInfo &pg, | ||
const case_insensitive_map_t<shared_ptr<PropertyGraphTable>> &alias_map) { | ||
case_insensitive_map_t<vector<string>> relation_name_to_aliases; | ||
for (const auto &entry : alias_map) { | ||
relation_name_to_aliases[entry.second->table_name].emplace_back( | ||
entry.first); | ||
} | ||
|
||
case_insensitive_set_t col_names; | ||
PopulateFullyQualifiedColName(pg.vertex_tables, relation_name_to_aliases, | ||
col_names); | ||
PopulateFullyQualifiedColName(pg.edge_tables, relation_name_to_aliases, | ||
col_names); | ||
return col_names; | ||
} | ||
|
||
} // namespace | ||
|
||
shared_ptr<PropertyGraphTable> | ||
PGQMatchFunction::FindGraphTable(const string &label, | ||
CreatePropertyGraphInfo &pg_table) { | ||
|
@@ -145,6 +196,18 @@ PathElement *PGQMatchFunction::GetPathElement( | |
throw InternalException("Unknown path reference type detected"); | ||
} | ||
|
||
SubPath * | ||
PGQMatchFunction::GetSubPath(const unique_ptr<PathReference> &path_reference) { | ||
if (path_reference->path_reference_type == | ||
PGQPathReferenceType::PATH_ELEMENT) { | ||
return nullptr; | ||
} | ||
if (path_reference->path_reference_type == PGQPathReferenceType::SUBPATH) { | ||
return reinterpret_cast<SubPath *>(path_reference.get()); | ||
} | ||
throw InternalException("Unknown path reference type detected"); | ||
} | ||
|
||
unique_ptr<SubqueryRef> PGQMatchFunction::CreateCountCTESubquery() { | ||
//! BEGIN OF (SELECT count(cte1.temp) as temp * 0 from cte1) __x | ||
|
||
|
@@ -907,15 +970,88 @@ void PGQMatchFunction::ProcessPathList( | |
} | ||
} | ||
|
||
void PGQMatchFunction::PopulateGraphTableAliasMap( | ||
const CreatePropertyGraphInfo &pg_table, | ||
const unique_ptr<PathReference> &path_reference, | ||
case_insensitive_map_t<shared_ptr<PropertyGraphTable>> | ||
&alias_to_vertex_and_edge_tables) { | ||
PathElement *path_elem = GetPathElement(path_reference); | ||
|
||
// Populate binding from PathElement. | ||
if (path_elem != nullptr) { | ||
auto iter = pg_table.label_map.find(path_elem->label); | ||
if (iter == pg_table.label_map.end()) { | ||
throw BinderException( | ||
"The label %s is not registered in property graph %s", | ||
path_elem->label, pg_table.property_graph_name); | ||
} | ||
alias_to_vertex_and_edge_tables[path_elem->variable_binding] = iter->second; | ||
return; | ||
} | ||
|
||
// Recursively populate binding from SubPath. | ||
SubPath *sub_path = GetSubPath(path_reference); | ||
D_ASSERT(sub_path != nullptr); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know if the result of sub_path is best checked with an assert. Instead with an You already checked whether it is a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah it's a sanity check, because we only have two valid types: path element and subpath. |
||
const auto &path_list = sub_path->path_list; | ||
for (const auto &cur_path : path_list) { | ||
PopulateGraphTableAliasMap(pg_table, cur_path, | ||
alias_to_vertex_and_edge_tables); | ||
} | ||
} | ||
|
||
void PGQMatchFunction::CheckColumnBinding( | ||
const CreatePropertyGraphInfo &pg_table, const MatchExpression &ref) { | ||
// Maps from table alias to table, including vertex and edge tables. | ||
case_insensitive_map_t<shared_ptr<PropertyGraphTable>> | ||
alias_to_vertex_and_edge_tables; | ||
for (idx_t idx_i = 0; idx_i < ref.path_patterns.size(); idx_i++) { | ||
const auto &path_list = ref.path_patterns[idx_i]->path_elements; | ||
for (const auto &cur_path : path_list) { | ||
PopulateGraphTableAliasMap(pg_table, cur_path, | ||
alias_to_vertex_and_edge_tables); | ||
} | ||
} | ||
|
||
// All fully-qualified column names for vertex tables and edge tables. | ||
const auto all_fq_col_names = | ||
GetFullyQualifiedColFromPg(pg_table, alias_to_vertex_and_edge_tables); | ||
|
||
for (auto &expression : ref.column_list) { | ||
// TODO(hjiang): `ColumnRefExpression` alone is not enough, we could have | ||
// more complicated expression. | ||
// | ||
// See issue for reference: | ||
// https://github.com/cwida/duckpgq-extension/issues/198 | ||
auto *column_ref = dynamic_cast<ColumnRefExpression *>(expression.get()); | ||
if (column_ref == nullptr) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This might not be a sufficient check, since there may be something else other than a ColumnRefExpression such as a FunctionExpression. This FunctionExpression could then contain a non-registered property, which should be checked. I would at least like to see a test case check this: statement ok
create table person (name varchar, lastname varchar); insert into person values ('Alice', 'Foo');
statement ok
-create property graph pg vertex tables (person properties (name));
query I
-from graph_table (pg match (p:person) columns (p.name || p.name as name));
----
AliceAlice
statement error
-from graph_table (pg match (p:person) columns(p.name, p.name || p.lastname));
----
BinderException: p.notregistered is not a registered property for label person in property graph pg The exact error message does not need to be this of course. This can also be implemented as another PR since this can become quite nested. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see what you mean, that makes sense; currently I don't see an easy solution. I create another issue to track: #198, and leave a TODO under my name. |
||
continue; | ||
} | ||
// 'shortest_path_cte' is a special table populated by pgq. | ||
if (column_ref->column_names[0] == "shortest_path_cte") { | ||
continue; | ||
} | ||
// 'rowid' is a column duckdb binds automatically. | ||
if (column_ref->column_names.back() == "rowid") { | ||
continue; | ||
} | ||
const auto cur_fq_col_name = | ||
StringUtil::Join(column_ref->column_names, /*separator=*/"."); | ||
if (all_fq_col_names.find(cur_fq_col_name) == all_fq_col_names.end()) { | ||
throw BinderException("Property %s is never registered!", | ||
cur_fq_col_name); | ||
} | ||
} | ||
} | ||
|
||
unique_ptr<TableRef> | ||
PGQMatchFunction::MatchBindReplace(ClientContext &context, | ||
TableFunctionBindInput &bind_input) { | ||
auto duckpgq_state = GetDuckPGQState(context); | ||
|
||
auto match_index = bind_input.inputs[0].GetValue<int32_t>(); | ||
auto ref = dynamic_cast<MatchExpression *>( | ||
auto *ref = dynamic_cast<MatchExpression *>( | ||
duckpgq_state->transform_expression[match_index].get()); | ||
auto pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); | ||
auto *pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); | ||
|
||
vector<unique_ptr<ParsedExpression>> conditions; | ||
|
||
|
@@ -948,11 +1084,16 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context, | |
if (ref->where_clause) { | ||
conditions.push_back(std::move(ref->where_clause)); | ||
} | ||
|
||
CheckColumnBinding(*pg_table, *ref); | ||
dentiny marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
std::vector<unique_ptr<ParsedExpression>> final_column_list; | ||
|
||
for (auto &expression : ref->column_list) { | ||
unordered_set<string> named_subpaths; | ||
auto column_ref = dynamic_cast<ColumnRefExpression *>(expression.get()); | ||
|
||
// Handle ColumnRefExpression. | ||
auto *column_ref = dynamic_cast<ColumnRefExpression *>(expression.get()); | ||
if (column_ref != nullptr) { | ||
if (named_subpaths.count(column_ref->column_names[0]) && | ||
column_ref->column_names.size() == 1) { | ||
|
@@ -963,7 +1104,9 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context, | |
} | ||
continue; | ||
} | ||
auto function_ref = dynamic_cast<FunctionExpression *>(expression.get()); | ||
|
||
// Handle FunctionExpression. | ||
auto *function_ref = dynamic_cast<FunctionExpression *>(expression.get()); | ||
if (function_ref != nullptr) { | ||
if (function_ref->function_name == "path_length") { | ||
column_ref = dynamic_cast<ColumnRefExpression *>( | ||
|
@@ -995,6 +1138,9 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context, | |
continue; | ||
} | ||
|
||
// TODO(hjiang): For star expression, only select columns in vertex or edge | ||
// table, but not those unspecified in property graph. | ||
// Issue reference: https://github.com/cwida/duckpgq-extension/issues/192 | ||
final_column_list.push_back(std::move(expression)); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is there a namespace here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The anonymous namespace (or
static
) is to constrain symbol to live only in the current translation unit.Reference: https://google.github.io/styleguide/cppguide.html#Internal_Linkage
I don't have a strong preference for unamed namespace vs static; use the former here since it's used in my company.