Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance on any directed edges #110

Merged
merged 24 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion duckdb-pgq
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

#pragma once
#include "duckdb/function/table_function.hpp"
#include "duckdb/parser/statement/create_statement.hpp"
#include "duckdb/parser/parsed_data/create_property_graph_info.hpp"
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"

Expand Down Expand Up @@ -42,12 +41,12 @@ class CreatePropertyGraphFunction : public TableFunction {
CheckPropertyGraphTableColumns(const shared_ptr<PropertyGraphTable> &pg_table,
TableCatalogEntry &table);

static duckdb::unique_ptr<FunctionData>
static unique_ptr<FunctionData>
CreatePropertyGraphBind(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types,
vector<string> &names);

static duckdb::unique_ptr<GlobalTableFunctionState>
static unique_ptr<GlobalTableFunctionState>
CreatePropertyGraphInit(ClientContext &context,
TableFunctionInitInput &input);

Expand Down
9 changes: 5 additions & 4 deletions duckpgq/include/duckpgq/functions/tablefunctions/match.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ struct PGQMatchFunction : public TableFunction {
const string &edge_binding,
const string &prev_binding,
const string &next_binding,
vector<unique_ptr<ParsedExpression>> &conditions);
vector<unique_ptr<ParsedExpression>> &conditions,
unique_ptr<TableRef> &from_clause);

static void EdgeTypeLeft(const shared_ptr<PropertyGraphTable> &edge_table,
const string &next_table_name,
Expand Down Expand Up @@ -120,15 +121,15 @@ struct PGQMatchFunction : public TableFunction {
const SubPath *subpath);

static void
AddEdgeJoins(const unique_ptr<SelectNode> &select_node,
const shared_ptr<PropertyGraphTable> &edge_table,
AddEdgeJoins(const shared_ptr<PropertyGraphTable> &edge_table,
const shared_ptr<PropertyGraphTable> &previous_vertex_table,
const shared_ptr<PropertyGraphTable> &next_vertex_table,
PGQMatchType edge_type, const string &edge_binding,
const string &prev_binding, const string &next_binding,
vector<unique_ptr<ParsedExpression>> &conditions,
unordered_map<string, string> &alias_map,
int32_t &extra_alias_counter);
int32_t &extra_alias_counter,
unique_ptr<TableRef> &from_clause);

static void ProcessPathList(
vector<unique_ptr<PathReference>> &path_pattern,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "duckpgq/functions/tablefunctions/create_property_graph.hpp"
#include "duckdb/parser/statement/create_statement.hpp"
#include <duckpgq_extension.hpp>

namespace duckdb {
Expand Down Expand Up @@ -66,7 +67,7 @@ CreatePropertyGraphFunction::CreatePropertyGraphBind(
throw Exception(ExceptionType::INVALID,
"Registered DuckPGQ state not found");
}
const auto duckpgq_state = (DuckPGQState *)lookup->second.get();
const auto duckpgq_state = dynamic_cast<DuckPGQState *>(lookup->second.get());
const auto duckpgq_parse_data =
dynamic_cast<DuckPGQParseData *>(duckpgq_state->parse_data.get());

Expand Down
114 changes: 75 additions & 39 deletions duckpgq/src/duckpgq/functions/tablefunctions/match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
#include "duckdb/parser/expression/constant_expression.hpp"
#include "duckdb/parser/expression/comparison_expression.hpp"
#include "duckdb/parser/expression/conjunction_expression.hpp"
#include "duckdb/parser/expression/star_expression.hpp"

#include "duckdb/parser/query_node/set_operation_node.hpp"

#include "duckdb/parser/query_node/select_node.hpp"

Expand All @@ -21,9 +24,8 @@

#include "duckdb/parser/property_graph_table.hpp"
#include "duckdb/parser/subpath_element.hpp"

#include <iostream>
#include <cmath>
#include <duckdb/common/enums/set_operation_type.hpp>

namespace duckdb {
shared_ptr<PropertyGraphTable>
Expand Down Expand Up @@ -364,7 +366,58 @@ void PGQMatchFunction::EdgeTypeAny(
const shared_ptr<PropertyGraphTable> &edge_table,
const string &edge_binding, const string &prev_binding,
const string &next_binding,
vector<unique_ptr<ParsedExpression>> &conditions) {
vector<unique_ptr<ParsedExpression>> &conditions,
unique_ptr<TableRef> &from_clause) {

// START SELECT src, dst, * from edge_table
auto src_dst_select_node = make_uniq<SelectNode>();

auto edge_left_ref = make_uniq<BaseTableRef>();
edge_left_ref->table_name = edge_table->table_name;
src_dst_select_node->from_table = std::move(edge_left_ref);
auto src_dst_children = vector<unique_ptr<ParsedExpression>>();
src_dst_children.push_back(make_uniq<ColumnRefExpression>(edge_table->source_fk[0], edge_table->table_name));
src_dst_children.push_back(make_uniq<ColumnRefExpression>(edge_table->destination_fk[0], edge_table->table_name));
src_dst_children.push_back(make_uniq<StarExpression>());

src_dst_select_node->select_list = std::move(src_dst_children);
// END SELECT src, dst, * from edge_table

// START SELECT dst, src, * from edge_table
auto dst_src_select_node = make_uniq<SelectNode>();

auto edge_right_ref = make_uniq<BaseTableRef>();
edge_right_ref->table_name = edge_table->table_name;
auto dst_src_children = vector<unique_ptr<ParsedExpression>>();
dst_src_select_node->from_table = std::move(edge_right_ref);

dst_src_children.push_back(make_uniq<ColumnRefExpression>(
edge_table->destination_fk[0], edge_table->table_name));
dst_src_children.push_back(make_uniq<ColumnRefExpression>(edge_table->source_fk[0],
edge_table->table_name));
dst_src_children.push_back(make_uniq<StarExpression>());

dst_src_select_node->select_list = std::move(dst_src_children);
// END SELECT dst, src, * from edge_table

auto union_node = make_uniq<SetOperationNode>();
union_node->setop_type = SetOperationType::UNION;
union_node->setop_all = true;
union_node->left = std::move(src_dst_select_node);
union_node->right = std::move(dst_src_select_node);
auto union_select = make_uniq<SelectStatement>();
union_select->node = std::move(union_node);
// (SELECT src, dst, * from edge_table UNION ALL SELECT dst, src, * from edge_table)
auto union_subquery = make_uniq<SubqueryRef>(std::move(union_select));
union_subquery->alias = edge_binding;
if (from_clause) {
auto from_join = make_uniq<JoinRef>(JoinRefType::CROSS);
from_join->left = std::move(from_clause);
from_join->right = std::move(union_subquery);
from_clause = std::move(from_join);
} else {
from_clause = std::move(union_subquery);
}
// (a) src.key = edge.src
auto src_left_expr = CreateMatchJoinExpression(
edge_table->source_pk, edge_table->source_fk, prev_binding, edge_binding);
Expand All @@ -376,23 +429,8 @@ void PGQMatchFunction::EdgeTypeAny(
auto combined_left_expr = make_uniq<ConjunctionExpression>(
ExpressionType::CONJUNCTION_AND, std::move(src_left_expr),
std::move(dst_left_expr));
// (c) src.key = edge.dst
auto src_right_expr = CreateMatchJoinExpression(edge_table->source_pk,
edge_table->destination_fk,
prev_binding, edge_binding);
// (d) dst.key = edge.src
auto dst_right_expr = CreateMatchJoinExpression(edge_table->destination_pk,
edge_table->source_fk,
next_binding, edge_binding);
// (c) AND (d)
auto combined_right_expr = make_uniq<ConjunctionExpression>(
ExpressionType::CONJUNCTION_AND, std::move(src_right_expr),
std::move(dst_right_expr));
// ((a) AND (b)) OR ((c) AND (d))
auto combined_expr = make_uniq<ConjunctionExpression>(
ExpressionType::CONJUNCTION_OR, std::move(combined_left_expr),
std::move(combined_right_expr));
conditions.push_back(std::move(combined_expr));

conditions.push_back(std::move(combined_left_expr));
}

void PGQMatchFunction::EdgeTypeLeft(
Expand Down Expand Up @@ -592,20 +630,21 @@ unique_ptr<ParsedExpression> PGQMatchFunction::CreatePathFindingFunction(
return final_list;
}

void PGQMatchFunction::AddEdgeJoins(
const unique_ptr<SelectNode> &select_node,
const shared_ptr<PropertyGraphTable> &edge_table,
void PGQMatchFunction::AddEdgeJoins(const shared_ptr<PropertyGraphTable> &edge_table,
const shared_ptr<PropertyGraphTable> &previous_vertex_table,
const shared_ptr<PropertyGraphTable> &next_vertex_table,
PGQMatchType edge_type, const string &edge_binding,
const string &prev_binding, const string &next_binding,
vector<unique_ptr<ParsedExpression>> &conditions,
unordered_map<string, string> &alias_map, int32_t &extra_alias_counter) {
unordered_map<string, string> &alias_map, int32_t &extra_alias_counter,
unique_ptr<TableRef> &from_clause) {
if (edge_type != PGQMatchType::MATCH_EDGE_ANY) {
alias_map[edge_binding] = edge_table->table_name;
}
switch (edge_type) {
case PGQMatchType::MATCH_EDGE_ANY: {
select_node->modifiers.push_back(make_uniq<DistinctModifier>());
EdgeTypeAny(edge_table, edge_binding, prev_binding, next_binding,
conditions);
conditions, from_clause);
break;
}
case PGQMatchType::MATCH_EDGE_LEFT:
Expand Down Expand Up @@ -865,33 +904,31 @@ void PGQMatchFunction::ProcessPathList(
} else {
alias_map[edge_element->variable_binding] =
edge_table->source_reference;
AddEdgeJoins(select_node, edge_table, previous_vertex_table,
AddEdgeJoins(edge_table, previous_vertex_table,
next_vertex_table, edge_element->match_type,
edge_element->variable_binding,
previous_vertex_element->variable_binding,
next_vertex_element->variable_binding, conditions,
alias_map, extra_alias_counter);
alias_map, extra_alias_counter, from_clause);
}
} else {
// The edge element is a path element without WHERE or path-finding.
auto edge_table = FindGraphTable(edge_element->label, pg_table);
CheckInheritance(edge_table, edge_element, conditions);
// check aliases
alias_map[edge_element->variable_binding] = edge_table->table_name;
AddEdgeJoins(select_node, edge_table, previous_vertex_table,
AddEdgeJoins(edge_table, previous_vertex_table,
next_vertex_table, edge_element->match_type,
edge_element->variable_binding,
previous_vertex_element->variable_binding,
next_vertex_element->variable_binding, conditions, alias_map,
extra_alias_counter);
next_vertex_element->variable_binding, conditions,
alias_map, extra_alias_counter, from_clause);
// Check the edge type
// If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id
// If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id
// If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id) OR
// (b.dst = a.id AND b.src
// = c.id) If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND
// (b.dst = a.id AND b.src
//= c.id)
// If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id)
// FROM (src, dst, * from b UNION ALL dst, src, * from b)
// If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND
// (b.dst = a.id AND b.src = c.id)
}
previous_vertex_element = next_vertex_element;
previous_vertex_table = next_vertex_table;
Expand All @@ -900,7 +937,7 @@ void PGQMatchFunction::ProcessPathList(

unique_ptr<TableRef>
PGQMatchFunction::MatchBindReplace(ClientContext &context,
TableFunctionBindInput &) {
TableFunctionBindInput &bind_input) {
auto duckpgq_state_entry = context.registered_state.find("duckpgq");
auto duckpgq_state =
dynamic_cast<DuckPGQState *>(duckpgq_state_entry->second.get());
Expand Down Expand Up @@ -1005,7 +1042,6 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context,
subquery->node = std::move(select_node);

auto result = make_uniq<SubqueryRef>(std::move(subquery), ref->alias);

return std::move(result);
}
} // namespace duckdb
3 changes: 2 additions & 1 deletion duckpgq/src/duckpgq_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
#include "duckdb/parser/query_node/select_node.hpp"
#include "duckdb/parser/statement/copy_statement.hpp"
#include "duckdb/parser/statement/create_statement.hpp"
#include "duckdb/parser/parsed_data/create_table_info.hpp"
#include "duckdb/parser/tableref/joinref.hpp"

Expand Down Expand Up @@ -101,7 +102,7 @@ BoundStatement duckpgq_bind(ClientContext &context, Binder &binder,
}

auto duckpgq_state = (DuckPGQState *)lookup->second.get();
auto duckpgq_binder = Binder::CreateBinder(context);
auto duckpgq_binder = Binder::CreateBinder(context, &binder);
auto duckpgq_parse_data =
dynamic_cast<DuckPGQParseData *>(duckpgq_state->parse_data.get());
if (duckpgq_parse_data) {
Expand Down
3 changes: 2 additions & 1 deletion test/sql/create_pg/optional_edge_table_clause.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# name: test/sql/sqlpgq/snb.test
# name: test/sql/create_pg/optional_edge_table_clause.test
# group: [duckpgq]

require duckpgq
Expand All @@ -10,6 +10,7 @@ statement ok
-CREATE PROPERTY GRAPH snb
VERTEX TABLES (Message, person);


statement ok
-FROM GRAPH_TABLE (snb
MATCH (m:Message)
Expand Down
2 changes: 1 addition & 1 deletion test/sql/pattern-matching/basic_match.test
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ Peter Tavneet
Peter Gabor
Peter David

# Disabled, see https://github.com/cwida/duckpgq-extension/issues/60
query II
-SELECT study.a_name, study.b_name
FROM GRAPH_TABLE (pg
Expand All @@ -144,6 +143,7 @@ FROM GRAPH_TABLE (pg
ORDER BY a_name, b_name;
----
Peter Daniel
Peter Daniel
Peter David
Peter Gabor
Peter Tavneet
Expand Down
66 changes: 66 additions & 0 deletions test/sql/pattern-matching/undirected_edges.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# name: test/sql/sqlpgq/undirected_edges.test
# group: [duckpgq]

statement ok
pragma enable_verification

require duckpgq

statement ok
CREATE TABLE Student(id BIGINT, name VARCHAR);INSERT INTO Student VALUES (0, 'Daniel'), (1, 'Tavneet'), (2, 'Gabor'), (3, 'Peter'), (4, 'David');

statement ok
CREATE TABLE know(src BIGINT, dst BIGINT, createDate BIGINT);INSERT INTO know VALUES (0,1, 10), (0,2, 11), (0,3, 12), (3,0, 13), (1,2, 14), (1,3, 15), (2,3, 16), (4,3, 17), (4, 0, 18);

statement ok
-CREATE PROPERTY GRAPH pg
VERTEX TABLES (Student)
EDGE TABLES (
know SOURCE KEY ( src ) REFERENCES Student ( id )
DESTINATION KEY ( dst ) REFERENCES Student ( id )
);

query II
SELECT a.name AS person, b.name AS friend
FROM ((SELECT know.src, know.dst FROM know) UNION ALL (SELECT know.dst, know.src FROM know)) AS k , Student AS b , Student AS a
WHERE ((a.id = k.src) AND (b.id = k.dst) AND (a.name = 'Daniel'));
----
Daniel Tavneet
Daniel Gabor
Daniel Peter
Daniel David
Daniel Peter

# Daniel has 3 outgoing edges and 2 incoming edges, so there should be 5 tuples
query II
-SELECT person, friend
FROM GRAPH_TABLE (pg
MATCH
(a:Student)-[k:know]-(b:Student)
WHERE a.name = 'Daniel'
COLUMNS (a.name as person, b.name as friend)
)
ORDER BY person, friend;
----
Daniel David
Daniel Gabor
Daniel Peter
Daniel Peter
Daniel Tavneet

# Daniel has 3 outgoing edges and 2 incoming edges, so there should be 5 tuples
query III
-FROM GRAPH_TABLE (pg
MATCH
(a:Student)-[k:know]-(b:Student)
WHERE a.name = 'Daniel'
COLUMNS (a.name as person, b.name as friend, k.createDate as date)
)
ORDER BY person, friend, date;
----
Daniel David 18
Daniel Gabor 11
Daniel Peter 12
Daniel Peter 13
Daniel Tavneet 10

Loading