mirror of https://github.com/postgres/postgres
Implement a data structure that is a List of Bitmapsets, which is essentially a 2-D boolean array except that the rows need not all be the same width. Operations such as union and intersection are meaningful for these, just as they are for Bitmapsets. Eventually we might build many of the same operations that we have written for Bitmapsets, but for the first use-case we just need a few. That first use-case is for antijoin detection: reduce_outer_joins needs to find the set of Vars that are certain to be non-null in a successfully joined (not null-extended) left join row, and also find the set of Vars subject to higher-level IS NULL constraints, and intersect them. We had been doing this by making Lists of the Var nodes and then using list_intersect, which works but is pretty inefficient compared to a bitmapset-like intersection. Potentially it's O(N^2) if there are a lot of Vars involved, which fortunately there generally aren't; still it's not great. Moreover, that method requires the Vars of interest to be exactly equal() in the join condition and the upper IS NULL condition, which is problematic for my WIP patch that labels Vars according to which outer joins have possibly nulled them. Discussion: https://postgr.es/m/892228.1668437838@sss.pgh.pa.us Discussion: https://postgr.es/m/CAMbWs4-mvPPCJ1W6iK6dD5HiNwoJdi6mZp=-7mE8N9Sh+cd0tQ@mail.gmail.compull/138/head^2
parent
90e4f308b4
commit
e9e26b5e71
@ -0,0 +1,162 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* multibitmapset.c |
||||
* Lists of Bitmapsets |
||||
* |
||||
* A multibitmapset is useful in situations where members of a set can |
||||
* be identified by two small integers; for example, varno and varattno |
||||
* of a group of Vars within a query. The implementation is a List of |
||||
* Bitmapsets, so that the empty set can be represented by NIL. (But, |
||||
* as with Bitmapsets, that's not the only allowed representation.) |
||||
* The zero-based index of a List element is the first identifying value, |
||||
* and the (also zero-based) index of a bit within that Bitmapset is |
||||
* the second identifying value. There is no expectation that the |
||||
* Bitmapsets should all be the same size. |
||||
* |
||||
* The available operations on multibitmapsets are intended to parallel |
||||
* those on bitmapsets, for example union and intersection. So far only |
||||
* a small fraction of that has been built out; we'll add more as needed. |
||||
* |
||||
* |
||||
* Copyright (c) 2022, PostgreSQL Global Development Group |
||||
* |
||||
* IDENTIFICATION |
||||
* src/backend/nodes/multibitmapset.c |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#include "postgres.h" |
||||
|
||||
#include "nodes/multibitmapset.h" |
||||
|
||||
|
||||
/*
|
||||
* mbms_add_member |
||||
* Add a new member to a multibitmapset. |
||||
* |
||||
* The new member is identified by "listidx", the zero-based index of the |
||||
* List element it should go into, and "bitidx", which specifies the bit |
||||
* number to be set therein. |
||||
* |
||||
* This is like bms_add_member, but for multibitmapsets. |
||||
*/ |
||||
List * |
||||
mbms_add_member(List *a, int listidx, int bitidx) |
||||
{ |
||||
Bitmapset *bms; |
||||
ListCell *lc; |
||||
|
||||
if (listidx < 0 || bitidx < 0) |
||||
elog(ERROR, "negative multibitmapset member index not allowed"); |
||||
/* Add empty elements as needed */ |
||||
while (list_length(a) <= listidx) |
||||
a = lappend(a, NULL); |
||||
/* Update the target element */ |
||||
lc = list_nth_cell(a, listidx); |
||||
bms = lfirst_node(Bitmapset, lc); |
||||
bms = bms_add_member(bms, bitidx); |
||||
lfirst(lc) = bms; |
||||
return a; |
||||
} |
||||
|
||||
/*
|
||||
* mbms_add_members |
||||
* Add all members of set b to set a. |
||||
* |
||||
* This is a UNION operation, but the left input is modified in-place. |
||||
* |
||||
* This is like bms_add_members, but for multibitmapsets. |
||||
*/ |
||||
List * |
||||
mbms_add_members(List *a, const List *b) |
||||
{ |
||||
ListCell *lca, |
||||
*lcb; |
||||
|
||||
/* Add empty elements to a, as needed */ |
||||
while (list_length(a) < list_length(b)) |
||||
a = lappend(a, NULL); |
||||
/* forboth will stop at the end of the shorter list, which is fine */ |
||||
forboth(lca, a, lcb, b) |
||||
{ |
||||
Bitmapset *bmsa = lfirst_node(Bitmapset, lca); |
||||
const Bitmapset *bmsb = lfirst_node(Bitmapset, lcb); |
||||
|
||||
bmsa = bms_add_members(bmsa, bmsb); |
||||
lfirst(lca) = bmsa; |
||||
} |
||||
return a; |
||||
} |
||||
|
||||
/*
|
||||
* mbms_int_members |
||||
* Reduce set a to its intersection with set b. |
||||
* |
||||
* This is an INTERSECT operation, but the left input is modified in-place. |
||||
* |
||||
* This is like bms_int_members, but for multibitmapsets. |
||||
*/ |
||||
List * |
||||
mbms_int_members(List *a, const List *b) |
||||
{ |
||||
ListCell *lca, |
||||
*lcb; |
||||
|
||||
/* Remove any elements of a that are no longer of use */ |
||||
a = list_truncate(a, list_length(b)); |
||||
/* forboth will stop at the end of the shorter list, which is fine */ |
||||
forboth(lca, a, lcb, b) |
||||
{ |
||||
Bitmapset *bmsa = lfirst_node(Bitmapset, lca); |
||||
const Bitmapset *bmsb = lfirst_node(Bitmapset, lcb); |
||||
|
||||
bmsa = bms_int_members(bmsa, bmsb); |
||||
lfirst(lca) = bmsa; |
||||
} |
||||
return a; |
||||
} |
||||
|
||||
/*
|
||||
* mbms_is_member |
||||
* Is listidx/bitidx a member of A? |
||||
* |
||||
* This is like bms_is_member, but for multibitmapsets. |
||||
*/ |
||||
bool |
||||
mbms_is_member(int listidx, int bitidx, const List *a) |
||||
{ |
||||
const Bitmapset *bms; |
||||
|
||||
/* XXX better to just return false for negative indexes? */ |
||||
if (listidx < 0 || bitidx < 0) |
||||
elog(ERROR, "negative multibitmapset member index not allowed"); |
||||
if (listidx >= list_length(a)) |
||||
return false; |
||||
bms = list_nth_node(Bitmapset, a, listidx); |
||||
return bms_is_member(bitidx, bms); |
||||
} |
||||
|
||||
/*
|
||||
* mbms_overlap_sets |
||||
* Identify the bitmapsets having common members in a and b. |
||||
* |
||||
* The result is a bitmapset of the list indexes of bitmapsets that overlap. |
||||
*/ |
||||
Bitmapset * |
||||
mbms_overlap_sets(const List *a, const List *b) |
||||
{ |
||||
Bitmapset *result = NULL; |
||||
ListCell *lca, |
||||
*lcb; |
||||
|
||||
/* forboth will stop at the end of the shorter list, which is fine */ |
||||
forboth(lca, a, lcb, b) |
||||
{ |
||||
const Bitmapset *bmsa = lfirst_node(Bitmapset, lca); |
||||
const Bitmapset *bmsb = lfirst_node(Bitmapset, lcb); |
||||
|
||||
if (bms_overlap(bmsa, bmsb)) |
||||
result = bms_add_member(result, foreach_current_index(lca)); |
||||
} |
||||
return result; |
||||
} |
@ -0,0 +1,39 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* multibitmapset.h |
||||
* Lists of Bitmapsets |
||||
* |
||||
* A multibitmapset is useful in situations where members of a set can |
||||
* be identified by two small integers; for example, varno and varattno |
||||
* of a group of Vars within a query. The implementation is a List of |
||||
* Bitmapsets, so that the empty set can be represented by NIL. (But, |
||||
* as with Bitmapsets, that's not the only allowed representation.) |
||||
* The zero-based index of a List element is the first identifying value, |
||||
* and the (also zero-based) index of a bit within that Bitmapset is |
||||
* the second identifying value. There is no expectation that the |
||||
* Bitmapsets should all be the same size. |
||||
* |
||||
* The available operations on multibitmapsets are intended to parallel |
||||
* those on bitmapsets, for example union and intersection. So far only |
||||
* a small fraction of that has been built out; we'll add more as needed. |
||||
* |
||||
* |
||||
* Copyright (c) 2022, PostgreSQL Global Development Group |
||||
* |
||||
* src/include/nodes/multibitmapset.h |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
#ifndef MULTIBITMAPSET_H |
||||
#define MULTIBITMAPSET_H |
||||
|
||||
#include "nodes/bitmapset.h" |
||||
#include "nodes/pg_list.h" |
||||
|
||||
extern List *mbms_add_member(List *a, int listidx, int bitidx); |
||||
extern List *mbms_add_members(List *a, const List *b); |
||||
extern List *mbms_int_members(List *a, const List *b); |
||||
extern bool mbms_is_member(int listidx, int bitidx, const List *a); |
||||
extern Bitmapset *mbms_overlap_sets(const List *a, const List *b); |
||||
|
||||
#endif /* MULTIBITMAPSET_H */ |
Loading…
Reference in new issue