postgis/postgis/gserialized_gist_nd.c
2022-12-13 17:43:08 -08:00

1811 lines
50 KiB
C

/**********************************************************************
*
* PostGIS - Spatial Types for PostgreSQL
* http://postgis.net
*
* PostGIS is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* PostGIS is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
*
**********************************************************************
*
* Copyright 2009 Paul Ramsey <pramsey@cleverelephant.ca>
* Copyright 2017-2019 Darafei Praliaskouski <me@komzpa.net>
*
**********************************************************************/
/*
** R-Tree Bibliography
**
** [1] A. Guttman. R-tree: a dynamic index structure for spatial searching.
** Proceedings of the ACM SIGMOD Conference, pp 47-57, June 1984.
** [2] C.-H. Ang and T. C. Tan. New linear node splitting algorithm for
** R-Trees. Advances in Spatial Databases - 5th International Symposium,
** 1997
** [3] N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger. The R*tree: an
** efficient and robust access method for points and rectangles.
** Proceedings of the ACM SIGMOD Conference. June 1990.
*/
#include "postgres.h"
#include "access/gist.h" /* For GiST */
#include "access/itup.h"
#include "access/skey.h"
#include "../postgis_config.h"
#include "liblwgeom.h" /* For standard geometry types. */
#include "lwgeom_pg.h" /* For debugging macros. */
#include "gserialized_gist.h" /* For utility functions. */
#include "geography.h"
#include <assert.h>
#include <math.h>
/*
** When is a node split not so good? If more than 90% of the entries
** end up in one of the children.
*/
#define LIMIT_RATIO 0.1
/*
** For debugging
*/
#if POSTGIS_DEBUG_LEVEL > 0
static int geog_counter_leaf = 0;
static int geog_counter_internal = 0;
#endif
/*
** ND Index key type stub prototypes
*/
Datum gidx_out(PG_FUNCTION_ARGS);
Datum gidx_in(PG_FUNCTION_ARGS);
/*
** ND GiST prototypes
*/
Datum gserialized_gist_consistent(PG_FUNCTION_ARGS);
Datum gserialized_gist_compress(PG_FUNCTION_ARGS);
Datum gserialized_gist_decompress(PG_FUNCTION_ARGS);
Datum gserialized_gist_penalty(PG_FUNCTION_ARGS);
Datum gserialized_gist_picksplit(PG_FUNCTION_ARGS);
Datum gserialized_gist_union(PG_FUNCTION_ARGS);
Datum gserialized_gist_same(PG_FUNCTION_ARGS);
Datum gserialized_gist_distance(PG_FUNCTION_ARGS);
Datum gserialized_gist_geog_distance(PG_FUNCTION_ARGS);
/*
** ND Operator prototypes
*/
Datum gserialized_overlaps(PG_FUNCTION_ARGS);
Datum gserialized_gidx_geom_overlaps(PG_FUNCTION_ARGS);
Datum gserialized_gidx_gidx_overlaps(PG_FUNCTION_ARGS);
Datum gserialized_contains(PG_FUNCTION_ARGS);
Datum gserialized_gidx_geom_contains(PG_FUNCTION_ARGS);
Datum gserialized_gidx_gidx_contains(PG_FUNCTION_ARGS);
Datum gserialized_within(PG_FUNCTION_ARGS);
Datum gserialized_gidx_geom_within(PG_FUNCTION_ARGS);
Datum gserialized_gidx_gidx_within(PG_FUNCTION_ARGS);
Datum gserialized_same(PG_FUNCTION_ARGS);
Datum gserialized_gidx_geom_same(PG_FUNCTION_ARGS);
Datum gserialized_gidx_gidx_same(PG_FUNCTION_ARGS);
Datum gserialized_distance_nd(PG_FUNCTION_ARGS);
/*
** GIDX true/false test function type
*/
typedef bool (*gidx_predicate)(GIDX *a, GIDX *b);
/* Allocate a new copy of GIDX */
GIDX *
gidx_copy(GIDX *b)
{
GIDX *c = (GIDX *)palloc(VARSIZE(b));
POSTGIS_DEBUGF(5, "copied gidx (%p) to gidx (%p)", b, c);
memcpy((void *)c, (void *)b, VARSIZE(b));
return c;
}
/* Ensure all minimums are below maximums. */
void
gidx_validate(GIDX *b)
{
uint32_t i;
Assert(b);
POSTGIS_DEBUGF(5, "validating gidx (%s)", gidx_to_string(b));
for (i = 0; i < GIDX_NDIMS(b); i++)
{
if (GIDX_GET_MIN(b, i) > GIDX_GET_MAX(b, i))
{
float tmp;
tmp = GIDX_GET_MIN(b, i);
GIDX_SET_MIN(b, i, GIDX_GET_MAX(b, i));
GIDX_SET_MAX(b, i, tmp);
}
}
return;
}
/* An "unknown" GIDX is used to represent the bounds of an EMPTY
geometry or other-wise unindexable geometry (like one with NaN
or Inf bounds) */
inline bool
gidx_is_unknown(const GIDX *a)
{
size_t size = VARSIZE_ANY_EXHDR(a);
/* "unknown" gidx objects have a too-small size of one float */
if (size <= 0.0)
return true;
return false;
}
void
gidx_set_unknown(GIDX *a)
{
SET_VARSIZE(a, VARHDRSZ);
}
/* Enlarge b_union to contain b_new. */
void
gidx_merge(GIDX **b_union, GIDX *b_new)
{
int i, dims_union, dims_new;
Assert(b_union);
Assert(*b_union);
Assert(b_new);
/* Can't merge an unknown into any thing */
/* Q: Unknown is 0 dimensions. Should we reset result to unknown instead? (ticket #4232) */
if (gidx_is_unknown(b_new))
return;
/* Merge of unknown and known is known */
/* Q: Unknown is 0 dimensions. Should we never modify unknown instead? (ticket #4232) */
if (gidx_is_unknown(*b_union))
{
pfree(*b_union);
*b_union = gidx_copy(b_new);
return;
}
dims_union = GIDX_NDIMS(*b_union);
dims_new = GIDX_NDIMS(b_new);
/* Shrink unshared dimensions.
* Unset dimension is essentially [-FLT_MAX, FLT_MAX], so we can either trim it or reset to that range.*/
if (dims_new < dims_union)
{
*b_union = (GIDX *)repalloc(*b_union, GIDX_SIZE(dims_new));
SET_VARSIZE(*b_union, VARSIZE(b_new));
dims_union = dims_new;
}
for (i = 0; i < dims_union; i++)
{
/* Adjust minimums */
GIDX_SET_MIN(*b_union, i, Min(GIDX_GET_MIN(*b_union, i), GIDX_GET_MIN(b_new, i)));
/* Adjust maximums */
GIDX_SET_MAX(*b_union, i, Max(GIDX_GET_MAX(*b_union, i), GIDX_GET_MAX(b_new, i)));
}
}
/* Calculate the volume (in n-d units) of the GIDX */
static float
gidx_volume(GIDX *a)
{
float result;
uint32_t i;
if (!a || gidx_is_unknown(a))
return 0.0;
result = GIDX_GET_MAX(a, 0) - GIDX_GET_MIN(a, 0);
for (i = 1; i < GIDX_NDIMS(a); i++)
result *= (GIDX_GET_MAX(a, i) - GIDX_GET_MIN(a, i));
POSTGIS_DEBUGF(5, "calculated volume of %s as %.8g", gidx_to_string(a), result);
return result;
}
/* Calculate the edge of the GIDX */
static float
gidx_edge(GIDX *a)
{
float result;
uint32_t i;
if (!a || gidx_is_unknown(a))
return 0.0;
result = GIDX_GET_MAX(a, 0) - GIDX_GET_MIN(a, 0);
for (i = 1; i < GIDX_NDIMS(a); i++)
result += (GIDX_GET_MAX(a, i) - GIDX_GET_MIN(a, i));
POSTGIS_DEBUGF(5, "calculated edge of %s as %.8g", gidx_to_string(a), result);
return result;
}
/* Ensure the first argument has the higher dimensionality. */
static void
gidx_dimensionality_check(GIDX **a, GIDX **b)
{
if (GIDX_NDIMS(*a) < GIDX_NDIMS(*b))
{
GIDX *tmp = *b;
*b = *a;
*a = tmp;
}
}
/* Calculate the volume of the union of the boxes. Avoids creating an intermediate box. */
static float
gidx_union_volume(GIDX *a, GIDX *b)
{
float result;
int i;
int ndims_a, ndims_b;
POSTGIS_DEBUG(5, "entered function");
if (!a && !b)
{
elog(ERROR, "gidx_union_volume received two null arguments");
return 0.0;
}
if (!a || gidx_is_unknown(a))
return gidx_volume(b);
if (!b || gidx_is_unknown(b))
return gidx_volume(a);
if (gidx_is_unknown(a) && gidx_is_unknown(b))
return 0.0;
/* Ensure 'a' has the most dimensions. */
gidx_dimensionality_check(&a, &b);
ndims_a = GIDX_NDIMS(a);
ndims_b = GIDX_NDIMS(b);
/* Initialize with maximal length of first dimension. */
result = Max(GIDX_GET_MAX(a, 0), GIDX_GET_MAX(b, 0)) - Min(GIDX_GET_MIN(a, 0), GIDX_GET_MIN(b, 0));
/* Multiply by maximal length of remaining dimensions. */
for (i = 1; i < ndims_b; i++)
result *= (Max(GIDX_GET_MAX(a, i), GIDX_GET_MAX(b, i)) - Min(GIDX_GET_MIN(a, i), GIDX_GET_MIN(b, i)));
/* Add in dimensions of higher dimensional box. */
for (i = ndims_b; i < ndims_a; i++)
result *= (GIDX_GET_MAX(a, i) - GIDX_GET_MIN(a, i));
POSTGIS_DEBUGF(5, "volume( %s union %s ) = %.12g", gidx_to_string(a), gidx_to_string(b), result);
return result;
}
/* Calculate the edge of the union of the boxes. Avoids creating an intermediate box. */
static float
gidx_union_edge(GIDX *a, GIDX *b)
{
float result;
int i;
int ndims_a, ndims_b;
POSTGIS_DEBUG(5, "entered function");
if (!a && !b)
{
elog(ERROR, "gidx_union_edge received two null arguments");
return 0.0;
}
if (!a || gidx_is_unknown(a))
return gidx_volume(b);
if (!b || gidx_is_unknown(b))
return gidx_volume(a);
if (gidx_is_unknown(a) && gidx_is_unknown(b))
return 0.0;
/* Ensure 'a' has the most dimensions. */
gidx_dimensionality_check(&a, &b);
ndims_a = GIDX_NDIMS(a);
ndims_b = GIDX_NDIMS(b);
/* Initialize with maximal length of first dimension. */
result = Max(GIDX_GET_MAX(a, 0), GIDX_GET_MAX(b, 0)) - Min(GIDX_GET_MIN(a, 0), GIDX_GET_MIN(b, 0));
/* Add maximal length of remaining dimensions. */
for (i = 1; i < ndims_b; i++)
result += (Max(GIDX_GET_MAX(a, i), GIDX_GET_MAX(b, i)) - Min(GIDX_GET_MIN(a, i), GIDX_GET_MIN(b, i)));
/* Add in dimensions of higher dimensional box. */
for (i = ndims_b; i < ndims_a; i++)
result += (GIDX_GET_MAX(a, i) - GIDX_GET_MIN(a, i));
POSTGIS_DEBUGF(5, "edge( %s union %s ) = %.12g", gidx_to_string(a), gidx_to_string(b), result);
return result;
}
/* Calculate the volume of the intersection of the boxes. */
static float
gidx_inter_volume(GIDX *a, GIDX *b)
{
uint32_t i;
float result;
POSTGIS_DEBUG(5, "entered function");
if (!a || !b)
{
elog(ERROR, "gidx_inter_volume received a null argument");
return 0.0;
}
if (gidx_is_unknown(a) || gidx_is_unknown(b))
return 0.0;
/* Ensure 'a' has the most dimensions. */
gidx_dimensionality_check(&a, &b);
/* Initialize with minimal length of first dimension. */
result = Min(GIDX_GET_MAX(a, 0), GIDX_GET_MAX(b, 0)) - Max(GIDX_GET_MIN(a, 0), GIDX_GET_MIN(b, 0));
/* If they are disjoint (max < min) then return zero. */
if (result < 0.0)
return 0.0;
/* Continue for remaining dimensions. */
for (i = 1; i < GIDX_NDIMS(b); i++)
{
float width = Min(GIDX_GET_MAX(a, i), GIDX_GET_MAX(b, i)) - Max(GIDX_GET_MIN(a, i), GIDX_GET_MIN(b, i));
if (width < 0.0)
return 0.0;
/* Multiply by minimal length of remaining dimensions. */
result *= width;
}
POSTGIS_DEBUGF(5, "volume( %s intersection %s ) = %.12g", gidx_to_string(a), gidx_to_string(b), result);
return result;
}
/*
** Overlapping GIDX box test.
**
** Box(A) Overlaps Box(B) IFF for every dimension d:
** min(A,d) <= max(B,d) && max(A,d) => min(B,d)
**
** Any missing dimension is assumed by convention to
** overlap whatever finite range available on the
** other operand. See
** http://lists.osgeo.org/pipermail/postgis-devel/2015-February/024757.html
**
** Empty boxes never overlap.
*/
bool
gidx_overlaps(GIDX *a, GIDX *b)
{
int i, dims_a, dims_b;
POSTGIS_DEBUG(5, "entered function");
if (!a || !b)
return false;
if (gidx_is_unknown(a) || gidx_is_unknown(b))
return false;
dims_a = GIDX_NDIMS(a);
dims_b = GIDX_NDIMS(b);
/* For all shared dimensions min(a) > max(b) and min(b) > max(a)
Unshared dimensions do not matter */
for (i = 0; i < Min(dims_a, dims_b); i++)
{
/* If the missing dimension was not padded with -+FLT_MAX */
if (GIDX_GET_MAX(a, i) != FLT_MAX && GIDX_GET_MAX(b, i) != FLT_MAX)
{
if (GIDX_GET_MIN(a, i) > GIDX_GET_MAX(b, i))
return false;
if (GIDX_GET_MIN(b, i) > GIDX_GET_MAX(a, i))
return false;
}
}
return true;
}
/*
** Containment GIDX test.
**
** Box(A) CONTAINS Box(B) IFF (pt(A)LL < pt(B)LL) && (pt(A)UR > pt(B)UR)
*/
bool
gidx_contains(GIDX *a, GIDX *b)
{
uint32_t i, dims_a, dims_b;
if (!a || !b)
return false;
if (gidx_is_unknown(a) || gidx_is_unknown(b))
return false;
dims_a = GIDX_NDIMS(a);
dims_b = GIDX_NDIMS(b);
/* For all shared dimensions min(a) > min(b) and max(a) < max(b)
Unshared dimensions do not matter */
for (i = 0; i < Min(dims_a, dims_b); i++)
{
/* If the missing dimension was not padded with -+FLT_MAX */
if (GIDX_GET_MAX(a, i) != FLT_MAX && GIDX_GET_MAX(b, i) != FLT_MAX)
{
if (GIDX_GET_MIN(a, i) > GIDX_GET_MIN(b, i))
return false;
if (GIDX_GET_MAX(a, i) < GIDX_GET_MAX(b, i))
return false;
}
}
return true;
}
/*
** Equality GIDX test.
**
** Box(A) EQUALS Box(B) IFF (pt(A)LL == pt(B)LL) && (pt(A)UR == pt(B)UR)
*/
bool
gidx_equals(GIDX *a, GIDX *b)
{
uint32_t i, dims_a, dims_b;
if (!a && !b)
return true;
if (!a || !b)
return false;
if (gidx_is_unknown(a) && gidx_is_unknown(b))
return true;
if (gidx_is_unknown(a) || gidx_is_unknown(b))
return false;
dims_a = GIDX_NDIMS(a);
dims_b = GIDX_NDIMS(b);
/* For all shared dimensions min(a) == min(b), max(a) == max(b)
Unshared dimensions do not matter */
for (i = 0; i < Min(dims_a, dims_b); i++)
{
/* If the missing dimension was not padded with -+FLT_MAX */
if (GIDX_GET_MAX(a, i) != FLT_MAX && GIDX_GET_MAX(b, i) != FLT_MAX)
{
if (GIDX_GET_MIN(a, i) != GIDX_GET_MIN(b, i))
return false;
if (GIDX_GET_MAX(a, i) != GIDX_GET_MAX(b, i))
return false;
}
}
return true;
}
/**
* Support function. Based on two datums return true if
* they satisfy the predicate and false otherwise.
*/
static int
gserialized_datum_predicate(Datum gs1, Datum gs2, gidx_predicate predicate)
{
/* Put aside some stack memory and use it for GIDX pointers. */
char boxmem1[GIDX_MAX_SIZE];
char boxmem2[GIDX_MAX_SIZE];
GIDX *gidx1 = (GIDX *)boxmem1;
GIDX *gidx2 = (GIDX *)boxmem2;
POSTGIS_DEBUG(3, "entered function");
/* Must be able to build box for each arguement (ie, not empty geometry)
and predicate function to return true. */
if ((gserialized_datum_get_gidx_p(gs1, gidx1) == LW_SUCCESS) &&
(gserialized_datum_get_gidx_p(gs2, gidx2) == LW_SUCCESS) && predicate(gidx1, gidx2))
{
POSTGIS_DEBUGF(3, "got boxes %s and %s", gidx_to_string(gidx1), gidx_to_string(gidx2));
return LW_TRUE;
}
return LW_FALSE;
}
static int
gserialized_datum_predicate_gidx_geom(GIDX *gidx1, Datum gs2, gidx_predicate predicate)
{
/* Put aside some stack memory and use it for GIDX pointers. */
char boxmem2[GIDX_MAX_SIZE];
GIDX *gidx2 = (GIDX *)boxmem2;
POSTGIS_DEBUG(3, "entered function");
/* Must be able to build box for gs2 arguement (ie, not empty geometry)
and predicate function to return true. */
if ((gserialized_datum_get_gidx_p(gs2, gidx2) == LW_SUCCESS) && predicate(gidx1, gidx2))
{
POSTGIS_DEBUGF(3, "got boxes %s and %s", gidx_to_string(gidx1), gidx_to_string(gidx2));
return LW_TRUE;
}
return LW_FALSE;
}
static int
gserialized_datum_predicate_geom_gidx(Datum gs1, GIDX *gidx2, gidx_predicate predicate)
{
/* Put aside some stack memory and use it for GIDX pointers. */
char boxmem2[GIDX_MAX_SIZE];
GIDX *gidx1 = (GIDX *)boxmem2;
POSTGIS_DEBUG(3, "entered function");
/* Must be able to build box for gs2 arguement (ie, not empty geometry)
and predicate function to return true. */
if ((gserialized_datum_get_gidx_p(gs1, gidx1) == LW_SUCCESS) && predicate(gidx1, gidx2))
{
POSTGIS_DEBUGF(3, "got boxes %s and %s", gidx_to_string(gidx1), gidx_to_string(gidx2));
return LW_TRUE;
}
return LW_FALSE;
}
/**
* Calculate the box->box distance.
*/
static double
gidx_distance(const GIDX *a, const GIDX *b, int m_is_time)
{
int ndims, i;
double sum = 0;
/* Base computation on least available dimensions */
ndims = Min(GIDX_NDIMS(b), GIDX_NDIMS(a));
for (i = 0; i < ndims; ++i)
{
double d;
double amin = GIDX_GET_MIN(a, i);
double amax = GIDX_GET_MAX(a, i);
double bmin = GIDX_GET_MIN(b, i);
double bmax = GIDX_GET_MAX(b, i);
POSTGIS_DEBUGF(3, "A %g - %g", amin, amax);
POSTGIS_DEBUGF(3, "B %g - %g", bmin, bmax);
if ((amin <= bmax && amax >= bmin))
{
/* overlaps */
d = 0;
}
else if (i == 4 && m_is_time)
{
return FLT_MAX;
}
else if (bmax < amin)
{
/* is "left" */
d = amin - bmax;
}
else
{
/* is "right" */
assert(bmin > amax);
d = bmin - amax;
}
if (!isfinite(d))
{
/* Can happen if coordinates are corrupted/NaN */
continue;
}
sum += d * d;
POSTGIS_DEBUGF(3, "dist %g, squared %g, grows sum to %g", d, d * d, sum);
}
return sqrt(sum);
}
/**
* Return a #GSERIALIZED with an expanded bounding box.
*/
GSERIALIZED *
gserialized_expand(GSERIALIZED *g, double distance)
{
GBOX gbox;
gbox_init(&gbox);
/* Get our bounding box out of the geography, return right away if
input is an EMPTY geometry. */
if (gserialized_get_gbox_p(g, &gbox) == LW_FAILURE)
return g;
gbox_expand(&gbox, distance);
return gserialized_set_gbox(g, &gbox);
}
/***********************************************************************
* GiST N-D Index Operator Functions
*/
/*
* do "real" n-d distance
*/
PG_FUNCTION_INFO_V1(gserialized_distance_nd);
Datum gserialized_distance_nd(PG_FUNCTION_ARGS)
{
/* Feature-to-feature distance */
GSERIALIZED *geom1 = PG_GETARG_GSERIALIZED_P(0);
GSERIALIZED *geom2 = PG_GETARG_GSERIALIZED_P(1);
LWGEOM *lw1 = lwgeom_from_gserialized(geom1);
LWGEOM *lw2 = lwgeom_from_gserialized(geom2);
LWGEOM *closest;
double distance;
/* Find an exact shortest line w/ the dimensions we support */
if (lwgeom_has_z(lw1) && lwgeom_has_z(lw2))
{
closest = lwgeom_closest_line_3d(lw1, lw2);
distance = lwgeom_length(closest);
}
else
{
closest = lwgeom_closest_line(lw1, lw2);
distance = lwgeom_length_2d(closest);
}
/* Can only add the M term if both objects have M */
if (lwgeom_has_m(lw1) && lwgeom_has_m(lw2))
{
double m1 = 0, m2 = 0;
int usebox = false;
/* Un-sqrt the distance so we can add extra terms */
distance = distance * distance;
if (lwgeom_get_type(lw1) == POINTTYPE)
{
POINT4D p;
lwpoint_getPoint4d_p((LWPOINT *)lw1, &p);
m1 = p.m;
}
else if (lwgeom_get_type(lw1) == LINETYPE)
{
LWPOINT *lwp1 = lwline_get_lwpoint(lwgeom_as_lwline(closest), 0);
m1 = lwgeom_interpolate_point(lw1, lwp1);
lwpoint_free(lwp1);
}
else
usebox = true;
if (lwgeom_get_type(lw2) == POINTTYPE)
{
POINT4D p;
lwpoint_getPoint4d_p((LWPOINT *)lw2, &p);
m2 = p.m;
}
else if (lwgeom_get_type(lw2) == LINETYPE)
{
LWPOINT *lwp2 = lwline_get_lwpoint(lwgeom_as_lwline(closest), 1);
m2 = lwgeom_interpolate_point(lw2, lwp2);
lwpoint_free(lwp2);
}
else
usebox = true;
if (usebox)
{
GBOX b1, b2;
if (gserialized_get_gbox_p(geom1, &b1) && gserialized_get_gbox_p(geom2, &b2))
{
double d;
/* Disjoint left */
if (b1.mmin > b2.mmax)
d = b1.mmin - b2.mmax;
/* Disjoint right */
else if (b2.mmin > b1.mmax)
d = b2.mmin - b1.mmax;
/* Not Disjoint */
else
d = 0;
distance += d * d;
}
}
else
distance += (m2 - m1) * (m2 - m1);
distance = sqrt(distance);
}
lwgeom_free(closest);
PG_FREE_IF_COPY(geom1, 0);
PG_FREE_IF_COPY(geom2, 1);
PG_RETURN_FLOAT8(distance);
}
/*
** '~~' and operator function. Based on two serialized return true if
** the first is contained by the second.
*/
PG_FUNCTION_INFO_V1(gserialized_within);
Datum gserialized_within(PG_FUNCTION_ARGS)
{
if (gserialized_datum_predicate(PG_GETARG_DATUM(1), PG_GETARG_DATUM(0), gidx_contains))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(false);
}
/*
** '~~' and operator function. Based on a GIDX and a serialized return true if
** the first is contained by the second.
*/
PG_FUNCTION_INFO_V1(gserialized_gidx_geom_within);
Datum gserialized_gidx_geom_within(PG_FUNCTION_ARGS)
{
GIDX *gidx = (GIDX *)PG_GETARG_POINTER(0);
if (gserialized_datum_predicate_geom_gidx(PG_GETARG_DATUM(1), gidx, gidx_contains))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(false);
}
/*
** '~~' and operator function. Based on two GIDX return true if
** the first is contained by the second.
*/
PG_FUNCTION_INFO_V1(gserialized_gidx_gidx_within);
Datum gserialized_gidx_gidx_within(PG_FUNCTION_ARGS)
{
if (gidx_contains((GIDX *)PG_GETARG_POINTER(1), (GIDX *)PG_GETARG_POINTER(0)))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(false);
}
/*
** '@@' and operator function. Based on two serialized return true if
** the first contains the second.
*/
PG_FUNCTION_INFO_V1(gserialized_contains);
Datum gserialized_contains(PG_FUNCTION_ARGS)
{
if (gserialized_datum_predicate(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), gidx_contains))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(false);
}
/*
** '@@' and operator function. Based on a GIDX and a serialized return true if
** the first contains the second.
*/
PG_FUNCTION_INFO_V1(gserialized_gidx_geom_contains);
Datum gserialized_gidx_geom_contains(PG_FUNCTION_ARGS)
{
GIDX *gidx = (GIDX *)PG_GETARG_POINTER(0);
if (gserialized_datum_predicate_gidx_geom(gidx, PG_GETARG_DATUM(1), gidx_contains))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(false);
}
/*
** '@@' and operator function. Based on two GIDX return true if
** the first contains the second.
*/
PG_FUNCTION_INFO_V1(gserialized_gidx_gidx_contains);
Datum gserialized_gidx_gidx_contains(PG_FUNCTION_ARGS)
{
if (gidx_contains((GIDX *)PG_GETARG_POINTER(0), (GIDX *)PG_GETARG_POINTER(1)))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(false);
}
/*
** '~=' and operator function. Based on two serialized return true if
** the first equals the second.
*/
PG_FUNCTION_INFO_V1(gserialized_same);
Datum gserialized_same(PG_FUNCTION_ARGS)
{
if (gserialized_datum_predicate(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), gidx_equals))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(false);
}
PG_FUNCTION_INFO_V1(gserialized_gidx_geom_same);
Datum gserialized_gidx_geom_same(PG_FUNCTION_ARGS)
{
GIDX *gidx = (GIDX *)PG_GETARG_POINTER(0);
if (gserialized_datum_predicate_gidx_geom(gidx, PG_GETARG_DATUM(1), gidx_equals))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(false);
}
/*
** '~=' and operator function. Based on two GIDX return true if
** the first equals the second.
*/
PG_FUNCTION_INFO_V1(gserialized_gidx_gidx_same);
Datum gserialized_gidx_gidx_same(PG_FUNCTION_ARGS)
{
if (gidx_equals((GIDX *)PG_GETARG_POINTER(0), (GIDX *)PG_GETARG_POINTER(1)))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(false);
}
/*
** '&&&' operator function. Based on two serialized return true if
** they overlap and false otherwise.
*/
PG_FUNCTION_INFO_V1(gserialized_overlaps);
Datum gserialized_overlaps(PG_FUNCTION_ARGS)
{
if (gserialized_datum_predicate(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), gidx_overlaps))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(false);
}
/*
* This is the cross-operator for the geographies
*/
PG_FUNCTION_INFO_V1(gserialized_gidx_geog_overlaps);
Datum gserialized_gidx_geog_overlaps(PG_FUNCTION_ARGS)
{
GIDX *gidx = (GIDX *)PG_GETARG_POINTER(0);
if (gserialized_datum_predicate_gidx_geom(gidx, PG_GETARG_DATUM(1), gidx_overlaps))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(false);
}
PG_FUNCTION_INFO_V1(gserialized_gidx_geom_overlaps);
Datum gserialized_gidx_geom_overlaps(PG_FUNCTION_ARGS)
{
GIDX *gidx = (GIDX *)PG_GETARG_POINTER(0);
if (gserialized_datum_predicate_gidx_geom(gidx, PG_GETARG_DATUM(1), gidx_overlaps))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(false);
}
PG_FUNCTION_INFO_V1(gserialized_gidx_gidx_overlaps);
Datum gserialized_gidx_gidx_overlaps(PG_FUNCTION_ARGS)
{
if (gidx_overlaps((GIDX *)PG_GETARG_POINTER(0), (GIDX *)PG_GETARG_POINTER(1)))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(false);
}
/***********************************************************************
* GiST Index Support Functions
*/
/*
** GiST support function. Given a geography, return a "compressed"
** version. In this case, we convert the geography into a geocentric
** bounding box. If the geography already has the box embedded in it
** we pull that out and hand it back.
*/
PG_FUNCTION_INFO_V1(gserialized_gist_compress);
Datum gserialized_gist_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry_in = (GISTENTRY *)PG_GETARG_POINTER(0);
GISTENTRY *entry_out = NULL;
char gidxmem[GIDX_MAX_SIZE];
GIDX *bbox_out = (GIDX *)gidxmem;
int result = LW_SUCCESS;
uint32_t i;
POSTGIS_DEBUG(4, "[GIST] 'compress' function called");
/*
** Not a leaf key? There's nothing to do.
** Return the input unchanged.
*/
if (!entry_in->leafkey)
{
POSTGIS_DEBUG(4, "[GIST] non-leafkey entry, returning input unaltered");
PG_RETURN_POINTER(entry_in);
}
POSTGIS_DEBUG(4, "[GIST] processing leafkey input");
entry_out = palloc(sizeof(GISTENTRY));
/*
** Null key? Make a copy of the input entry and
** return.
*/
if (!DatumGetPointer(entry_in->key))
{
POSTGIS_DEBUG(4, "[GIST] leafkey is null");
gistentryinit(*entry_out, (Datum)0, entry_in->rel, entry_in->page, entry_in->offset, false);
POSTGIS_DEBUG(4, "[GIST] returning copy of input");
PG_RETURN_POINTER(entry_out);
}
/* Extract our index key from the GiST entry. */
result = gserialized_datum_get_gidx_p(entry_in->key, bbox_out);
/* Is the bounding box valid (non-empty, non-infinite) ?
* If not, use the "unknown" GIDX. */
if (result == LW_FAILURE)
{
POSTGIS_DEBUG(4, "[GIST] empty geometry!");
gidx_set_unknown(bbox_out);
gistentryinit(*entry_out,
PointerGetDatum(gidx_copy(bbox_out)),
entry_in->rel,
entry_in->page,
entry_in->offset,
false);
PG_RETURN_POINTER(entry_out);
}
POSTGIS_DEBUGF(4, "[GIST] got entry_in->key: %s", gidx_to_string(bbox_out));
/* ORIGINAL VERSION */
/* Check all the dimensions for finite values.
* If not, use the "unknown" GIDX as a key */
for (i = 0; i < GIDX_NDIMS(bbox_out); i++)
{
if (!isfinite(GIDX_GET_MAX(bbox_out, i)) || !isfinite(GIDX_GET_MIN(bbox_out, i)))
{
gidx_set_unknown(bbox_out);
gistentryinit(*entry_out,
PointerGetDatum(gidx_copy(bbox_out)),
entry_in->rel,
entry_in->page,
entry_in->offset,
false);
PG_RETURN_POINTER(entry_out);
}
}
/* Ensure bounding box has minimums below maximums. */
gidx_validate(bbox_out);
/* Prepare GISTENTRY for return. */
gistentryinit(
*entry_out, PointerGetDatum(gidx_copy(bbox_out)), entry_in->rel, entry_in->page, entry_in->offset, false);
/* Return GISTENTRY. */
POSTGIS_DEBUG(4, "[GIST] 'compress' function complete");
PG_RETURN_POINTER(entry_out);
}
/*
** GiST support function.
** Decompress an entry. Unused for geography, so we return.
*/
PG_FUNCTION_INFO_V1(gserialized_gist_decompress);
Datum gserialized_gist_decompress(PG_FUNCTION_ARGS)
{
POSTGIS_DEBUG(5, "[GIST] 'decompress' function called");
/* We don't decompress. Just return the input. */
PG_RETURN_POINTER(PG_GETARG_POINTER(0));
}
/*
** GiST support function. Called from gserialized_gist_consistent below.
*/
static inline bool
gserialized_gist_consistent_leaf(GIDX *key, GIDX *query, StrategyNumber strategy)
{
bool retval;
POSTGIS_DEBUGF(4, "[GIST] leaf consistent, strategy [%d], count[%i]", strategy, geog_counter_leaf++);
switch (strategy)
{
case RTOverlapStrategyNumber:
retval = (bool)gidx_overlaps(key, query);
break;
case RTSameStrategyNumber:
retval = (bool)gidx_equals(key, query);
break;
case RTContainsStrategyNumber:
case RTOldContainsStrategyNumber:
retval = (bool)gidx_contains(key, query);
break;
case RTContainedByStrategyNumber:
case RTOldContainedByStrategyNumber:
retval = (bool)gidx_contains(query, key);
break;
default:
retval = false;
}
return retval;
}
/*
** GiST support function. Called from gserialized_gist_consistent below.
*/
static inline bool
gserialized_gist_consistent_internal(GIDX *key, GIDX *query, StrategyNumber strategy)
{
bool retval;
POSTGIS_DEBUGF(4,
"[GIST] internal consistent, strategy [%d], count[%i], query[%s], key[%s]",
strategy,
geog_counter_internal++,
gidx_to_string(query),
gidx_to_string(key));
switch (strategy)
{
case RTOverlapStrategyNumber:
case RTContainedByStrategyNumber:
case RTOldContainedByStrategyNumber:
retval = (bool)gidx_overlaps(key, query);
break;
case RTSameStrategyNumber:
case RTContainsStrategyNumber:
case RTOldContainsStrategyNumber:
retval = (bool)gidx_contains(key, query);
break;
default:
retval = false;
}
return retval;
}
/*
** GiST support function. Take in a query and an entry and see what the
** relationship is, based on the query strategy.
*/
PG_FUNCTION_INFO_V1(gserialized_gist_consistent);
Datum gserialized_gist_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
StrategyNumber strategy = (StrategyNumber)PG_GETARG_UINT16(2);
bool result;
char gidxmem[GIDX_MAX_SIZE];
GIDX *query_gbox_index = (GIDX *)gidxmem;
/* PostgreSQL 8.4 and later require the RECHECK flag to be set here,
rather than being supplied as part of the operator class definition */
bool *recheck = (bool *)PG_GETARG_POINTER(4);
/* We set recheck to false to avoid repeatedly pulling every "possibly matched" geometry
out during index scans. For cases when the geometries are large, rechecking
can make things twice as slow. */
*recheck = false;
POSTGIS_DEBUG(4, "[GIST] 'consistent' function called");
/* Quick sanity check on query argument. */
if (!DatumGetPointer(PG_GETARG_DATUM(1)))
{
POSTGIS_DEBUG(4, "[GIST] null query pointer (!?!), returning false");
PG_RETURN_BOOL(false); /* NULL query! This is screwy! */
}
/* Quick sanity check on entry key. */
if (!DatumGetPointer(entry->key))
{
POSTGIS_DEBUG(4, "[GIST] null index entry, returning false");
PG_RETURN_BOOL(false); /* NULL entry! */
}
/* Null box should never make this far. */
if (gserialized_datum_get_gidx_p(PG_GETARG_DATUM(1), query_gbox_index) == LW_FAILURE)
{
POSTGIS_DEBUG(4, "[GIST] null query_gbox_index!");
PG_RETURN_BOOL(false);
}
/* Treat leaf node tests different from internal nodes */
if (GIST_LEAF(entry))
{
result =
gserialized_gist_consistent_leaf((GIDX *)PG_DETOAST_DATUM(entry->key), query_gbox_index, strategy);
}
else
{
result = gserialized_gist_consistent_internal(
(GIDX *)PG_DETOAST_DATUM(entry->key), query_gbox_index, strategy);
}
PG_RETURN_BOOL(result);
}
/*
** Function to pack floats of different realms.
** This function serves to pack bit flags inside float type.
** Result value represent can be from two different "realms".
** Every value from realm 1 is greater than any value from realm 0.
** Values from the same realm loose one bit of precision.
**
** This technique is possible due to floating point numbers specification
** according to IEEE 754: exponent bits are highest
** (excluding sign bits, but here penalty is always positive). If float a is
** greater than float b, integer A with same bit representation as a is greater
** than integer B with same bits as b.
*/
static inline float
pack_float(const float value, const uint8_t realm)
{
union {
float f;
struct {
unsigned value : 31, sign : 1;
} vbits;
struct {
unsigned value : 30, realm : 1, sign : 1;
} rbits;
} a;
a.f = value;
a.rbits.value = a.vbits.value >> 1;
a.rbits.realm = realm;
return a.f;
}
/*
** GiST support function. Calculate the "penalty" cost of adding this entry into an existing entry.
** Calculate the change in volume of the old entry once the new entry is added.
*/
PG_FUNCTION_INFO_V1(gserialized_gist_penalty);
Datum gserialized_gist_penalty(PG_FUNCTION_ARGS)
{
GISTENTRY *origentry = (GISTENTRY *)PG_GETARG_POINTER(0);
GISTENTRY *newentry = (GISTENTRY *)PG_GETARG_POINTER(1);
float *result = (float *)PG_GETARG_POINTER(2);
GIDX *gbox_index_orig, *gbox_index_new;
gbox_index_orig = (GIDX *)DatumGetPointer(origentry->key);
gbox_index_new = (GIDX *)DatumGetPointer(newentry->key);
/* Penalty value of 0 has special code path in Postgres's gistchoose.
* It is used as an early exit condition in subtree loop, allowing faster tree descend.
* For multi-column index, it lets next column break the tie, possibly more confidently.
*/
*result = 0.0;
/* Drop out if we're dealing with null inputs. Shouldn't happen. */
if (gbox_index_orig && gbox_index_new)
{
/* Calculate the size difference of the boxes (volume difference in this case). */
float size_union = gidx_union_volume(gbox_index_orig, gbox_index_new);
float size_orig = gidx_volume(gbox_index_orig);
float volume_extension = size_union - size_orig;
gbox_index_orig = (GIDX *)PG_DETOAST_DATUM(origentry->key);
gbox_index_new = (GIDX *)PG_DETOAST_DATUM(newentry->key);
/* REALM 1: Area extension is nonzero, return it */
if (volume_extension > FLT_EPSILON)
*result = pack_float(volume_extension, 1);
else
{
/* REALM 0: Area extension is zero, return nonzero edge extension */
float edge_union = gidx_union_edge(gbox_index_orig, gbox_index_new);
float edge_orig = gidx_edge(gbox_index_orig);
float edge_extension = edge_union - edge_orig;
if (edge_extension > FLT_EPSILON)
*result = pack_float(edge_extension, 0);
}
}
PG_RETURN_POINTER(result);
}
/*
** GiST support function. Merge all the boxes in a page into one master box.
*/
PG_FUNCTION_INFO_V1(gserialized_gist_union);
Datum gserialized_gist_union(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *)PG_GETARG_POINTER(0);
int *sizep = (int *)PG_GETARG_POINTER(1); /* Size of the return value */
int numranges, i;
GIDX *box_cur, *box_union;
POSTGIS_DEBUG(4, "[GIST] 'union' function called");
numranges = entryvec->n;
box_cur = (GIDX *)PG_DETOAST_DATUM(entryvec->vector[0].key);
box_union = gidx_copy(box_cur);
for (i = 1; i < numranges; i++)
{
box_cur = (GIDX *)PG_DETOAST_DATUM(entryvec->vector[i].key);
gidx_merge(&box_union, box_cur);
}
*sizep = VARSIZE(box_union);
POSTGIS_DEBUGF(4, "[GIST] union called, numranges(%i), pageunion %s", numranges, gidx_to_string(box_union));
PG_RETURN_POINTER(box_union);
}
/*
** GiST support function. Test equality of keys.
*/
PG_FUNCTION_INFO_V1(gserialized_gist_same);
Datum gserialized_gist_same(PG_FUNCTION_ARGS)
{
GIDX *b1 = (GIDX *)PG_GETARG_POINTER(0);
GIDX *b2 = (GIDX *)PG_GETARG_POINTER(1);
bool *result = (bool *)PG_GETARG_POINTER(2);
POSTGIS_DEBUG(4, "[GIST] 'same' function called");
*result = gidx_equals(b1, b2);
PG_RETURN_POINTER(result);
}
PG_FUNCTION_INFO_V1(gserialized_gist_geog_distance);
Datum gserialized_gist_geog_distance(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
Datum query_datum = PG_GETARG_DATUM(1);
StrategyNumber strategy = (StrategyNumber)PG_GETARG_UINT16(2);
bool *recheck = (bool *)PG_GETARG_POINTER(4);
char query_box_mem[GIDX_MAX_SIZE];
GIDX *query_box = (GIDX *)query_box_mem;
GIDX *entry_box;
double distance;
POSTGIS_DEBUGF(3, "[GIST] '%s' function called", __func__);
/* We are using '13' as the gist geography distance <-> strategy number */
if (strategy != 13)
{
elog(ERROR, "unrecognized strategy number: %d", strategy);
PG_RETURN_FLOAT8(FLT_MAX);
}
/* Null box should never make this far. */
if (gserialized_datum_get_gidx_p(query_datum, query_box) == LW_FAILURE)
{
POSTGIS_DEBUG(2, "[GIST] null query_gbox_index!");
PG_RETURN_FLOAT8(FLT_MAX);
}
/* When we hit leaf nodes, it's time to turn on recheck */
if (GIST_LEAF(entry))
*recheck = true;
/* Get the entry box */
entry_box = (GIDX *)PG_DETOAST_DATUM(entry->key);
/* Return distances from key-based tests should always be */
/* the minimum possible distance, box-to-box */
/* We scale up to "world units" so that the box-to-box distances */
/* compare reasonably with the over-the-spheroid distances that */
/* the recheck process will turn up */
distance = WGS84_RADIUS * gidx_distance(entry_box, query_box, 0);
POSTGIS_DEBUGF(2, "[GIST] '%s' got distance %g", __func__, distance);
PG_RETURN_FLOAT8(distance);
}
/*
** GiST support function.
** Take in a query and an entry and return the "distance" between them.
**
** Given an index entry p and a query value q, this function determines the
** index entry's "distance" from the query value. This function must be
** supplied if the operator class contains any ordering operators. A query
** using the ordering operator will be implemented by returning index entries
** with the smallest "distance" values first, so the results must be consistent
** with the operator's semantics. For a leaf index entry the result just
** represents the distance to the index entry; for an internal tree node, the
** result must be the smallest distance that any child entry could have.
**
** Strategy 13 is centroid-based distance tests <<->>
** Strategy 20 is cpa based distance tests |=|
*/
PG_FUNCTION_INFO_V1(gserialized_gist_distance);
Datum gserialized_gist_distance(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
StrategyNumber strategy = (StrategyNumber)PG_GETARG_UINT16(2);
char query_box_mem[GIDX_MAX_SIZE];
GIDX *query_box = (GIDX *)query_box_mem;
GIDX *entry_box;
bool *recheck = (bool *)PG_GETARG_POINTER(4);
double distance;
POSTGIS_DEBUG(4, "[GIST] 'distance' function called");
/* Strategy 13 is <<->> */
/* Strategy 20 is |=| */
if (strategy != 13 && strategy != 20)
{
elog(ERROR, "unrecognized strategy number: %d", strategy);
PG_RETURN_FLOAT8(FLT_MAX);
}
/* Null box should never make this far. */
if (gserialized_datum_get_gidx_p(PG_GETARG_DATUM(1), query_box) == LW_FAILURE)
{
POSTGIS_DEBUG(4, "[GIST] null query_gbox_index!");
PG_RETURN_FLOAT8(FLT_MAX);
}
/* Get the entry box */
entry_box = (GIDX *)PG_DETOAST_DATUM(entry->key);
/* Strategy 20 is |=| */
distance = gidx_distance(entry_box, query_box, strategy == 20);
/* Treat leaf node tests different from internal nodes */
if (GIST_LEAF(entry))
*recheck = true;
PG_RETURN_FLOAT8(distance);
}
/*
** Utility function to add entries to the axis partition lists in the
** picksplit function.
*/
static void
gserialized_gist_picksplit_addlist(OffsetNumber *list, GIDX **box_union, GIDX *box_current, int *pos, int num)
{
if (*pos)
gidx_merge(box_union, box_current);
else
{
pfree(*box_union);
*box_union = gidx_copy(box_current);
}
list[*pos] = num;
(*pos)++;
}
/*
** Utility function check whether the number of entries two halves of the
** space constitute a "bad ratio" (poor balance).
*/
static int
gserialized_gist_picksplit_badratio(int x, int y)
{
POSTGIS_DEBUGF(4, "[GIST] checking split ratio (%d, %d)", x, y);
if ((y == 0) || (((float)x / (float)y) < LIMIT_RATIO) || (x == 0) || (((float)y / (float)x) < LIMIT_RATIO))
return true;
return false;
}
static bool
gserialized_gist_picksplit_badratios(int *pos, int dims)
{
int i;
for (i = 0; i < dims; i++)
{
if (gserialized_gist_picksplit_badratio(pos[2 * i], pos[2 * i + 1]) == false)
return false;
}
return true;
}
/*
** Where the picksplit algorithm cannot find any basis for splitting one way
** or another, we simply split the overflowing node in half.
*/
static void
gserialized_gist_picksplit_fallback(GistEntryVector *entryvec, GIST_SPLITVEC *v)
{
OffsetNumber i, maxoff;
GIDX *unionL = NULL;
GIDX *unionR = NULL;
int nbytes;
POSTGIS_DEBUG(4, "[GIST] in fallback picksplit function");
maxoff = entryvec->n - 1;
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
v->spl_left = (OffsetNumber *)palloc(nbytes);
v->spl_right = (OffsetNumber *)palloc(nbytes);
v->spl_nleft = v->spl_nright = 0;
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
GIDX *cur = (GIDX *)PG_DETOAST_DATUM(entryvec->vector[i].key);
if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
{
v->spl_left[v->spl_nleft] = i;
if (!unionL)
unionL = gidx_copy(cur);
else
gidx_merge(&unionL, cur);
v->spl_nleft++;
}
else
{
v->spl_right[v->spl_nright] = i;
if (!unionR)
unionR = gidx_copy(cur);
else
gidx_merge(&unionR, cur);
v->spl_nright++;
}
}
if (v->spl_ldatum_exists)
gidx_merge(&unionL, (GIDX *)PG_DETOAST_DATUM(v->spl_ldatum));
v->spl_ldatum = PointerGetDatum(unionL);
if (v->spl_rdatum_exists)
gidx_merge(&unionR, (GIDX *)PG_DETOAST_DATUM(v->spl_rdatum));
v->spl_rdatum = PointerGetDatum(unionR);
v->spl_ldatum_exists = v->spl_rdatum_exists = false;
}
static void
gserialized_gist_picksplit_constructsplit(GIST_SPLITVEC *v,
OffsetNumber *list1,
int nlist1,
GIDX **union1,
OffsetNumber *list2,
int nlist2,
GIDX **union2)
{
bool firstToLeft = true;
POSTGIS_DEBUG(4, "[GIST] picksplit in constructsplit function");
if (v->spl_ldatum_exists || v->spl_rdatum_exists)
{
if (v->spl_ldatum_exists && v->spl_rdatum_exists)
{
GIDX *LRl = gidx_copy(*union1);
GIDX *LRr = gidx_copy(*union2);
GIDX *RLl = gidx_copy(*union2);
GIDX *RLr = gidx_copy(*union1);
double sizeLR, sizeRL;
gidx_merge(&LRl, (GIDX *)PG_DETOAST_DATUM(v->spl_ldatum));
gidx_merge(&LRr, (GIDX *)PG_DETOAST_DATUM(v->spl_rdatum));
gidx_merge(&RLl, (GIDX *)PG_DETOAST_DATUM(v->spl_ldatum));
gidx_merge(&RLr, (GIDX *)PG_DETOAST_DATUM(v->spl_rdatum));
sizeLR = gidx_inter_volume(LRl, LRr);
sizeRL = gidx_inter_volume(RLl, RLr);
POSTGIS_DEBUGF(4, "[GIST] sizeLR / sizeRL == %.12g / %.12g", sizeLR, sizeRL);
if (sizeLR > sizeRL)
firstToLeft = false;
}
else
{
float p1 = 0.0, p2 = 0.0;
GISTENTRY oldUnion, addon;
gistentryinit(oldUnion,
(v->spl_ldatum_exists) ? v->spl_ldatum : v->spl_rdatum,
NULL,
NULL,
InvalidOffsetNumber,
false);
gistentryinit(addon, PointerGetDatum(*union1), NULL, NULL, InvalidOffsetNumber, false);
DirectFunctionCall3(gserialized_gist_penalty,
PointerGetDatum(&oldUnion),
PointerGetDatum(&addon),
PointerGetDatum(&p1));
gistentryinit(addon, PointerGetDatum(*union2), NULL, NULL, InvalidOffsetNumber, false);
DirectFunctionCall3(gserialized_gist_penalty,
PointerGetDatum(&oldUnion),
PointerGetDatum(&addon),
PointerGetDatum(&p2));
POSTGIS_DEBUGF(4, "[GIST] p1 / p2 == %.12g / %.12g", p1, p2);
if ((v->spl_ldatum_exists && p1 > p2) || (v->spl_rdatum_exists && p1 < p2))
firstToLeft = false;
}
}
POSTGIS_DEBUGF(4, "[GIST] firstToLeft == %d", firstToLeft);
if (firstToLeft)
{
v->spl_left = list1;
v->spl_right = list2;
v->spl_nleft = nlist1;
v->spl_nright = nlist2;
if (v->spl_ldatum_exists)
gidx_merge(union1, (GIDX *)PG_DETOAST_DATUM(v->spl_ldatum));
v->spl_ldatum = PointerGetDatum(*union1);
if (v->spl_rdatum_exists)
gidx_merge(union2, (GIDX *)PG_DETOAST_DATUM(v->spl_rdatum));
v->spl_rdatum = PointerGetDatum(*union2);
}
else
{
v->spl_left = list2;
v->spl_right = list1;
v->spl_nleft = nlist2;
v->spl_nright = nlist1;
if (v->spl_ldatum_exists)
gidx_merge(union2, (GIDX *)PG_DETOAST_DATUM(v->spl_ldatum));
v->spl_ldatum = PointerGetDatum(*union2);
if (v->spl_rdatum_exists)
gidx_merge(union1, (GIDX *)PG_DETOAST_DATUM(v->spl_rdatum));
v->spl_rdatum = PointerGetDatum(*union1);
}
v->spl_ldatum_exists = v->spl_rdatum_exists = false;
}
#define BELOW(d) (2 * (d))
#define ABOVE(d) ((2 * (d)) + 1)
/*
** GiST support function. Split an overflowing node into two new nodes.
** Uses linear algorithm from Ang & Tan [2], dividing node extent into
** four quadrants, and splitting along the axis that most evenly distributes
** entries between the new nodes.
** TODO: Re-evaluate this in light of R*Tree picksplit approaches.
*/
PG_FUNCTION_INFO_V1(gserialized_gist_picksplit);
Datum gserialized_gist_picksplit(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *)PG_GETARG_POINTER(0);
GIST_SPLITVEC *v = (GIST_SPLITVEC *)PG_GETARG_POINTER(1);
OffsetNumber i;
/* One union box for each half of the space. */
GIDX **box_union;
/* One offset number list for each half of the space. */
OffsetNumber **list;
/* One position index for each half of the space. */
int *pos;
GIDX *box_pageunion;
GIDX *box_current;
int direction = -1;
bool all_entries_equal = true;
OffsetNumber max_offset;
int nbytes, ndims_pageunion, d;
int posmin = entryvec->n;
POSTGIS_DEBUG(4, "[GIST] 'picksplit' function called");
/*
** First calculate the bounding box and maximum number of dimensions in this page.
*/
max_offset = entryvec->n - 1;
box_current = (GIDX *)PG_DETOAST_DATUM(entryvec->vector[FirstOffsetNumber].key);
box_pageunion = gidx_copy(box_current);
/* Calculate the containing box (box_pageunion) for the whole page we are going to split. */
for (i = OffsetNumberNext(FirstOffsetNumber); i <= max_offset; i = OffsetNumberNext(i))
{
box_current = (GIDX *)PG_DETOAST_DATUM(entryvec->vector[i].key);
if (all_entries_equal && !gidx_equals(box_pageunion, box_current))
all_entries_equal = false;
gidx_merge(&box_pageunion, box_current);
}
POSTGIS_DEBUGF(3, "[GIST] box_pageunion: %s", gidx_to_string(box_pageunion));
/* Every box in the page is the same! So, we split and just put half the boxes in each child. */
if (all_entries_equal)
{
POSTGIS_DEBUG(4, "[GIST] picksplit finds all entries equal!");
gserialized_gist_picksplit_fallback(entryvec, v);
PG_RETURN_POINTER(v);
}
/* Initialize memory structures. */
nbytes = (max_offset + 2) * sizeof(OffsetNumber);
ndims_pageunion = GIDX_NDIMS(box_pageunion);
POSTGIS_DEBUGF(4, "[GIST] ndims_pageunion == %d", ndims_pageunion);
pos = palloc(2 * ndims_pageunion * sizeof(int));
list = palloc(2 * ndims_pageunion * sizeof(OffsetNumber *));
box_union = palloc(2 * ndims_pageunion * sizeof(GIDX *));
for (d = 0; d < ndims_pageunion; d++)
{
list[BELOW(d)] = (OffsetNumber *)palloc(nbytes);
list[ABOVE(d)] = (OffsetNumber *)palloc(nbytes);
box_union[BELOW(d)] = gidx_new(ndims_pageunion);
box_union[ABOVE(d)] = gidx_new(ndims_pageunion);
pos[BELOW(d)] = 0;
pos[ABOVE(d)] = 0;
}
/*
** Assign each entry in the node to the volume partitions it belongs to,
** such as "above the x/y plane, left of the y/z plane, below the x/z plane".
** Each entry thereby ends up in three of the six partitions.
*/
POSTGIS_DEBUG(4, "[GIST] 'picksplit' calculating best split axis");
for (i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i))
{
box_current = (GIDX *)PG_DETOAST_DATUM(entryvec->vector[i].key);
for (d = 0; d < ndims_pageunion; d++)
{
if (GIDX_GET_MIN(box_current, d) - GIDX_GET_MIN(box_pageunion, d) <
GIDX_GET_MAX(box_pageunion, d) - GIDX_GET_MAX(box_current, d))
gserialized_gist_picksplit_addlist(
list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
else
gserialized_gist_picksplit_addlist(
list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
}
}
/*
** "Bad disposition", too many entries fell into one octant of the space, so no matter which
** plane we choose to split on, we're going to end up with a mostly full node. Where the
** data is pretty homogeneous (lots of duplicates) entries that are equidistant from the
** sides of the page union box can occasionally all end up in one place, leading
** to this condition.
*/
if (gserialized_gist_picksplit_badratios(pos, ndims_pageunion))
{
/*
** Instead we split on center points and see if we do better.
** First calculate the average center point for each axis.
*/
double *avgCenter = palloc(ndims_pageunion * sizeof(double));
for (d = 0; d < ndims_pageunion; d++)
avgCenter[d] = 0.0;
POSTGIS_DEBUG(4, "[GIST] picksplit can't find good split axis, trying center point method");
for (i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i))
{
box_current = (GIDX *)PG_DETOAST_DATUM(entryvec->vector[i].key);
for (d = 0; d < ndims_pageunion; d++)
avgCenter[d] += (GIDX_GET_MAX(box_current, d) + GIDX_GET_MIN(box_current, d)) / 2.0;
}
for (d = 0; d < ndims_pageunion; d++)
{
avgCenter[d] /= max_offset;
pos[BELOW(d)] = pos[ABOVE(d)] = 0; /* Re-initialize our counters. */
POSTGIS_DEBUGF(4, "[GIST] picksplit average center point[%d] = %.12g", d, avgCenter[d]);
}
/* For each of our entries... */
for (i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i))
{
double center;
box_current = (GIDX *)PG_DETOAST_DATUM(entryvec->vector[i].key);
for (d = 0; d < ndims_pageunion; d++)
{
center = (GIDX_GET_MIN(box_current, d) + GIDX_GET_MAX(box_current, d)) / 2.0;
if (center < avgCenter[d])
gserialized_gist_picksplit_addlist(
list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
else if (FPeq(center, avgCenter[d]))
if (pos[BELOW(d)] > pos[ABOVE(d)])
gserialized_gist_picksplit_addlist(list[ABOVE(d)],
&(box_union[ABOVE(d)]),
box_current,
&(pos[ABOVE(d)]),
i);
else
gserialized_gist_picksplit_addlist(list[BELOW(d)],
&(box_union[BELOW(d)]),
box_current,
&(pos[BELOW(d)]),
i);
else
gserialized_gist_picksplit_addlist(
list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
}
}
/* Do we have a good disposition now? If not, screw it, just cut the node in half. */
if (gserialized_gist_picksplit_badratios(pos, ndims_pageunion))
{
POSTGIS_DEBUG(4,
"[GIST] picksplit still cannot find a good split! just cutting the node in half");
gserialized_gist_picksplit_fallback(entryvec, v);
PG_RETURN_POINTER(v);
}
}
/*
** Now, what splitting plane gives us the most even ratio of
** entries in our child pages? Since each split region has been apportioned entries
** against the same number of total entries, the axis that has the smallest maximum
** number of entries in its regions is the most evenly distributed.
** TODO: what if the distributions are equal in two or more axes?
*/
for (d = 0; d < ndims_pageunion; d++)
{
int posd = Max(pos[ABOVE(d)], pos[BELOW(d)]);
if (posd < posmin)
{
direction = d;
posmin = posd;
}
}
if (direction == -1 || posmin == entryvec->n)
elog(ERROR, "Error in building split, unable to determine split direction.");
POSTGIS_DEBUGF(3, "[GIST] 'picksplit' splitting on axis %d", direction);
gserialized_gist_picksplit_constructsplit(v,
list[BELOW(direction)],
pos[BELOW(direction)],
&(box_union[BELOW(direction)]),
list[ABOVE(direction)],
pos[ABOVE(direction)],
&(box_union[ABOVE(direction)]));
POSTGIS_DEBUGF(4, "[GIST] spl_ldatum: %s", gidx_to_string((GIDX *)v->spl_ldatum));
POSTGIS_DEBUGF(4, "[GIST] spl_rdatum: %s", gidx_to_string((GIDX *)v->spl_rdatum));
POSTGIS_DEBUGF(
4,
"[GIST] axis %d: parent range (%.12g, %.12g) left range (%.12g, %.12g), right range (%.12g, %.12g)",
direction,
GIDX_GET_MIN(box_pageunion, direction),
GIDX_GET_MAX(box_pageunion, direction),
GIDX_GET_MIN((GIDX *)v->spl_ldatum, direction),
GIDX_GET_MAX((GIDX *)v->spl_ldatum, direction),
GIDX_GET_MIN((GIDX *)v->spl_rdatum, direction),
GIDX_GET_MAX((GIDX *)v->spl_rdatum, direction));
PG_RETURN_POINTER(v);
}
/*
** The GIDX key must be defined as a PostgreSQL type, even though it is only
** ever used internally. These no-op stubs are used to bind the type.
*/
PG_FUNCTION_INFO_V1(gidx_in);
Datum gidx_in(PG_FUNCTION_ARGS)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("function gidx_in not implemented")));
PG_RETURN_POINTER(NULL);
}
PG_FUNCTION_INFO_V1(gidx_out);
Datum gidx_out(PG_FUNCTION_ARGS)
{
GIDX *box = (GIDX *)PG_GETARG_POINTER(0);
char *result = gidx_to_string(box);
PG_RETURN_CSTRING(result);
}