STCentroid*: Alternate Functions for Compute a Centroid

The top download from my website is for my package of centroid algorithms for Oracle sdo_geometry.

There is nothing wrong with the STCentroid function implemented in SQL Server Spatial (for one thing it handles geography objects natively), but it seems that having a choice of centroid algorithms is a big thing for users of spatial data within databases.

The original algorithm for getting the centroid of a polygon involves use of LAG/LEAD SQL99 analytic functions that have not been a part of SQL Server until Denali.

For the polygon centroid function (STCentroid_P) one can ask the function to generate a centroid:

  • In the middle of the MBR of the geometry, or
  • In a weighted position based on coordinate density, or
  • With a defined or fixed (supplied) “seed” X ordinate position.

Here are the functions.

Centroid for MultiPoint Geometries

SQL Server Spatial returns null when executing STCentroid against multipoint geometries, so let’s build our own.

 USE [GISDB] -- Change to your database
 GO
 -- MultiPoint Centroid Creator
 --
 DROP   FUNCTION [dbo].[STCENTROID_P] ;
 CREATE FUNCTION [dbo].[STCENTROID_P] (
     @p_geometry  GEOMETRY,
     @p_round_x   INT = 3,
     @p_round_y   INT = 3,
     @p_round_z   INT = 3 )
 RETURNS GEOMETRY
 AS
 BEGIN
   DECLARE
      @v_x         FLOAT = 0.0,
      @v_y         FLOAT = 0.0,
      @v_z         FLOAT,
      @v_geomn     INT,
      @v_part_geom geometry,
      @v_geometry  geometry,
      @v_gtype     nvarchar(MAX);
   BEGIN
       IF ( @p_geometry IS NULL )
        RETURN @p_geometry;
       SET @v_gtype = @p_geometry.STGeometryType();
       IF ( @v_gtype = 'MultiPoint' )
       BEGIN
         -- Get parts of multi-point geometry
         --
         SET @v_geomn  = 1;
         WHILE ( @v_geomn <= @p_geometry.STNumGeometries() )
         BEGIN
           SET @v_part_geom = @p_geometry.STGeometryN(@v_geomn);
           SET @v_x = @v_x + @v_part_geom.STX
           SET @v_y = @v_y + @v_part_geom.STY
           SET @v_geomn = @v_geomn + 1;
         END;
         SET @v_geometry = geometry::Point(@v_x/@p_geometry.STNumGeometries(),
                                           @v_y/@p_geometry.STNumGeometries(),
                                            @p_geometry.STSrid);
       END
       ELSE
          SET @v_geometry = @p_geometry;
       RETURN @v_geometry;
     END;
 END
 GO

Now let’s test noting that SQL Server’s STCentroid always returns NULL for MultiPoint geometries

 WITH multiPoint AS (
    SELECT geometry::STGeomFromText('MULTIPOINT((0 0),(100 0),(100 100),(0 100),(150 110),(150 150),(110 150),(110 110))',0) AS geom
 )
 SELECT 'O' AS label,  geom.STBuffer(2) AS geom FROM multiPoint
 UNION ALL
 SELECT 'M' AS label, dbo.STCentroid_P(geom,3,3,3).STBuffer(5) AS geom FROM multiPoint
 UNION ALL
 SELECT 'S' AS label, geom.STCentroid().STBuffer(5) AS geom FROM multiPoint;

Which looks like this:

Centroid for Area Geometries

 DROP FUNCTION [dbo].[STCENTROID_A];
 CREATE FUNCTION [dbo].[STCENTROID_A] (
   @p_geometry  geometry,
   @p_start     INT = 0,  /* 0 = use average of all Area's vertices for starting X centroid calculation
                             1 = centre X of MBR
                             2 = User supplied starting seed X */
   @p_seed_x    FLOAT = NULL,
   @p_round_x   INT = 3,
   @p_round_y   INT = 3,
   @p_round_z   INT = 3
 )
 RETURNS geometry
 AS
 BEGIN
   DECLARE
      @v_cx    FLOAT,
      @v_cy    FLOAT,
      @v_seed  geometry,
      @v_wkt   nvarchar(MAX);
   BEGIN
     IF ( @p_geometry IS NULL )
        RETURN @p_geometry;
     IF ( @p_start NOT IN (0,1,2) )
        RETURN CAST('Starting position must be 0, 1 or 2.' AS VARCHAR(MAX)); -- geometry);
     IF ( @p_start = 2 AND @p_seed_x IS NULL )
        RETURN CAST('Seed X value not provided.' AS VARCHAR(MAX)); -- geometry);
     -- Create starting seed point geometry
     --
   SET @v_seed = CASE @p_start
                      WHEN 0 THEN @p_geometry
                WHEN 1 THEN geometry::Point(@p_geometry.STEnvelope().STPointN(1).STX,
                                                    @p_geometry.STEnvelope().STPointN(1).STY,
                                                    @p_geometry.STSrid).STUnion(
                                    geometry::Point(@p_geometry.STEnvelope().STPointN(3).STX,
                                                    @p_geometry.STEnvelope().STPointN(3).STY,
                                                    @p_geometry.STSrid))
                    WHEN 2 THEN geometry::Point(@p_seed_x,0,@p_geometry.STSrid)
                ELSE @p_geometry
           END;
     -- Check user seed X between MBR of object
   --
   IF ( @p_start = 2 AND
      ( @p_seed_x <= @p_geometry.STEnvelope().STPointN(1).STX OR @p_seed_x >= @p_geometry.STEnvelope().STPointN(3).STX ) )
        RETURN CAST('Seed X value not between provided geometry''s MBR.' AS VARCHAR(MAX)); -- geometry);
     SELECT TOP 1
            @v_cx = cx,
            @v_cy = cy
       FROM (SELECT z.x                 AS cx,
                    z.y + ( ydiff / 2 ) AS cy,
                    ydiff
               FROM (SELECT w.id,
                            w.x,
                            w.y,
                            CASE WHEN w.ydiff IS NULL THEN 0 ELSE w.ydiff END AS ydiff,
                            CASE WHEN w.id = 1
                                 THEN CASE WHEN w.INOUT = 1
                                           THEN 'INSIDE'
                                           ELSE 'OUTSIDE'
                                       END
                                 WHEN SUM(w.INOUT) OVER (ORDER BY w.id) % 2 = 1
                                 /* Need to look at previous result as inside/outside is a binary switch */
                                 THEN 'INSIDE'
                                 ELSE 'OUTSIDE'
                             END AS INOUT
                       FROM (SELECT ROW_NUMBER() OVER (ORDER BY u.y ASC) AS id,
                                    u.x,
                                    u.y,
                                    CASE WHEN u.touchCross IN (0)         /* Cross */ THEN 1
                                         WHEN u.touchCross IN (-1,-2,1,2) /* Touch */ THEN 0
                                         ELSE 0
                                     END AS INOUT,
                                    ABS(LEAD(u.y,1) OVER(ORDER BY u.y) - u.y) AS YDiff
                               FROM (SELECT s.x,
                                            s.y,
                                            /* In cases where polygons have boundaries/holes that touch at a point we need to count them more than once */
                                            CASE WHEN COUNT(*) > 2 THEN 1 ELSE SUM(s.touchcross) END AS touchcross
                                       FROM (SELECT t.x,
                                                    t.y,
                                                    t.touchcross
                                               FROM (SELECT r.x,
                                                            CASE WHEN (r.endx = r.startx)
                                                                 THEN (r.starty + r.endy ) / 2
                                                                 ELSE round(r.starty + ( (r.endy-r.starty)/(r.endx-r.startx) ) * (r.x-r.startx),@p_round_y)
                                                             END AS y,
                                                            CASE WHEN ( r.x = r.startx AND r.x = r.endx )
                                                                 THEN 99  /* Line is Vertical */
                                                                 WHEN ( ( r.x = r.startx AND r.x > r.endx )
                                                                     OR ( r.x = r.endX   AND r.x > r.startX ) )
                                                                 THEN -1 /* Left Touch */
                                                                 WHEN ( ( r.x = r.endX   AND r.x < r.startX  )
                                                                     OR ( r.x = r.startX AND r.x < r.endX ) )
                                                                 THEN 1 /* Right Touch */
                                                                 ELSE 0 /* cross */
                                                             END AS TouchCross
                                                        FROM (SELECT c.x,
                                                                     round(v.sx,@p_round_x) AS startX,
                                                                     round(v.sy,@p_round_y) AS startY,
                                                                     round(v.ex,@p_round_x) AS endX,
                                                                     round(v.ey,@p_round_y) AS endY
                                                                FROM (SELECT round(avg(p.x),@p_round_x) AS x
                                                                        FROM [dbo].[STDUMPPOINTS](@v_seed) p
                                                                     ) c
                                                                     CROSS apply
                                                                     [dbo].[STVECTORIZE](@p_geometry) v
                                                             ) r
                                                       WHERE r.x BETWEEN r.StartX AND r.endx
                                                          OR r.x BETWEEN r.endx   AND r.startx
                                                    ) t
                                            ) s
                                      GROUP BY s.x,s.y
                                    ) u
                            ) w
                    ) z
              WHERE z.INOUT = 'INSIDE'
            ) f
         ORDER BY f.ydiff DESC;
      RETURN geometry::Point(@v_cx, @v_cy, @p_geometry.STSrid);
   END;
 END
 GO

Now let’s test this function weighting for area (vertex density), MBR (will be the same as the geometry is balanced around its centroid spine of X=2300 ) and via a supplied seed. Comparison to STCentroid is provided.

 WITH poly AS (
    SELECT geometry::STGeomFromText('POLYGON((2300 -700, 2800 -300, 2300 700, 2800 1100, 2300 1100, 1800 1100, 2300 400, 2300 200, 2100 100, 2500 100, 2300 -200, 1800 -300, 2300 -500, 2200 -400, 2400 -400, 2300 -700),
                                                     (2300 1000, 2400  900, 2200 900, 2300 1000))',0) AS geom
 )
 SELECT 'O' AS Label, geom FROM poly
 UNION ALL
 SELECT 'A' AS Label, [GISDB].[dbo].[STCentroid_A](geom,0,NULL,3,3,3).STBuffer(15) AS geom FROM poly
 UNION ALL
 SELECT 'M' AS Label, [GISDB].[dbo].[STCentroid_A](geom,1,NULL,3,3,3).STBuffer(15) AS geom FROM poly
 UNION ALL
 SELECT 'U' AS Label, [GISDB].[dbo].[STCentroid_A](geom,2,2050,3,3,3).STBuffer(15) AS geom FROM poly
 UNION ALL
 SELECT 'S' AS Label, geom.STCentroid().STBuffer(15) AS geom  FROM poly;

This looks like:

Testing a different geometry shows all four positions more clearly.

 WITH weightedPoly AS (
    SELECT geometry::STGeomFromText('POLYGON((258.72254365233152 770.97400259630615, 268.79365642517564 739.08214548229967, 278.86476919801976 707.1902883682933, 332.57737065318844 693.76213800450114, 366.14774656266889 676.97695004976094, 426.57442319973364 697.11917559544918, 520.57147574627891 737.40362668682576, 631.35371624756431 744.11770186872184, 829.41893411349884 797.83030332389046, 1547.8249785763801 791.11622814199438, 1205.4071442996797 895.18439346138371, 832.77597170444687 1039.5370098721496, 490.3581374277465 1086.5355361454222, 416.50331042688953 1076.464423372578, 381.25441572193506 1059.6792354178378, 346.00552101698065 1042.8940474630976, 320.82773908487036 1019.3947843264614, 295.64995715276001 995.89552118982499, 287.25736317538986 964.00366407581862, 278.86476919801976 932.11180696181225, 282.2218067889678 891.82735587043567, 277.18625040254574 858.25697996095528, 272.15069401612368 824.68660405147489, 258.72254365233152 770.97400259630615))',0) AS geom
 )
 SELECT 'O' AS Label,  geom FROM weightedPoly
 UNION ALL
 SELECT 'A' AS Label, [GISDB].[dbo].[STCentroid_A](geom,0,NULL,2,2,2).STBuffer(10) AS geom FROM weightedPoly
 UNION ALL
 SELECT 'M' AS Label, [GISDB].[dbo].[STCentroid_A](geom,1,NULL,2,2,2).STBuffer(10) AS geom FROM weightedPoly
 UNION ALL
 SELECT 'U' AS Label, [GISDB].[dbo].[STCentroid_A](geom,2,1200,2,2,2).STBuffer(10) AS geom FROM weightedPoly
 UNION ALL
 SELECT 'S' AS Label, geom.STCentroid().STBuffer(10) AS geom FROM weightedPoly;

Looks like this:

Centroid for Linear geometries

 DROP FUNCTION [dbo].[STCentroid_L];
 CREATE FUNCTION [dbo].[STCENTROID_L] (
   @p_geometry          geometry,
   @p_position_as_ratio FLOAT = 0.5,
   @p_round_x           INT = 3,
   @p_round_y           INT = 3,
   @p_round_z           INT = 3
 )
 RETURNS geometry
 AS
 BEGIN
   DECLARE
     @v_geometry geometry;
   BEGIN
     IF ( @p_geometry IS NULL )
        RETURN @p_geometry;
     SELECT TOP 1
            @v_geometry = geometry::STGeomFromText('POINT(' +
                      LTRIM(STR(round(i3.x2-((i3.x2-i3.x1)*i3.vectorPositionRatio),@p_round_x),24,15)) + ' ' +
                      LTRIM(STR(round(i3.y2-((i3.y2-i3.y1)*i3.vectorPositionRatio),@p_round_y),24,15)) +
                      CASE WHEN i3.z2 IS NOT NULL
                           THEN ' ' + LTRIM(STR(round(i3.z2-((i3.z2-i3.z1)*vectorPositionRatio),@p_round_z),24,15))
                           ELSE ''
                       END + ')',@p_geometry.STSrid)
             FROM (SELECT /* select vector/segment "containing" centroid or mid-point of linestring */
                          i2.SRID,
                          i2.X1,i2.Y1,i2.Z1,
                          i2.X2,i2.Y2,i2.Z2,
                          (i2.cumLength-i2.pointDistance)/i2.vectorLength AS vectorPositionRatio,
                          CASE WHEN pointDistance BETWEEN CASE WHEN lag(cumLength,1) OVER (ORDER BY VID) IS NULL
                                                               THEN 0
                                                               ELSE lag(cumLength,1) OVER (ORDER BY VID)
                                                            END
                                                      AND vectorLength +
                                                          CASE WHEN lag(cumLength,1) OVER (ORDER BY VID) IS NULL
                                                               THEN 0
                                                               ELSE lag(cumLength,1) OVER (ORDER BY VID)
                                                           END
                               THEN 1
                               ELSE 0
                           END AS RightSegment
                     FROM (SELECT i1.VID,
                                  i1.SRID,
                                  i1.X1,i1.Y1,i1.Z1,
                                  i1.X2,i1.Y2,i1.Z2,
                                  i1.vectorLength,
                                  i1.pointDistance,
                                  /* generate cumulative length */
                                 SUM(vectorLength) OVER (ORDER BY vid ROWS UNBOUNDED PRECEDING) AS cumLength
                             FROM (SELECT v.id AS vid,
                                          @p_geometry.STSrid AS srid,
                                          (@p_geometry.STLength() * @p_position_as_ratio) AS pointDistance,
                                          v.sx AS X1, v.sy AS Y1, v.sz AS Z1,
                                          v.ex AS X2, v.ey AS Y2, v.ez AS Z2,
                                          geometry::Point(v.sX,v.sy/*,v.sz*/,@p_geometry.STSrid).STDistance(geometry::Point(v.eX,v.ey,/*,v.sz*/@p_geometry.STSrid)) AS vectorLength
                                     FROM [dbo].[STVECTORIZE](@p_geometry) AS v
                                  ) i1
                        ) i2
                       )  i3
                 WHERE i3.rightSegment = 1;
      RETURN @v_geometry;
   END;
 END
 GO

Testing a range of percentage values (5/10, 0.5 or 50% is centroid)

 WITH line AS (
   SELECT geometry::STGeomFromText('LINESTRING(258.72254365233152 770.97400259630615, 268.79365642517564 739.08214548229967, 278.86476919801976 707.1902883682933, 332.57737065318844 693.76213800450114, 366.14774656266889 676.97695004976094, 426.57442319973364 697.11917559544918, 520.57147574627891 737.40362668682576, 631.35371624756431 744.11770186872184, 829.41893411349884 797.83030332389046, 1547.8249785763801 791.11622814199438, 1205.4071442996797 895.18439346138371, 832.77597170444687 1039.5370098721496, 490.3581374277465 1086.5355361454222, 416.50331042688953 1076.464423372578, 381.25441572193506 1059.6792354178378, 346.00552101698065 1042.8940474630976, 320.82773908487036 1019.3947843264614, 295.64995715276001 995.89552118982499, 287.25736317538986 964.00366407581862, 278.86476919801976 932.11180696181225, 282.2218067889678 891.82735587043567, 277.18625040254574 858.25697996095528, 272.15069401612368 824.68660405147489, 258.72254365233152 770.97400259630615)',0) AS geom
 )
 SELECT 'O' AS label, geom FROM line
 UNION ALL
 SELECT CAST(a.intValue*10 AS VARCHAR) + '%' AS label, [dbo].[STCentroid_L](geom,CAST(a.IntValue AS FLOAT)/10.0,3,3,2).STBuffer(15) AS geom
   FROM line
        CROSS apply
      dbo.generate_series(0,9,1) a;

This looks like:

Generic Centroid function

Selects the right function out of those above.

 DROP FUNCTION [dbo].[STCentroid];
 CREATE FUNCTION [dbo].[STCentroid] (
   @p_geometry     geometry,
   @p_start        INT = 0,  /* 0 = use average of all Area's vertices for starting X centroid calculation otherwise centre X of MBR */
   @p_largest      INT = 1,
   @p_round_x      INT = 3,
   @p_round_y      INT = 3,
   @p_round_z      INT = 3
 )
 RETURNS geometry
 AS
 BEGIN
   DECLARE
      @v_geomn    INT,
      @v_geom     geometry,
      @v_val      FLOAT,
      @v_test_val FLOAT,
      @v_geometry geometry,
      @v_gtype    nvarchar(MAX);
   BEGIN
       IF ( @p_geometry IS NULL )
        RETURN @p_geometry;
       SET @v_gtype = @p_geometry.STGeometryType();
       SET @v_geometry = @p_geometry;
       IF ( @v_gtype IN ('MultiPolygon','MultiLineString') )
       BEGIN
         SET @v_test_val = -9999999999.9999999;
         IF ( @p_largest = 0 )
            SET @v_test_val = 9999999999.9999999;
         -- Get parts of multi-part geometry
         --
         SET @v_geomn  = 1;
         WHILE ( @v_geomn <= @p_geometry.STNumGeometries() )
         BEGIN
           SET @v_geometry = @p_geometry.STGeometryN(@v_geomn);
           IF ( @v_geometry.STGeometryType() = 'LineString' )
              SET @v_val = @v_geometry.STLength()
           ELSE
              SET @v_val = @v_geometry.STArea();
           IF ( @p_largest = 1 )
           BEGIN
              IF ( @v_val >= @v_test_val )
              BEGIN
                 SET @v_test_val  = @v_val;
                 SET @v_geom = @v_geometry;
              END;
           END;
           ELSE
           BEGIN
              IF ( @v_val <= @v_test_val )
              BEGIN
                 SET @v_test_val  = @v_val;
                 SET @v_geom = @v_geometry;
              END;
           END;
           SET @v_geomn = @v_geomn + 1;
         END;
         SET @v_geometry = @v_geom;
       END;
       -- Now get centroid
       --
       IF ( @v_gtype = 'Point' )                            SET @v_geometry = @p_geometry;
       IF ( @v_gtype = 'MultiPoint' )                       SET @v_geometry = dbo.STCentroid_P(@p_geometry,@p_round_x,@p_round_y,@p_round_z)
       IF ( @v_gtype IN ('LineString','MultiLineString' ) ) SET @v_geometry = dbo.STCentroid_L(@v_geometry,0.5,@p_round_x,@p_round_y,@p_round_z)
       IF ( @v_gtype IN ('Polygon','MultiPolygon') )        SET @v_geometry = dbo.STCentroid_A(@v_geometry,@p_start,NULL,@p_round_x,@p_round_y,@p_round_z)
     RETURN @v_geometry;
   END;
 END
 GO

Testing this we get.

 WITH mPoly AS (
    SELECT geometry::STGeomFromText('MULTIPOLYGON (((0 0, 100 0, 100 100, 0 100, 0 0)), ((110 110, 150 110, 150 150, 110 150, 110 110)))',0) AS geom
 )
 SELECT geom FROM mPoly
 UNION ALL
 SELECT dbo.STCentroid(geom,1 /* Weight By MBR not Vertices */,1 /* Use Largest part in any multipart */,3,3,3).STBuffer(5) AS geom
   FROM mPoly;

This looks like:

MultiCentroid function for multi part geometries

 DROP   FUNCTION [dbo].[STMULTICENTROID];
 CREATE FUNCTION [dbo].[STMULTICENTROID] (
     @p_geometry  GEOMETRY,
     @p_start     INT = 0,  /* 0 = use average of all Area's vertices for starting X centroid calculation otherwise centre X of MBR */
     @p_round_x   INT = 3,
     @p_round_y   INT = 3,
     @p_round_z   INT = 3 )
 RETURNS GEOMETRY
 AS
 BEGIN
   DECLARE
      @v_geomn     INT,
      @v_centroid  geometry,
      @v_part_geom geometry,
      @v_geometry  geometry,
      @v_gtype     nvarchar(MAX);
   BEGIN
       IF ( @p_geometry IS NULL )
        RETURN @p_geometry;
       SET @v_gtype = @p_geometry.STGeometryType();
       IF ( @v_gtype = 'Point' )      SET @v_geometry = @p_geometry;
       IF ( @v_gtype = 'LineString')  SET @v_geometry = [dbo].[STCENTROID_L](@p_geometry,0.5,@p_round_x,@p_round_y,@p_round_z)
       IF ( @v_gtype = 'Polygon' )    SET @v_geometry = [dbo].[STCENTROID_A](@p_geometry,@p_start,NULL,@p_round_x,@p_round_y,@p_round_z)
       IF ( @v_gtype = 'MultiPoint' ) SET @v_geometry = @p_geometry;
       IF ( @v_gtype IN ('MultiPolygon','MultiLineString') )
       BEGIN
         -- Get parts of multi-part geometry
         --
         SET @v_geomn  = 1;
         WHILE ( @v_geomn <= @p_geometry.STNumGeometries() )
         BEGIN
           SET @v_part_geom = @p_geometry.STGeometryN(@v_geomn);
           IF ( @v_part_geom.STGeometryType() = 'LineString' )
              SET @v_centroid = [dbo].[STCENTROID_L](@v_part_geom,0.5,@p_round_x,@p_round_y,@p_round_z)
           ELSE
              SET @v_centroid = [dbo].[STCENTROID_A](@v_part_geom,@p_start,NULL,@p_round_x,@p_round_y,@p_round_z);
           SET @v_geometry = CASE WHEN @v_geometry IS NULL THEN @v_centroid ELSE @v_geometry.STUnion(@v_centroid) END;
           SET @v_geomn = @v_geomn + 1;
         END;
       END;
       RETURN @v_geometry;
     END;
 END
 GO

Testing we get.

 WITH mPoly AS (
   SELECT geometry::STGeomFromText('MULTIPOLYGON (((0 0, 100 0, 100 100, 0 100, 0 0)), ((110 110, 150 110, 150 150, 110 150, 110 110)))',0) AS geom
 )
 SELECT geom FROM mPoly
 UNION ALL
 SELECT dbo.STMultiCentroid(geom,1 /* Weight By MBR not Vertices */,3,3,3).STBuffer(5) AS geom
   FROM mPoly;

Which looks like:

I hope this is of use to SQL Server Spatial users out there.