Filtering very short linestrings via bitmap function index

A colleague wrote to me today asking:

I’m looking for a way to improve a query to detect all line geometries which are shorter then 10 mm.

It takes days to check all our lines (full tabel scans on millions of rows);

Can’t I use the domain index to filter out the desired lines?

 DROP   TABLE Test10MM  PURGE;
 -- table TEST10MM dropped.
 CREATE TABLE Test10MM ( id INTEGER, geom mdsys.sdo_geometry );
 -- table TEST10MM created.
 INSERT INTO Test10MM (id, geom)
      SELECT rownum,
             mdsys.sdo_geometry(2002,NULL,NULL,
                                 MDSYS.SDO_ELEM_INFO_ARRAY(1,2,1),
                                 MDSYS.SDO_ORDINATE_ARRAY(
                                    ROUND(dbms_random.VALUE(0,0.1),3),
                                    ROUND(dbms_random.VALUE(0,0.1),3),
                                    ROUND(dbms_random.VALUE(0,0.1),3),
                                    ROUND(dbms_random.VALUE(0,0.1),3)))
       FROM DUAL
    CONNECT BY LEVEL <= 5000;
 -- 5,000 rows inserted.
 COMMIT;
 -- commited.
 SELECT 'Inserted '||COUNT(*)||' records into Test10MM' AS RecCount FROM Test10MM;
 -- RECCOUNT
 -- -----------------------------------------------------------------------
 -- Inserted 5000 records into Test10MM
 DELETE FROM USER_SDO_GEOM_METADATA WHERE TABLE_NAME = 'TEST10MM';
 -- 1 rows deleted.
 DECLARE
   v_shape mdsys.sdo_geometry;
 BEGIN
   SELECT SDO_AGGR_MBR(geom) INTO v_shape FROM Test10MM;
   INSERT INTO USER_SDO_GEOM_METADATA(TABLE_NAME,COLUMN_NAME,DIMINFO,SRID)
     VALUES ('TEST10MM','GEOM',MDSYS.SDO_DIM_ARRAY(
     MDSYS.SDO_DIM_ELEMENT('X',V_SHAPE.SDO_ORDINATES(1),V_SHAPE.SDO_ORDINATES(2),0.05),
     MDSYS.SDO_DIM_ELEMENT('Y',V_SHAPE.SDO_ORDINATES(3),V_SHAPE.SDO_ORDINATES(4),0.05)),NULL);
 END;
 /
 SHOW ERRORS
 -- anonymous block completed
 -- No Errors.
 COMMIT;
 -- commited.
 CREATE INDEX Test10MM_GEOM ON Test10MM(geom)
        INDEXTYPE IS mdsys.spatial_index
        parameters('sdo_indx_dims=2, layer_gtype=line');
 -- index TEST10MM_GEOM created.
 EXEC dbms_stats.gather_table_stats(ownname => 'CODESYS', tabname => 'TEST10MM', estimate_percent => 100, method_opt => 'for all indexed columns size auto' )
 -- anonymous block completed
 -- Normal filtering without index
 --
 SET autotrace ON EXPLAIN
 -- Autotrace Enabled
 -- Displays the execution plan only.
 SELECT COUNT(*) AS LenTo10mmCount
   FROM Test10MM a
  WHERE round(sdo_geom.sdo_length(a.geom,0.0005),2) <= 0.01;
 /*
 LENTO10MMCOUNT
 --------------
 308
 Plan hash value: 2043904050
 -------------------------------------------------------------------------------
 | Id  | Operation          | Name     | Rows  | Bytes | Cost (%CPU)| Time     |
 -------------------------------------------------------------------------------
 |   0 | SELECT STATEMENT   |          |     1 |     8 |    26   (4)| 00:00:01 |
 |   1 |  SORT AGGREGATE    |          |     1 |     8 |            |          |
 |*  2 |   TABLE ACCESS FULL| TEST10MM |   250 |  2000 |    26   (4)| 00:00:01 |
 -------------------------------------------------------------------------------
 Predicate Information (identified by operation id):
 ---------------------------------------------------
   2 - filter(ROUND("SDO_GEOM"."SDO_LENGTH"("A"."GEOM",0.0005),2)<=0.01)
 */
 -- Another method of identifying lines with length < 10mm
 --
 SELECT COUNT(*) AS tenmmcount
   FROM Test10MM a
  WHERE CASE WHEN round(sdo_geom.sdo_length(a.geom,0.0005),2) * 100 <= 1 THEN 1 ELSE 0 END = 1;
 /*
 TENMMCOUNT
 ----------------------
 308
 Plan hash value: 2043904050
 -------------------------------------------------------------------------------
 | Id  | Operation          | Name     | Rows  | Bytes | Cost (%CPU)| Time     |
 -------------------------------------------------------------------------------
 |   0 | SELECT STATEMENT   |          |     1 |     8 |    26   (4)| 00:00:01 |
 |   1 |  SORT AGGREGATE    |          |     1 |     8 |            |          |
 |*  2 |   TABLE ACCESS FULL| TEST10MM |   250 |  2000 |    26   (4)| 00:00:01 |
 -------------------------------------------------------------------------------
 Predicate Information (identified by operation id):
 ---------------------------------------------------
    2 - filter(ROUND("SDO_GEOM"."SDO_LENGTH"("A"."GEOM",0.0005),2)*100<=1)
 */
 -- Can we create a function based index on this predicate?
 --
 CREATE bitmap INDEX Test10MM_10mm_idx ON Test10MM(CASE WHEN round(sdo_geom.sdo_length(geom,0.0005),2) * 100 <= 1 THEN 1 ELSE 0 END);
 /*
 SQL Error: ORA-30553: The function is not deterministic
 30553. 00000 -  "The function is not deterministic"
 *Cause:    The function on which the index is defined is not deterministic
 *Action:   If the function is deterministic, mark it DETERMINISTIC.  If it
            is not deterministic (it depends on package state, database state,
            current time, or anything other than the function inputs) then
            do not create the index.  The values returned by a deterministic
            function should not change even when the function is rewritten or
            recompiled.
 */
 -- No We can't
 -- Even if we could the use of functional index requires one to use the exact predicate in the query where clause!!!
 -- Let's wrap the predicate in a deterministic function
 --
 CREATE OR REPLACE FUNCTION is10mm(p_geom IN mdsys.sdo_geometry)
 RETURN NUMBER deterministic
 IS
 BEGIN
   RETURN CASE WHEN round(sdo_geom.sdo_length(p_geom,0.0005),2) * 100 <= 1 THEN 1 ELSE 0 END;
 END is10mm;
 /
 SHOW errors
 -- FUNCTION is10mm compiled
 -- No Errors.
 -- Now create a bitmap index on the function
 --
 CREATE bitmap INDEX Test10MM_10mm_idx ON Test10MM(is10mm(geom));
 -- bitmap index TEST10MM_10MM_IDX created.
 -- Gather stats on it.
 --
 EXEC dbms_stats.gather_index_stats(ownname => 'CODESYS', indname => 'TEST10MM_10MM_IDX', estimate_percent => 100);
 -- anonymous block completed
 SET autotrace ON EXPLAIN
 -- Autotrace Enabled
 -- Displays the execution plan only.
 SELECT COUNT(*)
   FROM Test10MM a
  WHERE is10MM(geom) = 1;
 SET autotrace ON EXPLAIN
 -- Autotrace Enabled
 -- Displays the execution plan only.
 SELECT COUNT(*)
   FROM Test10MM a
  WHERE is10MM(geom) = 1;
 /*
 COUNT(*)
 ----------------------
 308
 Plan hash value: 577608402
 ---------------------------------------------------------------------------------------------------
 | Id  | Operation                     | Name              | Rows  | Bytes | Cost (%CPU)| Time     |
 ---------------------------------------------------------------------------------------------------
 |   0 | SELECT STATEMENT              |                   |     1 |     8 |     1   (0)| 00:00:01 |
 |   1 |  SORT AGGREGATE               |                   |     1 |     8 |            |          |
 |   2 |   BITMAP CONVERSION COUNT     |                   |    50 |   400 |     1   (0)| 00:00:01 |
 |*  3 |    BITMAP INDEX FAST FULL SCAN| TEST10MM_10MM_IDX |       |       |            |          |
 ---------------------------------------------------------------------------------------------------
 Predicate Information (identified by operation id):
 ---------------------------------------------------
    3 - filter("CODESYS"."IS10MM"("GEOM")=1)
 */

Note that the query now correctly uses the bitmap index to speed the query achieving that which we set out to do.

The result is that a suitable function can be created that identifies small lines that can be used to execute fast queries against large tables. Of course, the result of the function is10MM could be cached or persisted in its own column and kept up to date via a trigger, but that is about implementation not the validity of the method.

I hope this is of use to someone.