postgres/doc/src/sgml/intarray.sgml

<sect1 id="intarray">
 <title>intarray</title>
 
 <indexterm zone="intarray">
  <primary>intarray</primary>
 </indexterm>

 <para>
  This is an implementation of RD-tree data structure using GiST interface
  of PostgreSQL. It has built-in lossy compression.
 </para>

 <para>
  Current implementation provides index support for one-dimensional array of
  int4's - gist__int_ops, suitable for small and medium size of arrays (used on
  default), and gist__intbig_ops for indexing large arrays (we use superimposed
  signature with length of 4096 bits to represent sets).
 </para>

 <sect2>
  <title>Functions</title>

  <itemizedlist>

   <listitem>
    <para>
     <literal>int   icount(int[])</literal> - the number of elements in intarray
    </para>
    <programlisting>
test=# select icount('{1,2,3}'::int[]);   
 icount 
--------
      3
(1 row)
    </programlisting>
   </listitem>

   <listitem>
    <para>
     <literal>int[] sort(int[], 'asc' | 'desc')</literal> - sort intarray
    </para>
    <programlisting>
test=# select sort('{1,2,3}'::int[],'desc');
  sort   
---------
 {3,2,1}
(1 row)
    </programlisting>
   </listitem>

   <listitem>
    <para>
     <literal>int[] sort(int[])</literal> - sort in ascending order
    </para>
   </listitem>

   <listitem>
    <para>
     <literal>int[] sort_asc(int[]),sort_desc(int[])</literal> - shortcuts for sort 
    </para>
   </listitem>

   <listitem>
    <para>
     <literal>int[] uniq(int[])</literal> - returns unique elements
    </para>
    <programlisting>
test=# select uniq(sort('{1,2,3,2,1}'::int[]));
  uniq   
---------
 {1,2,3}
(1 row)
    </programlisting>
   </listitem>

   <listitem>
    <para>
     <literal>int   idx(int[], int item)</literal> - returns index of first 
      intarray matching element to item, or '0' if matching failed.
    </para>
    <programlisting>
test=# select idx('{1,2,3,2,1}'::int[],2);
 idx 
-----
   2
(1 row)
    </programlisting>
   </listitem>

   <listitem>
    <para>
     <literal>int[] subarray(int[],int START [, int LEN])</literal> - returns 
    part of intarray starting from element number START (from 1) and length LEN. 
    </para>
    <programlisting>
test=# select subarray('{1,2,3,2,1}'::int[],2,3);
 subarray 
----------
 {2,3,2}
(1 row)
    </programlisting>
   </listitem>

   <listitem>
    <para>
     <literal>int[] intset(int4)</literal> - casting int4 to int[]
    </para>
    <programlisting>
test=# select intset(1);
 intset 
--------
 {1}
(1 row)
    </programlisting>
   </listitem>

  </itemizedlist>
 </sect2>

 <sect2>
  <title>Operations</title>
  <table>
   <title>Operations</title>
   <tgroup cols="2">
    <thead>
     <row>
      <entry>Operator</entry>
      <entry>Description</entry>
     </row>
    </thead>
    <tbody>
     <row>
      <entry><literal>int[] && int[]</literal></entry>
      <entry>overlap - returns TRUE if arrays have at least one common element</entry>
     </row>
     <row>
      <entry><literal>int[] @> int[]</literal></entry>
      <entry>contains - returns TRUE if left array contains right array</entry>
     </row>
     <row>
      <entry><literal>int[] <@ int[]</literal></entry>
      <entry>contained - returns TRUE if left array is contained in right array</entry>
     </row>
     <row>
      <entry><literal># int[]</literal></entry>
      <entry>returns the number of elements in array</entry>
     </row>
     <row>
      <entry><literal>int[] + int</literal></entry>
      <entry>push element to array ( add to end of array)</entry>
     </row>
     <row>
      <entry><literal>int[] + int[]  </literal></entry>
      <entry>merge of arrays (right array added to the end of left one)</entry>
     </row>
     <row>
      <entry><literal>int[] - int</literal></entry>
      <entry>remove entries matched by right argument from array</entry>
     </row>
     <row>
      <entry><literal>int[] - int[]</literal></entry>
      <entry>remove right array from left</entry>
     </row>
     <row>
      <entry><literal>int[] | int</literal></entry>
      <entry>returns intarray - union of arguments</entry>
     </row>
     <row>
      <entry><literal>int[] | int[]</literal></entry>
      <entry>returns intarray as a union of two arrays</entry>
     </row>

     <row>
      <entry><literal>int[] & int[]</literal></entry>
      <entry>returns intersection of arrays</entry>
     </row>

     <row>
      <entry><literal>int[] @@ query_int</literal></entry>
      <entry>
       returns TRUE if array satisfies query (like 
       <literal>'1&amp;(2|3)'</literal>) 
      </entry>
     </row>

     <row>
      <entry><literal>query_int ~~ int[]</literal></entry>
      <entry>returns TRUE if array satisfies query (commutator of @@)</entry>
     </row>
    </tbody>
   </tgroup>
  </table>
  <para>
   (Before PostgreSQL 8.2, the containment operators @> and <@ were
   respectively called @ and ~.  These names are still available, but are
   deprecated and will eventually be retired.  Notice that the old names
   are reversed from the convention formerly followed by the core geometric
   datatypes!)
  </para>
 </sect2>

 <sect2>
  <title>Example</title>

  <programlisting>
CREATE TABLE message (mid INT NOT NULL,sections INT[]);
CREATE TABLE message_section_map (mid INT NOT NULL,sid INT NOT NULL);

-- create indices
CREATE unique index message_key ON message ( mid );
CREATE unique index message_section_map_key2 ON message_section_map (sid, mid );
CREATE INDEX message_rdtree_idx ON message USING GIST ( sections gist__int_ops);

-- select some messages with section in 1 OR 2 - OVERLAP operator
SELECT message.mid FROM message WHERE message.sections && '{1,2}';  

-- select messages contains in sections 1 AND 2 - CONTAINS operator
SELECT message.mid FROM message WHERE message.sections @> '{1,2}';
-- the same, CONTAINED operator
SELECT message.mid FROM message WHERE '{1,2}' <@ message.sections;
  </programlisting>
 </sect2>

 <sect2>
  <title>Benchmark</title>
  <para>
  subdirectory bench contains benchmark suite.
  </para>
  <programlisting>
  cd ./bench
  1. createdb TEST
  2. psql TEST < ../_int.sql
  3. ./create_test.pl | psql TEST
  4. ./bench.pl - perl script to benchmark queries, supports OR, AND queries
                  with/without RD-Tree. Run script without arguments to 
                  see availbale options.

     a)test without RD-Tree (OR)
       ./bench.pl -d TEST -c -s 1,2 -v
     b)test with RD-Tree 
       ./bench.pl -d TEST -c -s 1,2 -v -r

     BENCHMARKS:
     
     Size of table &lt;message>: 200000
     Size of table &lt;message_section_map>: 269133 
     
     Distribution of messages by sections:
     
     section 0: 74377 messages
     section 1: 16284 messages
     section 50: 1229 messages
     section 99: 683 messages
     
     old - without RD-Tree support,
     new - with RD-Tree
     
     +----------+---------------+----------------+
     |Search set|OR, time in sec|AND, time in sec|
     |          +-------+-------+--------+-------+
     |          |  old  |  new  |   old  |  new  |
     +----------+-------+-------+--------+-------+
     |         1|  0.625|  0.101|       -|      -|
     +----------+-------+-------+--------+-------+
     |        99|  0.018|  0.017|       -|      -|
     +----------+-------+-------+--------+-------+
     |       1,2|  0.766|  0.133|   0.628|  0.045|
     +----------+-------+-------+--------+-------+
     | 1,2,50,65|  0.794|  0.141|   0.030|  0.006|
     +----------+-------+-------+--------+-------+
  </programlisting>
 </sect2>

 <sect2>
  <title>Authors</title>
  <para>
   All work was done by Teodor Sigaev (<email>teodor@stack.net</email>) and Oleg 
   Bartunov (<email>oleg@sai.msu.su</email>). See 
   <ulink url="http://www.sai.msu.su/~megera/postgres/gist"></ulink> for 
   additional information. Andrey Oktyabrski did a great work on adding new 
   functions and operations.
  </para>
 </sect2>

</sect1>
Move most /contrib README files into SGML. Some still need conversion or will never be converted. 18 years ago			`<sect1 id="intarray">`
			`<title>intarray</title>`

			`<indexterm zone="intarray">`
			`<primary>intarray</primary>`
			`</indexterm>`

			`<para>`
			`This is an implementation of RD-tree data structure using GiST interface`
			`of PostgreSQL. It has built-in lossy compression.`
			`</para>`

			`<para>`
			`Current implementation provides index support for one-dimensional array of`
			`int4's - gist__int_ops, suitable for small and medium size of arrays (used on`
			`default), and gist__intbig_ops for indexing large arrays (we use superimposed`
			`signature with length of 4096 bits to represent sets).`
			`</para>`

			`<sect2>`
			`<title>Functions</title>`

			`<itemizedlist>`

			`<listitem>`
			`<para>`
			`<literal>int icount(int[])</literal> - the number of elements in intarray`
			`</para>`
			`<programlisting>`
			`test=# select icount('{1,2,3}'::int[]);`
			`icount`
			`--------`
			`3`
			`(1 row)`
			`</programlisting>`
			`</listitem>`

			`<listitem>`
			`<para>`
			`<literal>int[] sort(int[], 'asc' \| 'desc')</literal> - sort intarray`
			`</para>`
			`<programlisting>`
			`test=# select sort('{1,2,3}'::int[],'desc');`
			`sort`
			`---------`
			`{3,2,1}`
			`(1 row)`
			`</programlisting>`
			`</listitem>`

			`<listitem>`
			`<para>`
			`<literal>int[] sort(int[])</literal> - sort in ascending order`
			`</para>`
			`</listitem>`

			`<listitem>`
			`<para>`
			`<literal>int[] sort_asc(int[]),sort_desc(int[])</literal> - shortcuts for sort`
			`</para>`
			`</listitem>`

			`<listitem>`
			`<para>`
			`<literal>int[] uniq(int[])</literal> - returns unique elements`
			`</para>`
			`<programlisting>`
			`test=# select uniq(sort('{1,2,3,2,1}'::int[]));`
			`uniq`
			`---------`
			`{1,2,3}`
			`(1 row)`
			`</programlisting>`
			`</listitem>`

			`<listitem>`
			`<para>`
			`<literal>int idx(int[], int item)</literal> - returns index of first`
			`intarray matching element to item, or '0' if matching failed.`
			`</para>`
			`<programlisting>`
			`test=# select idx('{1,2,3,2,1}'::int[],2);`
			`idx`
			`-----`
			`2`
			`(1 row)`
			`</programlisting>`
			`</listitem>`

			`<listitem>`
			`<para>`
			`<literal>int[] subarray(int[],int START [, int LEN])</literal> - returns`
			`part of intarray starting from element number START (from 1) and length LEN.`
			`</para>`
			`<programlisting>`
			`test=# select subarray('{1,2,3,2,1}'::int[],2,3);`
			`subarray`
			`----------`
			`{2,3,2}`
			`(1 row)`
			`</programlisting>`
			`</listitem>`

			`<listitem>`
			`<para>`
			`<literal>int[] intset(int4)</literal> - casting int4 to int[]`
			`</para>`
			`<programlisting>`
			`test=# select intset(1);`
			`intset`
			`--------`
			`{1}`
			`(1 row)`
			`</programlisting>`
			`</listitem>`

			`</itemizedlist>`
			`</sect2>`

			`<sect2>`
			`<title>Operations</title>`
			`<table>`
			`<title>Operations</title>`
			`<tgroup cols="2">`
			`<thead>`
			`<row>`
			`<entry>Operator</entry>`
			`<entry>Description</entry>`
			`</row>`
			`</thead>`
			`<tbody>`
			`<row>`
			`<entry><literal>int[] && int[]</literal></entry>`
			`<entry>overlap - returns TRUE if arrays have at least one common element</entry>`
			`</row>`
			`<row>`
			`<entry><literal>int[] @> int[]</literal></entry>`
			`<entry>contains - returns TRUE if left array contains right array</entry>`
			`</row>`
			`<row>`
			`<entry><literal>int[] <@ int[]</literal></entry>`
			`<entry>contained - returns TRUE if left array is contained in right array</entry>`
			`</row>`
			`<row>`
			`<entry><literal># int[]</literal></entry>`
			`<entry>returns the number of elements in array</entry>`
			`</row>`
			`<row>`
			`<entry><literal>int[] + int</literal></entry>`
			`<entry>push element to array ( add to end of array)</entry>`
			`</row>`
			`<row>`
			`<entry><literal>int[] + int[] </literal></entry>`
			`<entry>merge of arrays (right array added to the end of left one)</entry>`
			`</row>`
			`<row>`
			`<entry><literal>int[] - int</literal></entry>`
			`<entry>remove entries matched by right argument from array</entry>`
			`</row>`
			`<row>`
			`<entry><literal>int[] - int[]</literal></entry>`
			`<entry>remove right array from left</entry>`
			`</row>`
			`<row>`
			`<entry><literal>int[] \| int</literal></entry>`
			`<entry>returns intarray - union of arguments</entry>`
			`</row>`
			`<row>`
			`<entry><literal>int[] \| int[]</literal></entry>`
			`<entry>returns intarray as a union of two arrays</entry>`
			`</row>`

			`<row>`
			`<entry><literal>int[] & int[]</literal></entry>`
			`<entry>returns intersection of arrays</entry>`
			`</row>`

			`<row>`
			`<entry><literal>int[] @@ query_int</literal></entry>`
			`<entry>`
			`returns TRUE if array satisfies query (like`
			`<literal>'1&(2\|3)'</literal>)`
			`</entry>`
			`</row>`

			`<row>`
			`<entry><literal>query_int ~~ int[]</literal></entry>`
			`<entry>returns TRUE if array satisfies query (commutator of @@)</entry>`
			`</row>`
			`</tbody>`
			`</tgroup>`
			`</table>`
			`<para>`
			`(Before PostgreSQL 8.2, the containment operators @> and <@ were`
			`respectively called @ and ~. These names are still available, but are`
			`deprecated and will eventually be retired. Notice that the old names`
			`are reversed from the convention formerly followed by the core geometric`
			`datatypes!)`
			`</para>`
			`</sect2>`

			`<sect2>`
			`<title>Example</title>`

			`<programlisting>`
			`CREATE TABLE message (mid INT NOT NULL,sections INT[]);`
			`CREATE TABLE message_section_map (mid INT NOT NULL,sid INT NOT NULL);`

			`-- create indices`
			`CREATE unique index message_key ON message ( mid );`
			`CREATE unique index message_section_map_key2 ON message_section_map (sid, mid );`
			`CREATE INDEX message_rdtree_idx ON message USING GIST ( sections gist__int_ops);`

			`-- select some messages with section in 1 OR 2 - OVERLAP operator`
			`SELECT message.mid FROM message WHERE message.sections && '{1,2}';`

			`-- select messages contains in sections 1 AND 2 - CONTAINS operator`
			`SELECT message.mid FROM message WHERE message.sections @> '{1,2}';`
			`-- the same, CONTAINED operator`
			`SELECT message.mid FROM message WHERE '{1,2}' <@ message.sections;`
			`</programlisting>`
			`</sect2>`

			`<sect2>`
			`<title>Benchmark</title>`
			`<para>`
			`subdirectory bench contains benchmark suite.`
			`</para>`
			`<programlisting>`
			`cd ./bench`
			`1. createdb TEST`
			`2. psql TEST < ../_int.sql`
			`3. ./create_test.pl \| psql TEST`
			`4. ./bench.pl - perl script to benchmark queries, supports OR, AND queries`
			`with/without RD-Tree. Run script without arguments to`
			`see availbale options.`

			`a)test without RD-Tree (OR)`
			`./bench.pl -d TEST -c -s 1,2 -v`
			`b)test with RD-Tree`
			`./bench.pl -d TEST -c -s 1,2 -v -r`

			`BENCHMARKS:`

			`Size of table <message>: 200000`
			`Size of table <message_section_map>: 269133`

			`Distribution of messages by sections:`

			`section 0: 74377 messages`
			`section 1: 16284 messages`
			`section 50: 1229 messages`
			`section 99: 683 messages`

			`old - without RD-Tree support,`
			`new - with RD-Tree`

			`+----------+---------------+----------------+`
			`\|Search set\|OR, time in sec\|AND, time in sec\|`
			`\| +-------+-------+--------+-------+`
			`\| \| old \| new \| old \| new \|`
			`+----------+-------+-------+--------+-------+`
			`\| 1\| 0.625\| 0.101\| -\| -\|`
			`+----------+-------+-------+--------+-------+`
			`\| 99\| 0.018\| 0.017\| -\| -\|`
			`+----------+-------+-------+--------+-------+`
			`\| 1,2\| 0.766\| 0.133\| 0.628\| 0.045\|`
			`+----------+-------+-------+--------+-------+`
			`\| 1,2,50,65\| 0.794\| 0.141\| 0.030\| 0.006\|`
			`+----------+-------+-------+--------+-------+`
			`</programlisting>`
			`</sect2>`

			`<sect2>`
			`<title>Authors</title>`
			`<para>`
			`All work was done by Teodor Sigaev (<email>teodor@stack.net</email>) and Oleg`
			`Bartunov (<email>oleg@sai.msu.su</email>). See`
			`<ulink url="http://www.sai.msu.su/~megera/postgres/gist"></ulink> for`
			`additional information. Andrey Oktyabrski did a great work on adding new`
			`functions and operations.`
			`</para>`
			`</sect2>`

			`</sect1>`