Add SGML docs for contrib/dict_int and contrib/dict_xsyn.

Albert Cervera i Areny
19 years ago · ec2ff52045
parent 43da837eda
commit ec2ff52045
4 changed files with 162 additions and 2 deletions
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.4 2007/11/14 02:36:43 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.5 2007/12/02 21:13:34 tgl Exp $ -->

 <appendix id="contrib">
 <title>Additional Supplied Modules</title>
@@ -82,6 +82,8 @@ psql -d dbname -f <replaceable>SHAREDIR</>/contrib/<replaceable>module</>.sql
 &chkpass;
 &cube;
 &dblink;
+ &dict-int;
+ &dict-xsyn;
 &earthdistance;
 &fuzzystrmatch;
 &hstore;
--- a/doc/src/sgml/dict-int.sgml
+++ b/doc/src/sgml/dict-int.sgml
@@ -0,0 +1,78 @@
+<sect1 id="dict-int">
+ <title>dict_int</title>
+ 
+ <indexterm zone="dict-int">
+  <primary>dict_int</primary>
+ </indexterm>
+
+ <para>
+  The motivation for this example dictionary is to control the indexing of
+  integers (signed and unsigned), and, consequently, to minimize the number of
+  unique words, which greatly affects the performance of searching.
+ </para>
+
+ <sect2>
+  <title>Configuration</title>
+  <para>
+   The dictionary accepts two options: 
+  </para>
+
+  <itemizedlist>
+   <listitem>
+    <para>
+     The MAXLEN parameter specifies the maximum length (number of digits)
+     allowed in an integer word.  The default value is 6.
+    </para>
+   </listitem>
+   <listitem>
+    <para>
+     The REJECTLONG parameter specifies whether an overlength integer should be
+     truncated or ignored. If REJECTLONG=FALSE (default), the dictionary returns
+     the first MAXLEN digits of the integer. If REJECTLONG=TRUE, the
+     dictionary treats an overlength integer as a stop word, so that it will
+     not be indexed.
+    </para>
+   </listitem>
+  </itemizedlist>
+ </sect2>
+
+ <sect2>
+  <title>Usage</title>
+
+  <para>
+   Running the installation script creates a text search template
+   <literal>intdict_template</> and a dictionary <literal>intdict</>
+   based on it, with the default parameters.  You can alter the
+   parameters, for example
+
+<programlisting>
+mydb=# ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 4, REJECTLONG = true);
+ALTER TEXT SEARCH DICTIONARY
+</programlisting>
+
+   or create new dictionaries based on the template.
+  </para>
+
+  <para>
+   To test the dictionary, you can try
+
+<programlisting>
+mydb=# select ts_lexize('intdict', '12345678');
+ ts_lexize
+-----------
+ {123456}
+</programlisting>
+
+   but real-world usage will involve including it in a text search
+   configuration as described in <xref linkend="textsearch">.
+   That might look like this:
+
+<programlisting>
+ALTER TEXT SEARCH CONFIGURATION english
+    ALTER MAPPING FOR int, uint WITH intdict;
+</programlisting>
+
+  </para>
+ </sect2>
+
+</sect1>
--- a/doc/src/sgml/dict-xsyn.sgml
+++ b/doc/src/sgml/dict-xsyn.sgml
@@ -0,0 +1,78 @@
+<sect1 id="dict-xsyn">
+ <title>dict_xsyn</title>
+ 
+ <indexterm zone="dict-xsyn">
+  <primary>dict_xsyn</primary>
+ </indexterm>
+
+ <para>
+  The Extended Synonym Dictionary module replaces words with groups of their
+  synonyms, and so makes it possible to search for a word using any of its
+  synonyms.
+ </para>
+
+ <sect2>
+  <title>Configuration</title>
+  <para>
+   A <literal>dict_xsyn</> dictionary accepts the following options:
+  </para>
+  <itemizedlist>
+   <listitem>
+    <para>
+     KEEPORIG controls whether the original word is included, or only its
+     synonyms. The default is 'true'.
+    </para>
+   </listitem>
+   <listitem>
+    <para>
+     RULES is the base name of the file containing the list of synonyms.
+     This file must be in $(prefix)/share/tsearch_data/, and its name must
+     end in ".rules" (which is not included in the RULES parameter).
+    </para>
+   </listitem>
+  </itemizedlist>
+  <para>
+   The rules file has the following format:
+  </para>
+  <itemizedlist>
+   <listitem>
+    <para>
+     Each line represents a group of synonyms for a single word, which is
+     given first on the line. Synonyms are separated by whitespace:
+    </para>
+    <programlisting>
+word syn1 syn2 syn3
+    </programlisting>
+   </listitem>
+   <listitem>
+    <para>
+     The sharp sign ('#') is a comment delimiter.  It may appear at any
+     position in a line; the rest of the line will be skipped.
+    </para>
+   </listitem>
+  </itemizedlist>
+
+  <para>
+   Look at xsyn_sample.rules, which is installed in $(prefix)/share/tsearch_data/,
+   for an example.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Usage</title>
+  <programlisting>
+mydb=# SELECT ts_lexize('xsyn','word');
+ts_lexize
+----------------
+{word,syn1,syn2,syn3}
+  </programlisting>
+  <para>
+   Change dictionary options:
+  </para>
+  <programlisting>
+mydb=# ALTER TEXT SEARCH DICTIONARY xsyn (KEEPORIG=false);
+ALTER TEXT SEARCH DICTIONARY
+  </programlisting>
+ </sect2>
+
+</sect1>
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.53 2007/11/14 01:09:50 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.54 2007/12/02 21:13:34 tgl Exp $ -->

 <!entity history    SYSTEM "history.sgml">
 <!entity info       SYSTEM "info.sgml">
@@ -96,6 +96,8 @@
 <!entity chkpass         SYSTEM "chkpass.sgml">
 <!entity cube            SYSTEM "cube.sgml">
 <!entity dblink          SYSTEM "dblink.sgml">
+<!entity dict-int        SYSTEM "dict-int.sgml">
+<!entity dict-xsyn       SYSTEM "dict-xsyn.sgml">
 <!entity earthdistance   SYSTEM "earthdistance.sgml">
 <!entity fuzzystrmatch   SYSTEM "fuzzystrmatch.sgml">
 <!entity hstore          SYSTEM "hstore.sgml">