SQL2003-standard statistical aggregates, by Sergey Koposov. I've added only

the float8 versions of the aggregates, which is all that the standard requires. Sergey's original patch also provided versions using numeric arithmetic, but given the size and slowness of the code, I doubt we ought to include those in core.
19 years ago · 1249cf8f38
parent 0fd087af83
commit 1249cf8f38
12 changed files with 880 additions and 103 deletions
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.327 2006/07/16 23:59:58 neilc Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.328 2006/07/28 18:33:03 tgl Exp $ -->

 <chapter id="functions">
  <title>Functions and Operators</title>
@ -7785,16 +7785,18 @@ SELECT NULLIF(value, '(none)') ...

  <para>
   <firstterm>Aggregate functions</firstterm> compute a single result
-   value from a set of input values.  <xref
-   linkend="functions-aggregate-table"> shows the built-in aggregate
-   functions.  The special syntax considerations for aggregate
+   value from a set of input values.  The built-in aggregate functions
+   are listed in
+   <xref linkend="functions-aggregate-table"> and
+   <xref linkend="functions-aggregate-statistics-table">.
+   The special syntax considerations for aggregate
   functions are explained in <xref linkend="syntax-aggregates">.
   Consult <xref linkend="tutorial-agg"> for additional introductory
   information.
  </para>

  <table id="functions-aggregate-table">
-   <title>Aggregate Functions</title>
+   <title>General-Purpose Aggregate Functions</title>

   <tgroup cols="4">
    <thead>
@ -7897,7 +7899,7 @@ SELECT NULLIF(value, '(none)') ...
      <entry><function>count(*)</function></entry>
      <entry></entry>
      <entry><type>bigint</type></entry>
-      <entry>number of input values</entry>
+      <entry>number of input rows</entry>
     </row>

     <row>
@ -7905,7 +7907,7 @@ SELECT NULLIF(value, '(none)') ...
      <entry>any</entry>
      <entry><type>bigint</type></entry>
      <entry>
-       number of input values for which the value of <replaceable
+       number of input rows for which the value of <replaceable
       class="parameter">expression</replaceable> is not null
      </entry>
     </row>
@ -7948,6 +7950,333 @@ SELECT NULLIF(value, '(none)') ...
      </entry>
     </row>

+     <row>
+      <entry><function>sum(<replaceable class="parameter">expression</replaceable>)</function></entry>
+      <entry>
+       <type>smallint</type>, <type>int</type>,
+       <type>bigint</type>, <type>real</type>, <type>double
+       precision</type>, <type>numeric</type>, or
+       <type>interval</type>
+      </entry>
+      <entry>
+       <type>bigint</type> for <type>smallint</type> or
+       <type>int</type> arguments, <type>numeric</type> for
+       <type>bigint</type> arguments, <type>double precision</type>
+       for floating-point arguments, otherwise the same as the
+       argument data type
+      </entry>
+      <entry>sum of <replaceable class="parameter">expression</replaceable> across all input values</entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+  <para>
+   It should be noted that except for <function>count</function>,
+   these functions return a null value when no rows are selected.  In
+   particular, <function>sum</function> of no rows returns null, not
+   zero as one might expect.  The <function>coalesce</function> function may be
+   used to substitute zero for null when necessary.
+  </para>
+
+  <note>
+    <indexterm>
+      <primary>ANY</primary>
+    </indexterm>
+    <indexterm>
+      <primary>SOME</primary>
+    </indexterm>
+    <para>
+      Boolean aggregates <function>bool_and</function> and 
+      <function>bool_or</function> correspond to standard SQL aggregates
+      <function>every</function> and <function>any</function> or
+      <function>some</function>. 
+      As for <function>any</function> and <function>some</function>, 
+      it seems that there is an ambiguity built into the standard syntax:
+<programlisting>
+SELECT b1 = ANY((SELECT b2 FROM t2 ...)) FROM t1 ...;
+</programlisting>
+      Here <function>ANY</function> can be considered both as leading
+      to a subquery or as an aggregate if the select expression returns 1 row.
+      Thus the standard name cannot be given to these aggregates.
+    </para>
+  </note>
+
+  <note>
+   <para>
+    Users accustomed to working with other SQL database management
+    systems may be surprised by the performance of the
+    <function>count</function> aggregate when it is applied to the
+    entire table. A query like:
+<programlisting>
+SELECT count(*) FROM sometable;
+</programlisting>
+    will be executed by <productname>PostgreSQL</productname> using a
+    sequential scan of the entire table.
+   </para>
+  </note>
+
+
+  <para>
+   <xref linkend="functions-aggregate-statistics-table"> shows
+   aggregate functions typically used in statistical analysis.
+   (These are separated out merely to avoid cluttering the listing
+   of more-commonly-used aggregates.)  Where the description mentions
+   <replaceable class="parameter">N</replaceable>, it means the
+   number of input rows for which all the input expressions are non-null.
+   In all cases, null is returned if the computation is meaningless,
+   for example when <replaceable class="parameter">N</replaceable> is zero.
+  </para>
+
+  <indexterm>
+   <primary>statistics</primary>
+  </indexterm>
+  <indexterm>
+   <primary>linear regression</primary>
+  </indexterm>
+
+  <table id="functions-aggregate-statistics-table">
+   <title>Aggregate Functions for Statistics</title>
+
+   <tgroup cols="4">
+    <thead>
+     <row>
+      <entry>Function</entry>
+      <entry>Argument Type</entry>
+      <entry>Return Type</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>correlation</primary>
+       </indexterm>
+       <function>corr(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sqrt((<replaceable class="parameter">N</replaceable> *
+      sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>))^2 / ((<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>)^2) * (<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">Y</replaceable>^2) - sum(<replaceable
+      class="parameter">Y</replaceable>)^2)))</entry>
+     </row>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>covariance</primary>
+        <secondary>population</secondary>
+       </indexterm>
+       <function>covar_pop(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>) / <replaceable
+      class="parameter">N</replaceable>) / <replaceable
+      class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>covariance</primary>
+        <secondary>sample</secondary>
+       </indexterm>
+       <function>covar_samp(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>) / <replaceable
+      class="parameter">N</replaceable>) / (<replaceable
+      class="parameter">N</replaceable> - 1)</entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_avgx(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">X</replaceable>) /
+      <replaceable class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_avgy(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">Y</replaceable>) /
+      <replaceable class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_count(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>bigint</type>
+      </entry>
+      <entry>number of input rows in which both expressions are non-null</entry>
+     </row>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>regression intercept</primary>
+       </indexterm>
+       <function>regr_intercept(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(sum(<replaceable class="parameter">Y</replaceable>) *
+      sum(<replaceable class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>)) / (<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>)^2)</entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_r2(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(<replaceable class="parameter">N</replaceable> *
+      sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>))^2 / ((<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>)^2) * (<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">Y</replaceable>^2) - sum(<replaceable
+      class="parameter">Y</replaceable>)^2))</entry>
+     </row>
+
+     <row>
+      <entry>
+       <indexterm>
+        <primary>regression slope</primary>
+       </indexterm>
+       <function>regr_slope(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>(<replaceable class="parameter">N</replaceable> *
+      sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>)) / (<replaceable
+      class="parameter">N</replaceable> * sum(<replaceable
+      class="parameter">X</replaceable>^2) - sum(<replaceable
+      class="parameter">X</replaceable>)^2)</entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_sxx(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">X</replaceable>^2) -
+      sum(<replaceable class="parameter">X</replaceable>)^2 / <replaceable
+      class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_sxy(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">X</replaceable>*<replaceable
+      class="parameter">Y</replaceable>) - sum(<replaceable
+      class="parameter">X</replaceable>) * sum(<replaceable
+      class="parameter">Y</replaceable>) / <replaceable
+      class="parameter">N</replaceable></entry>
+     </row>
+
+     <row>
+      <entry>
+       <function>regr_syy(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>
+       <type>double precision</type>
+      </entry>
+      <entry>sum(<replaceable class="parameter">Y</replaceable>^2) -
+      sum(<replaceable class="parameter">Y</replaceable>)^2 / <replaceable
+      class="parameter">N</replaceable></entry>
+     </row>
+
     <row>
      <entry>
       <indexterm>
@ -8007,24 +8336,6 @@ SELECT NULLIF(value, '(none)') ...
      <entry>sample standard deviation of the input values</entry>
     </row>

-     <row>
-      <entry><function>sum(<replaceable class="parameter">expression</replaceable>)</function></entry>
-      <entry>
-       <type>smallint</type>, <type>int</type>,
-       <type>bigint</type>, <type>real</type>, <type>double
-       precision</type>, <type>numeric</type>, or
-       <type>interval</type>
-      </entry>
-      <entry>
-       <type>bigint</type> for <type>smallint</type> or
-       <type>int</type> arguments, <type>numeric</type> for
-       <type>bigint</type> arguments, <type>double precision</type>
-       for floating-point arguments, otherwise the same as the
-       argument data type
-      </entry>
-      <entry>sum of <replaceable class="parameter">expression</replaceable> across all input values</entry>
-     </row>
-
     <row>
      <entry>
       <indexterm>
@ -8087,50 +8398,6 @@ SELECT NULLIF(value, '(none)') ...
   </tgroup>
  </table>

-  <para>
-   It should be noted that except for <function>count</function>,
-   these functions return a null value when no rows are selected.  In
-   particular, <function>sum</function> of no rows returns null, not
-   zero as one might expect.  The <function>coalesce</function> function may be
-   used to substitute zero for null when necessary.
-  </para>
-
-  <note>
-    <indexterm>
-      <primary>ANY</primary>
-    </indexterm>
-    <indexterm>
-      <primary>SOME</primary>
-    </indexterm>
-    <para>
-      Boolean aggregates <function>bool_and</function> and 
-      <function>bool_or</function> correspond to standard SQL aggregates
-      <function>every</function> and <function>any</function> or
-      <function>some</function>. 
-      As for <function>any</function> and <function>some</function>, 
-      it seems that there is an ambiguity built into the standard syntax:
-<programlisting>
-SELECT b1 = ANY((SELECT b2 FROM t2 ...)) FROM t1 ...;
-</programlisting>
-      Here <function>ANY</function> can be considered both as leading
-      to a subquery or as an aggregate if the select expression returns 1 row.
-      Thus the standard name cannot be given to these aggregates.
-    </para>
-  </note>
-
-  <note>
-   <para>
-    Users accustomed to working with other SQL database management
-    systems may be surprised by the performance of the
-    <function>count</function> aggregate when it is applied to the
-    entire table. A query like:
-<programlisting>
-SELECT count(*) FROM sometable;
-</programlisting>
-    will be executed by <productname>PostgreSQL</productname> using a
-    sequential scan of the entire table.
-   </para>
-  </note>
 </sect1>


--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/float.c,v 1.127 2006/07/14 14:52:24 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/float.c,v 1.128 2006/07/28 18:33:04 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -1878,18 +1878,18 @@ setseed(PG_FUNCTION_ARGS)
 */

 static float8 *
-check_float8_array(ArrayType *transarray, const char *caller)
+check_float8_array(ArrayType *transarray, const char *caller, int n)
 {
 	/*
-	 * We expect the input to be a 3-element float array; verify that. We
+	 * We expect the input to be an N-element float array; verify that. We
 	 * don't need to use deconstruct_array() since the array data is just
-	 * going to look like a C array of 3 float8 values.
+	 * going to look like a C array of N float8 values.
 	 */
 	if (ARR_NDIM(transarray) != 1 ||
-		ARR_DIMS(transarray)[0] != 3 ||
+		ARR_DIMS(transarray)[0] != n ||
 		ARR_HASNULL(transarray) ||
 		ARR_ELEMTYPE(transarray) != FLOAT8OID)
-		elog(ERROR, "%s: expected 3-element float8 array", caller);
+		elog(ERROR, "%s: expected %d-element float8 array", caller, n);
 	return (float8 *) ARR_DATA_PTR(transarray);
 }

@ -1903,7 +1903,7 @@ float8_accum(PG_FUNCTION_ARGS)
 				sumX,
 				sumX2;

-	transvalues = check_float8_array(transarray, "float8_accum");
+	transvalues = check_float8_array(transarray, "float8_accum", 3);
 	N = transvalues[0];
 	sumX = transvalues[1];
 	sumX2 = transvalues[2];
@ -1953,7 +1953,7 @@ float4_accum(PG_FUNCTION_ARGS)
 				sumX2,
 				newval;

-	transvalues = check_float8_array(transarray, "float4_accum");
+	transvalues = check_float8_array(transarray, "float4_accum", 3);
 	N = transvalues[0];
 	sumX = transvalues[1];
 	sumX2 = transvalues[2];
@ -2003,7 +2003,7 @@ float8_avg(PG_FUNCTION_ARGS)
 	float8		N,
 				sumX;

-	transvalues = check_float8_array(transarray, "float8_avg");
+	transvalues = check_float8_array(transarray, "float8_avg", 3);
 	N = transvalues[0];
 	sumX = transvalues[1];
 	/* ignore sumX2 */
@ -2025,7 +2025,7 @@ float8_var_pop(PG_FUNCTION_ARGS)
 				sumX2,
 				numerator;

-	transvalues = check_float8_array(transarray, "float8_var_pop");
+	transvalues = check_float8_array(transarray, "float8_var_pop", 3);
 	N = transvalues[0];
 	sumX = transvalues[1];
 	sumX2 = transvalues[2];
@ -2053,7 +2053,7 @@ float8_var_samp(PG_FUNCTION_ARGS)
 				sumX2,
 				numerator;

-	transvalues = check_float8_array(transarray, "float8_var_samp");
+	transvalues = check_float8_array(transarray, "float8_var_samp", 3);
 	N = transvalues[0];
 	sumX = transvalues[1];
 	sumX2 = transvalues[2];
@ -2081,7 +2081,7 @@ float8_stddev_pop(PG_FUNCTION_ARGS)
 				sumX2,
 				numerator;

-	transvalues = check_float8_array(transarray, "float8_stddev_pop");
+	transvalues = check_float8_array(transarray, "float8_stddev_pop", 3);
 	N = transvalues[0];
 	sumX = transvalues[1];
 	sumX2 = transvalues[2];
@ -2109,7 +2109,7 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
 				sumX2,
 				numerator;

-	transvalues = check_float8_array(transarray, "float8_stddev_samp");
+	transvalues = check_float8_array(transarray, "float8_stddev_samp", 3);
 	N = transvalues[0];
 	sumX = transvalues[1];
 	sumX2 = transvalues[2];
@ -2127,6 +2127,362 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
 	PG_RETURN_FLOAT8(sqrt(numerator / (N * (N - 1.0))));
 }

+/*
+ *		=========================
+ *		SQL2003 BINARY AGGREGATES
+ *		=========================
+ *
+ * The transition datatype for all these aggregates is a 6-element array of
+ * float8, holding the values N, sum(X), sum(X*X), sum(Y), sum(Y*Y), sum(X*Y)
+ * in that order.  Note that Y is the first argument to the aggregates!
+ *
+ * It might seem attractive to optimize this by having multiple accumulator
+ * functions that only calculate the sums actually needed.  But on most
+ * modern machines, a couple of extra floating-point multiplies will be
+ * insignificant compared to the other per-tuple overhead, so I've chosen
+ * to minimize code space instead.
+ */
+
+Datum
+float8_regr_accum(PG_FUNCTION_ARGS)
+{
+	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+	float8		newvalY = PG_GETARG_FLOAT8(1);
+	float8		newvalX = PG_GETARG_FLOAT8(2);
+	float8	   *transvalues;
+	float8		N, sumX, sumX2, sumY, sumY2, sumXY;
+
+	transvalues = check_float8_array(transarray, "float8_regr_accum", 6);
+	N = transvalues[0];
+	sumX = transvalues[1];
+	sumX2 = transvalues[2];
+	sumY = transvalues[3];
+	sumY2 = transvalues[4];
+	sumXY = transvalues[5];
+
+	N += 1.0;
+	sumX += newvalX;
+	sumX2 += newvalX * newvalX;
+	sumY += newvalY;
+	sumY2 += newvalY * newvalY;
+	sumXY += newvalX * newvalY;
+
+	/*
+	 * If we're invoked by nodeAgg, we can cheat and modify our first
+	 * parameter in-place to reduce palloc overhead. Otherwise we construct a
+	 * new array with the updated transition data and return it.
+	 */
+	if (fcinfo->context && IsA(fcinfo->context, AggState))
+	{
+		transvalues[0] = N;
+		transvalues[1] = sumX;
+		transvalues[2] = sumX2;
+		transvalues[3] = sumY;
+		transvalues[4] = sumY2;
+		transvalues[5] = sumXY;
+
+		PG_RETURN_ARRAYTYPE_P(transarray);
+	}
+	else
+	{
+		Datum		transdatums[6];
+		ArrayType  *result;
+
+		transdatums[0] = Float8GetDatumFast(N);
+		transdatums[1] = Float8GetDatumFast(sumX);
+		transdatums[2] = Float8GetDatumFast(sumX2);
+		transdatums[3] = Float8GetDatumFast(sumY);
+		transdatums[4] = Float8GetDatumFast(sumY2);
+		transdatums[5] = Float8GetDatumFast(sumXY);
+
+		result = construct_array(transdatums, 6,
+								 FLOAT8OID,
+								 sizeof(float8),
+								 false /* float8 byval */ , 'd');
+
+		PG_RETURN_ARRAYTYPE_P(result);
+	}
+}
+
+Datum
+float8_regr_sxx(PG_FUNCTION_ARGS)
+{
+	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+	float8	   *transvalues;
+	float8		N,
+				sumX,
+				sumX2,
+				numerator;
+
+	transvalues = check_float8_array(transarray, "float8_regr_sxx", 6);
+	N = transvalues[0];
+	sumX = transvalues[1];
+	sumX2 = transvalues[2];
+
+	/* if N is 0 we should return NULL */
+	if (N < 1.0)
+		PG_RETURN_NULL();
+
+	numerator = N * sumX2 - sumX * sumX;
+
+	/* Watch out for roundoff error producing a negative numerator */
+	if (numerator <= 0.0)
+		PG_RETURN_FLOAT8(0.0);
+
+	PG_RETURN_FLOAT8(numerator / N);
+}
+
+Datum
+float8_regr_syy(PG_FUNCTION_ARGS)
+{
+	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+	float8	   *transvalues;
+	float8		N,
+				sumY,
+				sumY2,
+				numerator;
+
+	transvalues = check_float8_array(transarray, "float8_regr_syy", 6);
+	N = transvalues[0];
+	sumY = transvalues[3];
+	sumY2 = transvalues[4];
+
+	/* if N is 0 we should return NULL */
+	if (N < 1.0)
+		PG_RETURN_NULL();
+
+	numerator = N * sumY2 - sumY * sumY;
+
+	/* Watch out for roundoff error producing a negative numerator */
+	if (numerator <= 0.0)
+		PG_RETURN_FLOAT8(0.0);
+
+	PG_RETURN_FLOAT8(numerator / N);
+}
+
+Datum
+float8_regr_sxy(PG_FUNCTION_ARGS)
+{
+	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+	float8	   *transvalues;
+	float8		N, sumX, sumY, sumXY, numerator;
+
+	transvalues = check_float8_array(transarray, "float8_regr_sxy", 6);
+	N = transvalues[0];
+	sumX = transvalues[1];
+	sumY = transvalues[3];
+	sumXY = transvalues[5];
+
+	/* if N is 0 we should return NULL */
+	if (N < 1.0)
+		PG_RETURN_NULL();
+
+	numerator = N * sumXY - sumX * sumY;
+
+	/* A negative result is valid here */
+
+	PG_RETURN_FLOAT8(numerator / N);
+}
+
+Datum
+float8_regr_avgx(PG_FUNCTION_ARGS)
+{
+	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+	float8	   *transvalues;
+	float8		N,
+				sumX;
+
+	transvalues = check_float8_array(transarray, "float8_regr_avgx", 6);
+	N = transvalues[0];
+	sumX = transvalues[1];
+
+	/* if N is 0 we should return NULL */
+	if (N < 1.0)
+		PG_RETURN_NULL();
+
+	PG_RETURN_FLOAT8(sumX / N);
+}
+
+Datum
+float8_regr_avgy(PG_FUNCTION_ARGS)
+{
+	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+	float8	   *transvalues;
+	float8		N,
+				sumY;
+
+	transvalues = check_float8_array(transarray, "float8_regr_avgy", 6);
+	N = transvalues[0];
+	sumY = transvalues[3];
+
+	/* if N is 0 we should return NULL */
+	if (N < 1.0)
+		PG_RETURN_NULL();
+
+	PG_RETURN_FLOAT8(sumY / N);
+}
+
+Datum
+float8_covar_pop(PG_FUNCTION_ARGS)
+{
+	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+	float8	   *transvalues;
+	float8		N, sumX, sumY, sumXY, numerator;
+
+	transvalues = check_float8_array(transarray, "float8_covar_pop", 6);
+	N = transvalues[0];
+	sumX = transvalues[1];
+	sumY = transvalues[3];
+	sumXY = transvalues[5];
+
+	/* if N is 0 we should return NULL */
+	if (N < 1.0)
+		PG_RETURN_NULL();
+
+	numerator = N * sumXY - sumX * sumY;
+
+	PG_RETURN_FLOAT8(numerator / (N * N));
+}
+
+Datum
+float8_covar_samp(PG_FUNCTION_ARGS)
+{
+	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+	float8	   *transvalues;
+	float8		N, sumX, sumY, sumXY, numerator;
+
+	transvalues = check_float8_array(transarray, "float8_covar_samp", 6);
+	N = transvalues[0];
+	sumX = transvalues[1];
+	sumY = transvalues[3];
+	sumXY = transvalues[5];
+
+	/* if N is <= 1 we should return NULL */
+	if (N < 2.0)
+		PG_RETURN_NULL();
+
+	numerator = N * sumXY - sumX * sumY;
+
+	PG_RETURN_FLOAT8(numerator / (N * (N - 1.0)));
+}
+
+Datum
+float8_corr(PG_FUNCTION_ARGS)
+{
+	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+	float8	   *transvalues;
+	float8		N, sumX, sumX2, sumY, sumY2, sumXY, numeratorX,
+		numeratorY, numeratorXY;
+
+	transvalues = check_float8_array(transarray, "float8_corr", 6);
+	N = transvalues[0];
+	sumX = transvalues[1];
+	sumX2 = transvalues[2];
+	sumY = transvalues[3];
+	sumY2 = transvalues[4];
+	sumXY = transvalues[5];
+
+	/* if N is 0 we should return NULL */
+	if (N < 1.0)
+		PG_RETURN_NULL();
+
+	numeratorX = N * sumX2 - sumX * sumX;
+	numeratorY = N * sumY2 - sumY * sumY;
+	numeratorXY = N * sumXY - sumX * sumY;
+	if (numeratorX <= 0 || numeratorY <= 0)
+		PG_RETURN_NULL();
+	
+	PG_RETURN_FLOAT8(sqrt((numeratorXY * numeratorXY) /
+						  (numeratorX * numeratorY)));
+}
+
+Datum
+float8_regr_r2(PG_FUNCTION_ARGS)
+{
+	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+	float8	   *transvalues;
+	float8		N, sumX, sumX2, sumY, sumY2, sumXY, numeratorX,
+		numeratorY, numeratorXY;
+
+	transvalues = check_float8_array(transarray, "float8_regr_r2", 6);
+	N = transvalues[0];
+	sumX = transvalues[1];
+	sumX2 = transvalues[2];
+	sumY = transvalues[3];
+	sumY2 = transvalues[4];
+	sumXY = transvalues[5];
+
+	/* if N is 0 we should return NULL */
+	if (N < 1.0)
+		PG_RETURN_NULL();
+
+	numeratorX = N * sumX2 - sumX * sumX;
+	numeratorY = N * sumY2 - sumY * sumY;
+	numeratorXY = N * sumXY - sumX * sumY;
+	if (numeratorX <= 0)
+		PG_RETURN_NULL();
+	/* per spec, horizontal line produces 1.0 */
+	if (numeratorY <= 0)
+		PG_RETURN_FLOAT8(1.0);
+
+	PG_RETURN_FLOAT8((numeratorXY * numeratorXY) /
+					 (numeratorX * numeratorY));
+}
+
+Datum
+float8_regr_slope(PG_FUNCTION_ARGS)
+{
+	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+	float8	   *transvalues;
+	float8		N, sumX, sumX2, sumY, sumXY, numeratorX,
+		numeratorXY;
+
+	transvalues = check_float8_array(transarray, "float8_regr_slope", 6);
+	N = transvalues[0];
+	sumX = transvalues[1];
+	sumX2 = transvalues[2];
+	sumY = transvalues[3];
+	sumXY = transvalues[5];
+
+	/* if N is 0 we should return NULL */
+	if (N < 1.0)
+		PG_RETURN_NULL();
+
+	numeratorX = N * sumX2 - sumX * sumX;
+	numeratorXY = N * sumXY - sumX * sumY;
+	if (numeratorX <= 0)
+		PG_RETURN_NULL();
+	
+	PG_RETURN_FLOAT8(numeratorXY / numeratorX);
+}
+
+Datum
+float8_regr_intercept(PG_FUNCTION_ARGS)
+{
+	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+	float8	   *transvalues;
+	float8		N, sumX, sumX2, sumY, sumXY, numeratorX,
+		numeratorXXY;
+
+	transvalues = check_float8_array(transarray, "float8_regr_intercept", 6);
+	N = transvalues[0];
+	sumX = transvalues[1];
+	sumX2 = transvalues[2];
+	sumY = transvalues[3];
+	sumXY = transvalues[5];
+
+	/* if N is 0 we should return NULL */
+	if (N < 1.0)
+		PG_RETURN_NULL();
+
+	numeratorX = N * sumX2 - sumX * sumX;
+	numeratorXXY = sumY * sumX2 - sumX * sumXY;
+	if (numeratorX <= 0)
+		PG_RETURN_NULL();
+	
+	PG_RETURN_FLOAT8(numeratorXXY / numeratorX);
+}
+

 /*
 *		====================================
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.60 2006/03/05 15:58:42 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.61 2006/07/28 18:33:04 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -694,6 +694,28 @@ int8inc(PG_FUNCTION_ARGS)
 	}
 }

+/*
+ * These functions are exactly like int8inc but are used for aggregates that
+ * count only non-null values.  Since the functions are declared strict,
+ * the null checks happen before we ever get here, and all we need do is
+ * increment the state value.  We could actually make these pg_proc entries
+ * point right at int8inc, but then the opr_sanity regression test would
+ * complain about mismatched entries for a built-in function.
+ */
+
+Datum
+int8inc_any(PG_FUNCTION_ARGS)
+{
+	return int8inc(fcinfo);
+}
+
+Datum
+int8inc_float8_float8(PG_FUNCTION_ARGS)
+{
+	return int8inc(fcinfo);
+}
+
+
 Datum
 int8larger(PG_FUNCTION_ARGS)
 {
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@ -37,7 +37,7 @@
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.342 2006/07/27 19:52:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.343 2006/07/28 18:33:04 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -53,6 +53,6 @@
 */

 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200607271
+#define CATALOG_VERSION_NO	200607281

 #endif
--- a/src/include/catalog/pg_aggregate.h
+++ b/src/include/catalog/pg_aggregate.h
@ -8,7 +8,7 @@
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/catalog/pg_aggregate.h,v 1.56 2006/07/27 19:52:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_aggregate.h,v 1.57 2006/07/28 18:33:04 tgl Exp $
 *
 * NOTES
 *	  the genbki.sh script reads this file and generates .bki
@ -192,6 +192,20 @@ DATA(insert ( 2157	float4_accum	float8_stddev_samp	0	1022	"{0,0,0}" ));
 DATA(insert ( 2158	float8_accum	float8_stddev_samp	0	1022	"{0,0,0}" ));
 DATA(insert ( 2159	numeric_accum	numeric_stddev_samp	0	1231	"{0,0,0}" ));

+/* SQL2003 binary regression aggregates */
+DATA(insert ( 2818	int8inc_float8_float8		-				0	20		"0" ));
+DATA(insert ( 2819	float8_regr_accum	float8_regr_sxx			0	1022	"{0,0,0,0,0,0}" ));
+DATA(insert ( 2820	float8_regr_accum	float8_regr_syy			0	1022	"{0,0,0,0,0,0}" ));
+DATA(insert ( 2821	float8_regr_accum	float8_regr_sxy			0	1022	"{0,0,0,0,0,0}" ));
+DATA(insert ( 2822	float8_regr_accum	float8_regr_avgx		0	1022	"{0,0,0,0,0,0}" ));
+DATA(insert ( 2823	float8_regr_accum	float8_regr_avgy		0	1022	"{0,0,0,0,0,0}" ));
+DATA(insert ( 2824	float8_regr_accum	float8_regr_r2			0	1022	"{0,0,0,0,0,0}" ));
+DATA(insert ( 2825	float8_regr_accum	float8_regr_slope		0	1022	"{0,0,0,0,0,0}" ));
+DATA(insert ( 2826	float8_regr_accum	float8_regr_intercept	0	1022	"{0,0,0,0,0,0}" ));
+DATA(insert ( 2827	float8_regr_accum	float8_covar_pop		0	1022	"{0,0,0,0,0,0}" ));
+DATA(insert ( 2828	float8_regr_accum	float8_covar_samp		0	1022	"{0,0,0,0,0,0}" ));
+DATA(insert ( 2829	float8_regr_accum	float8_corr				0	1022	"{0,0,0,0,0,0}" ));
+
 /* boolean-and and boolean-or */
 DATA(insert ( 2517	booland_statefunc	-			0	16		_null_ ));
 DATA(insert ( 2518	boolor_statefunc	-			0	16		_null_ ));
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.418 2006/07/27 19:52:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.419 2006/07/28 18:33:04 tgl Exp $
 *
 * NOTES
 *	  The script catalog/genbki.sh reads this file and generates .bki
@ -1534,7 +1534,7 @@ DESCR("truncate interval to specified units");

 DATA(insert OID = 1219 (  int8inc		   PGNSP PGUID 12 f f t f i 1 20 "20" _null_ _null_ _null_	int8inc - _null_ ));
 DESCR("increment");
-DATA(insert OID = 2804 (  int8inc_any	   PGNSP PGUID 12 f f t f i 2 20 "20 2276" _null_ _null_ _null_	int8inc - _null_ ));
+DATA(insert OID = 2804 (  int8inc_any	   PGNSP PGUID 12 f f t f i 2 20 "20 2276" _null_ _null_ _null_	int8inc_any - _null_ ));
 DESCR("increment, ignores second argument");
 DATA(insert OID = 1230 (  int8abs		   PGNSP PGUID 12 f f t f i 1 20 "20" _null_ _null_ _null_	int8abs - _null_ ));
 DESCR("absolute value");
@ -2730,6 +2730,32 @@ DATA(insert OID = 1963 (  int4_avg_accum   PGNSP PGUID 12 f f t f i 2 1016 "1016
 DESCR("AVG(int4) transition function");
 DATA(insert OID = 1964 (  int8_avg		   PGNSP PGUID 12 f f t f i 1 1700 "1016" _null_ _null_ _null_	int8_avg - _null_ ));
 DESCR("AVG(int) aggregate final function");
+DATA(insert OID = 2805 (  int8inc_float8_float8		PGNSP PGUID 12 f f t f i 3 20 "20 701 701" _null_ _null_ _null_ int8inc_float8_float8 - _null_ ));
+DESCR("REGR_COUNT(double, double) transition function");
+DATA(insert OID = 2806 (  float8_regr_accum			PGNSP PGUID 12 f f t f i 3 1022 "1022 701 701" _null_ _null_ _null_ float8_regr_accum - _null_ ));
+DESCR("REGR_...(double, double) transition function");
+DATA(insert OID = 2807 (  float8_regr_sxx			PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_sxx - _null_ ));
+DESCR("REGR_SXX(double, double) aggregate final function");
+DATA(insert OID = 2808 (  float8_regr_syy			PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_syy - _null_ ));
+DESCR("REGR_SYY(double, double) aggregate final function");
+DATA(insert OID = 2809 (  float8_regr_sxy			PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_sxy - _null_ ));
+DESCR("REGR_SXY(double, double) aggregate final function");
+DATA(insert OID = 2810 (  float8_regr_avgx			PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_avgx - _null_ ));
+DESCR("REGR_AVGX(double, double) aggregate final function");
+DATA(insert OID = 2811 (  float8_regr_avgy			PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_avgy - _null_ ));
+DESCR("REGR_AVGY(double, double) aggregate final function");
+DATA(insert OID = 2812 (  float8_regr_r2			PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_r2 - _null_ ));
+DESCR("REGR_R2(double, double) aggregate final function");
+DATA(insert OID = 2813 (  float8_regr_slope			PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_slope - _null_ ));
+DESCR("REGR_SLOPE(double, double) aggregate final function");
+DATA(insert OID = 2814 (  float8_regr_intercept		PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_regr_intercept - _null_ ));
+DESCR("REGR_INTERCEPT(double, double) aggregate final function");
+DATA(insert OID = 2815 (  float8_covar_pop			PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_covar_pop - _null_ ));
+DESCR("COVAR_POP(double, double) aggregate final function");
+DATA(insert OID = 2816 (  float8_covar_samp			PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_covar_samp - _null_ ));
+DESCR("COVAR_SAMP(double, double) aggregate final function");
+DATA(insert OID = 2817 (  float8_corr				PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_corr - _null_ ));
+DESCR("CORR(double, double) aggregate final function");

 /* To ASCII conversion */
 DATA(insert OID = 1845 ( to_ascii	PGNSP PGUID 12 f f t f i 1	25 "25" _null_ _null_ _null_	to_ascii_default - _null_ ));
@ -3196,6 +3222,20 @@ DATA(insert OID = 2157 (  stddev			PGNSP PGUID 12 t f f f i 1 701 "700" _null_ _
 DATA(insert OID = 2158 (  stddev			PGNSP PGUID 12 t f f f i 1 701 "701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
 DATA(insert OID = 2159 (  stddev			PGNSP PGUID 12 t f f f i 1 1700 "1700" _null_ _null_ _null_ aggregate_dummy - _null_ ));

+DATA(insert OID = 2818 (  regr_count		PGNSP PGUID 12 t f f f i 2 20 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2819 (  regr_sxx			PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2820 (  regr_syy			PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2821 (  regr_sxy			PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2822 (  regr_avgx			PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2823 (  regr_avgy			PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2824 (  regr_r2			PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2825 (  regr_slope		PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2826 (  regr_intercept	PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+
+DATA(insert OID = 2827 (  covar_pop			PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2828 (  covar_samp		PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+DATA(insert OID = 2829 (  corr				PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+
 DATA(insert OID = 2160 ( text_pattern_lt	 PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_lt - _null_ ));
 DATA(insert OID = 2161 ( text_pattern_le	 PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_le - _null_ ));
 DATA(insert OID = 2162 ( text_pattern_eq	 PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_eq - _null_ ));
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.280 2006/07/21 20:51:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.281 2006/07/28 18:33:04 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -348,6 +348,18 @@ extern Datum float8_var_pop(PG_FUNCTION_ARGS);
 extern Datum float8_var_samp(PG_FUNCTION_ARGS);
 extern Datum float8_stddev_pop(PG_FUNCTION_ARGS);
 extern Datum float8_stddev_samp(PG_FUNCTION_ARGS);
+extern Datum float8_regr_accum(PG_FUNCTION_ARGS);
+extern Datum float8_regr_sxx(PG_FUNCTION_ARGS);
+extern Datum float8_regr_syy(PG_FUNCTION_ARGS);
+extern Datum float8_regr_sxy(PG_FUNCTION_ARGS);
+extern Datum float8_regr_avgx(PG_FUNCTION_ARGS);
+extern Datum float8_regr_avgy(PG_FUNCTION_ARGS);
+extern Datum float8_covar_pop(PG_FUNCTION_ARGS);
+extern Datum float8_covar_samp(PG_FUNCTION_ARGS);
+extern Datum float8_corr(PG_FUNCTION_ARGS);
+extern Datum float8_regr_r2(PG_FUNCTION_ARGS);
+extern Datum float8_regr_slope(PG_FUNCTION_ARGS);
+extern Datum float8_regr_intercept(PG_FUNCTION_ARGS);
 extern Datum float48pl(PG_FUNCTION_ARGS);
 extern Datum float48mi(PG_FUNCTION_ARGS);
 extern Datum float48mul(PG_FUNCTION_ARGS);
--- a/src/include/utils/int8.h
+++ b/src/include/utils/int8.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/utils/int8.h,v 1.44 2006/03/05 15:59:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/int8.h,v 1.45 2006/07/28 18:33:04 tgl Exp $
 *
 * NOTES
 * These data types are supported on all 64-bit architectures, and may
@ -74,6 +74,8 @@ extern Datum int8div(PG_FUNCTION_ARGS);
 extern Datum int8abs(PG_FUNCTION_ARGS);
 extern Datum int8mod(PG_FUNCTION_ARGS);
 extern Datum int8inc(PG_FUNCTION_ARGS);
+extern Datum int8inc_any(PG_FUNCTION_ARGS);
+extern Datum int8inc_float8_float8(PG_FUNCTION_ARGS);
 extern Datum int8larger(PG_FUNCTION_ARGS);
 extern Datum int8smaller(PG_FUNCTION_ARGS);

--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@ -137,6 +137,61 @@ SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
          0 |            
 (1 row)

+-- SQL2003 binary aggregates
+SELECT regr_count(b, a) FROM aggtest;
+ regr_count 
+------------
+          4
+(1 row)
+
+SELECT regr_sxx(b, a) FROM aggtest;
+ regr_sxx 
+----------
+     5099
+(1 row)
+
+SELECT regr_syy(b, a) FROM aggtest;
+     regr_syy     
+------------------
+ 68756.2156939293
+(1 row)
+
+SELECT regr_sxy(b, a) FROM aggtest;
+     regr_sxy     
+------------------
+ 2614.51582155004
+(1 row)
+
+SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest;
+ regr_avgx |    regr_avgy     
+-----------+------------------
+      49.5 | 107.943152273074
+(1 row)
+
+SELECT regr_r2(b, a) FROM aggtest;
+      regr_r2       
+--------------------
+ 0.0194977982031803
+(1 row)
+
+SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
+    regr_slope     |  regr_intercept  
+-------------------+------------------
+ 0.512750700441271 | 82.5619926012309
+(1 row)
+
+SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
+    covar_pop    |    covar_samp    
+-----------------+------------------
+ 653.62895538751 | 871.505273850014
+(1 row)
+
+SELECT corr(b, a) FROM aggtest;
+       corr        
+-------------------
+ 0.139634516517873
+(1 row)
+
 SELECT count(four) AS cnt_1000 FROM onek;
 cnt_1000 
 ----------
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@ -66,15 +66,15 @@ WHERE p1.oid != p2.oid AND
 -- of the same internal function (ie, matching prosrc fields).  It's OK to
 -- have several entries with different pronames for the same internal function,
 -- but conflicts in the number of arguments and other critical items should
-- be complained of.
-- Ignore aggregates, since they all use "aggregate_dummy".
-- As of 8.2, this finds int8inc and int8inc_any, which are OK.
+-- be complained of.  (We don't check data types here; see next query.)
+-- Note: ignore aggregate functions here, since they all point to the same
+-- dummy built-in function.
 SELECT p1.oid, p1.proname, p2.oid, p2.proname
 FROM pg_proc AS p1, pg_proc AS p2
 WHERE p1.oid < p2.oid AND
    p1.prosrc = p2.prosrc AND
    p1.prolang = 12 AND p2.prolang = 12 AND
-    p1.proisagg = false AND p2.proisagg = false AND
+    (p1.proisagg = false OR p2.proisagg = false) AND
    (p1.prolang != p2.prolang OR
     p1.proisagg != p2.proisagg OR
     p1.prosecdef != p2.prosecdef OR
@ -82,10 +82,9 @@ WHERE p1.oid < p2.oid AND
     p1.proretset != p2.proretset OR
     p1.provolatile != p2.provolatile OR
     p1.pronargs != p2.pronargs);
- oid  | proname | oid  |   proname   
------+---------+------+-------------
- 1219 | int8inc | 2804 | int8inc_any
-(1 row)
+ oid | proname | oid | proname 
+-----+---------+-----+---------
+(0 rows)

 -- Look for uses of different type OIDs in the argument/result type fields
 -- for different aliases of the same built-in function.
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@ -39,6 +39,17 @@ SELECT var_samp(b::numeric) FROM aggtest;
 SELECT var_pop(1.0), var_samp(2.0);
 SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);

+-- SQL2003 binary aggregates
+SELECT regr_count(b, a) FROM aggtest;
+SELECT regr_sxx(b, a) FROM aggtest;
+SELECT regr_syy(b, a) FROM aggtest;
+SELECT regr_sxy(b, a) FROM aggtest;
+SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest;
+SELECT regr_r2(b, a) FROM aggtest;
+SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
+SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
+SELECT corr(b, a) FROM aggtest;
+
 SELECT count(four) AS cnt_1000 FROM onek;
 SELECT count(DISTINCT four) AS cnt_4 FROM onek;

--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@ -68,17 +68,16 @@ WHERE p1.oid != p2.oid AND
 -- of the same internal function (ie, matching prosrc fields).  It's OK to
 -- have several entries with different pronames for the same internal function,
 -- but conflicts in the number of arguments and other critical items should
-- be complained of.
-- Ignore aggregates, since they all use "aggregate_dummy".
-
-- As of 8.2, this finds int8inc and int8inc_any, which are OK.
+-- be complained of.  (We don't check data types here; see next query.)
+-- Note: ignore aggregate functions here, since they all point to the same
+-- dummy built-in function.

 SELECT p1.oid, p1.proname, p2.oid, p2.proname
 FROM pg_proc AS p1, pg_proc AS p2
 WHERE p1.oid < p2.oid AND
    p1.prosrc = p2.prosrc AND
    p1.prolang = 12 AND p2.prolang = 12 AND
-    p1.proisagg = false AND p2.proisagg = false AND
+    (p1.proisagg = false OR p2.proisagg = false) AND
    (p1.prolang != p2.prolang OR
     p1.proisagg != p2.proisagg OR
     p1.prosecdef != p2.prosecdef OR