mirror of https://github.com/postgres/postgres
Tag:
Branch:
Tree:
12bb35fc9b
REL2_0B
REL6_4
REL6_5_PATCHES
REL7_0_PATCHES
REL7_1_STABLE
REL7_2_STABLE
REL7_3_STABLE
REL7_4_STABLE
REL8_0_STABLE
REL8_1_STABLE
REL8_2_STABLE
REL8_3_STABLE
REL8_4_STABLE
REL8_5_ALPHA1_BRANCH
REL8_5_ALPHA2_BRANCH
REL8_5_ALPHA3_BRANCH
REL9_0_ALPHA4_BRANCH
REL9_0_ALPHA5_BRANCH
REL9_0_STABLE
REL9_1_STABLE
REL9_2_STABLE
REL9_3_STABLE
REL9_4_STABLE
REL9_5_STABLE
REL9_6_STABLE
REL_10_STABLE
REL_11_STABLE
REL_12_STABLE
REL_13_STABLE
REL_14_STABLE
REL_15_STABLE
REL_16_STABLE
REL_17_STABLE
REL_18_STABLE
Release_1_0_3
WIN32_DEV
ecpg_big_bison
master
PG95-1_01
PG95-1_08
PG95-1_09
REL2_0
REL6_1
REL6_1_1
REL6_2
REL6_2_1
REL6_3
REL6_3_2
REL6_4_2
REL6_5
REL6_5_1
REL6_5_2
REL6_5_3
REL7_0
REL7_0_2
REL7_0_3
REL7_1
REL7_1_1
REL7_1_2
REL7_1_3
REL7_1_BETA
REL7_1_BETA2
REL7_1_BETA3
REL7_2
REL7_2_1
REL7_2_2
REL7_2_3
REL7_2_4
REL7_2_5
REL7_2_6
REL7_2_7
REL7_2_8
REL7_2_BETA1
REL7_2_BETA2
REL7_2_BETA3
REL7_2_BETA4
REL7_2_BETA5
REL7_2_RC1
REL7_2_RC2
REL7_3
REL7_3_1
REL7_3_10
REL7_3_11
REL7_3_12
REL7_3_13
REL7_3_14
REL7_3_15
REL7_3_16
REL7_3_17
REL7_3_18
REL7_3_19
REL7_3_2
REL7_3_20
REL7_3_21
REL7_3_3
REL7_3_4
REL7_3_5
REL7_3_6
REL7_3_7
REL7_3_8
REL7_3_9
REL7_4
REL7_4_1
REL7_4_10
REL7_4_11
REL7_4_12
REL7_4_13
REL7_4_14
REL7_4_15
REL7_4_16
REL7_4_17
REL7_4_18
REL7_4_19
REL7_4_2
REL7_4_20
REL7_4_21
REL7_4_22
REL7_4_23
REL7_4_24
REL7_4_25
REL7_4_26
REL7_4_27
REL7_4_28
REL7_4_29
REL7_4_3
REL7_4_30
REL7_4_4
REL7_4_5
REL7_4_6
REL7_4_7
REL7_4_8
REL7_4_9
REL7_4_BETA1
REL7_4_BETA2
REL7_4_BETA3
REL7_4_BETA4
REL7_4_BETA5
REL7_4_RC1
REL7_4_RC2
REL8_0_0
REL8_0_0BETA1
REL8_0_0BETA2
REL8_0_0BETA3
REL8_0_0BETA4
REL8_0_0BETA5
REL8_0_0RC1
REL8_0_0RC2
REL8_0_0RC3
REL8_0_0RC4
REL8_0_0RC5
REL8_0_1
REL8_0_10
REL8_0_11
REL8_0_12
REL8_0_13
REL8_0_14
REL8_0_15
REL8_0_16
REL8_0_17
REL8_0_18
REL8_0_19
REL8_0_2
REL8_0_20
REL8_0_21
REL8_0_22
REL8_0_23
REL8_0_24
REL8_0_25
REL8_0_26
REL8_0_3
REL8_0_4
REL8_0_5
REL8_0_6
REL8_0_7
REL8_0_8
REL8_0_9
REL8_1_0
REL8_1_0BETA1
REL8_1_0BETA2
REL8_1_0BETA3
REL8_1_0BETA4
REL8_1_0RC1
REL8_1_1
REL8_1_10
REL8_1_11
REL8_1_12
REL8_1_13
REL8_1_14
REL8_1_15
REL8_1_16
REL8_1_17
REL8_1_18
REL8_1_19
REL8_1_2
REL8_1_20
REL8_1_21
REL8_1_22
REL8_1_23
REL8_1_3
REL8_1_4
REL8_1_5
REL8_1_6
REL8_1_7
REL8_1_8
REL8_1_9
REL8_2_0
REL8_2_1
REL8_2_10
REL8_2_11
REL8_2_12
REL8_2_13
REL8_2_14
REL8_2_15
REL8_2_16
REL8_2_17
REL8_2_18
REL8_2_19
REL8_2_2
REL8_2_20
REL8_2_21
REL8_2_22
REL8_2_23
REL8_2_3
REL8_2_4
REL8_2_5
REL8_2_6
REL8_2_7
REL8_2_8
REL8_2_9
REL8_2_BETA1
REL8_2_BETA2
REL8_2_BETA3
REL8_2_RC1
REL8_3_0
REL8_3_1
REL8_3_10
REL8_3_11
REL8_3_12
REL8_3_13
REL8_3_14
REL8_3_15
REL8_3_16
REL8_3_17
REL8_3_18
REL8_3_19
REL8_3_2
REL8_3_20
REL8_3_21
REL8_3_22
REL8_3_23
REL8_3_3
REL8_3_4
REL8_3_5
REL8_3_6
REL8_3_7
REL8_3_8
REL8_3_9
REL8_3_BETA1
REL8_3_BETA2
REL8_3_BETA3
REL8_3_BETA4
REL8_3_RC1
REL8_3_RC2
REL8_4_0
REL8_4_1
REL8_4_10
REL8_4_11
REL8_4_12
REL8_4_13
REL8_4_14
REL8_4_15
REL8_4_16
REL8_4_17
REL8_4_18
REL8_4_19
REL8_4_2
REL8_4_20
REL8_4_21
REL8_4_22
REL8_4_3
REL8_4_4
REL8_4_5
REL8_4_6
REL8_4_7
REL8_4_8
REL8_4_9
REL8_4_BETA1
REL8_4_BETA2
REL8_4_RC1
REL8_4_RC2
REL8_5_ALPHA1
REL8_5_ALPHA2
REL8_5_ALPHA3
REL9_0_0
REL9_0_1
REL9_0_10
REL9_0_11
REL9_0_12
REL9_0_13
REL9_0_14
REL9_0_15
REL9_0_16
REL9_0_17
REL9_0_18
REL9_0_19
REL9_0_2
REL9_0_20
REL9_0_21
REL9_0_22
REL9_0_23
REL9_0_3
REL9_0_4
REL9_0_5
REL9_0_6
REL9_0_7
REL9_0_8
REL9_0_9
REL9_0_ALPHA4
REL9_0_ALPHA5
REL9_0_BETA1
REL9_0_BETA2
REL9_0_BETA3
REL9_0_BETA4
REL9_0_RC1
REL9_1_0
REL9_1_1
REL9_1_10
REL9_1_11
REL9_1_12
REL9_1_13
REL9_1_14
REL9_1_15
REL9_1_16
REL9_1_17
REL9_1_18
REL9_1_19
REL9_1_2
REL9_1_20
REL9_1_21
REL9_1_22
REL9_1_23
REL9_1_24
REL9_1_3
REL9_1_4
REL9_1_5
REL9_1_6
REL9_1_7
REL9_1_8
REL9_1_9
REL9_1_ALPHA1
REL9_1_ALPHA2
REL9_1_ALPHA3
REL9_1_ALPHA4
REL9_1_ALPHA5
REL9_1_BETA1
REL9_1_BETA2
REL9_1_BETA3
REL9_1_RC1
REL9_2_0
REL9_2_1
REL9_2_10
REL9_2_11
REL9_2_12
REL9_2_13
REL9_2_14
REL9_2_15
REL9_2_16
REL9_2_17
REL9_2_18
REL9_2_19
REL9_2_2
REL9_2_20
REL9_2_21
REL9_2_22
REL9_2_23
REL9_2_24
REL9_2_3
REL9_2_4
REL9_2_5
REL9_2_6
REL9_2_7
REL9_2_8
REL9_2_9
REL9_2_BETA1
REL9_2_BETA2
REL9_2_BETA3
REL9_2_BETA4
REL9_2_RC1
REL9_3_0
REL9_3_1
REL9_3_10
REL9_3_11
REL9_3_12
REL9_3_13
REL9_3_14
REL9_3_15
REL9_3_16
REL9_3_17
REL9_3_18
REL9_3_19
REL9_3_2
REL9_3_20
REL9_3_21
REL9_3_22
REL9_3_23
REL9_3_24
REL9_3_25
REL9_3_3
REL9_3_4
REL9_3_5
REL9_3_6
REL9_3_7
REL9_3_8
REL9_3_9
REL9_3_BETA1
REL9_3_BETA2
REL9_3_RC1
REL9_4_0
REL9_4_1
REL9_4_10
REL9_4_11
REL9_4_12
REL9_4_13
REL9_4_14
REL9_4_15
REL9_4_16
REL9_4_17
REL9_4_18
REL9_4_19
REL9_4_2
REL9_4_20
REL9_4_21
REL9_4_22
REL9_4_23
REL9_4_24
REL9_4_25
REL9_4_26
REL9_4_3
REL9_4_4
REL9_4_5
REL9_4_6
REL9_4_7
REL9_4_8
REL9_4_9
REL9_4_BETA1
REL9_4_BETA2
REL9_4_BETA3
REL9_4_RC1
REL9_5_0
REL9_5_1
REL9_5_10
REL9_5_11
REL9_5_12
REL9_5_13
REL9_5_14
REL9_5_15
REL9_5_16
REL9_5_17
REL9_5_18
REL9_5_19
REL9_5_2
REL9_5_20
REL9_5_21
REL9_5_22
REL9_5_23
REL9_5_24
REL9_5_25
REL9_5_3
REL9_5_4
REL9_5_5
REL9_5_6
REL9_5_7
REL9_5_8
REL9_5_9
REL9_5_ALPHA1
REL9_5_ALPHA2
REL9_5_BETA1
REL9_5_BETA2
REL9_5_RC1
REL9_6_0
REL9_6_1
REL9_6_10
REL9_6_11
REL9_6_12
REL9_6_13
REL9_6_14
REL9_6_15
REL9_6_16
REL9_6_17
REL9_6_18
REL9_6_19
REL9_6_2
REL9_6_20
REL9_6_21
REL9_6_22
REL9_6_23
REL9_6_24
REL9_6_3
REL9_6_4
REL9_6_5
REL9_6_6
REL9_6_7
REL9_6_8
REL9_6_9
REL9_6_BETA1
REL9_6_BETA2
REL9_6_BETA3
REL9_6_BETA4
REL9_6_RC1
REL_10_0
REL_10_1
REL_10_10
REL_10_11
REL_10_12
REL_10_13
REL_10_14
REL_10_15
REL_10_16
REL_10_17
REL_10_18
REL_10_19
REL_10_2
REL_10_20
REL_10_21
REL_10_22
REL_10_23
REL_10_3
REL_10_4
REL_10_5
REL_10_6
REL_10_7
REL_10_8
REL_10_9
REL_10_BETA1
REL_10_BETA2
REL_10_BETA3
REL_10_BETA4
REL_10_RC1
REL_11_0
REL_11_1
REL_11_10
REL_11_11
REL_11_12
REL_11_13
REL_11_14
REL_11_15
REL_11_16
REL_11_17
REL_11_18
REL_11_19
REL_11_2
REL_11_20
REL_11_21
REL_11_22
REL_11_3
REL_11_4
REL_11_5
REL_11_6
REL_11_7
REL_11_8
REL_11_9
REL_11_BETA1
REL_11_BETA2
REL_11_BETA3
REL_11_BETA4
REL_11_RC1
REL_12_0
REL_12_1
REL_12_10
REL_12_11
REL_12_12
REL_12_13
REL_12_14
REL_12_15
REL_12_16
REL_12_17
REL_12_18
REL_12_19
REL_12_2
REL_12_20
REL_12_21
REL_12_22
REL_12_3
REL_12_4
REL_12_5
REL_12_6
REL_12_7
REL_12_8
REL_12_9
REL_12_BETA1
REL_12_BETA2
REL_12_BETA3
REL_12_BETA4
REL_12_RC1
REL_13_0
REL_13_1
REL_13_10
REL_13_11
REL_13_12
REL_13_13
REL_13_14
REL_13_15
REL_13_16
REL_13_17
REL_13_18
REL_13_19
REL_13_2
REL_13_20
REL_13_21
REL_13_22
REL_13_23
REL_13_3
REL_13_4
REL_13_5
REL_13_6
REL_13_7
REL_13_8
REL_13_9
REL_13_BETA1
REL_13_BETA2
REL_13_BETA3
REL_13_RC1
REL_14_0
REL_14_1
REL_14_10
REL_14_11
REL_14_12
REL_14_13
REL_14_14
REL_14_15
REL_14_16
REL_14_17
REL_14_18
REL_14_19
REL_14_2
REL_14_20
REL_14_3
REL_14_4
REL_14_5
REL_14_6
REL_14_7
REL_14_8
REL_14_9
REL_14_BETA1
REL_14_BETA2
REL_14_BETA3
REL_14_RC1
REL_15_0
REL_15_1
REL_15_10
REL_15_11
REL_15_12
REL_15_13
REL_15_14
REL_15_15
REL_15_2
REL_15_3
REL_15_4
REL_15_5
REL_15_6
REL_15_7
REL_15_8
REL_15_9
REL_15_BETA1
REL_15_BETA2
REL_15_BETA3
REL_15_BETA4
REL_15_RC1
REL_15_RC2
REL_16_0
REL_16_1
REL_16_10
REL_16_11
REL_16_2
REL_16_3
REL_16_4
REL_16_5
REL_16_6
REL_16_7
REL_16_8
REL_16_9
REL_16_BETA1
REL_16_BETA2
REL_16_BETA3
REL_16_RC1
REL_17_0
REL_17_1
REL_17_2
REL_17_3
REL_17_4
REL_17_5
REL_17_6
REL_17_7
REL_17_BETA1
REL_17_BETA2
REL_17_BETA3
REL_17_RC1
REL_18_0
REL_18_1
REL_18_BETA1
REL_18_BETA2
REL_18_BETA3
REL_18_RC1
Release_1_0_2
Release_2_0
Release_2_0_0
release-6-3
${ noResults }
156 Commits (12bb35fc9b000d462b9bd6b8856e1884ef1bb3d7)
| Author | SHA1 | Message | Date |
|---|---|---|---|
|
|
5e1963fb76 |
Collations with nondeterministic comparison
This adds a flag "deterministic" to collations. If that is false, such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms. This functionality is only supported with the ICU provider. At least glibc doesn't appear to have any locales that work in a nondeterministic way, so it's not worth supporting this for the libc provider. The term "deterministic comparison" in this context is from Unicode Technical Standard #10 (https://unicode.org/reports/tr10/#Deterministic_Comparison). This patch makes changes in three areas: - CREATE COLLATION DDL changes and system catalog changes to support this new flag. - Many executor nodes and auxiliary code are extended to track collations. Previously, this code would just throw away collation information, because the eventually-called user-defined functions didn't use it since they only cared about equality, which didn't need collation information. - String data type functions that do equality comparisons and hashing are changed to take the (non-)deterministic flag into account. For comparison, this just means skipping various shortcuts and tie breakers that use byte-wise comparison. For hashing, we first need to convert the input string to a canonical "sort key" using the ICU analogue of strxfrm(). Reviewed-by: Daniel Verite <daniel@manitou-mail.org> Reviewed-by: Peter Geoghegan <pg@bowt.ie> Discussion: https://www.postgresql.org/message-id/flat/1ccc668f-4cbc-0bef-af67-450b47cdfee7@2ndquadrant.com |
7 years ago |
|
|
97c39498e5 |
Update copyright for 2019
Backpatch-through: certain files through 9.4 |
7 years ago |
|
|
41c912cad1 |
Clean up warnings from -Wimplicit-fallthrough.
Recent gcc can warn about switch-case fall throughs that are not explicitly labeled as intentional. This seems like a good thing, so clean up the warnings exposed thereby by labeling all such cases with comments that gcc will recognize. In files that already had one or more suitable comments, I generally matched the existing style of those. Otherwise I went with /* FALLTHROUGH */, which is one of the spellings approved at the more-restrictive-than-default level -Wimplicit-fallthrough=4. (At the default level you can also spell it /* FALL ?THRU */, and it's not picky about case. What you can't do is include additional text in the same comment, so some existing comments containing versions of this aren't good enough.) Testing with gcc 8.0.1 (Fedora 28's current version), I found that I also had to put explicit "break"s after elog(ERROR) or ereport(ERROR); apparently, for this purpose gcc doesn't recognize that those don't return. That seems like possibly a gcc bug, but it's fine because in most places we did that anyway; so this amounts to a visit from the style police. Discussion: https://postgr.es/m/15083.1525207729@sss.pgh.pa.us |
8 years ago |
|
|
9d4649ca49 |
Update copyright for 2018
Backpatch-through: certain files through 9.3 |
8 years ago |
|
|
ed87e19807 |
Mop-up for commit 85feb77aa0.
Adjust commentary in regc_pg_locale.c to remove mention of the possibility of not having <wctype.h> functions, since we no longer consider that. Eliminate duplicate code in wparser_def.c by generalizing the p_iswhat macro to take a parameter saying what to return for non-ASCII chars in C locale. (That's not really a consequence of the USE_WIDE_UPPER_LOWER-ectomy, but I noticed it while doing that.) |
8 years ago |
|
|
85feb77aa0 |
Assume wcstombs(), towlower(), and sibling functions are always present.
These functions are required by SUS v2, which is our minimum baseline
for Unix platforms, and are present on all interesting Windows versions
as well. Even our oldest buildfarm members have them. Thus, we were not
testing the "!USE_WIDE_UPPER_LOWER" code paths, which explains why the bug
fixed in commit
|
8 years ago |
|
|
c7b8998ebb |
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit
|
9 years ago |
|
|
e3860ffa4d |
Initial pgindent run with pg_bsd_indent version 2.0.
The new indent version includes numerous fixes thanks to Piotr Stefaniak. The main changes visible in this commit are: * Nicer formatting of function-pointer declarations. * No longer unexpectedly removes spaces in expressions using casts, sizeof, or offsetof. * No longer wants to add a space in "struct structname *varname", as well as some similar cases for const- or volatile-qualified pointers. * Declarations using PG_USED_FOR_ASSERTS_ONLY are formatted more nicely. * Fixes bug where comments following declarations were sometimes placed with no space separating them from the code. * Fixes some odd decisions for comments following case labels. * Fixes some cases where comments following code were indented to less than the expected column 33. On the less good side, it now tends to put more whitespace around typedef names that are not listed in typedefs.list. This might encourage us to put more effort into typedef name collection; it's not really a bug in indent itself. There are more changes coming after this round, having to do with comment indentation and alignment of lines appearing within parentheses. I wanted to limit the size of the diffs to something that could be reviewed without one's eyes completely glazing over, so it seemed better to split up the changes as much as practical. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us |
9 years ago |
|
|
6cfaffc0dd |
Fix regexport.c to behave sanely with lookaround constraints.
regexport.c thought it could just ignore LACON arcs, but the correct
behavior is to treat them as satisfiable while consuming zero input
(rather reminiscently of commit
|
9 years ago |
|
|
eccfef81e1 |
ICU support
Add a column collprovider to pg_collation that determines which library provides the collation data. The existing choices are default and libc, and this adds an icu choice, which uses the ICU4C library. The pg_locale_t type is changed to a union that contains the provider-specific locale handles. Users of locale information are changed to look into that struct for the appropriate handle to use. Also add a collversion column that records the version of the collation when it is created, and check at run time whether it is still the same. This detects potentially incompatible library upgrades that can corrupt indexes and other structures. This is currently only supported by ICU-provided collations. initdb initializes the default collation set as before from the `locale -a` output but also adds all available ICU locales with a "-x-icu" appended. Currently, ICU-provided collations can only be explicitly named collations. The global database locales are still always libc-provided. ICU support is enabled by configure --with-icu. Reviewed-by: Thomas Munro <thomas.munro@enterprisedb.com> Reviewed-by: Andreas Karlsson <andreas@proxel.se> |
9 years ago |
|
|
f97a028d8e |
Spelling fixes in code comments
From: Josh Soref <jsoref@gmail.com> |
9 years ago |
|
|
d6b059ec74 |
Document intentional violations of header inclusion policy.
Although there are good reasons for our policy of including postgres.h as the first #include in every .c file, never from .h files, there are two places where it seems expedient to violate the policy because the alternative is to modify externally-supplied .c files. (In the case of the regexp library, the idea that it's externally-supplied is kind of at odds with reality, but I haven't entirely given up hope that it will become a standalone project some day.) Add some comments to make it explicit that this is a policy violation and provide the reasoning. In passing, move #include "miscadmin.h" out of regcomp.c and into regcustom.h, which is where it should be if we're taking this reasoning seriously at all. Discussion: https://postgr.es/m/CAEepm=2zCoeq3QxVwhS5DFeUh=yU6z81pbWMgfOB8OzyiBwxzw@mail.gmail.com Discussion: https://postgr.es/m/11634.1488932128@sss.pgh.pa.us |
9 years ago |
|
|
1d25779284 |
Update copyright via script for 2017
|
9 years ago |
|
|
c54159d44c |
Make locale-dependent regex character classes work for large char codes.
Previously, we failed to recognize Unicode characters above U+7FF as being members of locale-dependent character classes such as [[:alpha:]]. (Actually, the same problem occurs for large pg_wchar values in any multibyte encoding, but UTF8 is the only case people have actually complained about.) It's impractical to get Spencer's original code to handle character classes or ranges containing many thousands of characters, because it insists on considering each member character individually at regex compile time, whether or not the character will ever be of interest at run time. To fix, choose a cutoff point MAX_SIMPLE_CHR below which we process characters individually as before, and deal with entire ranges or classes as single entities above that. We can actually make things cheaper than before for chars below the cutoff, because the color map can now be a simple linear array for those chars, rather than the multilevel tree structure Spencer designed. It's more expensive than before for chars above the cutoff, because we must do a binary search in a list of high chars and char ranges used in the regex pattern, plus call iswalpha() and friends for each locale-dependent character class used in the pattern. However, multibyte encodings are normally designed to give smaller codes to popular characters, so that we can expect that the slow path will be taken relatively infrequently. In any case, the speed penalty appears minor except when we have to apply iswalpha() etc. to high character codes at runtime --- and the previous coding gave wrong answers for those cases, so whether it was faster is moot. Tom Lane, reviewed by Heikki Linnakangas Discussion: <15563.1471913698@sss.pgh.pa.us> |
9 years ago |
|
|
a859e64003 |
Clean up another pre-ANSI-C-ism in regex code: get rid of pcolor typedef.
pcolor was used to represent function arguments that are nominally of type color, but when using a pre-ANSI C compiler would be passed as the promoted integer type. We really don't need that anymore. |
9 years ago |
|
|
6eefd2422e |
Remove typedef celt from the regex library, along with macro NOCELT.
The regex library used to have a notion of a "collating element" that was distinct from a "character", but Henry Spencer never actually implemented his planned support for multi-character collating elements, and the Tcl crew ripped out most of the stubs for that years ago. The only thing left that distinguished the "celt" typedef from the "chr" typedef was that "celt" was supposed to also be able to hold the not-a-character "NOCELT" value. However, NOCELT was not used anywhere after the MCCE stub removal changes, which means there's no need for celt to be different from chr. Removing the separate typedef simplifies matters and also removes a trap for the unwary, in that celt is signed while chr may not be, so comparisons could mean different things. There's no bug there today because we restrict CHR_MAX to be less than INT_MAX, but I think there may have been such bugs before we did that, and there could be again if anyone ever decides to fool with the range of chr. This patch also removes assorted unnecessary casts to "chr" of values that are already chrs. Many of these seem to be leftover from days when the code was compatible with pre-ANSI C. |
9 years ago |
|
|
8c95ae81fa |
Suppress compiler warnings about useless comparison of unsigned to zero.
Reportedly, some compilers warn about tests like "c < 0" if c is unsigned,
and hence complain about the character range checks I added in commit
|
10 years ago |
|
|
3bb3f42f37 |
Fix some regex issues with out-of-range characters and large char ranges.
Previously, our regex code defined CHR_MAX as 0xfffffffe, which is a bad choice because it is outside the range of type "celt" (int32). Characters approaching that limit could lead to infinite loops in logic such as "for (c = a; c <= b; c++)" where c is of type celt but the range bounds are chr. Such loops will work safely only if CHR_MAX+1 is representable in celt, since c must advance to beyond b before the loop will exit. Fortunately, there seems no reason not to restrict CHR_MAX to 0x7ffffffe. It's highly unlikely that Unicode will ever assign codes that high, and none of our other backend encodings need characters beyond that either. In addition to modifying the macro, we have to explicitly enforce character range restrictions on the values of \u, \U, and \x escape sequences, else the limit is trivially bypassed. Also, the code for expanding case-independent character ranges in bracket expressions had a potential integer overflow in its calculation of the number of characters it could generate, which could lead to allocating too small a character vector and then overwriting memory. An attacker with the ability to supply arbitrary regex patterns could easily cause transient DOS via server crashes, and the possibility for privilege escalation has not been ruled out. Quite aside from the integer-overflow problem, the range expansion code was unnecessarily inefficient in that it always produced a result consisting of individual characters, abandoning the knowledge that we had a range to start with. If the input range is large, this requires excessive memory. Change it so that the original range is reported as-is, and then we add on any case-equivalent characters that are outside that range. With this approach, we can bound the number of individual characters allowed without sacrificing much. This patch allows at most 100000 individual characters, which I believe to be more than the number of case pairs existing in Unicode, so that the restriction will never be hit in practice. It's still possible for range() to take awhile given a large character code range, so also add statement-cancel detection to its loop. The downstream function dovec() also lacked cancel detection, and could take a long time given a large output from range(). Per fuzz testing by Greg Stark. Back-patch to all supported branches. Security: CVE-2016-0773 |
10 years ago |
|
|
ee94300446 |
Update copyright for 2016
Backpatch certain files through 9.1 |
10 years ago |
|
|
a43b4ab111 |
Fix enforcement of restrictions inside regexp lookaround constraints.
Lookahead and lookbehind constraints aren't allowed to contain backrefs, and parentheses within them are always considered non-capturing. Or so says the manual. But the regexp parser forgot about these rules once inside a parenthesized subexpression, so that constructs like (\w)(?=(\1)) were accepted (but then not correctly executed --- a case like this acted like (\w)(?=\w), without any enforcement that the two \w's match the same text). And in (?=((foo))) the innermost parentheses would be counted as capturing parentheses, though no text would ever be captured for them. To fix, properly pass down the "type" argument to the recursive invocation of parse(). Back-patch to all supported branches; it was agreed that silent misexecution of such patterns is worse than throwing an error, even though new errors in minor releases are generally not desirable. |
10 years ago |
|
|
12c9a04008 |
Implement lookbehind constraints in our regular-expression engine.
A lookbehind constraint is like a lookahead constraint in that it consumes no text; but it checks for existence (or nonexistence) of a match *ending* at the current point in the string, rather than one *starting* at the current point. This is a long-requested feature since it exists in many other regex libraries, but Henry Spencer had never got around to implementing it in the code we use. Just making it work is actually pretty trivial; but naive copying of the logic for lookahead constraints leads to code that often spends O(N^2) time to scan an N-character string, because we have to run the match engine from string start to the current probe point each time the constraint is checked. In typical use-cases a lookbehind constraint will be written at the start of the regex and hence will need to be checked at every character --- so O(N^2) work overall. To fix that, I introduced a third copy of the core DFA matching loop, paralleling the existing longest() and shortest() loops. This version, matchuntil(), can suspend and resume matching given a couple of pointers' worth of storage space. So we need only run it across the string once, stopping at each interesting probe point and then resuming to advance to the next one. I also put in an optimization that simplifies one-character lookahead and lookbehind constraints, such as "(?=x)" or "(?<!\w)", into AHEAD and BEHIND constraints, which already existed in the engine. This avoids the overhead of the LACON machinery entirely for these rather common cases. The net result is that lookbehind constraints run a factor of three or so slower than Perl's for multi-character constraints, but faster than Perl's for one-character constraints ... and they work fine for variable-length constraints, which Perl gives up on entirely. So that's not bad from a competitive perspective, and there's room for further optimization if anyone cares. (In reality, raw scan rate across a large input string is probably not that big a deal for Postgres usage anyway; so I'm happy if it's linear.) |
10 years ago |
|
|
9f1e642d50 |
Fix incorrect handling of lookahead constraints in pg_regprefix().
pg_regprefix was doing nothing with lookahead constraints, which would be fine if it were the right kind of nothing, but it isn't: we have to terminate our search for a fixed prefix, not just pretend the LACON arc isn't there. Otherwise, if the current state has both a LACON outarc and a single plain-color outarc, we'd falsely conclude that the color represents an addition to the fixed prefix, and generate an extracted index condition that restricts the indexscan too much. (See added regression test case.) Terminating the search is conservative: we could traverse the LACON arc (thus assuming that the constraint can be satisfied at runtime) and then examine the outarcs of the linked-to state. But that would be a lot more work than it seems worth, because writing a LACON followed by a single plain character is a pretty silly thing to do. This makes a difference only in rather contrived cases, but it's a bug, so back-patch to all supported branches. |
10 years ago |
|
|
afdfcd3f76 |
Miscellaneous cleanup of regular-expression compiler.
Revert our previous addition of "all" flags to copyins() and copyouts(); they're no longer needed, and were never anything but an unsightly hack. Improve a couple of infelicities in the REG_DEBUG code for dumping the NFA data structure, including adding code to count the total number of states and arcs. Add a couple of missed error checks. Add some more documentation in the README file, and some regression tests illustrating cases that exceeded the state-count limit and/or took unreasonable amounts of time before this set of patches. Back-patch to all supported branches. |
10 years ago |
|
|
538b3b8b35 |
Improve memory-usage accounting in regular-expression compiler.
This code previously counted the number of NFA states it created, and complained if a limit was exceeded, so as to prevent bizarre regex patterns from consuming unreasonable time or memory. That's fine as far as it went, but the code paid no attention to how many arcs linked those states. Since regexes can be contrived that have O(N) states but will need O(N^2) arcs after fixempties() processing, it was still possible to blow out memory, and take a long time doing it too. To fix, modify the bookkeeping to count space used by both states and arcs. I did not bother with including the "color map" in the accounting; it can only grow to a few megabytes, which is not a lot in comparison to what we're allowing for states+arcs (about 150MB on 64-bit machines or half that on 32-bit machines). Looking at some of the larger real-world regexes captured in the Tcl regression test suite suggests that the most that is likely to be needed for regexes found in the wild is under 10MB, so I believe that the current limit has enough headroom to make it okay to keep it as a hard-wired limit. In connection with this, redefine REG_ETOOBIG as meaning "regular expression is too complex"; the previous wording of "nfa has too many states" was already somewhat inapropos because of the error code's use for stack depth overrun, and it was not very user-friendly either. Back-patch to all supported branches. |
10 years ago |
|
|
6a7153661d |
Improve performance of pullback/pushfwd in regular-expression compiler.
The previous coding would create a new intermediate state every time it wanted to interchange the ordering of two constraint arcs. Certain regex features such as \Y can generate large numbers of parallel constraint arcs, and if we needed to reorder the results of that, we created unreasonable numbers of intermediate states. To improve matters, keep a list of already-created intermediate states associated with the state currently being considered by the outer loop; we can re-use such states to place all the new arcs leading to the same destination or source. I also took the trouble to redefine push() and pull() to have a less risky API: they no longer delete any state or arc that the caller might possibly have a pointer to, except for the specifically-passed constraint arc. This reduces the risk of re-introducing the same type of error seen in the failed patch for CVE-2007-4772. Back-patch to all supported branches. |
10 years ago |
|
|
f5b7d103bc |
Improve performance of fixempties() pass in regular-expression compiler.
The previous coding took something like O(N^4) time to fully process a chain of N EMPTY arcs. We can't really do much better than O(N^2) because we have to insert about that many arcs, but we can do lots better than what's there now. The win comes partly from using mergeins() to amortize de-duplication of arcs across multiple source states, and partly from exploiting knowledge of the ordering of arcs for each state to avoid looking at arcs we don't need to consider during the scan. We do have to be a bit careful of the possible reordering of arcs introduced by the sort-merge coding of the previous commit, but that's not hard to deal with. Back-patch to all supported branches. |
10 years ago |
|
|
579840ca05 |
Fix O(N^2) performance problems in regular-expression compiler.
Change the singly-linked in-arc and out-arc lists to be doubly-linked, so that arc deletion is constant time rather than having worst-case time proportional to the number of other arcs on the connected states. Modify the bulk arc transfer operations copyins(), copyouts(), moveins(), moveouts() so that they use a sort-and-merge algorithm whenever there's more than a small number of arcs to be copied or moved. The previous method is O(N^2) in the number of arcs involved, because it performs duplicate checking independently for each copied arc. The new method may change the ordering of existing arcs for the destination state, but nothing really cares about that. Provide another bulk arc copying method mergeins(), which is unused as of this commit but is needed for the next one. It basically is like copyins(), but the source arcs might not all come from the same state. Replace the O(N^2) bubble-sort algorithm used in carcsort() with a qsort() call. These changes greatly improve the performance of regex compilation for large or complex regexes, at the cost of extra space for arc storage during compilation. The original tradeoff was probably fine when it was made, but now we care more about speed and less about memory consumption. Back-patch to all supported branches. |
10 years ago |
|
|
48789c5d23 |
Fix regular-expression compiler to handle loops of constraint arcs.
It's possible to construct regular expressions that contain loops of constraint arcs (that is, ^ $ AHEAD BEHIND or LACON arcs). There's no use in fully traversing such a loop at execution, since you'd just end up in the same NFA state without having consumed any input. Worse, such a loop leads to infinite looping in the pullback/pushfwd stage of compilation, because we keep pushing or pulling the same constraints around the loop in a vain attempt to move them to the pre or post state. Such looping was previously recognized in CVE-2007-4772; but the fix only handled the case of trivial single-state loops (that is, a constraint arc leading back to its source state) ... and not only that, it was incorrect even for that case, because it broke the admittedly-not-very-clearly-stated API contract of the pull() and push() subroutines. The first two regression test cases added by this commit exhibit patterns that result in assertion failures because of that (though there seem to be no ill effects in non-assert builds). The other new test cases exhibit multi-state constraint loops; in an unpatched build they will run until the NFA state-count limit is exceeded. To fix, remove the code added for CVE-2007-4772, and instead create a general-purpose constraint-loop-breaking phase of regex compilation that executes before we do pullback/pushfwd. Since we never need to traverse a constraint loop fully, we can just break the loop at any chosen spot, if we add clone states that can replicate any sequence of arc transitions that would've traversed just part of the loop. Also add some commentary clarifying why we have to have all these machinations in the first place. This class of problems has been known for some time --- we had a report from Marc Mamin about two years ago, for example, and there are related complaints in the Tcl bug tracker. I had discussed a fix of this kind off-list with Henry Spencer, but didn't get around to doing something about it until the issue was rediscovered by Greg Stark recently. Back-patch to all supported branches. |
10 years ago |
|
|
b63fc28776 |
Add recursion depth protections to regular expression matching.
Some of the functions in regex compilation and execution recurse, and therefore could in principle be driven to stack overflow. The Tcl crew has seen this happen in practice in duptraverse(), though their fix was to put in a hard-wired limit on the number of recursive levels, which is not too appetizing --- fortunately, we have enough infrastructure to check the actually available stack. Greg Stark has also seen it in other places while fuzz testing on a machine with limited stack space. Let's put guards in to prevent crashes in all these places. Since the regex code would leak memory if we simply threw elog(ERROR), we have to introduce an API that checks for stack depth without throwing such an error. Fortunately that's not difficult. |
10 years ago |
|
|
f2c4ffc330 |
Fix potential infinite loop in regular expression execution.
In cfindloop(), if the initial call to shortest() reports that a
zero-length match is possible at the current search start point, but then
it is unable to construct any actual match to that, it'll just loop around
with the same start point, and thus make no progress. We need to force the
start point to be advanced. This is safe because the loop over "begin"
points has already tried and failed to match starting at "close", so there
is surely no need to try that again.
This bug was introduced in commit
|
10 years ago |
|
|
9fe8fe9c9e |
Add some more query-cancel checks to regular expression matching.
Commit
|
10 years ago |
|
|
d9c0c728af |
Fix low-probability memory leak in regex execution.
After an internal failure in shortest() or longest() while pinning down the exact location of a match, find() forgot to free the DFA structure before returning. This is pretty unlikely to occur, since we just successfully ran the "search" variant of the DFA; but it could happen, and it would result in a session-lifespan memory leak since this code uses malloc() directly. Problem seems to have been aboriginal in Spencer's library, so back-patch all the way. In passing, correct a thinko in a comment I added awhile back about the meaning of the "ntree" field. I happened across these issues while comparing our code to Tcl's version of the library. |
10 years ago |
|
|
b44d92b67b |
Sync regex code with Tcl 8.6.4.
Sync our regex code with upstream changes since last time we did this,
which was Tcl 8.5.11 (see commit
|
10 years ago |
|
|
91cf3135b9 |
Fix minor bug in regexp makesearch() function.
The list-wrangling here was done wrong, allowing the same state to get put into the list twice. The following loop then would clone it twice. The second clone would wind up with no inarcs, so that there was no observable misbehavior AFAICT, but a useless state in the finished NFA isn't an especially good thing. |
10 years ago |
|
|
8a0258c318 |
Fix some possible low-memory failures in regexp compilation.
newnfa() failed to set the regex error state when malloc() fails. Several places in regcomp.c failed to check for an error after calling subre(). Each of these mistakes could lead to null-pointer-dereference crashes in memory-starved backends. Report and patch by Andreas Seltenreich. Back-patch to all branches. |
11 years ago |
|
|
586dd5d6a5 |
Replace a bunch more uses of strncpy() with safer coding.
strncpy() has a well-deserved reputation for being unsafe, so make an effort to get rid of nearly all occurrences in HEAD. A large fraction of the remaining uses were passing length less than or equal to the known strlen() of the source, in which case no null-padding can occur and the behavior is equivalent to memcpy(), though doubtless slower and certainly harder to reason about. So just use memcpy() in these cases. In other cases, use either StrNCpy() or strlcpy() as appropriate (depending on whether padding to the full length of the destination buffer seems useful). I left a few strncpy() calls alone in the src/timezone/ code, to keep it in sync with upstream (the IANA tzcode distribution). There are also a few such calls in ecpg that could possibly do with more analysis. AFAICT, none of these changes are more than cosmetic, except for the four occurrences in fe-secure-openssl.c, which are in fact buggy: an overlength source leads to a non-null-terminated destination buffer and ensuing misbehavior. These don't seem like security issues, first because no stack clobber is possible and second because if your values of sslcert etc are coming from untrusted sources then you've got problems way worse than this. Still, it's undesirable to have unpredictable behavior for overlength inputs, so back-patch those four changes to all active branches. |
11 years ago |
|
|
4baaf863ec |
Update copyright for 2015
Backpatch certain files through 9.0 |
11 years ago |
|
|
3694b4d7e1 |
Fix incorrect search for "x?" style matches in creviterdissect().
When the number of allowed iterations is limited (either a "?" quantifier
or a bound expression), the last sub-match has to reach to the end of the
target string. The previous coding here first tried the shortest possible
match (one character, usually) and then gave up and back-tracked if that
didn't work, typically leading to failure to match overall, as shown in
bug #11478 from Christoph Berg. The minimum change to fix that would be to
not decrement k before "goto backtrack"; but that would be a pretty stupid
solution, because we'd laboriously try each possible sub-match length
before finally discovering that only ending at the end can work. Instead,
force the sub-match endpoint limit up to the end for even the first
shortest() call if we cannot have any more sub-matches after this one.
Bug introduced in my rewrite that added the iterdissect logic, commit
|
11 years ago |
|
|
1567e659a8 |
Fix two low-probability memory leaks in regular expression parsing.
If pg_regcomp failed after having invoked markst/cleanst, it would leak any "struct subre" nodes it had created. (We've already detected all regex syntax errors at that point, so the only likely causes of later failure would be query cancel or out-of-memory.) To fix, make sure freesrnode knows the difference between the pre-cleanst and post-cleanst cleanup procedures. Add some documentation of this less-than-obvious point. Also, newlacon did the wrong thing with an out-of-memory failure from realloc(), so that the previously allocated array would be leaked. Both of these are pretty low-probability scenarios, but a bug is a bug, so patch all the way back. Per bug #10976 from Arthur O'Dwyer. |
12 years ago |
|
|
0a78320057 |
pgindent run for 9.4
This includes removing tabs after periods in C comments, which was applied to back branches, so this change should not effect backpatching. |
12 years ago |
|
|
ea8c7e9054 |
Fix memory leak during regular expression execution.
For a regex containing backrefs, pg_regexec() might fail to free all the
sub-DFAs that were created during execution, resulting in a permanent
(session lifespan) memory leak. Problem was introduced by me in commit
|
12 years ago |
|
|
9662143f0c |
Allow regex operations to be terminated early by query cancel requests.
The regex code didn't have any provision for query cancel; which is unsurprising given its non-Postgres origin, but still problematic since some operations can take a long time. Introduce a callback function to check for a pending query cancel or session termination request, and call it in a couple of strategic spots where we can make the regex code exit with an error indicator. If we ever actually split out the regex code as a standalone library, some additional work will be needed to let the cancel callback function be specified externally to the library. But that's straightforward (certainly so by comparison to putting the locale-dependent character classification logic on a similar arms-length basis), and there seems no need to do it right now. A bigger issue is that there may be more places than these two where we need to check for cancels. We can always add more checks later, now that the infrastructure is in place. Since there are known examples of not-terribly-long regexes that can lock up a backend for a long time, back-patch to all supported branches. I have hopes of fixing the known performance problems later, but adding query cancel ability seems like a good idea even if they were all fixed. |
12 years ago |
|
|
0d79c0a8cc |
Make various variables const (read-only).
These changes should generally improve correctness/maintainability. A nice side benefit is that several kilobytes move from initialized data to text segment, allowing them to be shared across processes and probably reducing copy-on-write overhead while forking a new backend. Unfortunately this doesn't seem to help libpq in the same way (at least not when it's compiled with -fpic on x86_64), but we can hope the linker at least collects all nominally-const data together even if it's not actually part of the text segment. Also, make pg_encname_tbl[] static in encnames.c, since there seems no very good reason for any other code to use it; per a suggestion from Wim Lewis, who independently submitted a patch that was mostly a subset of this one. Oskari Saarenmaa, with some editorialization by me |
12 years ago |
|
|
7e04792a1c |
Update copyright for 2014
Update all files in head, and files COPYRIGHT and legal.sgml in all back branches. |
12 years ago |
|
|
e2bd904955 |
Fix regex match failures for backrefs combined with non-greedy quantifiers.
An ancient logic error in cfindloop() could cause the regex engine to fail to find matches that begin later than the start of the string. This function is only used when the regex pattern contains a back reference, and so far as we can tell the error is only reachable if the pattern is non-greedy (i.e. its first quantifier uses the ? modifier). Furthermore, the actual match must begin after some potential match that satisfies the DFA but then fails the back-reference's match test. Reported and fixed by Jeevan Chalke, with cosmetic adjustments by me. |
13 years ago |
|
|
9af4159fce |
pgindent run for release 9.3
This is the first run of the Perl-based pgindent script. Also update pgindent instructions. |
13 years ago |
|
|
3ccae48f44 |
Support indexing of regular-expression searches in contrib/pg_trgm.
This works by extracting trigrams from the given regular expression, in generally the same spirit as the previously-existing support for LIKE searches, though of course the details are far more complicated. Currently, only GIN indexes are supported. We might be able to make it work with GiST indexes later. The implementation includes adding API functions to backend/regex/ to provide a view of the search NFA created from a regular expression. These functions are meant to be generic enough to be supportable in a standalone version of the regex library, should that ever happen. Alexander Korotkov, reviewed by Heikki Linnakangas and Tom Lane |
13 years ago |
|
|
bf2b0a1478 |
Fix crash on compiling a regular expression with more than 32k colors.
Throw an error instead. Backpatch to all supported branches. |
13 years ago |
|
|
a7b61d4f5a |
Fix infinite-loop risk in fixempties() stage of regex compilation.
The previous coding of this function could get into situations where it would never terminate, because successive passes would re-add EMPTY arcs that had been removed by the previous pass. Rewrite the function completely using a new algorithm that is guaranteed to terminate, and also seems to be usually faster than the old one. Per Tcl bugs 3604074 and 3606683. Tom Lane and Don Porter |
13 years ago |
|
|
73dc003bee |
Add missing error check in regexp parser.
parseqatom() failed to check for an error return (NULL result) from its recursive call to parsebranch(), and in consequence could crash with a null-pointer dereference after an error return. This bug has been there since day one, but wasn't noticed before, probably because most error cases in parsebranch() didn't actually lead to returning NULL. Add the missing error check, and also tweak parsebranch() to exit in a less indirect fashion after a call to parseqatom() fails. Report by Tomasz Karlik, fix by me. |
13 years ago |