From 2e480d2b1f9e8e1119d7d2b305eaf4714b39a79d Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Wed, 8 Feb 2023 13:09:27 +0900
Subject: [PATCH 01/78] Make EXEC_BACKEND more convenient on Linux and FreeBSD.

Try to disable ASLR when building in EXEC_BACKEND mode, to avoid random
memory mapping failures while testing.  For developer use only, no
effect on regular builds.

This was originally applied as f3e7806 for v15~, but the recently-added
buildfarm member gokiburi tests this configuration on older branches as
well, where it fails randomly because ASLR is still enabled.

Suggested-by: Andres Freund <andres@anarazel.de>
Tested-by: Bossart, Nathan <bossartn@amazon.com>
Discussion: https://postgr.es/m/20210806032944.m4tz7j2w47mant26%40alap3.anarazel.de
Backpatch-through: 12
---
 configure                     |  2 +-
 configure.ac                  |  1 +
 src/bin/pg_ctl/pg_ctl.c       |  4 ++++
 src/common/exec.c             | 33 +++++++++++++++++++++++++++++++++
 src/include/pg_config.h.in    |  3 +++
 src/include/port.h            |  5 +++++
 src/test/regress/pg_regress.c |  4 ++++
 src/tools/msvc/Solution.pm    |  1 +
 8 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index e6a9e520a88..31145cac632 100755
--- a/configure
+++ b/configure
@@ -16895,7 +16895,7 @@ $as_echo "#define HAVE_STDBOOL_H 1" >>confdefs.h
 fi
 
 
-for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h mbarrier.h poll.h sys/epoll.h sys/event.h sys/ipc.h sys/prctl.h sys/procctl.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/signalfd.h sys/sockio.h sys/tas.h sys/uio.h sys/un.h termios.h ucred.h wctype.h
+for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h mbarrier.h poll.h sys/epoll.h sys/event.h sys/ipc.h sys/personality.h sys/prctl.h sys/procctl.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/signalfd.h sys/sockio.h sys/tas.h sys/uio.h sys/un.h termios.h ucred.h wctype.h
 do :
   as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
 ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
diff --git a/configure.ac b/configure.ac
index 85c18bc94a7..44bc8c47c30 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1904,6 +1904,7 @@ AC_CHECK_HEADERS(m4_normalize([
 	sys/epoll.h
 	sys/event.h
 	sys/ipc.h
+	sys/personality.h
 	sys/prctl.h
 	sys/procctl.h
 	sys/pstat.h
diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index cc733cb7be1..26d62e823cf 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -466,6 +466,10 @@ start_postmaster(void)
 	fflush(stdout);
 	fflush(stderr);
 
+#ifdef EXEC_BACKEND
+	pg_disable_aslr();
+#endif
+
 	pm_pid = fork();
 	if (pm_pid < 0)
 	{
diff --git a/src/common/exec.c b/src/common/exec.c
index 5159b616a39..dbac0598be0 100644
--- a/src/common/exec.c
+++ b/src/common/exec.c
@@ -27,6 +27,14 @@
 #include "common/mdb_locale.h"
 
 
+#ifdef EXEC_BACKEND
+#if defined(HAVE_SYS_PERSONALITY_H)
+#include <sys/personality.h>
+#elif defined(HAVE_SYS_PROCCTL_H)
+#include <sys/procctl.h>
+#endif
+#endif
+
 /* Inhibit mingw CRT's auto-globbing of command line arguments */
 #if defined(WIN32) && !defined(_MSC_VER)
 extern int _CRT_glob = 0; /* 0 turns off globbing; 1 turns it on */
@@ -477,6 +485,31 @@ set_pglocale_pgservice(const char *argv0, const char *app)
 	}
 }
 
+#ifdef EXEC_BACKEND
+/*
+ * For the benefit of PostgreSQL developers testing EXEC_BACKEND on Unix
+ * systems (code paths normally exercised only on Windows), provide a way to
+ * disable address space layout randomization, if we know how on this platform.
+ * Otherwise, backends may fail to attach to shared memory at the fixed address
+ * chosen by the postmaster.  (See also the macOS-specific hack in
+ * sysv_shmem.c.)
+ */
+int
+pg_disable_aslr(void)
+{
+#if defined(HAVE_SYS_PERSONALITY_H)
+	return personality(ADDR_NO_RANDOMIZE);
+#elif defined(HAVE_SYS_PROCCTL_H) && defined(PROC_ASLR_FORCE_DISABLE)
+	int			data = PROC_ASLR_FORCE_DISABLE;
+
+	return procctl(P_PID, 0, PROC_ASLR_CTL, &data);
+#else
+	errno = ENOSYS;
+	return -1;
+#endif
+}
+#endif
+
 #ifdef WIN32
 
 /*
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index be448f49978..e329e93bf47 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -703,6 +703,9 @@
 /* Define to 1 if you have the <sys/ipc.h> header file. */
 #undef HAVE_SYS_IPC_H
 
+/* Define to 1 if you have the <sys/personality.h> header file. */
+#undef HAVE_SYS_PERSONALITY_H
+
 /* Define to 1 if you have the <sys/prctl.h> header file. */
 #undef HAVE_SYS_PRCTL_H
 
diff --git a/src/include/port.h b/src/include/port.h
index 28d96d67313..b74a38b3060 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -142,6 +142,11 @@ extern char *pipe_read_line(char *cmd, char *line, int maxsize);
 #define PG_VERSIONSTR "postgres (Apache Cloudberry) " PG_VERSION "\n"
 #define PG_BACKEND_VERSIONSTR "postgres (Apache Cloudberry) " PG_VERSION "\n"
 
+#ifdef EXEC_BACKEND
+/* Disable ASLR before exec, for developer builds only (in exec.c) */
+extern int pg_disable_aslr(void);
+#endif
+
 
 #if defined(WIN32) || defined(__CYGWIN__)
 #define EXE ".exe"
diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c
index c479142222e..c9921aaf5ce 100644
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -1625,6 +1625,10 @@ spawn_process(const char *cmdline)
 	if (logfile)
 		fflush(logfile);
 
+#ifdef EXEC_BACKEND
+	pg_disable_aslr();
+#endif
+
 	pid = fork();
 	if (pid == -1)
 	{
diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index 8d76035ed92..58fbf64d120 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -399,6 +399,7 @@ sub GenerateFiles
 		HAVE_SYS_EPOLL_H                         => undef,
 		HAVE_SYS_EVENT_H                         => undef,
 		HAVE_SYS_IPC_H                           => undef,
+		HAVE_SYS_PERSONALITY_H                   => undef,
 		HAVE_SYS_PRCTL_H                         => undef,
 		HAVE_SYS_PROCCTL_H                       => undef,
 		HAVE_SYS_PSTAT_H                         => undef,

From 3cd80fd4276fa0dfcd308f4caebc8cde56b2a5d5 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 8 Feb 2023 17:15:23 -0500
Subject: [PATCH 02/78] Stop recommending auto-download of DTD files, and
 indeed disable it.

It appears no longer possible to build the SGML docs without a local
installation of the DocBook DTD, because sourceforge.net now only
permits HTTPS access, and no common version of xsltproc supports that.
Hence, remove the bits of our documentation suggesting that that's
possible or useful.

In fact, we might as well add the --nonet option to the build recipes
automatically, for a bit of extra security.

Also fix our documentation-tool-installation recipes for macOS to
ensure that xmllint and xsltproc are pulled in from MacPorts or
Homebrew.  The previous recipes assumed you could use the
Apple-supplied versions of these tools; which still works, except that
you'd need to set an environment variable to ensure that they would
find DTD files provided by those package managers.  Simpler and easier
to just recommend pulling in the additional packages.

In HEAD, also document how to build docs using Meson, and adjust
"ninja docs" to just build the HTML docs, for consistency with the
default behavior of doc/src/sgml/Makefile.

In a fit of neatnik-ism, I also made the ordering of the package
lists match the order in which the tools are described at the head
of the appendix.

Aleksander Alekseev, Peter Eisentraut, Tom Lane

Discussion: https://postgr.es/m/CAJ7c6TO8Aro2nxg=EQsVGiSDe-TstP4EsSvDHd7DSRsP40PgGA@mail.gmail.com
---
 doc/src/sgml/Makefile        |  8 ++++--
 doc/src/sgml/docguide.sgml   | 55 ++++++++++++++++++------------------
 doc/src/sgml/images/Makefile |  2 +-
 3 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile
index d10f9e5b398..58ae606bccf 100644
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -44,11 +44,15 @@ endif
 
 XMLINCLUDE = --path .
 
-ifndef XMLLINT
+ifdef XMLLINT
+XMLLINT := $(XMLLINT) --nonet
+else
 XMLLINT = $(missing) xmllint
 endif
 
-ifndef XSLTPROC
+ifdef XSLTPROC
+XSLTPROC := $(XSLTPROC) --nonet
+else
 XSLTPROC = $(missing) xsltproc
 endif
 
diff --git a/doc/src/sgml/docguide.sgml b/doc/src/sgml/docguide.sgml
index e1bac68604f..55ef6417749 100644
--- a/doc/src/sgml/docguide.sgml
+++ b/doc/src/sgml/docguide.sgml
@@ -136,6 +136,7 @@
      <listitem>
       <para>
        This is a program for converting, among other things, XML to PDF.
+       It is needed only if you want to build the documentation in PDF format.
       </para>
      </listitem>
     </varlistentry>
@@ -151,25 +152,13 @@
    here.
   </para>
 
-  <para>
-   You can get away with not installing DocBook XML and the DocBook XSLT
-   stylesheets locally, because the required files will be downloaded from the
-   Internet and cached locally.  This may in fact be the preferred solution if
-   your operating system packages provide only an old version of these files,
-   or if no packages are available at all.
-   If you want to prevent any attempt to access the Internet while building
-   the documentation, you need to pass the <option>--nonet</option> option
-   to <command>xmllint</command> and <command>xsltproc</command>; see below
-   for an example.
-  </para>
-
   <sect2>
    <title>Installation on Fedora, RHEL, and Derivatives</title>
 
    <para>
     To install the required packages, use:
 <programlisting>
-yum install docbook-dtds docbook-style-xsl fop libxslt
+yum install docbook-dtds docbook-style-xsl libxslt fop
 </programlisting>
    </para>
   </sect2>
@@ -180,7 +169,7 @@ yum install docbook-dtds docbook-style-xsl fop libxslt
    <para>
     To install the required packages with <command>pkg</command>, use:
 <programlisting>
-pkg install docbook-xml docbook-xsl fop libxslt
+pkg install docbook-xml docbook-xsl libxslt fop
 </programlisting>
    </para>
 
@@ -199,7 +188,7 @@ pkg install docbook-xml docbook-xsl fop libxslt
     available for <productname>Debian GNU/Linux</productname>.
     To install, simply use:
 <programlisting>
-apt-get install docbook-xml docbook-xsl fop libxml2-utils xsltproc
+apt-get install docbook-xml docbook-xsl libxml2-utils xsltproc fop
 </programlisting>
    </para>
   </sect2>
@@ -208,21 +197,37 @@ apt-get install docbook-xml docbook-xsl fop libxml2-utils xsltproc
    <title>macOS</title>
 
    <para>
-    On macOS, you can build the HTML and man documentation without installing
-    anything extra.  If you want to build PDFs or want to install a local copy
-    of DocBook, you can get those from your preferred package manager.
+    If you use MacPorts, the following will get you set up:
+<programlisting>
+sudo port install docbook-xml docbook-xsl-nons libxslt fop
+</programlisting>
+    If you use Homebrew, use this:
+<programlisting>
+brew install docbook docbook-xsl libxslt fop
+</programlisting>
    </para>
 
    <para>
-    If you use MacPorts, the following will get you set up:
+    The Homebrew-supplied programs require the following environment variable
+    to be set:
 <programlisting>
-sudo port install docbook-xml-4.5 docbook-xsl fop
+export XML_CATALOG_FILES=/usr/local/etc/xml/catalog
 </programlisting>
-    If you use Homebrew, use this:
+    Without it, <command>xsltproc</command> will throw errors like this:
 <programlisting>
-brew install docbook docbook-xsl fop
+I/O error : Attempt to load network entity http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd
+postgres.sgml:21: warning: failed to load external entity "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd"
+...
 </programlisting>
    </para>
+
+   <para>
+    While it is possible to use the Apple-provided versions
+    of <command>xmllint</command> and <command>xsltproc</command>
+    instead of those from MacPorts or Homebrew, you'll still need
+    to install the DocBook DTD and stylesheets, and set up a catalog
+    file that points to them.
+   </para>
   </sect2>
 
   <sect2 id="docguide-toolsets-configure">
@@ -253,12 +258,6 @@ checking for dbtoepub... dbtoepub
    these programs, for example
 <screen>
 ./configure ... XMLLINT=/opt/local/bin/xmllint ...
-</screen>
-   Also, if you want to ensure that <filename>xmllint</filename>
-   and <filename>xsltproc</filename> will not perform any network access,
-   you can do something like
-<screen>
-./configure ... XMLLINT="xmllint --nonet" XSLTPROC="xsltproc --nonet" ...
 </screen>
   </para>
   </sect2>
diff --git a/doc/src/sgml/images/Makefile b/doc/src/sgml/images/Makefile
index f9e356348b2..645519095d0 100644
--- a/doc/src/sgml/images/Makefile
+++ b/doc/src/sgml/images/Makefile
@@ -9,7 +9,7 @@ ALL_IMAGES = \
 
 DITAA = ditaa
 DOT = dot
-XSLTPROC = xsltproc
+XSLTPROC = xsltproc --nonet
 
 all: $(ALL_IMAGES)
 

From bf3ee4c7f939303106fb9ce01c15609ccf7cc7c6 Mon Sep 17 00:00:00 2001
From: David Rowley <drowley@postgresql.org>
Date: Mon, 13 Feb 2023 17:09:55 +1300
Subject: [PATCH 03/78] Disable WindowAgg inverse transitions when subplans are
 present

When an aggregate function is used as a WindowFunc and a tuple transitions
out of the window frame, we ordinarily try to make use of the aggregate
function's inverse transition function to "unaggregate" the exiting tuple.

This optimization is disabled for various cases, including when the
aggregate contains a volatile function.  In such a case we'd be unable to
ensure that the transition value was calculated to the same value during
transitions and inverse transitions.  Unfortunately, we did this check by
calling contain_volatile_functions() which does not recursively search
SubPlans for volatile functions.  If the aggregate function's arguments or
its FILTER clause contained a subplan with volatile functions then we'd
fail to notice this.

Here we fix this by just disabling the optimization when the WindowFunc
contains any subplans.  Volatile functions are not the only reason that a
subplan may have nonrepeatable results.

Bug: #17777
Reported-by: Anban Company
Discussion: https://postgr.es/m/17777-860b739b6efde977%40postgresql.org
Reviewed-by: Tom Lane
Backpatch-through: 11
---
 src/backend/executor/nodeWindowAgg.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
index 611fcf94a89..f36a46cc79f 100644
--- a/src/backend/executor/nodeWindowAgg.c
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -43,6 +43,8 @@
 #include "nodes/execnodes.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/optimizer.h"
 #include "parser/parse_agg.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_oper.h"
@@ -3032,16 +3034,24 @@ initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc,
 	 * aggregate's arguments (and FILTER clause if any) contain any calls to
 	 * volatile functions.  Otherwise, the difference between restarting and
 	 * not restarting the aggregation would be user-visible.
+	 *
+	 * We also don't risk using moving aggregates when there are subplans in
+	 * the arguments or FILTER clause.  This is partly because
+	 * contain_volatile_functions() doesn't look inside subplans; but there
+	 * are other reasons why a subplan's output might be volatile.  For
+	 * example, syncscan mode can render the results nonrepeatable.
 	 */
 	if (!OidIsValid(aggform->aggminvtransfn))
 		use_ma_code = false;	/* sine qua non */
 	else if (aggform->aggmfinalmodify == AGGMODIFY_READ_ONLY &&
-			 aggform->aggfinalmodify != AGGMODIFY_READ_ONLY)
+		aggform->aggfinalmodify != AGGMODIFY_READ_ONLY)
 		use_ma_code = true;		/* decision forced by safety */
 	else if (winstate->frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING)
 		use_ma_code = false;	/* non-moving frame head */
 	else if (contain_volatile_functions((Node *) wfunc))
 		use_ma_code = false;	/* avoid possible behavioral change */
+	else if (contain_subplans((Node *) wfunc))
+		use_ma_code = false;	/* subplans might contain volatile functions */
 	else
 		use_ma_code = true;		/* yes, let's use it */
 	if (use_ma_code)

From 48813fbd8e73727db86f675959ae251f63198d77 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Wed, 15 Feb 2023 10:12:33 +0900
Subject: [PATCH 04/78] Fix handling of SCRAM-SHA-256's channel binding with
 RSA-PSS certificates

OpenSSL 1.1.1 and newer versions have added support for RSA-PSS
certificates, which require the use of a specific routine in OpenSSL to
determine which hash function to use when computing the certificate hash
for channel binding in SCRAM-SHA-256.  X509_get_signature_nid(), the
routine the channel binding code originally relied on, is not able to
determine which hash algorithm to use for such certificates.  However,
X509_get_signature_info(), new to OpenSSL 1.1.1, is able to do it.  This
commit switches the channel binding logic to rely on
X509_get_signature_info() over X509_get_signature_nid(), which would be
the choice when building with 1.1.1 or newer.

The error could have been triggered on the client or the server, hence
libpq and the backend need to have their related code paths patched.
Note that attempting to load an RSA-PSS certificate with OpenSSL 1.1.0
or older leads to a failure due to an unsupported algorithm.

The idea of relying on X509_get_signature_info() comes from Jacob and
the tests were written by Heikki (with a few tweaks from me), while I
bundled the whole together and added the bits needed for MSVC and
meson.

This issue has existed for as long as channel binding has, so backpatch
all the way down.  Some tests are added in 15~, triggered if compiling
with OpenSSL 1.1.1 or newer, where the certificate and key files can
easily be generated for RSA-PSS.

Reported-by: Gunnar "Nick" Bluth
Author: Jacob Champion, Heikki Linnakangas
Discussion: https://postgr.es/m/17760-b6c61e752ec07060@postgresql.org
Backpatch-through: 11
---
 configure                                | 12 ++++++++++++
 configure.ac                             |  2 ++
 src/backend/libpq/be-secure-openssl.c    |  9 +++++++--
 src/include/libpq/libpq-be.h             |  2 +-
 src/include/pg_config.h.in               |  3 +++
 src/interfaces/libpq/fe-secure-openssl.c |  9 +++++++--
 src/interfaces/libpq/libpq-int.h         |  2 +-
 src/tools/msvc/Solution.pm               | 10 +++++++++-
 8 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/configure b/configure
index 31145cac632..127c9f2ee09 100755
--- a/configure
+++ b/configure
@@ -15802,6 +15802,18 @@ if test "x$ac_cv_func_CRYPTO_lock" = xyes; then :
 #define HAVE_CRYPTO_LOCK 1
 _ACEOF
 
+fi
+done
+
+  # Function introduced in OpenSSL 1.1.1.
+  for ac_func in X509_get_signature_info
+do :
+  ac_fn_c_check_func "$LINENO" "X509_get_signature_info" "ac_cv_func_X509_get_signature_info"
+if test "x$ac_cv_func_X509_get_signature_info" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_X509_GET_SIGNATURE_INFO 1
+_ACEOF
+
 fi
 done
 
diff --git a/configure.ac b/configure.ac
index 44bc8c47c30..7e3292a37ae 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1741,6 +1741,8 @@ if test "$with_ssl" = openssl ; then
   # thread-safety. In 1.1.0, it's no longer required, and CRYPTO_lock()
   # function was removed.
   AC_CHECK_FUNCS([CRYPTO_lock])
+  # Function introduced in OpenSSL 1.1.1.
+  AC_CHECK_FUNCS([X509_get_signature_info])
   AC_DEFINE([USE_OPENSSL], 1, [Define to 1 to build with OpenSSL support. (--with-ssl=openssl)])
 elif test "$with_ssl" != no ; then
   AC_MSG_ERROR([--with-ssl must specify openssl])
diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c
index c48812f955a..e39952494e6 100644
--- a/src/backend/libpq/be-secure-openssl.c
+++ b/src/backend/libpq/be-secure-openssl.c
@@ -1308,7 +1308,7 @@ be_tls_get_peer_serial(Port *port, char *ptr, size_t len)
 		ptr[0] = '\0';
 }
 
-#ifdef HAVE_X509_GET_SIGNATURE_NID
+#if defined(HAVE_X509_GET_SIGNATURE_NID) || defined(HAVE_X509_GET_SIGNATURE_INFO)
 char *
 be_tls_get_certificate_hash(Port *port, size_t *len)
 {
@@ -1326,10 +1326,15 @@ be_tls_get_certificate_hash(Port *port, size_t *len)
 
 	/*
 	 * Get the signature algorithm of the certificate to determine the hash
-	 * algorithm to use for the result.
+	 * algorithm to use for the result.  Prefer X509_get_signature_info(),
+	 * introduced in OpenSSL 1.1.1, which can handle RSA-PSS signatures.
 	 */
+#if HAVE_X509_GET_SIGNATURE_INFO
+	if (!X509_get_signature_info(server_cert, &algo_nid, NULL, NULL, NULL))
+#else
 	if (!OBJ_find_sigid_algs(X509_get_signature_nid(server_cert),
 							 &algo_nid, NULL))
+#endif
 		elog(ERROR, "could not determine server certificate signature algorithm");
 
 	/*
diff --git a/src/include/libpq/libpq-be.h b/src/include/libpq/libpq-be.h
index de4883f4f69..8d2258aea44 100644
--- a/src/include/libpq/libpq-be.h
+++ b/src/include/libpq/libpq-be.h
@@ -307,7 +307,7 @@ extern void be_tls_get_peer_serial(Port *port, char *ptr, size_t len);
  * This is not supported with old versions of OpenSSL that don't have
  * the X509_get_signature_nid() function.
  */
-#if defined(USE_OPENSSL) && defined(HAVE_X509_GET_SIGNATURE_NID)
+#if defined(USE_OPENSSL) && (defined(HAVE_X509_GET_SIGNATURE_NID) || defined(HAVE_X509_GET_SIGNATURE_INFO))
 #define HAVE_BE_TLS_GET_CERTIFICATE_HASH
 extern char *be_tls_get_certificate_hash(Port *port, size_t *len);
 #endif
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index e329e93bf47..b037fc11501 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -814,6 +814,9 @@
 /* Define to 1 if you have the `writev' function. */
 #undef HAVE_WRITEV
 
+/* Define to 1 if you have the `X509_get_signature_info' function. */
+#undef HAVE_X509_GET_SIGNATURE_INFO
+
 /* Define to 1 if you have the `X509_get_signature_nid' function. */
 #undef HAVE_X509_GET_SIGNATURE_NID
 
diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c
index c0988e10a30..d75a823b880 100644
--- a/src/interfaces/libpq/fe-secure-openssl.c
+++ b/src/interfaces/libpq/fe-secure-openssl.c
@@ -378,7 +378,7 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
 	return n;
 }
 
-#ifdef HAVE_X509_GET_SIGNATURE_NID
+#if defined(HAVE_X509_GET_SIGNATURE_NID) || defined(HAVE_X509_GET_SIGNATURE_INFO)
 char *
 pgtls_get_peer_certificate_hash(PGconn *conn, size_t *len)
 {
@@ -398,10 +398,15 @@ pgtls_get_peer_certificate_hash(PGconn *conn, size_t *len)
 
 	/*
 	 * Get the signature algorithm of the certificate to determine the hash
-	 * algorithm to use for the result.
+	 * algorithm to use for the result.  Prefer X509_get_signature_info(),
+	 * introduced in OpenSSL 1.1.1, which can handle RSA-PSS signatures.
 	 */
+#if HAVE_X509_GET_SIGNATURE_INFO
+	if (!X509_get_signature_info(peer_cert, &algo_nid, NULL, NULL, NULL))
+#else
 	if (!OBJ_find_sigid_algs(X509_get_signature_nid(peer_cert),
 							 &algo_nid, NULL))
+#endif
 	{
 		appendPQExpBufferStr(&conn->errorMessage,
 							 libpq_gettext("could not determine server certificate signature algorithm\n"));
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 0a05756cb1e..7c29ff42432 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -873,7 +873,7 @@ extern ssize_t pgtls_write(PGconn *conn, const void *ptr, size_t len);
  * This is not supported with old versions of OpenSSL that don't have
  * the X509_get_signature_nid() function.
  */
-#if defined(USE_OPENSSL) && defined(HAVE_X509_GET_SIGNATURE_NID)
+#if defined(USE_OPENSSL) && (defined(HAVE_X509_GET_SIGNATURE_NID) || defined(HAVE_X509_GET_SIGNATURE_INFO))
 #define HAVE_PGTLS_GET_PEER_CERTIFICATE_HASH
 extern char *pgtls_get_peer_certificate_hash(PGconn *conn, size_t *len);
 #endif
diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index 58fbf64d120..648ecab25a7 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -435,6 +435,7 @@ sub GenerateFiles
 		HAVE_WCTYPE_H                            => 1,
 		HAVE_WRITEV                              => undef,
 		HAVE_X509_GET_SIGNATURE_NID              => 1,
+		HAVE_X509_GET_SIGNATURE_INFO             => undef,
 		HAVE_X86_64_POPCNTQ                      => undef,
 		HAVE__BOOL                               => undef,
 		HAVE__BUILTIN_BSWAP16                    => undef,
@@ -550,7 +551,14 @@ sub GenerateFiles
 
 		my ($digit1, $digit2, $digit3) = $self->GetOpenSSLVersion();
 
-		# More symbols are needed with OpenSSL 1.1.0 and above.
+		# Symbols needed with OpenSSL 1.1.1 and above.
+		if (   ($digit1 >= '3' && $digit2 >= '0' && $digit3 >= '0')
+			|| ($digit1 >= '1' && $digit2 >= '1' && $digit3 >= '1'))
+		{
+			$define{HAVE_X509_GET_SIGNATURE_INFO} = 1;
+		}
+
+		# Symbols needed with OpenSSL 1.1.0 and above.
 		if (   ($digit1 >= '3' && $digit2 >= '0' && $digit3 >= '0')
 			|| ($digit1 >= '1' && $digit2 >= '1' && $digit3 >= '0'))
 		{

From 7cdf8fee9464dbd8bc90b32bac46686c74bc2ba4 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 17 Feb 2023 16:40:34 -0500
Subject: [PATCH 05/78] Print the correct aliases for DML target tables in
 ruleutils.

ruleutils.c blindly printed the user-given alias (or nothing if there
hadn't been one) for the target table of INSERT/UPDATE/DELETE queries.
That works a large percentage of the time, but not always: for queries
appearing in WITH, it's possible that we chose a different alias to
avoid conflict with outer-scope names.  Since the chosen alias would
be used in any Var references to the target table, this'd lead to an
inconsistent printout with consequences such as dump/restore failures.

The correct logic for printing (or not) a relation alias was embedded
in get_from_clause_item.  Factor it out to a separate function so that
we don't need a jointree node to use it.  (Only a limited part of that
function can be reached from these new call sites, but this seems like
the cleanest non-duplicative factorization.)

In passing, I got rid of a redundant "\d+ rules_src" step in rules.sql.

Initial report from Jonathan Katz; thanks to Vignesh C for analysis.
This has been broken for a long time, so back-patch to all supported
branches.

Discussion: https://postgr.es/m/e947fa21-24b2-f922-375a-d4f763ef3e4b@postgresql.org
Discussion: https://postgr.es/m/CALDaNm1MMntjmT_NJGp-Z=xbF02qHGAyuSHfYHias3TqQbPF2w@mail.gmail.com
---
 src/backend/utils/adt/ruleutils.c   | 145 ++++++++++++++++------------
 src/test/regress/expected/rules.out |  47 +++++----
 src/test/regress/sql/rules.sql      |  13 ++-
 3 files changed, 126 insertions(+), 79 deletions(-)

diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index bf663afb3c5..dbbb2a70a07 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -481,6 +481,8 @@ static void get_from_clause(Query *query, const char *prefix,
 							deparse_context *context);
 static void get_from_clause_item(Node *jtnode, Query *query,
 								 deparse_context *context);
+static void get_rte_alias(RangeTblEntry *rte, int varno, bool use_as,
+						  deparse_context *context);
 static void get_column_alias_list(deparse_columns *colinfo,
 								  deparse_context *context);
 static void get_from_clause_coldeflist(RangeTblFunction *rtfunc,
@@ -6661,12 +6663,14 @@ get_insert_query_def(Query *query, deparse_context *context,
 		context->indentLevel += PRETTYINDENT_STD;
 		appendStringInfoChar(buf, ' ');
 	}
-	appendStringInfo(buf, "INSERT INTO %s ",
+	appendStringInfo(buf, "INSERT INTO %s",
 					 generate_relation_name(rte->relid, NIL));
-	/* INSERT requires AS keyword for target alias */
-	if (rte->alias != NULL)
-		appendStringInfo(buf, "AS %s ",
-						 quote_identifier(rte->alias->aliasname));
+
+	/* Print the relation alias, if needed; INSERT requires explicit AS */
+	get_rte_alias(rte, query->resultRelation, true, context);
+
+	/* always want a space here */
+	appendStringInfoChar(buf, ' ');
 
 	/*
 	 * Add the insert-column-names list.  Any indirection decoration needed on
@@ -6848,9 +6852,10 @@ get_update_query_def(Query *query, deparse_context *context,
 	appendStringInfo(buf, "UPDATE %s%s",
 					 only_marker(rte),
 					 generate_relation_name(rte->relid, NIL));
-	if (rte->alias != NULL)
-		appendStringInfo(buf, " %s",
-						 quote_identifier(rte->alias->aliasname));
+
+	/* Print the relation alias, if needed */
+	get_rte_alias(rte, query->resultRelation, false, context);
+
 	appendStringInfoString(buf, " SET ");
 
 	/* Deparse targetlist */
@@ -7056,9 +7061,9 @@ get_delete_query_def(Query *query, deparse_context *context,
 	appendStringInfo(buf, "DELETE FROM %s%s",
 					 only_marker(rte),
 					 generate_relation_name(rte->relid, NIL));
-	if (rte->alias != NULL)
-		appendStringInfo(buf, " %s",
-						 quote_identifier(rte->alias->aliasname));
+
+	/* Print the relation alias, if needed */
+	get_rte_alias(rte, query->resultRelation, false, context);
 
 	/* Add the USING clause if given */
 	get_from_clause(query, " USING ", context);
@@ -11231,10 +11236,8 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
 	{
 		int			varno = ((RangeTblRef *) jtnode)->rtindex;
 		RangeTblEntry *rte = rt_fetch(varno, query->rtable);
-		char	   *refname = get_rtable_name(varno, context);
 		deparse_columns *colinfo = deparse_columns_fetch(varno, dpns);
 		RangeTblFunction *rtfunc1 = NULL;
-		bool		printalias;
 
 		if (rte->lateral)
 			appendStringInfoString(buf, "LATERAL ");
@@ -11382,54 +11385,7 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
 		}
 
 		/* Print the relation alias, if needed */
-		printalias = false;
-		if (rte->alias != NULL)
-		{
-			/* Always print alias if user provided one */
-			printalias = true;
-		}
-		else if (colinfo->printaliases)
-		{
-			/* Always print alias if we need to print column aliases */
-			printalias = true;
-		}
-		else if (rte->rtekind == RTE_RELATION)
-		{
-			/*
-			 * No need to print alias if it's same as relation name (this
-			 * would normally be the case, but not if set_rtable_names had to
-			 * resolve a conflict).
-			 */
-			if (strcmp(refname, get_relation_name(rte->relid)) != 0)
-				printalias = true;
-		}
-		else if (rte->rtekind == RTE_FUNCTION || rte->rtekind == RTE_TABLEFUNCTION)
-		{
-			/*
-			 * For a function RTE, always print alias.  This covers possible
-			 * renaming of the function and/or instability of the
-			 * FigureColname rules for things that aren't simple functions.
-			 * Note we'd need to force it anyway for the columndef list case.
-			 */
-			printalias = true;
-		}
-		else if (rte->rtekind == RTE_VALUES)
-		{
-			/* Alias is syntactically required for VALUES */
-			printalias = true;
-		}
-		else if (rte->rtekind == RTE_CTE)
-		{
-			/*
-			 * No need to print alias if it's same as CTE name (this would
-			 * normally be the case, but not if set_rtable_names had to
-			 * resolve a conflict).
-			 */
-			if (strcmp(refname, rte->ctename) != 0)
-				printalias = true;
-		}
-		if (printalias)
-			appendStringInfo(buf, " %s", quote_identifier(refname));
+		get_rte_alias(rte, varno, false, context);
 
 		/* Print the column definitions or aliases, if needed */
 		if (rtfunc1 && rtfunc1->funccolnames != NIL)
@@ -11567,6 +11523,73 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
 			 (int) nodeTag(jtnode));
 }
 
+/*
+ * get_rte_alias - append the alias of range table entry 'rte' (whose index
+ * is 'varno') to context->buf, if the alias needs to be printed.  If printed,
+ * the alias is preceded by a space, or by " AS " if use_as is true.
+ */
+static void
+get_rte_alias(RangeTblEntry *rte, int varno, bool use_as,
+			  deparse_context *context)
+{
+	deparse_namespace *dpns = (deparse_namespace *) linitial(context->namespaces);
+	char	   *refname = get_rtable_name(varno, context);
+	deparse_columns *colinfo = deparse_columns_fetch(varno, dpns);
+	bool		printalias = false;
+
+	if (rte->alias != NULL)
+	{
+		/* Always print alias if user provided one */
+		printalias = true;
+	}
+	else if (colinfo->printaliases)
+	{
+		/* Always print alias if we need to print column aliases */
+		printalias = true;
+	}
+	else if (rte->rtekind == RTE_RELATION)
+	{
+		/*
+		 * No need to print alias if it's same as relation name (this would
+		 * normally be the case, but not if set_rtable_names had to resolve a
+		 * conflict).
+		 */
+		if (strcmp(refname, get_relation_name(rte->relid)) != 0)
+			printalias = true;
+	}
+	else if (rte->rtekind == RTE_FUNCTION || rte->rtekind == RTE_TABLEFUNCTION)
+	{
+		/*
+		 * For a function or table-function RTE, always print alias.  This
+		 * covers possible renaming of the function and/or instability of the
+		 * FigureColname rules for things that aren't simple functions.  Note
+		 * we'd need to force it anyway for the columndef list case.
+		 */
+		printalias = true;
+	}
+	else if (rte->rtekind == RTE_SUBQUERY ||
+			 rte->rtekind == RTE_VALUES)
+	{
+		/* Alias is syntactically required for SUBQUERY and VALUES */
+		printalias = true;
+	}
+	else if (rte->rtekind == RTE_CTE)
+	{
+		/*
+		 * No need to print alias if it's same as CTE name (this would
+		 * normally be the case, but not if set_rtable_names had to resolve a
+		 * conflict).
+		 */
+		if (strcmp(refname, rte->ctename) != 0)
+			printalias = true;
+	}
+
+	if (printalias)
+		appendStringInfo(context->buf, "%s%s",
+						 use_as ? " AS " : " ",
+						 quote_identifier(refname));
+}
+
+
 /*
  * get_column_alias_list - print column alias list for an RTE
  *
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index a2922a0a9ec..a51729af420 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -3171,28 +3171,21 @@ select * from rules_log;
 (16 rows)
 
 create rule r4 as on delete to rules_src do notify rules_src_deletion;
-\d+ rules_src
-                                 Table "public.rules_src"
- Column |  Type   | Collation | Nullable | Default | Storage | Stats target | Description 
---------+---------+-----------+----------+---------+---------+--------------+-------------
- f1     | integer |           |          |         | plain   |              | 
- f2     | integer |           |          | 0       | plain   |              | 
-Rules:
-    r1 AS
-    ON UPDATE TO rules_src DO  INSERT INTO rules_log (f1, f2, tag, id) VALUES (old.f1,old.f2,'old'::text,DEFAULT), (new.f1,new.f2,'new'::text,DEFAULT)
-    r2 AS
-    ON UPDATE TO rules_src DO  VALUES (old.f1,old.f2,'old'::text), (new.f1,new.f2,'new'::text)
-    r3 AS
-    ON INSERT TO rules_src DO  INSERT INTO rules_log (f1, f2, tag, id) VALUES (NULL::integer,NULL::integer,'-'::text,DEFAULT), (new.f1,new.f2,'new'::text,DEFAULT)
-    r4 AS
-    ON DELETE TO rules_src DO
- NOTIFY rules_src_deletion
-
 --
 -- Ensure an aliased target relation for insert is correctly deparsed.
 --
 create rule r5 as on insert to rules_src do instead insert into rules_log AS trgt SELECT NEW.* RETURNING trgt.f1, trgt.f2;
 create rule r6 as on update to rules_src do instead UPDATE rules_log AS trgt SET tag = 'updated' WHERE trgt.f1 = new.f1;
+--
+-- Check deparse disambiguation of INSERT/UPDATE/DELETE targets.
+--
+create rule r7 as on delete to rules_src do instead
+  with wins as (insert into int4_tbl as trgt values (0) returning *),
+       wupd as (update int4_tbl trgt set f1 = f1+1 returning *),
+       wdel as (delete from int4_tbl trgt where f1 = 0 returning *)
+  insert into rules_log AS trgt select old.* from wins, wupd, wdel
+  returning trgt.f1, trgt.f2;
+-- check display of all rules added above
 \d+ rules_src
                                  Table "public.rules_src"
  Column |  Type   | Collation | Nullable | Default | Storage | Stats target | Description 
@@ -3217,6 +3210,26 @@ Rules:
     r6 AS
     ON UPDATE TO rules_src DO INSTEAD  UPDATE rules_log trgt SET tag = 'updated'::text
   WHERE trgt.f1 = new.f1
+    r7 AS
+    ON DELETE TO rules_src DO INSTEAD  WITH wins AS (
+         INSERT INTO int4_tbl AS trgt_1 (f1)
+          VALUES (0)
+          RETURNING trgt_1.f1
+        ), wupd AS (
+         UPDATE int4_tbl trgt_1 SET f1 = trgt_1.f1 + 1
+          RETURNING trgt_1.f1
+        ), wdel AS (
+         DELETE FROM int4_tbl trgt_1
+          WHERE trgt_1.f1 = 0
+          RETURNING trgt_1.f1
+        )
+ INSERT INTO rules_log AS trgt (f1, f2)  SELECT old.f1,
+            old.f2
+           FROM wins,
+            wupd,
+            wdel
+  RETURNING trgt.f1,
+    trgt.f2
 
 --
 -- Also check multiassignment deparsing.
diff --git a/src/test/regress/sql/rules.sql b/src/test/regress/sql/rules.sql
index 7b0cd28720c..1318482d747 100644
--- a/src/test/regress/sql/rules.sql
+++ b/src/test/regress/sql/rules.sql
@@ -1034,13 +1034,24 @@ insert into rules_src values(22,23), (33,default);
 select * from rules_src;
 select * from rules_log;
 create rule r4 as on delete to rules_src do notify rules_src_deletion;
-\d+ rules_src
 
 --
 -- Ensure an aliased target relation for insert is correctly deparsed.
 --
 create rule r5 as on insert to rules_src do instead insert into rules_log AS trgt SELECT NEW.* RETURNING trgt.f1, trgt.f2;
 create rule r6 as on update to rules_src do instead UPDATE rules_log AS trgt SET tag = 'updated' WHERE trgt.f1 = new.f1;
+
+--
+-- Check deparse disambiguation of INSERT/UPDATE/DELETE targets.
+--
+create rule r7 as on delete to rules_src do instead
+  with wins as (insert into int4_tbl as trgt values (0) returning *),
+       wupd as (update int4_tbl trgt set f1 = f1+1 returning *),
+       wdel as (delete from int4_tbl trgt where f1 = 0 returning *)
+  insert into rules_log AS trgt select old.* from wins, wupd, wdel
+  returning trgt.f1, trgt.f2;
+
+-- check display of all rules added above
 \d+ rules_src
 
 --

From 6ca8ff5e6cfe5a8110fe12e96bf4c4be67dadd20 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Sun, 19 Feb 2023 00:41:18 +0100
Subject: [PATCH 06/78] Fix handling of multi-column BRIN indexes

When evaluating clauses on multiple scan keys of a multi-column BRIN
index, we can stop processing as soon as we find a scan key eliminating
the range, and the range should not be added to the bitmap.

That's how it worked before 14, but since a681e3c107a the code treated
the range as matching if it matched at least the last scan key.

Backpatch to 14, where this code was introduced.

Backpatch-through: 14
Discussion: https://postgr.es/m/ebc18613-125e-60df-7520-fcbe0f9274fc%40enterprisedb.com
---
 src/backend/access/brin/brin.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index a64d70bafc7..a6549d9dcbf 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -777,6 +777,13 @@ bringetbitmap(IndexScanDesc scan, Node **bmNodeP)
 								break;
 						}
 					}
+
+					/*
+					 * If we found a scan key eliminating the range, no need to
+					 * check additional ones.
+					 */
+					if (!addrange)
+						break;
 				}
 			}
 		}

From 54a888371ad3b78d6b20e3a2589b7576abdcf842 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Tue, 21 Feb 2023 10:56:37 +0100
Subject: [PATCH 07/78] pgbench: Prepare commands in pipelines in advance

Failing to do so results in an error when a pgbench script tries to
start a serializable transaction inside a pipeline, because by the time
BEGIN ISOLATION LEVEL SERIALIZABLE is executed, we're already in a
transaction that has acquired a snapshot, so the server rightfully
complains.

We can work around that by preparing all commands in the pipeline before
actually starting the pipeline.  This changes the existing code in two
aspects: first, we now prepare each command individually at the point
where that command is about to be executed; previously, we would prepare
all commands in a script as soon as the first command of that script
would be executed.  It's hard to see that this would make much of a
difference (particularly since it only affects the first time to execute
each script in a client), but I didn't actually try to measure it.

Secondly, we no longer use PQsendPrepare() in pipeline mode, but only
PQprepare.  There's no specific reason for this change other than no
longer needing to do differently in pipeline mode.  (Previously we had
no choice, because in pipeline mode PQprepare could not be used.)

Backpatch to 14, where pgbench got support for pipeline mode.

Reported-by: Yugo NAGATA <nagata@sraoss.co.jp>
Discussion: https://postgr.es/m/20210716153013.fc53b1c780b06fccc07a7f0d@sraoss.co.jp
---
 src/bin/pgbench/pgbench.c                    | 162 +++++++++++++------
 src/bin/pgbench/t/001_pgbench_with_server.pl |  20 +++
 2 files changed, 130 insertions(+), 52 deletions(-)

diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index e0dfadcf414..c87bc3dd007 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -481,7 +481,8 @@ typedef struct
 	pg_time_usec_t txn_begin;	/* used for measuring schedule lag times */
 	pg_time_usec_t stmt_begin;	/* used for measuring statement latencies */
 
-	bool		prepared[MAX_SCRIPTS];	/* whether client prepared the script */
+	/* whether client prepared each command of each script */
+	bool	  **prepared;
 
 	/* per client collected stats */
 	int64		cnt;			/* client transaction count, for -t */
@@ -573,7 +574,8 @@ static const char *QUERYMODE[] = {"simple", "extended", "prepared"};
  * argv			Command arguments, the first of which is the command or SQL
  *				string itself.  For SQL commands, after post-processing
  *				argv[0] is the same as 'lines' with variables substituted.
- * varprefix 	SQL commands terminated with \gset or \aset have this set
+ * prepname		The name that this command is prepared under, in prepare mode
+ * varprefix	SQL commands terminated with \gset or \aset have this set
  *				to a non NULL value.  If nonempty, it's used to prefix the
  *				variable name that receives the value.
  * aset			do gset on all possible queries of a combined query (\;).
@@ -588,6 +590,7 @@ typedef struct Command
 	MetaCommand meta;
 	int			argc;
 	char	   *argv[MAX_ARGS];
+	char	   *prepname;
 	char	   *varprefix;
 	PgBenchExpr *expr;
 	SimpleStats stats;
@@ -2836,13 +2839,9 @@ runShellCommand(CState *st, char *variable, char **argv, int argc)
 	return true;
 }
 
-#define MAX_PREPARE_NAME		32
-static void
-preparedStatementName(char *buffer, int file, int state)
-{
-	sprintf(buffer, "P%d_%d", file, state);
-}
-
+/*
+ * Report the abortion of the client when processing SQL commands.
+ */
 static void
 commandFailed(CState *st, const char *cmd, const char *message)
 {
@@ -2869,6 +2868,87 @@ chooseScript(TState *thread)
 	return i - 1;
 }
 
+/*
+ * Prepare the SQL command at command_num of st->use_file, if not yet prepared.
+ */
+static void
+prepareCommand(CState *st, int command_num)
+{
+	Command    *command = sql_script[st->use_file].commands[command_num];
+
+	/* Nothing to prepare for non-SQL commands */
+	if (command->type != SQL_COMMAND)
+		return;
+
+	/*
+	 * If not already done, allocate space for the 'prepared' flags: one
+	 * boolean for each command (SQL or not) of each script.
+	 */
+	if (!st->prepared)
+	{
+		st->prepared = pg_malloc(sizeof(bool *) * num_scripts);
+		for (int i = 0; i < num_scripts; i++)
+		{
+			ParsedScript *script = &sql_script[i];
+			int			numcmds;
+
+			for (numcmds = 0; script->commands[numcmds] != NULL; numcmds++)
+				;
+			st->prepared[i] = pg_malloc0(sizeof(bool) * numcmds);
+		}
+	}
+
+	if (!st->prepared[st->use_file][command_num])
+	{
+		PGresult   *res;
+
+		pg_log_debug("client %d preparing %s", st->id, command->prepname);
+		res = PQprepare(st->con, command->prepname,
+						command->argv[0], command->argc - 1, NULL);
+		if (PQresultStatus(res) != PGRES_COMMAND_OK)
+			pg_log_error("%s", PQerrorMessage(st->con));
+		PQclear(res);
+		st->prepared[st->use_file][command_num] = true; /* even on failure */
+	}
+}
+
+/*
+ * Prepare all the commands in the script that lie between the \startpipeline
+ * at position st->command and the first \endpipeline that follows it.
+ *
+ * This sets the ->prepared flag for each relevant command as well as the
+ * \startpipeline itself, but doesn't move the st->command counter.
+ */
+static void
+prepareCommandsInPipeline(CState *st)
+{
+	int			j;
+	Command   **commands = sql_script[st->use_file].commands;
+
+	Assert(commands[st->command]->type == META_COMMAND &&
+		   commands[st->command]->meta == META_STARTPIPELINE);
+
+	/*
+	 * The 'prepared' flag on the \startpipeline itself records that this
+	 * pipeline was already handled, though that command is never prepared.
+	 */
+	if (st->prepared && st->prepared[st->use_file][st->command])
+		return;
+
+	for (j = st->command + 1; commands[j] != NULL; j++)
+	{
+		if (commands[j]->type == META_COMMAND &&
+			commands[j]->meta == META_ENDPIPELINE)
+			break;
+
+		prepareCommand(st, j);
+	}
+
+	/* st->prepared may still be NULL here, e.g. when the pipeline is empty */
+	if (st->prepared)
+		st->prepared[st->use_file][st->command] = true;
+}
+
 /* Send a SQL command, using the chosen querymode */
 static bool
 sendCommand(CState *st, Command *command)
@@ -2899,50 +2979,13 @@ sendCommand(CState *st, Command *command)
 	}
 	else if (querymode == QUERY_PREPARED)
 	{
-		char		name[MAX_PREPARE_NAME];
 		const char *params[MAX_ARGS];
 
-		if (!st->prepared[st->use_file])
-		{
-			int			j;
-			Command   **commands = sql_script[st->use_file].commands;
-
-			for (j = 0; commands[j] != NULL; j++)
-			{
-				PGresult   *res;
-				char		name[MAX_PREPARE_NAME];
-
-				if (commands[j]->type != SQL_COMMAND)
-					continue;
-				preparedStatementName(name, st->use_file, j);
-				if (PQpipelineStatus(st->con) == PQ_PIPELINE_OFF)
-				{
-					res = PQprepare(st->con, name,
-									commands[j]->argv[0], commands[j]->argc - 1, NULL);
-					if (PQresultStatus(res) != PGRES_COMMAND_OK)
-						pg_log_error("%s", PQerrorMessage(st->con));
-					PQclear(res);
-				}
-				else
-				{
-					/*
-					 * In pipeline mode, we use asynchronous functions. If a
-					 * server-side error occurs, it will be processed later
-					 * among the other results.
-					 */
-					if (!PQsendPrepare(st->con, name,
-									   commands[j]->argv[0], commands[j]->argc - 1, NULL))
-						pg_log_error("%s", PQerrorMessage(st->con));
-				}
-			}
-			st->prepared[st->use_file] = true;
-		}
-
+		prepareCommand(st, st->command);
 		getQueryParams(st, command, params);
-		preparedStatementName(name, st->use_file, st->command);
 
-		pg_log_debug("client %d sending %s", st->id, name);
-		r = PQsendQueryPrepared(st->con, name, command->argc - 1,
+		pg_log_debug("client %d sending %s", st->id, command->prepname);
+		r = PQsendQueryPrepared(st->con, command->prepname, command->argc - 1,
 								params, NULL, NULL, 0);
 	}
 	else						/* unknown sql mode */
@@ -3202,7 +3245,8 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 					thread->conn_duration += now - start;
 
 					/* Reset session-local state */
-					memset(st->prepared, 0, sizeof(st->prepared));
+					pg_free(st->prepared);
+					st->prepared = NULL;
 				}
 
 				/* record transaction start time */
@@ -3777,6 +3821,16 @@ executeMetaCommand(CState *st, pg_time_usec_t *now)
 			return CSTATE_ABORTED;
 		}
 
+		/*
+		 * If we're in prepared-query mode, we need to prepare all the
+		 * commands that are inside the pipeline before we actually start the
+		 * pipeline itself.  This solves the problem that running BEGIN
+		 * ISOLATION LEVEL SERIALIZABLE in a pipeline would fail due to a
+		 * snapshot having been acquired by the prepare within the pipeline.
+		 */
+		if (querymode == QUERY_PREPARED)
+			prepareCommandsInPipeline(st);
+
 		if (PQpipelineStatus(st->con) != PQ_PIPELINE_OFF)
 		{
 			commandFailed(st, "startpipeline", "already in pipeline mode");
@@ -4818,6 +4872,7 @@ create_sql_command(PQExpBuffer buf, const char *source)
 	my_command->varprefix = NULL;	/* allocated later, if needed */
 	my_command->expr = NULL;
 	initSimpleStats(&my_command->stats);
+	my_command->prepname = NULL;	/* set later, if needed */
 
 	return my_command;
 }
@@ -4849,6 +4904,7 @@ static void
 postprocess_sql_command(Command *my_command)
 {
 	char		buffer[128];
+	static int	prepnum = 0;
 
 	Assert(my_command->type == SQL_COMMAND);
 
@@ -4857,15 +4913,17 @@ postprocess_sql_command(Command *my_command)
 	buffer[strcspn(buffer, "\n\r")] = '\0';
 	my_command->first_line = pg_strdup(buffer);
 
-	/* parse query if necessary */
+	/* Parse query and generate prepared statement name, if necessary */
 	switch (querymode)
 	{
 		case QUERY_SIMPLE:
 			my_command->argv[0] = my_command->lines.data;
 			my_command->argc++;
 			break;
-		case QUERY_EXTENDED:
 		case QUERY_PREPARED:
+			my_command->prepname = psprintf("P_%d", prepnum++);
+			/* fall through */
+		case QUERY_EXTENDED:
 			if (!parseQuery(my_command))
 				exit(1);
 			break;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 282ccc24aeb..76ecd9efeba 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -841,6 +841,26 @@
 }
 	});
 
+# Working \startpipeline in prepared query mode with serializable
+$node->pgbench(
+	'-c4 -j2 -t 10 -n -M prepared',
+	0,
+	[
+		qr{type: .*/001_pgbench_pipeline_serializable},
+		qr{actually processed: (\d+)/\1}
+	],
+	[],
+	'working \startpipeline with serializable',
+	{
+		'001_pgbench_pipeline_serializable' => q{
+-- test startpipeline with serializable
+\startpipeline
+BEGIN ISOLATION LEVEL SERIALIZABLE;
+} . "select 1;\n" x 10 . q{
+END;
+\endpipeline
+}
+	});
 
 # trigger many expression errors
 my @errors = (

From c36369bcebaebe7f9dd2b6b75abb7a11c20e63f0 Mon Sep 17 00:00:00 2001
From: Dean Rasheed <dean.a.rasheed@gmail.com>
Date: Wed, 22 Feb 2023 13:26:20 +0000
Subject: [PATCH 08/78] Add missing support for the latest SPI status codes.

SPI_result_code_string() was missing support for SPI_OK_TD_REGISTER,
and in v15 and later, it was missing support for SPI_OK_MERGE, as was
pltcl_process_SPI_result().

The last of those would trigger an error if a MERGE was executed from
PL/Tcl. The others seem fairly innocuous, but worth fixing.

Back-patch to all supported branches. Before v15, this is just adding
SPI_OK_TD_REGISTER to SPI_result_code_string(), which is unlikely to
be seen by anyone, but seems worth doing for completeness.

Reviewed by Tom Lane.

Discussion:
  https://postgr.es/m/CAEZATCUg8V%2BK%2BGcafOPqymxk84Y_prXgfe64PDoopjLFH6Z0Aw%40mail.gmail.com
  https://postgr.es/m/CAEZATCUMe%2B_KedPMM9AxKqm%3DSZogSxjUcrMe%2BsakusZh3BFcQw%40mail.gmail.com
---
 src/backend/executor/spi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index 4a2ddd5dff3..8f04c342d5a 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -2084,6 +2084,8 @@ SPI_result_code_string(int code)
 			return "SPI_OK_REL_REGISTER";
 		case SPI_OK_REL_UNREGISTER:
 			return "SPI_OK_REL_UNREGISTER";
+		case SPI_OK_TD_REGISTER:
+			return "SPI_OK_TD_REGISTER";
 	}
 	/* Unrecognized code ... return something useful ... */
 	sprintf(buf, "Unrecognized SPI code %d", code);

From c3aab62960983c392578d42a50446710aea1ba78 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Wed, 22 Feb 2023 15:24:09 +0100
Subject: [PATCH 09/78] Fix snapshot handling in logicalmsg_decode

When decoding a transactional logical message, logicalmsg_decode called
SnapBuildGetOrBuildSnapshot. But we may not have a consistent snapshot
yet at that point. We don't actually need the snapshot in this case
(during replay we'll have the snapshot from the transaction), so in
practice this is harmless. But in an assert-enabled build this crashes.

Fixed by requesting the snapshot only in non-transactional case, where
we are guaranteed to have SNAPBUILD_CONSISTENT.

Backpatch to 11. The issue exists since 9.6.

Backpatch-through: 11
Reviewed-by: Andres Freund
Discussion: https://postgr.es/m/84d60912-6eab-9b84-5de3-41765a5449e8@enterprisedb.com
---
 src/backend/replication/logical/decode.c        | 14 ++++++++++++--
 src/backend/replication/logical/reorderbuffer.c | 10 ++++++++++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index 7a6323c3989..755d7ae6d2d 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -572,7 +572,7 @@ logicalmsg_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	TransactionId xid = XLogRecGetXid(r);
 	uint8		info = XLogRecGetInfo(r) & ~XLR_INFO_MASK;
 	RepOriginId origin_id = XLogRecGetOrigin(r);
-	Snapshot	snapshot;
+	Snapshot	snapshot = NULL;
 	xl_logical_message *message;
 
 	if (info != XLOG_LOGICAL_MESSAGE)
@@ -602,7 +602,17 @@ logicalmsg_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 			  SnapBuildXactNeedsSkip(builder, buf->origptr)))
 		return;
 
-	snapshot = SnapBuildGetOrBuildSnapshot(builder, xid);
+	/*
+	 * If this is a non-transactional change, get the snapshot we're expected
+	 * to use. We only get here when the snapshot is consistent, and the
+	 * change is not meant to be skipped.
+	 *
+	 * For transactional changes we don't need a snapshot, we'll use the
+	 * regular snapshot maintained by ReorderBuffer. We just leave it NULL.
+	 */
+	if (!message->transactional)
+		snapshot = SnapBuildGetOrBuildSnapshot(builder, xid);
+
 	ReorderBufferQueueMessage(ctx->reorder, xid, snapshot, buf->endptr,
 							  message->transactional,
 							  message->message, /* first part of message is
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 709365fc8c6..721fa652d25 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -821,6 +821,13 @@ ReorderBufferQueueMessage(ReorderBuffer *rb, TransactionId xid,
 
 		Assert(xid != InvalidTransactionId);
 
+		/*
+		 * We don't expect snapshots for transactional changes - we'll use the
+		 * snapshot derived later during apply (unless the change gets
+		 * skipped).
+		 */
+		Assert(!snapshot);
+
 		oldcontext = MemoryContextSwitchTo(rb->context);
 
 		change = ReorderBufferGetChange(rb);
@@ -839,6 +846,9 @@ ReorderBufferQueueMessage(ReorderBuffer *rb, TransactionId xid,
 		ReorderBufferTXN *txn = NULL;
 		volatile Snapshot snapshot_now = snapshot;
 
+		/* Non-transactional changes require a valid snapshot. */
+		Assert(snapshot_now);
+
 		if (xid != InvalidTransactionId)
 			txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
 

From ead09a5d43bf94d7fc530ad90aec2b36522f2232 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 23 Feb 2023 15:40:28 -0500
Subject: [PATCH 10/78] Don't repeatedly register cache callbacks in pgoutput
 plugin.

Multiple cycles of starting up and shutting down the plugin within a
single session would eventually lead to "out of relcache_callback_list
slots", because pgoutput_startup blindly re-registered its cache
callbacks each time.  Fix it to register them only once, as all other
users of cache callbacks already take care to do.

This has been broken all along, so back-patch to all supported branches.

Shi Yu

Discussion: https://postgr.es/m/OSZPR01MB631004A78D743D68921FFAD3FDA79@OSZPR01MB6310.jpnprd01.prod.outlook.com
---
 src/backend/replication/pgoutput/pgoutput.c | 24 ++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index ff9cf5d406d..df2ea94d468 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -260,6 +260,7 @@ pgoutput_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
 				 bool is_init)
 {
 	PGOutputData *data = palloc0(sizeof(PGOutputData));
+	static bool publication_callback_registered = false;
 
 	/* Create our memory context for private allocations. */
 	data->context = AllocSetContextCreate(ctx->context,
@@ -323,9 +324,18 @@ pgoutput_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
 		/* Init publication state. */
 		data->publications = NIL;
 		publications_valid = false;
-		CacheRegisterSyscacheCallback(PUBLICATIONOID,
-									  publication_invalidation_cb,
-									  (Datum) 0);
+
+		/*
+		 * Register callback for pg_publication if we didn't already do that
+		 * during some previous call in this process.
+		 */
+		if (!publication_callback_registered)
+		{
+			CacheRegisterSyscacheCallback(PUBLICATIONOID,
+										  publication_invalidation_cb,
+										  (Datum) 0);
+			publication_callback_registered = true;
+		}
 
 		/* Initialize relation schema cache. */
 		init_rel_sync_cache(CacheMemoryContext);
@@ -948,7 +958,9 @@ static void
 init_rel_sync_cache(MemoryContext cachectx)
 {
 	HASHCTL		ctl;
+	static bool relation_callbacks_registered = false;
 
+	/* Nothing to do if hash table already exists */
 	if (RelationSyncCache != NULL)
 		return;
 
@@ -963,10 +975,16 @@ init_rel_sync_cache(MemoryContext cachectx)
 
 	Assert(RelationSyncCache != NULL);
 
+	/* No more to do if we already registered callbacks */
+	if (relation_callbacks_registered)
+		return;
+
 	CacheRegisterRelcacheCallback(rel_sync_cache_relation_cb, (Datum) 0);
 	CacheRegisterSyscacheCallback(PUBLICATIONRELMAP,
 								  rel_sync_cache_publication_cb,
 								  (Datum) 0);
+
+	relation_callbacks_registered = true;
 }
 
 /*

From cbce817b2639f24de1fc75cf94f599266d4f78cc Mon Sep 17 00:00:00 2001
From: Dean Rasheed <dean.a.rasheed@gmail.com>
Date: Sat, 25 Feb 2023 14:44:49 +0000
Subject: [PATCH 11/78] Fix mishandling of OLD/NEW references in subqueries in
 rule actions.

If a rule action contains a subquery that refers to columns from OLD
or NEW, then those are really lateral references, and the planner will
complain if it sees such things in a subquery that isn't marked as
lateral. However, at rule-definition time, the user isn't required to
mark the subquery with LATERAL, and so it can fail when the rule is
used.

Fix this by marking such subqueries as lateral in the rewriter, at the
point where they're used.

Dean Rasheed and Tom Lane, per report from Alexander Lakhin.
Back-patch to all supported branches.

Discussion: https://postgr.es/m/5e09da43-aaba-7ea7-0a51-a2eb981b058b%40gmail.com
---
 src/backend/rewrite/rewriteHandler.c | 22 ++++++++++++++++++----
 src/test/regress/expected/rules.out  | 25 +++++++++++++++++++++++++
 src/test/regress/sql/rules.sql       | 17 +++++++++++++++++
 3 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 9670eb5d34a..91ca4cb83d1 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -409,6 +409,7 @@ rewriteRuleAction(Query *parsetree,
 	Query	   *sub_action;
 	Query	  **sub_action_ptr;
 	acquireLocksOnSubLinks_context context;
+	ListCell   *lc;
 
 	context.for_execute = true;
 
@@ -447,6 +448,23 @@ rewriteRuleAction(Query *parsetree,
 	ChangeVarNodes(rule_qual,
 				   PRS2_OLD_VARNO + rt_length, rt_index, 0);
 
+	/*
+	 * Mark any subquery RTEs in the rule action as LATERAL if they contain
+	 * Vars referring to the current query level (references to NEW/OLD).
+	 * Those really are lateral references, but we've historically not
+	 * required users to mark such subqueries with LATERAL explicitly.  But
+	 * the planner will complain if such Vars exist in a non-LATERAL subquery,
+	 * so we have to fix things up here.
+	 */
+	foreach(lc, sub_action->rtable)
+	{
+		RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
+
+		if (rte->rtekind == RTE_SUBQUERY && !rte->lateral &&
+			contain_vars_of_level((Node *) rte->subquery, 1))
+			rte->lateral = true;
+	}
+
 	/*
 	 * Generate expanded rtable consisting of main parsetree's rtable plus
 	 * rule action's rtable; this becomes the complete rtable for the rule
@@ -488,8 +506,6 @@ rewriteRuleAction(Query *parsetree,
 	 */
 	if (parsetree->hasSubLinks && !sub_action->hasSubLinks)
 	{
-		ListCell   *lc;
-
 		foreach(lc, parsetree->rtable)
 		{
 			RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
@@ -591,8 +607,6 @@ rewriteRuleAction(Query *parsetree,
 	 */
 	if (parsetree->cteList != NIL && sub_action->commandType != CMD_UTILITY)
 	{
-		ListCell   *lc;
-
 		/*
 		 * Annoying implementation restriction: because CTEs are identified by
 		 * name within a cteList, we can't merge a CTE from the original query
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index a51729af420..77fbb582cab 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -3256,6 +3256,31 @@ Rules:
 
 drop table rule_t1, rule_dest;
 --
+-- Test implicit LATERAL references to old/new in rules
+--
+CREATE TABLE rule_t1(a int, b text DEFAULT 'xxx', c int);
+CREATE VIEW rule_v1 AS SELECT * FROM rule_t1;
+CREATE RULE v1_ins AS ON INSERT TO rule_v1
+  DO ALSO INSERT INTO rule_t1
+  SELECT * FROM (SELECT a + 10 FROM rule_t1 WHERE a = NEW.a) tt;
+CREATE RULE v1_upd AS ON UPDATE TO rule_v1
+  DO ALSO UPDATE rule_t1 t
+  SET c = tt.a * 10
+  FROM (SELECT a FROM rule_t1 WHERE a = OLD.a) tt WHERE t.a = tt.a;
+INSERT INTO rule_v1 VALUES (1, 'a'), (2, 'b');
+UPDATE rule_v1 SET b = upper(b);
+SELECT * FROM rule_t1;
+ a  |  b  |  c  
+----+-----+-----
+  1 | A   |  10
+  2 | B   |  20
+ 11 | XXX | 110
+ 12 | XXX | 120
+(4 rows)
+
+DROP TABLE rule_t1 CASCADE;
+NOTICE:  drop cascades to view rule_v1
+--
 -- check alter rename rule
 --
 CREATE TABLE rule_t1 (a INT);
diff --git a/src/test/regress/sql/rules.sql b/src/test/regress/sql/rules.sql
index 1318482d747..90b37af2097 100644
--- a/src/test/regress/sql/rules.sql
+++ b/src/test/regress/sql/rules.sql
@@ -1065,6 +1065,23 @@ create rule rr as on update to rule_t1 do instead UPDATE rule_dest trgt
 \d+ rule_t1
 drop table rule_t1, rule_dest;
 
+--
+-- Test implicit LATERAL references to old/new in rules
+--
+CREATE TABLE rule_t1(a int, b text DEFAULT 'xxx', c int);
+CREATE VIEW rule_v1 AS SELECT * FROM rule_t1;
+CREATE RULE v1_ins AS ON INSERT TO rule_v1
+  DO ALSO INSERT INTO rule_t1
+  SELECT * FROM (SELECT a + 10 FROM rule_t1 WHERE a = NEW.a) tt;
+CREATE RULE v1_upd AS ON UPDATE TO rule_v1
+  DO ALSO UPDATE rule_t1 t
+  SET c = tt.a * 10
+  FROM (SELECT a FROM rule_t1 WHERE a = OLD.a) tt WHERE t.a = tt.a;
+INSERT INTO rule_v1 VALUES (1, 'a'), (2, 'b');
+UPDATE rule_v1 SET b = upper(b);
+SELECT * FROM rule_t1;
+DROP TABLE rule_t1 CASCADE;
+
 --
 -- check alter rename rule
 --

From 7edfa9df87ad5c74bd8ca84c8de9bdae45124881 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <andrew@dunslane.net>
Date: Sun, 26 Feb 2023 06:48:41 -0500
Subject: [PATCH 12/78] Don't force SQL_ASCII/no-locale for installcheck in
 vcregress.pl

It's been this way for a very long time, but it appears to have been
masking an issue that only manifests with different settings. Therefore,
run the tests in the installation's default encoding/locale.

Backpatch to all live branches.
---
 src/tools/msvc/vcregress.pl | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/tools/msvc/vcregress.pl b/src/tools/msvc/vcregress.pl
index f19b0a4f755..0dc8e0519e6 100644
--- a/src/tools/msvc/vcregress.pl
+++ b/src/tools/msvc/vcregress.pl
@@ -163,9 +163,7 @@ sub installcheck_internal
 		"--bindir=../../../$Config/psql",
 		"--schedule=${schedule}_schedule",
 		"--max-concurrent-tests=20",
-		"--make-testtablespace-dir",
-		"--encoding=SQL_ASCII",
-		"--no-locale");
+		"--make-testtablespace-dir");
 	push(@args, $maxconn) if $maxconn;
 	push(@args, @EXTRA_REGRESS_OPTS);
 	system(@args);

From 40b493c8f32543709695ef78a1950333a91437b4 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 27 Feb 2023 16:29:51 -0500
Subject: [PATCH 13/78] Harden postgres_fdw tests against unexpected cache
 flushes.

postgres_fdw will close its remote session if an sinval cache reset
occurs, since it's possible that that means some FDW parameters
changed.  We had two tests that were trying to ensure that the
session remains alive by setting debug_discard_caches = 0; but
that's not sufficient.  Even though the tests seem stable enough
in the buildfarm, they flap a lot under CI.

In the first test, which is checking the ability to recover from
a lost connection, we can stabilize the results by just not
caring whether pg_terminate_backend() finds a victim backend.
If a reset did happen, there won't be a session to terminate
anymore, but the test can proceed anyway.  (Arguably, we are
then not testing the unintentional-disconnect case, but as long
as that scenario is exercised in most runs I think it's fine;
testing the reset-driven case is of value too.)

In the second test, which is trying to verify the application_name
displayed in pg_stat_activity by a remote session, we had a race
condition in that the remote session might go away before we can
fetch its pg_stat_activity entry.  We can close that race and make
the test more certainly test what it intends to by arranging things
so that the remote session itself fetches its pg_stat_activity entry
(based on PID rather than a somewhat-circular assumption about the
application name).

Both tests now demonstrably pass under debug_discard_caches = 1,
so we can remove that hack.

Back-patch into relevant back branches.

Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
---
 .../postgres_fdw/expected/postgres_fdw.out    | 26 ++++++-------------
 contrib/postgres_fdw/sql/postgres_fdw.sql     | 18 ++++++-------
 2 files changed, 16 insertions(+), 28 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 67fde96a858..10700d6fd4a 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -9730,11 +9730,6 @@ WARNING:  there is no transaction in progress
 -- Change application_name of remote connection to special one
 -- so that we can easily terminate the connection later.
 ALTER SERVER loopback OPTIONS (application_name 'fdw_retry_check');
--- If debug_discard_caches is active, it results in
--- dropping remote connections after every transaction, making it
--- impossible to test termination meaningfully.  So turn that off
--- for this test.
-SET debug_discard_caches = 0;
 -- Make sure we have a remote connection.
 SELECT 1 FROM ft1 LIMIT 1;
  ?column? 
@@ -9743,13 +9738,12 @@ SELECT 1 FROM ft1 LIMIT 1;
 (1 row)
 
 -- Terminate the remote connection and wait for the termination to complete.
-SELECT pg_terminate_backend(pid, 180000) FROM pg_stat_activity
+-- (If a cache flush happens, the remote connection might have already been
+-- dropped; so code this step in a way that doesn't fail if no connection.)
+DO $$ BEGIN
+PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity
 	WHERE application_name = 'fdw_retry_check';
- pg_terminate_backend 
-----------------------
- t
-(1 row)
-
+END $$;
 -- This query should detect the broken connection when starting new remote
 -- transaction, reestablish new connection, and then succeed.
 BEGIN;
@@ -9762,13 +9756,10 @@ SELECT 1 FROM ft1 LIMIT 1;
 -- If we detect the broken connection when starting a new remote
 -- subtransaction, we should fail instead of establishing a new connection.
 -- Terminate the remote connection and wait for the termination to complete.
-SELECT pg_terminate_backend(pid, 180000) FROM pg_stat_activity
+DO $$ BEGIN
+PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity
 	WHERE application_name = 'fdw_retry_check';
- pg_terminate_backend 
-----------------------
- t
-(1 row)
-
+END $$;
 SAVEPOINT s;
 -- The text of the error might vary across platforms, so only show SQLSTATE.
 \set VERBOSITY sqlstate
@@ -9776,7 +9767,6 @@ SELECT 1 FROM ft1 LIMIT 1;    -- should fail
 ERROR:  08006
 \set VERBOSITY default
 COMMIT;
-RESET debug_discard_caches;
 -- =============================================================================
 -- test connection invalidation cases and postgres_fdw_get_connections function
 -- =============================================================================
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index f8c813d2175..793dd64811d 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2912,18 +2912,16 @@ ROLLBACK;
 -- so that we can easily terminate the connection later.
 ALTER SERVER loopback OPTIONS (application_name 'fdw_retry_check');
 
--- If debug_discard_caches is active, it results in
--- dropping remote connections after every transaction, making it
--- impossible to test termination meaningfully.  So turn that off
--- for this test.
-SET debug_discard_caches = 0;
-
 -- Make sure we have a remote connection.
 SELECT 1 FROM ft1 LIMIT 1;
 
 -- Terminate the remote connection and wait for the termination to complete.
-SELECT pg_terminate_backend(pid, 180000) FROM pg_stat_activity
+-- (If a cache flush happens, the remote connection might have already been
+-- dropped; so code this step in a way that doesn't fail if no connection.)
+DO $$ BEGIN
+PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity
 	WHERE application_name = 'fdw_retry_check';
+END $$;
 
 -- This query should detect the broken connection when starting new remote
 -- transaction, reestablish new connection, and then succeed.
@@ -2933,8 +2931,10 @@ SELECT 1 FROM ft1 LIMIT 1;
 -- If we detect the broken connection when starting a new remote
 -- subtransaction, we should fail instead of establishing a new connection.
 -- Terminate the remote connection and wait for the termination to complete.
-SELECT pg_terminate_backend(pid, 180000) FROM pg_stat_activity
+DO $$ BEGIN
+PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity
 	WHERE application_name = 'fdw_retry_check';
+END $$;
 SAVEPOINT s;
 -- The text of the error might vary across platforms, so only show SQLSTATE.
 \set VERBOSITY sqlstate
@@ -2942,8 +2942,6 @@ SELECT 1 FROM ft1 LIMIT 1;    -- should fail
 \set VERBOSITY default
 COMMIT;
 
-RESET debug_discard_caches;
-
 -- =============================================================================
 -- test connection invalidation cases and postgres_fdw_get_connections function
 -- =============================================================================

From 8761ea5bcf25f126236beef9e4cb094aa57f78a2 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 1 Mar 2023 11:30:17 -0500
Subject: [PATCH 14/78] Avoid fetching one past the end of translate()'s "to"
 parameter.

This is usually harmless, but if you were very unlucky it could
provoke a segfault due to the "to" string being right up against
the end of memory.  Found via valgrind testing (so we might've
found it earlier, except that our regression tests lacked any
exercise of translate()'s deletion feature).

Fix by switching the order of the test-for-end-of-string and
advance-pointer steps.  While here, compute "to_ptr + tolen"
just once.  (Smarter compilers might figure that out for
themselves, but let's just make sure.)

Report and fix by Daniil Anisimov, in bug #17816.

Discussion: https://postgr.es/m/17816-70f3d2764e88a108@postgresql.org
---
 src/backend/utils/adt/oracle_compat.c | 12 +++++++-----
 src/test/regress/expected/strings.out |  6 ++++++
 src/test/regress/sql/strings.sql      |  1 +
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c
index f737aa6fbde..bd9e5f9e243 100644
--- a/src/backend/utils/adt/oracle_compat.c
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -797,7 +797,8 @@ translate(PG_FUNCTION_ARGS)
 	text	   *to = PG_GETARG_TEXT_PP(2);
 	text	   *result;
 	char	   *from_ptr,
-			   *to_ptr;
+			   *to_ptr,
+			   *to_end;
 	char	   *source,
 			   *target;
 	int			m,
@@ -819,6 +820,7 @@ translate(PG_FUNCTION_ARGS)
 	from_ptr = VARDATA_ANY(from);
 	tolen = VARSIZE_ANY_EXHDR(to);
 	to_ptr = VARDATA_ANY(to);
+	to_end = to_ptr + tolen;
 
 	/*
 	 * The worst-case expansion is to substitute a max-length character for a
@@ -852,16 +854,16 @@ translate(PG_FUNCTION_ARGS)
 		}
 		if (i < fromlen)
 		{
-			/* substitute */
+			/* substitute, or delete if no corresponding "to" character */
 			char	   *p = to_ptr;
 
 			for (i = 0; i < from_index; i++)
 			{
-				p += pg_mblen(p);
-				if (p >= (to_ptr + tolen))
+				if (p >= to_end)
 					break;
+				p += pg_mblen(p);
 			}
-			if (p < (to_ptr + tolen))
+			if (p < to_end)
 			{
 				len = pg_mblen(p);
 				memcpy(target, p, len);
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 1745ca9ca68..d1485df7063 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2256,6 +2256,12 @@ SELECT translate('12345', '14', 'ax');
  a23x5
 (1 row)
 
+SELECT translate('12345', '134', 'a');
+ translate 
+-----------
+ a25
+(1 row)
+
 SELECT ascii('x');
  ascii 
 -------
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index c53727f68d3..3c438a304ac 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -770,6 +770,7 @@ SELECT ltrim('zzzytrim', 'xyz');
 
 SELECT translate('', '14', 'ax');
 SELECT translate('12345', '14', 'ax');
+SELECT translate('12345', '134', 'a');
 
 SELECT ascii('x');
 SELECT ascii('');

From bee87f87674e7c1f3d6cd0600a16344536062f29 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Thu, 2 Mar 2023 14:03:21 +0900
Subject: [PATCH 15/78] pageinspect: Fix crash with gist_page_items()

Attempting to use this function with a raw page not coming from a GiST
index would cause a crash, as it was missing the same sanity checks as
gist_page_items_bytea().  This slightly refactors the code so that all the
basic validation checks for GiST pages are done in a single routine,
in the same fashion as the pageinspect functions for hash and BRIN.

This fixes an issue similar to 076f4d9.  A test is added to stress for
this case.  While on it, I have added a similar test for
brin_page_items() with a combination made of a valid BRIN index and a
raw btree page.  This one was already protected, but it was not tested.

Reported-by: Egor Chindyaskin
Author: Dmitry Koval
Discussion: https://postgr.es/m/17815-fc4a2d3b74705703@postgresql.org
Backpatch-through: 14
---
 contrib/pageinspect/expected/brin.out |  8 ++-
 contrib/pageinspect/expected/gist.out | 10 ++--
 contrib/pageinspect/gistfuncs.c       | 82 +++++++++++++--------------
 contrib/pageinspect/sql/brin.sql      |  8 ++-
 contrib/pageinspect/sql/gist.sql      | 10 ++--
 5 files changed, 62 insertions(+), 56 deletions(-)

diff --git a/contrib/pageinspect/expected/brin.out b/contrib/pageinspect/expected/brin.out
index d19cdc3b957..e12fbeb4774 100644
--- a/contrib/pageinspect/expected/brin.out
+++ b/contrib/pageinspect/expected/brin.out
@@ -48,12 +48,14 @@ SELECT * FROM brin_page_items(get_raw_page('test1_a_idx', 2), 'test1_a_idx')
           1 |      0 |      1 | f        | f        | f           | {1 .. 1}
 (1 row)
 
--- Failure for non-BRIN index.
+-- Mask DETAIL messages as these are not portable across architectures.
+\set VERBOSITY terse
+-- Failures for non-BRIN index.
 CREATE INDEX test1_a_btree ON test1 (a);
 SELECT brin_page_items(get_raw_page('test1_a_btree', 0), 'test1_a_btree');
 ERROR:  "test1_a_btree" is not a BRIN index
--- Mask DETAIL messages as these are not portable across architectures.
-\set VERBOSITY terse
+SELECT brin_page_items(get_raw_page('test1_a_btree', 0), 'test1_a_idx');
+ERROR:  input page is not a valid BRIN page
 -- Invalid special area size
 SELECT brin_page_type(get_raw_page('test1', 0));
 ERROR:  input page is not a valid BRIN page
diff --git a/contrib/pageinspect/expected/gist.out b/contrib/pageinspect/expected/gist.out
index eec1fd91cb9..cae739219bd 100644
--- a/contrib/pageinspect/expected/gist.out
+++ b/contrib/pageinspect/expected/gist.out
@@ -56,14 +56,16 @@ SELECT itemoffset, ctid, itemlen FROM gist_page_items_bytea(get_raw_page('test_g
           2 | (2,65535) |      40
 (2 rows)
 
--- Failure with non-GiST index.
+-- Suppress the DETAIL message, to allow the tests to work across various
+-- page sizes and architectures.
+\set VERBOSITY terse
+-- Failures with non-GiST index.
 CREATE INDEX test_gist_btree on test_gist(t);
 SELECT gist_page_items(get_raw_page('test_gist_btree', 0), 'test_gist_btree');
 ERROR:  "test_gist_btree" is not a GiST index
+SELECT gist_page_items(get_raw_page('test_gist_btree', 0), 'test_gist_idx');
+ERROR:  input page is not a valid GiST page
 -- Failure with various modes.
--- Suppress the DETAIL message, to allow the tests to work across various
--- page sizes and architectures.
-\set VERBOSITY terse
 -- invalid page size
 SELECT gist_page_items_bytea('aaa'::bytea);
 ERROR:  invalid page size
diff --git a/contrib/pageinspect/gistfuncs.c b/contrib/pageinspect/gistfuncs.c
index d1c3c321f83..0ae8f7459c1 100644
--- a/contrib/pageinspect/gistfuncs.c
+++ b/contrib/pageinspect/gistfuncs.c
@@ -34,29 +34,20 @@ PG_FUNCTION_INFO_V1(gist_page_items_bytea);
 #define ItemPointerGetDatum(X)	 PointerGetDatum(X)
 
 
-Datum
-gist_page_opaque_info(PG_FUNCTION_ARGS)
+static Page verify_gist_page(bytea *raw_page);
+
+/*
+ * Verify that the given bytea contains a GIST page or die in the attempt.
+ * A pointer to the page is returned.
+ */
+static Page
+verify_gist_page(bytea *raw_page)
 {
-	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
-	TupleDesc	tupdesc;
-	Page		page;
+	Page		page = get_page_from_raw(raw_page);
 	GISTPageOpaque opaq;
-	HeapTuple	resultTuple;
-	Datum		values[4];
-	bool		nulls[4];
-	Datum		flags[16];
-	int			nflags = 0;
-	uint16		flagbits;
-
-	if (!superuser())
-		ereport(ERROR,
-				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
-				 errmsg("must be superuser to use raw page functions")));
-
-	page = get_page_from_raw(raw_page);
 
 	if (PageIsNew(page))
-		PG_RETURN_NULL();
+		return page;
 
 	/* verify the special space has the expected size */
 	if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData)))
@@ -76,12 +67,38 @@ gist_page_opaque_info(PG_FUNCTION_ARGS)
 							   GIST_PAGE_ID,
 							   opaq->gist_page_id)));
 
+	return page;
+}
+
+Datum
+gist_page_opaque_info(PG_FUNCTION_ARGS)
+{
+	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+	TupleDesc	tupdesc;
+	Page		page;
+	HeapTuple	resultTuple;
+	Datum		values[4];
+	bool		nulls[4];
+	Datum		flags[16];
+	int			nflags = 0;
+	uint16		flagbits;
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("must be superuser to use raw page functions")));
+
+	page = verify_gist_page(raw_page);
+
+	if (PageIsNew(page))
+		PG_RETURN_NULL();
+
 	/* Build a tuple descriptor for our result type */
 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
 		elog(ERROR, "return type must be a row type");
 
 	/* Convert the flags bitmask to an array of human-readable names */
-	flagbits = opaq->flags;
+	flagbits = GistPageGetOpaque(page)->flags;
 	if (flagbits & F_LEAF)
 		flags[nflags++] = CStringGetTextDatum("leaf");
 	if (flagbits & F_DELETED)
@@ -103,7 +120,7 @@ gist_page_opaque_info(PG_FUNCTION_ARGS)
 
 	values[0] = LSNGetDatum(PageGetLSN(page));
 	values[1] = LSNGetDatum(GistPageGetNSN(page));
-	values[2] = Int64GetDatum(opaq->rightlink);
+	values[2] = Int64GetDatum(GistPageGetOpaque(page)->rightlink);
 	values[3] = PointerGetDatum(construct_array(flags, nflags,
 												TEXTOID,
 												-1, false, TYPALIGN_INT));
@@ -124,7 +141,6 @@ gist_page_items_bytea(PG_FUNCTION_ARGS)
 	Tuplestorestate *tupstore;
 	MemoryContext oldcontext;
 	Page		page;
-	GISTPageOpaque opaq;
 	OffsetNumber offset;
 	OffsetNumber maxoff = InvalidOffsetNumber;
 
@@ -157,29 +173,11 @@ gist_page_items_bytea(PG_FUNCTION_ARGS)
 
 	MemoryContextSwitchTo(oldcontext);
 
-	page = get_page_from_raw(raw_page);
+	page = verify_gist_page(raw_page);
 
 	if (PageIsNew(page))
 		PG_RETURN_NULL();
 
-	/* verify the special space has the expected size */
-	if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData)))
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-					 errmsg("input page is not a valid %s page", "GiST"),
-					 errdetail("Expected special size %d, got %d.",
-							   (int) MAXALIGN(sizeof(GISTPageOpaqueData)),
-							   (int) PageGetSpecialSize(page))));
-
-	opaq = (GISTPageOpaque) PageGetSpecialPointer(page);
-	if (opaq->gist_page_id != GIST_PAGE_ID)
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-					 errmsg("input page is not a valid %s page", "GiST"),
-					 errdetail("Expected %08x, got %08x.",
-							   GIST_PAGE_ID,
-							   opaq->gist_page_id)));
-
 	/* Avoid bogus PageGetMaxOffsetNumber() call with deleted pages */
 	if (GistPageIsDeleted(page))
 		elog(NOTICE, "page is deleted");
@@ -276,7 +274,7 @@ gist_page_items(PG_FUNCTION_ARGS)
 				 errmsg("\"%s\" is not a %s index",
 						RelationGetRelationName(indexRel), "GiST")));
 
-	page = get_page_from_raw(raw_page);
+	page = verify_gist_page(raw_page);
 
 	if (PageIsNew(page))
 	{
diff --git a/contrib/pageinspect/sql/brin.sql b/contrib/pageinspect/sql/brin.sql
index 45098c1ef5e..96b4645187e 100644
--- a/contrib/pageinspect/sql/brin.sql
+++ b/contrib/pageinspect/sql/brin.sql
@@ -15,12 +15,14 @@ SELECT * FROM brin_revmap_data(get_raw_page('test1_a_idx', 1)) LIMIT 5;
 SELECT * FROM brin_page_items(get_raw_page('test1_a_idx', 2), 'test1_a_idx')
     ORDER BY blknum, attnum LIMIT 5;
 
--- Failure for non-BRIN index.
+-- Mask DETAIL messages as these are not portable across architectures.
+\set VERBOSITY terse
+
+-- Failures for non-BRIN index.
 CREATE INDEX test1_a_btree ON test1 (a);
 SELECT brin_page_items(get_raw_page('test1_a_btree', 0), 'test1_a_btree');
+SELECT brin_page_items(get_raw_page('test1_a_btree', 0), 'test1_a_idx');
 
--- Mask DETAIL messages as these are not portable across architectures.
-\set VERBOSITY terse
 -- Invalid special area size
 SELECT brin_page_type(get_raw_page('test1', 0));
 SELECT * FROM brin_metapage_info(get_raw_page('test1', 0));
diff --git a/contrib/pageinspect/sql/gist.sql b/contrib/pageinspect/sql/gist.sql
index ee46e09053e..963d5d40a3c 100644
--- a/contrib/pageinspect/sql/gist.sql
+++ b/contrib/pageinspect/sql/gist.sql
@@ -26,14 +26,16 @@ SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 1), 'test_gist_idx')
 -- platform-dependent (endianess), so omit the actual key data from the output.
 SELECT itemoffset, ctid, itemlen FROM gist_page_items_bytea(get_raw_page('test_gist_idx', 0));
 
--- Failure with non-GiST index.
+-- Suppress the DETAIL message, to allow the tests to work across various
+-- page sizes and architectures.
+\set VERBOSITY terse
+
+-- Failures with non-GiST index.
 CREATE INDEX test_gist_btree on test_gist(t);
 SELECT gist_page_items(get_raw_page('test_gist_btree', 0), 'test_gist_btree');
+SELECT gist_page_items(get_raw_page('test_gist_btree', 0), 'test_gist_idx');
 
 -- Failure with various modes.
--- Suppress the DETAIL message, to allow the tests to work across various
--- page sizes and architectures.
-\set VERBOSITY terse
 -- invalid page size
 SELECT gist_page_items_bytea('aaa'::bytea);
 SELECT gist_page_items('aaa'::bytea, 'test_gist_idx'::regclass);

From 1b0d060a6ebf872956c67d345f47fa6cf220e9aa Mon Sep 17 00:00:00 2001
From: Thomas Munro <tmunro@postgresql.org>
Date: Mon, 6 Mar 2023 15:07:15 +1300
Subject: [PATCH 16/78] Fix assert failures in parallel SERIALIZABLE READ ONLY.

1.  Make sure that we don't decrement SxactGlobalXminCount twice when
the SXACT_FLAG_RO_SAFE optimization is reached in a parallel query.
This could trigger a sanity check failure in assert builds.  Non-assert
builds recompute the count in SetNewSxactGlobalXmin(), so the problem
was hidden, explaining the lack of field reports.  Add a new isolation
test to exercise that case.

2.  Remove an assertion that the DOOMED flag can't be set on a partially
released SERIALIZABLEXACT.  Instead, ignore the flag (our transaction
was already determined to be read-only safe, and DOOMED is in fact set
during partial release, and there was already an assertion that it
wasn't set sooner).  Improve an existing isolation test so that it
reaches that case (previously it wasn't quite testing what it was
supposed to be testing; see discussion).

Back-patch to 12.  Bug #17116.  Defects in commit 47a338cf.

Reported-by: Alexander Lakhin <exclusion@gmail.com>
Discussion: https://postgr.es/m/17116-d6ca217acc180e30%40postgresql.org
---
 src/backend/storage/lmgr/predicate.c          | 20 +++-
 .../expected/serializable-parallel-2.out      | 57 +++--------
 .../expected/serializable-parallel-3.out      | 97 +++++++++++++++++++
 src/test/isolation/isolation_schedule         |  1 +
 .../specs/serializable-parallel-2.spec        | 12 ++-
 .../specs/serializable-parallel-3.spec        | 47 +++++++++
 6 files changed, 184 insertions(+), 50 deletions(-)
 create mode 100644 src/test/isolation/expected/serializable-parallel-3.out
 create mode 100644 src/test/isolation/specs/serializable-parallel-3.spec

diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index f5668bdb4ff..3b0daf723b6 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -3331,6 +3331,7 @@ SetNewSxactGlobalXmin(void)
 void
 ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe)
 {
+	bool		partiallyReleasing = false;
 	bool		needToClear;
 	RWConflict	conflict,
 				nextConflict,
@@ -3431,6 +3432,7 @@ ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe)
 		else
 		{
 			MySerializableXact->flags |= SXACT_FLAG_PARTIALLY_RELEASED;
+			partiallyReleasing = true;
 			/* ... and proceed to perform the partial release below. */
 		}
 	}
@@ -3681,9 +3683,15 @@ ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe)
 	 * serializable transactions completes.  We then find the "new oldest"
 	 * xmin and purge any transactions which finished before this transaction
 	 * was launched.
+	 *
+	 * For parallel queries in read-only transactions, it might run twice.
+	 * We only release the reference on the first call.
 	 */
 	needToClear = false;
-	if (TransactionIdEquals(MySerializableXact->xmin, PredXact->SxactGlobalXmin))
+	if ((partiallyReleasing ||
+		 !SxactIsPartiallyReleased(MySerializableXact)) &&
+		TransactionIdEquals(MySerializableXact->xmin,
+							PredXact->SxactGlobalXmin))
 	{
 		Assert(PredXact->SxactGlobalXminCount > 0);
 		if (--(PredXact->SxactGlobalXminCount) == 0)
@@ -4839,10 +4847,14 @@ PreCommit_CheckForSerializationFailure(void)
 
 	LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
 
-	/* Check if someone else has already decided that we need to die */
-	if (SxactIsDoomed(MySerializableXact))
+	/*
+	 * Check if someone else has already decided that we need to die.  Since
+	 * we set our own DOOMED flag when partially releasing, ignore in that
+	 * case.
+	 */
+	if (SxactIsDoomed(MySerializableXact) &&
+		!SxactIsPartiallyReleased(MySerializableXact))
 	{
-		Assert(!SxactIsPartiallyReleased(MySerializableXact));
 		LWLockRelease(SerializableXactHashLock);
 		ereport(ERROR,
 				(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
diff --git a/src/test/isolation/expected/serializable-parallel-2.out b/src/test/isolation/expected/serializable-parallel-2.out
index 92753ccf39f..904fdd90806 100644
--- a/src/test/isolation/expected/serializable-parallel-2.out
+++ b/src/test/isolation/expected/serializable-parallel-2.out
@@ -1,50 +1,23 @@
 Parsed test spec with 2 sessions
 
 starting permutation: s1r s2r1 s1c s2r2 s2c
-step s1r: SELECT * FROM foo;
- a
---
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
-10
-(10 rows)
+step s1r: SELECT COUNT(*) FROM foo;
+count
+-----
+  100
+(1 row)
 
-step s2r1: SELECT * FROM foo;
- a
---
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
-10
-(10 rows)
+step s2r1: SELECT COUNT(*) FROM foo;
+count
+-----
+  100
+(1 row)
 
 step s1c: COMMIT;
-step s2r2: SELECT * FROM foo;
- a
---
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
-10
-(10 rows)
+step s2r2: SELECT COUNT(*) FROM foo;
+count
+-----
+  100
+(1 row)
 
 step s2c: COMMIT;
diff --git a/src/test/isolation/expected/serializable-parallel-3.out b/src/test/isolation/expected/serializable-parallel-3.out
new file mode 100644
index 00000000000..654276a3856
--- /dev/null
+++ b/src/test/isolation/expected/serializable-parallel-3.out
@@ -0,0 +1,97 @@
+Parsed test spec with 4 sessions
+
+starting permutation: s1r s3r s2r1 s4r1 s1c s2r2 s3c s4r2 s4c s2c
+step s1r: SELECT * FROM foo;
+ a
+--
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+(10 rows)
+
+step s3r: SELECT * FROM foo;
+ a
+--
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+(10 rows)
+
+step s2r1: SELECT * FROM foo;
+ a
+--
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+(10 rows)
+
+step s4r1: SELECT * FROM foo;
+ a
+--
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+(10 rows)
+
+step s1c: COMMIT;
+step s2r2: SELECT * FROM foo;
+ a
+--
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+(10 rows)
+
+step s3c: COMMIT;
+step s4r2: SELECT * FROM foo;
+ a
+--
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+(10 rows)
+
+step s4c: COMMIT;
+step s2c: COMMIT;
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 9122028e15d..3f12f923c02 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -146,6 +146,7 @@ test: plpgsql-toast
 test: truncate-conflict
 #test: serializable-parallel
 #test: serializable-parallel-2
+#test: serializable-parallel-3
 
 #test: prepared-transactions
 
diff --git a/src/test/isolation/specs/serializable-parallel-2.spec b/src/test/isolation/specs/serializable-parallel-2.spec
index f3941f78631..c975d96d772 100644
--- a/src/test/isolation/specs/serializable-parallel-2.spec
+++ b/src/test/isolation/specs/serializable-parallel-2.spec
@@ -3,7 +3,8 @@
 
 setup
 {
-	CREATE TABLE foo AS SELECT generate_series(1, 10)::int a;
+	CREATE TABLE foo AS SELECT generate_series(1, 100)::int a;
+	CREATE INDEX ON foo(a);
 	ALTER TABLE foo SET (parallel_workers = 2);
 }
 
@@ -14,7 +15,7 @@ teardown
 
 session s1
 setup 		{ BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; }
-step s1r	{ SELECT * FROM foo; }
+step s1r	{ SELECT COUNT(*) FROM foo; }
 step s1c 	{ COMMIT; }
 
 session s2
@@ -22,9 +23,12 @@ setup		{
 			  BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY;
 			  SET parallel_setup_cost = 0;
 			  SET parallel_tuple_cost = 0;
+			  SET min_parallel_index_scan_size = 0;
+			  SET parallel_leader_participation = off;
+			  SET enable_seqscan = off;
 			}
-step s2r1	{ SELECT * FROM foo; }
-step s2r2	{ SELECT * FROM foo; }
+step s2r1	{ SELECT COUNT(*) FROM foo; }
+step s2r2	{ SELECT COUNT(*) FROM foo; }
 step s2c	{ COMMIT; }
 
 permutation s1r s2r1 s1c s2r2 s2c
diff --git a/src/test/isolation/specs/serializable-parallel-3.spec b/src/test/isolation/specs/serializable-parallel-3.spec
new file mode 100644
index 00000000000..c27298c24ff
--- /dev/null
+++ b/src/test/isolation/specs/serializable-parallel-3.spec
@@ -0,0 +1,47 @@
+# Exercise the case where a read-only serializable transaction has
+# SXACT_FLAG_RO_SAFE set in a parallel query.  This variant is like
+# two copies of #2 running at the same time, and exercises the case
+# where another transaction has the same xmin, and it is the oldest.
+
+setup
+{
+	CREATE TABLE foo AS SELECT generate_series(1, 10)::int a;
+	ALTER TABLE foo SET (parallel_workers = 2);
+}
+
+teardown
+{
+	DROP TABLE foo;
+}
+
+session s1
+setup 		{ BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; }
+step s1r	{ SELECT * FROM foo; }
+step s1c 	{ COMMIT; }
+
+session s2
+setup		{
+			  BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY;
+			  SET parallel_setup_cost = 0;
+			  SET parallel_tuple_cost = 0;
+			}
+step s2r1	{ SELECT * FROM foo; }
+step s2r2	{ SELECT * FROM foo; }
+step s2c	{ COMMIT; }
+
+session s3
+setup		{ BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; }
+step s3r	{ SELECT * FROM foo; }
+step s3c	{ COMMIT; }
+
+session s4
+setup		{
+			  BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY;
+			  SET parallel_setup_cost = 0;
+			  SET parallel_tuple_cost = 0;
+			}
+step s4r1	{ SELECT * FROM foo; }
+step s4r2	{ SELECT * FROM foo; }
+step s4c	{ COMMIT; }
+
+permutation s1r s3r s2r1 s4r1 s1c s2r2 s3c s4r2 s4c s2c

From d2ce335197aad25e20937977d5016cd41d5ba5c1 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 7 Mar 2023 18:21:37 -0500
Subject: [PATCH 17/78] Fix more bugs caused by adding columns to the end of a
 view.

If a view is defined atop another view, and then CREATE OR REPLACE
VIEW is used to add columns to the lower view, then when the upper
view's referencing RTE is expanded by ApplyRetrieveRule we will have
a subquery RTE with fewer eref->colnames than output columns.  This
confuses various code that assumes those lists are always in sync,
as they are in plain parser output.

We have seen such problems before (cf commit d5b760ecb), and now
I think the time has come to do what was speculated about in that
commit: let's make ApplyRetrieveRule synthesize some column names to
preserve the invariant that holds in parser output.  Otherwise we'll
be chasing this class of bugs indefinitely.  Moreover, it appears from
testing that this actually gives us better results in the test case
d5b760ecb added, and likely in other corner cases that we lack
coverage for.

In HEAD, I replaced d5b760ecb's hack to make expandRTE exit early with
an elog(ERROR) call, since the case is now presumably unreachable.
But it seems like changing that in back branches would bring more risk
than benefit, so there I just updated the comment.

Per bug #17811 from Alexander Lakhin.  Back-patch to all supported
branches.

Discussion: https://postgr.es/m/17811-d31686b78f0dffc9@postgresql.org

f
---
 .../src/test/regress/expected/alter_table.out | 14 +++---
 src/backend/parser/parse_relation.c           | 17 ++++---
 src/backend/rewrite/rewriteHandler.c          | 16 +++++++
 src/test/regress/expected/alter_table.out     | 45 +++++++++++++++----
 src/test/regress/sql/alter_table.sql          | 20 ++++++++-
 .../expected/alter_table.out                  | 12 ++---
 6 files changed, 96 insertions(+), 28 deletions(-)

diff --git a/contrib/pax_storage/src/test/regress/expected/alter_table.out b/contrib/pax_storage/src/test/regress/expected/alter_table.out
index fdc79c03c82..8d90e20a98d 100644
--- a/contrib/pax_storage/src/test/regress/expected/alter_table.out
+++ b/contrib/pax_storage/src/test/regress/expected/alter_table.out
@@ -2628,20 +2628,20 @@ View definition:
    FROM at_view_1 v1;
 
 explain (verbose, costs off) select * from at_view_2;
-                              QUERY PLAN                              
-----------------------------------------------------------------------
+                              QUERY PLAN                           
+-------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
-   Output: bt.id, bt.stuff, (to_json(ROW(bt.id, bt.stuff, NULL)))
+   Output: bt.id, bt.stuff, (to_json(ROW(bt.id, bt.stuff, 4)))
    ->  Seq Scan on public.at_base_table bt
-         Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, NULL))
+         Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, 4))
  Optimizer: Postgres query optimizer
  Settings: constraint_exclusion=partition
 (6 rows)
 
 select * from at_view_2;
- id | stuff  |                   j                    
-----+--------+----------------------------------------
- 23 | skidoo | {"id":23,"stuff":"skidoo","more":null}
+ id | stuff  |                  j                  
+----+--------+-------------------------------------
+ 23 | skidoo | {"id":23,"stuff":"skidoo","more":4}
 (1 row)
 
 drop view at_view_2;
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index dfe348c1f40..8e158d63dc1 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -2944,12 +2944,17 @@ expandRTE(RangeTblEntry *rte, int rtindex, int sublevels_up,
 					Assert(varattno == te->resno);
 
 					/*
-					 * In scenarios where columns have been added to a view
-					 * since the outer query was originally parsed, there can
-					 * be more items in the subquery tlist than the outer
-					 * query expects.  We should ignore such extra column(s)
-					 * --- compare the behavior for composite-returning
-					 * functions, in the RTE_FUNCTION case below.
+					 * In a just-parsed subquery RTE, rte->eref->colnames
+					 * should always have exactly as many entries as the
+					 * subquery has non-junk output columns.  However, if the
+					 * subquery RTE was created by expansion of a view,
+					 * perhaps the subquery tlist could now have more entries
+					 * than existed when the outer query was parsed.  Such
+					 * cases should now be prevented because ApplyRetrieveRule
+					 * will extend the colnames list to match.  But out of
+					 * caution, we'll keep the code like this in the back
+					 * branches: just ignore any columns that lack colnames
+					 * entries.
 					 */
 					if (!aliasp_item)
 						break;
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 91ca4cb83d1..23c528b60f9 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -28,6 +28,7 @@
 #include "catalog/dependency.h"
 #include "catalog/pg_type.h"
 #include "commands/trigger.h"
+#include "executor/executor.h"
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -1807,6 +1808,7 @@ ApplyRetrieveRule(Query *parsetree,
 	RangeTblEntry *rte,
 			   *subrte;
 	RowMarkClause *rc;
+	int			numCols;
 
 	if (list_length(rule->actions) != 1)
 		elog(ERROR, "expected just one rule action");
@@ -1972,6 +1974,20 @@ ApplyRetrieveRule(Query *parsetree,
 	rte->updatedCols = NULL;
 	rte->extraUpdatedCols = NULL;
 
+	/*
+	 * Since we allow CREATE OR REPLACE VIEW to add columns to a view, the
+	 * rule_action might emit more columns than we expected when the current
+	 * query was parsed.  Various places expect rte->eref->colnames to be
+	 * consistent with the non-junk output columns of the subquery, so patch
+	 * things up if necessary by adding some dummy column names.
+	 */
+	numCols = ExecCleanTargetListLength(rule_action->targetList);
+	while (list_length(rte->eref->colnames) < numCols)
+	{
+		rte->eref->colnames = lappend(rte->eref->colnames,
+									  makeString(pstrdup("?column?")));
+	}
+
 	return parsetree;
 }
 
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out
index d29bcc0da6d..cbc578e3586 100644
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -2644,26 +2644,55 @@ View definition:
    FROM at_view_1 v1;
 
 explain (verbose, costs off) select * from at_view_2;
-                              QUERY PLAN                              
-----------------------------------------------------------------------
+                              QUERY PLAN                           
+-------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
-   Output: bt.id, bt.stuff, (to_json(ROW(bt.id, bt.stuff, NULL)))
+   Output: bt.id, bt.stuff, (to_json(ROW(bt.id, bt.stuff, 4)))
    ->  Seq Scan on public.at_base_table bt
-         Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, NULL))
+         Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, 4))
  Optimizer: Postgres query optimizer
  Settings: constraint_exclusion=partition
 (6 rows)
 
 select * from at_view_2;
- id | stuff  |                   j                    
-----+--------+----------------------------------------
- 23 | skidoo | {"id":23,"stuff":"skidoo","more":null}
+ id | stuff  |                  j                  
+----+--------+-------------------------------------
+ 23 | skidoo | {"id":23,"stuff":"skidoo","more":4}
 (1 row)
 
 drop view at_view_2;
 drop view at_view_1;
 drop table at_base_table;
--- check adding a column not iself requiring a rewrite, together with
+-- related case (bug #17811)
+begin;
+create temp table t1 as select * from int8_tbl;
+create temp view v1 as select 1::int8 as q1;
+create temp view v2 as select * from v1;
+create or replace temp view v1 with (security_barrier = true)
+  as select * from t1;
+create temp table log (q1 int8, q2 int8);
+create rule v1_upd_rule as on update to v1
+  do also insert into log values (new.*);
+update v2 set q1 = q1 + 1 where q1 = 123;
+select * from t1;
+        q1        |        q2         
+------------------+-------------------
+ 4567890123456789 |               123
+ 4567890123456789 |  4567890123456789
+ 4567890123456789 | -4567890123456789
+              124 |               456
+              124 |  4567890123456789
+(5 rows)
+
+select * from log;
+ q1  |        q2        
+-----+------------------
+ 124 |              456
+ 124 | 4567890123456789
+(2 rows)
+
+rollback;
+-- check adding a column not itself requiring a rewrite, together with
 -- a column requiring a default (bug #16038)
 -- ensure that rewrites aren't silently optimized away, removing the
 -- value of the test
diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql
index 7ddf9f898a8..9da0e5603ea 100644
--- a/src/test/regress/sql/alter_table.sql
+++ b/src/test/regress/sql/alter_table.sql
@@ -1671,7 +1671,25 @@ drop view at_view_2;
 drop view at_view_1;
 drop table at_base_table;
 
--- check adding a column not iself requiring a rewrite, together with
+-- related case (bug #17811)
+begin;
+create temp table t1 as select * from int8_tbl;
+create temp view v1 as select 1::int8 as q1;
+create temp view v2 as select * from v1;
+create or replace temp view v1 with (security_barrier = true)
+  as select * from t1;
+
+create temp table log (q1 int8, q2 int8);
+create rule v1_upd_rule as on update to v1
+  do also insert into log values (new.*);
+
+update v2 set q1 = q1 + 1 where q1 = 123;
+
+select * from t1;
+select * from log;
+rollback;
+
+-- check adding a column not itself requiring a rewrite, together with
 -- a column requiring a default (bug #16038)
 
 -- ensure that rewrites aren't silently optimized away, removing the
diff --git a/src/test/singlenode_regress/expected/alter_table.out b/src/test/singlenode_regress/expected/alter_table.out
index 16d6768bbed..f28310d8bce 100644
--- a/src/test/singlenode_regress/expected/alter_table.out
+++ b/src/test/singlenode_regress/expected/alter_table.out
@@ -2575,18 +2575,18 @@ View definition:
    FROM at_view_1 v1;
 
 explain (verbose, costs off) select * from at_view_2;
-                           QUERY PLAN                           
-----------------------------------------------------------------
+                        QUERY PLAN                          
+-------------------------------------------------------------
  Seq Scan on public.at_base_table bt
-   Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, NULL))
+   Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, 4))
  Settings: constraint_exclusion = 'partition'
  Optimizer: Postgres query optimizer
 (4 rows)
 
 select * from at_view_2;
- id | stuff  |                   j                    
-----+--------+----------------------------------------
- 23 | skidoo | {"id":23,"stuff":"skidoo","more":null}
+ id | stuff  |                  j                  
+----+--------+-------------------------------------
+ 23 | skidoo | {"id":23,"stuff":"skidoo","more":4}
 (1 row)
 
 drop view at_view_2;

From f45fca3c48583b25aad7d5e199b9b3f454eb2c88 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Tue, 7 Mar 2023 21:36:49 -0800
Subject: [PATCH 18/78] Fix corruption due to vacuum_defer_cleanup_age
 underflowing 64bit xids

When vacuum_defer_cleanup_age is bigger than the current xid, including the
epoch, the subtraction of vacuum_defer_cleanup_age would lead to a wrapped
around xid. While that normally is not a problem, the subsequent conversion to
a 64bit xid results in a 64bit-xid very far into the future. As that xid is
used as a horizon to detect whether row versions are old enough to be
removed, that allows removal of rows that are still visible (i.e. corruption).

If vacuum_defer_cleanup_age was never changed from the default, there is no
chance of this bug occurring.

This bug was introduced in dc7420c2c92.  A lesser version of it exists in
12-13, introduced by fb5344c969a, affecting only GiST.

The 12-13 version of the issue can, in rare cases, lead to pages in a gist
index getting recycled too early, potentially causing index entries to be
found multiple times.

The fix is fairly simple - don't allow vacuum_defer_cleanup_age to retreat
further than FirstNormalTransactionId.

Patches to make similar bugs easier to find, by adding asserts to the 64bit
xid infrastructure, have been proposed, but are not suitable for backpatching.

Currently there are no tests for vacuum_defer_cleanup_age. A patch introducing
infrastructure to make writing a test easier has been posted to the list.

Reported-by: Michail Nikolaev <michail.nikolaev@gmail.com>
Reviewed-by: Matthias van de Meent <boekewurm+postgres@gmail.com>
Author: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/20230108002923.cyoser3ttmt63bfn@awork3.anarazel.de
Backpatch: 12-, but impact/fix is smaller for 12-13
---
 src/backend/storage/ipc/procarray.c | 87 ++++++++++++++++++++++++-----
 1 file changed, 74 insertions(+), 13 deletions(-)

diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 3c7701e15b6..208874f793e 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -387,6 +387,9 @@ static inline void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId l
 static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
 static void MaintainLatestCompletedXid(TransactionId latestXid);
 static void MaintainLatestCompletedXidRecovery(TransactionId latestXid);
+static void TransactionIdRetreatSafely(TransactionId *xid,
+									   int retreat_by,
+									   FullTransactionId rel);
 
 static inline FullTransactionId FullXidRelativeTo(FullTransactionId rel,
 												  TransactionId xid);
@@ -1995,17 +1998,35 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h, bool updateGlobalVis)
 		 * so guc.c should limit it to no more than the xidStopLimit threshold
 		 * in varsup.c.  Also note that we intentionally don't apply
 		 * vacuum_defer_cleanup_age on standby servers.
+		 *
+		 * Need to use TransactionIdRetreatSafely() instead of open-coding the
+		 * subtraction, to prevent creating an xid before
+		 * FirstNormalTransactionId.
 		 */
-		h->oldest_considered_running =
-			TransactionIdRetreatedBy(h->oldest_considered_running,
-									 vacuum_defer_cleanup_age);
-		h->shared_oldest_nonremovable =
-			TransactionIdRetreatedBy(h->shared_oldest_nonremovable,
-									 vacuum_defer_cleanup_age);
-		h->data_oldest_nonremovable =
-			TransactionIdRetreatedBy(h->data_oldest_nonremovable,
-									 vacuum_defer_cleanup_age);
-		/* defer doesn't apply to temp relations */
+		Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
+											 h->shared_oldest_nonremovable));
+		Assert(TransactionIdPrecedesOrEquals(h->shared_oldest_nonremovable,
+											 h->data_oldest_nonremovable));
+
+		if (vacuum_defer_cleanup_age > 0)
+		{
+			TransactionIdRetreatSafely(&h->oldest_considered_running,
+									   vacuum_defer_cleanup_age,
+									   h->latest_completed);
+			TransactionIdRetreatSafely(&h->shared_oldest_nonremovable,
+									   vacuum_defer_cleanup_age,
+									   h->latest_completed);
+			TransactionIdRetreatSafely(&h->data_oldest_nonremovable,
+									   vacuum_defer_cleanup_age,
+									   h->latest_completed);
+			/* defer doesn't apply to temp relations */
+
+
+			Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
+												 h->shared_oldest_nonremovable));
+			Assert(TransactionIdPrecedesOrEquals(h->shared_oldest_nonremovable,
+												 h->data_oldest_nonremovable));
+		}
 	}
 
 	/*
@@ -3329,8 +3350,10 @@ GetSnapshotData(Snapshot snapshot, DtxContext distributedTransactionContext)
 		oldestfxid = FullXidRelativeTo(latest_completed, oldestxid);
 
 		/* apply vacuum_defer_cleanup_age */
-		def_vis_xid_data =
-			TransactionIdRetreatedBy(globalxmin, vacuum_defer_cleanup_age);
+		def_vis_xid_data = globalxmin;
+		TransactionIdRetreatSafely(&def_vis_xid_data,
+								   vacuum_defer_cleanup_age,
+								   oldestfxid);
 
 		/* Check whether there's a replication slot requiring an older xmin. */
 		def_vis_xid_data =
@@ -5360,6 +5383,44 @@ GlobalVisCheckRemovableXid(Relation rel, TransactionId xid)
 	return GlobalVisTestIsRemovableXid(state, xid);
 }
 
+/*
+ * Safely retract *xid by retreat_by, store the result in *xid.
+ *
+ * Need to be careful to prevent *xid from retreating below
+ * FirstNormalTransactionId during epoch 0. This is important to prevent
+ * generating xids that cannot be converted to a FullTransactionId without
+ * wrapping around.
+ *
+ * If retreat_by would lead to a too old xid, FirstNormalTransactionId is
+ * returned instead.
+ */
+static void
+TransactionIdRetreatSafely(TransactionId *xid, int retreat_by, FullTransactionId rel)
+{
+	TransactionId original_xid = *xid;
+	FullTransactionId fxid;
+	uint64		fxid_i;
+
+	Assert(TransactionIdIsNormal(original_xid));
+	Assert(retreat_by >= 0);	/* relevant GUCs are stored as ints */
+	AssertTransactionIdInAllowableRange(original_xid);
+
+	if (retreat_by == 0)
+		return;
+
+	fxid = FullXidRelativeTo(rel, original_xid);
+	fxid_i = U64FromFullTransactionId(fxid);
+
+	if ((fxid_i - FirstNormalTransactionId) <= retreat_by)
+		*xid = FirstNormalTransactionId;
+	else
+	{
+		*xid = TransactionIdRetreatedBy(original_xid, retreat_by);
+		Assert(TransactionIdIsNormal(*xid));
+		Assert(NormalTransactionIdPrecedes(*xid, original_xid));
+	}
+}
+
 /*
  * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it
  * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel).
@@ -6681,4 +6742,4 @@ LoopBackendProc(BackendProcCallbackFunction func, void *args)
 		(*func)(proc, args);
 	}
 	LWLockRelease(ProcArrayLock);
-}
\ No newline at end of file
+}

From f08cf76551224160e65c511bd138a055f9b124d2 Mon Sep 17 00:00:00 2001
From: Thomas Munro <tmunro@postgresql.org>
Date: Thu, 9 Mar 2023 16:33:24 +1300
Subject: [PATCH 19/78] Fix race in SERIALIZABLE READ ONLY.

Commit bdaabb9b started skipping doomed transactions when building the
list of possible conflicts for SERIALIZABLE READ ONLY.  That makes
sense, because doomed transactions won't commit, but a couple of subtle
things broke:

1.  If all uncommitted r/w transactions are doomed, a READ ONLY
transaction would arbitrarily not benefit from the safe snapshot
optimization.  It would not be taken immediately, and yet no other
transaction would set SXACT_FLAG_RO_SAFE later.

2.  In the same circumstances but with DEFERRABLE, GetSafeSnapshot()
would correctly exit its wait loop without sleeping and then take the
optimization in non-assert builds, but assert builds would fail a sanity
check that SXACT_FLAG_RO_SAFE had been set by another transaction.

This is similar to the case for PredXact->WritableSxactCount == 0.  We
should opt out immediately if our possibleUnsafeConflicts list is empty
after filtering.

The code to maintain the serializable global xmin is moved down below
the new opt out site, because otherwise we'd have to reverse its effects
before returning.

Back-patch to all supported releases.  Bug #17368.

Reported-by: Alexander Lakhin <exclusion@gmail.com>
Discussion: https://postgr.es/m/17116-d6ca217acc180e30%40postgresql.org
Discussion: https://postgr.es/m/20110707212159.GF76634%40csail.mit.edu
---
 src/backend/storage/lmgr/predicate.c | 49 ++++++++++++++++++----------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index 3b0daf723b6..ec6f26a72ba 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -1846,24 +1846,6 @@ GetSerializableTransactionSnapshotInt(Snapshot snapshot,
 		return snapshot;
 	}
 
-	/* Maintain serializable global xmin info. */
-	if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
-	{
-		Assert(PredXact->SxactGlobalXminCount == 0);
-		PredXact->SxactGlobalXmin = snapshot->xmin;
-		PredXact->SxactGlobalXminCount = 1;
-		SerialSetActiveSerXmin(snapshot->xmin);
-	}
-	else if (TransactionIdEquals(snapshot->xmin, PredXact->SxactGlobalXmin))
-	{
-		Assert(PredXact->SxactGlobalXminCount > 0);
-		PredXact->SxactGlobalXminCount++;
-	}
-	else
-	{
-		Assert(TransactionIdFollows(snapshot->xmin, PredXact->SxactGlobalXmin));
-	}
-
 	/* Initialize the structure. */
 	sxact->vxid = vxid;
 	sxact->SeqNo.lastCommitBeforeSnapshot = PredXact->LastSxactCommitSeqNo;
@@ -1900,6 +1882,19 @@ GetSerializableTransactionSnapshotInt(Snapshot snapshot,
 				SetPossibleUnsafeConflict(sxact, othersxact);
 			}
 		}
+
+		/*
+		 * If we didn't find any possibly unsafe conflicts because every
+		 * uncommitted writable transaction turned out to be doomed, then we
+		 * can "opt out" immediately.  See comments above the earlier check for
+		 * PredXact->WritableSxactCount == 0.
+		 */
+		if (SHMQueueEmpty(&sxact->possibleUnsafeConflicts))
+		{
+			ReleasePredXact(sxact);
+			LWLockRelease(SerializableXactHashLock);
+			return snapshot;
+		}
 	}
 	else
 	{
@@ -1908,6 +1903,24 @@ GetSerializableTransactionSnapshotInt(Snapshot snapshot,
 			   (MaxBackends + max_prepared_xacts));
 	}
 
+	/* Maintain serializable global xmin info. */
+	if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
+	{
+		Assert(PredXact->SxactGlobalXminCount == 0);
+		PredXact->SxactGlobalXmin = snapshot->xmin;
+		PredXact->SxactGlobalXminCount = 1;
+		SerialSetActiveSerXmin(snapshot->xmin);
+	}
+	else if (TransactionIdEquals(snapshot->xmin, PredXact->SxactGlobalXmin))
+	{
+		Assert(PredXact->SxactGlobalXminCount > 0);
+		PredXact->SxactGlobalXminCount++;
+	}
+	else
+	{
+		Assert(TransactionIdFollows(snapshot->xmin, PredXact->SxactGlobalXmin));
+	}
+
 	MySerializableXact = sxact;
 	MyXactDidWrite = false;		/* haven't written anything yet */
 

From e1c98a9d9a62ff4443ca69b68ad1c8c4bc92f218 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 11 Mar 2023 12:15:41 -0500
Subject: [PATCH 20/78] Fix misbehavior in contrib/pg_trgm with an
 unsatisfiable regex.

If the regex compiler can see that a regex is unsatisfiable
(for example, '$foo') then it may emit an NFA having no arcs.
pg_trgm's packGraph function did the wrong thing in this case;
it would access off the end of a work array, and with bad luck
could produce a corrupted output data structure causing more
problems later.  This could end with wrong answers or crashes
in queries using a pg_trgm GIN or GiST index with such a regex.

Fix by not trying to de-duplicate if there aren't at least 2 arcs.

Per bug #17830 from Alexander Lakhin.  Back-patch to all supported
branches.

Discussion: https://postgr.es/m/17830-57ff5f89bdb02b09@postgresql.org
---
 contrib/pg_trgm/expected/pg_word_trgm.out |  6 ++++++
 contrib/pg_trgm/sql/pg_word_trgm.sql      |  3 +++
 contrib/pg_trgm/trgm_regexp.c             | 26 ++++++++++++++---------
 3 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/contrib/pg_trgm/expected/pg_word_trgm.out b/contrib/pg_trgm/expected/pg_word_trgm.out
index 9f0ca502a6d..4c6b49934b6 100644
--- a/contrib/pg_trgm/expected/pg_word_trgm.out
+++ b/contrib/pg_trgm/expected/pg_word_trgm.out
@@ -1048,3 +1048,9 @@ select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kaban
  Waikala                          |      0.3
 (89 rows)
 
+-- test unsatisfiable pattern
+select * from test_trgm2 where t ~ '.*$x';
+ t 
+---
+(0 rows)
+
diff --git a/contrib/pg_trgm/sql/pg_word_trgm.sql b/contrib/pg_trgm/sql/pg_word_trgm.sql
index d9fa1c55e5e..d2ada49133a 100644
--- a/contrib/pg_trgm/sql/pg_word_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_word_trgm.sql
@@ -43,3 +43,6 @@ select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t
 select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t;
 select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t;
 select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t;
+
+-- test unsatisfiable pattern
+select * from test_trgm2 where t ~ '.*$x';
diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c
index 71e4ebee4e9..3485a725cde 100644
--- a/contrib/pg_trgm/trgm_regexp.c
+++ b/contrib/pg_trgm/trgm_regexp.c
@@ -1944,9 +1944,7 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
 				arcsCount;
 	HASH_SEQ_STATUS scan_status;
 	TrgmState  *state;
-	TrgmPackArcInfo *arcs,
-			   *p1,
-			   *p2;
+	TrgmPackArcInfo *arcs;
 	TrgmPackedArc *packedArcs;
 	TrgmPackedGraph *result;
 	int			i,
@@ -2018,17 +2016,25 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
 	qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
 
 	/* We could have duplicates because states were merged. Remove them. */
-	/* p1 is probe point, p2 is last known non-duplicate. */
-	p2 = arcs;
-	for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
+	if (arcIndex > 1)
 	{
-		if (packArcInfoCmp(p1, p2) > 0)
+		/* p1 is probe point, p2 is last known non-duplicate. */
+		TrgmPackArcInfo *p1,
+				   *p2;
+
+		p2 = arcs;
+		for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
 		{
-			p2++;
-			*p2 = *p1;
+			if (packArcInfoCmp(p1, p2) > 0)
+			{
+				p2++;
+				*p2 = *p1;
+			}
 		}
+		arcsCount = (p2 - arcs) + 1;
 	}
-	arcsCount = (p2 - arcs) + 1;
+	else
+		arcsCount = arcIndex;
 
 	/* Create packed representation */
 	result = (TrgmPackedGraph *)

From ce31c347e2f0d34e80e1e62f4bb0df46978ca79b Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Sat, 11 Mar 2023 14:12:51 -0800
Subject: [PATCH 21/78] amcheck: Fix ordering bug in update_cached_xid_range()

The initialization order in update_cached_xid_range() was wrong, calling
FullTransactionIdFromXidAndCtx() before setting
->next_xid. FullTransactionIdFromXidAndCtx() uses ->next_xid.

In most situations this will not cause visible issues, because the next call
to update_cached_xid_range() will use a less wrong ->next_xid. It's rare that
xids advance fast enough for this to be a problem.

Found while adding more asserts to the 64bit xid infrastructure.

Reviewed-by: Mark Dilger <mark.dilger@enterprisedb.com>
Discussion: https://postgr.es/m/20230108002923.cyoser3ttmt63bfn@awork3.anarazel.de
Backpatch: 14-, where heapam verification was introduced
---
 contrib/amcheck/verify_heapam.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index 9abeca607d7..6614b609e83 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -1579,6 +1579,9 @@ FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx)
 {
 	uint32		epoch;
 
+	Assert(TransactionIdIsNormal(ctx->next_xid));
+	Assert(FullTransactionIdIsNormal(ctx->next_fxid));
+
 	if (!TransactionIdIsNormal(xid))
 		return FullTransactionIdFromEpochAndXid(0, xid);
 	epoch = EpochFromFullTransactionId(ctx->next_fxid);
@@ -1600,8 +1603,8 @@ update_cached_xid_range(HeapCheckContext *ctx)
 	LWLockRelease(XidGenLock);
 
 	/* And compute alternate versions of the same */
-	ctx->oldest_fxid = FullTransactionIdFromXidAndCtx(ctx->oldest_xid, ctx);
 	ctx->next_xid = XidFromFullTransactionId(ctx->next_fxid);
+	ctx->oldest_fxid = FullTransactionIdFromXidAndCtx(ctx->oldest_xid, ctx);
 }
 
 /*

From 48bb92a211808713e3b26cb14cf73a113adbecf1 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Sat, 11 Mar 2023 14:12:51 -0800
Subject: [PATCH 22/78] amcheck: Fix FullTransactionIdFromXidAndCtx() for xids
 before epoch 0

64bit xids can't represent xids before epoch 0 (see also be504a3e974). When
FullTransactionIdFromXidAndCtx() was passed such an xid, it'd create a 64bit
xid far into the future. Noticed while adding assertions in the course of
investigating be504a3e974, as amcheck's tests create such xids.

To fix the issue, just return FirstNormalFullTransactionId in this case. A
freshly initdb'd cluster already has a newer horizon. The most minimal version
of this would make the messages for some detected corruptions differently
inaccurate. To make those cases accurate, switch
FullTransactionIdFromXidAndCtx() to use the 32bit modulo difference between
xid and nextxid to compute the 64bit xid, yielding sensible "in the future" /
"in the past" answers.

Reviewed-by: Mark Dilger <mark.dilger@enterprisedb.com>
Discussion: https://postgr.es/m/20230108002923.cyoser3ttmt63bfn@awork3.anarazel.de
Backpatch: 14-, where heapam verification was introduced
---
 contrib/amcheck/verify_heapam.c           | 33 +++++++++++++++++++----
 src/bin/pg_amcheck/t/004_verify_heapam.pl | 30 ++++++++++++++-------
 2 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index 6614b609e83..f7964b78173 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -1577,17 +1577,40 @@ check_tuple(HeapCheckContext *ctx)
 static FullTransactionId
 FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx)
 {
-	uint32		epoch;
+	uint64		nextfxid_i;
+	int32		diff;
+	FullTransactionId fxid;
 
 	Assert(TransactionIdIsNormal(ctx->next_xid));
 	Assert(FullTransactionIdIsNormal(ctx->next_fxid));
+	Assert(XidFromFullTransactionId(ctx->next_fxid) == ctx->next_xid);
 
 	if (!TransactionIdIsNormal(xid))
 		return FullTransactionIdFromEpochAndXid(0, xid);
-	epoch = EpochFromFullTransactionId(ctx->next_fxid);
-	if (xid > ctx->next_xid)
-		epoch--;
-	return FullTransactionIdFromEpochAndXid(epoch, xid);
+
+	nextfxid_i = U64FromFullTransactionId(ctx->next_fxid);
+
+	/* compute the 32bit modulo difference */
+	diff = (int32) (ctx->next_xid - xid);
+
+	/*
+	 * In cases of corruption we might see a 32bit xid that is before epoch
+	 * 0. We can't represent that as a 64bit xid, due to 64bit xids being
+	 * unsigned integers, without the modulo arithmetic of 32bit xid. There's
+	 * no really nice way to deal with that, but it works ok enough to use
+	 * FirstNormalFullTransactionId in that case, as a freshly initdb'd
+	 * cluster already has a newer horizon.
+	 */
+	if (diff > 0 && (nextfxid_i - FirstNormalTransactionId) < (int64) diff)
+	{
+		Assert(EpochFromFullTransactionId(ctx->next_fxid) == 0);
+		fxid = FirstNormalFullTransactionId;
+	}
+	else
+		fxid = FullTransactionIdFromU64(nextfxid_i - diff);
+
+	Assert(FullTransactionIdIsNormal(fxid));
+	return fxid;
 }
 
 /*
diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl
index b603efad929..4cadb837730 100644
--- a/src/bin/pg_amcheck/t/004_verify_heapam.pl
+++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl
@@ -217,7 +217,7 @@ sub write_tuple
 my $relpath = "$pgdata/$rel";
 
 # Insert data and freeze public.test
-use constant ROWCOUNT => 16;
+use constant ROWCOUNT => 17;
 $node->safe_psql(
 	'postgres', qq(
 	INSERT INTO public.test (a, b, c)
@@ -296,7 +296,7 @@ sub write_tuple
 $node->start;
 
 # Ok, Xids and page layout look ok.  We can run corruption tests.
-plan tests => 19;
+plan tests => 20;
 
 # Check that pg_amcheck runs against the uncorrupted table without error.
 $node->command_ok(
@@ -379,23 +379,24 @@ sub header
 	elsif ($offnum == 3)
 	{
 		# Corruptly set xmin < datfrozenxid, further back, noting circularity
-		# of xid comparison.  For a new cluster with epoch = 0, the corrupt
-		# xmin will be interpreted as in the future
-		$tup->{t_xmin} = 4026531839;
+		# of xid comparison.
+		my $xmin = 4026531839;
+		$tup->{t_xmin} = $xmin;
 		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
 		$tup->{t_infomask} &= ~HEAP_XMIN_INVALID;
 
 		push @expected,
-		  qr/${$header}xmin 4026531839 equals or exceeds next valid transaction ID 0:\d+/;
+		  qr/${$header}xmin ${xmin} precedes oldest valid transaction ID 0:\d+/;
 	}
 	elsif ($offnum == 4)
 	{
 		# Corruptly set xmax < relminmxid;
-		$tup->{t_xmax} = 4026531839;
+		my $xmax = 4026531839;
+		$tup->{t_xmax} = $xmax;
 		$tup->{t_infomask} &= ~HEAP_XMAX_INVALID;
 
 		push @expected,
-		  qr/${$header}xmax 4026531839 equals or exceeds next valid transaction ID 0:\d+/;
+		  qr/${$header}xmax ${xmax} precedes oldest valid transaction ID 0:\d+/;
 	}
 	elsif ($offnum == 5)
 	{
@@ -503,7 +504,7 @@ sub header
 		push @expected,
 		  qr/${header}multitransaction ID 4 equals or exceeds next valid multitransaction ID 1/;
 	}
-	elsif ($offnum == 15)    # Last offnum must equal ROWCOUNT
+	elsif ($offnum == 15)
 	{
 		# Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI
 		$tup->{t_infomask} |= HEAP_XMAX_COMMITTED;
@@ -513,6 +514,17 @@ sub header
 		push @expected,
 		  qr/${header}multitransaction ID 4000000000 precedes relation minimum multitransaction ID threshold 1/;
 	}
+	elsif ($offnum == 16)    # Last offnum must equal ROWCOUNT
+	{
+		# Corruptly set xmin > next_xid to be in the future.
+		my $xmin = 123456;
+		$tup->{t_xmin} = $xmin;
+		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
+		$tup->{t_infomask} &= ~HEAP_XMIN_INVALID;
+
+		push @expected,
+          qr/${$header}xmin ${xmin} equals or exceeds next valid transaction ID 0:\d+/;
+	}
 	write_tuple($file, $offset, $tup);
 }
 close($file)

From 6f3249ead06d6b7f01777f7b32a8fb92c313b94b Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <andrew@dunslane.net>
Date: Sun, 12 Mar 2023 09:00:32 -0400
Subject: [PATCH 23/78] Mark unsafe_tests module as not runnable with
 installcheck

This was an omission in the original creation of the module.

Also slightly adjust some wording to avoid a double "is".

Backpatch the non-meson piece of this to release 12, where the module
was introduced.

Discussion: https://postgr.es/m/be869e1c-8e3f-4cde-8609-212c899cccf9@dunslane.net
---
 src/test/modules/unsafe_tests/Makefile | 3 +++
 src/test/modules/unsafe_tests/README   | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/test/modules/unsafe_tests/Makefile b/src/test/modules/unsafe_tests/Makefile
index 3ecf5fcfc5b..1d989007bd5 100644
--- a/src/test/modules/unsafe_tests/Makefile
+++ b/src/test/modules/unsafe_tests/Makefile
@@ -2,6 +2,9 @@
 
 REGRESS = rolenames alter_system_table
 
+# the whole point of these tests is to not run installcheck
+NO_INSTALLCHECK = 1
+
 ifdef USE_PGXS
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
diff --git a/src/test/modules/unsafe_tests/README b/src/test/modules/unsafe_tests/README
index a7e5b2a04f5..d9dbd038b95 100644
--- a/src/test/modules/unsafe_tests/README
+++ b/src/test/modules/unsafe_tests/README
@@ -1,6 +1,6 @@
 This directory doesn't actually contain any extension module.
 
-What it is is a home for regression tests that we don't want to run
+Instead it is a home for regression tests that we don't want to run
 during "make installcheck" because they could have side-effects that
 seem undesirable for a production installation.
 

From 3be1c053d625aa0c571c040830590683fc6cc6e9 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Mon, 13 Mar 2023 16:36:31 +0900
Subject: [PATCH 24/78] Fix inconsistent error handling for GSS encryption in
 PQconnectPoll()

The error cases for TLS and GSS encryption were inconsistent.  After TLS
fails, the connection is marked as dead and follow-up calls of
PQconnectPoll() would return immediately, but GSS encryption was not
doing that, so the connection would still have been allowed to enter the
GSS handling code.  This was handled incorrectly when gssencmode was set
to "require".  "prefer" was working correctly, and this could not happen
under "disable" as GSS encryption would not be attempted.

This commit brings the error handling of GSS encryption on par with the
TLS portion, fixing the case of gssencmode=require.

Reported-by: Jacob Champion
Author: Michael Paquier
Reviewed-by: Jacob Champion, Stephen Frost
Discussion: https://postgr.es/m/23787477-5fe1-a161-6d2a-e459f74c4713@timescale.com
Backpatch-through: 12
---
 src/interfaces/libpq/fe-connect.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index 7f3dfd462a6..46e8540004e 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -3323,17 +3323,22 @@ PQconnectPoll(PGconn *conn)
 					conn->status = CONNECTION_MADE;
 					return PGRES_POLLING_WRITING;
 				}
-				else if (pollres == PGRES_POLLING_FAILED &&
-						 conn->gssencmode[0] == 'p')
+				else if (pollres == PGRES_POLLING_FAILED)
 				{
-					/*
-					 * We failed, but we can retry on "prefer".  Have to drop
-					 * the current connection to do so, though.
-					 */
-					conn->try_gss = false;
-					need_new_connection = true;
-					goto keep_going;
+					if (conn->gssencmode[0] == 'p')
+					{
+						/*
+						 * We failed, but we can retry on "prefer".  Have to
+						 * drop the current connection to do so, though.
+						 */
+						conn->try_gss = false;
+						need_new_connection = true;
+						goto keep_going;
+					}
+					/* Else it's a hard failure */
+					goto error_return;
 				}
+				/* Else, return POLLING_READING or POLLING_WRITING status */
 				return pollres;
 #else							/* !ENABLE_GSS */
 				/* unreachable */

From 8917dcb7b8c38fa0706a07b3ac1a5dbde1594c8e Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 14 Mar 2023 11:10:45 -0400
Subject: [PATCH 25/78] Remove unnecessary code in
 dependency_is_compatible_expression().

Scanning the expression for compatible Vars isn't really necessary,
because the subsequent match against StatisticExtInfo entries will
eliminate expressions containing other Vars just fine.  Moreover,
this code hadn't stopped to think about what to do with
PlaceHolderVars or Aggrefs in the clause; and at least for the PHV
case, that demonstrably leads to failures.  Rather than work out
whether it's reasonable to ignore those, let's just remove the
whole stanza.

Per report from Richard Guo.  Back-patch to v14 where this code
was added.

Discussion: https://postgr.es/m/CAMbWs48Mmvm-acGevXuwpB=g5JMqVSL6i9z5UaJyLGJqa-XPAA@mail.gmail.com
---
 src/backend/statistics/dependencies.c | 28 +++------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index 8b3d560cb20..06d930f9164 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -1170,13 +1170,12 @@ clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
  *		Determines if the expression is compatible with functional dependencies
  *
  * Similar to dependency_is_compatible_clause, but doesn't enforce that the
- * expression is a simple Var. OTOH we check that there's at least one
- * statistics object matching the expression.
+ * expression is a simple Var.  On success, return the matching statistics
+ * expression into *expr.
  */
 static bool
 dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
 {
-	List	   *vars;
 	ListCell   *lc,
 			   *lc2;
 	Node	   *clause_expr;
@@ -1324,29 +1323,8 @@ dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, N
 	if (IsA(clause_expr, RelabelType))
 		clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
 
-	vars = pull_var_clause(clause_expr, 0);
-
-	foreach(lc, vars)
-	{
-		Var		   *var = (Var *) lfirst(lc);
-
-		/* Ensure Var is from the correct relation */
-		if (var->varno != relid)
-			return false;
-
-		/* We also better ensure the Var is from the current level */
-		if (var->varlevelsup != 0)
-			return false;
-
-		/* Also ignore system attributes (we don't allow stats on those) */
-		if (!AttrNumberIsForUserDefinedAttr(var->varattno))
-			return false;
-	}
-
 	/*
-	 * Check if we actually have a matching statistics for the expression.
-	 *
-	 * XXX Maybe this is an overkill. We'll eliminate the expressions later.
+	 * Search for a matching statistics expression.
 	 */
 	foreach(lc, statlist)
 	{

From 90566f8e5e7f6743d87cf8be7caabc833e925688 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 14 Mar 2023 19:17:31 -0400
Subject: [PATCH 26/78] Fix corner case bug in numeric to_char() some more.

The band-aid applied in commit f0bedf3e4 turns out to still need
some work: it made sure we didn't set Np->last_relevant too small
(to the left of the decimal point), but it didn't prevent setting
it too large (off the end of the partially-converted string).
This could result in fetching data beyond the end of the allocated
space, which with very bad luck could cause a SIGSEGV, though
I don't see any hazard of interesting memory disclosure.

Per bug #17839 from Thiago Nunes.  The bug's pretty ancient,
so back-patch to all supported versions.

Discussion: https://postgr.es/m/17839-aada50db24d7b0da@postgresql.org
---
 src/backend/utils/adt/formatting.c    | 11 +++++++++--
 src/test/regress/expected/numeric.out |  6 ++++++
 src/test/regress/sql/numeric.sql      |  1 +
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 84bb4d47330..a72546711c9 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -5719,13 +5719,20 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
 
 			/*
 			 * If any '0' specifiers are present, make sure we don't strip
-			 * those digits.
+			 * those digits.  But don't advance last_relevant beyond the last
+			 * character of the Np->number string, which is a hazard if the
+			 * number got shortened due to precision limitations.
 			 */
 			if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
 			{
+				int			last_zero_pos;
 				char	   *last_zero;
 
-				last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces);
+				/* note that Np->number cannot be zero-length here */
+				last_zero_pos = strlen(Np->number) - 1;
+				last_zero_pos = Min(last_zero_pos,
+									Np->Num->zero_end - Np->out_pre_spaces);
+				last_zero = Np->number + last_zero_pos;
 				if (Np->last_relevant < last_zero)
 					Np->last_relevant = last_zero;
 			}
diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out
index bea33181bee..2f2818e001c 100644
--- a/src/test/regress/expected/numeric.out
+++ b/src/test/regress/expected/numeric.out
@@ -1929,6 +1929,12 @@ SELECT to_char('100'::numeric, 'FM999');
  100
 (1 row)
 
+SELECT to_char('12345678901'::float8, 'FM9999999999D9999900000000000000000');
+     to_char     
+-----------------
+ ##########.####
+(1 row)
+
 -- Check parsing of literal text in a format string
 SELECT to_char('100'::numeric, 'foo999');
  to_char 
diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql
index 9233c666d4b..56294da5ae9 100644
--- a/src/test/regress/sql/numeric.sql
+++ b/src/test/regress/sql/numeric.sql
@@ -979,6 +979,7 @@ FROM v;
 SELECT to_char('100'::numeric, 'FM999.9');
 SELECT to_char('100'::numeric, 'FM999.');
 SELECT to_char('100'::numeric, 'FM999');
+SELECT to_char('12345678901'::float8, 'FM9999999999D9999900000000000000000');
 
 -- Check parsing of literal text in a format string
 SELECT to_char('100'::numeric, 'foo999');

From b87e5c231a31df7a32dffbe2bdce905f7b225325 Mon Sep 17 00:00:00 2001
From: Thomas Munro <tmunro@postgresql.org>
Date: Wed, 15 Mar 2023 13:17:18 +1300
Subject: [PATCH 27/78] Fix waitpid() emulation on Windows.

Our waitpid() emulation didn't prevent a PID from being recycled by the
OS before the call to waitpid().  The postmaster could finish up
tracking more than one child process with the same PID, and confuse
them.

Fix, by moving the guts of pgwin32_deadchild_callback() into waitpid(),
so that resources are released synchronously.  The process and PID
continue to exist until we close the process handle, which only happens
once we're ready to adjust our book-keeping of running children.

This seems to explain a couple of failures on CI.  It had never been
reported before, despite the code being as old as the Windows port.
Perhaps Windows started recycling PIDs more rapidly, or perhaps timing
changes due to commit 7389aad6 made it more likely to break.

Thanks to Alexander Lakhin for analysis and Andres Freund for tracking
down the root cause.

Back-patch to all supported branches.

Reported-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/20230208012852.bvkn2am4h4iqjogq%40awork3.anarazel.de
---
 src/backend/postmaster/postmaster.c | 70 ++++++++++++++++-------------
 1 file changed, 40 insertions(+), 30 deletions(-)

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 32034dfb235..c1d4207757b 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -5474,7 +5474,7 @@ internal_forkexec(int argc, char *argv[], Port *port)
 				(errmsg_internal("could not register process for wait: error code %lu",
 								 GetLastError())));
 
-	/* Don't close pi.hProcess here - the wait thread needs access to it */
+	/* Don't close pi.hProcess here - waitpid() needs access to it */
 
 	CloseHandle(pi.hThread);
 
@@ -7307,36 +7307,21 @@ ShmemBackendArrayRemove(Backend *bn)
 static pid_t
 waitpid(pid_t pid, int *exitstatus, int options)
 {
+	win32_deadchild_waitinfo *childinfo;
+	DWORD		exitcode;
 	DWORD		dwd;
 	ULONG_PTR	key;
 	OVERLAPPED *ovl;
 
-	/*
-	 * Check if there are any dead children. If there are, return the pid of
-	 * the first one that died.
-	 */
-	if (GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0))
+	/* Try to consume one win32_deadchild_waitinfo from the queue. */
+	if (!GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0))
 	{
-		*exitstatus = (int) key;
-		return dwd;
+		errno = EAGAIN;
+		return -1;
 	}
 
-	return -1;
-}
-
-/*
- * Note! Code below executes on a thread pool! All operations must
- * be thread safe! Note that elog() and friends must *not* be used.
- */
-static void WINAPI
-pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
-{
-	win32_deadchild_waitinfo *childinfo = (win32_deadchild_waitinfo *) lpParameter;
-	DWORD		exitcode;
-
-	if (TimerOrWaitFired)
-		return;					/* timeout. Should never happen, since we use
-								 * INFINITE as timeout value. */
+	childinfo = (win32_deadchild_waitinfo *) key;
+	pid = childinfo->procId;
 
 	/*
 	 * Remove handle from wait - required even though it's set to wait only
@@ -7352,13 +7337,11 @@ pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
 		write_stderr("could not read exit code for process\n");
 		exitcode = 255;
 	}
-
-	if (!PostQueuedCompletionStatus(win32ChildQueue, childinfo->procId, (ULONG_PTR) exitcode, NULL))
-		write_stderr("could not post child completion status\n");
+	*exitstatus = exitcode;
 
 	/*
-	 * Handle is per-process, so we close it here instead of in the
-	 * originating thread
+	 * Close the process handle.  Only after this point can the PID can be
+	 * recycled by the kernel.
 	 */
 	CloseHandle(childinfo->procHandle);
 
@@ -7368,7 +7351,34 @@ pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
 	 */
 	free(childinfo);
 
-	/* Queue SIGCHLD signal */
+	return pid;
+}
+
+/*
+ * Note! Code below executes on a thread pool! All operations must
+ * be thread safe! Note that elog() and friends must *not* be used.
+ */
+static void WINAPI
+pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
+{
+	/* Should never happen, since we use INFINITE as timeout value. */
+	if (TimerOrWaitFired)
+		return;
+
+	/*
+	 * Post the win32_deadchild_waitinfo object for waitpid() to deal with. If
+	 * that fails, we leak the object, but we also leak a whole process and
+	 * get into an unrecoverable state, so there's not much point in worrying
+	 * about that.  We'd like to panic, but we can't use that infrastructure
+	 * from this thread.
+	 */
+	if (!PostQueuedCompletionStatus(win32ChildQueue,
+									0,
+									(ULONG_PTR) lpParameter,
+									NULL))
+		write_stderr("could not post child completion status\n");
+
+	/* Queue SIGCHLD signal. */
 	pg_queue_signal(SIGCHLD);
 }
 #endif							/* WIN32 */

From 4aaa6c40663ed3faa6f574e249c112dcbc237460 Mon Sep 17 00:00:00 2001
From: Thomas Munro <tmunro@postgresql.org>
Date: Wed, 15 Mar 2023 13:57:00 +1300
Subject: [PATCH 28/78] Fix fractional vacuum_cost_delay.

Commit 4753ef37 changed vacuum_delay_point() to use the WaitLatch() API,
to fix the problem that vacuum could keep running for a very long time
after the postmaster died.

Unfortunately, that broke commit caf626b2's support for fractional
vacuum_cost_delay, which shipped in PostgreSQL 12.  WaitLatch() works in
whole milliseconds.

For now, revert the change from commit 4753ef37, but add an explicit
check for postmaster death.  That's an extra system call on systems
other than Linux and FreeBSD, but that overhead doesn't matter much
considering that we willingly went to sleep and woke up again.  (In
later work, we might add higher resolution timeouts to the latch API so
that we could do this with our standard programming pattern, but that
wouldn't be back-patched.)

Back-patch to 14, where commit 4753ef37 arrived.

Reported-by: Melanie Plageman <melanieplageman@gmail.com>
Discussion: https://postgr.es/m/CAAKRu_b-q0hXCBUCAATh0Z4Zi6UkiC0k2DFgoD3nC-r3SkR3tg%40mail.gmail.com
---
 src/backend/commands/vacuum.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index c113f274b81..35c9a9fdafe 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -52,6 +52,7 @@
 #include "postmaster/bgworker_internals.h"
 #include "storage/bufmgr.h"
 #include "storage/lmgr.h"
+#include "storage/pmsignal.h"
 #include "storage/proc.h"
 #include "storage/procarray.h"
 #include "utils/acl.h"
@@ -3005,11 +3006,18 @@ vacuum_delay_point(void)
 		if (msec > VacuumCostDelay * 4)
 			msec = VacuumCostDelay * 4;
 
-		(void) WaitLatch(MyLatch,
-						 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
-						 msec,
-						 WAIT_EVENT_VACUUM_DELAY);
-		ResetLatch(MyLatch);
+		pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
+		pg_usleep(msec * 1000);
+		pgstat_report_wait_end();
+
+		/*
+		 * We don't want to ignore postmaster death during very long vacuums
+		 * with vacuum_cost_delay configured.  We can't use the usual
+		 * WaitLatch() approach here because we want microsecond-based sleep
+		 * durations above.
+		 */
+		if (IsUnderPostmaster && !PostmasterIsAlive())
+			exit(1);
 
 		VacuumCostBalance = 0;
 

From 37c7f8ed9cc72f026f4caa3e9ec1cee8dd327031 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Wed, 15 Mar 2023 12:56:10 +0900
Subject: [PATCH 29/78] Improve WIN32 port of fstat() to detect more file types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current implementation of _pgfstat64() is ineffective in detecting a
terminal handle or an anonymous named pipe.  This commit improves our
port of fstat() to detect such cases more effectively by relying on
GetFileType(), and to return more correct data when the type found is
either a FILE_TYPE_PIPE (_S_IFIFO) or a FILE_TYPE_CHAR (_S_IFCHR).

This is part of a more global fix to address failures when feeding the
output generated by pg_dump to pg_restore through a pipe, for example,
but not all of it.  We are also going to need to do something about
fseek() and ftello() which are not reliable on WIN32 for the same cases
where fstat() was incorrect.  Fixing fstat() is independent of the rest,
though, which is why both fixes are handled separately, and this is the
first part of it.

Reported-by: Daniel Watzinger
Author: Daniel Watzinger, Juan José Santamaría Flecha
Discussion: https://postgr.es/m/b1448cd7-871e-20e3-8398-895e2d1d3bf9@gmail.com
Backpatch-through: 14
---
 src/port/win32stat.c | 69 ++++++++++++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 21 deletions(-)

diff --git a/src/port/win32stat.c b/src/port/win32stat.c
index 426e01f0efa..36c3b171f40 100644
--- a/src/port/win32stat.c
+++ b/src/port/win32stat.c
@@ -289,39 +289,66 @@ int
 _pgfstat64(int fileno, struct stat *buf)
 {
 	HANDLE		hFile = (HANDLE) _get_osfhandle(fileno);
-	BY_HANDLE_FILE_INFORMATION fiData;
+	DWORD		fileType = FILE_TYPE_UNKNOWN;
+	DWORD		lastError;
+	unsigned short st_mode;
 
-	if (hFile == INVALID_HANDLE_VALUE || buf == NULL)
+	/*
+	 * When stdin, stdout, and stderr aren't associated with a stream the
+	 * special value -2 is returned:
+	 * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/get-osfhandle
+	 */
+	if (hFile == INVALID_HANDLE_VALUE || hFile == (HANDLE) -2 || buf == NULL)
 	{
 		errno = EINVAL;
 		return -1;
 	}
 
+	fileType = GetFileType(hFile);
+	lastError = GetLastError();
+
 	/*
-	 * Check if the fileno is a data stream.  If so, unless it has been
-	 * redirected to a file, getting information through its HANDLE will fail,
-	 * so emulate its stat information in the most appropriate way and return
-	 * it instead.
+	 * Invoke GetLastError in order to distinguish between a "valid" return of
+	 * FILE_TYPE_UNKNOWN and its return due to a calling error.  In case of
+	 * success, GetLastError returns NO_ERROR.
 	 */
-	if ((fileno == _fileno(stdin) ||
-		 fileno == _fileno(stdout) ||
-		 fileno == _fileno(stderr)) &&
-		!GetFileInformationByHandle(hFile, &fiData))
+	if (fileType == FILE_TYPE_UNKNOWN && lastError != NO_ERROR)
 	{
-		memset(buf, 0, sizeof(*buf));
-		buf->st_mode = _S_IFCHR;
-		buf->st_dev = fileno;
-		buf->st_rdev = fileno;
-		buf->st_nlink = 1;
-		return 0;
+		_dosmaperr(lastError);
+		return -1;
 	}
 
-	/*
-	 * Since we already have a file handle there is no need to check for
-	 * ERROR_DELETE_PENDING.
-	 */
+	switch (fileType)
+	{
+			/* The specified file is a disk file */
+		case FILE_TYPE_DISK:
+			return fileinfo_to_stat(hFile, buf);
+
+			/*
+			 * The specified file is a socket, a named pipe, or an anonymous
+			 * pipe.
+			 */
+		case FILE_TYPE_PIPE:
+			st_mode = _S_IFIFO;
+			break;
+			/* The specified file is a character file */
+		case FILE_TYPE_CHAR:
+			st_mode = _S_IFCHR;
+			break;
+			/* Unused flag and unknown file type */
+		case FILE_TYPE_REMOTE:
+		case FILE_TYPE_UNKNOWN:
+		default:
+			errno = EINVAL;
+			return -1;
+	}
 
-	return fileinfo_to_stat(hFile, buf);
+	memset(buf, 0, sizeof(*buf));
+	buf->st_mode = st_mode;
+	buf->st_dev = fileno;
+	buf->st_rdev = fileno;
+	buf->st_nlink = 1;
+	return 0;
 }
 
 #endif							/* WIN32 */

From e5428b027f843fc065b7c93a2ef3c4fcca5c563c Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 16 Mar 2023 16:50:56 -0400
Subject: [PATCH 30/78] Doc: mention CREATE+ATTACH PARTITION with CREATE
 TABLE...PARTITION OF.

Clarify that ATTACH/DETACH PARTITION can be used to perform partition
maintenance with less locking than straight CREATE TABLE/DROP TABLE.
This was already stated in some places, but not emphasized.

Back-patch to v14 where DETACH PARTITION CONCURRENTLY was added.
(We had lower lock levels for ATTACH PARTITION before that, but
this wording wouldn't apply.)

Justin Pryzby, reviewed by Robert Treat and Jakub Wartak;
a little further wordsmithing by me

Discussion: https://postgr.es/m/20220718143304.GC18011@telsasoft.com
---
 doc/src/sgml/ddl.sgml              | 13 +++++++------
 doc/src/sgml/ref/create_table.sgml | 20 ++++++++++++++++----
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index e49d19716c1..78f4448a5bc 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -3951,9 +3951,15 @@ CREATE TABLE measurement_y2008m02 PARTITION OF measurement
 </programlisting>
 
      As an alternative, it is sometimes more convenient to create the
-     new table outside the partition structure, and make it a proper
+     new table outside the partition structure, and attach it as a
      partition later. This allows new data to be loaded, checked, and
      transformed prior to it appearing in the partitioned table.
+     Moreover, the <literal>ATTACH PARTITION</literal> operation requires
+     only <literal>SHARE UPDATE EXCLUSIVE</literal> lock on the
+     partitioned table, as opposed to the <literal>ACCESS
+     EXCLUSIVE</literal> lock that is required by <command>CREATE TABLE
+     ... PARTITION OF</command>, so it is more friendly to concurrent
+     operations on the partitioned table.
      The <literal>CREATE TABLE ... LIKE</literal> option is helpful
      to avoid tediously repeating the parent table's definition:
 
@@ -3973,11 +3979,6 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
 </programlisting>
     </para>
 
-    <para>
-     The <command>ATTACH PARTITION</command> command requires taking a
-     <literal>SHARE UPDATE EXCLUSIVE</literal> lock on the partitioned table.
-    </para>
-
     <para>
      Before running the <command>ATTACH PARTITION</command> command, it is
      recommended to create a <literal>CHECK</literal> constraint on the table to
diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml
index 4a0b6dcfcd4..3a4920c4c35 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -661,12 +661,24 @@ Where column_reference_storage_directive is:
      </para>
 
      <para>
-      Operations such as TRUNCATE which normally affect a table and all of its
+      Operations such as <command>TRUNCATE</command>
+      which normally affect a table and all of its
       inheritance children will cascade to all partitions, but may also be
-      performed on an individual partition.  Note that dropping a partition
-      with <literal>DROP TABLE</literal> requires taking an <literal>ACCESS
-      EXCLUSIVE</literal> lock on the parent table.
+      performed on an individual partition.
      </para>
+
+     <para>
+      Note that creating a partition using <literal>PARTITION OF</literal>
+      requires taking an <literal>ACCESS EXCLUSIVE</literal> lock on the
+      parent partitioned table.  Likewise, dropping a partition
+      with <command>DROP TABLE</command> requires taking
+      an <literal>ACCESS EXCLUSIVE</literal> lock on the parent table.
+      It is possible to use <link linkend="sql-altertable"><command>ALTER
+      TABLE ATTACH/DETACH PARTITION</command></link> to perform these
+      operations with a weaker lock, thus reducing interference with
+      concurrent operations on the partitioned table.
+     </para>
+
     </listitem>
    </varlistentry>
 

From 2aa9a62530e161e6572f6940d8b65d4be2127808 Mon Sep 17 00:00:00 2001
From: Thomas Munro <tmunro@postgresql.org>
Date: Fri, 17 Mar 2023 09:44:42 +1300
Subject: [PATCH 31/78] Small tidyup for commit d41a178b.

A comment was left behind claiming that we needed to use malloc() rather
than palloc() because the corresponding free would run in another
thread, but that's not true anymore.  Remove that comment.  And, with
the reason being gone, we might as well actually use palloc().

Back-patch to supported releases, like d41a178b.

Discussion: https://postgr.es/m/CA%2BhUKG%2BpdM9v3Jv4tc2BFx2jh_daY3uzUyAGBhtDkotEQDNPYw%40mail.gmail.com
---
 src/backend/postmaster/postmaster.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index c1d4207757b..0fbeba83fcd 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -5449,13 +5449,10 @@ internal_forkexec(int argc, char *argv[], Port *port)
 
 	/*
 	 * Queue a waiter to signal when this child dies. The wait will be handled
-	 * automatically by an operating system thread pool.
-	 *
-	 * Note: use malloc instead of palloc, since it needs to be thread-safe.
-	 * Struct will be free():d from the callback function that runs on a
-	 * different thread.
+	 * automatically by an operating system thread pool.  The memory will be
+	 * freed by a later call to waitpid().
 	 */
-	childinfo = malloc(sizeof(win32_deadchild_waitinfo));
+	childinfo = palloc(sizeof(win32_deadchild_waitinfo));
 	if (!childinfo)
 		ereport(FATAL,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -7349,7 +7346,7 @@ waitpid(pid_t pid, int *exitstatus, int options)
 	 * Free struct that was allocated before the call to
 	 * RegisterWaitForSingleObject()
 	 */
-	free(childinfo);
+	pfree(childinfo);
 
 	return pid;
 }

From 9b2e2d141822076c6cb6c4110d9028fbe12e4b1f Mon Sep 17 00:00:00 2001
From: Thomas Munro <tmunro@postgresql.org>
Date: Fri, 17 Mar 2023 14:44:12 +1300
Subject: [PATCH 32/78] Small tidyup for commit d41a178b, part II.

Further to commit 6a9229da, checking for NULL is now redundant.  An "out
of memory" error would have been thrown already by palloc() and treated
as FATAL, so we can delete a few more lines.

Back-patch to all releases, like those other commits.

Reported-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/4040668.1679013388%40sss.pgh.pa.us
---
 src/backend/postmaster/postmaster.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 0fbeba83fcd..164ed03ad6e 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -5453,11 +5453,6 @@ internal_forkexec(int argc, char *argv[], Port *port)
 	 * freed by a later call to waitpid().
 	 */
 	childinfo = palloc(sizeof(win32_deadchild_waitinfo));
-	if (!childinfo)
-		ereport(FATAL,
-				(errcode(ERRCODE_OUT_OF_MEMORY),
-				 errmsg("out of memory")));
-
 	childinfo->procHandle = pi.hProcess;
 	childinfo->procId = pi.dwProcessId;
 

From 220ecb727bc6cf9989c0cc39c77103da655ab1df Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Thu, 16 Mar 2023 17:48:47 -0700
Subject: [PATCH 33/78] tests: Minimize syslog activity by slapd

Until now the tests using slapd spammed syslog for every connection /
query. Use logfile-only to prevent syslog activity. Unfortunately that only
takes effect after logging the first message, but that's still much better
than the prior situation.

Discussion: https://postgr.es/m/20230311233708.3yjdbjkly2q4gq2j@awork3.anarazel.de
Backpatch: 11-
---
 src/test/ldap/t/001_auth.pl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/test/ldap/t/001_auth.pl b/src/test/ldap/t/001_auth.pl
index 0a310ccb15a..5f2ba8e5967 100644
--- a/src/test/ldap/t/001_auth.pl
+++ b/src/test/ldap/t/001_auth.pl
@@ -78,6 +78,7 @@
 
 pidfile $slapd_pidfile
 logfile $slapd_logfile
+logfile-only on
 
 access to *
         by * read

From 5ef9103e51be3d641d0bc9edb0e6de1749433cd3 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Thu, 16 Mar 2023 23:03:31 -0700
Subject: [PATCH 34/78] tests: Prevent syslog activity by slapd, take 2

Unfortunately it turns out that the logfile-only option added in b9f8d1cbad7
is only available in openldap starting in 2.6.

Luckily the options to control the log level (loglevel/-s) have been around
for much longer. As it turns out, loglevel/-s only control what goes into
syslog, not what ends up in the file specified with 'logfile' or on stderr.

While we currently are specifying 'logfile', nothing ends up in it, as the
option only controls debug messages, and we didn't set a debug level. The
debug level can only be configured on the commandline and also prevents
forking. That'd require larger changes, so this commit doesn't tackle that
issue.

Specify the syslog level when starting slapd using -s, as that allows
preventing all syslog messages if one uses '0' instead of 'none', while
loglevel doesn't prevent the first message.

Discussion: https://postgr.es/m/20230311233708.3yjdbjkly2q4gq2j@awork3.anarazel.de
Backpatch: 11-
---
 src/test/ldap/t/001_auth.pl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/test/ldap/t/001_auth.pl b/src/test/ldap/t/001_auth.pl
index 5f2ba8e5967..c2f588deeb8 100644
--- a/src/test/ldap/t/001_auth.pl
+++ b/src/test/ldap/t/001_auth.pl
@@ -78,7 +78,6 @@
 
 pidfile $slapd_pidfile
 logfile $slapd_logfile
-logfile-only on
 
 access to *
         by * read
@@ -114,7 +113,8 @@
   "-CA", "$slapd_certs/ca.crt", "-CAkey", "$slapd_certs/ca.key",
   "-CAcreateserial", "-out", "$slapd_certs/server.crt";
 
-system_or_bail $slapd, '-f', $slapd_conf, '-h', "$ldap_url $ldaps_url";
+# -s0 prevents log messages ending up in syslog
+system_or_bail $slapd, '-f', $slapd_conf,'-s0', '-h', "$ldap_url $ldaps_url";
 
 END
 {

From 8daf64e99f14b1647a116ce86b6df2d0dd2d510d Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 17 Mar 2023 13:31:40 -0400
Subject: [PATCH 35/78] Fix pg_dump for hash partitioning on enum columns.

Hash partitioning on an enum is problematic because the hash codes are
derived from the OIDs assigned to the enum values, which will almost
certainly be different after a dump-and-reload than they were before.
This means that some rows probably end up in different partitions than
before, causing restore to fail because of partition constraint
violations.  (pg_upgrade dodges this problem by using hacks to force
the enum values to keep the same OIDs, but that's not possible nor
desirable for pg_dump.)

Users can work around that by specifying --load-via-partition-root,
but since that's a dump-time not restore-time decision, one might
find out the need for it far too late.  Instead, teach pg_dump to
apply that option automatically when dealing with a partitioned
table that has hash-on-enum partitioning.

Also deal with a pre-existing issue for --load-via-partition-root
mode: in a parallel restore, we try to TRUNCATE target tables just
before loading them, in order to enable some backend optimizations.
This is bad when using --load-via-partition-root because (a) we're
likely to suffer deadlocks from restore jobs trying to restore rows
into other partitions than they came from, and (b) if we miss getting
a deadlock we might still lose data due to a TRUNCATE removing rows
from some already-completed restore job.

The fix for this is conceptually simple: just don't TRUNCATE if we're
dealing with a --load-via-partition-root case.  The tricky bit is for
pg_restore to identify those cases.  In dumps using COPY commands we
can inspect each COPY command to see if it targets the nominal target
table or some ancestor.  However, in dumps using INSERT commands it's
pretty impractical to examine the INSERTs in advance.  To provide a
solution for that going forward, modify pg_dump to mark TABLE DATA
items that are using --load-via-partition-root with a comment.
(This change also responds to a complaint from Robert Haas that
the dump output for --load-via-partition-root is pretty confusing.)
pg_restore checks for the special comment as well as checking the
COPY command if present.  This will fail to identify the combination
of --load-via-partition-root and --inserts in pre-existing dump files,
but that should be a pretty rare case in the field.  If it does
happen you will probably get a deadlock failure that you can work
around by not using parallel restore, which is the same as before
this bug fix.

Having done this, there seems no remaining reason for the alarmism
in the pg_dump man page about combining --load-via-partition-root
with parallel restore, so remove that warning.

Patch by me; thanks to Julien Rouhaud for review.  Back-patch to
v11 where hash partitioning was introduced.

Discussion: https://postgr.es/m/1376149.1675268279@sss.pgh.pa.us
---
 doc/src/sgml/ref/pg_dump.sgml | 10 ----------
 src/bin/pg_dump/pg_dump.c     |  2 +-
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 956f97e2537..4bf68d3fbd6 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -863,16 +863,6 @@ PostgreSQL documentation
         and the two systems have different definitions of the collation used
         to sort the partitioning column.
        </para>
-
-       <para>
-        It is best not to use parallelism when restoring from an archive made
-        with this option, because <application>pg_restore</application> will
-        not know exactly which partition(s) a given archive data item will
-        load data into.  This could result in inefficiency due to lock
-        conflicts between parallel jobs, or perhaps even restore failures due
-        to foreign key constraints being set up before all the relevant data
-        is loaded.
-       </para>
       </listitem>
      </varlistentry>
 
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 4a8f4937605..8a2095b21a3 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -8298,7 +8298,7 @@ getPartitioningInfo(Archive *fout)
 		tbinfo = findTableByOid(tabrelid);
 		if (tbinfo == NULL)
 			fatal("failed sanity check, table OID %u appearing in pg_partitioned_table not found",
-					 tabrelid);
+				  tabrelid);
 		tbinfo->unsafe_partitions = true;
 	}
 

From 92e5620db5a2f254e7bd1349711f133dfae00348 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 18 Mar 2023 16:11:22 -0400
Subject: [PATCH 36/78] Doc: fix documentation example for bytea hex output
 format.

Per report from rsindlin

Discussion: https://postgr.es/m/167907221210.1803488.5939223864945604536@wrigleys.postgresql.org
---
 doc/src/sgml/datatype.sgml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 0e89b768c5d..692b6fe2c43 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -1438,7 +1438,12 @@ SELECT b, char_length(b) FROM test2;
    <para>
     Example:
 <programlisting>
-SELECT '\xDEADBEEF';
+SET bytea_output = 'hex';
+
+SELECT '\xDEADBEEF'::bytea;
+   bytea
+------------
+ \xdeadbeef
 </programlisting>
    </para>
   </sect2>

From 4662460162e94c306f0f5cd8056ea983b33df411 Mon Sep 17 00:00:00 2001
From: David Rowley <drowley@postgresql.org>
Date: Mon, 20 Mar 2023 13:30:55 +1300
Subject: [PATCH 37/78] Fix memory leak in Memoize cache key evaluation

When probing the Memoize cache to check if the current cache key values
exist in the cache, we perform an evaluation of the expressions making up
the cache key before probing the hash table for those values.  This
operation could leak memory as it is possible that the cache key is an
expression which requires allocation of memory, as was the case in bug
17844.

Here we fix this by correctly switching to the per tuple context before
evaluating the cache expressions so that the memory is freed next time the
per tuple context is reset.

Bug: 17844
Reported-by: Alexey Ermakov
Discussion: https://postgr.es/m/17844-d2f6f9e75a622bed@postgresql.org
Backpatch-through: 14, where Memoize was introduced
---
 src/backend/executor/nodeMemoize.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/backend/executor/nodeMemoize.c b/src/backend/executor/nodeMemoize.c
index b32f2469135..c078b68740b 100644
--- a/src/backend/executor/nodeMemoize.c
+++ b/src/backend/executor/nodeMemoize.c
@@ -289,11 +289,18 @@ prepare_probe_slot(MemoizeState *mstate, MemoizeKey *key)
 
 	if (key == NULL)
 	{
+		ExprContext *econtext = mstate->ss.ps.ps_ExprContext;
+		MemoryContext oldcontext;
+
+		oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
+
 		/* Set the probeslot's values based on the current parameter values */
 		for (int i = 0; i < numKeys; i++)
 			pslot->tts_values[i] = ExecEvalExpr(mstate->param_exprs[i],
-												mstate->ss.ps.ps_ExprContext,
+												econtext,
 												&pslot->tts_isnull[i]);
+
+		MemoryContextSwitchTo(oldcontext);
 	}
 	else
 	{

From 02cdcd5991e58cda74243dd76dbf023547e64e6b Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Mon, 20 Mar 2023 09:51:50 +0100
Subject: [PATCH 38/78] Fix netmask handling in inet_minmax_multi_ops

When calculating distance in brin_minmax_multi_distance_inet(), the
netmask was applied incorrectly. This results in (seemingly) incorrect
ordering of values, triggering an assert.

For builds without asserts this is mostly harmless - we may merge other
ranges, possibly resulting in a slightly less efficient index. But it's
still correct and the greedy algorithm doesn't guarantee optimality
anyway.

Backpatch to 14, where minmax-multi indexes were introduced.

Reported by Dmitry Dolgov, investigation and fix by me.

Reported-by: Dmitry Dolgov
Backpatch-through: 14
Discussion: https://postgr.es/m/17774-c6f3e36dd4471e67@postgresql.org
---
 src/backend/access/brin/brin_minmax_multi.c | 4 ++--
 src/test/regress/expected/brin_multi.out    | 6 ++++++
 src/test/regress/sql/brin_multi.sql         | 7 +++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/backend/access/brin/brin_minmax_multi.c b/src/backend/access/brin/brin_minmax_multi.c
index b343226a191..b4e50937609 100644
--- a/src/backend/access/brin/brin_minmax_multi.c
+++ b/src/backend/access/brin/brin_minmax_multi.c
@@ -2364,14 +2364,14 @@ brin_minmax_multi_distance_inet(PG_FUNCTION_ARGS)
 		unsigned char mask;
 		int			nbits;
 
-		nbits = lena - (i * 8);
+		nbits = Max(0, lena - (i * 8));
 		if (nbits < 8)
 		{
 			mask = (0xFF << (8 - nbits));
 			addra[i] = (addra[i] & mask);
 		}
 
-		nbits = lenb - (i * 8);
+		nbits = Max(0, lenb - (i * 8));
 		if (nbits < 8)
 		{
 			mask = (0xFF << (8 - nbits));
diff --git a/src/test/regress/expected/brin_multi.out b/src/test/regress/expected/brin_multi.out
index 51277fdb887..0720a66b2ae 100644
--- a/src/test/regress/expected/brin_multi.out
+++ b/src/test/regress/expected/brin_multi.out
@@ -352,6 +352,12 @@ VACUUM brintest_multi;  -- force a summarization cycle in brinidx
 insert into public.brintest_multi (float4col) values (real 'nan');
 insert into public.brintest_multi (float8col) values (real 'nan');
 UPDATE brintest_multi SET int8col = int8col * int4col;
+-- Test handling of inet netmasks with inet_minmax_multi_ops
+CREATE TABLE brin_test_inet (a inet);
+CREATE INDEX ON brin_test_inet USING brin (a inet_minmax_multi_ops);
+INSERT INTO brin_test_inet VALUES ('127.0.0.1/0');
+INSERT INTO brin_test_inet VALUES ('0.0.0.0/12');
+DROP TABLE brin_test_inet;
 -- Tests for brin_summarize_new_values
 SELECT brin_summarize_new_values('brintest_multi'); -- error, not an index
 ERROR:  "brintest_multi" is not an index
diff --git a/src/test/regress/sql/brin_multi.sql b/src/test/regress/sql/brin_multi.sql
index 9deb8d2573d..a46c09951b5 100644
--- a/src/test/regress/sql/brin_multi.sql
+++ b/src/test/regress/sql/brin_multi.sql
@@ -359,6 +359,13 @@ insert into public.brintest_multi (float8col) values (real 'nan');
 
 UPDATE brintest_multi SET int8col = int8col * int4col;
 
+-- Test handling of inet netmasks with inet_minmax_multi_ops
+CREATE TABLE brin_test_inet (a inet);
+CREATE INDEX ON brin_test_inet USING brin (a inet_minmax_multi_ops);
+INSERT INTO brin_test_inet VALUES ('127.0.0.1/0');
+INSERT INTO brin_test_inet VALUES ('0.0.0.0/12');
+DROP TABLE brin_test_inet;
+
 -- Tests for brin_summarize_new_values
 SELECT brin_summarize_new_values('brintest_multi'); -- error, not an index
 SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index

From 38998dbcba0e01d9dff1470b54b22571571493bc Mon Sep 17 00:00:00 2001
From: Thomas Munro <tmunro@postgresql.org>
Date: Tue, 21 Mar 2023 14:29:34 +1300
Subject: [PATCH 39/78] Fix race in parallel hash join batch cleanup, take II.

With unlucky timing and parallel_leader_participation=off (not the
default), PHJ could attempt to access per-batch shared state just as it
was being freed.  There was code intended to prevent that by checking
for a cleared pointer, but it was racy.  Fix, by introducing an extra
barrier phase.  The new phase PHJ_BUILD_RUNNING means that it's safe to
access the per-batch state to find a batch to help with, and
PHJ_BUILD_DONE means that it is too late.  The last to detach will free
the array of per-batch state as before, but now it will also atomically
advance the phase, so that late attachers can avoid the hazard.  This
mirrors the way per-batch hash tables are freed (see phases
PHJ_BATCH_PROBING and PHJ_BATCH_DONE).

An earlier attempt to fix this (commit 3b8981b6, later reverted) missed
one special case.  When the inner side is empty (the "empty inner
optimization"), the build barrier would only make it to
PHJ_BUILD_HASHING_INNER phase before workers attempted to detach from
the hashtable.  In that case, fast-forward the build barrier to
PHJ_BUILD_RUNNING before proceeding, so that our later assertions hold
and we can still negotiate who is cleaning up.

Revealed by build farm failures, where BarrierAttach() failed a sanity
check assertion, because the memory had been clobbered by dsa_free().
In non-assert builds, the result could be a segmentation fault.

Back-patch to all supported releases.

Author: Thomas Munro <thomas.munro@gmail.com>
Author: Melanie Plageman <melanieplageman@gmail.com>
Reported-by: Michael Paquier <michael@paquier.xyz>
Reported-by: David Geier <geidav.pg@gmail.com>
Tested-by: David Geier <geidav.pg@gmail.com>
Discussion: https://postgr.es/m/20200929061142.GA29096%40paquier.xyz
---
 src/backend/executor/nodeHash.c     | 50 +++++++++++++++++---------
 src/backend/executor/nodeHashjoin.c | 54 ++++++++++++++++++++---------
 src/include/executor/hashjoin.h     |  3 +-
 3 files changed, 74 insertions(+), 33 deletions(-)

diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 8779d93b06f..a236f5b4819 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -436,14 +436,21 @@ MultiExecParallelHash(HashState *node)
 	hashtable->nbuckets = pstate->nbuckets;
 	hashtable->log2_nbuckets = my_log2(hashtable->nbuckets);
 	hashtable->totalTuples = pstate->total_tuples;
-	ExecParallelHashEnsureBatchAccessors(hashtable);
+
+	/*
+	 * Unless we're completely done and the batch state has been freed, make
+	 * sure we have accessors.
+	 */
+	if (BarrierPhase(build_barrier) < PHJ_BUILD_DONE)
+		ExecParallelHashEnsureBatchAccessors(hashtable);
 
 	/*
 	 * The next synchronization point is in ExecHashJoin's HJ_BUILD_HASHTABLE
-	 * case, which will bring the build phase to PHJ_BUILD_DONE (if it isn't
-	 * there already).
+	 * case, which will bring the build phase to PHJ_BUILD_RUNNING (if it
+	 * isn't there already).
 	 */
 	Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER ||
+		   BarrierPhase(build_barrier) == PHJ_BUILD_RUNNING ||
 		   BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
 }
 
@@ -752,7 +759,7 @@ ExecHashTableCreate(HashState *state, HashJoinState *hjstate,
 		/*
 		 * The next Parallel Hash synchronization point is in
 		 * MultiExecParallelHash(), which will progress it all the way to
-		 * PHJ_BUILD_DONE.  The caller must not return control from this
+		 * PHJ_BUILD_RUNNING.  The caller must not return control from this
 		 * executor node between now and then.
 		 */
 	}
@@ -3763,14 +3770,11 @@ ExecParallelHashEnsureBatchAccessors(HashJoinTable hashtable)
 	}
 
 	/*
-	 * It's possible for a backend to start up very late so that the whole
-	 * join is finished and the shm state for tracking batches has already
-	 * been freed by ExecHashTableDetach().  In that case we'll just leave
-	 * hashtable->batches as NULL so that ExecParallelHashJoinNewBatch() gives
-	 * up early.
+	 * We should never see a state where the batch-tracking array is freed,
+	 * because we should have given up sooner if we join when the build
+	 * barrier has reached the PHJ_BUILD_DONE phase.
 	 */
-	if (!DsaPointerIsValid(pstate->batches))
-		return;
+	Assert(DsaPointerIsValid(pstate->batches));
 
 	/* Use hash join memory context. */
 	oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);
@@ -3895,9 +3899,18 @@ ExecHashTableDetachBatch(HashJoinTable hashtable)
 void
 ExecHashTableDetach(HashJoinTable hashtable)
 {
-	if (hashtable->parallel_state)
+	ParallelHashJoinState *pstate = hashtable->parallel_state;
+
+	/*
+	 * If we're involved in a parallel query, we must either have gotten all
+	 * the way to PHJ_BUILD_RUNNING, or joined too late and be in
+	 * PHJ_BUILD_DONE.
+	 */
+	Assert(!pstate ||
+		   BarrierPhase(&pstate->build_barrier) >= PHJ_BUILD_RUNNING);
+
+	if (pstate && BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_RUNNING)
 	{
-		ParallelHashJoinState *pstate = hashtable->parallel_state;
 		int			i;
 
 		/* Make sure any temporary files are closed. */
@@ -3913,17 +3926,22 @@ ExecHashTableDetach(HashJoinTable hashtable)
 		}
 
 		/* If we're last to detach, clean up shared memory. */
-		if (BarrierDetach(&pstate->build_barrier))
+		if (BarrierArriveAndDetach(&pstate->build_barrier))
 		{
+			/*
+			 * Late joining processes will see this state and give up
+			 * immediately.
+			 */
+			Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_DONE);
+
 			if (DsaPointerIsValid(pstate->batches))
 			{
 				dsa_free(hashtable->area, pstate->batches);
 				pstate->batches = InvalidDsaPointer;
 			}
 		}
-
-		hashtable->parallel_state = NULL;
 	}
+	hashtable->parallel_state = NULL;
 }
 
 /*
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 9ec70f16e31..88eaaa10cef 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -47,7 +47,8 @@
  *   PHJ_BUILD_ALLOCATING            -- one sets up the batches and table 0
  *   PHJ_BUILD_HASHING_INNER         -- all hash the inner rel
  *   PHJ_BUILD_HASHING_OUTER         -- (multi-batch only) all hash the outer
- *   PHJ_BUILD_DONE                  -- building done, probing can begin
+ *   PHJ_BUILD_RUNNING               -- building done, probing can begin
+ *   PHJ_BUILD_DONE                  -- all work complete, one frees batches
  *
  * While in the phase PHJ_BUILD_HASHING_INNER a separate pair of barriers may
  * be used repeatedly as required to coordinate expansions in the number of
@@ -75,7 +76,7 @@
  * batches whenever it encounters them while scanning and probing, which it
  * can do because it processes batches in serial order.
  *
- * Once PHJ_BUILD_DONE is reached, backends then split up and process
+ * Once PHJ_BUILD_RUNNING is reached, backends then split up and process
  * different batches, or gang up and work together on probing batches if there
  * aren't enough to go around.  For each batch there is a separate barrier
  * with the following phases:
@@ -97,11 +98,16 @@
  *
  * To avoid deadlocks, we never wait for any barrier unless it is known that
  * all other backends attached to it are actively executing the node or have
- * already arrived.  Practically, that means that we never return a tuple
- * while attached to a barrier, unless the barrier has reached its final
- * state.  In the slightly special case of the per-batch barrier, we return
- * tuples while in PHJ_BATCH_PROBING phase, but that's OK because we use
- * BarrierArriveAndDetach() to advance it to PHJ_BATCH_DONE without waiting.
+ * finished.  Practically, that means that we never emit a tuple while attached
+ * to a barrier, unless the barrier has reached a phase that means that no
+ * process will wait on it again.  We emit tuples while attached to the build
+ * barrier in phase PHJ_BUILD_RUNNING, and to a per-batch barrier in phase
+ * PHJ_BATCH_PROBING.  These are advanced to PHJ_BUILD_DONE and PHJ_BATCH_DONE
+ * respectively without waiting, using BarrierArriveAndDetach().  The last to
+ * detach receives a different return value so that it knows that it's safe to
+ * clean up.  Any straggler process that attaches after that phase is reached
+ * will see that it's too late to participate or access the relevant shared
+ * memory objects.
  *
  *-------------------------------------------------------------------------
  */
@@ -387,7 +393,21 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 				 * outer relation.
 				 */
 				if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
+				{
+					if (parallel)
+					{
+						/*
+						 * Advance the build barrier to PHJ_BUILD_RUNNING
+						 * before proceeding so we can negotiate resource
+						 * cleanup.
+						 */
+						Barrier    *build_barrier = &parallel_state->build_barrier;
+
+						while (BarrierPhase(build_barrier) < PHJ_BUILD_RUNNING)
+							BarrierArriveAndWait(build_barrier, 0);
+					}
 					return NULL;
+				}
 
 				/*
 				 * Prefetch JoinQual or NonJoinQual to prevent motion hazard.
@@ -433,6 +453,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 
 					build_barrier = &parallel_state->build_barrier;
 					Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER ||
+						   BarrierPhase(build_barrier) == PHJ_BUILD_RUNNING ||
 						   BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
 					if (BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER)
 					{
@@ -453,9 +474,18 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 						BarrierArriveAndWait(build_barrier,
 											 WAIT_EVENT_HASH_BUILD_HASH_OUTER);
 					}
-					Assert(BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
+					else if (BarrierPhase(build_barrier) == PHJ_BUILD_DONE)
+					{
+						/*
+						 * If we attached so late that the job is finished and
+						 * the batch state has been freed, we can return
+						 * immediately.
+						 */
+						return NULL;
+					}
 
 					/* Each backend should now select a batch to work on. */
+					Assert(BarrierPhase(build_barrier) == PHJ_BUILD_RUNNING);
 					hashtable->curbatch = -1;
 					node->hj_JoinState = HJ_NEED_NEW_BATCH;
 
@@ -1428,14 +1458,6 @@ ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
 	Barrier		*batch0_barrier = NULL;
 	ParallelHashJoinState *pstate = hashtable->parallel_state;
 
-	/*
-	 * If we started up so late that the batch tracking array has been freed
-	 * already by ExecHashTableDetach(), then we are finished.  See also
-	 * ExecParallelHashEnsureBatchAccessors().
-	 */
-	if (hashtable->batches == NULL)
-		return false;
-
 	/*
 	 * If we were already attached to a batch, remember not to bother checking
 	 * it again, and detach from it (possibly freeing the hash table if we are
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index b1fbaacf5e9..e324e67d914 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -298,7 +298,8 @@ typedef struct ParallelHashJoinState
 #define PHJ_BUILD_ALLOCATING			1
 #define PHJ_BUILD_HASHING_INNER			2
 #define PHJ_BUILD_HASHING_OUTER			3
-#define PHJ_BUILD_DONE					4
+#define PHJ_BUILD_RUNNING				4
+#define PHJ_BUILD_DONE					5
 
 /* The phases for probing each batch, used by for batch_barrier. */
 #define PHJ_BATCH_ELECTING				0

From 11658db1342acf70962a6cbcf0ca66d34b0e8a60 Mon Sep 17 00:00:00 2001
From: Amit Kapila <akapila@postgresql.org>
Date: Tue, 21 Mar 2023 09:18:51 +0530
Subject: [PATCH 40/78] Ignore dropped columns during apply of update/delete.

We failed to apply updates and deletes when REPLICA IDENTITY FULL was
used for a table having dropped columns. Previously, dropped columns were
not ignored while comparing the tuples from the publisher and subscriber
during apply of updates and deletes.

Author: Onder Kalaci, Shi yu
Reviewed-by: Amit Kapila
Discussion: https://postgr.es/m/CACawEhVQC9WoofunvXg12aXtbqKnEgWxoRx3+v8q32AWYsdpGg@mail.gmail.com
---
 src/backend/executor/execReplication.c | 10 ++++-
 src/test/subscription/t/100_bugs.pl    | 56 +++++++++++++++++++++++++-
 2 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 8202e050ec8..093c0228c48 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -243,6 +243,14 @@ tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2,
 		Form_pg_attribute att;
 		TypeCacheEntry *typentry;
 
+		att = TupleDescAttr(slot1->tts_tupleDescriptor, attrnum);
+
+		/*
+		 * Ignore dropped columns as the publisher doesn't send those
+		 */
+		if (att->attisdropped)
+			continue;
+
 		/*
 		 * If one value is NULL and other is not, then they are certainly not
 		 * equal
@@ -256,8 +264,6 @@ tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2,
 		if (slot1->tts_isnull[attrnum] || slot2->tts_isnull[attrnum])
 			continue;
 
-		att = TupleDescAttr(slot1->tts_tupleDescriptor, attrnum);
-
 		typentry = eq[attrnum];
 		if (typentry == NULL)
 		{
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index 91602c43399..b9da72eaf2a 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -6,7 +6,7 @@
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 7;
+use Test::More tests => 8;
 
 # Bug #15114
 
@@ -298,3 +298,57 @@
 
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
+
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped columns,
+# we fail to apply updates and deletes
+my $node_publisher_d_cols = get_new_node('node_publisher_d_cols');
+$node_publisher_d_cols->init(allows_streaming => 'logical');
+$node_publisher_d_cols->start;
+
+my $node_subscriber_d_cols = get_new_node('node_subscriber_d_cols');
+$node_subscriber_d_cols->init(allows_streaming => 'logical');
+$node_subscriber_d_cols->start;
+
+$node_publisher_d_cols->safe_psql(
+	'postgres', qq(
+	CREATE TABLE dropped_cols (a int, b_drop int, c int);
+	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
+	-- some initial data
+	INSERT INTO dropped_cols VALUES (1, 1, 1);
+));
+
+$node_subscriber_d_cols->safe_psql(
+	'postgres', qq(
+	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
+));
+
+my $publisher_connstr_d_cols =
+  $node_publisher_d_cols->connstr . ' dbname=postgres';
+$node_subscriber_d_cols->safe_psql('postgres',
+	"CREATE SUBSCRIPTION sub_dropped_cols CONNECTION '$publisher_connstr_d_cols' PUBLICATION pub_dropped_cols"
+);
+$node_subscriber_d_cols->wait_for_subscription_sync;
+
+$node_publisher_d_cols->safe_psql(
+	'postgres', qq(
+		ALTER TABLE dropped_cols DROP COLUMN b_drop;
+));
+$node_subscriber_d_cols->safe_psql(
+	'postgres', qq(
+		ALTER TABLE dropped_cols DROP COLUMN b_drop;
+));
+
+$node_publisher_d_cols->safe_psql(
+	'postgres', qq(
+		UPDATE dropped_cols SET a = 100;
+));
+$node_publisher_d_cols->wait_for_catchup('sub_dropped_cols');
+
+is( $node_subscriber_d_cols->safe_psql(
+		'postgres', "SELECT count(*) FROM dropped_cols WHERE a = 100"),
+	qq(1),
+	'replication with RI FULL and dropped columns');
+
+$node_publisher_d_cols->stop('fast');
+$node_subscriber_d_cols->stop('fast');

From 3e52039d4bbadd6d091faee619a32df63d0c7586 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Wed, 22 Mar 2023 18:32:04 +0900
Subject: [PATCH 41/78] doc: Add description of some missing monitoring
 functions

This commit adds some documentation about two monitoring functions:
- pg_stat_get_xact_blocks_fetched()
- pg_stat_get_xact_blocks_hit()

The description of these functions has been removed in ddfc2d9, later
simplified by 5f2b089, assuming that all the functions whose
descriptions were removed are used in system views.  Unfortunately, some
of them are not used in any system views, so they lacked
documentation.

This gap has existed in the docs for a long time, so backpatch all the
way down.

Reported-by: Michael Paquier
Author: Bertrand Drouvot
Reviewed-by: Kyotaro Horiguchi
Discussion: https://postgr.es/m/ZBeeH5UoNkTPrwHO@paquier.xyz
Backpatch-through: 11
---
 doc/src/sgml/monitoring.sgml | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index ce6c34d6ea1..3c56f9fa4f1 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -5051,6 +5051,34 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_stat_get_xact_blocks_fetched</primary>
+        </indexterm>
+        <function>pg_stat_get_xact_blocks_fetched</function> ( <type>oid</type> )
+        <returnvalue>bigint</returnvalue>
+       </para>
+       <para>
+        Returns the number of buffers fetched for table or index, in the current
+        transaction.
+       </para></entry>
+      </row>
+
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_stat_get_xact_blocks_hit</primary>
+        </indexterm>
+        <function>pg_stat_get_xact_blocks_hit</function> ( <type>oid</type> )
+        <returnvalue>bigint</returnvalue>
+       </para>
+       <para>
+        Returns the number of buffer hits for table or index, in the current
+        transaction.
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>

From 838b39e62615dd4598b3d9f96f2c263213da2527 Mon Sep 17 00:00:00 2001
From: Amit Kapila <akapila@postgresql.org>
Date: Thu, 23 Mar 2023 11:32:22 +0530
Subject: [PATCH 42/78] Ignore generated columns during apply of update/delete.

We failed to apply updates and deletes when REPLICA IDENTITY FULL was
used for a table having generated columns. Previously, generated columns
were not ignored while comparing the tuples from the publisher and
subscriber during apply of updates and deletes.

Author: Onder Kalaci
Reviewed-by: Shi yu, Amit Kapila
Backpatch-through: 12
Discussion: https://postgr.es/m/CACawEhVQC9WoofunvXg12aXtbqKnEgWxoRx3+v8q32AWYsdpGg@mail.gmail.com
---
 src/backend/executor/execReplication.c |  5 +++--
 src/test/subscription/t/100_bugs.pl    | 18 ++++++++++++++----
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 093c0228c48..831699ff9f5 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -246,9 +246,10 @@ tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2,
 		att = TupleDescAttr(slot1->tts_tupleDescriptor, attrnum);
 
 		/*
-		 * Ignore dropped columns as the publisher doesn't send those
+		 * Ignore dropped and generated columns as the publisher doesn't send
+		 * those
 		 */
-		if (att->attisdropped)
+		if (att->attisdropped || att->attgenerated)
 			continue;
 
 		/*
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index b9da72eaf2a..cce91891ab9 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -6,7 +6,7 @@
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 8;
+use Test::More tests => 9;
 
 # Bug #15114
 
@@ -299,8 +299,8 @@
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped columns,
-# we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
+# generated columns, we fail to apply updates and deletes
 my $node_publisher_d_cols = get_new_node('node_publisher_d_cols');
 $node_publisher_d_cols->init(allows_streaming => 'logical');
 $node_publisher_d_cols->start;
@@ -313,14 +313,18 @@
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
+	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
+	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
+	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber_d_cols->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
+	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 my $publisher_connstr_d_cols =
@@ -342,6 +346,7 @@
 $node_publisher_d_cols->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
+		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher_d_cols->wait_for_catchup('sub_dropped_cols');
 
@@ -350,5 +355,10 @@
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
+is( $node_subscriber_d_cols->safe_psql(
+		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
+	qq(1),
+	'replication with RI FULL and generated columns');
+
 $node_publisher_d_cols->stop('fast');
 $node_subscriber_d_cols->stop('fast');

From b1baa951094bd4861413c18bc43013904f25606a Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Thu, 23 Mar 2023 15:29:28 -0400
Subject: [PATCH 43/78] amcheck: Fix verify_heapam for tuples where xmin or
 xmax is 0.

In such cases, get_xid_status() doesn't set its output parameter (the
third argument), so we shouldn't fall through to code which will test
the value of that parameter. There are five existing calls to
get_xid_status(), three of which seem to already handle this case
properly.  This commit tries to fix the other two.

If we're checking xmin and find that it is invalid (i.e. 0) just
report that as corruption, similar to what's already done in the
three cases that seem correct. If we're checking xmax and find
that's invalid, that's fine: it just means that the tuple hasn't
been updated or deleted.

Thanks to Andres Freund and valgrind for finding this problem, and
also to Andres for having a look at the patch.  This bug seems to go
all the way back to where verify_heapam was first introduced, but
wasn't detected until recently, possibly because of the new test cases
added for update chain verification.  Back-patch to v14, where this
code showed up.

Discussion: http://postgr.es/m/CA+TgmoZAYzQZqyUparXy_ks3OEOfLD9-bEXt8N-2tS1qghX9gQ@mail.gmail.com
---
 contrib/amcheck/verify_heapam.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index f7964b78173..cc49ccb26f6 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -775,6 +775,9 @@ check_tuple_visibility(HeapCheckContext *ctx)
 	switch (get_xid_status(xmin, ctx, &xmin_status))
 	{
 		case XID_INVALID:
+			report_corruption(ctx,
+							  pstrdup("xmin is invalid"));
+			return false;
 		case XID_BOUNDS_OK:
 			break;
 		case XID_IN_FUTURE:
@@ -1110,6 +1113,9 @@ check_tuple_visibility(HeapCheckContext *ctx)
 	xmax = HeapTupleHeaderGetRawXmax(tuphdr);
 	switch (get_xid_status(xmax, ctx, &xmax_status))
 	{
+		case XID_INVALID:
+			ctx->tuple_could_be_pruned = false;
+			return true;
 		case XID_IN_FUTURE:
 			report_corruption(ctx,
 							  psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u",
@@ -1132,7 +1138,6 @@ check_tuple_visibility(HeapCheckContext *ctx)
 									   XidFromFullTransactionId(ctx->oldest_fxid)));
 			return false;		/* corrupt */
 		case XID_BOUNDS_OK:
-		case XID_INVALID:
 			break;
 	}
 

From b21e7b7356e3d0209629f805c7ff5d65213cd919 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sun, 26 Mar 2023 13:41:06 -0400
Subject: [PATCH 44/78] Fix oversights in array manipulation.

The nested-arrays code path in ExecEvalArrayExpr() used palloc to
allocate the result array, whereas every other array-creating function
has used palloc0 since 18c0b4ecc.  This mostly works, but unused bits
past the end of the nulls bitmap may end up undefined.  That causes
valgrind complaints with -DWRITE_READ_PARSE_PLAN_TREES, and could
cause planner misbehavior as cited in 18c0b4ecc.  There seems no very
good reason why we should strive to avoid palloc0 in just this one case,
so fix it the easy way with s/palloc/palloc0/.

While looking at that I noted that we also failed to check for overflow
of "nbytes" and "nitems" while summing the sizes of the sub-arrays,
potentially allowing a crash due to undersized output allocation.
For "nbytes", follow the policy used by other array-munging code of
checking for overflow after each addition.  (As elsewhere, the last
addition of the array's overhead space doesn't need an extra check,
since palloc itself will catch a value between 1Gb and 2Gb.)
For "nitems", there's no very good reason to sum the inputs at all,
since we can perfectly well use ArrayGetNItems' result instead of
ignoring it.

Per discussion of this bug, also remove redundant zeroing of the
nulls bitmap in array_set_element and array_set_slice.

Patch by Alexander Lakhin and myself, per bug #17858 from Alexander
Lakhin; thanks also to Richard Guo.  These bugs are a dozen years old,
so back-patch to all supported branches.

Discussion: https://postgr.es/m/17858-8fd287fd3663d051@postgresql.org
---
 src/backend/executor/execExprInterp.c | 13 +++++++++----
 src/backend/utils/adt/arrayfuncs.c    |  6 ++----
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 7729117381f..56874a3f78a 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -2817,7 +2817,7 @@ ExecEvalArrayExpr(ExprState *state, ExprEvalStep *op)
 	{
 		/* Must be nested array expressions */
 		int			nbytes = 0;
-		int			nitems = 0;
+		int			nitems;
 		int			outer_nelems = 0;
 		int			elem_ndims = 0;
 		int		   *elem_dims = NULL;
@@ -2912,9 +2912,14 @@ ExecEvalArrayExpr(ExprState *state, ExprEvalStep *op)
 			subbitmaps[outer_nelems] = ARR_NULLBITMAP(array);
 			subbytes[outer_nelems] = ARR_SIZE(array) - ARR_DATA_OFFSET(array);
 			nbytes += subbytes[outer_nelems];
+			/* check for overflow of total request */
+			if (!AllocSizeIsValid(nbytes))
+				ereport(ERROR,
+						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+						 errmsg("array size exceeds the maximum allowed (%d)",
+								(int) MaxAllocSize)));
 			subnitems[outer_nelems] = ArrayGetNItems(this_ndims,
 													 ARR_DIMS(array));
-			nitems += subnitems[outer_nelems];
 			havenulls |= ARR_HASNULL(array);
 			outer_nelems++;
 		}
@@ -2948,7 +2953,7 @@ ExecEvalArrayExpr(ExprState *state, ExprEvalStep *op)
 		}
 
 		/* check for subscript overflow */
-		(void) ArrayGetNItems(ndims, dims);
+		nitems = ArrayGetNItems(ndims, dims);
 		ArrayCheckBounds(ndims, dims, lbs);
 
 		if (havenulls)
@@ -2962,7 +2967,7 @@ ExecEvalArrayExpr(ExprState *state, ExprEvalStep *op)
 			nbytes += ARR_OVERHEAD_NONULLS(ndims);
 		}
 
-		result = (ArrayType *) palloc(nbytes);
+		result = (ArrayType *) palloc0(nbytes);
 		SET_VARSIZE(result, nbytes);
 		result->ndim = ndims;
 		result->dataoffset = dataoffset;
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index c116a1783c2..63d7213158c 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -2478,8 +2478,7 @@ array_set_element(Datum arraydatum,
 	{
 		bits8	   *newnullbitmap = ARR_NULLBITMAP(newarray);
 
-		/* Zero the bitmap to take care of marking inserted positions null */
-		MemSet(newnullbitmap, 0, (newnitems + 7) / 8);
+		/* palloc0 above already marked any inserted positions as nulls */
 		/* Fix the inserted value */
 		if (addedafter)
 			array_set_isnull(newnullbitmap, newnitems - 1, isNull);
@@ -3127,8 +3126,7 @@ array_set_slice(Datum arraydatum,
 			bits8	   *newnullbitmap = ARR_NULLBITMAP(newarray);
 			bits8	   *oldnullbitmap = ARR_NULLBITMAP(array);
 
-			/* Zero the bitmap to handle marking inserted positions null */
-			MemSet(newnullbitmap, 0, (nitems + 7) / 8);
+			/* palloc0 above already marked any inserted positions as nulls */
 			array_bitmap_copy(newnullbitmap, addedbefore,
 							  oldnullbitmap, 0,
 							  itemsbefore);

From 640eb4ee83637a82cffbeb75aa83f2a2d8411b4d Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 27 Mar 2023 15:04:02 -0400
Subject: [PATCH 45/78] Reject attempts to alter composite types used in
 indexes.

find_composite_type_dependencies() ignored indexes, which is a poor
decision because an expression index could have a stored column of
a composite (or other container) type even when the underlying table
does not.  Teach it to detect such cases and error out.  We have to
work a bit harder than for other relations because the pg_depend entry
won't identify the specific index column of concern, but it's not much
new code.

This does not address bug #17872's original complaint that dropping
a column in such a type might lead to violations of the uniqueness
property that a unique index is supposed to ensure.  That seems of
much less concern to me because it won't lead to crashes.

Per bug #17872 from Alexander Lakhin.  Back-patch to all supported
branches.

Discussion: https://postgr.es/m/17872-d0fbb799dc3fd85d@postgresql.org
---
 src/backend/commands/tablecmds.c          | 57 +++++++++++++++++++----
 src/test/regress/expected/alter_table.out | 10 +++-
 src/test/regress/sql/alter_table.sql      | 11 ++++-
 3 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 29a7a57e681..9bdebc6e29b 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -8100,6 +8100,7 @@ find_composite_type_dependencies(Oid typeOid, Relation origRelation,
 	{
 		Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup);
 		Relation	rel;
+		TupleDesc	tupleDesc;
 		Form_pg_attribute att;
 
 		/* Check for directly dependent types */
@@ -8116,18 +8117,58 @@ find_composite_type_dependencies(Oid typeOid, Relation origRelation,
 			continue;
 		}
 
-		/* Else, ignore dependees that aren't user columns of relations */
-		/* (we assume system columns are never of interesting types) */
-		if (pg_depend->classid != RelationRelationId ||
-			pg_depend->objsubid <= 0)
+		/* Else, ignore dependees that aren't relations */
+		if (pg_depend->classid != RelationRelationId)
 			continue;
 
 		rel = relation_open(pg_depend->objid, AccessShareLock);
-		att = TupleDescAttr(rel->rd_att, pg_depend->objsubid - 1);
+		tupleDesc = RelationGetDescr(rel);
 
-		if (rel->rd_rel->relkind == RELKIND_RELATION ||
-			rel->rd_rel->relkind == RELKIND_MATVIEW ||
-			rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		/*
+		 * If objsubid identifies a specific column, refer to that in error
+		 * messages.  Otherwise, search to see if there's a user column of the
+		 * type.  (We assume system columns are never of interesting types.)
+		 * The search is needed because an index containing an expression
+		 * column of the target type will just be recorded as a whole-relation
+		 * dependency.  If we do not find a column of the type, the dependency
+		 * must indicate that the type is transiently referenced in an index
+		 * expression but not stored on disk, which we assume is OK, just as
+		 * we do for references in views.  (It could also be that the target
+		 * type is embedded in some container type that is stored in an index
+		 * column, but the previous recursion should catch such cases.)
+		 */
+		if (pg_depend->objsubid > 0 && pg_depend->objsubid <= tupleDesc->natts)
+			att = TupleDescAttr(tupleDesc, pg_depend->objsubid - 1);
+		else
+		{
+			att = NULL;
+			for (int attno = 1; attno <= tupleDesc->natts; attno++)
+			{
+				att = TupleDescAttr(tupleDesc, attno - 1);
+				if (att->atttypid == typeOid && !att->attisdropped)
+					break;
+				att = NULL;
+			}
+			if (att == NULL)
+			{
+				/* No such column, so assume OK */
+				relation_close(rel, AccessShareLock);
+				continue;
+			}
+		}
+
+		/*
+		 * We definitely should reject if the relation has storage.  If it's
+		 * partitioned, then perhaps we don't have to reject: if there are
+		 * partitions then we'll fail when we find one, else there is no
+		 * stored data to worry about.  However, it's possible that the type
+		 * change would affect conclusions about whether the type is sortable
+		 * or hashable and thus (if it's a partitioning column) break the
+		 * partitioning rule.  For now, reject for partitioned rels too.
+		 */
+		if (RELKIND_HAS_STORAGE(rel->rd_rel->relkind) ||
+			rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+			rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
 		{
 			if (origTypeName)
 				ereport(ERROR,
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out
index cbc578e3586..9b203cbc4ec 100644
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -3198,6 +3198,13 @@ CREATE TYPE test_type1 AS (a int, b text);
 CREATE TABLE test_tbl1 (x int, y test_type1);
 ALTER TYPE test_type1 ALTER ATTRIBUTE b TYPE varchar; -- fails
 ERROR:  cannot alter type "test_type1" because column "test_tbl1.y" uses it
+DROP TABLE test_tbl1;
+CREATE TABLE test_tbl1 (x int, y text);
+CREATE INDEX test_tbl1_idx ON test_tbl1((row(x,y)::test_type1));
+ALTER TYPE test_type1 ALTER ATTRIBUTE b TYPE varchar; -- fails
+ERROR:  cannot alter type "test_type1" because column "test_tbl1_idx.row" uses it
+DROP TABLE test_tbl1;
+DROP TYPE test_type1;
 CREATE TYPE test_type2 AS (a int, b text);
 CREATE TABLE test_tbl2 OF test_type2;
 CREATE TABLE test_tbl2_subclass () INHERITS (test_tbl2);
@@ -3315,7 +3322,8 @@ Distributed by: (aa)
 Inherits: test_tbl2
 Distributed by: (aa)
 
-DROP TABLE test_tbl2_subclass;
+DROP TABLE test_tbl2_subclass, test_tbl2;
+DROP TYPE test_type2;
 CREATE TYPE test_typex AS (a int, b text);
 CREATE TABLE test_tblx (x int, y test_typex check ((y).a > 0));
 ALTER TYPE test_typex DROP ATTRIBUTE a; -- fails
diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql
index 9da0e5603ea..1c6fd62a117 100644
--- a/src/test/regress/sql/alter_table.sql
+++ b/src/test/regress/sql/alter_table.sql
@@ -2019,6 +2019,14 @@ CREATE TYPE test_type1 AS (a int, b text);
 CREATE TABLE test_tbl1 (x int, y test_type1);
 ALTER TYPE test_type1 ALTER ATTRIBUTE b TYPE varchar; -- fails
 
+DROP TABLE test_tbl1;
+CREATE TABLE test_tbl1 (x int, y text);
+CREATE INDEX test_tbl1_idx ON test_tbl1((row(x,y)::test_type1));
+ALTER TYPE test_type1 ALTER ATTRIBUTE b TYPE varchar; -- fails
+
+DROP TABLE test_tbl1;
+DROP TYPE test_type1;
+
 CREATE TYPE test_type2 AS (a int, b text);
 CREATE TABLE test_tbl2 OF test_type2;
 CREATE TABLE test_tbl2_subclass () INHERITS (test_tbl2);
@@ -2046,7 +2054,8 @@ ALTER TYPE test_type2 RENAME ATTRIBUTE a TO aa CASCADE;
 \d test_tbl2
 \d test_tbl2_subclass
 
-DROP TABLE test_tbl2_subclass;
+DROP TABLE test_tbl2_subclass, test_tbl2;
+DROP TYPE test_type2;
 
 CREATE TYPE test_typex AS (a int, b text);
 CREATE TABLE test_tblx (x int, y test_typex check ((y).a > 0));

From c80bda808e6c4196b2882a4b6238974849764e3b Mon Sep 17 00:00:00 2001
From: Daniel Gustafsson <dgustafsson@postgresql.org>
Date: Mon, 27 Mar 2023 21:35:27 +0200
Subject: [PATCH 46/78] doc: Fix XML_CATALOG_FILES env var for Apple Silicon
 machines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Homebrew changed the prefix for Apple Silicon based machines, so
our advice for XML_CATALOG_FILES needs to mention both.  More info
on the Homebrew change can be found at:

https://github.com/Homebrew/brew/issues/9177

This is a backpatch of commits 4c8d65408 and 5a91c7975, the latter
of which contained a small fix based on a report from Dagfinn Ilmari
Mannsåker.

Author: Julien Rouhaud <julien.rouhaud@free.fr>
Discussion: https://postgr.es/m/20230327082441.h7pa2vqiobbyo7rd@jrouhaud
---
 doc/src/sgml/docguide.sgml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/docguide.sgml b/doc/src/sgml/docguide.sgml
index 55ef6417749..2d8d5cd64f2 100644
--- a/doc/src/sgml/docguide.sgml
+++ b/doc/src/sgml/docguide.sgml
@@ -209,9 +209,13 @@ brew install docbook docbook-xsl libxslt fop
 
    <para>
     The Homebrew-supplied programs require the following environment variable
-    to be set:
+    to be set.  For Intel based machines, use this:
 <programlisting>
 export XML_CATALOG_FILES=/usr/local/etc/xml/catalog
+</programlisting>
+    On Apple Silicon based machines, use this:
+<programlisting>
+export XML_CATALOG_FILES=/opt/homebrew/etc/xml/catalog
 </programlisting>
     Without it, <command>xsltproc</command> will throw errors like this:
 <programlisting>

From d9aeca1c8f8c93bc6ad09415abf1209f8bd39885 Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Tue, 28 Mar 2023 16:16:53 -0400
Subject: [PATCH 47/78] amcheck: In verify_heapam, allow tuples with xmin 0.

Commit e88754a1965c0f40a723e6e46d670cacda9e19bd caused that case
to be reported as corruption, but Peter Geoghegan pointed out that
it can legitimately happen in the case of a speculative insertion
that aborts, so we'd better not flag it as corruption after all.

Back-patch to v14, like the commit that introduced the issue.

Discussion: http://postgr.es/m/CAH2-WzmEabzcPTxSY-NXKH6Qt3FkAPYHGQSe2PtvGgj17ZQkCw@mail.gmail.com
---
 contrib/amcheck/verify_heapam.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index cc49ccb26f6..1cda7b8f45c 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -775,8 +775,7 @@ check_tuple_visibility(HeapCheckContext *ctx)
 	switch (get_xid_status(xmin, ctx, &xmin_status))
 	{
 		case XID_INVALID:
-			report_corruption(ctx,
-							  pstrdup("xmin is invalid"));
+			/* Could be the result of a speculative insertion that aborted. */
 			return false;
 		case XID_BOUNDS_OK:
 			break;

From a678c6eca240652359274a1089af827389794c87 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 29 Mar 2023 11:31:30 -0400
Subject: [PATCH 48/78] Fix dereference of dangling pointer in GiST index
 buffering build.

gistBuildCallback tried to fetch the size of an index tuple that
might have already been freed by gistProcessEmptyingQueue.
While this seems to usually be harmless in production builds,
in principle it could result in a SIGSEGV, or more likely a bogus
value for indtuplesSize leading to poor page-split decisions later
in the build.

The memory management here is confusing and could stand to be
refactored, but for the moment it seems to be enough to fetch
the tuple size sooner.  AFAICT the indtuples[Size] totals aren't
used in between these places; even if they were, the updated
values shouldn't be any worse to use.  So just move the
incrementing of the totals up.

It's not very clear why our valgrind-using buildfarm animals
haven't noticed this problem, because the relevant code path
does seem to be exercised according to the code coverage report.
I think the reason that we didn't fix this bug after the first
report is that I'd wanted to try to understand that better.
However, now that it's been re-discovered let's just be pragmatic
and fix it already.

Original report by Alexander Lakhin (bug #16329),
later rediscovered by Egor Chindyaskin (bug #17874).

Patch by Alexander Lakhin (commentary by Pavel Borisov and me).
Back-patch to all supported branches.

Discussion: https://postgr.es/m/16329-7a6aa9b6fa1118a1@postgresql.org
Discussion: https://postgr.es/m/17874-63ca6c7ce42d2103@postgresql.org
---
 src/backend/access/gist/gistbuild.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index d281d89000f..907cc83857f 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -844,6 +844,19 @@ gistBuildCallback(Relation index,
 						 true);
 	itup->t_tid = *tid;
 
+	/* Update tuple count and total size. */
+	buildstate->indtuples += 1;
+	buildstate->indtuplesSize += IndexTupleSize(itup);
+
+	/*
+	 * XXX In buffering builds, the tempCxt is also reset down inside
+	 * gistProcessEmptyingQueue().  This is not great because it risks
+	 * confusion and possible use of dangling pointers (for example, itup
+	 * might be already freed when control returns here).  It's generally
+	 * better that a memory context be "owned" by only one function.  However,
+	 * currently this isn't causing issues so it doesn't seem worth the amount
+	 * of refactoring that would be needed to avoid it.
+	 */
 	if (buildstate->buildMode == GIST_BUFFERING_ACTIVE)
 	{
 		/* We have buffers, so use them. */
@@ -859,10 +872,6 @@ gistBuildCallback(Relation index,
 					 buildstate->giststate, buildstate->heaprel, true);
 	}
 
-	/* Update tuple count and total size. */
-	buildstate->indtuples += 1;
-	buildstate->indtuplesSize += IndexTupleSize(itup);
-
 	MemoryContextSwitchTo(oldCtx);
 	MemoryContextReset(buildstate->giststate->tempCxt);
 

From dc1aa5b07fcd1c7c87ce2d660f11e3eab9d660e9 Mon Sep 17 00:00:00 2001
From: David Rowley <drowley@postgresql.org>
Date: Fri, 31 Mar 2023 12:14:04 +1300
Subject: [PATCH 49/78] Fix List memory issue in transformColumnDefinition

When calling generateSerialExtraStmts(), we would pass in the
constraint->options.  In some cases, generateSerialExtraStmts() would
modify the referenced List to remove elements from it, but doing so is
invalid without assigning the list back to all variables that point to it.
In the particular reported problem case, the List became empty, in which
case it became NIL, but the passed-in constraint->options didn't get to
find out about that and was left pointing to free'd memory.

To fix this, just perform a list_copy() inside generateSerialExtraStmts().
We could instead do a list_copy() just before we perform the delete from
the list; however, that seems less robust.  Let's make sure the generated
CreateSeqStmt gets a completely different copy of the list to be safe.

Bug: #17879
Reported-by: Fei Changhong
Diagnosed-by: Fei Changhong
Discussion: https://postgr.es/m/17879-b7dfb5debee58ff5@postgresql.org
Backpatch-through: 11, all supported versions
---
 src/backend/parser/parse_utilcmd.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index 223c6fcb48b..358ac56da30 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -476,6 +476,9 @@ generateSerialExtraStmts(CreateStmtContext *cxt, ColumnDef *column,
 
 	int			nameEl_idx = -1;
 
+	/* Make a copy of this as we may end up modifying it in the code below */
+	seqoptions = list_copy(seqoptions);
+
 	/*
 	 * Determine namespace and name to use for the sequence.
 	 *

From 147d6e6d057def04406918bb602ccf74091dc48b Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 31 Mar 2023 10:08:40 -0400
Subject: [PATCH 50/78] Ensure acquire_inherited_sample_rows sets its output
 parameters.

The totalrows/totaldeadrows outputs were left uninitialized in cases
where we find no analyzable child tables of a partitioned table.  This
could lead to setting the partitioned table's pg_class.reltuples value
to garbage.  It's not clear that that would have any very bad effects
in practice, but fix it anyway because it's making valgrind unhappy.

Reported and diagnosed by Alexander Lakhin (bug #17880).

Discussion: https://postgr.es/m/17880-9282037c923d856e@postgresql.org
---
 src/backend/commands/analyze.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 00fc25b2439..8d329d44503 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -2007,6 +2007,10 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 	ListCell   *lc;
 	bool		has_child;
 
+	/* Initialize output parameters to zero now, in case we exit early */
+	*totalrows = 0;
+	*totaldeadrows = 0;
+
 	/*
 	 * Find all members of inheritance set.  We only need AccessShareLock on
 	 * the children.
@@ -2158,8 +2162,6 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 	pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_TOTAL,
 								 nrels);
 	numrows = 0;
-	*totalrows = 0;
-	*totaldeadrows = 0;
 	for (i = 0; i < nrels; i++)
 	{
 		Relation	childrel = rels[i];

From ad1b5351bb0edb87b9fd50b3043c83e2feae3a9a Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 31 Mar 2023 11:18:49 -0400
Subject: [PATCH 51/78] Reject system columns as elements of foreign keys.

Up through v11 it was sensible to use the "oid" system column as
a foreign key column, but since that was removed there's no visible
usefulness in making any of the remaining system columns a foreign
key.  Moreover, since the TupleTableSlot rewrites in v12, such cases
actively fail because of implicit assumptions that only user columns
appear in foreign keys.  The lack of complaints about that seems
like good evidence that no one is trying to do it.  Hence, rather
than trying to repair those assumptions (of which there are at least
two, maybe more), let's just forbid the case up front.

Per this patch, a system column in either the referenced or
referencing side of a foreign key will draw this error; however,
putting one in the referenced side would have failed later anyway,
since we don't allow unique indexes to be made on system columns.

Per bug #17877 from Alexander Lakhin.  Back-patch to v12; the
case still appears to work in v11, so we shouldn't break it there.

Discussion: https://postgr.es/m/17877-4bcc658e33df6de1@postgresql.org
---
 src/backend/commands/tablecmds.c          | 15 +++++++++++++--
 src/test/regress/expected/foreign_key.out | 11 ++++++-----
 src/test/regress/sql/foreign_key.sql      |  7 ++++---
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 9bdebc6e29b..34c910565a9 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -12893,6 +12893,11 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
  * transformColumnNameList - transform list of column names
  *
  * Lookup each name and return its attnum and type OID
+ *
+ * Note: the name of this function suggests that it's general-purpose,
+ * but actually it's only used to look up names appearing in foreign-key
+ * clauses.  The error messages would need work to use it in other cases,
+ * and perhaps the validity checks as well.
  */
 static int
 transformColumnNameList(Oid relId, List *colList,
@@ -12906,6 +12911,7 @@ transformColumnNameList(Oid relId, List *colList,
 	{
 		char	   *attname = strVal(lfirst(l));
 		HeapTuple	atttuple;
+		Form_pg_attribute attform;
 
 		atttuple = SearchSysCacheAttName(relId, attname);
 		if (!HeapTupleIsValid(atttuple))
@@ -12913,13 +12919,18 @@ transformColumnNameList(Oid relId, List *colList,
 					(errcode(ERRCODE_UNDEFINED_COLUMN),
 					 errmsg("column \"%s\" referenced in foreign key constraint does not exist",
 							attname)));
+		attform = (Form_pg_attribute) GETSTRUCT(atttuple);
+		if (attform->attnum < 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("system columns cannot be used in foreign keys")));
 		if (attnum >= INDEX_MAX_KEYS)
 			ereport(ERROR,
 					(errcode(ERRCODE_TOO_MANY_COLUMNS),
 					 errmsg("cannot have more than %d keys in a foreign key",
 							INDEX_MAX_KEYS)));
-		attnums[attnum] = ((Form_pg_attribute) GETSTRUCT(atttuple))->attnum;
-		atttypids[attnum] = ((Form_pg_attribute) GETSTRUCT(atttuple))->atttypid;
+		attnums[attnum] = attform->attnum;
+		atttypids[attnum] = attform->atttypid;
 		ReleaseSysCache(atttuple);
 		attnum++;
 	}
diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out
index 8ea6434e222..ce86e400bcb 100644
--- a/src/test/regress/expected/foreign_key.out
+++ b/src/test/regress/expected/foreign_key.out
@@ -757,15 +757,16 @@ SELECT * from FKTABLE;
 
 DROP TABLE FKTABLE;
 DROP TABLE PKTABLE;
-CREATE TABLE PKTABLE (ptest1 int PRIMARY KEY);
+-- Test some invalid FK definitions
+CREATE TABLE PKTABLE (ptest1 int PRIMARY KEY, someoid oid);
 CREATE TABLE FKTABLE_FAIL1 ( ftest1 int, CONSTRAINT fkfail1 FOREIGN KEY (ftest2) REFERENCES PKTABLE);
 ERROR:  column "ftest2" referenced in foreign key constraint does not exist
 CREATE TABLE FKTABLE_FAIL2 ( ftest1 int, CONSTRAINT fkfail1 FOREIGN KEY (ftest1) REFERENCES PKTABLE(ptest2));
 ERROR:  column "ptest2" referenced in foreign key constraint does not exist
-DROP TABLE FKTABLE_FAIL1;
-ERROR:  table "fktable_fail1" does not exist
-DROP TABLE FKTABLE_FAIL2;
-ERROR:  table "fktable_fail2" does not exist
+CREATE TABLE FKTABLE_FAIL3 ( ftest1 int, CONSTRAINT fkfail1 FOREIGN KEY (tableoid) REFERENCES PKTABLE(someoid));
+ERROR:  system columns cannot be used in foreign keys
+CREATE TABLE FKTABLE_FAIL4 ( ftest1 oid, CONSTRAINT fkfail1 FOREIGN KEY (ftest1) REFERENCES PKTABLE(tableoid));
+ERROR:  system columns cannot be used in foreign keys
 DROP TABLE PKTABLE;
 -- Test for referencing column number smaller than referenced constraint
 CREATE TABLE PKTABLE (ptest1 int, ptest2 int, UNIQUE(ptest1, ptest2));
diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql
index a8c9e77b800..426d10bf861 100644
--- a/src/test/regress/sql/foreign_key.sql
+++ b/src/test/regress/sql/foreign_key.sql
@@ -463,12 +463,13 @@ SELECT * from FKTABLE;
 DROP TABLE FKTABLE;
 DROP TABLE PKTABLE;
 
-CREATE TABLE PKTABLE (ptest1 int PRIMARY KEY);
+-- Test some invalid FK definitions
+CREATE TABLE PKTABLE (ptest1 int PRIMARY KEY, someoid oid);
 CREATE TABLE FKTABLE_FAIL1 ( ftest1 int, CONSTRAINT fkfail1 FOREIGN KEY (ftest2) REFERENCES PKTABLE);
 CREATE TABLE FKTABLE_FAIL2 ( ftest1 int, CONSTRAINT fkfail1 FOREIGN KEY (ftest1) REFERENCES PKTABLE(ptest2));
+CREATE TABLE FKTABLE_FAIL3 ( ftest1 int, CONSTRAINT fkfail1 FOREIGN KEY (tableoid) REFERENCES PKTABLE(someoid));
+CREATE TABLE FKTABLE_FAIL4 ( ftest1 oid, CONSTRAINT fkfail1 FOREIGN KEY (ftest1) REFERENCES PKTABLE(tableoid));
 
-DROP TABLE FKTABLE_FAIL1;
-DROP TABLE FKTABLE_FAIL2;
 DROP TABLE PKTABLE;
 
 -- Test for referencing column number smaller than referenced constraint

From 78b5419ae8570c97a8bb9f8ecc006893512d0114 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Wed, 5 Apr 2023 07:59:52 +0900
Subject: [PATCH 52/78] doc: Add more details about
 pg_stat_get_xact_blocks_{fetched,hit}

The explanation describing the dependency on system read() calls for
these two functions was removed in ddfc2d9.  And after more
discussion about d69c404, we have concluded that adding more details
makes them easier to understand.

While on it, use the term "block read requests" (maybe found in cache)
rather than "buffers fetched" and "buffer hits".

Per discussion with Melanie Plageman, Kyotaro Horiguchi, Bertrand
Drouvot and myself.

Discussion: https://postgr.es/m/CAAKRu_ZmdiScT4q83OAbfmR5AH-L5zWya3SXjaxiJvhCob-e2A@mail.gmail.com
Backpatch-through: 11
---
 doc/src/sgml/monitoring.sgml | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 3c56f9fa4f1..44239b26775 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -5060,8 +5060,11 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
         <returnvalue>bigint</returnvalue>
        </para>
        <para>
-        Returns the number of buffers fetched for table or index, in the current
-        transaction.
+        Returns the number of block read requests for table or index, in the
+        current transaction. This number minus
+        <function>pg_stat_get_xact_blocks_hit</function> gives the number of
+        kernel <function>read()</function> calls; the number of actual
+        physical reads is usually lower due to kernel-level buffering.
        </para></entry>
       </row>
 
@@ -5074,8 +5077,9 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
         <returnvalue>bigint</returnvalue>
        </para>
        <para>
-        Returns the number of buffer hits for table or index, in the current
-        transaction.
+        Returns the number of block read requests for table or index, in the
+        current transaction, found in cache (not triggering kernel
+        <function>read()</function> calls).
        </para></entry>
       </row>
 

From 0a95d1c4b1a5959597d996fb62a640e6bf456340 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Wed, 5 Apr 2023 14:16:19 +0700
Subject: [PATCH 53/78] doc: Update error messages in RLS examples

Since 8b9e9644d, the messages for failed permissions checks report
"table" where appropriate, rather than "relation".

Backpatch to all supported branches
---
 doc/src/sgml/ddl.sgml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 78f4448a5bc..53ca87cc020 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -2457,7 +2457,7 @@ postgres=&gt; table passwd;
 postgres=&gt; set role alice;
 SET
 postgres=&gt; table passwd;
-ERROR:  permission denied for relation passwd
+ERROR:  permission denied for table passwd
 postgres=&gt; select user_name,real_name,home_phone,extra_info,home_dir,shell from passwd;
  user_name | real_name |  home_phone  | extra_info | home_dir    |   shell
 -----------+-----------+--------------+------------+-------------+-----------
@@ -2467,7 +2467,7 @@ postgres=&gt; select user_name,real_name,home_phone,extra_info,home_dir,shell fr
 (3 rows)
 
 postgres=&gt; update passwd set user_name = 'joe';
-ERROR:  permission denied for relation passwd
+ERROR:  permission denied for table passwd
 -- Alice is allowed to change her own real_name, but no others
 postgres=&gt; update passwd set real_name = 'Alice Doe';
 UPDATE 1
@@ -2476,9 +2476,9 @@ UPDATE 0
 postgres=&gt; update passwd set shell = '/bin/xx';
 ERROR:  new row violates WITH CHECK OPTION for "passwd"
 postgres=&gt; delete from passwd;
-ERROR:  permission denied for relation passwd
+ERROR:  permission denied for table passwd
 postgres=&gt; insert into passwd (user_name) values ('xxx');
-ERROR:  permission denied for relation passwd
+ERROR:  permission denied for table passwd
 -- Alice can change her own password; RLS silently prevents updating other rows
 postgres=&gt; update passwd set pwhash = 'abc';
 UPDATE 1

From 24eb9c708e3b637e7ccc7df35a9c77ac3ee76a46 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 6 Apr 2023 15:52:37 -0400
Subject: [PATCH 54/78] Fix ts_headline() edge cases for empty query and empty
 search text.

tsquery's GETQUERY() macro is only safe to apply to a tsquery
that is known non-empty; otherwise it gives a pointer to garbage.
Before commit 5a617d75d, ts_headline() avoided this pitfall, but
only in a very indirect, nonobvious way.  (hlCover could not reach
its TS_execute call, because if the query contains no lexemes
then hlFirstIndex would surely return -1.)  After that commit,
it fell into the trap, resulting in weird errors such as
"unrecognized operator" and/or valgrind complaints.  In HEAD,
fix this by not calling TS_execute_locations() at all for an
empty query.  In the back branches, add a defensive check to
hlCover() --- that's not fixing any live bug, but I judge the
code a bit too fragile as-is.

Also, both mark_hl_fragments() and mark_hl_words() were careless
about the possibility of empty search text: in the cases where
no match has been found, they'd end up telling mark_fragment() to
mark from word indexes 0 to 0 inclusive, even when there is no
word 0.  This is harmless since we over-allocated the prs->words
array, but it does annoy valgrind.  Fix so that the end index is -1
and thus mark_fragment() will do nothing in such cases.

Bottom line is that this fixes a live bug in HEAD, but in the
back branches it's only getting rid of a valgrind nitpick.
Back-patch anyway.

Per report from Alexander Lakhin.

Discussion: https://postgr.es/m/c27f642d-020b-01ff-ae61-086af287c4fd@gmail.com
---
 src/backend/tsearch/wparser_def.c     |  8 ++++++--
 src/test/regress/expected/tsearch.out | 21 +++++++++++++++++++++
 src/test/regress/sql/tsearch.sql      |  6 ++++++
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index a3e5baf9782..98029e78fb4 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -2039,6 +2039,9 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int max_cover,
 				nextpmax;
 	hlCheck		ch;
 
+	if (query->size <= 0)
+		return false;			/* empty query matches nothing */
+
 	/*
 	 * We look for the earliest, shortest substring of prs->words that
 	 * satisfies the query.  Both the pmin and pmax indices must be words
@@ -2343,7 +2346,8 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall,
 	/* show the first min_words words if we have not marked anything */
 	if (num_f <= 0)
 	{
-		startpos = endpos = curlen = 0;
+		startpos = curlen = 0;
+		endpos = -1;
 		for (i = 0; i < prs->curwords && curlen < min_words; i++)
 		{
 			if (!NONWORDTOKEN(prs->words[i].type))
@@ -2498,7 +2502,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall,
 		if (bestlen < 0)
 		{
 			curlen = 0;
-			pose = 0;
+			pose = -1;
 			for (i = 0; i < prs->curwords && curlen < min_words; i++)
 			{
 				if (!NONWORDTOKEN(prs->words[i].type))
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index 27e71bb15d6..5f712a5484e 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -2012,6 +2012,27 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
  <b>Lorem</b> ipsum <b>urna</b>.  Nullam nullam <b>ullamcorper</b> <b>urna</b>
 (1 row)
 
+-- Edge cases with empty query
+SELECT ts_headline('english',
+'', ''::tsquery);
+NOTICE:  text-search query doesn't contain lexemes: ""
+LINE 2: '', ''::tsquery);
+            ^
+ ts_headline 
+-------------
+ 
+(1 row)
+
+SELECT ts_headline('english',
+'foo bar', ''::tsquery);
+NOTICE:  text-search query doesn't contain lexemes: ""
+LINE 2: 'foo bar', ''::tsquery);
+                   ^
+ ts_headline 
+-------------
+ foo bar
+(1 row)
+
 --Rewrite sub system
 CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
 \set ECHO none
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index af5004602a8..80a3e0558b9 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -550,6 +550,12 @@ SELECT ts_headline('english',
 to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
 'MaxFragments=100, MaxWords=100, MinWords=1');
 
+-- Edge cases with empty query
+SELECT ts_headline('english',
+'', ''::tsquery);
+SELECT ts_headline('english',
+'foo bar', ''::tsquery);
+
 --Rewrite sub system
 
 CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);

From 5e5a29361f34117972da0a0b1fe51129e256d3e0 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 6 Apr 2023 18:13:49 -0400
Subject: [PATCH 55/78] Stabilize just-added regression test cases.

The tests added by commits 029dea882 et al turn out to produce
different output under -DRANDOMIZE_ALLOCATED_MEMORY.  This is
not a bug exactly: that flag causes coerce_type() to invoke
the input function twice when coercing an unknown-type literal
to a specific type.  So you get tsqueryin's bleat about an empty
tsquery twice.  Revise the test query to avoid that.

Discussion: https://postgr.es/m/20230406213813.uep7plg6lvcywujo@awork3.anarazel.de
---
 src/test/regress/expected/tsearch.out | 8 ++------
 src/test/regress/sql/tsearch.sql      | 4 ++--
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index 5f712a5484e..36ae94a5255 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -2014,20 +2014,16 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
 
 -- Edge cases with empty query
 SELECT ts_headline('english',
-'', ''::tsquery);
+'', to_tsquery('english', ''));
 NOTICE:  text-search query doesn't contain lexemes: ""
-LINE 2: '', ''::tsquery);
-            ^
  ts_headline 
 -------------
  
 (1 row)
 
 SELECT ts_headline('english',
-'foo bar', ''::tsquery);
+'foo bar', to_tsquery('english', ''));
 NOTICE:  text-search query doesn't contain lexemes: ""
-LINE 2: 'foo bar', ''::tsquery);
-                   ^
  ts_headline 
 -------------
  foo bar
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index 80a3e0558b9..e7e2aa907f7 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -552,9 +552,9 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
 
 -- Edge cases with empty query
 SELECT ts_headline('english',
-'', ''::tsquery);
+'', to_tsquery('english', ''));
 SELECT ts_headline('english',
-'foo bar', ''::tsquery);
+'foo bar', to_tsquery('english', ''));
 
 --Rewrite sub system
 

From 2096d93c04fbcbff9a4fea7469305136f6e96423 Mon Sep 17 00:00:00 2001
From: Stephen Frost <sfrost@snowman.net>
Date: Fri, 7 Apr 2023 19:36:12 -0400
Subject: [PATCH 56/78] For Kerberos testing, disable reverse DNS lookup

In our Kerberos test suite, there isn't much need to worry about the
normal canonicalization that Kerberos provides by looking up the reverse
DNS for the IP address connected to, and in some cases it can actively
cause problems (e.g., behind a captive-portal wifi, the localhost
address used, which is normally not resolvable, ends up being resolved
anyway, and not to the domain we are using for testing, causing the
entire regression test to fail with errors about not being able to get
a TGT for the remote realm for cross-realm trust).

Therefore, disable it by adding rdns = false into the krb5.conf that's
generated for the test.

Reviewed-By: Heikki Linnakangas
Discussion: https://postgr.es/m/Y/QD2zDkDYQA1GQt@tamriel.snowman.net
---
 src/test/kerberos/t/001_auth.pl | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/test/kerberos/t/001_auth.pl b/src/test/kerberos/t/001_auth.pl
index bd55339c761..b2a733f90fa 100644
--- a/src/test/kerberos/t/001_auth.pl
+++ b/src/test/kerberos/t/001_auth.pl
@@ -93,6 +93,17 @@
   or BAIL_OUT("could not get Kerberos version");
 $krb5_version = $1;
 
+# Build the krb5.conf to use.
+#
+# Explicitly specify the default (test) realm and the KDC for
+# that realm to avoid the Kerberos library trying to look up
+# that information in DNS, and also because we're using a
+# non-standard KDC port.
+#
+# Reverse DNS is explicitly disabled to avoid any issue with a
+# captive portal or other cases where the reverse DNS succeeds
+# and the Kerberos library uses that as the canonical name of
+# the host and then tries to acquire a cross-realm ticket.
 append_to_file(
 	$krb5_conf,
 	qq![logging]
@@ -101,6 +112,7 @@
 
 [libdefaults]
 default_realm = $realm
+rdns = false
 
 [realms]
 $realm = {

From 3ccccaac4615df038d537c9acbd7898710a8227b Mon Sep 17 00:00:00 2001
From: Stephen Frost <sfrost@snowman.net>
Date: Fri, 7 Apr 2023 19:36:12 -0400
Subject: [PATCH 57/78] For Kerberos testing, disable DNS lookups

Similar to 8dff2f224, this disables DNS lookups by the Kerberos library
to look up the KDC and the realm while the Kerberos tests are running.
In some environments, these lookups can take a long time and end up
timing out and causing tests to fail.  Further, since this isn't really
our domain, we shouldn't be sending out these DNS requests during our
tests.
---
 src/test/kerberos/t/001_auth.pl | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/test/kerberos/t/001_auth.pl b/src/test/kerberos/t/001_auth.pl
index b2a733f90fa..6ced0b1faf4 100644
--- a/src/test/kerberos/t/001_auth.pl
+++ b/src/test/kerberos/t/001_auth.pl
@@ -100,6 +100,11 @@
 # that information in DNS, and also because we're using a
 # non-standard KDC port.
 #
+# Also explicitly disable DNS lookups since this isn't really
+# our domain and we shouldn't be causing random DNS requests
+# to be sent out (not to mention that broken DNS environments
+# can cause the tests to take an extra long time and timeout).
+#
 # Reverse DNS is explicitly disabled to avoid any issue with a
 # captive portal or other cases where the reverse DNS succeeds
 # and the Kerberos library uses that as the canonical name of
@@ -111,6 +116,8 @@
 kdc = FILE:$kdc_log
 
 [libdefaults]
+dns_lookup_realm = false
+dns_lookup_kdc = false
 default_realm = $realm
 rdns = false
 

From 40fa819131698a6fe8be40e0ac6128496e599183 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 10 Apr 2023 13:09:18 -0400
Subject: [PATCH 58/78] Doc: adjust examples of EXTRACT() output to match
 current reality.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

EXTRACT(EPOCH), EXTRACT(SECOND), and some related cases print more
trailing zeroes than they used to.  This behavior change happened
with commit a2da77cdb (Change return type of EXTRACT to numeric),
and it was intentional according to the commit log:

    - Return values when extracting fields with possibly fractional
      values, such as second and epoch, now have the full scale that the
      value has internally (so, for example, '1.000000' instead of just
      '1').

It's been like that for two releases now, so while I suggested
changing this back, it's probably better to adjust the documentation
examples.

Per bug #17866 from Евгений Жужнев.  Back-patch to v14 where the
change came in.

Discussion: https://postgr.es/m/17866-18eb70095b1594e2@postgresql.org
---
 doc/src/sgml/func.sgml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index ba8bccc135d..a7b91c033a4 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9389,11 +9389,11 @@ SELECT timestamp with time zone '2005-04-02 12:00:00-07' + interval '24 hours';
 <screen>
 SELECT EXTRACT(EPOCH FROM timestamptz '2013-07-01 12:00:00') -
        EXTRACT(EPOCH FROM timestamptz '2013-03-01 12:00:00');
-<lineannotation>Result: </lineannotation><computeroutput>10537200</computeroutput>
+<lineannotation>Result: </lineannotation><computeroutput>10537200.000000</computeroutput>
 SELECT (EXTRACT(EPOCH FROM timestamptz '2013-07-01 12:00:00') -
         EXTRACT(EPOCH FROM timestamptz '2013-03-01 12:00:00'))
         / 60 / 60 / 24;
-<lineannotation>Result: </lineannotation><computeroutput>121.958333333333</computeroutput>
+<lineannotation>Result: </lineannotation><computeroutput>121.9583333333333333</computeroutput>
 SELECT timestamptz '2013-07-01 12:00:00' - timestamptz '2013-03-01 12:00:00';
 <lineannotation>Result: </lineannotation><computeroutput>121 days 23:00:00</computeroutput>
 SELECT age(timestamptz '2013-07-01 12:00:00', timestamptz '2013-03-01 12:00:00');
@@ -9539,13 +9539,13 @@ SELECT EXTRACT(DOY FROM TIMESTAMP '2001-02-16 20:38:40');
 
 <screen>
 SELECT EXTRACT(EPOCH FROM TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40.12-08');
-<lineannotation>Result: </lineannotation><computeroutput>982384720.12</computeroutput>
+<lineannotation>Result: </lineannotation><computeroutput>982384720.120000</computeroutput>
 
 SELECT EXTRACT(EPOCH FROM TIMESTAMP '2001-02-16 20:38:40.12');
-<lineannotation>Result: </lineannotation><computeroutput>982355920.12</computeroutput>
+<lineannotation>Result: </lineannotation><computeroutput>982355920.120000</computeroutput>
 
 SELECT EXTRACT(EPOCH FROM INTERVAL '5 days 3 hours');
-<lineannotation>Result: </lineannotation><computeroutput>442800</computeroutput>
+<lineannotation>Result: </lineannotation><computeroutput>442800.000000</computeroutput>
 </screen>
 
        <para>
@@ -9692,7 +9692,7 @@ SELECT EXTRACT(MILLENNIUM FROM TIMESTAMP '2001-02-16 20:38:40');
 
 <screen>
 SELECT EXTRACT(MILLISECONDS FROM TIME '17:12:28.5');
-<lineannotation>Result: </lineannotation><computeroutput>28500</computeroutput>
+<lineannotation>Result: </lineannotation><computeroutput>28500.000</computeroutput>
 </screen>
       </listitem>
      </varlistentry>
@@ -9756,10 +9756,10 @@ SELECT EXTRACT(QUARTER FROM TIMESTAMP '2001-02-16 20:38:40');
 
 <screen>
 SELECT EXTRACT(SECOND FROM TIMESTAMP '2001-02-16 20:38:40');
-<lineannotation>Result: </lineannotation><computeroutput>40</computeroutput>
+<lineannotation>Result: </lineannotation><computeroutput>40.000000</computeroutput>
 
 SELECT EXTRACT(SECOND FROM TIME '17:12:28.5');
-<lineannotation>Result: </lineannotation><computeroutput>28.5</computeroutput>
+<lineannotation>Result: </lineannotation><computeroutput>28.500000</computeroutput>
 </screen>
       </listitem>
      </varlistentry>

From a8b7ab6c652234ba2cd6f7742477a031797af4c6 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 10 Apr 2023 15:49:48 -0400
Subject: [PATCH 59/78] Doc: add missed entries in BRIN extensibility tables.

The tables in "71.3. Extensibility" listing the support functions
for bloom and minmax-multi opclasses should include the associated
options function.  While this isn't quite as required as the rest,
you need it for full functionality of the opclass.

Back-patch to v14 where these functions were added.
---
 doc/src/sgml/brin.sgml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/doc/src/sgml/brin.sgml b/doc/src/sgml/brin.sgml
index 71697155d7c..9c5ffcddf84 100644
--- a/doc/src/sgml/brin.sgml
+++ b/doc/src/sgml/brin.sgml
@@ -1228,6 +1228,10 @@ typedef struct BrinOpcInfo
      <entry>Support Procedure 4</entry>
      <entry>internal function <function>brin_bloom_union()</function></entry>
     </row>
+    <row>
+     <entry>Support Procedure 5</entry>
+     <entry>internal function <function>brin_bloom_options()</function></entry>
+    </row>
     <row>
      <entry>Support Procedure 11</entry>
      <entry>function to compute hash of an element</entry>
@@ -1286,6 +1290,10 @@ typedef struct BrinOpcInfo
      <entry>Support Procedure 4</entry>
      <entry>internal function <function>brin_minmax_multi_union()</function></entry>
     </row>
+    <row>
+     <entry>Support Procedure 5</entry>
+     <entry>internal function <function>brin_minmax_multi_options()</function></entry>
+    </row>
     <row>
      <entry>Support Procedure 11</entry>
      <entry>function to compute distance between two values (length of a range)</entry>

From c42082d6227dff13ac7b64848b961aaa2b5f1015 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Wed, 12 Apr 2023 09:09:58 +0900
Subject: [PATCH 60/78] Fix detection of unseekable files for fseek() and
 ftello() with MSVC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Calling fseek() or ftello() on a handle to a non-seeking device such as
a pipe or a communications device is not supported.  Unfortunately,
MSVC's flavors of these routines, _fseeki64() and _ftelli64(), do not
return an error when given a handle to a pipe.  Some of the logic of
pg_dump and pg_restore relies on these routines to check if a handle is
seekable, causing failures when passing the contents of pg_dump to
pg_restore through a pipe, for example.

This commit introduces wrappers for fseeko() and ftello() on MSVC so
that any callers are able to properly detect the cases of non-seekable
handles.  This relies mainly on GetFileType(), sharing a bit of code
with the MSVC port for fstat().  The code in charge of getting a file
type is refactored into a new file called win32common.c, shared by
win32stat.c and the new win32fseek.c.  It includes the MSVC ports for
fseeko() and ftello().

Like 765f5df, this is backpatched down to 14, where the fstat()
implementation for MSVC is able to handle files larger than 4GB in
size.  Using a TAP test for that is proving to be tricky as IPC::Run
handles the pipes by itself; still, I have been able to check the fix
manually.

Reported-by: Daniel Watzinger
Author: Juan José Santamaría Flecha, Michael Paquier
Discussion: https://postgr.es/m/CAC+AXB26a4EmxM2suXxPpJaGrqAdxracd7hskLg-zxtPB50h7A@mail.gmail.com
Backpatch-through: 14
---
 configure                     |  6 +++
 configure.ac                  |  1 +
 src/include/port/win32_port.h | 12 ++++--
 src/port/win32common.c        | 68 +++++++++++++++++++++++++++++++
 src/port/win32fseek.c         | 75 +++++++++++++++++++++++++++++++++++
 src/port/win32stat.c          | 22 ++--------
 src/tools/msvc/Mkvcbuild.pm   |  3 +-
 7 files changed, 164 insertions(+), 23 deletions(-)
 create mode 100644 src/port/win32common.c
 create mode 100644 src/port/win32fseek.c

diff --git a/configure b/configure
index 127c9f2ee09..1eba53d8c6d 100755
--- a/configure
+++ b/configure
@@ -20428,6 +20428,12 @@ esac
  ;;
 esac
 
+  case " $LIBOBJS " in
+  *" win32common.$ac_objext "* ) ;;
+  *) LIBOBJS="$LIBOBJS win32common.$ac_objext"
+ ;;
+esac
+
   case " $LIBOBJS " in
   *" win32env.$ac_objext "* ) ;;
   *) LIBOBJS="$LIBOBJS win32env.$ac_objext"
diff --git a/configure.ac b/configure.ac
index 7e3292a37ae..9a07159cecf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2521,6 +2521,7 @@ if test "$PORTNAME" = "win32"; then
   AC_LIBOBJ(kill)
   AC_LIBOBJ(open)
   AC_LIBOBJ(system)
+  AC_LIBOBJ(win32common)
   AC_LIBOBJ(win32env)
   AC_LIBOBJ(win32error)
   AC_LIBOBJ(win32security)
diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h
index 05c5a534420..41427a9c1af 100644
--- a/src/include/port/win32_port.h
+++ b/src/include/port/win32_port.h
@@ -193,15 +193,21 @@ struct itimerval
 
 int			setitimer(int which, const struct itimerval *value, struct itimerval *ovalue);
 
+/* Convenience wrapper for GetFileType() */
+extern DWORD pgwin32_get_file_type(HANDLE hFile);
+
 /*
  * WIN32 does not provide 64-bit off_t, but does provide the functions operating
- * with 64-bit offsets.
+ * with 64-bit offsets.  Also, fseek() might not give an error for unseekable
+ * streams, so harden that function with our version.
  */
 #define pgoff_t __int64
 
 #ifdef _MSC_VER
-#define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
-#define ftello(stream) _ftelli64(stream)
+extern int	_pgfseeko64(FILE *stream, pgoff_t offset, int origin);
+extern pgoff_t _pgftello64(FILE *stream);
+#define fseeko(stream, offset, origin) _pgfseeko64(stream, offset, origin)
+#define ftello(stream) _pgftello64(stream)
 #else
 #ifndef fseeko
 #define fseeko(stream, offset, origin) fseeko64(stream, offset, origin)
diff --git a/src/port/win32common.c b/src/port/win32common.c
new file mode 100644
index 00000000000..2fd78f7f936
--- /dev/null
+++ b/src/port/win32common.c
@@ -0,0 +1,68 @@
+/*-------------------------------------------------------------------------
+ *
+ * win32common.c
+ *	  Common routines shared among the win32*.c ports.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/win32common.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef FRONTEND
+#include "postgres_fe.h"
+#else
+#include "postgres.h"
+#endif
+
+#ifdef WIN32
+
+/*
+ * pgwin32_get_file_type
+ *
+ * Convenience wrapper for GetFileType() with specific error handling for all the
+ * port implementations.  Returns the file type associated with a HANDLE.
+ *
+ * On error, sets errno with FILE_TYPE_UNKNOWN as file type.
+ */
+DWORD
+pgwin32_get_file_type(HANDLE hFile)
+{
+	DWORD		fileType = FILE_TYPE_UNKNOWN;
+	DWORD		lastError;
+
+	errno = 0;
+
+	/*
+	 * When stdin, stdout, and stderr aren't associated with a stream the
+	 * special value -2 is returned:
+	 * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/get-osfhandle
+	 */
+	if (hFile == INVALID_HANDLE_VALUE || hFile == (HANDLE) -2)
+	{
+		errno = EINVAL;
+		return FILE_TYPE_UNKNOWN;
+	}
+
+	fileType = GetFileType(hFile);
+	lastError = GetLastError();
+
+	/*
+	 * Invoke GetLastError in order to distinguish between a "valid" return of
+	 * FILE_TYPE_UNKNOWN and its return due to a calling error.  In case of
+	 * success, GetLastError() returns NO_ERROR.
+	 */
+	if (fileType == FILE_TYPE_UNKNOWN && lastError != NO_ERROR)
+	{
+		_dosmaperr(lastError);
+		return FILE_TYPE_UNKNOWN;
+	}
+
+	return fileType;
+}
+
+#endif							/* WIN32 */
diff --git a/src/port/win32fseek.c b/src/port/win32fseek.c
new file mode 100644
index 00000000000..985313c825f
--- /dev/null
+++ b/src/port/win32fseek.c
@@ -0,0 +1,75 @@
+/*-------------------------------------------------------------------------
+ *
+ * win32fseek.c
+ *	  Replacements for fseeko() and ftello().
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/port/win32fseek.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef FRONTEND
+#include "postgres_fe.h"
+#else
+#include "postgres.h"
+#endif
+
+#if defined(WIN32) && defined(_MSC_VER)
+
+/*
+ * _pgfseeko64
+ *
+ * Calling fseek() on a handle to a non-seeking device such as a pipe or
+ * a communications device is not supported, and fseek() may not return
+ * an error.  This wrapper relies on the file type to check which cases
+ * are supported.
+ */
+int
+_pgfseeko64(FILE *stream, pgoff_t offset, int origin)
+{
+	DWORD		fileType;
+	HANDLE		hFile = (HANDLE) _get_osfhandle(_fileno(stream));
+
+	fileType = pgwin32_get_file_type(hFile);
+	if (errno != 0)
+		return -1;
+
+	if (fileType == FILE_TYPE_DISK)
+		return _fseeki64(stream, offset, origin);
+	else if (fileType == FILE_TYPE_CHAR || fileType == FILE_TYPE_PIPE)
+		errno = ESPIPE;
+	else
+		errno = EINVAL;
+
+	return -1;
+}
+
+/*
+ * _pgftello64
+ *
+ * Same as _pgfseeko64().
+ */
+pgoff_t
+_pgftello64(FILE *stream)
+{
+	DWORD		fileType;
+	HANDLE		hFile = (HANDLE) _get_osfhandle(_fileno(stream));
+
+	fileType = pgwin32_get_file_type(hFile);
+	if (errno != 0)
+		return -1;
+
+	if (fileType == FILE_TYPE_DISK)
+		return _ftelli64(stream);
+	else if (fileType == FILE_TYPE_CHAR || fileType == FILE_TYPE_PIPE)
+		errno = ESPIPE;
+	else
+		errno = EINVAL;
+
+	return -1;
+}
+
+#endif							/* defined(WIN32) && defined(_MSC_VER) */
diff --git a/src/port/win32stat.c b/src/port/win32stat.c
index 36c3b171f40..acbf4c7e279 100644
--- a/src/port/win32stat.c
+++ b/src/port/win32stat.c
@@ -290,33 +290,17 @@ _pgfstat64(int fileno, struct stat *buf)
 {
 	HANDLE		hFile = (HANDLE) _get_osfhandle(fileno);
 	DWORD		fileType = FILE_TYPE_UNKNOWN;
-	DWORD		lastError;
 	unsigned short st_mode;
 
-	/*
-	 * When stdin, stdout, and stderr aren't associated with a stream the
-	 * special value -2 is returned:
-	 * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/get-osfhandle
-	 */
-	if (hFile == INVALID_HANDLE_VALUE || hFile == (HANDLE) -2 || buf == NULL)
+	if (buf == NULL)
 	{
 		errno = EINVAL;
 		return -1;
 	}
 
-	fileType = GetFileType(hFile);
-	lastError = GetLastError();
-
-	/*
-	 * Invoke GetLastError in order to distinguish between a "valid" return of
-	 * FILE_TYPE_UNKNOWN and its return due to a calling error.  In case of
-	 * success, GetLastError returns NO_ERROR.
-	 */
-	if (fileType == FILE_TYPE_UNKNOWN && lastError != NO_ERROR)
-	{
-		_dosmaperr(lastError);
+	fileType = pgwin32_get_file_type(hFile);
+	if (errno != 0)
 		return -1;
-	}
 
 	switch (fileType)
 	{
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index dd207a3714a..f8d65c9e00a 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -115,7 +115,8 @@ sub mkvcbuild
 	  pg_strong_random.c pgcheckdir.c pgmkdirp.c pgsleep.c pgstrcasecmp.c
 	  pqsignal.c mkdtemp.c qsort.c qsort_arg.c bsearch_arg.c quotes.c system.c
 	  strerror.c tar.c thread.c timingsafe_bcmp.c
-	  win32env.c win32error.c win32security.c win32setlocale.c win32stat.c);
+	  win32common.c win32env.c win32error.c win32fseek.c win32security.c
+	  win32setlocale.c win32stat.c);
 
 	push(@pgportfiles, 'strtof.c') if ($vsVersion < '14.00');
 

From b23bd7f5562e2f7978d439143be4b2c5985b2b41 Mon Sep 17 00:00:00 2001
From: Daniel Gustafsson <dgustafsson@postgresql.org>
Date: Fri, 14 Apr 2023 10:15:50 +0200
Subject: [PATCH 61/78] doc: PQinitOpenSSL and PQinitSSL are obsolete in
 OpenSSL 1.1.0+

Starting with OpenSSL 1.1.0 there is no need to call PQinitOpenSSL
or PQinitSSL to avoid duplicate initialization of OpenSSL.  Add a
note to the documentation to explain this.

Backpatch to all supported versions as older OpenSSL versions are
equally likely to be used for all branches.

Reported-by: Sebastien Flaesch <sebastien.flaesch@4js.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/DBAP191MB12895BFFEC4B5FE0460D0F2FB0459@DBAP191MB1289.EURP191.PROD.OUTLOOK.COM
Backpatch-through: 11, all supported versions
---
 doc/src/sgml/libpq.sgml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index eb9a9b80a69..23023c96e94 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -8697,6 +8697,8 @@ ldap://ldap.acme.com/cn=dbserver,cn=hosts?pgconnectinfo?base?(objectclass=*)
    that the <literal>libssl</literal> and/or <literal>libcrypto</literal> libraries
    have been initialized by your application, so that
    <application>libpq</application> will not also initialize those libraries.
+   However, this is unnecessary when using <productname>OpenSSL</productname>
+   version 1.1.0 or later, as duplicate initializations are no longer problematic.
   </para>
 
   <para>

From 2fef36bdfecbe3f68718298929f5515bc512a034 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 17 Apr 2023 14:22:06 -0400
Subject: [PATCH 62/78] Avoid trying to write an empty WAL record in
 log_newpage_range().

If the last few pages in the specified range are empty (all zero),
then log_newpage_range() could try to emit an empty WAL record
containing no FPIs.  This at least upsets an Assert in
ReserveXLogInsertLocation, and might perhaps have bad real-world
consequences in non-assert builds.

This has been broken since log_newpage_range() was introduced,
but the case was hard if not impossible to hit before commit 3d6a98457
decided it was okay to leave VM and FSM pages intentionally zero.
Nonetheless, it seems prudent to back-patch.  log_newpage_range()
was added in v12 but later back-patched, so this affects all
supported branches.

Matthias van de Meent, per report from Justin Pryzby

Discussion: https://postgr.es/m/ZD1daibg4RF50IOj@telsasoft.com
---
 src/backend/access/transam/xloginsert.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 7243b97d467..bf728becbda 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -1237,6 +1237,10 @@ log_newpage_range(Relation rel, ForkNumber forkNum,
 			blkno++;
 		}
 
+		/* Nothing more to do if all remaining blocks were empty. */
+		if (nbufs == 0)
+			break;
+
 		/* Write WAL record for this batch. */
 		XLogBeginInsert();
 

From d76f465dd840a7233a53bab8be029f0b441b8852 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Tue, 18 Apr 2023 11:20:50 +0900
Subject: [PATCH 63/78] ecpg: Fix handling of strings in ORACLE compat code
 with SQLDA

When compiled with -C ORACLE, ecpg_get_data() had an off-by-one issue
where it would incorrectly store the null terminator byte to str[-1]
when varcharsize is 0, which is something that can happen when using
SQLDA.  This would eat 1 byte from the previous field stored, corrupting
the results generated.

All the callers of ecpg_get_data() estimate and allocate enough storage
for the data received, and the fix of this commit relies on this
assumption.  Note that this maps to the case where no padding or
truncation is required.

This issue has been introduced by 3b7ab43 with the Oracle compatibility
option, so backpatch down to v11.

Author: Kyotaro Horiguchi
Discussion: https://postgr.es/m/20230410.173500.440060475837236886.horikyota.ntt@gmail.com
Backpatch-through: 11
---
 src/interfaces/ecpg/ecpglib/data.c            |  19 +-
 .../ecpg/test/compat_oracle/char_array.pgc    |  31 ++-
 .../test/expected/compat_oracle-char_array.c  | 216 +++++++++++++-----
 .../expected/compat_oracle-char_array.stderr  | 166 ++++++++------
 .../expected/compat_oracle-char_array.stdout  |   5 +
 5 files changed, 310 insertions(+), 127 deletions(-)

diff --git a/src/interfaces/ecpg/ecpglib/data.c b/src/interfaces/ecpg/ecpglib/data.c
index 6bc91ef7eb6..c94907bcc5f 100644
--- a/src/interfaces/ecpg/ecpglib/data.c
+++ b/src/interfaces/ecpg/ecpglib/data.c
@@ -582,7 +582,7 @@ ecpg_get_data(const PGresult *results, int act_tuple, int act_field, int lineno,
 						if (varcharsize == 0 && offset == sizeof(char *))
 							str = *(char **) str;
 
-						if (varcharsize == 0 || varcharsize > size)
+						if (varcharsize > size)
 						{
 							/*
 							 * compatibility mode, blank pad and null
@@ -642,16 +642,25 @@ ecpg_get_data(const PGresult *results, int act_tuple, int act_field, int lineno,
 						}
 						else
 						{
-							strncpy(str, pval, varcharsize);
+							int			charsize = varcharsize;
+
+							/*
+							 * assume that the caller provided storage exactly
+							 * fit when varcharsize is zero.
+							 */
+							if (varcharsize == 0)
+								charsize = size + 1;
+
+							strncpy(str, pval, charsize);
 
 							/* compatibility mode, null terminate char array */
-							if (ORACLE_MODE(compat) && (varcharsize - 1) < size)
+							if (ORACLE_MODE(compat) && (charsize - 1) < size)
 							{
 								if (type == ECPGt_char || type == ECPGt_unsigned_char)
-									str[varcharsize - 1] = '\0';
+									str[charsize - 1] = '\0';
 							}
 
-							if (varcharsize < size || (ORACLE_MODE(compat) && (varcharsize - 1) < size))
+							if (charsize < size || (ORACLE_MODE(compat) && (charsize - 1) < size))
 							{
 								/* truncation */
 								switch (ind_type)
diff --git a/src/interfaces/ecpg/test/compat_oracle/char_array.pgc b/src/interfaces/ecpg/test/compat_oracle/char_array.pgc
index 6a5d383d4eb..de18cbb57ff 100644
--- a/src/interfaces/ecpg/test/compat_oracle/char_array.pgc
+++ b/src/interfaces/ecpg/test/compat_oracle/char_array.pgc
@@ -2,6 +2,10 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <pgtypes_numeric.h>
+
+EXEC SQL INCLUDE sqlda.h;
+
 EXEC SQL INCLUDE ../regression;
 
 static void warn(void)
@@ -20,6 +24,8 @@ int main() {
 
   const char *ppppp = "XXXXX";
   int loopcount;
+  sqlda_t *sqlda = NULL;
+
   EXEC SQL BEGIN DECLARE SECTION;
   char shortstr[5];
   char bigstr[11];
@@ -53,11 +59,34 @@ int main() {
 
   EXEC SQL CLOSE C;
   EXEC SQL DROP TABLE strdbase;
+  EXEC SQL COMMIT WORK;
 
-  printf("\nGOOD-BYE!!\n\n");
+  /* SQLDA handling */
+  EXEC SQL WHENEVER SQLWARNING SQLPRINT;
+  EXEC SQL WHENEVER NOT FOUND STOP;
+  EXEC SQL PREPARE stmt1 FROM "SELECT 123::numeric(3,0), 't'::varchar(2)";
+  EXEC SQL DECLARE cur1 CURSOR FOR stmt1;
+  EXEC SQL OPEN cur1;
+  EXEC SQL FETCH NEXT FROM cur1 INTO DESCRIPTOR sqlda;
+
+  printf("\n-----------------\ntype    : data\n");
+  for (int i = 0 ; i < sqlda->sqld ; i++)
+  {
+	  sqlvar_t v = sqlda->sqlvar[i];
+	  char *sqldata = v.sqldata;
+
+	  if (v.sqltype == ECPGt_numeric)
+		  sqldata =
+			  PGTYPESnumeric_to_asc((numeric*) sqlda->sqlvar[i].sqldata, -1);
+
+	  printf("%-8s: \"%s\"\n", v.sqlname.data, sqldata);
+  }
 
+  EXEC SQL CLOSE cur1;
   EXEC SQL COMMIT WORK;
 
+  printf("\nGOOD-BYE!!\n\n");
+
   EXEC SQL DISCONNECT ALL;
 
   return 0;
diff --git a/src/interfaces/ecpg/test/expected/compat_oracle-char_array.c b/src/interfaces/ecpg/test/expected/compat_oracle-char_array.c
index 04d4e1969e3..16db663dcc5 100644
--- a/src/interfaces/ecpg/test/expected/compat_oracle-char_array.c
+++ b/src/interfaces/ecpg/test/expected/compat_oracle-char_array.c
@@ -11,6 +11,32 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <pgtypes_numeric.h>
+
+
+#line 1 "sqlda.h"
+#ifndef ECPG_SQLDA_H
+#define ECPG_SQLDA_H
+
+#ifdef _ECPG_INFORMIX_H
+
+#include "sqlda-compat.h"
+typedef struct sqlvar_compat sqlvar_t;
+typedef struct sqlda_compat sqlda_t;
+
+#else
+
+#include "sqlda-native.h"
+typedef struct sqlvar_struct sqlvar_t;
+typedef struct sqlda_struct sqlda_t;
+
+#endif
+
+#endif							/* ECPG_SQLDA_H */
+
+#line 7 "char_array.pgc"
+
+
 
 #line 1 "regression.h"
 
@@ -19,7 +45,7 @@
 
 
 
-#line 5 "char_array.pgc"
+#line 9 "char_array.pgc"
 
 
 static void warn(void)
@@ -34,125 +60,127 @@ static void warn(void)
 int main() {
 
   /* exec sql whenever sql_warning  do warn ( ) ; */
-#line 18 "char_array.pgc"
+#line 22 "char_array.pgc"
 
   /* exec sql whenever sqlerror  stop ; */
-#line 19 "char_array.pgc"
+#line 23 "char_array.pgc"
 
 
   const char *ppppp = "XXXXX";
   int loopcount;
+  sqlda_t *sqlda = NULL;
+
   /* exec sql begin declare section */
    
    
      
      
   
-#line 24 "char_array.pgc"
+#line 30 "char_array.pgc"
  char shortstr [ 5 ] ;
  
-#line 25 "char_array.pgc"
+#line 31 "char_array.pgc"
  char bigstr [ 11 ] ;
  
-#line 26 "char_array.pgc"
+#line 32 "char_array.pgc"
  short shstr_ind = 0 ;
  
-#line 27 "char_array.pgc"
+#line 33 "char_array.pgc"
  short bigstr_ind = 0 ;
 /* exec sql end declare section */
-#line 28 "char_array.pgc"
+#line 34 "char_array.pgc"
 
 
   ECPGdebug(1, stderr);
   { ECPGconnect(__LINE__, 3, "ecpg1_regression" , NULL, NULL , NULL, 0); 
-#line 31 "char_array.pgc"
+#line 37 "char_array.pgc"
 
 if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 31 "char_array.pgc"
+#line 37 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 31 "char_array.pgc"
+#line 37 "char_array.pgc"
 
 
   { ECPGdo(__LINE__, 3, 1, NULL, 0, ECPGst_normal, "create table strdbase ( strval varchar ( 10 ) )", ECPGt_EOIT, ECPGt_EORT);
-#line 33 "char_array.pgc"
+#line 39 "char_array.pgc"
 
 if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 33 "char_array.pgc"
+#line 39 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 33 "char_array.pgc"
+#line 39 "char_array.pgc"
 
   { ECPGdo(__LINE__, 3, 1, NULL, 0, ECPGst_normal, "insert into strdbase values ( '' )", ECPGt_EOIT, ECPGt_EORT);
-#line 34 "char_array.pgc"
+#line 40 "char_array.pgc"
 
 if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 34 "char_array.pgc"
+#line 40 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 34 "char_array.pgc"
+#line 40 "char_array.pgc"
 
   { ECPGdo(__LINE__, 3, 1, NULL, 0, ECPGst_normal, "insert into strdbase values ( 'AB' )", ECPGt_EOIT, ECPGt_EORT);
-#line 35 "char_array.pgc"
+#line 41 "char_array.pgc"
 
 if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 35 "char_array.pgc"
+#line 41 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 35 "char_array.pgc"
+#line 41 "char_array.pgc"
 
   { ECPGdo(__LINE__, 3, 1, NULL, 0, ECPGst_normal, "insert into strdbase values ( 'ABCD' )", ECPGt_EOIT, ECPGt_EORT);
-#line 36 "char_array.pgc"
+#line 42 "char_array.pgc"
 
 if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 36 "char_array.pgc"
+#line 42 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 36 "char_array.pgc"
+#line 42 "char_array.pgc"
 
   { ECPGdo(__LINE__, 3, 1, NULL, 0, ECPGst_normal, "insert into strdbase values ( 'ABCDE' )", ECPGt_EOIT, ECPGt_EORT);
-#line 37 "char_array.pgc"
+#line 43 "char_array.pgc"
 
 if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 37 "char_array.pgc"
+#line 43 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 37 "char_array.pgc"
+#line 43 "char_array.pgc"
 
   { ECPGdo(__LINE__, 3, 1, NULL, 0, ECPGst_normal, "insert into strdbase values ( 'ABCDEF' )", ECPGt_EOIT, ECPGt_EORT);
-#line 38 "char_array.pgc"
+#line 44 "char_array.pgc"
 
 if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 38 "char_array.pgc"
+#line 44 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 38 "char_array.pgc"
+#line 44 "char_array.pgc"
 
   { ECPGdo(__LINE__, 3, 1, NULL, 0, ECPGst_normal, "insert into strdbase values ( 'ABCDEFGHIJ' )", ECPGt_EOIT, ECPGt_EORT);
-#line 39 "char_array.pgc"
+#line 45 "char_array.pgc"
 
 if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 39 "char_array.pgc"
+#line 45 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 39 "char_array.pgc"
+#line 45 "char_array.pgc"
 
 
   /* declare C cursor for select strval , strval from strdbase */
-#line 41 "char_array.pgc"
+#line 47 "char_array.pgc"
 
   { ECPGdo(__LINE__, 3, 1, NULL, 0, ECPGst_normal, "declare C cursor for select strval , strval from strdbase", ECPGt_EOIT, ECPGt_EORT);
-#line 42 "char_array.pgc"
+#line 48 "char_array.pgc"
 
 if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 42 "char_array.pgc"
+#line 48 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 42 "char_array.pgc"
+#line 48 "char_array.pgc"
 
 
   /* exec sql whenever not found  break ; */
-#line 44 "char_array.pgc"
+#line 50 "char_array.pgc"
 
 
   printf("Full Str.  :  Short  Ind.\n");
@@ -164,59 +192,135 @@ if (sqlca.sqlcode < 0) exit (1);}
 	ECPGt_short,&(bigstr_ind),(long)1,(long)1,sizeof(short), 
 	ECPGt_char,(shortstr),(long)5,(long)1,(5)*sizeof(char), 
 	ECPGt_short,&(shstr_ind),(long)1,(long)1,sizeof(short), ECPGt_EORT);
-#line 50 "char_array.pgc"
+#line 56 "char_array.pgc"
 
 if (sqlca.sqlcode == ECPG_NOT_FOUND) break;
-#line 50 "char_array.pgc"
+#line 56 "char_array.pgc"
 
 if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 50 "char_array.pgc"
+#line 56 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 50 "char_array.pgc"
+#line 56 "char_array.pgc"
 
     printf("\"%s\": \"%s\"  %d\n", bigstr, shortstr, shstr_ind);
   }
 
   { ECPGdo(__LINE__, 3, 1, NULL, 0, ECPGst_normal, "close C", ECPGt_EOIT, ECPGt_EORT);
-#line 54 "char_array.pgc"
+#line 60 "char_array.pgc"
 
 if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 54 "char_array.pgc"
+#line 60 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 54 "char_array.pgc"
+#line 60 "char_array.pgc"
 
   { ECPGdo(__LINE__, 3, 1, NULL, 0, ECPGst_normal, "drop table strdbase", ECPGt_EOIT, ECPGt_EORT);
-#line 55 "char_array.pgc"
+#line 61 "char_array.pgc"
 
 if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 55 "char_array.pgc"
+#line 61 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 55 "char_array.pgc"
+#line 61 "char_array.pgc"
 
+  { ECPGtrans(__LINE__, NULL, "commit work");
+#line 62 "char_array.pgc"
 
-  printf("\nGOOD-BYE!!\n\n");
+if (sqlca.sqlwarn[0] == 'W') warn ( );
+#line 62 "char_array.pgc"
+
+if (sqlca.sqlcode < 0) exit (1);}
+#line 62 "char_array.pgc"
+
+
+  /* SQLDA handling */
+  /* exec sql whenever sql_warning  sqlprint ; */
+#line 65 "char_array.pgc"
+
+  /* exec sql whenever not found  stop ; */
+#line 66 "char_array.pgc"
+
+  { ECPGprepare(__LINE__, NULL, 0, "stmt1", "SELECT 123::numeric(3,0), 't'::varchar(2)");
+#line 67 "char_array.pgc"
+
+if (sqlca.sqlwarn[0] == 'W') sqlprint();
+#line 67 "char_array.pgc"
+
+if (sqlca.sqlcode < 0) exit (1);}
+#line 67 "char_array.pgc"
+
+  /* declare cur1 cursor for $1 */
+#line 68 "char_array.pgc"
+
+  { ECPGdo(__LINE__, 3, 1, NULL, 0, ECPGst_normal, "declare cur1 cursor for $1", 
+	ECPGt_char_variable,(ECPGprepared_statement(NULL, "stmt1", __LINE__)),(long)1,(long)1,(1)*sizeof(char), 
+	ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EOIT, ECPGt_EORT);
+#line 69 "char_array.pgc"
+
+if (sqlca.sqlwarn[0] == 'W') sqlprint();
+#line 69 "char_array.pgc"
+
+if (sqlca.sqlcode < 0) exit (1);}
+#line 69 "char_array.pgc"
+
+  { ECPGdo(__LINE__, 3, 1, NULL, 0, ECPGst_normal, "fetch next from cur1", ECPGt_EOIT, 
+	ECPGt_sqlda, &sqlda, 0L, 0L, 0L, 
+	ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EORT);
+#line 70 "char_array.pgc"
+
+if (sqlca.sqlcode == ECPG_NOT_FOUND) exit (1);
+#line 70 "char_array.pgc"
+
+if (sqlca.sqlwarn[0] == 'W') sqlprint();
+#line 70 "char_array.pgc"
+
+if (sqlca.sqlcode < 0) exit (1);}
+#line 70 "char_array.pgc"
+
+
+  printf("\n-----------------\ntype    : data\n");
+  for (int i = 0 ; i < sqlda->sqld ; i++)
+  {
+	  sqlvar_t v = sqlda->sqlvar[i];
+	  char *sqldata = v.sqldata;
+
+	  if (v.sqltype == ECPGt_numeric)
+		  sqldata =
+			  PGTYPESnumeric_to_asc((numeric*) sqlda->sqlvar[i].sqldata, -1);
+
+	  printf("%-8s: \"%s\"\n", v.sqlname.data, sqldata);
+  }
+
+  { ECPGdo(__LINE__, 3, 1, NULL, 0, ECPGst_normal, "close cur1", ECPGt_EOIT, ECPGt_EORT);
+#line 85 "char_array.pgc"
+
+if (sqlca.sqlwarn[0] == 'W') sqlprint();
+#line 85 "char_array.pgc"
+
+if (sqlca.sqlcode < 0) exit (1);}
+#line 85 "char_array.pgc"
 
   { ECPGtrans(__LINE__, NULL, "commit work");
-#line 59 "char_array.pgc"
+#line 86 "char_array.pgc"
 
-if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 59 "char_array.pgc"
+if (sqlca.sqlwarn[0] == 'W') sqlprint();
+#line 86 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 59 "char_array.pgc"
+#line 86 "char_array.pgc"
 
 
+  printf("\nGOOD-BYE!!\n\n");
+
   { ECPGdisconnect(__LINE__, "ALL");
-#line 61 "char_array.pgc"
+#line 90 "char_array.pgc"
 
-if (sqlca.sqlwarn[0] == 'W') warn ( );
-#line 61 "char_array.pgc"
+if (sqlca.sqlwarn[0] == 'W') sqlprint();
+#line 90 "char_array.pgc"
 
 if (sqlca.sqlcode < 0) exit (1);}
-#line 61 "char_array.pgc"
+#line 90 "char_array.pgc"
 
 
   return 0;
diff --git a/src/interfaces/ecpg/test/expected/compat_oracle-char_array.stderr b/src/interfaces/ecpg/test/expected/compat_oracle-char_array.stderr
index 40d9f7ddb04..24b9715d7b5 100644
--- a/src/interfaces/ecpg/test/expected/compat_oracle-char_array.stderr
+++ b/src/interfaces/ecpg/test/expected/compat_oracle-char_array.stderr
@@ -2,138 +2,174 @@
 [NO_PID]: sqlca: code: 0, state: 00000
 [NO_PID]: ECPGconnect: opening database ecpg1_regression on <DEFAULT> port <DEFAULT>  
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 33: query: create table strdbase ( strval varchar ( 10 ) ); with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 39: query: create table strdbase ( strval varchar ( 10 ) ); with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 33: using PQexec
+[NO_PID]: ecpg_execute on line 39: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 33: OK: CREATE TABLE
+[NO_PID]: ecpg_process_output on line 39: OK: CREATE TABLE
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 34: query: insert into strdbase values ( '' ); with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 40: query: insert into strdbase values ( '' ); with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 34: using PQexec
+[NO_PID]: ecpg_execute on line 40: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 34: OK: INSERT 0 1
+[NO_PID]: ecpg_process_output on line 40: OK: INSERT 0 1
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 35: query: insert into strdbase values ( 'AB' ); with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 41: query: insert into strdbase values ( 'AB' ); with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 35: using PQexec
+[NO_PID]: ecpg_execute on line 41: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 35: OK: INSERT 0 1
+[NO_PID]: ecpg_process_output on line 41: OK: INSERT 0 1
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 36: query: insert into strdbase values ( 'ABCD' ); with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 42: query: insert into strdbase values ( 'ABCD' ); with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 36: using PQexec
+[NO_PID]: ecpg_execute on line 42: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 36: OK: INSERT 0 1
+[NO_PID]: ecpg_process_output on line 42: OK: INSERT 0 1
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 37: query: insert into strdbase values ( 'ABCDE' ); with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 43: query: insert into strdbase values ( 'ABCDE' ); with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 37: using PQexec
+[NO_PID]: ecpg_execute on line 43: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 37: OK: INSERT 0 1
+[NO_PID]: ecpg_process_output on line 43: OK: INSERT 0 1
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 38: query: insert into strdbase values ( 'ABCDEF' ); with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 44: query: insert into strdbase values ( 'ABCDEF' ); with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 38: using PQexec
+[NO_PID]: ecpg_execute on line 44: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 38: OK: INSERT 0 1
+[NO_PID]: ecpg_process_output on line 44: OK: INSERT 0 1
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 39: query: insert into strdbase values ( 'ABCDEFGHIJ' ); with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 45: query: insert into strdbase values ( 'ABCDEFGHIJ' ); with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 39: using PQexec
+[NO_PID]: ecpg_execute on line 45: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 39: OK: INSERT 0 1
+[NO_PID]: ecpg_process_output on line 45: OK: INSERT 0 1
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 42: query: declare C cursor for select strval , strval from strdbase; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 48: query: declare C cursor for select strval , strval from strdbase; with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 42: using PQexec
+[NO_PID]: ecpg_execute on line 48: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 42: OK: DECLARE CURSOR
+[NO_PID]: ecpg_process_output on line 48: OK: DECLARE CURSOR
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 50: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 56: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 50: using PQexec
+[NO_PID]: ecpg_execute on line 56: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 50: correctly got 1 tuples with 2 fields
+[NO_PID]: ecpg_process_output on line 56: correctly got 1 tuples with 2 fields
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 50: RESULT:  offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 56: RESULT:  offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 50: RESULT:  offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 56: RESULT:  offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 50: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 56: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 50: using PQexec
+[NO_PID]: ecpg_execute on line 56: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 50: correctly got 1 tuples with 2 fields
+[NO_PID]: ecpg_process_output on line 56: correctly got 1 tuples with 2 fields
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 50: RESULT: AB offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 56: RESULT: AB offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 50: RESULT: AB offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 56: RESULT: AB offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 50: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 56: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 50: using PQexec
+[NO_PID]: ecpg_execute on line 56: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 50: correctly got 1 tuples with 2 fields
+[NO_PID]: ecpg_process_output on line 56: correctly got 1 tuples with 2 fields
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 50: RESULT: ABCD offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 56: RESULT: ABCD offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 50: RESULT: ABCD offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 56: RESULT: ABCD offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 50: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 56: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 50: using PQexec
+[NO_PID]: ecpg_execute on line 56: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 50: correctly got 1 tuples with 2 fields
+[NO_PID]: ecpg_process_output on line 56: correctly got 1 tuples with 2 fields
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 50: RESULT: ABCDE offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 56: RESULT: ABCDE offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 50: RESULT: ABCDE offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 56: RESULT: ABCDE offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
 Warning: At least one column was truncated
-[NO_PID]: ecpg_execute on line 50: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 56: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 50: using PQexec
+[NO_PID]: ecpg_execute on line 56: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 50: correctly got 1 tuples with 2 fields
+[NO_PID]: ecpg_process_output on line 56: correctly got 1 tuples with 2 fields
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 50: RESULT: ABCDEF offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 56: RESULT: ABCDEF offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 50: RESULT: ABCDEF offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 56: RESULT: ABCDEF offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
 Warning: At least one column was truncated
-[NO_PID]: ecpg_execute on line 50: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 56: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 50: using PQexec
+[NO_PID]: ecpg_execute on line 56: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 50: correctly got 1 tuples with 2 fields
+[NO_PID]: ecpg_process_output on line 56: correctly got 1 tuples with 2 fields
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 50: RESULT: ABCDEFGHIJ offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 56: RESULT: ABCDEFGHIJ offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_get_data on line 50: RESULT: ABCDEFGHIJ offset: -1; array: no
+[NO_PID]: ecpg_get_data on line 56: RESULT: ABCDEFGHIJ offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
 Warning: At least one column was truncated
-[NO_PID]: ecpg_execute on line 50: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 56: query: fetch C; with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 50: using PQexec
+[NO_PID]: ecpg_execute on line 56: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 50: correctly got 0 tuples with 2 fields
+[NO_PID]: ecpg_process_output on line 56: correctly got 0 tuples with 2 fields
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: raising sqlcode 100 on line 50: no data found on line 50
+[NO_PID]: raising sqlcode 100 on line 56: no data found on line 56
 [NO_PID]: sqlca: code: 100, state: 02000
-[NO_PID]: ecpg_execute on line 54: query: close C; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 60: query: close C; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_execute on line 60: using PQexec
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_process_output on line 60: OK: CLOSE CURSOR
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_execute on line 61: query: drop table strdbase; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_execute on line 61: using PQexec
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_process_output on line 61: OK: DROP TABLE
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ECPGtrans on line 62: action "commit work"; connection "ecpg1_regression"
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: prepare_common on line 67: name stmt1; query: "SELECT 123::numeric(3,0), 't'::varchar(2)"
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_execute on line 69: query: declare cur1 cursor for SELECT 123::numeric(3,0), 't'::varchar(2); with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_execute on line 69: using PQexec
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_process_output on line 69: OK: DECLARE CURSOR
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_execute on line 70: query: fetch next from cur1; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_execute on line 70: using PQexec
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_process_output on line 70: correctly got 1 tuples with 2 fields
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_build_native_sqlda on line 70 sqld = 2
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_process_output on line 70: new sqlda was built
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_set_native_sqlda on line 70 row 0 col 0 IS NOT NULL
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_set_native_sqlda on line 70 row 0 col 1 IS NOT NULL
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_get_data on line 70: RESULT: t offset: -1; array: no
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 54: using PQexec
+[NO_PID]: ecpg_process_output on line 70: putting result (1 tuple 2 fields) into sqlda descriptor
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 54: OK: CLOSE CURSOR
+[NO_PID]: ecpg_execute on line 85: query: close cur1; with 0 parameter(s) on connection ecpg1_regression
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 55: query: drop table strdbase; with 0 parameter(s) on connection ecpg1_regression
+[NO_PID]: ecpg_execute on line 85: using PQexec
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_execute on line 55: using PQexec
+[NO_PID]: ecpg_process_output on line 85: OK: CLOSE CURSOR
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ecpg_process_output on line 55: OK: DROP TABLE
+[NO_PID]: ECPGtrans on line 86: action "commit work"; connection "ecpg1_regression"
 [NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ECPGtrans on line 59: action "commit work"; connection "ecpg1_regression"
+[NO_PID]: deallocate_one on line 0: name stmt1
 [NO_PID]: sqlca: code: 0, state: 00000
 [NO_PID]: ecpg_finish: connection ecpg1_regression closed
 [NO_PID]: sqlca: code: 0, state: 00000
diff --git a/src/interfaces/ecpg/test/expected/compat_oracle-char_array.stdout b/src/interfaces/ecpg/test/expected/compat_oracle-char_array.stdout
index d58b3c7be47..3dc3e546c8a 100644
--- a/src/interfaces/ecpg/test/expected/compat_oracle-char_array.stdout
+++ b/src/interfaces/ecpg/test/expected/compat_oracle-char_array.stdout
@@ -6,5 +6,10 @@ Full Str.  :  Short  Ind.
 "ABCDEF    ": "ABCD"  6
 "ABCDEFGHIJ": "ABCD"  10
 
+-----------------
+type    : data
+numeric : "123"
+varchar : "t"
+
 GOOD-BYE!!
 

From 4249393ebb3b6ad6f65a82c9f2854e44575f39c1 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 20 Apr 2023 18:12:32 -0400
Subject: [PATCH 64/78] Use --strip-unneeded when stripping static libraries
 with GNU strip.

We've long used "--strip-unneeded" for shared libraries but plain
"-x" for static libraries when stripping symbols with GNU strip.
There doesn't seem to be any really good reason for that though,
since --strip-unneeded produces smaller output (as "-x" alone
does not remove debug symbols).  Moreover it seems that
llvm-strip, although it identifies as GNU strip, misbehaves when
given "-x" for this purpose.  It's unclear whether that's
intentional or a bug in llvm-strip, but in any case it seems like
changing to use --strip-unneeded in all cases should be a win.

Note that this doesn't change our behavior when dealing with
non-GNU strip.

Per gripes from Ed Maste and Palle Girgensohn.  Back-patch,
in case anyone wants to use llvm-strip with stable branches.

Discussion: https://postgr.es/m/17898-5308d09543463266@postgresql.org
Discussion: https://postgr.es/m/20230420153338.bbj2g5jiyy3afhjz@awork3.anarazel.de
---
 config/programs.m4 | 2 +-
 configure          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/config/programs.m4 b/config/programs.m4
index 1c0bd634530..ec76a77da53 100644
--- a/config/programs.m4
+++ b/config/programs.m4
@@ -327,7 +327,7 @@ AC_DEFUN([PGAC_CHECK_STRIP],
 
   AC_MSG_CHECKING([whether it is possible to strip libraries])
   if test x"$STRIP" != x"" && "$STRIP" -V 2>&1 | grep "GNU strip" >/dev/null; then
-    STRIP_STATIC_LIB="$STRIP -x"
+    STRIP_STATIC_LIB="$STRIP --strip-unneeded"
     STRIP_SHARED_LIB="$STRIP --strip-unneeded"
     AC_MSG_RESULT(yes)
   else
diff --git a/configure b/configure
index 1eba53d8c6d..e91414fb52c 100755
--- a/configure
+++ b/configure
@@ -11814,7 +11814,7 @@ fi
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether it is possible to strip libraries" >&5
 $as_echo_n "checking whether it is possible to strip libraries... " >&6; }
   if test x"$STRIP" != x"" && "$STRIP" -V 2>&1 | grep "GNU strip" >/dev/null; then
-    STRIP_STATIC_LIB="$STRIP -x"
+    STRIP_STATIC_LIB="$STRIP --strip-unneeded"
     STRIP_SHARED_LIB="$STRIP --strip-unneeded"
     { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }

From e3995679fccd13e6323efee763c47ca6be8bdd42 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Sun, 23 Apr 2023 13:55:49 +0300
Subject: [PATCH 65/78] Fix custom validators call in build_local_reloptions()

We need to call them only when validate == true.

Backpatch to 13, where opclass options were introduced.

Reported-by: Tom Lane
Discussion: https://postgr.es/m/2656633.1681831542%40sss.pgh.pa.us
Reviewed-by: Tom Lane, Pavel Borisov
Backpatch-through: 13
---
 src/backend/access/common/reloptions.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index fc9edaf7f7c..c65b368bb23 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -2009,8 +2009,9 @@ build_local_reloptions(local_relopts *relopts, Datum options, bool validate)
 	fillRelOptions(opts, relopts->relopt_struct_size, vals, noptions, validate,
 				   elems, noptions);
 
-	foreach(lc, relopts->validators)
-		((relopts_validator) lfirst(lc)) (opts, vals, noptions);
+	if (validate)
+		foreach(lc, relopts->validators)
+			((relopts_validator) lfirst(lc)) (opts, vals, noptions);
 
 	if (elems)
 		pfree(elems);

From 37e317875701670360c761229dbac6efb7756b29 Mon Sep 17 00:00:00 2001
From: Daniel Gustafsson <dgustafsson@postgresql.org>
Date: Mon, 24 Apr 2023 11:16:17 +0200
Subject: [PATCH 66/78] Remove duplicate lines of code

Commit 6df7a9698bb accidentally included two identical prototypes for
default_multirange_selectivity() and commit 086cf1458c6 added a break;
statement where one was already present, thus duplicating it.  While
there is no bug caused by this, fix by removing the duplicated lines
as they provide no value.

Backpatch the fix for duplicate prototypes to v14 and the duplicate
break statement fix to all supported branches to avoid backpatching
hazards due to the removal.

Reported-by: Anton Voloshin <a.voloshin@postgrespro.ru>
Discussion: https://postgr.es/m/0e69cb60-0176-f6d0-7e15-6478b7d85724@postgrespro.ru
---
 src/backend/utils/adt/multirangetypes_selfuncs.c | 1 -
 src/interfaces/ecpg/preproc/variable.c           | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/backend/utils/adt/multirangetypes_selfuncs.c b/src/backend/utils/adt/multirangetypes_selfuncs.c
index 8d085ca8d4f..0360a17d8c6 100644
--- a/src/backend/utils/adt/multirangetypes_selfuncs.c
+++ b/src/backend/utils/adt/multirangetypes_selfuncs.c
@@ -35,7 +35,6 @@ static double calc_multirangesel(TypeCacheEntry *typcache,
 								 VariableStatData *vardata,
 								 const MultirangeType *constval, Oid operator);
 static double default_multirange_selectivity(Oid operator);
-static double default_multirange_selectivity(Oid operator);
 static double calc_hist_selectivity(TypeCacheEntry *typcache,
 									VariableStatData *vardata,
 									const MultirangeType *constval,
diff --git a/src/interfaces/ecpg/preproc/variable.c b/src/interfaces/ecpg/preproc/variable.c
index 887d479e735..8926676ab71 100644
--- a/src/interfaces/ecpg/preproc/variable.c
+++ b/src/interfaces/ecpg/preproc/variable.c
@@ -105,7 +105,6 @@ find_struct_member(char *name, char *str, struct ECPGstruct_member *members, int
 						else
 							return find_struct_member(name, ++end, members->type->u.members, brace_level);
 						break;
-						break;
 					case '.':
 						if (members->type->type == ECPGt_array)
 							return find_struct_member(name, end, members->type->u.element->u.members, brace_level);

From 7105323974b198406ad2ec19ec245a54390b4a08 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 24 Apr 2023 14:19:46 -0400
Subject: [PATCH 67/78] Fix memory leakage in plpgsql DO blocks that use cast
 expressions.

Commit 04fe805a1 modified plpgsql so that datatype casts make use of
expressions cached by plancache.c, in place of older code where these
expression trees were managed by plpgsql itself.  However, I (tgl)
forgot that we use a separate, shorter-lived cast info hashtable in
DO blocks.  The new mechanism thus resulted in session-lifespan
leakage of the plancache data once a DO block containing one or more
casts terminated.  To fix, split the cast hash table into two parts,
one that tracks only the plancache's CachedExpressions and one that
tracks the expression state trees generated from them.  DO blocks need
their own expression state trees and hence their own version of the
second hash table, but there's no reason they can't share the
CachedExpressions with regular plpgsql functions.

Per report from Ajit Awekar.  Back-patch to v12 where the issue
was introduced.

Ajit Awekar and Tom Lane

Discussion: https://postgr.es/m/CAHv6PyrNaqdvyWUspzd3txYQguFTBSnhx+m6tS06TnM+KWc_LQ@mail.gmail.com
---
 src/pl/plpgsql/src/pl_exec.c | 94 +++++++++++++++++++++++++-----------
 src/pl/plpgsql/src/plpgsql.h |  2 +-
 2 files changed, 66 insertions(+), 30 deletions(-)

diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index 9a16ac70d2e..f91a2e6feed 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -136,18 +136,22 @@ static ResourceOwner shared_simple_eval_resowner = NULL;
 	MemoryContextAllocZero(get_eval_mcontext(estate), sz)
 
 /*
- * We use a session-wide hash table for caching cast information.
+ * We use two session-wide hash tables for caching cast information.
  *
- * Once built, the compiled expression trees (cast_expr fields) survive for
- * the life of the session.  At some point it might be worth invalidating
- * those after pg_cast changes, but for the moment we don't bother.
+ * cast_expr_hash entries (of type plpgsql_CastExprHashEntry) hold compiled
+ * expression trees for casts.  These survive for the life of the session and
+ * are shared across all PL/pgSQL functions and DO blocks.  At some point it
+ * might be worth invalidating them after pg_cast changes, but for the moment
+ * we don't bother.
  *
- * The evaluation state trees (cast_exprstate) are managed in the same way as
- * simple expressions (i.e., we assume cast expressions are always simple).
+ * There is a separate hash table shared_cast_hash (with entries of type
+ * plpgsql_CastHashEntry) containing evaluation state trees for these
+ * expressions, which are managed in the same way as simple expressions
+ * (i.e., we assume cast expressions are always simple).
  *
- * As with simple expressions, DO blocks don't use the shared hash table but
- * must have their own.  This isn't ideal, but we don't want to deal with
- * multiple simple_eval_estates within a DO block.
+ * As with simple expressions, DO blocks don't use the shared_cast_hash table
+ * but must have their own evaluation state trees.  This isn't ideal, but we
+ * don't want to deal with multiple simple_eval_estates within a DO block.
  */
 typedef struct					/* lookup key for cast info */
 {
@@ -158,18 +162,24 @@ typedef struct					/* lookup key for cast info */
 	int32		dsttypmod;		/* destination typmod for cast */
 } plpgsql_CastHashKey;
 
-typedef struct					/* cast_hash table entry */
+typedef struct					/* cast_expr_hash table entry */
 {
 	plpgsql_CastHashKey key;	/* hash key --- MUST BE FIRST */
 	Expr	   *cast_expr;		/* cast expression, or NULL if no-op cast */
 	CachedExpression *cast_cexpr;	/* cached expression backing the above */
+} plpgsql_CastExprHashEntry;
+
+typedef struct					/* cast_hash table entry */
+{
+	plpgsql_CastHashKey key;	/* hash key --- MUST BE FIRST */
+	plpgsql_CastExprHashEntry *cast_centry; /* link to matching expr entry */
 	/* ExprState is valid only when cast_lxid matches current LXID */
 	ExprState  *cast_exprstate; /* expression's eval tree */
 	bool		cast_in_use;	/* true while we're executing eval tree */
 	LocalTransactionId cast_lxid;
 } plpgsql_CastHashEntry;
 
-static MemoryContext shared_cast_context = NULL;
+static HTAB *cast_expr_hash = NULL;
 static HTAB *shared_cast_hash = NULL;
 
 /*
@@ -3985,6 +3995,17 @@ plpgsql_estate_setup(PLpgSQL_execstate *estate,
 	estate->paramLI->parserSetupArg = NULL; /* filled during use */
 	estate->paramLI->numParams = estate->ndatums;
 
+	/* Create the session-wide cast-expression hash if we didn't already */
+	if (cast_expr_hash == NULL)
+	{
+		ctl.keysize = sizeof(plpgsql_CastHashKey);
+		ctl.entrysize = sizeof(plpgsql_CastExprHashEntry);
+		cast_expr_hash = hash_create("PLpgSQL cast expressions",
+									 16,	/* start small and extend */
+									 &ctl,
+									 HASH_ELEM | HASH_BLOBS);
+	}
+
 	/* set up for use of appropriate simple-expression EState and cast hash */
 	if (simple_eval_estate)
 	{
@@ -3997,7 +4018,6 @@ plpgsql_estate_setup(PLpgSQL_execstate *estate,
 										16, /* start small and extend */
 										&ctl,
 										HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
-		estate->cast_hash_context = CurrentMemoryContext;
 	}
 	else
 	{
@@ -4005,19 +4025,14 @@ plpgsql_estate_setup(PLpgSQL_execstate *estate,
 		/* Create the session-wide cast-info hash table if we didn't already */
 		if (shared_cast_hash == NULL)
 		{
-			shared_cast_context = AllocSetContextCreate(TopMemoryContext,
-														"PLpgSQL cast info",
-														ALLOCSET_DEFAULT_SIZES);
 			ctl.keysize = sizeof(plpgsql_CastHashKey);
 			ctl.entrysize = sizeof(plpgsql_CastHashEntry);
-			ctl.hcxt = shared_cast_context;
 			shared_cast_hash = hash_create("PLpgSQL cast cache",
 										   16,	/* start small and extend */
 										   &ctl,
-										   HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+										   HASH_ELEM | HASH_BLOBS);
 		}
 		estate->cast_hash = shared_cast_hash;
-		estate->cast_hash_context = shared_cast_context;
 	}
 	/* likewise for the simple-expression resource owner */
 	if (simple_eval_resowner)
@@ -7719,6 +7734,7 @@ get_cast_hashentry(PLpgSQL_execstate *estate,
 {
 	plpgsql_CastHashKey cast_key;
 	plpgsql_CastHashEntry *cast_entry;
+	plpgsql_CastExprHashEntry *expr_entry;
 	bool		found;
 	LocalTransactionId curlxid;
 	MemoryContext oldcontext;
@@ -7732,10 +7748,28 @@ get_cast_hashentry(PLpgSQL_execstate *estate,
 													   (void *) &cast_key,
 													   HASH_ENTER, &found);
 	if (!found)					/* initialize if new entry */
-		cast_entry->cast_cexpr = NULL;
+	{
+		/* We need a second lookup to see if a cast_expr_hash entry exists */
+		expr_entry = (plpgsql_CastExprHashEntry *) hash_search(cast_expr_hash,
+															   &cast_key,
+															   HASH_ENTER,
+															   &found);
+		if (!found)				/* initialize if new expr entry */
+			expr_entry->cast_cexpr = NULL;
 
-	if (cast_entry->cast_cexpr == NULL ||
-		!cast_entry->cast_cexpr->is_valid)
+		cast_entry->cast_centry = expr_entry;
+		cast_entry->cast_exprstate = NULL;
+		cast_entry->cast_in_use = false;
+		cast_entry->cast_lxid = InvalidLocalTransactionId;
+	}
+	else
+	{
+		/* Use always-valid link to avoid a second hash lookup */
+		expr_entry = cast_entry->cast_centry;
+	}
+
+	if (expr_entry->cast_cexpr == NULL ||
+		!expr_entry->cast_cexpr->is_valid)
 	{
 		/*
 		 * We've not looked up this coercion before, or we have but the cached
@@ -7748,10 +7782,10 @@ get_cast_hashentry(PLpgSQL_execstate *estate,
 		/*
 		 * Drop old cached expression if there is one.
 		 */
-		if (cast_entry->cast_cexpr)
+		if (expr_entry->cast_cexpr)
 		{
-			FreeCachedExpression(cast_entry->cast_cexpr);
-			cast_entry->cast_cexpr = NULL;
+			FreeCachedExpression(expr_entry->cast_cexpr);
+			expr_entry->cast_cexpr = NULL;
 		}
 
 		/*
@@ -7832,9 +7866,11 @@ get_cast_hashentry(PLpgSQL_execstate *estate,
 			((RelabelType *) cast_expr)->arg == (Expr *) placeholder)
 			cast_expr = NULL;
 
-		/* Now we can fill in the hashtable entry. */
-		cast_entry->cast_cexpr = cast_cexpr;
-		cast_entry->cast_expr = (Expr *) cast_expr;
+		/* Now we can fill in the expression hashtable entry. */
+		expr_entry->cast_cexpr = cast_cexpr;
+		expr_entry->cast_expr = (Expr *) cast_expr;
+
+		/* Be sure to reset the exprstate hashtable entry, too. */
 		cast_entry->cast_exprstate = NULL;
 		cast_entry->cast_in_use = false;
 		cast_entry->cast_lxid = InvalidLocalTransactionId;
@@ -7843,7 +7879,7 @@ get_cast_hashentry(PLpgSQL_execstate *estate,
 	}
 
 	/* Done if we have determined that this is a no-op cast. */
-	if (cast_entry->cast_expr == NULL)
+	if (expr_entry->cast_expr == NULL)
 		return NULL;
 
 	/*
@@ -7862,7 +7898,7 @@ get_cast_hashentry(PLpgSQL_execstate *estate,
 	if (cast_entry->cast_lxid != curlxid || cast_entry->cast_in_use)
 	{
 		oldcontext = MemoryContextSwitchTo(estate->simple_eval_estate->es_query_cxt);
-		cast_entry->cast_exprstate = ExecInitExpr(cast_entry->cast_expr, NULL);
+		cast_entry->cast_exprstate = ExecInitExpr(expr_entry->cast_expr, NULL);
 		cast_entry->cast_in_use = false;
 		cast_entry->cast_lxid = curlxid;
 		MemoryContextSwitchTo(oldcontext);
diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h
index 00756d1b9df..c58b79ecc35 100644
--- a/src/pl/plpgsql/src/plpgsql.h
+++ b/src/pl/plpgsql/src/plpgsql.h
@@ -1077,7 +1077,7 @@ typedef struct PLpgSQL_execstate
 
 	/* lookup table to use for executing type casts */
 	HTAB	   *cast_hash;
-	MemoryContext cast_hash_context;
+	MemoryContext cast_hash_context;	/* not used; now always NULL */
 
 	/* memory context for statement-lifespan temporary values */
 	MemoryContext stmt_mcontext;	/* current stmt context, or NULL if none */

From 752f692e74aa2a9fd8a11f00476dc59af6be9e9d Mon Sep 17 00:00:00 2001
From: Daniel Gustafsson <dgustafsson@postgresql.org>
Date: Tue, 25 Apr 2023 13:54:10 +0200
Subject: [PATCH 68/78] Fix vacuum_cost_delay check for balance calculation.

Commit 1021bd6a89 excluded autovacuum workers from cost-limit balance
calculations when per-relation options were set.  The code checks for
limit and cost_delay being greater than zero, but since cost_delay can
be set to zero the test needs to check for greater than or equal to zero.

Backpatch to all supported branches since 1021bd6a89 was backpatched
all the way at the time.

Author: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Daniel Gustafsson <daniel@yesql.se>
Discussion: https://postgr.es/m/CAD21AoBS7o6Ljt_vfqPQPf67AhzKu3fR0iqk8B=vVYczMugKMQ@mail.gmail.com
Backpatch-through: v11 (all supported branches)
---
 src/backend/postmaster/autovacuum.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 638494a080c..898b681f792 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -3084,7 +3084,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 		 */
 		tab->at_dobalance =
 			!(avopts && (avopts->vacuum_cost_limit > 0 ||
-						 avopts->vacuum_cost_delay > 0));
+						 avopts->vacuum_cost_delay >= 0));
 
 		/*
 		 * When we decide to do vacuum or analyze, the existing stats cannot

From 93e904fd54190c4f489b170525b350ce0b86189a Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Wed, 26 Apr 2023 07:30:47 +0900
Subject: [PATCH 69/78] Re-add tracking of wait event SLRUFlushSync

SLRUFlushSync was accidentally removed in dee663f, which moved
the flush of the SLRU files to the checkpointer, so add it back.  The
issue has been noticed by Thomas when checking for orphaned wait
events.

Author: Thomas Munro
Reviewed-by: Bharath Rupireddy
Discussion: https://postgr.es/m/CA+hUKGK6tqm59KuF1z+h5Y8fsWcu5v8+84kduSHwRzwjB2aa_A@mail.gmail.com
---
 src/backend/access/transam/slru.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 6da20d7531d..f108f83dd7e 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -1617,7 +1617,9 @@ SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
 	if (fd < 0)
 		return -1;
 
+	pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC);
 	result = pg_fsync(fd);
+	pgstat_report_wait_end();
 	save_errno = errno;
 
 	CloseTransientFile(fd);

From 2f0cebcd33ba22b33a406271f8f4677cce45c64c Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 27 Apr 2023 11:55:06 -0400
Subject: [PATCH 70/78] In hstore_plpython, avoid crashing when return value
 isn't a mapping.

Python 3 changed the behavior of PyMapping_Check(), breaking the
test in plpython_to_hstore() that verifies whether a function result
to be transformed is acceptable.  A backwards-compatible fix is to
first verify that the object doesn't pass PySequence_Check().

Perhaps accidentally, our other uses of PyMapping_Check() already
follow uses of PySequence_Check(), so that no other bugs were
created by this change.

Per bug #17908 from Alexander Lakhin.  Back-patch to all supported
branches.

Dmitry Dolgov and Tom Lane

Discussion: https://postgr.es/m/17908-3f19a125d56a11d6@postgresql.org
---
 contrib/hstore_plpython/expected/hstore_plpython.out | 11 +++++++++++
 contrib/hstore_plpython/hstore_plpython.c            |  8 +++++++-
 contrib/hstore_plpython/sql/hstore_plpython.sql      | 11 +++++++++++
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/contrib/hstore_plpython/expected/hstore_plpython.out b/contrib/hstore_plpython/expected/hstore_plpython.out
index ecf1dd61bc1..57d83fa2db5 100644
--- a/contrib/hstore_plpython/expected/hstore_plpython.out
+++ b/contrib/hstore_plpython/expected/hstore_plpython.out
@@ -32,6 +32,17 @@ INFO:  [('aa', 'bb'), ('cc', None)]
       2
 (1 row)
 
+-- test that a non-mapping result is correctly rejected
+CREATE FUNCTION test1bad() RETURNS hstore
+LANGUAGE plpythonu
+TRANSFORM FOR TYPE hstore
+AS $$
+return "foo"
+$$;
+SELECT test1bad();
+ERROR:  not a Python mapping
+CONTEXT:  while creating return value
+PL/Python function "test1bad"
 -- test hstore[] -> python
 CREATE FUNCTION test1arr(val hstore[]) RETURNS int
 LANGUAGE plpythonu
diff --git a/contrib/hstore_plpython/hstore_plpython.c b/contrib/hstore_plpython/hstore_plpython.c
index 39bad558023..372041dd253 100644
--- a/contrib/hstore_plpython/hstore_plpython.c
+++ b/contrib/hstore_plpython/hstore_plpython.c
@@ -133,7 +133,13 @@ plpython_to_hstore(PG_FUNCTION_ARGS)
 	HStore	   *volatile out;
 
 	dict = (PyObject *) PG_GETARG_POINTER(0);
-	if (!PyMapping_Check(dict))
+
+	/*
+	 * As of Python 3, PyMapping_Check() is unreliable unless one first checks
+	 * that the object isn't a sequence.  (Cleaner solutions exist, but not
+	 * before Python 3.10, which we're not prepared to require yet.)
+	 */
+	if (PySequence_Check(dict) || !PyMapping_Check(dict))
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("not a Python mapping")));
diff --git a/contrib/hstore_plpython/sql/hstore_plpython.sql b/contrib/hstore_plpython/sql/hstore_plpython.sql
index b6d98b7dd53..1aa4416512a 100644
--- a/contrib/hstore_plpython/sql/hstore_plpython.sql
+++ b/contrib/hstore_plpython/sql/hstore_plpython.sql
@@ -27,6 +27,17 @@ $$;
 SELECT test1n('aa=>bb, cc=>NULL'::hstore);
 
 
+-- test that a non-mapping result is correctly rejected
+CREATE FUNCTION test1bad() RETURNS hstore
+LANGUAGE plpythonu
+TRANSFORM FOR TYPE hstore
+AS $$
+return "foo"
+$$;
+
+SELECT test1bad();
+
+
 -- test hstore[] -> python
 CREATE FUNCTION test1arr(val hstore[]) RETURNS int
 LANGUAGE plpythonu

From 4e4cb975a524f22039be8652d90d5cd66b86c356 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Fri, 28 Apr 2023 19:29:38 +0900
Subject: [PATCH 71/78] Fix crashes with CREATE SCHEMA AUTHORIZATION and schema
 elements

CREATE SCHEMA AUTHORIZATION with appended schema elements can lead to
crashes when comparing the schema name of the query with the schemas
used in the qualification of some clauses in the elements' queries.

The origin of the problem is that the transformation routine for the
elements listed in a CREATE SCHEMA query uses as new, expected, schema
name the one listed in CreateSchemaStmt itself.  However, depending on
the query, CreateSchemaStmt.schemaname may be NULL, being computed
instead from the role specification of the query given by the
AUTHORIZATION clause, that could be either:
- A user name string, with the new schema name being set to the same
value as the role given.
- Guessed from CURRENT_ROLE, CURRENT_USER or SESSION_USER, with a new
schema name computed from the security context where CREATE SCHEMA is
running.

Regression tests are added for CREATE SCHEMA with some appended elements
(some of them with schema qualifications), covering also some role
specification patterns.

While on it, this simplifies the context structure used during the
transformation of the elements listed in a CREATE SCHEMA query by
removing the fields for the role specification and the role type.  They
were not used, and for the role specification this could be confusing as
the schema name may be extracted from that at the beginning of
CreateSchemaCommand().

This issue has existed for a long time, so backpatch down to all the versions
supported.

Reported-by: Song Hongyu
Author: Michael Paquier
Reviewed-by: Richard Guo
Discussion: https://postgr.es/m/17909-f65c12dfc5f0451d@postgresql.org
Backpatch-through: 11
---
 src/backend/commands/schemacmds.c           |  3 +-
 src/backend/parser/parse_utilcmd.c          | 38 ++++----
 src/include/parser/parse_utilcmd.h          |  3 +-
 src/test/regress/expected/create_schema.out | 98 +++++++++++++++++++++
 src/test/regress/parallel_schedule          |  2 +-
 src/test/regress/sql/create_schema.sql      | 70 +++++++++++++++
 6 files changed, 192 insertions(+), 22 deletions(-)
 create mode 100644 src/test/regress/expected/create_schema.out
 create mode 100644 src/test/regress/sql/create_schema.sql

diff --git a/src/backend/commands/schemacmds.c b/src/backend/commands/schemacmds.c
index 03f96bb6499..b06b1c55cc6 100644
--- a/src/backend/commands/schemacmds.c
+++ b/src/backend/commands/schemacmds.c
@@ -296,7 +296,8 @@ CreateSchemaCommand(CreateSchemaStmt *stmt, const char *queryString,
 	 * we cannot, in general, run parse analysis on one statement until we
 	 * have actually executed the prior ones.
 	 */
-	parsetree_list = transformCreateSchemaStmt(stmt);
+	parsetree_list = transformCreateSchemaStmtElements(stmt->schemaElts,
+													   schemaName);
 
 	/*
 	 * Execute each command contained in the CREATE SCHEMA.  Since the grammar
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index 358ac56da30..e7cdb8d7f17 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -111,12 +111,10 @@ typedef struct
 	MemoryContext tempCtx;
 } CreateStmtContext;
 
-/* State shared by transformCreateSchemaStmt and its subroutines */
+/* State shared by transformCreateSchemaStmtElements and its subroutines */
 typedef struct
 {
-	const char *stmtType;		/* "CREATE SCHEMA" or "ALTER SCHEMA" */
-	char	   *schemaname;		/* name of schema */
-	RoleSpec   *authrole;		/* owner of schema */
+	const char *schemaname;		/* name of schema */
 	List	   *sequences;		/* CREATE SEQUENCE items */
 	List	   *tables;			/* CREATE TABLE items */
 	List	   *views;			/* CREATE VIEW items */
@@ -151,7 +149,7 @@ static void transformCheckConstraints(CreateStmtContext *cxt,
 static void transformConstraintAttrs(CreateStmtContext *cxt,
 									 List *constraintList);
 static void transformColumnType(CreateStmtContext *cxt, ColumnDef *column);
-static void setSchemaName(char *context_schema, char **stmt_schema_name);
+static void setSchemaName(const char *context_schema, char **stmt_schema_name);
 static void transformPartitionCmd(CreateStmtContext *cxt, PartitionCmd *cmd);
 static List *transformPartitionRangeBounds(ParseState *pstate, List *blist,
 										   Relation parent, PartitionKey key);
@@ -4984,14 +4982,18 @@ transformColumnType(CreateStmtContext *cxt, ColumnDef *column)
 
 
 /*
- * transformCreateSchemaStmt -
- *	  analyzes the CREATE SCHEMA statement
+ * transformCreateSchemaStmtElements -
+ *	  analyzes the elements of a CREATE SCHEMA statement
  *
- * Split the schema element list into individual commands and place
- * them in the result list in an order such that there are no forward
- * references (e.g. GRANT to a table created later in the list). Note
- * that the logic we use for determining forward references is
- * presently quite incomplete.
+ * Split the schema element list from a CREATE SCHEMA statement into
+ * individual commands and place them in the result list in an order
+ * such that there are no forward references (e.g. GRANT to a table
+ * created later in the list). Note that the logic we use for determining
+ * forward references is presently quite incomplete.
+ *
+ * "schemaName" is the name of the schema that will be used for the creation
+ * of the objects listed, that may be compiled from the schema name defined
+ * in the statement or a role specification.
  *
  * SQL also allows constraints to make forward references, so thumb through
  * the table columns and move forward references to a posterior alter-table
@@ -5007,15 +5009,13 @@ transformColumnType(CreateStmtContext *cxt, ColumnDef *column)
  * extent.
  */
 List *
-transformCreateSchemaStmt(CreateSchemaStmt *stmt)
+transformCreateSchemaStmtElements(List *schemaElts, const char *schemaName)
 {
 	CreateSchemaStmtContext cxt;
 	List	   *result;
 	ListCell   *elements;
 
-	cxt.stmtType = "CREATE SCHEMA";
-	cxt.schemaname = stmt->schemaname;
-	cxt.authrole = (RoleSpec *) stmt->authrole;
+	cxt.schemaname = schemaName;
 	cxt.sequences = NIL;
 	cxt.tables = NIL;
 	cxt.views = NIL;
@@ -5027,7 +5027,7 @@ transformCreateSchemaStmt(CreateSchemaStmt *stmt)
 	 * Run through each schema element in the schema element list. Separate
 	 * statements by type, and do preliminary analysis.
 	 */
-	foreach(elements, stmt->schemaElts)
+	foreach(elements, schemaElts)
 	{
 		Node	   *element = lfirst(elements);
 
@@ -5112,10 +5112,10 @@ transformCreateSchemaStmt(CreateSchemaStmt *stmt)
  *		Set or check schema name in an element of a CREATE SCHEMA command
  */
 static void
-setSchemaName(char *context_schema, char **stmt_schema_name)
+setSchemaName(const char *context_schema, char **stmt_schema_name)
 {
 	if (*stmt_schema_name == NULL)
-		*stmt_schema_name = context_schema;
+		*stmt_schema_name = unconstify(char *, context_schema);
 	else if (strcmp(context_schema, *stmt_schema_name) != 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_SCHEMA_DEFINITION),
diff --git a/src/include/parser/parse_utilcmd.h b/src/include/parser/parse_utilcmd.h
index 9fed9ba6d87..f1f51060038 100644
--- a/src/include/parser/parse_utilcmd.h
+++ b/src/include/parser/parse_utilcmd.h
@@ -33,7 +33,8 @@ extern CreateStatsStmt *transformStatsStmt(Oid relid, CreateStatsStmt *stmt,
 										   const char *queryString);
 extern void transformRuleStmt(RuleStmt *stmt, const char *queryString,
 							  List **actions, Node **whereClause);
-extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt);
+extern List *transformCreateSchemaStmtElements(List *schemaElts,
+											   const char *schemaName);
 extern PartitionBoundSpec *transformPartitionBound(ParseState *pstate, Relation parent,
 												   PartitionKey key, PartitionBoundSpec *spec);
 extern List *expandTableLikeClause(RangeVar *heapRel,
diff --git a/src/test/regress/expected/create_schema.out b/src/test/regress/expected/create_schema.out
new file mode 100644
index 00000000000..93302a07efc
--- /dev/null
+++ b/src/test/regress/expected/create_schema.out
@@ -0,0 +1,98 @@
+--
+-- CREATE_SCHEMA
+--
+-- Schema creation with elements.
+CREATE ROLE regress_create_schema_role SUPERUSER;
+-- Cases where schema creation fails as objects are qualified with a schema
+-- that does not match with what's expected.
+-- This checks all the object types that include schema qualifications.
+CREATE SCHEMA AUTHORIZATION regress_create_schema_role
+  CREATE SEQUENCE schema_not_existing.seq;
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_create_schema_role)
+CREATE SCHEMA AUTHORIZATION regress_create_schema_role
+  CREATE TABLE schema_not_existing.tab (id int);
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_create_schema_role)
+CREATE SCHEMA AUTHORIZATION regress_create_schema_role
+  CREATE VIEW schema_not_existing.view AS SELECT 1;
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_create_schema_role)
+CREATE SCHEMA AUTHORIZATION regress_create_schema_role
+  CREATE INDEX ON schema_not_existing.tab (id);
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_create_schema_role)
+CREATE SCHEMA AUTHORIZATION regress_create_schema_role
+  CREATE TRIGGER schema_trig BEFORE INSERT ON schema_not_existing.tab
+  EXECUTE FUNCTION schema_trig.no_func();
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_create_schema_role)
+-- Again, with a role specification and no schema names.
+SET ROLE regress_create_schema_role;
+CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
+  CREATE SEQUENCE schema_not_existing.seq;
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_create_schema_role)
+CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
+  CREATE TABLE schema_not_existing.tab (id int);
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_create_schema_role)
+CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
+  CREATE VIEW schema_not_existing.view AS SELECT 1;
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_create_schema_role)
+CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
+  CREATE INDEX ON schema_not_existing.tab (id);
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_create_schema_role)
+CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
+  CREATE TRIGGER schema_trig BEFORE INSERT ON schema_not_existing.tab
+  EXECUTE FUNCTION schema_trig.no_func();
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_create_schema_role)
+-- Again, with a schema name and a role specification.
+CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
+  CREATE SEQUENCE schema_not_existing.seq;
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_schema_1)
+CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
+  CREATE TABLE schema_not_existing.tab (id int);
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_schema_1)
+CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
+  CREATE VIEW schema_not_existing.view AS SELECT 1;
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_schema_1)
+CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
+  CREATE INDEX ON schema_not_existing.tab (id);
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_schema_1)
+CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
+  CREATE TRIGGER schema_trig BEFORE INSERT ON schema_not_existing.tab
+  EXECUTE FUNCTION schema_trig.no_func();
+ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_schema_1)
+RESET ROLE;
+-- Cases where the schema creation succeeds.
+-- The schema created matches the role name.
+CREATE SCHEMA AUTHORIZATION regress_create_schema_role
+  CREATE TABLE regress_create_schema_role.tab (id int);
+\d regress_create_schema_role.tab
+      Table "regress_create_schema_role.tab"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ id     | integer |           |          | 
+
+DROP SCHEMA regress_create_schema_role CASCADE;
+NOTICE:  drop cascades to table regress_create_schema_role.tab
+-- Again, with a different role specification and no schema names.
+SET ROLE regress_create_schema_role;
+CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
+  CREATE TABLE regress_create_schema_role.tab (id int);
+\d regress_create_schema_role.tab
+      Table "regress_create_schema_role.tab"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ id     | integer |           |          | 
+
+DROP SCHEMA regress_create_schema_role CASCADE;
+NOTICE:  drop cascades to table tab
+-- Again, with a schema name and a role specification.
+CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
+  CREATE TABLE regress_schema_1.tab (id int);
+\d regress_schema_1.tab
+           Table "regress_schema_1.tab"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ id     | integer |           |          | 
+
+DROP SCHEMA regress_schema_1 CASCADE;
+NOTICE:  drop cascades to table regress_schema_1.tab
+RESET ROLE;
+-- Clean up
+DROP ROLE regress_create_schema_role;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index b2ed818f677..5adb7d9df01 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -58,7 +58,7 @@ test: copy copyselect copydml insert insert_conflict
 # ----------
 # More groups of parallel tests
 # ----------
-test: create_misc create_operator create_procedure
+test: create_misc create_operator create_procedure create_schema
 # These depend on create_misc and create_operator
 test: create_index create_index_spgist create_view index_including index_including_gist
 # Depends on things setup for create_index
diff --git a/src/test/regress/sql/create_schema.sql b/src/test/regress/sql/create_schema.sql
new file mode 100644
index 00000000000..1b7064247a1
--- /dev/null
+++ b/src/test/regress/sql/create_schema.sql
@@ -0,0 +1,70 @@
+--
+-- CREATE_SCHEMA
+--
+
+-- Schema creation with elements.
+
+CREATE ROLE regress_create_schema_role SUPERUSER;
+
+-- Cases where schema creation fails as objects are qualified with a schema
+-- that does not match with what's expected.
+-- This checks all the object types that include schema qualifications.
+CREATE SCHEMA AUTHORIZATION regress_create_schema_role
+  CREATE SEQUENCE schema_not_existing.seq;
+CREATE SCHEMA AUTHORIZATION regress_create_schema_role
+  CREATE TABLE schema_not_existing.tab (id int);
+CREATE SCHEMA AUTHORIZATION regress_create_schema_role
+  CREATE VIEW schema_not_existing.view AS SELECT 1;
+CREATE SCHEMA AUTHORIZATION regress_create_schema_role
+  CREATE INDEX ON schema_not_existing.tab (id);
+CREATE SCHEMA AUTHORIZATION regress_create_schema_role
+  CREATE TRIGGER schema_trig BEFORE INSERT ON schema_not_existing.tab
+  EXECUTE FUNCTION schema_trig.no_func();
+-- Again, with a role specification and no schema names.
+SET ROLE regress_create_schema_role;
+CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
+  CREATE SEQUENCE schema_not_existing.seq;
+CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
+  CREATE TABLE schema_not_existing.tab (id int);
+CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
+  CREATE VIEW schema_not_existing.view AS SELECT 1;
+CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
+  CREATE INDEX ON schema_not_existing.tab (id);
+CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
+  CREATE TRIGGER schema_trig BEFORE INSERT ON schema_not_existing.tab
+  EXECUTE FUNCTION schema_trig.no_func();
+-- Again, with a schema name and a role specification.
+CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
+  CREATE SEQUENCE schema_not_existing.seq;
+CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
+  CREATE TABLE schema_not_existing.tab (id int);
+CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
+  CREATE VIEW schema_not_existing.view AS SELECT 1;
+CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
+  CREATE INDEX ON schema_not_existing.tab (id);
+CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
+  CREATE TRIGGER schema_trig BEFORE INSERT ON schema_not_existing.tab
+  EXECUTE FUNCTION schema_trig.no_func();
+RESET ROLE;
+
+-- Cases where the schema creation succeeds.
+-- The schema created matches the role name.
+CREATE SCHEMA AUTHORIZATION regress_create_schema_role
+  CREATE TABLE regress_create_schema_role.tab (id int);
+\d regress_create_schema_role.tab
+DROP SCHEMA regress_create_schema_role CASCADE;
+-- Again, with a different role specification and no schema names.
+SET ROLE regress_create_schema_role;
+CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
+  CREATE TABLE regress_create_schema_role.tab (id int);
+\d regress_create_schema_role.tab
+DROP SCHEMA regress_create_schema_role CASCADE;
+-- Again, with a schema name and a role specification.
+CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
+  CREATE TABLE regress_schema_1.tab (id int);
+\d regress_schema_1.tab
+DROP SCHEMA regress_schema_1 CASCADE;
+RESET ROLE;
+
+-- Clean up
+DROP ROLE regress_create_schema_role;

From 0d94316471d32f28dc0bf34b958df418367bc2e9 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 29 Apr 2023 13:06:44 -0400
Subject: [PATCH 72/78] Tighten array dimensionality checks in Perl -> SQL
 array conversion.

plperl_array_to_datum() wasn't sufficiently careful about checking
that nested lists represent a rectangular array structure; it would
accept inputs such as "[1, []]".  This is a bit related to the
PL/Python bug fixed in commit 81eaaf65e, but it doesn't seem to
provide any direct route to a memory stomp.  Instead the likely
failure mode is for makeMdArrayResult to be passed fewer Datums than
the claimed array dimensionality requires, possibly leading to a wild
pointer dereference and SIGSEGV.

Per report from Alexander Lakhin.  It's been broken for a long
time, so back-patch to all supported branches.

Discussion: https://postgr.es/m/5ebae5e4-d401-fadf-8585-ac3eaf53219c@gmail.com
---
 src/pl/plperl/expected/plperl_array.out | 43 +++++++++++++++++
 src/pl/plperl/plperl.c                  | 62 ++++++++++++++++---------
 src/pl/plperl/sql/plperl_array.sql      | 37 +++++++++++++++
 3 files changed, 119 insertions(+), 23 deletions(-)

diff --git a/src/pl/plperl/expected/plperl_array.out b/src/pl/plperl/expected/plperl_array.out
index 6347b5211d2..bd04a062fb9 100644
--- a/src/pl/plperl/expected/plperl_array.out
+++ b/src/pl/plperl/expected/plperl_array.out
@@ -215,6 +215,49 @@ select plperl_arrays_inout_l('{{1}, {2}, {3}}');
  {{1},{2},{3}}
 (1 row)
 
+-- check output of multi-dimensional arrays
+CREATE FUNCTION plperl_md_array_out() RETURNS text[] AS $$
+	return [['a'], ['b'], ['c']];
+$$ LANGUAGE plperl;
+select plperl_md_array_out();
+ plperl_md_array_out 
+---------------------
+ {{a},{b},{c}}
+(1 row)
+
+CREATE OR REPLACE FUNCTION plperl_md_array_out() RETURNS text[] AS $$
+	return [[], []];
+$$ LANGUAGE plperl;
+select plperl_md_array_out();
+ plperl_md_array_out 
+---------------------
+ {}
+(1 row)
+
+CREATE OR REPLACE FUNCTION plperl_md_array_out() RETURNS text[] AS $$
+	return [[], [1]];
+$$ LANGUAGE plperl;
+select plperl_md_array_out();  -- fail
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  PL/Perl function "plperl_md_array_out"
+CREATE OR REPLACE FUNCTION plperl_md_array_out() RETURNS text[] AS $$
+	return [[], 1];
+$$ LANGUAGE plperl;
+select plperl_md_array_out();  -- fail
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  PL/Perl function "plperl_md_array_out"
+CREATE OR REPLACE FUNCTION plperl_md_array_out() RETURNS text[] AS $$
+	return [1, []];
+$$ LANGUAGE plperl;
+select plperl_md_array_out();  -- fail
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  PL/Perl function "plperl_md_array_out"
+CREATE OR REPLACE FUNCTION plperl_md_array_out() RETURNS text[] AS $$
+	return [[1], [[]]];
+$$ LANGUAGE plperl;
+select plperl_md_array_out();  -- fail
+ERROR:  multidimensional arrays must have array expressions with matching dimensions
+CONTEXT:  PL/Perl function "plperl_md_array_out"
 -- make sure setof works
 create or replace function perl_setof_array(integer[]) returns setof integer[] language plperl as $$
 	my $arr = shift;
diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c
index 3aff8e95450..c214a1daa91 100644
--- a/src/pl/plperl/plperl.c
+++ b/src/pl/plperl/plperl.c
@@ -276,9 +276,9 @@ static Datum plperl_sv_to_datum(SV *sv, Oid typid, int32 typmod,
 								bool *isnull);
 static void _sv_to_datum_finfo(Oid typid, FmgrInfo *finfo, Oid *typioparam);
 static Datum plperl_array_to_datum(SV *src, Oid typid, int32 typmod);
-static void array_to_datum_internal(AV *av, ArrayBuildState *astate,
+static void array_to_datum_internal(AV *av, ArrayBuildState **astatep,
 									int *ndims, int *dims, int cur_depth,
-									Oid arraytypid, Oid elemtypid, int32 typmod,
+									Oid elemtypid, int32 typmod,
 									FmgrInfo *finfo, Oid typioparam);
 static Datum plperl_hash_to_datum(SV *src, TupleDesc td);
 
@@ -1164,11 +1164,16 @@ get_perl_array_ref(SV *sv)
 
 /*
  * helper function for plperl_array_to_datum, recurses for multi-D arrays
+ *
+ * The ArrayBuildState is created only when we first find a scalar element;
+ * if we didn't do it like that, we'd need some other convention for knowing
+ * whether we'd already found any scalars (and thus the number of dimensions
+ * is frozen).
  */
 static void
-array_to_datum_internal(AV *av, ArrayBuildState *astate,
+array_to_datum_internal(AV *av, ArrayBuildState **astatep,
 						int *ndims, int *dims, int cur_depth,
-						Oid arraytypid, Oid elemtypid, int32 typmod,
+						Oid elemtypid, int32 typmod,
 						FmgrInfo *finfo, Oid typioparam)
 {
 	dTHX;
@@ -1188,28 +1193,34 @@ array_to_datum_internal(AV *av, ArrayBuildState *astate,
 		{
 			AV		   *nav = (AV *) SvRV(sav);
 
-			/* dimensionality checks */
-			if (cur_depth + 1 > MAXDIM)
-				ereport(ERROR,
-						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-						 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-								cur_depth + 1, MAXDIM)));
-
 			/* set size when at first element in this level, else compare */
 			if (i == 0 && *ndims == cur_depth)
 			{
+				/* array after some scalars at same level? */
+				if (*astatep != NULL)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+							 errmsg("multidimensional arrays must have array expressions with matching dimensions")));
+				/* too many dimensions? */
+				if (cur_depth + 1 > MAXDIM)
+					ereport(ERROR,
+							(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+							 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+									cur_depth + 1, MAXDIM)));
+				/* OK, add a dimension */
 				dims[*ndims] = av_len(nav) + 1;
 				(*ndims)++;
 			}
-			else if (av_len(nav) + 1 != dims[cur_depth])
+			else if (cur_depth >= *ndims ||
+					 av_len(nav) + 1 != dims[cur_depth])
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 						 errmsg("multidimensional arrays must have array expressions with matching dimensions")));
 
 			/* recurse to fetch elements of this sub-array */
-			array_to_datum_internal(nav, astate,
+			array_to_datum_internal(nav, astatep,
 									ndims, dims, cur_depth + 1,
-									arraytypid, elemtypid, typmod,
+									elemtypid, typmod,
 									finfo, typioparam);
 		}
 		else
@@ -1231,7 +1242,13 @@ array_to_datum_internal(AV *av, ArrayBuildState *astate,
 									 typioparam,
 									 &isnull);
 
-			(void) accumArrayResult(astate, dat, isnull,
+			/* Create ArrayBuildState if we didn't already */
+			if (*astatep == NULL)
+				*astatep = initArrayResult(elemtypid,
+										   CurrentMemoryContext, true);
+
+			/* ... and save the element value in it */
+			(void) accumArrayResult(*astatep, dat, isnull,
 									elemtypid, CurrentMemoryContext);
 		}
 	}
@@ -1244,7 +1261,8 @@ static Datum
 plperl_array_to_datum(SV *src, Oid typid, int32 typmod)
 {
 	dTHX;
-	ArrayBuildState *astate;
+	AV		   *nav = (AV *) SvRV(src);
+	ArrayBuildState *astate = NULL;
 	Oid			elemtypid;
 	FmgrInfo	finfo;
 	Oid			typioparam;
@@ -1260,21 +1278,19 @@ plperl_array_to_datum(SV *src, Oid typid, int32 typmod)
 				 errmsg("cannot convert Perl array to non-array type %s",
 						format_type_be(typid))));
 
-	astate = initArrayResult(elemtypid, CurrentMemoryContext, true);
-
 	_sv_to_datum_finfo(elemtypid, &finfo, &typioparam);
 
 	memset(dims, 0, sizeof(dims));
-	dims[0] = av_len((AV *) SvRV(src)) + 1;
+	dims[0] = av_len(nav) + 1;
 
-	array_to_datum_internal((AV *) SvRV(src), astate,
+	array_to_datum_internal(nav, &astate,
 							&ndims, dims, 1,
-							typid, elemtypid, typmod,
+							elemtypid, typmod,
 							&finfo, typioparam);
 
 	/* ensure we get zero-D array for no inputs, as per PG convention */
-	if (dims[0] <= 0)
-		ndims = 0;
+	if (astate == NULL)
+		return PointerGetDatum(construct_empty_array(elemtypid));
 
 	for (i = 0; i < ndims; i++)
 		lbs[i] = 1;
diff --git a/src/pl/plperl/sql/plperl_array.sql b/src/pl/plperl/sql/plperl_array.sql
index 66179294ce8..ca63b5db625 100644
--- a/src/pl/plperl/sql/plperl_array.sql
+++ b/src/pl/plperl/sql/plperl_array.sql
@@ -159,6 +159,43 @@ $$ LANGUAGE plperl;
 
 select plperl_arrays_inout_l('{{1}, {2}, {3}}');
 
+-- check output of multi-dimensional arrays
+CREATE FUNCTION plperl_md_array_out() RETURNS text[] AS $$
+	return [['a'], ['b'], ['c']];
+$$ LANGUAGE plperl;
+
+select plperl_md_array_out();
+
+CREATE OR REPLACE FUNCTION plperl_md_array_out() RETURNS text[] AS $$
+	return [[], []];
+$$ LANGUAGE plperl;
+
+select plperl_md_array_out();
+
+CREATE OR REPLACE FUNCTION plperl_md_array_out() RETURNS text[] AS $$
+	return [[], [1]];
+$$ LANGUAGE plperl;
+
+select plperl_md_array_out();  -- fail
+
+CREATE OR REPLACE FUNCTION plperl_md_array_out() RETURNS text[] AS $$
+	return [[], 1];
+$$ LANGUAGE plperl;
+
+select plperl_md_array_out();  -- fail
+
+CREATE OR REPLACE FUNCTION plperl_md_array_out() RETURNS text[] AS $$
+	return [1, []];
+$$ LANGUAGE plperl;
+
+select plperl_md_array_out();  -- fail
+
+CREATE OR REPLACE FUNCTION plperl_md_array_out() RETURNS text[] AS $$
+	return [[1], [[]]];
+$$ LANGUAGE plperl;
+
+select plperl_md_array_out();  -- fail
+
 -- make sure setof works
 create or replace function perl_setof_array(integer[]) returns setof integer[] language plperl as $$
 	my $arr = shift;

From 1bf6fcf7f08dd52a9658047298748a94958269b6 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Tue, 2 May 2023 11:41:00 +0900
Subject: [PATCH 73/78] doc: Fix typo in pg_amcheck for term "schema"

Author: Alexander Lakhin
Discussion: https://postgr.es/m/e8c38840-596a-83d6-bd8d-cebc51111572@gmail.com
Backpatch-through: 14
---
 doc/src/sgml/ref/pg_amcheck.sgml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/src/sgml/ref/pg_amcheck.sgml b/doc/src/sgml/ref/pg_amcheck.sgml
index 43ee73ab93c..1abaef93a76 100644
--- a/doc/src/sgml/ref/pg_amcheck.sgml
+++ b/doc/src/sgml/ref/pg_amcheck.sgml
@@ -159,7 +159,7 @@ PostgreSQL documentation
        Patterns may be unqualified, e.g. <literal>myrel*</literal>, or they
        may be schema-qualified, e.g. <literal>myschema*.myrel*</literal> or
        database-qualified and schema-qualified, e.g.
-       <literal>mydb*.myscheam*.myrel*</literal>. A database-qualified
+       <literal>mydb*.myschema*.myrel*</literal>. A database-qualified
        pattern will add matching databases to the list of databases to be
        checked.
       </para>

From b8476c0c272cd5e1045de2680a7e7844d9fe271e Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 2 May 2023 17:55:01 -0400
Subject: [PATCH 74/78] Doc: clarify behavior of row-limit arguments in the
 PLs' SPI wrappers.

plperl, plpython, and pltcl all provide query-execution functions
that are thin wrappers around SPI_execute() or its variants.
The SPI functions document their row-count limit arguments clearly,
as "maximum number of rows to return, or 0 for no limit".  However
the PLs' documentation failed to explain this special behavior of
zero, so that a reader might well assume it means "fetch zero
rows".  Improve that.

Daniel Gustafsson and Tom Lane, per report from Kieran McCusker

Discussion: https://postgr.es/m/CAGgUQ6H6qYScctOhktQ9HLFDDoafBKHyUgJbZ6q_dOApnzNTXg@mail.gmail.com
---
 doc/src/sgml/plperl.sgml   | 21 ++++++++++++++++-----
 doc/src/sgml/plpython.sgml | 13 +++++++++++--
 doc/src/sgml/pltcl.sgml    |  8 +++++---
 3 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/doc/src/sgml/plperl.sgml b/doc/src/sgml/plperl.sgml
index ee02c01f106..081a146a07b 100644
--- a/doc/src/sgml/plperl.sgml
+++ b/doc/src/sgml/plperl.sgml
@@ -441,7 +441,7 @@ use strict;
    <variablelist>
     <varlistentry>
      <term>
-      <literal><function>spi_exec_query</function>(<replaceable>query</replaceable> [, <replaceable>max-rows</replaceable>])</literal>
+      <literal><function>spi_exec_query</function>(<replaceable>query</replaceable> [, <replaceable>limit</replaceable>])</literal>
       <indexterm>
        <primary>spi_exec_query</primary>
        <secondary>in PL/Perl</secondary>
@@ -449,9 +449,17 @@ use strict;
      </term>
      <listitem>
       <para>
-       <literal>spi_exec_query</literal> executes an SQL command and
-returns the entire row set as a reference to an array of hash
-references.  <emphasis>You should only use this command when you know
+       <function>spi_exec_query</function> executes an SQL command and
+returns the entire row set as a reference to an array of hash references.
+If <replaceable>limit</replaceable> is specified and is greater than zero,
+then <function>spi_exec_query</function> retrieves at
+most <replaceable>limit</replaceable> rows, much as if the query included
+a <literal>LIMIT</literal> clause.  Omitting <replaceable>limit</replaceable>
+or specifying it as zero results in no row limit.
+      </para>
+
+      <para>
+<emphasis>You should only use this command when you know
 that the result set will be relatively small.</emphasis>  Here is an
 example of a query (<command>SELECT</command> command) with the
 optional maximum number of rows:
@@ -643,7 +651,10 @@ $plan = spi_prepare('SELECT * FROM test WHERE id &gt; $1 AND name = $2',
     by <literal>spi_exec_query</literal>, or in <literal>spi_query_prepared</literal> which returns a cursor
     exactly as <literal>spi_query</literal> does, which can be later passed to <literal>spi_fetchrow</literal>.
     The optional second parameter to <literal>spi_exec_prepared</literal> is a hash reference of attributes;
-    the only attribute currently supported is <literal>limit</literal>, which sets the maximum number of rows returned by a query.
+    the only attribute currently supported is <literal>limit</literal>, which
+    sets the maximum number of rows returned from the query.
+    Omitting <literal>limit</literal> or specifying it as zero results in no
+    row limit.
     </para>
 
     <para>
diff --git a/doc/src/sgml/plpython.sgml b/doc/src/sgml/plpython.sgml
index b67f8f4aaed..06ab7dd0708 100644
--- a/doc/src/sgml/plpython.sgml
+++ b/doc/src/sgml/plpython.sgml
@@ -937,7 +937,7 @@ $$ LANGUAGE plpythonu;
 
   <variablelist>
    <varlistentry>
-    <term><literal>plpy.<function>execute</function>(<replaceable>query</replaceable> [, <replaceable>max-rows</replaceable>])</literal></term>
+    <term><literal>plpy.<function>execute</function>(<replaceable>query</replaceable> [, <replaceable>limit</replaceable>])</literal></term>
     <listitem>
      <para>
       Calling <function>plpy.execute</function> with a query string and an
@@ -945,6 +945,15 @@ $$ LANGUAGE plpythonu;
       be returned in a result object.
      </para>
 
+     <para>
+      If <replaceable>limit</replaceable> is specified and is greater than
+      zero, then <function>plpy.execute</function> retrieves at
+      most <replaceable>limit</replaceable> rows, much as if the query
+      included a <literal>LIMIT</literal>
+      clause.  Omitting <replaceable>limit</replaceable> or specifying it as
+      zero results in no row limit.
+     </para>
+
      <para>
       The result object emulates a list or dictionary object.  The result
       object can be accessed by row number and column name.  For example:
@@ -1035,7 +1044,7 @@ foo = rv[i]["my_column"]
 
    <varlistentry>
     <term><literal>plpy.<function>prepare</function>(<replaceable>query</replaceable> [, <replaceable>argtypes</replaceable>])</literal></term>
-    <term><literal>plpy.<function>execute</function>(<replaceable>plan</replaceable> [, <replaceable>arguments</replaceable> [, <replaceable>max-rows</replaceable>]])</literal></term>
+    <term><literal>plpy.<function>execute</function>(<replaceable>plan</replaceable> [, <replaceable>arguments</replaceable> [, <replaceable>limit</replaceable>]])</literal></term>
     <listitem>
      <para>
       <indexterm><primary>preparing a query</primary><secondary>in PL/Python</secondary></indexterm>
diff --git a/doc/src/sgml/pltcl.sgml b/doc/src/sgml/pltcl.sgml
index 9839e375ad3..a4826eda8b1 100644
--- a/doc/src/sgml/pltcl.sgml
+++ b/doc/src/sgml/pltcl.sgml
@@ -341,9 +341,11 @@ $$ LANGUAGE pltcl;
        </para>
        <para>
         The optional <literal>-count</literal> value tells
-        <function>spi_exec</function> the maximum number of rows
-        to process in the command.  The effect of this is comparable to
-        setting up a query as a cursor and then saying <literal>FETCH <replaceable>n</replaceable></literal>.
+        <function>spi_exec</function> to stop
+        once <replaceable>n</replaceable> rows have been retrieved,
+        much as if the query included a <literal>LIMIT</literal> clause.
+        If <replaceable>n</replaceable> is zero, the query is run to
+        completion, the same as when <literal>-count</literal> is omitted.
        </para>
        <para>
         If the command is a <command>SELECT</command> statement, the values of the

From f8cdf6d77c29102618126fb359c8ffe54f391e55 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 4 May 2023 11:48:23 -0400
Subject: [PATCH 75/78] In array_position()/array_positions(), beware of empty
 input array.

These functions incautiously fetched the array's first lower bound
even when the array is zero-dimensional, thus fetching the word
after the allocated array space.  While almost always harmless,
with very bad luck this could result in SIGSEGV.  Fix by adding
an early exit for empty input.

Per bug #17920 from Alexander Lakhin.

Discussion: https://postgr.es/m/17920-f7c228c627b6d02e%40postgresql.org
---
 src/backend/utils/adt/array_userfuncs.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index dc0516c7dbc..bf777b5913b 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1514,7 +1514,6 @@ array_position_common(FunctionCallInfo fcinfo)
 		PG_RETURN_NULL();
 
 	array = PG_GETARG_ARRAYTYPE_P(0);
-	element_type = ARR_ELEMTYPE(array);
 
 	/*
 	 * We refuse to search for elements in multi-dimensional arrays, since we
@@ -1525,6 +1524,10 @@ array_position_common(FunctionCallInfo fcinfo)
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("searching for elements in multidimensional arrays is not supported")));
 
+	/* Searching in an empty array is well-defined, though: it always fails */
+	if (ARR_NDIM(array) < 1)
+		PG_RETURN_NULL();
+
 	if (PG_ARGISNULL(1))
 	{
 		/* fast return when the array doesn't have nulls */
@@ -1539,6 +1542,7 @@ array_position_common(FunctionCallInfo fcinfo)
 		null_search = false;
 	}
 
+	element_type = ARR_ELEMTYPE(array);
 	position = (ARR_LBOUND(array))[0] - 1;
 
 	/* figure out where to start */
@@ -1664,9 +1668,6 @@ array_positions(PG_FUNCTION_ARGS)
 		PG_RETURN_NULL();
 
 	array = PG_GETARG_ARRAYTYPE_P(0);
-	element_type = ARR_ELEMTYPE(array);
-
-	position = (ARR_LBOUND(array))[0] - 1;
 
 	/*
 	 * We refuse to search for elements in multi-dimensional arrays, since we
@@ -1679,6 +1680,10 @@ array_positions(PG_FUNCTION_ARGS)
 
 	astate = initArrayResult(INT4OID, CurrentMemoryContext, false);
 
+	/* Searching in an empty array is well-defined, though: it always fails */
+	if (ARR_NDIM(array) < 1)
+		PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+
 	if (PG_ARGISNULL(1))
 	{
 		/* fast return when the array doesn't have nulls */
@@ -1693,6 +1698,9 @@ array_positions(PG_FUNCTION_ARGS)
 		null_search = false;
 	}
 
+	element_type = ARR_ELEMTYPE(array);
+	position = (ARR_LBOUND(array))[0] - 1;
+
 	/*
 	 * We arrange to look up type info for array_create_iterator only once per
 	 * series of calls, assuming the element type doesn't change underneath

From 203bf50875ad91983e6dfc3fbdac85b82fdb375a Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Fri, 5 May 2023 06:29:49 +0200
Subject: [PATCH 76/78] Fix prove_installcheck when used with PGXS

Commit 153e215677 added the portlock directory.  This is created in
$ENV{top_builddir} if it is set.  Under PGXS, top_builddir points into
the installation directory, which is not necessarily writable and in
any case inappropriate to use by a test suite.  The cause of the
problem is that the prove_installcheck target in Makefile.global
exports top_builddir, which isn't useful (since no other Perl code
actually reads it) and breaks this use case.  The reason this code is
there is probably that it has been dragged around with various other
changes, in particular a0fc813266, but without a real purpose of its
own.  By just removing the exporting of top_builddir in
prove_installcheck, the portlock directory then ends up under
tmp_check in the build directory, which is more suitable.

Reviewed-by: Andrew Dunstan <andrew@dunslane.net>
Discussion: https://www.postgresql.org/message-id/78d1cfa6-0065-865d-584b-cde6d8c18aff@enterprisedb.com
---
 src/Makefile.global.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 062ec75b039..77b58e7aa76 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -520,7 +520,7 @@ rm -rf '$(CURDIR)'/tmp_check
 $(MKDIR_P) '$(CURDIR)'/tmp_check
 cd $(srcdir) && \
    TESTDIR='$(CURDIR)' PATH="$(bindir):$(CURDIR):$$PATH" \
-   PGPORT='6$(DEF_PGPORT)' top_builddir='$(top_builddir)' \
+   PGPORT='6$(DEF_PGPORT)' \
    PG_REGRESS='$(top_builddir)/src/test/regress/pg_regress' \
    $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),t/*.pl)
 endef

From 0fe678efc4785d8008aa109fbac4150f4cb5c562 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Fri, 5 May 2023 21:25:56 +0900
Subject: [PATCH 77/78] Fix typo with wait event for SLRU buffer of commit
 timestamps

This wait event was documented as "CommitTsBuffer" since its
introduction, but the code named it "CommitTSBuffer".  This commit fixes
the code to follow the term documented, which is also more consistent
with the naming of the other wait events used for commit timestamps.

Introduced by 5da1493.

Author: Alexander Lakhin
Discussion: https://postgr.es/m/e8c38840-596a-83d6-bd8d-cebc51111572@gmail.com
Backpatch-through: 13
---
 src/backend/storage/lmgr/lwlock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 06d97fd38db..f0cf60be6b5 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -131,7 +131,7 @@ static const char *const BuiltinTrancheNames[] = {
 	/* LWTRANCHE_XACT_BUFFER: */
 	"XactBuffer",
 	/* LWTRANCHE_COMMITTS_BUFFER: */
-	"CommitTSBuffer",
+	"CommitTsBuffer",
 	/* LWTRANCHE_SUBTRANS_BUFFER: */
 	"SubtransBuffer",
 	/* LWTRANCHE_MULTIXACTOFFSET_BUFFER: */

From f9fed4432c4ac6d9ada419178931f7ba11732696 Mon Sep 17 00:00:00 2001
From: reshke <reshke@double.cloud>
Date: Sat, 6 Jun 2026 13:55:44 +0000
Subject: [PATCH 78/78] Adapt regression test suite for changes

---
 .../regress/expected/brin_multi_optimizer.out   |  6 ++++++
 .../regress/expected/brin_multi_optimizer_1.out |  6 ++++++
 src/test/regress/expected/create_schema.out     |  6 +++---
 src/test/regress/expected/tsearch_optimizer.out | 17 +++++++++++++++++
 4 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/src/test/regress/expected/brin_multi_optimizer.out b/src/test/regress/expected/brin_multi_optimizer.out
index 39100bfeb80..a31f69d9a16 100644
--- a/src/test/regress/expected/brin_multi_optimizer.out
+++ b/src/test/regress/expected/brin_multi_optimizer.out
@@ -502,6 +502,12 @@ VACUUM brintest_multi;  -- force a summarization cycle in brinidx
 insert into public.brintest_multi (float4col) values (real 'nan');
 insert into public.brintest_multi (float8col) values (real 'nan');
 UPDATE brintest_multi SET int8col = int8col * int4col;
+-- Test handling of inet netmasks with inet_minmax_multi_ops
+CREATE TABLE brin_test_inet (a inet);
+CREATE INDEX ON brin_test_inet USING brin (a inet_minmax_multi_ops);
+INSERT INTO brin_test_inet VALUES ('127.0.0.1/0');
+INSERT INTO brin_test_inet VALUES ('0.0.0.0/12');
+DROP TABLE brin_test_inet;
 -- Tests for brin_summarize_new_values
 SELECT brin_summarize_new_values('brintest_multi'); -- error, not an index
 ERROR:  "brintest_multi" is not an index
diff --git a/src/test/regress/expected/brin_multi_optimizer_1.out b/src/test/regress/expected/brin_multi_optimizer_1.out
index d995e7888d2..15e850fbca7 100644
--- a/src/test/regress/expected/brin_multi_optimizer_1.out
+++ b/src/test/regress/expected/brin_multi_optimizer_1.out
@@ -502,6 +502,12 @@ VACUUM brintest_multi;  -- force a summarization cycle in brinidx
 insert into public.brintest_multi (float4col) values (real 'nan');
 insert into public.brintest_multi (float8col) values (real 'nan');
 UPDATE brintest_multi SET int8col = int8col * int4col;
+-- Test handling of inet netmasks with inet_minmax_multi_ops
+CREATE TABLE brin_test_inet (a inet);
+CREATE INDEX ON brin_test_inet USING brin (a inet_minmax_multi_ops);
+INSERT INTO brin_test_inet VALUES ('127.0.0.1/0');
+INSERT INTO brin_test_inet VALUES ('0.0.0.0/12');
+DROP TABLE brin_test_inet;
 -- Tests for brin_summarize_new_values
 SELECT brin_summarize_new_values('brintest_multi'); -- error, not an index
 ERROR:  "brintest_multi" is not an index
diff --git a/src/test/regress/expected/create_schema.out b/src/test/regress/expected/create_schema.out
index 93302a07efc..324f357ae32 100644
--- a/src/test/regress/expected/create_schema.out
+++ b/src/test/regress/expected/create_schema.out
@@ -21,7 +21,7 @@ ERROR:  CREATE specifies a schema (schema_not_existing) different from the one b
 CREATE SCHEMA AUTHORIZATION regress_create_schema_role
   CREATE TRIGGER schema_trig BEFORE INSERT ON schema_not_existing.tab
   EXECUTE FUNCTION schema_trig.no_func();
-ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_create_schema_role)
+ERROR:  Triggers for statements are not yet supported
 -- Again, with a role specification and no schema names.
 SET ROLE regress_create_schema_role;
 CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
@@ -39,7 +39,7 @@ ERROR:  CREATE specifies a schema (schema_not_existing) different from the one b
 CREATE SCHEMA AUTHORIZATION CURRENT_ROLE
   CREATE TRIGGER schema_trig BEFORE INSERT ON schema_not_existing.tab
   EXECUTE FUNCTION schema_trig.no_func();
-ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_create_schema_role)
+ERROR:  Triggers for statements are not yet supported
 -- Again, with a schema name and a role specification.
 CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
   CREATE SEQUENCE schema_not_existing.seq;
@@ -56,7 +56,7 @@ ERROR:  CREATE specifies a schema (schema_not_existing) different from the one b
 CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE
   CREATE TRIGGER schema_trig BEFORE INSERT ON schema_not_existing.tab
   EXECUTE FUNCTION schema_trig.no_func();
-ERROR:  CREATE specifies a schema (schema_not_existing) different from the one being created (regress_schema_1)
+ERROR:  Triggers for statements are not yet supported
 RESET ROLE;
 -- Cases where the schema creation succeeds.
 -- The schema created matches the role name.
diff --git a/src/test/regress/expected/tsearch_optimizer.out b/src/test/regress/expected/tsearch_optimizer.out
index 1381b7c5ca2..ff0728b014d 100644
--- a/src/test/regress/expected/tsearch_optimizer.out
+++ b/src/test/regress/expected/tsearch_optimizer.out
@@ -2006,6 +2006,23 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
  <b>Lorem</b> ipsum <b>urna</b>.  Nullam nullam <b>ullamcorper</b> <b>urna</b>
 (1 row)
 
+-- Edge cases with empty query
+SELECT ts_headline('english',
+'', to_tsquery('english', ''));
+NOTICE:  text-search query doesn't contain lexemes: ""
+ ts_headline 
+-------------
+ 
+(1 row)
+
+SELECT ts_headline('english',
+'foo bar', to_tsquery('english', ''));
+NOTICE:  text-search query doesn't contain lexemes: ""
+ ts_headline 
+-------------
+ foo bar
+(1 row)
+
 --Rewrite sub system
 CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
 \set ECHO none